Improve the security of the DB

This commit is contained in:
Urtzi Alfaro
2025-10-19 19:22:37 +02:00
parent 62971c07d7
commit 05da20357d
87 changed files with 7998 additions and 932 deletions


@@ -119,9 +119,46 @@ class EnhancedBakeryMLTrainer:
logger.info("Multiple products detected for training",
products_count=len(products))
# Event 1: Training Started (0%) - update with actual product count
# Note: Initial event was already published by API endpoint, this updates with real count
await publish_training_started(job_id, tenant_id, len(products))
# Event 1: Training Started (0%) - update with actual product count AND time estimates
# Calculate accurate time estimates now that we know the actual product count
from app.utils.time_estimation import (
calculate_initial_estimate,
calculate_estimated_completion_time,
get_historical_average_estimate
)
# Try to get historical average for more accurate estimates
try:
historical_avg = await asyncio.get_event_loop().run_in_executor(
None,
get_historical_average_estimate,
db_session,
tenant_id
)
avg_time_per_product = historical_avg if historical_avg else 60.0
logger.info("Using historical average for time estimation",
avg_time_per_product=avg_time_per_product,
has_historical_data=historical_avg is not None)
except Exception as e:
logger.warning("Could not get historical average, using default",
error=str(e))
avg_time_per_product = 60.0
estimated_duration_minutes = calculate_initial_estimate(
total_products=len(products),
avg_training_time_per_product=avg_time_per_product
)
estimated_completion_time = calculate_estimated_completion_time(estimated_duration_minutes)
# Note: Initial event was already published by API endpoint with estimated product count,
# this updates with real count and recalculated time estimates based on actual data
await publish_training_started(
job_id=job_id,
tenant_id=tenant_id,
total_products=len(products),
estimated_duration_minutes=estimated_duration_minutes,
estimated_completion_time=estimated_completion_time.isoformat()
)
# Create initial training log entry
await repos['training_log'].update_log_progress(
@@ -135,10 +172,25 @@ class EnhancedBakeryMLTrainer:
)
# Event 2: Data Analysis (20%)
# Recalculate time remaining based on elapsed time
start_time = repos['training_log']._get_start_time(job_id) if hasattr(repos['training_log'], '_get_start_time') else None
elapsed_seconds = int((datetime.now(timezone.utc) - start_time).total_seconds()) if start_time else 0
# Estimate remaining time: we've done ~20% of work (data analysis)
# Remaining 80% includes training all products
products_to_train = len(processed_data)
estimated_remaining_seconds = int(products_to_train * avg_time_per_product)
# Recalculate estimated completion time
estimated_completion_time_data_analysis = calculate_estimated_completion_time(
estimated_remaining_seconds / 60
)
await publish_data_analysis(
job_id,
tenant_id,
f"Data analysis completed for {len(processed_data)} products"
f"Data analysis completed for {len(processed_data)} products",
estimated_time_remaining_seconds=estimated_remaining_seconds,
estimated_completion_time=estimated_completion_time_data_analysis.isoformat()
)
# Train models for each processed product with progress aggregation
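For context, this diff only shows how the app.utils.time_estimation helpers are called, not how they are implemented. Below is a minimal sketch of what get_historical_average_estimate, calculate_initial_estimate, and calculate_estimated_completion_time could look like, assuming the conventions implied by the call sites above (per-product times in seconds, durations in minutes, timezone-aware completion timestamps, a synchronous SQLAlchemy session for the historical lookup). The table and column names are hypothetical; the actual module in this commit may differ.

# Hypothetical sketch of app/utils/time_estimation.py, inferred from the call
# sites in EnhancedBakeryMLTrainer; not the committed implementation.
from datetime import datetime, timedelta, timezone
from typing import Optional

from sqlalchemy import text  # assumes the project uses SQLAlchemy sessions

DEFAULT_SECONDS_PER_PRODUCT = 60.0


def get_historical_average_estimate(db_session, tenant_id) -> Optional[float]:
    """Average training time per product (seconds) for this tenant, or None
    when no completed runs exist yet. Table/columns here are assumptions."""
    row = db_session.execute(
        text(
            "SELECT AVG(duration_seconds / NULLIF(products_count, 0)) "
            "FROM training_logs "
            "WHERE tenant_id = :tenant_id AND status = 'completed'"
        ),
        {"tenant_id": tenant_id},
    ).scalar()
    return float(row) if row else None


def calculate_initial_estimate(
    total_products: int,
    avg_training_time_per_product: float = DEFAULT_SECONDS_PER_PRODUCT,
) -> float:
    """Estimated total duration in minutes to train all products."""
    return (total_products * avg_training_time_per_product) / 60.0


def calculate_estimated_completion_time(estimated_duration_minutes: float) -> datetime:
    """Timezone-aware wall-clock time at which training is expected to finish."""
    return datetime.now(timezone.utc) + timedelta(minutes=estimated_duration_minutes)

Under these assumptions the two call sites stay consistent: the initial estimate multiplies the product count by a per-product average in seconds and converts to minutes, and the Event 2 recalculation passes estimated_remaining_seconds / 60 so that calculate_estimated_completion_time always receives minutes.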