Fix new Frontend 12

This commit is contained in:
Urtzi Alfaro
2025-08-04 18:21:42 +02:00
parent d4c276c888
commit 35b02ca364
6 changed files with 528 additions and 174 deletions

View File

@@ -10,6 +10,8 @@ import numpy as np
from datetime import datetime
import logging
import uuid
import time
from datetime import datetime
from app.ml.data_processor import BakeryDataProcessor
from app.ml.prophet_manager import BakeryProphetManager
@@ -75,6 +77,7 @@ class BakeryMLTrainer:
processed_data = await self._process_all_products(
sales_df, weather_df, traffic_df, products
)
await publish_job_progress(job_id, tenant_id, 20, "feature_engineering", estimated_time_remaining_minutes=7)
# Train models for each processed product
logger.info("Training models for all products...")
@@ -84,6 +87,7 @@ class BakeryMLTrainer:
# Calculate overall training summary
summary = self._calculate_training_summary(training_results)
await publish_job_progress(job_id, tenant_id, 90, "model_validation", estimated_time_remaining_minutes=1)
result = {
"job_id": job_id,
@@ -354,6 +358,41 @@ class BakeryMLTrainer:
return processed_data
def calculate_estimated_time_remaining(self, processing_times: List[float], completed: int, total: int) -> int:
    """
    Estimate how many minutes of work remain, based on observed per-item times.

    Args:
        processing_times: Durations (seconds) of the items processed so far.
        completed: Count of items already finished.
        total: Total number of items in the job.

    Returns:
        Estimated remaining time in whole minutes (rounded up, never below 1),
        or 0 when no timings exist yet or the job is already complete.
    """
    if completed >= total or not processing_times:
        return 0

    overall_avg = sum(processing_times) / len(processing_times)

    # Blend the last three samples in at 70% weight so the estimate tracks
    # recent throughput more closely than the whole-run average.
    per_item = overall_avg
    if len(processing_times) > 3:
        tail = processing_times[-3:]
        per_item = (sum(tail) / len(tail)) * 0.7 + overall_avg * 0.3

    seconds_left = (total - completed) * per_item

    # Ceiling-convert seconds to minutes, reporting at least 1 minute.
    whole_minutes, leftover = divmod(seconds_left, 60)
    return max(1, int(whole_minutes) + (1 if leftover > 0 else 0))
async def _train_all_models(self,
tenant_id: str,
processed_data: Dict[str, pd.DataFrame],
@@ -361,7 +400,17 @@ class BakeryMLTrainer:
"""Train models for all processed products using Prophet manager"""
training_results = {}
total_products = len(processed_data)
base_progress = 45
max_progress = 85 # or whatever your target end progress is
products_total = 0
i = 0
start_time = time.time()
processing_times = [] # Store individual processing times
for product_name, product_data in processed_data.items():
product_start_time = time.time()
try:
logger.info(f"Training model for product: {product_name}")
@@ -375,6 +424,7 @@ class BakeryMLTrainer:
'message': f'Need at least {settings.MIN_TRAINING_DATA_DAYS} data points, got {len(product_data)}'
}
logger.warning(f"Skipping {product_name}: insufficient data ({len(product_data)} < {settings.MIN_TRAINING_DATA_DAYS})")
processing_times.append(time.time() - product_start_time)
continue
# Train the model using Prophet manager
@@ -402,6 +452,29 @@ class BakeryMLTrainer:
'data_points': len(product_data) if product_data is not None else 0,
'failed_at': datetime.now().isoformat()
}
# Record processing time for this product
product_processing_time = time.time() - product_start_time
processing_times.append(product_processing_time)
i += 1
current_progress = base_progress + int((i / total_products) * (max_progress - base_progress))
# Calculate estimated time remaining
estimated_time_remaining_minutes = self.calculate_estimated_time_remaining(
processing_times, i, total_products
)
await publish_job_progress(
job_id,
tenant_id,
current_progress,
"model_training",
product_name,
products_total,
total_products,
estimated_time_remaining_minutes=estimated_time_remaining_minutes
)
return training_results