Fix new services implementation 5

2025-08-15 17:53:59 +02:00
parent 03b4d4185d
commit f7de9115d1
43 changed files with 1714 additions and 891 deletions
--- a/services/training/app/api/training.py
+++ b/services/training/app/api/training.py
@@ -186,6 +186,15 @@ async def execute_enhanced_training_job_background(
    enhanced_training_service = EnhancedTrainingService(database_manager)

    try:
+        # Create initial training log entry first
+        await enhanced_training_service._update_job_status_repository(
+            job_id=job_id,
+            status="pending",
+            progress=0,
+            current_step="Starting enhanced training job",
+            tenant_id=tenant_id
+        )
+        
        # Publish job started event
        await publish_job_started(job_id, tenant_id, {
            "enhanced_features": True,
@@ -214,7 +223,8 @@ async def execute_enhanced_training_job_background(
            job_id=job_id,
            status="running",
            progress=0,
-            current_step="Initializing enhanced training pipeline"
+            current_step="Initializing enhanced training pipeline",
+            tenant_id=tenant_id
        )
        
        # Execute the enhanced training pipeline with repository pattern
@@ -232,7 +242,8 @@ async def execute_enhanced_training_job_background(
            status="completed",
            progress=100,
            current_step="Enhanced training completed successfully",
-            results=result
+            results=result,
+            tenant_id=tenant_id
        )
        
        # Publish enhanced completion event
@@ -262,7 +273,8 @@ async def execute_enhanced_training_job_background(
                status="failed",
                progress=0,
                current_step="Enhanced training failed",
-                error_message=str(training_error)
+                error_message=str(training_error),
+                tenant_id=tenant_id
            )
        except Exception as status_error:
            logger.error("Failed to update job status after training error",
--- a/services/training/app/ml/trainer.py
+++ b/services/training/app/ml/trainer.py
@@ -92,9 +92,27 @@ class EnhancedBakeryMLTrainer:
                
                # Get unique products from the sales data
                products = sales_df['inventory_product_id'].unique().tolist()
-                logger.info("Training enhanced models",
+                
+                # Debug: Log sales data details to understand why only one product is found
+                total_sales_records = len(sales_df)
+                sales_by_product = sales_df.groupby('inventory_product_id').size().to_dict()
+                
+                logger.info("Enhanced training pipeline - Sales data analysis",
+                           total_sales_records=total_sales_records,
                           products_count=len(products),
-                           products=products)
+                           products=products,
+                           sales_by_product=sales_by_product)
+                
+                if len(products) == 1:
+                    logger.warning("Only ONE product found in sales data - this may indicate a data fetching issue",
+                                 tenant_id=tenant_id,
+                                 single_product_id=products[0],
+                                 total_sales_records=total_sales_records)
+                elif len(products) == 0:
+                    raise ValueError("No products found in sales data")
+                else:
+                    logger.info("Multiple products detected for training",
+                               products_count=len(products))
                
                self.status_publisher.products_total = len(products)
                
@@ -512,7 +530,7 @@ class EnhancedBakeryMLTrainer:
                       from_column='quantity_sold',
                       to_column='quantity')
        
-        required_columns = ['date', 'product_name', 'quantity']
+        required_columns = ['date', 'inventory_product_id', 'quantity']
        missing_columns = [col for col in required_columns if col not in sales_df.columns]
        if missing_columns:
            raise ValueError(f"Missing required columns: {missing_columns}")
@@ -541,7 +559,7 @@ class EnhancedBakeryMLTrainer:
        try:
            logger.info("Enhanced model evaluation starting",
                       tenant_id=tenant_id,
-                       product_name=product_name)
+                       inventory_product_id=inventory_product_id)
            
            # Get database session and repositories
            async with self.database_manager.get_session() as db_session:
--- a/services/training/app/services/training_orchestrator.py
+++ b/services/training/app/services/training_orchestrator.py
@@ -574,13 +574,14 @@ class TrainingDataOrchestrator:
            if city_count >= 1:  # At least some city awareness
                city_aware_records += 1
            
-            # Record is valid if it has basic requirements
-            if record_score >= 2:
+            # Record is valid if it meets the minimum requirement (a date or any traffic field)
+            # Lowered requirement from >= 2 to >= 1 to accept records with just date or traffic data
+            if record_score >= 1:
                valid_records += 1
        
        total_records = len(traffic_data)
-        validity_threshold = 0.3
-        enhancement_threshold = 0.2  # Lower threshold for enhanced features
+        validity_threshold = 0.1  # Reduced from 0.3 to 0.1 - accept if 10% of records are valid
+        enhancement_threshold = 0.1  # Reduced threshold for enhanced features
        
        basic_validity = (valid_records / total_records) >= validity_threshold
        has_enhancements = (enhanced_records / total_records) >= enhancement_threshold
--- a/services/training/app/services/training_service.py
+++ b/services/training/app/services/training_service.py
@@ -141,6 +141,30 @@ class EnhancedTrainingService:
                    logger.error("Training aborted - no sales data", tenant_id=tenant_id, job_id=job_id)
                    raise ValueError(error_msg)
                
+                # Debug: Analyze the sales data structure to understand product distribution
+                sales_df_debug = pd.DataFrame(sales_data)
+                if 'inventory_product_id' in sales_df_debug.columns:
+                    unique_products_found = sales_df_debug['inventory_product_id'].unique()
+                    product_counts = sales_df_debug['inventory_product_id'].value_counts().to_dict()
+                    
+                    logger.info("Pre-flight sales data analysis",
+                               tenant_id=tenant_id, 
+                               job_id=job_id,
+                               total_sales_records=len(sales_data),
+                               unique_products_count=len(unique_products_found),
+                               unique_products=unique_products_found.tolist(),
+                               records_per_product=product_counts)
+                    
+                    if len(unique_products_found) == 1:
+                        logger.warning("POTENTIAL ISSUE: Only ONE unique product found in all sales data",
+                                     tenant_id=tenant_id,
+                                     single_product=unique_products_found[0],
+                                     record_count=len(sales_data))
+                else:
+                    logger.warning("No 'inventory_product_id' column found in sales data", 
+                                 tenant_id=tenant_id,
+                                 columns=list(sales_df_debug.columns))
+                
                logger.info(f"Pre-flight check passed: {len(sales_data)} sales records found", 
                           tenant_id=tenant_id, job_id=job_id)
                
@@ -536,18 +560,69 @@ class EnhancedTrainingService:
                                          progress: int = None,
                                          current_step: str = None,
                                          error_message: str = None,
-                                          results: Dict = None):
+                                          results: Dict = None,
+                                          tenant_id: str = None):
        """Update job status using repository pattern"""
        try:
            async with self.database_manager.get_session() as session:
                await self._init_repositories(session)
                
-                await self.training_log_repo.update_log_progress(
-                    job_id=job_id,
-                    progress=progress,
-                    current_step=current_step,
-                    status=status
-                )
+                # Check if log exists, create if not
+                existing_log = await self.training_log_repo.get_log_by_job_id(job_id)
+                
+                if not existing_log:
+                    # Create initial log entry
+                    if not tenant_id:
+                        # Extract tenant_id from job_id if not provided
+                        # Format: enhanced_training_{tenant_id}_{job_suffix}
+                        try:
+                            parts = job_id.split('_')
+                            if len(parts) >= 3 and parts[0] == 'enhanced' and parts[1] == 'training':
+                                tenant_id = parts[2]
+                        except Exception:
+                            logger.warning(f"Could not extract tenant_id from job_id {job_id}")
+                    
+                    if tenant_id:
+                        log_data = {
+                            "job_id": job_id,
+                            "tenant_id": tenant_id,
+                            "status": status or "pending",
+                            "progress": progress or 0,
+                            "current_step": current_step or "initializing",
+                            "start_time": datetime.utcnow()
+                        }
+                        
+                        if error_message:
+                            log_data["error_message"] = error_message
+                        if results:
+                            log_data["results"] = results
+                            
+                        await self.training_log_repo.create_training_log(log_data)
+                        logger.info("Created initial training log", job_id=job_id, tenant_id=tenant_id)
+                    else:
+                        logger.error("Cannot create training log without tenant_id", job_id=job_id)
+                        return
+                else:
+                    # Update existing log
+                    await self.training_log_repo.update_log_progress(
+                        job_id=job_id,
+                        progress=progress,
+                        current_step=current_step,
+                        status=status
+                    )
+                    
+                    # Update additional fields if provided
+                    if error_message or results:
+                        update_data = {}
+                        if error_message:
+                            update_data["error_message"] = error_message
+                        if results:
+                            update_data["results"] = results
+                        if status in ["completed", "failed"]:
+                            update_data["end_time"] = datetime.utcnow()
+                            
+                        if update_data:
+                            await self.training_log_repo.update(existing_log.id, update_data)
                
        except Exception as e:
            logger.error("Failed to update job status using repository",