Improve training code
@@ -1,721 +1,303 @@
# services/training/app/services/training_service.py

"""
Training service business logic

Orchestrates ML training operations and manages the job lifecycle.
Main Training Service - coordinates the complete training process.
This is the entry point from the API layer.
"""

from typing import Dict, List, Any, Optional
import logging
from datetime import datetime, timedelta
import asyncio
import uuid

from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select, update, and_
import httpx

from app.models.training import ModelTrainingLog, TrainedModel
from app.ml.trainer import BakeryMLTrainer
from app.schemas.training import TrainingJobRequest, SingleProductTrainingRequest
from app.services.messaging import publish_job_completed, publish_job_failed
from app.core.config import settings
from shared.monitoring.metrics import MetricsCollector
from app.services.data_client import DataServiceClient
from app.services.date_alignment_service import DateAlignmentService, DateRange, DataSourceType
from app.services.training_orchestrator import TrainingDataOrchestrator

from app.core.database import get_db_session

logger = logging.getLogger(__name__)
metrics = MetricsCollector("training-service")

class TrainingService:
    """
    Main service class for managing ML training operations.
    Replaces the old Celery-based training system with a clean async implementation.
    Coordinates the complete training pipeline - the entry point from the API layer,
    handling business logic and orchestration.
    """

    def __init__(self):
        self.ml_trainer = BakeryMLTrainer()
        self.data_client = DataServiceClient()

    async def _determine_sales_date_range(self, sales_data: List[Dict]) -> tuple[datetime, datetime]:
        """Determine start and end dates from sales data with validation"""
        if not sales_data:
            raise ValueError("No sales data available to determine date range")

        dates = []
        for record in sales_data:
            if 'date' in record:
                try:
                    if isinstance(record['date'], str):
                        # Handle various date string formats
                        date_str = record['date'].replace('Z', '+00:00')
                        if 'T' in date_str:
                            parsed_date = datetime.fromisoformat(date_str)
                        else:
                            # Handle date-only strings
                            parsed_date = datetime.strptime(date_str, '%Y-%m-%d')
                        dates.append(parsed_date)
                    elif isinstance(record['date'], datetime):
                        dates.append(record['date'])
                except (ValueError, AttributeError) as e:
                    logger.warning(f"Invalid date format in record: {record['date']} - {e}")
                    continue

        if not dates:
            raise ValueError("No valid dates found in sales data")

        start_date = min(dates)
        end_date = max(dates)

        # Validate and adjust date range for external APIs
        start_date, end_date = self._adjust_date_range_for_apis(start_date, end_date)

        logger.info(f"Determined and adjusted sales date range: {start_date} to {end_date}")
        return start_date, end_date

    def _adjust_date_range_for_apis(self, start_date: datetime, end_date: datetime) -> tuple[datetime, datetime]:
        """Adjust date range to comply with external API limits"""

        # Weather and traffic APIs have a 90-day limit
        MAX_DAYS = 90

        # Calculate current range
        current_range = (end_date - start_date).days

        if current_range > MAX_DAYS:
            logger.warning(f"Date range ({current_range} days) exceeds API limit ({MAX_DAYS} days). Adjusting...")

            # Keep the most recent data
            start_date = end_date - timedelta(days=MAX_DAYS)
            logger.info(f"Adjusted start_date to {start_date} to fit within {MAX_DAYS} day limit")

        # Ensure dates are not in the future
        now = datetime.now()
        if end_date > now:
            end_date = now.replace(hour=0, minute=0, second=0, microsecond=0)
            logger.info(f"Adjusted end_date to {end_date} (cannot be in future)")

        if start_date > now:
            start_date = now.replace(hour=0, minute=0, second=0, microsecond=0) - timedelta(days=30)
            logger.info(f"Adjusted start_date to {start_date} (was in future)")

        # Ensure start_date is before end_date
        if start_date >= end_date:
            start_date = end_date - timedelta(days=30)  # Default to 30 days of data
            logger.warning(f"start_date was not before end_date. Adjusted start_date to {start_date}")

        return start_date, end_date

    def __init__(self, db_session: AsyncSession = None):
        self.db_session = db_session
        self.trainer = BakeryMLTrainer(db_session=db_session)  # Pass DB session
        self.date_alignment_service = DateAlignmentService()
        self.orchestrator = TrainingDataOrchestrator(
            date_alignment_service=self.date_alignment_service
        )

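    # Illustrative wiring (a sketch, not part of the service): the API layer is
    # expected to build the service with a per-request AsyncSession and then
    # start a job, for example:
    #
    #     async with database_manager.async_session_local() as session:
    #         service = TrainingService(db_session=session)
    #         result = await service.start_training_job(tenant_id="tenant-123")
    #
    # The session factory name mirrors execute_training_job_simple() below; the
    # tenant id is a placeholder.
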
    async def start_training_job(
        self,
        tenant_id: str,
        bakery_location: tuple[float, float] = (40.4168, -3.7038),  # Default Madrid
        requested_start: Optional[datetime] = None,
        requested_end: Optional[datetime] = None,
        job_id: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Start a complete training job for a tenant.

        Args:
            tenant_id: Tenant identifier
            sales_data: Historical sales data
            bakery_location: Bakery coordinates (lat, lon)
            weather_data: Optional weather data
            traffic_data: Optional traffic data
            requested_start: Optional explicit start date
            requested_end: Optional explicit end date
            job_id: Optional job identifier

        Returns:
            Training job results
        """
        if not job_id:
            job_id = f"training_{tenant_id}_{uuid.uuid4().hex[:8]}"

        logger.info(f"Starting training job {job_id} for tenant {tenant_id}")

    async def execute_training_job_simple(self, job_id: str, tenant_id_str: str, request: TrainingJobRequest):
        """Simple wrapper that creates its own database session"""
        try:
            # Import database_manager locally to avoid circular imports
            from app.core.database import database_manager

            logger.info(f"Starting background training job {job_id} for tenant {tenant_id_str}")

            # Create new session for background task
            async with database_manager.async_session_local() as session:
                await self.execute_training_job(session, job_id, tenant_id_str, request)
                await session.commit()

        except Exception as e:
            logger.error(f"Background training job {job_id} failed: {str(e)}")

            # Try to update job status to failed
            try:
                from app.core.database import database_manager
                async with database_manager.async_session_local() as error_session:
                    await self._update_job_status(
                        error_session, job_id, "failed", 0,
                        f"Training failed: {str(e)}", error_message=str(e)
                    )
                    await error_session.commit()
            except Exception as update_error:
                logger.error(f"Failed to update job status: {str(update_error)}")

            raise

    async def create_training_job(self,
                                  db: AsyncSession,
                                  tenant_id: str,
                                  job_id: str,
                                  config: Dict[str, Any]) -> ModelTrainingLog:
        """Create a new training job record"""
        try:
            training_log = ModelTrainingLog(
                job_id=job_id,
                # Step 1: Prepare training dataset with date alignment and orchestration
                logger.info("Step 1: Preparing and aligning training data")
                training_dataset = await self.orchestrator.prepare_training_data(
                tenant_id=tenant_id,
                status="pending",
                progress=0,
                current_step="Initializing training job",
                start_time=datetime.now(),
                config=config
            )

            db.add(training_log)
            await db.commit()
            await db.refresh(training_log)

            logger.info(f"Created training job {job_id} for tenant {tenant_id}")
            return training_log

        except Exception as e:
            logger.error(f"Failed to create training job: {str(e)}")
            await db.rollback()
            raise

    async def create_single_product_job(self,
                                        db: AsyncSession,
                                        tenant_id: str,
                                        product_name: str,
                                        job_id: str,
                                        config: Dict[str, Any]) -> ModelTrainingLog:
        """Create a training job for a single product"""
        try:
            config["single_product"] = product_name

            training_log = ModelTrainingLog(
                job_id=job_id,
                tenant_id=tenant_id,
                status="pending",
                progress=0,
                current_step=f"Initializing training for {product_name}",
                start_time=datetime.now(),
                config=config
            )

            db.add(training_log)
            await db.commit()
            await db.refresh(training_log)

            logger.info(f"Created single product training job {job_id} for {product_name}")
            return training_log

        except Exception as e:
            logger.error(f"Failed to create single product training job: {str(e)}")
            await db.rollback()
            raise

    async def execute_training_job(self,
                                   db: AsyncSession,
                                   job_id: str,
                                   tenant_id: str,
                                   request: TrainingJobRequest):
        """Execute a complete training job"""
        try:
            logger.info(f"Starting execution of training job {job_id}")

            # Update job status to running
            await self._update_job_status(db, job_id, "running", 5, "Fetching training data")

            # Fetch sales data from data service
            sales_data = await self.data_client.fetch_sales_data(tenant_id)

            if not sales_data:
                raise ValueError("No sales data found for training")

            # Determine date range from sales data
            start_date, end_date = await self._determine_sales_date_range(sales_data)

            # Convert dates to ISO format strings for API calls
            start_date_str = start_date.isoformat()
            end_date_str = end_date.isoformat()

            logger.info(f"Using date range for external APIs: {start_date_str} to {end_date_str}")

            # Fetch external data if requested using the sales date range
            weather_data = []
            traffic_data = []

            await self._update_job_status(db, job_id, "running", 15, "Fetching weather data")
            try:
                weather_data = await self.data_client.fetch_weather_data(
                    tenant_id=tenant_id,
                    start_date=start_date_str,
                    end_date=end_date_str,
                    latitude=40.4168,  # Madrid coordinates
                    longitude=-3.7038
                )
                logger.info(f"Fetched {len(weather_data)} weather records")
            except Exception as e:
                logger.warning(f"Failed to fetch weather data: {e}. Continuing without weather data.")
                weather_data = []

            await self._update_job_status(db, job_id, "running", 25, "Fetching traffic data")
            try:
                traffic_data = await self.data_client.fetch_traffic_data(
                    tenant_id=tenant_id,
                    start_date=start_date_str,
                    end_date=end_date_str,
                    latitude=40.4168,
                    longitude=-3.7038
                )
                logger.info(f"Fetched {len(traffic_data)} traffic records")
            except Exception as e:
                logger.warning(f"Failed to fetch traffic data: {e}. Continuing without traffic data.")
                traffic_data = []

            # Execute ML training
            await self._update_job_status(db, job_id, "running", 35, "Processing training data")

            training_results = await self.ml_trainer.train_tenant_models(
                tenant_id=tenant_id,
                sales_data=sales_data,
                weather_data=weather_data,
                traffic_data=traffic_data,
                bakery_location=bakery_location,
                requested_start=requested_start,
                requested_end=requested_end,
                job_id=job_id
            )

            await self._update_job_status(db, job_id, "running", 85, "Storing trained models")

            # Store trained models in database
            await self._store_trained_models(db, tenant_id, training_results)

            await self._update_job_status(
                db, job_id, "completed", 100, "Training completed successfully",
                results=training_results
            )

            # Step 2: Execute ML training pipeline
            logger.info("Step 2: Starting ML training pipeline")
            training_results = await self.trainer.train_tenant_models(
                tenant_id=tenant_id,
                training_dataset=training_dataset,
                job_id=job_id
            )

            # Publish completion event
            await publish_job_completed(job_id, tenant_id, training_results)

            # Step 3: Compile final results
            final_result = {
                "job_id": job_id,
                "tenant_id": tenant_id,
                "status": "completed",
                "training_results": training_results,
                "data_summary": {
                    "sales_records": len(training_dataset.sales_data),
                    "weather_records": len(training_dataset.weather_data),
                    "traffic_records": len(training_dataset.traffic_data),
                    "date_range": {
                        "start": training_dataset.date_range.start.isoformat(),
                        "end": training_dataset.date_range.end.isoformat()
                    },
                    "data_sources_used": [source.value for source in training_dataset.date_range.available_sources],
                    "constraints_applied": training_dataset.date_range.constraints
                },
                "completed_at": datetime.now().isoformat()
            }

            logger.info(f"Training results {training_results}")
            logger.info(f"Training job {job_id} completed successfully")
            metrics.increment_counter("training_jobs_completed")
            return final_result

        except Exception as e:
            logger.error(f"Training job {job_id} failed: {str(e)}")
            await self._update_job_status(
                db, job_id, "failed", 0, f"Training failed: {str(e)}",
                error_message=str(e)
            )

            # Publish failure event
            await publish_job_failed(job_id, tenant_id, str(e))

            metrics.increment_counter("training_jobs_failed")
            raise
            return {
                "job_id": job_id,
                "tenant_id": tenant_id,
                "status": "failed",
                "error_message": str(e),
                "failed_at": datetime.now().isoformat()
            }

    async def execute_single_product_training(self,
                                              db: AsyncSession,
                                              job_id: str,
                                              tenant_id: str,
                                              product_name: str,
                                              request: SingleProductTrainingRequest):
        """Execute training for a single product"""

    async def start_single_product_training(
        self,
        tenant_id: str,
        product_name: str,
        sales_data: List[Dict[str, Any]],
        bakery_location: tuple[float, float] = (40.4168, -3.7038),
        weather_data: Optional[List[Dict[str, Any]]] = None,
        traffic_data: Optional[List[Dict[str, Any]]] = None,
        job_id: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Train a model for a single product.

        Args:
            tenant_id: Tenant identifier
            product_name: Product name
            sales_data: Historical sales data
            bakery_location: Bakery coordinates
            weather_data: Optional weather data
            traffic_data: Optional traffic data
            job_id: Optional job identifier

        Returns:
            Single product training result
        """
        if not job_id:
            job_id = f"single_{tenant_id}_{product_name}_{uuid.uuid4().hex[:8]}"

        logger.info(f"Starting single product training {job_id} for {product_name}")

        try:
            logger.info(f"Starting single product training {job_id} for {product_name}")
            # Filter sales data for the specific product
            product_sales = [
                record for record in sales_data
                if record.get('product_name') == product_name
            ]

            # Update job status
            await self._update_job_status(db, job_id, "running", 10, f"Fetching data for {product_name}")
            if not product_sales:
                raise ValueError(f"No sales data found for product: {product_name}")

            # Fetch data
            sales_data = await self._fetch_product_sales_data(tenant_id, product_name, request)
            weather_data = []
            traffic_data = []

            if request.include_weather:
                await self._update_job_status(db, job_id, "running", 30, "Fetching weather data")
                weather_data = await self.data_client.fetch_weather_data(tenant_id, request)

            if request.include_traffic:
                await self._update_job_status(db, job_id, "running", 50, "Fetching traffic data")
                traffic_data = await self.data_client.fetch_traffic_data(tenant_id, request)

            # Execute training
            await self._update_job_status(db, job_id, "running", 70, f"Training model for {product_name}")

            training_result = await self.ml_trainer.train_single_product(
            # Use the same pipeline but for single product
            return await self.start_training_job(
                tenant_id=tenant_id,
                product_name=product_name,
                sales_data=sales_data,
                sales_data=product_sales,
                bakery_location=bakery_location,
                weather_data=weather_data,
                traffic_data=traffic_data,
                job_id=job_id
            )

            # Store model
            await self._update_job_status(db, job_id, "running", 90, "Storing trained model")
            await self._store_single_trained_model(db, tenant_id, product_name, training_result)

            await self._update_job_status(
                db, job_id, "completed", 100, f"Training completed for {product_name}",
                results=training_result
            )

            logger.info(f"Single product training {job_id} completed successfully")
            metrics.increment_counter("single_product_training_completed")

        except Exception as e:
            logger.error(f"Single product training {job_id} failed: {str(e)}")
            await self._update_job_status(
                db, job_id, "failed", 0, f"Training failed: {str(e)}",
                error_message=str(e)
            )
            metrics.increment_counter("single_product_training_failed")
            raise
            return {
                "job_id": job_id,
                "tenant_id": tenant_id,
                "product_name": product_name,
                "status": "failed",
                "error_message": str(e),
                "failed_at": datetime.now().isoformat()
            }

    async def get_job_status(self,
                             db: AsyncSession,
                             job_id: str,
                             tenant_id: str) -> Optional[ModelTrainingLog]:
        """Get training job status"""
        try:
            result = await db.execute(
                select(ModelTrainingLog).where(
                    and_(
                        ModelTrainingLog.job_id == job_id,
                        ModelTrainingLog.tenant_id == tenant_id
                    )
                )
            )
            return result.scalar_one_or_none()

        except Exception as e:
            logger.error(f"Failed to get job status: {str(e)}")
            return None

    async def validate_training_data(
        self,
        tenant_id: str,
        sales_data: List[Dict[str, Any]],
        products: Optional[List[str]] = None
    ) -> Dict[str, Any]:
        """
        Validate training data quality before starting training.

        Args:
            tenant_id: Tenant identifier
            sales_data: Sales data to validate
            products: Optional list of specific products to validate

    async def list_training_jobs(self,
                                 db: AsyncSession,
                                 tenant_id: str,
                                 limit: int = 10,
                                 status_filter: Optional[str] = None) -> List[ModelTrainingLog]:
        """List training jobs for a tenant"""
        try:
            query = select(ModelTrainingLog).where(
                ModelTrainingLog.tenant_id == tenant_id
            ).order_by(ModelTrainingLog.start_time.desc()).limit(limit)

            if status_filter:
                query = query.where(ModelTrainingLog.status == status_filter)

            result = await db.execute(query)
            return result.scalars().all()

        except Exception as e:
            logger.error(f"Failed to list training jobs: {str(e)}")
            return []

    async def cancel_training_job(self,
                                  db: AsyncSession,
                                  job_id: str,
                                  tenant_id: str) -> bool:
        """Cancel a training job"""
        try:
            result = await db.execute(
                update(ModelTrainingLog)
                .where(
                    and_(
                        ModelTrainingLog.job_id == job_id,
                        ModelTrainingLog.tenant_id == tenant_id,
                        ModelTrainingLog.status.in_(["pending", "running"])
                    )
                )
                .values(
                    status="cancelled",
                    end_time=datetime.now(),
                    current_step="Training cancelled by user"
                )
            )

            await db.commit()

            if result.rowcount > 0:
                logger.info(f"Cancelled training job {job_id}")
                return True
            else:
                logger.warning(f"Could not cancel training job {job_id} - not found or not cancellable")
                return False

        except Exception as e:
            logger.error(f"Failed to cancel training job: {str(e)}")
            await db.rollback()
            return False

    async def validate_training_data(self,
                                     db: AsyncSession,
                                     tenant_id: str,
                                     config: Dict[str, Any]) -> Dict[str, Any]:
        """Validate training data before starting a job

        Returns:
            Validation results
        """
        try:
            logger.info(f"Validating training data for tenant {tenant_id}")

            issues = []
            recommendations = []

            # Fetch a sample of sales data to validate
            sales_data = await self._fetch_sales_data(tenant_id, config, limit=1000)

            # Extract sales date range for validation
            if not sales_data:
                issues.append("No sales data found for tenant")
                return {
                    "is_valid": False,
                    "issues": issues,
                    "recommendations": ["Upload sales data before training"],
                    "estimated_time_minutes": 0,
                    "valid": False,
                    "errors": ["No sales data provided"],
                    "warnings": []
                }

            # Analyze data quality
            products = set(item.get("product_name") for item in sales_data)
            total_records = len(sales_data)

            # Check for sufficient data per product
            product_counts = {}
            for item in sales_data:
                product = item.get("product_name")
                if product:
                    product_counts[product] = product_counts.get(product, 0) + 1

            insufficient_products = [
                product for product, count in product_counts.items()
                if count < config.get("min_data_points", 30)
            ]

            if insufficient_products:
                issues.append(f"Insufficient data for products: {', '.join(insufficient_products)}")
                recommendations.append("Collect more historical data for these products")

            # Estimate training time
            valid_products = len(products) - len(insufficient_products)
            estimated_time = max(5, valid_products * 2)  # 2 minutes per product minimum

            is_valid = len(issues) == 0

            return {
                "is_valid": is_valid,
                "issues": issues,
                "recommendations": recommendations,
                "estimated_time_minutes": estimated_time,
                "products_analyzed": len(products),
                "total_data_points": total_records
            }

        except Exception as e:
            logger.error(f"Failed to validate training data: {str(e)}")
            return {
                "is_valid": False,
                "issues": [f"Validation error: {str(e)}"],
                "recommendations": ["Check data service connectivity"],
                "estimated_time_minutes": 0
            }

    async def _update_job_status(self,
                                 db: AsyncSession,
                                 job_id: str,
                                 status: str,
                                 progress: int,
                                 current_step: str,
                                 results: Optional[Dict] = None,
                                 error_message: Optional[str] = None):
        """Update training job status"""
        try:
            update_values = {
                "status": status,
                "progress": progress,
                "current_step": current_step
            }

            if status == "completed":
                update_values["end_time"] = datetime.now()

            if results:
                update_values["results"] = results

            if error_message:
                update_values["error_message"] = error_message
                update_values["end_time"] = datetime.now()

            await db.execute(
                update(ModelTrainingLog)
                .where(ModelTrainingLog.job_id == job_id)
                .values(**update_values)
            )

            # Create a mock training dataset to validate
            mock_dataset = await self.orchestrator.prepare_training_data(
                tenant_id=tenant_id,
                sales_data=sales_data,
                bakery_location=(40.4168, -3.7038),  # Default Madrid
                job_id=f"validation_{uuid.uuid4().hex[:8]}"
            )

            await db.commit()
            # Validate the dataset
            validation_results = self.orchestrator.validate_training_data_quality(mock_dataset)

            # Add product-specific information
            unique_products = list(set(record.get('product_name', 'unknown') for record in sales_data))
            product_data_points = {}

            for record in sales_data:
                product = record.get('product_name', 'unknown')
                product_data_points[product] = product_data_points.get(product, 0) + 1

            validation_results.update({
                "products_found": unique_products,
                "product_data_points": product_data_points,
                "total_records": len(sales_data),
                "date_range_info": {
                    "start": mock_dataset.date_range.start.isoformat(),
                    "end": mock_dataset.date_range.end.isoformat(),
                    "duration_days": (mock_dataset.date_range.end - mock_dataset.date_range.start).days
                }
            })

            return validation_results

        except Exception as e:
            logger.error(f"Failed to update job status: {str(e)}")
            await db.rollback()
            logger.error(f"Training data validation failed: {str(e)}")
            return {
                "valid": False,
                "errors": [f"Validation failed: {str(e)}"],
                "warnings": []
            }

    async def _store_trained_models(self,
                                    db: AsyncSession,
                                    tenant_id: str,
                                    training_results: Dict[str, Any]):
        """Store trained models in database"""

    async def get_training_recommendations(
        self,
        tenant_id: str,
        sales_data: List[Dict[str, Any]]
    ) -> Dict[str, Any]:
        """
        Get training recommendations based on data analysis.

        Args:
            tenant_id: Tenant identifier
            sales_data: Historical sales data

        Returns:
            Training recommendations
        """
        try:
            models_to_store = []
            logger.info(f"Generating training recommendations for tenant {tenant_id}")

            for product_name, result in training_results.get("training_results", {}).items():
                if result.get("status") == "success":
                    model_info = result.get("model_info", {})

                    trained_model = TrainedModel(
                        tenant_id=tenant_id,
                        product_name=product_name,
                        model_id=model_info.get("model_id"),
                        model_type=model_info.get("type", "prophet"),
                        model_path=model_info.get("model_path"),
                        version=1,  # Start with version 1
                        training_samples=model_info.get("training_samples", 0),
                        features=model_info.get("features", []),
                        hyperparameters=model_info.get("hyperparameters", {}),
                        training_metrics=model_info.get("training_metrics", {}),
                        data_period_start=datetime.fromisoformat(
                            model_info.get("data_period", {}).get("start_date", datetime.now().isoformat())
                        ),
                        data_period_end=datetime.fromisoformat(
                            model_info.get("data_period", {}).get("end_date", datetime.now().isoformat())
                        ),
                        created_at=datetime.now(),
                        is_active=True
                    )

                    models_to_store.append(trained_model)

            # Analyze the data
            validation_results = await self.validate_training_data(tenant_id, sales_data)

            # Deactivate old models for these products
            if models_to_store:
                product_names = [model.product_name for model in models_to_store]

                await db.execute(
                    update(TrainedModel)
                    .where(
                        and_(
                            TrainedModel.tenant_id == tenant_id,
                            TrainedModel.product_name.in_(product_names),
                            TrainedModel.is_active == True
                        )
                    )
                    .values(is_active=False)
                )

                # Add new models
                db.add_all(models_to_store)
                await db.commit()

                logger.info(f"Stored {len(models_to_store)} trained models for tenant {tenant_id}")

            recommendations = {
                "should_retrain": True,
                "reasons": [],
                "recommended_products": [],
                "optimal_config": {
                    "include_weather": True,
                    "include_traffic": True,
                    "min_data_points": 30,
                    "hyperparameter_optimization": True
                }
            }

            # Analyze data quality and provide recommendations
            if validation_results.get("data_quality_score", 0) >= 80:
                recommendations["reasons"].append("High quality data detected")
            else:
                recommendations["reasons"].append("Data quality could be improved")

            # Recommend products with sufficient data
            product_data_points = validation_results.get("product_data_points", {})
            for product, points in product_data_points.items():
                if points >= 30:  # Minimum viable data points
                    recommendations["recommended_products"].append(product)

            if len(recommendations["recommended_products"]) == 0:
                recommendations["should_retrain"] = False
                recommendations["reasons"].append("Insufficient data for reliable training")

            return recommendations

        except Exception as e:
            logger.error(f"Failed to store trained models: {str(e)}")
            await db.rollback()
            raise

    async def _store_single_trained_model(self,
                                          db: AsyncSession,
                                          tenant_id: str,
                                          product_name: str,
                                          training_result: Dict[str, Any]):
        """Store a single trained model"""
        try:
            if training_result.get("status") == "success":
                model_info = training_result.get("model_info", {})

                # Deactivate old model for this product
                await db.execute(
                    update(TrainedModel)
                    .where(
                        and_(
                            TrainedModel.tenant_id == tenant_id,
                            TrainedModel.product_name == product_name,
                            TrainedModel.is_active == True
                        )
                    )
                    .values(is_active=False)
                )

                # Create new model record
                trained_model = TrainedModel(
                    tenant_id=tenant_id,
                    product_name=product_name,
                    model_id=model_info.get("model_id"),
                    model_type=model_info.get("type", "prophet"),
                    model_path=model_info.get("model_path"),
                    version=1,
                    training_samples=model_info.get("training_samples", 0),
                    features=model_info.get("features", []),
                    hyperparameters=model_info.get("hyperparameters", {}),
                    training_metrics=model_info.get("training_metrics", {}),
                    data_period_start=datetime.fromisoformat(
                        model_info.get("data_period", {}).get("start_date", datetime.now().isoformat())
                    ),
                    data_period_end=datetime.fromisoformat(
                        model_info.get("data_period", {}).get("end_date", datetime.now().isoformat())
                    ),
                    created_at=datetime.now(),
                    is_active=True
                )

                db.add(trained_model)
                await db.commit()

                logger.info(f"Stored trained model for {product_name}")

        except Exception as e:
            logger.error(f"Failed to store trained model: {str(e)}")
            await db.rollback()
            raise

    async def get_training_logs(self,
                                db: AsyncSession,
                                job_id: str,
                                tenant_id: str) -> Optional[List[str]]:
        """Get detailed training logs for a job"""
        try:
            # For now, return basic log information from the database
            # In a production system, you might store detailed logs separately
            result = await db.execute(
                select(ModelTrainingLog).where(
                    and_(
                        ModelTrainingLog.job_id == job_id,
                        ModelTrainingLog.tenant_id == tenant_id
                    )
                )
            )

            training_log = result.scalar_one_or_none()

            if training_log:
                logs = [
                    f"Job started at: {training_log.start_time}",
                    f"Current status: {training_log.status}",
                    f"Progress: {training_log.progress}%",
                    f"Current step: {training_log.current_step}"
                ]

                if training_log.end_time:
                    logs.append(f"Job completed at: {training_log.end_time}")

                if training_log.error_message:
                    logs.append(f"Error: {training_log.error_message}")

                if training_log.results:
                    results = training_log.results
                    logs.append(f"Models trained: {results.get('products_trained', 0)}")
                    logs.append(f"Models failed: {results.get('products_failed', 0)}")

                return logs

            return None

        except Exception as e:
            logger.error(f"Failed to get training logs: {str(e)}")
            return None

    async def _determine_sales_date_range(self, sales_data: List[Dict]) -> tuple[datetime, datetime]:
        """Determine start and end dates from sales data"""
        if not sales_data:
            raise ValueError("No sales data available to determine date range")

        dates = []
        for record in sales_data:
            if 'date' in record:
                if isinstance(record['date'], str):
                    dates.append(datetime.fromisoformat(record['date'].replace('Z', '+00:00')))
                elif isinstance(record['date'], datetime):
                    dates.append(record['date'])

        if not dates:
            raise ValueError("No valid dates found in sales data")

        start_date = min(dates)
        end_date = max(dates)

        logger.info(f"Determined sales date range: {start_date} to {end_date}")
        return start_date, end_date

        logger.error(f"Failed to generate training recommendations: {str(e)}")
        return {
            "should_retrain": False,
            "reasons": [f"Error analyzing data: {str(e)}"],
            "recommended_products": [],
            "optimal_config": {}
        }
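
# ---------------------------------------------------------------------------
# Minimal usage sketch (an assumption, not part of the service): run standalone
# as a smoke test of the orchestrator-based flow above. The tenant id is a
# placeholder, and passing db_session=None relies on whatever fallback mode
# BakeryMLTrainer supports; this is illustrative only.
# ---------------------------------------------------------------------------
if __name__ == "__main__":  # pragma: no cover
    async def _demo() -> None:
        # Build the service, then let start_training_job prepare data and train.
        service = TrainingService(db_session=None)
        result = await service.start_training_job(tenant_id="demo-tenant")
        logger.info(f"Demo training finished with status: {result.get('status')}")

    asyncio.run(_demo())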