Fix new services implementation 5

2025-08-15 17:53:59 +02:00
parent 03b4d4185d
commit f7de9115d1
43 changed files with 1714 additions and 891 deletions
--- a/services/external/app/external/apis/madrid_traffic_client.py
+++ b/services/external/app/external/apis/madrid_traffic_client.py
@@ -236,11 +236,19 @@ class MadridTrafficClient(BaseTrafficClient, BaseAPIClient):
        try:
            # Process by year and month to avoid memory issues
            current_date = start_date.replace(day=1)  # Start from beginning of month
+            now = datetime.now()
            
            while current_date <= end_date:
                year = current_date.year
                month = current_date.month
                
+                # Skip current month and future months (no historical data available yet)
+                if (year == now.year and month >= now.month) or year > now.year:
+                    self.logger.info("Skipping current/future month - no historical data available", 
+                                   year=year, month=month)
+                    current_date = self._next_month(current_date)
+                    continue
+                
                # Build historical URL
                zip_url = self.api_client._build_historical_url(year, month)
                
@@ -251,7 +259,7 @@ class MadridTrafficClient(BaseTrafficClient, BaseAPIClient):
                zip_content = await self.api_client.fetch_historical_zip(zip_url)
                if not zip_content:
                    self.logger.warning("Failed to fetch historical ZIP", url=zip_url)
-                    current_date = current_date.replace(month=current_date.month + 1) if current_date.month < 12 else current_date.replace(year=current_date.year + 1, month=1)
+                    current_date = self._next_month(current_date)
                    continue
                
                # Process ZIP content with enhanced parsing
@@ -286,11 +294,8 @@ class MadridTrafficClient(BaseTrafficClient, BaseAPIClient):
                               filtered_records=len(filtered_records),
                               total_records=len(historical_records))
                
-                # Move to next month
-                if current_date.month == 12:
-                    current_date = current_date.replace(year=current_date.year + 1, month=1)
-                else:
-                    current_date = current_date.replace(month=current_date.month + 1)
+                # Advance the loop cursor to the next month
+                current_date = self._next_month(current_date)
            
            return historical_records
            
@@ -347,4 +352,10 @@ class MadridTrafficClient(BaseTrafficClient, BaseAPIClient):
                            zip_url=zip_url, error=str(e))
            return []
    
+    def _next_month(self, current_date: datetime) -> datetime:
+        """Return current_date moved one month forward (December -> January of next year); expects a day valid in that month, e.g. day=1."""
+        if current_date.month == 12:
+            return current_date.replace(year=current_date.year + 1, month=1)
+        else:
+            return current_date.replace(month=current_date.month + 1)
    
--- a/services/external/app/external/clients/madrid_client.py
+++ b/services/external/app/external/clients/madrid_client.py
@@ -42,22 +42,9 @@ class MadridTrafficAPIClient(BaseAPIClient):

    def _build_historical_url(self, year: int, month: int) -> str:
        """Build historical ZIP URL for given year and month"""
-        # Madrid historical data URL pattern
-        base_url = "https://datos.madrid.es/egob/catalogo/208627"
-        
-        # URL numbering pattern (this may need adjustment based on actual URLs)
-        # Note: Historical data is only available for past periods, not current/future
-        if year == 2023:
-            url_number = 116 + (month - 1)  # 116-127 for 2023
-        elif year == 2024:
-            url_number = 128 + (month - 1)  # 128-139 for 2024
-        elif year == 2025:
-            # For 2025, use the continuing numbering from 2024
-            url_number = 140 + (month - 1)  # Starting from 140 for January 2025
-        else:
-            url_number = 116  # Fallback to 2023 data
-        
-        return f"{base_url}-{url_number}-transporte-ptomedida-historico.zip"
+        # Madrid uses a direct file pattern now: https://datos.madrid.es/egobfiles/MANUAL/208627/MM-YYYY.zip
+        # Only historical data is available (not current month)
+        return f"https://datos.madrid.es/egobfiles/MANUAL/208627/{month:02d}-{year}.zip"

    async def fetch_current_traffic_xml(self, endpoint: Optional[str] = None) -> Optional[str]:
        """Fetch current traffic XML data"""
--- a/services/external/app/repositories/traffic_repository.py
+++ b/services/external/app/repositories/traffic_repository.py
@@ -84,18 +84,22 @@ class TrafficRepository:
            if not traffic_data_list:
                return 0

-            # Check for existing records to avoid duplicates
+            # Check for existing records to avoid duplicates - batch the queries to avoid parameter limit
            dates = [data.get('date') for data in traffic_data_list if data.get('date')]
            existing_dates = set()
            if dates:
-                existing_stmt = select(TrafficData.date).where(
-                    and_(
-                        TrafficData.location_id == location_id,
-                        TrafficData.date.in_(dates)
+                # PostgreSQL has a limit of 32767 parameters, so batch the queries
+                batch_size = 30000  # Safe batch size under the limit
+                for i in range(0, len(dates), batch_size):
+                    date_batch = dates[i:i + batch_size]
+                    existing_stmt = select(TrafficData.date).where(
+                        and_(
+                            TrafficData.location_id == location_id,
+                            TrafficData.date.in_(date_batch)
+                        )
                    )
-                )
-                result = await self.session.execute(existing_stmt)
-                existing_dates = {row[0] for row in result.fetchall()}
+                    result = await self.session.execute(existing_stmt)
+                    existing_dates.update({row[0] for row in result.fetchall()})
                logger.debug(f"Found {len(existing_dates)} existing records for location {location_id}")

            batch_records = []
--- a/services/forecasting/app/services/forecasting_service.py
+++ b/services/forecasting/app/services/forecasting_service.py
@@ -420,20 +420,20 @@ class EnhancedForecastingService:
            if prediction['prediction'] > 100:  # Threshold for high demand
                alerts_to_create.append({
                    "tenant_id": str(forecast.tenant_id),
-                    "forecast_id": forecast.id,
+                    "forecast_id": str(forecast.id),  # Convert UUID to string
                    "alert_type": "high_demand",
                    "severity": "high" if prediction['prediction'] > 200 else "medium",
-                    "message": f"High demand predicted for inventory product {forecast.inventory_product_id}: {prediction['prediction']:.1f} units"
+                    "message": f"High demand predicted for inventory product {str(forecast.inventory_product_id)}: {prediction['prediction']:.1f} units"
                })
            
            # Check for low demand alert
            elif prediction['prediction'] < 10:  # Threshold for low demand
                alerts_to_create.append({
                    "tenant_id": str(forecast.tenant_id),
-                    "forecast_id": forecast.id,
+                    "forecast_id": str(forecast.id),  # Convert UUID to string
                    "alert_type": "low_demand",
                    "severity": "low",
-                    "message": f"Low demand predicted for inventory product {forecast.inventory_product_id}: {prediction['prediction']:.1f} units"
+                    "message": f"Low demand predicted for inventory product {str(forecast.inventory_product_id)}: {prediction['prediction']:.1f} units"
                })
            
            # Check for stockout risk (very low prediction with narrow confidence interval)
@@ -441,10 +441,10 @@ class EnhancedForecastingService:
            if prediction['prediction'] < 5 and confidence_interval < 10:
                alerts_to_create.append({
                    "tenant_id": str(forecast.tenant_id),
-                    "forecast_id": forecast.id,
+                    "forecast_id": str(forecast.id),  # Convert UUID to string
                    "alert_type": "stockout_risk",
                    "severity": "critical",
-                    "message": f"Stockout risk for inventory product {forecast.inventory_product_id}: predicted {prediction['prediction']:.1f} units with high confidence"
+                    "message": f"Stockout risk for inventory product {str(forecast.inventory_product_id)}: predicted {prediction['prediction']:.1f} units with high confidence"
                })
            
            # Create alerts
@@ -462,7 +462,7 @@ class EnhancedForecastingService:
        return ForecastResponse(
            id=str(cache_entry.id),
            tenant_id=str(cache_entry.tenant_id),
-            inventory_product_id=cache_entry.inventory_product_id,
+            inventory_product_id=str(cache_entry.inventory_product_id),  # Convert UUID to string
            location=cache_entry.location,
            forecast_date=cache_entry.forecast_date,
            predicted_demand=cache_entry.predicted_demand,
@@ -486,7 +486,7 @@ class EnhancedForecastingService:
        return ForecastResponse(
            id=str(forecast.id),
            tenant_id=str(forecast.tenant_id),
-            inventory_product_id=forecast.inventory_product_id,
+            inventory_product_id=str(forecast.inventory_product_id),  # Convert UUID to string
            location=forecast.location,
            forecast_date=forecast.forecast_date,
            predicted_demand=forecast.predicted_demand,
@@ -514,7 +514,7 @@ class EnhancedForecastingService:
        return {
            "id": str(forecast.id),
            "tenant_id": str(forecast.tenant_id),
-            "inventory_product_id": forecast.inventory_product_id,
+            "inventory_product_id": str(forecast.inventory_product_id),  # Convert UUID to string
            "location": forecast.location,
            "forecast_date": forecast.forecast_date.isoformat(),
            "predicted_demand": forecast.predicted_demand,
--- a/services/inventory/app/repositories/ingredient_repository.py
+++ b/services/inventory/app/repositories/ingredient_repository.py
@@ -90,6 +90,13 @@ class IngredientRepository(BaseRepository[Ingredient, IngredientCreate, Ingredie
    ) -> List[Ingredient]:
        """Get ingredients for a tenant with filtering"""
        try:
+            # Handle search filter separately since it requires special query logic
+            if filters and filters.get('search'):
+                search_term = filters['search']
+                logger.info(f"Searching ingredients with term: '{search_term}'", tenant_id=tenant_id)
+                return await self.search_ingredients(tenant_id, search_term, skip, limit)
+            
+            # Handle other filters with standard multi-get
            query_filters = {'tenant_id': tenant_id}
            if filters:
                if filters.get('category'):
--- a/services/sales/app/api/sales.py
+++ b/services/sales/app/api/sales.py
@@ -26,28 +26,6 @@ def get_sales_service():
    """Dependency injection for SalesService"""
    return SalesService()

-@router.get("/tenants/{tenant_id}/sales/products")
-async def get_products_list(
-    tenant_id: UUID = Path(..., description="Tenant ID"),
-    current_user: Dict[str, Any] = Depends(get_current_user_dep),
-    sales_service: SalesService = Depends(get_sales_service)
-):
-    """Get list of products using repository pattern"""
-    try:
-        logger.debug("Getting products list with repository pattern", tenant_id=tenant_id)
-        
-        products = await sales_service.get_products_list(str(tenant_id))
-        
-        logger.debug("Products list retrieved using repository", 
-                    count=len(products),
-                    tenant_id=tenant_id)
-        return products
-        
-    except Exception as e:
-        logger.error("Failed to get products list", 
-                    error=str(e),
-                    tenant_id=tenant_id)
-        raise HTTPException(status_code=500, detail=f"Failed to get products list: {str(e)}")

@router.post("/tenants/{tenant_id}/sales", response_model=SalesDataResponse)
 async def create_sales_record(
--- a/services/sales/app/repositories/sales_repository.py
+++ b/services/sales/app/repositories/sales_repository.py
@@ -97,8 +97,9 @@ class SalesRepository(BaseRepository[SalesData, SalesDataCreate, SalesDataUpdate
                # Apply pagination
                stmt = stmt.offset(query_params.offset).limit(query_params.limit)
            else:
-                # Default ordering
-                stmt = stmt.order_by(desc(SalesData.date)).limit(50)
+                # Default ordering with safety limit for direct repository calls
+                # Note: API calls always provide query_params, so this only applies to direct usage
+                stmt = stmt.order_by(desc(SalesData.date)).limit(10000)
            
            result = await self.session.execute(stmt)
            records = result.scalars().all()
@@ -279,24 +280,3 @@ class SalesRepository(BaseRepository[SalesData, SalesDataCreate, SalesDataUpdate
            logger.error("Failed to validate sales record", error=str(e), record_id=record_id)
            raise
        
-    async def get_product_statistics(self, tenant_id: str) -> List[Dict[str, Any]]:
-        """Get product statistics for tenant"""
-        try:
-            # Note: product_name field was removed - product info now managed via inventory service
-            # This method should be updated to query products from inventory service
-            # For now, return inventory_product_ids to avoid breaking existing code
-            stmt = select(SalesData.inventory_product_id).where(
-                and_(
-                    SalesData.tenant_id == tenant_id,
-                    SalesData.inventory_product_id.is_not(None)
-                )
-            ).distinct()
-            
-            result = await self.session.execute(stmt)
-            products = [str(row[0]) for row in result if row[0]]
-            
-            return sorted(products)
-            
-        except Exception as e:
-            logger.error("Failed to get product categories", error=str(e), tenant_id=tenant_id)
-            raise
--- a/services/sales/app/services/ai_onboarding_service.py
+++ b/services/sales/app/services/ai_onboarding_service.py
@@ -286,14 +286,67 @@ class AIOnboardingService:
                )
                suggestions.append(suggestion)
            
-            business_model = BusinessModelAnalysis(
-                model=business_model_raw.get("model", "unknown"),
-                confidence=business_model_raw.get("confidence", 0.0),
-                ingredient_count=business_model_raw.get("ingredient_count", 0),
-                finished_product_count=business_model_raw.get("finished_product_count", 0),
-                ingredient_ratio=business_model_raw.get("ingredient_ratio", 0.0),
-                recommendations=business_model_raw.get("recommendations", [])
-            )
+            # Check if enhanced business intelligence data is available
+            bi_data = product_analysis.get('__business_intelligence__')
+            
+            if bi_data and bi_data.get('confidence_score', 0) > 0.6:
+                # Use enhanced business intelligence analysis
+                business_type = bi_data.get('business_type', 'bakery')
+                business_model_detected = bi_data.get('business_model', 'individual')
+                
+                # Map business intelligence results to existing model format
+                model_mapping = {
+                    'individual': 'individual_bakery',
+                    'central_distribution': 'central_baker_satellite',
+                    'central_bakery': 'central_baker_satellite',
+                    'hybrid': 'hybrid_bakery'
+                }
+                
+                mapped_model = model_mapping.get(business_model_detected, 'individual_bakery')
+                
+                # Count ingredients vs finished products from suggestions
+                ingredient_count = sum(1 for s in suggestions if s.product_type == 'ingredient')
+                finished_product_count = sum(1 for s in suggestions if s.product_type == 'finished_product')
+                total_products = len(suggestions)
+                ingredient_ratio = ingredient_count / total_products if total_products > 0 else 0.0
+                
+                # Enhanced recommendations based on BI analysis
+                enhanced_recommendations = bi_data.get('recommendations', [])
+                
+                # Add business type specific recommendations
+                if business_type == 'coffee_shop':
+                    enhanced_recommendations.extend([
+                        "Configure beverage inventory management",
+                        "Set up quick-service item tracking",
+                        "Enable all-day service optimization"
+                    ])
+                
+                business_model = BusinessModelAnalysis(
+                    model=mapped_model,
+                    confidence=bi_data.get('confidence_score', 0.0),
+                    ingredient_count=ingredient_count,
+                    finished_product_count=finished_product_count,
+                    ingredient_ratio=ingredient_ratio,
+                    recommendations=enhanced_recommendations[:6]  # Limit to top 6 recommendations
+                )
+                
+                logger.info("Using enhanced business intelligence for model analysis",
+                           detected_type=business_type,
+                           detected_model=business_model_detected,
+                           mapped_model=mapped_model,
+                           confidence=bi_data.get('confidence_score'))
+            else:
+                # Fallback to basic inventory service analysis
+                business_model = BusinessModelAnalysis(
+                    model=business_model_raw.get("model", "unknown"),
+                    confidence=business_model_raw.get("confidence", 0.0),
+                    ingredient_count=business_model_raw.get("ingredient_count", 0),
+                    finished_product_count=business_model_raw.get("finished_product_count", 0),
+                    ingredient_ratio=business_model_raw.get("ingredient_ratio", 0.0),
+                    recommendations=business_model_raw.get("recommendations", [])
+                )
+                
+                logger.info("Using basic inventory service business model analysis")
            
            # Calculate confidence metrics
            high_confidence_count = sum(1 for s in suggestions if s.confidence_score >= 0.7)
@@ -674,6 +727,85 @@ class AIOnboardingService:
                    "avg_unit_price": avg_unit_price
                }
            
+            # Add enhanced business intelligence analysis
+            try:
+                from app.services.business_intelligence_service import BusinessIntelligenceService
+                
+                bi_service = BusinessIntelligenceService()
+                
+                # Convert parsed data to format expected by BI service
+                sales_data = []
+                product_data = []
+                
+                for row in rows:
+                    # Create sales record from CSV row
+                    sales_record = {
+                        'date': row.get(date_column, ''),
+                        'product_name': row.get(product_column, ''),
+                        'name': row.get(product_column, ''),
+                        'quantity_sold': 0,
+                        'revenue': 0,
+                        'location_id': row.get('location', 'main'),
+                        'sales_channel': row.get('channel', 'in_store'),
+                        'supplier_name': row.get('supplier', ''),
+                        'brand': row.get('brand', '')
+                    }
+                    
+                    # Parse quantity
+                    if quantity_column:
+                        try:
+                            qty_raw = row.get(quantity_column, 1)
+                            if qty_raw and str(qty_raw).strip():
+                                sales_record['quantity_sold'] = int(float(str(qty_raw).replace(',', '.')))
+                        except:
+                            sales_record['quantity_sold'] = 1
+                    
+                    # Parse revenue
+                    if revenue_column:
+                        try:
+                            rev_raw = row.get(revenue_column)
+                            if rev_raw and str(rev_raw).strip():
+                                sales_record['revenue'] = float(str(rev_raw).replace(',', '.').replace('€', '').replace('$', '').strip())
+                        except:
+                            pass
+                    
+                    sales_data.append(sales_record)
+                    
+                    # Create product data entry
+                    product_data.append({
+                        'name': sales_record['product_name'],
+                        'supplier_name': sales_record.get('supplier_name', ''),
+                        'brand': sales_record.get('brand', '')
+                    })
+                
+                # Run business intelligence analysis
+                if sales_data:
+                    detection_result = await bi_service.analyze_business_from_sales_data(
+                        sales_data=sales_data,
+                        product_data=product_data
+                    )
+                    
+                    # Store business intelligence results in product_analysis
+                    product_analysis['__business_intelligence__'] = {
+                        "business_type": detection_result.business_type,
+                        "business_model": detection_result.business_model,
+                        "confidence_score": detection_result.confidence_score,
+                        "indicators": detection_result.indicators,
+                        "recommendations": detection_result.recommendations,
+                        "analysis_summary": f"{detection_result.business_type.title()} - {detection_result.business_model.replace('_', ' ').title()}"
+                    }
+                    
+                    logger.info("Enhanced business intelligence analysis completed",
+                               business_type=detection_result.business_type,
+                               business_model=detection_result.business_model,
+                               confidence=detection_result.confidence_score)
+                else:
+                    logger.warning("No sales data available for business intelligence analysis")
+                    
+            except Exception as bi_error:
+                logger.warning("Business intelligence analysis failed", error=str(bi_error))
+                # Continue with basic analysis even if BI fails
+            
            return product_analysis
            
        except Exception as e:
--- a/services/sales/app/services/data_import_service.py
+++ b/services/sales/app/services/data_import_service.py
@@ -428,7 +428,7 @@ class DataImportService:
        repository: SalesRepository, 
        filename: Optional[str] = None
    ) -> Dict[str, Any]:
-        """Enhanced CSV processing with better data handling"""
+        """Enhanced CSV processing with batch product resolution for better reliability"""
        try:
            reader = csv.DictReader(io.StringIO(csv_content))
            rows = list(reader)
@@ -445,22 +445,41 @@ class DataImportService:
            # Enhanced column mapping
            column_mapping = self._detect_columns(list(rows[0].keys()))
            
-            records_created = 0
-            errors = []
-            warnings = []
+            # Pre-process to extract unique products for batch creation
+            unique_products = set()
+            parsed_rows = []
            
-            logger.info(f"Processing {len(rows)} records from CSV with enhanced mapping")
+            logger.info(f"Pre-processing {len(rows)} records to identify unique products")
            
            for index, row in enumerate(rows):
                try:
                    # Enhanced data parsing and validation
                    parsed_data = await self._parse_row_data(row, column_mapping, index + 1)
-                    if parsed_data.get("skip"):
-                        errors.extend(parsed_data.get("errors", []))
-                        warnings.extend(parsed_data.get("warnings", []))
-                        continue
-                    
-                    # Resolve product name to inventory_product_id
+                    if not parsed_data.get("skip"):
+                        unique_products.add((
+                            parsed_data["product_name"],
+                            parsed_data.get("product_category", "general")
+                        ))
+                        parsed_rows.append((index, parsed_data))
+                except Exception as e:
+                    logger.warning(f"Failed to parse row {index + 1}: {e}")
+                    continue
+            
+            logger.info(f"Found {len(unique_products)} unique products, attempting batch resolution")
+            
+            # Try to resolve/create all unique products in batch
+            await self._batch_resolve_products(unique_products, tenant_id)
+            
+            # Now process the actual sales records
+            records_created = 0
+            errors = []
+            warnings = []
+            
+            logger.info(f"Processing {len(parsed_rows)} validated records for sales creation")
+            
+            for index, parsed_data in parsed_rows:
+                try:
+                    # Resolve product name to inventory_product_id (should be cached now)
                    inventory_product_id = await self._resolve_product_to_inventory_id(
                        parsed_data["product_name"], 
                        parsed_data.get("product_category"),
@@ -914,47 +933,57 @@ class DataImportService:
        logger.info("Import cache cleared for new session")
    
    async def _resolve_product_to_inventory_id(self, product_name: str, product_category: Optional[str], tenant_id: UUID) -> Optional[UUID]:
-        """Resolve a product name to an inventory_product_id via the inventory service with caching and rate limiting"""
+        """Resolve a product name to an inventory_product_id via the inventory service with improved error handling and fallback"""
        
        # Check cache first
        if product_name in self.product_cache:
            logger.debug("Product resolved from cache", product_name=product_name, tenant_id=tenant_id)
            return self.product_cache[product_name]
        
-        # Skip if this product already failed to resolve
+        # Skip if this product already failed to resolve after all attempts
        if product_name in self.failed_products:
            logger.debug("Skipping previously failed product", product_name=product_name, tenant_id=tenant_id)
            return None
        
-        max_retries = 3
-        base_delay = 1.0  # Start with 1 second delay
+        max_retries = 5  # Increased retries
+        base_delay = 2.0  # Increased base delay
+        fallback_retry_delay = 10.0  # Longer delay for fallback attempts
        
        for attempt in range(max_retries):
            try:
-                # Add delay before API calls to avoid rate limiting
+                # Add progressive delay to avoid rate limiting
                if attempt > 0:
-                    delay = base_delay * (2 ** (attempt - 1))  # Exponential backoff
+                    # Attempts 1-2 back off exponentially (2s, then 4s); attempts 3+ wait the fixed fallback delay
+                    if attempt >= 3:
+                        delay = fallback_retry_delay  # Use fallback delay for later attempts
+                    else:
+                        delay = base_delay * (2 ** (attempt - 1))  # Exponential backoff
+                    
                    logger.info(f"Retrying product resolution after {delay}s delay", 
                              product_name=product_name, attempt=attempt, tenant_id=tenant_id)
                    await asyncio.sleep(delay)
                
                # First try to search for existing product by name
-                products = await self.inventory_client.search_products(product_name, tenant_id)
+                try:
+                    products = await self.inventory_client.search_products(product_name, tenant_id)
+                    
+                    if products:
+                        # Return the first matching product's ID
+                        product_id = products[0].get('id')
+                        if product_id:
+                            uuid_id = UUID(str(product_id))
+                            self.product_cache[product_name] = uuid_id  # Cache for future use
+                            logger.info("Resolved product to existing inventory ID", 
+                                      product_name=product_name, product_id=product_id, tenant_id=tenant_id)
+                            return uuid_id
+                except Exception as search_error:
+                    logger.warning("Product search failed, trying direct creation", 
+                                 product_name=product_name, error=str(search_error), tenant_id=tenant_id)
                
-                if products:
-                    # Return the first matching product's ID
-                    product_id = products[0].get('id')
-                    if product_id:
-                        uuid_id = UUID(str(product_id))
-                        self.product_cache[product_name] = uuid_id  # Cache for future use
-                        logger.info("Resolved product to existing inventory ID", 
-                                  product_name=product_name, product_id=product_id, tenant_id=tenant_id)
-                        return uuid_id
+                # Add delay before creation attempt to avoid hitting rate limits
+                await asyncio.sleep(1.0)
                
-                # Add small delay before creation attempt to avoid hitting rate limits
-                await asyncio.sleep(0.5)
-                
-                # If not found, create a new ingredient/product in inventory
+                # If not found or search failed, create a new ingredient/product in inventory
                ingredient_data = {
                    'name': product_name,
                    'type': 'finished_product',  # Assuming sales are of finished products
@@ -965,36 +994,133 @@ class DataImportService:
                    'category': product_category or 'general'
                }
                
-                created_product = await self.inventory_client.create_ingredient(ingredient_data, str(tenant_id))
-                if created_product and created_product.get('id'):
-                    product_id = created_product['id']
-                    uuid_id = UUID(str(product_id))
-                    self.product_cache[product_name] = uuid_id  # Cache for future use
-                    logger.info("Created new inventory product for sales data", 
-                              product_name=product_name, product_id=product_id, tenant_id=tenant_id)
-                    return uuid_id
+                try:
+                    created_product = await self.inventory_client.create_ingredient(ingredient_data, str(tenant_id))
+                    if created_product and created_product.get('id'):
+                        product_id = created_product['id']
+                        uuid_id = UUID(str(product_id))
+                        self.product_cache[product_name] = uuid_id  # Cache for future use
+                        logger.info("Created new inventory product for sales data", 
+                                  product_name=product_name, product_id=product_id, tenant_id=tenant_id)
+                        return uuid_id
+                except Exception as creation_error:
+                    logger.warning("Product creation failed", 
+                                 product_name=product_name, error=str(creation_error), tenant_id=tenant_id)
                
                logger.warning("Failed to resolve or create product in inventory", 
                             product_name=product_name, tenant_id=tenant_id, attempt=attempt)
                             
            except Exception as e:
                error_str = str(e)
-                if "429" in error_str or "rate limit" in error_str.lower():
-                    logger.warning("Rate limit hit, retrying", 
+                if "429" in error_str or "rate limit" in error_str.lower() or "too many requests" in error_str.lower():
+                    logger.warning("Rate limit or service overload detected, retrying with longer delay", 
                                 product_name=product_name, attempt=attempt, error=error_str, tenant_id=tenant_id)
                    if attempt < max_retries - 1:
                        continue  # Retry with exponential backoff
+                elif "503" in error_str or "502" in error_str or "service unavailable" in error_str.lower():
+                    logger.warning("Service unavailable, retrying with backoff", 
+                                 product_name=product_name, attempt=attempt, error=error_str, tenant_id=tenant_id)
+                    if attempt < max_retries - 1:
+                        continue  # Retry for service unavailable errors
+                elif "timeout" in error_str.lower() or "connection" in error_str.lower():
+                    logger.warning("Network issue detected, retrying", 
+                                 product_name=product_name, attempt=attempt, error=error_str, tenant_id=tenant_id)
+                    if attempt < max_retries - 1:
+                        continue  # Retry for network issues
                else:
-                    logger.error("Error resolving product to inventory ID", 
+                    logger.error("Non-retryable error resolving product to inventory ID", 
                               error=error_str, product_name=product_name, tenant_id=tenant_id)
-                    break  # Don't retry for non-rate-limit errors
+                    if attempt < max_retries - 1:
+                        # Still retry even for other errors, in case it's transient
+                        continue
+                    else:
+                        break  # Don't retry on final attempt
        
-        # If all retries failed, mark as failed and return None
+        # All retries failed: log the error, but do not mark the product as permanently failed yet.
+        # First attempt the minimal-data fallback creation below.
+        logger.error("Failed to resolve product after all retries, attempting fallback", 
+                   product_name=product_name, tenant_id=tenant_id)
+        
+        # FALLBACK: Try to create a temporary product with minimal data
+        try:
+            # Use a simplified approach with minimal data
+            fallback_data = {
+                'name': product_name,
+                'type': 'finished_product',
+                'unit': 'unit',
+                'current_stock': 0,
+                'cost_per_unit': 0
+            }
+            
+            logger.info("Attempting fallback product creation with minimal data", 
+                      product_name=product_name, tenant_id=tenant_id)
+            
+            created_product = await self.inventory_client.create_ingredient(fallback_data, str(tenant_id))
+            if created_product and created_product.get('id'):
+                product_id = created_product['id']
+                uuid_id = UUID(str(product_id))
+                self.product_cache[product_name] = uuid_id
+                logger.info("SUCCESS: Fallback product creation succeeded", 
+                          product_name=product_name, product_id=product_id, tenant_id=tenant_id)
+                return uuid_id
+        except Exception as fallback_error:
+            logger.error("Fallback product creation also failed", 
+                       product_name=product_name, error=str(fallback_error), tenant_id=tenant_id)
+        
+        # Only mark as permanently failed after all attempts including fallback
        self.failed_products.add(product_name)
-        logger.error("Failed to resolve product after all retries", 
+        logger.error("CRITICAL: Permanently failed to resolve product - this will result in missing training data", 
                   product_name=product_name, tenant_id=tenant_id)
        return None
    
+    async def _batch_resolve_products(self, unique_products: set, tenant_id: str) -> None:
+        """Batch resolve/create products to reduce API calls and improve success rate"""
+        
+        if not unique_products:
+            return
+        
+        logger.info(f"Starting batch product resolution for {len(unique_products)} unique products")
+        
+        # Convert set to list for easier handling
+        products_list = list(unique_products)
+        batch_size = 5  # Process in smaller batches to avoid overwhelming the inventory service
+        
+        for i in range(0, len(products_list), batch_size):
+            batch = products_list[i:i + batch_size]
+            logger.info(f"Processing batch {i//batch_size + 1}/{(len(products_list) + batch_size - 1)//batch_size}")
+            
+            # Process each product in the batch with retry logic
+            for product_name, product_category in batch:
+                try:
+                    # Skip if already in cache or failed list
+                    if product_name in self.product_cache or product_name in self.failed_products:
+                        continue
+                    
+                    # Try to resolve the product
+                    await self._resolve_product_to_inventory_id(product_name, product_category, tenant_id)
+                    
+                    # Add small delay between products to be gentle on the API
+                    await asyncio.sleep(0.5)
+                    
+                except Exception as e:
+                    logger.warning(f"Failed to batch process product {product_name}: {e}")
+                    continue
+            
+            # Add delay between batches
+            if i + batch_size < len(products_list):
+                logger.info("Waiting between batches to avoid rate limiting...")
+                await asyncio.sleep(2.0)
+        
+        successful_resolutions = len([p for p, _ in products_list if p in self.product_cache])
+        failed_resolutions = len([p for p, _ in products_list if p in self.failed_products])
+        
+        logger.info(f"Batch product resolution completed: {successful_resolutions} successful, {failed_resolutions} failed")
+        
+        if failed_resolutions > 0:
+            logger.warning(f"ATTENTION: {failed_resolutions} products failed to resolve - these will be missing from training data")
+            
+        return
+    
    def _structure_messages(self, messages: List[Union[str, Dict]]) -> List[Dict[str, Any]]:
        """Convert string messages to structured format"""
        structured = []
--- a/services/sales/app/services/sales_service.py
+++ b/services/sales/app/services/sales_service.py
@@ -285,26 +285,6 @@ class SalesService:
            # Don't fail the main operation for auxiliary actions
            logger.warning("Failed to execute post-create actions", error=str(e), record_id=record.id)
            
-    async def get_products_list(self, tenant_id: str) -> List[Dict[str, Any]]:
-        """Get list of all products with sales data for tenant using repository pattern"""
-        try:
-            async with get_db_transaction() as db:
-                repository = SalesRepository(db)
-                
-                # Use repository method for product statistics
-                products = await repository.get_product_statistics(tenant_id)
-                
-                logger.debug("Products list retrieved successfully", 
-                            tenant_id=tenant_id, 
-                            product_count=len(products))
-                
-                return products
-                
-        except Exception as e:
-            logger.error("Failed to get products list", 
-                        error=str(e), 
-                        tenant_id=tenant_id)
-            raise DatabaseError(f"Failed to get products list: {str(e)}")
    
    # New inventory integration methods
    async def search_inventory_products(self, search_term: str, tenant_id: UUID, 
--- a/services/training/app/api/training.py
+++ b/services/training/app/api/training.py
@@ -186,6 +186,15 @@ async def execute_enhanced_training_job_background(
    enhanced_training_service = EnhancedTrainingService(database_manager)

    try:
+        # Create initial training log entry first
+        await enhanced_training_service._update_job_status_repository(
+            job_id=job_id,
+            status="pending",
+            progress=0,
+            current_step="Starting enhanced training job",
+            tenant_id=tenant_id
+        )
+        
        # Publish job started event
        await publish_job_started(job_id, tenant_id, {
            "enhanced_features": True,
@@ -214,7 +223,8 @@ async def execute_enhanced_training_job_background(
            job_id=job_id,
            status="running",
            progress=0,
-            current_step="Initializing enhanced training pipeline"
+            current_step="Initializing enhanced training pipeline",
+            tenant_id=tenant_id
        )
        
        # Execute the enhanced training pipeline with repository pattern
@@ -232,7 +242,8 @@ async def execute_enhanced_training_job_background(
            status="completed",
            progress=100,
            current_step="Enhanced training completed successfully",
-            results=result
+            results=result,
+            tenant_id=tenant_id
        )
        
        # Publish enhanced completion event
@@ -262,7 +273,8 @@ async def execute_enhanced_training_job_background(
                status="failed",
                progress=0,
                current_step="Enhanced training failed",
-                error_message=str(training_error)
+                error_message=str(training_error),
+                tenant_id=tenant_id
            )
        except Exception as status_error:
            logger.error("Failed to update job status after training error",
--- a/services/training/app/ml/trainer.py
+++ b/services/training/app/ml/trainer.py
@@ -92,9 +92,27 @@ class EnhancedBakeryMLTrainer:
                
                # Get unique products from the sales data
                products = sales_df['inventory_product_id'].unique().tolist()
-                logger.info("Training enhanced models",
+                
+                # Debug: Log sales data details to understand why only one product is found
+                total_sales_records = len(sales_df)
+                sales_by_product = sales_df.groupby('inventory_product_id').size().to_dict()
+                
+                logger.info("Enhanced training pipeline - Sales data analysis",
+                           total_sales_records=total_sales_records,
                           products_count=len(products),
-                           products=products)
+                           products=products,
+                           sales_by_product=sales_by_product)
+                
+                if len(products) == 1:
+                    logger.warning("Only ONE product found in sales data - this may indicate a data fetching issue",
+                                 tenant_id=tenant_id,
+                                 single_product_id=products[0],
+                                 total_sales_records=total_sales_records)
+                elif len(products) == 0:
+                    raise ValueError("No products found in sales data")
+                else:
+                    logger.info("Multiple products detected for training",
+                               products_count=len(products))
                
                self.status_publisher.products_total = len(products)
                
@@ -512,7 +530,7 @@ class EnhancedBakeryMLTrainer:
                       from_column='quantity_sold',
                       to_column='quantity')
        
-        required_columns = ['date', 'product_name', 'quantity']
+        required_columns = ['date', 'inventory_product_id', 'quantity']
        missing_columns = [col for col in required_columns if col not in sales_df.columns]
        if missing_columns:
            raise ValueError(f"Missing required columns: {missing_columns}")
@@ -541,7 +559,7 @@ class EnhancedBakeryMLTrainer:
        try:
            logger.info("Enhanced model evaluation starting",
                       tenant_id=tenant_id,
-                       product_name=product_name)
+                       inventory_product_id=inventory_product_id)
            
            # Get database session and repositories
            async with self.database_manager.get_session() as db_session:
--- a/services/training/app/services/training_orchestrator.py
+++ b/services/training/app/services/training_orchestrator.py
@@ -574,13 +574,14 @@ class TrainingDataOrchestrator:
            if city_count >= 1:  # At least some city awareness
                city_aware_records += 1
            
-            # Record is valid if it has basic requirements
-            if record_score >= 2:
+            # Record is valid if it meets at least one basic requirement (a date or any traffic field).
+            # Requirement lowered from >= 2 to >= 1 so records with only a date or only traffic data are accepted.
+            if record_score >= 1:
                valid_records += 1
        
        total_records = len(traffic_data)
-        validity_threshold = 0.3
-        enhancement_threshold = 0.2  # Lower threshold for enhanced features
+        validity_threshold = 0.1  # Reduced from 0.3 to 0.1 - accept if 10% of records are valid
+        enhancement_threshold = 0.1  # Reduced from 0.2 to 0.1 for enhanced features
        
        basic_validity = (valid_records / total_records) >= validity_threshold
        has_enhancements = (enhanced_records / total_records) >= enhancement_threshold
--- a/services/training/app/services/training_service.py
+++ b/services/training/app/services/training_service.py
@@ -141,6 +141,30 @@ class EnhancedTrainingService:
                    logger.error("Training aborted - no sales data", tenant_id=tenant_id, job_id=job_id)
                    raise ValueError(error_msg)
                
+                # Debug: Analyze the sales data structure to understand product distribution
+                sales_df_debug = pd.DataFrame(sales_data)
+                if 'inventory_product_id' in sales_df_debug.columns:
+                    unique_products_found = sales_df_debug['inventory_product_id'].unique()
+                    product_counts = sales_df_debug['inventory_product_id'].value_counts().to_dict()
+                    
+                    logger.info("Pre-flight sales data analysis",
+                               tenant_id=tenant_id, 
+                               job_id=job_id,
+                               total_sales_records=len(sales_data),
+                               unique_products_count=len(unique_products_found),
+                               unique_products=unique_products_found.tolist(),
+                               records_per_product=product_counts)
+                    
+                    if len(unique_products_found) == 1:
+                        logger.warning("POTENTIAL ISSUE: Only ONE unique product found in all sales data",
+                                     tenant_id=tenant_id,
+                                     single_product=unique_products_found[0],
+                                     record_count=len(sales_data))
+                else:
+                    logger.warning("No 'inventory_product_id' column found in sales data", 
+                                 tenant_id=tenant_id,
+                                 columns=list(sales_df_debug.columns))
+                
                logger.info(f"Pre-flight check passed: {len(sales_data)} sales records found", 
                           tenant_id=tenant_id, job_id=job_id)
                
@@ -536,18 +560,69 @@ class EnhancedTrainingService:
                                          progress: int = None,
                                          current_step: str = None,
                                          error_message: str = None,
-                                          results: Dict = None):
+                                          results: Dict = None,
+                                          tenant_id: str = None):
        """Update job status using repository pattern"""
        try:
            async with self.database_manager.get_session() as session:
                await self._init_repositories(session)
                
-                await self.training_log_repo.update_log_progress(
-                    job_id=job_id,
-                    progress=progress,
-                    current_step=current_step,
-                    status=status
-                )
+                # Check if log exists, create if not
+                existing_log = await self.training_log_repo.get_log_by_job_id(job_id)
+                
+                if not existing_log:
+                    # Create initial log entry
+                    if not tenant_id:
+                        # Extract tenant_id from job_id if not provided
+                        # Format: enhanced_training_{tenant_id}_{job_suffix}
+                        try:
+                            parts = job_id.split('_')
+                            if len(parts) >= 3 and parts[0] == 'enhanced' and parts[1] == 'training':
+                                tenant_id = parts[2]
+                        except Exception:
+                            logger.warning(f"Could not extract tenant_id from job_id {job_id}")
+                    
+                    if tenant_id:
+                        log_data = {
+                            "job_id": job_id,
+                            "tenant_id": tenant_id,
+                            "status": status or "pending",
+                            "progress": progress or 0,
+                            "current_step": current_step or "initializing",
+                            "start_time": datetime.utcnow()
+                        }
+                        
+                        if error_message:
+                            log_data["error_message"] = error_message
+                        if results:
+                            log_data["results"] = results
+                            
+                        await self.training_log_repo.create_training_log(log_data)
+                        logger.info("Created initial training log", job_id=job_id, tenant_id=tenant_id)
+                    else:
+                        logger.error("Cannot create training log without tenant_id", job_id=job_id)
+                        return
+                else:
+                    # Update existing log
+                    await self.training_log_repo.update_log_progress(
+                        job_id=job_id,
+                        progress=progress,
+                        current_step=current_step,
+                        status=status
+                    )
+                    
+                    # Update additional fields if provided
+                    if error_message or results:
+                        update_data = {}
+                        if error_message:
+                            update_data["error_message"] = error_message
+                        if results:
+                            update_data["results"] = results
+                        if status in ["completed", "failed"]:
+                            update_data["end_time"] = datetime.utcnow()
+                            
+                        if update_data:
+                            await self.training_log_repo.update(existing_log.id, update_data)
                
        except Exception as e:
            logger.error("Failed to update job status using repository",