REFACTOR - Database logic

This commit is contained in:
Urtzi Alfaro
2025-08-08 09:08:41 +02:00
parent 0154365bfc
commit 488bb3ef93
113 changed files with 22842 additions and 6503 deletions

View File

@@ -0,0 +1,27 @@
"""
Forecasting Service Layer
Business logic services for demand forecasting and prediction
"""
from .forecasting_service import ForecastingService, EnhancedForecastingService
from .prediction_service import PredictionService
from .model_client import ModelClient
from .data_client import DataClient
from .messaging import (
publish_forecast_generated,
publish_batch_forecast_completed,
publish_forecast_alert,
ForecastingStatusPublisher
)
__all__ = [
"ForecastingService",
"EnhancedForecastingService",
"PredictionService",
"ModelClient",
"DataClient",
"publish_forecast_generated",
"publish_batch_forecast_completed",
"publish_forecast_alert",
"ForecastingStatusPublisher"
]
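
A minimal consumption sketch of the service layer re-exported above. The import path (app.services) and the "model_path" key are assumptions, since file names are not shown on this page; the constructors and the predict() signature come from the diffs below.

from typing import Optional

# Hypothetical wiring; the import path and payload keys are assumptions.
from app.services import ModelClient, PredictionService, publish_forecast_generated

async def forecast_for_tenant(tenant_id: str, features: dict) -> Optional[dict]:
    """Pick any available model for the tenant, predict, then emit an event."""
    model_client = ModelClient()              # builds its own database manager by default
    prediction_service = PredictionService()

    model = await model_client.get_any_model_for_tenant(tenant_id)
    if model is None:
        return None

    prediction = await prediction_service.predict(
        model_id=model.get("id"),
        model_path=model.get("model_path", ""),   # assumed key name
        features=features,
    )
    await publish_forecast_generated({"tenant_id": tenant_id, "prediction": prediction})
    return prediction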

File diff suppressed because it is too large

View File

@@ -149,4 +149,67 @@ async def publish_forecasts_deleted_event(tenant_id: str, deletion_stats: Dict[s
}
)
except Exception as e:
logger.error("Failed to publish forecasts deletion event", error=str(e))
logger.error("Failed to publish forecasts deletion event", error=str(e))
# Additional publishing functions for compatibility
async def publish_forecast_generated(data: dict) -> bool:
"""Publish forecast generated event"""
try:
if rabbitmq_client:
await rabbitmq_client.publish_event(
exchange="forecasting_events",
routing_key="forecast.generated",
message=data
)
return True
except Exception as e:
logger.error("Failed to publish forecast generated event", error=str(e))
return False
async def publish_batch_forecast_completed(data: dict) -> bool:
"""Publish batch forecast completed event"""
try:
if rabbitmq_client:
await rabbitmq_client.publish_event(
exchange="forecasting_events",
routing_key="forecast.batch.completed",
message=data
)
return True
except Exception as e:
logger.error("Failed to publish batch forecast event", error=str(e))
return False
async def publish_forecast_alert(data: dict) -> bool:
"""Publish forecast alert event"""
try:
if rabbitmq_client:
await rabbitmq_client.publish_event(
exchange="forecasting_events",
routing_key="forecast.alert",
message=data
)
return True
except Exception as e:
logger.error("Failed to publish forecast alert event", error=str(e))
return False
# Publisher class for compatibility
class ForecastingStatusPublisher:
"""Publisher for forecasting status events"""
async def publish_status(self, status: str, data: dict) -> bool:
"""Publish forecasting status"""
try:
if rabbitmq_client:
await rabbitmq_client.publish_event(
exchange="forecasting_events",
routing_key=f"forecast.status.{status}",
message=data
)
return True
except Exception as e:
logger.error(f"Failed to publish {status} status", error=str(e))
return False
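
A short usage sketch for the compatibility helpers above, assuming they are imported from this package's messaging module (the real path is not shown) and that the payload keys are illustrative rather than a schema defined in this commit. Note that the helpers swallow broker errors and return False instead of raising.

from datetime import datetime, timezone

# Assumed import path; payload fields and the "degraded" status value are illustrative.
from app.services.messaging import publish_forecast_generated, ForecastingStatusPublisher

async def notify_forecast_ready(tenant_id: str, forecast_id: str) -> None:
    ok = await publish_forecast_generated({
        "tenant_id": tenant_id,
        "forecast_id": forecast_id,
        "generated_at": datetime.now(timezone.utc).isoformat(),
    })
    if not ok:
        # Fall back to a status event when the primary publish fails.
        publisher = ForecastingStatusPublisher()
        await publisher.publish_status("degraded", {"tenant_id": tenant_id})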

View File

@@ -9,17 +9,22 @@ from typing import Dict, Any, List, Optional
# Import shared clients - no more code duplication!
from shared.clients import get_service_clients, get_training_client, get_data_client
from shared.database.base import create_database_manager
from app.core.config import settings
logger = structlog.get_logger()
class ModelClient:
"""
Client for managing models in forecasting service
Client for managing models in the forecasting service, with dependency injection
Shows how to call multiple services cleanly
"""
def __init__(self):
def __init__(self, database_manager=None):
self.database_manager = database_manager or create_database_manager(
settings.DATABASE_URL, "forecasting-service"
)
# Option 1: Get all clients at once
self.clients = get_service_clients(settings, "forecasting")
@@ -114,6 +119,36 @@ class ModelClient:
logger.error(f"Error selecting best model: {e}", tenant_id=tenant_id)
return None
async def get_any_model_for_tenant(
self,
tenant_id: str
) -> Optional[Dict[str, Any]]:
"""
Get any available model for a tenant; used as a fallback when no product-specific model is found
"""
try:
# First try to get any active models for this tenant
models = await self.get_available_models(tenant_id)
if models:
# Return the most recently trained model
sorted_models = sorted(models, key=lambda x: x.get('created_at', ''), reverse=True)
best_model = sorted_models[0]
logger.info("Found fallback model for tenant",
tenant_id=tenant_id,
model_id=best_model.get('id', 'unknown'),
product=best_model.get('product_name', 'unknown'))
return best_model
logger.warning("No fallback models available for tenant", tenant_id=tenant_id)
return None
except Exception as e:
logger.error("Error getting fallback model for tenant",
tenant_id=tenant_id,
error=str(e))
return None
async def validate_model_data_compatibility(
self,
tenant_id: str,

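The constructor change above makes the database dependency injectable, which is mainly useful for tests. A sketch under stated assumptions: pytest-asyncio is available, ModelClient is importable as shown, and the injected stub only needs to exist (the database-manager interface itself is not shown in this diff).

from unittest.mock import AsyncMock, MagicMock

import pytest

from app.services.model_client import ModelClient  # assumed import path

@pytest.mark.asyncio
async def test_get_any_model_for_tenant_returns_newest_model():
    fake_db = MagicMock(name="database_manager")    # stand-in for create_database_manager(...)
    # Note: __init__ still calls get_service_clients(settings, "forecasting"),
    # which may need configuration or patching in a real test environment.
    client = ModelClient(database_manager=fake_db)

    # Stub the lookup that get_any_model_for_tenant relies on.
    client.get_available_models = AsyncMock(return_value=[
        {"id": "m-old", "created_at": "2025-01-01T00:00:00"},
        {"id": "m-new", "created_at": "2025-08-01T00:00:00"},
    ])

    model = await client.get_any_model_for_tenant("tenant-123")
    assert model["id"] == "m-new"                   # most recently trained model wins
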
View File

@@ -19,20 +19,50 @@ import joblib
from app.core.config import settings
from shared.monitoring.metrics import MetricsCollector
from shared.database.base import create_database_manager
logger = structlog.get_logger()
metrics = MetricsCollector("forecasting-service")
class PredictionService:
"""
Service for loading ML models and generating predictions
Service for loading ML models and generating predictions, with dependency injection
Interfaces with trained Prophet models from the training service
"""
def __init__(self):
def __init__(self, database_manager=None):
self.database_manager = database_manager or create_database_manager(settings.DATABASE_URL, "forecasting-service")
self.model_cache = {}
self.cache_ttl = 3600 # 1 hour cache
async def validate_prediction_request(self, request: Dict[str, Any]) -> Dict[str, Any]:
"""Validate prediction request"""
try:
required_fields = ["product_name", "model_id", "features"]
missing_fields = [field for field in required_fields if field not in request]
if missing_fields:
return {
"is_valid": False,
"errors": [f"Missing required fields: {missing_fields}"],
"validation_passed": False
}
return {
"is_valid": True,
"errors": [],
"validation_passed": True,
"validated_fields": list(request.keys())
}
except Exception as e:
logger.error("Validation error", error=str(e))
return {
"is_valid": False,
"errors": [str(e)],
"validation_passed": False
}
async def predict(self, model_id: str, model_path: str, features: Dict[str, Any],
confidence_level: float = 0.8) -> Dict[str, float]:
"""Generate prediction using trained model"""
@@ -74,10 +104,37 @@ class PredictionService:
# Record metrics
processing_time = (datetime.now() - start_time).total_seconds()
# Record metrics with proper type conversion
# Record metrics with proper registration and error handling
try:
metrics.register_histogram("prediction_processing_time_seconds", float(processing_time))
metrics.increment_counter("predictions_served_total")
# Register metrics if not already registered
if "prediction_processing_time" not in metrics._histograms:
metrics.register_histogram(
"prediction_processing_time",
"Time taken to process predictions",
labels=['service', 'model_type']
)
if "predictions_served_total" not in metrics._counters:
try:
metrics.register_counter(
"predictions_served_total",
"Total number of predictions served",
labels=['service', 'status']
)
except Exception as reg_error:
# Metric might already exist in global registry
logger.debug("Counter already exists in registry", error=str(reg_error))
# Now record the metrics
metrics.observe_histogram(
"prediction_processing_time",
processing_time,
labels={'service': 'forecasting-service', 'model_type': 'prophet'}
)
metrics.increment_counter(
"predictions_served_total",
labels={'service': 'forecasting-service', 'status': 'success'}
)
except Exception as metrics_error:
# Log metrics error but don't fail the prediction
logger.warning("Failed to record metrics", error=str(metrics_error))
@@ -93,7 +150,19 @@ class PredictionService:
logger.error("Error generating prediction",
error=str(e),
model_id=model_id)
metrics.increment_counter("prediction_errors_total")
try:
if "prediction_errors_total" not in metrics._counters:
metrics.register_counter(
"prediction_errors_total",
"Total number of prediction errors",
labels=['service', 'error_type']
)
metrics.increment_counter(
"prediction_errors_total",
labels={'service': 'forecasting-service', 'error_type': 'prediction_failed'}
)
except Exception:
pass # Don't fail on metrics errors
raise
async def _load_model(self, model_id: str, model_path: str):
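
The register-then-record pattern above guards against double registration in a shared registry. A condensed sketch of the same idea as a reusable helper; it assumes MetricsCollector exposes the _counters mapping and the register_counter/increment_counter calls used in this diff.

import structlog

logger = structlog.get_logger()

def record_counter_safely(collector, name: str, description: str, labels: dict) -> None:
    """Check-register-record in one place; metrics must never break the request path."""
    try:
        if name not in collector._counters:
            try:
                collector.register_counter(name, description, labels=list(labels.keys()))
            except Exception:
                pass  # already present in the global registry
        collector.increment_counter(name, labels=labels)
    except Exception as exc:
        logger.warning("Failed to record metric", metric=name, error=str(exc))

# Mirrors the inline version in predict(), e.g.:
# record_counter_safely(metrics, "predictions_served_total",
#                       "Total number of predictions served",
#                       {"service": "forecasting-service", "status": "success"})
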
@@ -268,139 +337,149 @@ class PredictionService:
df['is_autumn'] = int(df['season'].iloc[0] == 4)
df['is_winter'] = int(df['season'].iloc[0] == 1)
# Holiday features
df['is_holiday'] = int(features.get('is_holiday', False))
df['is_school_holiday'] = int(features.get('is_school_holiday', False))
# ✅ PERFORMANCE FIX: Build all features at once to avoid DataFrame fragmentation
# Month-based features (match training)
df['is_january'] = int(forecast_date.month == 1)
df['is_february'] = int(forecast_date.month == 2)
df['is_march'] = int(forecast_date.month == 3)
df['is_april'] = int(forecast_date.month == 4)
df['is_may'] = int(forecast_date.month == 5)
df['is_june'] = int(forecast_date.month == 6)
df['is_july'] = int(forecast_date.month == 7)
df['is_august'] = int(forecast_date.month == 8)
df['is_september'] = int(forecast_date.month == 9)
df['is_october'] = int(forecast_date.month == 10)
df['is_november'] = int(forecast_date.month == 11)
df['is_december'] = int(forecast_date.month == 12)
# Special day features
df['is_month_start'] = int(forecast_date.day <= 3)
df['is_month_end'] = int(forecast_date.day >= 28)
df['is_payday_period'] = int((forecast_date.day <= 5) or (forecast_date.day >= 25))
# ✅ FIX: Add ALL derived features that training service creates
# Weather-based derived features
df['temp_squared'] = df['temperature'].iloc[0] ** 2
df['is_cold_day'] = int(df['temperature'].iloc[0] < 10)
df['is_hot_day'] = int(df['temperature'].iloc[0] > 25)
df['is_pleasant_day'] = int(10 <= df['temperature'].iloc[0] <= 25)
# Humidity features
df['humidity_squared'] = df['humidity'].iloc[0] ** 2
df['is_high_humidity'] = int(df['humidity'].iloc[0] > 70)
df['is_low_humidity'] = int(df['humidity'].iloc[0] < 40)
# Pressure features
df['pressure_squared'] = df['pressure'].iloc[0] ** 2
df['is_high_pressure'] = int(df['pressure'].iloc[0] > 1020)
df['is_low_pressure'] = int(df['pressure'].iloc[0] < 1000)
# Wind features
df['wind_squared'] = df['wind_speed'].iloc[0] ** 2
df['is_windy'] = int(df['wind_speed'].iloc[0] > 15)
df['is_calm'] = int(df['wind_speed'].iloc[0] < 5)
# Precipitation features
df['precip_squared'] = df['precipitation'].iloc[0] ** 2
df['precip_log'] = float(np.log1p(df['precipitation'].iloc[0]))
df['is_rainy_day'] = int(df['precipitation'].iloc[0] > 0.1)
df['is_very_rainy_day'] = int(df['precipitation'].iloc[0] > 5.0)
df['is_heavy_rain'] = int(df['precipitation'].iloc[0] > 10)
df['rain_intensity'] = self._get_rain_intensity(df['precipitation'].iloc[0])
# ✅ FIX: Add ALL traffic-based derived features
if df['traffic_volume'].iloc[0] > 0:
traffic = df['traffic_volume'].iloc[0]
df['high_traffic'] = int(traffic > 150)
df['low_traffic'] = int(traffic < 50)
df['traffic_normalized'] = float((traffic - 100) / 50)
df['traffic_squared'] = traffic ** 2
df['traffic_log'] = float(np.log1p(traffic))
else:
df['high_traffic'] = 0
df['low_traffic'] = 0
df['traffic_normalized'] = 0.0
df['traffic_squared'] = 0.0
df['traffic_log'] = 0.0
# ✅ FIX: Add pedestrian-based features
pedestrians = df['pedestrian_count'].iloc[0]
df['high_pedestrian_count'] = int(pedestrians > 100)
df['low_pedestrian_count'] = int(pedestrians < 25)
df['pedestrian_normalized'] = float((pedestrians - 50) / 25)
df['pedestrian_squared'] = pedestrians ** 2
df['pedestrian_log'] = float(np.log1p(pedestrians))
# ✅ FIX: Add average_speed-based features
avg_speed = df['average_speed'].iloc[0]
df['high_speed'] = int(avg_speed > 40)
df['low_speed'] = int(avg_speed < 20)
df['speed_normalized'] = float((avg_speed - 30) / 10)
df['speed_squared'] = avg_speed ** 2
df['speed_log'] = float(np.log1p(avg_speed))
# ✅ FIX: Add congestion-based features
congestion = df['congestion_level'].iloc[0]
df['high_congestion'] = int(congestion > 3)
df['low_congestion'] = int(congestion < 2)
df['congestion_squared'] = congestion ** 2
# ✅ FIX: Add ALL interaction features that training creates
# Weekend interactions
is_weekend = df['is_weekend'].iloc[0]
# Extract values once to avoid repeated iloc calls
temperature = df['temperature'].iloc[0]
df['weekend_temp_interaction'] = is_weekend * temperature
df['weekend_pleasant_weather'] = is_weekend * df['is_pleasant_day'].iloc[0]
df['weekend_traffic_interaction'] = is_weekend * df['traffic_volume'].iloc[0]
# Holiday interactions
is_holiday = df['is_holiday'].iloc[0]
df['holiday_temp_interaction'] = is_holiday * temperature
df['holiday_traffic_interaction'] = is_holiday * df['traffic_volume'].iloc[0]
# Season interactions
humidity = df['humidity'].iloc[0]
pressure = df['pressure'].iloc[0]
wind_speed = df['wind_speed'].iloc[0]
precipitation = df['precipitation'].iloc[0]
traffic = df['traffic_volume'].iloc[0]
pedestrians = df['pedestrian_count'].iloc[0]
avg_speed = df['average_speed'].iloc[0]
congestion = df['congestion_level'].iloc[0]
season = df['season'].iloc[0]
df['season_temp_interaction'] = season * temperature
df['season_traffic_interaction'] = season * df['traffic_volume'].iloc[0]
is_weekend = df['is_weekend'].iloc[0]
# Rain-traffic interactions
is_rainy = df['is_rainy_day'].iloc[0]
df['rain_traffic_interaction'] = is_rainy * df['traffic_volume'].iloc[0]
df['rain_speed_interaction'] = is_rainy * df['average_speed'].iloc[0]
# Build all new features as a dictionary
new_features = {
# Holiday features
'is_holiday': int(features.get('is_holiday', False)),
'is_school_holiday': int(features.get('is_school_holiday', False)),
# Month-based features
'is_january': int(forecast_date.month == 1),
'is_february': int(forecast_date.month == 2),
'is_march': int(forecast_date.month == 3),
'is_april': int(forecast_date.month == 4),
'is_may': int(forecast_date.month == 5),
'is_june': int(forecast_date.month == 6),
'is_july': int(forecast_date.month == 7),
'is_august': int(forecast_date.month == 8),
'is_september': int(forecast_date.month == 9),
'is_october': int(forecast_date.month == 10),
'is_november': int(forecast_date.month == 11),
'is_december': int(forecast_date.month == 12),
# Special day features
'is_month_start': int(forecast_date.day <= 3),
'is_month_end': int(forecast_date.day >= 28),
'is_payday_period': int((forecast_date.day <= 5) or (forecast_date.day >= 25)),
# Weather-based derived features
'temp_squared': temperature ** 2,
'is_cold_day': int(temperature < 10),
'is_hot_day': int(temperature > 25),
'is_pleasant_day': int(10 <= temperature <= 25),
# Humidity features
'humidity_squared': humidity ** 2,
'is_high_humidity': int(humidity > 70),
'is_low_humidity': int(humidity < 40),
# Pressure features
'pressure_squared': pressure ** 2,
'is_high_pressure': int(pressure > 1020),
'is_low_pressure': int(pressure < 1000),
# Wind features
'wind_squared': wind_speed ** 2,
'is_windy': int(wind_speed > 15),
'is_calm': int(wind_speed < 5),
# Precipitation features
'precip_squared': precipitation ** 2,
'precip_log': float(np.log1p(precipitation)),
'is_rainy_day': int(precipitation > 0.1),
'is_very_rainy_day': int(precipitation > 5.0),
'is_heavy_rain': int(precipitation > 10),
'rain_intensity': self._get_rain_intensity(precipitation),
# Traffic-based features
'high_traffic': int(traffic > 150) if traffic > 0 else 0,
'low_traffic': int(traffic < 50) if traffic > 0 else 0,
'traffic_normalized': float((traffic - 100) / 50) if traffic > 0 else 0.0,
'traffic_squared': traffic ** 2,
'traffic_log': float(np.log1p(traffic)),
# Pedestrian features
'high_pedestrian_count': int(pedestrians > 100),
'low_pedestrian_count': int(pedestrians < 25),
'pedestrian_normalized': float((pedestrians - 50) / 25),
'pedestrian_squared': pedestrians ** 2,
'pedestrian_log': float(np.log1p(pedestrians)),
# Speed features
'high_speed': int(avg_speed > 40),
'low_speed': int(avg_speed < 20),
'speed_normalized': float((avg_speed - 30) / 10),
'speed_squared': avg_speed ** 2,
'speed_log': float(np.log1p(avg_speed)),
# Congestion features
'high_congestion': int(congestion > 3),
'low_congestion': int(congestion < 2),
'congestion_squared': congestion ** 2,
# Day features
'is_peak_bakery_day': int(day_of_week in [4, 5, 6]),
'is_high_demand_month': int(forecast_date.month in [6, 7, 8, 12]),
'is_warm_season': int(forecast_date.month in [4, 5, 6, 7, 8, 9])
}
# Day-weather interactions
df['day_temp_interaction'] = day_of_week * temperature
df['month_temp_interaction'] = forecast_date.month * temperature
# Calculate interaction features
is_holiday = new_features['is_holiday']
is_pleasant = new_features['is_pleasant_day']
is_rainy = new_features['is_rainy_day']
# Traffic-speed interactions
df['traffic_speed_interaction'] = df['traffic_volume'].iloc[0] * df['average_speed'].iloc[0]
df['pedestrian_speed_interaction'] = df['pedestrian_count'].iloc[0] * df['average_speed'].iloc[0]
interaction_features = {
# Weekend interactions
'weekend_temp_interaction': is_weekend * temperature,
'weekend_pleasant_weather': is_weekend * is_pleasant,
'weekend_traffic_interaction': is_weekend * traffic,
# Holiday interactions
'holiday_temp_interaction': is_holiday * temperature,
'holiday_traffic_interaction': is_holiday * traffic,
# Season interactions
'season_temp_interaction': season * temperature,
'season_traffic_interaction': season * traffic,
# Rain-traffic interactions
'rain_traffic_interaction': is_rainy * traffic,
'rain_speed_interaction': is_rainy * avg_speed,
# Day-weather interactions
'day_temp_interaction': day_of_week * temperature,
'month_temp_interaction': forecast_date.month * temperature,
# Traffic-speed interactions
'traffic_speed_interaction': traffic * avg_speed,
'pedestrian_speed_interaction': pedestrians * avg_speed,
# Congestion interactions
'congestion_temp_interaction': congestion * temperature,
'congestion_weekend_interaction': congestion * is_weekend
}
# Congestion-related interactions
df['congestion_temp_interaction'] = congestion * temperature
df['congestion_weekend_interaction'] = congestion * is_weekend
# Combine all features
all_new_features = {**new_features, **interaction_features}
# Add after the existing day-of-week features:
df['is_peak_bakery_day'] = int(day_of_week in [4, 5, 6]) # Friday, Saturday, Sunday
# Add after the month features:
df['is_high_demand_month'] = int(forecast_date.month in [6, 7, 8, 12]) # Summer and December
df['is_warm_season'] = int(forecast_date.month in [4, 5, 6, 7, 8, 9]) # Spring/summer months
# Add all features at once using pd.concat to avoid fragmentation
new_feature_df = pd.DataFrame([all_new_features])
df = pd.concat([df, new_feature_df], axis=1)
logger.debug("Complete Prophet features prepared",
feature_count=len(df.columns),
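
The refactor above replaces dozens of single-column assignments with one dict and a single pd.concat, which avoids pandas' DataFrame fragmentation (repeated block insertion and the associated PerformanceWarning). A standalone illustration with placeholder feature names, not the real feature set:

import numpy as np
import pandas as pd

df = pd.DataFrame({"temperature": [18.5], "precipitation": [0.4]})

# Fragmenting pattern: every assignment inserts a new block into the frame.
# for i in range(60):
#     df[f"feature_{i}"] = float(i)   # emits PerformanceWarning at scale

# Consolidated pattern used in the refactor: build everything in a dict, concat once.
new_features = {
    "temp_squared": df["temperature"].iloc[0] ** 2,
    "is_rainy_day": int(df["precipitation"].iloc[0] > 0.1),
    "precip_log": float(np.log1p(df["precipitation"].iloc[0])),
}
df = pd.concat([df, pd.DataFrame([new_features])], axis=1)
print(df.columns.tolist())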