From 326638b52d0f870741bf65002c8980c69676fa40 Mon Sep 17 00:00:00 2001 From: Urtzi Alfaro Date: Wed, 30 Jul 2025 09:00:17 +0200 Subject: [PATCH] Start fixing forecast service 19 --- services/forecasting/app/main.py | 5 +- .../app/services/forecasting_service.py | 7 +- .../app/services/prediction_service.py | 201 +++++++++--------- 3 files changed, 112 insertions(+), 101 deletions(-) diff --git a/services/forecasting/app/main.py b/services/forecasting/app/main.py index de4f1ad5..808dcc64 100644 --- a/services/forecasting/app/main.py +++ b/services/forecasting/app/main.py @@ -46,9 +46,12 @@ async def lifespan(app: FastAPI): # Register custom metrics metrics_collector.register_counter("forecasts_generated_total", "Total forecasts generated") metrics_collector.register_counter("predictions_served_total", "Total predictions served") - metrics_collector.register_counter("prediction_errors_total", "Total prediction errors") + metrics_collector.register_counter("prediction_errors_total", "Total prediction errors") # ← MISSING REGISTRATION! metrics_collector.register_histogram("forecast_processing_time_seconds", "Time to process forecast request") + metrics_collector.register_histogram("prediction_processing_time_seconds", "Time to process prediction request") # ← ADD MISSING METRIC! metrics_collector.register_gauge("active_models_count", "Number of active models") + metrics_collector.register_counter("model_cache_hits_total", "Total model cache hits") # ← ADD USEFUL METRIC! + metrics_collector.register_counter("model_cache_misses_total", "Total model cache misses") # ← ADD USEFUL METRIC! # Start metrics server metrics_collector.start_metrics_server(8080) diff --git a/services/forecasting/app/services/forecasting_service.py b/services/forecasting/app/services/forecasting_service.py index a0fe7540..2a21025c 100644 --- a/services/forecasting/app/services/forecasting_service.py +++ b/services/forecasting/app/services/forecasting_service.py @@ -228,6 +228,7 @@ class ForecastingService: "humidity": weather.get("humidity", 65.0), "wind_speed": weather.get("wind_speed", 5.0), "pressure": weather.get("pressure", 1013.0), + 'weather_description': weather_data.get('description', 'clear') }) logger.info("Weather data acquired successfully", tenant_id=tenant_id) @@ -251,6 +252,8 @@ class ForecastingService: "humidity": current_weather.get("humidity", 65.0), "wind_speed": current_weather.get("wind_speed", 5.0), "pressure": current_weather.get("pressure", 1013.0), + 'weather_description': current_weather.get('description', 'clear') + }) logger.info("Using current weather as fallback", tenant_id=tenant_id) @@ -286,6 +289,7 @@ class ForecastingService: # features.update({ # "traffic_volume": traffic_data.get("traffic_volume", 100), # "pedestrian_count": traffic_data.get("pedestrian_count", 50), + # "average_speed2" traffic_data.get('average_speed', 30.0) # }) # logger.info("Traffic data acquired successfully", tenant_id=tenant_id) # return @@ -300,7 +304,8 @@ class ForecastingService: features.update({ "traffic_volume": int(100 * weekend_factor), "pedestrian_count": int(50 * weekend_factor), - "congestion_level": 1 + "congestion_level": 1, + 'average_speed': 30.0 }) logger.warning("Using default traffic values", tenant_id=tenant_id) diff --git a/services/forecasting/app/services/prediction_service.py b/services/forecasting/app/services/prediction_service.py index c1d307e9..1500dd87 100644 --- a/services/forecasting/app/services/prediction_service.py +++ b/services/forecasting/app/services/prediction_service.py @@ -214,7 +214,7 @@ class PredictionService: return False def _prepare_prophet_features(self, features: Dict[str, Any]) -> pd.DataFrame: - """Convert features to Prophet-compatible DataFrame - FIXED TO MATCH TRAINING""" + """Convert features to Prophet-compatible DataFrame - COMPLETE FEATURE MATCHING""" try: # Create base DataFrame with required 'ds' column @@ -222,27 +222,25 @@ class PredictionService: 'ds': [pd.to_datetime(features['date'])] }) - # Add numeric features with safe conversion - numeric_features = [ - 'temperature', 'precipitation', 'humidity', 'wind_speed', - 'traffic_volume', 'pedestrian_count', 'pressure' - ] + # ✅ FIX: Add ALL traffic features that training service uses + # Core traffic features + df['traffic_volume'] = float(features.get('traffic_volume', 100.0)) + df['pedestrian_count'] = float(features.get('pedestrian_count', 50.0)) + df['congestion_level'] = float(features.get('congestion_level', 1.0)) + df['average_speed'] = float(features.get('average_speed', 30.0)) # ← MISSING FEATURE! - for feature in numeric_features: - if feature in features and features[feature] is not None: - try: - df[feature] = float(features[feature]) - except (ValueError, TypeError): - logger.warning(f"Could not convert {feature} to float: {features[feature]}") - df[feature] = 0.0 - else: - df[feature] = 0.0 + # Weather features + df['temperature'] = float(features.get('temperature', 15.0)) + df['precipitation'] = float(features.get('precipitation', 0.0)) + df['humidity'] = float(features.get('humidity', 60.0)) + df['wind_speed'] = float(features.get('wind_speed', 5.0)) + df['pressure'] = float(features.get('pressure', 1013.0)) # Extract date information for temporal features forecast_date = pd.to_datetime(features['date']) day_of_week = forecast_date.weekday() # 0=Monday, 6=Sunday - # Add temporal features (MUST match training service exactly!) + # ✅ FIX: Add ALL temporal features (must match training exactly!) df['day_of_week'] = int(day_of_week) df['day_of_month'] = int(forecast_date.day) df['month'] = int(forecast_date.month) @@ -270,8 +268,9 @@ class PredictionService: # Holiday features df['is_holiday'] = int(features.get('is_holiday', False)) + df['is_school_holiday'] = int(features.get('is_school_holiday', False)) - # Month-based features + # Month-based features (match training) df['is_january'] = int(forecast_date.month == 1) df['is_february'] = int(forecast_date.month == 2) df['is_march'] = int(forecast_date.month == 3) @@ -285,121 +284,125 @@ class PredictionService: df['is_november'] = int(forecast_date.month == 11) df['is_december'] = int(forecast_date.month == 12) - # Additional features that might be in training data + # Special day features df['is_month_start'] = int(forecast_date.day <= 3) df['is_month_end'] = int(forecast_date.day >= 28) - df['is_quarter_start'] = int(forecast_date.month in [1, 4, 7, 10] and forecast_date.day <= 7) - df['is_quarter_end'] = int(forecast_date.month in [3, 6, 9, 12] and forecast_date.day >= 25) - - # Business context features - df['is_school_holiday'] = int(self._is_school_holiday(forecast_date)) df['is_payday_period'] = int((forecast_date.day <= 5) or (forecast_date.day >= 25)) - # Working day features - df['is_working_day'] = int(day_of_week < 5) # Monday-Friday - df['is_peak_bakery_day'] = int(day_of_week in [4, 5, 6]) # Friday, Saturday, Sunday + # ✅ FIX: Add ALL derived features that training service creates - # Seasonal demand patterns - df['is_high_demand_month'] = int(forecast_date.month in [6, 7, 8, 12]) - df['is_warm_season'] = int(forecast_date.month in [4, 5, 6, 7, 8, 9]) + # Weather-based derived features + df['temp_squared'] = df['temperature'].iloc[0] ** 2 + df['is_cold_day'] = int(df['temperature'].iloc[0] < 10) + df['is_hot_day'] = int(df['temperature'].iloc[0] > 25) + df['is_pleasant_day'] = int(10 <= df['temperature'].iloc[0] <= 25) + df['is_rainy_day'] = int(df['precipitation'].iloc[0] > 0.1) + df['is_very_rainy_day'] = int(df['precipitation'].iloc[0] > 5.0) - # Weather-based derived features (if weather data available) - if 'temperature' in df.columns: - temp = df['temperature'].iloc[0] - df['temp_squared'] = temp ** 2 # ✅ FIX: Added temp_squared - df['is_pleasant_day'] = int(18 <= temp <= 25) - df['temp_category'] = int(self._get_temp_category(temp)) - df['is_hot_day'] = int(temp > 25) - df['is_cold_day'] = int(temp < 10) + # Humidity features + df['humidity_squared'] = df['humidity'].iloc[0] ** 2 + df['is_high_humidity'] = int(df['humidity'].iloc[0] > 70) + df['is_low_humidity'] = int(df['humidity'].iloc[0] < 40) - if 'precipitation' in df.columns: - precip = df['precipitation'].iloc[0] - df['is_rainy_day'] = int(precip > 0.1) - df['is_heavy_rain'] = int(precip > 10.0) - df['rain_intensity'] = int(self._get_rain_intensity(precip)) + # Pressure features + df['pressure_squared'] = df['pressure'].iloc[0] ** 2 + df['is_high_pressure'] = int(df['pressure'].iloc[0] > 1020) + df['is_low_pressure'] = int(df['pressure'].iloc[0] < 1000) - # Traffic-based features - if 'traffic_volume' in df.columns and df['traffic_volume'].iloc[0] > 0: + # Wind features + df['wind_squared'] = df['wind_speed'].iloc[0] ** 2 + df['is_windy'] = int(df['wind_speed'].iloc[0] > 15) + df['is_calm'] = int(df['wind_speed'].iloc[0] < 5) + + # Precipitation features + df['precip_squared'] = df['precipitation'].iloc[0] ** 2 + df['precip_log'] = float(np.log1p(df['precipitation'].iloc[0])) + + # ✅ FIX: Add ALL traffic-based derived features + if df['traffic_volume'].iloc[0] > 0: traffic = df['traffic_volume'].iloc[0] - df['high_traffic'] = int(traffic > 150) # Assumption based on typical values + df['high_traffic'] = int(traffic > 150) df['low_traffic'] = int(traffic < 50) - df['traffic_normalized'] = float((traffic - 100) / 50) # Simple normalization - df['congestion_level'] = int(min(5, max(1, traffic // 50))) + df['traffic_normalized'] = float((traffic - 100) / 50) df['traffic_squared'] = traffic ** 2 - df['traffic_log'] = float(np.log1p(traffic)) # log(1+traffic) to handle zeros + df['traffic_log'] = float(np.log1p(traffic)) else: df['high_traffic'] = 0 - df['low_traffic'] = 0 + df['low_traffic'] = 0 df['traffic_normalized'] = 0.0 df['traffic_squared'] = 0.0 df['traffic_log'] = 0.0 - df['congestion_level'] = 1 - # Interaction features (common in training) - if 'is_weekend' in df.columns and 'temperature' in df.columns: - df['weekend_temp_interaction'] = df['is_weekend'].iloc[0] * df['temperature'].iloc[0] - df['weekend_pleasant_weather'] = df['is_weekend'].iloc[0] * df.get('is_pleasant_day', pd.Series([0])).iloc[0] + # ✅ FIX: Add pedestrian-based features + pedestrians = df['pedestrian_count'].iloc[0] + df['high_pedestrian_count'] = int(pedestrians > 100) + df['low_pedestrian_count'] = int(pedestrians < 25) + df['pedestrian_normalized'] = float((pedestrians - 50) / 25) + df['pedestrian_squared'] = pedestrians ** 2 + df['pedestrian_log'] = float(np.log1p(pedestrians)) - if 'is_holiday' in df.columns and 'temperature' in df.columns: - df['holiday_temp_interaction'] = df['is_holiday'].iloc[0] * df['temperature'].iloc[0] + # ✅ FIX: Add average_speed-based features + avg_speed = df['average_speed'].iloc[0] + df['high_speed'] = int(avg_speed > 40) + df['low_speed'] = int(avg_speed < 20) + df['speed_normalized'] = float((avg_speed - 30) / 10) + df['speed_squared'] = avg_speed ** 2 + df['speed_log'] = float(np.log1p(avg_speed)) - if 'season' in df.columns and 'temperature' in df.columns: - df['season_temp_interaction'] = df['season'].iloc[0] * df['temperature'].iloc[0] + # ✅ FIX: Add congestion-based features + congestion = df['congestion_level'].iloc[0] + df['high_congestion'] = int(congestion > 3) + df['low_congestion'] = int(congestion < 2) + df['congestion_squared'] = congestion ** 2 - # ✅ FIX: Add more interaction features that might be in training - if 'is_rainy_day' in df.columns and 'traffic_volume' in df.columns: - df['rain_traffic_interaction'] = df['is_rainy_day'].iloc[0] * df['traffic_volume'].iloc[0] + # ✅ FIX: Add ALL interaction features that training creates - if 'is_weekend' in df.columns and 'traffic_volume' in df.columns: - df['weekend_traffic_interaction'] = df['is_weekend'].iloc[0] * df['traffic_volume'].iloc[0] + # Weekend interactions + is_weekend = df['is_weekend'].iloc[0] + temperature = df['temperature'].iloc[0] + df['weekend_temp_interaction'] = is_weekend * temperature + df['weekend_pleasant_weather'] = is_weekend * df['is_pleasant_day'].iloc[0] + df['weekend_traffic_interaction'] = is_weekend * df['traffic_volume'].iloc[0] + + # Holiday interactions + is_holiday = df['is_holiday'].iloc[0] + df['holiday_temp_interaction'] = is_holiday * temperature + df['holiday_traffic_interaction'] = is_holiday * df['traffic_volume'].iloc[0] + + # Season interactions + season = df['season'].iloc[0] + df['season_temp_interaction'] = season * temperature + df['season_traffic_interaction'] = season * df['traffic_volume'].iloc[0] + + # Rain-traffic interactions + is_rainy = df['is_rainy_day'].iloc[0] + df['rain_traffic_interaction'] = is_rainy * df['traffic_volume'].iloc[0] + df['rain_speed_interaction'] = is_rainy * df['average_speed'].iloc[0] # Day-weather interactions - if 'day_of_week' in df.columns and 'temperature' in df.columns: - df['day_temp_interaction'] = df['day_of_week'].iloc[0] * df['temperature'].iloc[0] + df['day_temp_interaction'] = day_of_week * temperature + df['month_temp_interaction'] = forecast_date.month * temperature - if 'month' in df.columns and 'temperature' in df.columns: - df['month_temp_interaction'] = df['month'].iloc[0] * df['temperature'].iloc[0] + # Traffic-speed interactions + df['traffic_speed_interaction'] = df['traffic_volume'].iloc[0] * df['average_speed'].iloc[0] + df['pedestrian_speed_interaction'] = df['pedestrian_count'].iloc[0] * df['average_speed'].iloc[0] - # ✅ FIX: Add comprehensive derived features to match training + # Congestion-related interactions + df['congestion_temp_interaction'] = congestion * temperature + df['congestion_weekend_interaction'] = congestion * is_weekend - # Humidity-based features - if 'humidity' in df.columns: - humidity = df['humidity'].iloc[0] - df['humidity_squared'] = humidity ** 2 - df['is_high_humidity'] = int(humidity > 70) - df['is_low_humidity'] = int(humidity < 40) - - # Pressure-based features - if 'pressure' in df.columns: - pressure = df['pressure'].iloc[0] - df['pressure_squared'] = pressure ** 2 - df['is_high_pressure'] = int(pressure > 1020) - df['is_low_pressure'] = int(pressure < 1000) - - # Wind-based features - if 'wind_speed' in df.columns: - wind = df['wind_speed'].iloc[0] - df['wind_squared'] = wind ** 2 - df['is_windy'] = int(wind > 15) - df['is_calm'] = int(wind < 5) - - # Precipitation-based features (additional to basic ones) - if 'precipitation' in df.columns: - precip = df['precipitation'].iloc[0] - df['precip_squared'] = precip ** 2 - df['precip_log'] = float(np.log1p(precip)) - - logger.debug("Prophet features prepared with comprehensive derived features", + logger.debug("Complete Prophet features prepared", feature_count=len(df.columns), date=features['date'], season=df['season'].iloc[0], - day_of_week=day_of_week, - temp_squared=df.get('temp_squared', pd.Series([0])).iloc[0]) + traffic_volume=df['traffic_volume'].iloc[0], + average_speed=df['average_speed'].iloc[0], + pedestrian_count=df['pedestrian_count'].iloc[0]) return df except Exception as e: - logger.error(f"Error preparing Prophet features: {e}") + logger.error("Error preparing Prophet features", error=str(e)) raise def _get_season(self, month: int) -> int: