Start fixing forecast service 19

This commit is contained in:
Urtzi Alfaro
2025-07-30 09:00:17 +02:00
parent 024290e4c0
commit 326638b52d
3 changed files with 112 additions and 101 deletions

View File

@@ -46,9 +46,12 @@ async def lifespan(app: FastAPI):
# Register custom metrics # Register custom metrics
metrics_collector.register_counter("forecasts_generated_total", "Total forecasts generated") metrics_collector.register_counter("forecasts_generated_total", "Total forecasts generated")
metrics_collector.register_counter("predictions_served_total", "Total predictions served") metrics_collector.register_counter("predictions_served_total", "Total predictions served")
metrics_collector.register_counter("prediction_errors_total", "Total prediction errors") metrics_collector.register_counter("prediction_errors_total", "Total prediction errors") # ← MISSING REGISTRATION!
metrics_collector.register_histogram("forecast_processing_time_seconds", "Time to process forecast request") metrics_collector.register_histogram("forecast_processing_time_seconds", "Time to process forecast request")
metrics_collector.register_histogram("prediction_processing_time_seconds", "Time to process prediction request") # ← ADD MISSING METRIC!
metrics_collector.register_gauge("active_models_count", "Number of active models") metrics_collector.register_gauge("active_models_count", "Number of active models")
metrics_collector.register_counter("model_cache_hits_total", "Total model cache hits") # ← ADD USEFUL METRIC!
metrics_collector.register_counter("model_cache_misses_total", "Total model cache misses") # ← ADD USEFUL METRIC!
# Start metrics server # Start metrics server
metrics_collector.start_metrics_server(8080) metrics_collector.start_metrics_server(8080)

View File

@@ -228,6 +228,7 @@ class ForecastingService:
"humidity": weather.get("humidity", 65.0), "humidity": weather.get("humidity", 65.0),
"wind_speed": weather.get("wind_speed", 5.0), "wind_speed": weather.get("wind_speed", 5.0),
"pressure": weather.get("pressure", 1013.0), "pressure": weather.get("pressure", 1013.0),
'weather_description': weather_data.get('description', 'clear')
}) })
logger.info("Weather data acquired successfully", tenant_id=tenant_id) logger.info("Weather data acquired successfully", tenant_id=tenant_id)
@@ -251,6 +252,8 @@ class ForecastingService:
"humidity": current_weather.get("humidity", 65.0), "humidity": current_weather.get("humidity", 65.0),
"wind_speed": current_weather.get("wind_speed", 5.0), "wind_speed": current_weather.get("wind_speed", 5.0),
"pressure": current_weather.get("pressure", 1013.0), "pressure": current_weather.get("pressure", 1013.0),
'weather_description': current_weather.get('description', 'clear')
}) })
logger.info("Using current weather as fallback", tenant_id=tenant_id) logger.info("Using current weather as fallback", tenant_id=tenant_id)
@@ -286,6 +289,7 @@ class ForecastingService:
# features.update({ # features.update({
# "traffic_volume": traffic_data.get("traffic_volume", 100), # "traffic_volume": traffic_data.get("traffic_volume", 100),
# "pedestrian_count": traffic_data.get("pedestrian_count", 50), # "pedestrian_count": traffic_data.get("pedestrian_count", 50),
# "average_speed": traffic_data.get('average_speed', 30.0)
# }) # })
# logger.info("Traffic data acquired successfully", tenant_id=tenant_id) # logger.info("Traffic data acquired successfully", tenant_id=tenant_id)
# return # return
@@ -300,7 +304,8 @@ class ForecastingService:
features.update({ features.update({
"traffic_volume": int(100 * weekend_factor), "traffic_volume": int(100 * weekend_factor),
"pedestrian_count": int(50 * weekend_factor), "pedestrian_count": int(50 * weekend_factor),
"congestion_level": 1 "congestion_level": 1,
'average_speed': 30.0
}) })
logger.warning("Using default traffic values", tenant_id=tenant_id) logger.warning("Using default traffic values", tenant_id=tenant_id)

View File

@@ -214,7 +214,7 @@ class PredictionService:
return False return False
def _prepare_prophet_features(self, features: Dict[str, Any]) -> pd.DataFrame: def _prepare_prophet_features(self, features: Dict[str, Any]) -> pd.DataFrame:
"""Convert features to Prophet-compatible DataFrame - FIXED TO MATCH TRAINING""" """Convert features to Prophet-compatible DataFrame - COMPLETE FEATURE MATCHING"""
try: try:
# Create base DataFrame with required 'ds' column # Create base DataFrame with required 'ds' column
@@ -222,27 +222,25 @@ class PredictionService:
'ds': [pd.to_datetime(features['date'])] 'ds': [pd.to_datetime(features['date'])]
}) })
# Add numeric features with safe conversion # ✅ FIX: Add ALL traffic features that training service uses
numeric_features = [ # Core traffic features
'temperature', 'precipitation', 'humidity', 'wind_speed', df['traffic_volume'] = float(features.get('traffic_volume', 100.0))
'traffic_volume', 'pedestrian_count', 'pressure' df['pedestrian_count'] = float(features.get('pedestrian_count', 50.0))
] df['congestion_level'] = float(features.get('congestion_level', 1.0))
df['average_speed'] = float(features.get('average_speed', 30.0)) # ← MISSING FEATURE!
for feature in numeric_features: # Weather features
if feature in features and features[feature] is not None: df['temperature'] = float(features.get('temperature', 15.0))
try: df['precipitation'] = float(features.get('precipitation', 0.0))
df[feature] = float(features[feature]) df['humidity'] = float(features.get('humidity', 60.0))
except (ValueError, TypeError): df['wind_speed'] = float(features.get('wind_speed', 5.0))
logger.warning(f"Could not convert {feature} to float: {features[feature]}") df['pressure'] = float(features.get('pressure', 1013.0))
df[feature] = 0.0
else:
df[feature] = 0.0
# Extract date information for temporal features # Extract date information for temporal features
forecast_date = pd.to_datetime(features['date']) forecast_date = pd.to_datetime(features['date'])
day_of_week = forecast_date.weekday() # 0=Monday, 6=Sunday day_of_week = forecast_date.weekday() # 0=Monday, 6=Sunday
# Add temporal features (MUST match training service exactly!) # ✅ FIX: Add ALL temporal features (must match training exactly!)
df['day_of_week'] = int(day_of_week) df['day_of_week'] = int(day_of_week)
df['day_of_month'] = int(forecast_date.day) df['day_of_month'] = int(forecast_date.day)
df['month'] = int(forecast_date.month) df['month'] = int(forecast_date.month)
@@ -270,8 +268,9 @@ class PredictionService:
# Holiday features # Holiday features
df['is_holiday'] = int(features.get('is_holiday', False)) df['is_holiday'] = int(features.get('is_holiday', False))
df['is_school_holiday'] = int(features.get('is_school_holiday', False))
# Month-based features # Month-based features (match training)
df['is_january'] = int(forecast_date.month == 1) df['is_january'] = int(forecast_date.month == 1)
df['is_february'] = int(forecast_date.month == 2) df['is_february'] = int(forecast_date.month == 2)
df['is_march'] = int(forecast_date.month == 3) df['is_march'] = int(forecast_date.month == 3)
@@ -285,121 +284,125 @@ class PredictionService:
df['is_november'] = int(forecast_date.month == 11) df['is_november'] = int(forecast_date.month == 11)
df['is_december'] = int(forecast_date.month == 12) df['is_december'] = int(forecast_date.month == 12)
# Additional features that might be in training data # Special day features
df['is_month_start'] = int(forecast_date.day <= 3) df['is_month_start'] = int(forecast_date.day <= 3)
df['is_month_end'] = int(forecast_date.day >= 28) df['is_month_end'] = int(forecast_date.day >= 28)
df['is_quarter_start'] = int(forecast_date.month in [1, 4, 7, 10] and forecast_date.day <= 7)
df['is_quarter_end'] = int(forecast_date.month in [3, 6, 9, 12] and forecast_date.day >= 25)
# Business context features
df['is_school_holiday'] = int(self._is_school_holiday(forecast_date))
df['is_payday_period'] = int((forecast_date.day <= 5) or (forecast_date.day >= 25)) df['is_payday_period'] = int((forecast_date.day <= 5) or (forecast_date.day >= 25))
# Working day features # ✅ FIX: Add ALL derived features that training service creates
df['is_working_day'] = int(day_of_week < 5) # Monday-Friday
df['is_peak_bakery_day'] = int(day_of_week in [4, 5, 6]) # Friday, Saturday, Sunday
# Seasonal demand patterns # Weather-based derived features
df['is_high_demand_month'] = int(forecast_date.month in [6, 7, 8, 12]) df['temp_squared'] = df['temperature'].iloc[0] ** 2
df['is_warm_season'] = int(forecast_date.month in [4, 5, 6, 7, 8, 9]) df['is_cold_day'] = int(df['temperature'].iloc[0] < 10)
df['is_hot_day'] = int(df['temperature'].iloc[0] > 25)
df['is_pleasant_day'] = int(10 <= df['temperature'].iloc[0] <= 25)
df['is_rainy_day'] = int(df['precipitation'].iloc[0] > 0.1)
df['is_very_rainy_day'] = int(df['precipitation'].iloc[0] > 5.0)
# Weather-based derived features (if weather data available) # Humidity features
if 'temperature' in df.columns: df['humidity_squared'] = df['humidity'].iloc[0] ** 2
temp = df['temperature'].iloc[0] df['is_high_humidity'] = int(df['humidity'].iloc[0] > 70)
df['temp_squared'] = temp ** 2 # ✅ FIX: Added temp_squared df['is_low_humidity'] = int(df['humidity'].iloc[0] < 40)
df['is_pleasant_day'] = int(18 <= temp <= 25)
df['temp_category'] = int(self._get_temp_category(temp))
df['is_hot_day'] = int(temp > 25)
df['is_cold_day'] = int(temp < 10)
if 'precipitation' in df.columns: # Pressure features
precip = df['precipitation'].iloc[0] df['pressure_squared'] = df['pressure'].iloc[0] ** 2
df['is_rainy_day'] = int(precip > 0.1) df['is_high_pressure'] = int(df['pressure'].iloc[0] > 1020)
df['is_heavy_rain'] = int(precip > 10.0) df['is_low_pressure'] = int(df['pressure'].iloc[0] < 1000)
df['rain_intensity'] = int(self._get_rain_intensity(precip))
# Traffic-based features # Wind features
if 'traffic_volume' in df.columns and df['traffic_volume'].iloc[0] > 0: df['wind_squared'] = df['wind_speed'].iloc[0] ** 2
df['is_windy'] = int(df['wind_speed'].iloc[0] > 15)
df['is_calm'] = int(df['wind_speed'].iloc[0] < 5)
# Precipitation features
df['precip_squared'] = df['precipitation'].iloc[0] ** 2
df['precip_log'] = float(np.log1p(df['precipitation'].iloc[0]))
# ✅ FIX: Add ALL traffic-based derived features
if df['traffic_volume'].iloc[0] > 0:
traffic = df['traffic_volume'].iloc[0] traffic = df['traffic_volume'].iloc[0]
df['high_traffic'] = int(traffic > 150) # Assumption based on typical values df['high_traffic'] = int(traffic > 150)
df['low_traffic'] = int(traffic < 50) df['low_traffic'] = int(traffic < 50)
df['traffic_normalized'] = float((traffic - 100) / 50) # Simple normalization df['traffic_normalized'] = float((traffic - 100) / 50)
df['congestion_level'] = int(min(5, max(1, traffic // 50)))
df['traffic_squared'] = traffic ** 2 df['traffic_squared'] = traffic ** 2
df['traffic_log'] = float(np.log1p(traffic)) # log(1+traffic) to handle zeros df['traffic_log'] = float(np.log1p(traffic))
else: else:
df['high_traffic'] = 0 df['high_traffic'] = 0
df['low_traffic'] = 0 df['low_traffic'] = 0
df['traffic_normalized'] = 0.0 df['traffic_normalized'] = 0.0
df['traffic_squared'] = 0.0 df['traffic_squared'] = 0.0
df['traffic_log'] = 0.0 df['traffic_log'] = 0.0
df['congestion_level'] = 1
# Interaction features (common in training) # ✅ FIX: Add pedestrian-based features
if 'is_weekend' in df.columns and 'temperature' in df.columns: pedestrians = df['pedestrian_count'].iloc[0]
df['weekend_temp_interaction'] = df['is_weekend'].iloc[0] * df['temperature'].iloc[0] df['high_pedestrian_count'] = int(pedestrians > 100)
df['weekend_pleasant_weather'] = df['is_weekend'].iloc[0] * df.get('is_pleasant_day', pd.Series([0])).iloc[0] df['low_pedestrian_count'] = int(pedestrians < 25)
df['pedestrian_normalized'] = float((pedestrians - 50) / 25)
df['pedestrian_squared'] = pedestrians ** 2
df['pedestrian_log'] = float(np.log1p(pedestrians))
if 'is_holiday' in df.columns and 'temperature' in df.columns: # ✅ FIX: Add average_speed-based features
df['holiday_temp_interaction'] = df['is_holiday'].iloc[0] * df['temperature'].iloc[0] avg_speed = df['average_speed'].iloc[0]
df['high_speed'] = int(avg_speed > 40)
df['low_speed'] = int(avg_speed < 20)
df['speed_normalized'] = float((avg_speed - 30) / 10)
df['speed_squared'] = avg_speed ** 2
df['speed_log'] = float(np.log1p(avg_speed))
if 'season' in df.columns and 'temperature' in df.columns: # ✅ FIX: Add congestion-based features
df['season_temp_interaction'] = df['season'].iloc[0] * df['temperature'].iloc[0] congestion = df['congestion_level'].iloc[0]
df['high_congestion'] = int(congestion > 3)
df['low_congestion'] = int(congestion < 2)
df['congestion_squared'] = congestion ** 2
# ✅ FIX: Add more interaction features that might be in training # ✅ FIX: Add ALL interaction features that training creates
if 'is_rainy_day' in df.columns and 'traffic_volume' in df.columns:
df['rain_traffic_interaction'] = df['is_rainy_day'].iloc[0] * df['traffic_volume'].iloc[0]
if 'is_weekend' in df.columns and 'traffic_volume' in df.columns: # Weekend interactions
df['weekend_traffic_interaction'] = df['is_weekend'].iloc[0] * df['traffic_volume'].iloc[0] is_weekend = df['is_weekend'].iloc[0]
temperature = df['temperature'].iloc[0]
df['weekend_temp_interaction'] = is_weekend * temperature
df['weekend_pleasant_weather'] = is_weekend * df['is_pleasant_day'].iloc[0]
df['weekend_traffic_interaction'] = is_weekend * df['traffic_volume'].iloc[0]
# Holiday interactions
is_holiday = df['is_holiday'].iloc[0]
df['holiday_temp_interaction'] = is_holiday * temperature
df['holiday_traffic_interaction'] = is_holiday * df['traffic_volume'].iloc[0]
# Season interactions
season = df['season'].iloc[0]
df['season_temp_interaction'] = season * temperature
df['season_traffic_interaction'] = season * df['traffic_volume'].iloc[0]
# Rain-traffic interactions
is_rainy = df['is_rainy_day'].iloc[0]
df['rain_traffic_interaction'] = is_rainy * df['traffic_volume'].iloc[0]
df['rain_speed_interaction'] = is_rainy * df['average_speed'].iloc[0]
# Day-weather interactions # Day-weather interactions
if 'day_of_week' in df.columns and 'temperature' in df.columns: df['day_temp_interaction'] = day_of_week * temperature
df['day_temp_interaction'] = df['day_of_week'].iloc[0] * df['temperature'].iloc[0] df['month_temp_interaction'] = forecast_date.month * temperature
if 'month' in df.columns and 'temperature' in df.columns: # Traffic-speed interactions
df['month_temp_interaction'] = df['month'].iloc[0] * df['temperature'].iloc[0] df['traffic_speed_interaction'] = df['traffic_volume'].iloc[0] * df['average_speed'].iloc[0]
df['pedestrian_speed_interaction'] = df['pedestrian_count'].iloc[0] * df['average_speed'].iloc[0]
# ✅ FIX: Add comprehensive derived features to match training # Congestion-related interactions
df['congestion_temp_interaction'] = congestion * temperature
df['congestion_weekend_interaction'] = congestion * is_weekend
# Humidity-based features logger.debug("Complete Prophet features prepared",
if 'humidity' in df.columns:
humidity = df['humidity'].iloc[0]
df['humidity_squared'] = humidity ** 2
df['is_high_humidity'] = int(humidity > 70)
df['is_low_humidity'] = int(humidity < 40)
# Pressure-based features
if 'pressure' in df.columns:
pressure = df['pressure'].iloc[0]
df['pressure_squared'] = pressure ** 2
df['is_high_pressure'] = int(pressure > 1020)
df['is_low_pressure'] = int(pressure < 1000)
# Wind-based features
if 'wind_speed' in df.columns:
wind = df['wind_speed'].iloc[0]
df['wind_squared'] = wind ** 2
df['is_windy'] = int(wind > 15)
df['is_calm'] = int(wind < 5)
# Precipitation-based features (additional to basic ones)
if 'precipitation' in df.columns:
precip = df['precipitation'].iloc[0]
df['precip_squared'] = precip ** 2
df['precip_log'] = float(np.log1p(precip))
logger.debug("Prophet features prepared with comprehensive derived features",
feature_count=len(df.columns), feature_count=len(df.columns),
date=features['date'], date=features['date'],
season=df['season'].iloc[0], season=df['season'].iloc[0],
day_of_week=day_of_week, traffic_volume=df['traffic_volume'].iloc[0],
temp_squared=df.get('temp_squared', pd.Series([0])).iloc[0]) average_speed=df['average_speed'].iloc[0],
pedestrian_count=df['pedestrian_count'].iloc[0])
return df return df
except Exception as e: except Exception as e:
logger.error(f"Error preparing Prophet features: {e}") logger.error("Error preparing Prophet features", error=str(e))
raise raise
def _get_season(self, month: int) -> int: def _get_season(self, month: int) -> int: