Start fixing forecast service 19

This commit is contained in:
Urtzi Alfaro
2025-07-30 09:00:17 +02:00
parent 024290e4c0
commit 326638b52d
3 changed files with 112 additions and 101 deletions

View File

@@ -46,9 +46,12 @@ async def lifespan(app: FastAPI):
# Register custom metrics
metrics_collector.register_counter("forecasts_generated_total", "Total forecasts generated")
metrics_collector.register_counter("predictions_served_total", "Total predictions served")
metrics_collector.register_counter("prediction_errors_total", "Total prediction errors")
metrics_collector.register_counter("prediction_errors_total", "Total prediction errors") # ← MISSING REGISTRATION!
metrics_collector.register_histogram("forecast_processing_time_seconds", "Time to process forecast request")
metrics_collector.register_histogram("prediction_processing_time_seconds", "Time to process prediction request") # ← ADD MISSING METRIC!
metrics_collector.register_gauge("active_models_count", "Number of active models")
metrics_collector.register_counter("model_cache_hits_total", "Total model cache hits") # ← ADD USEFUL METRIC!
metrics_collector.register_counter("model_cache_misses_total", "Total model cache misses") # ← ADD USEFUL METRIC!
# Start metrics server
metrics_collector.start_metrics_server(8080)

View File

@@ -228,6 +228,7 @@ class ForecastingService:
"humidity": weather.get("humidity", 65.0),
"wind_speed": weather.get("wind_speed", 5.0),
"pressure": weather.get("pressure", 1013.0),
'weather_description': weather_data.get('description', 'clear')
})
logger.info("Weather data acquired successfully", tenant_id=tenant_id)
@@ -251,6 +252,8 @@ class ForecastingService:
"humidity": current_weather.get("humidity", 65.0),
"wind_speed": current_weather.get("wind_speed", 5.0),
"pressure": current_weather.get("pressure", 1013.0),
'weather_description': current_weather.get('description', 'clear')
})
logger.info("Using current weather as fallback", tenant_id=tenant_id)
@@ -286,6 +289,7 @@ class ForecastingService:
# features.update({
# "traffic_volume": traffic_data.get("traffic_volume", 100),
# "pedestrian_count": traffic_data.get("pedestrian_count", 50),
# "average_speed": traffic_data.get('average_speed', 30.0)
# })
# logger.info("Traffic data acquired successfully", tenant_id=tenant_id)
# return
@@ -300,7 +304,8 @@ class ForecastingService:
features.update({
"traffic_volume": int(100 * weekend_factor),
"pedestrian_count": int(50 * weekend_factor),
"congestion_level": 1
"congestion_level": 1,
'average_speed': 30.0
})
logger.warning("Using default traffic values", tenant_id=tenant_id)

View File

@@ -214,7 +214,7 @@ class PredictionService:
return False
def _prepare_prophet_features(self, features: Dict[str, Any]) -> pd.DataFrame:
"""Convert features to Prophet-compatible DataFrame - FIXED TO MATCH TRAINING"""
"""Convert features to Prophet-compatible DataFrame - COMPLETE FEATURE MATCHING"""
try:
# Create base DataFrame with required 'ds' column
@@ -222,27 +222,25 @@ class PredictionService:
'ds': [pd.to_datetime(features['date'])]
})
# Add numeric features with safe conversion
numeric_features = [
'temperature', 'precipitation', 'humidity', 'wind_speed',
'traffic_volume', 'pedestrian_count', 'pressure'
]
# ✅ FIX: Add ALL traffic features that training service uses
# Core traffic features
df['traffic_volume'] = float(features.get('traffic_volume', 100.0))
df['pedestrian_count'] = float(features.get('pedestrian_count', 50.0))
df['congestion_level'] = float(features.get('congestion_level', 1.0))
df['average_speed'] = float(features.get('average_speed', 30.0)) # ← MISSING FEATURE!
for feature in numeric_features:
if feature in features and features[feature] is not None:
try:
df[feature] = float(features[feature])
except (ValueError, TypeError):
logger.warning(f"Could not convert {feature} to float: {features[feature]}")
df[feature] = 0.0
else:
df[feature] = 0.0
# Weather features
df['temperature'] = float(features.get('temperature', 15.0))
df['precipitation'] = float(features.get('precipitation', 0.0))
df['humidity'] = float(features.get('humidity', 60.0))
df['wind_speed'] = float(features.get('wind_speed', 5.0))
df['pressure'] = float(features.get('pressure', 1013.0))
# Extract date information for temporal features
forecast_date = pd.to_datetime(features['date'])
day_of_week = forecast_date.weekday() # 0=Monday, 6=Sunday
# Add temporal features (MUST match training service exactly!)
# ✅ FIX: Add ALL temporal features (must match training exactly!)
df['day_of_week'] = int(day_of_week)
df['day_of_month'] = int(forecast_date.day)
df['month'] = int(forecast_date.month)
@@ -270,8 +268,9 @@ class PredictionService:
# Holiday features
df['is_holiday'] = int(features.get('is_holiday', False))
df['is_school_holiday'] = int(features.get('is_school_holiday', False))
# Month-based features
# Month-based features (match training)
df['is_january'] = int(forecast_date.month == 1)
df['is_february'] = int(forecast_date.month == 2)
df['is_march'] = int(forecast_date.month == 3)
@@ -285,121 +284,125 @@ class PredictionService:
df['is_november'] = int(forecast_date.month == 11)
df['is_december'] = int(forecast_date.month == 12)
# Additional features that might be in training data
# Special day features
df['is_month_start'] = int(forecast_date.day <= 3)
df['is_month_end'] = int(forecast_date.day >= 28)
df['is_quarter_start'] = int(forecast_date.month in [1, 4, 7, 10] and forecast_date.day <= 7)
df['is_quarter_end'] = int(forecast_date.month in [3, 6, 9, 12] and forecast_date.day >= 25)
# Business context features
df['is_school_holiday'] = int(self._is_school_holiday(forecast_date))
df['is_payday_period'] = int((forecast_date.day <= 5) or (forecast_date.day >= 25))
# Working day features
df['is_working_day'] = int(day_of_week < 5) # Monday-Friday
df['is_peak_bakery_day'] = int(day_of_week in [4, 5, 6]) # Friday, Saturday, Sunday
# ✅ FIX: Add ALL derived features that training service creates
# Seasonal demand patterns
df['is_high_demand_month'] = int(forecast_date.month in [6, 7, 8, 12])
df['is_warm_season'] = int(forecast_date.month in [4, 5, 6, 7, 8, 9])
# Weather-based derived features
df['temp_squared'] = df['temperature'].iloc[0] ** 2
df['is_cold_day'] = int(df['temperature'].iloc[0] < 10)
df['is_hot_day'] = int(df['temperature'].iloc[0] > 25)
df['is_pleasant_day'] = int(10 <= df['temperature'].iloc[0] <= 25)
df['is_rainy_day'] = int(df['precipitation'].iloc[0] > 0.1)
df['is_very_rainy_day'] = int(df['precipitation'].iloc[0] > 5.0)
# Weather-based derived features (if weather data available)
if 'temperature' in df.columns:
temp = df['temperature'].iloc[0]
df['temp_squared'] = temp ** 2 # ✅ FIX: Added temp_squared
df['is_pleasant_day'] = int(18 <= temp <= 25)
df['temp_category'] = int(self._get_temp_category(temp))
df['is_hot_day'] = int(temp > 25)
df['is_cold_day'] = int(temp < 10)
# Humidity features
df['humidity_squared'] = df['humidity'].iloc[0] ** 2
df['is_high_humidity'] = int(df['humidity'].iloc[0] > 70)
df['is_low_humidity'] = int(df['humidity'].iloc[0] < 40)
if 'precipitation' in df.columns:
precip = df['precipitation'].iloc[0]
df['is_rainy_day'] = int(precip > 0.1)
df['is_heavy_rain'] = int(precip > 10.0)
df['rain_intensity'] = int(self._get_rain_intensity(precip))
# Pressure features
df['pressure_squared'] = df['pressure'].iloc[0] ** 2
df['is_high_pressure'] = int(df['pressure'].iloc[0] > 1020)
df['is_low_pressure'] = int(df['pressure'].iloc[0] < 1000)
# Traffic-based features
if 'traffic_volume' in df.columns and df['traffic_volume'].iloc[0] > 0:
# Wind features
df['wind_squared'] = df['wind_speed'].iloc[0] ** 2
df['is_windy'] = int(df['wind_speed'].iloc[0] > 15)
df['is_calm'] = int(df['wind_speed'].iloc[0] < 5)
# Precipitation features
df['precip_squared'] = df['precipitation'].iloc[0] ** 2
df['precip_log'] = float(np.log1p(df['precipitation'].iloc[0]))
# ✅ FIX: Add ALL traffic-based derived features
if df['traffic_volume'].iloc[0] > 0:
traffic = df['traffic_volume'].iloc[0]
df['high_traffic'] = int(traffic > 150) # Assumption based on typical values
df['high_traffic'] = int(traffic > 150)
df['low_traffic'] = int(traffic < 50)
df['traffic_normalized'] = float((traffic - 100) / 50) # Simple normalization
df['congestion_level'] = int(min(5, max(1, traffic // 50)))
df['traffic_normalized'] = float((traffic - 100) / 50)
df['traffic_squared'] = traffic ** 2
df['traffic_log'] = float(np.log1p(traffic)) # log(1+traffic) to handle zeros
df['traffic_log'] = float(np.log1p(traffic))
else:
df['high_traffic'] = 0
df['low_traffic'] = 0
df['low_traffic'] = 0
df['traffic_normalized'] = 0.0
df['traffic_squared'] = 0.0
df['traffic_log'] = 0.0
df['congestion_level'] = 1
# Interaction features (common in training)
if 'is_weekend' in df.columns and 'temperature' in df.columns:
df['weekend_temp_interaction'] = df['is_weekend'].iloc[0] * df['temperature'].iloc[0]
df['weekend_pleasant_weather'] = df['is_weekend'].iloc[0] * df.get('is_pleasant_day', pd.Series([0])).iloc[0]
# ✅ FIX: Add pedestrian-based features
pedestrians = df['pedestrian_count'].iloc[0]
df['high_pedestrian_count'] = int(pedestrians > 100)
df['low_pedestrian_count'] = int(pedestrians < 25)
df['pedestrian_normalized'] = float((pedestrians - 50) / 25)
df['pedestrian_squared'] = pedestrians ** 2
df['pedestrian_log'] = float(np.log1p(pedestrians))
if 'is_holiday' in df.columns and 'temperature' in df.columns:
df['holiday_temp_interaction'] = df['is_holiday'].iloc[0] * df['temperature'].iloc[0]
# ✅ FIX: Add average_speed-based features
avg_speed = df['average_speed'].iloc[0]
df['high_speed'] = int(avg_speed > 40)
df['low_speed'] = int(avg_speed < 20)
df['speed_normalized'] = float((avg_speed - 30) / 10)
df['speed_squared'] = avg_speed ** 2
df['speed_log'] = float(np.log1p(avg_speed))
if 'season' in df.columns and 'temperature' in df.columns:
df['season_temp_interaction'] = df['season'].iloc[0] * df['temperature'].iloc[0]
# ✅ FIX: Add congestion-based features
congestion = df['congestion_level'].iloc[0]
df['high_congestion'] = int(congestion > 3)
df['low_congestion'] = int(congestion < 2)
df['congestion_squared'] = congestion ** 2
# ✅ FIX: Add more interaction features that might be in training
if 'is_rainy_day' in df.columns and 'traffic_volume' in df.columns:
df['rain_traffic_interaction'] = df['is_rainy_day'].iloc[0] * df['traffic_volume'].iloc[0]
# ✅ FIX: Add ALL interaction features that training creates
if 'is_weekend' in df.columns and 'traffic_volume' in df.columns:
df['weekend_traffic_interaction'] = df['is_weekend'].iloc[0] * df['traffic_volume'].iloc[0]
# Weekend interactions
is_weekend = df['is_weekend'].iloc[0]
temperature = df['temperature'].iloc[0]
df['weekend_temp_interaction'] = is_weekend * temperature
df['weekend_pleasant_weather'] = is_weekend * df['is_pleasant_day'].iloc[0]
df['weekend_traffic_interaction'] = is_weekend * df['traffic_volume'].iloc[0]
# Holiday interactions
is_holiday = df['is_holiday'].iloc[0]
df['holiday_temp_interaction'] = is_holiday * temperature
df['holiday_traffic_interaction'] = is_holiday * df['traffic_volume'].iloc[0]
# Season interactions
season = df['season'].iloc[0]
df['season_temp_interaction'] = season * temperature
df['season_traffic_interaction'] = season * df['traffic_volume'].iloc[0]
# Rain-traffic interactions
is_rainy = df['is_rainy_day'].iloc[0]
df['rain_traffic_interaction'] = is_rainy * df['traffic_volume'].iloc[0]
df['rain_speed_interaction'] = is_rainy * df['average_speed'].iloc[0]
# Day-weather interactions
if 'day_of_week' in df.columns and 'temperature' in df.columns:
df['day_temp_interaction'] = df['day_of_week'].iloc[0] * df['temperature'].iloc[0]
df['day_temp_interaction'] = day_of_week * temperature
df['month_temp_interaction'] = forecast_date.month * temperature
if 'month' in df.columns and 'temperature' in df.columns:
df['month_temp_interaction'] = df['month'].iloc[0] * df['temperature'].iloc[0]
# Traffic-speed interactions
df['traffic_speed_interaction'] = df['traffic_volume'].iloc[0] * df['average_speed'].iloc[0]
df['pedestrian_speed_interaction'] = df['pedestrian_count'].iloc[0] * df['average_speed'].iloc[0]
# ✅ FIX: Add comprehensive derived features to match training
# Congestion-related interactions
df['congestion_temp_interaction'] = congestion * temperature
df['congestion_weekend_interaction'] = congestion * is_weekend
# Humidity-based features
if 'humidity' in df.columns:
humidity = df['humidity'].iloc[0]
df['humidity_squared'] = humidity ** 2
df['is_high_humidity'] = int(humidity > 70)
df['is_low_humidity'] = int(humidity < 40)
# Pressure-based features
if 'pressure' in df.columns:
pressure = df['pressure'].iloc[0]
df['pressure_squared'] = pressure ** 2
df['is_high_pressure'] = int(pressure > 1020)
df['is_low_pressure'] = int(pressure < 1000)
# Wind-based features
if 'wind_speed' in df.columns:
wind = df['wind_speed'].iloc[0]
df['wind_squared'] = wind ** 2
df['is_windy'] = int(wind > 15)
df['is_calm'] = int(wind < 5)
# Precipitation-based features (additional to basic ones)
if 'precipitation' in df.columns:
precip = df['precipitation'].iloc[0]
df['precip_squared'] = precip ** 2
df['precip_log'] = float(np.log1p(precip))
logger.debug("Prophet features prepared with comprehensive derived features",
logger.debug("Complete Prophet features prepared",
feature_count=len(df.columns),
date=features['date'],
season=df['season'].iloc[0],
day_of_week=day_of_week,
temp_squared=df.get('temp_squared', pd.Series([0])).iloc[0])
traffic_volume=df['traffic_volume'].iloc[0],
average_speed=df['average_speed'].iloc[0],
pedestrian_count=df['pedestrian_count'].iloc[0])
return df
except Exception as e:
logger.error(f"Error preparing Prophet features: {e}")
logger.error("Error preparing Prophet features", error=str(e))
raise
def _get_season(self, month: int) -> int: