Start fixing forecast service 19
This commit is contained in:
@@ -46,9 +46,12 @@ async def lifespan(app: FastAPI):
|
||||
# Register custom metrics
|
||||
metrics_collector.register_counter("forecasts_generated_total", "Total forecasts generated")
|
||||
metrics_collector.register_counter("predictions_served_total", "Total predictions served")
|
||||
metrics_collector.register_counter("prediction_errors_total", "Total prediction errors")
|
||||
metrics_collector.register_counter("prediction_errors_total", "Total prediction errors") # ← MISSING REGISTRATION!
|
||||
metrics_collector.register_histogram("forecast_processing_time_seconds", "Time to process forecast request")
|
||||
metrics_collector.register_histogram("prediction_processing_time_seconds", "Time to process prediction request") # ← ADD MISSING METRIC!
|
||||
metrics_collector.register_gauge("active_models_count", "Number of active models")
|
||||
metrics_collector.register_counter("model_cache_hits_total", "Total model cache hits") # ← ADD USEFUL METRIC!
|
||||
metrics_collector.register_counter("model_cache_misses_total", "Total model cache misses") # ← ADD USEFUL METRIC!
|
||||
|
||||
# Start metrics server
|
||||
metrics_collector.start_metrics_server(8080)
|
||||
|
||||
@@ -228,6 +228,7 @@ class ForecastingService:
|
||||
"humidity": weather.get("humidity", 65.0),
|
||||
"wind_speed": weather.get("wind_speed", 5.0),
|
||||
"pressure": weather.get("pressure", 1013.0),
|
||||
'weather_description': weather_data.get('description', 'clear')
|
||||
})
|
||||
|
||||
logger.info("Weather data acquired successfully", tenant_id=tenant_id)
|
||||
@@ -251,6 +252,8 @@ class ForecastingService:
|
||||
"humidity": current_weather.get("humidity", 65.0),
|
||||
"wind_speed": current_weather.get("wind_speed", 5.0),
|
||||
"pressure": current_weather.get("pressure", 1013.0),
|
||||
'weather_description': current_weather.get('description', 'clear')
|
||||
|
||||
})
|
||||
|
||||
logger.info("Using current weather as fallback", tenant_id=tenant_id)
|
||||
@@ -286,6 +289,7 @@ class ForecastingService:
|
||||
# features.update({
|
||||
# "traffic_volume": traffic_data.get("traffic_volume", 100),
|
||||
# "pedestrian_count": traffic_data.get("pedestrian_count", 50),
|
||||
# "average_speed": traffic_data.get('average_speed', 30.0)
|
||||
# })
|
||||
# logger.info("Traffic data acquired successfully", tenant_id=tenant_id)
|
||||
# return
|
||||
@@ -300,7 +304,8 @@ class ForecastingService:
|
||||
features.update({
|
||||
"traffic_volume": int(100 * weekend_factor),
|
||||
"pedestrian_count": int(50 * weekend_factor),
|
||||
"congestion_level": 1
|
||||
"congestion_level": 1,
|
||||
'average_speed': 30.0
|
||||
})
|
||||
|
||||
logger.warning("Using default traffic values", tenant_id=tenant_id)
|
||||
|
||||
@@ -214,7 +214,7 @@ class PredictionService:
|
||||
return False
|
||||
|
||||
def _prepare_prophet_features(self, features: Dict[str, Any]) -> pd.DataFrame:
|
||||
"""Convert features to Prophet-compatible DataFrame - FIXED TO MATCH TRAINING"""
|
||||
"""Convert features to Prophet-compatible DataFrame - COMPLETE FEATURE MATCHING"""
|
||||
|
||||
try:
|
||||
# Create base DataFrame with required 'ds' column
|
||||
@@ -222,27 +222,25 @@ class PredictionService:
|
||||
'ds': [pd.to_datetime(features['date'])]
|
||||
})
|
||||
|
||||
# Add numeric features with safe conversion
|
||||
numeric_features = [
|
||||
'temperature', 'precipitation', 'humidity', 'wind_speed',
|
||||
'traffic_volume', 'pedestrian_count', 'pressure'
|
||||
]
|
||||
# ✅ FIX: Add ALL traffic features that training service uses
|
||||
# Core traffic features
|
||||
df['traffic_volume'] = float(features.get('traffic_volume', 100.0))
|
||||
df['pedestrian_count'] = float(features.get('pedestrian_count', 50.0))
|
||||
df['congestion_level'] = float(features.get('congestion_level', 1.0))
|
||||
df['average_speed'] = float(features.get('average_speed', 30.0)) # ← MISSING FEATURE!
|
||||
|
||||
for feature in numeric_features:
|
||||
if feature in features and features[feature] is not None:
|
||||
try:
|
||||
df[feature] = float(features[feature])
|
||||
except (ValueError, TypeError):
|
||||
logger.warning(f"Could not convert {feature} to float: {features[feature]}")
|
||||
df[feature] = 0.0
|
||||
else:
|
||||
df[feature] = 0.0
|
||||
# Weather features
|
||||
df['temperature'] = float(features.get('temperature', 15.0))
|
||||
df['precipitation'] = float(features.get('precipitation', 0.0))
|
||||
df['humidity'] = float(features.get('humidity', 60.0))
|
||||
df['wind_speed'] = float(features.get('wind_speed', 5.0))
|
||||
df['pressure'] = float(features.get('pressure', 1013.0))
|
||||
|
||||
# Extract date information for temporal features
|
||||
forecast_date = pd.to_datetime(features['date'])
|
||||
day_of_week = forecast_date.weekday() # 0=Monday, 6=Sunday
|
||||
|
||||
# Add temporal features (MUST match training service exactly!)
|
||||
# ✅ FIX: Add ALL temporal features (must match training exactly!)
|
||||
df['day_of_week'] = int(day_of_week)
|
||||
df['day_of_month'] = int(forecast_date.day)
|
||||
df['month'] = int(forecast_date.month)
|
||||
@@ -270,8 +268,9 @@ class PredictionService:
|
||||
|
||||
# Holiday features
|
||||
df['is_holiday'] = int(features.get('is_holiday', False))
|
||||
df['is_school_holiday'] = int(features.get('is_school_holiday', False))
|
||||
|
||||
# Month-based features
|
||||
# Month-based features (match training)
|
||||
df['is_january'] = int(forecast_date.month == 1)
|
||||
df['is_february'] = int(forecast_date.month == 2)
|
||||
df['is_march'] = int(forecast_date.month == 3)
|
||||
@@ -285,121 +284,125 @@ class PredictionService:
|
||||
df['is_november'] = int(forecast_date.month == 11)
|
||||
df['is_december'] = int(forecast_date.month == 12)
|
||||
|
||||
# Additional features that might be in training data
|
||||
# Special day features
|
||||
df['is_month_start'] = int(forecast_date.day <= 3)
|
||||
df['is_month_end'] = int(forecast_date.day >= 28)
|
||||
df['is_quarter_start'] = int(forecast_date.month in [1, 4, 7, 10] and forecast_date.day <= 7)
|
||||
df['is_quarter_end'] = int(forecast_date.month in [3, 6, 9, 12] and forecast_date.day >= 25)
|
||||
|
||||
# Business context features
|
||||
df['is_school_holiday'] = int(self._is_school_holiday(forecast_date))
|
||||
df['is_payday_period'] = int((forecast_date.day <= 5) or (forecast_date.day >= 25))
|
||||
|
||||
# Working day features
|
||||
df['is_working_day'] = int(day_of_week < 5) # Monday-Friday
|
||||
df['is_peak_bakery_day'] = int(day_of_week in [4, 5, 6]) # Friday, Saturday, Sunday
|
||||
# ✅ FIX: Add ALL derived features that training service creates
|
||||
|
||||
# Seasonal demand patterns
|
||||
df['is_high_demand_month'] = int(forecast_date.month in [6, 7, 8, 12])
|
||||
df['is_warm_season'] = int(forecast_date.month in [4, 5, 6, 7, 8, 9])
|
||||
# Weather-based derived features
|
||||
df['temp_squared'] = df['temperature'].iloc[0] ** 2
|
||||
df['is_cold_day'] = int(df['temperature'].iloc[0] < 10)
|
||||
df['is_hot_day'] = int(df['temperature'].iloc[0] > 25)
|
||||
df['is_pleasant_day'] = int(10 <= df['temperature'].iloc[0] <= 25)
|
||||
df['is_rainy_day'] = int(df['precipitation'].iloc[0] > 0.1)
|
||||
df['is_very_rainy_day'] = int(df['precipitation'].iloc[0] > 5.0)
|
||||
|
||||
# Weather-based derived features (if weather data available)
|
||||
if 'temperature' in df.columns:
|
||||
temp = df['temperature'].iloc[0]
|
||||
df['temp_squared'] = temp ** 2 # ✅ FIX: Added temp_squared
|
||||
df['is_pleasant_day'] = int(18 <= temp <= 25)
|
||||
df['temp_category'] = int(self._get_temp_category(temp))
|
||||
df['is_hot_day'] = int(temp > 25)
|
||||
df['is_cold_day'] = int(temp < 10)
|
||||
# Humidity features
|
||||
df['humidity_squared'] = df['humidity'].iloc[0] ** 2
|
||||
df['is_high_humidity'] = int(df['humidity'].iloc[0] > 70)
|
||||
df['is_low_humidity'] = int(df['humidity'].iloc[0] < 40)
|
||||
|
||||
if 'precipitation' in df.columns:
|
||||
precip = df['precipitation'].iloc[0]
|
||||
df['is_rainy_day'] = int(precip > 0.1)
|
||||
df['is_heavy_rain'] = int(precip > 10.0)
|
||||
df['rain_intensity'] = int(self._get_rain_intensity(precip))
|
||||
# Pressure features
|
||||
df['pressure_squared'] = df['pressure'].iloc[0] ** 2
|
||||
df['is_high_pressure'] = int(df['pressure'].iloc[0] > 1020)
|
||||
df['is_low_pressure'] = int(df['pressure'].iloc[0] < 1000)
|
||||
|
||||
# Traffic-based features
|
||||
if 'traffic_volume' in df.columns and df['traffic_volume'].iloc[0] > 0:
|
||||
# Wind features
|
||||
df['wind_squared'] = df['wind_speed'].iloc[0] ** 2
|
||||
df['is_windy'] = int(df['wind_speed'].iloc[0] > 15)
|
||||
df['is_calm'] = int(df['wind_speed'].iloc[0] < 5)
|
||||
|
||||
# Precipitation features
|
||||
df['precip_squared'] = df['precipitation'].iloc[0] ** 2
|
||||
df['precip_log'] = float(np.log1p(df['precipitation'].iloc[0]))
|
||||
|
||||
# ✅ FIX: Add ALL traffic-based derived features
|
||||
if df['traffic_volume'].iloc[0] > 0:
|
||||
traffic = df['traffic_volume'].iloc[0]
|
||||
df['high_traffic'] = int(traffic > 150) # Assumption based on typical values
|
||||
df['high_traffic'] = int(traffic > 150)
|
||||
df['low_traffic'] = int(traffic < 50)
|
||||
df['traffic_normalized'] = float((traffic - 100) / 50) # Simple normalization
|
||||
df['congestion_level'] = int(min(5, max(1, traffic // 50)))
|
||||
df['traffic_normalized'] = float((traffic - 100) / 50)
|
||||
df['traffic_squared'] = traffic ** 2
|
||||
df['traffic_log'] = float(np.log1p(traffic)) # log(1+traffic) to handle zeros
|
||||
df['traffic_log'] = float(np.log1p(traffic))
|
||||
else:
|
||||
df['high_traffic'] = 0
|
||||
df['low_traffic'] = 0
|
||||
df['low_traffic'] = 0
|
||||
df['traffic_normalized'] = 0.0
|
||||
df['traffic_squared'] = 0.0
|
||||
df['traffic_log'] = 0.0
|
||||
df['congestion_level'] = 1
|
||||
|
||||
# Interaction features (common in training)
|
||||
if 'is_weekend' in df.columns and 'temperature' in df.columns:
|
||||
df['weekend_temp_interaction'] = df['is_weekend'].iloc[0] * df['temperature'].iloc[0]
|
||||
df['weekend_pleasant_weather'] = df['is_weekend'].iloc[0] * df.get('is_pleasant_day', pd.Series([0])).iloc[0]
|
||||
# ✅ FIX: Add pedestrian-based features
|
||||
pedestrians = df['pedestrian_count'].iloc[0]
|
||||
df['high_pedestrian_count'] = int(pedestrians > 100)
|
||||
df['low_pedestrian_count'] = int(pedestrians < 25)
|
||||
df['pedestrian_normalized'] = float((pedestrians - 50) / 25)
|
||||
df['pedestrian_squared'] = pedestrians ** 2
|
||||
df['pedestrian_log'] = float(np.log1p(pedestrians))
|
||||
|
||||
if 'is_holiday' in df.columns and 'temperature' in df.columns:
|
||||
df['holiday_temp_interaction'] = df['is_holiday'].iloc[0] * df['temperature'].iloc[0]
|
||||
# ✅ FIX: Add average_speed-based features
|
||||
avg_speed = df['average_speed'].iloc[0]
|
||||
df['high_speed'] = int(avg_speed > 40)
|
||||
df['low_speed'] = int(avg_speed < 20)
|
||||
df['speed_normalized'] = float((avg_speed - 30) / 10)
|
||||
df['speed_squared'] = avg_speed ** 2
|
||||
df['speed_log'] = float(np.log1p(avg_speed))
|
||||
|
||||
if 'season' in df.columns and 'temperature' in df.columns:
|
||||
df['season_temp_interaction'] = df['season'].iloc[0] * df['temperature'].iloc[0]
|
||||
# ✅ FIX: Add congestion-based features
|
||||
congestion = df['congestion_level'].iloc[0]
|
||||
df['high_congestion'] = int(congestion > 3)
|
||||
df['low_congestion'] = int(congestion < 2)
|
||||
df['congestion_squared'] = congestion ** 2
|
||||
|
||||
# ✅ FIX: Add more interaction features that might be in training
|
||||
if 'is_rainy_day' in df.columns and 'traffic_volume' in df.columns:
|
||||
df['rain_traffic_interaction'] = df['is_rainy_day'].iloc[0] * df['traffic_volume'].iloc[0]
|
||||
# ✅ FIX: Add ALL interaction features that training creates
|
||||
|
||||
if 'is_weekend' in df.columns and 'traffic_volume' in df.columns:
|
||||
df['weekend_traffic_interaction'] = df['is_weekend'].iloc[0] * df['traffic_volume'].iloc[0]
|
||||
# Weekend interactions
|
||||
is_weekend = df['is_weekend'].iloc[0]
|
||||
temperature = df['temperature'].iloc[0]
|
||||
df['weekend_temp_interaction'] = is_weekend * temperature
|
||||
df['weekend_pleasant_weather'] = is_weekend * df['is_pleasant_day'].iloc[0]
|
||||
df['weekend_traffic_interaction'] = is_weekend * df['traffic_volume'].iloc[0]
|
||||
|
||||
# Holiday interactions
|
||||
is_holiday = df['is_holiday'].iloc[0]
|
||||
df['holiday_temp_interaction'] = is_holiday * temperature
|
||||
df['holiday_traffic_interaction'] = is_holiday * df['traffic_volume'].iloc[0]
|
||||
|
||||
# Season interactions
|
||||
season = df['season'].iloc[0]
|
||||
df['season_temp_interaction'] = season * temperature
|
||||
df['season_traffic_interaction'] = season * df['traffic_volume'].iloc[0]
|
||||
|
||||
# Rain-traffic interactions
|
||||
is_rainy = df['is_rainy_day'].iloc[0]
|
||||
df['rain_traffic_interaction'] = is_rainy * df['traffic_volume'].iloc[0]
|
||||
df['rain_speed_interaction'] = is_rainy * df['average_speed'].iloc[0]
|
||||
|
||||
# Day-weather interactions
|
||||
if 'day_of_week' in df.columns and 'temperature' in df.columns:
|
||||
df['day_temp_interaction'] = df['day_of_week'].iloc[0] * df['temperature'].iloc[0]
|
||||
df['day_temp_interaction'] = day_of_week * temperature
|
||||
df['month_temp_interaction'] = forecast_date.month * temperature
|
||||
|
||||
if 'month' in df.columns and 'temperature' in df.columns:
|
||||
df['month_temp_interaction'] = df['month'].iloc[0] * df['temperature'].iloc[0]
|
||||
# Traffic-speed interactions
|
||||
df['traffic_speed_interaction'] = df['traffic_volume'].iloc[0] * df['average_speed'].iloc[0]
|
||||
df['pedestrian_speed_interaction'] = df['pedestrian_count'].iloc[0] * df['average_speed'].iloc[0]
|
||||
|
||||
# ✅ FIX: Add comprehensive derived features to match training
|
||||
# Congestion-related interactions
|
||||
df['congestion_temp_interaction'] = congestion * temperature
|
||||
df['congestion_weekend_interaction'] = congestion * is_weekend
|
||||
|
||||
# Humidity-based features
|
||||
if 'humidity' in df.columns:
|
||||
humidity = df['humidity'].iloc[0]
|
||||
df['humidity_squared'] = humidity ** 2
|
||||
df['is_high_humidity'] = int(humidity > 70)
|
||||
df['is_low_humidity'] = int(humidity < 40)
|
||||
|
||||
# Pressure-based features
|
||||
if 'pressure' in df.columns:
|
||||
pressure = df['pressure'].iloc[0]
|
||||
df['pressure_squared'] = pressure ** 2
|
||||
df['is_high_pressure'] = int(pressure > 1020)
|
||||
df['is_low_pressure'] = int(pressure < 1000)
|
||||
|
||||
# Wind-based features
|
||||
if 'wind_speed' in df.columns:
|
||||
wind = df['wind_speed'].iloc[0]
|
||||
df['wind_squared'] = wind ** 2
|
||||
df['is_windy'] = int(wind > 15)
|
||||
df['is_calm'] = int(wind < 5)
|
||||
|
||||
# Precipitation-based features (additional to basic ones)
|
||||
if 'precipitation' in df.columns:
|
||||
precip = df['precipitation'].iloc[0]
|
||||
df['precip_squared'] = precip ** 2
|
||||
df['precip_log'] = float(np.log1p(precip))
|
||||
|
||||
logger.debug("Prophet features prepared with comprehensive derived features",
|
||||
logger.debug("Complete Prophet features prepared",
|
||||
feature_count=len(df.columns),
|
||||
date=features['date'],
|
||||
season=df['season'].iloc[0],
|
||||
day_of_week=day_of_week,
|
||||
temp_squared=df.get('temp_squared', pd.Series([0])).iloc[0])
|
||||
traffic_volume=df['traffic_volume'].iloc[0],
|
||||
average_speed=df['average_speed'].iloc[0],
|
||||
pedestrian_count=df['pedestrian_count'].iloc[0])
|
||||
|
||||
return df
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error preparing Prophet features: {e}")
|
||||
logger.error("Error preparing Prophet features", error=str(e))
|
||||
raise
|
||||
|
||||
def _get_season(self, month: int) -> int:
|
||||
|
||||
Reference in New Issue
Block a user