REFACTOR - Database logic

This commit is contained in:
Urtzi Alfaro
2025-08-08 09:08:41 +02:00
parent 0154365bfc
commit 488bb3ef93
113 changed files with 22842 additions and 6503 deletions

View File

@@ -0,0 +1,27 @@
"""
Forecasting Service Layer
Business logic services for demand forecasting and prediction
"""
from .forecasting_service import ForecastingService, EnhancedForecastingService
from .prediction_service import PredictionService
from .model_client import ModelClient
from .data_client import DataClient
from .messaging import (
publish_forecast_generated,
publish_batch_forecast_completed,
publish_forecast_alert,
ForecastingStatusPublisher
)
__all__ = [
"ForecastingService",
"EnhancedForecastingService",
"PredictionService",
"ModelClient",
"DataClient",
"publish_forecast_generated",
"publish_batch_forecast_completed",
"publish_forecast_alert",
"ForecastingStatusPublisher"
]
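
A minimal consumption sketch of the service layer re-exported above. The import path (app.services) and the "model_path" key are assumptions, since file names are not shown on this page; the constructors and the predict() signature come from the diffs below.

from typing import Optional

# Hypothetical wiring; the import path and payload keys are assumptions.
from app.services import ModelClient, PredictionService, publish_forecast_generated

async def forecast_for_tenant(tenant_id: str, features: dict) -> Optional[dict]:
    """Pick any available model for the tenant, predict, then emit an event."""
    model_client = ModelClient()              # builds its own database manager by default
    prediction_service = PredictionService()

    model = await model_client.get_any_model_for_tenant(tenant_id)
    if model is None:
        return None

    prediction = await prediction_service.predict(
        model_id=model.get("id"),
        model_path=model.get("model_path", ""),   # assumed key name
        features=features,
    )
    await publish_forecast_generated({"tenant_id": tenant_id, "prediction": prediction})
    return prediction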

File diff suppressed because it is too large

View File

@@ -149,4 +149,67 @@ async def publish_forecasts_deleted_event(tenant_id: str, deletion_stats: Dict[s
}
)
except Exception as e:
logger.error("Failed to publish forecasts deletion event", error=str(e))
logger.error("Failed to publish forecasts deletion event", error=str(e))
# Additional publishing functions for compatibility
async def publish_forecast_generated(data: dict) -> bool:
"""Publish forecast generated event"""
try:
if rabbitmq_client:
await rabbitmq_client.publish_event(
exchange="forecasting_events",
routing_key="forecast.generated",
message=data
)
return True
except Exception as e:
logger.error("Failed to publish forecast generated event", error=str(e))
return False
async def publish_batch_forecast_completed(data: dict) -> bool:
"""Publish batch forecast completed event"""
try:
if rabbitmq_client:
await rabbitmq_client.publish_event(
exchange="forecasting_events",
routing_key="forecast.batch.completed",
message=data
)
return True
except Exception as e:
logger.error("Failed to publish batch forecast event", error=str(e))
return False
async def publish_forecast_alert(data: dict) -> bool:
"""Publish forecast alert event"""
try:
if rabbitmq_client:
await rabbitmq_client.publish_event(
exchange="forecasting_events",
routing_key="forecast.alert",
message=data
)
return True
except Exception as e:
logger.error("Failed to publish forecast alert event", error=str(e))
return False
# Publisher class for compatibility
class ForecastingStatusPublisher:
"""Publisher for forecasting status events"""
async def publish_status(self, status: str, data: dict) -> bool:
"""Publish forecasting status"""
try:
if rabbitmq_client:
await rabbitmq_client.publish_event(
exchange="forecasting_events",
routing_key=f"forecast.status.{status}",
message=data
)
return True
except Exception as e:
logger.error(f"Failed to publish {status} status", error=str(e))
return False
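
A short usage sketch for the compatibility helpers above, assuming they are imported from this package's messaging module (the real path is not shown) and that the payload keys are illustrative rather than a schema defined in this commit. Note that the helpers swallow broker errors and return False instead of raising.

from datetime import datetime, timezone

# Assumed import path; payload fields and the "degraded" status value are illustrative.
from app.services.messaging import publish_forecast_generated, ForecastingStatusPublisher

async def notify_forecast_ready(tenant_id: str, forecast_id: str) -> None:
    ok = await publish_forecast_generated({
        "tenant_id": tenant_id,
        "forecast_id": forecast_id,
        "generated_at": datetime.now(timezone.utc).isoformat(),
    })
    if not ok:
        # Fall back to a status event when the primary publish fails.
        publisher = ForecastingStatusPublisher()
        await publisher.publish_status("degraded", {"tenant_id": tenant_id})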

View File

@@ -9,17 +9,22 @@ from typing import Dict, Any, List, Optional
# Import shared clients - no more code duplication!
from shared.clients import get_service_clients, get_training_client, get_data_client
from shared.database.base import create_database_manager
from app.core.config import settings
logger = structlog.get_logger()
class ModelClient:
"""
Client for managing models in forecasting service
Client for managing models in the forecasting service, with dependency injection
Shows how to call multiple services cleanly
"""
def __init__(self):
def __init__(self, database_manager=None):
self.database_manager = database_manager or create_database_manager(
settings.DATABASE_URL, "forecasting-service"
)
# Option 1: Get all clients at once
self.clients = get_service_clients(settings, "forecasting")
@@ -114,6 +119,36 @@ class ModelClient:
logger.error(f"Error selecting best model: {e}", tenant_id=tenant_id)
return None
async def get_any_model_for_tenant(
self,
tenant_id: str
) -> Optional[Dict[str, Any]]:
"""
Get any available model for a tenant; used as a fallback when no product-specific model is found
"""
try:
# First try to get any active models for this tenant
models = await self.get_available_models(tenant_id)
if models:
# Return the most recently trained model
sorted_models = sorted(models, key=lambda x: x.get('created_at', ''), reverse=True)
best_model = sorted_models[0]
logger.info("Found fallback model for tenant",
tenant_id=tenant_id,
model_id=best_model.get('id', 'unknown'),
product=best_model.get('product_name', 'unknown'))
return best_model
logger.warning("No fallback models available for tenant", tenant_id=tenant_id)
return None
except Exception as e:
logger.error("Error getting fallback model for tenant",
tenant_id=tenant_id,
error=str(e))
return None
async def validate_model_data_compatibility(
self,
tenant_id: str,

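The constructor change above makes the database dependency injectable, which is mainly useful for tests. A sketch under stated assumptions: pytest-asyncio is available, ModelClient is importable as shown, and the injected stub only needs to exist (the database-manager interface itself is not shown in this diff).

from unittest.mock import AsyncMock, MagicMock

import pytest

from app.services.model_client import ModelClient  # assumed import path

@pytest.mark.asyncio
async def test_get_any_model_for_tenant_returns_newest_model():
    fake_db = MagicMock(name="database_manager")    # stand-in for create_database_manager(...)
    # Note: __init__ still calls get_service_clients(settings, "forecasting"),
    # which may need configuration or patching in a real test environment.
    client = ModelClient(database_manager=fake_db)

    # Stub the lookup that get_any_model_for_tenant relies on.
    client.get_available_models = AsyncMock(return_value=[
        {"id": "m-old", "created_at": "2025-01-01T00:00:00"},
        {"id": "m-new", "created_at": "2025-08-01T00:00:00"},
    ])

    model = await client.get_any_model_for_tenant("tenant-123")
    assert model["id"] == "m-new"                   # most recently trained model wins
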
View File

@@ -19,20 +19,50 @@ import joblib
from app.core.config import settings
from shared.monitoring.metrics import MetricsCollector
from shared.database.base import create_database_manager
logger = structlog.get_logger()
metrics = MetricsCollector("forecasting-service")
class PredictionService:
"""
Service for loading ML models and generating predictions
Service for loading ML models and generating predictions, with dependency injection
Interfaces with trained Prophet models from the training service
"""
def __init__(self):
def __init__(self, database_manager=None):
self.database_manager = database_manager or create_database_manager(settings.DATABASE_URL, "forecasting-service")
self.model_cache = {}
self.cache_ttl = 3600 # 1 hour cache
async def validate_prediction_request(self, request: Dict[str, Any]) -> Dict[str, Any]:
"""Validate prediction request"""
try:
required_fields = ["product_name", "model_id", "features"]
missing_fields = [field for field in required_fields if field not in request]
if missing_fields:
return {
"is_valid": False,
"errors": [f"Missing required fields: {missing_fields}"],
"validation_passed": False
}
return {
"is_valid": True,
"errors": [],
"validation_passed": True,
"validated_fields": list(request.keys())
}
except Exception as e:
logger.error("Validation error", error=str(e))
return {
"is_valid": False,
"errors": [str(e)],
"validation_passed": False
}
async def predict(self, model_id: str, model_path: str, features: Dict[str, Any],
confidence_level: float = 0.8) -> Dict[str, float]:
"""Generate prediction using trained model"""
@@ -74,10 +104,37 @@ class PredictionService:
# Record metrics
processing_time = (datetime.now() - start_time).total_seconds()
# Record metrics with proper type conversion
# Record metrics with proper registration and error handling
try:
metrics.register_histogram("prediction_processing_time_seconds", float(processing_time))
metrics.increment_counter("predictions_served_total")
# Register metrics if not already registered
if "prediction_processing_time" not in metrics._histograms:
metrics.register_histogram(
"prediction_processing_time",
"Time taken to process predictions",
labels=['service', 'model_type']
)
if "predictions_served_total" not in metrics._counters:
try:
metrics.register_counter(
"predictions_served_total",
"Total number of predictions served",
labels=['service', 'status']
)
except Exception as reg_error:
# Metric might already exist in global registry
logger.debug("Counter already exists in registry", error=str(reg_error))
# Now record the metrics
metrics.observe_histogram(
"prediction_processing_time",
processing_time,
labels={'service': 'forecasting-service', 'model_type': 'prophet'}
)
metrics.increment_counter(
"predictions_served_total",
labels={'service': 'forecasting-service', 'status': 'success'}
)
except Exception as metrics_error:
# Log metrics error but don't fail the prediction
logger.warning("Failed to record metrics", error=str(metrics_error))
@@ -93,7 +150,19 @@ class PredictionService:
logger.error("Error generating prediction",
error=str(e),
model_id=model_id)
metrics.increment_counter("prediction_errors_total")
try:
if "prediction_errors_total" not in metrics._counters:
metrics.register_counter(
"prediction_errors_total",
"Total number of prediction errors",
labels=['service', 'error_type']
)
metrics.increment_counter(
"prediction_errors_total",
labels={'service': 'forecasting-service', 'error_type': 'prediction_failed'}
)
except Exception:
pass # Don't fail on metrics errors
raise
async def _load_model(self, model_id: str, model_path: str):
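
The register-then-record pattern above guards against double registration in a shared registry. A condensed sketch of the same idea as a reusable helper; it assumes MetricsCollector exposes the _counters mapping and the register_counter/increment_counter calls used in this diff.

import structlog

logger = structlog.get_logger()

def record_counter_safely(collector, name: str, description: str, labels: dict) -> None:
    """Check-register-record in one place; metrics must never break the request path."""
    try:
        if name not in collector._counters:
            try:
                collector.register_counter(name, description, labels=list(labels.keys()))
            except Exception:
                pass  # already present in the global registry
        collector.increment_counter(name, labels=labels)
    except Exception as exc:
        logger.warning("Failed to record metric", metric=name, error=str(exc))

# Mirrors the inline version in predict(), e.g.:
# record_counter_safely(metrics, "predictions_served_total",
#                       "Total number of predictions served",
#                       {"service": "forecasting-service", "status": "success"})
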
@@ -268,139 +337,149 @@ class PredictionService:
df['is_autumn'] = int(df['season'].iloc[0] == 4)
df['is_winter'] = int(df['season'].iloc[0] == 1)
# Holiday features
df['is_holiday'] = int(features.get('is_holiday', False))
df['is_school_holiday'] = int(features.get('is_school_holiday', False))
# ✅ PERFORMANCE FIX: Build all features at once to avoid DataFrame fragmentation
# Month-based features (match training)
df['is_january'] = int(forecast_date.month == 1)
df['is_february'] = int(forecast_date.month == 2)
df['is_march'] = int(forecast_date.month == 3)
df['is_april'] = int(forecast_date.month == 4)
df['is_may'] = int(forecast_date.month == 5)
df['is_june'] = int(forecast_date.month == 6)
df['is_july'] = int(forecast_date.month == 7)
df['is_august'] = int(forecast_date.month == 8)
df['is_september'] = int(forecast_date.month == 9)
df['is_october'] = int(forecast_date.month == 10)
df['is_november'] = int(forecast_date.month == 11)
df['is_december'] = int(forecast_date.month == 12)
# Special day features
df['is_month_start'] = int(forecast_date.day <= 3)
df['is_month_end'] = int(forecast_date.day >= 28)
df['is_payday_period'] = int((forecast_date.day <= 5) or (forecast_date.day >= 25))
# ✅ FIX: Add ALL derived features that training service creates
# Weather-based derived features
df['temp_squared'] = df['temperature'].iloc[0] ** 2
df['is_cold_day'] = int(df['temperature'].iloc[0] < 10)
df['is_hot_day'] = int(df['temperature'].iloc[0] > 25)
df['is_pleasant_day'] = int(10 <= df['temperature'].iloc[0] <= 25)
# Humidity features
df['humidity_squared'] = df['humidity'].iloc[0] ** 2
df['is_high_humidity'] = int(df['humidity'].iloc[0] > 70)
df['is_low_humidity'] = int(df['humidity'].iloc[0] < 40)
# Pressure features
df['pressure_squared'] = df['pressure'].iloc[0] ** 2
df['is_high_pressure'] = int(df['pressure'].iloc[0] > 1020)
df['is_low_pressure'] = int(df['pressure'].iloc[0] < 1000)
# Wind features
df['wind_squared'] = df['wind_speed'].iloc[0] ** 2
df['is_windy'] = int(df['wind_speed'].iloc[0] > 15)
df['is_calm'] = int(df['wind_speed'].iloc[0] < 5)
# Precipitation features
df['precip_squared'] = df['precipitation'].iloc[0] ** 2
df['precip_log'] = float(np.log1p(df['precipitation'].iloc[0]))
df['is_rainy_day'] = int(df['precipitation'].iloc[0] > 0.1)
df['is_very_rainy_day'] = int(df['precipitation'].iloc[0] > 5.0)
df['is_heavy_rain'] = int(df['precipitation'].iloc[0] > 10)
df['rain_intensity'] = self._get_rain_intensity(df['precipitation'].iloc[0])
# ✅ FIX: Add ALL traffic-based derived features
if df['traffic_volume'].iloc[0] > 0:
traffic = df['traffic_volume'].iloc[0]
df['high_traffic'] = int(traffic > 150)
df['low_traffic'] = int(traffic < 50)
df['traffic_normalized'] = float((traffic - 100) / 50)
df['traffic_squared'] = traffic ** 2
df['traffic_log'] = float(np.log1p(traffic))
else:
df['high_traffic'] = 0
df['low_traffic'] = 0
df['traffic_normalized'] = 0.0
df['traffic_squared'] = 0.0
df['traffic_log'] = 0.0
# ✅ FIX: Add pedestrian-based features
pedestrians = df['pedestrian_count'].iloc[0]
df['high_pedestrian_count'] = int(pedestrians > 100)
df['low_pedestrian_count'] = int(pedestrians < 25)
df['pedestrian_normalized'] = float((pedestrians - 50) / 25)
df['pedestrian_squared'] = pedestrians ** 2
df['pedestrian_log'] = float(np.log1p(pedestrians))
# ✅ FIX: Add average_speed-based features
avg_speed = df['average_speed'].iloc[0]
df['high_speed'] = int(avg_speed > 40)
df['low_speed'] = int(avg_speed < 20)
df['speed_normalized'] = float((avg_speed - 30) / 10)
df['speed_squared'] = avg_speed ** 2
df['speed_log'] = float(np.log1p(avg_speed))
# ✅ FIX: Add congestion-based features
congestion = df['congestion_level'].iloc[0]
df['high_congestion'] = int(congestion > 3)
df['low_congestion'] = int(congestion < 2)
df['congestion_squared'] = congestion ** 2
# ✅ FIX: Add ALL interaction features that training creates
# Weekend interactions
is_weekend = df['is_weekend'].iloc[0]
# Extract values once to avoid repeated iloc calls
temperature = df['temperature'].iloc[0]
df['weekend_temp_interaction'] = is_weekend * temperature
df['weekend_pleasant_weather'] = is_weekend * df['is_pleasant_day'].iloc[0]
df['weekend_traffic_interaction'] = is_weekend * df['traffic_volume'].iloc[0]
# Holiday interactions
is_holiday = df['is_holiday'].iloc[0]
df['holiday_temp_interaction'] = is_holiday * temperature
df['holiday_traffic_interaction'] = is_holiday * df['traffic_volume'].iloc[0]
# Season interactions
humidity = df['humidity'].iloc[0]
pressure = df['pressure'].iloc[0]
wind_speed = df['wind_speed'].iloc[0]
precipitation = df['precipitation'].iloc[0]
traffic = df['traffic_volume'].iloc[0]
pedestrians = df['pedestrian_count'].iloc[0]
avg_speed = df['average_speed'].iloc[0]
congestion = df['congestion_level'].iloc[0]
season = df['season'].iloc[0]
df['season_temp_interaction'] = season * temperature
df['season_traffic_interaction'] = season * df['traffic_volume'].iloc[0]
is_weekend = df['is_weekend'].iloc[0]
# Rain-traffic interactions
is_rainy = df['is_rainy_day'].iloc[0]
df['rain_traffic_interaction'] = is_rainy * df['traffic_volume'].iloc[0]
df['rain_speed_interaction'] = is_rainy * df['average_speed'].iloc[0]
# Build all new features as a dictionary
new_features = {
# Holiday features
'is_holiday': int(features.get('is_holiday', False)),
'is_school_holiday': int(features.get('is_school_holiday', False)),
# Month-based features
'is_january': int(forecast_date.month == 1),
'is_february': int(forecast_date.month == 2),
'is_march': int(forecast_date.month == 3),
'is_april': int(forecast_date.month == 4),
'is_may': int(forecast_date.month == 5),
'is_june': int(forecast_date.month == 6),
'is_july': int(forecast_date.month == 7),
'is_august': int(forecast_date.month == 8),
'is_september': int(forecast_date.month == 9),
'is_october': int(forecast_date.month == 10),
'is_november': int(forecast_date.month == 11),
'is_december': int(forecast_date.month == 12),
# Special day features
'is_month_start': int(forecast_date.day <= 3),
'is_month_end': int(forecast_date.day >= 28),
'is_payday_period': int((forecast_date.day <= 5) or (forecast_date.day >= 25)),
# Weather-based derived features
'temp_squared': temperature ** 2,
'is_cold_day': int(temperature < 10),
'is_hot_day': int(temperature > 25),
'is_pleasant_day': int(10 <= temperature <= 25),
# Humidity features
'humidity_squared': humidity ** 2,
'is_high_humidity': int(humidity > 70),
'is_low_humidity': int(humidity < 40),
# Pressure features
'pressure_squared': pressure ** 2,
'is_high_pressure': int(pressure > 1020),
'is_low_pressure': int(pressure < 1000),
# Wind features
'wind_squared': wind_speed ** 2,
'is_windy': int(wind_speed > 15),
'is_calm': int(wind_speed < 5),
# Precipitation features
'precip_squared': precipitation ** 2,
'precip_log': float(np.log1p(precipitation)),
'is_rainy_day': int(precipitation > 0.1),
'is_very_rainy_day': int(precipitation > 5.0),
'is_heavy_rain': int(precipitation > 10),
'rain_intensity': self._get_rain_intensity(precipitation),
# Traffic-based features
'high_traffic': int(traffic > 150) if traffic > 0 else 0,
'low_traffic': int(traffic < 50) if traffic > 0 else 0,
'traffic_normalized': float((traffic - 100) / 50) if traffic > 0 else 0.0,
'traffic_squared': traffic ** 2,
'traffic_log': float(np.log1p(traffic)),
# Pedestrian features
'high_pedestrian_count': int(pedestrians > 100),
'low_pedestrian_count': int(pedestrians < 25),
'pedestrian_normalized': float((pedestrians - 50) / 25),
'pedestrian_squared': pedestrians ** 2,
'pedestrian_log': float(np.log1p(pedestrians)),
# Speed features
'high_speed': int(avg_speed > 40),
'low_speed': int(avg_speed < 20),
'speed_normalized': float((avg_speed - 30) / 10),
'speed_squared': avg_speed ** 2,
'speed_log': float(np.log1p(avg_speed)),
# Congestion features
'high_congestion': int(congestion > 3),
'low_congestion': int(congestion < 2),
'congestion_squared': congestion ** 2,
# Day features
'is_peak_bakery_day': int(day_of_week in [4, 5, 6]),
'is_high_demand_month': int(forecast_date.month in [6, 7, 8, 12]),
'is_warm_season': int(forecast_date.month in [4, 5, 6, 7, 8, 9])
}
# Day-weather interactions
df['day_temp_interaction'] = day_of_week * temperature
df['month_temp_interaction'] = forecast_date.month * temperature
# Calculate interaction features
is_holiday = new_features['is_holiday']
is_pleasant = new_features['is_pleasant_day']
is_rainy = new_features['is_rainy_day']
# Traffic-speed interactions
df['traffic_speed_interaction'] = df['traffic_volume'].iloc[0] * df['average_speed'].iloc[0]
df['pedestrian_speed_interaction'] = df['pedestrian_count'].iloc[0] * df['average_speed'].iloc[0]
interaction_features = {
# Weekend interactions
'weekend_temp_interaction': is_weekend * temperature,
'weekend_pleasant_weather': is_weekend * is_pleasant,
'weekend_traffic_interaction': is_weekend * traffic,
# Holiday interactions
'holiday_temp_interaction': is_holiday * temperature,
'holiday_traffic_interaction': is_holiday * traffic,
# Season interactions
'season_temp_interaction': season * temperature,
'season_traffic_interaction': season * traffic,
# Rain-traffic interactions
'rain_traffic_interaction': is_rainy * traffic,
'rain_speed_interaction': is_rainy * avg_speed,
# Day-weather interactions
'day_temp_interaction': day_of_week * temperature,
'month_temp_interaction': forecast_date.month * temperature,
# Traffic-speed interactions
'traffic_speed_interaction': traffic * avg_speed,
'pedestrian_speed_interaction': pedestrians * avg_speed,
# Congestion interactions
'congestion_temp_interaction': congestion * temperature,
'congestion_weekend_interaction': congestion * is_weekend
}
# Congestion-related interactions
df['congestion_temp_interaction'] = congestion * temperature
df['congestion_weekend_interaction'] = congestion * is_weekend
# Combine all features
all_new_features = {**new_features, **interaction_features}
# Add after the existing day-of-week features:
df['is_peak_bakery_day'] = int(day_of_week in [4, 5, 6]) # Friday, Saturday, Sunday
# Add after the month features:
df['is_high_demand_month'] = int(forecast_date.month in [6, 7, 8, 12]) # Summer and December
df['is_warm_season'] = int(forecast_date.month in [4, 5, 6, 7, 8, 9]) # Spring/summer months
# Add all features at once using pd.concat to avoid fragmentation
new_feature_df = pd.DataFrame([all_new_features])
df = pd.concat([df, new_feature_df], axis=1)
logger.debug("Complete Prophet features prepared",
feature_count=len(df.columns),
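
The refactor above replaces dozens of single-column assignments with one dict and a single pd.concat, which avoids pandas' DataFrame fragmentation (repeated block insertion and the associated PerformanceWarning). A standalone illustration with placeholder feature names, not the real feature set:

import numpy as np
import pandas as pd

df = pd.DataFrame({"temperature": [18.5], "precipitation": [0.4]})

# Fragmenting pattern: every assignment inserts a new block into the frame.
# for i in range(60):
#     df[f"feature_{i}"] = float(i)   # emits PerformanceWarning at scale

# Consolidated pattern used in the refactor: build everything in a dict, concat once.
new_features = {
    "temp_squared": df["temperature"].iloc[0] ** 2,
    "is_rainy_day": int(df["precipitation"].iloc[0] > 0.1),
    "precip_log": float(np.log1p(df["precipitation"].iloc[0])),
}
df = pd.concat([df, pd.DataFrame([new_features])], axis=1)
print(df.columns.tolist())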