REFACTOR external service and improve websocket training

This commit is contained in:
Urtzi Alfaro
2025-10-09 14:11:02 +02:00
parent 7c72f83c51
commit 3c689b4f98
111 changed files with 13289 additions and 2374 deletions

View File

@@ -4,8 +4,9 @@ Main forecasting service that uses the repository pattern for data access
"""
import structlog
import uuid
from typing import Dict, List, Any, Optional
from datetime import datetime, date, timedelta
from datetime import datetime, date, timedelta, timezone
from sqlalchemy.ext.asyncio import AsyncSession
from app.ml.predictor import BakeryForecaster
@@ -138,29 +139,80 @@ class EnhancedForecastingService:
filters=filters)
return forecast_list
except Exception as e:
logger.error("Failed to get tenant forecasts",
logger.error("Failed to get tenant forecasts",
tenant_id=tenant_id,
error=str(e))
raise
async def list_forecasts(
    self,
    tenant_id: str,
    inventory_product_id: Optional[str] = None,
    start_date: Optional[date] = None,
    end_date: Optional[date] = None,
    limit: int = 100,
    offset: int = 0,
) -> List[Dict]:
    """List forecasts for a tenant (API-compatibility alias).

    Thin wrapper around ``get_tenant_forecasts``; the only translation is
    that the API-style ``offset`` is forwarded as that method's ``skip``.

    Args:
        tenant_id: Tenant whose forecasts are requested.
        inventory_product_id: Optional product filter, forwarded unchanged.
        start_date: Optional lower date bound, forwarded unchanged.
        end_date: Optional upper date bound, forwarded unchanged.
        limit: Maximum number of rows, forwarded unchanged.
        offset: Number of rows to skip, forwarded as ``skip``.

    Returns:
        Whatever ``get_tenant_forecasts`` returns for these arguments.
    """
    return await self.get_tenant_forecasts(
        tenant_id=tenant_id,
        inventory_product_id=inventory_product_id,
        start_date=start_date,
        end_date=end_date,
        skip=offset,
        limit=limit,
    )
async def get_forecast_by_id(self, forecast_id: str) -> Optional[Dict]:
    """Fetch one forecast through the repository and serialize it to a dict.

    Args:
        forecast_id: Identifier handed to the forecast repository's ``get``.

    Returns:
        A dict with the forecast's ids, location, ISO-formatted forecast
        date, predicted demand, confidence bounds/level and model metadata,
        or ``None`` when the repository returns nothing for ``forecast_id``.

    Raises:
        Exception: Any failure while opening the session, loading or
            serializing the forecast is logged and re-raised unchanged.
    """
    try:
        async with self.database_manager.get_background_session() as session:
            repos = await self._init_repositories(session)
            forecast = await repos['forecast'].get(forecast_id)
            if not forecast:
                return None

            # Ids are stringified and numeric fields coerced to float so the
            # result contains only plain Python values.
            return {
                "id": str(forecast.id),
                "tenant_id": str(forecast.tenant_id),
                "inventory_product_id": str(forecast.inventory_product_id),
                "location": forecast.location,
                "forecast_date": forecast.forecast_date.isoformat(),
                "predicted_demand": float(forecast.predicted_demand),
                "confidence_lower": float(forecast.confidence_lower),
                "confidence_upper": float(forecast.confidence_upper),
                "confidence_level": float(forecast.confidence_level),
                "model_id": forecast.model_id,
                "model_version": forecast.model_version,
                "algorithm": forecast.algorithm,
            }
    except Exception as e:
        logger.error("Failed to get forecast by ID",
                     error=str(e),
                     forecast_id=forecast_id)
        raise
async def delete_forecast(self, forecast_id: str) -> bool:
"""Delete forecast"""
async def get_forecast(self, tenant_id: str, forecast_id: uuid.UUID) -> Optional[Dict]:
    """Return the forecast dict only when it belongs to ``tenant_id``.

    Looks the forecast up via ``get_forecast_by_id`` (with the id converted
    to ``str``) and yields ``None`` both when nothing is found and when the
    record's ``"tenant_id"`` differs from the requested tenant.
    """
    record = await self.get_forecast_by_id(str(forecast_id))

    # Nothing found (or an empty result): nothing to validate.
    if not record:
        return None

    # Found, but owned by a different tenant: hide it from the caller.
    if record["tenant_id"] != tenant_id:
        return None

    return record
async def delete_forecast(self, tenant_id: str, forecast_id: uuid.UUID) -> bool:
    """Delete a forecast after verifying it belongs to ``tenant_id``.

    Args:
        tenant_id: Tenant that must own the forecast; compared against
            ``str(forecast.tenant_id)``.
        forecast_id: Forecast identifier; passed to the repository as ``str``.

    Returns:
        ``True`` when the forecast was deleted and the session committed.
        ``False`` when the forecast does not exist, belongs to another
        tenant, or any exception occurred (the error is logged, not raised).
    """
    forecast_key = str(forecast_id)
    try:
        async with self.database_manager.get_background_session() as session:
            repos = await self._init_repositories(session)

            # Ownership check first: never delete another tenant's forecast.
            forecast = await repos['forecast'].get(forecast_key)
            if not forecast or str(forecast.tenant_id) != tenant_id:
                return False

            await repos['forecast'].delete(forecast_key)
            await session.commit()

            logger.info("Forecast deleted", tenant_id=tenant_id, forecast_id=forecast_id)
            return True
    except Exception as e:
        # Best-effort contract: failures are reported as False, not raised.
        logger.error("Failed to delete forecast",
                     error=str(e),
                     tenant_id=tenant_id,
                     forecast_id=forecast_key)
        return False
@@ -237,7 +289,7 @@ class EnhancedForecastingService:
"""
Generate forecast using repository pattern with caching.
"""
start_time = datetime.utcnow()
start_time = datetime.now(timezone.utc)
try:
logger.info("Generating enhanced forecast",
@@ -310,7 +362,7 @@ class EnhancedForecastingService:
"weather_precipitation": features.get('precipitation'),
"weather_description": features.get('weather_description'),
"traffic_volume": features.get('traffic_volume'),
"processing_time_ms": int((datetime.utcnow() - start_time).total_seconds() * 1000),
"processing_time_ms": int((datetime.now(timezone.utc) - start_time).total_seconds() * 1000),
"features_used": features
}
@@ -338,7 +390,7 @@ class EnhancedForecastingService:
return self._create_forecast_response_from_model(forecast)
except Exception as e:
processing_time = int((datetime.utcnow() - start_time).total_seconds() * 1000)
processing_time = int((datetime.now(timezone.utc) - start_time).total_seconds() * 1000)
logger.error("Error generating enhanced forecast",
error=str(e),
tenant_id=tenant_id,
@@ -354,7 +406,7 @@ class EnhancedForecastingService:
"""
Generate multiple daily forecasts for the specified period.
"""
start_time = datetime.utcnow()
start_time = datetime.now(timezone.utc)
forecasts = []
try:
@@ -364,6 +416,26 @@ class EnhancedForecastingService:
forecast_days=request.forecast_days,
start_date=request.forecast_date.isoformat())
# Fetch weather forecast ONCE for all days to reduce API calls
weather_forecasts = await self.data_client.fetch_weather_forecast(
tenant_id=tenant_id,
days=request.forecast_days,
latitude=40.4168, # Madrid coordinates (could be parameterized per tenant)
longitude=-3.7038
)
# Create a mapping of dates to weather data for quick lookup
weather_map = {}
for weather in weather_forecasts:
weather_date = weather.get('forecast_date', '')
if isinstance(weather_date, str):
weather_date = weather_date.split('T')[0]
elif hasattr(weather_date, 'date'):
weather_date = weather_date.date().isoformat()
else:
weather_date = str(weather_date).split('T')[0]
weather_map[weather_date] = weather
# Generate a forecast for each day
for day_offset in range(request.forecast_days):
# Calculate the forecast date for this day
@@ -373,7 +445,6 @@ class EnhancedForecastingService:
current_date = parse(current_date).date()
if day_offset > 0:
from datetime import timedelta
current_date = current_date + timedelta(days=day_offset)
# Create a new request for this specific day
@@ -385,14 +456,14 @@ class EnhancedForecastingService:
confidence_level=request.confidence_level
)
# Generate forecast for this day
daily_forecast = await self.generate_forecast(tenant_id, daily_request)
# Generate forecast for this day, passing the weather data map
daily_forecast = await self.generate_forecast_with_weather_map(tenant_id, daily_request, weather_map)
forecasts.append(daily_forecast)
# Calculate summary statistics
total_demand = sum(f.predicted_demand for f in forecasts)
avg_confidence = sum(f.confidence_level for f in forecasts) / len(forecasts)
processing_time = int((datetime.utcnow() - start_time).total_seconds() * 1000)
processing_time = int((datetime.now(timezone.utc) - start_time).total_seconds() * 1000)
# Convert forecasts to dictionary format for the response
forecast_dicts = []
@@ -439,6 +510,124 @@ class EnhancedForecastingService:
tenant_id=tenant_id,
error=str(e))
raise
async def generate_forecast_with_weather_map(
    self,
    tenant_id: str,
    request: ForecastRequest,
    weather_map: Dict[str, Any]
) -> ForecastResponse:
    """
    Produce a single forecast, taking weather from an already-fetched map.

    Flow: return the cached prediction if one exists; otherwise resolve the
    model, build features from ``weather_map``, run the prediction, apply
    business rules, persist the forecast and cache it for 24 hours.

    Args:
        tenant_id: Tenant the forecast is generated for.
        request: Forecast request (product, location, date, confidence level).
        weather_map: Pre-fetched weather data, handed to the feature builder.

    Returns:
        A response built from the cache entry on a cache hit, otherwise from
        the newly stored forecast.

    Raises:
        ValueError: When no valid model is available for the product.
        Exception: Any other failure is logged and re-raised unchanged.
    """
    began = datetime.now(timezone.utc)

    def elapsed_ms() -> int:
        # Whole milliseconds elapsed since this call started.
        return int((datetime.now(timezone.utc) - began).total_seconds() * 1000)

    try:
        logger.info("Generating enhanced forecast with weather map",
                    tenant_id=tenant_id,
                    inventory_product_id=request.inventory_product_id,
                    date=request.forecast_date.isoformat())

        async with self.database_manager.get_background_session() as session:
            repositories = await self._init_repositories(session)

            # 1) Serve from the prediction cache when possible.
            cache_hit = await repositories['cache'].get_cached_prediction(
                tenant_id, request.inventory_product_id, request.location, request.forecast_date
            )
            if cache_hit:
                logger.debug("Using cached prediction",
                             tenant_id=tenant_id,
                             inventory_product_id=request.inventory_product_id)
                return self._create_forecast_response_from_cache(cache_hit)

            # 2) Resolve the model; without one there is nothing to predict with.
            model = await self._get_latest_model_with_fallback(tenant_id, request.inventory_product_id)
            if not model:
                raise ValueError(f"No valid model available for product: {request.inventory_product_id}")

            # 3) Build the feature set from the pre-fetched weather map.
            features = await self._prepare_forecast_features_with_fallbacks_and_weather_map(
                tenant_id, request, weather_map
            )

            # 4) Run the model.
            raw_prediction = await self.prediction_service.predict(
                model_id=model['model_id'],
                model_path=model['model_path'],
                features=features,
                confidence_level=request.confidence_level
            )

            # 5) Post-process with business rules.
            adjusted = self._apply_business_rules(raw_prediction, request, features)

            # 6) Persist. A string forecast date is parsed into a datetime first.
            forecast_at = request.forecast_date
            if isinstance(forecast_at, str):
                from dateutil.parser import parse
                forecast_at = parse(forecast_at)

            # Point estimate and bounds are shared by the stored row and the
            # cache entry; missing bounds default to -20% / +20% of the estimate.
            point = adjusted['prediction']
            lower = adjusted.get('lower_bound', adjusted['prediction'] * 0.8)
            upper = adjusted.get('upper_bound', adjusted['prediction'] * 1.2)

            record = {
                "tenant_id": tenant_id,
                "inventory_product_id": request.inventory_product_id,
                "product_name": None,  # nullable; inventory_product_id is the reference
                "location": request.location,
                "forecast_date": forecast_at,
                "predicted_demand": point,
                "confidence_lower": lower,
                "confidence_upper": upper,
                "confidence_level": request.confidence_level,
                "model_id": model['model_id'],
                "model_version": model.get('version', '1.0'),
                "algorithm": model.get('algorithm', 'prophet'),
                "business_type": features.get('business_type', 'individual'),
                "is_holiday": features.get('is_holiday', False),
                "is_weekend": features.get('is_weekend', False),
                "day_of_week": features.get('day_of_week', 0),
                "weather_temperature": features.get('temperature'),
                "weather_precipitation": features.get('precipitation'),
                "weather_description": features.get('weather_description'),
                "traffic_volume": features.get('traffic_volume'),
                "processing_time_ms": elapsed_ms(),
                "features_used": features
            }
            forecast = await repositories['forecast'].create_forecast(record)

            # 7) Cache the prediction for a day.
            await repositories['cache'].cache_prediction(
                tenant_id=tenant_id,
                inventory_product_id=request.inventory_product_id,
                location=request.location,
                forecast_date=forecast_at,
                predicted_demand=point,
                confidence_lower=lower,
                confidence_upper=upper,
                model_id=model['model_id'],
                expires_in_hours=24
            )

            logger.info("Enhanced forecast generated successfully",
                        forecast_id=forecast.id,
                        tenant_id=tenant_id,
                        prediction=adjusted['prediction'])

            return self._create_forecast_response_from_model(forecast)

    except Exception as e:
        processing_time = elapsed_ms()
        logger.error("Error generating enhanced forecast",
                     error=str(e),
                     tenant_id=tenant_id,
                     inventory_product_id=request.inventory_product_id,
                     processing_time=processing_time)
        raise
async def get_forecast_history(
self,
@@ -498,7 +687,7 @@ class EnhancedForecastingService:
"batch_analytics": batch_stats,
"cache_performance": cache_stats,
"performance_trends": performance_trends,
"generated_at": datetime.utcnow().isoformat()
"generated_at": datetime.now(timezone.utc).isoformat()
}
except Exception as e:
@@ -568,6 +757,10 @@ class EnhancedForecastingService:
is_holiday=False,
is_weekend=cache_entry.forecast_date.weekday() >= 5,
day_of_week=cache_entry.forecast_date.weekday(),
weather_temperature=None, # Not stored in cache
weather_precipitation=None, # Not stored in cache
weather_description=None, # Not stored in cache
traffic_volume=None, # Not stored in cache
created_at=cache_entry.created_at,
processing_time_ms=0, # From cache
features_used={}
@@ -649,8 +842,8 @@ class EnhancedForecastingService:
return None
async def _prepare_forecast_features_with_fallbacks(
self,
tenant_id: str,
self,
tenant_id: str,
request: ForecastRequest
) -> Dict[str, Any]:
"""Prepare features with comprehensive fallbacks"""
@@ -665,23 +858,137 @@ class EnhancedForecastingService:
"season": self._get_season(request.forecast_date.month),
"is_holiday": self._is_spanish_holiday(request.forecast_date),
}
# Add weather features (simplified)
features.update({
"temperature": 20.0, # Default values
"precipitation": 0.0,
"humidity": 65.0,
"wind_speed": 5.0,
"pressure": 1013.0,
})
# Add traffic features (simplified)
weekend_factor = 0.7 if features["is_weekend"] else 1.0
features.update({
"traffic_volume": int(100 * weekend_factor),
"pedestrian_count": int(50 * weekend_factor),
})
# Fetch REAL weather data from external service
try:
# Get weather forecast for next 7 days (covers most forecast requests)
weather_forecasts = await self.data_client.fetch_weather_forecast(
tenant_id=tenant_id,
days=7,
latitude=40.4168, # Madrid coordinates (could be parameterized per tenant)
longitude=-3.7038
)
# Find weather for the specific forecast date
forecast_date_str = request.forecast_date.isoformat().split('T')[0]
weather_for_date = None
for weather in weather_forecasts:
# Extract date from forecast_date field
weather_date = weather.get('forecast_date', '')
if isinstance(weather_date, str):
weather_date = weather_date.split('T')[0]
elif hasattr(weather_date, 'isoformat'):
weather_date = weather_date.date().isoformat()
else:
weather_date = str(weather_date).split('T')[0]
if weather_date == forecast_date_str:
weather_for_date = weather
break
if weather_for_date:
logger.info("Using REAL weather data from external service",
date=forecast_date_str,
temp=weather_for_date.get('temperature'),
precipitation=weather_for_date.get('precipitation'))
features.update({
"temperature": weather_for_date.get('temperature', 20.0),
"precipitation": weather_for_date.get('precipitation', 0.0),
"humidity": weather_for_date.get('humidity', 65.0),
"wind_speed": weather_for_date.get('wind_speed', 5.0),
"pressure": weather_for_date.get('pressure', 1013.0),
"weather_description": weather_for_date.get('description'),
})
else:
logger.warning("No weather data for specific date, using defaults",
date=forecast_date_str,
forecasts_count=len(weather_forecasts))
features.update({
"temperature": 20.0,
"precipitation": 0.0,
"humidity": 65.0,
"wind_speed": 5.0,
"pressure": 1013.0,
})
except Exception as e:
logger.error("Failed to fetch weather data, using defaults",
error=str(e),
date=request.forecast_date.isoformat())
# Fallback to defaults on error
features.update({
"temperature": 20.0,
"precipitation": 0.0,
"humidity": 65.0,
"wind_speed": 5.0,
"pressure": 1013.0,
})
# NOTE: Traffic features are NOT included in predictions
# Reason: We only have historical and real-time traffic data, not forecasts
# The model learns traffic patterns during training (using historical data)
# and applies those learned patterns via day_of_week, is_weekend, holidays
# Including fake/estimated traffic values would mislead the model
# See: TRAFFIC_DATA_ANALYSIS.md for full explanation
return features
async def _prepare_forecast_features_with_fallbacks_and_weather_map(
    self,
    tenant_id: str,
    request: ForecastRequest,
    weather_map: Dict[str, Any]
) -> Dict[str, Any]:
    """Build model features for one date using a pre-fetched weather map.

    Calendar features are derived from ``request.forecast_date``. Weather
    features come from the ``weather_map`` entry keyed by the ISO date
    (``YYYY-MM-DD``). A default is used for a weather field when the entry
    is missing altogether, when the field is absent, or when the field is
    present but ``None``.

    Args:
        tenant_id: Not read by this method.
        request: Forecast request; only ``forecast_date`` is read here.
        weather_map: Mapping of ISO date string to a weather dict. ``None``
            or an empty mapping results in default weather values.

    Returns:
        Feature dict with calendar and weather entries.
    """
    forecast_date = request.forecast_date
    weekday = forecast_date.weekday()

    features = {
        "date": forecast_date.isoformat(),
        "day_of_week": weekday,
        "is_weekend": weekday >= 5,
        "day_of_month": forecast_date.day,
        "month": forecast_date.month,
        "quarter": (forecast_date.month - 1) // 3 + 1,
        "week_of_year": forecast_date.isocalendar().week,
        "season": self._get_season(forecast_date.month),
        "is_holiday": self._is_spanish_holiday(forecast_date),
    }

    # Fallback values used whenever a real weather reading is unavailable.
    weather_defaults = {
        "temperature": 20.0,
        "precipitation": 0.0,
        "humidity": 65.0,
        "wind_speed": 5.0,
        "pressure": 1013.0,
    }

    # Look up the pre-fetched weather entry to avoid additional API calls.
    forecast_date_str = forecast_date.isoformat().split('T')[0]
    weather_for_date = (weather_map or {}).get(forecast_date_str)

    if weather_for_date:
        logger.info("Using REAL weather data from external service via weather map",
                    date=forecast_date_str,
                    temp=weather_for_date.get('temperature'),
                    precipitation=weather_for_date.get('precipitation'))
        for name, default in weather_defaults.items():
            # dict.get(name, default) would pass through an explicit None,
            # so the None check is done by hand.
            value = weather_for_date.get(name)
            features[name] = default if value is None else value
        features["weather_description"] = weather_for_date.get('description')
    else:
        logger.warning("No weather data for specific date in weather map, using defaults",
                       date=forecast_date_str)
        features.update(weather_defaults)

    # NOTE: Traffic features are NOT included in predictions
    # Reason: We only have historical and real-time traffic data, not forecasts
    # The model learns traffic patterns during training (using historical data)
    # and applies those learned patterns via day_of_week, is_weekend, holidays
    # Including fake/estimated traffic values would mislead the model
    # See: TRAFFIC_DATA_ANALYSIS.md for full explanation

    return features
def _get_season(self, month: int) -> int:
@@ -695,9 +1002,9 @@ class EnhancedForecastingService:
else:
return 4 # Autumn
def _is_spanish_holiday(self, date: datetime) -> bool:
def _is_spanish_holiday(self, date_obj: date) -> bool:
"""Check if a date is a major Spanish holiday"""
month_day = (date.month, date.day)
month_day = (date_obj.month, date_obj.day)
spanish_holidays = [
(1, 1), (1, 6), (5, 1), (8, 15), (10, 12),
(11, 1), (12, 6), (12, 8), (12, 25)
@@ -754,4 +1061,4 @@ class EnhancedForecastingService:
# Legacy compatibility alias
ForecastingService = EnhancedForecastingService
ForecastingService = EnhancedForecastingService