REFACTOR external service and improve websocket training

This commit is contained in:
Urtzi Alfaro
2025-10-09 14:11:02 +02:00
parent 7c72f83c51
commit 3c689b4f98
111 changed files with 13289 additions and 2374 deletions

View File

@@ -6,7 +6,7 @@ Forecasting Operations API - Business operations for forecast generation and pre
import structlog
from fastapi import APIRouter, Depends, HTTPException, status, Query, Path, Request
from typing import List, Dict, Any, Optional
from datetime import date, datetime
from datetime import date, datetime, timezone
import uuid
from app.services.forecasting_service import EnhancedForecastingService
@@ -50,6 +50,7 @@ async def generate_single_forecast(
request: ForecastRequest,
tenant_id: str = Path(..., description="Tenant ID"),
request_obj: Request = None,
current_user: dict = Depends(get_current_user_dep),
enhanced_forecasting_service: EnhancedForecastingService = Depends(get_enhanced_forecasting_service)
):
"""Generate a single product forecast"""
@@ -106,6 +107,7 @@ async def generate_multi_day_forecast(
request: ForecastRequest,
tenant_id: str = Path(..., description="Tenant ID"),
request_obj: Request = None,
current_user: dict = Depends(get_current_user_dep),
enhanced_forecasting_service: EnhancedForecastingService = Depends(get_enhanced_forecasting_service)
):
"""Generate multiple daily forecasts for the specified period"""
@@ -167,6 +169,7 @@ async def generate_batch_forecast(
request: BatchForecastRequest,
tenant_id: str = Path(..., description="Tenant ID"),
request_obj: Request = None,
current_user: dict = Depends(get_current_user_dep),
enhanced_forecasting_service: EnhancedForecastingService = Depends(get_enhanced_forecasting_service)
):
"""Generate forecasts for multiple products in batch"""
@@ -224,6 +227,7 @@ async def generate_realtime_prediction(
prediction_request: Dict[str, Any],
tenant_id: str = Path(..., description="Tenant ID"),
request_obj: Request = None,
current_user: dict = Depends(get_current_user_dep),
prediction_service: PredictionService = Depends(get_enhanced_prediction_service)
):
"""Generate real-time prediction"""
@@ -245,10 +249,12 @@ async def generate_realtime_prediction(
detail=f"Missing required fields: {missing_fields}"
)
prediction_result = await prediction_service.predict(
prediction_result = await prediction_service.predict_with_weather_forecast(
model_id=prediction_request["model_id"],
model_path=prediction_request.get("model_path", ""),
features=prediction_request["features"],
tenant_id=tenant_id,
days=prediction_request.get("days", 7),
confidence_level=prediction_request.get("confidence_level", 0.8)
)
@@ -257,15 +263,15 @@ async def generate_realtime_prediction(
logger.info("Real-time prediction generated successfully",
tenant_id=tenant_id,
prediction_value=prediction_result.get("prediction"))
days=len(prediction_result))
return {
"tenant_id": tenant_id,
"inventory_product_id": prediction_request["inventory_product_id"],
"model_id": prediction_request["model_id"],
"prediction": prediction_result.get("prediction"),
"confidence": prediction_result.get("confidence"),
"timestamp": datetime.utcnow().isoformat()
"predictions": prediction_result,
"days": len(prediction_result),
"timestamp": datetime.now(timezone.utc).isoformat()
}
except HTTPException:
@@ -295,6 +301,7 @@ async def generate_realtime_prediction(
async def generate_batch_predictions(
predictions_request: List[Dict[str, Any]],
tenant_id: str = Path(..., description="Tenant ID"),
current_user: dict = Depends(get_current_user_dep),
prediction_service: PredictionService = Depends(get_enhanced_prediction_service)
):
"""Generate batch predictions"""
@@ -304,16 +311,17 @@ async def generate_batch_predictions(
results = []
for pred_request in predictions_request:
try:
prediction_result = await prediction_service.predict(
prediction_result = await prediction_service.predict_with_weather_forecast(
model_id=pred_request["model_id"],
model_path=pred_request.get("model_path", ""),
features=pred_request["features"],
tenant_id=tenant_id,
days=pred_request.get("days", 7),
confidence_level=pred_request.get("confidence_level", 0.8)
)
results.append({
"inventory_product_id": pred_request.get("inventory_product_id"),
"prediction": prediction_result.get("prediction"),
"confidence": prediction_result.get("confidence"),
"predictions": prediction_result,
"success": True
})
except Exception as e:

View File

@@ -6,7 +6,7 @@ Business operations for "what-if" scenario testing and strategic planning
import structlog
from fastapi import APIRouter, Depends, HTTPException, status, Path, Request
from typing import List, Dict, Any
from datetime import date, datetime, timedelta
from datetime import date, datetime, timedelta, timezone
import uuid
from app.schemas.forecasts import (
@@ -65,7 +65,7 @@ async def simulate_scenario(
**PROFESSIONAL/ENTERPRISE ONLY**
"""
metrics = get_metrics_collector(request_obj)
start_time = datetime.utcnow()
start_time = datetime.now(timezone.utc)
try:
logger.info("Starting scenario simulation",
@@ -131,7 +131,7 @@ async def simulate_scenario(
)
# Calculate processing time
processing_time_ms = int((datetime.utcnow() - start_time).total_seconds() * 1000)
processing_time_ms = int((datetime.now(timezone.utc) - start_time).total_seconds() * 1000)
if metrics:
metrics.increment_counter("scenario_simulations_success_total")
@@ -160,7 +160,7 @@ async def simulate_scenario(
insights=insights,
recommendations=recommendations,
risk_level=risk_level,
created_at=datetime.utcnow(),
created_at=datetime.now(timezone.utc),
processing_time_ms=processing_time_ms
)

View File

@@ -19,7 +19,7 @@ class Forecast(Base):
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
tenant_id = Column(UUID(as_uuid=True), nullable=False, index=True)
inventory_product_id = Column(UUID(as_uuid=True), nullable=False, index=True) # Reference to inventory service
product_name = Column(String(255), nullable=False, index=True) # Product name stored locally
product_name = Column(String(255), nullable=True, index=True) # Product name (optional - use inventory_product_id as reference)
location = Column(String(255), nullable=False, index=True)
# Forecast period

View File

@@ -6,7 +6,7 @@ Service-specific repository base class with forecasting utilities
from typing import Optional, List, Dict, Any, Type
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import text
from datetime import datetime, date, timedelta
from datetime import datetime, date, timedelta, timezone
import structlog
from shared.database.repository import BaseRepository
@@ -113,15 +113,15 @@ class ForecastingBaseRepository(BaseRepository):
limit: int = 100
) -> List:
"""Get recent records for a tenant"""
cutoff_time = datetime.utcnow() - timedelta(hours=hours)
cutoff_time = datetime.now(timezone.utc) - timedelta(hours=hours)
return await self.get_by_date_range(
tenant_id, cutoff_time, datetime.utcnow(), skip, limit
tenant_id, cutoff_time, datetime.now(timezone.utc), skip, limit
)
async def cleanup_old_records(self, days_old: int = 90) -> int:
"""Clean up old forecasting records"""
try:
cutoff_date = datetime.utcnow() - timedelta(days=days_old)
cutoff_date = datetime.now(timezone.utc) - timedelta(days=days_old)
table_name = self.model.__tablename__
# Use created_at or forecast_date for cleanup
@@ -156,9 +156,9 @@ class ForecastingBaseRepository(BaseRepository):
total_records = await self.count(filters={"tenant_id": tenant_id})
# Get recent activity (records in last 7 days)
seven_days_ago = datetime.utcnow() - timedelta(days=7)
seven_days_ago = datetime.now(timezone.utc) - timedelta(days=7)
recent_records = len(await self.get_by_date_range(
tenant_id, seven_days_ago, datetime.utcnow(), limit=1000
tenant_id, seven_days_ago, datetime.now(timezone.utc), limit=1000
))
# Get records by product if applicable

View File

@@ -6,7 +6,7 @@ Repository for forecast operations
from typing import Optional, List, Dict, Any
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select, and_, text, desc, func
from datetime import datetime, timedelta, date
from datetime import datetime, timedelta, date, timezone
import structlog
from .base import ForecastingBaseRepository
@@ -159,7 +159,7 @@ class ForecastRepository(ForecastingBaseRepository):
) -> Dict[str, Any]:
"""Get forecast accuracy metrics"""
try:
cutoff_date = datetime.utcnow() - timedelta(days=days_back)
cutoff_date = datetime.now(timezone.utc) - timedelta(days=days_back)
# Build base query conditions
conditions = ["tenant_id = :tenant_id", "forecast_date >= :cutoff_date"]
@@ -238,7 +238,7 @@ class ForecastRepository(ForecastingBaseRepository):
) -> Dict[str, Any]:
"""Get demand trends for a product"""
try:
cutoff_date = datetime.utcnow() - timedelta(days=days_back)
cutoff_date = datetime.now(timezone.utc) - timedelta(days=days_back)
query_text = """
SELECT

View File

@@ -6,7 +6,7 @@ Repository for model performance metrics in forecasting service
from typing import Optional, List, Dict, Any
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import text
from datetime import datetime, timedelta
from datetime import datetime, timedelta, timezone
import structlog
from .base import ForecastingBaseRepository
@@ -98,7 +98,7 @@ class PerformanceMetricRepository(ForecastingBaseRepository):
) -> Dict[str, Any]:
"""Get performance trends over time"""
try:
start_date = datetime.utcnow() - timedelta(days=days)
start_date = datetime.now(timezone.utc) - timedelta(days=days)
conditions = [
"tenant_id = :tenant_id",

View File

@@ -6,7 +6,7 @@ Repository for prediction batch operations
from typing import Optional, List, Dict, Any
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import text
from datetime import datetime, timedelta
from datetime import datetime, timedelta, timezone
import structlog
from .base import ForecastingBaseRepository
@@ -81,7 +81,7 @@ class PredictionBatchRepository(ForecastingBaseRepository):
if status:
update_data["status"] = status
if status in ["completed", "failed"]:
update_data["completed_at"] = datetime.utcnow()
update_data["completed_at"] = datetime.now(timezone.utc)
if not update_data:
return await self.get_by_id(batch_id)
@@ -110,7 +110,7 @@ class PredictionBatchRepository(ForecastingBaseRepository):
try:
update_data = {
"status": "completed",
"completed_at": datetime.utcnow()
"completed_at": datetime.now(timezone.utc)
}
if processing_time_ms:
@@ -140,7 +140,7 @@ class PredictionBatchRepository(ForecastingBaseRepository):
try:
update_data = {
"status": "failed",
"completed_at": datetime.utcnow(),
"completed_at": datetime.now(timezone.utc),
"error_message": error_message
}
@@ -180,7 +180,7 @@ class PredictionBatchRepository(ForecastingBaseRepository):
update_data = {
"status": "cancelled",
"completed_at": datetime.utcnow(),
"completed_at": datetime.now(timezone.utc),
"cancelled_by": cancelled_by,
"error_message": f"Cancelled by {cancelled_by}" if cancelled_by else "Cancelled"
}
@@ -270,7 +270,7 @@ class PredictionBatchRepository(ForecastingBaseRepository):
avg_processing_times[row.status] = float(row.avg_processing_time_ms)
# Get recent activity (batches in last 7 days)
seven_days_ago = datetime.utcnow() - timedelta(days=7)
seven_days_ago = datetime.now(timezone.utc) - timedelta(days=7)
recent_query = text(f"""
SELECT COUNT(*) as count
FROM prediction_batches
@@ -315,7 +315,7 @@ class PredictionBatchRepository(ForecastingBaseRepository):
async def cleanup_old_batches(self, days_old: int = 30) -> int:
"""Clean up old completed/failed batches"""
try:
cutoff_date = datetime.utcnow() - timedelta(days=days_old)
cutoff_date = datetime.now(timezone.utc) - timedelta(days=days_old)
query_text = """
DELETE FROM prediction_batches
@@ -354,7 +354,7 @@ class PredictionBatchRepository(ForecastingBaseRepository):
if batch.completed_at:
elapsed_time_ms = int((batch.completed_at - batch.requested_at).total_seconds() * 1000)
elif batch.status in ["pending", "processing"]:
elapsed_time_ms = int((datetime.utcnow() - batch.requested_at).total_seconds() * 1000)
elapsed_time_ms = int((datetime.now(timezone.utc) - batch.requested_at).total_seconds() * 1000)
return {
"batch_id": str(batch.id),

View File

@@ -6,7 +6,7 @@ Repository for prediction cache operations
from typing import Optional, List, Dict, Any
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import text
from datetime import datetime, timedelta
from datetime import datetime, timedelta, timezone
import structlog
import hashlib
@@ -50,7 +50,7 @@ class PredictionCacheRepository(ForecastingBaseRepository):
"""Cache a prediction result"""
try:
cache_key = self._generate_cache_key(tenant_id, inventory_product_id, location, forecast_date)
expires_at = datetime.utcnow() + timedelta(hours=expires_in_hours)
expires_at = datetime.now(timezone.utc) + timedelta(hours=expires_in_hours)
cache_data = {
"cache_key": cache_key,
@@ -102,7 +102,7 @@ class PredictionCacheRepository(ForecastingBaseRepository):
return None
# Check if cache entry has expired
if cache_entry.expires_at < datetime.utcnow():
if cache_entry.expires_at < datetime.now(timezone.utc):
logger.debug("Cache expired", cache_key=cache_key)
await self.delete(cache_entry.id)
return None
@@ -172,7 +172,7 @@ class PredictionCacheRepository(ForecastingBaseRepository):
WHERE expires_at < :now
"""
result = await self.session.execute(text(query_text), {"now": datetime.utcnow()})
result = await self.session.execute(text(query_text), {"now": datetime.now(timezone.utc)})
deleted_count = result.rowcount
logger.info("Cleaned up expired cache entries",
@@ -209,7 +209,7 @@ class PredictionCacheRepository(ForecastingBaseRepository):
{base_filter}
""")
params["now"] = datetime.utcnow()
params["now"] = datetime.now(timezone.utc)
result = await self.session.execute(stats_query, params)
row = result.fetchone()

View File

@@ -33,13 +33,13 @@ class DataClient:
async def fetch_weather_forecast(
self,
tenant_id: str,
days: str,
days: int = 7,
latitude: Optional[float] = None,
longitude: Optional[float] = None
) -> List[Dict[str, Any]]:
"""
Fetch weather data for forecats
All the error handling and retry logic is now in the base client!
Fetch weather forecast data
Uses new v2.0 optimized endpoint via shared external client
"""
try:
weather_data = await self.external_client.get_weather_forecast(

View File

@@ -4,8 +4,9 @@ Main forecasting service that uses the repository pattern for data access
"""
import structlog
import uuid
from typing import Dict, List, Any, Optional
from datetime import datetime, date, timedelta
from datetime import datetime, date, timedelta, timezone
from sqlalchemy.ext.asyncio import AsyncSession
from app.ml.predictor import BakeryForecaster
@@ -138,29 +139,80 @@ class EnhancedForecastingService:
filters=filters)
return forecast_list
except Exception as e:
logger.error("Failed to get tenant forecasts",
logger.error("Failed to get tenant forecasts",
tenant_id=tenant_id,
error=str(e))
raise
async def list_forecasts(self, tenant_id: str, inventory_product_id: str = None,
                         start_date: date = None, end_date: date = None,
                         limit: int = 100, offset: int = 0) -> List[Dict]:
    """
    API-compatible wrapper around ``get_tenant_forecasts``.

    Every filter is forwarded unchanged; the only translation is that the
    ``offset`` argument is passed on as ``skip``.

    Returns:
        Whatever ``get_tenant_forecasts`` returns for the given filters.
    """
    query = {
        "tenant_id": tenant_id,
        "inventory_product_id": inventory_product_id,
        "start_date": start_date,
        "end_date": end_date,
        # The underlying method names its pagination offset ``skip``.
        "skip": offset,
        "limit": limit,
    }
    forecasts = await self.get_tenant_forecasts(**query)
    return forecasts
async def get_forecast_by_id(self, forecast_id: str) -> Optional[Dict]:
"""Get forecast by ID"""
try:
# Implementation would use repository pattern
return None
async with self.database_manager.get_background_session() as session:
repos = await self._init_repositories(session)
forecast = await repos['forecast'].get(forecast_id)
if not forecast:
return None
return {
"id": str(forecast.id),
"tenant_id": str(forecast.tenant_id),
"inventory_product_id": str(forecast.inventory_product_id),
"location": forecast.location,
"forecast_date": forecast.forecast_date.isoformat(),
"predicted_demand": float(forecast.predicted_demand),
"confidence_lower": float(forecast.confidence_lower),
"confidence_upper": float(forecast.confidence_upper),
"confidence_level": float(forecast.confidence_level),
"model_id": forecast.model_id,
"model_version": forecast.model_version,
"algorithm": forecast.algorithm
}
except Exception as e:
logger.error("Failed to get forecast by ID", error=str(e))
raise
async def delete_forecast(self, forecast_id: str) -> bool:
"""Delete forecast"""
async def get_forecast(self, tenant_id: str, forecast_id: uuid.UUID) -> Optional[Dict]:
    """
    Get forecast by ID with tenant validation.

    Args:
        tenant_id: Tenant that must own the forecast. A ``uuid.UUID`` is
            accepted as well as its string form.
        forecast_id: Identifier of the forecast to load.

    Returns:
        The forecast dictionary produced by ``get_forecast_by_id``, or
        ``None`` when the forecast does not exist or belongs to another
        tenant.
    """
    forecast = await self.get_forecast_by_id(str(forecast_id))
    if not forecast:
        return None

    # get_forecast_by_id returns tenant_id as a string, so normalise the
    # caller's value too; otherwise a uuid.UUID tenant id would never match
    # and the owner would be told the forecast does not exist.
    if forecast["tenant_id"] != str(tenant_id):
        return None
    return forecast
async def delete_forecast(self, tenant_id: str, forecast_id: uuid.UUID) -> bool:
"""Delete forecast with tenant validation"""
try:
# Implementation would use repository pattern
return True
async with self.database_manager.get_background_session() as session:
repos = await self._init_repositories(session)
# First verify it belongs to the tenant
forecast = await repos['forecast'].get(str(forecast_id))
if not forecast or str(forecast.tenant_id) != tenant_id:
return False
# Delete it
await repos['forecast'].delete(str(forecast_id))
await session.commit()
logger.info("Forecast deleted", tenant_id=tenant_id, forecast_id=forecast_id)
return True
except Exception as e:
logger.error("Failed to delete forecast", error=str(e))
logger.error("Failed to delete forecast", error=str(e), tenant_id=tenant_id)
return False
@@ -237,7 +289,7 @@ class EnhancedForecastingService:
"""
Generate forecast using repository pattern with caching.
"""
start_time = datetime.utcnow()
start_time = datetime.now(timezone.utc)
try:
logger.info("Generating enhanced forecast",
@@ -310,7 +362,7 @@ class EnhancedForecastingService:
"weather_precipitation": features.get('precipitation'),
"weather_description": features.get('weather_description'),
"traffic_volume": features.get('traffic_volume'),
"processing_time_ms": int((datetime.utcnow() - start_time).total_seconds() * 1000),
"processing_time_ms": int((datetime.now(timezone.utc) - start_time).total_seconds() * 1000),
"features_used": features
}
@@ -338,7 +390,7 @@ class EnhancedForecastingService:
return self._create_forecast_response_from_model(forecast)
except Exception as e:
processing_time = int((datetime.utcnow() - start_time).total_seconds() * 1000)
processing_time = int((datetime.now(timezone.utc) - start_time).total_seconds() * 1000)
logger.error("Error generating enhanced forecast",
error=str(e),
tenant_id=tenant_id,
@@ -354,7 +406,7 @@ class EnhancedForecastingService:
"""
Generate multiple daily forecasts for the specified period.
"""
start_time = datetime.utcnow()
start_time = datetime.now(timezone.utc)
forecasts = []
try:
@@ -364,6 +416,26 @@ class EnhancedForecastingService:
forecast_days=request.forecast_days,
start_date=request.forecast_date.isoformat())
# Fetch weather forecast ONCE for all days to reduce API calls
weather_forecasts = await self.data_client.fetch_weather_forecast(
tenant_id=tenant_id,
days=request.forecast_days,
latitude=40.4168, # Madrid coordinates (could be parameterized per tenant)
longitude=-3.7038
)
# Create a mapping of dates to weather data for quick lookup
weather_map = {}
for weather in weather_forecasts:
weather_date = weather.get('forecast_date', '')
if isinstance(weather_date, str):
weather_date = weather_date.split('T')[0]
elif hasattr(weather_date, 'date'):
weather_date = weather_date.date().isoformat()
else:
weather_date = str(weather_date).split('T')[0]
weather_map[weather_date] = weather
# Generate a forecast for each day
for day_offset in range(request.forecast_days):
# Calculate the forecast date for this day
@@ -373,7 +445,6 @@ class EnhancedForecastingService:
current_date = parse(current_date).date()
if day_offset > 0:
from datetime import timedelta
current_date = current_date + timedelta(days=day_offset)
# Create a new request for this specific day
@@ -385,14 +456,14 @@ class EnhancedForecastingService:
confidence_level=request.confidence_level
)
# Generate forecast for this day
daily_forecast = await self.generate_forecast(tenant_id, daily_request)
# Generate forecast for this day, passing the weather data map
daily_forecast = await self.generate_forecast_with_weather_map(tenant_id, daily_request, weather_map)
forecasts.append(daily_forecast)
# Calculate summary statistics
total_demand = sum(f.predicted_demand for f in forecasts)
avg_confidence = sum(f.confidence_level for f in forecasts) / len(forecasts)
processing_time = int((datetime.utcnow() - start_time).total_seconds() * 1000)
processing_time = int((datetime.now(timezone.utc) - start_time).total_seconds() * 1000)
# Convert forecasts to dictionary format for the response
forecast_dicts = []
@@ -439,6 +510,124 @@ class EnhancedForecastingService:
tenant_id=tenant_id,
error=str(e))
raise
async def generate_forecast_with_weather_map(
self,
tenant_id: str,
request: ForecastRequest,
weather_map: Dict[str, Any]
) -> ForecastResponse:
"""
Generate forecast using a pre-fetched weather map to avoid multiple API calls.

If a cached prediction exists for the tenant, product, location and
forecast date, the response is built from that cache entry. Otherwise the
latest model is looked up, features are prepared from ``weather_map``, a
prediction is generated, business rules are applied, and the result is
stored as a forecast and cached for 24 hours.

Args:
tenant_id: Tenant the forecast is generated for.
request: Forecast request; its product id, location, forecast date and
confidence level are read here.
weather_map: Pre-fetched weather data, handed unchanged to the
feature-preparation helper.

Returns:
ForecastResponse built either from the cache entry or from the newly
stored forecast.

Raises:
ValueError: If no valid model is available for the requested product.
Exception: Any other failure is logged with the elapsed processing time
and re-raised unchanged.
"""
# Timezone-aware start time; only used to compute elapsed milliseconds.
start_time = datetime.now(timezone.utc)
try:
# NOTE(review): .isoformat() is called on request.forecast_date here, so a
# plain string date would fail before reaching the string-parsing branch
# in Step 6 - confirm whether string dates can actually arrive.
logger.info("Generating enhanced forecast with weather map",
tenant_id=tenant_id,
inventory_product_id=request.inventory_product_id,
date=request.forecast_date.isoformat())
# Get session and initialize repositories
async with self.database_manager.get_background_session() as session:
repos = await self._init_repositories(session)
# Step 1: Check cache first
cached_prediction = await repos['cache'].get_cached_prediction(
tenant_id, request.inventory_product_id, request.location, request.forecast_date
)
if cached_prediction:
# Cache hit: no model lookup, prediction or new forecast row.
logger.debug("Using cached prediction",
tenant_id=tenant_id,
inventory_product_id=request.inventory_product_id)
return self._create_forecast_response_from_cache(cached_prediction)
# Step 2: Get model with validation
model_data = await self._get_latest_model_with_fallback(tenant_id, request.inventory_product_id)
if not model_data:
raise ValueError(f"No valid model available for product: {request.inventory_product_id}")
# Step 3: Prepare features with fallbacks, using the weather map
features = await self._prepare_forecast_features_with_fallbacks_and_weather_map(tenant_id, request, weather_map)
# Step 4: Generate prediction
prediction_result = await self.prediction_service.predict(
model_id=model_data['model_id'],
model_path=model_data['model_path'],
features=features,
confidence_level=request.confidence_level
)
# Step 5: Apply business rules
adjusted_prediction = self._apply_business_rules(
prediction_result, request, features
)
# Step 6: Save forecast using repository
# Convert forecast_date to datetime if it's a string
forecast_datetime = request.forecast_date
if isinstance(forecast_datetime, str):
from dateutil.parser import parse
forecast_datetime = parse(forecast_datetime)
# When the adjusted prediction carries no bounds, they default to
# 80% / 120% of the predicted value.
forecast_data = {
"tenant_id": tenant_id,
"inventory_product_id": request.inventory_product_id,
"product_name": None, # Field is now nullable, use inventory_product_id as reference
"location": request.location,
"forecast_date": forecast_datetime,
"predicted_demand": adjusted_prediction['prediction'],
"confidence_lower": adjusted_prediction.get('lower_bound', adjusted_prediction['prediction'] * 0.8),
"confidence_upper": adjusted_prediction.get('upper_bound', adjusted_prediction['prediction'] * 1.2),
"confidence_level": request.confidence_level,
"model_id": model_data['model_id'],
"model_version": model_data.get('version', '1.0'),
"algorithm": model_data.get('algorithm', 'prophet'),
"business_type": features.get('business_type', 'individual'),
"is_holiday": features.get('is_holiday', False),
"is_weekend": features.get('is_weekend', False),
"day_of_week": features.get('day_of_week', 0),
"weather_temperature": features.get('temperature'),
"weather_precipitation": features.get('precipitation'),
"weather_description": features.get('weather_description'),
"traffic_volume": features.get('traffic_volume'),
# Elapsed time up to the moment this dict is built (before the insert).
"processing_time_ms": int((datetime.now(timezone.utc) - start_time).total_seconds() * 1000),
"features_used": features
}
forecast = await repos['forecast'].create_forecast(forecast_data)
# Step 7: Cache the prediction
# Same bounds defaults as the stored forecast; entry expires after 24 hours.
await repos['cache'].cache_prediction(
tenant_id=tenant_id,
inventory_product_id=request.inventory_product_id,
location=request.location,
forecast_date=forecast_datetime,
predicted_demand=adjusted_prediction['prediction'],
confidence_lower=adjusted_prediction.get('lower_bound', adjusted_prediction['prediction'] * 0.8),
confidence_upper=adjusted_prediction.get('upper_bound', adjusted_prediction['prediction'] * 1.2),
model_id=model_data['model_id'],
expires_in_hours=24
)
logger.info("Enhanced forecast generated successfully",
forecast_id=forecast.id,
tenant_id=tenant_id,
prediction=adjusted_prediction['prediction'])
return self._create_forecast_response_from_model(forecast)
except Exception as e:
# Log the failure with the elapsed time, then let the caller handle it.
processing_time = int((datetime.now(timezone.utc) - start_time).total_seconds() * 1000)
logger.error("Error generating enhanced forecast",
error=str(e),
tenant_id=tenant_id,
inventory_product_id=request.inventory_product_id,
processing_time=processing_time)
raise
async def get_forecast_history(
self,
@@ -498,7 +687,7 @@ class EnhancedForecastingService:
"batch_analytics": batch_stats,
"cache_performance": cache_stats,
"performance_trends": performance_trends,
"generated_at": datetime.utcnow().isoformat()
"generated_at": datetime.now(timezone.utc).isoformat()
}
except Exception as e:
@@ -568,6 +757,10 @@ class EnhancedForecastingService:
is_holiday=False,
is_weekend=cache_entry.forecast_date.weekday() >= 5,
day_of_week=cache_entry.forecast_date.weekday(),
weather_temperature=None, # Not stored in cache
weather_precipitation=None, # Not stored in cache
weather_description=None, # Not stored in cache
traffic_volume=None, # Not stored in cache
created_at=cache_entry.created_at,
processing_time_ms=0, # From cache
features_used={}
@@ -649,8 +842,8 @@ class EnhancedForecastingService:
return None
async def _prepare_forecast_features_with_fallbacks(
self,
tenant_id: str,
self,
tenant_id: str,
request: ForecastRequest
) -> Dict[str, Any]:
"""Prepare features with comprehensive fallbacks"""
@@ -665,23 +858,137 @@ class EnhancedForecastingService:
"season": self._get_season(request.forecast_date.month),
"is_holiday": self._is_spanish_holiday(request.forecast_date),
}
# Add weather features (simplified)
features.update({
"temperature": 20.0, # Default values
"precipitation": 0.0,
"humidity": 65.0,
"wind_speed": 5.0,
"pressure": 1013.0,
})
# Add traffic features (simplified)
weekend_factor = 0.7 if features["is_weekend"] else 1.0
features.update({
"traffic_volume": int(100 * weekend_factor),
"pedestrian_count": int(50 * weekend_factor),
})
# Fetch REAL weather data from external service
try:
# Get weather forecast for next 7 days (covers most forecast requests)
weather_forecasts = await self.data_client.fetch_weather_forecast(
tenant_id=tenant_id,
days=7,
latitude=40.4168, # Madrid coordinates (could be parameterized per tenant)
longitude=-3.7038
)
# Find weather for the specific forecast date
forecast_date_str = request.forecast_date.isoformat().split('T')[0]
weather_for_date = None
for weather in weather_forecasts:
# Extract date from forecast_date field
weather_date = weather.get('forecast_date', '')
if isinstance(weather_date, str):
weather_date = weather_date.split('T')[0]
elif hasattr(weather_date, 'isoformat'):
weather_date = weather_date.date().isoformat()
else:
weather_date = str(weather_date).split('T')[0]
if weather_date == forecast_date_str:
weather_for_date = weather
break
if weather_for_date:
logger.info("Using REAL weather data from external service",
date=forecast_date_str,
temp=weather_for_date.get('temperature'),
precipitation=weather_for_date.get('precipitation'))
features.update({
"temperature": weather_for_date.get('temperature', 20.0),
"precipitation": weather_for_date.get('precipitation', 0.0),
"humidity": weather_for_date.get('humidity', 65.0),
"wind_speed": weather_for_date.get('wind_speed', 5.0),
"pressure": weather_for_date.get('pressure', 1013.0),
"weather_description": weather_for_date.get('description'),
})
else:
logger.warning("No weather data for specific date, using defaults",
date=forecast_date_str,
forecasts_count=len(weather_forecasts))
features.update({
"temperature": 20.0,
"precipitation": 0.0,
"humidity": 65.0,
"wind_speed": 5.0,
"pressure": 1013.0,
})
except Exception as e:
logger.error("Failed to fetch weather data, using defaults",
error=str(e),
date=request.forecast_date.isoformat())
# Fallback to defaults on error
features.update({
"temperature": 20.0,
"precipitation": 0.0,
"humidity": 65.0,
"wind_speed": 5.0,
"pressure": 1013.0,
})
# NOTE: Traffic features are NOT included in predictions
# Reason: We only have historical and real-time traffic data, not forecasts
# The model learns traffic patterns during training (using historical data)
# and applies those learned patterns via day_of_week, is_weekend, holidays
# Including fake/estimated traffic values would mislead the model
# See: TRAFFIC_DATA_ANALYSIS.md for full explanation
return features
async def _prepare_forecast_features_with_fallbacks_and_weather_map(
    self,
    tenant_id: str,
    request: ForecastRequest,
    weather_map: Dict[str, Any]
) -> Dict[str, Any]:
    """
    Build the model feature dictionary for one forecast date.

    Calendar features are derived from ``request.forecast_date``. Weather
    features are looked up in ``weather_map`` under the date's ISO day
    string (``YYYY-MM-DD``); when the map has no usable entry for that day,
    fixed default weather values are used instead. No external call is made.

    Returns:
        Dictionary of calendar and weather features. ``weather_description``
        is only present when the map supplied data for the date.
    """
    target = request.forecast_date
    iso_stamp = target.isoformat()
    weekday = target.weekday()

    features = {
        "date": iso_stamp,
        "day_of_week": weekday,
        "is_weekend": weekday >= 5,
        "day_of_month": target.day,
        "month": target.month,
        "quarter": (target.month - 1) // 3 + 1,
        "week_of_year": target.isocalendar().week,
        "season": self._get_season(target.month),
        "is_holiday": self._is_spanish_holiday(target),
    }

    # Default weather values, used per-field when a key is missing and as a
    # whole when the map has nothing for the requested day.
    fallback_weather = {
        "temperature": 20.0,
        "precipitation": 0.0,
        "humidity": 65.0,
        "wind_speed": 5.0,
        "pressure": 1013.0,
    }

    # Drop any time component so the key matches the map's day-level keys.
    date_key = iso_stamp.split('T')[0]
    day_weather = weather_map.get(date_key)

    if not day_weather:
        logger.warning("No weather data for specific date in weather map, using defaults",
                       date=date_key)
        features.update(fallback_weather)
        return features

    logger.info("Using REAL weather data from external service via weather map",
                date=date_key,
                temp=day_weather.get('temperature'),
                precipitation=day_weather.get('precipitation'))
    for field_name, default_value in fallback_weather.items():
        features[field_name] = day_weather.get(field_name, default_value)
    features["weather_description"] = day_weather.get('description')

    # NOTE: Traffic features are deliberately left out of prediction inputs.
    # Only historical and real-time traffic data exist, not traffic forecasts.
    # The model picks up traffic patterns during training and applies them
    # through day_of_week, is_weekend and holiday features; feeding it
    # fake/estimated traffic values would mislead it.
    # See: TRAFFIC_DATA_ANALYSIS.md for the full explanation.
    return features
def _get_season(self, month: int) -> int:
@@ -695,9 +1002,9 @@ class EnhancedForecastingService:
else:
return 4 # Autumn
def _is_spanish_holiday(self, date: datetime) -> bool:
def _is_spanish_holiday(self, date_obj: date) -> bool:
"""Check if a date is a major Spanish holiday"""
month_day = (date.month, date.day)
month_day = (date_obj.month, date_obj.day)
spanish_holidays = [
(1, 1), (1, 6), (5, 1), (8, 15), (10, 12),
(11, 1), (12, 6), (12, 8), (12, 25)
@@ -754,4 +1061,4 @@ class EnhancedForecastingService:
# Legacy compatibility alias
ForecastingService = EnhancedForecastingService
ForecastingService = EnhancedForecastingService

View File

@@ -138,7 +138,7 @@ async def publish_forecasts_deleted_event(tenant_id: str, deletion_stats: Dict[s
message={
"event_type": "tenant_forecasts_deleted",
"tenant_id": tenant_id,
"timestamp": datetime.utcnow().isoformat(),
"timestamp": datetime.now(timezone.utc).isoformat(),
"deletion_stats": deletion_stats
}
)

View File

@@ -164,7 +164,170 @@ class PredictionService:
except Exception:
pass # Don't fail on metrics errors
raise
async def predict_with_weather_forecast(
    self,
    model_id: str,
    model_path: str,
    features: Dict[str, Any],
    tenant_id: str,
    days: int = 7,
    confidence_level: float = 0.8
) -> List[Dict[str, Any]]:
    """
    Generate per-day predictions enriched with weather forecast data.

    This method:
    1. Loads the trained model through ``self._load_model``.
    2. Fetches a weather forecast through ``data_client.fetch_weather_forecast``.
    3. For each of ``days`` entries, overlays that entry's weather on a copy
       of ``features`` and runs ``model.predict`` on the prepared frame.
    4. Scales the point prediction with ``self._apply_weather_adjustments``.

    Args:
        model_id: ID of the trained model
        model_path: Path to model file
        features: Base features for prediction. Must contain ``'date'``.
            ``'latitude'`` / ``'longitude'`` default to 40.4168 / -3.7038 and
            ``'product_category'`` defaults to ``'general'`` when absent.
        tenant_id: Tenant ID passed to the weather forecast request
        days: Number of forecast entries to generate
        confidence_level: Echoed into every result entry; not otherwise
            used by this method.

    Returns:
        One dict per day with keys ``date``, ``prediction``, ``lower_bound``,
        ``upper_bound``, ``confidence_level`` and a nested ``weather`` dict
        (``temperature``, ``precipitation``, ``description``). The three
        numeric outputs are clamped so they are never below 0.

    Raises:
        ValueError: If ``self._load_model`` returns a falsy model.
        Exception: Any other failure is logged and re-raised unchanged.
    """
    from app.services.data_client import data_client

    start_time = datetime.now()

    try:
        logger.info("Generating weather-aware predictions",
                    model_id=model_id,
                    days=days)

        # Step 1: Load ML model
        model = await self._load_model(model_id, model_path)
        if not model:
            raise ValueError(f"Model {model_id} not found")

        # Step 2: Fetch real weather forecast
        latitude = features.get('latitude', 40.4168)
        longitude = features.get('longitude', -3.7038)

        weather_forecast = await data_client.fetch_weather_forecast(
            tenant_id=tenant_id,
            days=days,
            latitude=latitude,
            longitude=longitude
        )
        # Should the client hand back None instead of a list, treat it as
        # "no forecast": every day then falls back to the defaults below
        # instead of crashing on len(None).
        weather_forecast = weather_forecast or []

        logger.info(f"Fetched weather forecast for {len(weather_forecast)} days",
                    tenant_id=tenant_id)

        # Step 3: Generate predictions for each day with weather data
        # NOTE(review): 'date' is copied unchanged from `features` on every
        # iteration, so all entries carry the same date. Confirm whether the
        # date is meant to advance by day_offset.
        predictions = []

        for day_offset in range(days):
            # Get weather for this specific day (empty when the forecast is shorter than `days`)
            raw_weather = weather_forecast[day_offset] if day_offset < len(weather_forecast) else {}

            # dict.get() only applies its default when the key is absent, so a
            # reading that is present but null would slip through as None.
            # Dropping null entries lets every fallback below (and the ones in
            # _apply_weather_adjustments) take effect.
            day_weather = {
                key: value
                for key, value in (raw_weather or {}).items()
                if value is not None
            }

            # Enrich features with actual weather forecast
            enriched_features = features.copy()
            enriched_features.update({
                'temperature': day_weather.get('temperature', features.get('temperature', 20.0)),
                'precipitation': day_weather.get('precipitation', features.get('precipitation', 0.0)),
                'humidity': day_weather.get('humidity', features.get('humidity', 60.0)),
                'wind_speed': day_weather.get('wind_speed', features.get('wind_speed', 10.0)),
                'pressure': day_weather.get('pressure', features.get('pressure', 1013.0)),
                'weather_description': day_weather.get('description', 'Clear')
            })

            # Prepare Prophet dataframe with weather features
            prophet_df = self._prepare_prophet_features(enriched_features)

            # Generate prediction for this day
            forecast = model.predict(prophet_df)

            prediction_value = float(forecast['yhat'].iloc[0])
            lower_bound = float(forecast['yhat_lower'].iloc[0])
            upper_bound = float(forecast['yhat_upper'].iloc[0])

            # Apply weather-based adjustments (business rules).
            # Only the point prediction is scaled; the bounds stay as the
            # raw model outputs.
            adjusted_prediction = self._apply_weather_adjustments(
                prediction_value,
                day_weather,
                features.get('product_category', 'general')
            )

            predictions.append({
                "date": enriched_features['date'],
                "prediction": max(0, adjusted_prediction),
                "lower_bound": max(0, lower_bound),
                "upper_bound": max(0, upper_bound),
                "confidence_level": confidence_level,
                "weather": {
                    "temperature": enriched_features['temperature'],
                    "precipitation": enriched_features['precipitation'],
                    "description": enriched_features['weather_description']
                }
            })

        processing_time = (datetime.now() - start_time).total_seconds()

        logger.info("Weather-aware predictions generated",
                    model_id=model_id,
                    days=len(predictions),
                    processing_time=processing_time)

        return predictions

    except Exception as e:
        # Log with context, then let the caller decide how to handle it
        logger.error("Error generating weather-aware predictions",
                     error=str(e),
                     model_id=model_id)
        raise
def _apply_weather_adjustments(
    self,
    base_prediction: float,
    weather: Dict[str, Any],
    product_category: str
) -> float:
    """
    Apply business rules based on weather conditions.

    Scales ``base_prediction`` by category-specific multipliers driven by the
    forecast temperature and precipitation.

    Args:
        base_prediction: Model output to adjust.
        weather: Forecast values for the day. Only ``'temperature'`` and
            ``'precipitation'`` are read. A missing or null temperature is
            treated as 20.0 and a missing or null precipitation as 0.0,
            values at which none of the rules below fire.
        product_category: Category name selecting which rules apply. Any
            category other than those listed below is returned unadjusted.

    Returns:
        The adjusted prediction.

    Rules (the temperature rule is applied first, then the heavy-rain rule):
        ice_cream: temp > 30 -> x1.4, temp > 25 -> x1.2, temp < 15 -> x0.7
        bread:     temp > 30 -> x0.9, temp < 10 -> x1.1
        coffee:    temp < 15 -> x1.2, otherwise precip > 5 -> x1.15
        pastry / coffee: precip > 10 -> additional x1.2
    """
    weather = weather or {}

    # dict.get() returns None when the key is present with a null value, and
    # comparing None with a number raises TypeError. The precipitation check
    # below runs for every category, so normalise both readings up front.
    temp = weather.get('temperature')
    if temp is None:
        temp = 20.0
    precip = weather.get('precipitation')
    if precip is None:
        precip = 0.0

    adjusted = base_prediction

    # Temperature-based adjustments
    if product_category == 'ice_cream':
        if temp > 30:
            adjusted *= 1.4  # +40% for very hot days
        elif temp > 25:
            adjusted *= 1.2  # +20% for hot days
        elif temp < 15:
            adjusted *= 0.7  # -30% for cold days

    elif product_category == 'bread':
        if temp > 30:
            adjusted *= 0.9  # -10% for very hot days
        elif temp < 10:
            adjusted *= 1.1  # +10% for cold days

    elif product_category == 'coffee':
        if temp < 15:
            adjusted *= 1.2  # +20% for cold days
        elif precip > 5:
            adjusted *= 1.15  # +15% for rainy days

    # Precipitation-based adjustments (stack on top of the rule above)
    if precip > 10:  # Heavy rain
        if product_category in ['pastry', 'coffee']:
            adjusted *= 1.2  # People stay indoors, buy comfort food

    return adjusted
async def _load_model(self, model_id: str, model_path: str):
"""Load model from file with improved validation and error handling"""

View File

@@ -0,0 +1,32 @@
"""make product_name nullable
Revision ID: a1b2c3d4e5f6
Revises: 706c5b559062
Create Date: 2025-10-09 04:55:00.000000
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
# `revision` is this migration's own ID; `down_revision` is the ID of the
# migration it revises (see "Revises" in the module docstring).
revision: str = 'a1b2c3d4e5f6'
down_revision: Union[str, None] = '706c5b559062'
# No branch labels or extra dependencies are declared for this migration.
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Allow NULL in ``forecasts.product_name``.

    The column is made nullable because inventory_product_id is used as the
    primary reference for a forecast's product.
    """
    product_name_type = sa.VARCHAR(length=255)
    op.alter_column(
        'forecasts',
        'product_name',
        existing_type=product_name_type,
        nullable=True,
    )
def downgrade() -> None:
    """Restore the NOT NULL constraint on ``forecasts.product_name``.

    Rows with a NULL product_name must be populated before running this;
    no backfill is performed here.
    """
    product_name_type = sa.VARCHAR(length=255)
    op.alter_column(
        'forecasts',
        'product_name',
        existing_type=product_name_type,
        nullable=False,
    )