Fix orchestrator issues

This commit is contained in:
Urtzi Alfaro
2025-11-05 22:54:14 +01:00
parent 80728eaa4e
commit 3ad093d38b
9 changed files with 422 additions and 484 deletions

View File

@@ -5,6 +5,7 @@ Main forecasting service that uses the repository pattern for data access
import structlog
import uuid
import asyncio
from typing import Dict, List, Any, Optional
from datetime import datetime, date, timedelta, timezone
from sqlalchemy.ext.asyncio import AsyncSession
@@ -63,8 +64,10 @@ class EnhancedForecastingService:
"""Generate batch forecasts using repository pattern"""
try:
# Implementation would use repository pattern to generate multiple forecasts
batch_uuid = uuid.uuid4()
return {
"batch_id": f"batch_{tenant_id}_{datetime.now().strftime('%Y%m%d_%H%M%S')}",
"id": str(batch_uuid), # UUID for database references
"batch_id": f"batch_{tenant_id}_{datetime.now().strftime('%Y%m%d_%H%M%S')}", # Human-readable batch identifier
"tenant_id": tenant_id,
"forecasts": [],
"total_forecasts": 0,
@@ -368,7 +371,7 @@ class EnhancedForecastingService:
forecast = await repos['forecast'].create_forecast(forecast_data)
# Step 7: Cache the prediction
# Step 6: Cache the prediction
await repos['cache'].cache_prediction(
tenant_id=tenant_id,
inventory_product_id=request.inventory_product_id,
@@ -521,51 +524,62 @@ class EnhancedForecastingService:
Generate forecast using a pre-fetched weather map to avoid multiple API calls.
"""
start_time = datetime.now(timezone.utc)
try:
logger.info("Generating enhanced forecast with weather map",
tenant_id=tenant_id,
inventory_product_id=request.inventory_product_id,
date=request.forecast_date.isoformat())
# Get session and initialize repositories
# CRITICAL FIX: Get model BEFORE opening database session to prevent session blocking during HTTP calls
# This prevents holding database connections during potentially slow external API calls
logger.debug("Fetching model data before opening database session",
tenant_id=tenant_id,
inventory_product_id=request.inventory_product_id)
model_data = await self._get_latest_model_with_fallback(tenant_id, request.inventory_product_id)
if not model_data:
raise ValueError(f"No valid model available for product: {request.inventory_product_id}")
logger.debug("Model data fetched successfully",
tenant_id=tenant_id,
model_id=model_data.get('model_id'))
# Prepare features (this doesn't make external HTTP calls when using weather_map)
features = await self._prepare_forecast_features_with_fallbacks_and_weather_map(tenant_id, request, weather_map)
# Now open database session AFTER external HTTP calls are complete
async with self.database_manager.get_background_session() as session:
repos = await self._init_repositories(session)
# Step 1: Check cache first
cached_prediction = await repos['cache'].get_cached_prediction(
tenant_id, request.inventory_product_id, request.location, request.forecast_date
)
if cached_prediction:
logger.debug("Using cached prediction",
tenant_id=tenant_id,
inventory_product_id=request.inventory_product_id)
return self._create_forecast_response_from_cache(cached_prediction)
# Step 2: Get model with validation
model_data = await self._get_latest_model_with_fallback(tenant_id, request.inventory_product_id)
if not model_data:
raise ValueError(f"No valid model available for product: {request.inventory_product_id}")
# Step 3: Prepare features with fallbacks, using the weather map
features = await self._prepare_forecast_features_with_fallbacks_and_weather_map(tenant_id, request, weather_map)
# Step 4: Generate prediction
# Step 2: Model data already fetched above (before session opened)
# Step 3: Generate prediction
prediction_result = await self.prediction_service.predict(
model_id=model_data['model_id'],
model_path=model_data['model_path'],
features=features,
confidence_level=request.confidence_level
)
# Step 5: Apply business rules
# Step 4: Apply business rules
adjusted_prediction = self._apply_business_rules(
prediction_result, request, features
)
# Step 6: Save forecast using repository
# Step 5: Save forecast using repository
# Convert forecast_date to datetime if it's a string
forecast_datetime = request.forecast_date
if isinstance(forecast_datetime, str):
@@ -599,7 +613,7 @@ class EnhancedForecastingService:
forecast = await repos['forecast'].create_forecast(forecast_data)
# Step 7: Cache the prediction
# Step 6: Cache the prediction
await repos['cache'].cache_prediction(
tenant_id=tenant_id,
inventory_product_id=request.inventory_product_id,
@@ -813,32 +827,51 @@ class EnhancedForecastingService:
# Additional helper methods from original service
async def _get_latest_model_with_fallback(
    self,
    tenant_id: str,
    inventory_product_id: str,
    *,
    timeout_seconds: float = 15.0,
) -> Optional[Dict[str, Any]]:
    """
    Get the latest trained model with fallback strategies.

    The product-specific model is requested first; when that lookup returns
    nothing, any model available for the tenant is requested instead. Each
    lookup is wrapped in ``asyncio.wait_for`` so an unresponsive model
    client cannot block the caller indefinitely.

    Args:
        tenant_id: Tenant whose models are queried.
        inventory_product_id: Product for which a specific model is preferred.
        timeout_seconds: Timeout applied to EACH lookup. The two lookups run
            sequentially, so the worst-case wait is twice this value.

    Returns:
        The model data returned by the model client, or ``None`` when no
        model is available, a lookup times out, or any other error occurs.
        Errors are logged and never propagated to the caller.
    """
    try:
        # Primary lookup: best model trained for this specific product.
        model_data = await asyncio.wait_for(
            self.model_client.get_best_model_for_forecasting(
                tenant_id=tenant_id,
                inventory_product_id=inventory_product_id,
            ),
            timeout=timeout_seconds,
        )
        if model_data:
            logger.info("Found specific model for product",
                        inventory_product_id=inventory_product_id,
                        model_id=model_data.get('model_id'))
            return model_data

        # Fallback lookup: any model for this tenant (same timeout budget).
        fallback_model = await asyncio.wait_for(
            self.model_client.get_any_model_for_tenant(tenant_id),
            timeout=timeout_seconds,
        )
        if fallback_model:
            logger.info("Using fallback model",
                        model_id=fallback_model.get('model_id'))
            return fallback_model

        logger.error("No models available for tenant", tenant_id=tenant_id)
        return None

    except asyncio.TimeoutError:
        # A timeout in the primary lookup lands here too, so the fallback
        # lookup is NOT attempted after a primary timeout.
        logger.error("Timeout fetching model data from training service",
                     tenant_id=tenant_id,
                     inventory_product_id=inventory_product_id,
                     timeout_seconds=timeout_seconds)
        return None
    except Exception as e:
        # Deliberate best-effort boundary: callers treat None as "no model".
        logger.error("Error getting model", error=str(e), tenant_id=tenant_id)
        return None
async def _prepare_forecast_features_with_fallbacks(
@@ -857,6 +890,9 @@ class EnhancedForecastingService:
"week_of_year": request.forecast_date.isocalendar().week,
"season": self._get_season(request.forecast_date.month),
"is_holiday": self._is_spanish_holiday(request.forecast_date),
# CRITICAL FIX: Add tenant_id and inventory_product_id for historical feature enrichment
"tenant_id": tenant_id,
"inventory_product_id": request.inventory_product_id,
}
# Fetch REAL weather data from external service
@@ -951,6 +987,9 @@ class EnhancedForecastingService:
"week_of_year": request.forecast_date.isocalendar().week,
"season": self._get_season(request.forecast_date.month),
"is_holiday": self._is_spanish_holiday(request.forecast_date),
# CRITICAL FIX: Add tenant_id and inventory_product_id for historical feature enrichment
"tenant_id": tenant_id,
"inventory_product_id": request.inventory_product_id,
}
# Use the pre-fetched weather data from the weather map to avoid additional API calls