Start fixing forecast service API 3

This commit is contained in:
Urtzi Alfaro
2025-07-29 15:08:55 +02:00
parent dfb619a7b5
commit 84ed4a7a2e
14 changed files with 1607 additions and 447 deletions

View File

@@ -47,7 +47,7 @@ async def create_single_forecast(
)
# Generate forecast
forecast = await forecasting_service.generate_forecast(request, db)
forecast = await forecasting_service.generate_forecast(tenant_id, request, db)
# Convert to response model
return ForecastResponse(
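The route now receives the tenant explicitly rather than reading it from the request body. A minimal sketch of the updated handler under that assumption (the router path, the get_db dependency, and the response mapping are not shown in this hunk and are assumptions):

# Hedged sketch; decorator, dependency, and response mapping are assumed
from fastapi import APIRouter, Depends
from sqlalchemy.ext.asyncio import AsyncSession

router = APIRouter()

@router.post("/tenants/{tenant_id}/forecast", response_model=ForecastResponse)
async def create_single_forecast(
    tenant_id: str,                      # tenant now comes from the URL path
    request: ForecastRequest,
    db: AsyncSession = Depends(get_db),  # assumed session dependency
):
    forecast = await forecasting_service.generate_forecast(tenant_id, request, db)
    # ...convert to ForecastResponse exactly as the original handler does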

View File

@@ -24,14 +24,7 @@ class ForecastRequest(BaseModel):
"""Request schema for generating forecasts"""
tenant_id: str = Field(..., description="Tenant ID")
product_name: str = Field(..., description="Product name")
location: str = Field(..., description="Location identifier")
forecast_date: date = Field(..., description="Date for which to generate forecast")
business_type: BusinessType = Field(BusinessType.INDIVIDUAL, description="Business model type")
# Optional context
include_weather: bool = Field(True, description="Include weather data in forecast")
include_traffic: bool = Field(True, description="Include traffic data in forecast")
confidence_level: float = Field(0.8, ge=0.5, le=0.95, description="Confidence level for intervals")
@validator('forecast_date')
def validate_forecast_date(cls, v):
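With location, business type, and the weather/traffic/confidence options removed, a request carries only the tenant, product, and date. A hedged construction sketch (values are hypothetical; the validator body is elided in the diff):

from datetime import date

request = ForecastRequest(
    tenant_id="tenant-123",          # hypothetical tenant ID
    product_name="baguette",         # hypothetical product
    forecast_date=date(2025, 8, 1),  # checked by validate_forecast_date
)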

View File

@@ -0,0 +1,64 @@
# services/forecasting/app/services/data_client.py
"""
Forecasting Service Data Client
Migrated to use shared service clients - much simpler now!
"""
import structlog
from typing import Dict, Any, List, Optional
from datetime import datetime
# Import the shared clients
from shared.clients import get_data_client, get_service_clients
from app.core.config import settings
logger = structlog.get_logger()
class DataClient:
"""
Data client for the forecasting service
Now uses the shared data service client under the hood
"""
def __init__(self):
# Get the shared data client configured for this service
self.data_client = get_data_client(settings, "forecasting")
# Or alternatively, get all clients at once:
# self.clients = get_service_clients(settings, "forecasting")
# Then use: self.clients.data.get_sales_data(...)
async def fetch_weather_forecast(
self,
tenant_id: str,
days: int,
latitude: Optional[float] = None,
longitude: Optional[float] = None
) -> List[Dict[str, Any]]:
"""
Fetch weather data for forecasts
All the error handling and retry logic is now in the base client!
"""
try:
weather_data = await self.data_client.get_weather_forecast(
tenant_id=tenant_id,
days=days,
latitude=latitude,
longitude=longitude
)
if weather_data:
logger.info(f"Fetched {len(weather_data)} weather records",
tenant_id=tenant_id)
return weather_data
else:
logger.warning("No weather data returned", tenant_id=tenant_id)
return []
except Exception as e:
logger.error(f"Error fetching weather data: {e}", tenant_id=tenant_id)
return []
# Global instance - same as before, but much simpler implementation
data_client = DataClient()
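Assuming the shared client resolves the data service from settings as above, typical use from an async forecasting task looks like this (tenant and coordinates are hypothetical; the Madrid defaults appear elsewhere in this codebase):

# Inside an async function
weather = await data_client.fetch_weather_forecast(
    tenant_id="tenant-123",
    days=1,
    latitude=40.4168,
    longitude=-3.7038,
)
# The client returns [] on any error, so iteration needs no extra guards
for record in weather:
    temperature = record.get("temperature")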

View File

@@ -21,6 +21,8 @@ from app.services.prediction_service import PredictionService
from app.services.messaging import publish_forecast_completed, publish_alert_created
from app.core.config import settings
from shared.monitoring.metrics import MetricsCollector
from app.services.model_client import ModelClient
from app.services.data_client import DataClient
logger = structlog.get_logger()
metrics = MetricsCollector("forecasting-service")
@@ -33,6 +35,8 @@ class ForecastingService:
def __init__(self):
self.prediction_service = PredictionService()
self.model_client = ModelClient()
self.data_client = DataClient()
async def generate_forecast(self, tenant_id: str, request: ForecastRequest, db: AsyncSession) -> Forecast:
"""Generate a single forecast for a product"""
@@ -47,8 +51,7 @@ class ForecastingService:
# Get the latest trained model for this tenant/product
model_info = await self._get_latest_model(
request.tenant_id,
request.product_name,
request.location
request.product_name,
)
if not model_info:
@@ -66,10 +69,9 @@ class ForecastingService:
# Create forecast record
forecast = Forecast(
tenant_id=uuid.UUID(request.tenant_id),
product_name=request.product_name,
location=request.location,
forecast_date=datetime.combine(request.forecast_date, datetime.min.time()),
tenant_id=uuid.UUID(tenant_id),
product_name=product_name,
forecast_date=datetime.combine(forecast_date, datetime.min.time()),
# Prediction results
predicted_demand=prediction_result["demand"],
@@ -243,27 +245,12 @@ class ForecastingService:
logger.error("Error retrieving forecasts", error=str(e))
raise
async def _get_latest_model(self, tenant_id: str, product_name: str, location: str) -> Optional[Dict[str, Any]]:
async def _get_latest_model(self, tenant_id: str, product_name: str) -> Optional[Dict[str, Any]]:
"""Get the latest trained model for a tenant/product combination"""
try:
# Call training service to get model information
async with httpx.AsyncClient() as client:
response = await client.get(
f"{settings.TRAINING_SERVICE_URL}/tenants/{tenant_id}/models/{product_name}/active",
params={},
headers={"X-Service-Auth": settings.SERVICE_AUTH_TOKEN}
)
if response.status_code == 200:
return response.json()
elif response.status_code == 404:
logger.warning("No model found",
tenant_id=tenant_id,
product=product_name)
return None
else:
response.raise_for_status()
model_data = await self.model_client.get_best_model_for_forecasting(tenant_id, product_name)
return model_data
except Exception as e:
logger.error("Error getting latest model", error=str(e))
@@ -275,22 +262,15 @@ class ForecastingService:
features = {
"date": request.forecast_date.isoformat(),
"day_of_week": request.forecast_date.weekday(),
"is_weekend": request.forecast_date.weekday() >= 5,
"business_type": request.business_type.value
"is_weekend": request.forecast_date.weekday() >= 5
}
# Add Spanish holidays
features["is_holiday"] = await self._is_spanish_holiday(request.forecast_date)
# Add weather data if requested
if request.include_weather:
weather_data = await self._get_weather_forecast(request.forecast_date)
features.update(weather_data)
# Add traffic data if requested
if request.include_traffic:
traffic_data = await self._get_traffic_forecast(request.forecast_date, request.location)
features.update(traffic_data)
weather_data = await self._get_weather_forecast(request.tenant_id, 1)
features.update(weather_data)
return features
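For a Saturday with no holiday, the resulting features dict would look roughly like this (the weather keys depend on what the data service returns; the values are illustrative only):

features = {
    "date": "2025-08-02",
    "day_of_week": 5,      # Saturday
    "is_weekend": True,
    "is_holiday": False,
    # merged in from _get_weather_forecast
    "temperature": 31.5,
    "precipitation": 0.0,
}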
@@ -315,61 +295,16 @@ class ForecastingService:
logger.warning("Error checking holiday status", error=str(e))
return False
async def _get_weather_forecast(self, forecast_date: date) -> Dict[str, Any]:
async def _get_weather_forecast(self, tenant_id: str, days: int) -> Dict[str, Any]:
"""Get weather forecast for the date"""
try:
# Call data service for weather forecast
async with httpx.AsyncClient() as client:
response = await client.get(
f"{settings.DATA_SERVICE_URL}/api/v1/weather/forecast",
params={"date": forecast_date.isoformat()},
headers={"X-Service-Auth": settings.SERVICE_AUTH_TOKEN}
)
if response.status_code == 200:
weather = response.json()
return {
"temperature": weather.get("temperature"),
"precipitation": weather.get("precipitation"),
"humidity": weather.get("humidity"),
"weather_description": weather.get("description")
}
else:
return {}
weather_records = await self.data_client.fetch_weather_forecast(tenant_id, days)
# fetch_weather_forecast returns a list of daily records; features expect a flat dict
return weather_records[0] if weather_records else {}
except Exception as e:
logger.warning("Error getting weather forecast", error=str(e))
return {}
async def _get_traffic_forecast(self, forecast_date: date, location: str) -> Dict[str, Any]:
"""Get traffic forecast for the date and location"""
try:
# Call data service for traffic forecast
async with httpx.AsyncClient() as client:
response = await client.get(
f"{settings.DATA_SERVICE_URL}/api/v1/traffic/forecast",
params={
"date": forecast_date.isoformat(),
"location": location
},
headers={"X-Service-Auth": settings.SERVICE_AUTH_TOKEN}
)
if response.status_code == 200:
traffic = response.json()
return {
"traffic_volume": traffic.get("volume"),
"pedestrian_count": traffic.get("pedestrian_count")
}
else:
return {}
except Exception as e:
logger.warning("Error getting traffic forecast", error=str(e))
return {}
async def _check_and_create_alerts(self, forecast: Forecast, db: AsyncSession):
"""Check forecast and create alerts if needed"""

View File

@@ -0,0 +1,183 @@
# services/forecasting/app/services/model_client.py
"""
Forecast Service Model Client
Demonstrates calling training service to get models
"""
import structlog
from typing import Dict, Any, List, Optional
# Import shared clients - no more code duplication!
from shared.clients import get_service_clients, get_training_client, get_data_client
from app.core.config import settings
logger = structlog.get_logger()
class ModelClient:
"""
Client for managing models in forecasting service
Shows how to call multiple services cleanly
"""
def __init__(self):
# Option 1: Get all clients at once
self.clients = get_service_clients(settings, "forecasting")
# Option 2: Get specific clients
# self.training_client = get_training_client(settings, "forecasting")
# self.data_client = get_data_client(settings, "forecasting")
async def get_available_models(
self,
tenant_id: str,
model_type: Optional[str] = None
) -> List[Dict[str, Any]]:
"""
Get available trained models from training service
"""
try:
models = await self.clients.training.list_models(
tenant_id=tenant_id,
status="deployed", # Only get deployed models
model_type=model_type
)
if models:
logger.info(f"Found {len(models)} available models",
tenant_id=tenant_id, model_type=model_type)
return models
else:
logger.warning("No available models found", tenant_id=tenant_id)
return []
except Exception as e:
logger.error(f"Error fetching available models: {e}", tenant_id=tenant_id)
return []
async def get_best_model_for_forecasting(
self,
tenant_id: str,
product_id: Optional[str] = None
) -> Optional[Dict[str, Any]]:
"""
Get the best model for forecasting based on performance metrics
"""
try:
# Get latest model
latest_model = await self.clients.training.get_latest_model(
tenant_id=tenant_id,
model_type="forecasting"
)
if not latest_model:
logger.warning("No trained models found", tenant_id=tenant_id)
return None
# Get model metrics to validate quality
metrics = await self.clients.training.get_model_metrics(
tenant_id=tenant_id,
model_id=latest_model["id"]
)
if metrics and metrics.get("accuracy", 0) > 0.7: # 70% accuracy threshold
logger.info(f"Selected model {latest_model['id']} with accuracy {metrics.get('accuracy')}",
tenant_id=tenant_id)
return latest_model
else:
logger.warning(f"Model accuracy too low: {metrics.get('accuracy', 'unknown')}",
tenant_id=tenant_id)
return None
except Exception as e:
logger.error(f"Error selecting best model: {e}", tenant_id=tenant_id)
return None
async def validate_model_data_compatibility(
self,
tenant_id: str,
model_id: str,
forecast_start_date: str,
forecast_end_date: str
) -> Dict[str, Any]:
"""
Validate that we have sufficient data for the model to make forecasts
Demonstrates calling both training and data services
"""
try:
# Get model details from training service
model = await self.clients.training.get_model(
tenant_id=tenant_id,
model_id=model_id
)
if not model:
return {"is_valid": False, "error": "Model not found"}
# Get data statistics from data service
data_stats = await self.clients.data.get_data_statistics(
tenant_id=tenant_id,
start_date=forecast_start_date,
end_date=forecast_end_date
)
if not data_stats:
return {"is_valid": False, "error": "Could not retrieve data statistics"}
# Check if we have minimum required data points
min_required = model.get("metadata", {}).get("min_data_points", 30)
available_points = data_stats.get("total_records", 0)
is_valid = available_points >= min_required
result = {
"is_valid": is_valid,
"model_id": model_id,
"required_points": min_required,
"available_points": available_points,
"data_coverage": data_stats.get("coverage_percentage", 0)
}
if not is_valid:
result["error"] = f"Insufficient data: need {min_required}, have {available_points}"
logger.info("Model data compatibility check completed",
tenant_id=tenant_id, model_id=model_id, is_valid=is_valid)
return result
except Exception as e:
logger.error(f"Error validating model compatibility: {e}",
tenant_id=tenant_id, model_id=model_id)
return {"is_valid": False, "error": str(e)}
async def trigger_model_retraining(
self,
tenant_id: str,
include_weather: bool = True,
include_traffic: bool = False
) -> Optional[Dict[str, Any]]:
"""
Trigger a new training job if current model is outdated
"""
try:
# Create training job through training service
job = await self.clients.training.create_training_job(
tenant_id=tenant_id,
include_weather=include_weather,
include_traffic=include_traffic,
min_data_points=50 # Higher threshold for forecasting
)
if job:
logger.info(f"Training job created: {job['job_id']}", tenant_id=tenant_id)
return job
else:
logger.error("Failed to create training job", tenant_id=tenant_id)
return None
except Exception as e:
logger.error(f"Error triggering model retraining: {e}", tenant_id=tenant_id)
return None
# Global instance
model_client = ModelClient()
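A short usage sketch for the client above, for example from a scheduled model health check (tenant and dates are hypothetical):

best = await model_client.get_best_model_for_forecasting("tenant-123")
if best:
    check = await model_client.validate_model_data_compatibility(
        tenant_id="tenant-123",
        model_id=best["id"],
        forecast_start_date="2025-08-01",
        forecast_end_date="2025-08-07",
    )
    # Retrain when the data no longer supports the deployed model
    if not check["is_valid"]:
        await model_client.trigger_model_retraining("tenant-123")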

View File

@@ -1,81 +0,0 @@
import time
import structlog
from typing import Dict, Any
from shared.auth.jwt_handler import JWTHandler
from app.core.config import settings
logger = structlog.get_logger()
class ServiceAuthenticator:
"""Handles service-to-service authentication via gateway"""
def __init__(self):
self.jwt_handler = JWTHandler(settings.JWT_SECRET_KEY)
self._cached_token = None
self._token_expires_at = 0
async def get_service_token(self) -> str:
"""
Get a valid service token, using cache when possible
Creates JWT tokens that the gateway will accept
"""
current_time = int(time.time())
# Return cached token if still valid (with 5 min buffer)
if (self._cached_token and
self._token_expires_at > current_time + 300):
return self._cached_token
# Create new service token
token_expires_at = current_time + 3600 # 1 hour
service_payload = {
# ✅ Required fields for gateway middleware
"sub": "training-service",
"user_id": "training-service",
"email": "training-service@internal",
"type": "access", # ✅ Must be "access" for gateway
# ✅ Expiration and timing
"exp": token_expires_at,
"iat": current_time,
"iss": "training-service",
# ✅ Service identification
"service": "training",
"full_name": "Training Service",
"is_verified": True,
"is_active": True,
# ✅ Optional tenant context (can be overridden per request)
"tenant_id": None
}
try:
token = self.jwt_handler.create_access_token_from_payload(service_payload)
# Cache the token
self._cached_token = token
self._token_expires_at = token_expires_at
logger.debug("Created new service token", expires_at=token_expires_at)
return token
except Exception as e:
logger.error(f"Failed to create service token: {e}")
raise ValueError(f"Service token creation failed: {e}")
def get_request_headers(self, tenant_id: str = None) -> Dict[str, str]:
"""Get standard headers for service requests"""
headers = {
"X-Service": "training-service",
"User-Agent": "training-service/1.0.0"
}
if tenant_id:
headers["X-Tenant-ID"] = str(tenant_id)
return headers
# Global authenticator instance
service_auth = ServiceAuthenticator()

View File

@@ -1,219 +1,89 @@
import httpx
# services/training/app/services/data_client.py
"""
Training Service Data Client
Migrated to use shared service clients - much simpler now!
"""
import structlog
from typing import List, Dict, Any, Optional
from typing import Dict, Any, List, Optional
from datetime import datetime
# Import the shared clients
from shared.clients import get_data_client, get_service_clients
from app.core.config import settings
from app.core.service_auth import service_auth
logger = structlog.get_logger()
class DataServiceClient:
"""Client for fetching data through the API Gateway"""
class DataClient:
"""
Data client for training service
Now uses the shared data service client under the hood
"""
def __init__(self):
self.base_url = settings.API_GATEWAY_URL
self.timeout = 2000.0
async def fetch_sales_data(self, tenant_id: str) -> List[Dict[str, Any]]:
"""
Fetch all sales data for training (no pagination limits)
FIXED: Retrieves ALL records instead of being limited to 1000
"""
try:
# Get service token
token = await service_auth.get_service_token()
# Prepare headers
headers = service_auth.get_request_headers(tenant_id)
headers["Authorization"] = f"Bearer {token}"
all_records = []
page = 0
page_size = 5000 # Use maximum allowed by API
while True:
# Prepare query parameters for pagination
params = {
"limit": page_size,
"offset": page * page_size
}
logger.info(f"Fetching sales data page {page + 1} (offset: {page * page_size})",
tenant_id=tenant_id)
# Make GET request via gateway with pagination
async with httpx.AsyncClient(timeout=self.timeout) as client:
response = await client.get(
f"{self.base_url}/api/v1/tenants/{tenant_id}/sales",
headers=headers,
params=params
)
if response.status_code == 200:
page_data = response.json()
# Handle different response formats
if isinstance(page_data, list):
# Direct list response (no pagination metadata)
records = page_data
logger.info(f"Retrieved {len(records)} records from page {page + 1} (direct list)")
# For direct list responses, we need to check if we got the max possible
# If we got less than page_size, we're done
if len(records) == 0:
logger.info("No records in response, pagination complete")
break
elif len(records) < page_size:
# Got fewer than requested, this is the last page
all_records.extend(records)
logger.info(f"Final page: retrieved {len(records)} records, total: {len(all_records)}")
break
else:
# Got full page, there might be more
all_records.extend(records)
logger.info(f"Full page retrieved: {len(records)} records, continuing to next page")
elif isinstance(page_data, dict):
# Paginated response format
records = page_data.get('records', page_data.get('data', []))
total_available = page_data.get('total', 0)
logger.info(f"Retrieved {len(records)} records from page {page + 1} (paginated response)")
if not records:
logger.info("No more records found in paginated response")
break
all_records.extend(records)
# Check if we've got all available records
if len(all_records) >= total_available:
logger.info(f"Retrieved all available records: {len(all_records)}/{total_available}")
break
else:
logger.warning(f"Unexpected response format: {type(page_data)}")
records = []
break
page += 1
# Safety break to prevent infinite loops
if page > 100: # Max 500,000 records (100 * 5000)
logger.warning("Reached maximum page limit, stopping pagination")
break
elif response.status_code == 401:
logger.error("Authentication failed with gateway",
tenant_id=tenant_id,
response_text=response.text)
return []
elif response.status_code == 404:
logger.warning("Sales data endpoint not found",
tenant_id=tenant_id,
url=response.url)
return []
else:
logger.error(f"Gateway request failed: HTTP {response.status_code}",
tenant_id=tenant_id,
response_text=response.text)
return []
logger.info(f"Successfully fetched {len(all_records)} total sales records via gateway",
tenant_id=tenant_id)
return all_records
except httpx.TimeoutException:
logger.error("Timeout when fetching sales data via gateway",
tenant_id=tenant_id)
return []
except Exception as e:
logger.error(f"Error fetching sales data via gateway: {e}",
tenant_id=tenant_id)
return []
# Get the shared data client configured for this service
self.data_client = get_data_client(settings, "training")
async def fetch_weather_data(
self,
# Or alternatively, get all clients at once:
# self.clients = get_service_clients(settings, "training")
# Then use: self.clients.data.get_sales_data(...)
async def fetch_sales_data(
self,
tenant_id: str,
start_date: str,
end_date: str,
latitude: Optional[float] = None,
longitude: Optional[float] = None
start_date: Optional[str] = None,
end_date: Optional[str] = None,
product_id: Optional[str] = None,
fetch_all: bool = True
) -> List[Dict[str, Any]]:
"""
Fetch historical weather data for training via API Gateway using POST
Fetch sales data for training
Args:
tenant_id: Tenant identifier
start_date: Start date in ISO format
end_date: End date in ISO format
product_id: Optional product filter
fetch_all: If True, fetches ALL records using pagination (original behavior)
If False, fetches limited records (standard API response)
"""
try:
# Get service token
token = await service_auth.get_service_token()
# Prepare headers
headers = service_auth.get_request_headers(tenant_id)
headers["Authorization"] = f"Bearer {token}"
headers["Content-Type"] = "application/json"
# Prepare request payload with proper date handling
payload = {
"start_date": start_date, # Already in ISO format from calling code
"end_date": end_date, # Already in ISO format from calling code
"latitude": latitude or 40.4168, # Default Madrid coordinates
"longitude": longitude or -3.7038
}
logger.info(f"Weather request payload: {payload}", tenant_id=tenant_id)
# Make POST request via gateway
async with httpx.AsyncClient(timeout=self.timeout) as client:
response = await client.post(
f"{self.base_url}/api/v1/tenants/{tenant_id}/weather/historical",
headers=headers,
json=payload
if fetch_all:
# Use paginated method to get ALL records (original behavior)
sales_data = await self.data_client.get_all_sales_data(
tenant_id=tenant_id,
start_date=start_date,
end_date=end_date,
product_id=product_id,
aggregation="daily",
page_size=5000, # Match original page size
max_pages=100 # Safety limit (500k records max)
)
else:
# Use standard method for limited results
sales_data = await self.data_client.get_sales_data(
tenant_id=tenant_id,
start_date=start_date,
end_date=end_date,
product_id=product_id,
aggregation="daily"
)
sales_data = sales_data or []
if sales_data:
logger.info(f"Fetched {len(sales_data)} sales records",
tenant_id=tenant_id, product_id=product_id, fetch_all=fetch_all)
return sales_data
else:
logger.warning("No sales data returned", tenant_id=tenant_id)
return []
logger.info(f"Weather data request: {response.status_code}",
tenant_id=tenant_id,
url=response.url)
if response.status_code == 200:
data = response.json()
logger.info(f"Successfully fetched {len(data)} weather records")
return data
elif response.status_code == 400:
error_details = response.text
logger.error(f"Weather API validation error (400): {error_details}")
# Try to parse the error and provide helpful info
try:
error_json = response.json()
if 'detail' in error_json:
detail = error_json['detail']
if 'End date must be after start date' in str(detail):
logger.error(f"Date range issue: start={start_date}, end={end_date}")
elif 'Date range cannot exceed 90 days' in str(detail):
logger.error(f"Date range too large: {start_date} to {end_date}")
except:
pass
return []
elif response.status_code == 401:
logger.error("Authentication failed for weather API")
return []
else:
logger.error(f"Failed to fetch weather data: {response.status_code} - {response.text}")
return []
except httpx.TimeoutException:
logger.error("Timeout when fetching weather data")
return []
except Exception as e:
logger.error(f"Error fetching weather data: {str(e)}")
logger.error(f"Error fetching sales data: {e}", tenant_id=tenant_id)
return []
async def fetch_traffic_data(
self,
async def fetch_weather_data(
self,
tenant_id: str,
start_date: str,
end_date: str,
@@ -221,65 +91,90 @@ class DataServiceClient:
longitude: Optional[float] = None
) -> List[Dict[str, Any]]:
"""
Fetch historical traffic data for training via API Gateway using POST
Fetch weather data for training
All the error handling and retry logic is now in the base client!
"""
try:
# Get service token
token = await service_auth.get_service_token()
# Prepare headers
headers = service_auth.get_request_headers(tenant_id)
headers["Authorization"] = f"Bearer {token}"
headers["Content-Type"] = "application/json"
# Prepare request payload
payload = {
"start_date": start_date, # Already in ISO format from calling code
"end_date": end_date, # Already in ISO format from calling code
"latitude": latitude or 40.4168, # Default Madrid coordinates
"longitude": longitude or -3.7038
}
logger.info(f"Traffic request payload: {payload}", tenant_id=tenant_id)
# Madrid traffic data can take 5-10 minutes to download and process
timeout_config = httpx.Timeout(
connect=30.0, # Connection timeout
read=600.0, # Read timeout: 10 minutes (was 30s)
write=30.0, # Write timeout
pool=30.0 # Pool timeout
weather_data = await self.data_client.get_weather_historical(
tenant_id=tenant_id,
start_date=start_date,
end_date=end_date,
latitude=latitude,
longitude=longitude
)
# Make POST request via gateway
async with httpx.AsyncClient(timeout=timeout_config) as client:
response = await client.post(
f"{self.base_url}/api/v1/tenants/{tenant_id}/traffic/historical",
headers=headers,
json=payload
)
if weather_data:
logger.info(f"Fetched {len(weather_data)} weather records",
tenant_id=tenant_id)
return weather_data
else:
logger.warning("No weather data returned", tenant_id=tenant_id)
return []
logger.info(f"Traffic data request: {response.status_code}",
tenant_id=tenant_id,
url=response.url)
if response.status_code == 200:
data = response.json()
logger.info(f"Successfully fetched {len(data)} traffic records")
return data
elif response.status_code == 400:
error_details = response.text
logger.error(f"Traffic API validation error (400): {error_details}")
return []
elif response.status_code == 401:
logger.error("Authentication failed for traffic API")
return []
else:
logger.error(f"Failed to fetch traffic data: {response.status_code} - {response.text}")
return []
except httpx.TimeoutException:
logger.error("Timeout when fetching traffic data")
return []
except Exception as e:
logger.error(f"Error fetching traffic data: {str(e)}")
return []
logger.error(f"Error fetching weather data: {e}", tenant_id=tenant_id)
return []
async def fetch_traffic_data(
self,
tenant_id: str,
start_date: str,
end_date: str,
latitude: Optional[float] = None,
longitude: Optional[float] = None
) -> List[Dict[str, Any]]:
"""
Fetch traffic data for training
"""
try:
traffic_data = await self.data_client.get_traffic_data(
tenant_id=tenant_id,
start_date=start_date,
end_date=end_date,
latitude=latitude,
longitude=longitude
)
if traffic_data:
logger.info(f"Fetched {len(traffic_data)} traffic records",
tenant_id=tenant_id)
return traffic_data
else:
logger.warning("No traffic data returned", tenant_id=tenant_id)
return []
except Exception as e:
logger.error(f"Error fetching traffic data: {e}", tenant_id=tenant_id)
return []
async def validate_data_quality(
self,
tenant_id: str,
start_date: str,
end_date: str
) -> Dict[str, Any]:
"""
Validate data quality before training
"""
try:
validation_result = await self.data_client.validate_data_quality(
tenant_id=tenant_id,
start_date=start_date,
end_date=end_date
)
if validation_result:
logger.info("Data validation completed",
tenant_id=tenant_id,
is_valid=validation_result.get("is_valid", False))
return validation_result
else:
logger.warning("Data validation failed", tenant_id=tenant_id)
return {"is_valid": False, "errors": ["Validation service unavailable"]}
except Exception as e:
logger.error(f"Error validating data: {e}", tenant_id=tenant_id)
return {"is_valid": False, "errors": [str(e)]}
# Global instance - same as before, but much simpler implementation
data_client = DataClient()
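Typical training-pipeline usage of the migrated client (tenant and dates are hypothetical). fetch_all=True preserves the old paginate-everything behavior, and the quality gate mirrors the {"is_valid", "errors"} shape returned by validate_data_quality:

sales = await data_client.fetch_sales_data(
    tenant_id="tenant-123",
    start_date="2025-01-01",
    end_date="2025-06-30",
    fetch_all=True,  # walk every page, as the old gateway client did
)
quality = await data_client.validate_data_quality("tenant-123", "2025-01-01", "2025-06-30")
if not quality["is_valid"]:
    logger.warning("Skipping training run", errors=quality.get("errors"))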

View File

@@ -13,7 +13,7 @@ from concurrent.futures import ThreadPoolExecutor
from datetime import timezone
import pandas as pd
from app.services.data_client import DataServiceClient
from app.services.data_client import DataClient
from app.services.date_alignment_service import DateAlignmentService, DateRange, DataSourceType, AlignedDateRange
logger = logging.getLogger(__name__)
@@ -37,7 +37,7 @@ class TrainingDataOrchestrator:
madrid_client=None,
weather_client=None,
date_alignment_service: DateAlignmentService = None):
self.data_client = DataServiceClient()
self.data_client = DataClient()
self.date_alignment_service = date_alignment_service or DateAlignmentService()
self.max_concurrent_requests = 3