237 lines
8.9 KiB
Python
237 lines
8.9 KiB
Python
# services/training/app/services/data_client.py
|
|
"""
|
|
Training Service Data Client
|
|
Migrated to use shared service clients - much simpler now!
|
|
"""
|
|
|
|
import structlog
|
|
from typing import Dict, Any, List, Optional
|
|
from datetime import datetime
|
|
|
|
# Import the shared clients
|
|
from shared.clients import get_sales_client, get_external_client, get_service_clients
|
|
from app.core.config import settings
|
|
|
|
# Module-level structlog logger used by every DataClient method below.
logger = structlog.get_logger()
|
|
|
|
class DataClient:
    """
    Data client for the training service.

    Async facade over the shared sales and external-data service clients.
    Every ``fetch_*`` method is best-effort: any exception raised while
    fetching is logged (with traceback) and an empty list is returned, so
    callers must treat ``[]`` as "no data available OR the fetch failed".
    """

    # Pagination used by fetch_sales_data(fetch_all=True).
    _SALES_PAGE_SIZE = 1000  # per-request page size; complies with the API limit
    _SALES_MAX_PAGES = 100   # safety cap: 100 pages x 1000 rows = 100k records max

    def __init__(self):
        """Create the shared sales/external clients and detect optional features."""
        # "training" is passed to the shared factories as the second argument
        # (presumably the calling service's name -- confirm in shared.clients).
        self.sales_client = get_sales_client(settings, "training")
        self.external_client = get_external_client(settings, "training")

        # Older external clients may not expose the stored-traffic endpoint;
        # remember the capability once so the fetch method can fall back.
        self.supports_stored_traffic_data = hasattr(
            self.external_client, "get_stored_traffic_data_for_training"
        )
        if not self.supports_stored_traffic_data:
            logger.warning("Stored traffic data method not available in external client")

        # NOTE: get_service_clients(settings, "training") is an alternative
        # that returns all clients at once (e.g. clients.sales / clients.external).

    async def fetch_sales_data(
        self,
        tenant_id: str,
        start_date: Optional[str] = None,
        end_date: Optional[str] = None,
        product_id: Optional[str] = None,
        fetch_all: bool = True
    ) -> List[Dict[str, Any]]:
        """
        Fetch daily-aggregated sales data for training.

        Args:
            tenant_id: Tenant identifier
            start_date: Start date in ISO format
            end_date: End date in ISO format
            product_id: Optional product filter
            fetch_all: If True, fetch records through the paginated client
                method (capped at _SALES_PAGE_SIZE * _SALES_MAX_PAGES rows).
                If False, use the standard (limited) API response.

        Returns:
            The records returned by the sales client, or ``[]`` when nothing
            was returned or the fetch raised.
        """
        try:
            if fetch_all:
                sales_data = await self.sales_client.get_all_sales_data(
                    tenant_id=tenant_id,
                    start_date=start_date,
                    end_date=end_date,
                    product_id=product_id,
                    aggregation="daily",
                    page_size=self._SALES_PAGE_SIZE,
                    max_pages=self._SALES_MAX_PAGES,
                )
            else:
                sales_data = await self.sales_client.get_sales_data(
                    tenant_id=tenant_id,
                    start_date=start_date,
                    end_date=end_date,
                    product_id=product_id,
                    aggregation="daily",
                )

            # Any falsy result (None, empty list) is normalised to [].
            if not sales_data:
                logger.warning("No sales data returned", tenant_id=tenant_id)
                return []

            logger.info(f"Fetched {len(sales_data)} sales records",
                        tenant_id=tenant_id, product_id=product_id, fetch_all=fetch_all)
            return sales_data

        except Exception as e:
            # Deliberate best-effort boundary: log with traceback, never raise.
            logger.error(f"Error fetching sales data: {e}",
                         tenant_id=tenant_id, exc_info=True)
            return []

    async def fetch_weather_data(
        self,
        tenant_id: str,
        start_date: str,
        end_date: str,
        latitude: Optional[float] = None,
        longitude: Optional[float] = None
    ) -> List[Dict[str, Any]]:
        """
        Fetch historical weather data for training.

        Retry handling is expected to live in the shared base client (per the
        original author's note; not visible from this module).

        Returns:
            The records returned by the external client, or ``[]`` when
            nothing was returned or the fetch raised.
        """
        try:
            weather_data = await self.external_client.get_weather_historical(
                tenant_id=tenant_id,
                start_date=start_date,
                end_date=end_date,
                latitude=latitude,
                longitude=longitude,
            )

            if not weather_data:
                logger.warning("No weather data returned", tenant_id=tenant_id)
                return []

            logger.info(f"Fetched {len(weather_data)} weather records",
                        tenant_id=tenant_id)
            return weather_data

        except Exception as e:
            # Deliberate best-effort boundary: log with traceback, never raise.
            logger.error(f"Error fetching weather data: {e}",
                         tenant_id=tenant_id, exc_info=True)
            return []

    async def fetch_traffic_data(
        self,
        tenant_id: str,
        start_date: str,
        end_date: str,
        latitude: Optional[float] = None,
        longitude: Optional[float] = None
    ) -> List[Dict[str, Any]]:
        """
        Fetch traffic data for training.

        Returns:
            The records returned by the external client, or ``[]`` when
            nothing was returned or the fetch raised.
        """
        try:
            traffic_data = await self.external_client.get_traffic_data(
                tenant_id=tenant_id,
                start_date=start_date,
                end_date=end_date,
                latitude=latitude,
                longitude=longitude,
            )

            if not traffic_data:
                logger.warning("No traffic data returned", tenant_id=tenant_id)
                return []

            logger.info(f"Fetched {len(traffic_data)} traffic records",
                        tenant_id=tenant_id)
            return traffic_data

        except Exception as e:
            # Deliberate best-effort boundary: log with traceback, never raise.
            logger.error(f"Error fetching traffic data: {e}",
                         tenant_id=tenant_id, exc_info=True)
            return []

    async def fetch_stored_traffic_data_for_training(
        self,
        tenant_id: str,
        start_date: str,
        end_date: str,
        latitude: Optional[float] = None,
        longitude: Optional[float] = None
    ) -> List[Dict[str, Any]]:
        """
        Fetch stored traffic data specifically for training/re-training.

        When the external client exposes
        ``get_stored_traffic_data_for_training`` that dedicated method is
        used (intended to read previously stored data). Otherwise this falls
        back to ``fetch_traffic_data``, which goes through the regular
        traffic endpoint.

        Returns:
            The records returned by the external client, or ``[]`` when
            nothing was returned or the fetch raised.
        """
        try:
            if not self.supports_stored_traffic_data:
                # Fallback to regular traffic data method
                logger.info("Using fallback traffic data method for training",
                            tenant_id=tenant_id)
                return await self.fetch_traffic_data(
                    tenant_id=tenant_id,
                    start_date=start_date,
                    end_date=end_date,
                    latitude=latitude,
                    longitude=longitude,
                )

            # Use the dedicated stored traffic data method
            stored_traffic_data = await self.external_client.get_stored_traffic_data_for_training(
                tenant_id=tenant_id,
                start_date=start_date,
                end_date=end_date,
                latitude=latitude,
                longitude=longitude,
            )

            if not stored_traffic_data:
                logger.warning("No stored traffic data available for training", tenant_id=tenant_id)
                return []

            logger.info(f"Retrieved {len(stored_traffic_data)} stored traffic records for training",
                        tenant_id=tenant_id)
            return stored_traffic_data

        except Exception as e:
            # Deliberate best-effort boundary: log with traceback, never raise.
            logger.error(f"Error fetching stored traffic data for training: {e}",
                         tenant_id=tenant_id, exc_info=True)
            return []

    async def validate_data_quality(
        self,
        tenant_id: str,
        start_date: str,
        end_date: str
    ) -> Dict[str, Any]:
        """
        Validate data quality before training.

        NOTE: validation is currently a placeholder. No service is called,
        ``start_date``/``end_date`` are unused, and the result is always
        ``{"is_valid": True, ...}``.

        Returns:
            A dict with at least an ``is_valid`` key; on failure it also
            carries an ``errors`` list.
        """
        try:
            # TODO: restore the real check once a validate_data_quality
            # endpoint exists in one of the new services, e.g.:
            #   validation_result = await self.sales_client.validate_data_quality(
            #       tenant_id=tenant_id, start_date=start_date, end_date=end_date)

            # Temporary implementation - assume data is valid for now
            validation_result = {"is_valid": True, "message": "Validation temporarily disabled"}

            if validation_result:
                logger.info("Data validation completed",
                            tenant_id=tenant_id,
                            is_valid=validation_result.get("is_valid", False))
                return validation_result

            # Unreachable while the placeholder above is in use; kept so the
            # empty-response handling is already in place for the real call.
            logger.warning("Data validation failed", tenant_id=tenant_id)
            return {"is_valid": False, "errors": ["Validation service unavailable"]}

        except Exception as e:
            logger.error(f"Error validating data: {e}",
                         tenant_id=tenant_id, exc_info=True)
            return {"is_valid": False, "errors": [str(e)]}
|
|
|
|
# Module-level shared DataClient instance, constructed once at import time.
data_client = DataClient()