Files
bakery-ia/services/training/app/services/data_client.py
2025-07-29 15:08:55 +02:00

180 lines
6.3 KiB
Python

# services/training/app/services/data_client.py
"""
Training Service Data Client
Migrated to use shared service clients - much simpler now!
"""
from datetime import datetime
from typing import Any, Awaitable, Callable, Dict, List, Optional

import structlog

# Import the shared clients
from shared.clients import get_data_client, get_service_clients

from app.core.config import settings
# Module-level structlog logger used by every DataClient method below.
logger = structlog.get_logger()
class DataClient:
    """
    Data client for the training service.

    Thin wrapper around the shared data service client returned by
    ``get_data_client(settings, "training")``. Every public method is
    best-effort: any exception raised while calling the shared client is
    logged (with traceback) and a neutral value is returned instead of
    propagating, so callers never need their own ``try``/``except``.
    """

    # Pagination settings used when the full sales history is requested.
    _SALES_PAGE_SIZE = 5000  # Match original page size
    _SALES_MAX_PAGES = 100  # Safety limit (500k records max)

    def __init__(self):
        # Get the shared data client configured for this service.
        # NOTE: get_service_clients(settings, "training") is the alternative
        # entry point if more than the data client is ever needed here.
        self.data_client = get_data_client(settings, "training")

    async def _fetch_records(
        self,
        label: str,
        request: Callable[[], Awaitable[Optional[List[Dict[str, Any]]]]],
        tenant_id: str,
        **info_fields: Any,
    ) -> List[Dict[str, Any]]:
        """
        Run one fetch against the shared client and normalise its outcome.

        Args:
            label: Word inserted into the log messages ("sales", "weather", ...).
            request: Zero-argument callable returning the awaitable to run.
                It is invoked inside the ``try`` so that errors raised while
                building the call are swallowed exactly like errors raised
                while awaiting it.
            tenant_id: Tenant identifier, attached to every log entry.
            **info_fields: Extra key/values attached to the success log only.

        Returns:
            The fetched records, or ``[]`` when the result is empty/``None``
            or when any exception occurs.
        """
        try:
            records = await request()
            if records:
                logger.info(
                    f"Fetched {len(records)} {label} records",
                    tenant_id=tenant_id,
                    **info_fields,
                )
                return records
            logger.warning(f"No {label} data returned", tenant_id=tenant_id)
            return []
        except Exception as e:
            # Best-effort by design: report the failure, keep the traceback,
            # and hand the caller an empty result.
            logger.error(
                f"Error fetching {label} data: {e}",
                tenant_id=tenant_id,
                exc_info=True,
            )
            return []

    async def fetch_sales_data(
        self,
        tenant_id: str,
        start_date: Optional[str] = None,
        end_date: Optional[str] = None,
        product_id: Optional[str] = None,
        fetch_all: bool = True
    ) -> List[Dict[str, Any]]:
        """
        Fetch daily-aggregated sales data for training.

        Args:
            tenant_id: Tenant identifier
            start_date: Start date in ISO format
            end_date: End date in ISO format
            product_id: Optional product filter
            fetch_all: If True, fetches ALL records using pagination (original behavior)
                       If False, fetches limited records (standard API response)

        Returns:
            The sales records, or ``[]`` if nothing was returned or the
            request failed.
        """
        def request():
            filters = dict(
                tenant_id=tenant_id,
                start_date=start_date,
                end_date=end_date,
                product_id=product_id,
                aggregation="daily",
            )
            if fetch_all:
                # Use paginated method to get ALL records (original behavior)
                return self.data_client.get_all_sales_data(
                    **filters,
                    page_size=self._SALES_PAGE_SIZE,
                    max_pages=self._SALES_MAX_PAGES,
                )
            # Use standard method for limited results
            return self.data_client.get_sales_data(**filters)

        return await self._fetch_records(
            "sales",
            request,
            tenant_id,
            product_id=product_id,
            fetch_all=fetch_all,
        )

    async def fetch_weather_data(
        self,
        tenant_id: str,
        start_date: str,
        end_date: str,
        latitude: Optional[float] = None,
        longitude: Optional[float] = None
    ) -> List[Dict[str, Any]]:
        """
        Fetch historical weather data for training.

        Delegates to the shared client's ``get_weather_historical``; per the
        original author's note, retry handling lives in the shared base
        client (not visible from this module).

        Returns:
            The weather records, or ``[]`` if nothing was returned or the
            request failed.
        """
        return await self._fetch_records(
            "weather",
            lambda: self.data_client.get_weather_historical(
                tenant_id=tenant_id,
                start_date=start_date,
                end_date=end_date,
                latitude=latitude,
                longitude=longitude,
            ),
            tenant_id,
        )

    async def fetch_traffic_data(
        self,
        tenant_id: str,
        start_date: str,
        end_date: str,
        latitude: Optional[float] = None,
        longitude: Optional[float] = None
    ) -> List[Dict[str, Any]]:
        """
        Fetch traffic data for training.

        Returns:
            The traffic records, or ``[]`` if nothing was returned or the
            request failed.
        """
        return await self._fetch_records(
            "traffic",
            lambda: self.data_client.get_traffic_data(
                tenant_id=tenant_id,
                start_date=start_date,
                end_date=end_date,
                latitude=latitude,
                longitude=longitude,
            ),
            tenant_id,
        )

    async def validate_data_quality(
        self,
        tenant_id: str,
        start_date: str,
        end_date: str
    ) -> Dict[str, Any]:
        """
        Validate data quality before training.

        Returns:
            The validation result from the shared client when it is
            non-empty. Otherwise a dict of the form
            ``{"is_valid": False, "errors": [...]}`` describing why no
            result is available (empty response or raised exception).
        """
        try:
            validation_result = await self.data_client.validate_data_quality(
                tenant_id=tenant_id,
                start_date=start_date,
                end_date=end_date
            )
            if not validation_result:
                # An empty/None response is treated as "service unavailable".
                logger.warning("Data validation failed", tenant_id=tenant_id)
                return {"is_valid": False, "errors": ["Validation service unavailable"]}

            logger.info("Data validation completed",
                        tenant_id=tenant_id,
                        is_valid=validation_result.get("is_valid", False))
            return validation_result
        except Exception as e:
            # Surface the failure to the caller as an invalid result rather
            # than raising; keep the traceback in the log for diagnosis.
            logger.error(f"Error validating data: {e}", tenant_id=tenant_id, exc_info=True)
            return {"is_valid": False, "errors": [str(e)]}
# Global shared instance. It is constructed at import time, so importing this
# module runs DataClient.__init__ and therefore get_data_client(settings, "training").
data_client = DataClient()