# services/training/app/services/data_client.py
"""
Training Service Data Client
Migrated to use shared service clients - much simpler now!
"""

import structlog
from typing import Dict, Any, List, Optional
from datetime import datetime

# Import the shared clients
from shared.clients import get_sales_client, get_external_client, get_service_clients
from app.core.config import settings

logger = structlog.get_logger()


class DataClient:
    """
    Data client for the training service.

    Thin facade over the shared sales and external service clients. Every
    ``fetch_*`` method is best-effort: failures are logged (with traceback)
    and an empty list is returned instead of raising.
    """

    def __init__(self):
        # Specialized shared clients, both registered under the "training" name.
        self.sales_client = get_sales_client(settings, "training")
        self.external_client = get_external_client(settings, "training")

        # The stored-traffic method may be missing from the external client;
        # remember whether it exists so callers can fall back gracefully.
        self.supports_stored_traffic_data = hasattr(
            self.external_client, "get_stored_traffic_data_for_training"
        )
        if not self.supports_stored_traffic_data:
            logger.warning("Stored traffic data method not available in external client")

        # NOTE: alternatively, all clients can be obtained at once:
        #   self.clients = get_service_clients(settings, "training")
        # and then used as self.clients.sales.get_sales_data(...) and
        # self.clients.external.get_weather_forecast(...)

    async def fetch_sales_data(
        self,
        tenant_id: str,
        start_date: Optional[str] = None,
        end_date: Optional[str] = None,
        product_id: Optional[str] = None,
        fetch_all: bool = True
    ) -> List[Dict[str, Any]]:
        """
        Fetch daily-aggregated sales data for training.

        Args:
            tenant_id: Tenant identifier
            start_date: Start date in ISO format
            end_date: End date in ISO format
            product_id: Optional product filter
            fetch_all: If True, pages through the sales client's paginated
                       method (bounded by ``page_size`` * ``max_pages``).
                       If False, performs a single standard request.

        Returns:
            The records returned by the sales client, or ``[]`` when nothing
            was returned or the request failed. Never raises.
        """
        try:
            if fetch_all:
                # Paginated method to collect the full history.
                sales_data = await self.sales_client.get_all_sales_data(
                    tenant_id=tenant_id,
                    start_date=start_date,
                    end_date=end_date,
                    product_id=product_id,
                    aggregation="daily",
                    page_size=1000,  # Comply with API limit
                    max_pages=100    # Safety limit (1000 * 100 = 100k records max)
                )
            else:
                # Standard method for limited results.
                sales_data = await self.sales_client.get_sales_data(
                    tenant_id=tenant_id,
                    start_date=start_date,
                    end_date=end_date,
                    product_id=product_id,
                    aggregation="daily"
                )

            # Normalize None / empty responses to an empty list.
            sales_data = sales_data or []

            if not sales_data:
                logger.warning("No sales data returned", tenant_id=tenant_id)
                return []

            logger.info(f"Fetched {len(sales_data)} sales records",
                        tenant_id=tenant_id, product_id=product_id, fetch_all=fetch_all)
            return sales_data

        except Exception as e:
            # Best-effort: keep the traceback in the log, but do not propagate.
            logger.error(f"Error fetching sales data: {e}", tenant_id=tenant_id, exc_info=True)
            return []

    async def fetch_weather_data(
        self,
        tenant_id: str,
        start_date: str,
        end_date: str,
        latitude: Optional[float] = None,
        longitude: Optional[float] = None
    ) -> List[Dict[str, Any]]:
        """
        Fetch historical weather data for training.

        Delegates to the external client's ``get_weather_historical``.

        Returns:
            The records returned by the external client, or ``[]`` when
            nothing was returned or the request failed. Never raises.
        """
        try:
            weather_data = await self.external_client.get_weather_historical(
                tenant_id=tenant_id,
                start_date=start_date,
                end_date=end_date,
                latitude=latitude,
                longitude=longitude
            )

            if not weather_data:
                logger.warning("No weather data returned", tenant_id=tenant_id)
                return []

            logger.info(f"Fetched {len(weather_data)} weather records",
                        tenant_id=tenant_id)
            return weather_data

        except Exception as e:
            logger.error(f"Error fetching weather data: {e}", tenant_id=tenant_id, exc_info=True)
            return []

    async def fetch_traffic_data(
        self,
        tenant_id: str,
        start_date: str,
        end_date: str,
        latitude: Optional[float] = None,
        longitude: Optional[float] = None
    ) -> List[Dict[str, Any]]:
        """
        Fetch traffic data for training.

        Delegates to the external client's ``get_traffic_data``.

        Returns:
            The records returned by the external client, or ``[]`` when
            nothing was returned or the request failed. Never raises.
        """
        try:
            traffic_data = await self.external_client.get_traffic_data(
                tenant_id=tenant_id,
                start_date=start_date,
                end_date=end_date,
                latitude=latitude,
                longitude=longitude
            )

            if not traffic_data:
                logger.warning("No traffic data returned", tenant_id=tenant_id)
                return []

            logger.info(f"Fetched {len(traffic_data)} traffic records",
                        tenant_id=tenant_id)
            return traffic_data

        except Exception as e:
            logger.error(f"Error fetching traffic data: {e}", tenant_id=tenant_id, exc_info=True)
            return []

    async def fetch_stored_traffic_data_for_training(
        self,
        tenant_id: str,
        start_date: str,
        end_date: str,
        latitude: Optional[float] = None,
        longitude: Optional[float] = None
    ) -> List[Dict[str, Any]]:
        """
        Fetch stored traffic data specifically for training/re-training.

        Uses the external client's dedicated
        ``get_stored_traffic_data_for_training`` method when it exists
        (intended to read previously stored data rather than trigger new
        upstream API calls); otherwise falls back to ``fetch_traffic_data``.

        Returns:
            The stored traffic records, or ``[]`` when nothing was returned
            or the request failed. Never raises.
        """
        try:
            if not self.supports_stored_traffic_data:
                # Fallback to the regular traffic data method.
                logger.info("Using fallback traffic data method for training",
                            tenant_id=tenant_id)
                return await self.fetch_traffic_data(
                    tenant_id=tenant_id,
                    start_date=start_date,
                    end_date=end_date,
                    latitude=latitude,
                    longitude=longitude
                )

            # Dedicated stored-traffic method is available.
            stored_traffic_data = await self.external_client.get_stored_traffic_data_for_training(
                tenant_id=tenant_id,
                start_date=start_date,
                end_date=end_date,
                latitude=latitude,
                longitude=longitude
            )

            if not stored_traffic_data:
                logger.warning("No stored traffic data available for training",
                               tenant_id=tenant_id)
                return []

            logger.info(f"Retrieved {len(stored_traffic_data)} stored traffic records for training",
                        tenant_id=tenant_id)
            return stored_traffic_data

        except Exception as e:
            logger.error(f"Error fetching stored traffic data for training: {e}",
                         tenant_id=tenant_id, exc_info=True)
            return []

    async def validate_data_quality(
        self,
        tenant_id: str,
        start_date: str,
        end_date: str
    ) -> Dict[str, Any]:
        """
        Validate data quality before training.

        Validation is currently stubbed out: the method always reports the
        data as valid. ``start_date`` and ``end_date`` are accepted for
        interface stability but are not used yet.

        Returns:
            A dict with an ``is_valid`` flag plus either a ``message`` (stub
            result) or an ``errors`` list (failure paths). Never raises.
        """
        try:
            # TODO: call the real validation endpoint once one of the new
            # services implements it, e.g.
            #   await self.sales_client.validate_data_quality(
            #       tenant_id=tenant_id, start_date=start_date, end_date=end_date)
            # Temporary implementation - assume data is valid for now.
            validation_result = {"is_valid": True, "message": "Validation temporarily disabled"}

            # NOTE: unreachable while the result above is hard-coded; kept so
            # the empty-response path is already handled when the real call
            # is wired in.
            if not validation_result:
                logger.warning("Data validation failed", tenant_id=tenant_id)
                return {"is_valid": False, "errors": ["Validation service unavailable"]}

            logger.info("Data validation completed",
                        tenant_id=tenant_id,
                        is_valid=validation_result.get("is_valid", False))
            return validation_result

        except Exception as e:
            logger.error(f"Error validating data: {e}", tenant_id=tenant_id, exc_info=True)
            return {"is_valid": False, "errors": [str(e)]}


# Global instance - same as before, but much simpler implementation
data_client = DataClient()