# shared/clients/data_client.py
"""
Data Service Client
Handles all API calls to the data service
"""
import httpx
import structlog
from typing import Dict, Any, Optional, List, Union

from .base_service_client import BaseServiceClient
from shared.config.base import BaseServiceSettings

logger = structlog.get_logger()

# Default location (Madrid) used whenever callers do not supply coordinates.
DEFAULT_LATITUDE = 40.4168
DEFAULT_LONGITUDE = -3.7038

# Extended timeout (seconds) matching the original client's ceiling for
# long-running data pulls (sales pagination, historical weather).
LONG_REQUEST_TIMEOUT = 2000.0


class DataServiceClient(BaseServiceClient):
    """Client for communicating with the data service"""

    def __init__(self, config: BaseServiceSettings, calling_service_name: str = "unknown"):
        super().__init__(calling_service_name, config)

    def get_service_base_path(self) -> str:
        """Base path prefix for all data-service endpoints."""
        return "/api/v1"

    # ================================================================
    # SALES DATA (with advanced pagination support)
    # ================================================================

    async def get_sales_data(
        self,
        tenant_id: str,
        start_date: Optional[str] = None,
        end_date: Optional[str] = None,
        product_id: Optional[str] = None,
        aggregation: str = "daily"
    ) -> Optional[List[Dict[str, Any]]]:
        """
        Get sales data for a date range (single page).

        Args:
            tenant_id: Tenant whose data is queried.
            start_date: Optional ISO start date filter.
            end_date: Optional ISO end date filter.
            product_id: Optional product filter.
            aggregation: Aggregation level (default "daily").

        Returns:
            List of sales records, [] if the response has no "sales" key,
            or None if the request itself failed.
        """
        params: Dict[str, Any] = {"aggregation": aggregation}
        if start_date:
            params["start_date"] = start_date
        if end_date:
            params["end_date"] = end_date
        if product_id:
            params["product_id"] = product_id

        result = await self.get("sales", tenant_id=tenant_id, params=params)
        return result.get("sales", []) if result else None

    async def get_all_sales_data(
        self,
        tenant_id: str,
        start_date: Optional[str] = None,
        end_date: Optional[str] = None,
        product_id: Optional[str] = None,
        aggregation: str = "daily",
        page_size: int = 5000,
        max_pages: int = 100
    ) -> List[Dict[str, Any]]:
        """
        Get ALL sales data using pagination (equivalent to original fetch_sales_data)
        Retrieves all records without pagination limits

        Falls back to a local pagination loop if the base client does not
        provide ``get_paginated`` (raises AttributeError).
        """
        params: Dict[str, Any] = {"aggregation": aggregation}
        if start_date:
            params["start_date"] = start_date
        if end_date:
            params["end_date"] = end_date
        if product_id:
            params["product_id"] = product_id

        # Use the inherited paginated request method
        try:
            all_records = await self.get_paginated(
                "sales",
                tenant_id=tenant_id,
                params=params,
                page_size=page_size,
                max_pages=max_pages,
                timeout=LONG_REQUEST_TIMEOUT  # Match original timeout
            )
            logger.info(f"Successfully fetched {len(all_records)} total sales records via gateway",
                        tenant_id=tenant_id)
            return all_records
        except AttributeError as e:
            # Fallback: implement pagination directly if inheritance isn't working
            logger.warning(f"Using fallback pagination due to: {e}")
            return await self._fallback_paginated_sales(tenant_id, params, page_size, max_pages)

    async def _fallback_paginated_sales(
        self,
        tenant_id: str,
        base_params: Dict[str, Any],
        page_size: int = 5000,
        max_pages: int = 100
    ) -> List[Dict[str, Any]]:
        """
        Fallback pagination implementation for sales data
        This replicates your original pagination logic directly

        Handles two response shapes: a bare list of records (no metadata)
        and a dict with "records"/"data" plus a "total" count.
        """
        all_records: List[Dict[str, Any]] = []
        page = 0

        logger.info(f"Starting fallback paginated request for sales data",
                    tenant_id=tenant_id, page_size=page_size)

        while page < max_pages:
            # Prepare pagination parameters
            params = base_params.copy()
            params.update({
                "limit": page_size,
                "offset": page * page_size
            })

            logger.info(f"Fetching sales data page {page + 1} (offset: {page * page_size})",
                        tenant_id=tenant_id)

            # Make request using the base client's _make_request method
            result = await self._make_request(
                "GET",
                "sales",
                tenant_id=tenant_id,
                params=params,
                timeout=LONG_REQUEST_TIMEOUT
            )

            if result is None:
                logger.error(f"Failed to fetch page {page + 1}", tenant_id=tenant_id)
                break

            # Handle different response formats (from your original code)
            if isinstance(result, list):
                # Direct list response (no pagination metadata)
                records = result
                logger.info(f"Retrieved {len(records)} records from page {page + 1} (direct list)")

                if len(records) == 0:
                    logger.info("No records in response, pagination complete")
                    break
                elif len(records) < page_size:
                    # Got fewer than requested, this is the last page
                    all_records.extend(records)
                    logger.info(f"Final page: retrieved {len(records)} records, total: {len(all_records)}")
                    break
                else:
                    # Got full page, there might be more
                    all_records.extend(records)
                    logger.info(f"Full page retrieved: {len(records)} records, continuing to next page")

            elif isinstance(result, dict):
                # Paginated response format
                records = result.get('records', result.get('data', []))
                # NOTE(review): if the response omits "total" it defaults to 0,
                # which makes the >= check below succeed and stops pagination
                # after this page — confirm the API always returns "total".
                total_available = result.get('total', 0)

                logger.info(f"Retrieved {len(records)} records from page {page + 1} (paginated response)")

                if not records:
                    logger.info("No more records found in paginated response")
                    break

                all_records.extend(records)

                # Check if we've got all available records
                if len(all_records) >= total_available:
                    logger.info(f"Retrieved all available records: {len(all_records)}/{total_available}")
                    break
            else:
                logger.warning(f"Unexpected response format: {type(result)}")
                break

            page += 1

        logger.info(f"Fallback pagination complete: fetched {len(all_records)} total records",
                    tenant_id=tenant_id, pages_fetched=page)
        return all_records

    async def upload_sales_data(
        self,
        tenant_id: str,
        sales_data: List[Dict[str, Any]]
    ) -> Optional[Dict[str, Any]]:
        """Upload sales data"""
        data = {"sales": sales_data}
        return await self.post("sales", data=data, tenant_id=tenant_id)

    # ================================================================
    # WEATHER DATA
    # ================================================================

    async def get_weather_historical(
        self,
        tenant_id: str,
        start_date: str,
        end_date: str,
        latitude: Optional[float] = None,
        longitude: Optional[float] = None
    ) -> List[Dict[str, Any]]:
        """
        Get weather data for a date range and location
        Uses POST request as per original implementation

        Returns:
            List of weather records; [] on failure (never None).
        """
        # Prepare request payload with proper date handling
        payload = {
            "start_date": start_date,  # Already in ISO format from calling code
            "end_date": end_date,      # Already in ISO format from calling code
            "latitude": latitude or DEFAULT_LATITUDE,    # Default Madrid coordinates
            "longitude": longitude or DEFAULT_LONGITUDE
        }

        logger.info(f"Weather request payload: {payload}", tenant_id=tenant_id)

        # Use POST request with extended timeout
        result = await self._make_request(
            "POST",
            "weather/historical",
            tenant_id=tenant_id,
            data=payload,
            timeout=LONG_REQUEST_TIMEOUT  # Match original timeout
        )

        if result:
            logger.info(f"Successfully fetched {len(result)} weather records")
            return result
        else:
            logger.error("Failed to fetch weather data")
            return []

    async def get_weather_forecast(
        self,
        tenant_id: str,
        days: int = 1,
        latitude: Optional[float] = None,
        longitude: Optional[float] = None
    ) -> List[Dict[str, Any]]:
        """
        Get weather forecast for location.

        Sends a POST request with a JSON body (the previous docstring
        claimed GET with query parameters, contradicting the code).

        Returns:
            List of forecast records; [] on failure (never None).
        """
        payload = {
            "latitude": latitude or DEFAULT_LATITUDE,    # Default Madrid coordinates
            "longitude": longitude or DEFAULT_LONGITUDE,
            "days": days
        }

        logger.info(f"Weather forecast request params: {payload}", tenant_id=tenant_id)

        result = await self._make_request(
            "POST",
            "weather/forecast",
            tenant_id=tenant_id,
            data=payload,
            timeout=200.0
        )

        if result:
            logger.info(f"Successfully fetched weather forecast for {days} days")
            return result
        else:
            logger.error("Failed to fetch weather forecast")
            return []

    # ================================================================
    # TRAFFIC DATA
    # ================================================================

    async def get_traffic_data(
        self,
        tenant_id: str,
        start_date: str,
        end_date: str,
        latitude: Optional[float] = None,
        longitude: Optional[float] = None
    ) -> Optional[List[Dict[str, Any]]]:
        """
        Get traffic data for a date range and location
        Uses POST request with extended timeout for Madrid traffic data processing

        Returns:
            List of traffic records, or None on failure.
        """
        # Prepare request payload
        payload = {
            "start_date": start_date,  # Already in ISO format from calling code
            "end_date": end_date,      # Already in ISO format from calling code
            "latitude": latitude or DEFAULT_LATITUDE,    # Default Madrid coordinates
            "longitude": longitude or DEFAULT_LONGITUDE
        }

        logger.info(f"Traffic request payload: {payload}", tenant_id=tenant_id)

        # Madrid traffic data can take 5-10 minutes to download and process
        traffic_timeout = httpx.Timeout(
            connect=30.0,  # Connection timeout
            read=600.0,    # Read timeout: 10 minutes (was 30s)
            write=30.0,    # Write timeout
            pool=30.0      # Pool timeout
        )

        # Use POST request with extended timeout
        logger.info("Making traffic data request",
                    url="traffic/historical",
                    tenant_id=tenant_id,
                    timeout=traffic_timeout.read)

        result = await self._make_request(
            "POST",
            "traffic/historical",
            tenant_id=tenant_id,
            data=payload,
            timeout=traffic_timeout
        )

        if result:
            logger.info(f"Successfully fetched {len(result)} traffic records")
            return result
        else:
            logger.error("Failed to fetch traffic data - _make_request returned None")
            logger.error("This could be due to: network timeout, HTTP error, authentication failure, or service unavailable")
            return None

    async def get_stored_traffic_data_for_training(
        self,
        tenant_id: str,
        start_date: str,
        end_date: str,
        latitude: Optional[float] = None,
        longitude: Optional[float] = None
    ) -> Optional[List[Dict[str, Any]]]:
        """
        Get stored traffic data specifically for model training/re-training
        This method prioritizes database-stored data over API calls

        Returns:
            List of stored traffic records, or None when none are available.
        """
        # Prepare request payload
        payload = {
            "start_date": start_date,
            "end_date": end_date,
            "latitude": latitude or DEFAULT_LATITUDE,    # Default Madrid coordinates
            "longitude": longitude or DEFAULT_LONGITUDE,
            "stored_only": True  # Flag to indicate we want stored data only
        }

        logger.info(f"Training traffic data request: {payload}", tenant_id=tenant_id)

        # Standard timeout since we're only querying the database
        training_timeout = httpx.Timeout(
            connect=30.0,
            read=120.0,  # 2 minutes should be enough for database query
            write=30.0,
            pool=30.0
        )

        result = await self._make_request(
            "POST",
            "traffic/stored",  # New endpoint for stored traffic data
            tenant_id=tenant_id,
            data=payload,
            timeout=training_timeout
        )

        if result:
            logger.info(f"Successfully retrieved {len(result)} stored traffic records for training")
            return result
        else:
            logger.warning("No stored traffic data available for training")
            return None

    # ================================================================
    # PRODUCTS
    # ================================================================

    async def get_products(self, tenant_id: str) -> Optional[List[Dict[str, Any]]]:
        """Get all products for a tenant"""
        result = await self.get("products", tenant_id=tenant_id)
        return result.get("products", []) if result else None

    async def get_product(self, tenant_id: str, product_id: str) -> Optional[Dict[str, Any]]:
        """Get a specific product"""
        return await self.get(f"products/{product_id}", tenant_id=tenant_id)

    async def create_product(
        self,
        tenant_id: str,
        name: str,
        category: str,
        price: float,
        **kwargs
    ) -> Optional[Dict[str, Any]]:
        """Create a new product"""
        data = {
            "name": name,
            "category": category,
            "price": price,
            **kwargs
        }
        return await self.post("products", data=data, tenant_id=tenant_id)

    async def update_product(
        self,
        tenant_id: str,
        product_id: str,
        **updates
    ) -> Optional[Dict[str, Any]]:
        """Update a product"""
        return await self.put(f"products/{product_id}", data=updates, tenant_id=tenant_id)

    # ================================================================
    # STORES & LOCATIONS
    # ================================================================

    async def get_stores(self, tenant_id: str) -> Optional[List[Dict[str, Any]]]:
        """Get all stores for a tenant"""
        result = await self.get("stores", tenant_id=tenant_id)
        return result.get("stores", []) if result else None

    async def get_store(self, tenant_id: str, store_id: str) -> Optional[Dict[str, Any]]:
        """Get a specific store"""
        return await self.get(f"stores/{store_id}", tenant_id=tenant_id)

    # ================================================================
    # DATA VALIDATION & HEALTH
    # ================================================================

    async def validate_data_quality(
        self,
        tenant_id: str,
        start_date: str,
        end_date: str
    ) -> Optional[Dict[str, Any]]:
        """Validate data quality for a date range"""
        params = {
            "start_date": start_date,
            "end_date": end_date
        }
        return await self.get("validation", tenant_id=tenant_id, params=params)

    async def get_data_statistics(
        self,
        tenant_id: str,
        start_date: Optional[str] = None,
        end_date: Optional[str] = None
    ) -> Optional[Dict[str, Any]]:
        """Get data statistics for a tenant"""
        params: Dict[str, Any] = {}
        if start_date:
            params["start_date"] = start_date
        if end_date:
            params["end_date"] = end_date

        return await self.get("statistics", tenant_id=tenant_id, params=params)