# Source file: bakery-ia/shared/clients/data_client.py
# Snapshot: 2025-07-29 15:08:55 +02:00 — 399 lines, 14 KiB, Python
# shared/clients/data_client.py
"""
Data Service Client
Handles all API calls to the data service
"""
import httpx
import structlog
from typing import Dict, Any, Optional, List, Union
from .base_service_client import BaseServiceClient
from shared.config.base import BaseServiceSettings
logger = structlog.get_logger()
class DataServiceClient(BaseServiceClient):
    """Client for communicating with the data service.

    Async wrapper around the data service's REST endpoints: sales (with
    pagination), weather, traffic, products, stores, and data-quality
    endpoints. Transport, URL construction, and error handling are delegated
    to BaseServiceClient; a failed request surfaces here as a None result
    (or an empty list for the weather/traffic helpers).
    """

    # Default location (Madrid city centre) used when the caller supplies
    # no coordinates for weather/traffic lookups.
    _DEFAULT_LATITUDE = 40.4168
    _DEFAULT_LONGITUDE = -3.7038

    # Extended request timeout in seconds for long-running data pulls;
    # matches the original implementation's value.
    _LONG_TIMEOUT = 2000.0

    def __init__(self, config: BaseServiceSettings, calling_service_name: str = "unknown"):
        super().__init__(calling_service_name, config)

    def get_service_base_path(self) -> str:
        """Return the URL prefix shared by all data-service endpoints."""
        return "/api/v1"

    # ================================================================
    # SALES DATA (with advanced pagination support)
    # ================================================================

    @staticmethod
    def _build_sales_params(
        aggregation: str,
        start_date: Optional[str] = None,
        end_date: Optional[str] = None,
        product_id: Optional[str] = None,
    ) -> Dict[str, Any]:
        """Build the query-parameter dict shared by the sales endpoints.

        Optional filters are included only when provided so the service
        applies its own defaults for absent parameters.
        """
        params: Dict[str, Any] = {"aggregation": aggregation}
        if start_date:
            params["start_date"] = start_date
        if end_date:
            params["end_date"] = end_date
        if product_id:
            params["product_id"] = product_id
        return params

    async def get_sales_data(
        self,
        tenant_id: str,
        start_date: Optional[str] = None,
        end_date: Optional[str] = None,
        product_id: Optional[str] = None,
        aggregation: str = "daily"
    ) -> Optional[List[Dict[str, Any]]]:
        """Get sales data for a date range.

        Returns the list under the response's "sales" key ([] when the key
        is absent), or None when the request itself failed.
        """
        params = self._build_sales_params(aggregation, start_date, end_date, product_id)
        result = await self.get("sales", tenant_id=tenant_id, params=params)
        return result.get("sales", []) if result else None

    async def get_all_sales_data(
        self,
        tenant_id: str,
        start_date: Optional[str] = None,
        end_date: Optional[str] = None,
        product_id: Optional[str] = None,
        aggregation: str = "daily",
        page_size: int = 5000,
        max_pages: int = 100
    ) -> List[Dict[str, Any]]:
        """
        Get ALL sales data using pagination (equivalent to original fetch_sales_data).

        Retrieves every record by paging through the "sales" endpoint; falls
        back to a local limit/offset loop if the inherited pagination helper
        is unavailable on BaseServiceClient.
        """
        params = self._build_sales_params(aggregation, start_date, end_date, product_id)
        try:
            # Preferred path: inherited paginated-request helper.
            all_records = await self.get_paginated(
                "sales",
                tenant_id=tenant_id,
                params=params,
                page_size=page_size,
                max_pages=max_pages,
                timeout=self._LONG_TIMEOUT  # Match original timeout
            )
            logger.info(f"Successfully fetched {len(all_records)} total sales records via gateway",
                        tenant_id=tenant_id)
            return all_records
        except AttributeError as e:
            # Fallback: implement pagination directly if inheritance isn't working
            logger.warning(f"Using fallback pagination due to: {e}")
            return await self._fallback_paginated_sales(tenant_id, params, page_size, max_pages)

    async def _fallback_paginated_sales(
        self,
        tenant_id: str,
        base_params: Dict[str, Any],
        page_size: int = 5000,
        max_pages: int = 100
    ) -> List[Dict[str, Any]]:
        """
        Fallback pagination implementation for sales data.

        Replicates the original limit/offset loop directly against the
        "sales" endpoint and handles both response shapes: a plain list
        (completion inferred from page size) and a paginated dict with
        'records'/'data' plus a 'total' count.
        """
        all_records: List[Dict[str, Any]] = []
        page = 0
        logger.info(f"Starting fallback paginated request for sales data",
                    tenant_id=tenant_id, page_size=page_size)
        while page < max_pages:
            # Prepare pagination parameters for this page.
            params = base_params.copy()
            params.update({
                "limit": page_size,
                "offset": page * page_size
            })
            logger.info(f"Fetching sales data page {page + 1} (offset: {page * page_size})",
                        tenant_id=tenant_id)
            # Make request using the base client's _make_request method.
            result = await self._make_request(
                "GET",
                "sales",
                tenant_id=tenant_id,
                params=params,
                timeout=self._LONG_TIMEOUT
            )
            if result is None:
                logger.error(f"Failed to fetch page {page + 1}", tenant_id=tenant_id)
                break
            if isinstance(result, list):
                # Direct list response (no pagination metadata): a short or
                # empty page means we have reached the end.
                records = result
                logger.info(f"Retrieved {len(records)} records from page {page + 1} (direct list)")
                if len(records) == 0:
                    logger.info("No records in response, pagination complete")
                    break
                elif len(records) < page_size:
                    # Got fewer than requested, this is the last page.
                    all_records.extend(records)
                    logger.info(f"Final page: retrieved {len(records)} records, total: {len(all_records)}")
                    break
                else:
                    # Got a full page, there might be more.
                    all_records.extend(records)
                    logger.info(f"Full page retrieved: {len(records)} records, continuing to next page")
            elif isinstance(result, dict):
                # Paginated response format: records under 'records' or 'data'.
                records = result.get('records', result.get('data', []))
                total_available = result.get('total', 0)
                logger.info(f"Retrieved {len(records)} records from page {page + 1} (paginated response)")
                if not records:
                    logger.info("No more records found in paginated response")
                    break
                all_records.extend(records)
                # NOTE(review): if the response omits 'total' it defaults to 0
                # and this check stops after the first non-empty page —
                # behavior preserved from the original; confirm the service
                # always returns 'total'.
                if len(all_records) >= total_available:
                    logger.info(f"Retrieved all available records: {len(all_records)}/{total_available}")
                    break
            else:
                logger.warning(f"Unexpected response format: {type(result)}")
                break
            page += 1
        logger.info(f"Fallback pagination complete: fetched {len(all_records)} total records",
                    tenant_id=tenant_id, pages_fetched=page)
        return all_records

    async def upload_sales_data(
        self,
        tenant_id: str,
        sales_data: List[Dict[str, Any]]
    ) -> Optional[Dict[str, Any]]:
        """Upload sales data; returns the service response or None on failure."""
        data = {"sales": sales_data}
        return await self.post("sales", data=data, tenant_id=tenant_id)

    # ================================================================
    # WEATHER DATA
    # ================================================================

    async def get_weather_historical(
        self,
        tenant_id: str,
        start_date: str,
        end_date: str,
        latitude: Optional[float] = None,
        longitude: Optional[float] = None
    ) -> Optional[List[Dict[str, Any]]]:
        """
        Get weather data for a date range and location.

        Uses POST request as per original implementation. Dates are expected
        in ISO format from the calling code. Coordinates default to Madrid
        when omitted. Returns the record list, or [] on failure.
        """
        payload = {
            "start_date": start_date,
            "end_date": end_date,
            # Explicit None checks so a legitimate 0.0 coordinate is not
            # silently replaced by the default (the old `or` form was).
            "latitude": latitude if latitude is not None else self._DEFAULT_LATITUDE,
            "longitude": longitude if longitude is not None else self._DEFAULT_LONGITUDE
        }
        logger.info(f"Weather request payload: {payload}", tenant_id=tenant_id)
        # Use POST request with extended timeout.
        result = await self._make_request(
            "POST",
            "weather/historical",
            tenant_id=tenant_id,
            data=payload,
            timeout=self._LONG_TIMEOUT  # Match original timeout
        )
        if result:
            logger.info(f"Successfully fetched {len(result)} weather records")
            return result
        else:
            logger.error("Failed to fetch weather data")
            return []

    async def get_weather_forecast(
        self,
        tenant_id: str,
        days: str,
        latitude: Optional[float] = None,
        longitude: Optional[float] = None
    ) -> Optional[List[Dict[str, Any]]]:
        """
        Get a weather forecast for the next `days` days at a location.

        Args:
            tenant_id: Tenant the request is made on behalf of.
            days: Forecast horizon. NOTE(review): typed str to preserve the
                original signature; the service may expect an int — confirm.
            latitude / longitude: Optional coordinates; default to Madrid.

        Returns the forecast records, or [] on failure.
        """
        payload = {
            "days": days,
            "latitude": latitude if latitude is not None else self._DEFAULT_LATITUDE,
            "longitude": longitude if longitude is not None else self._DEFAULT_LONGITUDE
        }
        logger.info(f"Weather request payload: {payload}", tenant_id=tenant_id)
        # BUG FIX: this previously POSTed to "weather/historical" (copy-paste
        # from get_weather_historical), whose contract is start_date/end_date,
        # not a "days" horizon.
        result = await self._make_request(
            "POST",
            "weather/forecast",
            tenant_id=tenant_id,
            data=payload,
            timeout=self._LONG_TIMEOUT  # Match original timeout
        )
        if result:
            logger.info(f"Successfully fetched {len(result)} weather forecast for {days}")
            return result
        else:
            logger.error("Failed to fetch weather data")
            return []

    # ================================================================
    # TRAFFIC DATA
    # ================================================================

    async def get_traffic_data(
        self,
        tenant_id: str,
        start_date: str,
        end_date: str,
        latitude: Optional[float] = None,
        longitude: Optional[float] = None
    ) -> Optional[List[Dict[str, Any]]]:
        """
        Get traffic data for a date range and location.

        Uses POST request with an extended read timeout because Madrid
        traffic data can take 5-10 minutes to download and process server
        side. Returns the record list, or [] on failure.
        """
        payload = {
            "start_date": start_date,  # Already in ISO format from calling code
            "end_date": end_date,  # Already in ISO format from calling code
            "latitude": latitude if latitude is not None else self._DEFAULT_LATITUDE,
            "longitude": longitude if longitude is not None else self._DEFAULT_LONGITUDE
        }
        logger.info(f"Traffic request payload: {payload}", tenant_id=tenant_id)
        # Madrid traffic data can take 5-10 minutes to download and process.
        traffic_timeout = httpx.Timeout(
            connect=30.0,  # Connection timeout
            read=600.0,    # Read timeout: 10 minutes (was 30s)
            write=30.0,    # Write timeout
            pool=30.0      # Pool timeout
        )
        result = await self._make_request(
            "POST",
            "traffic/historical",
            tenant_id=tenant_id,
            data=payload,
            timeout=traffic_timeout
        )
        if result:
            logger.info(f"Successfully fetched {len(result)} traffic records")
            return result
        else:
            logger.error("Failed to fetch traffic data")
            return []

    # ================================================================
    # PRODUCTS
    # ================================================================

    async def get_products(self, tenant_id: str) -> Optional[List[Dict[str, Any]]]:
        """Get all products for a tenant (None when the request failed)."""
        result = await self.get("products", tenant_id=tenant_id)
        return result.get("products", []) if result else None

    async def get_product(self, tenant_id: str, product_id: str) -> Optional[Dict[str, Any]]:
        """Get a specific product by id."""
        return await self.get(f"products/{product_id}", tenant_id=tenant_id)

    async def create_product(
        self,
        tenant_id: str,
        name: str,
        category: str,
        price: float,
        **kwargs
    ) -> Optional[Dict[str, Any]]:
        """Create a new product; extra fields pass through via **kwargs."""
        data = {
            "name": name,
            "category": category,
            "price": price,
            **kwargs
        }
        return await self.post("products", data=data, tenant_id=tenant_id)

    async def update_product(
        self,
        tenant_id: str,
        product_id: str,
        **updates
    ) -> Optional[Dict[str, Any]]:
        """Update a product with the given field updates."""
        return await self.put(f"products/{product_id}", data=updates, tenant_id=tenant_id)

    # ================================================================
    # STORES & LOCATIONS
    # ================================================================

    async def get_stores(self, tenant_id: str) -> Optional[List[Dict[str, Any]]]:
        """Get all stores for a tenant (None when the request failed)."""
        result = await self.get("stores", tenant_id=tenant_id)
        return result.get("stores", []) if result else None

    async def get_store(self, tenant_id: str, store_id: str) -> Optional[Dict[str, Any]]:
        """Get a specific store by id."""
        return await self.get(f"stores/{store_id}", tenant_id=tenant_id)

    # ================================================================
    # DATA VALIDATION & HEALTH
    # ================================================================

    async def validate_data_quality(
        self,
        tenant_id: str,
        start_date: str,
        end_date: str
    ) -> Optional[Dict[str, Any]]:
        """Validate data quality for a date range."""
        params = {
            "start_date": start_date,
            "end_date": end_date
        }
        return await self.get("validation", tenant_id=tenant_id, params=params)

    async def get_data_statistics(
        self,
        tenant_id: str,
        start_date: Optional[str] = None,
        end_date: Optional[str] = None
    ) -> Optional[Dict[str, Any]]:
        """Get data statistics for a tenant, optionally bounded by dates."""
        params: Dict[str, Any] = {}
        if start_date:
            params["start_date"] = start_date
        if end_date:
            params["end_date"] = end_date
        return await self.get("statistics", tenant_id=tenant_id, params=params)