Start fixing forecast service API 3

This commit is contained in:
Urtzi Alfaro
2025-07-29 15:08:55 +02:00
parent dfb619a7b5
commit 84ed4a7a2e
14 changed files with 1607 additions and 447 deletions

View File

@@ -47,7 +47,7 @@ async def create_single_forecast(
)
# Generate forecast
forecast = await forecasting_service.generate_forecast(request, db)
forecast = await forecasting_service.generate_forecast(tenant_id, request, db)
# Convert to response model
return ForecastResponse(
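The route now receives the tenant explicitly rather than reading it from the request body. A minimal sketch of the updated handler under that assumption (the router path, the get_db dependency, and the response mapping are not shown in this hunk and are assumptions):

# Hedged sketch; decorator, dependency, and response mapping are assumed
from fastapi import APIRouter, Depends
from sqlalchemy.ext.asyncio import AsyncSession

router = APIRouter()

@router.post("/tenants/{tenant_id}/forecast", response_model=ForecastResponse)
async def create_single_forecast(
    tenant_id: str,                      # tenant now comes from the URL path
    request: ForecastRequest,
    db: AsyncSession = Depends(get_db),  # assumed session dependency
):
    forecast = await forecasting_service.generate_forecast(tenant_id, request, db)
    # ...convert to ForecastResponse exactly as the original handler does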

View File

@@ -24,14 +24,7 @@ class ForecastRequest(BaseModel):
"""Request schema for generating forecasts"""
tenant_id: str = Field(..., description="Tenant ID")
product_name: str = Field(..., description="Product name")
location: str = Field(..., description="Location identifier")
forecast_date: date = Field(..., description="Date for which to generate forecast")
business_type: BusinessType = Field(BusinessType.INDIVIDUAL, description="Business model type")
# Optional context
include_weather: bool = Field(True, description="Include weather data in forecast")
include_traffic: bool = Field(True, description="Include traffic data in forecast")
confidence_level: float = Field(0.8, ge=0.5, le=0.95, description="Confidence level for intervals")
@validator('forecast_date')
def validate_forecast_date(cls, v):
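With location, business type, and the weather/traffic/confidence options removed, a request carries only the tenant, product, and date. A hedged construction sketch (values are hypothetical; the validator body is elided in the diff):

from datetime import date

request = ForecastRequest(
    tenant_id="tenant-123",          # hypothetical tenant ID
    product_name="baguette",         # hypothetical product
    forecast_date=date(2025, 8, 1),  # checked by validate_forecast_date
)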

View File

@@ -0,0 +1,64 @@
# services/forecasting/app/services/data_client.py
"""
Forecasting Service Data Client
Migrated to use shared service clients - much simpler now!
"""
import structlog
from typing import Dict, Any, List, Optional
from datetime import datetime
# Import the shared clients
from shared.clients import get_data_client, get_service_clients
from app.core.config import settings
logger = structlog.get_logger()
class DataClient:
"""
Data client for the forecasting service
Now uses the shared data service client under the hood
"""
def __init__(self):
# Get the shared data client configured for this service
self.data_client = get_data_client(settings, "forecasting")
# Or alternatively, get all clients at once:
# self.clients = get_service_clients(settings, "forecasting")
# Then use: self.clients.data.get_sales_data(...)
async def fetch_weather_forecast(
self,
tenant_id: str,
days: int,
latitude: Optional[float] = None,
longitude: Optional[float] = None
) -> List[Dict[str, Any]]:
"""
Fetch weather data for forecasts
All the error handling and retry logic is now in the base client!
"""
try:
weather_data = await self.data_client.get_weather_forecast(
tenant_id=tenant_id,
days=days,
latitude=latitude,
longitude=longitude
)
if weather_data:
logger.info(f"Fetched {len(weather_data)} weather records",
tenant_id=tenant_id)
return weather_data
else:
logger.warning("No weather data returned", tenant_id=tenant_id)
return []
except Exception as e:
logger.error(f"Error fetching weather data: {e}", tenant_id=tenant_id)
return []
# Global instance - same as before, but much simpler implementation
data_client = DataClient()
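Assuming the shared client resolves the data service from settings as above, typical use from an async forecasting task looks like this (tenant and coordinates are hypothetical; the Madrid defaults appear elsewhere in this codebase):

# Inside an async function
weather = await data_client.fetch_weather_forecast(
    tenant_id="tenant-123",
    days=1,
    latitude=40.4168,
    longitude=-3.7038,
)
# The client returns [] on any error, so iteration needs no extra guards
for record in weather:
    temperature = record.get("temperature")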

View File

@@ -21,6 +21,8 @@ from app.services.prediction_service import PredictionService
from app.services.messaging import publish_forecast_completed, publish_alert_created
from app.core.config import settings
from shared.monitoring.metrics import MetricsCollector
from app.services.model_client import ModelClient
from app.services.data_client import DataClient
logger = structlog.get_logger()
metrics = MetricsCollector("forecasting-service")
@@ -33,6 +35,8 @@ class ForecastingService:
def __init__(self):
self.prediction_service = PredictionService()
self.model_client = ModelClient()
self.data_client = DataClient()
async def generate_forecast(self, tenant_id: str, request: ForecastRequest, db: AsyncSession) -> Forecast:
"""Generate a single forecast for a product"""
@@ -47,8 +51,7 @@ class ForecastingService:
# Get the latest trained model for this tenant/product
model_info = await self._get_latest_model(
request.tenant_id,
request.product_name,
request.location
request.product_name,
)
if not model_info:
@@ -66,10 +69,9 @@ class ForecastingService:
# Create forecast record
forecast = Forecast(
tenant_id=uuid.UUID(request.tenant_id),
product_name=request.product_name,
location=request.location,
forecast_date=datetime.combine(request.forecast_date, datetime.min.time()),
tenant_id=uuid.UUID(tenant_id),
product_name=product_name,
forecast_date=datetime.combine(forecast_date, datetime.min.time()),
# Prediction results
predicted_demand=prediction_result["demand"],
@@ -243,27 +245,12 @@ class ForecastingService:
logger.error("Error retrieving forecasts", error=str(e))
raise
async def _get_latest_model(self, tenant_id: str, product_name: str, location: str) -> Optional[Dict[str, Any]]:
async def _get_latest_model(self, tenant_id: str, product_name: str) -> Optional[Dict[str, Any]]:
"""Get the latest trained model for a tenant/product combination"""
try:
# Call training service to get model information
async with httpx.AsyncClient() as client:
response = await client.get(
f"{settings.TRAINING_SERVICE_URL}/tenants/{tenant_id}/models/{product_name}/active",
params={},
headers={"X-Service-Auth": settings.SERVICE_AUTH_TOKEN}
)
if response.status_code == 200:
return response.json()
elif response.status_code == 404:
logger.warning("No model found",
tenant_id=tenant_id,
product=product_name)
return None
else:
response.raise_for_status()
model_data = await self.model_client.get_best_model_for_forecasting(tenant_id, product_name)
return model_data
except Exception as e:
logger.error("Error getting latest model", error=str(e))
@@ -275,22 +262,15 @@ class ForecastingService:
features = {
"date": request.forecast_date.isoformat(),
"day_of_week": request.forecast_date.weekday(),
"is_weekend": request.forecast_date.weekday() >= 5,
"business_type": request.business_type.value
"is_weekend": request.forecast_date.weekday() >= 5
}
# Add Spanish holidays
features["is_holiday"] = await self._is_spanish_holiday(request.forecast_date)
# Add weather data if requested
if request.include_weather:
weather_data = await self._get_weather_forecast(request.forecast_date)
features.update(weather_data)
# Add traffic data if requested
if request.include_traffic:
traffic_data = await self._get_traffic_forecast(request.forecast_date, request.location)
features.update(traffic_data)
weather_data = await self._get_weather_forecast(request.tenant_id, 1)
features.update(weather_data)
return features
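For a Saturday with no holiday, the resulting features dict would look roughly like this (the weather keys depend on what the data service returns; the values are illustrative only):

features = {
    "date": "2025-08-02",
    "day_of_week": 5,      # Saturday
    "is_weekend": True,
    "is_holiday": False,
    # merged in from _get_weather_forecast
    "temperature": 31.5,
    "precipitation": 0.0,
}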
@@ -315,61 +295,16 @@ class ForecastingService:
logger.warning("Error checking holiday status", error=str(e))
return False
async def _get_weather_forecast(self, forecast_date: date) -> Dict[str, Any]:
async def _get_weather_forecast(self, tenant_id: str, days: int) -> Dict[str, Any]:
"""Get weather forecast for the date"""
try:
# Call data service for weather forecast
async with httpx.AsyncClient() as client:
response = await client.get(
f"{settings.DATA_SERVICE_URL}/api/v1/weather/forecast",
params={"date": forecast_date.isoformat()},
headers={"X-Service-Auth": settings.SERVICE_AUTH_TOKEN}
)
if response.status_code == 200:
weather = response.json()
return {
"temperature": weather.get("temperature"),
"precipitation": weather.get("precipitation"),
"humidity": weather.get("humidity"),
"weather_description": weather.get("description")
}
else:
return {}
weather_records = await self.data_client.fetch_weather_forecast(tenant_id, days)
# fetch_weather_forecast returns a list of daily records; features expect a flat dict
return weather_records[0] if weather_records else {}
except Exception as e:
logger.warning("Error getting weather forecast", error=str(e))
return {}
async def _get_traffic_forecast(self, forecast_date: date, location: str) -> Dict[str, Any]:
"""Get traffic forecast for the date and location"""
try:
# Call data service for traffic forecast
async with httpx.AsyncClient() as client:
response = await client.get(
f"{settings.DATA_SERVICE_URL}/api/v1/traffic/forecast",
params={
"date": forecast_date.isoformat(),
"location": location
},
headers={"X-Service-Auth": settings.SERVICE_AUTH_TOKEN}
)
if response.status_code == 200:
traffic = response.json()
return {
"traffic_volume": traffic.get("volume"),
"pedestrian_count": traffic.get("pedestrian_count")
}
else:
return {}
except Exception as e:
logger.warning("Error getting traffic forecast", error=str(e))
return {}
async def _check_and_create_alerts(self, forecast: Forecast, db: AsyncSession):
"""Check forecast and create alerts if needed"""

View File

@@ -0,0 +1,183 @@
# services/forecasting/app/services/model_client.py
"""
Forecast Service Model Client
Demonstrates calling training service to get models
"""
import structlog
from typing import Dict, Any, List, Optional
# Import shared clients - no more code duplication!
from shared.clients import get_service_clients, get_training_client, get_data_client
from app.core.config import settings
logger = structlog.get_logger()
class ModelClient:
"""
Client for managing models in forecasting service
Shows how to call multiple services cleanly
"""
def __init__(self):
# Option 1: Get all clients at once
self.clients = get_service_clients(settings, "forecasting")
# Option 2: Get specific clients
# self.training_client = get_training_client(settings, "forecasting")
# self.data_client = get_data_client(settings, "forecasting")
async def get_available_models(
self,
tenant_id: str,
model_type: Optional[str] = None
) -> List[Dict[str, Any]]:
"""
Get available trained models from training service
"""
try:
models = await self.clients.training.list_models(
tenant_id=tenant_id,
status="deployed", # Only get deployed models
model_type=model_type
)
if models:
logger.info(f"Found {len(models)} available models",
tenant_id=tenant_id, model_type=model_type)
return models
else:
logger.warning("No available models found", tenant_id=tenant_id)
return []
except Exception as e:
logger.error(f"Error fetching available models: {e}", tenant_id=tenant_id)
return []
async def get_best_model_for_forecasting(
self,
tenant_id: str,
product_id: Optional[str] = None
) -> Optional[Dict[str, Any]]:
"""
Get the best model for forecasting based on performance metrics
"""
try:
# Get latest model
latest_model = await self.clients.training.get_latest_model(
tenant_id=tenant_id,
model_type="forecasting"
)
if not latest_model:
logger.warning("No trained models found", tenant_id=tenant_id)
return None
# Get model metrics to validate quality
metrics = await self.clients.training.get_model_metrics(
tenant_id=tenant_id,
model_id=latest_model["id"]
)
if metrics and metrics.get("accuracy", 0) > 0.7: # 70% accuracy threshold
logger.info(f"Selected model {latest_model['id']} with accuracy {metrics.get('accuracy')}",
tenant_id=tenant_id)
return latest_model
else:
logger.warning(f"Model accuracy too low: {metrics.get('accuracy', 'unknown')}",
tenant_id=tenant_id)
return None
except Exception as e:
logger.error(f"Error selecting best model: {e}", tenant_id=tenant_id)
return None
async def validate_model_data_compatibility(
self,
tenant_id: str,
model_id: str,
forecast_start_date: str,
forecast_end_date: str
) -> Dict[str, Any]:
"""
Validate that we have sufficient data for the model to make forecasts
Demonstrates calling both training and data services
"""
try:
# Get model details from training service
model = await self.clients.training.get_model(
tenant_id=tenant_id,
model_id=model_id
)
if not model:
return {"is_valid": False, "error": "Model not found"}
# Get data statistics from data service
data_stats = await self.clients.data.get_data_statistics(
tenant_id=tenant_id,
start_date=forecast_start_date,
end_date=forecast_end_date
)
if not data_stats:
return {"is_valid": False, "error": "Could not retrieve data statistics"}
# Check if we have minimum required data points
min_required = model.get("metadata", {}).get("min_data_points", 30)
available_points = data_stats.get("total_records", 0)
is_valid = available_points >= min_required
result = {
"is_valid": is_valid,
"model_id": model_id,
"required_points": min_required,
"available_points": available_points,
"data_coverage": data_stats.get("coverage_percentage", 0)
}
if not is_valid:
result["error"] = f"Insufficient data: need {min_required}, have {available_points}"
logger.info("Model data compatibility check completed",
tenant_id=tenant_id, model_id=model_id, is_valid=is_valid)
return result
except Exception as e:
logger.error(f"Error validating model compatibility: {e}",
tenant_id=tenant_id, model_id=model_id)
return {"is_valid": False, "error": str(e)}
async def trigger_model_retraining(
self,
tenant_id: str,
include_weather: bool = True,
include_traffic: bool = False
) -> Optional[Dict[str, Any]]:
"""
Trigger a new training job if current model is outdated
"""
try:
# Create training job through training service
job = await self.clients.training.create_training_job(
tenant_id=tenant_id,
include_weather=include_weather,
include_traffic=include_traffic,
min_data_points=50 # Higher threshold for forecasting
)
if job:
logger.info(f"Training job created: {job['job_id']}", tenant_id=tenant_id)
return job
else:
logger.error("Failed to create training job", tenant_id=tenant_id)
return None
except Exception as e:
logger.error(f"Error triggering model retraining: {e}", tenant_id=tenant_id)
return None
# Global instance
model_client = ModelClient()
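A short usage sketch for the client above, for example from a scheduled model health check (tenant and dates are hypothetical):

best = await model_client.get_best_model_for_forecasting("tenant-123")
if best:
    check = await model_client.validate_model_data_compatibility(
        tenant_id="tenant-123",
        model_id=best["id"],
        forecast_start_date="2025-08-01",
        forecast_end_date="2025-08-07",
    )
    # Retrain when the data no longer supports the deployed model
    if not check["is_valid"]:
        await model_client.trigger_model_retraining("tenant-123")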

View File

@@ -1,81 +0,0 @@
import time
import structlog
from typing import Dict, Any
from shared.auth.jwt_handler import JWTHandler
from app.core.config import settings
logger = structlog.get_logger()
class ServiceAuthenticator:
"""Handles service-to-service authentication via gateway"""
def __init__(self):
self.jwt_handler = JWTHandler(settings.JWT_SECRET_KEY)
self._cached_token = None
self._token_expires_at = 0
async def get_service_token(self) -> str:
"""
Get a valid service token, using cache when possible
Creates JWT tokens that the gateway will accept
"""
current_time = int(time.time())
# Return cached token if still valid (with 5 min buffer)
if (self._cached_token and
self._token_expires_at > current_time + 300):
return self._cached_token
# Create new service token
token_expires_at = current_time + 3600 # 1 hour
service_payload = {
# ✅ Required fields for gateway middleware
"sub": "training-service",
"user_id": "training-service",
"email": "training-service@internal",
"type": "access", # ✅ Must be "access" for gateway
# ✅ Expiration and timing
"exp": token_expires_at,
"iat": current_time,
"iss": "training-service",
# ✅ Service identification
"service": "training",
"full_name": "Training Service",
"is_verified": True,
"is_active": True,
# ✅ Optional tenant context (can be overridden per request)
"tenant_id": None
}
try:
token = self.jwt_handler.create_access_token_from_payload(service_payload)
# Cache the token
self._cached_token = token
self._token_expires_at = token_expires_at
logger.debug("Created new service token", expires_at=token_expires_at)
return token
except Exception as e:
logger.error(f"Failed to create service token: {e}")
raise ValueError(f"Service token creation failed: {e}")
def get_request_headers(self, tenant_id: str = None) -> Dict[str, str]:
"""Get standard headers for service requests"""
headers = {
"X-Service": "training-service",
"User-Agent": "training-service/1.0.0"
}
if tenant_id:
headers["X-Tenant-ID"] = str(tenant_id)
return headers
# Global authenticator instance
service_auth = ServiceAuthenticator()

View File

@@ -1,219 +1,89 @@
import httpx
# services/training/app/services/data_client.py
"""
Training Service Data Client
Migrated to use shared service clients - much simpler now!
"""
import structlog
from typing import List, Dict, Any, Optional
from typing import Dict, Any, List, Optional
from datetime import datetime
# Import the shared clients
from shared.clients import get_data_client, get_service_clients
from app.core.config import settings
from app.core.service_auth import service_auth
logger = structlog.get_logger()
class DataServiceClient:
"""Client for fetching data through the API Gateway"""
class DataClient:
"""
Data client for training service
Now uses the shared data service client under the hood
"""
def __init__(self):
self.base_url = settings.API_GATEWAY_URL
self.timeout = 2000.0
async def fetch_sales_data(self, tenant_id: str) -> List[Dict[str, Any]]:
"""
Fetch all sales data for training (no pagination limits)
FIXED: Retrieves ALL records instead of being limited to 1000
"""
try:
# Get service token
token = await service_auth.get_service_token()
# Prepare headers
headers = service_auth.get_request_headers(tenant_id)
headers["Authorization"] = f"Bearer {token}"
all_records = []
page = 0
page_size = 5000 # Use maximum allowed by API
while True:
# Prepare query parameters for pagination
params = {
"limit": page_size,
"offset": page * page_size
}
logger.info(f"Fetching sales data page {page + 1} (offset: {page * page_size})",
tenant_id=tenant_id)
# Make GET request via gateway with pagination
async with httpx.AsyncClient(timeout=self.timeout) as client:
response = await client.get(
f"{self.base_url}/api/v1/tenants/{tenant_id}/sales",
headers=headers,
params=params
)
if response.status_code == 200:
page_data = response.json()
# Handle different response formats
if isinstance(page_data, list):
# Direct list response (no pagination metadata)
records = page_data
logger.info(f"Retrieved {len(records)} records from page {page + 1} (direct list)")
# For direct list responses, we need to check if we got the max possible
# If we got less than page_size, we're done
if len(records) == 0:
logger.info("No records in response, pagination complete")
break
elif len(records) < page_size:
# Got fewer than requested, this is the last page
all_records.extend(records)
logger.info(f"Final page: retrieved {len(records)} records, total: {len(all_records)}")
break
else:
# Got full page, there might be more
all_records.extend(records)
logger.info(f"Full page retrieved: {len(records)} records, continuing to next page")
elif isinstance(page_data, dict):
# Paginated response format
records = page_data.get('records', page_data.get('data', []))
total_available = page_data.get('total', 0)
logger.info(f"Retrieved {len(records)} records from page {page + 1} (paginated response)")
if not records:
logger.info("No more records found in paginated response")
break
all_records.extend(records)
# Check if we've got all available records
if len(all_records) >= total_available:
logger.info(f"Retrieved all available records: {len(all_records)}/{total_available}")
break
else:
logger.warning(f"Unexpected response format: {type(page_data)}")
records = []
break
page += 1
# Safety break to prevent infinite loops
if page > 100: # Max 500,000 records (100 * 5000)
logger.warning("Reached maximum page limit, stopping pagination")
break
elif response.status_code == 401:
logger.error("Authentication failed with gateway",
tenant_id=tenant_id,
response_text=response.text)
return []
elif response.status_code == 404:
logger.warning("Sales data endpoint not found",
tenant_id=tenant_id,
url=response.url)
return []
else:
logger.error(f"Gateway request failed: HTTP {response.status_code}",
tenant_id=tenant_id,
response_text=response.text)
return []
logger.info(f"Successfully fetched {len(all_records)} total sales records via gateway",
tenant_id=tenant_id)
return all_records
except httpx.TimeoutException:
logger.error("Timeout when fetching sales data via gateway",
tenant_id=tenant_id)
return []
except Exception as e:
logger.error(f"Error fetching sales data via gateway: {e}",
tenant_id=tenant_id)
return []
# Get the shared data client configured for this service
self.data_client = get_data_client(settings, "training")
async def fetch_weather_data(
self,
# Or alternatively, get all clients at once:
# self.clients = get_service_clients(settings, "training")
# Then use: self.clients.data.get_sales_data(...)
async def fetch_sales_data(
self,
tenant_id: str,
start_date: str,
end_date: str,
latitude: Optional[float] = None,
longitude: Optional[float] = None
start_date: Optional[str] = None,
end_date: Optional[str] = None,
product_id: Optional[str] = None,
fetch_all: bool = True
) -> List[Dict[str, Any]]:
"""
Fetch historical weather data for training via API Gateway using POST
Fetch sales data for training
Args:
tenant_id: Tenant identifier
start_date: Start date in ISO format
end_date: End date in ISO format
product_id: Optional product filter
fetch_all: If True, fetches ALL records using pagination (original behavior)
If False, fetches limited records (standard API response)
"""
try:
# Get service token
token = await service_auth.get_service_token()
# Prepare headers
headers = service_auth.get_request_headers(tenant_id)
headers["Authorization"] = f"Bearer {token}"
headers["Content-Type"] = "application/json"
# Prepare request payload with proper date handling
payload = {
"start_date": start_date, # Already in ISO format from calling code
"end_date": end_date, # Already in ISO format from calling code
"latitude": latitude or 40.4168, # Default Madrid coordinates
"longitude": longitude or -3.7038
}
logger.info(f"Weather request payload: {payload}", tenant_id=tenant_id)
# Make POST request via gateway
async with httpx.AsyncClient(timeout=self.timeout) as client:
response = await client.post(
f"{self.base_url}/api/v1/tenants/{tenant_id}/weather/historical",
headers=headers,
json=payload
if fetch_all:
# Use paginated method to get ALL records (original behavior)
sales_data = await self.data_client.get_all_sales_data(
tenant_id=tenant_id,
start_date=start_date,
end_date=end_date,
product_id=product_id,
aggregation="daily",
page_size=5000, # Match original page size
max_pages=100 # Safety limit (500k records max)
)
else:
# Use standard method for limited results
sales_data = await self.data_client.get_sales_data(
tenant_id=tenant_id,
start_date=start_date,
end_date=end_date,
product_id=product_id,
aggregation="daily"
)
sales_data = sales_data or []
if sales_data:
logger.info(f"Fetched {len(sales_data)} sales records",
tenant_id=tenant_id, product_id=product_id, fetch_all=fetch_all)
return sales_data
else:
logger.warning("No sales data returned", tenant_id=tenant_id)
return []
logger.info(f"Weather data request: {response.status_code}",
tenant_id=tenant_id,
url=response.url)
if response.status_code == 200:
data = response.json()
logger.info(f"Successfully fetched {len(data)} weather records")
return data
elif response.status_code == 400:
error_details = response.text
logger.error(f"Weather API validation error (400): {error_details}")
# Try to parse the error and provide helpful info
try:
error_json = response.json()
if 'detail' in error_json:
detail = error_json['detail']
if 'End date must be after start date' in str(detail):
logger.error(f"Date range issue: start={start_date}, end={end_date}")
elif 'Date range cannot exceed 90 days' in str(detail):
logger.error(f"Date range too large: {start_date} to {end_date}")
except:
pass
return []
elif response.status_code == 401:
logger.error("Authentication failed for weather API")
return []
else:
logger.error(f"Failed to fetch weather data: {response.status_code} - {response.text}")
return []
except httpx.TimeoutException:
logger.error("Timeout when fetching weather data")
return []
except Exception as e:
logger.error(f"Error fetching weather data: {str(e)}")
logger.error(f"Error fetching sales data: {e}", tenant_id=tenant_id)
return []
async def fetch_traffic_data(
self,
async def fetch_weather_data(
self,
tenant_id: str,
start_date: str,
end_date: str,
@@ -221,65 +91,90 @@ class DataServiceClient:
longitude: Optional[float] = None
) -> List[Dict[str, Any]]:
"""
Fetch historical traffic data for training via API Gateway using POST
Fetch weather data for training
All the error handling and retry logic is now in the base client!
"""
try:
# Get service token
token = await service_auth.get_service_token()
# Prepare headers
headers = service_auth.get_request_headers(tenant_id)
headers["Authorization"] = f"Bearer {token}"
headers["Content-Type"] = "application/json"
# Prepare request payload
payload = {
"start_date": start_date, # Already in ISO format from calling code
"end_date": end_date, # Already in ISO format from calling code
"latitude": latitude or 40.4168, # Default Madrid coordinates
"longitude": longitude or -3.7038
}
logger.info(f"Traffic request payload: {payload}", tenant_id=tenant_id)
# Madrid traffic data can take 5-10 minutes to download and process
timeout_config = httpx.Timeout(
connect=30.0, # Connection timeout
read=600.0, # Read timeout: 10 minutes (was 30s)
write=30.0, # Write timeout
pool=30.0 # Pool timeout
weather_data = await self.data_client.get_weather_historical(
tenant_id=tenant_id,
start_date=start_date,
end_date=end_date,
latitude=latitude,
longitude=longitude
)
# Make POST request via gateway
async with httpx.AsyncClient(timeout=timeout_config) as client:
response = await client.post(
f"{self.base_url}/api/v1/tenants/{tenant_id}/traffic/historical",
headers=headers,
json=payload
)
if weather_data:
logger.info(f"Fetched {len(weather_data)} weather records",
tenant_id=tenant_id)
return weather_data
else:
logger.warning("No weather data returned", tenant_id=tenant_id)
return []
logger.info(f"Traffic data request: {response.status_code}",
tenant_id=tenant_id,
url=response.url)
if response.status_code == 200:
data = response.json()
logger.info(f"Successfully fetched {len(data)} traffic records")
return data
elif response.status_code == 400:
error_details = response.text
logger.error(f"Traffic API validation error (400): {error_details}")
return []
elif response.status_code == 401:
logger.error("Authentication failed for traffic API")
return []
else:
logger.error(f"Failed to fetch traffic data: {response.status_code} - {response.text}")
return []
except httpx.TimeoutException:
logger.error("Timeout when fetching traffic data")
return []
except Exception as e:
logger.error(f"Error fetching traffic data: {str(e)}")
return []
logger.error(f"Error fetching weather data: {e}", tenant_id=tenant_id)
return []
async def fetch_traffic_data(
self,
tenant_id: str,
start_date: str,
end_date: str,
latitude: Optional[float] = None,
longitude: Optional[float] = None
) -> List[Dict[str, Any]]:
"""
Fetch traffic data for training
"""
try:
traffic_data = await self.data_client.get_traffic_data(
tenant_id=tenant_id,
start_date=start_date,
end_date=end_date,
latitude=latitude,
longitude=longitude
)
if traffic_data:
logger.info(f"Fetched {len(traffic_data)} traffic records",
tenant_id=tenant_id)
return traffic_data
else:
logger.warning("No traffic data returned", tenant_id=tenant_id)
return []
except Exception as e:
logger.error(f"Error fetching traffic data: {e}", tenant_id=tenant_id)
return []
async def validate_data_quality(
self,
tenant_id: str,
start_date: str,
end_date: str
) -> Dict[str, Any]:
"""
Validate data quality before training
"""
try:
validation_result = await self.data_client.validate_data_quality(
tenant_id=tenant_id,
start_date=start_date,
end_date=end_date
)
if validation_result:
logger.info("Data validation completed",
tenant_id=tenant_id,
is_valid=validation_result.get("is_valid", False))
return validation_result
else:
logger.warning("Data validation failed", tenant_id=tenant_id)
return {"is_valid": False, "errors": ["Validation service unavailable"]}
except Exception as e:
logger.error(f"Error validating data: {e}", tenant_id=tenant_id)
return {"is_valid": False, "errors": [str(e)]}
# Global instance - same as before, but much simpler implementation
data_client = DataClient()
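Typical training-pipeline usage of the migrated client (tenant and dates are hypothetical). fetch_all=True preserves the old paginate-everything behavior, and the quality gate mirrors the {"is_valid", "errors"} shape returned by validate_data_quality:

sales = await data_client.fetch_sales_data(
    tenant_id="tenant-123",
    start_date="2025-01-01",
    end_date="2025-06-30",
    fetch_all=True,  # walk every page, as the old gateway client did
)
quality = await data_client.validate_data_quality("tenant-123", "2025-01-01", "2025-06-30")
if not quality["is_valid"]:
    logger.warning("Skipping training run", errors=quality.get("errors"))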

View File

@@ -13,7 +13,7 @@ from concurrent.futures import ThreadPoolExecutor
from datetime import timezone
import pandas as pd
from app.services.data_client import DataServiceClient
from app.services.data_client import DataClient
from app.services.date_alignment_service import DateAlignmentService, DateRange, DataSourceType, AlignedDateRange
logger = logging.getLogger(__name__)
@@ -37,7 +37,7 @@ class TrainingDataOrchestrator:
madrid_client=None,
weather_client=None,
date_alignment_service: DateAlignmentService = None):
self.data_client = DataServiceClient()
self.data_client = DataClient()
self.date_alignment_service = date_alignment_service or DateAlignmentService()
self.max_concurrent_requests = 3