REFACTOR data service

Urtzi Alfaro
2025-08-12 18:17:30 +02:00
parent 7c237c0acc
commit fbe7470ad9
149 changed files with 8528 additions and 7393 deletions

services/external/Dockerfile vendored Normal file

@@ -0,0 +1,34 @@
# services/external/Dockerfile
FROM python:3.11-slim
WORKDIR /app
# Install system dependencies
RUN apt-get update && apt-get install -y \
gcc \
g++ \
curl \
&& rm -rf /var/lib/apt/lists/*
# Copy requirements and install Python dependencies
COPY services/external/requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# Copy shared modules first
COPY shared/ /app/shared/
# Copy application code
COPY services/external/app/ /app/app/
# Set Python path to include shared modules
ENV PYTHONPATH=/app
# Expose port
EXPOSE 8000
# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
CMD python -c "import requests; requests.get('http://localhost:8000/health', timeout=5).raise_for_status()" || exit 1
# Run the application
CMD ["python", "-m", "uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]

services/external/app/__init__.py vendored Normal file

@@ -0,0 +1 @@
# services/external/app/__init__.py

services/external/app/api/__init__.py vendored Normal file

@@ -0,0 +1 @@
# services/external/app/api/__init__.py

services/external/app/api/traffic.py vendored Normal file

@@ -0,0 +1,184 @@
# services/external/app/api/traffic.py
"""Traffic data API endpoints with improved error handling"""
from fastapi import APIRouter, Depends, HTTPException, Query, Path
from typing import List, Dict, Any
from datetime import datetime, timedelta
import structlog
from uuid import UUID
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.database import get_db
from app.services.traffic_service import TrafficService
from app.services.messaging import publish_traffic_updated
from app.schemas.traffic import (
TrafficDataResponse,
HistoricalTrafficRequest,
TrafficForecastRequest
)
from shared.auth.decorators import (
get_current_user_dep
)
router = APIRouter(tags=["traffic"])
traffic_service = TrafficService()
logger = structlog.get_logger()
@router.get("/tenants/{tenant_id}/traffic/current", response_model=TrafficDataResponse)
async def get_current_traffic(
latitude: float = Query(..., description="Latitude"),
longitude: float = Query(..., description="Longitude"),
tenant_id: UUID = Path(..., description="Tenant ID"),
current_user: Dict[str, Any] = Depends(get_current_user_dep),
):
"""Get current traffic data for location"""
try:
logger.debug("API: Getting current traffic", lat=latitude, lon=longitude)
traffic = await traffic_service.get_current_traffic(latitude, longitude)
if not traffic:
logger.warning("No traffic data available", lat=latitude, lon=longitude)
raise HTTPException(status_code=404, detail="Traffic data not available")
# Publish event (with error handling)
try:
await publish_traffic_updated({
"type": "current_requested",
"latitude": latitude,
"longitude": longitude,
"timestamp": datetime.utcnow().isoformat()
})
except Exception as pub_error:
logger.warning("Failed to publish traffic event", error=str(pub_error))
# Continue processing - event publishing failure shouldn't break the API
logger.debug("Successfully returning traffic data",
volume=traffic.traffic_volume,
congestion=traffic.congestion_level)
return traffic
except HTTPException:
# Re-raise HTTP exceptions
raise
except Exception as e:
logger.error("Unexpected error in traffic API", error=str(e))
import traceback
logger.error("Traffic API traceback", traceback=traceback.format_exc())
raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
@router.post("/tenants/{tenant_id}/traffic/historical")
async def get_historical_traffic(
request: HistoricalTrafficRequest,
db: AsyncSession = Depends(get_db),
tenant_id: UUID = Path(..., description="Tenant ID"),
current_user: Dict[str, Any] = Depends(get_current_user_dep),
):
"""Get historical traffic data with date range in payload"""
try:
# Validate date range
if request.end_date <= request.start_date:
raise HTTPException(status_code=400, detail="End date must be after start date")
if (request.end_date - request.start_date).days > 1000:
raise HTTPException(status_code=400, detail="Date range cannot exceed 90 days")
historical_data = await traffic_service.get_historical_traffic(
request.latitude, request.longitude, request.start_date, request.end_date, str(tenant_id)
)
# Publish event (with error handling)
try:
await publish_traffic_updated({
"type": "historical_requested",
"latitude": request.latitude,
"longitude": request.longitude,
"start_date": request.start_date.isoformat(),
"end_date": request.end_date.isoformat(),
"records_count": len(historical_data),
"timestamp": datetime.utcnow().isoformat()
})
except Exception as pub_error:
logger.warning("Failed to publish historical traffic event", error=str(pub_error))
# Continue processing
return historical_data
except HTTPException:
raise
except Exception as e:
logger.error("Unexpected error in historical traffic API", error=str(e))
raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
@router.post("/tenants/{tenant_id}/traffic/forecast")
async def get_traffic_forecast(
request: TrafficForecastRequest,
tenant_id: UUID = Path(..., description="Tenant ID"),
current_user: Dict[str, Any] = Depends(get_current_user_dep),
):
"""Get traffic forecast for location"""
try:
logger.debug("API: Getting traffic forecast",
lat=request.latitude, lon=request.longitude, hours=request.hours)
# For now, return mock forecast data since we don't have a real traffic forecast service
# In a real implementation, this would call a traffic forecasting service
# Generate mock forecast data for the requested hours
forecast_data = []
base_time = datetime.utcnow()
for hour in range(request.hours):
forecast_time = base_time + timedelta(hours=hour)
# Mock traffic pattern (higher during rush hours)
hour_of_day = forecast_time.hour
if 7 <= hour_of_day <= 9 or 17 <= hour_of_day <= 19: # Rush hours
traffic_volume = 120
pedestrian_count = 80
congestion_level = "high"
average_speed = 15
elif 22 <= hour_of_day or hour_of_day <= 6: # Night hours
traffic_volume = 20
pedestrian_count = 10
congestion_level = "low"
average_speed = 50
else: # Regular hours
traffic_volume = 60
pedestrian_count = 40
congestion_level = "medium"
average_speed = 35
# Use consistent TrafficDataResponse format
forecast_data.append({
"date": forecast_time.isoformat(),
"traffic_volume": traffic_volume,
"pedestrian_count": pedestrian_count,
"congestion_level": congestion_level,
"average_speed": average_speed,
"source": "madrid_opendata_forecast"
})
# Publish event (with error handling)
try:
await publish_traffic_updated({
"type": "forecast_requested",
"latitude": request.latitude,
"longitude": request.longitude,
"hours": request.hours,
"timestamp": datetime.utcnow().isoformat()
})
except Exception as pub_error:
logger.warning("Failed to publish traffic forecast event", error=str(pub_error))
# Continue processing
logger.debug("Successfully returning traffic forecast", records=len(forecast_data))
return forecast_data
except HTTPException:
raise
except Exception as e:
logger.error("Unexpected error in traffic forecast API", error=str(e))
raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
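For reference, a minimal sketch of exercising the forecast endpoint with httpx. The base URL, API prefix, bearer token, and tenant ID are assumptions (the router mounting is not part of this diff), and TrafficForecastRequest is assumed to carry latitude, longitude, and hours as the handler reads them.

# Hypothetical smoke test; URL prefix, tenant ID, and token are placeholders.
import asyncio
import httpx

async def fetch_forecast() -> None:
    tenant_id = "00000000-0000-0000-0000-000000000000"  # placeholder UUID
    async with httpx.AsyncClient(base_url="http://localhost:8000") as client:
        resp = await client.post(
            f"/api/v1/tenants/{tenant_id}/traffic/forecast",
            json={"latitude": 40.4168, "longitude": -3.7038, "hours": 24},
            headers={"Authorization": "Bearer <token>"},
        )
        resp.raise_for_status()
        # Each entry mirrors the mock TrafficDataResponse shape built above
        print(resp.json()[:2])

asyncio.run(fetch_forecast())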

services/external/app/api/weather.py vendored Normal file

@@ -0,0 +1,157 @@
# services/external/app/api/weather.py
"""
Weather API Endpoints
"""
from fastapi import APIRouter, Depends, HTTPException, Query, Path
from typing import List, Dict, Any
from datetime import datetime
import structlog
from uuid import UUID
from app.schemas.weather import (
WeatherDataResponse,
WeatherForecastResponse,
WeatherForecastRequest,
HistoricalWeatherRequest
)
from app.services.weather_service import WeatherService
from app.services.messaging import publish_weather_updated
# Import unified authentication from shared library
from shared.auth.decorators import (
get_current_user_dep,
get_current_tenant_id_dep
)
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.database import get_db
router = APIRouter(tags=["weather"])
logger = structlog.get_logger()
weather_service = WeatherService()
@router.get("/tenants/{tenant_id}/weather/current", response_model=WeatherDataResponse)
async def get_current_weather(
latitude: float = Query(..., description="Latitude"),
longitude: float = Query(..., description="Longitude"),
tenant_id: UUID = Path(..., description="Tenant ID"),
current_user: Dict[str, Any] = Depends(get_current_user_dep),
):
"""Get current weather data for location"""
try:
logger.debug("Getting current weather",
lat=latitude,
lon=longitude,
tenant_id=tenant_id,
user_id=current_user["user_id"])
weather = await weather_service.get_current_weather(latitude, longitude)
if not weather:
raise HTTPException(status_code=404, detail="Weather data not available")
# Publish event
try:
await publish_weather_updated({
"type": "current_weather_requested",
"tenant_id": tenant_id,
"latitude": latitude,
"longitude": longitude,
"requested_by": current_user["user_id"],
"timestamp": datetime.utcnow().isoformat()
})
except Exception as e:
logger.warning("Failed to publish weather event", error=str(e))
return weather
except HTTPException:
raise
except Exception as e:
logger.error("Failed to get current weather", error=str(e))
raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
@router.post("/tenants/{tenant_id}/weather/historical")
async def get_historical_weather(
request: HistoricalWeatherRequest,
db: AsyncSession = Depends(get_db),
tenant_id: UUID = Path(..., description="Tenant ID"),
current_user: Dict[str, Any] = Depends(get_current_user_dep),
):
"""Get historical weather data with date range in payload"""
try:
# Validate date range
if request.end_date <= request.start_date:
raise HTTPException(status_code=400, detail="End date must be after start date")
if (request.end_date - request.start_date).days > 1000:
raise HTTPException(status_code=400, detail="Date range cannot exceed 90 days")
historical_data = await weather_service.get_historical_weather(
request.latitude, request.longitude, request.start_date, request.end_date)
# Publish event (with error handling)
try:
await publish_weather_updated({
"type": "historical_requested",
"latitude": request.latitude,
"longitude": request.longitude,
"start_date": request.start_date.isoformat(),
"end_date": request.end_date.isoformat(),
"records_count": len(historical_data),
"timestamp": datetime.utcnow().isoformat()
})
except Exception as pub_error:
logger.warning("Failed to publish historical weather event", error=str(pub_error))
# Continue processing
return historical_data
except HTTPException:
raise
except Exception as e:
logger.error("Unexpected error in historical weather API", error=str(e))
raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
@router.post("/tenants/{tenant_id}/weather/forecast", response_model=List[WeatherForecastResponse])
async def get_weather_forecast(
request: WeatherForecastRequest,
tenant_id: UUID = Path(..., description="Tenant ID"),
current_user: Dict[str, Any] = Depends(get_current_user_dep),
):
"""Get weather forecast for location"""
try:
logger.debug("Getting weather forecast",
lat=request.latitude,
lon=request.longitude,
days=request.days,
tenant_id=tenant_id)
forecast = await weather_service.get_weather_forecast(request.latitude, request.longitude, request.days)
if not forecast:
raise HTTPException(status_code=404, detail="Weather forecast not available")
# Publish event
try:
await publish_weather_updated({
"type": "forecast_requested",
"tenant_id": tenant_id,
"latitude": request.latitude,
"longitude": request.longitude,
"days": request.days,
"requested_by": current_user["user_id"],
"timestamp": datetime.utcnow().isoformat()
})
except Exception as e:
logger.warning("Failed to publish forecast event", error=str(e))
return forecast
except HTTPException:
raise
except Exception as e:
logger.error("Failed to get weather forecast", error=str(e))
raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
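The historical endpoints above share the same date-range contract; below is a minimal sketch of a valid request, reusing the same base URL and token placeholders and assuming HistoricalWeatherRequest carries latitude, longitude, start_date, and end_date as the handler reads them.

# Hypothetical request; ISO datetimes must satisfy end_date > start_date.
import asyncio
import httpx

async def fetch_history() -> None:
    tenant_id = "00000000-0000-0000-0000-000000000000"  # placeholder UUID
    payload = {
        "latitude": 40.4168,
        "longitude": -3.7038,
        "start_date": "2025-06-01T00:00:00",
        "end_date": "2025-06-30T23:59:59",
    }
    async with httpx.AsyncClient(base_url="http://localhost:8000") as client:
        resp = await client.post(
            f"/api/v1/tenants/{tenant_id}/weather/historical",
            json=payload,
            headers={"Authorization": "Bearer <token>"},
        )
        resp.raise_for_status()
        print(len(resp.json()), "records")

asyncio.run(fetch_history())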

services/external/app/core/__init__.py vendored Normal file

@@ -0,0 +1 @@
# services/external/app/core/__init__.py

services/external/app/core/config.py vendored Normal file

@@ -0,0 +1,64 @@
# services/external/app/core/config.py
from shared.config.base import BaseServiceSettings
import os
from pydantic import Field
class DataSettings(BaseServiceSettings):
"""Data service specific settings"""
# Service Identity
SERVICE_NAME: str = "external-service"
VERSION: str = "1.0.0"
APP_NAME: str = "Bakery External Data Service"
DESCRIPTION: str = "External data collection service for weather and traffic data"
# API Configuration
API_V1_STR: str = "/api/v1"
# Override database URL to use EXTERNAL_DATABASE_URL
DATABASE_URL: str = Field(
default="postgresql+asyncpg://external_user:external_pass123@external-db:5432/external_db",
env="EXTERNAL_DATABASE_URL"
)
# External API Configuration
AEMET_API_KEY: str = os.getenv("AEMET_API_KEY", "")
AEMET_BASE_URL: str = "https://opendata.aemet.es/opendata"
AEMET_TIMEOUT: int = int(os.getenv("AEMET_TIMEOUT", "30"))
AEMET_RETRY_ATTEMPTS: int = int(os.getenv("AEMET_RETRY_ATTEMPTS", "3"))
MADRID_OPENDATA_API_KEY: str = os.getenv("MADRID_OPENDATA_API_KEY", "")
MADRID_OPENDATA_BASE_URL: str = "https://datos.madrid.es"
MADRID_OPENDATA_TIMEOUT: int = int(os.getenv("MADRID_OPENDATA_TIMEOUT", "30"))
# Data Collection Configuration
WEATHER_COLLECTION_INTERVAL_HOURS: int = int(os.getenv("WEATHER_COLLECTION_INTERVAL_HOURS", "1"))
TRAFFIC_COLLECTION_INTERVAL_HOURS: int = int(os.getenv("TRAFFIC_COLLECTION_INTERVAL_HOURS", "1"))
EVENTS_COLLECTION_INTERVAL_HOURS: int = int(os.getenv("EVENTS_COLLECTION_INTERVAL_HOURS", "6"))
# Cache TTL Configuration
WEATHER_CACHE_TTL_HOURS: int = int(os.getenv("WEATHER_CACHE_TTL_HOURS", "1"))
TRAFFIC_CACHE_TTL_HOURS: int = int(os.getenv("TRAFFIC_CACHE_TTL_HOURS", "1"))
EVENTS_CACHE_TTL_HOURS: int = int(os.getenv("EVENTS_CACHE_TTL_HOURS", "6"))
# Data Quality Configuration
DATA_VALIDATION_ENABLED: bool = os.getenv("DATA_VALIDATION_ENABLED", "true").lower() == "true"
OUTLIER_DETECTION_ENABLED: bool = os.getenv("OUTLIER_DETECTION_ENABLED", "true").lower() == "true"
DATA_COMPLETENESS_THRESHOLD: float = float(os.getenv("DATA_COMPLETENESS_THRESHOLD", "0.8"))
# Geolocation Settings (Madrid focus)
DEFAULT_LATITUDE: float = float(os.getenv("DEFAULT_LATITUDE", "40.4168")) # Madrid
DEFAULT_LONGITUDE: float = float(os.getenv("DEFAULT_LONGITUDE", "-3.7038")) # Madrid
LOCATION_RADIUS_KM: float = float(os.getenv("LOCATION_RADIUS_KM", "50.0"))
# Data Retention
RAW_DATA_RETENTION_DAYS: int = int(os.getenv("RAW_DATA_RETENTION_DAYS", "90"))
PROCESSED_DATA_RETENTION_DAYS: int = int(os.getenv("PROCESSED_DATA_RETENTION_DAYS", "365"))
# Batch Processing
BATCH_PROCESSING_ENABLED: bool = os.getenv("BATCH_PROCESSING_ENABLED", "true").lower() == "true"
BATCH_SIZE: int = int(os.getenv("BATCH_SIZE", "1000"))
PARALLEL_PROCESSING_WORKERS: int = int(os.getenv("PARALLEL_PROCESSING_WORKERS", "4"))
settings = DataSettings()
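Several fields read the environment at import time (os.getenv in the class body) and DATABASE_URL is resolved at instantiation, so overrides must be in place before the module is imported. A small sketch, assuming BaseServiceSettings is a pydantic BaseSettings subclass:

# Hypothetical override flow; connection values are placeholders.
import os

os.environ["EXTERNAL_DATABASE_URL"] = (
    "postgresql+asyncpg://user:pass@localhost:5432/external_db"
)
os.environ["AEMET_TIMEOUT"] = "10"

from app.core.config import DataSettings  # import only after the env is set

settings = DataSettings()
assert settings.DATABASE_URL.endswith("/external_db")
assert settings.AEMET_TIMEOUT == 10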

services/external/app/core/database.py vendored Normal file

@@ -0,0 +1,81 @@
# services/external/app/core/database.py
"""
External Service Database Configuration using shared database manager
"""
import structlog
from contextlib import asynccontextmanager
from typing import AsyncGenerator
from app.core.config import settings
from shared.database.base import DatabaseManager, Base
logger = structlog.get_logger()
# Create database manager instance
database_manager = DatabaseManager(
database_url=settings.DATABASE_URL,
service_name="external-service"
)
async def get_db():
"""
Database dependency for FastAPI - using shared database manager
"""
async for session in database_manager.get_db():
yield session
async def init_db():
"""Initialize database tables using shared database manager"""
try:
logger.info("Initializing External Service database...")
# Import all models to ensure they're registered
from app.models import weather, traffic # noqa: F401
# Create all tables using database manager
await database_manager.create_tables(Base.metadata)
logger.info("External Service database initialized successfully")
except Exception as e:
logger.error("Failed to initialize database", error=str(e))
raise
async def close_db():
"""Close database connections using shared database manager"""
try:
await database_manager.close_connections()
logger.info("Database connections closed")
except Exception as e:
logger.error("Error closing database connections", error=str(e))
@asynccontextmanager
async def get_db_transaction():
"""
Context manager for database transactions using shared database manager
"""
async with database_manager.get_session() as session:
try:
async with session.begin():
yield session
except Exception as e:
logger.error("Transaction error", error=str(e))
raise
@asynccontextmanager
async def get_background_session():
"""
Context manager for background tasks using shared database manager
"""
async with database_manager.get_background_session() as session:
yield session
async def health_check():
"""Database health check using shared database manager"""
return await database_manager.health_check()
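A sketch of how a background job might use the transaction helper; the table name here is illustrative, not taken from this commit.

# session.begin() commits on clean exit and rolls back if the block raises.
from sqlalchemy import text
from app.core.database import get_db_transaction

async def purge_stale_weather() -> None:
    async with get_db_transaction() as session:
        await session.execute(
            text("DELETE FROM weather_data WHERE date < now() - interval '90 days'")
        )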


services/external/app/external/aemet.py vendored Normal file

@@ -0,0 +1,704 @@
# ================================================================
# services/external/app/external/aemet.py - REFACTORED VERSION
# ================================================================
"""AEMET (Spanish Weather Service) API client with improved modularity"""
import math
from typing import List, Dict, Any, Optional
from datetime import datetime, timedelta, date
from dataclasses import dataclass
from enum import Enum
import structlog
from app.external.base_client import BaseAPIClient
from app.core.config import settings
logger = structlog.get_logger()
class WeatherSource(Enum):
"""Weather data source types"""
AEMET = "aemet"
SYNTHETIC = "synthetic"
DEFAULT = "default"
@dataclass
class WeatherStation:
"""Weather station data"""
id: str
name: str
latitude: float
longitude: float
@dataclass
class GeographicBounds:
"""Geographic boundary definition"""
min_lat: float
max_lat: float
min_lon: float
max_lon: float
def contains(self, latitude: float, longitude: float) -> bool:
"""Check if coordinates are within bounds"""
return (self.min_lat <= latitude <= self.max_lat and
self.min_lon <= longitude <= self.max_lon)
class AEMETConstants:
"""AEMET API constants and configuration"""
# API Configuration
MAX_DAYS_PER_REQUEST = 30
MADRID_MUNICIPALITY_CODE = "28079"
# Madrid geographic bounds
MADRID_BOUNDS = GeographicBounds(
min_lat=40.3, max_lat=40.6,
min_lon=-3.9, max_lon=-3.5
)
# Weather stations in Madrid area
MADRID_STATIONS = [
WeatherStation("3195", "Madrid Centro", 40.4117, -3.6780),
WeatherStation("3129", "Madrid Norte", 40.4677, -3.5552),
WeatherStation("3197", "Madrid Sur", 40.2987, -3.7216),
]
# Climate simulation parameters
BASE_TEMPERATURE_SEASONAL = 5.0
TEMPERATURE_SEASONAL_MULTIPLIER = 2.5
DAILY_TEMPERATURE_AMPLITUDE = 8.0
EARTH_RADIUS_KM = 6371.0
class WeatherDataParser:
"""Handles parsing of different weather data formats"""
@staticmethod
def safe_float(value: Any, default: Optional[float] = None) -> Optional[float]:
"""Safely convert value to float with fallback"""
try:
if value is None:
return default
return float(value)
except (ValueError, TypeError):
return default
@staticmethod
def extract_temperature_value(temp_data: Any) -> Optional[float]:
"""Extract temperature value from AEMET complex temperature structure"""
if temp_data is None:
return None
if isinstance(temp_data, (int, float)):
return float(temp_data)
if isinstance(temp_data, str):
try:
return float(temp_data)
except ValueError:
return None
if isinstance(temp_data, dict) and 'valor' in temp_data:
return WeatherDataParser.safe_float(temp_data['valor'])
if isinstance(temp_data, list) and len(temp_data) > 0:
first_item = temp_data[0]
if isinstance(first_item, dict) and 'valor' in first_item:
return WeatherDataParser.safe_float(first_item['valor'])
return None
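# Illustrative inputs: extract_temperature_value({"valor": "21.4"}) -> 21.4,
# extract_temperature_value([{"valor": 18}]) -> 18.0, extract_temperature_value("n/a") -> None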
@staticmethod
def generate_weather_description(temperature: Optional[float],
precipitation: Optional[float],
humidity: Optional[float]) -> str:
"""Generate weather description based on conditions"""
if precipitation and precipitation > 5.0:
return "Lluvioso"
elif precipitation and precipitation > 0.1:
return "Nuboso con lluvia"
elif humidity and humidity > 80:
return "Nuboso"
elif temperature and temperature > 25:
return "Soleado y cálido"
elif temperature is not None and temperature < 5:
return "Frío"
else:
return "Variable"
def parse_current_weather(self, data: Dict[str, Any]) -> Dict[str, Any]:
"""Parse AEMET current weather data format"""
if not isinstance(data, dict):
logger.warning("Weather data is not a dictionary", data_type=type(data))
return self._get_default_weather_data()
try:
return {
"date": datetime.now(),
"temperature": self.safe_float(data.get("ta"), 15.0),
"precipitation": self.safe_float(data.get("prec"), 0.0),
"humidity": self.safe_float(data.get("hr"), 50.0),
"wind_speed": self.safe_float(data.get("vv"), 10.0),
"pressure": self.safe_float(data.get("pres"), 1013.0),
"description": str(data.get("descripcion", "Partly cloudy")),
"source": WeatherSource.AEMET.value
}
except Exception as e:
logger.error("Error parsing weather data", error=str(e), data=data)
return self._get_default_weather_data()
def parse_historical_data(self, data: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""Parse AEMET historical weather data"""
parsed_data = []
try:
for record in data:
if not isinstance(record, dict):
continue
parsed_record = self._parse_single_historical_record(record)
if parsed_record:
parsed_data.append(parsed_record)
except Exception as e:
logger.error("Error parsing historical weather data", error=str(e))
return parsed_data
def parse_forecast_data(self, data: List[Dict[str, Any]], days: int) -> List[Dict[str, Any]]:
"""Parse AEMET forecast data"""
forecast = []
base_date = datetime.now().date()
if not isinstance(data, list):
logger.warning("Forecast data is not a list", data_type=type(data))
return []
try:
if len(data) > 0 and isinstance(data[0], dict):
aemet_data = data[0]
dias = aemet_data.get('prediccion', {}).get('dia', [])
if isinstance(dias, list) and len(dias) > 0:
forecast = self._parse_forecast_days(dias, days, base_date)
# Fill remaining days with synthetic data if needed
forecast = self._ensure_forecast_completeness(forecast, days)
except Exception as e:
logger.error("Error parsing AEMET forecast data", error=str(e))
forecast = []
return forecast
def _parse_single_historical_record(self, record: Dict[str, Any]) -> Optional[Dict[str, Any]]:
"""Parse a single historical weather record"""
fecha_str = record.get('fecha')
if not fecha_str:
return None
try:
record_date = datetime.strptime(fecha_str, '%Y-%m-%d')
except ValueError:
logger.warning("Invalid date format in historical data", fecha=fecha_str)
return None
# Extract and calculate temperature
temp_max = self.safe_float(record.get('tmax'))
temp_min = self.safe_float(record.get('tmin'))
temperature = self._calculate_average_temperature(temp_max, temp_min)
# Extract other weather parameters
precipitation = self.safe_float(record.get('prec'), 0.0)
humidity = self.safe_float(record.get('hr'))
wind_speed = self.safe_float(record.get('velmedia'))
pressure = self._extract_pressure(record)
return {
"date": record_date,
"temperature": temperature,
"precipitation": precipitation,
"humidity": humidity,
"wind_speed": wind_speed,
"pressure": pressure,
"description": self.generate_weather_description(temperature, precipitation, humidity),
"source": WeatherSource.AEMET.value
}
def _calculate_average_temperature(self, temp_max: Optional[float], temp_min: Optional[float]) -> Optional[float]:
"""Calculate average temperature from max and min values"""
# Compare against None explicitly so a valid 0.0 reading is not dropped
if temp_max is not None and temp_min is not None:
return (temp_max + temp_min) / 2
elif temp_max is not None:
return temp_max - 5 # Estimate average from max
elif temp_min is not None:
return temp_min + 5 # Estimate average from min
return None
def _extract_pressure(self, record: Dict[str, Any]) -> Optional[float]:
"""Extract pressure from historical record"""
pressure = self.safe_float(record.get('presMax'))
if pressure is None:
pressure = self.safe_float(record.get('presMin'))
return pressure
def _parse_forecast_days(self, dias: List[Dict[str, Any]], days: int, base_date: date) -> List[Dict[str, Any]]:
"""Parse forecast days from AEMET data"""
forecast = []
for i, dia in enumerate(dias[:days]):
if not isinstance(dia, dict):
continue
forecast_date = base_date + timedelta(days=i)
forecast_day = self._parse_single_forecast_day(dia, forecast_date, i)
forecast.append(forecast_day)
return forecast
def _parse_single_forecast_day(self, dia: Dict[str, Any], forecast_date: date, day_index: int) -> Dict[str, Any]:
"""Parse a single forecast day"""
# Extract temperature
temp_data = dia.get('temperatura', {})
avg_temp = self._extract_forecast_temperature(temp_data)
# Extract precipitation probability
precip_prob = self._extract_precipitation_probability(dia.get('probPrecipitacion', []))
# Extract wind speed
wind_speed = self._extract_wind_speed(dia.get('viento', []))
# Generate description
description = self._generate_forecast_description(precip_prob)
return {
"forecast_date": datetime.combine(forecast_date, datetime.min.time()),
"generated_at": datetime.now(),
"temperature": round(avg_temp, 1),
"precipitation": precip_prob / 10, # Convert percentage to mm estimate
"humidity": 50.0 + (day_index % 20), # Estimate
"wind_speed": round(wind_speed, 1),
"description": description,
"source": WeatherSource.AEMET.value
}
def _extract_forecast_temperature(self, temp_data: Dict[str, Any]) -> float:
"""Extract temperature from forecast temperature data"""
if isinstance(temp_data, dict):
temp_max = self.extract_temperature_value(temp_data.get('maxima'))
temp_min = self.extract_temperature_value(temp_data.get('minima'))
if temp_max is not None and temp_min is not None:
return (temp_max + temp_min) / 2
return 15.0
def _extract_precipitation_probability(self, precip_data: List[Dict[str, Any]]) -> float:
"""Extract precipitation probability from forecast data"""
precip_prob = 0.0
if isinstance(precip_data, list):
for precip_item in precip_data:
if isinstance(precip_item, dict) and 'value' in precip_item:
precip_prob = max(precip_prob, self.safe_float(precip_item.get('value'), 0.0))
return precip_prob
def _extract_wind_speed(self, viento_data: List[Dict[str, Any]]) -> float:
"""Extract wind speed from forecast data"""
wind_speed = 10.0
if isinstance(viento_data, list):
for viento_item in viento_data:
if isinstance(viento_item, dict) and 'velocidad' in viento_item:
speed_values = viento_item.get('velocidad', [])
if isinstance(speed_values, list) and len(speed_values) > 0:
wind_speed = self.safe_float(speed_values[0], 10.0)
break
return wind_speed
def _generate_forecast_description(self, precip_prob: float) -> str:
"""Generate description based on precipitation probability"""
if precip_prob > 70:
return "Lluvioso"
elif precip_prob > 30:
return "Parcialmente nublado"
else:
return "Soleado"
def _ensure_forecast_completeness(self, forecast: List[Dict[str, Any]], days: int) -> List[Dict[str, Any]]:
"""Ensure forecast has the requested number of days"""
if len(forecast) < days:
remaining_days = days - len(forecast)
synthetic_generator = SyntheticWeatherGenerator()
synthetic_forecast = synthetic_generator.generate_forecast_sync(remaining_days, len(forecast))
forecast.extend(synthetic_forecast)
return forecast[:days]
def _get_default_weather_data(self) -> Dict[str, Any]:
"""Get default weather data structure"""
return {
"date": datetime.now(),
"temperature": 15.0,
"precipitation": 0.0,
"humidity": 50.0,
"wind_speed": 10.0,
"pressure": 1013.0,
"description": "Data not available",
"source": WeatherSource.DEFAULT.value
}
class SyntheticWeatherGenerator:
"""Generates realistic synthetic weather data for Madrid"""
def generate_current_weather(self) -> Dict[str, Any]:
"""Generate realistic synthetic current weather for Madrid"""
now = datetime.now()
month = now.month
hour = now.hour
# Madrid climate simulation
temperature = self._calculate_current_temperature(month, hour)
precipitation = self._calculate_current_precipitation(now, month)
return {
"date": now,
"temperature": round(temperature, 1),
"precipitation": precipitation,
"humidity": 45 + (month % 6) * 5,
"wind_speed": 8 + (hour % 12),
"pressure": 1013 + math.sin(now.day * 0.2) * 15,
"description": "Lluvioso" if precipitation > 0 else "Soleado",
"source": WeatherSource.SYNTHETIC.value
}
def generate_forecast_sync(self, days: int, start_offset: int = 0) -> List[Dict[str, Any]]:
"""Generate synthetic forecast data synchronously"""
forecast = []
base_date = datetime.now().date()
for i in range(days):
forecast_date = base_date + timedelta(days=start_offset + i)
forecast_day = self._generate_forecast_day(forecast_date, start_offset + i)
forecast.append(forecast_day)
return forecast
async def generate_forecast(self, days: int) -> List[Dict[str, Any]]:
"""Generate synthetic forecast data (async version for compatibility)"""
return self.generate_forecast_sync(days, 0)
def generate_historical_data(self, start_date: datetime, end_date: datetime) -> List[Dict[str, Any]]:
"""Generate synthetic historical weather data"""
historical_data = []
current_date = start_date
while current_date <= end_date:
historical_day = self._generate_historical_day(current_date)
historical_data.append(historical_day)
current_date += timedelta(days=1)
return historical_data
def _calculate_current_temperature(self, month: int, hour: int) -> float:
"""Calculate current temperature based on seasonal and daily patterns"""
base_temp = AEMETConstants.BASE_TEMPERATURE_SEASONAL + (month - 1) * AEMETConstants.TEMPERATURE_SEASONAL_MULTIPLIER
temp_variation = math.sin((hour - 6) * math.pi / 12) * AEMETConstants.DAILY_TEMPERATURE_AMPLITUDE
return base_temp + temp_variation
def _calculate_current_precipitation(self, now: datetime, month: int) -> float:
"""Calculate current precipitation based on seasonal patterns"""
rain_prob = 0.3 if month in [11, 12, 1, 2, 3] else 0.1
return 2.5 if hash(now.date()) % 100 < rain_prob * 100 else 0.0
def _generate_forecast_day(self, forecast_date: date, day_offset: int) -> Dict[str, Any]:
"""Generate a single forecast day"""
month = forecast_date.month
base_temp = AEMETConstants.BASE_TEMPERATURE_SEASONAL + (month - 1) * AEMETConstants.TEMPERATURE_SEASONAL_MULTIPLIER
temp_variation = ((day_offset) % 7 - 3) * 2 # Weekly variation
return {
"forecast_date": datetime.combine(forecast_date, datetime.min.time()),
"generated_at": datetime.now(),
"temperature": round(base_temp + temp_variation, 1),
"precipitation": 2.0 if day_offset % 5 == 0 else 0.0,
"humidity": 50 + (day_offset % 30),
"wind_speed": 10 + (day_offset % 15),
"description": "Lluvioso" if day_offset % 5 == 0 else "Soleado",
"source": WeatherSource.SYNTHETIC.value
}
def _generate_historical_day(self, day: datetime) -> Dict[str, Any]:
"""Generate a single historical day"""
month = day.month
base_temp = AEMETConstants.BASE_TEMPERATURE_SEASONAL + (month - 1) * AEMETConstants.TEMPERATURE_SEASONAL_MULTIPLIER
temp_variation = math.sin(day.day * 0.3) * 5
return {
"date": day,
"temperature": round(base_temp + temp_variation, 1),
"precipitation": 1.5 if day.day % 7 == 0 else 0.0,
"humidity": 45 + (day.day % 40),
"wind_speed": 8 + (day.day % 20),
"pressure": 1013 + math.sin(day.day * 0.2) * 20,
"description": "Variable",
"source": WeatherSource.SYNTHETIC.value
}
class LocationService:
"""Handles location-related operations"""
@staticmethod
def find_nearest_station(latitude: float, longitude: float) -> Optional[str]:
"""Find nearest weather station to given coordinates"""
try:
# Check if coordinates are reasonable (not extreme values)
if not (-90 <= latitude <= 90 and -180 <= longitude <= 180):
logger.warning("Invalid coordinate range", lat=latitude, lon=longitude)
return None
# Check if coordinates are too far from Madrid area (more than 1000km away)
madrid_center = (40.4168, -3.7038)
distance_to_madrid = LocationService.calculate_distance(
latitude, longitude, madrid_center[0], madrid_center[1]
)
if distance_to_madrid > 1000: # More than 1000km from Madrid
logger.warning("Coordinates too far from Madrid",
lat=latitude, lon=longitude, distance_km=distance_to_madrid)
return None
closest_station = None
min_distance = float('inf')
for station in AEMETConstants.MADRID_STATIONS:
distance = LocationService.calculate_distance(
latitude, longitude, station.latitude, station.longitude
)
if distance < min_distance:
min_distance = distance
closest_station = station.id
return closest_station
except Exception as e:
logger.error("Failed to find nearest station", error=str(e))
return None
@staticmethod
def get_municipality_code(latitude: float, longitude: float) -> Optional[str]:
"""Get municipality code for coordinates"""
if AEMETConstants.MADRID_BOUNDS.contains(latitude, longitude):
return AEMETConstants.MADRID_MUNICIPALITY_CODE
return None
@staticmethod
def calculate_distance(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
"""Calculate distance between two coordinates using Haversine formula"""
dlat = math.radians(lat2 - lat1)
dlon = math.radians(lon2 - lon1)
a = (math.sin(dlat/2) * math.sin(dlat/2) +
math.cos(math.radians(lat1)) * math.cos(math.radians(lat2)) *
math.sin(dlon/2) * math.sin(dlon/2))
c = 2 * math.atan2(math.sqrt(a), math.sqrt(1-a))
return AEMETConstants.EARTH_RADIUS_KM * c
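# Sanity check (illustrative): Puerta del Sol (40.4168, -3.7038) to station
# 3195 "Madrid Centro" (40.4117, -3.6780) comes out to roughly 2.3 km.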
class AEMETClient(BaseAPIClient):
"""AEMET (Spanish Weather Service) API client with improved modularity"""
def __init__(self):
super().__init__(
base_url="https://opendata.aemet.es/opendata/api",
api_key=settings.AEMET_API_KEY
)
self.parser = WeatherDataParser()
self.synthetic_generator = SyntheticWeatherGenerator()
self.location_service = LocationService()
async def get_current_weather(self, latitude: float, longitude: float) -> Optional[Dict[str, Any]]:
"""Get current weather for coordinates"""
try:
station_id = self.location_service.find_nearest_station(latitude, longitude)
if not station_id:
logger.warning("No weather station found", lat=latitude, lon=longitude)
return await self._get_synthetic_current_weather()
weather_data = await self._fetch_current_weather_data(station_id)
if weather_data:
return self.parser.parse_current_weather(weather_data)
logger.info("Falling back to synthetic weather data", reason="invalid_weather_data")
return await self._get_synthetic_current_weather()
except Exception as e:
logger.error("Failed to get current weather", error=str(e))
return await self._get_synthetic_current_weather()
async def get_forecast(self, latitude: float, longitude: float, days: int = 7) -> List[Dict[str, Any]]:
"""Get weather forecast for coordinates"""
try:
municipality_code = self.location_service.get_municipality_code(latitude, longitude)
if not municipality_code:
logger.info("No municipality code found, using synthetic data")
return await self.synthetic_generator.generate_forecast(days)
forecast_data = await self._fetch_forecast_data(municipality_code)
if forecast_data:
parsed_forecast = self.parser.parse_forecast_data(forecast_data, days)
if parsed_forecast:
return parsed_forecast
logger.info("Falling back to synthetic forecast data", reason="invalid_forecast_data")
return await self.synthetic_generator.generate_forecast(days)
except Exception as e:
logger.error("Failed to get weather forecast", error=str(e))
return await self.synthetic_generator.generate_forecast(days)
async def get_historical_weather(self,
latitude: float,
longitude: float,
start_date: datetime,
end_date: datetime) -> List[Dict[str, Any]]:
"""Get historical weather data"""
try:
logger.debug("Getting historical weather from AEMET API",
lat=latitude, lon=longitude,
start=start_date, end=end_date)
station_id = self.location_service.find_nearest_station(latitude, longitude)
if not station_id:
logger.warning("No weather station found for historical data",
lat=latitude, lon=longitude)
return self.synthetic_generator.generate_historical_data(start_date, end_date)
historical_data = await self._fetch_historical_data_in_chunks(
station_id, start_date, end_date
)
if historical_data:
logger.debug("Successfully fetched historical weather data",
total_count=len(historical_data))
return historical_data
else:
logger.info("No real historical data available, using synthetic data")
return self.synthetic_generator.generate_historical_data(start_date, end_date)
except Exception as e:
logger.error("Failed to get historical weather from AEMET API", error=str(e))
return self.synthetic_generator.generate_historical_data(start_date, end_date)
async def _fetch_current_weather_data(self, station_id: str) -> Optional[Dict[str, Any]]:
"""Fetch current weather data from AEMET API"""
endpoint = f"/observacion/convencional/datos/estacion/{station_id}"
initial_response = await self._get(endpoint)
if not self._is_valid_initial_response(initial_response):
return None
datos_url = initial_response.get("datos")
actual_weather_data = await self._fetch_from_url(datos_url)
if (actual_weather_data and isinstance(actual_weather_data, list)
and len(actual_weather_data) > 0):
return actual_weather_data[0]
return None
async def _fetch_forecast_data(self, municipality_code: str) -> Optional[List[Dict[str, Any]]]:
"""Fetch forecast data from AEMET API"""
endpoint = f"/prediccion/especifica/municipio/diaria/{municipality_code}"
initial_response = await self._get(endpoint)
if not self._is_valid_initial_response(initial_response):
return None
datos_url = initial_response.get("datos")
return await self._fetch_from_url(datos_url)
async def _fetch_historical_data_in_chunks(self,
station_id: str,
start_date: datetime,
end_date: datetime) -> List[Dict[str, Any]]:
"""Fetch historical data in chunks due to AEMET API limitations"""
historical_data = []
current_date = start_date
while current_date <= end_date:
chunk_end_date = min(
current_date + timedelta(days=AEMETConstants.MAX_DAYS_PER_REQUEST),
end_date
)
chunk_data = await self._fetch_historical_chunk(
station_id, current_date, chunk_end_date
)
if chunk_data:
historical_data.extend(chunk_data)
current_date = chunk_end_date + timedelta(days=1)
return historical_data
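# Illustrative behaviour: a multi-month request is split into consecutive
# windows of at most MAX_DAYS_PER_REQUEST days, fetched one after another.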
async def _fetch_historical_chunk(self,
station_id: str,
start_date: datetime,
end_date: datetime) -> List[Dict[str, Any]]:
"""Fetch a single chunk of historical data"""
start_str = start_date.strftime("%Y-%m-%dT00:00:00UTC")
end_str = end_date.strftime("%Y-%m-%dT23:59:59UTC")
endpoint = f"/valores/climatologicos/diarios/datos/fechaini/{start_str}/fechafin/{end_str}/estacion/{station_id}"
initial_response = await self._get(endpoint)
if not self._is_valid_initial_response(initial_response):
logger.warning("Invalid initial response from AEMET historical API",
start=start_str, end=end_str)
return []
datos_url = initial_response.get("datos")
if not datos_url:
logger.warning("No datos URL in AEMET historical response",
start=start_str, end=end_str)
return []
actual_historical_data = await self._fetch_from_url(datos_url)
if actual_historical_data and isinstance(actual_historical_data, list):
chunk_data = self.parser.parse_historical_data(actual_historical_data)
logger.debug("Fetched historical data chunk",
count=len(chunk_data), start=start_str, end=end_str)
return chunk_data
else:
logger.warning("No valid historical data received for chunk",
start=start_str, end=end_str)
return []
async def _fetch_from_url(self, url: str) -> Optional[List[Dict[str, Any]]]:
"""Fetch data from AEMET datos URL"""
try:
data = await self._fetch_url_directly(url)
if data and isinstance(data, list):
return data
else:
logger.warning("Expected list from datos URL", data_type=type(data))
return None
except Exception as e:
logger.error("Failed to fetch from datos URL", url=url, error=str(e))
return None
def _is_valid_initial_response(self, response: Any) -> bool:
"""Check if initial AEMET API response is valid"""
return bool(response and isinstance(response, dict) and
response.get("datos") and isinstance(response.get("datos"), str))
async def _get_synthetic_current_weather(self) -> Dict[str, Any]:
"""Get synthetic current weather data"""
return self.synthetic_generator.generate_current_weather()
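A minimal sketch of the client's fallback behaviour, assuming an async context; with an empty AEMET_API_KEY the calls degrade to synthetic Madrid data rather than failing.

# Hypothetical usage; coordinates are Madrid city centre.
import asyncio
from app.external.aemet import AEMETClient

async def main() -> None:
    client = AEMETClient()
    current = await client.get_current_weather(40.4168, -3.7038)
    print(current["source"])  # "aemet", or "synthetic"/"default" on fallback
    forecast = await client.get_forecast(40.4168, -3.7038, days=3)
    print(len(forecast), "forecast days")

asyncio.run(main())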

services/external/app/external/apis/__init__.py vendored Normal file

@@ -0,0 +1,10 @@
# ================================================================
# services/external/app/external/apis/__init__.py
# ================================================================
"""
External API clients module - Scalable architecture for multiple cities
"""
from .traffic import TrafficAPIClientFactory
__all__ = ["TrafficAPIClientFactory"]

services/external/app/external/apis/madrid_traffic_client.py vendored Normal file

@@ -0,0 +1,350 @@
# ================================================================
# services/external/app/external/apis/madrid_traffic_client.py
# ================================================================
"""
Madrid traffic client - Orchestration layer only
Coordinates between HTTP client, data processor, and business logic components
"""
from datetime import datetime, timedelta, timezone
from typing import Dict, List, Any, Optional, Tuple
import structlog
from .traffic import BaseTrafficClient, SupportedCity
from ..base_client import BaseAPIClient
from ..clients.madrid_client import MadridTrafficAPIClient
from ..processors.madrid_processor import MadridTrafficDataProcessor
from ..processors.madrid_business_logic import MadridTrafficAnalyzer
from ..models.madrid_models import TrafficRecord
class MadridTrafficClient(BaseTrafficClient, BaseAPIClient):
"""
Enhanced Madrid traffic client - Orchestration layer
Coordinates HTTP, processing, and business logic components
"""
# Madrid geographic bounds
MADRID_BOUNDS = {
'lat_min': 40.31, 'lat_max': 40.56,
'lon_min': -3.89, 'lon_max': -3.51
}
# Configuration constants
MAX_HISTORICAL_DAYS = 1095 # 3 years
MAX_CSV_PROCESSING_ROWS = 5000000
MEASUREMENT_POINTS_LIMIT = 20
def __init__(self):
BaseTrafficClient.__init__(self, SupportedCity.MADRID)
BaseAPIClient.__init__(self, base_url="https://datos.madrid.es")
# Initialize components
self.api_client = MadridTrafficAPIClient()
self.processor = MadridTrafficDataProcessor()
self.analyzer = MadridTrafficAnalyzer()
self.logger = structlog.get_logger()
def supports_location(self, latitude: float, longitude: float) -> bool:
"""Check if location is within Madrid bounds"""
return (self.MADRID_BOUNDS['lat_min'] <= latitude <= self.MADRID_BOUNDS['lat_max'] and
self.MADRID_BOUNDS['lon_min'] <= longitude <= self.MADRID_BOUNDS['lon_max'])
async def get_current_traffic(self, latitude: float, longitude: float) -> Optional[Dict[str, Any]]:
"""Get current traffic data with enhanced pedestrian inference"""
try:
if not self.supports_location(latitude, longitude):
self.logger.warning("Location outside Madrid bounds", lat=latitude, lon=longitude)
return None
# Fetch XML data
xml_content = await self.api_client.fetch_current_traffic_xml()
if not xml_content:
self.logger.warning("No XML content received")
return None
# Parse XML data
traffic_points = self.processor.parse_traffic_xml(xml_content)
if not traffic_points:
self.logger.warning("No traffic points found in XML")
return None
# Find nearest traffic point
nearest_point = self.analyzer.find_nearest_traffic_point(traffic_points, latitude, longitude)
if not nearest_point:
self.logger.warning("No nearby traffic points found")
return None
# Enhance with business logic
enhanced_data = await self._enhance_traffic_data(nearest_point, latitude, longitude)
self.logger.info("Current traffic data retrieved",
point_id=nearest_point.get('measurement_point_id'),
distance=enhanced_data.get('distance_km', 0))
return enhanced_data
except Exception as e:
self.logger.error("Error getting current traffic", error=str(e))
return None
async def get_historical_traffic(self, latitude: float, longitude: float,
start_date: datetime, end_date: datetime) -> List[Dict[str, Any]]:
"""Get historical traffic data with pedestrian enhancement"""
try:
if not self.supports_location(latitude, longitude):
self.logger.warning("Location outside Madrid bounds", lat=latitude, lon=longitude)
return []
# Validate date range
if (end_date - start_date).days > self.MAX_HISTORICAL_DAYS:
self.logger.warning("Date range too large, truncating",
requested_days=(end_date - start_date).days,
max_days=self.MAX_HISTORICAL_DAYS)
start_date = end_date - timedelta(days=self.MAX_HISTORICAL_DAYS)
# Fetch measurement points registry
csv_content = await self.api_client.fetch_measurement_points_csv()
if not csv_content:
self.logger.error("Failed to fetch measurement points registry")
return []
# Parse measurement points
measurement_points = self.processor.parse_measurement_points_csv(csv_content)
if not measurement_points:
self.logger.error("No measurement points found")
return []
# Find nearest measurement points
nearest_points = self.analyzer.find_nearest_measurement_points(
measurement_points, latitude, longitude, num_points=3
)
if not nearest_points:
self.logger.warning("No nearby measurement points found")
return []
# Process historical data
historical_records = await self._fetch_historical_data_enhanced(
latitude, longitude, start_date, end_date, nearest_points
)
self.logger.info("Historical traffic data retrieved",
records_count=len(historical_records),
date_range=f"{start_date.date()} to {end_date.date()}")
return historical_records
except Exception as e:
self.logger.error("Error getting historical traffic", error=str(e))
return []
async def get_events(self, latitude: float, longitude: float,
radius_km: float = 5.0) -> List[Dict[str, Any]]:
"""Get traffic events (incidents, construction, etc.)"""
# Madrid doesn't provide a separate events endpoint, so high-congestion
# readings from current traffic data are surfaced as events instead
current_data = await self.get_current_traffic(latitude, longitude)
if current_data and current_data.get('congestion_level') in ['high', 'blocked']:
return [{
'type': 'congestion',
'severity': current_data.get('congestion_level'),
'description': f"High traffic congestion at {current_data.get('measurement_point_name', 'measurement point')}",
'location': {
'latitude': current_data.get('latitude'),
'longitude': current_data.get('longitude')
},
'timestamp': current_data.get('timestamp')
}]
return []
async def _enhance_traffic_data(self, traffic_point: Dict[str, Any],
query_lat: float, query_lon: float) -> Dict[str, Any]:
"""Enhance traffic data with business logic and pedestrian inference"""
# Calculate distance
distance_km = self.analyzer.calculate_distance(
query_lat, query_lon,
traffic_point.get('latitude', 0),
traffic_point.get('longitude', 0)
)
# Classify road type
road_type = self.analyzer.classify_road_type(
traffic_point.get('measurement_point_name', '')
)
# Get congestion level
congestion_level = self.analyzer.get_congestion_level(
traffic_point.get('ocupacion', 0)
)
# Create traffic record for pedestrian inference
traffic_record = TrafficRecord(
date=datetime.now(timezone.utc),
traffic_volume=traffic_point.get('intensidad', 0),
occupation_percentage=int(traffic_point.get('ocupacion', 0)),
load_percentage=traffic_point.get('carga', 0),
average_speed=30, # Default speed
congestion_level=congestion_level,
pedestrian_count=0, # Will be calculated
measurement_point_id=traffic_point.get('measurement_point_id', ''),
measurement_point_name=traffic_point.get('measurement_point_name', ''),
road_type=road_type,
source='madrid_current_xml'
)
# Calculate pedestrian count
location_context = {
'latitude': traffic_point.get('latitude'),
'longitude': traffic_point.get('longitude'),
'measurement_point_name': traffic_point.get('measurement_point_name')
}
pedestrian_count, inference_metadata = self.analyzer.calculate_pedestrian_flow(
traffic_record, location_context
)
# Build enhanced response
enhanced_data = {
'timestamp': datetime.now(timezone.utc),
'latitude': traffic_point.get('latitude'),
'longitude': traffic_point.get('longitude'),
'measurement_point_id': traffic_point.get('measurement_point_id'),
'measurement_point_name': traffic_point.get('measurement_point_name'),
'traffic_volume': traffic_point.get('intensidad', 0),
'occupation_percentage': int(traffic_point.get('ocupacion', 0)),
'load_percentage': traffic_point.get('carga', 0),
'congestion_level': congestion_level,
'pedestrian_count': pedestrian_count,
'road_type': road_type,
'distance_km': distance_km,
'source': 'madrid_current_xml',
'city': 'madrid',
'inference_metadata': inference_metadata,
'raw_data': traffic_point
}
return enhanced_data
async def _fetch_historical_data_enhanced(self, latitude: float, longitude: float,
start_date: datetime, end_date: datetime,
nearest_points: List[Tuple[str, Dict[str, Any], float]]) -> List[Dict[str, Any]]:
"""Fetch and process historical traffic data"""
historical_records = []
try:
# Process by year and month to avoid memory issues
current_date = start_date.replace(day=1) # Start from beginning of month
while current_date <= end_date:
year = current_date.year
month = current_date.month
# Build historical URL
zip_url = self.api_client._build_historical_url(year, month)
self.logger.info("Processing historical ZIP file",
year=year, month=month, zip_url=zip_url)
# Fetch ZIP content
zip_content = await self.api_client.fetch_historical_zip(zip_url)
if not zip_content:
self.logger.warning("Failed to fetch historical ZIP", url=zip_url)
# Advance to the next month and keep processing the remaining range
current_date = (current_date.replace(month=month + 1) if month < 12
else current_date.replace(year=year + 1, month=1))
continue
# Process ZIP content with enhanced parsing
month_records = await self._process_historical_zip_enhanced(
zip_content, zip_url, latitude, longitude, nearest_points
)
# Filter by date range - ensure timezone consistency
# Make sure start_date and end_date have timezone info for comparison
start_tz = start_date if start_date.tzinfo else start_date.replace(tzinfo=timezone.utc)
end_tz = end_date if end_date.tzinfo else end_date.replace(tzinfo=timezone.utc)
filtered_records = []
for record in month_records:
record_date = record.get('date')
if not record_date:
continue
# Ensure record date has timezone info
if not record_date.tzinfo:
record_date = record_date.replace(tzinfo=timezone.utc)
# Now compare with consistent timezone info
if start_tz <= record_date <= end_tz:
filtered_records.append(record)
historical_records.extend(filtered_records)
self.logger.info("Month processing completed",
year=year, month=month,
month_records=len(month_records),
filtered_records=len(filtered_records),
total_records=len(historical_records))
# Move to next month
if current_date.month == 12:
current_date = current_date.replace(year=current_date.year + 1, month=1)
else:
current_date = current_date.replace(month=current_date.month + 1)
return historical_records
except Exception as e:
self.logger.error("Error fetching historical data", error=str(e))
return historical_records # Return partial results
async def _process_historical_zip_enhanced(self, zip_content: bytes, zip_url: str,
latitude: float, longitude: float,
nearest_points: List[Tuple[str, Dict[str, Any], float]]) -> List[Dict[str, Any]]:
"""Process historical ZIP file with enhanced parsing"""
try:
import zipfile
import io
import csv
import gc
historical_records = []
nearest_ids = {p[0] for p in nearest_points}
with zipfile.ZipFile(io.BytesIO(zip_content)) as zip_file:
csv_files = [f for f in zip_file.namelist() if f.lower().endswith('.csv')]
for csv_filename in csv_files:
try:
# Read CSV content
with zip_file.open(csv_filename) as csv_file:
text_content = csv_file.read().decode('utf-8', errors='ignore')
# Process CSV in chunks using processor
csv_records = await self.processor.process_csv_content_chunked(
text_content, csv_filename, nearest_ids, nearest_points
)
historical_records.extend(csv_records)
# Force garbage collection
gc.collect()
except Exception as csv_error:
self.logger.warning("Error processing CSV file",
filename=csv_filename,
error=str(csv_error))
continue
self.logger.info("Historical ZIP processing completed",
zip_url=zip_url,
total_records=len(historical_records))
return historical_records
except Exception as e:
self.logger.error("Error processing historical ZIP file",
zip_url=zip_url, error=str(e))
return []
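The date filtering above normalizes naive datetimes to UTC before comparing, because mixing naive and aware values fails at comparison time; a short stdlib illustration:

# Comparing naive and aware datetimes raises TypeError; normalize first.
from datetime import datetime, timezone

naive = datetime(2025, 8, 1, 12, 0)
aware = naive.replace(tzinfo=timezone.utc)
assert aware <= datetime(2025, 8, 2, tzinfo=timezone.utc)  # safe comparison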

services/external/app/external/apis/traffic.py vendored Normal file

@@ -0,0 +1,257 @@
# ================================================================
# services/external/app/external/apis/traffic.py
# ================================================================
"""
Traffic API abstraction layer for multiple cities
"""
from abc import ABC, abstractmethod
from datetime import datetime
from enum import Enum
from typing import Dict, List, Any, Optional
import structlog
logger = structlog.get_logger()
class SupportedCity(Enum):
"""Supported cities for traffic data collection"""
MADRID = "madrid"
BARCELONA = "barcelona"
VALENCIA = "valencia"
class BaseTrafficClient(ABC):
"""
Abstract base class for city-specific traffic clients
Defines the contract that all traffic clients must implement
"""
def __init__(self, city: SupportedCity):
self.city = city
self.logger = structlog.get_logger().bind(city=city.value)
@abstractmethod
async def get_current_traffic(self, latitude: float, longitude: float) -> Optional[Dict[str, Any]]:
"""Get current traffic data for location"""
pass
@abstractmethod
async def get_historical_traffic(self, latitude: float, longitude: float,
start_date: datetime, end_date: datetime) -> List[Dict[str, Any]]:
"""Get historical traffic data"""
pass
@abstractmethod
async def get_events(self, latitude: float, longitude: float, radius_km: float = 5.0) -> List[Dict[str, Any]]:
"""Get traffic incidents and events"""
pass
@abstractmethod
def supports_location(self, latitude: float, longitude: float) -> bool:
"""Check if this client supports the given location"""
pass
class TrafficAPIClientFactory:
"""
Factory class to create appropriate traffic clients based on location
"""
# City geographical bounds
CITY_BOUNDS = {
SupportedCity.MADRID: {
'lat_min': 40.31, 'lat_max': 40.56,
'lon_min': -3.89, 'lon_max': -3.51
},
SupportedCity.BARCELONA: {
'lat_min': 41.32, 'lat_max': 41.47,
'lon_min': 2.05, 'lon_max': 2.25
},
SupportedCity.VALENCIA: {
'lat_min': 39.42, 'lat_max': 39.52,
'lon_min': -0.42, 'lon_max': -0.32
}
}
@classmethod
def get_client_for_location(cls, latitude: float, longitude: float) -> Optional[BaseTrafficClient]:
"""
Get appropriate traffic client for given location
Args:
latitude: Query location latitude
longitude: Query location longitude
Returns:
BaseTrafficClient instance or None if location not supported
"""
try:
# Check each city's bounds
for city, bounds in cls.CITY_BOUNDS.items():
if (bounds['lat_min'] <= latitude <= bounds['lat_max'] and
bounds['lon_min'] <= longitude <= bounds['lon_max']):
logger.info("Location matched to city",
city=city.value, lat=latitude, lon=longitude)
return cls._create_client(city)
# If no specific city matches, try to find closest supported city
closest_city = cls._find_closest_city(latitude, longitude)
if closest_city:
logger.info("Using closest city for location",
closest_city=closest_city.value, lat=latitude, lon=longitude)
return cls._create_client(closest_city)
logger.warning("No traffic client available for location",
lat=latitude, lon=longitude)
return None
except Exception as e:
logger.error("Error getting traffic client for location",
lat=latitude, lon=longitude, error=str(e))
return None
@classmethod
def _create_client(cls, city: SupportedCity) -> BaseTrafficClient:
"""Create traffic client for specific city"""
if city == SupportedCity.MADRID:
from .madrid_traffic_client import MadridTrafficClient
return MadridTrafficClient()
elif city == SupportedCity.BARCELONA:
# Future implementation
raise NotImplementedError(f"Traffic client for {city.value} not yet implemented")
elif city == SupportedCity.VALENCIA:
# Future implementation
raise NotImplementedError(f"Traffic client for {city.value} not yet implemented")
else:
raise ValueError(f"Unsupported city: {city}")
@classmethod
def _find_closest_city(cls, latitude: float, longitude: float) -> Optional[SupportedCity]:
"""Find closest supported city to given coordinates"""
import math
def distance(lat1, lon1, lat2, lon2):
"""Calculate distance between two coordinates"""
R = 6371 # Earth's radius in km
dlat = math.radians(lat2 - lat1)
dlon = math.radians(lon2 - lon1)
a = (math.sin(dlat/2) * math.sin(dlat/2) +
math.cos(math.radians(lat1)) * math.cos(math.radians(lat2)) *
math.sin(dlon/2) * math.sin(dlon/2))
c = 2 * math.atan2(math.sqrt(a), math.sqrt(1-a))
return R * c
min_distance = float('inf')
closest_city = None
# City centers for distance calculation
city_centers = {
SupportedCity.MADRID: (40.4168, -3.7038),
SupportedCity.BARCELONA: (41.3851, 2.1734),
SupportedCity.VALENCIA: (39.4699, -0.3763)
}
for city, (city_lat, city_lon) in city_centers.items():
dist = distance(latitude, longitude, city_lat, city_lon)
if dist < min_distance and dist < 100: # Within 100km
min_distance = dist
closest_city = city
return closest_city
@classmethod
def get_supported_cities(cls) -> List[Dict[str, Any]]:
"""Get list of supported cities with their bounds"""
cities = []
for city, bounds in cls.CITY_BOUNDS.items():
cities.append({
"city": city.value,
"bounds": bounds,
"status": "active" if city == SupportedCity.MADRID else "planned"
})
return cities
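# Usage sketch (illustrative; the coordinates are Madrid city centre and are
# assumptions for the example):
#
#   client = TrafficAPIClientFactory.get_client_for_location(40.4168, -3.7038)
#   if client is not None:
#       data = await client.get_current_traffic(40.4168, -3.7038)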
class UniversalTrafficClient:
"""
Universal traffic client that delegates to appropriate city-specific clients
This is the main interface that external services should use
"""
def __init__(self):
self.factory = TrafficAPIClientFactory()
self.client_cache = {} # Cache clients for performance
async def get_current_traffic(self, latitude: float, longitude: float) -> Optional[Dict[str, Any]]:
"""Get current traffic data for any supported location"""
try:
client = self._get_client_for_location(latitude, longitude)
if client:
return await client.get_current_traffic(latitude, longitude)
else:
logger.warning("No traffic data available for location",
lat=latitude, lon=longitude)
return None
except Exception as e:
logger.error("Error getting current traffic",
lat=latitude, lon=longitude, error=str(e))
return None
async def get_historical_traffic(self, latitude: float, longitude: float,
start_date: datetime, end_date: datetime) -> List[Dict[str, Any]]:
"""Get historical traffic data for any supported location"""
try:
client = self._get_client_for_location(latitude, longitude)
if client:
return await client.get_historical_traffic(latitude, longitude, start_date, end_date)
else:
logger.warning("No historical traffic data available for location",
lat=latitude, lon=longitude)
return []
except Exception as e:
logger.error("Error getting historical traffic",
lat=latitude, lon=longitude, error=str(e))
return []
async def get_events(self, latitude: float, longitude: float, radius_km: float = 5.0) -> List[Dict[str, Any]]:
"""Get traffic events for any supported location"""
try:
client = self._get_client_for_location(latitude, longitude)
if client:
return await client.get_events(latitude, longitude, radius_km)
else:
return []
except Exception as e:
logger.error("Error getting traffic events",
lat=latitude, lon=longitude, error=str(e))
return []
def _get_client_for_location(self, latitude: float, longitude: float) -> Optional[BaseTrafficClient]:
"""Get cached or create new client for location"""
# Cache is keyed per rounded coordinate pair; None results are cached too,
# so unsupported locations are not re-resolved on every call
cache_key = f"{latitude:.4f},{longitude:.4f}"
if cache_key not in self.client_cache:
client = self.factory.get_client_for_location(latitude, longitude)
self.client_cache[cache_key] = client
return self.client_cache[cache_key]
def get_location_info(self, latitude: float, longitude: float) -> Dict[str, Any]:
"""Get information about traffic data availability for location"""
client = self._get_client_for_location(latitude, longitude)
if client:
return {
"supported": True,
"city": client.city.value,
"features": ["current_traffic", "historical_traffic", "events"]
}
else:
return {
"supported": False,
"city": None,
"features": [],
"message": "No traffic data available for this location"
}
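# Usage sketch (illustrative): the universal client is the single entry point;
# city routing and client caching happen internally. Coordinates are example values.
#
#   client = UniversalTrafficClient()
#   info = client.get_location_info(40.4168, -3.7038)
#   if info["supported"]:
#       current = await client.get_current_traffic(40.4168, -3.7038)
#       events = await client.get_events(40.4168, -3.7038, radius_km=2.0)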

View File

@@ -0,0 +1,139 @@
# ================================================================
# services/data/app/external/base_client.py
# ================================================================
"""Base HTTP client for external APIs - Enhanced for AEMET"""
import httpx
from typing import Dict, Any, Optional
import structlog
from datetime import datetime
logger = structlog.get_logger()
class BaseAPIClient:
def __init__(self, base_url: str, api_key: Optional[str] = None):
self.base_url = base_url
self.api_key = api_key
self.timeout = httpx.Timeout(30.0)
async def _get(self, endpoint: str, params: Optional[Dict] = None, headers: Optional[Dict] = None) -> Optional[Dict[str, Any]]:
"""Make GET request"""
try:
url = f"{self.base_url}{endpoint}"
# Add API key to params for AEMET (not headers)
request_params = params or {}
if self.api_key:
request_params["api_key"] = self.api_key
# Add headers if provided
request_headers = headers or {}
logger.debug("Making API request", url=url, params=request_params)
async with httpx.AsyncClient(timeout=self.timeout) as client:
response = await client.get(url, params=request_params, headers=request_headers)
response.raise_for_status()
# Log response for debugging
response_data = response.json()
logger.debug("API response received",
status_code=response.status_code,
response_keys=list(response_data.keys()) if isinstance(response_data, dict) else "non-dict")
return response_data
except httpx.HTTPStatusError as e:
logger.error("HTTP error", status_code=e.response.status_code, url=url, response_text=e.response.text[:200])
return None
except httpx.RequestError as e:
logger.error("Request error", error=str(e), url=url)
return None
except Exception as e:
logger.error("Unexpected error", error=str(e), url=url)
return None
async def _fetch_url_directly(self, url: str, headers: Optional[Dict] = None) -> Optional[Dict[str, Any]]:
"""Fetch data directly from a full URL (for AEMET datos URLs)"""
try:
request_headers = headers or {}
logger.debug("Making direct URL request", url=url)
async with httpx.AsyncClient(timeout=self.timeout) as client:
response = await client.get(url, headers=request_headers)
response.raise_for_status()
# Handle encoding issues common with Spanish data sources
try:
response_data = response.json()
except UnicodeDecodeError:
logger.warning("UTF-8 decode failed, trying alternative encodings", url=url)
# Try common Spanish encodings
for encoding in ['latin-1', 'windows-1252', 'iso-8859-1']:
try:
text_content = response.content.decode(encoding)
import json
response_data = json.loads(text_content)
logger.info("Successfully decoded with encoding", encoding=encoding)
break
except (UnicodeDecodeError, json.JSONDecodeError):
continue
else:
logger.error("Failed to decode response with any encoding", url=url)
return None
logger.debug("Direct URL response received",
status_code=response.status_code,
data_type=type(response_data),
data_length=len(response_data) if isinstance(response_data, (list, dict)) else "unknown")
return response_data
except httpx.HTTPStatusError as e:
logger.error("HTTP error in direct fetch", status_code=e.response.status_code, url=url)
return None
except httpx.RequestError as e:
logger.error("Request error in direct fetch", error=str(e), url=url)
return None
except Exception as e:
logger.error("Unexpected error in direct fetch", error=str(e), url=url)
return None
async def _post(self, endpoint: str, data: Optional[Dict] = None, headers: Optional[Dict] = None) -> Optional[Dict[str, Any]]:
"""Make POST request"""
try:
url = f"{self.base_url}{endpoint}"
request_headers = headers or {}
if self.api_key:
request_headers["Authorization"] = f"Bearer {self.api_key}"
async with httpx.AsyncClient(timeout=self.timeout) as client:
response = await client.post(url, json=data, headers=request_headers)
response.raise_for_status()
return response.json()
except httpx.HTTPStatusError as e:
logger.error("HTTP error", status_code=e.response.status_code, url=url)
return None
except httpx.RequestError as e:
logger.error("Request error", error=str(e), url=url)
return None
except Exception as e:
logger.error("Unexpected error", error=str(e), url=url)
return None
async def get_direct(self, url: str, headers: Optional[Dict] = None, timeout: Optional[int] = None) -> httpx.Response:
"""
Public GET method for direct HTTP requests
Returns the raw httpx Response object for maximum flexibility
"""
request_headers = headers or {}
request_timeout = httpx.Timeout(timeout if timeout else 30.0)
async with httpx.AsyncClient(timeout=request_timeout, follow_redirects=True) as client:
response = await client.get(url, headers=request_headers)
response.raise_for_status()
return response
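# Usage sketch (illustrative; the endpoint path is an assumption): AEMET replies
# with an envelope whose "datos" field is a second URL holding the real payload,
# which is why _fetch_url_directly() exists alongside _get().
#
#   client = BaseAPIClient("https://opendata.aemet.es/opendata", api_key="<key>")
#   envelope = await client._get("/api/prediccion/especifica/municipio/diaria/28079")
#   if envelope and "datos" in envelope:
#       payload = await client._fetch_url_directly(envelope["datos"])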

View File

@@ -0,0 +1,12 @@
# ================================================================
# services/data/app/external/clients/__init__.py
# ================================================================
"""
HTTP clients package
"""
from .madrid_client import MadridTrafficAPIClient
__all__ = [
'MadridTrafficAPIClient'
]

View File

@@ -0,0 +1,159 @@
# ================================================================
# services/data/app/external/clients/madrid_client.py
# ================================================================
"""
Pure HTTP client for Madrid traffic APIs
Handles only HTTP communication and response decoding
"""
import httpx
import structlog
from datetime import datetime
from typing import Optional, Dict, Any
from ..base_client import BaseAPIClient
class MadridTrafficAPIClient(BaseAPIClient):
"""Pure HTTP client for Madrid traffic APIs"""
TRAFFIC_ENDPOINT = "https://informo.madrid.es/informo/tmadrid/pm.xml"
MEASUREMENT_POINTS_URL = "https://datos.madrid.es/egob/catalogo/202468-263-intensidad-trafico.csv"
def __init__(self):
super().__init__(base_url="https://datos.madrid.es")
self.logger = structlog.get_logger()
def _decode_response_content(self, response) -> Optional[str]:
"""Decode response content with multiple encoding attempts"""
try:
return response.text
except UnicodeDecodeError:
# Try manual encoding for Spanish content
for encoding in ['utf-8', 'latin-1', 'windows-1252', 'iso-8859-1']:
try:
content = response.content.decode(encoding)
if content and len(content) > 100:
self.logger.debug("Successfully decoded with encoding", encoding=encoding)
return content
except UnicodeDecodeError:
continue
return None
def _build_historical_url(self, year: int, month: int) -> str:
"""Build historical ZIP URL for given year and month"""
# Madrid historical data URL pattern
base_url = "https://datos.madrid.es/egob/catalogo/208627"
# URL numbering pattern (this may need adjustment based on actual URLs)
# Note: Historical data is only available for past periods, not current/future
if year == 2023:
url_number = 116 + (month - 1) # 116-127 for 2023
elif year == 2024:
url_number = 128 + (month - 1) # 128-139 for 2024
elif year == 2025:
# For 2025, use the continuing numbering from 2024
url_number = 140 + (month - 1) # Starting from 140 for January 2025
else:
url_number = 116 # Fallback to 2023 data
return f"{base_url}-{url_number}-transporte-ptomedida-historico.zip"
async def fetch_current_traffic_xml(self, endpoint: Optional[str] = None) -> Optional[str]:
"""Fetch current traffic XML data"""
endpoint = endpoint or self.TRAFFIC_ENDPOINT
try:
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
'Accept': 'application/xml,text/xml,*/*',
'Accept-Language': 'es-ES,es;q=0.9,en;q=0.8',
'Accept-Encoding': 'gzip, deflate, br',
'Cache-Control': 'no-cache',
'Referer': 'https://datos.madrid.es/'
}
response = await self.get_direct(endpoint, headers=headers, timeout=30)
# Defensive check; get_direct() already raises for non-2xx responses
if not response or response.status_code != 200:
self.logger.warning("Failed to fetch XML data",
endpoint=endpoint,
status=response.status_code if response else None)
return None
# Get XML content with encoding handling
xml_content = self._decode_response_content(response)
if not xml_content:
self.logger.debug("No XML content received", endpoint=endpoint)
return None
self.logger.debug("Madrid XML content fetched",
length=len(xml_content),
endpoint=endpoint)
return xml_content
except Exception as e:
self.logger.error("Error fetching traffic XML data",
endpoint=endpoint,
error=str(e))
return None
async def fetch_measurement_points_csv(self, url: Optional[str] = None) -> Optional[str]:
"""Fetch measurement points CSV data"""
url = url or self.MEASUREMENT_POINTS_URL
try:
async with httpx.AsyncClient(
timeout=30.0,
headers={
'User-Agent': 'MadridTrafficClient/2.0',
'Accept': 'text/csv,application/csv,*/*'
},
follow_redirects=True
) as client:
self.logger.debug("Fetching measurement points registry", url=url)
response = await client.get(url)
if response.status_code == 200:
return response.text
else:
self.logger.warning("Failed to fetch measurement points",
status=response.status_code, url=url)
return None
except Exception as e:
self.logger.error("Error fetching measurement points registry",
url=url, error=str(e))
return None
async def fetch_historical_zip(self, zip_url: str) -> Optional[bytes]:
"""Fetch historical traffic ZIP file"""
try:
async with httpx.AsyncClient(
timeout=120.0, # Longer timeout for large files
headers={
'User-Agent': 'MadridTrafficClient/2.0',
'Accept': 'application/zip,*/*'
},
follow_redirects=True
) as client:
self.logger.debug("Fetching historical ZIP", url=zip_url)
response = await client.get(zip_url)
if response.status_code == 200:
self.logger.debug("Historical ZIP fetched",
url=zip_url,
size=len(response.content))
return response.content
else:
self.logger.warning("Failed to fetch historical ZIP",
status=response.status_code, url=zip_url)
return None
except Exception as e:
self.logger.error("Error fetching historical ZIP",
url=zip_url, error=str(e))
return None

View File

@@ -0,0 +1,20 @@
# ================================================================
# services/data/app/external/models/__init__.py
# ================================================================
"""
Madrid traffic models package
"""
from .madrid_models import (
TrafficServiceLevel,
CongestionLevel,
MeasurementPoint,
TrafficRecord
)
__all__ = [
'TrafficServiceLevel',
'CongestionLevel',
'MeasurementPoint',
'TrafficRecord'
]

View File

@@ -0,0 +1,66 @@
# ================================================================
# services/data/app/external/models/madrid_models.py
# ================================================================
"""
Data structures, enums, and dataclasses for Madrid traffic system
"""
from dataclasses import dataclass
from datetime import datetime
from enum import Enum
from typing import Optional
class TrafficServiceLevel(Enum):
"""Madrid traffic service levels"""
FLUID = 0
DENSE = 1
CONGESTED = 2
BLOCKED = 3
class CongestionLevel(Enum):
"""Standardized congestion levels"""
LOW = "low"
MEDIUM = "medium"
HIGH = "high"
BLOCKED = "blocked"
@dataclass
class MeasurementPoint:
"""Madrid measurement point data structure"""
id: str
latitude: float
longitude: float
distance: float
name: str
type: str
@dataclass
class TrafficRecord:
"""Standardized traffic record with pedestrian inference"""
date: datetime
traffic_volume: int
occupation_percentage: int
load_percentage: int
average_speed: int
congestion_level: str
pedestrian_count: int
measurement_point_id: str
measurement_point_name: str
road_type: str
source: str
district: Optional[str] = None
# Madrid-specific data
intensidad_raw: Optional[int] = None
ocupacion_raw: Optional[int] = None
carga_raw: Optional[int] = None
vmed_raw: Optional[int] = None
# Pedestrian inference metadata
pedestrian_multiplier: Optional[float] = None
time_pattern_factor: Optional[float] = None
district_factor: Optional[float] = None

View File

@@ -0,0 +1,14 @@
# ================================================================
# services/data/app/external/processors/__init__.py
# ================================================================
"""
Data processors package
"""
from .madrid_processor import MadridTrafficDataProcessor
from .madrid_business_logic import MadridTrafficAnalyzer
__all__ = [
'MadridTrafficDataProcessor',
'MadridTrafficAnalyzer'
]

View File

@@ -0,0 +1,346 @@
# ================================================================
# services/data/app/external/processors/madrid_business_logic.py
# ================================================================
"""
Business rules, inference, and domain logic for Madrid traffic data
Handles pedestrian inference, district mapping, road classification, and validation
"""
import math
import re
from datetime import datetime
from typing import Dict, List, Any, Optional, Tuple
import structlog
from ..models.madrid_models import TrafficRecord, CongestionLevel
class MadridTrafficAnalyzer:
"""Handles business logic for Madrid traffic analysis"""
# Madrid district characteristics for pedestrian patterns
DISTRICT_MULTIPLIERS = {
'Centro': 2.5, # Historic center, high pedestrian activity
'Salamanca': 2.0, # Shopping area, high foot traffic
'Chamberí': 1.8, # Business district
'Retiro': 2.2, # Near park, high leisure activity
'Chamartín': 1.6, # Business/residential
'Tetuán': 1.4, # Mixed residential/commercial
'Fuencarral': 1.3, # Residential with commercial areas
'Moncloa': 1.7, # University area
'Latina': 1.5, # Residential area
'Carabanchel': 1.2, # Residential periphery
'Usera': 1.1, # Industrial/residential
'Villaverde': 1.0, # Industrial area
'Villa de Vallecas': 1.0, # Peripheral residential
'Vicálvaro': 0.9, # Peripheral
'San Blas': 1.1, # Residential
'Barajas': 0.8, # Airport area, low pedestrian activity
'Hortaleza': 1.2, # Mixed area
'Ciudad Lineal': 1.3, # Linear development
'Puente de Vallecas': 1.2, # Working class area
'Moratalaz': 1.1, # Residential
'Arganzuela': 1.6, # Near center, growing area
}
# Time-based patterns (hour of day)
TIME_PATTERNS = {
'morning_peak': {'hours': [7, 8, 9], 'multiplier': 2.0},
'lunch_peak': {'hours': [12, 13, 14], 'multiplier': 2.5},
'evening_peak': {'hours': [18, 19, 20], 'multiplier': 2.2},
'afternoon': {'hours': [15, 16, 17], 'multiplier': 1.8},
'late_evening': {'hours': [21, 22], 'multiplier': 1.5},
'night': {'hours': [23, 0, 1, 2, 3, 4, 5, 6], 'multiplier': 0.3},
'morning': {'hours': [10, 11], 'multiplier': 1.4}
}
# Road type specific patterns
ROAD_TYPE_BASE = {
'URB': 250, # Urban streets - high pedestrian activity
'M30': 50, # Ring road - minimal pedestrians
'C30': 75, # Secondary ring - some pedestrian access
'A': 25, # Highways - very low pedestrians
'R': 40 # Radial roads - low to moderate
}
# Weather impact on pedestrian activity
WEATHER_IMPACT = {
'rain': 0.6, # 40% reduction in rain
'hot_weather': 0.8, # 20% reduction when very hot
'cold_weather': 0.7, # 30% reduction when very cold
'normal': 1.0 # No impact
}
def __init__(self):
self.logger = structlog.get_logger()
def calculate_pedestrian_flow(
self,
traffic_record: TrafficRecord,
location_context: Optional[Dict[str, Any]] = None
) -> Tuple[int, Dict[str, float]]:
"""
Calculate pedestrian flow estimate with detailed metadata
Returns:
Tuple of (pedestrian_count, inference_metadata)
"""
# Base calculation from road type
road_type = traffic_record.road_type or 'URB'
base_pedestrians = self.ROAD_TYPE_BASE.get(road_type, 200)
# Time pattern adjustment
hour = traffic_record.date.hour
time_factor = self._get_time_pattern_factor(hour)
# District adjustment (if available)
district_factor = 1.0
district = traffic_record.district or self.infer_district_from_location(location_context)
if district:
district_factor = self.DISTRICT_MULTIPLIERS.get(district, 1.0)
# Traffic correlation adjustment
traffic_factor = self._calculate_traffic_correlation(traffic_record)
# Weather adjustment (if data available)
weather_factor = self._get_weather_factor(traffic_record.date, location_context)
# Weekend adjustment
weekend_factor = self._get_weekend_factor(traffic_record.date)
# Combined calculation
pedestrian_count = int(
base_pedestrians *
time_factor *
district_factor *
traffic_factor *
weather_factor *
weekend_factor
)
# Ensure reasonable bounds
pedestrian_count = max(10, min(2000, pedestrian_count))
# Metadata for model training
inference_metadata = {
'base_pedestrians': base_pedestrians,
'time_factor': time_factor,
'district_factor': district_factor,
'traffic_factor': traffic_factor,
'weather_factor': weather_factor,
'weekend_factor': weekend_factor,
'inferred_district': district,
'hour': hour,
'road_type': road_type
}
return pedestrian_count, inference_metadata
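# Worked example (hypothetical record): URB road in Centro, weekday 13:00 in
# May, load_percentage=50 -> 250 (base) * 2.5 (lunch_peak) * 2.5 (Centro)
# * 1.3 (urban sweet spot) * 1.1 (spring weather) * 1.0 (weekday)
# = 2234, clamped to the 2000 upper bound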
def _get_time_pattern_factor(self, hour: int) -> float:
"""Get time-based pedestrian activity multiplier"""
for pattern, config in self.TIME_PATTERNS.items():
if hour in config['hours']:
return config['multiplier']
return 1.0 # Default multiplier
def _calculate_traffic_correlation(self, traffic_record: TrafficRecord) -> float:
"""
Calculate pedestrian correlation with traffic patterns
Higher traffic in urban areas often correlates with more pedestrians
"""
if traffic_record.road_type == 'URB':
# Urban areas: moderate traffic indicates commercial activity
if 30 <= traffic_record.load_percentage <= 70:
return 1.3 # Sweet spot for pedestrian activity
elif traffic_record.load_percentage > 70:
return 0.9 # Too congested, pedestrians avoid
else:
return 1.0 # Normal correlation
else:
# Highway/ring roads: more traffic = fewer pedestrians
if traffic_record.load_percentage > 60:
return 0.5
else:
return 0.8
def _get_weather_factor(self, date: datetime, location_context: Optional[Dict] = None) -> float:
"""Estimate weather impact on pedestrian activity"""
# Simplified weather inference based on season and typical Madrid patterns
month = date.month
# Madrid seasonal patterns
if month in [12, 1, 2]: # Winter - cold weather impact
return self.WEATHER_IMPACT['cold_weather']
elif month in [7, 8]: # Summer - hot weather impact
return self.WEATHER_IMPACT['hot_weather']
elif month in [10, 11, 3, 4]: # Rainy seasons - moderate impact
return 0.85
else: # Spring/early summer - optimal weather
return 1.1
def _get_weekend_factor(self, date: datetime) -> float:
"""Weekend vs weekday pedestrian patterns"""
weekday = date.weekday()
hour = date.hour
if weekday >= 5: # Weekend
if 11 <= hour <= 16: # Weekend shopping/leisure hours
return 1.4
elif 20 <= hour <= 23: # Weekend evening activity
return 1.3
else:
return 0.9
else: # Weekday
return 1.0
def infer_district_from_location(self, location_context: Optional[Dict] = None) -> Optional[str]:
"""
Infer Madrid district from location context or coordinates
"""
if not location_context:
return None
lat = location_context.get('latitude')
lon = location_context.get('longitude')
if lat is None or lon is None:
return None
# Madrid district boundaries (simplified boundaries for inference)
districts = {
# Central districts
'Centro': {'lat_min': 40.405, 'lat_max': 40.425, 'lon_min': -3.720, 'lon_max': -3.690},
'Arganzuela': {'lat_min': 40.385, 'lat_max': 40.410, 'lon_min': -3.720, 'lon_max': -3.680},
'Retiro': {'lat_min': 40.405, 'lat_max': 40.425, 'lon_min': -3.690, 'lon_max': -3.660},
'Salamanca': {'lat_min': 40.420, 'lat_max': 40.445, 'lon_min': -3.690, 'lon_max': -3.660},
'Chamartín': {'lat_min': 40.445, 'lat_max': 40.480, 'lon_min': -3.690, 'lon_max': -3.660},
'Tetuán': {'lat_min': 40.445, 'lat_max': 40.470, 'lon_min': -3.720, 'lon_max': -3.690},
'Chamberí': {'lat_min': 40.425, 'lat_max': 40.450, 'lon_min': -3.720, 'lon_max': -3.690},
'Fuencarral-El Pardo': {'lat_min': 40.470, 'lat_max': 40.540, 'lon_min': -3.750, 'lon_max': -3.650},
'Moncloa-Aravaca': {'lat_min': 40.430, 'lat_max': 40.480, 'lon_min': -3.750, 'lon_max': -3.720},
'Latina': {'lat_min': 40.380, 'lat_max': 40.420, 'lon_min': -3.750, 'lon_max': -3.720},
'Carabanchel': {'lat_min': 40.350, 'lat_max': 40.390, 'lon_min': -3.750, 'lon_max': -3.720},
'Usera': {'lat_min': 40.350, 'lat_max': 40.385, 'lon_min': -3.720, 'lon_max': -3.690},
'Puente de Vallecas': {'lat_min': 40.370, 'lat_max': 40.410, 'lon_min': -3.680, 'lon_max': -3.640},
'Moratalaz': {'lat_min': 40.400, 'lat_max': 40.430, 'lon_min': -3.650, 'lon_max': -3.620},
'Ciudad Lineal': {'lat_min': 40.430, 'lat_max': 40.460, 'lon_min': -3.650, 'lon_max': -3.620},
'Hortaleza': {'lat_min': 40.460, 'lat_max': 40.500, 'lon_min': -3.650, 'lon_max': -3.620},
'Villaverde': {'lat_min': 40.320, 'lat_max': 40.360, 'lon_min': -3.720, 'lon_max': -3.680},
}
# Find matching district
for district_name, bounds in districts.items():
if (bounds['lat_min'] <= lat <= bounds['lat_max'] and
bounds['lon_min'] <= lon <= bounds['lon_max']):
return district_name
# Default for coordinates in Madrid but not matching specific districts
if 40.3 <= lat <= 40.6 and -3.8 <= lon <= -3.5:
return 'Other Madrid'
return None
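# Examples (per the simplified bounds above):
#   infer_district_from_location({'latitude': 40.415, 'longitude': -3.705})  # -> 'Centro'
#   infer_district_from_location({'latitude': 40.45, 'longitude': -3.55})    # -> 'Other Madrid'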
def classify_road_type(self, measurement_point_name: str) -> str:
"""Classify road type based on measurement point name"""
if not measurement_point_name:
return 'URB' # Default to urban
name_upper = measurement_point_name.upper()
# Highway patterns
if any(pattern in name_upper for pattern in ['A-', 'AP-', 'AUTOPISTA', 'AUTOVIA']):
return 'A'
# M-30 Ring road
if 'M-30' in name_upper or 'M30' in name_upper:
return 'M30'
# Other M roads (ring roads)
if re.search(r'M-[0-9]', name_upper) or re.search(r'M[0-9]', name_upper):
return 'C30'
# Radial roads (R-1, R-2, etc.)
if re.search(r'R-[0-9]', name_upper) or 'RADIAL' in name_upper:
return 'R'
# Default to urban street
return 'URB'
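# Examples (per the patterns above): 'M-30 Nudo Sur' -> 'M30', 'A-5 PK 3' -> 'A',
# 'R-4 Salida 12' -> 'R', 'Calle de Alcalá' -> 'URB'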
def validate_madrid_coordinates(self, lat: float, lon: float) -> bool:
"""Validate coordinates are within Madrid bounds"""
# Madrid metropolitan area bounds
return 40.3 <= lat <= 40.6 and -3.8 <= lon <= -3.5
def get_congestion_level(self, occupation_pct: float) -> str:
"""Convert occupation percentage to congestion level"""
if occupation_pct >= 80:
return CongestionLevel.BLOCKED.value
elif occupation_pct >= 50:
return CongestionLevel.HIGH.value
elif occupation_pct >= 25:
return CongestionLevel.MEDIUM.value
else:
return CongestionLevel.LOW.value
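# Examples: 85% -> 'blocked', 62% -> 'high', 30% -> 'medium', 10% -> 'low'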
def calculate_distance(self, lat1: float, lon1: float, lat2: float, lon2: float) -> float:
"""Calculate distance between two points in kilometers using Haversine formula"""
R = 6371 # Earth's radius in kilometers
dlat = math.radians(lat2 - lat1)
dlon = math.radians(lon2 - lon1)
a = (math.sin(dlat/2) * math.sin(dlat/2) +
math.cos(math.radians(lat1)) * math.cos(math.radians(lat2)) *
math.sin(dlon/2) * math.sin(dlon/2))
c = 2 * math.atan2(math.sqrt(a), math.sqrt(1-a))
return R * c
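# Sanity check: calculate_distance(40.4168, -3.7038, 41.3851, 2.1734)
# (Madrid -> Barcelona) returns roughly 505 km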
def find_nearest_traffic_point(self, traffic_points: List[Dict[str, Any]],
latitude: float, longitude: float) -> Optional[Dict[str, Any]]:
"""Find the nearest traffic point to given coordinates"""
if not traffic_points:
return None
min_distance = float('inf')
nearest_point = None
for point in traffic_points:
point_lat = point.get('latitude')
point_lon = point.get('longitude')
if point_lat is not None and point_lon is not None:
distance = self.calculate_distance(latitude, longitude, point_lat, point_lon)
if distance < min_distance:
min_distance = distance
nearest_point = point
return nearest_point
def find_nearest_measurement_points(self, measurement_points: Dict[str, Dict[str, Any]],
latitude: float, longitude: float,
num_points: int = 3, max_distance_km: Optional[float] = 5.0) -> List[Tuple[str, Dict[str, Any], float]]:
"""Find nearest measurement points for historical data"""
distances = []
for point_id, point_data in measurement_points.items():
point_lat = point_data.get('latitude')
point_lon = point_data.get('longitude')
if point_lat is not None and point_lon is not None:
distance_km = self.calculate_distance(latitude, longitude, point_lat, point_lon)
distances.append((point_id, point_data, distance_km))
# Sort by distance and take nearest points
distances.sort(key=lambda x: x[2])
# Apply distance filter if specified
if max_distance_km is not None:
distances = [p for p in distances if p[2] <= max_distance_km]
nearest = distances[:num_points]
self.logger.info("Found nearest measurement points",
count=len(nearest),
nearest_distance_km=nearest[0][2] if nearest else None)
return nearest

View File

@@ -0,0 +1,478 @@
# ================================================================
# services/data/app/external/processors/madrid_processor.py
# ================================================================
"""
Data transformation and parsing for Madrid traffic data
Handles XML parsing, CSV processing, coordinate conversion, and data quality scoring
"""
import csv
import io
import math
import re
import xml.etree.ElementTree as ET
import zipfile
from datetime import datetime, timezone
from typing import Dict, List, Any, Optional, Tuple
import structlog
import pyproj
from ..models.madrid_models import TrafficRecord, MeasurementPoint, CongestionLevel
class MadridTrafficDataProcessor:
"""Handles all data transformation and parsing for Madrid traffic data"""
def __init__(self):
self.logger = structlog.get_logger()
# UTM Zone 30N (Madrid's coordinate system)
self.utm_proj = pyproj.Proj(proj='utm', zone=30, ellps='WGS84', datum='WGS84')
self.wgs84_proj = pyproj.Proj(proj='latlong', ellps='WGS84', datum='WGS84')
# Reusable transformer; the module-level pyproj.transform() is deprecated
# and removed in pyproj 3.x
self.utm_to_wgs84 = pyproj.Transformer.from_proj(self.utm_proj, self.wgs84_proj, always_xy=True)
def safe_int(self, value: str) -> int:
"""Safely convert string to int (handles Spanish decimal commas and None)"""
try:
return int(float(value.replace(',', '.')))
except (ValueError, TypeError, AttributeError):
return 0
def _safe_float(self, value: str) -> float:
"""Safely convert string to float (handles Spanish decimal commas and None)"""
try:
return float(value.replace(',', '.'))
except (ValueError, TypeError, AttributeError):
return 0.0
def clean_madrid_xml(self, xml_content: str) -> str:
"""Clean and prepare Madrid XML content for parsing"""
if not xml_content:
return ""
# Remove BOM and extra whitespace
cleaned = xml_content.strip()
if cleaned.startswith('\ufeff'):
cleaned = cleaned[1:]
# Fix common XML issues
cleaned = re.sub(r'&(?!amp;|lt;|gt;|quot;|apos;)', '&amp;', cleaned)
# Ensure proper encoding declaration
if not cleaned.startswith('<?xml'):
cleaned = '<?xml version="1.0" encoding="UTF-8"?>\n' + cleaned
return cleaned
def convert_utm_to_latlon(self, utm_x: str, utm_y: str) -> Tuple[Optional[float], Optional[float]]:
"""Convert UTM coordinates to latitude/longitude"""
try:
utm_x_float = float(utm_x.replace(',', '.'))
utm_y_float = float(utm_y.replace(',', '.'))
# Convert from UTM Zone 30N to WGS84
longitude, latitude = self.utm_to_wgs84.transform(utm_x_float, utm_y_float)
# Validate coordinates are in Madrid area
if 40.3 <= latitude <= 40.6 and -3.8 <= longitude <= -3.5:
return latitude, longitude
else:
self.logger.debug("Coordinates outside Madrid bounds",
lat=latitude, lon=longitude, utm_x=utm_x, utm_y=utm_y)
return None, None
except Exception as e:
self.logger.debug("UTM conversion error",
utm_x=utm_x, utm_y=utm_y, error=str(e))
return None, None
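# Example (approximate values for Puerta del Sol, assumed for illustration):
# convert_utm_to_latlon('440335', '4474276') -> (~40.4169, ~-3.7035)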
def parse_traffic_xml(self, xml_content: str) -> List[Dict[str, Any]]:
"""Parse Madrid traffic XML data"""
traffic_points = []
try:
cleaned_xml = self.clean_madrid_xml(xml_content)
root = ET.fromstring(cleaned_xml)
self.logger.debug("Madrid XML structure", root_tag=root.tag, children_count=len(list(root)))
if root.tag == 'pms':
pm_elements = root.findall('pm')
self.logger.debug("Found PM elements", count=len(pm_elements))
for pm in pm_elements:
try:
traffic_point = self._extract_madrid_pm_element(pm)
if self._is_valid_traffic_point(traffic_point):
traffic_points.append(traffic_point)
# Log first few points for debugging
if len(traffic_points) <= 3:
self.logger.debug("Sample traffic point",
id=traffic_point['idelem'],
lat=traffic_point['latitude'],
lon=traffic_point['longitude'],
intensity=traffic_point.get('intensidad'))
except Exception as e:
self.logger.debug("Error parsing PM element", error=str(e))
continue
else:
self.logger.warning("Unexpected XML root tag", root_tag=root.tag)
self.logger.debug("Madrid traffic XML parsing completed", valid_points=len(traffic_points))
return traffic_points
except ET.ParseError as e:
self.logger.warning("Failed to parse Madrid XML", error=str(e))
return self._extract_traffic_data_regex(xml_content)
except Exception as e:
self.logger.error("Error in Madrid traffic XML parsing", error=str(e))
return []
def _extract_madrid_pm_element(self, pm_element) -> Dict[str, Any]:
"""Extract traffic data from Madrid <pm> element with coordinate conversion"""
try:
point_data = {}
utm_x = utm_y = None
# Extract all child elements
for child in pm_element:
tag, text = child.tag, child.text.strip() if child.text else ''
if tag == 'idelem':
point_data['idelem'] = text
elif tag == 'descripcion':
point_data['descripcion'] = text
elif tag == 'intensidad':
point_data['intensidad'] = self.safe_int(text)
elif tag == 'ocupacion':
point_data['ocupacion'] = self._safe_float(text)
elif tag == 'carga':
point_data['carga'] = self.safe_int(text)
elif tag == 'nivelServicio':
point_data['nivelServicio'] = self.safe_int(text)
elif tag == 'st_x': # UTM X coordinate
utm_x = text
point_data['utm_x'] = text
elif tag == 'st_y': # UTM Y coordinate
utm_y = text
point_data['utm_y'] = text
elif tag == 'error':
point_data['error'] = text
elif tag in ['subarea', 'accesoAsociado', 'intensidadSat']:
point_data[tag] = text
# Convert coordinates
if utm_x and utm_y:
latitude, longitude = self.convert_utm_to_latlon(utm_x, utm_y)
if latitude and longitude:
point_data.update({
'latitude': latitude,
'longitude': longitude,
'measurement_point_id': point_data.get('idelem'),
'measurement_point_name': point_data.get('descripcion'),
'timestamp': datetime.now(timezone.utc),
'source': 'madrid_opendata_xml'
})
return point_data
else:
self.logger.debug("Invalid coordinates after conversion",
idelem=point_data.get('idelem'), utm_x=utm_x, utm_y=utm_y)
return {}
else:
self.logger.debug("Missing UTM coordinates", idelem=point_data.get('idelem'))
return {}
except Exception as e:
self.logger.debug("Error extracting PM element", error=str(e))
return {}
def _is_valid_traffic_point(self, traffic_point: Dict[str, Any]) -> bool:
"""Validate traffic point data"""
required_fields = ['idelem', 'latitude', 'longitude']
return all(field in traffic_point and traffic_point[field] for field in required_fields)
def _extract_traffic_data_regex(self, xml_content: str) -> List[Dict[str, Any]]:
"""Fallback regex-based extraction if XML parsing fails"""
traffic_points = []
try:
# Pattern to match PM elements
pm_pattern = r'<pm>(.*?)</pm>'
pm_matches = re.findall(pm_pattern, xml_content, re.DOTALL)
for pm_content in pm_matches:
traffic_point = {}
# Extract key fields
patterns = {
'idelem': r'<idelem>(.*?)</idelem>',
'descripcion': r'<descripcion>(.*?)</descripcion>',
'intensidad': r'<intensidad>(.*?)</intensidad>',
'ocupacion': r'<ocupacion>(.*?)</ocupacion>',
'st_x': r'<st_x>(.*?)</st_x>',
'st_y': r'<st_y>(.*?)</st_y>'
}
for field, pattern in patterns.items():
match = re.search(pattern, pm_content)
if match:
traffic_point[field] = match.group(1).strip()
# Convert coordinates
if 'st_x' in traffic_point and 'st_y' in traffic_point:
latitude, longitude = self.convert_utm_to_latlon(
traffic_point['st_x'], traffic_point['st_y']
)
if latitude and longitude:
traffic_point.update({
'latitude': latitude,
'longitude': longitude,
'intensidad': self.safe_int(traffic_point.get('intensidad', '0')),
'ocupacion': self._safe_float(traffic_point.get('ocupacion', '0')),
'measurement_point_id': traffic_point.get('idelem'),
'measurement_point_name': traffic_point.get('descripcion'),
'timestamp': datetime.now(timezone.utc),
'source': 'madrid_opendata_xml_regex'
})
traffic_points.append(traffic_point)
self.logger.debug("Regex extraction completed", points=len(traffic_points))
return traffic_points
except Exception as e:
self.logger.error("Error in regex extraction", error=str(e))
return []
def parse_measurement_points_csv(self, csv_content: str) -> Dict[str, Dict[str, Any]]:
"""Parse measurement points CSV into lookup dictionary"""
measurement_points = {}
try:
# Parse CSV with semicolon delimiter
csv_reader = csv.DictReader(io.StringIO(csv_content), delimiter=';')
processed_count = 0
for row in csv_reader:
try:
# Extract point ID and coordinates
point_id = row.get('id', '').strip()
if not point_id:
continue
processed_count += 1
# Try different coordinate field names
lat_str = ''
lon_str = ''
# Common coordinate field patterns
lat_fields = ['lat', 'latitude', 'latitud', 'y', 'utm_y']
lon_fields = ['lon', 'lng', 'longitude', 'longitud', 'x', 'utm_x']
for field in lat_fields:
if field in row and row[field].strip():
lat_str = row[field].strip()
break
for field in lon_fields:
if field in row and row[field].strip():
lon_str = row[field].strip()
break
if lat_str and lon_str:
try:
# Try direct lat/lon first
latitude = self._safe_float(lat_str)
longitude = self._safe_float(lon_str)
# If values look like UTM coordinates, convert them
if latitude > 1000 or longitude > 1000:
latitude, longitude = self.convert_utm_to_latlon(lon_str, lat_str)
if not latitude or not longitude:
continue
# Validate Madrid area
if not (40.3 <= latitude <= 40.6 and -3.8 <= longitude <= -3.5):
continue
measurement_points[point_id] = {
'id': point_id,
'latitude': latitude,
'longitude': longitude,
'name': row.get('nombre', row.get('descripcion', f"Point {point_id}")),
'type': row.get('tipo', 'traffic'),
'raw_data': dict(row) # Keep original data
}
except Exception as e:
self.logger.debug("Error processing point coordinates",
point_id=point_id, error=str(e))
continue
except Exception as e:
self.logger.debug("Error processing CSV row", error=str(e))
continue
self.logger.info("Parsed measurement points registry",
total_points=len(measurement_points))
return measurement_points
except Exception as e:
self.logger.error("Error parsing measurement points CSV", error=str(e))
return {}
def calculate_data_quality_score(self, row: Dict[str, str]) -> float:
"""Calculate data quality score for a traffic record"""
try:
score = 1.0
# Check for missing or invalid values
intensidad = row.get('intensidad', '').strip()
if not intensidad or intensidad in ['N', '', '0']:
score *= 0.7
ocupacion = row.get('ocupacion', '').strip()
if not ocupacion or ocupacion in ['N', '', '0']:
score *= 0.8
error_status = row.get('error', '').strip()
if error_status and error_status != 'N':
score *= 0.6
# Check for reasonable value ranges
try:
intensidad_val = self.safe_int(intensidad)
if intensidad_val < 0 or intensidad_val > 5000: # Unrealistic traffic volume
score *= 0.7
ocupacion_val = self.safe_int(ocupacion)
if ocupacion_val < 0 or ocupacion_val > 100: # Invalid percentage
score *= 0.5
except (ValueError, TypeError):
score *= 0.6
return max(0.1, score) # Minimum quality score
except Exception as e:
self.logger.debug("Error calculating quality score", error=str(e))
return 0.5 # Default medium quality
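# Worked example (hypothetical row): {'intensidad': '', 'ocupacion': 'N',
# 'error': 'S'} -> 1.0 * 0.7 * 0.8 * 0.6 = 0.336 (the range checks add no
# penalty because safe_int() maps the missing values to 0)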
async def process_csv_content_chunked(self, text_content: str, csv_filename: str,
nearest_ids: set, nearest_points: list) -> list:
"""Process CSV content in chunks to prevent memory issues"""
import gc  # csv and io are already imported at module level
try:
csv_reader = csv.DictReader(io.StringIO(text_content), delimiter=';')
chunk_size = 10000
chunk_records = []
all_records = []
processed_count = 0
total_rows_seen = 0
for row in csv_reader:
total_rows_seen += 1
measurement_point_id = row.get('id', '').strip()
if measurement_point_id not in nearest_ids:
continue
try:
record_data = await self.parse_historical_csv_row(row, nearest_points)
if record_data:
chunk_records.append(record_data)
processed_count += 1
if len(chunk_records) >= chunk_size:
all_records.extend(chunk_records)
chunk_records = []
gc.collect()
except Exception as e:
if processed_count < 5:
self.logger.error("Row parsing exception",
row_num=total_rows_seen,
measurement_point_id=measurement_point_id,
error=str(e))
continue
# Process remaining records
if chunk_records:
all_records.extend(chunk_records)
chunk_records = []
gc.collect()
self.logger.info("Processed CSV file",
filename=csv_filename,
total_rows_read=total_rows_seen,
processed_records=processed_count)
return all_records
except Exception as e:
self.logger.error("Error processing CSV content",
filename=csv_filename, error=str(e))
return []
async def parse_historical_csv_row(self, row: dict, nearest_points: list) -> Optional[dict]:
"""Parse a single row from Madrid's historical traffic CSV"""
try:
# Extract date
fecha_str = row.get('fecha', '').strip()
if not fecha_str:
return None
try:
date_obj = datetime.strptime(fecha_str, '%Y-%m-%d %H:%M:%S')
date_obj = date_obj.replace(tzinfo=timezone.utc)
except Exception:
return None
measurement_point_id = row.get('id', '').strip()
# Find point data
point_match = next((p for p in nearest_points if p[0] == measurement_point_id), None)
if not point_match:
return None
point_data = point_match[1]
distance_km = point_match[2]
# Extract traffic data
intensidad = self.safe_int(row.get('intensidad', '0'))
ocupacion = self.safe_int(row.get('ocupacion', '0'))
carga = self.safe_int(row.get('carga', '0'))
vmed = self.safe_int(row.get('vmed', '0'))
# Build basic result (business logic will be applied elsewhere)
result = {
'date': date_obj,
'measurement_point_id': measurement_point_id,
'point_data': point_data,
'distance_km': distance_km,
'traffic_data': {
'intensidad': intensidad,
'ocupacion': ocupacion,
'carga': carga,
'vmed': vmed
},
'data_quality_score': self.calculate_data_quality_score(row),
'raw_row': row
}
return result
except Exception as e:
self.logger.debug("Error parsing historical CSV row", error=str(e))
return None

186
services/external/app/main.py vendored Normal file
View File

@@ -0,0 +1,186 @@
# services/external/app/main.py
"""
External Service Main Application
"""
import structlog
from contextlib import asynccontextmanager
from fastapi import FastAPI, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from app.core.config import settings
from app.core.database import init_db, close_db
from shared.monitoring import setup_logging, HealthChecker
from shared.monitoring.metrics import setup_metrics_early
# Setup logging first
setup_logging("external-service", settings.LOG_LEVEL)
logger = structlog.get_logger()
# Global variables for lifespan access
metrics_collector = None
health_checker = None
# Create FastAPI app FIRST
app = FastAPI(
title="Bakery External Data Service",
description="External data collection service for weather, traffic, and events data",
version="1.0.0"
)
# Setup metrics BEFORE any middleware and BEFORE lifespan
metrics_collector = setup_metrics_early(app, "external-service")
@asynccontextmanager
async def lifespan(app: FastAPI):
"""Application lifespan events"""
global health_checker
# Startup
logger.info("Starting External Service...")
try:
# Initialize database
await init_db()
logger.info("Database initialized")
# Register custom metrics
metrics_collector.register_counter("weather_api_calls_total", "Total weather API calls")
metrics_collector.register_counter("weather_api_success_total", "Successful weather API calls")
metrics_collector.register_counter("weather_api_failures_total", "Failed weather API calls")
metrics_collector.register_counter("traffic_api_calls_total", "Total traffic API calls")
metrics_collector.register_counter("traffic_api_success_total", "Successful traffic API calls")
metrics_collector.register_counter("traffic_api_failures_total", "Failed traffic API calls")
metrics_collector.register_counter("data_collection_jobs_total", "Data collection jobs")
metrics_collector.register_counter("data_records_stored_total", "Data records stored")
metrics_collector.register_counter("data_quality_issues_total", "Data quality issues detected")
metrics_collector.register_histogram("weather_api_duration_seconds", "Weather API call duration")
metrics_collector.register_histogram("traffic_api_duration_seconds", "Traffic API call duration")
metrics_collector.register_histogram("data_collection_duration_seconds", "Data collection job duration")
metrics_collector.register_histogram("data_processing_duration_seconds", "Data processing duration")
# Setup health checker
health_checker = HealthChecker("external-service")
# Add database health check
async def check_database():
try:
from app.core.database import get_db
from sqlalchemy import text
async for db in get_db():
await db.execute(text("SELECT 1"))
return True
except Exception as e:
return f"Database error: {e}"
# Add external API health checks
async def check_weather_api():
try:
# Simple connectivity check
if settings.AEMET_API_KEY:
return True
else:
return "AEMET API key not configured"
except Exception as e:
return f"Weather API error: {e}"
async def check_traffic_api():
try:
# Simple connectivity check
if settings.MADRID_OPENDATA_API_KEY:
return True
else:
return "Madrid Open Data API key not configured"
except Exception as e:
return f"Traffic API error: {e}"
health_checker.add_check("database", check_database, timeout=5.0, critical=True)
health_checker.add_check("weather_api", check_weather_api, timeout=10.0, critical=False)
health_checker.add_check("traffic_api", check_traffic_api, timeout=10.0, critical=False)
# Store health checker in app state
app.state.health_checker = health_checker
logger.info("External Service started successfully")
except Exception as e:
logger.error(f"Failed to start External Service: {e}")
raise
yield
# Shutdown
logger.info("Shutting down External Service...")
await close_db()
# Set lifespan AFTER metrics setup
app.router.lifespan_context = lifespan
# CORS middleware (added after metrics setup)
app.add_middleware(
CORSMiddleware,
allow_origins=settings.CORS_ORIGINS,
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
# Include routers
from app.api.weather import router as weather_router
from app.api.traffic import router as traffic_router
app.include_router(weather_router, prefix="/api/v1", tags=["weather"])
app.include_router(traffic_router, prefix="/api/v1", tags=["traffic"])
# Health check endpoint
@app.get("/health")
async def health_check():
"""Comprehensive health check endpoint"""
if health_checker:
return await health_checker.check_health()
else:
return {
"service": "external-service",
"status": "healthy",
"version": "1.0.0"
}
# Root endpoint
@app.get("/")
async def root():
"""Root endpoint"""
return {
"service": "External Data Service",
"version": "1.0.0",
"status": "running",
"endpoints": {
"health": "/health",
"docs": "/docs",
"weather": "/api/v1/weather",
"traffic": "/api/v1/traffic",
"jobs": "/api/v1/jobs"
},
"data_sources": {
"weather": "AEMET (Spanish Weather Service)",
"traffic": "Madrid Open Data Portal",
"coverage": "Madrid, Spain"
}
}
# Exception handlers
@app.exception_handler(Exception)
async def global_exception_handler(request: Request, exc: Exception):
"""Global exception handler with metrics"""
logger.error(f"Unhandled exception: {exc}", exc_info=True)
# Record error metric if available
if metrics_collector:
metrics_collector.increment_counter("errors_total", labels={"type": "unhandled"})
return JSONResponse(
status_code=500,
content={"detail": "Internal server error"}
)

View File

@@ -0,0 +1 @@
# services/external/app/models/__init__.py

294
services/external/app/models/traffic.py vendored Normal file
View File

@@ -0,0 +1,294 @@
# ================================================================
# services/external/app/models/traffic.py - Enhanced for Multiple Cities
# ================================================================
"""
Flexible traffic data models supporting multiple cities and extensible schemas
"""
from sqlalchemy import Column, String, DateTime, Float, Integer, Text, Index, Boolean, JSON
from sqlalchemy.dialects.postgresql import UUID
import uuid
from datetime import datetime, timezone
from typing import Dict, Any, Optional
from shared.database.base import Base
class TrafficData(Base):
"""
Flexible traffic data model supporting multiple cities
Designed to accommodate varying data structures across different cities
"""
__tablename__ = "traffic_data"
# Primary identification
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
# Location and temporal data
location_id = Column(String(100), nullable=False, index=True) # "lat,lon" or city-specific ID
city = Column(String(50), nullable=False, index=True) # madrid, barcelona, valencia, etc.
date = Column(DateTime(timezone=True), nullable=False, index=True)
# Core standardized traffic metrics (common across all cities)
traffic_volume = Column(Integer, nullable=True) # Vehicle count or intensity
congestion_level = Column(String(20), nullable=True) # low, medium, high, blocked
average_speed = Column(Float, nullable=True) # Average speed in km/h
# Enhanced metrics (may not be available for all cities)
occupation_percentage = Column(Float, nullable=True) # Road occupation %
load_percentage = Column(Float, nullable=True) # Traffic load %
pedestrian_count = Column(Integer, nullable=True) # Estimated pedestrian count
# Measurement point information
measurement_point_id = Column(String(100), nullable=True, index=True)
measurement_point_name = Column(String(500), nullable=True)
measurement_point_type = Column(String(50), nullable=True) # URB, M30, A, etc.
# Geographic data
latitude = Column(Float, nullable=True)
longitude = Column(Float, nullable=True)
district = Column(String(100), nullable=True) # City district/area
zone = Column(String(100), nullable=True) # Traffic zone or sector
# Data source and quality
source = Column(String(50), nullable=False, default="unknown") # madrid_opendata, synthetic, etc.
data_quality_score = Column(Float, nullable=True) # Quality score 0-100
is_synthetic = Column(Boolean, default=False)
has_pedestrian_inference = Column(Boolean, default=False)
# City-specific data (flexible JSON storage)
city_specific_data = Column(JSON, nullable=True) # Store city-specific fields
# Raw data backup
raw_data = Column(Text, nullable=True) # Original data for debugging
# Audit fields
tenant_id = Column(UUID(as_uuid=True), nullable=True, index=True) # For multi-tenancy
created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
updated_at = Column(DateTime(timezone=True),
default=lambda: datetime.now(timezone.utc),
onupdate=lambda: datetime.now(timezone.utc))
# Performance-optimized indexes
__table_args__ = (
# Core query patterns
Index('idx_traffic_location_date', 'location_id', 'date'),
Index('idx_traffic_city_date', 'city', 'date'),
Index('idx_traffic_tenant_date', 'tenant_id', 'date'),
# Advanced query patterns
Index('idx_traffic_city_location', 'city', 'location_id'),
Index('idx_traffic_measurement_point', 'city', 'measurement_point_id'),
Index('idx_traffic_district_date', 'city', 'district', 'date'),
# Training data queries
Index('idx_traffic_training', 'tenant_id', 'city', 'date', 'is_synthetic'),
Index('idx_traffic_quality', 'city', 'data_quality_score', 'date'),
)
def to_dict(self) -> Dict[str, Any]:
"""Convert model to dictionary for API responses"""
result = {
'id': str(self.id),
'location_id': self.location_id,
'city': self.city,
'date': self.date.isoformat() if self.date else None,
'traffic_volume': self.traffic_volume,
'congestion_level': self.congestion_level,
'average_speed': self.average_speed,
'occupation_percentage': self.occupation_percentage,
'load_percentage': self.load_percentage,
'pedestrian_count': self.pedestrian_count,
'measurement_point_id': self.measurement_point_id,
'measurement_point_name': self.measurement_point_name,
'measurement_point_type': self.measurement_point_type,
'latitude': self.latitude,
'longitude': self.longitude,
'district': self.district,
'zone': self.zone,
'source': self.source,
'data_quality_score': self.data_quality_score,
'is_synthetic': self.is_synthetic,
'has_pedestrian_inference': self.has_pedestrian_inference,
'created_at': self.created_at.isoformat() if self.created_at else None
}
# Add city-specific data if present
if self.city_specific_data:
result['city_specific_data'] = self.city_specific_data
return result
def get_city_specific_field(self, field_name: str, default: Any = None) -> Any:
"""Safely get city-specific field value"""
if self.city_specific_data and isinstance(self.city_specific_data, dict):
return self.city_specific_data.get(field_name, default)
return default
def set_city_specific_field(self, field_name: str, value: Any) -> None:
"""Set city-specific field value"""
# Reassign a fresh dict so SQLAlchemy detects the change: plain JSON
# columns do not track in-place mutation unless wrapped in MutableDict
data = dict(self.city_specific_data) if isinstance(self.city_specific_data, dict) else {}
data[field_name] = value
self.city_specific_data = data
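# Usage sketch (illustrative; the field name is an assumption): keeping a
# Madrid-only metric next to the standardized columns.
#
#   record = TrafficData(location_id="40.4168,-3.7038", city="madrid",
#                        date=datetime.now(timezone.utc))
#   record.set_city_specific_field("nivel_servicio", 2)
#   record.get_city_specific_field("nivel_servicio")  # -> 2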
class TrafficMeasurementPoint(Base):
"""
Registry of traffic measurement points across all cities
Supports different city-specific measurement point schemas
"""
__tablename__ = "traffic_measurement_points"
# Primary identification
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
# Location and identification
city = Column(String(50), nullable=False, index=True)
measurement_point_id = Column(String(100), nullable=False, index=True) # City-specific ID
name = Column(String(500), nullable=True)
description = Column(Text, nullable=True)
# Geographic information
latitude = Column(Float, nullable=False)
longitude = Column(Float, nullable=False)
district = Column(String(100), nullable=True)
zone = Column(String(100), nullable=True)
# Classification
road_type = Column(String(50), nullable=True) # URB, M30, A, etc.
measurement_type = Column(String(50), nullable=True) # intensity, speed, etc.
point_category = Column(String(50), nullable=True) # urban, highway, ring_road
# Status and metadata
is_active = Column(Boolean, default=True)
installation_date = Column(DateTime(timezone=True), nullable=True)
last_data_received = Column(DateTime(timezone=True), nullable=True)
data_quality_rating = Column(Float, nullable=True) # Average quality 0-100
# City-specific point data
city_specific_metadata = Column(JSON, nullable=True)
# Audit fields
created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
updated_at = Column(DateTime(timezone=True),
default=lambda: datetime.now(timezone.utc),
onupdate=lambda: datetime.now(timezone.utc))
__table_args__ = (
# Ensure unique measurement points per city
Index('idx_unique_city_point', 'city', 'measurement_point_id', unique=True),
# Geographic queries
Index('idx_points_city_location', 'city', 'latitude', 'longitude'),
Index('idx_points_district', 'city', 'district'),
Index('idx_points_road_type', 'city', 'road_type'),
# Status queries
Index('idx_points_active', 'city', 'is_active', 'last_data_received'),
)
def to_dict(self) -> Dict[str, Any]:
"""Convert measurement point to dictionary"""
return {
'id': str(self.id),
'city': self.city,
'measurement_point_id': self.measurement_point_id,
'name': self.name,
'description': self.description,
'latitude': self.latitude,
'longitude': self.longitude,
'district': self.district,
'zone': self.zone,
'road_type': self.road_type,
'measurement_type': self.measurement_type,
'point_category': self.point_category,
'is_active': self.is_active,
'installation_date': self.installation_date.isoformat() if self.installation_date else None,
'last_data_received': self.last_data_received.isoformat() if self.last_data_received else None,
'data_quality_rating': self.data_quality_rating,
'city_specific_metadata': self.city_specific_metadata,
'created_at': self.created_at.isoformat() if self.created_at else None
}
class TrafficDataBackgroundJob(Base):
"""
Track background data collection jobs for multiple cities
Supports scheduling and monitoring of data fetching processes
"""
__tablename__ = "traffic_background_jobs"
# Primary identification
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
# Job configuration
job_type = Column(String(50), nullable=False) # historical_fetch, cleanup, etc.
city = Column(String(50), nullable=False, index=True)
location_pattern = Column(String(200), nullable=True) # Location pattern or specific coords
# Scheduling
scheduled_at = Column(DateTime(timezone=True), nullable=False)
started_at = Column(DateTime(timezone=True), nullable=True)
completed_at = Column(DateTime(timezone=True), nullable=True)
# Status tracking
status = Column(String(20), nullable=False, default='pending') # pending, running, completed, failed
progress_percentage = Column(Float, default=0.0)
records_processed = Column(Integer, default=0)
records_stored = Column(Integer, default=0)
# Date range for data jobs
data_start_date = Column(DateTime(timezone=True), nullable=True)
data_end_date = Column(DateTime(timezone=True), nullable=True)
# Results and error handling
success_count = Column(Integer, default=0)
error_count = Column(Integer, default=0)
error_message = Column(Text, nullable=True)
job_metadata = Column(JSON, nullable=True) # Additional job-specific data
# Tenant association
tenant_id = Column(UUID(as_uuid=True), nullable=True, index=True)
# Audit fields
created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
updated_at = Column(DateTime(timezone=True),
default=lambda: datetime.now(timezone.utc),
onupdate=lambda: datetime.now(timezone.utc))
__table_args__ = (
# Job monitoring
Index('idx_jobs_city_status', 'city', 'status', 'scheduled_at'),
Index('idx_jobs_tenant_status', 'tenant_id', 'status', 'scheduled_at'),
Index('idx_jobs_type_city', 'job_type', 'city', 'scheduled_at'),
# Cleanup queries
Index('idx_jobs_completed', 'status', 'completed_at'),
)
def to_dict(self) -> Dict[str, Any]:
"""Convert job to dictionary"""
return {
'id': str(self.id),
'job_type': self.job_type,
'city': self.city,
'location_pattern': self.location_pattern,
'scheduled_at': self.scheduled_at.isoformat() if self.scheduled_at else None,
'started_at': self.started_at.isoformat() if self.started_at else None,
'completed_at': self.completed_at.isoformat() if self.completed_at else None,
'status': self.status,
'progress_percentage': self.progress_percentage,
'records_processed': self.records_processed,
'records_stored': self.records_stored,
'data_start_date': self.data_start_date.isoformat() if self.data_start_date else None,
'data_end_date': self.data_end_date.isoformat() if self.data_end_date else None,
'success_count': self.success_count,
'error_count': self.error_count,
'error_message': self.error_message,
'job_metadata': self.job_metadata,
'tenant_id': str(self.tenant_id) if self.tenant_id else None,
'created_at': self.created_at.isoformat() if self.created_at else None,
'updated_at': self.updated_at.isoformat() if self.updated_at else None
}
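A minimal lifecycle sketch for this job model (illustrative, not part of the commit; it assumes the async database_manager used by the services later in this commit and that the model is importable from app.models.traffic):

from datetime import datetime, timezone
from app.core.database import database_manager  # assumed: same manager the services use
from app.models.traffic import TrafficDataBackgroundJob  # assumed import path

async def start_pending_job(job_id):
    """Promote a pending job to 'running' and stamp started_at."""
    async with database_manager.get_session() as session:
        job = await session.get(TrafficDataBackgroundJob, job_id)
        if job is None or job.status != 'pending':
            return None
        job.status = 'running'
        job.started_at = datetime.now(timezone.utc)
        await session.commit()
        return job.to_dict()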

74
services/external/app/models/weather.py vendored Normal file
View File

@@ -0,0 +1,74 @@
# ================================================================
# services/external/app/models/weather.py
# ================================================================
"""Weather data models"""
from sqlalchemy import Column, String, DateTime, Float, Integer, Text, Index, Boolean
from sqlalchemy.dialects.postgresql import UUID, JSON
import uuid
from datetime import datetime, timezone
from shared.database.base import Base
class WeatherData(Base):
__tablename__ = "weather_data"
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
location_id = Column(String(100), nullable=False, index=True)
city = Column(String(50), nullable=False)
station_name = Column(String(200), nullable=True)
latitude = Column(Float, nullable=True)
longitude = Column(Float, nullable=True)
date = Column(DateTime(timezone=True), nullable=False, index=True)
forecast_date = Column(DateTime(timezone=True), nullable=True)
temperature = Column(Float, nullable=True) # Celsius
temperature_min = Column(Float, nullable=True)
temperature_max = Column(Float, nullable=True)
feels_like = Column(Float, nullable=True)
precipitation = Column(Float, nullable=True) # mm
precipitation_probability = Column(Float, nullable=True)
humidity = Column(Float, nullable=True) # percentage
wind_speed = Column(Float, nullable=True) # km/h
wind_direction = Column(Float, nullable=True)
wind_gust = Column(Float, nullable=True)
pressure = Column(Float, nullable=True) # hPa
visibility = Column(Float, nullable=True)
uv_index = Column(Float, nullable=True)
cloud_cover = Column(Float, nullable=True)
condition = Column(String(100), nullable=True)
description = Column(String(200), nullable=True)
weather_code = Column(String(20), nullable=True)
source = Column(String(50), nullable=False, default="aemet")
data_type = Column(String(20), nullable=False)
is_forecast = Column(Boolean, nullable=True)
data_quality_score = Column(Float, nullable=True)
raw_data = Column(JSON, nullable=True)
processed_data = Column(JSON, nullable=True)
tenant_id = Column(UUID(as_uuid=True), nullable=True, index=True)
created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
updated_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc), onupdate=lambda: datetime.now(timezone.utc))
__table_args__ = (
Index('idx_weather_location_date', 'location_id', 'date'),
)
class WeatherForecast(Base):
__tablename__ = "weather_forecasts"
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
location_id = Column(String(100), nullable=False, index=True)
forecast_date = Column(DateTime(timezone=True), nullable=False)
generated_at = Column(DateTime(timezone=True), nullable=False, default=lambda: datetime.now(timezone.utc))
temperature = Column(Float, nullable=True)
precipitation = Column(Float, nullable=True)
humidity = Column(Float, nullable=True)
wind_speed = Column(Float, nullable=True)
description = Column(String(200), nullable=True)
source = Column(String(50), nullable=False, default="aemet")
raw_data = Column(Text, nullable=True)
created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
updated_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc), onupdate=lambda: datetime.now(timezone.utc))
__table_args__ = (
Index('idx_forecast_location_date', 'location_id', 'forecast_date'),
)

191
services/external/app/repositories/traffic_repository.py vendored Normal file
View File

@@ -0,0 +1,191 @@
# ================================================================
# services/external/app/repositories/traffic_repository.py
# ================================================================
"""
Traffic Repository - Enhanced for multiple cities with comprehensive data access patterns
Follows existing repository architecture while adding city-specific functionality
"""
from typing import Optional, List, Dict, Any, Type, Tuple
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select, and_, or_, func, desc, asc, text, update, delete
from sqlalchemy.orm import selectinload
from datetime import datetime, timezone, timedelta
import structlog
from app.models.traffic import TrafficData
from app.schemas.traffic import TrafficDataCreate, TrafficDataResponse
from shared.database.exceptions import DatabaseError, ValidationError
logger = structlog.get_logger()
class TrafficRepository:
"""
Enhanced repository for traffic data operations across multiple cities
Provides city-aware queries and advanced traffic analytics
"""
def __init__(self, session: AsyncSession):
self.session = session
self.model = TrafficData
# ================================================================
# CORE TRAFFIC DATA OPERATIONS
# ================================================================
async def get_by_location_and_date_range(
self,
latitude: float,
longitude: float,
start_date: datetime,
end_date: datetime,
tenant_id: Optional[str] = None
) -> List[TrafficData]:
"""Get traffic data by location and date range"""
try:
location_id = f"{latitude:.4f},{longitude:.4f}"
# Build base query
query = select(self.model).where(self.model.location_id == location_id)
# Add tenant filter if specified
if tenant_id:
query = query.where(self.model.tenant_id == tenant_id)
# Add date range filters
if start_date:
query = query.where(self.model.date >= start_date)
if end_date:
query = query.where(self.model.date <= end_date)
# Order by date
query = query.order_by(self.model.date)
result = await self.session.execute(query)
return result.scalars().all()
except Exception as e:
logger.error("Failed to get traffic data by location and date range",
latitude=latitude, longitude=longitude,
error=str(e))
raise DatabaseError(f"Failed to get traffic data: {str(e)}")
async def store_traffic_data_batch(
self,
traffic_data_list: List[Dict[str, Any]],
location_id: str,
tenant_id: Optional[str] = None
) -> int:
"""Store a batch of traffic data records with enhanced validation and duplicate handling."""
stored_count = 0
try:
if not traffic_data_list:
return 0
# Check for existing records to avoid duplicates
dates = [data.get('date') for data in traffic_data_list if data.get('date')]
existing_dates = set()
if dates:
existing_stmt = select(TrafficData.date).where(
and_(
TrafficData.location_id == location_id,
TrafficData.date.in_(dates)
)
)
result = await self.session.execute(existing_stmt)
existing_dates = {row[0] for row in result.fetchall()}
logger.debug(f"Found {len(existing_dates)} existing records for location {location_id}")
batch_records = []
for data in traffic_data_list:
record_date = data.get('date')
if not record_date or record_date in existing_dates:
continue # Skip duplicates
# Validate data before preparing for insertion
if self._validate_traffic_data(data):
batch_records.append({
'location_id': location_id,
'city': data.get('city', 'madrid'), # Default to madrid for historical data
'tenant_id': tenant_id, # Include tenant_id in batch insert
'date': record_date,
'traffic_volume': data.get('traffic_volume'),
'pedestrian_count': data.get('pedestrian_count'),
'congestion_level': data.get('congestion_level'),
'average_speed': data.get('average_speed'),
'source': data.get('source', 'unknown'),
'raw_data': str(data)
})
if batch_records:
# Use bulk insert for performance
await self.session.execute(
TrafficData.__table__.insert(),
batch_records
)
await self.session.commit()
stored_count = len(batch_records)
logger.info(f"Successfully stored {stored_count} traffic records for location {location_id}")
except Exception as e:
logger.error("Failed to store traffic data batch",
error=str(e), location_id=location_id)
await self.session.rollback()
raise DatabaseError(f"Batch store failed: {str(e)}")
return stored_count
def _validate_traffic_data(self, data: Dict[str, Any]) -> bool:
"""Validate traffic data before storage"""
required_fields = ['date']
# Check required fields
for field in required_fields:
if not data.get(field):
return False
# Validate data types and ranges
traffic_volume = data.get('traffic_volume')
if traffic_volume is not None and (traffic_volume < 0 or traffic_volume > 10000):
return False
pedestrian_count = data.get('pedestrian_count')
if pedestrian_count is not None and (pedestrian_count < 0 or pedestrian_count > 10000):
return False
average_speed = data.get('average_speed')
if average_speed is not None and (average_speed < 0 or average_speed > 200):
return False
congestion_level = data.get('congestion_level')
if congestion_level and congestion_level not in ['low', 'medium', 'high', 'blocked']:
return False
return True
async def get_historical_traffic_for_training(self,
latitude: float,
longitude: float,
start_date: datetime,
end_date: datetime) -> List[TrafficData]:
"""Retrieve stored traffic data for training ML models."""
try:
location_id = f"{latitude:.4f},{longitude:.4f}"
stmt = select(TrafficData).where(
and_(
TrafficData.location_id == location_id,
TrafficData.date >= start_date,
TrafficData.date <= end_date
)
).order_by(TrafficData.date)
result = await self.session.execute(stmt)
return result.scalars().all()
except Exception as e:
logger.error("Failed to retrieve traffic data for training",
error=str(e), location_id=location_id)
raise DatabaseError(f"Training data retrieval failed: {str(e)}")

138
services/external/app/repositories/weather_repository.py vendored Normal file
View File

@@ -0,0 +1,138 @@
# services/external/app/repositories/weather_repository.py
from typing import List, Dict, Any, Optional
from datetime import datetime
from sqlalchemy import select, and_
from sqlalchemy.ext.asyncio import AsyncSession
import structlog
import json
from app.models.weather import WeatherData
logger = structlog.get_logger()
class WeatherRepository:
"""
Repository for weather data operations, adapted for WeatherService.
"""
def __init__(self, session: AsyncSession):
self.session = session
async def get_historical_weather(self,
location_id: str,
start_date: datetime,
end_date: datetime) -> List[WeatherData]:
"""
Retrieves historical weather data for a specific location and date range.
This method directly supports the data retrieval logic in WeatherService.
"""
try:
stmt = select(WeatherData).where(
and_(
WeatherData.location_id == location_id,
WeatherData.date >= start_date,
WeatherData.date <= end_date
)
).order_by(WeatherData.date)
result = await self.session.execute(stmt)
records = result.scalars().all()
logger.debug(f"Retrieved {len(records)} historical records for location {location_id}")
return list(records)
except Exception as e:
logger.error(
"Failed to get historical weather from repository",
error=str(e),
location_id=location_id
)
raise
def _serialize_json_fields(self, data: Dict[str, Any]) -> Dict[str, Any]:
"""
Serialize JSON fields (raw_data, processed_data) to ensure proper JSON storage
"""
serialized = data.copy()
# Serialize raw_data if present
if 'raw_data' in serialized and serialized['raw_data'] is not None:
if not isinstance(serialized['raw_data'], str):
try:
# Convert datetime objects to strings for JSON serialization
raw_data = serialized['raw_data']
if isinstance(raw_data, dict):
# Handle datetime objects in the dict
json_safe_data = {}
for k, v in raw_data.items():
if hasattr(v, 'isoformat'): # datetime-like object
json_safe_data[k] = v.isoformat()
else:
json_safe_data[k] = v
serialized['raw_data'] = json_safe_data
except Exception as e:
logger.warning(f"Could not serialize raw_data, storing as string: {e}")
serialized['raw_data'] = str(raw_data)
# Serialize processed_data if present
if 'processed_data' in serialized and serialized['processed_data'] is not None:
if not isinstance(serialized['processed_data'], str):
try:
processed_data = serialized['processed_data']
if isinstance(processed_data, dict):
json_safe_data = {}
for k, v in processed_data.items():
if hasattr(v, 'isoformat'): # datetime-like object
json_safe_data[k] = v.isoformat()
else:
json_safe_data[k] = v
serialized['processed_data'] = json_safe_data
except Exception as e:
logger.warning(f"Could not serialize processed_data, storing as string: {e}")
serialized['processed_data'] = str(processed_data)
return serialized
async def bulk_create_weather_data(self, weather_records: List[Dict[str, Any]]) -> None:
"""
Bulk inserts new weather records into the database.
Used by WeatherService after fetching new historical data from an external API.
"""
try:
if not weather_records:
return
# Serialize JSON fields before creating model instances
serialized_records = [self._serialize_json_fields(data) for data in weather_records]
records = [WeatherData(**data) for data in serialized_records]
self.session.add_all(records)
await self.session.commit()
logger.info(f"Successfully bulk inserted {len(records)} weather records")
except Exception as e:
await self.session.rollback()
logger.error(
"Failed to bulk create weather records",
error=str(e),
count=len(weather_records)
)
raise
async def create_weather_data(self, data: Dict[str, Any]) -> WeatherData:
"""
Creates a single new weather data record.
"""
try:
# Serialize JSON fields before creating model instance
serialized_data = self._serialize_json_fields(data)
new_record = WeatherData(**serialized_data)
self.session.add(new_record)
await self.session.commit()
await self.session.refresh(new_record)
logger.info(f"Created new weather record with ID {new_record.id}")
return new_record
except Exception as e:
await self.session.rollback()
logger.error("Failed to create single weather record", error=str(e))
raise
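A quick illustration of what _serialize_json_fields does to datetime values (illustrative only; the serializer never touches the session, so None stands in for it here):

from datetime import datetime, timezone
from app.repositories.weather_repository import WeatherRepository

repo = WeatherRepository(session=None)  # serializer only; no DB access happens
payload = {
    'location_id': '40.4168,-3.7038',
    'raw_data': {'fetched_at': datetime.now(timezone.utc), 'temperature': 18.5},
}
clean = repo._serialize_json_fields(payload)
# clean['raw_data']['fetched_at'] is now an ISO-8601 string, safe for the JSON column
assert isinstance(clean['raw_data']['fetched_at'], str)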

1
services/external/app/schemas/__init__.py vendored Normal file
View File

@@ -0,0 +1 @@
# services/external/app/schemas/__init__.py

100
services/external/app/schemas/traffic.py vendored Normal file
View File

@@ -0,0 +1,100 @@
# services/external/app/schemas/traffic.py
"""
Traffic Service Pydantic Schemas
"""
from pydantic import BaseModel, Field, field_validator
from datetime import datetime
from typing import Optional, List
from uuid import UUID
class TrafficDataBase(BaseModel):
"""Base traffic data schema"""
location_id: str = Field(..., max_length=100, description="Traffic monitoring location ID")
date: datetime = Field(..., description="Date and time of traffic measurement")
traffic_volume: Optional[int] = Field(None, ge=0, description="Vehicles per hour")
pedestrian_count: Optional[int] = Field(None, ge=0, description="Pedestrians per hour")
congestion_level: Optional[str] = Field(None, pattern="^(low|medium|high)$", description="Traffic congestion level")
average_speed: Optional[float] = Field(None, ge=0, le=200, description="Average speed in km/h")
source: str = Field("madrid_opendata", max_length=50, description="Data source")
raw_data: Optional[str] = Field(None, description="Raw data from source")
class TrafficDataCreate(TrafficDataBase):
"""Schema for creating traffic data"""
pass
class TrafficDataUpdate(BaseModel):
"""Schema for updating traffic data"""
traffic_volume: Optional[int] = Field(None, ge=0)
pedestrian_count: Optional[int] = Field(None, ge=0)
congestion_level: Optional[str] = Field(None, pattern="^(low|medium|high)$")
average_speed: Optional[float] = Field(None, ge=0, le=200)
raw_data: Optional[str] = None
class TrafficDataDetailResponse(TrafficDataBase):
"""Full traffic data response schema (renamed from TrafficDataResponse, which the lightweight summary schema later in this module would otherwise shadow)"""
id: str = Field(..., description="Unique identifier")
created_at: datetime = Field(..., description="Creation timestamp")
updated_at: datetime = Field(..., description="Last update timestamp")
@field_validator('id', mode='before')
@classmethod
def convert_uuid_to_string(cls, v):
if isinstance(v, UUID):
return str(v)
return v
class Config:
from_attributes = True
json_encoders = {
datetime: lambda v: v.isoformat()
}
class TrafficDataList(BaseModel):
"""Schema for paginated traffic data responses"""
data: List[TrafficDataDetailResponse]
total: int = Field(..., description="Total number of records")
page: int = Field(..., description="Current page number")
per_page: int = Field(..., description="Records per page")
has_next: bool = Field(..., description="Whether there are more pages")
has_prev: bool = Field(..., description="Whether there are previous pages")
class TrafficAnalytics(BaseModel):
"""Schema for traffic analytics"""
location_id: str
period_start: datetime
period_end: datetime
avg_traffic_volume: Optional[float] = None
avg_pedestrian_count: Optional[float] = None
peak_traffic_hour: Optional[int] = None
peak_pedestrian_hour: Optional[int] = None
congestion_distribution: dict = Field(default_factory=dict)
avg_speed: Optional[float] = None
class TrafficDataResponse(BaseModel):
"""Lightweight summary schema returned by the traffic service and API"""
date: datetime
traffic_volume: Optional[int]
pedestrian_count: Optional[int]
congestion_level: Optional[str]
average_speed: Optional[float]
source: str
class LocationRequest(BaseModel):
latitude: float
longitude: float
address: Optional[str] = None
class DateRangeRequest(BaseModel):
start_date: datetime
end_date: datetime
class HistoricalTrafficRequest(BaseModel):
latitude: float
longitude: float
start_date: datetime
end_date: datetime
class TrafficForecastRequest(BaseModel):
latitude: float
longitude: float
hours: int = 24
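The Field constraints above are enforced at parse time; a small sketch with illustrative values:

from datetime import datetime, timezone
from pydantic import ValidationError
from app.schemas.traffic import TrafficDataCreate

ok = TrafficDataCreate(
    location_id='40.4168,-3.7038',
    date=datetime.now(timezone.utc),
    traffic_volume=850,
    congestion_level='medium',
)

try:
    TrafficDataCreate(
        location_id='40.4168,-3.7038',
        date=datetime.now(timezone.utc),
        congestion_level='gridlock',  # rejected by the ^(low|medium|high)$ pattern
    )
except ValidationError as exc:
    print(exc.errors()[0]['loc'])  # ('congestion_level',)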

161
services/external/app/schemas/weather.py vendored Normal file
View File

@@ -0,0 +1,161 @@
# services/external/app/schemas/weather.py
"""Weather data schemas"""
from pydantic import BaseModel, Field, field_validator
from datetime import datetime
from typing import Optional, List
from uuid import UUID
class WeatherDataBase(BaseModel):
"""Base weather data schema"""
location_id: str = Field(..., max_length=100, description="Weather monitoring location ID")
date: datetime = Field(..., description="Date and time of weather measurement")
temperature: Optional[float] = Field(None, ge=-50, le=60, description="Temperature in Celsius")
precipitation: Optional[float] = Field(None, ge=0, description="Precipitation in mm")
humidity: Optional[float] = Field(None, ge=0, le=100, description="Humidity percentage")
wind_speed: Optional[float] = Field(None, ge=0, le=200, description="Wind speed in km/h")
pressure: Optional[float] = Field(None, ge=800, le=1200, description="Atmospheric pressure in hPa")
description: Optional[str] = Field(None, max_length=200, description="Weather description")
source: str = Field("aemet", max_length=50, description="Data source")
raw_data: Optional[str] = Field(None, description="Raw data from source")
class WeatherDataCreate(WeatherDataBase):
"""Schema for creating weather data"""
pass
class WeatherDataUpdate(BaseModel):
"""Schema for updating weather data"""
temperature: Optional[float] = Field(None, ge=-50, le=60)
precipitation: Optional[float] = Field(None, ge=0)
humidity: Optional[float] = Field(None, ge=0, le=100)
wind_speed: Optional[float] = Field(None, ge=0, le=200)
pressure: Optional[float] = Field(None, ge=800, le=1200)
description: Optional[str] = Field(None, max_length=200)
raw_data: Optional[str] = None
class WeatherDataDetailResponse(WeatherDataBase):
"""Full weather data response schema (renamed from WeatherDataResponse, which the lightweight summary schema later in this module would otherwise shadow)"""
id: str = Field(..., description="Unique identifier")
created_at: datetime = Field(..., description="Creation timestamp")
updated_at: datetime = Field(..., description="Last update timestamp")
@field_validator('id', mode='before')
@classmethod
def convert_uuid_to_string(cls, v):
if isinstance(v, UUID):
return str(v)
return v
class Config:
from_attributes = True
json_encoders = {
datetime: lambda v: v.isoformat()
}
class WeatherForecastBase(BaseModel):
"""Base weather forecast schema"""
location_id: str = Field(..., max_length=100, description="Location ID")
forecast_date: datetime = Field(..., description="Date for forecast")
temperature: Optional[float] = Field(None, ge=-50, le=60, description="Forecasted temperature")
precipitation: Optional[float] = Field(None, ge=0, description="Forecasted precipitation")
humidity: Optional[float] = Field(None, ge=0, le=100, description="Forecasted humidity")
wind_speed: Optional[float] = Field(None, ge=0, le=200, description="Forecasted wind speed")
description: Optional[str] = Field(None, max_length=200, description="Forecast description")
source: str = Field("aemet", max_length=50, description="Data source")
raw_data: Optional[str] = Field(None, description="Raw forecast data")
class WeatherForecastCreate(WeatherForecastBase):
"""Schema for creating weather forecasts"""
pass
class WeatherForecastDetailResponse(WeatherForecastBase):
"""Full weather forecast response schema (renamed from WeatherForecastResponse, which the lightweight summary schema later in this module would otherwise shadow)"""
id: str = Field(..., description="Unique identifier")
generated_at: datetime = Field(..., description="When forecast was generated")
created_at: datetime = Field(..., description="Creation timestamp")
updated_at: datetime = Field(..., description="Last update timestamp")
@field_validator('id', mode='before')
@classmethod
def convert_uuid_to_string(cls, v):
if isinstance(v, UUID):
return str(v)
return v
class Config:
from_attributes = True
json_encoders = {
datetime: lambda v: v.isoformat()
}
class WeatherDataList(BaseModel):
"""Schema for paginated weather data responses"""
data: List[WeatherDataDetailResponse]
total: int = Field(..., description="Total number of records")
page: int = Field(..., description="Current page number")
per_page: int = Field(..., description="Records per page")
has_next: bool = Field(..., description="Whether there are more pages")
has_prev: bool = Field(..., description="Whether there are previous pages")
class WeatherForecastList(BaseModel):
"""Schema for paginated weather forecast responses"""
forecasts: List[WeatherForecastDetailResponse]
total: int = Field(..., description="Total number of forecasts")
page: int = Field(..., description="Current page number")
per_page: int = Field(..., description="Forecasts per page")
class WeatherAnalytics(BaseModel):
"""Schema for weather analytics"""
location_id: str
period_start: datetime
period_end: datetime
avg_temperature: Optional[float] = None
min_temperature: Optional[float] = None
max_temperature: Optional[float] = None
total_precipitation: Optional[float] = None
avg_humidity: Optional[float] = None
avg_wind_speed: Optional[float] = None
avg_pressure: Optional[float] = None
weather_conditions: dict = Field(default_factory=dict)
rainy_days: int = 0
sunny_days: int = 0
class WeatherDataResponse(BaseModel):
"""Lightweight summary schema returned by the weather service"""
date: datetime
temperature: Optional[float]
precipitation: Optional[float]
humidity: Optional[float]
wind_speed: Optional[float]
pressure: Optional[float]
description: Optional[str]
source: str
class WeatherForecastResponse(BaseModel):
"""Lightweight forecast summary returned by the weather service"""
forecast_date: datetime
generated_at: datetime
temperature: Optional[float]
precipitation: Optional[float]
humidity: Optional[float]
wind_speed: Optional[float]
description: Optional[str]
source: str
class LocationRequest(BaseModel):
latitude: float
longitude: float
address: Optional[str] = None
class DateRangeRequest(BaseModel):
start_date: datetime
end_date: datetime
class HistoricalWeatherRequest(BaseModel):
latitude: float
longitude: float
start_date: datetime
end_date: datetime
class WeatherForecastRequest(BaseModel):
latitude: float
longitude: float
days: int
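As with the traffic schemas, the range constraints reject implausible readings at parse time (illustrative values):

from pydantic import ValidationError
from app.schemas.weather import WeatherDataUpdate

WeatherDataUpdate(temperature=21.5, humidity=55.0)  # valid partial update
try:
    WeatherDataUpdate(pressure=79.0)  # below the 800 hPa lower bound
except ValidationError:
    print("pressure out of range")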

1
services/external/app/services/__init__.py vendored Normal file
View File

@@ -0,0 +1 @@
# services/external/app/services/__init__.py

63
services/external/app/services/messaging.py vendored Normal file
View File

@@ -0,0 +1,63 @@
# services/external/app/services/messaging.py
"""
External Service Messaging - Event Publishing using shared messaging infrastructure
"""
from shared.messaging.rabbitmq import RabbitMQClient
from app.core.config import settings
import structlog
logger = structlog.get_logger()
# Single global instance
data_publisher = RabbitMQClient(settings.RABBITMQ_URL, "data-service")
async def setup_messaging():
"""Initialize messaging for data service"""
try:
success = await data_publisher.connect()
if success:
logger.info("Data service messaging initialized")
else:
logger.warning("Data service messaging failed to initialize")
return success
except Exception as e:
logger.warning("Failed to setup messaging", error=str(e))
return False
async def cleanup_messaging():
"""Cleanup messaging for data service"""
try:
await data_publisher.disconnect()
logger.info("Data service messaging cleaned up")
except Exception as e:
logger.warning("Error during messaging cleanup", error=str(e))
async def publish_weather_updated(data: dict) -> bool:
"""Publish weather updated event"""
try:
return await data_publisher.publish_data_event("weather.updated", data)
except Exception as e:
logger.warning("Failed to publish weather updated event", error=str(e))
return False
async def publish_traffic_updated(data: dict) -> bool:
"""Publish traffic updated event"""
try:
return await data_publisher.publish_data_event("traffic.updated", data)
except Exception as e:
logger.warning("Failed to publish traffic updated event", error=str(e))
return False
# Health check for messaging
async def check_messaging_health() -> dict:
"""Check messaging system health"""
try:
if data_publisher.connected:
return {"status": "healthy", "service": "rabbitmq", "connected": True}
else:
return {"status": "unhealthy", "service": "rabbitmq", "connected": False, "error": "Not connected"}
except Exception as e:
return {"status": "unhealthy", "service": "rabbitmq", "connected": False, "error": str(e)}

298
services/external/app/services/traffic_service.py vendored Normal file
View File

@@ -0,0 +1,298 @@
# ================================================================
# services/external/app/services/traffic_service.py
# ================================================================
"""
Abstracted Traffic Service - Universal interface for traffic data across multiple cities
"""
import asyncio
from datetime import datetime
from typing import Dict, List, Any, Optional, Tuple
from sqlalchemy.ext.asyncio import AsyncSession
import structlog
from app.external.apis.traffic import UniversalTrafficClient
from app.models.traffic import TrafficData
from app.repositories.traffic_repository import TrafficRepository
logger = structlog.get_logger()
from app.core.database import database_manager
class TrafficService:
"""
Abstracted traffic service providing unified interface for traffic data
Routes requests to appropriate city-specific clients automatically
"""
def __init__(self):
self.universal_client = UniversalTrafficClient()
self.database_manager = database_manager
async def get_current_traffic(
self,
latitude: float,
longitude: float,
tenant_id: Optional[str] = None
) -> Optional[Dict[str, Any]]:
"""
Get current traffic data for any supported location
Args:
latitude: Query location latitude
longitude: Query location longitude
tenant_id: Optional tenant identifier for logging/analytics
Returns:
Dict with current traffic data or None if not available
"""
try:
logger.info("Getting current traffic data",
lat=latitude, lon=longitude, tenant_id=tenant_id)
# Delegate to universal client
traffic_data = await self.universal_client.get_current_traffic(latitude, longitude)
if traffic_data:
# Add service metadata
traffic_data['service_metadata'] = {
'request_timestamp': datetime.now().isoformat(),
'tenant_id': tenant_id,
'service_version': '2.0',
'query_location': {'latitude': latitude, 'longitude': longitude}
}
logger.info("Successfully retrieved current traffic data",
lat=latitude, lon=longitude,
source=traffic_data.get('source', 'unknown'))
return traffic_data
else:
logger.warning("No current traffic data available",
lat=latitude, lon=longitude)
return None
except Exception as e:
logger.error("Error getting current traffic data",
lat=latitude, lon=longitude, error=str(e))
return None
async def get_historical_traffic(
self,
latitude: float,
longitude: float,
start_date: datetime,
end_date: datetime,
tenant_id: Optional[str] = None
) -> List[Dict[str, Any]]:
"""
Get historical traffic data for any supported location with database storage
Args:
latitude: Query location latitude
longitude: Query location longitude
start_date: Start date for historical data
end_date: End date for historical data
tenant_id: Optional tenant identifier
Returns:
List of historical traffic data dictionaries
"""
try:
logger.info("Getting historical traffic data",
lat=latitude, lon=longitude,
start=start_date, end=end_date, tenant_id=tenant_id)
# Validate date range
if start_date >= end_date:
logger.warning("Invalid date range", start=start_date, end=end_date)
return []
location_id = f"{latitude:.4f},{longitude:.4f}"
async with self.database_manager.get_session() as session:
traffic_repo = TrafficRepository(session)
# Check database first using the repository
db_records = await traffic_repo.get_by_location_and_date_range(
latitude, longitude, start_date, end_date, tenant_id
)
if db_records:
logger.info("Historical traffic data found in database",
count=len(db_records))
return [self._convert_db_record_to_dict(record) for record in db_records]
# Delegate to universal client if not in DB
traffic_data = await self.universal_client.get_historical_traffic(
latitude, longitude, start_date, end_date
)
if traffic_data:
# Add service metadata to each record
for record in traffic_data:
record['service_metadata'] = {
'request_timestamp': datetime.now().isoformat(),
'tenant_id': tenant_id,
'service_version': '2.0',
'query_location': {'latitude': latitude, 'longitude': longitude},
'date_range': {
'start': start_date.isoformat(),
'end': end_date.isoformat()
}
}
async with self.database_manager.get_session() as session:
traffic_repo = TrafficRepository(session)
# Store in database using the repository
stored_count = await traffic_repo.store_traffic_data_batch(
traffic_data, location_id, tenant_id
)
logger.info("Traffic data stored for re-training",
fetched=len(traffic_data), stored=stored_count,
location=location_id)
logger.info("Successfully retrieved historical traffic data",
lat=latitude, lon=longitude, records=len(traffic_data))
return traffic_data
else:
logger.info("No historical traffic data available",
lat=latitude, lon=longitude)
return []
except Exception as e:
logger.error("Error getting historical traffic data",
lat=latitude, lon=longitude, error=str(e))
return []
def _convert_db_record_to_dict(self, record: TrafficData) -> Dict[str, Any]:
"""Convert database record to dictionary format"""
return {
'date': record.date,
'traffic_volume': record.traffic_volume,
'pedestrian_count': record.pedestrian_count,
'congestion_level': record.congestion_level,
'average_speed': record.average_speed,
'source': record.source,
'location_id': record.location_id,
'raw_data': record.raw_data
}
async def get_traffic_events(
self,
latitude: float,
longitude: float,
radius_km: float = 5.0,
tenant_id: Optional[str] = None
) -> List[Dict[str, Any]]:
"""
Get traffic events and incidents for any supported location
Args:
latitude: Query location latitude
longitude: Query location longitude
radius_km: Search radius in kilometers
tenant_id: Optional tenant identifier
Returns:
List of traffic events
"""
try:
logger.info("Getting traffic events",
lat=latitude, lon=longitude, radius=radius_km, tenant_id=tenant_id)
# Delegate to universal client
events = await self.universal_client.get_events(latitude, longitude, radius_km)
# Add metadata to events
for event in events:
event['service_metadata'] = {
'request_timestamp': datetime.now().isoformat(),
'tenant_id': tenant_id,
'service_version': '2.0',
'query_location': {'latitude': latitude, 'longitude': longitude},
'search_radius_km': radius_km
}
logger.info("Retrieved traffic events",
lat=latitude, lon=longitude, events=len(events))
return events
except Exception as e:
logger.error("Error getting traffic events",
lat=latitude, lon=longitude, error=str(e))
return []
def get_location_info(self, latitude: float, longitude: float) -> Dict[str, Any]:
"""
Get information about traffic data availability for location
Args:
latitude: Query location latitude
longitude: Query location longitude
Returns:
Dict with location support information
"""
try:
info = self.universal_client.get_location_info(latitude, longitude)
# Add service layer information
info['service_layer'] = {
'version': '2.0',
'abstraction_level': 'universal',
'supported_operations': [
'current_traffic',
'historical_traffic',
'traffic_events',
'bulk_requests'
]
}
return info
except Exception as e:
logger.error("Error getting location info",
lat=latitude, lon=longitude, error=str(e))
return {
'supported': False,
'error': str(e),
'service_layer': {'version': '2.0'}
}
async def get_stored_traffic_for_training(self,
latitude: float,
longitude: float,
start_date: datetime,
end_date: datetime) -> List[Dict[str, Any]]:
"""Retrieve stored traffic data specifically for training purposes"""
try:
async with self.database_manager.get_session() as session:
traffic_repo = TrafficRepository(session)
records = await traffic_repo.get_historical_traffic_for_training(
latitude, longitude, start_date, end_date
)
# Convert to training format
training_data = []
for record in records:
training_data.append({
'date': record.date,
'traffic_volume': record.traffic_volume,
'pedestrian_count': record.pedestrian_count,
'congestion_level': record.congestion_level,
'average_speed': record.average_speed,
'location_id': record.location_id,
'source': record.source,
'measurement_point_id': record.raw_data # Contains additional metadata
})
logger.info(f"Retrieved {len(training_data)} traffic records for training",
location_id=f"{latitude:.4f},{longitude:.4f}", start=start_date, end=end_date)
return training_data
except Exception as e:
logger.error("Failed to retrieve traffic data for training",
error=str(e), location_id=f"{latitude:.4f},{longitude:.4f}")
return []
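An end-to-end usage sketch of the service above (illustrative coordinates for central Madrid):

from datetime import datetime, timedelta, timezone
from app.services.traffic_service import TrafficService

async def fetch_last_week(lat: float = 40.4168, lon: float = -3.7038):
    service = TrafficService()
    end = datetime.now(timezone.utc)
    start = end - timedelta(days=7)
    # The database is checked first; on a miss the universal client fetches
    # the range and the batch is persisted for later model training.
    return await service.get_historical_traffic(lat, lon, start, end)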

154
services/external/app/services/weather_service.py vendored Normal file
View File

@@ -0,0 +1,154 @@
# services/external/app/services/weather_service.py - REVISED VERSION
"""Weather data service with repository pattern"""
from typing import List, Dict, Any, Optional
from datetime import datetime, timedelta
from sqlalchemy.ext.asyncio import AsyncSession
import structlog
from app.models.weather import WeatherData, WeatherForecast
from app.external.aemet import AEMETClient
from app.schemas.weather import WeatherDataResponse, WeatherForecastResponse
from app.repositories.weather_repository import WeatherRepository
logger = structlog.get_logger()
from app.core.database import database_manager
class WeatherService:
def __init__(self):
self.aemet_client = AEMETClient()
self.database_manager = database_manager
async def get_current_weather(self, latitude: float, longitude: float) -> Optional[WeatherDataResponse]:
"""Get current weather for location"""
try:
logger.debug("Getting current weather", lat=latitude, lon=longitude)
weather_data = await self.aemet_client.get_current_weather(latitude, longitude)
if weather_data:
logger.debug("Weather data received", source=weather_data.get('source'))
return WeatherDataResponse(**weather_data)
else:
logger.warning("No weather data received from AEMET client")
return None
except Exception as e:
logger.error("Failed to get current weather", error=str(e), lat=latitude, lon=longitude)
return None
async def get_weather_forecast(self, latitude: float, longitude: float, days: int = 7) -> List[WeatherForecastResponse]:
"""Get weather forecast for location"""
try:
logger.debug("Getting weather forecast", lat=latitude, lon=longitude, days=days)
forecast_data = await self.aemet_client.get_forecast(latitude, longitude, days)
if forecast_data:
logger.debug("Forecast data received", count=len(forecast_data))
# Validate each forecast item before creating response
valid_forecasts = []
for item in forecast_data:
try:
if isinstance(item, dict):
# Ensure required fields are present
forecast_item = {
"forecast_date": item.get("forecast_date", datetime.now()),
"generated_at": item.get("generated_at", datetime.now()),
"temperature": float(item.get("temperature", 15.0)),
"precipitation": float(item.get("precipitation", 0.0)),
"humidity": float(item.get("humidity", 50.0)),
"wind_speed": float(item.get("wind_speed", 10.0)),
"description": str(item.get("description", "Variable")),
"source": str(item.get("source", "unknown"))
}
valid_forecasts.append(WeatherForecastResponse(**forecast_item))
else:
logger.warning("Invalid forecast item type", item_type=type(item))
except Exception as item_error:
logger.warning("Error processing forecast item", error=str(item_error), item=item)
continue
logger.debug("Valid forecasts processed", count=len(valid_forecasts))
return valid_forecasts
else:
logger.warning("No forecast data received from AEMET client")
return []
except Exception as e:
logger.error("Failed to get weather forecast", error=str(e), lat=latitude, lon=longitude)
return []
async def get_historical_weather(self,
latitude: float,
longitude: float,
start_date: datetime,
end_date: datetime) -> List[WeatherDataResponse]:
"""Get historical weather data"""
try:
logger.debug("Getting historical weather",
lat=latitude, lon=longitude,
start=start_date, end=end_date)
location_id = f"{latitude:.4f},{longitude:.4f}"
async with self.database_manager.get_session() as session:
weather_repository = WeatherRepository(session)
# Use the repository to get data from the database
db_records = await weather_repository.get_historical_weather(
location_id,
start_date,
end_date
)
if db_records:
logger.debug("Historical data found in database", count=len(db_records))
return [WeatherDataResponse(
date=record.date,
temperature=record.temperature,
precipitation=record.precipitation,
humidity=record.humidity,
wind_speed=record.wind_speed,
pressure=record.pressure,
description=record.description,
source=record.source
) for record in db_records]
# If not in database, fetch from API and store
logger.debug("Fetching historical data from AEMET API")
weather_data = await self.aemet_client.get_historical_weather(
latitude, longitude, start_date, end_date
)
if weather_data:
# Use the repository to store the new data
records_to_store = [{
"location_id": location_id,
"city": "Madrid", # Default city for AEMET data
"date": data.get('date', datetime.now()),
"temperature": data.get('temperature'),
"precipitation": data.get('precipitation'),
"humidity": data.get('humidity'),
"wind_speed": data.get('wind_speed'),
"pressure": data.get('pressure'),
"description": data.get('description'),
"source": "aemet",
"data_type": "historical",
"raw_data": data, # Pass as dict, not string
"tenant_id": None
} for data in weather_data]
async with self.database_manager.get_session() as session:
weather_repository = WeatherRepository(session)
await weather_repository.bulk_create_weather_data(records_to_store)
logger.debug("Historical data stored in database", count=len(weather_data))
return [WeatherDataResponse(**item) for item in weather_data]
else:
logger.warning("No historical weather data received")
return []
except Exception as e:
logger.error("Failed to get historical weather", error=str(e))
return []
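A matching usage sketch for the weather service (illustrative):

from app.services.weather_service import WeatherService

async def tomorrow_outlook(lat: float = 40.4168, lon: float = -3.7038):
    service = WeatherService()
    forecasts = await service.get_weather_forecast(lat, lon, days=2)
    # Each item is a WeatherForecastResponse; malformed AEMET entries were dropped
    return [(f.forecast_date, f.temperature, f.description) for f in forecasts]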

19
services/external/pytest.ini vendored Normal file
View File

@@ -0,0 +1,19 @@
[pytest]
testpaths = tests
asyncio_mode = auto
python_files = test_*.py
python_classes = Test*
python_functions = test_*
addopts =
-v
--tb=short
--strict-markers
--disable-warnings
--cov=app
--cov-report=term-missing
--cov-report=html:htmlcov
markers =
unit: Unit tests
integration: Integration tests
slow: Slow running tests
external: Tests requiring external services
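The markers declared here gate test selection, e.g. "pytest -m unit"; a minimal example of applying one:

import pytest

@pytest.mark.unit
def test_location_id_formatting():
    # Matches the "lat,lon" location_id convention used across the service
    assert f"{40.4168:.4f},{-3.7038:.4f}" == "40.4168,-3.7038"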

56
services/external/requirements.txt vendored Normal file
View File

@@ -0,0 +1,56 @@
# services/external/requirements.txt
# FastAPI and web framework
fastapi==0.104.1
uvicorn[standard]==0.24.0
# Database
sqlalchemy==2.0.23
psycopg2-binary==2.9.9
asyncpg==0.29.0
aiosqlite==0.19.0
alembic==1.12.1
# HTTP clients for external APIs
httpx==0.25.2
aiofiles==23.2.0
requests==2.31.0
# Data processing and time series
pandas==2.1.3
numpy==1.25.2
# Validation and serialization
pydantic==2.5.0
pydantic-settings==2.0.3
# Authentication and security
python-jose[cryptography]==3.3.0
# Logging and monitoring
structlog==23.2.0
prometheus-client==0.19.0
# Message queues
aio-pika==9.3.1
# Background job processing
redis==5.0.1
# Date and time handling
pytz==2023.3
python-dateutil==2.8.2
# XML parsing (for some APIs)
lxml==4.9.3
# Geospatial processing
pyproj==3.6.1
# Note: pytest and testing dependencies are in tests/requirements.txt
# Development
python-multipart==0.0.6
# External API specific
beautifulsoup4==4.12.2 # For web scraping if needed
xmltodict==0.13.0 # For XML API responses

1
services/external/shared/shared vendored Symbolic link
View File

@@ -0,0 +1 @@
/Users/urtzialfaro/Documents/bakery-ia/shared

314
services/external/tests/conftest.py vendored Normal file
View File

@@ -0,0 +1,314 @@
# services/external/tests/conftest.py
"""
Pytest configuration and fixtures for External Service tests
"""
import pytest
import asyncio
from datetime import datetime, timezone
from typing import AsyncGenerator
from uuid import uuid4, UUID
from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession, async_sessionmaker
from sqlalchemy.pool import StaticPool
from fastapi.testclient import TestClient
from app.main import app
from app.core.config import settings
from app.core.database import Base, get_db
from app.models.weather import WeatherData  # WeatherStation is not defined in this service's weather models
from app.models.traffic import TrafficData, TrafficMeasurementPoint
# Test database configuration
TEST_DATABASE_URL = "sqlite+aiosqlite:///:memory:"
@pytest.fixture(scope="session")
def event_loop():
"""Create event loop for the test session"""
loop = asyncio.new_event_loop()
yield loop
loop.close()
@pytest.fixture
async def test_engine():
"""Create test database engine"""
engine = create_async_engine(
TEST_DATABASE_URL,
poolclass=StaticPool,
connect_args={"check_same_thread": False}
)
# Create tables
async with engine.begin() as conn:
await conn.run_sync(Base.metadata.create_all)
yield engine
await engine.dispose()
@pytest.fixture
async def test_db_session(test_engine) -> AsyncGenerator[AsyncSession, None]:
"""Create test database session"""
async_session = async_sessionmaker(
test_engine, class_=AsyncSession, expire_on_commit=False
)
async with async_session() as session:
yield session
@pytest.fixture
def test_client():
"""Create test client"""
return TestClient(app)
@pytest.fixture
async def override_get_db(test_db_session):
"""Override get_db dependency for testing"""
async def _override_get_db():
yield test_db_session
app.dependency_overrides[get_db] = _override_get_db
yield
app.dependency_overrides.clear()
# Test data fixtures
@pytest.fixture
def sample_tenant_id() -> UUID:
"""Sample tenant ID for testing"""
return uuid4()
@pytest.fixture
def sample_weather_data() -> dict:
"""Sample weather data for testing"""
return {
"city": "madrid",
"location_id": "40.4168,-3.7038",
"date": datetime.now(timezone.utc),
"temperature": 18.5,
"humidity": 65.0,
"pressure": 1013.2,
"wind_speed": 10.2,
"condition": "partly_cloudy",
"description": "Parcialmente nublado",
"source": "aemet",
"data_type": "current",
"is_forecast": False,
"data_quality_score": 95.0
}
@pytest.fixture
def sample_traffic_data() -> dict:
"""Sample traffic data for testing"""
return {
"city": "madrid",
"location_id": "PM_M30_001",
"date": datetime.now(timezone.utc),
"measurement_point_id": "PM_M30_001",
"measurement_point_name": "M-30 Norte - Nudo Norte",
"measurement_point_type": "M30",
"traffic_volume": 850,
"average_speed": 65.2,
"congestion_level": "medium",
"occupation_percentage": 45.8,
"latitude": 40.4501,
"longitude": -3.6919,
"district": "Chamartín",
"source": "madrid_opendata",
"data_quality_score": 92.0,
"is_synthetic": False
}
@pytest.fixture
def sample_weather_forecast() -> list[dict]:
"""Sample weather forecast data"""
base_date = datetime.now(timezone.utc)
return [
{
"city": "madrid",
"location_id": "40.4168,-3.7038",
"date": base_date,
"forecast_date": base_date,
"temperature": 20.0,
"temperature_min": 15.0,
"temperature_max": 25.0,
"precipitation": 0.0,
"humidity": 60.0,
"wind_speed": 12.0,
"condition": "sunny",
"description": "Soleado",
"source": "aemet",
"data_type": "forecast",
"is_forecast": True,
"data_quality_score": 85.0
}
]
@pytest.fixture
async def populated_weather_db(test_db_session: AsyncSession, sample_weather_data: dict):
"""Database populated with weather test data"""
weather_record = WeatherData(**sample_weather_data)
test_db_session.add(weather_record)
await test_db_session.commit()
yield test_db_session
@pytest.fixture
async def populated_traffic_db(test_db_session: AsyncSession, sample_traffic_data: dict):
"""Database populated with traffic test data"""
traffic_record = TrafficData(**sample_traffic_data)
test_db_session.add(traffic_record)
await test_db_session.commit()
yield test_db_session
# Mock external API fixtures
@pytest.fixture
def mock_aemet_response():
"""Mock AEMET API response"""
return {
"date": datetime.now(timezone.utc),
"temperature": 18.5,
"humidity": 65.0,
"pressure": 1013.2,
"wind_speed": 10.2,
"description": "Parcialmente nublado",
"source": "aemet"
}
@pytest.fixture
def mock_madrid_traffic_xml():
"""Mock Madrid Open Data traffic XML"""
return """<?xml version="1.0" encoding="UTF-8"?>
<pms>
<pm codigo="PM_M30_001" nombre="M-30 Norte - Nudo Norte">
<intensidad>850</intensidad>
<ocupacion>45</ocupacion>
<velocidad>65</velocidad>
<fechahora>2024-01-15T10:30:00</fechahora>
</pm>
<pm codigo="PM_URB_002" nombre="Gran Vía - Plaza España">
<intensidad>320</intensidad>
<ocupacion>78</ocupacion>
<velocidad>25</velocidad>
<fechahora>2024-01-15T10:30:00</fechahora>
</pm>
</pms>"""
@pytest.fixture
def mock_messaging():
"""Mock messaging service"""
class MockMessaging:
def __init__(self):
self.published_events = []
async def publish_weather_updated(self, data):
self.published_events.append(("weather_updated", data))
return True
async def publish_traffic_updated(self, data):
self.published_events.append(("traffic_updated", data))
return True
async def publish_collection_job_started(self, data):
self.published_events.append(("job_started", data))
return True
async def publish_collection_job_completed(self, data):
self.published_events.append(("job_completed", data))
return True
return MockMessaging()
# Mock external clients
@pytest.fixture
def mock_aemet_client():
"""Mock AEMET client"""
class MockAEMETClient:
async def get_current_weather(self, lat, lon):
return {
"date": datetime.now(timezone.utc),
"temperature": 18.5,
"humidity": 65.0,
"pressure": 1013.2,
"wind_speed": 10.2,
"description": "Parcialmente nublado",
"source": "aemet"
}
async def get_forecast(self, lat, lon, days):
return [
{
"forecast_date": datetime.now(timezone.utc),
"temperature": 20.0,
"temperature_min": 15.0,
"temperature_max": 25.0,
"precipitation": 0.0,
"humidity": 60.0,
"wind_speed": 12.0,
"description": "Soleado",
"source": "aemet"
}
]
return MockAEMETClient()
@pytest.fixture
def mock_madrid_client():
"""Mock Madrid traffic client"""
class MockMadridClient:
async def fetch_current_traffic_xml(self):
return """<?xml version="1.0" encoding="UTF-8"?>
<pms>
<pm codigo="PM_TEST_001" nombre="Test Point">
<intensidad>500</intensidad>
<ocupacion>50</ocupacion>
<velocidad>50</velocidad>
<fechahora>2024-01-15T10:30:00</fechahora>
</pm>
</pms>"""
return MockMadridClient()
@pytest.fixture
def mock_madrid_processor():
"""Mock Madrid traffic processor"""
class MockMadridProcessor:
async def process_current_traffic_xml(self, xml_content):
return [
{
"city": "madrid",
"location_id": "PM_TEST_001",
"date": datetime.now(timezone.utc),
"measurement_point_id": "PM_TEST_001",
"measurement_point_name": "Test Point",
"measurement_point_type": "TEST",
"traffic_volume": 500,
"average_speed": 50.0,
"congestion_level": "medium",
"occupation_percentage": 50.0,
"latitude": 40.4168,
"longitude": -3.7038,
"district": "Centro",
"source": "madrid_opendata",
"data_quality_score": 90.0,
"is_synthetic": False
}
]
return MockMadridProcessor()
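A small sketch of a test consuming the fixtures above (illustrative; it relies only on fixtures defined in this conftest):

import pytest
from sqlalchemy import select
from app.models.weather import WeatherData

@pytest.mark.unit
async def test_weather_row_persisted(populated_weather_db, sample_weather_data):
    result = await populated_weather_db.execute(select(WeatherData))
    rows = result.scalars().all()
    assert rows[0].temperature == sample_weather_data["temperature"]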

9
services/external/tests/requirements.txt vendored Normal file
View File

@@ -0,0 +1,9 @@
# Testing dependencies for External Service
pytest==7.4.3
pytest-asyncio==0.21.1
pytest-mock==3.12.0
httpx==0.25.2
fastapi[all]==0.104.1
sqlalchemy[asyncio]==2.0.23
aiosqlite==0.19.0
coverage==7.3.2

393
services/external/tests/unit/test_repositories.py vendored Normal file
View File

@@ -0,0 +1,393 @@
# services/external/tests/unit/test_repositories.py
"""
Unit tests for External Service Repositories
"""
import pytest
from datetime import datetime, timezone, timedelta
from uuid import uuid4
from app.repositories.weather_repository import WeatherRepository
from app.repositories.traffic_repository import TrafficRepository
from app.models.weather import WeatherData  # station/job models referenced by these tests are not in this commit's models
from app.models.traffic import TrafficData, TrafficMeasurementPoint
@pytest.mark.asyncio
class TestWeatherRepository:
"""Test Weather Repository operations"""
async def test_create_weather_data(self, test_db_session, sample_weather_data):
"""Test creating weather data"""
repository = WeatherRepository(test_db_session)
record = await repository.create_weather_data(sample_weather_data)
assert record is not None
assert record.id is not None
assert record.city == sample_weather_data["city"]
assert record.temperature == sample_weather_data["temperature"]
async def test_get_current_weather(self, populated_weather_db, sample_weather_data):
"""Test getting current weather data"""
repository = WeatherRepository(populated_weather_db)
result = await repository.get_current_weather("madrid")
assert result is not None
assert result.city == "madrid"
assert result.temperature == sample_weather_data["temperature"]
async def test_get_weather_forecast(self, test_db_session, sample_weather_forecast):
"""Test getting weather forecast"""
repository = WeatherRepository(test_db_session)
# Create forecast data
for forecast_item in sample_weather_forecast:
await repository.create_weather_data(forecast_item)
result = await repository.get_weather_forecast("madrid", 7)
assert len(result) == 1
assert result[0].is_forecast is True
async def test_get_historical_weather(self, test_db_session, sample_weather_data):
"""Test getting historical weather data"""
repository = WeatherRepository(test_db_session)
# Create historical data
historical_data = sample_weather_data.copy()
historical_data["date"] = datetime.now(timezone.utc) - timedelta(days=1)
await repository.create_weather_data(historical_data)
start_date = datetime.now(timezone.utc) - timedelta(days=2)
end_date = datetime.now(timezone.utc)
result = await repository.get_historical_weather("madrid", start_date, end_date)
assert len(result) >= 1
async def test_create_weather_station(self, test_db_session):
"""Test creating weather station"""
repository = WeatherRepository(test_db_session)
station_data = {
"station_id": "TEST_001",
"name": "Test Station",
"city": "madrid",
"latitude": 40.4168,
"longitude": -3.7038,
"altitude": 650.0,
"is_active": True
}
station = await repository.create_weather_station(station_data)
assert station is not None
assert station.station_id == "TEST_001"
assert station.name == "Test Station"
async def test_get_weather_stations(self, test_db_session):
"""Test getting weather stations"""
repository = WeatherRepository(test_db_session)
# Create test station
station_data = {
"station_id": "TEST_001",
"name": "Test Station",
"city": "madrid",
"latitude": 40.4168,
"longitude": -3.7038,
"is_active": True
}
await repository.create_weather_station(station_data)
stations = await repository.get_weather_stations("madrid")
assert len(stations) == 1
assert stations[0].station_id == "TEST_001"
async def test_create_weather_job(self, test_db_session, sample_tenant_id):
"""Test creating weather data collection job"""
repository = WeatherRepository(test_db_session)
job_data = {
"job_type": "current",
"city": "madrid",
"status": "pending",
"scheduled_at": datetime.utcnow(),
"tenant_id": sample_tenant_id
}
job = await repository.create_weather_job(job_data)
assert job is not None
assert job.job_type == "current"
assert job.status == "pending"
async def test_update_weather_job(self, test_db_session, sample_tenant_id):
"""Test updating weather job"""
repository = WeatherRepository(test_db_session)
# Create job first
job_data = {
"job_type": "current",
"city": "madrid",
"status": "pending",
"scheduled_at": datetime.utcnow(),
"tenant_id": sample_tenant_id
}
job = await repository.create_weather_job(job_data)
# Update job
update_data = {
"status": "completed",
"completed_at": datetime.utcnow(),
"success_count": 1
}
success = await repository.update_weather_job(job.id, update_data)
assert success is True
async def test_get_weather_jobs(self, test_db_session, sample_tenant_id):
"""Test getting weather jobs"""
repository = WeatherRepository(test_db_session)
# Create test job
job_data = {
"job_type": "forecast",
"city": "madrid",
"status": "completed",
"scheduled_at": datetime.utcnow(),
"tenant_id": sample_tenant_id
}
await repository.create_weather_job(job_data)
jobs = await repository.get_weather_jobs()
assert len(jobs) >= 1
assert any(job.job_type == "forecast" for job in jobs)
@pytest.mark.asyncio
class TestTrafficRepository:
"""Test Traffic Repository operations"""
async def test_create_traffic_data(self, test_db_session, sample_traffic_data):
"""Test creating traffic data"""
repository = TrafficRepository(test_db_session)
# Convert sample data to list for bulk create
traffic_list = [sample_traffic_data]
count = await repository.bulk_create_traffic_data(traffic_list)
assert count == 1
async def test_get_current_traffic(self, populated_traffic_db, sample_traffic_data):
"""Test getting current traffic data"""
repository = TrafficRepository(populated_traffic_db)
result = await repository.get_current_traffic("madrid")
assert len(result) >= 1
assert result[0].city == "madrid"
async def test_get_current_traffic_with_filters(self, populated_traffic_db):
"""Test getting current traffic with filters"""
repository = TrafficRepository(populated_traffic_db)
result = await repository.get_current_traffic("madrid", district="Chamartín")
# Should return results based on filter
assert isinstance(result, list)
async def test_get_historical_traffic(self, test_db_session, sample_traffic_data):
"""Test getting historical traffic data"""
repository = TrafficRepository(test_db_session)
# Create historical data
historical_data = sample_traffic_data.copy()
historical_data["date"] = datetime.now(timezone.utc) - timedelta(days=1)
await repository.bulk_create_traffic_data([historical_data])
start_date = datetime.now(timezone.utc) - timedelta(days=2)
end_date = datetime.now(timezone.utc)
result = await repository.get_historical_traffic("madrid", start_date, end_date)
assert len(result) >= 1
async def test_create_measurement_point(self, test_db_session):
"""Test creating traffic measurement point"""
repository = TrafficRepository(test_db_session)
point_data = {
"point_id": "TEST_POINT_001",
"name": "Test Measurement Point",
"city": "madrid",
"point_type": "TEST",
"latitude": 40.4168,
"longitude": -3.7038,
"district": "Centro",
"road_name": "Test Road",
"is_active": True
}
point = await repository.create_measurement_point(point_data)
assert point is not None
assert point.point_id == "TEST_POINT_001"
assert point.name == "Test Measurement Point"
async def test_get_measurement_points(self, test_db_session):
"""Test getting measurement points"""
repository = TrafficRepository(test_db_session)
# Create test point
point_data = {
"point_id": "TEST_POINT_001",
"name": "Test Point",
"city": "madrid",
"point_type": "TEST",
"latitude": 40.4168,
"longitude": -3.7038,
"is_active": True
}
await repository.create_measurement_point(point_data)
points = await repository.get_measurement_points("madrid")
assert len(points) == 1
assert points[0].point_id == "TEST_POINT_001"
async def test_get_measurement_points_with_filters(self, test_db_session):
"""Test getting measurement points with filters"""
repository = TrafficRepository(test_db_session)
# Create test points with different types
for i, point_type in enumerate(["M30", "URB", "TEST"]):
point_data = {
"point_id": f"TEST_POINT_{i:03d}",
"name": f"Test Point {i}",
"city": "madrid",
"point_type": point_type,
"latitude": 40.4168,
"longitude": -3.7038,
"is_active": True
}
await repository.create_measurement_point(point_data)
# Filter by type
points = await repository.get_measurement_points("madrid", road_type="M30")
assert len(points) == 1
assert points[0].point_type == "M30"
async def test_get_traffic_analytics(self, populated_traffic_db):
"""Test getting traffic analytics"""
repository = TrafficRepository(populated_traffic_db)
analytics = await repository.get_traffic_analytics("madrid")
assert isinstance(analytics, dict)
assert "total_measurements" in analytics
assert "average_volume" in analytics
async def test_create_traffic_job(self, test_db_session, sample_tenant_id):
"""Test creating traffic collection job"""
repository = TrafficRepository(test_db_session)
job_data = {
"job_type": "current",
"city": "madrid",
"status": "pending",
"scheduled_at": datetime.utcnow(),
"tenant_id": sample_tenant_id
}
job = await repository.create_traffic_job(job_data)
assert job is not None
assert job.job_type == "current"
assert job.status == "pending"
async def test_update_traffic_job(self, test_db_session, sample_tenant_id):
"""Test updating traffic job"""
repository = TrafficRepository(test_db_session)
# Create job first
job_data = {
"job_type": "current",
"city": "madrid",
"status": "pending",
"scheduled_at": datetime.utcnow(),
"tenant_id": sample_tenant_id
}
job = await repository.create_traffic_job(job_data)
# Update job
update_data = {
"status": "completed",
"completed_at": datetime.utcnow(),
"success_count": 10
}
success = await repository.update_traffic_job(job.id, update_data)
assert success is True
async def test_get_traffic_jobs(self, test_db_session, sample_tenant_id):
"""Test getting traffic jobs"""
repository = TrafficRepository(test_db_session)
# Create test job
job_data = {
"job_type": "historical",
"city": "madrid",
"status": "completed",
"scheduled_at": datetime.utcnow(),
"tenant_id": sample_tenant_id
}
await repository.create_traffic_job(job_data)
jobs = await repository.get_traffic_jobs()
assert len(jobs) >= 1
assert any(job.job_type == "historical" for job in jobs)
async def test_bulk_create_performance(self, test_db_session):
"""Test bulk create performance"""
repository = TrafficRepository(test_db_session)
# Create large dataset
bulk_data = []
for i in range(100):
data = {
"city": "madrid",
"location_id": f"PM_TEST_{i:03d}",
"date": datetime.now(timezone.utc),
"measurement_point_id": f"PM_TEST_{i:03d}",
"measurement_point_name": f"Test Point {i}",
"measurement_point_type": "TEST",
"traffic_volume": 100 + i,
"average_speed": 50.0,
"congestion_level": "medium",
"occupation_percentage": 50.0,
"latitude": 40.4168,
"longitude": -3.7038,
"source": "test"
}
bulk_data.append(data)
import time
start_time = time.perf_counter()  # monotonic clock suited to elapsed-time measurement
count = await repository.bulk_create_traffic_data(bulk_data)
end_time = time.perf_counter()
execution_time = end_time - start_time
assert count == 100
assert execution_time < 3.0 # Should complete in under 3 seconds
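# Note: the 3-second budget above effectively assumes bulk_create_traffic_data
# issues one executemany-style INSERT rather than 100 row-by-row inserts. A
# minimal sketch of that pattern, assuming SQLAlchemy 2.x async and the
# TrafficData model (hypothetical - the real repository method may differ):
async def _sketch_bulk_insert(session, rows: list[dict]) -> int:
    from sqlalchemy import insert
    from app.models.traffic import TrafficData
    if not rows:
        return 0
    # Passing a list of parameter dicts takes SQLAlchemy's executemany fast path
    await session.execute(insert(TrafficData), rows)
    await session.flush()
    return len(rows)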

445
services/external/tests/unit/test_services.py vendored Normal File
View File

@@ -0,0 +1,445 @@
# services/external/tests/unit/test_services.py
"""
Unit tests for External Service Services
"""
import pytest
from datetime import datetime, timezone, timedelta
from unittest.mock import AsyncMock, patch
from uuid import uuid4
from app.services.weather_service import WeatherService
from app.services.traffic_service import TrafficService
@pytest.mark.asyncio
class TestWeatherService:
"""Test Weather Service business logic"""
@pytest.fixture
def weather_service(self):
"""Create weather service instance"""
return WeatherService()
async def test_get_current_weather_from_cache(self, weather_service):
"""Test getting current weather from cache"""
with patch('app.services.weather_service.get_db_transaction') as mock_get_db:
mock_db = AsyncMock()
mock_get_db.return_value.__aenter__.return_value = mock_db
mock_repository = AsyncMock()
mock_weather = AsyncMock()
mock_weather.date = datetime.now(timezone.utc) - timedelta(minutes=30) # Fresh data
mock_weather.to_dict.return_value = {"temperature": 18.5, "city": "madrid"}
mock_repository.get_current_weather.return_value = mock_weather
with patch('app.services.weather_service.WeatherRepository', return_value=mock_repository):
result = await weather_service.get_current_weather("madrid")
assert result is not None
assert result["temperature"] == 18.5
assert result["city"] == "madrid"
async def test_get_current_weather_fetch_from_api(self, weather_service, mock_aemet_response):
"""Test getting current weather from API when cache is stale"""
with patch('app.services.weather_service.get_db_transaction') as mock_get_db:
mock_db = AsyncMock()
mock_get_db.return_value.__aenter__.return_value = mock_db
mock_repository = AsyncMock()
# No cached data or stale data
mock_repository.get_current_weather.return_value = None
mock_stored = AsyncMock()
mock_stored.to_dict.return_value = {"temperature": 20.0}
mock_repository.create_weather_data.return_value = mock_stored
# Mock AEMET client
mock_client = AsyncMock()
mock_client.get_current_weather.return_value = mock_aemet_response
with patch('app.services.weather_service.WeatherRepository', return_value=mock_repository):
weather_service.aemet_client = mock_client
result = await weather_service.get_current_weather("madrid")
assert result is not None
assert result["temperature"] == 20.0
mock_client.get_current_weather.assert_called_once()
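# Together, the two tests above pin down a read-through cache: serve stored
# rows while fresh, otherwise fetch from AEMET and persist. A minimal sketch of
# that control flow (get_db_transaction, WeatherRepository and aemet_client are
# the names patched above; _is_fresh is a hypothetical freshness helper):
async def _sketch_get_current_weather(service, city: str):
    from app.services.weather_service import get_db_transaction, WeatherRepository
    async with get_db_transaction() as db:
        repository = WeatherRepository(db)
        cached = await repository.get_current_weather(city)
        if cached is not None and service._is_fresh(cached):
            return cached.to_dict()  # cache hit: no external call
        raw = await service.aemet_client.get_current_weather(city)
        stored = await repository.create_weather_data(raw)
        return stored.to_dict()  # cache miss: fetched and persisted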
async def test_get_weather_forecast_from_cache(self, weather_service):
"""Test getting weather forecast from cache"""
with patch('app.services.weather_service.get_db_transaction') as mock_get_db:
mock_db = AsyncMock()
mock_get_db.return_value.__aenter__.return_value = mock_db
mock_repository = AsyncMock()
mock_forecast = [AsyncMock(), AsyncMock()]
for item in mock_forecast:
item.created_at = datetime.now(timezone.utc) - timedelta(hours=1) # Fresh
item.to_dict.return_value = {"temperature": 22.0}
mock_repository.get_weather_forecast.return_value = mock_forecast
with patch('app.services.weather_service.WeatherRepository', return_value=mock_repository):
result = await weather_service.get_weather_forecast("madrid", 7)
assert len(result) == 2
assert all(item["temperature"] == 22.0 for item in result)
async def test_get_weather_forecast_fetch_from_api(self, weather_service):
"""Test getting weather forecast from API when cache is stale"""
with patch('app.services.weather_service.get_db_transaction') as mock_get_db:
mock_db = AsyncMock()
mock_get_db.return_value.__aenter__.return_value = mock_db
mock_repository = AsyncMock()
# No cached data
mock_repository.get_weather_forecast.return_value = []
mock_stored = AsyncMock()
mock_stored.to_dict.return_value = {"temperature": 25.0}
mock_repository.create_weather_data.return_value = mock_stored
# Mock AEMET client
mock_client = AsyncMock()
mock_client.get_forecast.return_value = [
{"forecast_date": datetime.now(), "temperature": 25.0}
]
with patch('app.services.weather_service.WeatherRepository', return_value=mock_repository):
weather_service.aemet_client = mock_client
result = await weather_service.get_weather_forecast("madrid", 7)
assert len(result) == 1
assert result[0]["temperature"] == 25.0
mock_client.get_forecast.assert_called_once()
async def test_get_historical_weather(self, weather_service, sample_tenant_id):
"""Test getting historical weather data"""
start_date = datetime.now(timezone.utc) - timedelta(days=7)
end_date = datetime.now(timezone.utc)
with patch('app.services.weather_service.get_db_transaction') as mock_get_db:
mock_db = AsyncMock()
mock_get_db.return_value.__aenter__.return_value = mock_db
mock_repository = AsyncMock()
mock_historical = [AsyncMock(), AsyncMock()]
for item in mock_historical:
item.to_dict.return_value = {"temperature": 18.0}
mock_repository.get_historical_weather.return_value = mock_historical
with patch('app.services.weather_service.WeatherRepository', return_value=mock_repository):
result = await weather_service.get_historical_weather(
"madrid", start_date, end_date, sample_tenant_id
)
assert len(result) == 2
assert all(item["temperature"] == 18.0 for item in result)
async def test_get_weather_stations(self, weather_service):
"""Test getting weather stations"""
with patch('app.services.weather_service.get_db_transaction') as mock_get_db:
mock_db = AsyncMock()
mock_get_db.return_value.__aenter__.return_value = mock_db
mock_repository = AsyncMock()
mock_stations = [AsyncMock()]
mock_stations[0].to_dict.return_value = {"station_id": "TEST_001"}
mock_repository.get_weather_stations.return_value = mock_stations
with patch('app.services.weather_service.WeatherRepository', return_value=mock_repository):
result = await weather_service.get_weather_stations("madrid")
assert len(result) == 1
assert result[0]["station_id"] == "TEST_001"
async def test_trigger_weather_collection(self, weather_service, sample_tenant_id):
"""Test triggering weather data collection"""
with patch('app.services.weather_service.get_db_transaction') as mock_get_db:
mock_db = AsyncMock()
mock_get_db.return_value.__aenter__.return_value = mock_db
mock_repository = AsyncMock()
mock_job = AsyncMock()
mock_job.id = uuid4()
mock_job.to_dict.return_value = {"id": str(mock_job.id), "status": "pending"}
mock_repository.create_weather_job.return_value = mock_job
with patch('app.services.weather_service.WeatherRepository', return_value=mock_repository):
result = await weather_service.trigger_weather_collection(
"madrid", "current", sample_tenant_id
)
assert result["status"] == "pending"
mock_repository.create_weather_job.assert_called_once()
async def test_process_weather_collection_job(self, weather_service):
"""Test processing weather collection job"""
job_id = uuid4()
with patch('app.services.weather_service.get_db_transaction') as mock_get_db:
mock_db = AsyncMock()
mock_get_db.return_value.__aenter__.return_value = mock_db
mock_repository = AsyncMock()
# Mock job
mock_job = AsyncMock()
mock_job.id = job_id
mock_job.job_type = "current"
mock_job.city = "madrid"
mock_repository.get_weather_jobs.return_value = [mock_job]
mock_repository.update_weather_job.return_value = True
# Mock updated job after completion
mock_updated_job = AsyncMock()
mock_updated_job.to_dict.return_value = {"id": str(job_id), "status": "completed"}
# The repository is queried twice during processing: first returning the
# pending job, then the completed job
mock_repository.get_weather_jobs.side_effect = [
[mock_job], # First call returns pending job
[mock_updated_job] # Second call returns completed job
]
with patch('app.services.weather_service.WeatherRepository', return_value=mock_repository):
with patch.object(weather_service, '_collect_current_weather', return_value=1):
result = await weather_service.process_weather_collection_job(job_id)
assert result["status"] == "completed"
async def test_map_weather_condition(self, weather_service):
"""Test weather condition mapping"""
test_cases = [
("Soleado", "clear"),
("Nublado", "cloudy"),
("Parcialmente nublado", "partly_cloudy"),
("Lluvioso", "rainy"),
("Nevando", "snowy"),
("Tormenta", "stormy"),
("Desconocido", "unknown")
]
for description, expected in test_cases:
result = weather_service._map_weather_condition(description)
assert result == expected
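# A minimal sketch of the keyword mapping the cases above assume; only the
# input/output pairs come from the test - the real
# WeatherService._map_weather_condition may differ:
def _sketch_map_weather_condition(description: str) -> str:
    desc = description.lower()
    # Check the more specific "parcialmente" before the generic "nublado"
    for keyword, condition in [
        ("parcialmente", "partly_cloudy"),
        ("soleado", "clear"),
        ("nublado", "cloudy"),
        ("lluvi", "rainy"),
        ("nev", "snowy"),
        ("torment", "stormy"),
    ]:
        if keyword in desc:
            return condition
    return "unknown"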
@pytest.mark.asyncio
class TestTrafficService:
"""Test Traffic Service business logic"""
@pytest.fixture
def traffic_service(self):
"""Create traffic service instance"""
return TrafficService()
async def test_get_current_traffic_from_cache(self, traffic_service):
"""Test getting current traffic from cache"""
with patch('app.services.traffic_service.get_db_transaction') as mock_get_db:
mock_db = AsyncMock()
mock_get_db.return_value.__aenter__.return_value = mock_db
mock_repository = AsyncMock()
mock_traffic = [AsyncMock()]
mock_traffic[0].date = datetime.now(timezone.utc) - timedelta(minutes=5) # Fresh
mock_traffic[0].to_dict.return_value = {"traffic_volume": 850}
mock_repository.get_current_traffic.return_value = mock_traffic
with patch('app.services.traffic_service.TrafficRepository', return_value=mock_repository):
result = await traffic_service.get_current_traffic("madrid")
assert len(result) == 1
assert result[0]["traffic_volume"] == 850
async def test_get_current_traffic_fetch_from_api(self, traffic_service, mock_madrid_traffic_xml):
"""Test getting current traffic from API when cache is stale"""
with patch('app.services.traffic_service.get_db_transaction') as mock_get_db:
mock_db = AsyncMock()
mock_get_db.return_value.__aenter__.return_value = mock_db
mock_repository = AsyncMock()
# No cached data
mock_repository.get_current_traffic.return_value = []
mock_repository.bulk_create_traffic_data.return_value = 2
# Mock clients
mock_client = AsyncMock()
mock_client.fetch_current_traffic_xml.return_value = mock_madrid_traffic_xml
mock_processor = AsyncMock()
mock_processor.process_current_traffic_xml.return_value = [
{"traffic_volume": 850, "measurement_point_id": "PM_M30_001"},
{"traffic_volume": 320, "measurement_point_id": "PM_URB_002"}
]
with patch('app.services.traffic_service.TrafficRepository', return_value=mock_repository):
traffic_service.madrid_client = mock_client
traffic_service.madrid_processor = mock_processor
result = await traffic_service.get_current_traffic("madrid")
assert len(result) == 2
assert result[0]["traffic_volume"] == 850
mock_client.fetch_current_traffic_xml.assert_called_once()
async def test_get_historical_traffic(self, traffic_service, sample_tenant_id):
"""Test getting historical traffic data"""
start_date = datetime.now(timezone.utc) - timedelta(days=7)
end_date = datetime.now(timezone.utc)
with patch('app.services.traffic_service.get_db_transaction') as mock_get_db:
mock_db = AsyncMock()
mock_get_db.return_value.__aenter__.return_value = mock_db
mock_repository = AsyncMock()
mock_historical = [AsyncMock(), AsyncMock()]
for item in mock_historical:
item.to_dict.return_value = {"traffic_volume": 500}
mock_repository.get_historical_traffic.return_value = mock_historical
with patch('app.services.traffic_service.TrafficRepository', return_value=mock_repository):
result = await traffic_service.get_historical_traffic(
"madrid", start_date, end_date, tenant_id=sample_tenant_id
)
assert len(result) == 2
assert all(item["traffic_volume"] == 500 for item in result)
async def test_get_measurement_points(self, traffic_service):
"""Test getting measurement points"""
with patch('app.services.traffic_service.get_db_transaction') as mock_get_db:
mock_db = AsyncMock()
mock_get_db.return_value.__aenter__.return_value = mock_db
mock_repository = AsyncMock()
mock_points = [AsyncMock()]
mock_points[0].to_dict.return_value = {"point_id": "PM_TEST_001"}
mock_repository.get_measurement_points.return_value = mock_points
with patch('app.services.traffic_service.TrafficRepository', return_value=mock_repository):
result = await traffic_service.get_measurement_points("madrid")
assert len(result) == 1
assert result[0]["point_id"] == "PM_TEST_001"
async def test_get_traffic_analytics(self, traffic_service):
"""Test getting traffic analytics"""
start_date = datetime.now(timezone.utc) - timedelta(days=30)
end_date = datetime.now(timezone.utc)
with patch('app.services.traffic_service.get_db_transaction') as mock_get_db:
mock_db = AsyncMock()
mock_get_db.return_value.__aenter__.return_value = mock_db
mock_repository = AsyncMock()
mock_analytics = {
"total_measurements": 1000,
"average_volume": 650.5,
"peak_hour": "08:00"
}
mock_repository.get_traffic_analytics.return_value = mock_analytics
with patch('app.services.traffic_service.TrafficRepository', return_value=mock_repository):
result = await traffic_service.get_traffic_analytics(
"madrid", start_date, end_date
)
assert result["total_measurements"] == 1000
assert result["average_volume"] == 650.5
assert "generated_at" in result
async def test_trigger_traffic_collection(self, traffic_service, sample_tenant_id):
"""Test triggering traffic data collection"""
with patch('app.services.traffic_service.get_db_transaction') as mock_get_db:
mock_db = AsyncMock()
mock_get_db.return_value.__aenter__.return_value = mock_db
mock_repository = AsyncMock()
mock_job = AsyncMock()
mock_job.id = uuid4()
mock_job.to_dict.return_value = {"id": str(mock_job.id), "status": "pending"}
mock_repository.create_traffic_job.return_value = mock_job
with patch('app.services.traffic_service.TrafficRepository', return_value=mock_repository):
result = await traffic_service.trigger_traffic_collection(
"madrid", "current", user_id=sample_tenant_id
)
assert result["status"] == "pending"
mock_repository.create_traffic_job.assert_called_once()
async def test_process_traffic_collection_job(self, traffic_service):
"""Test processing traffic collection job"""
job_id = uuid4()
with patch('app.services.traffic_service.get_db_transaction') as mock_get_db:
mock_db = AsyncMock()
mock_get_db.return_value.__aenter__.return_value = mock_db
mock_repository = AsyncMock()
# Mock job
mock_job = AsyncMock()
mock_job.id = job_id
mock_job.job_type = "current"
mock_job.city = "madrid"
mock_job.location_pattern = None
mock_repository.get_traffic_jobs.return_value = [mock_job]
mock_repository.update_traffic_job.return_value = True
# Mock updated job after completion
mock_updated_job = AsyncMock()
mock_updated_job.to_dict.return_value = {"id": str(job_id), "status": "completed"}
mock_repository.get_traffic_jobs.side_effect = [
[mock_job], # First call returns pending job
[mock_updated_job] # Second call returns completed job
]
with patch('app.services.traffic_service.TrafficRepository', return_value=mock_repository):
with patch.object(traffic_service, '_collect_current_traffic', return_value=125):
result = await traffic_service.process_traffic_collection_job(job_id)
assert result["status"] == "completed"
async def test_is_traffic_data_fresh(self, traffic_service):
"""Test traffic data freshness check"""
# Fresh data (5 minutes old)
fresh_data = [AsyncMock()]
fresh_data[0].date = datetime.now(timezone.utc) - timedelta(minutes=5)
result = traffic_service._is_traffic_data_fresh(fresh_data)
assert result is True
# Stale data (15 minutes old)
stale_data = [AsyncMock()]
stale_data[0].date = datetime.now(timezone.utc) - timedelta(minutes=15)
result = traffic_service._is_traffic_data_fresh(stale_data)
assert result is False
# Empty data
result = traffic_service._is_traffic_data_fresh([])
assert result is False
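# A minimal sketch of the freshness rule these assertions imply; the exact
# window is an assumption (somewhere between 5 and 15 minutes - 10 is used here):
def _sketch_is_traffic_data_fresh(data, max_age_minutes: int = 10) -> bool:
    if not data:
        return False  # no rows means nothing fresh to serve
    age = datetime.now(timezone.utc) - data[0].date
    return age <= timedelta(minutes=max_age_minutes)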
async def test_collect_current_traffic(self, traffic_service):
"""Test current traffic collection"""
with patch('app.services.traffic_service.get_db_transaction') as mock_get_db:
mock_db = AsyncMock()
mock_get_db.return_value.__aenter__.return_value = mock_db
mock_repository = AsyncMock()
mock_repository.bulk_create_traffic_data.return_value = 10
with patch('app.services.traffic_service.TrafficRepository', return_value=mock_repository):
with patch.object(traffic_service, '_fetch_current_traffic_from_api', return_value=[{} for _ in range(10)]):
result = await traffic_service._collect_current_traffic("madrid", None)
assert result == 10
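# A minimal sketch of the collection flow this test exercises: fetch from the
# Madrid API, then persist in bulk (names taken from the patches above; the
# real _collect_current_traffic may add job bookkeeping and error handling):
async def _sketch_collect_current_traffic(service, city: str) -> int:
    from app.services.traffic_service import get_db_transaction, TrafficRepository
    records = await service._fetch_current_traffic_from_api(city, None)
    async with get_db_transaction() as db:
        repository = TrafficRepository(db)
        # Return value is the number of rows written, which the test asserts == 10
        return await repository.bulk_create_traffic_data(records)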