REFACTOR data service
This commit is contained in:
34
services/external/Dockerfile
vendored
Normal file
34
services/external/Dockerfile
vendored
Normal file
@@ -0,0 +1,34 @@
|
||||
# services/external/Dockerfile
|
||||
FROM python:3.11-slim
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Install system dependencies
|
||||
RUN apt-get update && apt-get install -y \
|
||||
gcc \
|
||||
g++ \
|
||||
curl \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Copy requirements and install Python dependencies
|
||||
COPY services/external/requirements.txt .
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
# Copy shared modules first
|
||||
COPY shared/ /app/shared/
|
||||
|
||||
# Copy application code
|
||||
COPY services/external/app/ /app/app/
|
||||
|
||||
# Set Python path to include shared modules
|
||||
ENV PYTHONPATH=/app
|
||||
|
||||
# Expose port
|
||||
EXPOSE 8000
|
||||
|
||||
# Health check
|
||||
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
|
||||
CMD python -c "import requests; requests.get('http://localhost:8000/health', timeout=5)" || exit 1
|
||||
|
||||
# Run the application
|
||||
CMD ["python", "-m", "uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
|
||||
1
services/external/app/__init__.py
vendored
Normal file
1
services/external/app/__init__.py
vendored
Normal file
@@ -0,0 +1 @@
|
||||
# services/external/app/__init__.py
|
||||
1
services/external/app/api/__init__.py
vendored
Normal file
1
services/external/app/api/__init__.py
vendored
Normal file
@@ -0,0 +1 @@
|
||||
# services/external/app/api/__init__.py
|
||||
184
services/external/app/api/traffic.py
vendored
Normal file
184
services/external/app/api/traffic.py
vendored
Normal file
@@ -0,0 +1,184 @@
|
||||
# services/external/app/api/traffic.py
|
||||
"""Traffic data API endpoints with improved error handling"""
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query, Path
|
||||
from typing import List, Dict, Any
|
||||
from datetime import datetime, timedelta
|
||||
import structlog
|
||||
from uuid import UUID
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.core.database import get_db
|
||||
from app.services.traffic_service import TrafficService
|
||||
from app.services.messaging import publish_traffic_updated
|
||||
from app.schemas.traffic import (
|
||||
TrafficDataResponse,
|
||||
HistoricalTrafficRequest,
|
||||
TrafficForecastRequest
|
||||
)
|
||||
|
||||
from shared.auth.decorators import (
|
||||
get_current_user_dep
|
||||
)
|
||||
|
||||
router = APIRouter(tags=["traffic"])
|
||||
traffic_service = TrafficService()
|
||||
logger = structlog.get_logger()
|
||||
|
||||
@router.get("/tenants/{tenant_id}/traffic/current", response_model=TrafficDataResponse)
|
||||
async def get_current_traffic(
|
||||
latitude: float = Query(..., description="Latitude"),
|
||||
longitude: float = Query(..., description="Longitude"),
|
||||
tenant_id: UUID = Path(..., description="Tenant ID"),
|
||||
current_user: Dict[str, Any] = Depends(get_current_user_dep),
|
||||
):
|
||||
"""Get current traffic data for location"""
|
||||
try:
|
||||
logger.debug("API: Getting current traffic", lat=latitude, lon=longitude)
|
||||
|
||||
traffic = await traffic_service.get_current_traffic(latitude, longitude)
|
||||
|
||||
if not traffic:
|
||||
logger.warning("No traffic data available", lat=latitude, lon=longitude)
|
||||
raise HTTPException(status_code=404, detail="Traffic data not available")
|
||||
|
||||
# Publish event (with error handling)
|
||||
try:
|
||||
await publish_traffic_updated({
|
||||
"type": "current_requested",
|
||||
"latitude": latitude,
|
||||
"longitude": longitude,
|
||||
"timestamp": datetime.utcnow().isoformat()
|
||||
})
|
||||
except Exception as pub_error:
|
||||
logger.warning("Failed to publish traffic event", error=str(pub_error))
|
||||
# Continue processing - event publishing failure shouldn't break the API
|
||||
|
||||
logger.debug("Successfully returning traffic data",
|
||||
volume=traffic.traffic_volume,
|
||||
congestion=traffic.congestion_level)
|
||||
return traffic
|
||||
|
||||
except HTTPException:
|
||||
# Re-raise HTTP exceptions
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error("Unexpected error in traffic API", error=str(e))
|
||||
import traceback
|
||||
logger.error("Traffic API traceback", traceback=traceback.format_exc())
|
||||
raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
|
||||
|
||||
@router.post("/tenants/{tenant_id}/traffic/historical")
|
||||
async def get_historical_traffic(
|
||||
request: HistoricalTrafficRequest,
|
||||
db: AsyncSession = Depends(get_db),
|
||||
tenant_id: UUID = Path(..., description="Tenant ID"),
|
||||
current_user: Dict[str, Any] = Depends(get_current_user_dep),
|
||||
):
|
||||
"""Get historical traffic data with date range in payload"""
|
||||
try:
|
||||
# Validate date range
|
||||
if request.end_date <= request.start_date:
|
||||
raise HTTPException(status_code=400, detail="End date must be after start date")
|
||||
|
||||
if (request.end_date - request.start_date).days > 1000:
|
||||
raise HTTPException(status_code=400, detail="Date range cannot exceed 90 days")
|
||||
|
||||
historical_data = await traffic_service.get_historical_traffic(
|
||||
request.latitude, request.longitude, request.start_date, request.end_date, str(tenant_id)
|
||||
)
|
||||
|
||||
# Publish event (with error handling)
|
||||
try:
|
||||
await publish_traffic_updated({
|
||||
"type": "historical_requested",
|
||||
"latitude": request.latitude,
|
||||
"longitude": request.longitude,
|
||||
"start_date": request.start_date.isoformat(),
|
||||
"end_date": request.end_date.isoformat(),
|
||||
"records_count": len(historical_data),
|
||||
"timestamp": datetime.utcnow().isoformat()
|
||||
})
|
||||
except Exception as pub_error:
|
||||
logger.warning("Failed to publish historical traffic event", error=str(pub_error))
|
||||
# Continue processing
|
||||
|
||||
return historical_data
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error("Unexpected error in historical traffic API", error=str(e))
|
||||
raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
|
||||
|
||||
@router.post("/tenants/{tenant_id}/traffic/forecast")
|
||||
async def get_traffic_forecast(
|
||||
request: TrafficForecastRequest,
|
||||
tenant_id: UUID = Path(..., description="Tenant ID"),
|
||||
current_user: Dict[str, Any] = Depends(get_current_user_dep),
|
||||
):
|
||||
"""Get traffic forecast for location"""
|
||||
try:
|
||||
logger.debug("API: Getting traffic forecast",
|
||||
lat=request.latitude, lon=request.longitude, hours=request.hours)
|
||||
|
||||
# For now, return mock forecast data since we don't have a real traffic forecast service
|
||||
# In a real implementation, this would call a traffic forecasting service
|
||||
|
||||
# Generate mock forecast data for the requested hours
|
||||
forecast_data = []
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
base_time = datetime.utcnow()
|
||||
for hour in range(request.hours):
|
||||
forecast_time = base_time + timedelta(hours=hour)
|
||||
|
||||
# Mock traffic pattern (higher during rush hours)
|
||||
hour_of_day = forecast_time.hour
|
||||
if 7 <= hour_of_day <= 9 or 17 <= hour_of_day <= 19: # Rush hours
|
||||
traffic_volume = 120
|
||||
pedestrian_count = 80
|
||||
congestion_level = "high"
|
||||
average_speed = 15
|
||||
elif 22 <= hour_of_day or hour_of_day <= 6: # Night hours
|
||||
traffic_volume = 20
|
||||
pedestrian_count = 10
|
||||
congestion_level = "low"
|
||||
average_speed = 50
|
||||
else: # Regular hours
|
||||
traffic_volume = 60
|
||||
pedestrian_count = 40
|
||||
congestion_level = "medium"
|
||||
average_speed = 35
|
||||
|
||||
# Use consistent TrafficDataResponse format
|
||||
forecast_data.append({
|
||||
"date": forecast_time.isoformat(),
|
||||
"traffic_volume": traffic_volume,
|
||||
"pedestrian_count": pedestrian_count,
|
||||
"congestion_level": congestion_level,
|
||||
"average_speed": average_speed,
|
||||
"source": "madrid_opendata_forecast"
|
||||
})
|
||||
|
||||
# Publish event (with error handling)
|
||||
try:
|
||||
await publish_traffic_updated({
|
||||
"type": "forecast_requested",
|
||||
"latitude": request.latitude,
|
||||
"longitude": request.longitude,
|
||||
"hours": request.hours,
|
||||
"timestamp": datetime.utcnow().isoformat()
|
||||
})
|
||||
except Exception as pub_error:
|
||||
logger.warning("Failed to publish traffic forecast event", error=str(pub_error))
|
||||
# Continue processing
|
||||
|
||||
logger.debug("Successfully returning traffic forecast", records=len(forecast_data))
|
||||
return forecast_data
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error("Unexpected error in traffic forecast API", error=str(e))
|
||||
raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
|
||||
157
services/external/app/api/weather.py
vendored
Normal file
157
services/external/app/api/weather.py
vendored
Normal file
@@ -0,0 +1,157 @@
|
||||
# services/external/app/api/weather.py
|
||||
"""
|
||||
Weather API Endpoints
|
||||
"""
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query, BackgroundTasks, Path
|
||||
from typing import List, Optional, Dict, Any
|
||||
from datetime import datetime, date
|
||||
import structlog
|
||||
from uuid import UUID
|
||||
|
||||
from app.schemas.weather import (
|
||||
WeatherDataResponse,
|
||||
WeatherForecastResponse,
|
||||
WeatherForecastRequest,
|
||||
HistoricalWeatherRequest
|
||||
)
|
||||
from app.services.weather_service import WeatherService
|
||||
from app.services.messaging import publish_weather_updated
|
||||
|
||||
# Import unified authentication from shared library
|
||||
from shared.auth.decorators import (
|
||||
get_current_user_dep,
|
||||
get_current_tenant_id_dep
|
||||
)
|
||||
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from app.core.database import get_db
|
||||
|
||||
router = APIRouter(tags=["weather"])
|
||||
logger = structlog.get_logger()
|
||||
weather_service = WeatherService()
|
||||
|
||||
@router.get("/tenants/{tenant_id}/weather/current", response_model=WeatherDataResponse)
|
||||
async def get_current_weather(
|
||||
latitude: float = Query(..., description="Latitude"),
|
||||
longitude: float = Query(..., description="Longitude"),
|
||||
tenant_id: UUID = Path(..., description="Tenant ID"),
|
||||
current_user: Dict[str, Any] = Depends(get_current_user_dep),
|
||||
):
|
||||
"""Get current weather data for location"""
|
||||
try:
|
||||
logger.debug("Getting current weather",
|
||||
lat=latitude,
|
||||
lon=longitude,
|
||||
tenant_id=tenant_id,
|
||||
user_id=current_user["user_id"])
|
||||
|
||||
weather = await weather_service.get_current_weather(latitude, longitude)
|
||||
|
||||
if not weather:
|
||||
raise HTTPException(status_code=404, detail="Weather data not available")
|
||||
|
||||
# Publish event
|
||||
try:
|
||||
await publish_weather_updated({
|
||||
"type": "current_weather_requested",
|
||||
"tenant_id": tenant_id,
|
||||
"latitude": latitude,
|
||||
"longitude": longitude,
|
||||
"requested_by": current_user["user_id"],
|
||||
"timestamp": datetime.utcnow().isoformat()
|
||||
})
|
||||
except Exception as e:
|
||||
logger.warning("Failed to publish weather event", error=str(e))
|
||||
|
||||
return weather
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error("Failed to get current weather", error=str(e))
|
||||
raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
|
||||
|
||||
@router.post("/tenants/{tenant_id}/weather/historical")
|
||||
async def get_historical_weather(
|
||||
request: HistoricalWeatherRequest,
|
||||
db: AsyncSession = Depends(get_db),
|
||||
tenant_id: UUID = Path(..., description="Tenant ID"),
|
||||
current_user: Dict[str, Any] = Depends(get_current_user_dep),
|
||||
):
|
||||
"""Get historical weather data with date range in payload"""
|
||||
try:
|
||||
# Validate date range
|
||||
if request.end_date <= request.start_date:
|
||||
raise HTTPException(status_code=400, detail="End date must be after start date")
|
||||
|
||||
if (request.end_date - request.start_date).days > 1000:
|
||||
raise HTTPException(status_code=400, detail="Date range cannot exceed 90 days")
|
||||
|
||||
historical_data = await weather_service.get_historical_weather(
|
||||
request.latitude, request.longitude, request.start_date, request.end_date)
|
||||
|
||||
# Publish event (with error handling)
|
||||
try:
|
||||
await publish_weather_updated({
|
||||
"type": "historical_requested",
|
||||
"latitude": request.latitude,
|
||||
"longitude": request.longitude,
|
||||
"start_date": request.start_date.isoformat(),
|
||||
"end_date": request.end_date.isoformat(),
|
||||
"records_count": len(historical_data),
|
||||
"timestamp": datetime.utcnow().isoformat()
|
||||
})
|
||||
except Exception as pub_error:
|
||||
logger.warning("Failed to publish historical weather event", error=str(pub_error))
|
||||
# Continue processing
|
||||
|
||||
return historical_data
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error("Unexpected error in historical weather API", error=str(e))
|
||||
raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
|
||||
|
||||
|
||||
@router.post("/tenants/{tenant_id}/weather/forecast", response_model=List[WeatherForecastResponse])
|
||||
async def get_weather_forecast(
|
||||
request: WeatherForecastRequest,
|
||||
tenant_id: UUID = Path(..., description="Tenant ID"),
|
||||
current_user: Dict[str, Any] = Depends(get_current_user_dep),
|
||||
):
|
||||
"""Get weather forecast for location"""
|
||||
try:
|
||||
logger.debug("Getting weather forecast",
|
||||
lat=request.latitude,
|
||||
lon=request.longitude,
|
||||
days=request.days,
|
||||
tenant_id=tenant_id)
|
||||
|
||||
forecast = await weather_service.get_weather_forecast(request.latitude, request.longitude, request.days)
|
||||
|
||||
if not forecast:
|
||||
raise HTTPException(status_code=404, detail="Weather forecast not available")
|
||||
|
||||
# Publish event
|
||||
try:
|
||||
await publish_weather_updated({
|
||||
"type": "forecast_requested",
|
||||
"tenant_id": tenant_id,
|
||||
"latitude": request.latitude,
|
||||
"longitude": request.longitude,
|
||||
"days": request.days,
|
||||
"requested_by": current_user["user_id"],
|
||||
"timestamp": datetime.utcnow().isoformat()
|
||||
})
|
||||
except Exception as e:
|
||||
logger.warning("Failed to publish forecast event", error=str(e))
|
||||
|
||||
return forecast
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error("Failed to get weather forecast", error=str(e))
|
||||
raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
|
||||
1
services/external/app/core/__init__.py
vendored
Normal file
1
services/external/app/core/__init__.py
vendored
Normal file
@@ -0,0 +1 @@
|
||||
# services/external/app/core/__init__.py
|
||||
64
services/external/app/core/config.py
vendored
Normal file
64
services/external/app/core/config.py
vendored
Normal file
@@ -0,0 +1,64 @@
|
||||
# services/external/app/core/config.py
|
||||
|
||||
from shared.config.base import BaseServiceSettings
|
||||
import os
|
||||
from pydantic import Field
|
||||
|
||||
class DataSettings(BaseServiceSettings):
|
||||
"""Data service specific settings"""
|
||||
|
||||
# Service Identity
|
||||
SERVICE_NAME: str = "external-service"
|
||||
VERSION: str = "1.0.0"
|
||||
APP_NAME: str = "Bakery External Data Service"
|
||||
DESCRIPTION: str = "External data collection service for weather and traffic data"
|
||||
|
||||
# API Configuration
|
||||
API_V1_STR: str = "/api/v1"
|
||||
|
||||
# Override database URL to use EXTERNAL_DATABASE_URL
|
||||
DATABASE_URL: str = Field(
|
||||
default="postgresql+asyncpg://external_user:external_pass123@external-db:5432/external_db",
|
||||
env="EXTERNAL_DATABASE_URL"
|
||||
)
|
||||
|
||||
# External API Configuration
|
||||
AEMET_API_KEY: str = os.getenv("AEMET_API_KEY", "")
|
||||
AEMET_BASE_URL: str = "https://opendata.aemet.es/opendata"
|
||||
AEMET_TIMEOUT: int = int(os.getenv("AEMET_TIMEOUT", "30"))
|
||||
AEMET_RETRY_ATTEMPTS: int = int(os.getenv("AEMET_RETRY_ATTEMPTS", "3"))
|
||||
|
||||
MADRID_OPENDATA_API_KEY: str = os.getenv("MADRID_OPENDATA_API_KEY", "")
|
||||
MADRID_OPENDATA_BASE_URL: str = "https://datos.madrid.es"
|
||||
MADRID_OPENDATA_TIMEOUT: int = int(os.getenv("MADRID_OPENDATA_TIMEOUT", "30"))
|
||||
|
||||
# Data Collection Configuration
|
||||
WEATHER_COLLECTION_INTERVAL_HOURS: int = int(os.getenv("WEATHER_COLLECTION_INTERVAL_HOURS", "1"))
|
||||
TRAFFIC_COLLECTION_INTERVAL_HOURS: int = int(os.getenv("TRAFFIC_COLLECTION_INTERVAL_HOURS", "1"))
|
||||
EVENTS_COLLECTION_INTERVAL_HOURS: int = int(os.getenv("EVENTS_COLLECTION_INTERVAL_HOURS", "6"))
|
||||
|
||||
# Cache TTL Configuration
|
||||
WEATHER_CACHE_TTL_HOURS: int = int(os.getenv("WEATHER_CACHE_TTL_HOURS", "1"))
|
||||
TRAFFIC_CACHE_TTL_HOURS: int = int(os.getenv("TRAFFIC_CACHE_TTL_HOURS", "1"))
|
||||
EVENTS_CACHE_TTL_HOURS: int = int(os.getenv("EVENTS_CACHE_TTL_HOURS", "6"))
|
||||
|
||||
# Data Quality Configuration
|
||||
DATA_VALIDATION_ENABLED: bool = os.getenv("DATA_VALIDATION_ENABLED", "true").lower() == "true"
|
||||
OUTLIER_DETECTION_ENABLED: bool = os.getenv("OUTLIER_DETECTION_ENABLED", "true").lower() == "true"
|
||||
DATA_COMPLETENESS_THRESHOLD: float = float(os.getenv("DATA_COMPLETENESS_THRESHOLD", "0.8"))
|
||||
|
||||
# Geolocation Settings (Madrid focus)
|
||||
DEFAULT_LATITUDE: float = float(os.getenv("DEFAULT_LATITUDE", "40.4168")) # Madrid
|
||||
DEFAULT_LONGITUDE: float = float(os.getenv("DEFAULT_LONGITUDE", "-3.7038")) # Madrid
|
||||
LOCATION_RADIUS_KM: float = float(os.getenv("LOCATION_RADIUS_KM", "50.0"))
|
||||
|
||||
# Data Retention
|
||||
RAW_DATA_RETENTION_DAYS: int = int(os.getenv("RAW_DATA_RETENTION_DAYS", "90"))
|
||||
PROCESSED_DATA_RETENTION_DAYS: int = int(os.getenv("PROCESSED_DATA_RETENTION_DAYS", "365"))
|
||||
|
||||
# Batch Processing
|
||||
BATCH_PROCESSING_ENABLED: bool = os.getenv("BATCH_PROCESSING_ENABLED", "true").lower() == "true"
|
||||
BATCH_SIZE: int = int(os.getenv("BATCH_SIZE", "1000"))
|
||||
PARALLEL_PROCESSING_WORKERS: int = int(os.getenv("PARALLEL_PROCESSING_WORKERS", "4"))
|
||||
|
||||
settings = DataSettings()
|
||||
81
services/external/app/core/database.py
vendored
Normal file
81
services/external/app/core/database.py
vendored
Normal file
@@ -0,0 +1,81 @@
|
||||
# services/external/app/core/database.py
|
||||
"""
|
||||
External Service Database Configuration using shared database manager
|
||||
"""
|
||||
|
||||
import structlog
|
||||
from contextlib import asynccontextmanager
|
||||
from typing import AsyncGenerator
|
||||
|
||||
from app.core.config import settings
|
||||
from shared.database.base import DatabaseManager, Base
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
# Create database manager instance
|
||||
database_manager = DatabaseManager(
|
||||
database_url=settings.DATABASE_URL,
|
||||
service_name="external-service"
|
||||
)
|
||||
|
||||
async def get_db():
|
||||
"""
|
||||
Database dependency for FastAPI - using shared database manager
|
||||
"""
|
||||
async for session in database_manager.get_db():
|
||||
yield session
|
||||
|
||||
|
||||
async def init_db():
|
||||
"""Initialize database tables using shared database manager"""
|
||||
try:
|
||||
logger.info("Initializing External Service database...")
|
||||
|
||||
# Import all models to ensure they're registered
|
||||
from app.models import weather, traffic # noqa: F401
|
||||
|
||||
# Create all tables using database manager
|
||||
await database_manager.create_tables(Base.metadata)
|
||||
|
||||
logger.info("External Service database initialized successfully")
|
||||
|
||||
except Exception as e:
|
||||
logger.error("Failed to initialize database", error=str(e))
|
||||
raise
|
||||
|
||||
|
||||
async def close_db():
|
||||
"""Close database connections using shared database manager"""
|
||||
try:
|
||||
await database_manager.close_connections()
|
||||
logger.info("Database connections closed")
|
||||
except Exception as e:
|
||||
logger.error("Error closing database connections", error=str(e))
|
||||
|
||||
|
||||
@asynccontextmanager
|
||||
async def get_db_transaction():
|
||||
"""
|
||||
Context manager for database transactions using shared database manager
|
||||
"""
|
||||
async with database_manager.get_session() as session:
|
||||
try:
|
||||
async with session.begin():
|
||||
yield session
|
||||
except Exception as e:
|
||||
logger.error("Transaction error", error=str(e))
|
||||
raise
|
||||
|
||||
|
||||
@asynccontextmanager
|
||||
async def get_background_session():
|
||||
"""
|
||||
Context manager for background tasks using shared database manager
|
||||
"""
|
||||
async with database_manager.get_background_session() as session:
|
||||
yield session
|
||||
|
||||
|
||||
async def health_check():
|
||||
"""Database health check using shared database manager"""
|
||||
return await database_manager.health_check()
|
||||
0
services/external/app/external/__init__.py
vendored
Normal file
0
services/external/app/external/__init__.py
vendored
Normal file
704
services/external/app/external/aemet.py
vendored
Normal file
704
services/external/app/external/aemet.py
vendored
Normal file
@@ -0,0 +1,704 @@
|
||||
# ================================================================
|
||||
# services/data/app/external/aemet.py - REFACTORED VERSION
|
||||
# ================================================================
|
||||
"""AEMET (Spanish Weather Service) API client with improved modularity"""
|
||||
|
||||
import math
|
||||
from typing import List, Dict, Any, Optional, Tuple
|
||||
from datetime import datetime, timedelta
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
import structlog
|
||||
|
||||
from app.external.base_client import BaseAPIClient
|
||||
from app.core.config import settings
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
|
||||
class WeatherSource(Enum):
|
||||
"""Weather data source types"""
|
||||
AEMET = "aemet"
|
||||
SYNTHETIC = "synthetic"
|
||||
DEFAULT = "default"
|
||||
|
||||
|
||||
@dataclass
|
||||
class WeatherStation:
|
||||
"""Weather station data"""
|
||||
id: str
|
||||
name: str
|
||||
latitude: float
|
||||
longitude: float
|
||||
|
||||
|
||||
@dataclass
|
||||
class GeographicBounds:
|
||||
"""Geographic boundary definition"""
|
||||
min_lat: float
|
||||
max_lat: float
|
||||
min_lon: float
|
||||
max_lon: float
|
||||
|
||||
def contains(self, latitude: float, longitude: float) -> bool:
|
||||
"""Check if coordinates are within bounds"""
|
||||
return (self.min_lat <= latitude <= self.max_lat and
|
||||
self.min_lon <= longitude <= self.max_lon)
|
||||
|
||||
|
||||
class AEMETConstants:
|
||||
"""AEMET API constants and configuration"""
|
||||
|
||||
# API Configuration
|
||||
MAX_DAYS_PER_REQUEST = 30
|
||||
MADRID_MUNICIPALITY_CODE = "28079"
|
||||
|
||||
# Madrid geographic bounds
|
||||
MADRID_BOUNDS = GeographicBounds(
|
||||
min_lat=40.3, max_lat=40.6,
|
||||
min_lon=-3.9, max_lon=-3.5
|
||||
)
|
||||
|
||||
# Weather stations in Madrid area
|
||||
MADRID_STATIONS = [
|
||||
WeatherStation("3195", "Madrid Centro", 40.4117, -3.6780),
|
||||
WeatherStation("3129", "Madrid Norte", 40.4677, -3.5552),
|
||||
WeatherStation("3197", "Madrid Sur", 40.2987, -3.7216),
|
||||
]
|
||||
|
||||
# Climate simulation parameters
|
||||
BASE_TEMPERATURE_SEASONAL = 5.0
|
||||
TEMPERATURE_SEASONAL_MULTIPLIER = 2.5
|
||||
DAILY_TEMPERATURE_AMPLITUDE = 8.0
|
||||
EARTH_RADIUS_KM = 6371.0
|
||||
|
||||
|
||||
class WeatherDataParser:
|
||||
"""Handles parsing of different weather data formats"""
|
||||
|
||||
@staticmethod
|
||||
def safe_float(value: Any, default: Optional[float] = None) -> Optional[float]:
|
||||
"""Safely convert value to float with fallback"""
|
||||
try:
|
||||
if value is None:
|
||||
return default
|
||||
return float(value)
|
||||
except (ValueError, TypeError):
|
||||
return default
|
||||
|
||||
@staticmethod
|
||||
def extract_temperature_value(temp_data: Any) -> Optional[float]:
|
||||
"""Extract temperature value from AEMET complex temperature structure"""
|
||||
if temp_data is None:
|
||||
return None
|
||||
|
||||
if isinstance(temp_data, (int, float)):
|
||||
return float(temp_data)
|
||||
|
||||
if isinstance(temp_data, str):
|
||||
try:
|
||||
return float(temp_data)
|
||||
except ValueError:
|
||||
return None
|
||||
|
||||
if isinstance(temp_data, dict) and 'valor' in temp_data:
|
||||
return WeatherDataParser.safe_float(temp_data['valor'])
|
||||
|
||||
if isinstance(temp_data, list) and len(temp_data) > 0:
|
||||
first_item = temp_data[0]
|
||||
if isinstance(first_item, dict) and 'valor' in first_item:
|
||||
return WeatherDataParser.safe_float(first_item['valor'])
|
||||
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def generate_weather_description(temperature: Optional[float],
|
||||
precipitation: Optional[float],
|
||||
humidity: Optional[float]) -> str:
|
||||
"""Generate weather description based on conditions"""
|
||||
if precipitation and precipitation > 5.0:
|
||||
return "Lluvioso"
|
||||
elif precipitation and precipitation > 0.1:
|
||||
return "Nuboso con lluvia"
|
||||
elif humidity and humidity > 80:
|
||||
return "Nuboso"
|
||||
elif temperature and temperature > 25:
|
||||
return "Soleado y cálido"
|
||||
elif temperature and temperature < 5:
|
||||
return "Frío"
|
||||
else:
|
||||
return "Variable"
|
||||
|
||||
def parse_current_weather(self, data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Parse AEMET current weather data format"""
|
||||
if not isinstance(data, dict):
|
||||
logger.warning("Weather data is not a dictionary", data_type=type(data))
|
||||
return self._get_default_weather_data()
|
||||
|
||||
try:
|
||||
return {
|
||||
"date": datetime.now(),
|
||||
"temperature": self.safe_float(data.get("ta"), 15.0),
|
||||
"precipitation": self.safe_float(data.get("prec"), 0.0),
|
||||
"humidity": self.safe_float(data.get("hr"), 50.0),
|
||||
"wind_speed": self.safe_float(data.get("vv"), 10.0),
|
||||
"pressure": self.safe_float(data.get("pres"), 1013.0),
|
||||
"description": str(data.get("descripcion", "Partly cloudy")),
|
||||
"source": WeatherSource.AEMET.value
|
||||
}
|
||||
except Exception as e:
|
||||
logger.error("Error parsing weather data", error=str(e), data=data)
|
||||
return self._get_default_weather_data()
|
||||
|
||||
def parse_historical_data(self, data: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
||||
"""Parse AEMET historical weather data"""
|
||||
parsed_data = []
|
||||
|
||||
try:
|
||||
for record in data:
|
||||
if not isinstance(record, dict):
|
||||
continue
|
||||
|
||||
parsed_record = self._parse_single_historical_record(record)
|
||||
if parsed_record:
|
||||
parsed_data.append(parsed_record)
|
||||
|
||||
except Exception as e:
|
||||
logger.error("Error parsing historical weather data", error=str(e))
|
||||
|
||||
return parsed_data
|
||||
|
||||
def parse_forecast_data(self, data: List[Dict[str, Any]], days: int) -> List[Dict[str, Any]]:
|
||||
"""Parse AEMET forecast data"""
|
||||
forecast = []
|
||||
base_date = datetime.now().date()
|
||||
|
||||
if not isinstance(data, list):
|
||||
logger.warning("Forecast data is not a list", data_type=type(data))
|
||||
return []
|
||||
|
||||
try:
|
||||
if len(data) > 0 and isinstance(data[0], dict):
|
||||
aemet_data = data[0]
|
||||
dias = aemet_data.get('prediccion', {}).get('dia', [])
|
||||
|
||||
if isinstance(dias, list) and len(dias) > 0:
|
||||
forecast = self._parse_forecast_days(dias, days, base_date)
|
||||
|
||||
# Fill remaining days with synthetic data if needed
|
||||
forecast = self._ensure_forecast_completeness(forecast, days)
|
||||
|
||||
except Exception as e:
|
||||
logger.error("Error parsing AEMET forecast data", error=str(e))
|
||||
forecast = []
|
||||
|
||||
return forecast
|
||||
|
||||
def _parse_single_historical_record(self, record: Dict[str, Any]) -> Optional[Dict[str, Any]]:
|
||||
"""Parse a single historical weather record"""
|
||||
fecha_str = record.get('fecha')
|
||||
if not fecha_str:
|
||||
return None
|
||||
|
||||
try:
|
||||
record_date = datetime.strptime(fecha_str, '%Y-%m-%d')
|
||||
except ValueError:
|
||||
logger.warning("Invalid date format in historical data", fecha=fecha_str)
|
||||
return None
|
||||
|
||||
# Extract and calculate temperature
|
||||
temp_max = self.safe_float(record.get('tmax'))
|
||||
temp_min = self.safe_float(record.get('tmin'))
|
||||
temperature = self._calculate_average_temperature(temp_max, temp_min)
|
||||
|
||||
# Extract other weather parameters
|
||||
precipitation = self.safe_float(record.get('prec'), 0.0)
|
||||
humidity = self.safe_float(record.get('hr'))
|
||||
wind_speed = self.safe_float(record.get('velmedia'))
|
||||
pressure = self._extract_pressure(record)
|
||||
|
||||
return {
|
||||
"date": record_date,
|
||||
"temperature": temperature,
|
||||
"precipitation": precipitation,
|
||||
"humidity": humidity,
|
||||
"wind_speed": wind_speed,
|
||||
"pressure": pressure,
|
||||
"description": self.generate_weather_description(temperature, precipitation, humidity),
|
||||
"source": WeatherSource.AEMET.value
|
||||
}
|
||||
|
||||
def _calculate_average_temperature(self, temp_max: Optional[float], temp_min: Optional[float]) -> Optional[float]:
|
||||
"""Calculate average temperature from max and min values"""
|
||||
if temp_max and temp_min:
|
||||
return (temp_max + temp_min) / 2
|
||||
elif temp_max:
|
||||
return temp_max - 5 # Estimate average from max
|
||||
elif temp_min:
|
||||
return temp_min + 5 # Estimate average from min
|
||||
return None
|
||||
|
||||
def _extract_pressure(self, record: Dict[str, Any]) -> Optional[float]:
|
||||
"""Extract pressure from historical record"""
|
||||
pressure = self.safe_float(record.get('presMax'))
|
||||
if not pressure:
|
||||
pressure = self.safe_float(record.get('presMin'))
|
||||
return pressure
|
||||
|
||||
def _parse_forecast_days(self, dias: List[Dict[str, Any]], days: int, base_date: datetime.date) -> List[Dict[str, Any]]:
|
||||
"""Parse forecast days from AEMET data"""
|
||||
forecast = []
|
||||
|
||||
for i, dia in enumerate(dias[:days]):
|
||||
if not isinstance(dia, dict):
|
||||
continue
|
||||
|
||||
forecast_date = base_date + timedelta(days=i)
|
||||
forecast_day = self._parse_single_forecast_day(dia, forecast_date, i)
|
||||
forecast.append(forecast_day)
|
||||
|
||||
return forecast
|
||||
|
||||
def _parse_single_forecast_day(self, dia: Dict[str, Any], forecast_date: datetime.date, day_index: int) -> Dict[str, Any]:
|
||||
"""Parse a single forecast day"""
|
||||
# Extract temperature
|
||||
temp_data = dia.get('temperatura', {})
|
||||
avg_temp = self._extract_forecast_temperature(temp_data)
|
||||
|
||||
# Extract precipitation probability
|
||||
precip_prob = self._extract_precipitation_probability(dia.get('probPrecipitacion', []))
|
||||
|
||||
# Extract wind speed
|
||||
wind_speed = self._extract_wind_speed(dia.get('viento', []))
|
||||
|
||||
# Generate description
|
||||
description = self._generate_forecast_description(precip_prob)
|
||||
|
||||
return {
|
||||
"forecast_date": datetime.combine(forecast_date, datetime.min.time()),
|
||||
"generated_at": datetime.now(),
|
||||
"temperature": round(avg_temp, 1),
|
||||
"precipitation": precip_prob / 10, # Convert percentage to mm estimate
|
||||
"humidity": 50.0 + (day_index % 20), # Estimate
|
||||
"wind_speed": round(wind_speed, 1),
|
||||
"description": description,
|
||||
"source": WeatherSource.AEMET.value
|
||||
}
|
||||
|
||||
def _extract_forecast_temperature(self, temp_data: Dict[str, Any]) -> float:
|
||||
"""Extract temperature from forecast temperature data"""
|
||||
if isinstance(temp_data, dict):
|
||||
temp_max = self.extract_temperature_value(temp_data.get('maxima'))
|
||||
temp_min = self.extract_temperature_value(temp_data.get('minima'))
|
||||
if temp_max and temp_min:
|
||||
return (temp_max + temp_min) / 2
|
||||
return 15.0
|
||||
|
||||
def _extract_precipitation_probability(self, precip_data: List[Dict[str, Any]]) -> float:
|
||||
"""Extract precipitation probability from forecast data"""
|
||||
precip_prob = 0.0
|
||||
if isinstance(precip_data, list):
|
||||
for precip_item in precip_data:
|
||||
if isinstance(precip_item, dict) and 'value' in precip_item:
|
||||
precip_prob = max(precip_prob, self.safe_float(precip_item.get('value'), 0.0))
|
||||
return precip_prob
|
||||
|
||||
def _extract_wind_speed(self, viento_data: List[Dict[str, Any]]) -> float:
|
||||
"""Extract wind speed from forecast data"""
|
||||
wind_speed = 10.0
|
||||
if isinstance(viento_data, list):
|
||||
for viento_item in viento_data:
|
||||
if isinstance(viento_item, dict) and 'velocidad' in viento_item:
|
||||
speed_values = viento_item.get('velocidad', [])
|
||||
if isinstance(speed_values, list) and len(speed_values) > 0:
|
||||
wind_speed = self.safe_float(speed_values[0], 10.0)
|
||||
break
|
||||
return wind_speed
|
||||
|
||||
def _generate_forecast_description(self, precip_prob: float) -> str:
|
||||
"""Generate description based on precipitation probability"""
|
||||
if precip_prob > 70:
|
||||
return "Lluvioso"
|
||||
elif precip_prob > 30:
|
||||
return "Parcialmente nublado"
|
||||
else:
|
||||
return "Soleado"
|
||||
|
||||
def _ensure_forecast_completeness(self, forecast: List[Dict[str, Any]], days: int) -> List[Dict[str, Any]]:
|
||||
"""Ensure forecast has the requested number of days"""
|
||||
if len(forecast) < days:
|
||||
remaining_days = days - len(forecast)
|
||||
synthetic_generator = SyntheticWeatherGenerator()
|
||||
synthetic_forecast = synthetic_generator.generate_forecast_sync(remaining_days, len(forecast))
|
||||
forecast.extend(synthetic_forecast)
|
||||
|
||||
return forecast[:days]
|
||||
|
||||
def _get_default_weather_data(self) -> Dict[str, Any]:
|
||||
"""Get default weather data structure"""
|
||||
return {
|
||||
"date": datetime.now(),
|
||||
"temperature": 15.0,
|
||||
"precipitation": 0.0,
|
||||
"humidity": 50.0,
|
||||
"wind_speed": 10.0,
|
||||
"pressure": 1013.0,
|
||||
"description": "Data not available",
|
||||
"source": WeatherSource.DEFAULT.value
|
||||
}
|
||||
|
||||
|
||||
class SyntheticWeatherGenerator:
|
||||
"""Generates realistic synthetic weather data for Madrid"""
|
||||
|
||||
def generate_current_weather(self) -> Dict[str, Any]:
|
||||
"""Generate realistic synthetic current weather for Madrid"""
|
||||
now = datetime.now()
|
||||
month = now.month
|
||||
hour = now.hour
|
||||
|
||||
# Madrid climate simulation
|
||||
temperature = self._calculate_current_temperature(month, hour)
|
||||
precipitation = self._calculate_current_precipitation(now, month)
|
||||
|
||||
return {
|
||||
"date": now,
|
||||
"temperature": round(temperature, 1),
|
||||
"precipitation": precipitation,
|
||||
"humidity": 45 + (month % 6) * 5,
|
||||
"wind_speed": 8 + (hour % 12),
|
||||
"pressure": 1013 + math.sin(now.day * 0.2) * 15,
|
||||
"description": "Lluvioso" if precipitation > 0 else "Soleado",
|
||||
"source": WeatherSource.SYNTHETIC.value
|
||||
}
|
||||
|
||||
def generate_forecast_sync(self, days: int, start_offset: int = 0) -> List[Dict[str, Any]]:
|
||||
"""Generate synthetic forecast data synchronously"""
|
||||
forecast = []
|
||||
base_date = datetime.now().date()
|
||||
|
||||
for i in range(days):
|
||||
forecast_date = base_date + timedelta(days=start_offset + i)
|
||||
forecast_day = self._generate_forecast_day(forecast_date, start_offset + i)
|
||||
forecast.append(forecast_day)
|
||||
|
||||
return forecast
|
||||
|
||||
async def generate_forecast(self, days: int) -> List[Dict[str, Any]]:
|
||||
"""Generate synthetic forecast data (async version for compatibility)"""
|
||||
return self.generate_forecast_sync(days, 0)
|
||||
|
||||
def generate_historical_data(self, start_date: datetime, end_date: datetime) -> List[Dict[str, Any]]:
|
||||
"""Generate synthetic historical weather data"""
|
||||
historical_data = []
|
||||
current_date = start_date
|
||||
|
||||
while current_date <= end_date:
|
||||
historical_day = self._generate_historical_day(current_date)
|
||||
historical_data.append(historical_day)
|
||||
current_date += timedelta(days=1)
|
||||
|
||||
return historical_data
|
||||
|
||||
def _calculate_current_temperature(self, month: int, hour: int) -> float:
|
||||
"""Calculate current temperature based on seasonal and daily patterns"""
|
||||
base_temp = AEMETConstants.BASE_TEMPERATURE_SEASONAL + (month - 1) * AEMETConstants.TEMPERATURE_SEASONAL_MULTIPLIER
|
||||
temp_variation = math.sin((hour - 6) * math.pi / 12) * AEMETConstants.DAILY_TEMPERATURE_AMPLITUDE
|
||||
return base_temp + temp_variation
|
||||
|
||||
def _calculate_current_precipitation(self, now: datetime, month: int) -> float:
|
||||
"""Calculate current precipitation based on seasonal patterns"""
|
||||
rain_prob = 0.3 if month in [11, 12, 1, 2, 3] else 0.1
|
||||
return 2.5 if hash(now.date()) % 100 < rain_prob * 100 else 0.0
|
||||
|
||||
def _generate_forecast_day(self, forecast_date: datetime.date, day_offset: int) -> Dict[str, Any]:
|
||||
"""Generate a single forecast day"""
|
||||
month = forecast_date.month
|
||||
base_temp = AEMETConstants.BASE_TEMPERATURE_SEASONAL + (month - 1) * AEMETConstants.TEMPERATURE_SEASONAL_MULTIPLIER
|
||||
temp_variation = ((day_offset) % 7 - 3) * 2 # Weekly variation
|
||||
|
||||
return {
|
||||
"forecast_date": datetime.combine(forecast_date, datetime.min.time()),
|
||||
"generated_at": datetime.now(),
|
||||
"temperature": round(base_temp + temp_variation, 1),
|
||||
"precipitation": 2.0 if day_offset % 5 == 0 else 0.0,
|
||||
"humidity": 50 + (day_offset % 30),
|
||||
"wind_speed": 10 + (day_offset % 15),
|
||||
"description": "Lluvioso" if day_offset % 5 == 0 else "Soleado",
|
||||
"source": WeatherSource.SYNTHETIC.value
|
||||
}
|
||||
|
||||
def _generate_historical_day(self, date: datetime) -> Dict[str, Any]:
|
||||
"""Generate a single historical day"""
|
||||
month = date.month
|
||||
base_temp = AEMETConstants.BASE_TEMPERATURE_SEASONAL + (month - 1) * AEMETConstants.TEMPERATURE_SEASONAL_MULTIPLIER
|
||||
temp_variation = math.sin(date.day * 0.3) * 5
|
||||
|
||||
return {
|
||||
"date": date,
|
||||
"temperature": round(base_temp + temp_variation, 1),
|
||||
"precipitation": 1.5 if date.day % 7 == 0 else 0.0,
|
||||
"humidity": 45 + (date.day % 40),
|
||||
"wind_speed": 8 + (date.day % 20),
|
||||
"pressure": 1013 + math.sin(date.day * 0.2) * 20,
|
||||
"description": "Variable",
|
||||
"source": WeatherSource.SYNTHETIC.value
|
||||
}
|
||||
|
||||
|
||||
class LocationService:
|
||||
"""Handles location-related operations"""
|
||||
|
||||
@staticmethod
|
||||
def find_nearest_station(latitude: float, longitude: float) -> Optional[str]:
|
||||
"""Find nearest weather station to given coordinates"""
|
||||
try:
|
||||
# Check if coordinates are reasonable (not extreme values)
|
||||
if not (-90 <= latitude <= 90 and -180 <= longitude <= 180):
|
||||
logger.warning("Invalid coordinate range", lat=latitude, lon=longitude)
|
||||
return None
|
||||
|
||||
# Check if coordinates are too far from Madrid area (more than 1000km away)
|
||||
madrid_center = (40.4168, -3.7038)
|
||||
distance_to_madrid = LocationService.calculate_distance(
|
||||
latitude, longitude, madrid_center[0], madrid_center[1]
|
||||
)
|
||||
|
||||
if distance_to_madrid > 1000: # More than 1000km from Madrid
|
||||
logger.warning("Coordinates too far from Madrid",
|
||||
lat=latitude, lon=longitude, distance_km=distance_to_madrid)
|
||||
return None
|
||||
|
||||
closest_station = None
|
||||
min_distance = float('inf')
|
||||
|
||||
for station in AEMETConstants.MADRID_STATIONS:
|
||||
distance = LocationService.calculate_distance(
|
||||
latitude, longitude, station.latitude, station.longitude
|
||||
)
|
||||
if distance < min_distance:
|
||||
min_distance = distance
|
||||
closest_station = station.id
|
||||
|
||||
return closest_station
|
||||
|
||||
except Exception as e:
|
||||
logger.error("Failed to find nearest station", error=str(e))
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def get_municipality_code(latitude: float, longitude: float) -> Optional[str]:
|
||||
"""Get municipality code for coordinates"""
|
||||
if AEMETConstants.MADRID_BOUNDS.contains(latitude, longitude):
|
||||
return AEMETConstants.MADRID_MUNICIPALITY_CODE
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def calculate_distance(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
|
||||
"""Calculate distance between two coordinates using Haversine formula"""
|
||||
dlat = math.radians(lat2 - lat1)
|
||||
dlon = math.radians(lon2 - lon1)
|
||||
|
||||
a = (math.sin(dlat/2) * math.sin(dlat/2) +
|
||||
math.cos(math.radians(lat1)) * math.cos(math.radians(lat2)) *
|
||||
math.sin(dlon/2) * math.sin(dlon/2))
|
||||
|
||||
c = 2 * math.atan2(math.sqrt(a), math.sqrt(1-a))
|
||||
return AEMETConstants.EARTH_RADIUS_KM * c
|
||||
|
||||
|
||||
class AEMETClient(BaseAPIClient):
|
||||
"""AEMET (Spanish Weather Service) API client with improved modularity"""
|
||||
|
||||
def __init__(self):
|
||||
super().__init__(
|
||||
base_url="https://opendata.aemet.es/opendata/api",
|
||||
api_key=settings.AEMET_API_KEY
|
||||
)
|
||||
self.parser = WeatherDataParser()
|
||||
self.synthetic_generator = SyntheticWeatherGenerator()
|
||||
self.location_service = LocationService()
|
||||
|
||||
async def get_current_weather(self, latitude: float, longitude: float) -> Optional[Dict[str, Any]]:
|
||||
"""Get current weather for coordinates"""
|
||||
try:
|
||||
station_id = self.location_service.find_nearest_station(latitude, longitude)
|
||||
if not station_id:
|
||||
logger.warning("No weather station found", lat=latitude, lon=longitude)
|
||||
return await self._get_synthetic_current_weather()
|
||||
|
||||
weather_data = await self._fetch_current_weather_data(station_id)
|
||||
if weather_data:
|
||||
return self.parser.parse_current_weather(weather_data)
|
||||
|
||||
logger.info("Falling back to synthetic weather data", reason="invalid_weather_data")
|
||||
return await self._get_synthetic_current_weather()
|
||||
|
||||
except Exception as e:
|
||||
logger.error("Failed to get current weather", error=str(e))
|
||||
return await self._get_synthetic_current_weather()
|
||||
|
||||
async def get_forecast(self, latitude: float, longitude: float, days: int = 7) -> List[Dict[str, Any]]:
|
||||
"""Get weather forecast for coordinates"""
|
||||
try:
|
||||
municipality_code = self.location_service.get_municipality_code(latitude, longitude)
|
||||
if not municipality_code:
|
||||
logger.info("No municipality code found, using synthetic data")
|
||||
return await self.synthetic_generator.generate_forecast(days)
|
||||
|
||||
forecast_data = await self._fetch_forecast_data(municipality_code)
|
||||
if forecast_data:
|
||||
parsed_forecast = self.parser.parse_forecast_data(forecast_data, days)
|
||||
if parsed_forecast:
|
||||
return parsed_forecast
|
||||
|
||||
logger.info("Falling back to synthetic forecast data", reason="invalid_forecast_data")
|
||||
return await self.synthetic_generator.generate_forecast(days)
|
||||
|
||||
except Exception as e:
|
||||
logger.error("Failed to get weather forecast", error=str(e))
|
||||
return await self.synthetic_generator.generate_forecast(days)
|
||||
|
||||
async def get_historical_weather(self,
|
||||
latitude: float,
|
||||
longitude: float,
|
||||
start_date: datetime,
|
||||
end_date: datetime) -> List[Dict[str, Any]]:
|
||||
"""Get historical weather data"""
|
||||
try:
|
||||
logger.debug("Getting historical weather from AEMET API",
|
||||
lat=latitude, lon=longitude,
|
||||
start=start_date, end=end_date)
|
||||
|
||||
station_id = self.location_service.find_nearest_station(latitude, longitude)
|
||||
if not station_id:
|
||||
logger.warning("No weather station found for historical data",
|
||||
lat=latitude, lon=longitude)
|
||||
return self.synthetic_generator.generate_historical_data(start_date, end_date)
|
||||
|
||||
historical_data = await self._fetch_historical_data_in_chunks(
|
||||
station_id, start_date, end_date
|
||||
)
|
||||
|
||||
if historical_data:
|
||||
logger.debug("Successfully fetched historical weather data",
|
||||
total_count=len(historical_data))
|
||||
return historical_data
|
||||
else:
|
||||
logger.info("No real historical data available, using synthetic data")
|
||||
return self.synthetic_generator.generate_historical_data(start_date, end_date)
|
||||
|
||||
except Exception as e:
|
||||
logger.error("Failed to get historical weather from AEMET API", error=str(e))
|
||||
return self.synthetic_generator.generate_historical_data(start_date, end_date)
|
||||
|
||||
async def _fetch_current_weather_data(self, station_id: str) -> Optional[Dict[str, Any]]:
|
||||
"""Fetch current weather data from AEMET API"""
|
||||
endpoint = f"/observacion/convencional/datos/estacion/{station_id}"
|
||||
initial_response = await self._get(endpoint)
|
||||
|
||||
if not self._is_valid_initial_response(initial_response):
|
||||
return None
|
||||
|
||||
datos_url = initial_response.get("datos")
|
||||
actual_weather_data = await self._fetch_from_url(datos_url)
|
||||
|
||||
if (actual_weather_data and isinstance(actual_weather_data, list)
|
||||
and len(actual_weather_data) > 0):
|
||||
return actual_weather_data[0]
|
||||
|
||||
return None
|
||||
|
||||
async def _fetch_forecast_data(self, municipality_code: str) -> Optional[List[Dict[str, Any]]]:
|
||||
"""Fetch forecast data from AEMET API"""
|
||||
endpoint = f"/prediccion/especifica/municipio/diaria/{municipality_code}"
|
||||
initial_response = await self._get(endpoint)
|
||||
|
||||
if not self._is_valid_initial_response(initial_response):
|
||||
return None
|
||||
|
||||
datos_url = initial_response.get("datos")
|
||||
return await self._fetch_from_url(datos_url)
|
||||
|
||||
async def _fetch_historical_data_in_chunks(self,
|
||||
station_id: str,
|
||||
start_date: datetime,
|
||||
end_date: datetime) -> List[Dict[str, Any]]:
|
||||
"""Fetch historical data in chunks due to AEMET API limitations"""
|
||||
historical_data = []
|
||||
current_date = start_date
|
||||
|
||||
while current_date <= end_date:
|
||||
chunk_end_date = min(
|
||||
current_date + timedelta(days=AEMETConstants.MAX_DAYS_PER_REQUEST),
|
||||
end_date
|
||||
)
|
||||
|
||||
chunk_data = await self._fetch_historical_chunk(
|
||||
station_id, current_date, chunk_end_date
|
||||
)
|
||||
|
||||
if chunk_data:
|
||||
historical_data.extend(chunk_data)
|
||||
|
||||
current_date = chunk_end_date + timedelta(days=1)
|
||||
|
||||
return historical_data
|
||||
|
||||
async def _fetch_historical_chunk(self,
|
||||
station_id: str,
|
||||
start_date: datetime,
|
||||
end_date: datetime) -> List[Dict[str, Any]]:
|
||||
"""Fetch a single chunk of historical data"""
|
||||
start_str = start_date.strftime("%Y-%m-%dT00:00:00UTC")
|
||||
end_str = end_date.strftime("%Y-%m-%dT23:59:59UTC")
|
||||
|
||||
endpoint = f"/valores/climatologicos/diarios/datos/fechaini/{start_str}/fechafin/{end_str}/estacion/{station_id}"
|
||||
initial_response = await self._get(endpoint)
|
||||
|
||||
if not self._is_valid_initial_response(initial_response):
|
||||
logger.warning("Invalid initial response from AEMET historical API",
|
||||
start=start_str, end=end_str)
|
||||
return []
|
||||
|
||||
datos_url = initial_response.get("datos")
|
||||
if not datos_url:
|
||||
logger.warning("No datos URL in AEMET historical response",
|
||||
start=start_str, end=end_str)
|
||||
return []
|
||||
|
||||
actual_historical_data = await self._fetch_from_url(datos_url)
|
||||
|
||||
if actual_historical_data and isinstance(actual_historical_data, list):
|
||||
chunk_data = self.parser.parse_historical_data(actual_historical_data)
|
||||
logger.debug("Fetched historical data chunk",
|
||||
count=len(chunk_data), start=start_str, end=end_str)
|
||||
return chunk_data
|
||||
else:
|
||||
logger.warning("No valid historical data received for chunk",
|
||||
start=start_str, end=end_str)
|
||||
return []
|
||||
|
||||
async def _fetch_from_url(self, url: str) -> Optional[List[Dict[str, Any]]]:
|
||||
"""Fetch data from AEMET datos URL"""
|
||||
try:
|
||||
data = await self._fetch_url_directly(url)
|
||||
|
||||
if data and isinstance(data, list):
|
||||
return data
|
||||
else:
|
||||
logger.warning("Expected list from datos URL", data_type=type(data))
|
||||
return None
|
||||
|
||||
except Exception as e:
|
||||
logger.error("Failed to fetch from datos URL", url=url, error=str(e))
|
||||
return None
|
||||
|
||||
def _is_valid_initial_response(self, response: Any) -> bool:
|
||||
"""Check if initial AEMET API response is valid"""
|
||||
return (response and isinstance(response, dict) and
|
||||
response.get("datos") and isinstance(response.get("datos"), str))
|
||||
|
||||
async def _get_synthetic_current_weather(self) -> Dict[str, Any]:
|
||||
"""Get synthetic current weather data"""
|
||||
return self.synthetic_generator.generate_current_weather()
|
||||
10
services/external/app/external/apis/__init__.py
vendored
Normal file
10
services/external/app/external/apis/__init__.py
vendored
Normal file
@@ -0,0 +1,10 @@
|
||||
# ================================================================
|
||||
# services/data/app/external/apis/__init__.py
|
||||
# ================================================================
|
||||
"""
|
||||
External API clients module - Scalable architecture for multiple cities
|
||||
"""
|
||||
|
||||
from .traffic import TrafficAPIClientFactory
|
||||
|
||||
__all__ = ["TrafficAPIClientFactory"]
|
||||
350
services/external/app/external/apis/madrid_traffic_client.py
vendored
Normal file
350
services/external/app/external/apis/madrid_traffic_client.py
vendored
Normal file
@@ -0,0 +1,350 @@
|
||||
# ================================================================
|
||||
# services/data/app/external/apis/madrid_traffic_client.py
|
||||
# ================================================================
|
||||
"""
|
||||
Madrid traffic client - Orchestration layer only
|
||||
Coordinates between HTTP client, data processor, and business logic components
|
||||
"""
|
||||
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from typing import Dict, List, Any, Optional, Tuple
|
||||
import structlog
|
||||
|
||||
from .traffic import BaseTrafficClient, SupportedCity
|
||||
from ..base_client import BaseAPIClient
|
||||
from ..clients.madrid_client import MadridTrafficAPIClient
|
||||
from ..processors.madrid_processor import MadridTrafficDataProcessor
|
||||
from ..processors.madrid_business_logic import MadridTrafficAnalyzer
|
||||
from ..models.madrid_models import TrafficRecord, CongestionLevel
|
||||
|
||||
class MadridTrafficClient(BaseTrafficClient, BaseAPIClient):
|
||||
"""
|
||||
Enhanced Madrid traffic client - Orchestration layer
|
||||
Coordinates HTTP, processing, and business logic components
|
||||
"""
|
||||
|
||||
# Madrid geographic bounds
|
||||
MADRID_BOUNDS = {
|
||||
'lat_min': 40.31, 'lat_max': 40.56,
|
||||
'lon_min': -3.89, 'lon_max': -3.51
|
||||
}
|
||||
|
||||
# Configuration constants
|
||||
MAX_HISTORICAL_DAYS = 1095 # 3 years
|
||||
MAX_CSV_PROCESSING_ROWS = 5000000
|
||||
MEASUREMENT_POINTS_LIMIT = 20
|
||||
|
||||
def __init__(self):
|
||||
BaseTrafficClient.__init__(self, SupportedCity.MADRID)
|
||||
BaseAPIClient.__init__(self, base_url="https://datos.madrid.es")
|
||||
|
||||
# Initialize components
|
||||
self.api_client = MadridTrafficAPIClient()
|
||||
self.processor = MadridTrafficDataProcessor()
|
||||
self.analyzer = MadridTrafficAnalyzer()
|
||||
|
||||
self.logger = structlog.get_logger()
|
||||
|
||||
def supports_location(self, latitude: float, longitude: float) -> bool:
|
||||
"""Check if location is within Madrid bounds"""
|
||||
return (self.MADRID_BOUNDS['lat_min'] <= latitude <= self.MADRID_BOUNDS['lat_max'] and
|
||||
self.MADRID_BOUNDS['lon_min'] <= longitude <= self.MADRID_BOUNDS['lon_max'])
|
||||
|
||||
async def get_current_traffic(self, latitude: float, longitude: float) -> Optional[Dict[str, Any]]:
|
||||
"""Get current traffic data with enhanced pedestrian inference"""
|
||||
try:
|
||||
if not self.supports_location(latitude, longitude):
|
||||
self.logger.warning("Location outside Madrid bounds", lat=latitude, lon=longitude)
|
||||
return None
|
||||
|
||||
# Fetch XML data
|
||||
xml_content = await self.api_client.fetch_current_traffic_xml()
|
||||
if not xml_content:
|
||||
self.logger.warning("No XML content received")
|
||||
return None
|
||||
|
||||
# Parse XML data
|
||||
traffic_points = self.processor.parse_traffic_xml(xml_content)
|
||||
if not traffic_points:
|
||||
self.logger.warning("No traffic points found in XML")
|
||||
return None
|
||||
|
||||
# Find nearest traffic point
|
||||
nearest_point = self.analyzer.find_nearest_traffic_point(traffic_points, latitude, longitude)
|
||||
if not nearest_point:
|
||||
self.logger.warning("No nearby traffic points found")
|
||||
return None
|
||||
|
||||
# Enhance with business logic
|
||||
enhanced_data = await self._enhance_traffic_data(nearest_point, latitude, longitude)
|
||||
|
||||
self.logger.info("Current traffic data retrieved",
|
||||
point_id=nearest_point.get('measurement_point_id'),
|
||||
distance=enhanced_data.get('distance_km', 0))
|
||||
|
||||
return enhanced_data
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error("Error getting current traffic", error=str(e))
|
||||
return None
|
||||
|
||||
async def get_historical_traffic(self, latitude: float, longitude: float,
|
||||
start_date: datetime, end_date: datetime) -> List[Dict[str, Any]]:
|
||||
"""Get historical traffic data with pedestrian enhancement"""
|
||||
try:
|
||||
if not self.supports_location(latitude, longitude):
|
||||
self.logger.warning("Location outside Madrid bounds", lat=latitude, lon=longitude)
|
||||
return []
|
||||
|
||||
# Validate date range
|
||||
if (end_date - start_date).days > self.MAX_HISTORICAL_DAYS:
|
||||
self.logger.warning("Date range too large, truncating",
|
||||
requested_days=(end_date - start_date).days,
|
||||
max_days=self.MAX_HISTORICAL_DAYS)
|
||||
start_date = end_date - timedelta(days=self.MAX_HISTORICAL_DAYS)
|
||||
|
||||
# Fetch measurement points registry
|
||||
csv_content = await self.api_client.fetch_measurement_points_csv()
|
||||
if not csv_content:
|
||||
self.logger.error("Failed to fetch measurement points registry")
|
||||
return []
|
||||
|
||||
# Parse measurement points
|
||||
measurement_points = self.processor.parse_measurement_points_csv(csv_content)
|
||||
if not measurement_points:
|
||||
self.logger.error("No measurement points found")
|
||||
return []
|
||||
|
||||
# Find nearest measurement points
|
||||
nearest_points = self.analyzer.find_nearest_measurement_points(
|
||||
measurement_points, latitude, longitude, num_points=3
|
||||
)
|
||||
|
||||
if not nearest_points:
|
||||
self.logger.warning("No nearby measurement points found")
|
||||
return []
|
||||
|
||||
# Process historical data
|
||||
historical_records = await self._fetch_historical_data_enhanced(
|
||||
latitude, longitude, start_date, end_date, nearest_points
|
||||
)
|
||||
|
||||
self.logger.info("Historical traffic data retrieved",
|
||||
records_count=len(historical_records),
|
||||
date_range=f"{start_date.date()} to {end_date.date()}")
|
||||
|
||||
return historical_records
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error("Error getting historical traffic", error=str(e))
|
||||
return []
|
||||
|
||||
async def get_events(self, latitude: float, longitude: float,
|
||||
radius_km: float = 5.0) -> List[Dict[str, Any]]:
|
||||
"""Get traffic events (incidents, construction, etc.)"""
|
||||
# Madrid doesn't provide separate events endpoint
|
||||
# Return enhanced current traffic data as events
|
||||
current_data = await self.get_current_traffic(latitude, longitude)
|
||||
if current_data and current_data.get('congestion_level') in ['high', 'blocked']:
|
||||
return [{
|
||||
'type': 'congestion',
|
||||
'severity': current_data.get('congestion_level'),
|
||||
'description': f"High traffic congestion at {current_data.get('measurement_point_name', 'measurement point')}",
|
||||
'location': {
|
||||
'latitude': current_data.get('latitude'),
|
||||
'longitude': current_data.get('longitude')
|
||||
},
|
||||
'timestamp': current_data.get('timestamp')
|
||||
}]
|
||||
return []
|
||||
|
||||
|
||||
async def _enhance_traffic_data(self, traffic_point: Dict[str, Any],
|
||||
query_lat: float, query_lon: float) -> Dict[str, Any]:
|
||||
"""Enhance traffic data with business logic and pedestrian inference"""
|
||||
# Calculate distance
|
||||
distance_km = self.analyzer.calculate_distance(
|
||||
query_lat, query_lon,
|
||||
traffic_point.get('latitude', 0),
|
||||
traffic_point.get('longitude', 0)
|
||||
)
|
||||
|
||||
# Classify road type
|
||||
road_type = self.analyzer.classify_road_type(
|
||||
traffic_point.get('measurement_point_name', '')
|
||||
)
|
||||
|
||||
# Get congestion level
|
||||
congestion_level = self.analyzer.get_congestion_level(
|
||||
traffic_point.get('ocupacion', 0)
|
||||
)
|
||||
|
||||
# Create traffic record for pedestrian inference
|
||||
traffic_record = TrafficRecord(
|
||||
date=datetime.now(timezone.utc),
|
||||
traffic_volume=traffic_point.get('intensidad', 0),
|
||||
occupation_percentage=int(traffic_point.get('ocupacion', 0)),
|
||||
load_percentage=traffic_point.get('carga', 0),
|
||||
average_speed=30,  # Default speed (km/h)
|
||||
congestion_level=congestion_level,
|
||||
pedestrian_count=0, # Will be calculated
|
||||
measurement_point_id=traffic_point.get('measurement_point_id', ''),
|
||||
measurement_point_name=traffic_point.get('measurement_point_name', ''),
|
||||
road_type=road_type,
|
||||
source='madrid_current_xml'
|
||||
)
|
||||
|
||||
# Calculate pedestrian count
|
||||
location_context = {
|
||||
'latitude': traffic_point.get('latitude'),
|
||||
'longitude': traffic_point.get('longitude'),
|
||||
'measurement_point_name': traffic_point.get('measurement_point_name')
|
||||
}
|
||||
|
||||
pedestrian_count, inference_metadata = self.analyzer.calculate_pedestrian_flow(
|
||||
traffic_record, location_context
|
||||
)
|
||||
|
||||
# Build enhanced response
|
||||
enhanced_data = {
|
||||
'timestamp': datetime.now(timezone.utc),
|
||||
'latitude': traffic_point.get('latitude'),
|
||||
'longitude': traffic_point.get('longitude'),
|
||||
'measurement_point_id': traffic_point.get('measurement_point_id'),
|
||||
'measurement_point_name': traffic_point.get('measurement_point_name'),
|
||||
'traffic_volume': traffic_point.get('intensidad', 0),
|
||||
'occupation_percentage': int(traffic_point.get('ocupacion', 0)),
|
||||
'load_percentage': traffic_point.get('carga', 0),
|
||||
'congestion_level': congestion_level,
|
||||
'pedestrian_count': pedestrian_count,
|
||||
'road_type': road_type,
|
||||
'distance_km': distance_km,
|
||||
'source': 'madrid_current_xml',
|
||||
'city': 'madrid',
|
||||
'inference_metadata': inference_metadata,
|
||||
'raw_data': traffic_point
|
||||
}
|
||||
|
||||
return enhanced_data
|
||||
|
||||
async def _fetch_historical_data_enhanced(self, latitude: float, longitude: float,
|
||||
start_date: datetime, end_date: datetime,
|
||||
nearest_points: List[Tuple[str, Dict[str, Any], float]]) -> List[Dict[str, Any]]:
|
||||
"""Fetch and process historical traffic data"""
|
||||
historical_records = []
|
||||
|
||||
try:
|
||||
# Process by year and month to avoid memory issues
|
||||
current_date = start_date.replace(day=1) # Start from beginning of month
|
||||
|
||||
while current_date <= end_date:
|
||||
year = current_date.year
|
||||
month = current_date.month
|
||||
|
||||
# Build historical URL
|
||||
zip_url = self.api_client._build_historical_url(year, month)
|
||||
|
||||
self.logger.info("Processing historical ZIP file",
|
||||
year=year, month=month, zip_url=zip_url)
|
||||
|
||||
# Fetch ZIP content
|
||||
zip_content = await self.api_client.fetch_historical_zip(zip_url)
|
||||
if not zip_content:
|
||||
self.logger.warning("Failed to fetch historical ZIP", url=zip_url)
|
||||
current_date = current_date.replace(month=current_date.month + 1) if current_date.month < 12 else current_date.replace(year=current_date.year + 1, month=1)
|
||||
continue
|
||||
|
||||
# Process ZIP content with enhanced parsing
|
||||
month_records = await self._process_historical_zip_enhanced(
|
||||
zip_content, zip_url, latitude, longitude, nearest_points
|
||||
)
|
||||
|
||||
# Filter by date range - ensure timezone consistency
|
||||
# Make sure start_date and end_date have timezone info for comparison
|
||||
start_tz = start_date if start_date.tzinfo else start_date.replace(tzinfo=timezone.utc)
|
||||
end_tz = end_date if end_date.tzinfo else end_date.replace(tzinfo=timezone.utc)
|
||||
|
||||
filtered_records = []
|
||||
for record in month_records:
|
||||
record_date = record.get('date')
|
||||
if not record_date:
|
||||
continue
|
||||
|
||||
# Ensure record date has timezone info
|
||||
if not record_date.tzinfo:
|
||||
record_date = record_date.replace(tzinfo=timezone.utc)
|
||||
|
||||
# Now compare with consistent timezone info
|
||||
if start_tz <= record_date <= end_tz:
|
||||
filtered_records.append(record)
|
||||
|
||||
historical_records.extend(filtered_records)
|
||||
|
||||
self.logger.info("Month processing completed",
|
||||
year=year, month=month,
|
||||
month_records=len(month_records),
|
||||
filtered_records=len(filtered_records),
|
||||
total_records=len(historical_records))
|
||||
|
||||
# Move to next month
|
||||
if current_date.month == 12:
|
||||
current_date = current_date.replace(year=current_date.year + 1, month=1)
|
||||
else:
|
||||
current_date = current_date.replace(month=current_date.month + 1)
|
||||
|
||||
return historical_records
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error("Error fetching historical data", error=str(e))
|
||||
return historical_records # Return partial results
|
||||
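# A tiny helper like this sketch (hypothetical, not part of the module) would remove the
# month-advance logic duplicated in the skip branch above and at the bottom of the loop:
#
#   def _next_month(d: datetime) -> datetime:
#       return d.replace(year=d.year + 1, month=1) if d.month == 12 else d.replace(month=d.month + 1)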
|
||||
async def _process_historical_zip_enhanced(self, zip_content: bytes, zip_url: str,
|
||||
latitude: float, longitude: float,
|
||||
nearest_points: List[Tuple[str, Dict[str, Any], float]]) -> List[Dict[str, Any]]:
|
||||
"""Process historical ZIP file with enhanced parsing"""
|
||||
try:
|
||||
import zipfile
|
||||
import io
|
||||
import csv
|
||||
import gc
|
||||
|
||||
historical_records = []
|
||||
nearest_ids = {p[0] for p in nearest_points}
|
||||
|
||||
with zipfile.ZipFile(io.BytesIO(zip_content)) as zip_file:
|
||||
csv_files = [f for f in zip_file.namelist() if f.lower().endswith('.csv')]
|
||||
|
||||
for csv_filename in csv_files:
|
||||
try:
|
||||
# Read CSV content
|
||||
with zip_file.open(csv_filename) as csv_file:
|
||||
text_content = csv_file.read().decode('utf-8', errors='ignore')
|
||||
|
||||
# Process CSV in chunks using processor
|
||||
csv_records = await self.processor.process_csv_content_chunked(
|
||||
text_content, csv_filename, nearest_ids, nearest_points
|
||||
)
|
||||
|
||||
historical_records.extend(csv_records)
|
||||
|
||||
# Force garbage collection
|
||||
gc.collect()
|
||||
|
||||
except Exception as csv_error:
|
||||
self.logger.warning("Error processing CSV file",
|
||||
filename=csv_filename,
|
||||
error=str(csv_error))
|
||||
continue
|
||||
|
||||
self.logger.info("Historical ZIP processing completed",
|
||||
zip_url=zip_url,
|
||||
total_records=len(historical_records))
|
||||
|
||||
return historical_records
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error("Error processing historical ZIP file",
|
||||
zip_url=zip_url, error=str(e))
|
||||
return []
|
||||
|
||||
|
||||
257
services/external/app/external/apis/traffic.py
vendored
Normal file
257
services/external/app/external/apis/traffic.py
vendored
Normal file
@@ -0,0 +1,257 @@
|
||||
# ================================================================
|
||||
# services/external/app/external/apis/traffic.py
|
||||
# ================================================================
|
||||
"""
|
||||
Traffic API abstraction layer for multiple cities
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
from abc import ABC, abstractmethod
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from typing import Dict, List, Any, Optional, Tuple
|
||||
import structlog
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
|
||||
class SupportedCity(Enum):
|
||||
"""Supported cities for traffic data collection"""
|
||||
MADRID = "madrid"
|
||||
BARCELONA = "barcelona"
|
||||
VALENCIA = "valencia"
|
||||
|
||||
|
||||
class BaseTrafficClient(ABC):
|
||||
"""
|
||||
Abstract base class for city-specific traffic clients
|
||||
Defines the contract that all traffic clients must implement
|
||||
"""
|
||||
|
||||
def __init__(self, city: SupportedCity):
|
||||
self.city = city
|
||||
self.logger = structlog.get_logger().bind(city=city.value)
|
||||
|
||||
@abstractmethod
|
||||
async def get_current_traffic(self, latitude: float, longitude: float) -> Optional[Dict[str, Any]]:
|
||||
"""Get current traffic data for location"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def get_historical_traffic(self, latitude: float, longitude: float,
|
||||
start_date: datetime, end_date: datetime) -> List[Dict[str, Any]]:
|
||||
"""Get historical traffic data"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def get_events(self, latitude: float, longitude: float, radius_km: float = 5.0) -> List[Dict[str, Any]]:
|
||||
"""Get traffic incidents and events"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def supports_location(self, latitude: float, longitude: float) -> bool:
|
||||
"""Check if this client supports the given location"""
|
||||
pass
|
||||
|
||||
|
||||
class TrafficAPIClientFactory:
|
||||
"""
|
||||
Factory class to create appropriate traffic clients based on location
|
||||
"""
|
||||
|
||||
# City geographical bounds
|
||||
CITY_BOUNDS = {
|
||||
SupportedCity.MADRID: {
|
||||
'lat_min': 40.31, 'lat_max': 40.56,
|
||||
'lon_min': -3.89, 'lon_max': -3.51
|
||||
},
|
||||
SupportedCity.BARCELONA: {
|
||||
'lat_min': 41.32, 'lat_max': 41.47,
|
||||
'lon_min': 2.05, 'lon_max': 2.25
|
||||
},
|
||||
SupportedCity.VALENCIA: {
|
||||
'lat_min': 39.42, 'lat_max': 39.52,
|
||||
'lon_min': -0.42, 'lon_max': -0.32
|
||||
}
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def get_client_for_location(cls, latitude: float, longitude: float) -> Optional[BaseTrafficClient]:
|
||||
"""
|
||||
Get appropriate traffic client for given location
|
||||
|
||||
Args:
|
||||
latitude: Query location latitude
|
||||
longitude: Query location longitude
|
||||
|
||||
Returns:
|
||||
BaseTrafficClient instance or None if location not supported
|
||||
"""
|
||||
try:
|
||||
# Check each city's bounds
|
||||
for city, bounds in cls.CITY_BOUNDS.items():
|
||||
if (bounds['lat_min'] <= latitude <= bounds['lat_max'] and
|
||||
bounds['lon_min'] <= longitude <= bounds['lon_max']):
|
||||
|
||||
logger.info("Location matched to city",
|
||||
city=city.value, lat=latitude, lon=longitude)
|
||||
return cls._create_client(city)
|
||||
|
||||
# If no specific city matches, try to find closest supported city
|
||||
closest_city = cls._find_closest_city(latitude, longitude)
|
||||
if closest_city:
|
||||
logger.info("Using closest city for location",
|
||||
closest_city=closest_city.value, lat=latitude, lon=longitude)
|
||||
return cls._create_client(closest_city)
|
||||
|
||||
logger.warning("No traffic client available for location",
|
||||
lat=latitude, lon=longitude)
|
||||
return None
|
||||
|
||||
except Exception as e:
|
||||
logger.error("Error getting traffic client for location",
|
||||
lat=latitude, lon=longitude, error=str(e))
|
||||
return None
|
||||
|
||||
@classmethod
|
||||
def _create_client(cls, city: SupportedCity) -> BaseTrafficClient:
|
||||
"""Create traffic client for specific city"""
|
||||
if city == SupportedCity.MADRID:
|
||||
from .madrid_traffic_client import MadridTrafficClient
|
||||
return MadridTrafficClient()
|
||||
elif city == SupportedCity.BARCELONA:
|
||||
# Future implementation
|
||||
raise NotImplementedError(f"Traffic client for {city.value} not yet implemented")
|
||||
elif city == SupportedCity.VALENCIA:
|
||||
# Future implementation
|
||||
raise NotImplementedError(f"Traffic client for {city.value} not yet implemented")
|
||||
else:
|
||||
raise ValueError(f"Unsupported city: {city}")
|
||||
|
||||
@classmethod
|
||||
def _find_closest_city(cls, latitude: float, longitude: float) -> Optional[SupportedCity]:
|
||||
"""Find closest supported city to given coordinates"""
|
||||
import math
|
||||
|
||||
def distance(lat1, lon1, lat2, lon2):
|
||||
"""Calculate distance between two coordinates"""
|
||||
R = 6371 # Earth's radius in km
|
||||
dlat = math.radians(lat2 - lat1)
|
||||
dlon = math.radians(lon2 - lon1)
|
||||
a = (math.sin(dlat/2) * math.sin(dlat/2) +
|
||||
math.cos(math.radians(lat1)) * math.cos(math.radians(lat2)) *
|
||||
math.sin(dlon/2) * math.sin(dlon/2))
|
||||
c = 2 * math.atan2(math.sqrt(a), math.sqrt(1-a))
|
||||
return R * c
|
||||
|
||||
min_distance = float('inf')
|
||||
closest_city = None
|
||||
|
||||
# City centers for distance calculation
|
||||
city_centers = {
|
||||
SupportedCity.MADRID: (40.4168, -3.7038),
|
||||
SupportedCity.BARCELONA: (41.3851, 2.1734),
|
||||
SupportedCity.VALENCIA: (39.4699, -0.3763)
|
||||
}
|
||||
|
||||
for city, (city_lat, city_lon) in city_centers.items():
|
||||
dist = distance(latitude, longitude, city_lat, city_lon)
|
||||
if dist < min_distance and dist < 100: # Within 100km
|
||||
min_distance = dist
|
||||
closest_city = city
|
||||
|
||||
return closest_city
|
||||
|
||||
@classmethod
|
||||
def get_supported_cities(cls) -> List[Dict[str, Any]]:
|
||||
"""Get list of supported cities with their bounds"""
|
||||
cities = []
|
||||
for city, bounds in cls.CITY_BOUNDS.items():
|
||||
cities.append({
|
||||
"city": city.value,
|
||||
"bounds": bounds,
|
||||
"status": "active" if city == SupportedCity.MADRID else "planned"
|
||||
})
|
||||
return cities
|
||||
|
||||
|
||||
class UniversalTrafficClient:
|
||||
"""
|
||||
Universal traffic client that delegates to appropriate city-specific clients
|
||||
This is the main interface that external services should use
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self.factory = TrafficAPIClientFactory()
|
||||
self.client_cache = {} # Cache clients for performance
|
||||
|
||||
async def get_current_traffic(self, latitude: float, longitude: float) -> Optional[Dict[str, Any]]:
|
||||
"""Get current traffic data for any supported location"""
|
||||
try:
|
||||
client = self._get_client_for_location(latitude, longitude)
|
||||
if client:
|
||||
return await client.get_current_traffic(latitude, longitude)
|
||||
else:
|
||||
logger.warning("No traffic data available for location",
|
||||
lat=latitude, lon=longitude)
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.error("Error getting current traffic",
|
||||
lat=latitude, lon=longitude, error=str(e))
|
||||
return None
|
||||
|
||||
async def get_historical_traffic(self, latitude: float, longitude: float,
|
||||
start_date: datetime, end_date: datetime) -> List[Dict[str, Any]]:
|
||||
"""Get historical traffic data for any supported location"""
|
||||
try:
|
||||
client = self._get_client_for_location(latitude, longitude)
|
||||
if client:
|
||||
return await client.get_historical_traffic(latitude, longitude, start_date, end_date)
|
||||
else:
|
||||
logger.warning("No historical traffic data available for location",
|
||||
lat=latitude, lon=longitude)
|
||||
return []
|
||||
except Exception as e:
|
||||
logger.error("Error getting historical traffic",
|
||||
lat=latitude, lon=longitude, error=str(e))
|
||||
return []
|
||||
|
||||
async def get_events(self, latitude: float, longitude: float, radius_km: float = 5.0) -> List[Dict[str, Any]]:
|
||||
"""Get traffic events for any supported location"""
|
||||
try:
|
||||
client = self._get_client_for_location(latitude, longitude)
|
||||
if client:
|
||||
return await client.get_events(latitude, longitude, radius_km)
|
||||
else:
|
||||
return []
|
||||
except Exception as e:
|
||||
logger.error("Error getting traffic events",
|
||||
lat=latitude, lon=longitude, error=str(e))
|
||||
return []
|
||||
|
||||
def _get_client_for_location(self, latitude: float, longitude: float) -> Optional[BaseTrafficClient]:
|
||||
"""Get cached or create new client for location"""
|
||||
cache_key = f"{latitude:.4f},{longitude:.4f}"
|
||||
|
||||
if cache_key not in self.client_cache:
|
||||
client = self.factory.get_client_for_location(latitude, longitude)
|
||||
self.client_cache[cache_key] = client
|
||||
|
||||
return self.client_cache[cache_key]
|
||||
|
||||
def get_location_info(self, latitude: float, longitude: float) -> Dict[str, Any]:
|
||||
"""Get information about traffic data availability for location"""
|
||||
client = self._get_client_for_location(latitude, longitude)
|
||||
if client:
|
||||
return {
|
||||
"supported": True,
|
||||
"city": client.city.value,
|
||||
"features": ["current_traffic", "historical_traffic", "events"]
|
||||
}
|
||||
else:
|
||||
return {
|
||||
"supported": False,
|
||||
"city": None,
|
||||
"features": [],
|
||||
"message": "No traffic data available for this location"
|
||||
}
|
||||
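# Hedged usage sketch (coordinates are roughly Puerta del Sol; the demo() wrapper is illustrative only):
#
#   import asyncio
#   from datetime import datetime, timedelta, timezone
#
#   async def demo():
#       client = UniversalTrafficClient()
#       info = client.get_location_info(40.4168, -3.7038)
#       current = await client.get_current_traffic(40.4168, -3.7038)
#       end = datetime.now(timezone.utc)
#       history = await client.get_historical_traffic(40.4168, -3.7038, end - timedelta(days=7), end)
#       events = await client.get_events(40.4168, -3.7038, radius_km=2.0)
#       print(info, current, len(history), len(events))
#
#   asyncio.run(demo())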
139
services/external/app/external/base_client.py
vendored
Normal file
139
services/external/app/external/base_client.py
vendored
Normal file
@@ -0,0 +1,139 @@
|
||||
# ================================================================
|
||||
# services/external/app/external/base_client.py
|
||||
# ================================================================
|
||||
"""Base HTTP client for external APIs - Enhanced for AEMET"""
|
||||
|
||||
import httpx
|
||||
from typing import Dict, Any, Optional
|
||||
import structlog
|
||||
from datetime import datetime
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
class BaseAPIClient:
|
||||
|
||||
def __init__(self, base_url: str, api_key: Optional[str] = None):
|
||||
self.base_url = base_url
|
||||
self.api_key = api_key
|
||||
self.timeout = httpx.Timeout(30.0)
|
||||
|
||||
async def _get(self, endpoint: str, params: Optional[Dict] = None, headers: Optional[Dict] = None) -> Optional[Dict[str, Any]]:
|
||||
"""Make GET request"""
|
||||
try:
|
||||
url = f"{self.base_url}{endpoint}"
|
||||
|
||||
# Add API key to params for AEMET (not headers)
|
||||
request_params = params or {}
|
||||
if self.api_key:
|
||||
request_params["api_key"] = self.api_key
|
||||
|
||||
# Add headers if provided
|
||||
request_headers = headers or {}
|
||||
|
||||
logger.debug("Making API request", url=url, params=request_params)
|
||||
|
||||
async with httpx.AsyncClient(timeout=self.timeout) as client:
|
||||
response = await client.get(url, params=request_params, headers=request_headers)
|
||||
response.raise_for_status()
|
||||
|
||||
# Log response for debugging
|
||||
response_data = response.json()
|
||||
logger.debug("API response received",
|
||||
status_code=response.status_code,
|
||||
response_keys=list(response_data.keys()) if isinstance(response_data, dict) else "non-dict")
|
||||
|
||||
return response_data
|
||||
|
||||
except httpx.HTTPStatusError as e:
|
||||
logger.error("HTTP error", status_code=e.response.status_code, url=url, response_text=e.response.text[:200])
|
||||
return None
|
||||
except httpx.RequestError as e:
|
||||
logger.error("Request error", error=str(e), url=url)
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.error("Unexpected error", error=str(e), url=url)
|
||||
return None
|
||||
|
||||
async def _fetch_url_directly(self, url: str, headers: Optional[Dict] = None) -> Optional[Dict[str, Any]]:
|
||||
"""Fetch data directly from a full URL (for AEMET datos URLs)"""
|
||||
try:
|
||||
request_headers = headers or {}
|
||||
|
||||
logger.debug("Making direct URL request", url=url)
|
||||
|
||||
async with httpx.AsyncClient(timeout=self.timeout) as client:
|
||||
response = await client.get(url, headers=request_headers)
|
||||
response.raise_for_status()
|
||||
|
||||
# Handle encoding issues common with Spanish data sources
|
||||
try:
|
||||
response_data = response.json()
|
||||
except UnicodeDecodeError:
|
||||
logger.warning("UTF-8 decode failed, trying alternative encodings", url=url)
|
||||
# Try common Spanish encodings
import json  # imported before the loop so the except clause below can always resolve json.JSONDecodeError
for encoding in ['latin-1', 'windows-1252', 'iso-8859-1']:
try:
text_content = response.content.decode(encoding)
response_data = json.loads(text_content)
logger.info("Successfully decoded with encoding", encoding=encoding)
break
except (UnicodeDecodeError, json.JSONDecodeError):
continue
|
||||
else:
|
||||
logger.error("Failed to decode response with any encoding", url=url)
|
||||
return None
|
||||
|
||||
logger.debug("Direct URL response received",
|
||||
status_code=response.status_code,
|
||||
data_type=type(response_data),
|
||||
data_length=len(response_data) if isinstance(response_data, (list, dict)) else "unknown")
|
||||
|
||||
return response_data
|
||||
|
||||
except httpx.HTTPStatusError as e:
|
||||
logger.error("HTTP error in direct fetch", status_code=e.response.status_code, url=url)
|
||||
return None
|
||||
except httpx.RequestError as e:
|
||||
logger.error("Request error in direct fetch", error=str(e), url=url)
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.error("Unexpected error in direct fetch", error=str(e), url=url)
|
||||
return None
|
||||
|
||||
async def _post(self, endpoint: str, data: Optional[Dict] = None, headers: Optional[Dict] = None) -> Optional[Dict[str, Any]]:
|
||||
"""Make POST request"""
|
||||
try:
|
||||
url = f"{self.base_url}{endpoint}"
|
||||
|
||||
request_headers = headers or {}
|
||||
if self.api_key:
|
||||
request_headers["Authorization"] = f"Bearer {self.api_key}"
|
||||
|
||||
async with httpx.AsyncClient(timeout=self.timeout) as client:
|
||||
response = await client.post(url, json=data, headers=request_headers)
|
||||
response.raise_for_status()
|
||||
return response.json()
|
||||
|
||||
except httpx.HTTPStatusError as e:
|
||||
logger.error("HTTP error", status_code=e.response.status_code, url=url)
|
||||
return None
|
||||
except httpx.RequestError as e:
|
||||
logger.error("Request error", error=str(e), url=url)
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.error("Unexpected error", error=str(e), url=url)
|
||||
return None
|
||||
|
||||
async def get_direct(self, url: str, headers: Optional[Dict] = None, timeout: Optional[int] = None) -> httpx.Response:
|
||||
"""
|
||||
Public GET method for direct HTTP requests
|
||||
Returns the raw httpx Response object for maximum flexibility
|
||||
"""
|
||||
request_headers = headers or {}
|
||||
request_timeout = httpx.Timeout(timeout if timeout else 30.0)
|
||||
|
||||
async with httpx.AsyncClient(timeout=request_timeout, follow_redirects=True) as client:
|
||||
response = await client.get(url, headers=request_headers)
|
||||
response.raise_for_status()
|
||||
return response
|
||||
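# Hedged sketch of the two-step AEMET pattern this base client is "enhanced" for
# (endpoint, key, and class name below are illustrative; the real AEMET client lives elsewhere):
#
#   class ExampleAemetClient(BaseAPIClient):
#       def __init__(self, api_key: str):
#           super().__init__(base_url="https://opendata.aemet.es/opendata", api_key=api_key)
#
#       async def fetch_observations(self):
#           # First call returns metadata containing a "datos" URL; second call fetches the payload.
#           meta = await self._get("/api/observacion/convencional/todas")
#           if meta and meta.get("datos"):
#               return await self._fetch_url_directly(meta["datos"])
#           return None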
12
services/external/app/external/clients/__init__.py
vendored
Normal file
12
services/external/app/external/clients/__init__.py
vendored
Normal file
@@ -0,0 +1,12 @@
|
||||
# ================================================================
|
||||
# services/external/app/external/clients/__init__.py
|
||||
# ================================================================
|
||||
"""
|
||||
HTTP clients package
|
||||
"""
|
||||
|
||||
from .madrid_client import MadridTrafficAPIClient
|
||||
|
||||
__all__ = [
|
||||
'MadridTrafficAPIClient'
|
||||
]
|
||||
159
services/external/app/external/clients/madrid_client.py
vendored
Normal file
159
services/external/app/external/clients/madrid_client.py
vendored
Normal file
@@ -0,0 +1,159 @@
|
||||
# ================================================================
|
||||
# services/external/app/external/clients/madrid_client.py
|
||||
# ================================================================
|
||||
"""
|
||||
Pure HTTP client for Madrid traffic APIs
|
||||
Handles only HTTP communication and response decoding
|
||||
"""
|
||||
|
||||
import httpx
|
||||
import structlog
|
||||
from datetime import datetime
|
||||
from typing import Optional, Dict, Any
|
||||
|
||||
from ..base_client import BaseAPIClient
|
||||
|
||||
|
||||
class MadridTrafficAPIClient(BaseAPIClient):
|
||||
"""Pure HTTP client for Madrid traffic APIs"""
|
||||
|
||||
TRAFFIC_ENDPOINT = "https://informo.madrid.es/informo/tmadrid/pm.xml"
|
||||
MEASUREMENT_POINTS_URL = "https://datos.madrid.es/egob/catalogo/202468-263-intensidad-trafico.csv"
|
||||
|
||||
def __init__(self):
|
||||
super().__init__(base_url="https://datos.madrid.es")
|
||||
self.logger = structlog.get_logger()
|
||||
|
||||
def _decode_response_content(self, response) -> Optional[str]:
|
||||
"""Decode response content with multiple encoding attempts"""
|
||||
try:
|
||||
return response.text
|
||||
except UnicodeDecodeError:
|
||||
# Try manual encoding for Spanish content
|
||||
for encoding in ['utf-8', 'latin-1', 'windows-1252', 'iso-8859-1']:
|
||||
try:
|
||||
content = response.content.decode(encoding)
|
||||
if content and len(content) > 100:
|
||||
self.logger.debug("Successfully decoded with encoding", encoding=encoding)
|
||||
return content
|
||||
except UnicodeDecodeError:
|
||||
continue
|
||||
return None
|
||||
|
||||
def _build_historical_url(self, year: int, month: int) -> str:
|
||||
"""Build historical ZIP URL for given year and month"""
|
||||
# Madrid historical data URL pattern
|
||||
base_url = "https://datos.madrid.es/egob/catalogo/208627"
|
||||
|
||||
# URL numbering pattern (this may need adjustment based on actual URLs)
|
||||
# Note: Historical data is only available for past periods, not current/future
|
||||
if year == 2023:
|
||||
url_number = 116 + (month - 1) # 116-127 for 2023
|
||||
elif year == 2024:
|
||||
url_number = 128 + (month - 1) # 128-139 for 2024
|
||||
elif year == 2025:
|
||||
# For 2025, use the continuing numbering from 2024
|
||||
url_number = 140 + (month - 1) # Starting from 140 for January 2025
|
||||
else:
|
||||
url_number = 116 # Fallback to 2023 data
|
||||
|
||||
return f"{base_url}-{url_number}-transporte-ptomedida-historico.zip"
|
||||
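# Example of the numbering above (this is what the method computes; the live catalogue
# numbering may differ, as the comment above already warns):
#
#   _build_historical_url(2024, 3)
#   # -> "https://datos.madrid.es/egob/catalogo/208627-130-transporte-ptomedida-historico.zip"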
|
||||
async def fetch_current_traffic_xml(self, endpoint: Optional[str] = None) -> Optional[str]:
|
||||
"""Fetch current traffic XML data"""
|
||||
endpoint = endpoint or self.TRAFFIC_ENDPOINT
|
||||
|
||||
try:
|
||||
headers = {
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
|
||||
'Accept': 'application/xml,text/xml,*/*',
|
||||
'Accept-Language': 'es-ES,es;q=0.9,en;q=0.8',
|
||||
'Accept-Encoding': 'gzip, deflate, br',
|
||||
'Cache-Control': 'no-cache',
|
||||
'Referer': 'https://datos.madrid.es/'
|
||||
}
|
||||
|
||||
response = await self.get_direct(endpoint, headers=headers, timeout=30)
|
||||
|
||||
if not response or response.status_code != 200:
|
||||
self.logger.warning("Failed to fetch XML data",
|
||||
endpoint=endpoint,
|
||||
status=response.status_code if response else None)
|
||||
return None
|
||||
|
||||
# Get XML content with encoding handling
|
||||
xml_content = self._decode_response_content(response)
|
||||
if not xml_content:
|
||||
self.logger.debug("No XML content received", endpoint=endpoint)
|
||||
return None
|
||||
|
||||
self.logger.debug("Madrid XML content fetched",
|
||||
length=len(xml_content),
|
||||
endpoint=endpoint)
|
||||
|
||||
return xml_content
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error("Error fetching traffic XML data",
|
||||
endpoint=endpoint,
|
||||
error=str(e))
|
||||
return None
|
||||
|
||||
async def fetch_measurement_points_csv(self, url: Optional[str] = None) -> Optional[str]:
|
||||
"""Fetch measurement points CSV data"""
|
||||
url = url or self.MEASUREMENT_POINTS_URL
|
||||
|
||||
try:
|
||||
async with httpx.AsyncClient(
|
||||
timeout=30.0,
|
||||
headers={
|
||||
'User-Agent': 'MadridTrafficClient/2.0',
|
||||
'Accept': 'text/csv,application/csv,*/*'
|
||||
},
|
||||
follow_redirects=True
|
||||
) as client:
|
||||
|
||||
self.logger.debug("Fetching measurement points registry", url=url)
|
||||
response = await client.get(url)
|
||||
|
||||
if response.status_code == 200:
|
||||
return response.text
|
||||
else:
|
||||
self.logger.warning("Failed to fetch measurement points",
|
||||
status=response.status_code, url=url)
|
||||
return None
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error("Error fetching measurement points registry",
|
||||
url=url, error=str(e))
|
||||
return None
|
||||
|
||||
async def fetch_historical_zip(self, zip_url: str) -> Optional[bytes]:
|
||||
"""Fetch historical traffic ZIP file"""
|
||||
try:
|
||||
async with httpx.AsyncClient(
|
||||
timeout=120.0, # Longer timeout for large files
|
||||
headers={
|
||||
'User-Agent': 'MadridTrafficClient/2.0',
|
||||
'Accept': 'application/zip,*/*'
|
||||
},
|
||||
follow_redirects=True
|
||||
) as client:
|
||||
|
||||
self.logger.debug("Fetching historical ZIP", url=zip_url)
|
||||
response = await client.get(zip_url)
|
||||
|
||||
if response.status_code == 200:
|
||||
self.logger.debug("Historical ZIP fetched",
|
||||
url=zip_url,
|
||||
size=len(response.content))
|
||||
return response.content
|
||||
else:
|
||||
self.logger.warning("Failed to fetch historical ZIP",
|
||||
status=response.status_code, url=zip_url)
|
||||
return None
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error("Error fetching historical ZIP",
|
||||
url=zip_url, error=str(e))
|
||||
return None
|
||||
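# Hedged usage sketch (the demo() wrapper is illustrative only; every call is a method defined above):
#
#   async def demo():
#       client = MadridTrafficAPIClient()
#       xml_content = await client.fetch_current_traffic_xml()
#       csv_content = await client.fetch_measurement_points_csv()
#       zip_bytes = await client.fetch_historical_zip(client._build_historical_url(2024, 3))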
20
services/external/app/external/models/__init__.py
vendored
Normal file
20
services/external/app/external/models/__init__.py
vendored
Normal file
@@ -0,0 +1,20 @@
|
||||
# ================================================================
|
||||
# services/external/app/external/models/__init__.py
|
||||
# ================================================================
|
||||
"""
|
||||
Madrid traffic models package
|
||||
"""
|
||||
|
||||
from .madrid_models import (
|
||||
TrafficServiceLevel,
|
||||
CongestionLevel,
|
||||
MeasurementPoint,
|
||||
TrafficRecord
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
'TrafficServiceLevel',
|
||||
'CongestionLevel',
|
||||
'MeasurementPoint',
|
||||
'TrafficRecord'
|
||||
]
|
||||
66
services/external/app/external/models/madrid_models.py
vendored
Normal file
66
services/external/app/external/models/madrid_models.py
vendored
Normal file
@@ -0,0 +1,66 @@
|
||||
# ================================================================
|
||||
# services/external/app/external/models/madrid_models.py
|
||||
# ================================================================
|
||||
"""
|
||||
Data structures, enums, and dataclasses for Madrid traffic system
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from typing import Optional
|
||||
|
||||
|
||||
class TrafficServiceLevel(Enum):
|
||||
"""Madrid traffic service levels"""
|
||||
FLUID = 0
|
||||
DENSE = 1
|
||||
CONGESTED = 2
|
||||
BLOCKED = 3
|
||||
|
||||
|
||||
class CongestionLevel(Enum):
|
||||
"""Standardized congestion levels"""
|
||||
LOW = "low"
|
||||
MEDIUM = "medium"
|
||||
HIGH = "high"
|
||||
BLOCKED = "blocked"
|
||||
|
||||
|
||||
@dataclass
|
||||
class MeasurementPoint:
|
||||
"""Madrid measurement point data structure"""
|
||||
id: str
|
||||
latitude: float
|
||||
longitude: float
|
||||
distance: float
|
||||
name: str
|
||||
type: str
|
||||
|
||||
|
||||
@dataclass
|
||||
class TrafficRecord:
|
||||
"""Standardized traffic record with pedestrian inference"""
|
||||
date: datetime
|
||||
traffic_volume: int
|
||||
occupation_percentage: int
|
||||
load_percentage: int
|
||||
average_speed: int
|
||||
congestion_level: str
|
||||
pedestrian_count: int
|
||||
measurement_point_id: str
|
||||
measurement_point_name: str
|
||||
road_type: str
|
||||
source: str
|
||||
district: Optional[str] = None
|
||||
|
||||
# Madrid-specific data
|
||||
intensidad_raw: Optional[int] = None
|
||||
ocupacion_raw: Optional[int] = None
|
||||
carga_raw: Optional[int] = None
|
||||
vmed_raw: Optional[int] = None
|
||||
|
||||
# Pedestrian inference metadata
|
||||
pedestrian_multiplier: Optional[float] = None
|
||||
time_pattern_factor: Optional[float] = None
|
||||
district_factor: Optional[float] = None
|
||||
14
services/external/app/external/processors/__init__.py
vendored
Normal file
14
services/external/app/external/processors/__init__.py
vendored
Normal file
@@ -0,0 +1,14 @@
|
||||
# ================================================================
|
||||
# services/external/app/external/processors/__init__.py
|
||||
# ================================================================
|
||||
"""
|
||||
Data processors package
|
||||
"""
|
||||
|
||||
from .madrid_processor import MadridTrafficDataProcessor
|
||||
from .madrid_business_logic import MadridTrafficAnalyzer
|
||||
|
||||
__all__ = [
|
||||
'MadridTrafficDataProcessor',
|
||||
'MadridTrafficAnalyzer'
|
||||
]
|
||||
346
services/external/app/external/processors/madrid_business_logic.py
vendored
Normal file
346
services/external/app/external/processors/madrid_business_logic.py
vendored
Normal file
@@ -0,0 +1,346 @@
|
||||
# ================================================================
|
||||
# services/external/app/external/processors/madrid_business_logic.py
|
||||
# ================================================================
|
||||
"""
|
||||
Business rules, inference, and domain logic for Madrid traffic data
|
||||
Handles pedestrian inference, district mapping, road classification, and validation
|
||||
"""
|
||||
|
||||
import math
|
||||
import re
|
||||
from datetime import datetime
|
||||
from typing import Dict, List, Any, Optional, Tuple
|
||||
import structlog
|
||||
|
||||
from ..models.madrid_models import TrafficRecord, CongestionLevel
|
||||
|
||||
|
||||
class MadridTrafficAnalyzer:
|
||||
"""Handles business logic for Madrid traffic analysis"""
|
||||
|
||||
# Madrid district characteristics for pedestrian patterns
|
||||
DISTRICT_MULTIPLIERS = {
|
||||
'Centro': 2.5, # Historic center, high pedestrian activity
|
||||
'Salamanca': 2.0, # Shopping area, high foot traffic
|
||||
'Chamberí': 1.8, # Business district
|
||||
'Retiro': 2.2, # Near park, high leisure activity
|
||||
'Chamartín': 1.6, # Business/residential
|
||||
'Tetuán': 1.4, # Mixed residential/commercial
|
||||
'Fuencarral': 1.3, # Residential with commercial areas
|
||||
'Moncloa': 1.7, # University area
|
||||
'Latina': 1.5, # Residential area
|
||||
'Carabanchel': 1.2, # Residential periphery
|
||||
'Usera': 1.1, # Industrial/residential
|
||||
'Villaverde': 1.0, # Industrial area
|
||||
'Villa de Vallecas': 1.0, # Peripheral residential
|
||||
'Vicálvaro': 0.9, # Peripheral
|
||||
'San Blas': 1.1, # Residential
|
||||
'Barajas': 0.8, # Airport area, low pedestrian activity
|
||||
'Hortaleza': 1.2, # Mixed area
|
||||
'Ciudad Lineal': 1.3, # Linear development
|
||||
'Puente de Vallecas': 1.2, # Working class area
|
||||
'Moratalaz': 1.1, # Residential
|
||||
'Arganzuela': 1.6, # Near center, growing area
|
||||
}
|
||||
|
||||
# Time-based patterns (hour of day)
|
||||
TIME_PATTERNS = {
|
||||
'morning_peak': {'hours': [7, 8, 9], 'multiplier': 2.0},
|
||||
'lunch_peak': {'hours': [12, 13, 14], 'multiplier': 2.5},
|
||||
'evening_peak': {'hours': [18, 19, 20], 'multiplier': 2.2},
|
||||
'afternoon': {'hours': [15, 16, 17], 'multiplier': 1.8},
|
||||
'late_evening': {'hours': [21, 22], 'multiplier': 1.5},
|
||||
'night': {'hours': [23, 0, 1, 2, 3, 4, 5, 6], 'multiplier': 0.3},
|
||||
'morning': {'hours': [10, 11], 'multiplier': 1.4}
|
||||
}
|
||||
|
||||
# Road type specific patterns
|
||||
ROAD_TYPE_BASE = {
|
||||
'URB': 250, # Urban streets - high pedestrian activity
|
||||
'M30': 50, # Ring road - minimal pedestrians
|
||||
'C30': 75, # Secondary ring - some pedestrian access
|
||||
'A': 25, # Highways - very low pedestrians
|
||||
'R': 40 # Radial roads - low to moderate
|
||||
}
|
||||
|
||||
# Weather impact on pedestrian activity
|
||||
WEATHER_IMPACT = {
|
||||
'rain': 0.6, # 40% reduction in rain
|
||||
'hot_weather': 0.8, # 20% reduction when very hot
|
||||
'cold_weather': 0.7, # 30% reduction when very cold
|
||||
'normal': 1.0 # No impact
|
||||
}
|
||||
|
||||
def __init__(self):
|
||||
self.logger = structlog.get_logger()
|
||||
|
||||
def calculate_pedestrian_flow(
|
||||
self,
|
||||
traffic_record: TrafficRecord,
|
||||
location_context: Optional[Dict[str, Any]] = None
|
||||
) -> Tuple[int, Dict[str, float]]:
|
||||
"""
|
||||
Calculate pedestrian flow estimate with detailed metadata
|
||||
|
||||
Returns:
|
||||
Tuple of (pedestrian_count, inference_metadata)
|
||||
"""
|
||||
# Base calculation from road type
|
||||
road_type = traffic_record.road_type or 'URB'
|
||||
base_pedestrians = self.ROAD_TYPE_BASE.get(road_type, 200)
|
||||
|
||||
# Time pattern adjustment
|
||||
hour = traffic_record.date.hour
|
||||
time_factor = self._get_time_pattern_factor(hour)
|
||||
|
||||
# District adjustment (if available)
|
||||
district_factor = 1.0
|
||||
district = traffic_record.district or self.infer_district_from_location(location_context)
|
||||
if district:
|
||||
district_factor = self.DISTRICT_MULTIPLIERS.get(district, 1.0)
|
||||
|
||||
# Traffic correlation adjustment
|
||||
traffic_factor = self._calculate_traffic_correlation(traffic_record)
|
||||
|
||||
# Weather adjustment (if data available)
|
||||
weather_factor = self._get_weather_factor(traffic_record.date, location_context)
|
||||
|
||||
# Weekend adjustment
|
||||
weekend_factor = self._get_weekend_factor(traffic_record.date)
|
||||
|
||||
# Combined calculation
|
||||
pedestrian_count = int(
|
||||
base_pedestrians *
|
||||
time_factor *
|
||||
district_factor *
|
||||
traffic_factor *
|
||||
weather_factor *
|
||||
weekend_factor
|
||||
)
|
||||
|
||||
# Ensure reasonable bounds
|
||||
pedestrian_count = max(10, min(2000, pedestrian_count))
|
||||
|
||||
# Metadata for model training
|
||||
inference_metadata = {
|
||||
'base_pedestrians': base_pedestrians,
|
||||
'time_factor': time_factor,
|
||||
'district_factor': district_factor,
|
||||
'traffic_factor': traffic_factor,
|
||||
'weather_factor': weather_factor,
|
||||
'weekend_factor': weekend_factor,
|
||||
'inferred_district': district,
|
||||
'hour': hour,
|
||||
'road_type': road_type
|
||||
}
|
||||
|
||||
return pedestrian_count, inference_metadata
|
||||
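# Minimal usage sketch (all values are hypothetical; the arithmetic follows the factors above):
#
#   record = TrafficRecord(
#       date=datetime(2024, 5, 14, 13, 0),  # a Tuesday, lunch peak
#       traffic_volume=850, occupation_percentage=45, load_percentage=50,
#       average_speed=30, congestion_level='medium', pedestrian_count=0,
#       measurement_point_id='PM1001', measurement_point_name='Calle de Alcala',
#       road_type='URB', source='example')
#   count, meta = MadridTrafficAnalyzer().calculate_pedestrian_flow(
#       record, {'latitude': 40.418, 'longitude': -3.700})
#   # base 250 (URB) * 2.5 (lunch peak) * 2.5 (Centro) * 1.3 (30-70% urban load)
#   # * 1.1 (May weather) * 1.0 (weekday) ≈ 2234 -> clamped to the 2000 upper bound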
|
||||
def _get_time_pattern_factor(self, hour: int) -> float:
|
||||
"""Get time-based pedestrian activity multiplier"""
|
||||
for pattern, config in self.TIME_PATTERNS.items():
|
||||
if hour in config['hours']:
|
||||
return config['multiplier']
|
||||
return 1.0 # Default multiplier
|
||||
|
||||
def _calculate_traffic_correlation(self, traffic_record: TrafficRecord) -> float:
|
||||
"""
|
||||
Calculate pedestrian correlation with traffic patterns
|
||||
Higher traffic in urban areas often correlates with more pedestrians
|
||||
"""
|
||||
if traffic_record.road_type == 'URB':
|
||||
# Urban areas: moderate traffic indicates commercial activity
|
||||
if 30 <= traffic_record.load_percentage <= 70:
|
||||
return 1.3 # Sweet spot for pedestrian activity
|
||||
elif traffic_record.load_percentage > 70:
|
||||
return 0.9 # Too congested, pedestrians avoid
|
||||
else:
|
||||
return 1.0 # Normal correlation
|
||||
else:
|
||||
# Highway/ring roads: more traffic = fewer pedestrians
|
||||
if traffic_record.load_percentage > 60:
|
||||
return 0.5
|
||||
else:
|
||||
return 0.8
|
||||
|
||||
def _get_weather_factor(self, date: datetime, location_context: Optional[Dict] = None) -> float:
|
||||
"""Estimate weather impact on pedestrian activity"""
|
||||
# Simplified weather inference based on season and typical Madrid patterns
|
||||
month = date.month
|
||||
|
||||
# Madrid seasonal patterns
|
||||
if month in [12, 1, 2]: # Winter - cold weather impact
|
||||
return self.WEATHER_IMPACT['cold_weather']
|
||||
elif month in [7, 8]: # Summer - hot weather impact
|
||||
return self.WEATHER_IMPACT['hot_weather']
|
||||
elif month in [10, 11, 3, 4]: # Rainy seasons - moderate impact
|
||||
return 0.85
|
||||
else: # Spring/early summer - optimal weather
|
||||
return 1.1
|
||||
|
||||
def _get_weekend_factor(self, date: datetime) -> float:
|
||||
"""Weekend vs weekday pedestrian patterns"""
|
||||
weekday = date.weekday()
|
||||
hour = date.hour
|
||||
|
||||
if weekday >= 5: # Weekend
|
||||
if 11 <= hour <= 16: # Weekend shopping/leisure hours
|
||||
return 1.4
|
||||
elif 20 <= hour <= 23: # Weekend evening activity
|
||||
return 1.3
|
||||
else:
|
||||
return 0.9
|
||||
else: # Weekday
|
||||
return 1.0
|
||||
|
||||
def infer_district_from_location(self, location_context: Optional[Dict] = None) -> Optional[str]:
|
||||
"""
|
||||
Infer Madrid district from location context or coordinates
|
||||
"""
|
||||
if not location_context:
|
||||
return None
|
||||
|
||||
lat = location_context.get('latitude')
|
||||
lon = location_context.get('longitude')
|
||||
|
||||
if lat is None or lon is None:
|
||||
return None
|
||||
|
||||
# Madrid district boundaries (simplified boundaries for inference)
|
||||
districts = {
|
||||
# Central districts
|
||||
'Centro': {'lat_min': 40.405, 'lat_max': 40.425, 'lon_min': -3.720, 'lon_max': -3.690},
|
||||
'Arganzuela': {'lat_min': 40.385, 'lat_max': 40.410, 'lon_min': -3.720, 'lon_max': -3.680},
|
||||
'Retiro': {'lat_min': 40.405, 'lat_max': 40.425, 'lon_min': -3.690, 'lon_max': -3.660},
|
||||
'Salamanca': {'lat_min': 40.420, 'lat_max': 40.445, 'lon_min': -3.690, 'lon_max': -3.660},
|
||||
'Chamartín': {'lat_min': 40.445, 'lat_max': 40.480, 'lon_min': -3.690, 'lon_max': -3.660},
|
||||
'Tetuán': {'lat_min': 40.445, 'lat_max': 40.470, 'lon_min': -3.720, 'lon_max': -3.690},
|
||||
'Chamberí': {'lat_min': 40.425, 'lat_max': 40.450, 'lon_min': -3.720, 'lon_max': -3.690},
|
||||
'Fuencarral-El Pardo': {'lat_min': 40.470, 'lat_max': 40.540, 'lon_min': -3.750, 'lon_max': -3.650},
|
||||
'Moncloa-Aravaca': {'lat_min': 40.430, 'lat_max': 40.480, 'lon_min': -3.750, 'lon_max': -3.720},
|
||||
'Latina': {'lat_min': 40.380, 'lat_max': 40.420, 'lon_min': -3.750, 'lon_max': -3.720},
|
||||
'Carabanchel': {'lat_min': 40.350, 'lat_max': 40.390, 'lon_min': -3.750, 'lon_max': -3.720},
|
||||
'Usera': {'lat_min': 40.350, 'lat_max': 40.385, 'lon_min': -3.720, 'lon_max': -3.690},
|
||||
'Puente de Vallecas': {'lat_min': 40.370, 'lat_max': 40.410, 'lon_min': -3.680, 'lon_max': -3.640},
|
||||
'Moratalaz': {'lat_min': 40.400, 'lat_max': 40.430, 'lon_min': -3.650, 'lon_max': -3.620},
|
||||
'Ciudad Lineal': {'lat_min': 40.430, 'lat_max': 40.460, 'lon_min': -3.650, 'lon_max': -3.620},
|
||||
'Hortaleza': {'lat_min': 40.460, 'lat_max': 40.500, 'lon_min': -3.650, 'lon_max': -3.620},
|
||||
'Villaverde': {'lat_min': 40.320, 'lat_max': 40.360, 'lon_min': -3.720, 'lon_max': -3.680},
|
||||
}
|
||||
|
||||
# Find matching district
|
||||
for district_name, bounds in districts.items():
|
||||
if (bounds['lat_min'] <= lat <= bounds['lat_max'] and
|
||||
bounds['lon_min'] <= lon <= bounds['lon_max']):
|
||||
return district_name
|
||||
|
||||
# Default for coordinates in Madrid but not matching specific districts
|
||||
if 40.3 <= lat <= 40.6 and -3.8 <= lon <= -3.5:
|
||||
return 'Other Madrid'
|
||||
|
||||
return None
|
||||
|
||||
def classify_road_type(self, measurement_point_name: str) -> str:
|
||||
"""Classify road type based on measurement point name"""
|
||||
if not measurement_point_name:
|
||||
return 'URB' # Default to urban
|
||||
|
||||
name_upper = measurement_point_name.upper()
|
||||
|
||||
# Highway patterns
|
||||
if any(pattern in name_upper for pattern in ['A-', 'AP-', 'AUTOPISTA', 'AUTOVIA']):
|
||||
return 'A'
|
||||
|
||||
# M-30 Ring road
|
||||
if 'M-30' in name_upper or 'M30' in name_upper:
|
||||
return 'M30'
|
||||
|
||||
# Other M roads (ring roads)
|
||||
if re.search(r'M-[0-9]', name_upper) or re.search(r'M[0-9]', name_upper):
|
||||
return 'C30'
|
||||
|
||||
# Radial roads (R-1, R-2, etc.)
|
||||
if re.search(r'R-[0-9]', name_upper) or 'RADIAL' in name_upper:
|
||||
return 'R'
|
||||
|
||||
# Default to urban street
|
||||
return 'URB'
|
||||
|
||||
def validate_madrid_coordinates(self, lat: float, lon: float) -> bool:
|
||||
"""Validate coordinates are within Madrid bounds"""
|
||||
# Madrid metropolitan area bounds
|
||||
return 40.3 <= lat <= 40.6 and -3.8 <= lon <= -3.5
|
||||
|
||||
def get_congestion_level(self, occupation_pct: float) -> str:
|
||||
"""Convert occupation percentage to congestion level"""
|
||||
if occupation_pct >= 80:
|
||||
return CongestionLevel.BLOCKED.value
|
||||
elif occupation_pct >= 50:
|
||||
return CongestionLevel.HIGH.value
|
||||
elif occupation_pct >= 25:
|
||||
return CongestionLevel.MEDIUM.value
|
||||
else:
|
||||
return CongestionLevel.LOW.value
|
||||
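# Resulting mapping: occupation < 25% -> "low", 25-49% -> "medium", 50-79% -> "high", >= 80% -> "blocked".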
|
||||
def calculate_distance(self, lat1: float, lon1: float, lat2: float, lon2: float) -> float:
|
||||
"""Calculate distance between two points in kilometers using Haversine formula"""
|
||||
R = 6371 # Earth's radius in kilometers
|
||||
|
||||
dlat = math.radians(lat2 - lat1)
|
||||
dlon = math.radians(lon2 - lon1)
|
||||
a = (math.sin(dlat/2) * math.sin(dlat/2) +
|
||||
math.cos(math.radians(lat1)) * math.cos(math.radians(lat2)) *
|
||||
math.sin(dlon/2) * math.sin(dlon/2))
|
||||
c = 2 * math.atan2(math.sqrt(a), math.sqrt(1-a))
|
||||
|
||||
return R * c
|
||||
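# Quick sanity check of the formula above (rounded, for reference only):
#
#   MadridTrafficAnalyzer().calculate_distance(40.4168, -3.7038, 41.3851, 2.1734)
#   # ≈ 505 km, roughly the straight-line Madrid-Barcelona distance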
|
||||
def find_nearest_traffic_point(self, traffic_points: List[Dict[str, Any]],
|
||||
latitude: float, longitude: float) -> Optional[Dict[str, Any]]:
|
||||
"""Find the nearest traffic point to given coordinates"""
|
||||
if not traffic_points:
|
||||
return None
|
||||
|
||||
min_distance = float('inf')
|
||||
nearest_point = None
|
||||
|
||||
for point in traffic_points:
|
||||
point_lat = point.get('latitude')
|
||||
point_lon = point.get('longitude')
|
||||
|
||||
if point_lat and point_lon:
|
||||
distance = self.calculate_distance(latitude, longitude, point_lat, point_lon)
|
||||
if distance < min_distance:
|
||||
min_distance = distance
|
||||
nearest_point = point
|
||||
|
||||
return nearest_point
|
||||
|
||||
def find_nearest_measurement_points(self, measurement_points: Dict[str, Dict[str, Any]],
|
||||
latitude: float, longitude: float,
|
||||
num_points: int = 3, max_distance_km: Optional[float] = 5.0) -> List[Tuple[str, Dict[str, Any], float]]:
|
||||
"""Find nearest measurement points for historical data"""
|
||||
distances = []
|
||||
|
||||
for point_id, point_data in measurement_points.items():
|
||||
point_lat = point_data.get('latitude')
|
||||
point_lon = point_data.get('longitude')
|
||||
|
||||
if point_lat and point_lon:
|
||||
distance_km = self.calculate_distance(latitude, longitude, point_lat, point_lon)
|
||||
distances.append((point_id, point_data, distance_km))
|
||||
|
||||
# Sort by distance and take nearest points
|
||||
distances.sort(key=lambda x: x[2])
|
||||
|
||||
# Apply distance filter if specified
|
||||
if max_distance_km is not None:
|
||||
distances = [p for p in distances if p[2] <= max_distance_km]
|
||||
|
||||
nearest = distances[:num_points]
|
||||
|
||||
self.logger.info("Found nearest measurement points",
|
||||
count=len(nearest),
|
||||
nearest_distance_km=nearest[0][2] if nearest else None)
|
||||
|
||||
return nearest
|
||||
478
services/external/app/external/processors/madrid_processor.py
vendored
Normal file
478
services/external/app/external/processors/madrid_processor.py
vendored
Normal file
@@ -0,0 +1,478 @@
|
||||
# ================================================================
|
||||
# services/external/app/external/processors/madrid_processor.py
|
||||
# ================================================================
|
||||
"""
|
||||
Data transformation and parsing for Madrid traffic data
|
||||
Handles XML parsing, CSV processing, coordinate conversion, and data quality scoring
|
||||
"""
|
||||
|
||||
import csv
|
||||
import io
|
||||
import math
|
||||
import re
|
||||
import xml.etree.ElementTree as ET
|
||||
import zipfile
|
||||
from datetime import datetime, timezone
|
||||
from typing import Dict, List, Any, Optional, Tuple
|
||||
import structlog
|
||||
import pyproj
|
||||
|
||||
from ..models.madrid_models import TrafficRecord, MeasurementPoint, CongestionLevel
|
||||
|
||||
|
||||
class MadridTrafficDataProcessor:
|
||||
"""Handles all data transformation and parsing for Madrid traffic data"""
|
||||
|
||||
def __init__(self):
|
||||
self.logger = structlog.get_logger()
|
||||
# UTM Zone 30N (Madrid's coordinate system)
|
||||
self.utm_proj = pyproj.Proj(proj='utm', zone=30, ellps='WGS84', datum='WGS84')
|
||||
self.wgs84_proj = pyproj.Proj(proj='latlong', ellps='WGS84', datum='WGS84')
|
||||
|
||||
def safe_int(self, value: str) -> int:
|
||||
"""Safely convert string to int"""
|
||||
try:
|
||||
return int(float(value.replace(',', '.')))
|
||||
except (ValueError, TypeError):
|
||||
return 0
|
||||
|
||||
def _safe_float(self, value: str) -> float:
|
||||
"""Safely convert string to float"""
|
||||
try:
|
||||
return float(value.replace(',', '.'))
|
||||
except (ValueError, TypeError):
|
||||
return 0.0
|
||||
|
||||
def clean_madrid_xml(self, xml_content: str) -> str:
|
||||
"""Clean and prepare Madrid XML content for parsing"""
|
||||
if not xml_content:
|
||||
return ""
|
||||
|
||||
# Remove BOM and extra whitespace
|
||||
cleaned = xml_content.strip()
|
||||
if cleaned.startswith('\ufeff'):
|
||||
cleaned = cleaned[1:]
|
||||
|
||||
# Fix common XML issues
|
||||
cleaned = re.sub(r'&(?!amp;|lt;|gt;|quot;|apos;)', '&amp;', cleaned)  # escape bare ampersands
|
||||
|
||||
# Ensure proper encoding declaration
|
||||
if not cleaned.startswith('<?xml'):
|
||||
cleaned = '<?xml version="1.0" encoding="UTF-8"?>\n' + cleaned
|
||||
|
||||
return cleaned
|
||||
|
||||
def convert_utm_to_latlon(self, utm_x: str, utm_y: str) -> Tuple[Optional[float], Optional[float]]:
|
||||
"""Convert UTM coordinates to latitude/longitude"""
|
||||
try:
|
||||
utm_x_float = float(utm_x.replace(',', '.'))
|
||||
utm_y_float = float(utm_y.replace(',', '.'))
|
||||
|
||||
# Convert from UTM Zone 30N to WGS84
|
||||
longitude, latitude = pyproj.transform(self.utm_proj, self.wgs84_proj, utm_x_float, utm_y_float)
|
||||
|
||||
# Validate coordinates are in Madrid area
|
||||
if 40.3 <= latitude <= 40.6 and -3.8 <= longitude <= -3.5:
|
||||
return latitude, longitude
|
||||
else:
|
||||
self.logger.debug("Coordinates outside Madrid bounds",
|
||||
lat=latitude, lon=longitude, utm_x=utm_x, utm_y=utm_y)
|
||||
return None, None
|
||||
|
||||
except Exception as e:
|
||||
self.logger.debug("UTM conversion error",
|
||||
utm_x=utm_x, utm_y=utm_y, error=str(e))
|
||||
return None, None
|
||||
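# Note: pyproj.transform() is deprecated (and removed in recent pyproj releases). If the
# service pins a newer pyproj, an equivalent conversion would be the following sketch,
# assuming the same WGS84 / UTM zone 30N source CRS used above:
#
#   from pyproj import Transformer
#   transformer = Transformer.from_crs("EPSG:32630", "EPSG:4326", always_xy=True)
#   longitude, latitude = transformer.transform(utm_x_float, utm_y_float)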
|
||||
def parse_traffic_xml(self, xml_content: str) -> List[Dict[str, Any]]:
|
||||
"""Parse Madrid traffic XML data"""
|
||||
traffic_points = []
|
||||
|
||||
try:
|
||||
cleaned_xml = self.clean_madrid_xml(xml_content)
|
||||
root = ET.fromstring(cleaned_xml)
|
||||
|
||||
self.logger.debug("Madrid XML structure", root_tag=root.tag, children_count=len(list(root)))
|
||||
|
||||
if root.tag == 'pms':
|
||||
pm_elements = root.findall('pm')
|
||||
self.logger.debug("Found PM elements", count=len(pm_elements))
|
||||
|
||||
for pm in pm_elements:
|
||||
try:
|
||||
traffic_point = self._extract_madrid_pm_element(pm)
|
||||
|
||||
if self._is_valid_traffic_point(traffic_point):
|
||||
traffic_points.append(traffic_point)
|
||||
|
||||
# Log first few points for debugging
|
||||
if len(traffic_points) <= 3:
|
||||
self.logger.debug("Sample traffic point",
|
||||
id=traffic_point['idelem'],
|
||||
lat=traffic_point['latitude'],
|
||||
lon=traffic_point['longitude'],
|
||||
intensity=traffic_point.get('intensidad'))
|
||||
|
||||
except Exception as e:
|
||||
self.logger.debug("Error parsing PM element", error=str(e))
|
||||
continue
|
||||
else:
|
||||
self.logger.warning("Unexpected XML root tag", root_tag=root.tag)
|
||||
|
||||
self.logger.debug("Madrid traffic XML parsing completed", valid_points=len(traffic_points))
|
||||
return traffic_points
|
||||
|
||||
except ET.ParseError as e:
|
||||
self.logger.warning("Failed to parse Madrid XML", error=str(e))
|
||||
return self._extract_traffic_data_regex(xml_content)
|
||||
except Exception as e:
|
||||
self.logger.error("Error in Madrid traffic XML parsing", error=str(e))
|
||||
return []
|
||||
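# For reference, the feed parsed above is shaped roughly like this (values illustrative;
# element names are exactly those handled by _extract_madrid_pm_element below):
#
#   <pms>
#     <pm>
#       <idelem>3840</idelem>
#       <descripcion>Example measurement point</descripcion>
#       <intensidad>310</intensidad>
#       <ocupacion>5</ocupacion>
#       <carga>18</carga>
#       <nivelServicio>0</nivelServicio>
#       <st_x>441615,27</st_x>
#       <st_y>4476162,68</st_y>
#       <error>N</error>
#     </pm>
#     ...
#   </pms>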
|
||||
def _extract_madrid_pm_element(self, pm_element) -> Dict[str, Any]:
|
||||
"""Extract traffic data from Madrid <pm> element with coordinate conversion"""
|
||||
try:
|
||||
point_data = {}
|
||||
utm_x = utm_y = None
|
||||
|
||||
# Extract all child elements
|
||||
for child in pm_element:
|
||||
tag, text = child.tag, child.text.strip() if child.text else ''
|
||||
|
||||
if tag == 'idelem':
|
||||
point_data['idelem'] = text
|
||||
elif tag == 'descripcion':
|
||||
point_data['descripcion'] = text
|
||||
elif tag == 'intensidad':
|
||||
point_data['intensidad'] = self.safe_int(text)
|
||||
elif tag == 'ocupacion':
|
||||
point_data['ocupacion'] = self._safe_float(text)
|
||||
elif tag == 'carga':
|
||||
point_data['carga'] = self.safe_int(text)
|
||||
elif tag == 'nivelServicio':
|
||||
point_data['nivelServicio'] = self.safe_int(text)
|
||||
elif tag == 'st_x': # UTM X coordinate
|
||||
utm_x = text
|
||||
point_data['utm_x'] = text
|
||||
elif tag == 'st_y': # UTM Y coordinate
|
||||
utm_y = text
|
||||
point_data['utm_y'] = text
|
||||
elif tag == 'error':
|
||||
point_data['error'] = text
|
||||
elif tag in ['subarea', 'accesoAsociado', 'intensidadSat']:
|
||||
point_data[tag] = text
|
||||
|
||||
# Convert coordinates
|
||||
if utm_x and utm_y:
|
||||
latitude, longitude = self.convert_utm_to_latlon(utm_x, utm_y)
|
||||
|
||||
if latitude and longitude:
|
||||
point_data.update({
|
||||
'latitude': latitude,
|
||||
'longitude': longitude,
|
||||
'measurement_point_id': point_data.get('idelem'),
|
||||
'measurement_point_name': point_data.get('descripcion'),
|
||||
'timestamp': datetime.now(timezone.utc),
|
||||
'source': 'madrid_opendata_xml'
|
||||
})
|
||||
|
||||
return point_data
|
||||
else:
|
||||
self.logger.debug("Invalid coordinates after conversion",
|
||||
idelem=point_data.get('idelem'), utm_x=utm_x, utm_y=utm_y)
|
||||
return {}
|
||||
else:
|
||||
self.logger.debug("Missing UTM coordinates", idelem=point_data.get('idelem'))
|
||||
return {}
|
||||
|
||||
except Exception as e:
|
||||
self.logger.debug("Error extracting PM element", error=str(e))
|
||||
return {}
|
||||
|
||||
def _is_valid_traffic_point(self, traffic_point: Dict[str, Any]) -> bool:
|
||||
"""Validate traffic point data"""
|
||||
required_fields = ['idelem', 'latitude', 'longitude']
|
||||
return all(field in traffic_point and traffic_point[field] for field in required_fields)
|
||||
|
||||
def _extract_traffic_data_regex(self, xml_content: str) -> List[Dict[str, Any]]:
|
||||
"""Fallback regex-based extraction if XML parsing fails"""
|
||||
traffic_points = []
|
||||
|
||||
try:
|
||||
# Pattern to match PM elements
|
||||
pm_pattern = r'<pm>(.*?)</pm>'
|
||||
pm_matches = re.findall(pm_pattern, xml_content, re.DOTALL)
|
||||
|
||||
for pm_content in pm_matches:
|
||||
traffic_point = {}
|
||||
|
||||
# Extract key fields
|
||||
patterns = {
|
||||
'idelem': r'<idelem>(.*?)</idelem>',
|
||||
'descripcion': r'<descripcion>(.*?)</descripcion>',
|
||||
'intensidad': r'<intensidad>(.*?)</intensidad>',
|
||||
'ocupacion': r'<ocupacion>(.*?)</ocupacion>',
|
||||
'st_x': r'<st_x>(.*?)</st_x>',
|
||||
'st_y': r'<st_y>(.*?)</st_y>'
|
||||
}
|
||||
|
||||
for field, pattern in patterns.items():
|
||||
match = re.search(pattern, pm_content)
|
||||
if match:
|
||||
traffic_point[field] = match.group(1).strip()
|
||||
|
||||
# Convert coordinates
|
||||
if 'st_x' in traffic_point and 'st_y' in traffic_point:
|
||||
latitude, longitude = self.convert_utm_to_latlon(
|
||||
traffic_point['st_x'], traffic_point['st_y']
|
||||
)
|
||||
|
||||
if latitude and longitude:
|
||||
traffic_point.update({
|
||||
'latitude': latitude,
|
||||
'longitude': longitude,
|
||||
'intensidad': self.safe_int(traffic_point.get('intensidad', '0')),
|
||||
'ocupacion': self._safe_float(traffic_point.get('ocupacion', '0')),
|
||||
'measurement_point_id': traffic_point.get('idelem'),
|
||||
'measurement_point_name': traffic_point.get('descripcion'),
|
||||
'timestamp': datetime.now(timezone.utc),
|
||||
'source': 'madrid_opendata_xml_regex'
|
||||
})
|
||||
|
||||
traffic_points.append(traffic_point)
|
||||
|
||||
self.logger.debug("Regex extraction completed", points=len(traffic_points))
|
||||
return traffic_points
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error("Error in regex extraction", error=str(e))
|
||||
return []
|
||||
|
||||
def parse_measurement_points_csv(self, csv_content: str) -> Dict[str, Dict[str, Any]]:
|
||||
"""Parse measurement points CSV into lookup dictionary"""
|
||||
measurement_points = {}
|
||||
|
||||
try:
|
||||
# Parse CSV with semicolon delimiter
|
||||
csv_reader = csv.DictReader(io.StringIO(csv_content), delimiter=';')
|
||||
|
||||
processed_count = 0
|
||||
for row in csv_reader:
|
||||
try:
|
||||
# Extract point ID and coordinates
|
||||
point_id = row.get('id', '').strip()
|
||||
if not point_id:
|
||||
continue
|
||||
|
||||
processed_count += 1
|
||||
|
||||
# Try different coordinate field names
|
||||
lat_str = ''
|
||||
lon_str = ''
|
||||
|
||||
# Common coordinate field patterns
|
||||
lat_fields = ['lat', 'latitude', 'latitud', 'y', 'utm_y']
|
||||
lon_fields = ['lon', 'lng', 'longitude', 'longitud', 'x', 'utm_x']
|
||||
|
||||
for field in lat_fields:
|
||||
if field in row and row[field].strip():
|
||||
lat_str = row[field].strip()
|
||||
break
|
||||
|
||||
for field in lon_fields:
|
||||
if field in row and row[field].strip():
|
||||
lon_str = row[field].strip()
|
||||
break
|
||||
|
||||
if lat_str and lon_str:
|
||||
try:
|
||||
# Try direct lat/lon first
|
||||
latitude = self._safe_float(lat_str)
|
||||
longitude = self._safe_float(lon_str)
|
||||
|
||||
# If values look like UTM coordinates, convert them
|
||||
if latitude > 1000 or longitude > 1000:
|
||||
latitude, longitude = self.convert_utm_to_latlon(lon_str, lat_str)
|
||||
if not latitude or not longitude:
|
||||
continue
|
||||
|
||||
# Validate Madrid area
|
||||
if not (40.3 <= latitude <= 40.6 and -3.8 <= longitude <= -3.5):
|
||||
continue
|
||||
|
||||
measurement_points[point_id] = {
|
||||
'id': point_id,
|
||||
'latitude': latitude,
|
||||
'longitude': longitude,
|
||||
'name': row.get('nombre', row.get('descripcion', f"Point {point_id}")),
|
||||
'type': row.get('tipo', 'traffic'),
|
||||
'raw_data': dict(row) # Keep original data
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
self.logger.debug("Error processing point coordinates",
|
||||
point_id=point_id, error=str(e))
|
||||
continue
|
||||
|
||||
except Exception as e:
|
||||
self.logger.debug("Error processing CSV row", error=str(e))
|
||||
continue
|
||||
|
||||
self.logger.info("Parsed measurement points registry",
|
||||
total_points=len(measurement_points))
|
||||
return measurement_points
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error("Error parsing measurement points CSV", error=str(e))
|
||||
return {}
|
||||
|
||||
def calculate_data_quality_score(self, row: Dict[str, str]) -> float:
|
||||
"""Calculate data quality score for a traffic record"""
|
||||
try:
|
||||
score = 1.0
|
||||
|
||||
# Check for missing or invalid values
|
||||
intensidad = row.get('intensidad', '').strip()
|
||||
if not intensidad or intensidad in ['N', '', '0']:
|
||||
score *= 0.7
|
||||
|
||||
ocupacion = row.get('ocupacion', '').strip()
|
||||
if not ocupacion or ocupacion in ['N', '', '0']:
|
||||
score *= 0.8
|
||||
|
||||
error_status = row.get('error', '').strip()
|
||||
if error_status and error_status != 'N':
|
||||
score *= 0.6
|
||||
|
||||
# Check for reasonable value ranges
|
||||
try:
|
||||
intensidad_val = self.safe_int(intensidad)
|
||||
if intensidad_val < 0 or intensidad_val > 5000: # Unrealistic traffic volume
|
||||
score *= 0.7
|
||||
|
||||
ocupacion_val = self.safe_int(ocupacion)
|
||||
if ocupacion_val < 0 or ocupacion_val > 100: # Invalid percentage
|
||||
score *= 0.5
|
||||
|
||||
except (ValueError, TypeError):
|
||||
score *= 0.6
|
||||
|
||||
return max(0.1, score) # Minimum quality score
|
||||
|
||||
except Exception as e:
|
||||
self.logger.debug("Error calculating quality score", error=str(e))
|
||||
return 0.5 # Default medium quality
|
||||
|
||||
async def process_csv_content_chunked(self, text_content: str, csv_filename: str,
|
||||
nearest_ids: set, nearest_points: list) -> list:
|
||||
"""Process CSV content in chunks to prevent memory issues"""
|
||||
import csv
|
||||
import io
|
||||
import gc
|
||||
|
||||
try:
|
||||
csv_reader = csv.DictReader(io.StringIO(text_content), delimiter=';')
|
||||
|
||||
chunk_size = 10000
|
||||
chunk_records = []
|
||||
all_records = []
|
||||
processed_count = 0
|
||||
total_rows_seen = 0
|
||||
|
||||
for row in csv_reader:
|
||||
total_rows_seen += 1
|
||||
measurement_point_id = row.get('id', '').strip()
|
||||
|
||||
if measurement_point_id not in nearest_ids:
|
||||
continue
|
||||
|
||||
try:
|
||||
record_data = await self.parse_historical_csv_row(row, nearest_points)
|
||||
|
||||
if record_data:
|
||||
chunk_records.append(record_data)
|
||||
processed_count += 1
|
||||
|
||||
if len(chunk_records) >= chunk_size:
|
||||
all_records.extend(chunk_records)
|
||||
chunk_records = []
|
||||
gc.collect()
|
||||
|
||||
except Exception as e:
|
||||
if processed_count < 5:
|
||||
self.logger.error("Row parsing exception",
|
||||
row_num=total_rows_seen,
|
||||
measurement_point_id=measurement_point_id,
|
||||
error=str(e))
|
||||
continue
|
||||
|
||||
# Process remaining records
|
||||
if chunk_records:
|
||||
all_records.extend(chunk_records)
|
||||
chunk_records = []
|
||||
gc.collect()
|
||||
|
||||
self.logger.info("Processed CSV file",
|
||||
filename=csv_filename,
|
||||
total_rows_read=total_rows_seen,
|
||||
processed_records=processed_count)
|
||||
|
||||
return all_records
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error("Error processing CSV content",
|
||||
filename=csv_filename, error=str(e))
|
||||
return []
|
||||
|
||||
async def parse_historical_csv_row(self, row: dict, nearest_points: list) -> dict | None:
|
||||
"""Parse a single row from Madrid's historical traffic CSV"""
|
||||
try:
|
||||
# Extract date
|
||||
fecha_str = row.get('fecha', '').strip()
|
||||
if not fecha_str:
|
||||
return None
|
||||
|
||||
try:
|
||||
from datetime import datetime, timezone
|
||||
date_obj = datetime.strptime(fecha_str, '%Y-%m-%d %H:%M:%S')
|
||||
date_obj = date_obj.replace(tzinfo=timezone.utc)
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
measurement_point_id = row.get('id', '').strip()
|
||||
|
||||
# Find point data
|
||||
point_match = next((p for p in nearest_points if p[0] == measurement_point_id), None)
|
||||
if not point_match:
|
||||
return None
|
||||
|
||||
point_data = point_match[1]
|
||||
distance_km = point_match[2]
|
||||
|
||||
# Extract traffic data
|
||||
intensidad = self.safe_int(row.get('intensidad', '0'))
|
||||
ocupacion = self.safe_int(row.get('ocupacion', '0'))
|
||||
carga = self.safe_int(row.get('carga', '0'))
|
||||
vmed = self.safe_int(row.get('vmed', '0'))
|
||||
|
||||
# Build basic result (business logic will be applied elsewhere)
|
||||
result = {
|
||||
'date': date_obj,
|
||||
'measurement_point_id': measurement_point_id,
|
||||
'point_data': point_data,
|
||||
'distance_km': distance_km,
|
||||
'traffic_data': {
|
||||
'intensidad': intensidad,
|
||||
'ocupacion': ocupacion,
|
||||
'carga': carga,
|
||||
'vmed': vmed
|
||||
},
|
||||
'data_quality_score': self.calculate_data_quality_score(row),
|
||||
'raw_row': row
|
||||
}
|
||||
|
||||
return result
|
||||
|
||||
except Exception as e:
|
||||
self.logger.debug("Error parsing historical CSV row", error=str(e))
|
||||
return None
|
||||
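# ----------------------------------------------------------------
# Worked example (illustrative, not part of the client): how the
# multipliers in calculate_data_quality_score combine for two rows.
# `client` stands for an instance of this Madrid traffic client, and
# safe_int('') is assumed to return 0 so the range checks add no
# further penalty.
#
#   row_ok       = {'intensidad': '350', 'ocupacion': '12', 'error': 'N'}
#   row_degraded = {'intensidad': '',    'ocupacion': '12', 'error': 'S'}
#
#   client.calculate_data_quality_score(row_ok)        -> 1.0
#   client.calculate_data_quality_score(row_degraded)  -> 1.0 * 0.7 * 0.6 = 0.42
# ----------------------------------------------------------------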
186
services/external/app/main.py
vendored
Normal file
186
services/external/app/main.py
vendored
Normal file
@@ -0,0 +1,186 @@
|
||||
# services/external/app/main.py
|
||||
"""
|
||||
External Service Main Application
|
||||
"""
|
||||
|
||||
import structlog
|
||||
from contextlib import asynccontextmanager
|
||||
from fastapi import FastAPI, Request
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import JSONResponse
|
||||
|
||||
from app.core.config import settings
|
||||
from app.core.database import init_db, close_db
|
||||
from shared.monitoring import setup_logging, HealthChecker
|
||||
from shared.monitoring.metrics import setup_metrics_early
|
||||
|
||||
# Setup logging first
|
||||
setup_logging("external-service", settings.LOG_LEVEL)
|
||||
logger = structlog.get_logger()
|
||||
|
||||
# Global variables for lifespan access
|
||||
metrics_collector = None
|
||||
health_checker = None
|
||||
|
||||
# Create FastAPI app FIRST
|
||||
app = FastAPI(
|
||||
title="Bakery External Data Service",
|
||||
description="External data collection service for weather, traffic, and events data",
|
||||
version="1.0.0"
|
||||
)
|
||||
|
||||
# Setup metrics BEFORE any middleware and BEFORE lifespan
|
||||
metrics_collector = setup_metrics_early(app, "external-service")
|
||||
|
||||
@asynccontextmanager
|
||||
async def lifespan(app: FastAPI):
|
||||
"""Application lifespan events"""
|
||||
global health_checker
|
||||
|
||||
# Startup
|
||||
logger.info("Starting External Service...")
|
||||
|
||||
try:
|
||||
# Initialize database
|
||||
await init_db()
|
||||
logger.info("Database initialized")
|
||||
|
||||
# Register custom metrics
|
||||
metrics_collector.register_counter("weather_api_calls_total", "Total weather API calls")
|
||||
metrics_collector.register_counter("weather_api_success_total", "Successful weather API calls")
|
||||
metrics_collector.register_counter("weather_api_failures_total", "Failed weather API calls")
|
||||
|
||||
metrics_collector.register_counter("traffic_api_calls_total", "Total traffic API calls")
|
||||
metrics_collector.register_counter("traffic_api_success_total", "Successful traffic API calls")
|
||||
metrics_collector.register_counter("traffic_api_failures_total", "Failed traffic API calls")
|
||||
|
||||
metrics_collector.register_counter("data_collection_jobs_total", "Data collection jobs")
|
||||
metrics_collector.register_counter("data_records_stored_total", "Data records stored")
|
||||
metrics_collector.register_counter("data_quality_issues_total", "Data quality issues detected")
|
||||
|
||||
metrics_collector.register_histogram("weather_api_duration_seconds", "Weather API call duration")
|
||||
metrics_collector.register_histogram("traffic_api_duration_seconds", "Traffic API call duration")
|
||||
metrics_collector.register_histogram("data_collection_duration_seconds", "Data collection job duration")
|
||||
metrics_collector.register_histogram("data_processing_duration_seconds", "Data processing duration")
|
||||
|
||||
# Setup health checker
|
||||
health_checker = HealthChecker("external-service")
|
||||
|
||||
# Add database health check
|
||||
async def check_database():
|
||||
try:
|
||||
from app.core.database import get_db
|
||||
from sqlalchemy import text
|
||||
async for db in get_db():
|
||||
await db.execute(text("SELECT 1"))
|
||||
return True
|
||||
except Exception as e:
|
||||
return f"Database error: {e}"
|
||||
|
||||
# Add external API health checks
|
||||
async def check_weather_api():
|
||||
try:
|
||||
# Simple connectivity check
|
||||
if settings.AEMET_API_KEY:
|
||||
return True
|
||||
else:
|
||||
return "AEMET API key not configured"
|
||||
except Exception as e:
|
||||
return f"Weather API error: {e}"
|
||||
|
||||
async def check_traffic_api():
|
||||
try:
|
||||
# Simple connectivity check
|
||||
if settings.MADRID_OPENDATA_API_KEY:
|
||||
return True
|
||||
else:
|
||||
return "Madrid Open Data API key not configured"
|
||||
except Exception as e:
|
||||
return f"Traffic API error: {e}"
|
||||
|
||||
health_checker.add_check("database", check_database, timeout=5.0, critical=True)
|
||||
health_checker.add_check("weather_api", check_weather_api, timeout=10.0, critical=False)
|
||||
health_checker.add_check("traffic_api", check_traffic_api, timeout=10.0, critical=False)
|
||||
|
||||
# Store health checker in app state
|
||||
app.state.health_checker = health_checker
|
||||
|
||||
logger.info("External Service started successfully")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to start External Service: {e}")
|
||||
raise
|
||||
|
||||
yield
|
||||
|
||||
# Shutdown
|
||||
logger.info("Shutting down External Service...")
|
||||
await close_db()
|
||||
|
||||
# Set lifespan AFTER metrics setup
|
||||
app.router.lifespan_context = lifespan
|
||||
|
||||
# CORS middleware (added after metrics setup)
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=settings.CORS_ORIGINS,
|
||||
allow_credentials=True,
|
||||
allow_methods=["*"],
|
||||
allow_headers=["*"],
|
||||
)
|
||||
|
||||
# Include routers
|
||||
from app.api.weather import router as weather_router
|
||||
from app.api.traffic import router as traffic_router
|
||||
app.include_router(weather_router, prefix="/api/v1", tags=["weather"])
|
||||
app.include_router(traffic_router, prefix="/api/v1", tags=["traffic"])
|
||||
|
||||
# Health check endpoint
|
||||
@app.get("/health")
|
||||
async def health_check():
|
||||
"""Comprehensive health check endpoint"""
|
||||
if health_checker:
|
||||
return await health_checker.check_health()
|
||||
else:
|
||||
return {
|
||||
"service": "external-service",
|
||||
"status": "healthy",
|
||||
"version": "1.0.0"
|
||||
}
|
||||
|
||||
# Root endpoint
|
||||
@app.get("/")
|
||||
async def root():
|
||||
"""Root endpoint"""
|
||||
return {
|
||||
"service": "External Data Service",
|
||||
"version": "1.0.0",
|
||||
"status": "running",
|
||||
"endpoints": {
|
||||
"health": "/health",
|
||||
"docs": "/docs",
|
||||
"weather": "/api/v1/weather",
|
||||
"traffic": "/api/v1/traffic",
|
||||
"jobs": "/api/v1/jobs"
|
||||
},
|
||||
"data_sources": {
|
||||
"weather": "AEMET (Spanish Weather Service)",
|
||||
"traffic": "Madrid Open Data Portal",
|
||||
"coverage": "Madrid, Spain"
|
||||
}
|
||||
}
|
||||
|
||||
# Exception handlers
|
||||
@app.exception_handler(Exception)
|
||||
async def global_exception_handler(request: Request, exc: Exception):
|
||||
"""Global exception handler with metrics"""
|
||||
logger.error(f"Unhandled exception: {exc}", exc_info=True)
|
||||
|
||||
# Record error metric if available
|
||||
if metrics_collector:
|
||||
metrics_collector.increment_counter("errors_total", labels={"type": "unhandled"})
|
||||
|
||||
return JSONResponse(
|
||||
status_code=500,
|
||||
content={"detail": "Internal server error"}
|
||||
)
|
||||
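# ----------------------------------------------------------------
# Illustrative smoke test for the endpoints above (not part of the
# service). Assumes the service is reachable on localhost:8000;
# adjust the base URL for your deployment.
# ----------------------------------------------------------------
import asyncio

import httpx


async def _smoke_test() -> None:
    async with httpx.AsyncClient(base_url="http://localhost:8000") as client:
        root = await client.get("/")          # service metadata and endpoint map
        health = await client.get("/health")  # HealthChecker result or static fallback
        print(root.json().get("status"), health.status_code)


if __name__ == "__main__":
    asyncio.run(_smoke_test())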
1
services/external/app/models/__init__.py
vendored
Normal file
1
services/external/app/models/__init__.py
vendored
Normal file
@@ -0,0 +1 @@
|
||||
# services/external/app/models/__init__.py
|
||||
294
services/external/app/models/traffic.py
vendored
Normal file
294
services/external/app/models/traffic.py
vendored
Normal file
@@ -0,0 +1,294 @@
|
||||
# ================================================================
|
||||
# services/external/app/models/traffic.py - Enhanced for Multiple Cities
|
||||
# ================================================================
|
||||
"""
|
||||
Flexible traffic data models supporting multiple cities and extensible schemas
|
||||
"""
|
||||
|
||||
from sqlalchemy import Column, String, DateTime, Float, Integer, Text, Index, Boolean, JSON
|
||||
from sqlalchemy.dialects.postgresql import UUID
|
||||
import uuid
|
||||
from datetime import datetime, timezone
|
||||
from typing import Dict, Any, Optional
|
||||
|
||||
from shared.database.base import Base
|
||||
|
||||
|
||||
class TrafficData(Base):
|
||||
"""
|
||||
Flexible traffic data model supporting multiple cities
|
||||
Designed to accommodate varying data structures across different cities
|
||||
"""
|
||||
__tablename__ = "traffic_data"
|
||||
|
||||
# Primary identification
|
||||
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
|
||||
|
||||
# Location and temporal data
|
||||
location_id = Column(String(100), nullable=False, index=True) # "lat,lon" or city-specific ID
|
||||
city = Column(String(50), nullable=False, index=True) # madrid, barcelona, valencia, etc.
|
||||
date = Column(DateTime(timezone=True), nullable=False, index=True)
|
||||
|
||||
# Core standardized traffic metrics (common across all cities)
|
||||
traffic_volume = Column(Integer, nullable=True) # Vehicle count or intensity
|
||||
congestion_level = Column(String(20), nullable=True) # low, medium, high, blocked
|
||||
average_speed = Column(Float, nullable=True) # Average speed in km/h
|
||||
|
||||
# Enhanced metrics (may not be available for all cities)
|
||||
occupation_percentage = Column(Float, nullable=True) # Road occupation %
|
||||
load_percentage = Column(Float, nullable=True) # Traffic load %
|
||||
pedestrian_count = Column(Integer, nullable=True) # Estimated pedestrian count
|
||||
|
||||
# Measurement point information
|
||||
measurement_point_id = Column(String(100), nullable=True, index=True)
|
||||
measurement_point_name = Column(String(500), nullable=True)
|
||||
measurement_point_type = Column(String(50), nullable=True) # URB, M30, A, etc.
|
||||
|
||||
# Geographic data
|
||||
latitude = Column(Float, nullable=True)
|
||||
longitude = Column(Float, nullable=True)
|
||||
district = Column(String(100), nullable=True) # City district/area
|
||||
zone = Column(String(100), nullable=True) # Traffic zone or sector
|
||||
|
||||
# Data source and quality
|
||||
source = Column(String(50), nullable=False, default="unknown") # madrid_opendata, synthetic, etc.
|
||||
data_quality_score = Column(Float, nullable=True) # Quality score 0-100
|
||||
is_synthetic = Column(Boolean, default=False)
|
||||
has_pedestrian_inference = Column(Boolean, default=False)
|
||||
|
||||
# City-specific data (flexible JSON storage)
|
||||
city_specific_data = Column(JSON, nullable=True) # Store city-specific fields
|
||||
|
||||
# Raw data backup
|
||||
raw_data = Column(Text, nullable=True) # Original data for debugging
|
||||
|
||||
# Audit fields
|
||||
tenant_id = Column(UUID(as_uuid=True), nullable=True, index=True) # For multi-tenancy
|
||||
created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
|
||||
updated_at = Column(DateTime(timezone=True),
|
||||
default=lambda: datetime.now(timezone.utc),
|
||||
onupdate=lambda: datetime.now(timezone.utc))
|
||||
|
||||
# Performance-optimized indexes
|
||||
__table_args__ = (
|
||||
# Core query patterns
|
||||
Index('idx_traffic_location_date', 'location_id', 'date'),
|
||||
Index('idx_traffic_city_date', 'city', 'date'),
|
||||
Index('idx_traffic_tenant_date', 'tenant_id', 'date'),
|
||||
|
||||
# Advanced query patterns
|
||||
Index('idx_traffic_city_location', 'city', 'location_id'),
|
||||
Index('idx_traffic_measurement_point', 'city', 'measurement_point_id'),
|
||||
Index('idx_traffic_district_date', 'city', 'district', 'date'),
|
||||
|
||||
# Training data queries
|
||||
Index('idx_traffic_training', 'tenant_id', 'city', 'date', 'is_synthetic'),
|
||||
Index('idx_traffic_quality', 'city', 'data_quality_score', 'date'),
|
||||
)
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
"""Convert model to dictionary for API responses"""
|
||||
result = {
|
||||
'id': str(self.id),
|
||||
'location_id': self.location_id,
|
||||
'city': self.city,
|
||||
'date': self.date.isoformat() if self.date else None,
|
||||
'traffic_volume': self.traffic_volume,
|
||||
'congestion_level': self.congestion_level,
|
||||
'average_speed': self.average_speed,
|
||||
'occupation_percentage': self.occupation_percentage,
|
||||
'load_percentage': self.load_percentage,
|
||||
'pedestrian_count': self.pedestrian_count,
|
||||
'measurement_point_id': self.measurement_point_id,
|
||||
'measurement_point_name': self.measurement_point_name,
|
||||
'measurement_point_type': self.measurement_point_type,
|
||||
'latitude': self.latitude,
|
||||
'longitude': self.longitude,
|
||||
'district': self.district,
|
||||
'zone': self.zone,
|
||||
'source': self.source,
|
||||
'data_quality_score': self.data_quality_score,
|
||||
'is_synthetic': self.is_synthetic,
|
||||
'has_pedestrian_inference': self.has_pedestrian_inference,
|
||||
'created_at': self.created_at.isoformat() if self.created_at else None
|
||||
}
|
||||
|
||||
# Add city-specific data if present
|
||||
if self.city_specific_data:
|
||||
result['city_specific_data'] = self.city_specific_data
|
||||
|
||||
return result
|
||||
|
||||
def get_city_specific_field(self, field_name: str, default: Any = None) -> Any:
|
||||
"""Safely get city-specific field value"""
|
||||
if self.city_specific_data and isinstance(self.city_specific_data, dict):
|
||||
return self.city_specific_data.get(field_name, default)
|
||||
return default
|
||||
|
||||
def set_city_specific_field(self, field_name: str, value: Any) -> None:
|
||||
"""Set city-specific field value"""
|
||||
if not self.city_specific_data:
|
||||
self.city_specific_data = {}
|
||||
if not isinstance(self.city_specific_data, dict):
|
||||
self.city_specific_data = {}
|
||||
self.city_specific_data[field_name] = value
|
||||
|
||||
|
||||
class TrafficMeasurementPoint(Base):
|
||||
"""
|
||||
Registry of traffic measurement points across all cities
|
||||
Supports different city-specific measurement point schemas
|
||||
"""
|
||||
__tablename__ = "traffic_measurement_points"
|
||||
|
||||
# Primary identification
|
||||
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
|
||||
|
||||
# Location and identification
|
||||
city = Column(String(50), nullable=False, index=True)
|
||||
measurement_point_id = Column(String(100), nullable=False, index=True) # City-specific ID
|
||||
name = Column(String(500), nullable=True)
|
||||
description = Column(Text, nullable=True)
|
||||
|
||||
# Geographic information
|
||||
latitude = Column(Float, nullable=False)
|
||||
longitude = Column(Float, nullable=False)
|
||||
district = Column(String(100), nullable=True)
|
||||
zone = Column(String(100), nullable=True)
|
||||
|
||||
# Classification
|
||||
road_type = Column(String(50), nullable=True) # URB, M30, A, etc.
|
||||
measurement_type = Column(String(50), nullable=True) # intensity, speed, etc.
|
||||
point_category = Column(String(50), nullable=True) # urban, highway, ring_road
|
||||
|
||||
# Status and metadata
|
||||
is_active = Column(Boolean, default=True)
|
||||
installation_date = Column(DateTime(timezone=True), nullable=True)
|
||||
last_data_received = Column(DateTime(timezone=True), nullable=True)
|
||||
data_quality_rating = Column(Float, nullable=True) # Average quality 0-100
|
||||
|
||||
# City-specific point data
|
||||
city_specific_metadata = Column(JSON, nullable=True)
|
||||
|
||||
# Audit fields
|
||||
created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
|
||||
updated_at = Column(DateTime(timezone=True),
|
||||
default=lambda: datetime.now(timezone.utc),
|
||||
onupdate=lambda: datetime.now(timezone.utc))
|
||||
|
||||
__table_args__ = (
|
||||
# Ensure unique measurement points per city
|
||||
Index('idx_unique_city_point', 'city', 'measurement_point_id', unique=True),
|
||||
|
||||
# Geographic queries
|
||||
Index('idx_points_city_location', 'city', 'latitude', 'longitude'),
|
||||
Index('idx_points_district', 'city', 'district'),
|
||||
Index('idx_points_road_type', 'city', 'road_type'),
|
||||
|
||||
# Status queries
|
||||
Index('idx_points_active', 'city', 'is_active', 'last_data_received'),
|
||||
)
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
"""Convert measurement point to dictionary"""
|
||||
return {
|
||||
'id': str(self.id),
|
||||
'city': self.city,
|
||||
'measurement_point_id': self.measurement_point_id,
|
||||
'name': self.name,
|
||||
'description': self.description,
|
||||
'latitude': self.latitude,
|
||||
'longitude': self.longitude,
|
||||
'district': self.district,
|
||||
'zone': self.zone,
|
||||
'road_type': self.road_type,
|
||||
'measurement_type': self.measurement_type,
|
||||
'point_category': self.point_category,
|
||||
'is_active': self.is_active,
|
||||
'installation_date': self.installation_date.isoformat() if self.installation_date else None,
|
||||
'last_data_received': self.last_data_received.isoformat() if self.last_data_received else None,
|
||||
'data_quality_rating': self.data_quality_rating,
|
||||
'city_specific_metadata': self.city_specific_metadata,
|
||||
'created_at': self.created_at.isoformat() if self.created_at else None
|
||||
}
|
||||
|
||||
|
||||
class TrafficDataBackgroundJob(Base):
|
||||
"""
|
||||
Track background data collection jobs for multiple cities
|
||||
Supports scheduling and monitoring of data fetching processes
|
||||
"""
|
||||
__tablename__ = "traffic_background_jobs"
|
||||
|
||||
# Primary identification
|
||||
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
|
||||
|
||||
# Job configuration
|
||||
job_type = Column(String(50), nullable=False) # historical_fetch, cleanup, etc.
|
||||
city = Column(String(50), nullable=False, index=True)
|
||||
location_pattern = Column(String(200), nullable=True) # Location pattern or specific coords
|
||||
|
||||
# Scheduling
|
||||
scheduled_at = Column(DateTime(timezone=True), nullable=False)
|
||||
started_at = Column(DateTime(timezone=True), nullable=True)
|
||||
completed_at = Column(DateTime(timezone=True), nullable=True)
|
||||
|
||||
# Status tracking
|
||||
status = Column(String(20), nullable=False, default='pending') # pending, running, completed, failed
|
||||
progress_percentage = Column(Float, default=0.0)
|
||||
records_processed = Column(Integer, default=0)
|
||||
records_stored = Column(Integer, default=0)
|
||||
|
||||
# Date range for data jobs
|
||||
data_start_date = Column(DateTime(timezone=True), nullable=True)
|
||||
data_end_date = Column(DateTime(timezone=True), nullable=True)
|
||||
|
||||
# Results and error handling
|
||||
success_count = Column(Integer, default=0)
|
||||
error_count = Column(Integer, default=0)
|
||||
error_message = Column(Text, nullable=True)
|
||||
job_metadata = Column(JSON, nullable=True) # Additional job-specific data
|
||||
|
||||
# Tenant association
|
||||
tenant_id = Column(UUID(as_uuid=True), nullable=True, index=True)
|
||||
|
||||
# Audit fields
|
||||
created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
|
||||
updated_at = Column(DateTime(timezone=True),
|
||||
default=lambda: datetime.now(timezone.utc),
|
||||
onupdate=lambda: datetime.now(timezone.utc))
|
||||
|
||||
__table_args__ = (
|
||||
# Job monitoring
|
||||
Index('idx_jobs_city_status', 'city', 'status', 'scheduled_at'),
|
||||
Index('idx_jobs_tenant_status', 'tenant_id', 'status', 'scheduled_at'),
|
||||
Index('idx_jobs_type_city', 'job_type', 'city', 'scheduled_at'),
|
||||
|
||||
# Cleanup queries
|
||||
Index('idx_jobs_completed', 'status', 'completed_at'),
|
||||
)
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
"""Convert job to dictionary"""
|
||||
return {
|
||||
'id': str(self.id),
|
||||
'job_type': self.job_type,
|
||||
'city': self.city,
|
||||
'location_pattern': self.location_pattern,
|
||||
'scheduled_at': self.scheduled_at.isoformat() if self.scheduled_at else None,
|
||||
'started_at': self.started_at.isoformat() if self.started_at else None,
|
||||
'completed_at': self.completed_at.isoformat() if self.completed_at else None,
|
||||
'status': self.status,
|
||||
'progress_percentage': self.progress_percentage,
|
||||
'records_processed': self.records_processed,
|
||||
'records_stored': self.records_stored,
|
||||
'data_start_date': self.data_start_date.isoformat() if self.data_start_date else None,
|
||||
'data_end_date': self.data_end_date.isoformat() if self.data_end_date else None,
|
||||
'success_count': self.success_count,
|
||||
'error_count': self.error_count,
|
||||
'error_message': self.error_message,
|
||||
'job_metadata': self.job_metadata,
|
||||
'tenant_id': str(self.tenant_id) if self.tenant_id else None,
|
||||
'created_at': self.created_at.isoformat() if self.created_at else None,
|
||||
'updated_at': self.updated_at.isoformat() if self.updated_at else None
|
||||
}
|
||||
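# ----------------------------------------------------------------
# Illustrative sketch (not part of the models module): building a
# transient TrafficData row and using the city-specific JSON helpers
# above. Field values are made up; persisting the row still requires
# an async session from the service's database layer.
# ----------------------------------------------------------------
def _build_example_row() -> "TrafficData":
    row = TrafficData(
        location_id="40.4168,-3.7038",
        city="madrid",
        date=datetime.now(timezone.utc),
        traffic_volume=350,
        congestion_level="medium",
        source="madrid_opendata",
    )
    row.set_city_specific_field("subarea", "Centro")      # stored in city_specific_data JSON
    assert row.get_city_specific_field("subarea") == "Centro"
    return row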
74
services/external/app/models/weather.py
vendored
Normal file
74
services/external/app/models/weather.py
vendored
Normal file
@@ -0,0 +1,74 @@
|
||||
# ================================================================
|
||||
# services/external/app/models/weather.py
|
||||
# ================================================================
|
||||
"""Weather data models"""
|
||||
|
||||
from sqlalchemy import Column, String, DateTime, Float, Integer, Text, Index, Boolean
|
||||
from sqlalchemy.dialects.postgresql import UUID, JSON
|
||||
import uuid
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from shared.database.base import Base
|
||||
|
||||
class WeatherData(Base):
|
||||
__tablename__ = "weather_data"
|
||||
|
||||
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
|
||||
location_id = Column(String(100), nullable=False, index=True)
|
||||
city = Column(String(50), nullable=False)
|
||||
station_name = Column(String(200), nullable=True)
|
||||
latitude = Column(Float, nullable=True)
|
||||
longitude = Column(Float, nullable=True)
|
||||
date = Column(DateTime(timezone=True), nullable=False, index=True)
|
||||
forecast_date = Column(DateTime(timezone=True), nullable=True)
|
||||
temperature = Column(Float, nullable=True) # Celsius
|
||||
temperature_min = Column(Float, nullable=True)
|
||||
temperature_max = Column(Float, nullable=True)
|
||||
feels_like = Column(Float, nullable=True)
|
||||
precipitation = Column(Float, nullable=True) # mm
|
||||
precipitation_probability = Column(Float, nullable=True)
|
||||
humidity = Column(Float, nullable=True) # percentage
|
||||
wind_speed = Column(Float, nullable=True) # km/h
|
||||
wind_direction = Column(Float, nullable=True)
|
||||
wind_gust = Column(Float, nullable=True)
|
||||
pressure = Column(Float, nullable=True) # hPa
|
||||
visibility = Column(Float, nullable=True)
|
||||
uv_index = Column(Float, nullable=True)
|
||||
cloud_cover = Column(Float, nullable=True)
|
||||
condition = Column(String(100), nullable=True)
|
||||
description = Column(String(200), nullable=True)
|
||||
weather_code = Column(String(20), nullable=True)
|
||||
source = Column(String(50), nullable=False, default="aemet")
|
||||
data_type = Column(String(20), nullable=False)
|
||||
is_forecast = Column(Boolean, nullable=True)
|
||||
data_quality_score = Column(Float, nullable=True)
|
||||
raw_data = Column(JSON, nullable=True)
|
||||
processed_data = Column(JSON, nullable=True)
|
||||
tenant_id = Column(UUID(as_uuid=True), nullable=True, index=True)
|
||||
created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
|
||||
updated_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc), onupdate=lambda: datetime.now(timezone.utc))
|
||||
|
||||
__table_args__ = (
|
||||
Index('idx_weather_location_date', 'location_id', 'date'),
|
||||
)
|
||||
|
||||
class WeatherForecast(Base):
|
||||
__tablename__ = "weather_forecasts"
|
||||
|
||||
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
|
||||
location_id = Column(String(100), nullable=False, index=True)
|
||||
forecast_date = Column(DateTime(timezone=True), nullable=False)
|
||||
generated_at = Column(DateTime(timezone=True), nullable=False, default=lambda: datetime.now(timezone.utc))
|
||||
temperature = Column(Float, nullable=True)
|
||||
precipitation = Column(Float, nullable=True)
|
||||
humidity = Column(Float, nullable=True)
|
||||
wind_speed = Column(Float, nullable=True)
|
||||
description = Column(String(200), nullable=True)
|
||||
source = Column(String(50), nullable=False, default="aemet")
|
||||
raw_data = Column(Text, nullable=True)
|
||||
created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
|
||||
updated_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc), onupdate=lambda: datetime.now(timezone.utc))
|
||||
|
||||
__table_args__ = (
|
||||
Index('idx_forecast_location_date', 'location_id', 'forecast_date'),
|
||||
)
|
||||
0
services/external/app/repositories/__init__.py
vendored
Normal file
0
services/external/app/repositories/__init__.py
vendored
Normal file
191
services/external/app/repositories/traffic_repository.py
vendored
Normal file
191
services/external/app/repositories/traffic_repository.py
vendored
Normal file
@@ -0,0 +1,191 @@
|
||||
# ================================================================
|
||||
# services/external/app/repositories/traffic_repository.py
|
||||
# ================================================================
|
||||
"""
|
||||
Traffic Repository - Enhanced for multiple cities with comprehensive data access patterns
|
||||
Follows existing repository architecture while adding city-specific functionality
|
||||
"""
|
||||
|
||||
from typing import Optional, List, Dict, Any, Type, Tuple
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy import select, and_, or_, func, desc, asc, text, update, delete
|
||||
from sqlalchemy.orm import selectinload
|
||||
from datetime import datetime, timezone, timedelta
|
||||
import structlog
|
||||
|
||||
from app.models.traffic import TrafficData
|
||||
from app.schemas.traffic import TrafficDataCreate, TrafficDataResponse
|
||||
from shared.database.exceptions import DatabaseError, ValidationError
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
|
||||
class TrafficRepository:
|
||||
"""
|
||||
Enhanced repository for traffic data operations across multiple cities
|
||||
Provides city-aware queries and advanced traffic analytics
|
||||
"""
|
||||
|
||||
def __init__(self, session: AsyncSession):
|
||||
self.session = session
|
||||
self.model = TrafficData
|
||||
|
||||
# ================================================================
|
||||
# CORE TRAFFIC DATA OPERATIONS
|
||||
# ================================================================
|
||||
|
||||
async def get_by_location_and_date_range(
|
||||
self,
|
||||
latitude: float,
|
||||
longitude: float,
|
||||
start_date: datetime,
|
||||
end_date: datetime,
|
||||
tenant_id: Optional[str] = None
|
||||
) -> List[TrafficData]:
|
||||
"""Get traffic data by location and date range"""
|
||||
try:
|
||||
location_id = f"{latitude:.4f},{longitude:.4f}"
|
||||
|
||||
# Build base query
|
||||
query = select(self.model).where(self.model.location_id == location_id)
|
||||
|
||||
# Add tenant filter if specified
|
||||
if tenant_id:
|
||||
query = query.where(self.model.tenant_id == tenant_id)
|
||||
|
||||
# Add date range filters
|
||||
if start_date:
|
||||
query = query.where(self.model.date >= start_date)
|
||||
|
||||
if end_date:
|
||||
query = query.where(self.model.date <= end_date)
|
||||
|
||||
# Order by date
|
||||
query = query.order_by(self.model.date)
|
||||
|
||||
result = await self.session.execute(query)
|
||||
return result.scalars().all()
|
||||
|
||||
except Exception as e:
|
||||
logger.error("Failed to get traffic data by location and date range",
|
||||
latitude=latitude, longitude=longitude,
|
||||
error=str(e))
|
||||
raise DatabaseError(f"Failed to get traffic data: {str(e)}")
|
||||
|
||||
async def store_traffic_data_batch(
|
||||
self,
|
||||
traffic_data_list: List[Dict[str, Any]],
|
||||
location_id: str,
|
||||
tenant_id: Optional[str] = None
|
||||
) -> int:
|
||||
"""Store a batch of traffic data records with enhanced validation and duplicate handling."""
|
||||
stored_count = 0
|
||||
try:
|
||||
if not traffic_data_list:
|
||||
return 0
|
||||
|
||||
# Check for existing records to avoid duplicates
|
||||
dates = [data.get('date') for data in traffic_data_list if data.get('date')]
|
||||
existing_dates = set()
|
||||
if dates:
|
||||
existing_stmt = select(TrafficData.date).where(
|
||||
and_(
|
||||
TrafficData.location_id == location_id,
|
||||
TrafficData.date.in_(dates)
|
||||
)
|
||||
)
|
||||
result = await self.session.execute(existing_stmt)
|
||||
existing_dates = {row[0] for row in result.fetchall()}
|
||||
logger.debug(f"Found {len(existing_dates)} existing records for location {location_id}")
|
||||
|
||||
batch_records = []
|
||||
for data in traffic_data_list:
|
||||
record_date = data.get('date')
|
||||
if not record_date or record_date in existing_dates:
|
||||
continue # Skip duplicates
|
||||
|
||||
# Validate data before preparing for insertion
|
||||
if self._validate_traffic_data(data):
|
||||
batch_records.append({
|
||||
'location_id': location_id,
|
||||
'city': data.get('city', 'madrid'), # Default to madrid for historical data
|
||||
'tenant_id': tenant_id, # Include tenant_id in batch insert
|
||||
'date': record_date,
|
||||
'traffic_volume': data.get('traffic_volume'),
|
||||
'pedestrian_count': data.get('pedestrian_count'),
|
||||
'congestion_level': data.get('congestion_level'),
|
||||
'average_speed': data.get('average_speed'),
|
||||
'source': data.get('source', 'unknown'),
|
||||
'raw_data': str(data)
|
||||
})
|
||||
|
||||
if batch_records:
|
||||
# Use bulk insert for performance
|
||||
await self.session.execute(
|
||||
TrafficData.__table__.insert(),
|
||||
batch_records
|
||||
)
|
||||
await self.session.commit()
|
||||
stored_count = len(batch_records)
|
||||
logger.info(f"Successfully stored {stored_count} traffic records for location {location_id}")
|
||||
|
||||
except Exception as e:
|
||||
logger.error("Failed to store traffic data batch",
|
||||
error=str(e), location_id=location_id)
|
||||
await self.session.rollback()
|
||||
raise DatabaseError(f"Batch store failed: {str(e)}")
|
||||
|
||||
return stored_count
|
||||
|
||||
def _validate_traffic_data(self, data: Dict[str, Any]) -> bool:
|
||||
"""Validate traffic data before storage"""
|
||||
required_fields = ['date']
|
||||
|
||||
# Check required fields
|
||||
for field in required_fields:
|
||||
if not data.get(field):
|
||||
return False
|
||||
|
||||
# Validate data types and ranges
|
||||
traffic_volume = data.get('traffic_volume')
|
||||
if traffic_volume is not None and (traffic_volume < 0 or traffic_volume > 10000):
|
||||
return False
|
||||
|
||||
pedestrian_count = data.get('pedestrian_count')
|
||||
if pedestrian_count is not None and (pedestrian_count < 0 or pedestrian_count > 10000):
|
||||
return False
|
||||
|
||||
average_speed = data.get('average_speed')
|
||||
if average_speed is not None and (average_speed < 0 or average_speed > 200):
|
||||
return False
|
||||
|
||||
congestion_level = data.get('congestion_level')
|
||||
if congestion_level and congestion_level not in ['low', 'medium', 'high', 'blocked']:
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
async def get_historical_traffic_for_training(self,
|
||||
latitude: float,
|
||||
longitude: float,
|
||||
start_date: datetime,
|
||||
end_date: datetime) -> List[TrafficData]:
|
||||
"""Retrieve stored traffic data for training ML models."""
|
||||
try:
|
||||
location_id = f"{latitude:.4f},{longitude:.4f}"
|
||||
|
||||
stmt = select(TrafficData).where(
|
||||
and_(
|
||||
TrafficData.location_id == location_id,
|
||||
TrafficData.date >= start_date,
|
||||
TrafficData.date <= end_date
|
||||
)
|
||||
).order_by(TrafficData.date)
|
||||
|
||||
result = await self.session.execute(stmt)
|
||||
return result.scalars().all()
|
||||
|
||||
except Exception as e:
|
||||
logger.error("Failed to retrieve traffic data for training",
|
||||
error=str(e), location_id=location_id)
|
||||
raise DatabaseError(f"Training data retrieval failed: {str(e)}")
|
||||
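# ----------------------------------------------------------------
# Illustrative usage sketch (not part of the repository). Assumes an
# AsyncSession is supplied by the caller, e.g. from the service's
# database manager; the record values are made up.
# ----------------------------------------------------------------
async def _store_example_batch(session: AsyncSession) -> int:
    repo = TrafficRepository(session)
    records = [{
        "date": datetime.now(timezone.utc),
        "traffic_volume": 420,
        "congestion_level": "medium",
        "average_speed": 32.5,
        "source": "madrid_opendata",
    }]
    # Records whose date already exists for this location_id are skipped
    # before the bulk insert.
    return await repo.store_traffic_data_batch(
        records, location_id="40.4168,-3.7038", tenant_id=None
    )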
138
services/external/app/repositories/weather_repository.py
vendored
Normal file
138
services/external/app/repositories/weather_repository.py
vendored
Normal file
@@ -0,0 +1,138 @@
|
||||
# services/external/app/repositories/weather_repository.py
|
||||
|
||||
from typing import List, Dict, Any, Optional
|
||||
from datetime import datetime
|
||||
from sqlalchemy import select, and_
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
import structlog
|
||||
import json
|
||||
|
||||
from app.models.weather import WeatherData
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
class WeatherRepository:
|
||||
"""
|
||||
Repository for weather data operations, adapted for WeatherService.
|
||||
"""
|
||||
|
||||
def __init__(self, session: AsyncSession):
|
||||
self.session = session
|
||||
|
||||
async def get_historical_weather(self,
|
||||
location_id: str,
|
||||
start_date: datetime,
|
||||
end_date: datetime) -> List[WeatherData]:
|
||||
"""
|
||||
Retrieves historical weather data for a specific location and date range.
|
||||
This method directly supports the data retrieval logic in WeatherService.
|
||||
"""
|
||||
try:
|
||||
stmt = select(WeatherData).where(
|
||||
and_(
|
||||
WeatherData.location_id == location_id,
|
||||
WeatherData.date >= start_date,
|
||||
WeatherData.date <= end_date
|
||||
)
|
||||
).order_by(WeatherData.date)
|
||||
|
||||
result = await self.session.execute(stmt)
|
||||
records = result.scalars().all()
|
||||
logger.debug(f"Retrieved {len(records)} historical records for location {location_id}")
|
||||
return list(records)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"Failed to get historical weather from repository",
|
||||
error=str(e),
|
||||
location_id=location_id
|
||||
)
|
||||
raise
|
||||
|
||||
def _serialize_json_fields(self, data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""
|
||||
Serialize JSON fields (raw_data, processed_data) to ensure proper JSON storage
|
||||
"""
|
||||
serialized = data.copy()
|
||||
|
||||
# Serialize raw_data if present
|
||||
if 'raw_data' in serialized and serialized['raw_data'] is not None:
|
||||
if not isinstance(serialized['raw_data'], str):
|
||||
try:
|
||||
# Convert datetime objects to strings for JSON serialization
|
||||
raw_data = serialized['raw_data']
|
||||
if isinstance(raw_data, dict):
|
||||
# Handle datetime objects in the dict
|
||||
json_safe_data = {}
|
||||
for k, v in raw_data.items():
|
||||
if hasattr(v, 'isoformat'): # datetime-like object
|
||||
json_safe_data[k] = v.isoformat()
|
||||
else:
|
||||
json_safe_data[k] = v
|
||||
serialized['raw_data'] = json_safe_data
|
||||
except Exception as e:
|
||||
logger.warning(f"Could not serialize raw_data, storing as string: {e}")
|
||||
serialized['raw_data'] = str(raw_data)
|
||||
|
||||
# Serialize processed_data if present
|
||||
if 'processed_data' in serialized and serialized['processed_data'] is not None:
|
||||
if not isinstance(serialized['processed_data'], str):
|
||||
try:
|
||||
processed_data = serialized['processed_data']
|
||||
if isinstance(processed_data, dict):
|
||||
json_safe_data = {}
|
||||
for k, v in processed_data.items():
|
||||
if hasattr(v, 'isoformat'): # datetime-like object
|
||||
json_safe_data[k] = v.isoformat()
|
||||
else:
|
||||
json_safe_data[k] = v
|
||||
serialized['processed_data'] = json_safe_data
|
||||
except Exception as e:
|
||||
logger.warning(f"Could not serialize processed_data, storing as string: {e}")
|
||||
serialized['processed_data'] = str(processed_data)
|
||||
|
||||
return serialized
|
||||
|
||||
async def bulk_create_weather_data(self, weather_records: List[Dict[str, Any]]) -> None:
|
||||
"""
|
||||
Bulk inserts new weather records into the database.
|
||||
Used by WeatherService after fetching new historical data from an external API.
|
||||
"""
|
||||
try:
|
||||
if not weather_records:
|
||||
return
|
||||
|
||||
# Serialize JSON fields before creating model instances
|
||||
serialized_records = [self._serialize_json_fields(data) for data in weather_records]
|
||||
records = [WeatherData(**data) for data in serialized_records]
|
||||
self.session.add_all(records)
|
||||
await self.session.commit()
|
||||
logger.info(f"Successfully bulk inserted {len(records)} weather records")
|
||||
|
||||
except Exception as e:
|
||||
await self.session.rollback()
|
||||
logger.error(
|
||||
"Failed to bulk create weather records",
|
||||
error=str(e),
|
||||
count=len(weather_records)
|
||||
)
|
||||
raise
|
||||
|
||||
async def create_weather_data(self, data: Dict[str, Any]) -> WeatherData:
|
||||
"""
|
||||
Creates a single new weather data record.
|
||||
"""
|
||||
try:
|
||||
# Serialize JSON fields before creating model instance
|
||||
serialized_data = self._serialize_json_fields(data)
|
||||
new_record = WeatherData(**serialized_data)
|
||||
self.session.add(new_record)
|
||||
await self.session.commit()
|
||||
await self.session.refresh(new_record)
|
||||
logger.info(f"Created new weather record with ID {new_record.id}")
|
||||
return new_record
|
||||
|
||||
except Exception as e:
|
||||
await self.session.rollback()
|
||||
logger.error("Failed to create single weather record", error=str(e))
|
||||
raise
|
||||
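# ----------------------------------------------------------------
# Illustrative usage sketch (not part of the repository). Shows that
# datetime values nested in raw_data are converted to ISO strings by
# _serialize_json_fields before storage. Assumes an AsyncSession is
# supplied by the caller; field values are made up.
# ----------------------------------------------------------------
from datetime import timezone


async def _store_example_observation(session: AsyncSession) -> WeatherData:
    repo = WeatherRepository(session)
    return await repo.create_weather_data({
        "location_id": "40.4168,-3.7038",
        "city": "madrid",
        "date": datetime(2024, 5, 1, 12, 0, tzinfo=timezone.utc),
        "temperature": 21.4,
        "source": "aemet",
        "data_type": "historical",
        "raw_data": {"observed_at": datetime(2024, 5, 1, 12, 0)},  # serialized to ISO string
    })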
1
services/external/app/schemas/__init__.py
vendored
Normal file
1
services/external/app/schemas/__init__.py
vendored
Normal file
@@ -0,0 +1 @@
|
||||
# services/external/app/schemas/__init__.py
|
||||
100
services/external/app/schemas/traffic.py
vendored
Normal file
100
services/external/app/schemas/traffic.py
vendored
Normal file
@@ -0,0 +1,100 @@
|
||||
# services/external/app/schemas/traffic.py
|
||||
"""
|
||||
Traffic Service Pydantic Schemas
|
||||
"""
|
||||
|
||||
from pydantic import BaseModel, Field, field_validator
|
||||
from datetime import datetime
|
||||
from typing import Optional, List
|
||||
from uuid import UUID
|
||||
|
||||
class TrafficDataBase(BaseModel):
|
||||
"""Base traffic data schema"""
|
||||
location_id: str = Field(..., max_length=100, description="Traffic monitoring location ID")
|
||||
date: datetime = Field(..., description="Date and time of traffic measurement")
|
||||
traffic_volume: Optional[int] = Field(None, ge=0, description="Vehicles per hour")
|
||||
pedestrian_count: Optional[int] = Field(None, ge=0, description="Pedestrians per hour")
|
||||
congestion_level: Optional[str] = Field(None, pattern="^(low|medium|high|blocked)$", description="Traffic congestion level")
|
||||
average_speed: Optional[float] = Field(None, ge=0, le=200, description="Average speed in km/h")
|
||||
source: str = Field("madrid_opendata", max_length=50, description="Data source")
|
||||
raw_data: Optional[str] = Field(None, description="Raw data from source")
|
||||
|
||||
class TrafficDataCreate(TrafficDataBase):
|
||||
"""Schema for creating traffic data"""
|
||||
pass
|
||||
|
||||
class TrafficDataUpdate(BaseModel):
|
||||
"""Schema for updating traffic data"""
|
||||
traffic_volume: Optional[int] = Field(None, ge=0)
|
||||
pedestrian_count: Optional[int] = Field(None, ge=0)
|
||||
congestion_level: Optional[str] = Field(None, pattern="^(low|medium|high|blocked)$")
|
||||
average_speed: Optional[float] = Field(None, ge=0, le=200)
|
||||
raw_data: Optional[str] = None
|
||||
|
||||
class TrafficDataResponse(TrafficDataBase):
|
||||
"""Schema for traffic data responses"""
|
||||
id: str = Field(..., description="Unique identifier")
|
||||
created_at: datetime = Field(..., description="Creation timestamp")
|
||||
updated_at: datetime = Field(..., description="Last update timestamp")
|
||||
|
||||
@field_validator('id', mode='before')
|
||||
@classmethod
|
||||
def convert_uuid_to_string(cls, v):
|
||||
if isinstance(v, UUID):
|
||||
return str(v)
|
||||
return v
|
||||
|
||||
class Config:
|
||||
from_attributes = True
|
||||
json_encoders = {
|
||||
datetime: lambda v: v.isoformat()
|
||||
}
|
||||
|
||||
class TrafficDataList(BaseModel):
|
||||
"""Schema for paginated traffic data responses"""
|
||||
data: List[TrafficDataResponse]
|
||||
total: int = Field(..., description="Total number of records")
|
||||
page: int = Field(..., description="Current page number")
|
||||
per_page: int = Field(..., description="Records per page")
|
||||
has_next: bool = Field(..., description="Whether there are more pages")
|
||||
has_prev: bool = Field(..., description="Whether there are previous pages")
|
||||
|
||||
class TrafficAnalytics(BaseModel):
|
||||
"""Schema for traffic analytics"""
|
||||
location_id: str
|
||||
period_start: datetime
|
||||
period_end: datetime
|
||||
avg_traffic_volume: Optional[float] = None
|
||||
avg_pedestrian_count: Optional[float] = None
|
||||
peak_traffic_hour: Optional[int] = None
|
||||
peak_pedestrian_hour: Optional[int] = None
|
||||
congestion_distribution: dict = Field(default_factory=dict)
|
||||
avg_speed: Optional[float] = None
|
||||
|
||||
class TrafficDataResponse(BaseModel):
|
||||
date: datetime
|
||||
traffic_volume: Optional[int]
|
||||
pedestrian_count: Optional[int]
|
||||
congestion_level: Optional[str]
|
||||
average_speed: Optional[float]
|
||||
source: str
|
||||
|
||||
class LocationRequest(BaseModel):
|
||||
latitude: float
|
||||
longitude: float
|
||||
address: Optional[str] = None
|
||||
|
||||
class DateRangeRequest(BaseModel):
|
||||
start_date: datetime
|
||||
end_date: datetime
|
||||
|
||||
class HistoricalTrafficRequest(BaseModel):
|
||||
latitude: float
|
||||
longitude: float
|
||||
start_date: datetime
|
||||
end_date: datetime
|
||||
|
||||
class TrafficForecastRequest(BaseModel):
|
||||
latitude: float
|
||||
longitude: float
|
||||
hours: int = 24
|
||||
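# ----------------------------------------------------------------
# Illustrative sketch (not part of the schemas): how the field
# constraints above behave at validation time. Values are made up.
# ----------------------------------------------------------------
from pydantic import ValidationError


def _validate_examples() -> None:
    ok = TrafficDataCreate(
        location_id="40.4168,-3.7038",
        date=datetime(2024, 5, 1, 12, 0),
        traffic_volume=350,
        congestion_level="medium",
        average_speed=32.5,
    )
    assert ok.source == "madrid_opendata"  # default inherited from TrafficDataBase

    try:
        TrafficDataCreate(
            location_id="40.4168,-3.7038",
            date=datetime(2024, 5, 1, 12, 0),
            congestion_level="gridlock",  # rejected by the pattern constraint
        )
    except ValidationError:
        pass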
161
services/external/app/schemas/weather.py
vendored
Normal file
161
services/external/app/schemas/weather.py
vendored
Normal file
@@ -0,0 +1,161 @@
|
||||
# services/external/app/schemas/weather.py
|
||||
"""Weather data schemas"""
|
||||
|
||||
from pydantic import BaseModel, Field, field_validator
|
||||
from datetime import datetime
|
||||
from typing import Optional, List
|
||||
from uuid import UUID
|
||||
|
||||
class WeatherDataBase(BaseModel):
|
||||
"""Base weather data schema"""
|
||||
location_id: str = Field(..., max_length=100, description="Weather monitoring location ID")
|
||||
date: datetime = Field(..., description="Date and time of weather measurement")
|
||||
temperature: Optional[float] = Field(None, ge=-50, le=60, description="Temperature in Celsius")
|
||||
precipitation: Optional[float] = Field(None, ge=0, description="Precipitation in mm")
|
||||
humidity: Optional[float] = Field(None, ge=0, le=100, description="Humidity percentage")
|
||||
wind_speed: Optional[float] = Field(None, ge=0, le=200, description="Wind speed in km/h")
|
||||
pressure: Optional[float] = Field(None, ge=800, le=1200, description="Atmospheric pressure in hPa")
|
||||
description: Optional[str] = Field(None, max_length=200, description="Weather description")
|
||||
source: str = Field("aemet", max_length=50, description="Data source")
|
||||
raw_data: Optional[str] = Field(None, description="Raw data from source")
|
||||
|
||||
class WeatherDataCreate(WeatherDataBase):
|
||||
"""Schema for creating weather data"""
|
||||
pass
|
||||
|
||||
class WeatherDataUpdate(BaseModel):
|
||||
"""Schema for updating weather data"""
|
||||
temperature: Optional[float] = Field(None, ge=-50, le=60)
|
||||
precipitation: Optional[float] = Field(None, ge=0)
|
||||
humidity: Optional[float] = Field(None, ge=0, le=100)
|
||||
wind_speed: Optional[float] = Field(None, ge=0, le=200)
|
||||
pressure: Optional[float] = Field(None, ge=800, le=1200)
|
||||
description: Optional[str] = Field(None, max_length=200)
|
||||
raw_data: Optional[str] = None
|
||||
|
||||
class WeatherDataResponse(WeatherDataBase):
|
||||
"""Schema for weather data responses"""
|
||||
id: str = Field(..., description="Unique identifier")
|
||||
created_at: datetime = Field(..., description="Creation timestamp")
|
||||
updated_at: datetime = Field(..., description="Last update timestamp")
|
||||
|
||||
@field_validator('id', mode='before')
|
||||
@classmethod
|
||||
def convert_uuid_to_string(cls, v):
|
||||
if isinstance(v, UUID):
|
||||
return str(v)
|
||||
return v
|
||||
|
||||
class Config:
|
||||
from_attributes = True
|
||||
json_encoders = {
|
||||
datetime: lambda v: v.isoformat()
|
||||
}
|
||||
|
||||
class WeatherForecastBase(BaseModel):
|
||||
"""Base weather forecast schema"""
|
||||
location_id: str = Field(..., max_length=100, description="Location ID")
|
||||
forecast_date: datetime = Field(..., description="Date for forecast")
|
||||
temperature: Optional[float] = Field(None, ge=-50, le=60, description="Forecasted temperature")
|
||||
precipitation: Optional[float] = Field(None, ge=0, description="Forecasted precipitation")
|
||||
humidity: Optional[float] = Field(None, ge=0, le=100, description="Forecasted humidity")
|
||||
wind_speed: Optional[float] = Field(None, ge=0, le=200, description="Forecasted wind speed")
|
||||
description: Optional[str] = Field(None, max_length=200, description="Forecast description")
|
||||
source: str = Field("aemet", max_length=50, description="Data source")
|
||||
raw_data: Optional[str] = Field(None, description="Raw forecast data")
|
||||
|
||||
class WeatherForecastCreate(WeatherForecastBase):
|
||||
"""Schema for creating weather forecasts"""
|
||||
pass
|
||||
|
||||
class WeatherForecastResponse(WeatherForecastBase):
|
||||
"""Schema for weather forecast responses"""
|
||||
id: str = Field(..., description="Unique identifier")
|
||||
generated_at: datetime = Field(..., description="When forecast was generated")
|
||||
created_at: datetime = Field(..., description="Creation timestamp")
|
||||
updated_at: datetime = Field(..., description="Last update timestamp")
|
||||
|
||||
@field_validator('id', mode='before')
|
||||
@classmethod
|
||||
def convert_uuid_to_string(cls, v):
|
||||
if isinstance(v, UUID):
|
||||
return str(v)
|
||||
return v
|
||||
|
||||
class Config:
|
||||
from_attributes = True
|
||||
json_encoders = {
|
||||
datetime: lambda v: v.isoformat()
|
||||
}
|
||||
|
||||
class WeatherDataList(BaseModel):
|
||||
"""Schema for paginated weather data responses"""
|
||||
data: List[WeatherDataResponse]
|
||||
total: int = Field(..., description="Total number of records")
|
||||
page: int = Field(..., description="Current page number")
|
||||
per_page: int = Field(..., description="Records per page")
|
||||
has_next: bool = Field(..., description="Whether there are more pages")
|
||||
has_prev: bool = Field(..., description="Whether there are previous pages")
|
||||
|
||||
class WeatherForecastList(BaseModel):
|
||||
"""Schema for paginated weather forecast responses"""
|
||||
forecasts: List[WeatherForecastResponse]
|
||||
total: int = Field(..., description="Total number of forecasts")
|
||||
page: int = Field(..., description="Current page number")
|
||||
per_page: int = Field(..., description="Forecasts per page")
|
||||
|
||||
class WeatherAnalytics(BaseModel):
|
||||
"""Schema for weather analytics"""
|
||||
location_id: str
|
||||
period_start: datetime
|
||||
period_end: datetime
|
||||
avg_temperature: Optional[float] = None
|
||||
min_temperature: Optional[float] = None
|
||||
max_temperature: Optional[float] = None
|
||||
total_precipitation: Optional[float] = None
|
||||
avg_humidity: Optional[float] = None
|
||||
avg_wind_speed: Optional[float] = None
|
||||
avg_pressure: Optional[float] = None
|
||||
weather_conditions: dict = Field(default_factory=dict)
|
||||
rainy_days: int = 0
|
||||
sunny_days: int = 0
|
||||
|
||||
class WeatherDataResponse(BaseModel):
|
||||
date: datetime
|
||||
temperature: Optional[float]
|
||||
precipitation: Optional[float]
|
||||
humidity: Optional[float]
|
||||
wind_speed: Optional[float]
|
||||
pressure: Optional[float]
|
||||
description: Optional[str]
|
||||
source: str
|
||||
|
||||
class WeatherForecastResponse(BaseModel):
|
||||
forecast_date: datetime
|
||||
generated_at: datetime
|
||||
temperature: Optional[float]
|
||||
precipitation: Optional[float]
|
||||
humidity: Optional[float]
|
||||
wind_speed: Optional[float]
|
||||
description: Optional[str]
|
||||
source: str
|
||||
|
||||
class LocationRequest(BaseModel):
|
||||
latitude: float
|
||||
longitude: float
|
||||
address: Optional[str] = None
|
||||
|
||||
class DateRangeRequest(BaseModel):
|
||||
start_date: datetime
|
||||
end_date: datetime
|
||||
|
||||
class HistoricalWeatherRequest(BaseModel):
|
||||
latitude: float
|
||||
longitude: float
|
||||
start_date: datetime
|
||||
end_date: datetime
|
||||
|
||||
class WeatherForecastRequest(BaseModel):
|
||||
latitude: float
|
||||
longitude: float
|
||||
days: int
|
||||
1
services/external/app/services/__init__.py
vendored
Normal file
1
services/external/app/services/__init__.py
vendored
Normal file
@@ -0,0 +1 @@
|
||||
# services/external/app/services/__init__.py
|
||||
63
services/external/app/services/messaging.py
vendored
Normal file
@@ -0,0 +1,63 @@
# services/external/app/services/messaging.py
"""
External Service Messaging - Event Publishing using shared messaging infrastructure
"""

from shared.messaging.rabbitmq import RabbitMQClient
from app.core.config import settings
import structlog

logger = structlog.get_logger()

# Single global instance
data_publisher = RabbitMQClient(settings.RABBITMQ_URL, "data-service")

async def setup_messaging():
    """Initialize messaging for data service"""
    try:
        success = await data_publisher.connect()
        if success:
            logger.info("Data service messaging initialized")
        else:
            logger.warning("Data service messaging failed to initialize")
        return success
    except Exception as e:
        logger.warning("Failed to setup messaging", error=str(e))
        return False

async def cleanup_messaging():
    """Cleanup messaging for data service"""
    try:
        await data_publisher.disconnect()
        logger.info("Data service messaging cleaned up")
    except Exception as e:
        logger.warning("Error during messaging cleanup", error=str(e))

async def publish_weather_updated(data: dict) -> bool:
    """Publish weather updated event"""
    try:
        return await data_publisher.publish_data_event("weather.updated", data)
    except Exception as e:
        logger.warning("Failed to publish weather updated event", error=str(e))
        return False

async def publish_traffic_updated(data: dict) -> bool:
    """Publish traffic updated event"""
    try:
        return await data_publisher.publish_data_event("traffic.updated", data)
    except Exception as e:
        logger.warning("Failed to publish traffic updated event", error=str(e))
        return False


# Health check for messaging
async def check_messaging_health() -> dict:
    """Check messaging system health"""
    try:
        if data_publisher.connected:
            return {"status": "healthy", "service": "rabbitmq", "connected": True}
        else:
            return {"status": "unhealthy", "service": "rabbitmq", "connected": False, "error": "Not connected"}
    except Exception as e:
        return {"status": "unhealthy", "service": "rabbitmq", "connected": False, "error": str(e)}
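
For illustration, a minimal sketch (not part of this commit) of wiring the helpers above into the FastAPI application lifecycle; the lifespan hook and the event payload are assumptions.

from contextlib import asynccontextmanager

from fastapi import FastAPI

from app.services.messaging import (
    setup_messaging,
    cleanup_messaging,
    publish_weather_updated,
)

@asynccontextmanager
async def lifespan(app: FastAPI):
    await setup_messaging()  # best-effort: logs and returns False instead of raising
    yield
    await cleanup_messaging()

app = FastAPI(lifespan=lifespan)

@app.post("/debug/publish-weather")
async def debug_publish():
    ok = await publish_weather_updated({"city": "madrid", "temperature": 18.5})
    return {"published": ok}
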
298
services/external/app/services/traffic_service.py
vendored
Normal file
@@ -0,0 +1,298 @@
# ================================================================
# services/external/app/services/traffic_service.py
# ================================================================
"""
Abstracted Traffic Service - Universal interface for traffic data across multiple cities
"""

import asyncio
from datetime import datetime
from typing import Dict, List, Any, Optional, Tuple
from sqlalchemy.ext.asyncio import AsyncSession
import structlog

from app.external.apis.traffic import UniversalTrafficClient
from app.models.traffic import TrafficData
from app.repositories.traffic_repository import TrafficRepository

logger = structlog.get_logger()
from app.core.database import database_manager

class TrafficService:
    """
    Abstracted traffic service providing unified interface for traffic data
    Routes requests to appropriate city-specific clients automatically
    """

    def __init__(self):
        self.universal_client = UniversalTrafficClient()
        self.database_manager = database_manager

    async def get_current_traffic(
        self,
        latitude: float,
        longitude: float,
        tenant_id: Optional[str] = None
    ) -> Optional[Dict[str, Any]]:
        """
        Get current traffic data for any supported location

        Args:
            latitude: Query location latitude
            longitude: Query location longitude
            tenant_id: Optional tenant identifier for logging/analytics

        Returns:
            Dict with current traffic data or None if not available
        """
        try:
            logger.info("Getting current traffic data",
                        lat=latitude, lon=longitude, tenant_id=tenant_id)

            # Delegate to universal client
            traffic_data = await self.universal_client.get_current_traffic(latitude, longitude)

            if traffic_data:
                # Add service metadata
                traffic_data['service_metadata'] = {
                    'request_timestamp': datetime.now().isoformat(),
                    'tenant_id': tenant_id,
                    'service_version': '2.0',
                    'query_location': {'latitude': latitude, 'longitude': longitude}
                }

                logger.info("Successfully retrieved current traffic data",
                            lat=latitude, lon=longitude,
                            source=traffic_data.get('source', 'unknown'))

                return traffic_data
            else:
                logger.warning("No current traffic data available",
                               lat=latitude, lon=longitude)
                return None

        except Exception as e:
            logger.error("Error getting current traffic data",
                         lat=latitude, lon=longitude, error=str(e))
            return None

    async def get_historical_traffic(
        self,
        latitude: float,
        longitude: float,
        start_date: datetime,
        end_date: datetime,
        tenant_id: Optional[str] = None
    ) -> List[Dict[str, Any]]:
        """
        Get historical traffic data for any supported location with database storage

        Args:
            latitude: Query location latitude
            longitude: Query location longitude
            start_date: Start date for historical data
            end_date: End date for historical data
            tenant_id: Optional tenant identifier

        Returns:
            List of historical traffic data dictionaries
        """
        try:
            logger.info("Getting historical traffic data",
                        lat=latitude, lon=longitude,
                        start=start_date, end=end_date, tenant_id=tenant_id)

            # Validate date range
            if start_date >= end_date:
                logger.warning("Invalid date range", start=start_date, end=end_date)
                return []

            location_id = f"{latitude:.4f},{longitude:.4f}"

            async with self.database_manager.get_session() as session:
                traffic_repo = TrafficRepository(session)
                # Check database first using the repository
                db_records = await traffic_repo.get_by_location_and_date_range(
                    latitude, longitude, start_date, end_date, tenant_id
                )

                if db_records:
                    logger.info("Historical traffic data found in database",
                                count=len(db_records))
                    return [self._convert_db_record_to_dict(record) for record in db_records]

            # Delegate to universal client if not in DB
            traffic_data = await self.universal_client.get_historical_traffic(
                latitude, longitude, start_date, end_date
            )

            if traffic_data:
                # Add service metadata to each record
                for record in traffic_data:
                    record['service_metadata'] = {
                        'request_timestamp': datetime.now().isoformat(),
                        'tenant_id': tenant_id,
                        'service_version': '2.0',
                        'query_location': {'latitude': latitude, 'longitude': longitude},
                        'date_range': {
                            'start': start_date.isoformat(),
                            'end': end_date.isoformat()
                        }
                    }

                async with self.database_manager.get_session() as session:
                    traffic_repo = TrafficRepository(session)
                    # Store in database using the repository
                    stored_count = await traffic_repo.store_traffic_data_batch(
                        traffic_data, location_id, tenant_id
                    )
                    logger.info("Traffic data stored for re-training",
                                fetched=len(traffic_data), stored=stored_count,
                                location=location_id)

                logger.info("Successfully retrieved historical traffic data",
                            lat=latitude, lon=longitude, records=len(traffic_data))

                return traffic_data
            else:
                logger.info("No historical traffic data available",
                            lat=latitude, lon=longitude)
                return []

        except Exception as e:
            logger.error("Error getting historical traffic data",
                         lat=latitude, lon=longitude, error=str(e))
            return []

    def _convert_db_record_to_dict(self, record: TrafficData) -> Dict[str, Any]:
        """Convert database record to dictionary format"""
        return {
            'date': record.date,
            'traffic_volume': record.traffic_volume,
            'pedestrian_count': record.pedestrian_count,
            'congestion_level': record.congestion_level,
            'average_speed': record.average_speed,
            'source': record.source,
            'location_id': record.location_id,
            'raw_data': record.raw_data
        }

    async def get_traffic_events(
        self,
        latitude: float,
        longitude: float,
        radius_km: float = 5.0,
        tenant_id: Optional[str] = None
    ) -> List[Dict[str, Any]]:
        """
        Get traffic events and incidents for any supported location

        Args:
            latitude: Query location latitude
            longitude: Query location longitude
            radius_km: Search radius in kilometers
            tenant_id: Optional tenant identifier

        Returns:
            List of traffic events
        """
        try:
            logger.info("Getting traffic events",
                        lat=latitude, lon=longitude, radius=radius_km, tenant_id=tenant_id)

            # Delegate to universal client
            events = await self.universal_client.get_events(latitude, longitude, radius_km)

            # Add metadata to events
            for event in events:
                event['service_metadata'] = {
                    'request_timestamp': datetime.now().isoformat(),
                    'tenant_id': tenant_id,
                    'service_version': '2.0',
                    'query_location': {'latitude': latitude, 'longitude': longitude},
                    'search_radius_km': radius_km
                }

            logger.info("Retrieved traffic events",
                        lat=latitude, lon=longitude, events=len(events))

            return events

        except Exception as e:
            logger.error("Error getting traffic events",
                         lat=latitude, lon=longitude, error=str(e))
            return []

    def get_location_info(self, latitude: float, longitude: float) -> Dict[str, Any]:
        """
        Get information about traffic data availability for location

        Args:
            latitude: Query location latitude
            longitude: Query location longitude

        Returns:
            Dict with location support information
        """
        try:
            info = self.universal_client.get_location_info(latitude, longitude)

            # Add service layer information
            info['service_layer'] = {
                'version': '2.0',
                'abstraction_level': 'universal',
                'supported_operations': [
                    'current_traffic',
                    'historical_traffic',
                    'traffic_events',
                    'bulk_requests'
                ]
            }

            return info

        except Exception as e:
            logger.error("Error getting location info",
                         lat=latitude, lon=longitude, error=str(e))
            return {
                'supported': False,
                'error': str(e),
                'service_layer': {'version': '2.0'}
            }

    async def get_stored_traffic_for_training(self,
                                              latitude: float,
                                              longitude: float,
                                              start_date: datetime,
                                              end_date: datetime) -> List[Dict[str, Any]]:
        """Retrieve stored traffic data specifically for training purposes"""
        try:
            async with self.database_manager.get_session() as session:
                traffic_repo = TrafficRepository(session)
                records = await traffic_repo.get_historical_traffic_for_training(
                    latitude, longitude, start_date, end_date
                )

                # Convert to training format
                training_data = []
                for record in records:
                    training_data.append({
                        'date': record.date,
                        'traffic_volume': record.traffic_volume,
                        'pedestrian_count': record.pedestrian_count,
                        'congestion_level': record.congestion_level,
                        'average_speed': record.average_speed,
                        'location_id': record.location_id,
                        'source': record.source,
                        'measurement_point_id': record.raw_data  # Contains additional metadata
                    })

                logger.info(f"Retrieved {len(training_data)} traffic records for training",
                            location_id=f"{latitude:.4f},{longitude:.4f}", start=start_date, end=end_date)

                return training_data

        except Exception as e:
            logger.error("Failed to retrieve traffic data for training",
                         error=str(e), location_id=f"{latitude:.4f},{longitude:.4f}")
            return []
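
For illustration, a minimal usage sketch of the service above (not part of this commit); the coordinates, tenant id, and date range are example values.

import asyncio
from datetime import datetime, timedelta

from app.services.traffic_service import TrafficService

async def main():
    service = TrafficService()
    current = await service.get_current_traffic(40.4168, -3.7038, tenant_id="demo-tenant")
    history = await service.get_historical_traffic(
        40.4168, -3.7038,
        start_date=datetime.now() - timedelta(days=7),
        end_date=datetime.now(),
    )
    # Both calls swallow errors and return None / [] rather than raising
    print(current.get("source") if current else "no current data", len(history))

asyncio.run(main())
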
154
services/external/app/services/weather_service.py
vendored
Normal file
@@ -0,0 +1,154 @@
# services/external/app/services/weather_service.py - REVISED VERSION

"""Weather data service with repository pattern"""

from typing import List, Dict, Any, Optional
from datetime import datetime, timedelta
from sqlalchemy.ext.asyncio import AsyncSession
import structlog

from app.models.weather import WeatherData, WeatherForecast
from app.external.aemet import AEMETClient
from app.schemas.weather import WeatherDataResponse, WeatherForecastResponse
from app.repositories.weather_repository import WeatherRepository

logger = structlog.get_logger()
from app.core.database import database_manager

class WeatherService:

    def __init__(self):
        self.aemet_client = AEMETClient()
        self.database_manager = database_manager

    async def get_current_weather(self, latitude: float, longitude: float) -> Optional[WeatherDataResponse]:
        """Get current weather for location"""
        try:
            logger.debug("Getting current weather", lat=latitude, lon=longitude)
            weather_data = await self.aemet_client.get_current_weather(latitude, longitude)

            if weather_data:
                logger.debug("Weather data received", source=weather_data.get('source'))
                return WeatherDataResponse(**weather_data)
            else:
                logger.warning("No weather data received from AEMET client")
                return None

        except Exception as e:
            logger.error("Failed to get current weather", error=str(e), lat=latitude, lon=longitude)
            return None

    async def get_weather_forecast(self, latitude: float, longitude: float, days: int = 7) -> List[WeatherForecastResponse]:
        """Get weather forecast for location"""
        try:
            logger.debug("Getting weather forecast", lat=latitude, lon=longitude, days=days)
            forecast_data = await self.aemet_client.get_forecast(latitude, longitude, days)

            if forecast_data:
                logger.debug("Forecast data received", count=len(forecast_data))
                # Validate each forecast item before creating response
                valid_forecasts = []
                for item in forecast_data:
                    try:
                        if isinstance(item, dict):
                            # Ensure required fields are present
                            forecast_item = {
                                "forecast_date": item.get("forecast_date", datetime.now()),
                                "generated_at": item.get("generated_at", datetime.now()),
                                "temperature": float(item.get("temperature", 15.0)),
                                "precipitation": float(item.get("precipitation", 0.0)),
                                "humidity": float(item.get("humidity", 50.0)),
                                "wind_speed": float(item.get("wind_speed", 10.0)),
                                "description": str(item.get("description", "Variable")),
                                "source": str(item.get("source", "unknown"))
                            }
                            valid_forecasts.append(WeatherForecastResponse(**forecast_item))
                        else:
                            logger.warning("Invalid forecast item type", item_type=type(item))
                    except Exception as item_error:
                        logger.warning("Error processing forecast item", error=str(item_error), item=item)
                        continue

                logger.debug("Valid forecasts processed", count=len(valid_forecasts))
                return valid_forecasts
            else:
                logger.warning("No forecast data received from AEMET client")
                return []

        except Exception as e:
            logger.error("Failed to get weather forecast", error=str(e), lat=latitude, lon=longitude)
            return []

    async def get_historical_weather(self,
                                     latitude: float,
                                     longitude: float,
                                     start_date: datetime,
                                     end_date: datetime) -> List[WeatherDataResponse]:
        """Get historical weather data"""
        try:
            logger.debug("Getting historical weather",
                         lat=latitude, lon=longitude,
                         start=start_date, end=end_date)

            location_id = f"{latitude:.4f},{longitude:.4f}"

            async with self.database_manager.get_session() as session:
                weather_repository = WeatherRepository(session)
                # Use the repository to get data from the database
                db_records = await weather_repository.get_historical_weather(
                    location_id,
                    start_date,
                    end_date
                )

                if db_records:
                    logger.debug("Historical data found in database", count=len(db_records))
                    return [WeatherDataResponse(
                        date=record.date,
                        temperature=record.temperature,
                        precipitation=record.precipitation,
                        humidity=record.humidity,
                        wind_speed=record.wind_speed,
                        pressure=record.pressure,
                        description=record.description,
                        source=record.source
                    ) for record in db_records]

            # If not in database, fetch from API and store
            logger.debug("Fetching historical data from AEMET API")
            weather_data = await self.aemet_client.get_historical_weather(
                latitude, longitude, start_date, end_date
            )

            if weather_data:
                # Use the repository to store the new data
                records_to_store = [{
                    "location_id": location_id,
                    "city": "Madrid",  # Default city for AEMET data
                    "date": data.get('date', datetime.now()),
                    "temperature": data.get('temperature'),
                    "precipitation": data.get('precipitation'),
                    "humidity": data.get('humidity'),
                    "wind_speed": data.get('wind_speed'),
                    "pressure": data.get('pressure'),
                    "description": data.get('description'),
                    "source": "aemet",
                    "data_type": "historical",
                    "raw_data": data,  # Pass as dict, not string
                    "tenant_id": None
                } for data in weather_data]

                async with self.database_manager.get_session() as session:
                    weather_repository = WeatherRepository(session)
                    await weather_repository.bulk_create_weather_data(records_to_store)

                logger.debug("Historical data stored in database", count=len(weather_data))

                return [WeatherDataResponse(**item) for item in weather_data]
            else:
                logger.warning("No historical weather data received")
                return []

        except Exception as e:
            logger.error("Failed to get historical weather", error=str(e))
            return []
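
A similar sketch for the weather side (not part of this commit), assuming an async entry point; the coordinates are illustrative, and invalid forecast items are silently skipped by the service above.

import asyncio

from app.services.weather_service import WeatherService

async def main():
    service = WeatherService()
    forecast = await service.get_weather_forecast(40.4168, -3.7038, days=3)
    # The returned list may be shorter than `days` if items fail validation
    for item in forecast:
        print(item.forecast_date, item.temperature, item.source)

asyncio.run(main())
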
19
services/external/pytest.ini
vendored
Normal file
@@ -0,0 +1,19 @@
[tool:pytest]
testpaths = tests
asyncio_mode = auto
python_files = test_*.py
python_classes = Test*
python_functions = test_*
addopts =
    -v
    --tb=short
    --strict-markers
    --disable-warnings
    --cov=app
    --cov-report=term-missing
    --cov-report=html:htmlcov
markers =
    unit: Unit tests
    integration: Integration tests
    slow: Slow running tests
    external: Tests requiring external services
56
services/external/requirements.txt
vendored
Normal file
@@ -0,0 +1,56 @@
# services/external/requirements.txt
# FastAPI and web framework
fastapi==0.104.1
uvicorn[standard]==0.24.0

# Database
sqlalchemy==2.0.23
psycopg2-binary==2.9.9
asyncpg==0.29.0
aiosqlite==0.19.0
alembic==1.12.1

# HTTP clients for external APIs
httpx==0.25.2
aiofiles==23.2.0
requests==2.31.0

# Data processing and time series
pandas==2.1.3
numpy==1.25.2

# Validation and serialization
pydantic==2.5.0
pydantic-settings==2.0.3

# Authentication and security
python-jose[cryptography]==3.3.0

# Logging and monitoring
structlog==23.2.0
prometheus-client==0.19.0

# Message queues
aio-pika==9.3.1

# Background job processing
redis==5.0.1

# Date and time handling
pytz==2023.3
python-dateutil==2.8.2

# XML parsing (for some APIs)
lxml==4.9.3

# Geospatial processing
pyproj==3.6.1

# Note: pytest and testing dependencies are in tests/requirements.txt

# Development
python-multipart==0.0.6

# External API specific
beautifulsoup4==4.12.2  # For web scraping if needed
xmltodict==0.13.0  # For XML API responses
1
services/external/shared/shared
vendored
Symbolic link
@@ -0,0 +1 @@
/Users/urtzialfaro/Documents/bakery-ia/shared
314
services/external/tests/conftest.py
vendored
Normal file
@@ -0,0 +1,314 @@
# services/external/tests/conftest.py
"""
Pytest configuration and fixtures for External Service tests
"""

import pytest
import asyncio
from datetime import datetime, timezone
from typing import AsyncGenerator
from uuid import uuid4, UUID

from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession, async_sessionmaker
from sqlalchemy.pool import StaticPool
from fastapi.testclient import TestClient

from app.main import app
from app.core.config import settings
from app.core.database import Base, get_db
from app.models.weather import WeatherData, WeatherStation
from app.models.traffic import TrafficData, TrafficMeasurementPoint


# Test database configuration
TEST_DATABASE_URL = "sqlite+aiosqlite:///:memory:"


@pytest.fixture(scope="session")
def event_loop():
    """Create event loop for the test session"""
    loop = asyncio.new_event_loop()
    yield loop
    loop.close()


@pytest.fixture
async def test_engine():
    """Create test database engine"""
    engine = create_async_engine(
        TEST_DATABASE_URL,
        poolclass=StaticPool,
        connect_args={"check_same_thread": False}
    )

    # Create tables
    async with engine.begin() as conn:
        await conn.run_sync(Base.metadata.create_all)

    yield engine

    await engine.dispose()


@pytest.fixture
async def test_db_session(test_engine) -> AsyncGenerator[AsyncSession, None]:
    """Create test database session"""
    async_session = async_sessionmaker(
        test_engine, class_=AsyncSession, expire_on_commit=False
    )

    async with async_session() as session:
        yield session


@pytest.fixture
def test_client():
    """Create test client"""
    return TestClient(app)


@pytest.fixture
async def override_get_db(test_db_session):
    """Override get_db dependency for testing"""
    async def _override_get_db():
        yield test_db_session

    app.dependency_overrides[get_db] = _override_get_db
    yield
    app.dependency_overrides.clear()


# Test data fixtures
@pytest.fixture
def sample_tenant_id() -> UUID:
    """Sample tenant ID for testing"""
    return uuid4()


@pytest.fixture
def sample_weather_data() -> dict:
    """Sample weather data for testing"""
    return {
        "city": "madrid",
        "location_id": "40.4168,-3.7038",
        "date": datetime.now(timezone.utc),
        "temperature": 18.5,
        "humidity": 65.0,
        "pressure": 1013.2,
        "wind_speed": 10.2,
        "condition": "partly_cloudy",
        "description": "Parcialmente nublado",
        "source": "aemet",
        "data_type": "current",
        "is_forecast": False,
        "data_quality_score": 95.0
    }


@pytest.fixture
def sample_traffic_data() -> dict:
    """Sample traffic data for testing"""
    return {
        "city": "madrid",
        "location_id": "PM_M30_001",
        "date": datetime.now(timezone.utc),
        "measurement_point_id": "PM_M30_001",
        "measurement_point_name": "M-30 Norte - Nudo Norte",
        "measurement_point_type": "M30",
        "traffic_volume": 850,
        "average_speed": 65.2,
        "congestion_level": "medium",
        "occupation_percentage": 45.8,
        "latitude": 40.4501,
        "longitude": -3.6919,
        "district": "Chamartín",
        "source": "madrid_opendata",
        "data_quality_score": 92.0,
        "is_synthetic": False
    }


@pytest.fixture
def sample_weather_forecast() -> list[dict]:
    """Sample weather forecast data"""
    base_date = datetime.now(timezone.utc)
    return [
        {
            "city": "madrid",
            "location_id": "40.4168,-3.7038",
            "date": base_date,
            "forecast_date": base_date,
            "temperature": 20.0,
            "temperature_min": 15.0,
            "temperature_max": 25.0,
            "precipitation": 0.0,
            "humidity": 60.0,
            "wind_speed": 12.0,
            "condition": "sunny",
            "description": "Soleado",
            "source": "aemet",
            "data_type": "forecast",
            "is_forecast": True,
            "data_quality_score": 85.0
        }
    ]


@pytest.fixture
async def populated_weather_db(test_db_session: AsyncSession, sample_weather_data: dict):
    """Database populated with weather test data"""
    weather_record = WeatherData(**sample_weather_data)
    test_db_session.add(weather_record)
    await test_db_session.commit()
    yield test_db_session


@pytest.fixture
async def populated_traffic_db(test_db_session: AsyncSession, sample_traffic_data: dict):
    """Database populated with traffic test data"""
    traffic_record = TrafficData(**sample_traffic_data)
    test_db_session.add(traffic_record)
    await test_db_session.commit()
    yield test_db_session


# Mock external API fixtures
@pytest.fixture
def mock_aemet_response():
    """Mock AEMET API response"""
    return {
        "date": datetime.now(timezone.utc),
        "temperature": 18.5,
        "humidity": 65.0,
        "pressure": 1013.2,
        "wind_speed": 10.2,
        "description": "Parcialmente nublado",
        "source": "aemet"
    }


@pytest.fixture
def mock_madrid_traffic_xml():
    """Mock Madrid Open Data traffic XML"""
    return """<?xml version="1.0" encoding="UTF-8"?>
<pms>
    <pm codigo="PM_M30_001" nombre="M-30 Norte - Nudo Norte">
        <intensidad>850</intensidad>
        <ocupacion>45</ocupacion>
        <velocidad>65</velocidad>
        <fechahora>2024-01-15T10:30:00</fechahora>
    </pm>
    <pm codigo="PM_URB_002" nombre="Gran Vía - Plaza España">
        <intensidad>320</intensidad>
        <ocupacion>78</ocupacion>
        <velocidad>25</velocidad>
        <fechahora>2024-01-15T10:30:00</fechahora>
    </pm>
</pms>"""


@pytest.fixture
def mock_messaging():
    """Mock messaging service"""
    class MockMessaging:
        def __init__(self):
            self.published_events = []

        async def publish_weather_updated(self, data):
            self.published_events.append(("weather_updated", data))
            return True

        async def publish_traffic_updated(self, data):
            self.published_events.append(("traffic_updated", data))
            return True

        async def publish_collection_job_started(self, data):
            self.published_events.append(("job_started", data))
            return True

        async def publish_collection_job_completed(self, data):
            self.published_events.append(("job_completed", data))
            return True

    return MockMessaging()


# Mock external clients
@pytest.fixture
def mock_aemet_client():
    """Mock AEMET client"""
    class MockAEMETClient:
        async def get_current_weather(self, lat, lon):
            return {
                "date": datetime.now(timezone.utc),
                "temperature": 18.5,
                "humidity": 65.0,
                "pressure": 1013.2,
                "wind_speed": 10.2,
                "description": "Parcialmente nublado",
                "source": "aemet"
            }

        async def get_forecast(self, lat, lon, days):
            return [
                {
                    "forecast_date": datetime.now(timezone.utc),
                    "temperature": 20.0,
                    "temperature_min": 15.0,
                    "temperature_max": 25.0,
                    "precipitation": 0.0,
                    "humidity": 60.0,
                    "wind_speed": 12.0,
                    "description": "Soleado",
                    "source": "aemet"
                }
            ]

    return MockAEMETClient()


@pytest.fixture
def mock_madrid_client():
    """Mock Madrid traffic client"""
    class MockMadridClient:
        async def fetch_current_traffic_xml(self):
            return """<?xml version="1.0" encoding="UTF-8"?>
<pms>
    <pm codigo="PM_TEST_001" nombre="Test Point">
        <intensidad>500</intensidad>
        <ocupacion>50</ocupacion>
        <velocidad>50</velocidad>
        <fechahora>2024-01-15T10:30:00</fechahora>
    </pm>
</pms>"""

    return MockMadridClient()


@pytest.fixture
def mock_madrid_processor():
    """Mock Madrid traffic processor"""
    class MockMadridProcessor:
        async def process_current_traffic_xml(self, xml_content):
            return [
                {
                    "city": "madrid",
                    "location_id": "PM_TEST_001",
                    "date": datetime.now(timezone.utc),
                    "measurement_point_id": "PM_TEST_001",
                    "measurement_point_name": "Test Point",
                    "measurement_point_type": "TEST",
                    "traffic_volume": 500,
                    "average_speed": 50.0,
                    "congestion_level": "medium",
                    "occupation_percentage": 50.0,
                    "latitude": 40.4168,
                    "longitude": -3.7038,
                    "district": "Centro",
                    "source": "madrid_opendata",
                    "data_quality_score": 90.0,
                    "is_synthetic": False
                }
            ]

    return MockMadridProcessor()
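
For illustration, a hypothetical test built on these fixtures (not part of this commit); the /health route is assumed from the Dockerfile healthcheck and is not defined by the test files in this commit.

import pytest

@pytest.mark.unit
def test_health_endpoint(test_client):
    # test_client comes from conftest.py; the endpoint path is an assumption
    response = test_client.get("/health")
    assert response.status_code == 200
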
9
services/external/tests/requirements.txt
vendored
Normal file
@@ -0,0 +1,9 @@
# Testing dependencies for External Service
pytest==7.4.3
pytest-asyncio==0.21.1
pytest-mock==3.12.0
httpx==0.25.2
fastapi[all]==0.104.1
sqlalchemy[asyncio]==2.0.23
aiosqlite==0.19.0
coverage==7.3.2
393
services/external/tests/unit/test_repositories.py
vendored
Normal file
@@ -0,0 +1,393 @@
# services/external/tests/unit/test_repositories.py
"""
Unit tests for External Service Repositories
"""

import pytest
from datetime import datetime, timezone, timedelta
from uuid import uuid4

from app.repositories.weather_repository import WeatherRepository
from app.repositories.traffic_repository import TrafficRepository
from app.models.weather import WeatherData, WeatherStation, WeatherDataJob
from app.models.traffic import TrafficData, TrafficMeasurementPoint, TrafficDataJob


@pytest.mark.asyncio
class TestWeatherRepository:
    """Test Weather Repository operations"""

    async def test_create_weather_data(self, test_db_session, sample_weather_data):
        """Test creating weather data"""
        repository = WeatherRepository(test_db_session)

        record = await repository.create_weather_data(sample_weather_data)

        assert record is not None
        assert record.id is not None
        assert record.city == sample_weather_data["city"]
        assert record.temperature == sample_weather_data["temperature"]

    async def test_get_current_weather(self, populated_weather_db, sample_weather_data):
        """Test getting current weather data"""
        repository = WeatherRepository(populated_weather_db)

        result = await repository.get_current_weather("madrid")

        assert result is not None
        assert result.city == "madrid"
        assert result.temperature == sample_weather_data["temperature"]

    async def test_get_weather_forecast(self, test_db_session, sample_weather_forecast):
        """Test getting weather forecast"""
        repository = WeatherRepository(test_db_session)

        # Create forecast data
        for forecast_item in sample_weather_forecast:
            await repository.create_weather_data(forecast_item)

        result = await repository.get_weather_forecast("madrid", 7)

        assert len(result) == 1
        assert result[0].is_forecast is True

    async def test_get_historical_weather(self, test_db_session, sample_weather_data):
        """Test getting historical weather data"""
        repository = WeatherRepository(test_db_session)

        # Create historical data
        historical_data = sample_weather_data.copy()
        historical_data["date"] = datetime.now(timezone.utc) - timedelta(days=1)
        await repository.create_weather_data(historical_data)

        start_date = datetime.now(timezone.utc) - timedelta(days=2)
        end_date = datetime.now(timezone.utc)

        result = await repository.get_historical_weather("madrid", start_date, end_date)

        assert len(result) >= 1

    async def test_create_weather_station(self, test_db_session):
        """Test creating weather station"""
        repository = WeatherRepository(test_db_session)

        station_data = {
            "station_id": "TEST_001",
            "name": "Test Station",
            "city": "madrid",
            "latitude": 40.4168,
            "longitude": -3.7038,
            "altitude": 650.0,
            "is_active": True
        }

        station = await repository.create_weather_station(station_data)

        assert station is not None
        assert station.station_id == "TEST_001"
        assert station.name == "Test Station"

    async def test_get_weather_stations(self, test_db_session):
        """Test getting weather stations"""
        repository = WeatherRepository(test_db_session)

        # Create test station
        station_data = {
            "station_id": "TEST_001",
            "name": "Test Station",
            "city": "madrid",
            "latitude": 40.4168,
            "longitude": -3.7038,
            "is_active": True
        }
        await repository.create_weather_station(station_data)

        stations = await repository.get_weather_stations("madrid")

        assert len(stations) == 1
        assert stations[0].station_id == "TEST_001"

    async def test_create_weather_job(self, test_db_session, sample_tenant_id):
        """Test creating weather data collection job"""
        repository = WeatherRepository(test_db_session)

        job_data = {
            "job_type": "current",
            "city": "madrid",
            "status": "pending",
            "scheduled_at": datetime.utcnow(),
            "tenant_id": sample_tenant_id
        }

        job = await repository.create_weather_job(job_data)

        assert job is not None
        assert job.job_type == "current"
        assert job.status == "pending"

    async def test_update_weather_job(self, test_db_session, sample_tenant_id):
        """Test updating weather job"""
        repository = WeatherRepository(test_db_session)

        # Create job first
        job_data = {
            "job_type": "current",
            "city": "madrid",
            "status": "pending",
            "scheduled_at": datetime.utcnow(),
            "tenant_id": sample_tenant_id
        }
        job = await repository.create_weather_job(job_data)

        # Update job
        update_data = {
            "status": "completed",
            "completed_at": datetime.utcnow(),
            "success_count": 1
        }

        success = await repository.update_weather_job(job.id, update_data)

        assert success is True

    async def test_get_weather_jobs(self, test_db_session, sample_tenant_id):
        """Test getting weather jobs"""
        repository = WeatherRepository(test_db_session)

        # Create test job
        job_data = {
            "job_type": "forecast",
            "city": "madrid",
            "status": "completed",
            "scheduled_at": datetime.utcnow(),
            "tenant_id": sample_tenant_id
        }
        await repository.create_weather_job(job_data)

        jobs = await repository.get_weather_jobs()

        assert len(jobs) >= 1
        assert any(job.job_type == "forecast" for job in jobs)


@pytest.mark.asyncio
class TestTrafficRepository:
    """Test Traffic Repository operations"""

    async def test_create_traffic_data(self, test_db_session, sample_traffic_data):
        """Test creating traffic data"""
        repository = TrafficRepository(test_db_session)

        # Convert sample data to list for bulk create
        traffic_list = [sample_traffic_data]

        count = await repository.bulk_create_traffic_data(traffic_list)

        assert count == 1

    async def test_get_current_traffic(self, populated_traffic_db, sample_traffic_data):
        """Test getting current traffic data"""
        repository = TrafficRepository(populated_traffic_db)

        result = await repository.get_current_traffic("madrid")

        assert len(result) >= 1
        assert result[0].city == "madrid"

    async def test_get_current_traffic_with_filters(self, populated_traffic_db):
        """Test getting current traffic with filters"""
        repository = TrafficRepository(populated_traffic_db)

        result = await repository.get_current_traffic("madrid", district="Chamartín")

        # Should return results based on filter
        assert isinstance(result, list)

    async def test_get_historical_traffic(self, test_db_session, sample_traffic_data):
        """Test getting historical traffic data"""
        repository = TrafficRepository(test_db_session)

        # Create historical data
        historical_data = sample_traffic_data.copy()
        historical_data["date"] = datetime.now(timezone.utc) - timedelta(days=1)
        await repository.bulk_create_traffic_data([historical_data])

        start_date = datetime.now(timezone.utc) - timedelta(days=2)
        end_date = datetime.now(timezone.utc)

        result = await repository.get_historical_traffic("madrid", start_date, end_date)

        assert len(result) >= 1

    async def test_create_measurement_point(self, test_db_session):
        """Test creating traffic measurement point"""
        repository = TrafficRepository(test_db_session)

        point_data = {
            "point_id": "TEST_POINT_001",
            "name": "Test Measurement Point",
            "city": "madrid",
            "point_type": "TEST",
            "latitude": 40.4168,
            "longitude": -3.7038,
            "district": "Centro",
            "road_name": "Test Road",
            "is_active": True
        }

        point = await repository.create_measurement_point(point_data)

        assert point is not None
        assert point.point_id == "TEST_POINT_001"
        assert point.name == "Test Measurement Point"

    async def test_get_measurement_points(self, test_db_session):
        """Test getting measurement points"""
        repository = TrafficRepository(test_db_session)

        # Create test point
        point_data = {
            "point_id": "TEST_POINT_001",
            "name": "Test Point",
            "city": "madrid",
            "point_type": "TEST",
            "latitude": 40.4168,
            "longitude": -3.7038,
            "is_active": True
        }
        await repository.create_measurement_point(point_data)

        points = await repository.get_measurement_points("madrid")

        assert len(points) == 1
        assert points[0].point_id == "TEST_POINT_001"

    async def test_get_measurement_points_with_filters(self, test_db_session):
        """Test getting measurement points with filters"""
        repository = TrafficRepository(test_db_session)

        # Create test points with different types
        for i, point_type in enumerate(["M30", "URB", "TEST"]):
            point_data = {
                "point_id": f"TEST_POINT_{i:03d}",
                "name": f"Test Point {i}",
                "city": "madrid",
                "point_type": point_type,
                "latitude": 40.4168,
                "longitude": -3.7038,
                "is_active": True
            }
            await repository.create_measurement_point(point_data)

        # Filter by type
        points = await repository.get_measurement_points("madrid", road_type="M30")

        assert len(points) == 1
        assert points[0].point_type == "M30"

    async def test_get_traffic_analytics(self, populated_traffic_db):
        """Test getting traffic analytics"""
        repository = TrafficRepository(populated_traffic_db)

        analytics = await repository.get_traffic_analytics("madrid")

        assert isinstance(analytics, dict)
        assert "total_measurements" in analytics
        assert "average_volume" in analytics

    async def test_create_traffic_job(self, test_db_session, sample_tenant_id):
        """Test creating traffic collection job"""
        repository = TrafficRepository(test_db_session)

        job_data = {
            "job_type": "current",
            "city": "madrid",
            "status": "pending",
            "scheduled_at": datetime.utcnow(),
            "tenant_id": sample_tenant_id
        }

        job = await repository.create_traffic_job(job_data)

        assert job is not None
        assert job.job_type == "current"
        assert job.status == "pending"

    async def test_update_traffic_job(self, test_db_session, sample_tenant_id):
        """Test updating traffic job"""
        repository = TrafficRepository(test_db_session)

        # Create job first
        job_data = {
            "job_type": "current",
            "city": "madrid",
            "status": "pending",
            "scheduled_at": datetime.utcnow(),
            "tenant_id": sample_tenant_id
        }
        job = await repository.create_traffic_job(job_data)

        # Update job
        update_data = {
            "status": "completed",
            "completed_at": datetime.utcnow(),
            "success_count": 10
        }

        success = await repository.update_traffic_job(job.id, update_data)

        assert success is True

    async def test_get_traffic_jobs(self, test_db_session, sample_tenant_id):
        """Test getting traffic jobs"""
        repository = TrafficRepository(test_db_session)

        # Create test job
        job_data = {
            "job_type": "historical",
            "city": "madrid",
            "status": "completed",
            "scheduled_at": datetime.utcnow(),
            "tenant_id": sample_tenant_id
        }
        await repository.create_traffic_job(job_data)

        jobs = await repository.get_traffic_jobs()

        assert len(jobs) >= 1
        assert any(job.job_type == "historical" for job in jobs)

    async def test_bulk_create_performance(self, test_db_session):
        """Test bulk create performance"""
        repository = TrafficRepository(test_db_session)

        # Create large dataset
        bulk_data = []
        for i in range(100):
            data = {
                "city": "madrid",
                "location_id": f"PM_TEST_{i:03d}",
                "date": datetime.now(timezone.utc),
                "measurement_point_id": f"PM_TEST_{i:03d}",
                "measurement_point_name": f"Test Point {i}",
                "measurement_point_type": "TEST",
                "traffic_volume": 100 + i,
                "average_speed": 50.0,
                "congestion_level": "medium",
                "occupation_percentage": 50.0,
                "latitude": 40.4168,
                "longitude": -3.7038,
                "source": "test"
            }
            bulk_data.append(data)

        import time
        start_time = time.time()

        count = await repository.bulk_create_traffic_data(bulk_data)

        end_time = time.time()
        execution_time = end_time - start_time

        assert count == 100
        assert execution_time < 3.0  # Should complete in under 3 seconds
445
services/external/tests/unit/test_services.py
vendored
Normal file
445
services/external/tests/unit/test_services.py
vendored
Normal file
@@ -0,0 +1,445 @@
|
||||
# services/external/tests/unit/test_services.py
|
||||
"""
|
||||
Unit tests for External Service Services
|
||||
"""
|
||||
|
||||
import pytest
|
||||
from datetime import datetime, timezone, timedelta
|
||||
from unittest.mock import AsyncMock, patch
|
||||
from uuid import uuid4
|
||||
|
||||
from app.services.weather_service import WeatherService
|
||||
from app.services.traffic_service import TrafficService
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
class TestWeatherService:
|
||||
"""Test Weather Service business logic"""
|
||||
|
||||
@pytest.fixture
|
||||
def weather_service(self):
|
||||
"""Create weather service instance"""
|
||||
return WeatherService()
|
||||
|
||||
async def test_get_current_weather_from_cache(self, weather_service):
|
||||
"""Test getting current weather from cache"""
|
||||
with patch('app.services.weather_service.get_db_transaction') as mock_get_db:
|
||||
mock_db = AsyncMock()
|
||||
mock_get_db.return_value.__aenter__.return_value = mock_db
|
||||
|
||||
mock_repository = AsyncMock()
|
||||
mock_weather = AsyncMock()
|
||||
mock_weather.date = datetime.now(timezone.utc) - timedelta(minutes=30) # Fresh data
|
||||
mock_weather.to_dict.return_value = {"temperature": 18.5, "city": "madrid"}
|
||||
mock_repository.get_current_weather.return_value = mock_weather
|
||||
|
||||
with patch('app.services.weather_service.WeatherRepository', return_value=mock_repository):
|
||||
result = await weather_service.get_current_weather("madrid")
|
||||
|
||||
assert result is not None
|
||||
assert result["temperature"] == 18.5
|
||||
assert result["city"] == "madrid"
|
||||
|
||||
async def test_get_current_weather_fetch_from_api(self, weather_service, mock_aemet_response):
|
||||
"""Test getting current weather from API when cache is stale"""
|
||||
with patch('app.services.weather_service.get_db_transaction') as mock_get_db:
|
||||
mock_db = AsyncMock()
|
||||
mock_get_db.return_value.__aenter__.return_value = mock_db
|
||||
|
||||
mock_repository = AsyncMock()
|
||||
# No cached data or stale data
|
||||
mock_repository.get_current_weather.return_value = None
|
||||
mock_stored = AsyncMock()
|
||||
mock_stored.to_dict.return_value = {"temperature": 20.0}
|
||||
mock_repository.create_weather_data.return_value = mock_stored
|
||||
|
||||
# Mock AEMET client
|
||||
mock_client = AsyncMock()
|
||||
mock_client.get_current_weather.return_value = mock_aemet_response
|
||||
|
||||
with patch('app.services.weather_service.WeatherRepository', return_value=mock_repository):
|
||||
weather_service.aemet_client = mock_client
|
||||
|
||||
result = await weather_service.get_current_weather("madrid")
|
||||
|
||||
assert result is not None
|
||||
assert result["temperature"] == 20.0
|
||||
mock_client.get_current_weather.assert_called_once()
|
||||
|
||||
async def test_get_weather_forecast_from_cache(self, weather_service):
|
||||
"""Test getting weather forecast from cache"""
|
||||
with patch('app.services.weather_service.get_db_transaction') as mock_get_db:
|
||||
mock_db = AsyncMock()
|
||||
mock_get_db.return_value.__aenter__.return_value = mock_db
|
||||
|
||||
mock_repository = AsyncMock()
|
||||
mock_forecast = [AsyncMock(), AsyncMock()]
|
||||
for item in mock_forecast:
|
||||
item.created_at = datetime.now(timezone.utc) - timedelta(hours=1) # Fresh
|
||||
item.to_dict.return_value = {"temperature": 22.0}
|
||||
mock_repository.get_weather_forecast.return_value = mock_forecast
|
||||
|
||||
with patch('app.services.weather_service.WeatherRepository', return_value=mock_repository):
|
||||
result = await weather_service.get_weather_forecast("madrid", 7)
|
||||
|
||||
assert len(result) == 2
|
||||
assert all(item["temperature"] == 22.0 for item in result)
|
||||
|
||||
async def test_get_weather_forecast_fetch_from_api(self, weather_service):
|
||||
"""Test getting weather forecast from API when cache is stale"""
|
||||
with patch('app.services.weather_service.get_db_transaction') as mock_get_db:
|
||||
mock_db = AsyncMock()
|
||||
mock_get_db.return_value.__aenter__.return_value = mock_db
|
||||
|
||||
mock_repository = AsyncMock()
|
||||
# No cached data
|
||||
mock_repository.get_weather_forecast.return_value = []
|
||||
mock_stored = AsyncMock()
|
||||
mock_stored.to_dict.return_value = {"temperature": 25.0}
|
||||
mock_repository.create_weather_data.return_value = mock_stored
|
||||
|
||||
# Mock AEMET client
|
||||
mock_client = AsyncMock()
|
||||
mock_client.get_forecast.return_value = [
|
||||
{"forecast_date": datetime.now(), "temperature": 25.0}
|
||||
]
|
||||
|
||||
with patch('app.services.weather_service.WeatherRepository', return_value=mock_repository):
|
||||
weather_service.aemet_client = mock_client
|
||||
|
||||
result = await weather_service.get_weather_forecast("madrid", 7)
|
||||
|
||||
assert len(result) == 1
|
||||
assert result[0]["temperature"] == 25.0
|
||||
mock_client.get_forecast.assert_called_once()
|
||||
|
||||
async def test_get_historical_weather(self, weather_service, sample_tenant_id):
|
||||
"""Test getting historical weather data"""
|
||||
start_date = datetime.now(timezone.utc) - timedelta(days=7)
|
||||
end_date = datetime.now(timezone.utc)
|
||||
|
||||
with patch('app.services.weather_service.get_db_transaction') as mock_get_db:
|
||||
mock_db = AsyncMock()
|
||||
mock_get_db.return_value.__aenter__.return_value = mock_db
|
||||
|
||||
mock_repository = AsyncMock()
|
||||
mock_historical = [AsyncMock(), AsyncMock()]
|
||||
for item in mock_historical:
|
||||
item.to_dict.return_value = {"temperature": 18.0}
|
||||
mock_repository.get_historical_weather.return_value = mock_historical
|
||||
|
||||
with patch('app.services.weather_service.WeatherRepository', return_value=mock_repository):
|
||||
result = await weather_service.get_historical_weather(
|
||||
"madrid", start_date, end_date, sample_tenant_id
|
||||
)
|
||||
|
||||
assert len(result) == 2
|
||||
assert all(item["temperature"] == 18.0 for item in result)
|
||||
|
||||
async def test_get_weather_stations(self, weather_service):
|
||||
"""Test getting weather stations"""
|
||||
with patch('app.services.weather_service.get_db_transaction') as mock_get_db:
|
||||
mock_db = AsyncMock()
|
||||
mock_get_db.return_value.__aenter__.return_value = mock_db
|
||||
|
||||
mock_repository = AsyncMock()
|
||||
mock_stations = [AsyncMock()]
|
||||
mock_stations[0].to_dict.return_value = {"station_id": "TEST_001"}
|
||||
mock_repository.get_weather_stations.return_value = mock_stations
|
||||
|
||||
with patch('app.services.weather_service.WeatherRepository', return_value=mock_repository):
|
||||
result = await weather_service.get_weather_stations("madrid")
|
||||
|
||||
assert len(result) == 1
|
||||
assert result[0]["station_id"] == "TEST_001"
|
||||
|
||||
async def test_trigger_weather_collection(self, weather_service, sample_tenant_id):
|
||||
"""Test triggering weather data collection"""
|
||||
with patch('app.services.weather_service.get_db_transaction') as mock_get_db:
|
||||
mock_db = AsyncMock()
|
||||
mock_get_db.return_value.__aenter__.return_value = mock_db
|
||||
|
||||
mock_repository = AsyncMock()
|
||||
mock_job = AsyncMock()
|
||||
mock_job.id = uuid4()
|
||||
mock_job.to_dict.return_value = {"id": str(mock_job.id), "status": "pending"}
|
||||
mock_repository.create_weather_job.return_value = mock_job
|
||||
|
||||
with patch('app.services.weather_service.WeatherRepository', return_value=mock_repository):
|
||||
result = await weather_service.trigger_weather_collection(
|
||||
"madrid", "current", sample_tenant_id
|
||||
)
|
||||
|
||||
assert result["status"] == "pending"
|
||||
mock_repository.create_weather_job.assert_called_once()
|
||||
|
||||
async def test_process_weather_collection_job(self, weather_service):
|
||||
"""Test processing weather collection job"""
|
||||
job_id = uuid4()
|
||||
|
||||
with patch('app.services.weather_service.get_db_transaction') as mock_get_db:
|
||||
mock_db = AsyncMock()
|
||||
mock_get_db.return_value.__aenter__.return_value = mock_db
|
||||
|
||||
mock_repository = AsyncMock()
|
||||
|
||||
# Mock job
|
||||
mock_job = AsyncMock()
|
||||
mock_job.id = job_id
|
||||
mock_job.job_type = "current"
|
||||
mock_job.city = "madrid"
|
||||
|
||||
mock_repository.get_weather_jobs.return_value = [mock_job]
|
||||
mock_repository.update_weather_job.return_value = True
|
||||
|
||||
# Mock updated job after completion
|
||||
mock_updated_job = AsyncMock()
|
||||
mock_updated_job.to_dict.return_value = {"id": str(job_id), "status": "completed"}
|
||||
|
||||
# Mock methods for different calls
|
||||
def mock_get_jobs_side_effect():
|
||||
return [mock_updated_job] # Return completed job
|
||||
|
||||
mock_repository.get_weather_jobs.side_effect = [
|
||||
[mock_job], # First call returns pending job
|
||||
[mock_updated_job] # Second call returns completed job
|
||||
]
|
||||
|
||||
with patch('app.services.weather_service.WeatherRepository', return_value=mock_repository):
|
||||
with patch.object(weather_service, '_collect_current_weather', return_value=1):
|
||||
result = await weather_service.process_weather_collection_job(job_id)
|
||||
|
||||
assert result["status"] == "completed"
|
||||
|
||||
async def test_map_weather_condition(self, weather_service):
|
||||
"""Test weather condition mapping"""
|
||||
test_cases = [
|
||||
("Soleado", "clear"),
|
||||
("Nublado", "cloudy"),
|
||||
("Parcialmente nublado", "partly_cloudy"),
|
||||
("Lluvioso", "rainy"),
|
||||
("Nevando", "snowy"),
|
||||
("Tormenta", "stormy"),
|
||||
("Desconocido", "unknown")
|
||||
]
|
||||
|
||||
for description, expected in test_cases:
|
||||
result = weather_service._map_weather_condition(description)
|
||||
assert result == expected
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
class TestTrafficService:
|
||||
"""Test Traffic Service business logic"""
|
||||
|
||||
@pytest.fixture
|
||||
def traffic_service(self):
|
||||
"""Create traffic service instance"""
|
||||
return TrafficService()
|
||||
|
||||
async def test_get_current_traffic_from_cache(self, traffic_service):
|
||||
"""Test getting current traffic from cache"""
|
||||
with patch('app.services.traffic_service.get_db_transaction') as mock_get_db:
|
||||
mock_db = AsyncMock()
|
||||
mock_get_db.return_value.__aenter__.return_value = mock_db
|
||||
|
||||
mock_repository = AsyncMock()
|
||||
mock_traffic = [AsyncMock()]
|
||||
mock_traffic[0].date = datetime.now(timezone.utc) - timedelta(minutes=5) # Fresh
|
||||
mock_traffic[0].to_dict.return_value = {"traffic_volume": 850}
|
||||
mock_repository.get_current_traffic.return_value = mock_traffic
|
||||
|
||||
with patch('app.services.traffic_service.TrafficRepository', return_value=mock_repository):
|
||||
result = await traffic_service.get_current_traffic("madrid")
|
||||
|
||||
assert len(result) == 1
|
||||
assert result[0]["traffic_volume"] == 850
|
||||
|
||||
    async def test_get_current_traffic_fetch_from_api(self, traffic_service, mock_madrid_traffic_xml):
        """Test getting current traffic from API when cache is stale"""
        with patch('app.services.traffic_service.get_db_transaction') as mock_get_db:
            mock_db = AsyncMock()
            mock_get_db.return_value.__aenter__.return_value = mock_db

            mock_repository = AsyncMock()
            # No cached data
            mock_repository.get_current_traffic.return_value = []
            mock_repository.bulk_create_traffic_data.return_value = 2

            # Mock clients
            mock_client = AsyncMock()
            mock_client.fetch_current_traffic_xml.return_value = mock_madrid_traffic_xml

            mock_processor = AsyncMock()
            mock_processor.process_current_traffic_xml.return_value = [
                {"traffic_volume": 850, "measurement_point_id": "PM_M30_001"},
                {"traffic_volume": 320, "measurement_point_id": "PM_URB_002"}
            ]

            with patch('app.services.traffic_service.TrafficRepository', return_value=mock_repository):
                traffic_service.madrid_client = mock_client
                traffic_service.madrid_processor = mock_processor

                result = await traffic_service.get_current_traffic("madrid")

                assert len(result) == 2
                assert result[0]["traffic_volume"] == 850
                mock_client.fetch_current_traffic_xml.assert_called_once()

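    # Illustrative sketch only (not part of this commit): the two tests above
    # imply a cache-then-fetch flow in TrafficService.get_current_traffic along
    # these lines (names inferred from the mocks, not confirmed):
    #
    #     async def get_current_traffic(self, city: str) -> list:
    #         async with get_db_transaction() as db:
    #             repo = TrafficRepository(db)
    #             cached = await repo.get_current_traffic(city)
    #             if self._is_traffic_data_fresh(cached):
    #                 return [row.to_dict() for row in cached]
    #             xml = await self.madrid_client.fetch_current_traffic_xml()
    #             records = await self.madrid_processor.process_current_traffic_xml(xml)
    #             await repo.bulk_create_traffic_data(records)
    #             return records
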
    async def test_get_historical_traffic(self, traffic_service, sample_tenant_id):
        """Test getting historical traffic data"""
        start_date = datetime.now(timezone.utc) - timedelta(days=7)
        end_date = datetime.now(timezone.utc)

        with patch('app.services.traffic_service.get_db_transaction') as mock_get_db:
            mock_db = AsyncMock()
            mock_get_db.return_value.__aenter__.return_value = mock_db

            mock_repository = AsyncMock()
            mock_historical = [AsyncMock(), AsyncMock()]
            for item in mock_historical:
                item.to_dict.return_value = {"traffic_volume": 500}
            mock_repository.get_historical_traffic.return_value = mock_historical

            with patch('app.services.traffic_service.TrafficRepository', return_value=mock_repository):
                result = await traffic_service.get_historical_traffic(
                    "madrid", start_date, end_date, tenant_id=sample_tenant_id
                )

                assert len(result) == 2
                assert all(item["traffic_volume"] == 500 for item in result)

    async def test_get_measurement_points(self, traffic_service):
        """Test getting measurement points"""
        with patch('app.services.traffic_service.get_db_transaction') as mock_get_db:
            mock_db = AsyncMock()
            mock_get_db.return_value.__aenter__.return_value = mock_db

            mock_repository = AsyncMock()
            mock_points = [AsyncMock()]
            mock_points[0].to_dict.return_value = {"point_id": "PM_TEST_001"}
            mock_repository.get_measurement_points.return_value = mock_points

            with patch('app.services.traffic_service.TrafficRepository', return_value=mock_repository):
                result = await traffic_service.get_measurement_points("madrid")

                assert len(result) == 1
                assert result[0]["point_id"] == "PM_TEST_001"

    async def test_get_traffic_analytics(self, traffic_service):
        """Test getting traffic analytics"""
        start_date = datetime.now(timezone.utc) - timedelta(days=30)
        end_date = datetime.now(timezone.utc)

        with patch('app.services.traffic_service.get_db_transaction') as mock_get_db:
            mock_db = AsyncMock()
            mock_get_db.return_value.__aenter__.return_value = mock_db

            mock_repository = AsyncMock()
            mock_analytics = {
                "total_measurements": 1000,
                "average_volume": 650.5,
                "peak_hour": "08:00"
            }
            mock_repository.get_traffic_analytics.return_value = mock_analytics

            with patch('app.services.traffic_service.TrafficRepository', return_value=mock_repository):
                result = await traffic_service.get_traffic_analytics(
                    "madrid", start_date, end_date
                )

                assert result["total_measurements"] == 1000
                assert result["average_volume"] == 650.5
                assert "generated_at" in result

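    # Illustrative sketch only (not part of this commit): the repository mock
    # above does not include "generated_at", so the service is assumed to stamp
    # the analytics payload itself before returning it, e.g.:
    #
    #     analytics = await repo.get_traffic_analytics(city, start_date, end_date)
    #     analytics["generated_at"] = datetime.now(timezone.utc).isoformat()
    #     return analytics
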
    async def test_trigger_traffic_collection(self, traffic_service, sample_tenant_id):
        """Test triggering traffic data collection"""
        with patch('app.services.traffic_service.get_db_transaction') as mock_get_db:
            mock_db = AsyncMock()
            mock_get_db.return_value.__aenter__.return_value = mock_db

            mock_repository = AsyncMock()
            mock_job = AsyncMock()
            mock_job.id = uuid4()
            mock_job.to_dict.return_value = {"id": str(mock_job.id), "status": "pending"}
            mock_repository.create_traffic_job.return_value = mock_job

            with patch('app.services.traffic_service.TrafficRepository', return_value=mock_repository):
                result = await traffic_service.trigger_traffic_collection(
                    "madrid", "current", user_id=sample_tenant_id
                )

                assert result["status"] == "pending"
                mock_repository.create_traffic_job.assert_called_once()

    async def test_process_traffic_collection_job(self, traffic_service):
        """Test processing traffic collection job"""
        job_id = uuid4()

        with patch('app.services.traffic_service.get_db_transaction') as mock_get_db:
            mock_db = AsyncMock()
            mock_get_db.return_value.__aenter__.return_value = mock_db

            mock_repository = AsyncMock()

            # Mock job
            mock_job = AsyncMock()
            mock_job.id = job_id
            mock_job.job_type = "current"
            mock_job.city = "madrid"
            mock_job.location_pattern = None

            mock_repository.get_traffic_jobs.return_value = [mock_job]
            mock_repository.update_traffic_job.return_value = True

            # Mock updated job after completion
            mock_updated_job = AsyncMock()
            mock_updated_job.to_dict.return_value = {"id": str(job_id), "status": "completed"}

            mock_repository.get_traffic_jobs.side_effect = [
                [mock_job],  # First call returns pending job
                [mock_updated_job]  # Second call returns completed job
            ]

            with patch('app.services.traffic_service.TrafficRepository', return_value=mock_repository):
                with patch.object(traffic_service, '_collect_current_traffic', return_value=125):
                    result = await traffic_service.process_traffic_collection_job(job_id)

                    assert result["status"] == "completed"

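    # Illustrative sketch only (not part of this commit): this test and its
    # weather counterpart assume the job processor loads the job, delegates to
    # the type-specific collector, updates the job row, then re-reads it.
    # Roughly (method and keyword names here are guesses):
    #
    #     job = (await repo.get_traffic_jobs(job_id=job_id))[0]
    #     if job.job_type == "current":
    #         collected = await self._collect_current_traffic(job.city, job.location_pattern)
    #     await repo.update_traffic_job(job_id, status="completed", records_collected=collected)
    #     return (await repo.get_traffic_jobs(job_id=job_id))[0].to_dict()
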
    async def test_is_traffic_data_fresh(self, traffic_service):
        """Test traffic data freshness check"""
        from app.models.traffic import TrafficData

        # Fresh data (5 minutes old)
        fresh_data = [AsyncMock()]
        fresh_data[0].date = datetime.now(timezone.utc) - timedelta(minutes=5)

        result = traffic_service._is_traffic_data_fresh(fresh_data)
        assert result is True

        # Stale data (15 minutes old)
        stale_data = [AsyncMock()]
        stale_data[0].date = datetime.now(timezone.utc) - timedelta(minutes=15)

        result = traffic_service._is_traffic_data_fresh(stale_data)
        assert result is False

        # Empty data
        result = traffic_service._is_traffic_data_fresh([])
        assert result is False

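    # Illustrative sketch only (not part of this commit): given the 5-minute
    # (fresh) and 15-minute (stale) cases above, the freshness check is assumed
    # to use a cutoff somewhere between the two (10 minutes here is a guess):
    #
    #     def _is_traffic_data_fresh(self, rows, max_age_minutes: int = 10) -> bool:
    #         if not rows:
    #             return False
    #         age = datetime.now(timezone.utc) - rows[0].date
    #         return age <= timedelta(minutes=max_age_minutes)
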
    async def test_collect_current_traffic(self, traffic_service):
        """Test current traffic collection"""
        with patch('app.services.traffic_service.get_db_transaction') as mock_get_db:
            mock_db = AsyncMock()
            mock_get_db.return_value.__aenter__.return_value = mock_db

            mock_repository = AsyncMock()
            mock_repository.bulk_create_traffic_data.return_value = 10

            with patch('app.services.traffic_service.TrafficRepository', return_value=mock_repository):
                with patch.object(traffic_service, '_fetch_current_traffic_from_api', return_value=[{} for _ in range(10)]):
                    result = await traffic_service._collect_current_traffic("madrid", None)

                    assert result == 10
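
    # Illustrative sketch only (not part of this commit): per the mocks above,
    # _collect_current_traffic is assumed to fetch raw records and return the
    # count reported by the bulk insert:
    #
    #     async def _collect_current_traffic(self, city, location_pattern):
    #         records = await self._fetch_current_traffic_from_api(city, location_pattern)
    #         async with get_db_transaction() as db:
    #             return await TrafficRepository(db).bulk_create_traffic_data(records)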