REFACTOR external service and improve websocket training

This commit is contained in:
Urtzi Alfaro
2025-10-09 14:11:02 +02:00
parent 7c72f83c51
commit 3c689b4f98
111 changed files with 13289 additions and 2374 deletions

View File

@@ -0,0 +1,391 @@
# services/external/app/api/city_operations.py
"""
City Operations API - New endpoints for city-based data access
"""
from fastapi import APIRouter, Depends, HTTPException, Query, Path
from typing import List
from datetime import datetime
from uuid import UUID
import structlog
from app.schemas.city_data import CityInfoResponse, DataAvailabilityResponse
from app.schemas.weather import WeatherDataResponse, WeatherForecastResponse, WeatherForecastAPIResponse
from app.schemas.traffic import TrafficDataResponse
from app.registry.city_registry import CityRegistry
from app.registry.geolocation_mapper import GeolocationMapper
from app.repositories.city_data_repository import CityDataRepository
from app.cache.redis_cache import ExternalDataCache
from app.services.weather_service import WeatherService
from app.services.traffic_service import TrafficService
from shared.routing.route_builder import RouteBuilder
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.database import get_db
route_builder = RouteBuilder('external')
router = APIRouter(tags=["city-operations"])
logger = structlog.get_logger()
@router.get(
route_builder.build_base_route("cities"),
response_model=List[CityInfoResponse]
)
async def list_supported_cities():
"""List all enabled cities with data availability"""
registry = CityRegistry()
cities = registry.get_enabled_cities()
return [
CityInfoResponse(
city_id=city.city_id,
name=city.name,
country=city.country.value,
latitude=city.latitude,
longitude=city.longitude,
radius_km=city.radius_km,
weather_provider=city.weather_provider.value,
traffic_provider=city.traffic_provider.value,
enabled=city.enabled
)
for city in cities
]
@router.get(
route_builder.build_operations_route("cities/{city_id}/availability"),
response_model=DataAvailabilityResponse
)
async def get_city_data_availability(
city_id: str = Path(..., description="City ID"),
db: AsyncSession = Depends(get_db)
):
"""Get data availability for a specific city"""
registry = CityRegistry()
city = registry.get_city(city_id)
if not city:
raise HTTPException(status_code=404, detail="City not found")
from sqlalchemy import text
weather_stmt = text(
"SELECT MIN(date), MAX(date), COUNT(*) FROM city_weather_data WHERE city_id = :city_id"
)
weather_result = await db.execute(weather_stmt, {"city_id": city_id})
weather_row = weather_result.fetchone()
weather_min, weather_max, weather_count = weather_row if weather_row else (None, None, 0)
traffic_stmt = text(
"SELECT MIN(date), MAX(date), COUNT(*) FROM city_traffic_data WHERE city_id = :city_id"
)
traffic_result = await db.execute(traffic_stmt, {"city_id": city_id})
traffic_row = traffic_result.fetchone()
traffic_min, traffic_max, traffic_count = traffic_row if traffic_row else (None, None, 0)
return DataAvailabilityResponse(
city_id=city_id,
city_name=city.name,
weather_available=weather_count > 0,
weather_start_date=weather_min.isoformat() if weather_min else None,
weather_end_date=weather_max.isoformat() if weather_max else None,
weather_record_count=weather_count or 0,
traffic_available=traffic_count > 0,
traffic_start_date=traffic_min.isoformat() if traffic_min else None,
traffic_end_date=traffic_max.isoformat() if traffic_max else None,
traffic_record_count=traffic_count or 0
)
@router.get(
route_builder.build_operations_route("historical-weather-optimized"),
response_model=List[WeatherDataResponse]
)
async def get_historical_weather_optimized(
tenant_id: UUID = Path(..., description="Tenant ID"),
latitude: float = Query(..., description="Latitude"),
longitude: float = Query(..., description="Longitude"),
start_date: datetime = Query(..., description="Start date"),
end_date: datetime = Query(..., description="End date"),
db: AsyncSession = Depends(get_db)
):
"""
Get historical weather data from the city-based cache.
This is the fast path used by the training service.
"""
try:
mapper = GeolocationMapper()
mapping = mapper.map_tenant_to_city(latitude, longitude)
if not mapping:
raise HTTPException(
status_code=404,
detail="No supported city found for this location"
)
city, distance = mapping
logger.info(
"Fetching historical weather from cache",
tenant_id=tenant_id,
city=city.name,
distance_km=round(distance, 2)
)
cache = ExternalDataCache()
cached_data = await cache.get_cached_weather(
city.city_id, start_date, end_date
)
if cached_data:
logger.info("Weather cache hit", records=len(cached_data))
return cached_data
repo = CityDataRepository(db)
db_records = await repo.get_weather_by_city_and_range(
city.city_id, start_date, end_date
)
response_data = [
WeatherDataResponse(
id=str(record.id),
location_id=f"{city.city_id}_{record.date.date()}",
date=record.date,
temperature=record.temperature,
precipitation=record.precipitation,
humidity=record.humidity,
wind_speed=record.wind_speed,
pressure=record.pressure,
description=record.description,
source=record.source,
raw_data=None,
created_at=record.created_at,
updated_at=record.updated_at
)
for record in db_records
]
await cache.set_cached_weather(
city.city_id, start_date, end_date, response_data
)
logger.info(
"Historical weather data retrieved",
records=len(response_data),
source="database"
)
return response_data
except HTTPException:
raise
except Exception as e:
logger.error("Error fetching historical weather", error=str(e))
raise HTTPException(status_code=500, detail="Internal server error")
@router.get(
route_builder.build_operations_route("historical-traffic-optimized"),
response_model=List[TrafficDataResponse]
)
async def get_historical_traffic_optimized(
tenant_id: UUID = Path(..., description="Tenant ID"),
latitude: float = Query(..., description="Latitude"),
longitude: float = Query(..., description="Longitude"),
start_date: datetime = Query(..., description="Start date"),
end_date: datetime = Query(..., description="End date"),
db: AsyncSession = Depends(get_db)
):
"""
Get historical traffic data from the city-based cache.
This is the fast path used by the training service.
"""
try:
mapper = GeolocationMapper()
mapping = mapper.map_tenant_to_city(latitude, longitude)
if not mapping:
raise HTTPException(
status_code=404,
detail="No supported city found for this location"
)
city, distance = mapping
logger.info(
"Fetching historical traffic from cache",
tenant_id=tenant_id,
city=city.name,
distance_km=round(distance, 2)
)
cache = ExternalDataCache()
cached_data = await cache.get_cached_traffic(
city.city_id, start_date, end_date
)
if cached_data:
logger.info("Traffic cache hit", records=len(cached_data))
return cached_data
logger.debug("Starting DB query for traffic", city_id=city.city_id)
repo = CityDataRepository(db)
db_records = await repo.get_traffic_by_city_and_range(
city.city_id, start_date, end_date
)
logger.debug("DB query completed", records=len(db_records))
logger.debug("Creating response objects")
response_data = [
TrafficDataResponse(
date=record.date,
traffic_volume=record.traffic_volume,
pedestrian_count=record.pedestrian_count,
congestion_level=record.congestion_level,
average_speed=record.average_speed,
source=record.source
)
for record in db_records
]
logger.debug("Response objects created", count=len(response_data))
logger.debug("Caching traffic data")
await cache.set_cached_traffic(
city.city_id, start_date, end_date, response_data
)
logger.debug("Caching completed")
logger.info(
"Historical traffic data retrieved",
records=len(response_data),
source="database"
)
return response_data
except HTTPException:
raise
except Exception as e:
logger.error("Error fetching historical traffic", error=str(e))
raise HTTPException(status_code=500, detail="Internal server error")
# ================================================================
# REAL-TIME & FORECAST ENDPOINTS
# ================================================================
@router.get(
route_builder.build_operations_route("weather/current"),
response_model=WeatherDataResponse
)
async def get_current_weather(
tenant_id: UUID = Path(..., description="Tenant ID"),
latitude: float = Query(..., description="Latitude"),
longitude: float = Query(..., description="Longitude")
):
"""
Get current weather for a location (real-time data from AEMET)
"""
try:
weather_service = WeatherService()
weather_data = await weather_service.get_current_weather(latitude, longitude)
if not weather_data:
raise HTTPException(
status_code=404,
detail="No weather data available for this location"
)
logger.info(
"Current weather retrieved",
tenant_id=tenant_id,
latitude=latitude,
longitude=longitude
)
return weather_data
except HTTPException:
raise
except Exception as e:
logger.error("Error fetching current weather", error=str(e))
raise HTTPException(status_code=500, detail="Internal server error")
@router.get(
route_builder.build_operations_route("weather/forecast")
)
async def get_weather_forecast(
tenant_id: UUID = Path(..., description="Tenant ID"),
latitude: float = Query(..., description="Latitude"),
longitude: float = Query(..., description="Longitude"),
days: int = Query(7, ge=1, le=14, description="Number of days to forecast")
):
"""
Get weather forecast for a location (from AEMET)
Returns list of forecast objects with: forecast_date, generated_at, temperature, precipitation, humidity, wind_speed, description, source
"""
try:
weather_service = WeatherService()
forecast_data = await weather_service.get_weather_forecast(latitude, longitude, days)
if not forecast_data:
raise HTTPException(
status_code=404,
detail="No forecast data available for this location"
)
logger.info(
"Weather forecast retrieved",
tenant_id=tenant_id,
latitude=latitude,
longitude=longitude,
days=days,
count=len(forecast_data)
)
return forecast_data
except HTTPException:
raise
except Exception as e:
logger.error("Error fetching weather forecast", error=str(e))
raise HTTPException(status_code=500, detail="Internal server error")
@router.get(
route_builder.build_operations_route("traffic/current"),
response_model=TrafficDataResponse
)
async def get_current_traffic(
tenant_id: UUID = Path(..., description="Tenant ID"),
latitude: float = Query(..., description="Latitude"),
longitude: float = Query(..., description="Longitude")
):
"""
Get current traffic conditions for a location (real-time data from Madrid OpenData)
"""
try:
traffic_service = TrafficService()
traffic_data = await traffic_service.get_current_traffic(latitude, longitude)
if not traffic_data:
raise HTTPException(
status_code=404,
detail="No traffic data available for this location"
)
logger.info(
"Current traffic retrieved",
tenant_id=tenant_id,
latitude=latitude,
longitude=longitude
)
return traffic_data
except HTTPException:
raise
except Exception as e:
logger.error("Error fetching current traffic", error=str(e))
raise HTTPException(status_code=500, detail="Internal server error")
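
A minimal sketch of how a consumer such as the training service might call the optimized historical-weather endpoint with httpx. The concrete URL path produced by RouteBuilder is not shown in this diff, so the path below is an assumption, not the actual route.
# Hypothetical client-side usage; URL prefix and tenant path segment are assumptions.
import httpx
from datetime import datetime

async def fetch_training_weather(tenant_id: str) -> list[dict]:
    params = {
        "latitude": 40.4168,
        "longitude": -3.7038,
        "start_date": datetime(2024, 1, 1).isoformat(),
        "end_date": datetime(2024, 12, 31).isoformat(),
    }
    # Assumed route shape; the real prefix comes from RouteBuilder('external').
    url = f"http://external-service/api/v1/external/{tenant_id}/operations/historical-weather-optimized"
    async with httpx.AsyncClient(timeout=30.0) as client:
        response = await client.get(url, params=params)
        response.raise_for_status()
        return response.json()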

View File

@@ -1,407 +0,0 @@
# services/external/app/api/external_operations.py
"""
External Operations API - Business operations for fetching external data
"""
from fastapi import APIRouter, Depends, HTTPException, Query, Path
from typing import List, Dict, Any
from datetime import datetime
from uuid import UUID
import structlog
from app.schemas.weather import (
WeatherDataResponse,
WeatherForecastResponse,
WeatherForecastRequest,
HistoricalWeatherRequest,
HourlyForecastRequest,
HourlyForecastResponse
)
from app.schemas.traffic import (
TrafficDataResponse,
TrafficForecastRequest,
HistoricalTrafficRequest
)
from app.services.weather_service import WeatherService
from app.services.traffic_service import TrafficService
from app.services.messaging import publish_weather_updated, publish_traffic_updated
from shared.auth.decorators import get_current_user_dep
from shared.auth.access_control import require_user_role
from shared.routing.route_builder import RouteBuilder
route_builder = RouteBuilder('external')
router = APIRouter(tags=["external-operations"])
logger = structlog.get_logger()
def get_weather_service():
"""Dependency injection for WeatherService"""
return WeatherService()
def get_traffic_service():
"""Dependency injection for TrafficService"""
return TrafficService()
# Weather Operations
@router.get(
route_builder.build_operations_route("weather/current"),
response_model=WeatherDataResponse
)
@require_user_role(['viewer', 'member', 'admin', 'owner'])
async def get_current_weather(
latitude: float = Query(..., description="Latitude"),
longitude: float = Query(..., description="Longitude"),
tenant_id: UUID = Path(..., description="Tenant ID"),
current_user: Dict[str, Any] = Depends(get_current_user_dep),
weather_service: WeatherService = Depends(get_weather_service)
):
"""Get current weather data for location from external API"""
try:
logger.debug("Getting current weather",
lat=latitude,
lon=longitude,
tenant_id=tenant_id,
user_id=current_user["user_id"])
weather = await weather_service.get_current_weather(latitude, longitude)
if not weather:
raise HTTPException(status_code=503, detail="Weather service temporarily unavailable")
try:
await publish_weather_updated({
"type": "current_weather_requested",
"tenant_id": str(tenant_id),
"latitude": latitude,
"longitude": longitude,
"requested_by": current_user["user_id"],
"timestamp": datetime.utcnow().isoformat()
})
except Exception as e:
logger.warning("Failed to publish weather event", error=str(e))
return weather
except HTTPException:
raise
except Exception as e:
logger.error("Failed to get current weather", error=str(e))
raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
@router.post(
route_builder.build_operations_route("weather/historical"),
response_model=List[WeatherDataResponse]
)
@require_user_role(['viewer', 'member', 'admin', 'owner'])
async def get_historical_weather(
request: HistoricalWeatherRequest,
tenant_id: UUID = Path(..., description="Tenant ID"),
current_user: Dict[str, Any] = Depends(get_current_user_dep),
weather_service: WeatherService = Depends(get_weather_service)
):
"""Get historical weather data with date range"""
try:
if request.end_date <= request.start_date:
raise HTTPException(status_code=400, detail="End date must be after start date")
if (request.end_date - request.start_date).days > 1000:
raise HTTPException(status_code=400, detail="Date range cannot exceed 1000 days")
historical_data = await weather_service.get_historical_weather(
request.latitude, request.longitude, request.start_date, request.end_date)
try:
await publish_weather_updated({
"type": "historical_requested",
"latitude": request.latitude,
"longitude": request.longitude,
"start_date": request.start_date.isoformat(),
"end_date": request.end_date.isoformat(),
"records_count": len(historical_data),
"timestamp": datetime.utcnow().isoformat()
})
except Exception as pub_error:
logger.warning("Failed to publish historical weather event", error=str(pub_error))
return historical_data
except HTTPException:
raise
except Exception as e:
logger.error("Unexpected error in historical weather API", error=str(e))
raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
@router.post(
route_builder.build_operations_route("weather/forecast"),
response_model=List[WeatherForecastResponse]
)
@require_user_role(['viewer', 'member', 'admin', 'owner'])
async def get_weather_forecast(
request: WeatherForecastRequest,
tenant_id: UUID = Path(..., description="Tenant ID"),
current_user: Dict[str, Any] = Depends(get_current_user_dep),
weather_service: WeatherService = Depends(get_weather_service)
):
"""Get weather forecast for location"""
try:
logger.debug("Getting weather forecast",
lat=request.latitude,
lon=request.longitude,
days=request.days,
tenant_id=tenant_id)
forecast = await weather_service.get_weather_forecast(request.latitude, request.longitude, request.days)
if not forecast:
logger.info("Weather forecast unavailable - returning empty list")
return []
try:
await publish_weather_updated({
"type": "forecast_requested",
"tenant_id": str(tenant_id),
"latitude": request.latitude,
"longitude": request.longitude,
"days": request.days,
"requested_by": current_user["user_id"],
"timestamp": datetime.utcnow().isoformat()
})
except Exception as e:
logger.warning("Failed to publish forecast event", error=str(e))
return forecast
except HTTPException:
raise
except Exception as e:
logger.error("Failed to get weather forecast", error=str(e))
raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
@router.post(
route_builder.build_operations_route("weather/hourly-forecast"),
response_model=List[HourlyForecastResponse]
)
@require_user_role(['viewer', 'member', 'admin', 'owner'])
async def get_hourly_weather_forecast(
request: HourlyForecastRequest,
tenant_id: UUID = Path(..., description="Tenant ID"),
current_user: Dict[str, Any] = Depends(get_current_user_dep),
weather_service: WeatherService = Depends(get_weather_service)
):
"""Get hourly weather forecast for location"""
try:
logger.debug("Getting hourly weather forecast",
lat=request.latitude,
lon=request.longitude,
hours=request.hours,
tenant_id=tenant_id)
hourly_forecast = await weather_service.get_hourly_forecast(
request.latitude, request.longitude, request.hours
)
if not hourly_forecast:
logger.info("Hourly weather forecast unavailable - returning empty list")
return []
try:
await publish_weather_updated({
"type": "hourly_forecast_requested",
"tenant_id": str(tenant_id),
"latitude": request.latitude,
"longitude": request.longitude,
"hours": request.hours,
"requested_by": current_user["user_id"],
"forecast_count": len(hourly_forecast),
"timestamp": datetime.utcnow().isoformat()
})
except Exception as e:
logger.warning("Failed to publish hourly forecast event", error=str(e))
return hourly_forecast
except HTTPException:
raise
except Exception as e:
logger.error("Failed to get hourly weather forecast", error=str(e))
raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
@router.get(
route_builder.build_operations_route("weather-status"),
response_model=dict
)
async def get_weather_status(
weather_service: WeatherService = Depends(get_weather_service)
):
"""Get weather API status and diagnostics"""
try:
aemet_status = "unknown"
aemet_message = "Not tested"
try:
test_weather = await weather_service.get_current_weather(40.4168, -3.7038)
if test_weather and hasattr(test_weather, 'source') and test_weather.source == "aemet":
aemet_status = "healthy"
aemet_message = "AEMET API responding correctly"
elif test_weather and hasattr(test_weather, 'source') and test_weather.source == "synthetic":
aemet_status = "degraded"
aemet_message = "Using synthetic weather data (AEMET API unavailable)"
else:
aemet_status = "unknown"
aemet_message = "Weather source unknown"
except Exception as test_error:
aemet_status = "unhealthy"
aemet_message = f"AEMET API test failed: {str(test_error)}"
return {
"status": aemet_status,
"message": aemet_message,
"timestamp": datetime.utcnow().isoformat()
}
except Exception as e:
logger.error("Weather status check failed", error=str(e))
raise HTTPException(status_code=500, detail=f"Status check failed: {str(e)}")
# Traffic Operations
@router.get(
route_builder.build_operations_route("traffic/current"),
response_model=TrafficDataResponse
)
@require_user_role(['viewer', 'member', 'admin', 'owner'])
async def get_current_traffic(
latitude: float = Query(..., description="Latitude"),
longitude: float = Query(..., description="Longitude"),
tenant_id: UUID = Path(..., description="Tenant ID"),
current_user: Dict[str, Any] = Depends(get_current_user_dep),
traffic_service: TrafficService = Depends(get_traffic_service)
):
"""Get current traffic data for location from external API"""
try:
logger.debug("Getting current traffic",
lat=latitude,
lon=longitude,
tenant_id=tenant_id,
user_id=current_user["user_id"])
traffic = await traffic_service.get_current_traffic(latitude, longitude)
if not traffic:
raise HTTPException(status_code=503, detail="Traffic service temporarily unavailable")
try:
await publish_traffic_updated({
"type": "current_traffic_requested",
"tenant_id": str(tenant_id),
"latitude": latitude,
"longitude": longitude,
"requested_by": current_user["user_id"],
"timestamp": datetime.utcnow().isoformat()
})
except Exception as e:
logger.warning("Failed to publish traffic event", error=str(e))
return traffic
except HTTPException:
raise
except Exception as e:
logger.error("Failed to get current traffic", error=str(e))
raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
@router.post(
route_builder.build_operations_route("traffic/historical"),
response_model=List[TrafficDataResponse]
)
@require_user_role(['viewer', 'member', 'admin', 'owner'])
async def get_historical_traffic(
request: HistoricalTrafficRequest,
tenant_id: UUID = Path(..., description="Tenant ID"),
current_user: Dict[str, Any] = Depends(get_current_user_dep),
traffic_service: TrafficService = Depends(get_traffic_service)
):
"""Get historical traffic data with date range"""
try:
if request.end_date <= request.start_date:
raise HTTPException(status_code=400, detail="End date must be after start date")
historical_data = await traffic_service.get_historical_traffic(
request.latitude, request.longitude, request.start_date, request.end_date)
try:
await publish_traffic_updated({
"type": "historical_requested",
"latitude": request.latitude,
"longitude": request.longitude,
"start_date": request.start_date.isoformat(),
"end_date": request.end_date.isoformat(),
"records_count": len(historical_data),
"timestamp": datetime.utcnow().isoformat()
})
except Exception as pub_error:
logger.warning("Failed to publish historical traffic event", error=str(pub_error))
return historical_data
except HTTPException:
raise
except Exception as e:
logger.error("Unexpected error in historical traffic API", error=str(e))
raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
@router.post(
route_builder.build_operations_route("traffic/forecast"),
response_model=List[TrafficDataResponse]
)
@require_user_role(['viewer', 'member', 'admin', 'owner'])
async def get_traffic_forecast(
request: TrafficForecastRequest,
tenant_id: UUID = Path(..., description="Tenant ID"),
current_user: Dict[str, Any] = Depends(get_current_user_dep),
traffic_service: TrafficService = Depends(get_traffic_service)
):
"""Get traffic forecast for location"""
try:
logger.debug("Getting traffic forecast",
lat=request.latitude,
lon=request.longitude,
hours=request.hours,
tenant_id=tenant_id)
forecast = await traffic_service.get_traffic_forecast(request.latitude, request.longitude, request.hours)
if not forecast:
logger.info("Traffic forecast unavailable - returning empty list")
return []
try:
await publish_traffic_updated({
"type": "forecast_requested",
"tenant_id": str(tenant_id),
"latitude": request.latitude,
"longitude": request.longitude,
"hours": request.hours,
"requested_by": current_user["user_id"],
"timestamp": datetime.utcnow().isoformat()
})
except Exception as e:
logger.warning("Failed to publish traffic forecast event", error=str(e))
return forecast
except HTTPException:
raise
except Exception as e:
logger.error("Failed to get traffic forecast", error=str(e))
raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")

View File

@@ -0,0 +1 @@
"""Cache module for external data service"""

View File

@@ -0,0 +1,178 @@
# services/external/app/cache/redis_cache.py
"""
Redis cache layer for fast training data access
"""
from typing import List, Dict, Any, Optional
import json
from datetime import datetime, timedelta
import structlog
import redis.asyncio as redis
from app.core.config import settings
logger = structlog.get_logger()
class ExternalDataCache:
"""Redis cache for external data service"""
def __init__(self):
self.redis_client = redis.from_url(
settings.REDIS_URL,
encoding="utf-8",
decode_responses=True
)
self.ttl = 86400 * 7
def _weather_cache_key(
self,
city_id: str,
start_date: datetime,
end_date: datetime
) -> str:
"""Generate cache key for weather data"""
return f"weather:{city_id}:{start_date.date()}:{end_date.date()}"
async def get_cached_weather(
self,
city_id: str,
start_date: datetime,
end_date: datetime
) -> Optional[List[Dict[str, Any]]]:
"""Get cached weather data"""
try:
key = self._weather_cache_key(city_id, start_date, end_date)
cached = await self.redis_client.get(key)
if cached:
logger.debug("Weather cache hit", city_id=city_id, key=key)
return json.loads(cached)
logger.debug("Weather cache miss", city_id=city_id, key=key)
return None
except Exception as e:
logger.error("Error reading weather cache", error=str(e))
return None
async def set_cached_weather(
self,
city_id: str,
start_date: datetime,
end_date: datetime,
data: List[Dict[str, Any]]
):
"""Set cached weather data"""
try:
key = self._weather_cache_key(city_id, start_date, end_date)
serializable_data = []
for record in data:
# Handle both dict and Pydantic model objects
if hasattr(record, 'model_dump'):
record_dict = record.model_dump()
elif hasattr(record, 'dict'):
record_dict = record.dict()
else:
record_dict = record.copy() if isinstance(record, dict) else dict(record)
# Convert any datetime fields to ISO format strings
for key_name, value in record_dict.items():
if isinstance(value, datetime):
record_dict[key_name] = value.isoformat()
serializable_data.append(record_dict)
await self.redis_client.setex(
key,
self.ttl,
json.dumps(serializable_data)
)
logger.debug("Weather data cached", city_id=city_id, records=len(data))
except Exception as e:
logger.error("Error caching weather data", error=str(e))
def _traffic_cache_key(
self,
city_id: str,
start_date: datetime,
end_date: datetime
) -> str:
"""Generate cache key for traffic data"""
return f"traffic:{city_id}:{start_date.date()}:{end_date.date()}"
async def get_cached_traffic(
self,
city_id: str,
start_date: datetime,
end_date: datetime
) -> Optional[List[Dict[str, Any]]]:
"""Get cached traffic data"""
try:
key = self._traffic_cache_key(city_id, start_date, end_date)
cached = await self.redis_client.get(key)
if cached:
logger.debug("Traffic cache hit", city_id=city_id, key=key)
return json.loads(cached)
logger.debug("Traffic cache miss", city_id=city_id, key=key)
return None
except Exception as e:
logger.error("Error reading traffic cache", error=str(e))
return None
async def set_cached_traffic(
self,
city_id: str,
start_date: datetime,
end_date: datetime,
data: List[Dict[str, Any]]
):
"""Set cached traffic data"""
try:
key = self._traffic_cache_key(city_id, start_date, end_date)
serializable_data = []
for record in data:
# Handle both dict and Pydantic model objects
if hasattr(record, 'model_dump'):
record_dict = record.model_dump()
elif hasattr(record, 'dict'):
record_dict = record.dict()
else:
record_dict = record.copy() if isinstance(record, dict) else dict(record)
# Convert any datetime fields to ISO format strings
for key_name, value in record_dict.items():
if isinstance(value, datetime):
record_dict[key_name] = value.isoformat()
serializable_data.append(record_dict)
await self.redis_client.setex(
key,
self.ttl,
json.dumps(serializable_data)
)
logger.debug("Traffic data cached", city_id=city_id, records=len(data))
except Exception as e:
logger.error("Error caching traffic data", error=str(e))
async def invalidate_city_cache(self, city_id: str):
"""Invalidate all cache entries for a city"""
try:
pattern = f"*:{city_id}:*"
async for key in self.redis_client.scan_iter(match=pattern):
await self.redis_client.delete(key)
logger.info("City cache invalidated", city_id=city_id)
except Exception as e:
logger.error("Error invalidating cache", error=str(e))

View File

@@ -37,8 +37,8 @@ class DataSettings(BaseServiceSettings):
# External API Configuration
AEMET_API_KEY: str = os.getenv("AEMET_API_KEY", "")
AEMET_BASE_URL: str = "https://opendata.aemet.es/opendata"
AEMET_TIMEOUT: int = int(os.getenv("AEMET_TIMEOUT", "60")) # Increased default
AEMET_RETRY_ATTEMPTS: int = int(os.getenv("AEMET_RETRY_ATTEMPTS", "3"))
AEMET_TIMEOUT: int = int(os.getenv("AEMET_TIMEOUT", "90")) # Increased for unstable API
AEMET_RETRY_ATTEMPTS: int = int(os.getenv("AEMET_RETRY_ATTEMPTS", "5")) # More retries for connection issues
AEMET_ENABLED: bool = os.getenv("AEMET_ENABLED", "true").lower() == "true" # Allow disabling AEMET
MADRID_OPENDATA_API_KEY: str = os.getenv("MADRID_OPENDATA_API_KEY", "")

View File

@@ -842,10 +842,19 @@ class AEMETClient(BaseAPIClient):
"""Fetch forecast data from AEMET API"""
endpoint = f"/prediccion/especifica/municipio/diaria/{municipality_code}"
initial_response = await self._get(endpoint)
# Check for AEMET error responses
if initial_response and isinstance(initial_response, dict):
aemet_estado = initial_response.get("estado")
if aemet_estado == 404 or aemet_estado == "404":
logger.warning("AEMET API returned 404 error",
mensaje=initial_response.get("descripcion"),
municipality=municipality_code)
return None
if not self._is_valid_initial_response(initial_response):
return None
datos_url = initial_response.get("datos")
return await self._fetch_from_url(datos_url)
@@ -854,42 +863,65 @@ class AEMETClient(BaseAPIClient):
# Note: AEMET hourly forecast API endpoint
endpoint = f"/prediccion/especifica/municipio/horaria/{municipality_code}"
logger.info("Requesting AEMET hourly forecast", endpoint=endpoint, municipality=municipality_code)
initial_response = await self._get(endpoint)
# Check for AEMET error responses
if initial_response and isinstance(initial_response, dict):
aemet_estado = initial_response.get("estado")
if aemet_estado == 404 or aemet_estado == "404":
logger.warning("AEMET API returned 404 error for hourly forecast",
mensaje=initial_response.get("descripcion"),
municipality=municipality_code)
return None
if not self._is_valid_initial_response(initial_response):
logger.warning("Invalid initial response from AEMET hourly API",
logger.warning("Invalid initial response from AEMET hourly API",
response=initial_response, municipality=municipality_code)
return None
datos_url = initial_response.get("datos")
logger.info("Fetching hourly data from AEMET datos URL", url=datos_url)
return await self._fetch_from_url(datos_url)
async def _fetch_historical_data_in_chunks(self,
station_id: str,
start_date: datetime,
end_date: datetime) -> List[Dict[str, Any]]:
"""Fetch historical data in chunks due to AEMET API limitations"""
import asyncio
historical_data = []
current_date = start_date
chunk_count = 0
while current_date <= end_date:
chunk_end_date = min(
current_date + timedelta(days=AEMETConstants.MAX_DAYS_PER_REQUEST),
end_date
)
# Add delay to respect rate limits (AEMET allows ~60 requests/minute)
# Wait 2 seconds between requests to stay well under the limit
if chunk_count > 0:
await asyncio.sleep(2)
chunk_data = await self._fetch_historical_chunk(
station_id, current_date, chunk_end_date
)
if chunk_data:
historical_data.extend(chunk_data)
current_date = chunk_end_date + timedelta(days=1)
chunk_count += 1
# Log progress every 5 chunks
if chunk_count % 5 == 0:
logger.info("Historical data fetch progress",
chunks_fetched=chunk_count,
records_so_far=len(historical_data))
return historical_data
async def _fetch_historical_chunk(self,
@@ -930,13 +962,37 @@ class AEMETClient(BaseAPIClient):
"""Fetch data from AEMET datos URL"""
try:
data = await self._fetch_url_directly(url)
if data and isinstance(data, list):
return data
else:
logger.warning("Expected list from datos URL", data_type=type(data))
if data is None:
logger.warning("No data received from datos URL", url=url)
return None
# Check if we got an AEMET error response (dict with estado/descripcion)
if isinstance(data, dict):
aemet_estado = data.get("estado")
aemet_mensaje = data.get("descripcion")
if aemet_estado or aemet_mensaje:
logger.warning("AEMET datos URL returned error response",
estado=aemet_estado,
mensaje=aemet_mensaje,
url=url)
return None
else:
# It's a dict but not an error response - unexpected format
logger.warning("Expected list from datos URL but got dict",
data_type=type(data),
keys=list(data.keys())[:5],
url=url)
return None
if isinstance(data, list):
return data
logger.warning("Unexpected data type from datos URL",
data_type=type(data), url=url)
return None
except Exception as e:
logger.error("Failed to fetch from datos URL", url=url, error=str(e))
return None
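
For context, the AEMET API is a two-step fetch: the first response either reports an error via "estado"/"descripcion" or carries a "datos" URL that must be fetched to get the real payload, which is what the checks above guard. A standalone sketch of that pattern; passing the key as the api_key query parameter and checking estado == 200 are assumptions here.
# Standalone sketch of the AEMET two-step fetch pattern (illustrative only).
import httpx

async def fetch_aemet_payload(endpoint: str, api_key: str) -> list | None:
    async with httpx.AsyncClient(timeout=60.0) as client:
        first = await client.get(
            f"https://opendata.aemet.es/opendata{endpoint}",
            params={"api_key": api_key},
        )
        first.raise_for_status()
        body = first.json()
        # AEMET-level errors (e.g. estado 404) arrive with HTTP 200, so check the body.
        if body.get("estado") not in (200, "200") or "datos" not in body:
            return None
        second = await client.get(body["datos"])
        second.raise_for_status()
        data = second.json()
        return data if isinstance(data, list) else None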

View File

@@ -318,49 +318,86 @@ class MadridTrafficClient(BaseTrafficClient, BaseAPIClient):
async def _process_historical_zip_enhanced(self, zip_content: bytes, zip_url: str,
latitude: float, longitude: float,
nearest_points: List[Tuple[str, Dict[str, Any], float]]) -> List[Dict[str, Any]]:
"""Process historical ZIP file with enhanced parsing"""
"""Process historical ZIP file with memory-efficient streaming"""
try:
import zipfile
import io
import csv
import gc
historical_records = []
nearest_ids = {p[0] for p in nearest_points}
with zipfile.ZipFile(io.BytesIO(zip_content)) as zip_file:
csv_files = [f for f in zip_file.namelist() if f.lower().endswith('.csv')]
for csv_filename in csv_files:
try:
# Read CSV content
# Stream CSV file line-by-line to avoid loading entire file into memory
with zip_file.open(csv_filename) as csv_file:
text_content = csv_file.read().decode('utf-8', errors='ignore')
# Process CSV in chunks using processor
csv_records = await self.processor.process_csv_content_chunked(
text_content, csv_filename, nearest_ids, nearest_points
)
historical_records.extend(csv_records)
# Force garbage collection
# Decode the byte stream incrementally so rows can be read line-by-line
import codecs
text_wrapper = codecs.iterdecode(csv_file, 'utf-8', errors='ignore')
csv_reader = csv.DictReader(text_wrapper, delimiter=';')
# Process in small batches
batch_size = 5000
batch_records = []
row_count = 0
for row in csv_reader:
row_count += 1
measurement_point_id = row.get('id', '').strip()
# Skip rows we don't need
if measurement_point_id not in nearest_ids:
continue
try:
record_data = await self.processor.parse_historical_csv_row(row, nearest_points)
if record_data:
batch_records.append(record_data)
# Store and clear batch when full
if len(batch_records) >= batch_size:
historical_records.extend(batch_records)
batch_records = []
gc.collect()
except Exception:
continue
# Store remaining records
if batch_records:
historical_records.extend(batch_records)
batch_records = []
self.logger.info("CSV file processed",
filename=csv_filename,
rows_scanned=row_count,
records_extracted=len(historical_records))
# Aggressive garbage collection after each CSV
gc.collect()
except Exception as csv_error:
self.logger.warning("Error processing CSV file",
filename=csv_filename,
self.logger.warning("Error processing CSV file",
filename=csv_filename,
error=str(csv_error))
continue
self.logger.info("Historical ZIP processing completed",
self.logger.info("Historical ZIP processing completed",
zip_url=zip_url,
total_records=len(historical_records))
# Final cleanup
del zip_content
gc.collect()
return historical_records
except Exception as e:
self.logger.error("Error processing historical ZIP file",
self.logger.error("Error processing historical ZIP file",
zip_url=zip_url, error=str(e))
return []
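
The streaming approach above reduces to a small standalone pattern: decode each ZIP member incrementally and filter rows by measurement-point id while iterating, so only matching rows are ever materialized. A sketch, with the 'id' column and ';' delimiter taken from the code above.
# Sketch: stream-filter CSV rows inside a ZIP without loading whole files.
import codecs
import csv
import io
import zipfile

def iter_matching_rows(zip_content: bytes, wanted_ids: set[str]):
    """Yield CSV rows whose 'id' column is in wanted_ids, one file at a time."""
    with zipfile.ZipFile(io.BytesIO(zip_content)) as zip_file:
        for name in zip_file.namelist():
            if not name.lower().endswith(".csv"):
                continue
            with zip_file.open(name) as member:
                lines = codecs.iterdecode(member, "utf-8", errors="ignore")
                for row in csv.DictReader(lines, delimiter=";"):
                    if row.get("id", "").strip() in wanted_ids:
                        yield row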

View File

@@ -50,8 +50,20 @@ class BaseAPIClient:
return response_data
except httpx.HTTPStatusError as e:
logger.error("HTTP error", status_code=e.response.status_code, url=url,
logger.error("HTTP error", status_code=e.response.status_code, url=url,
response_text=e.response.text[:200], attempt=attempt + 1)
# Handle rate limiting (429) with longer backoff
if e.response.status_code == 429:
import asyncio
# Exponential backoff: 5s, 15s, 45s for rate limits
wait_time = 5 * (3 ** attempt)
logger.warning(f"Rate limit hit, waiting {wait_time}s before retry",
attempt=attempt + 1, max_attempts=self.retries)
await asyncio.sleep(wait_time)
if attempt < self.retries - 1:
continue
if attempt == self.retries - 1: # Last attempt
return None
except httpx.RequestError as e:
@@ -72,51 +84,87 @@ class BaseAPIClient:
return None
async def _fetch_url_directly(self, url: str, headers: Optional[Dict] = None) -> Optional[Dict[str, Any]]:
"""Fetch data directly from a full URL (for AEMET datos URLs)"""
try:
request_headers = headers or {}
logger.debug("Making direct URL request", url=url)
async with httpx.AsyncClient(timeout=self.timeout) as client:
response = await client.get(url, headers=request_headers)
response.raise_for_status()
# Handle encoding issues common with Spanish data sources
try:
response_data = response.json()
except UnicodeDecodeError:
logger.warning("UTF-8 decode failed, trying alternative encodings", url=url)
# Try common Spanish encodings
for encoding in ['latin-1', 'windows-1252', 'iso-8859-1']:
try:
text_content = response.content.decode(encoding)
import json
response_data = json.loads(text_content)
logger.info("Successfully decoded with encoding", encoding=encoding)
break
except (UnicodeDecodeError, json.JSONDecodeError):
continue
else:
logger.error("Failed to decode response with any encoding", url=url)
return None
logger.debug("Direct URL response received",
status_code=response.status_code,
data_type=type(response_data),
data_length=len(response_data) if isinstance(response_data, (list, dict)) else "unknown")
return response_data
except httpx.HTTPStatusError as e:
logger.error("HTTP error in direct fetch", status_code=e.response.status_code, url=url)
return None
except httpx.RequestError as e:
logger.error("Request error in direct fetch", error=str(e), url=url)
return None
except Exception as e:
logger.error("Unexpected error in direct fetch", error=str(e), url=url)
return None
"""Fetch data directly from a full URL (for AEMET datos URLs) with retry logic"""
request_headers = headers or {}
logger.debug("Making direct URL request", url=url)
# Retry logic for unstable AEMET datos URLs
for attempt in range(self.retries):
try:
async with httpx.AsyncClient(timeout=self.timeout) as client:
response = await client.get(url, headers=request_headers)
response.raise_for_status()
# Handle encoding issues common with Spanish data sources
try:
response_data = response.json()
except UnicodeDecodeError:
logger.warning("UTF-8 decode failed, trying alternative encodings", url=url)
# Try common Spanish encodings
for encoding in ['latin-1', 'windows-1252', 'iso-8859-1']:
try:
text_content = response.content.decode(encoding)
import json
response_data = json.loads(text_content)
logger.info("Successfully decoded with encoding", encoding=encoding)
break
except (UnicodeDecodeError, json.JSONDecodeError):
continue
else:
logger.error("Failed to decode response with any encoding", url=url)
if attempt < self.retries - 1:
continue
return None
logger.debug("Direct URL response received",
status_code=response.status_code,
data_type=type(response_data),
data_length=len(response_data) if isinstance(response_data, (list, dict)) else "unknown")
return response_data
except httpx.HTTPStatusError as e:
logger.error("HTTP error in direct fetch",
status_code=e.response.status_code,
url=url,
attempt=attempt + 1)
# On last attempt, return None
if attempt == self.retries - 1:
return None
# Wait before retry
import asyncio
wait_time = 2 ** attempt # 1s, 2s, 4s
logger.info(f"Retrying datos URL in {wait_time}s",
attempt=attempt + 1, max_attempts=self.retries)
await asyncio.sleep(wait_time)
except httpx.RequestError as e:
logger.error("Request error in direct fetch",
error=str(e), url=url, attempt=attempt + 1)
# On last attempt, return None
if attempt == self.retries - 1:
return None
# Wait before retry
import asyncio
wait_time = 2 ** attempt # 1s, 2s, 4s
logger.info(f"Retrying datos URL in {wait_time}s",
attempt=attempt + 1, max_attempts=self.retries)
await asyncio.sleep(wait_time)
except Exception as e:
logger.error("Unexpected error in direct fetch",
error=str(e), url=url, attempt=attempt + 1)
# On last attempt, return None
if attempt == self.retries - 1:
return None
return None
async def _post(self, endpoint: str, data: Optional[Dict] = None, headers: Optional[Dict] = None) -> Optional[Dict[str, Any]]:
"""Make POST request"""

View File

@@ -0,0 +1 @@
"""Data ingestion module for multi-city external data"""

View File

@@ -0,0 +1,20 @@
# services/external/app/ingestion/adapters/__init__.py
"""
Adapter registry - Maps city IDs to adapter implementations
"""
from typing import Dict, Type
from ..base_adapter import CityDataAdapter
from .madrid_adapter import MadridAdapter
ADAPTER_REGISTRY: Dict[str, Type[CityDataAdapter]] = {
"madrid": MadridAdapter,
}
def get_adapter(city_id: str, config: Dict) -> CityDataAdapter:
"""Factory to instantiate appropriate adapter"""
adapter_class = ADAPTER_REGISTRY.get(city_id)
if not adapter_class:
raise ValueError(f"No adapter registered for city: {city_id}")
return adapter_class(city_id, config)
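
A minimal usage sketch of the factory; the config keys mirror the ones the ingestion manager passes later in this commit.
# Sketch: instantiating the Madrid adapter through the registry.
from app.ingestion.adapters import get_adapter

adapter = get_adapter("madrid", {"weather_config": {}, "traffic_config": {}})
assert adapter.get_city_id() == "madrid"

# Unknown cities fail fast:
# get_adapter("lisbon", {})  -> ValueError("No adapter registered for city: lisbon")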

View File

@@ -0,0 +1,131 @@
# services/external/app/ingestion/adapters/madrid_adapter.py
"""
Madrid city data adapter - Uses existing AEMET and Madrid OpenData clients
"""
from typing import List, Dict, Any
from datetime import datetime
import structlog
from ..base_adapter import CityDataAdapter
from app.external.aemet import AEMETClient
from app.external.apis.madrid_traffic_client import MadridTrafficClient
logger = structlog.get_logger()
class MadridAdapter(CityDataAdapter):
"""Adapter for Madrid using AEMET + Madrid OpenData"""
def __init__(self, city_id: str, config: Dict[str, Any]):
super().__init__(city_id, config)
self.aemet_client = AEMETClient()
self.traffic_client = MadridTrafficClient()
self.madrid_lat = 40.4168
self.madrid_lon = -3.7038
async def fetch_historical_weather(
self,
start_date: datetime,
end_date: datetime
) -> List[Dict[str, Any]]:
"""Fetch historical weather from AEMET"""
try:
logger.info(
"Fetching Madrid historical weather",
start=start_date.isoformat(),
end=end_date.isoformat()
)
weather_data = await self.aemet_client.get_historical_weather(
self.madrid_lat,
self.madrid_lon,
start_date,
end_date
)
for record in weather_data:
record['city_id'] = self.city_id
record['city_name'] = 'Madrid'
logger.info(
"Madrid weather data fetched",
records=len(weather_data)
)
return weather_data
except Exception as e:
logger.error("Error fetching Madrid weather", error=str(e))
return []
async def fetch_historical_traffic(
self,
start_date: datetime,
end_date: datetime
) -> List[Dict[str, Any]]:
"""Fetch historical traffic from Madrid OpenData"""
try:
logger.info(
"Fetching Madrid historical traffic",
start=start_date.isoformat(),
end=end_date.isoformat()
)
traffic_data = await self.traffic_client.get_historical_traffic(
self.madrid_lat,
self.madrid_lon,
start_date,
end_date
)
for record in traffic_data:
record['city_id'] = self.city_id
record['city_name'] = 'Madrid'
logger.info(
"Madrid traffic data fetched",
records=len(traffic_data)
)
return traffic_data
except Exception as e:
logger.error("Error fetching Madrid traffic", error=str(e))
return []
async def validate_connection(self) -> bool:
"""Validate connection to AEMET and Madrid OpenData
Note: Validation is lenient - passes if traffic API works.
AEMET rate limits may cause weather validation to fail during initialization.
"""
try:
test_traffic = await self.traffic_client.get_current_traffic(
self.madrid_lat,
self.madrid_lon
)
# Traffic API must work (critical for operations)
if test_traffic is None:
logger.error("Traffic API validation failed - this is critical")
return False
# Try weather API, but don't fail validation if rate limited
test_weather = await self.aemet_client.get_current_weather(
self.madrid_lat,
self.madrid_lon
)
if test_weather is None:
logger.warning("Weather API validation failed (likely rate limited) - proceeding anyway")
else:
logger.info("Weather API validation successful")
# Pass validation if traffic works (weather can be fetched later)
return True
except Exception as e:
logger.error("Madrid adapter connection validation failed", error=str(e))
return False

View File

@@ -0,0 +1,43 @@
# services/external/app/ingestion/base_adapter.py
"""
Base adapter interface for city-specific data sources
"""
from abc import ABC, abstractmethod
from typing import List, Dict, Any
from datetime import datetime
class CityDataAdapter(ABC):
"""Abstract base class for city-specific data adapters"""
def __init__(self, city_id: str, config: Dict[str, Any]):
self.city_id = city_id
self.config = config
@abstractmethod
async def fetch_historical_weather(
self,
start_date: datetime,
end_date: datetime
) -> List[Dict[str, Any]]:
"""Fetch historical weather data for date range"""
pass
@abstractmethod
async def fetch_historical_traffic(
self,
start_date: datetime,
end_date: datetime
) -> List[Dict[str, Any]]:
"""Fetch historical traffic data for date range"""
pass
@abstractmethod
async def validate_connection(self) -> bool:
"""Validate connection to data source"""
pass
def get_city_id(self) -> str:
"""Get city identifier"""
return self.city_id
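
Supporting another city means subclassing CityDataAdapter and registering it; the ValenciaAdapter below is purely illustrative and does not exist in this commit.
# Hypothetical example only: wiring a new city into the adapter registry.
from typing import Any, Dict, List
from datetime import datetime
from app.ingestion.base_adapter import CityDataAdapter

class ValenciaAdapter(CityDataAdapter):
    async def fetch_historical_weather(self, start_date: datetime, end_date: datetime) -> List[Dict[str, Any]]:
        return []  # call the Valencia weather source here

    async def fetch_historical_traffic(self, start_date: datetime, end_date: datetime) -> List[Dict[str, Any]]:
        return []  # call the Valencia OpenData traffic source here

    async def validate_connection(self) -> bool:
        return True

# Then register it in app/ingestion/adapters/__init__.py:
# ADAPTER_REGISTRY["valencia"] = ValenciaAdapter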

View File

@@ -0,0 +1,268 @@
# services/external/app/ingestion/ingestion_manager.py
"""
Data Ingestion Manager - Coordinates multi-city data collection
"""
from typing import List, Dict, Any
from datetime import datetime, timedelta
import structlog
import asyncio
from app.registry.city_registry import CityRegistry
from .adapters import get_adapter
from app.repositories.city_data_repository import CityDataRepository
from app.core.database import database_manager
logger = structlog.get_logger()
class DataIngestionManager:
"""Orchestrates data ingestion across all cities"""
def __init__(self):
self.registry = CityRegistry()
self.database_manager = database_manager
async def initialize_all_cities(self, months: int = 24):
"""
Initialize historical data for all enabled cities
Called by Kubernetes Init Job
"""
enabled_cities = self.registry.get_enabled_cities()
logger.info(
"Starting full data initialization",
cities=len(enabled_cities),
months=months
)
end_date = datetime.now()
start_date = end_date - timedelta(days=months * 30)
tasks = [
self.initialize_city(city.city_id, start_date, end_date)
for city in enabled_cities
]
results = await asyncio.gather(*tasks, return_exceptions=True)
successes = sum(1 for r in results if r is True)
failures = len(results) - successes
logger.info(
"Data initialization complete",
total=len(results),
successes=successes,
failures=failures
)
return successes == len(results)
async def initialize_city(
self,
city_id: str,
start_date: datetime,
end_date: datetime
) -> bool:
"""Initialize historical data for a single city (idempotent)"""
try:
city = self.registry.get_city(city_id)
if not city:
logger.error("City not found", city_id=city_id)
return False
logger.info(
"Initializing city data",
city=city.name,
start=start_date.date(),
end=end_date.date()
)
# Check if data already exists (idempotency)
async with self.database_manager.get_session() as session:
repo = CityDataRepository(session)
coverage = await repo.get_data_coverage(city_id, start_date, end_date)
days_in_range = (end_date - start_date).days
expected_records = days_in_range # One record per day minimum
# If we have >= 90% coverage, skip initialization
threshold = expected_records * 0.9
weather_sufficient = coverage['weather'] >= threshold
traffic_sufficient = coverage['traffic'] >= threshold
if weather_sufficient and traffic_sufficient:
logger.info(
"City data already initialized, skipping",
city=city.name,
weather_records=coverage['weather'],
traffic_records=coverage['traffic'],
threshold=int(threshold)
)
return True
logger.info(
"Insufficient data coverage, proceeding with initialization",
city=city.name,
existing_weather=coverage['weather'],
existing_traffic=coverage['traffic'],
expected=expected_records
)
adapter = get_adapter(
city_id,
{
"weather_config": city.weather_config,
"traffic_config": city.traffic_config
}
)
if not await adapter.validate_connection():
logger.error("Adapter validation failed", city=city.name)
return False
weather_data = await adapter.fetch_historical_weather(
start_date, end_date
)
traffic_data = await adapter.fetch_historical_traffic(
start_date, end_date
)
async with self.database_manager.get_session() as session:
repo = CityDataRepository(session)
weather_stored = await repo.bulk_store_weather(
city_id, weather_data
)
traffic_stored = await repo.bulk_store_traffic(
city_id, traffic_data
)
logger.info(
"City initialization complete",
city=city.name,
weather_records=weather_stored,
traffic_records=traffic_stored
)
return True
except Exception as e:
logger.error(
"City initialization failed",
city_id=city_id,
error=str(e)
)
return False
async def rotate_monthly_data(self):
"""
Rotate 24-month window: delete old, ingest new
Called by Kubernetes CronJob monthly
"""
enabled_cities = self.registry.get_enabled_cities()
logger.info("Starting monthly data rotation", cities=len(enabled_cities))
now = datetime.now()
cutoff_date = now - timedelta(days=24 * 30)
last_month_end = now.replace(day=1) - timedelta(days=1)
last_month_start = last_month_end.replace(day=1)
tasks = []
for city in enabled_cities:
tasks.append(
self._rotate_city_data(
city.city_id,
cutoff_date,
last_month_start,
last_month_end
)
)
results = await asyncio.gather(*tasks, return_exceptions=True)
successes = sum(1 for r in results if r is True)
logger.info(
"Monthly rotation complete",
total=len(results),
successes=successes
)
async def _rotate_city_data(
self,
city_id: str,
cutoff_date: datetime,
new_start: datetime,
new_end: datetime
) -> bool:
"""Rotate data for a single city"""
try:
city = self.registry.get_city(city_id)
if not city:
return False
logger.info(
"Rotating city data",
city=city.name,
cutoff=cutoff_date.date(),
new_month=new_start.strftime("%Y-%m")
)
async with self.database_manager.get_session() as session:
repo = CityDataRepository(session)
deleted_weather = await repo.delete_weather_before(
city_id, cutoff_date
)
deleted_traffic = await repo.delete_traffic_before(
city_id, cutoff_date
)
logger.info(
"Old data deleted",
city=city.name,
weather_deleted=deleted_weather,
traffic_deleted=deleted_traffic
)
adapter = get_adapter(city_id, {
"weather_config": city.weather_config,
"traffic_config": city.traffic_config
})
new_weather = await adapter.fetch_historical_weather(
new_start, new_end
)
new_traffic = await adapter.fetch_historical_traffic(
new_start, new_end
)
async with self.database_manager.get_session() as session:
repo = CityDataRepository(session)
weather_stored = await repo.bulk_store_weather(
city_id, new_weather
)
traffic_stored = await repo.bulk_store_traffic(
city_id, new_traffic
)
logger.info(
"New data ingested",
city=city.name,
weather_added=weather_stored,
traffic_added=traffic_stored
)
return True
except Exception as e:
logger.error(
"City rotation failed",
city_id=city_id,
error=str(e)
)
return False
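
For clarity, the rotation window and coverage arithmetic above work out as follows for an example run date (values illustrative only).
# Worked example of the window computed in rotate_monthly_data():
from datetime import datetime, timedelta

now = datetime(2025, 10, 9)
cutoff_date = now - timedelta(days=24 * 30)              # 2023-10-20: delete data older than this
last_month_end = now.replace(day=1) - timedelta(days=1)  # 2025-09-30
last_month_start = last_month_end.replace(day=1)         # 2025-09-01: re-ingest this month

# Coverage check in initialize_city(): ~90% of one record per day means "already initialized".
days_in_range = 24 * 30
threshold = days_in_range * 0.9                           # 648 records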

View File

@@ -0,0 +1 @@
"""Kubernetes job scripts for data initialization and rotation"""

View File

@@ -0,0 +1,54 @@
# services/external/app/jobs/initialize_data.py
"""
Kubernetes Init Job - Initialize 24-month historical data
"""
import asyncio
import argparse
import sys
import logging
import structlog
from app.ingestion.ingestion_manager import DataIngestionManager
from app.core.database import database_manager
logger = structlog.get_logger()
async def main(months: int = 24):
"""Initialize historical data for all enabled cities"""
logger.info("Starting data initialization job", months=months)
try:
manager = DataIngestionManager()
success = await manager.initialize_all_cities(months=months)
if success:
logger.info("✅ Data initialization completed successfully")
sys.exit(0)
else:
logger.error("❌ Data initialization failed")
sys.exit(1)
except Exception as e:
logger.error("❌ Fatal error during initialization", error=str(e))
sys.exit(1)
finally:
await database_manager.close_connections()
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Initialize historical data")
parser.add_argument("--months", type=int, default=24, help="Number of months to load")
parser.add_argument("--log-level", default="INFO", help="Log level")
args = parser.parse_args()
# Convert string log level to logging constant
log_level = getattr(logging, args.log_level.upper(), logging.INFO)
structlog.configure(
wrapper_class=structlog.make_filtering_bound_logger(log_level)
)
asyncio.run(main(months=args.months))
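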

View File

@@ -0,0 +1,50 @@
# services/external/app/jobs/rotate_data.py
"""
Kubernetes CronJob - Monthly data rotation (24-month window)
"""
import asyncio
import argparse
import sys
import logging
import structlog
from app.ingestion.ingestion_manager import DataIngestionManager
from app.core.database import database_manager
logger = structlog.get_logger()
async def main():
"""Rotate 24-month data window"""
logger.info("Starting monthly data rotation job")
try:
manager = DataIngestionManager()
await manager.rotate_monthly_data()
logger.info("✅ Data rotation completed successfully")
sys.exit(0)
except Exception as e:
logger.error("❌ Fatal error during rotation", error=str(e))
sys.exit(1)
finally:
await database_manager.close_connections()
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Rotate historical data")
parser.add_argument("--log-level", default="INFO", help="Log level")
parser.add_argument("--notify-slack", type=bool, default=False, help="Send Slack notification")
args = parser.parse_args()
# Convert string log level to logging constant
log_level = getattr(logging, args.log_level.upper(), logging.INFO)
structlog.configure(
wrapper_class=structlog.make_filtering_bound_logger(log_level)
)
asyncio.run(main())

View File

@@ -10,7 +10,7 @@ from app.core.database import database_manager
from app.services.messaging import setup_messaging, cleanup_messaging
from shared.service_base import StandardFastAPIService
# Include routers
from app.api import weather_data, traffic_data, external_operations
from app.api import weather_data, traffic_data, city_operations
class ExternalService(StandardFastAPIService):
@@ -179,4 +179,4 @@ service.setup_standard_endpoints()
# Include routers
service.add_router(weather_data.router)
service.add_router(traffic_data.router)
service.add_router(external_operations.router)
service.add_router(city_operations.router) # New v2.0 city-based optimized endpoints

View File

@@ -16,6 +16,9 @@ from .weather import (
WeatherForecast,
)
from .city_weather import CityWeatherData
from .city_traffic import CityTrafficData
# List all models for easier access
__all__ = [
# Traffic models
@@ -25,4 +28,7 @@ __all__ = [
# Weather models
"WeatherData",
"WeatherForecast",
# City-based models (new)
"CityWeatherData",
"CityTrafficData",
]

View File

@@ -0,0 +1,36 @@
# services/external/app/models/city_traffic.py
"""
City Traffic Data Model - Shared city-based traffic storage
"""
from sqlalchemy import Column, String, Integer, Float, DateTime, Text, Index
from sqlalchemy.dialects.postgresql import UUID, JSONB
from datetime import datetime
import uuid
from app.core.database import Base
class CityTrafficData(Base):
"""City-based historical traffic data"""
__tablename__ = "city_traffic_data"
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
city_id = Column(String(50), nullable=False, index=True)
date = Column(DateTime(timezone=True), nullable=False, index=True)
traffic_volume = Column(Integer, nullable=True)
pedestrian_count = Column(Integer, nullable=True)
congestion_level = Column(String(20), nullable=True)
average_speed = Column(Float, nullable=True)
source = Column(String(50), nullable=False)
raw_data = Column(JSONB, nullable=True)
created_at = Column(DateTime(timezone=True), default=datetime.utcnow)
updated_at = Column(DateTime(timezone=True), default=datetime.utcnow, onupdate=datetime.utcnow)
__table_args__ = (
Index('idx_city_traffic_lookup', 'city_id', 'date'),
)
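
The composite index on (city_id, date) exists to make per-city range scans cheap. A sketch of the query shape it serves; the repository code itself is not part of this hunk, so this is illustrative rather than the actual implementation.
# Illustrative query shape served by idx_city_traffic_lookup.
from datetime import datetime
from sqlalchemy import select
from app.models.city_traffic import CityTrafficData

def traffic_range_query(city_id: str, start: datetime, end: datetime):
    return (
        select(CityTrafficData)
        .where(CityTrafficData.city_id == city_id)
        .where(CityTrafficData.date >= start)
        .where(CityTrafficData.date <= end)
        .order_by(CityTrafficData.date)
    )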

View File

@@ -0,0 +1,38 @@
# services/external/app/models/city_weather.py
"""
City Weather Data Model - Shared city-based weather storage
"""
from sqlalchemy import Column, String, Float, DateTime, Text, Index
from sqlalchemy.dialects.postgresql import UUID, JSONB
from datetime import datetime
import uuid
from app.core.database import Base
class CityWeatherData(Base):
"""City-based historical weather data"""
__tablename__ = "city_weather_data"
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
city_id = Column(String(50), nullable=False, index=True)
date = Column(DateTime(timezone=True), nullable=False, index=True)
temperature = Column(Float, nullable=True)
precipitation = Column(Float, nullable=True)
humidity = Column(Float, nullable=True)
wind_speed = Column(Float, nullable=True)
pressure = Column(Float, nullable=True)
description = Column(String(200), nullable=True)
source = Column(String(50), nullable=False)
raw_data = Column(JSONB, nullable=True)
created_at = Column(DateTime(timezone=True), default=datetime.utcnow)
updated_at = Column(DateTime(timezone=True), default=datetime.utcnow, onupdate=datetime.utcnow)
__table_args__ = (
Index('idx_city_weather_lookup', 'city_id', 'date'),
)

View File

@@ -0,0 +1 @@
"""City registry module for multi-city support"""

View File

@@ -0,0 +1,163 @@
# services/external/app/registry/city_registry.py
"""
City Registry - Configuration-driven multi-city support
"""
from dataclasses import dataclass
from typing import List, Optional, Dict, Any
from enum import Enum
import math
class Country(str, Enum):
SPAIN = "ES"
FRANCE = "FR"
class WeatherProvider(str, Enum):
AEMET = "aemet"
METEO_FRANCE = "meteo_france"
OPEN_WEATHER = "open_weather"
class TrafficProvider(str, Enum):
MADRID_OPENDATA = "madrid_opendata"
VALENCIA_OPENDATA = "valencia_opendata"
BARCELONA_OPENDATA = "barcelona_opendata"
@dataclass
class CityDefinition:
"""City configuration with data source specifications"""
city_id: str
name: str
country: Country
latitude: float
longitude: float
radius_km: float
weather_provider: WeatherProvider
weather_config: Dict[str, Any]
traffic_provider: TrafficProvider
traffic_config: Dict[str, Any]
timezone: str
population: int
enabled: bool = True
class CityRegistry:
"""Central registry of supported cities"""
CITIES: List[CityDefinition] = [
CityDefinition(
city_id="madrid",
name="Madrid",
country=Country.SPAIN,
latitude=40.4168,
longitude=-3.7038,
radius_km=30.0,
weather_provider=WeatherProvider.AEMET,
weather_config={
"station_ids": ["3195", "3129", "3197"],
"municipality_code": "28079"
},
traffic_provider=TrafficProvider.MADRID_OPENDATA,
traffic_config={
"current_xml_url": "https://datos.madrid.es/egob/catalogo/...",
"historical_base_url": "https://datos.madrid.es/...",
"measurement_points_csv": "https://datos.madrid.es/..."
},
timezone="Europe/Madrid",
population=3_200_000
),
CityDefinition(
city_id="valencia",
name="Valencia",
country=Country.SPAIN,
latitude=39.4699,
longitude=-0.3763,
radius_km=25.0,
weather_provider=WeatherProvider.AEMET,
weather_config={
"station_ids": ["8416"],
"municipality_code": "46250"
},
traffic_provider=TrafficProvider.VALENCIA_OPENDATA,
traffic_config={
"api_endpoint": "https://valencia.opendatasoft.com/api/..."
},
timezone="Europe/Madrid",
population=800_000,
enabled=False
),
CityDefinition(
city_id="barcelona",
name="Barcelona",
country=Country.SPAIN,
latitude=41.3851,
longitude=2.1734,
radius_km=30.0,
weather_provider=WeatherProvider.AEMET,
weather_config={
"station_ids": ["0076"],
"municipality_code": "08019"
},
traffic_provider=TrafficProvider.BARCELONA_OPENDATA,
traffic_config={
"api_endpoint": "https://opendata-ajuntament.barcelona.cat/..."
},
timezone="Europe/Madrid",
population=1_600_000,
enabled=False
)
]
@classmethod
def get_enabled_cities(cls) -> List[CityDefinition]:
"""Get all enabled cities"""
return [city for city in cls.CITIES if city.enabled]
@classmethod
def get_city(cls, city_id: str) -> Optional[CityDefinition]:
"""Get city by ID"""
for city in cls.CITIES:
if city.city_id == city_id:
return city
return None
@classmethod
def find_nearest_city(cls, latitude: float, longitude: float) -> Optional[CityDefinition]:
"""Find nearest enabled city to coordinates"""
enabled_cities = cls.get_enabled_cities()
if not enabled_cities:
return None
min_distance = float('inf')
nearest_city = None
for city in enabled_cities:
distance = cls._haversine_distance(
latitude, longitude,
city.latitude, city.longitude
)
if distance <= city.radius_km and distance < min_distance:
min_distance = distance
nearest_city = city
return nearest_city
@staticmethod
def _haversine_distance(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
"""Calculate distance in km between two coordinates"""
R = 6371
dlat = math.radians(lat2 - lat1)
dlon = math.radians(lon2 - lon1)
a = (math.sin(dlat/2) ** 2 +
math.cos(math.radians(lat1)) * math.cos(math.radians(lat2)) *
math.sin(dlon/2) ** 2)
c = 2 * math.atan2(math.sqrt(a), math.sqrt(1-a))
return R * c
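# Minimal usage sketch (illustrative only; the coordinates below are an assumed
# point inside Madrid, not values used anywhere by the service):
if __name__ == "__main__":
    madrid = CityRegistry.get_city("madrid")
    nearest = CityRegistry.find_nearest_city(40.45, -3.69)
    if madrid and nearest:
        print(madrid.name, nearest.city_id, nearest.weather_provider.value)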

View File

@@ -0,0 +1,58 @@
# services/external/app/registry/geolocation_mapper.py
"""
Geolocation Mapper - Maps tenant locations to cities
"""
from typing import Optional, Tuple
import structlog
from .city_registry import CityRegistry, CityDefinition
logger = structlog.get_logger()
class GeolocationMapper:
"""Maps tenant coordinates to nearest supported city"""
def __init__(self):
self.registry = CityRegistry()
def map_tenant_to_city(
self,
latitude: float,
longitude: float
) -> Optional[Tuple[CityDefinition, float]]:
"""
Map tenant coordinates to nearest city
Returns:
Tuple of (CityDefinition, distance_km) or None if no match
"""
nearest_city = self.registry.find_nearest_city(latitude, longitude)
if not nearest_city:
logger.warning(
"No supported city found for coordinates",
lat=latitude,
lon=longitude
)
return None
distance = self.registry._haversine_distance(
latitude, longitude,
nearest_city.latitude, nearest_city.longitude
)
logger.info(
"Mapped tenant to city",
lat=latitude,
lon=longitude,
city=nearest_city.name,
distance_km=round(distance, 2)
)
return (nearest_city, distance)
def validate_location_support(self, latitude: float, longitude: float) -> bool:
"""Check if coordinates are supported"""
result = self.map_tenant_to_city(latitude, longitude)
return result is not None
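# Minimal usage sketch (illustrative; the sample coordinates are assumptions):
# map a tenant location to the nearest supported city and check support.
if __name__ == "__main__":
    mapper = GeolocationMapper()
    match = mapper.map_tenant_to_city(40.45, -3.69)
    if match:
        city, distance_km = match
        print(city.city_id, round(distance_km, 2))
    # A point far from any registered city (e.g. Paris) is reported as unsupported.
    print(mapper.validate_location_support(48.8566, 2.3522))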

View File

@@ -0,0 +1,249 @@
# services/external/app/repositories/city_data_repository.py
"""
City Data Repository - Manages shared city-based data storage
"""
from typing import List, Dict, Any, Optional
from datetime import datetime
from sqlalchemy import select, delete, and_
from sqlalchemy.ext.asyncio import AsyncSession
import structlog
from app.models.city_weather import CityWeatherData
from app.models.city_traffic import CityTrafficData
logger = structlog.get_logger()
class CityDataRepository:
"""Repository for city-based historical data"""
def __init__(self, session: AsyncSession):
self.session = session
async def bulk_store_weather(
self,
city_id: str,
weather_records: List[Dict[str, Any]]
) -> int:
"""Bulk insert weather records for a city"""
if not weather_records:
return 0
try:
objects = []
for record in weather_records:
obj = CityWeatherData(
city_id=city_id,
date=record.get('date'),
temperature=record.get('temperature'),
precipitation=record.get('precipitation'),
humidity=record.get('humidity'),
wind_speed=record.get('wind_speed'),
pressure=record.get('pressure'),
description=record.get('description'),
source=record.get('source', 'ingestion'),
raw_data=record.get('raw_data')
)
objects.append(obj)
self.session.add_all(objects)
await self.session.commit()
logger.info(
"Weather data stored",
city_id=city_id,
records=len(objects)
)
return len(objects)
except Exception as e:
await self.session.rollback()
logger.error(
"Error storing weather data",
city_id=city_id,
error=str(e)
)
raise
async def get_weather_by_city_and_range(
self,
city_id: str,
start_date: datetime,
end_date: datetime
) -> List[CityWeatherData]:
"""Get weather data for city within date range"""
stmt = select(CityWeatherData).where(
and_(
CityWeatherData.city_id == city_id,
CityWeatherData.date >= start_date,
CityWeatherData.date <= end_date
)
).order_by(CityWeatherData.date)
result = await self.session.execute(stmt)
return result.scalars().all()
async def delete_weather_before(
self,
city_id: str,
cutoff_date: datetime
) -> int:
"""Delete weather records older than cutoff date"""
stmt = delete(CityWeatherData).where(
and_(
CityWeatherData.city_id == city_id,
CityWeatherData.date < cutoff_date
)
)
result = await self.session.execute(stmt)
await self.session.commit()
return result.rowcount
async def bulk_store_traffic(
self,
city_id: str,
traffic_records: List[Dict[str, Any]]
) -> int:
"""Bulk insert traffic records for a city"""
if not traffic_records:
return 0
try:
objects = []
for record in traffic_records:
obj = CityTrafficData(
city_id=city_id,
date=record.get('date'),
traffic_volume=record.get('traffic_volume'),
pedestrian_count=record.get('pedestrian_count'),
congestion_level=record.get('congestion_level'),
average_speed=record.get('average_speed'),
source=record.get('source', 'ingestion'),
raw_data=record.get('raw_data')
)
objects.append(obj)
self.session.add_all(objects)
await self.session.commit()
logger.info(
"Traffic data stored",
city_id=city_id,
records=len(objects)
)
return len(objects)
except Exception as e:
await self.session.rollback()
logger.error(
"Error storing traffic data",
city_id=city_id,
error=str(e)
)
raise
async def get_traffic_by_city_and_range(
self,
city_id: str,
start_date: datetime,
end_date: datetime
) -> List[CityTrafficData]:
"""Get traffic data for city within date range - aggregated daily"""
from sqlalchemy import func, cast, Date
# Aggregate hourly data to daily averages to avoid loading hundreds of thousands of records
stmt = select(
cast(CityTrafficData.date, Date).label('date'),
func.avg(CityTrafficData.traffic_volume).label('traffic_volume'),
func.avg(CityTrafficData.pedestrian_count).label('pedestrian_count'),
func.avg(CityTrafficData.average_speed).label('average_speed'),
func.max(CityTrafficData.source).label('source')
).where(
and_(
CityTrafficData.city_id == city_id,
CityTrafficData.date >= start_date,
CityTrafficData.date <= end_date
)
).group_by(
cast(CityTrafficData.date, Date)
).order_by(
cast(CityTrafficData.date, Date)
)
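        # Roughly equivalent SQL (illustrative; the exact statement SQLAlchemy emits may differ):
        #   SELECT CAST(date AS DATE), AVG(traffic_volume), AVG(pedestrian_count),
        #          AVG(average_speed), MAX(source)
        #   FROM city_traffic_data
        #   WHERE city_id = :city_id AND date >= :start_date AND date <= :end_date
        #   GROUP BY CAST(date AS DATE) ORDER BY CAST(date AS DATE)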
result = await self.session.execute(stmt)
# Convert aggregated rows to CityTrafficData objects
traffic_records = []
for row in result:
record = CityTrafficData(
city_id=city_id,
date=datetime.combine(row.date, datetime.min.time()),
                traffic_volume=int(row.traffic_volume) if row.traffic_volume is not None else None,
                pedestrian_count=int(row.pedestrian_count) if row.pedestrian_count is not None else None,
                congestion_level='medium',  # Default since the hourly levels are averaged away
                average_speed=float(row.average_speed) if row.average_speed is not None else None,
source=row.source or 'aggregated'
)
traffic_records.append(record)
return traffic_records
async def delete_traffic_before(
self,
city_id: str,
cutoff_date: datetime
) -> int:
"""Delete traffic records older than cutoff date"""
stmt = delete(CityTrafficData).where(
and_(
CityTrafficData.city_id == city_id,
CityTrafficData.date < cutoff_date
)
)
result = await self.session.execute(stmt)
await self.session.commit()
return result.rowcount
async def get_data_coverage(
self,
city_id: str,
start_date: datetime,
end_date: datetime
) -> Dict[str, int]:
"""
Check how much data exists for a city in a date range
Returns dict with counts: {'weather': X, 'traffic': Y}
"""
        from sqlalchemy import func
        # Count weather records at the database level instead of loading full rows
        weather_stmt = select(func.count(CityWeatherData.id)).where(
            and_(
                CityWeatherData.city_id == city_id,
                CityWeatherData.date >= start_date,
                CityWeatherData.date <= end_date
            )
        )
        weather_result = await self.session.execute(weather_stmt)
        weather_count = weather_result.scalar() or 0
        # Count traffic records the same way
        traffic_stmt = select(func.count(CityTrafficData.id)).where(
            and_(
                CityTrafficData.city_id == city_id,
                CityTrafficData.date >= start_date,
                CityTrafficData.date <= end_date
            )
        )
        traffic_result = await self.session.execute(traffic_stmt)
        traffic_count = traffic_result.scalar() or 0
return {
'weather': weather_count,
'traffic': traffic_count
}
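# Minimal usage sketch (illustrative; callers provide an AsyncSession from their own
# session factory, which is not shown here, and the sample record values are assumptions):
async def _example_store_and_read(session: AsyncSession) -> None:
    repo = CityDataRepository(session)
    await repo.bulk_store_weather("madrid", [{
        "date": datetime(2024, 1, 15),
        "temperature": 9.5,
        "precipitation": 0.0,
        "source": "aemet",
    }])
    rows = await repo.get_weather_by_city_and_range(
        "madrid", datetime(2024, 1, 1), datetime(2024, 1, 31)
    )
    logger.info("Example read-back", records=len(rows))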

View File

@@ -0,0 +1,36 @@
# services/external/app/schemas/city_data.py
"""
City Data Schemas - New response types for city-based operations
"""
from pydantic import BaseModel, Field
from typing import Optional
class CityInfoResponse(BaseModel):
"""Information about a supported city"""
city_id: str
name: str
country: str
latitude: float
longitude: float
radius_km: float
weather_provider: str
traffic_provider: str
enabled: bool
class DataAvailabilityResponse(BaseModel):
"""Data availability for a city"""
city_id: str
city_name: str
weather_available: bool
weather_start_date: Optional[str] = None
weather_end_date: Optional[str] = None
weather_record_count: int = 0
traffic_available: bool
traffic_start_date: Optional[str] = None
traffic_end_date: Optional[str] = None
traffic_record_count: int = 0
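# Example serialized payload (illustrative values only):
#   {
#     "city_id": "madrid", "city_name": "Madrid",
#     "weather_available": true, "weather_start_date": "2023-01-01T00:00:00",
#     "weather_end_date": "2024-12-31T00:00:00", "weather_record_count": 730,
#     "traffic_available": false, "traffic_start_date": null,
#     "traffic_end_date": null, "traffic_record_count": 0
#   }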

View File

@@ -120,26 +120,6 @@ class WeatherAnalytics(BaseModel):
rainy_days: int = 0
sunny_days: int = 0
class WeatherDataResponse(BaseModel):
date: datetime
temperature: Optional[float]
precipitation: Optional[float]
humidity: Optional[float]
wind_speed: Optional[float]
pressure: Optional[float]
description: Optional[str]
source: str
class WeatherForecastResponse(BaseModel):
forecast_date: datetime
generated_at: datetime
temperature: Optional[float]
precipitation: Optional[float]
humidity: Optional[float]
wind_speed: Optional[float]
description: Optional[str]
source: str
class LocationRequest(BaseModel):
latitude: float
longitude: float
@@ -174,4 +154,20 @@ class HourlyForecastResponse(BaseModel):
wind_speed: Optional[float]
description: Optional[str]
source: str
hour: int
hour: int
class WeatherForecastAPIResponse(BaseModel):
"""Simplified schema for API weather forecast responses (without database fields)"""
forecast_date: datetime = Field(..., description="Date for forecast")
generated_at: datetime = Field(..., description="When forecast was generated")
temperature: Optional[float] = Field(None, ge=-50, le=60, description="Forecasted temperature")
precipitation: Optional[float] = Field(None, ge=0, description="Forecasted precipitation")
humidity: Optional[float] = Field(None, ge=0, le=100, description="Forecasted humidity")
wind_speed: Optional[float] = Field(None, ge=0, le=200, description="Forecasted wind speed")
description: Optional[str] = Field(None, max_length=200, description="Forecast description")
source: str = Field("aemet", max_length=50, description="Data source")
class Config:
json_encoders = {
datetime: lambda v: v.isoformat()
}

View File

@@ -9,7 +9,7 @@ import structlog
from app.models.weather import WeatherData, WeatherForecast
from app.external.aemet import AEMETClient
from app.schemas.weather import WeatherDataResponse, WeatherForecastResponse, HourlyForecastResponse
from app.schemas.weather import WeatherDataResponse, WeatherForecastResponse, WeatherForecastAPIResponse, HourlyForecastResponse
from app.repositories.weather_repository import WeatherRepository
logger = structlog.get_logger()
@@ -58,23 +58,26 @@ class WeatherService:
source="error"
)
async def get_weather_forecast(self, latitude: float, longitude: float, days: int = 7) -> List[WeatherForecastResponse]:
"""Get weather forecast for location"""
async def get_weather_forecast(self, latitude: float, longitude: float, days: int = 7) -> List[Dict[str, Any]]:
"""Get weather forecast for location - returns plain dicts"""
try:
logger.debug("Getting weather forecast", lat=latitude, lon=longitude, days=days)
forecast_data = await self.aemet_client.get_forecast(latitude, longitude, days)
if forecast_data:
logger.debug("Forecast data received", count=len(forecast_data))
# Validate each forecast item before creating response
# Validate and normalize each forecast item
valid_forecasts = []
for item in forecast_data:
try:
if isinstance(item, dict):
# Ensure required fields are present
# Ensure required fields are present and convert to serializable format
forecast_date = item.get("forecast_date", datetime.now())
generated_at = item.get("generated_at", datetime.now())
forecast_item = {
"forecast_date": item.get("forecast_date", datetime.now()),
"generated_at": item.get("generated_at", datetime.now()),
"forecast_date": forecast_date.isoformat() if isinstance(forecast_date, datetime) else str(forecast_date),
"generated_at": generated_at.isoformat() if isinstance(generated_at, datetime) else str(generated_at),
"temperature": float(item.get("temperature", 15.0)),
"precipitation": float(item.get("precipitation", 0.0)),
"humidity": float(item.get("humidity", 50.0)),
@@ -82,19 +85,19 @@ class WeatherService:
"description": str(item.get("description", "Variable")),
"source": str(item.get("source", "unknown"))
}
valid_forecasts.append(WeatherForecastResponse(**forecast_item))
valid_forecasts.append(forecast_item)
else:
logger.warning("Invalid forecast item type", item_type=type(item))
except Exception as item_error:
logger.warning("Error processing forecast item", error=str(item_error), item=item)
continue
logger.debug("Valid forecasts processed", count=len(valid_forecasts))
return valid_forecasts
else:
logger.warning("No forecast data received from AEMET client")
return []
except Exception as e:
logger.error("Failed to get weather forecast", error=str(e), lat=latitude, lon=longitude)
return []
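# Each forecast item returned by get_weather_forecast is now a plain, JSON-serializable
# dict rather than a WeatherForecastResponse model, e.g. (illustrative values):
#   {"forecast_date": "2025-10-10T00:00:00", "generated_at": "2025-10-09T12:00:00",
#    "temperature": 18.0, "precipitation": 0.2, "humidity": 55.0, "wind_speed": 12.0,
#    "description": "Variable", "source": "aemet"}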