Improve the traffic fetching system

Urtzi Alfaro
2025-08-10 17:31:38 +02:00
parent 312fdc8ef3
commit 3c2acc934a
16 changed files with 3866 additions and 1981 deletions


@@ -1,122 +1,283 @@
 # ================================================================
-# services/data/app/services/traffic_service.py - FIXED VERSION
+# services/data/app/services/traffic_service.py
 # ================================================================
-"""Traffic data service with improved error handling"""
+"""
+Abstracted Traffic Service - Universal interface for traffic data across multiple cities
+"""
-from typing import List, Dict, Any, Optional
-from datetime import datetime, timedelta
+import asyncio
+from datetime import datetime
+from typing import Dict, List, Any, Optional, Tuple
 from sqlalchemy.ext.asyncio import AsyncSession
 from sqlalchemy import select, and_
 import structlog
+from app.external.apis.traffic import UniversalTrafficClient
 from app.models.traffic import TrafficData
-from app.external.madrid_opendata import MadridOpenDataClient
-from app.schemas.external import TrafficDataResponse
-import uuid
+from app.core.performance import (
+    async_cache,
+    monitor_performance,
+    global_connection_pool,
+    global_performance_monitor,
+    batch_process
+)
 logger = structlog.get_logger()
 class TrafficService:
+    """
+    Abstracted traffic service providing unified interface for traffic data
+    Routes requests to appropriate city-specific clients automatically
+    """
     def __init__(self):
-        self.madrid_client = MadridOpenDataClient()
+        self.universal_client = UniversalTrafficClient()
+        self.logger = structlog.get_logger(__name__)
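
The routing itself lives in UniversalTrafficClient (app/external/apis/traffic), which this diff does not show. A minimal sketch of what coordinate-based routing can look like; the bounding-box values and the resolve_city helper are illustrative assumptions, not the client's actual code:

from typing import Dict, Optional, Tuple

# Hypothetical bounding boxes; the real client may use a registry or geocoder.
CITY_BOUNDS: Dict[str, Tuple[float, float, float, float]] = {
    "madrid": (40.31, 40.56, -3.84, -3.52),  # (min_lat, max_lat, min_lon, max_lon)
}

def resolve_city(latitude: float, longitude: float) -> Optional[str]:
    """Return the first city whose bounding box contains the query point."""
    for city, (min_lat, max_lat, min_lon, max_lon) in CITY_BOUNDS.items():
        if min_lat <= latitude <= max_lat and min_lon <= longitude <= max_lon:
            return city
    return None
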
-    async def get_current_traffic(self, latitude: float, longitude: float) -> Optional[TrafficDataResponse]:
-        """Get current traffic data for location"""
+    @async_cache(ttl=300)  # Cache for 5 minutes
+    @monitor_performance(monitor=global_performance_monitor)
+    async def get_current_traffic(
+        self,
+        latitude: float,
+        longitude: float,
+        tenant_id: Optional[str] = None
+    ) -> Optional[Dict[str, Any]]:
+        """
+        Get current traffic data for any supported location
+        Args:
+            latitude: Query location latitude
+            longitude: Query location longitude
+            tenant_id: Optional tenant identifier for logging/analytics
+        Returns:
+            Dict with current traffic data or None if not available
+        """
         try:
-            logger.debug("Getting current traffic", lat=latitude, lon=longitude)
-            traffic_data = await self.madrid_client.get_current_traffic(latitude, longitude)
+            self.logger.info("Getting current traffic data",
+                             lat=latitude, lon=longitude, tenant_id=tenant_id)
+            # Delegate to universal client
+            traffic_data = await self.universal_client.get_current_traffic(latitude, longitude)
             if traffic_data:
-                logger.debug("Traffic data received", source=traffic_data.get('source'))
+                # Add service metadata
+                traffic_data['service_metadata'] = {
+                    'request_timestamp': datetime.now().isoformat(),
+                    'tenant_id': tenant_id,
+                    'service_version': '2.0',
+                    'query_location': {'latitude': latitude, 'longitude': longitude}
+                }
-                # Validate and clean traffic data before creating response
-                # Use keyword arguments instead of unpacking
-                response = TrafficDataResponse(
-                    date=traffic_data.get("date", datetime.now()),
-                    traffic_volume=int(traffic_data.get("traffic_volume", 100)),
-                    pedestrian_count=int(traffic_data.get("pedestrian_count", 150)),
-                    congestion_level=str(traffic_data.get("congestion_level", "medium")),
-                    average_speed=float(traffic_data.get("average_speed", 25.0)),  # Fixed: use float, not int
-                    source=str(traffic_data.get("source", "unknown"))
-                )
+                self.logger.info("Successfully retrieved current traffic data",
+                                 lat=latitude, lon=longitude,
+                                 source=traffic_data.get('source', 'unknown'))
-                logger.debug("Successfully created traffic response",
-                             traffic_volume=response.traffic_volume,
-                             congestion_level=response.congestion_level)
-                return response
+                return traffic_data
             else:
-                logger.warning("No traffic data received from Madrid client")
+                self.logger.warning("No current traffic data available",
+                                    lat=latitude, lon=longitude)
                 return None
         except Exception as e:
-            logger.error("Failed to get current traffic", error=str(e), lat=latitude, lon=longitude)
-            # Log the full traceback for debugging
-            import traceback
-            logger.error("Traffic service traceback", traceback=traceback.format_exc())
+            self.logger.error("Error getting current traffic data",
+                              lat=latitude, lon=longitude, error=str(e))
             return None
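
The async_cache and monitor_performance decorators come from the internal app.core.performance module, which is outside this diff. A minimal sketch of a TTL cache decorator for coroutines, assuming a simple in-process store; the real implementation may differ:

import asyncio
import functools
import time
from typing import Any, Callable, Dict, Tuple

def async_cache(ttl: float = 300):
    """Cache coroutine results in process for ttl seconds (sketch only)."""
    def decorator(func: Callable) -> Callable:
        store: Dict[Tuple, Tuple[float, Any]] = {}
        lock = asyncio.Lock()

        @functools.wraps(func)
        async def wrapper(*args, **kwargs):
            key = (args, tuple(sorted(kwargs.items())))
            async with lock:
                hit = store.get(key)
                if hit is not None and time.monotonic() - hit[0] < ttl:
                    return hit[1]  # entry is still fresh
            result = await func(*args, **kwargs)  # call outside the lock
            async with lock:
                store[key] = (time.monotonic(), result)
            return result
        return wrapper
    return decorator
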
-    async def get_historical_traffic(self,
-                                     latitude: float,
-                                     longitude: float,
-                                     start_date: datetime,
-                                     end_date: datetime,
-                                     db: AsyncSession) -> List[TrafficDataResponse]:
-        """Get historical traffic data with enhanced storage for re-training"""
+    @async_cache(ttl=1800)  # Cache for 30 minutes (historical data changes less frequently)
+    @monitor_performance(monitor=global_performance_monitor)
+    async def get_historical_traffic(
+        self,
+        latitude: float,
+        longitude: float,
+        start_date: datetime,
+        end_date: datetime,
+        tenant_id: Optional[str] = None,
+        db: Optional[AsyncSession] = None
+    ) -> List[Dict[str, Any]]:
+        """
+        Get historical traffic data for any supported location with database storage
+        Args:
+            latitude: Query location latitude
+            longitude: Query location longitude
+            start_date: Start date for historical data
+            end_date: End date for historical data
+            tenant_id: Optional tenant identifier
+            db: Optional database session for storage
+        Returns:
+            List of historical traffic data dictionaries
+        """
         try:
-            logger.debug("Getting historical traffic",
-                         lat=latitude, lon=longitude,
-                         start=start_date, end=end_date)
+            self.logger.info("Getting historical traffic data",
+                             lat=latitude, lon=longitude,
+                             start=start_date, end=end_date, tenant_id=tenant_id)
+            # Validate date range
+            if start_date >= end_date:
+                self.logger.warning("Invalid date range", start=start_date, end=end_date)
+                return []
-            # Check database first
             location_id = f"{latitude:.4f},{longitude:.4f}"
-            stmt = select(TrafficData).where(
-                and_(
-                    TrafficData.location_id == location_id,
-                    TrafficData.date >= start_date,
-                    TrafficData.date <= end_date
-                )
-            ).order_by(TrafficData.date)
-            result = await db.execute(stmt)
-            db_records = result.scalars().all()
+            # Check database first if session provided
+            if db:
+                stmt = select(TrafficData).where(
+                    and_(
+                        TrafficData.location_id == location_id,
+                        TrafficData.date >= start_date,
+                        TrafficData.date <= end_date
+                    )
+                ).order_by(TrafficData.date)
+                result = await db.execute(stmt)
+                db_records = result.scalars().all()
+                if db_records:
+                    self.logger.info("Historical traffic data found in database",
+                                     count=len(db_records))
+                    return [self._convert_db_record_to_dict(record) for record in db_records]
-            if db_records:
-                logger.debug("Historical traffic data found in database", count=len(db_records))
-                return [TrafficDataResponse(
-                    date=record.date,
-                    traffic_volume=record.traffic_volume,
-                    pedestrian_count=record.pedestrian_count,
-                    congestion_level=record.congestion_level,
-                    average_speed=record.average_speed,
-                    source=record.source
-                ) for record in db_records]
-            # If not in database, fetch from API and store
-            logger.debug("Fetching historical data from MADRID OPEN DATA")
-            traffic_data = await self.madrid_client.get_historical_traffic(
+            # Delegate to universal client
+            traffic_data = await self.universal_client.get_historical_traffic(
                 latitude, longitude, start_date, end_date
             )
             if traffic_data:
-                # Enhanced storage with better error handling and validation
-                stored_count = await self._store_traffic_data_batch(
-                    traffic_data, location_id, db
-                )
-                logger.info("Traffic data stored for re-training",
-                            fetched=len(traffic_data), stored=stored_count, location=location_id)
-                return [TrafficDataResponse(**item) for item in traffic_data]
+                # Add service metadata to each record
+                for record in traffic_data:
+                    record['service_metadata'] = {
+                        'request_timestamp': datetime.now().isoformat(),
+                        'tenant_id': tenant_id,
+                        'service_version': '2.0',
+                        'query_location': {'latitude': latitude, 'longitude': longitude},
+                        'date_range': {
+                            'start': start_date.isoformat(),
+                            'end': end_date.isoformat()
+                        }
+                    }
+                # Store in database if session provided
+                if db:
+                    stored_count = await self._store_traffic_data_batch(
+                        traffic_data, location_id, db
+                    )
+                    self.logger.info("Traffic data stored for re-training",
+                                     fetched=len(traffic_data), stored=stored_count,
+                                     location=location_id)
+                self.logger.info("Successfully retrieved historical traffic data",
+                                 lat=latitude, lon=longitude, records=len(traffic_data))
+                return traffic_data
             else:
-                logger.warning("No historical traffic data received")
+                self.logger.info("No historical traffic data available",
+                                 lat=latitude, lon=longitude)
                 return []
         except Exception as e:
-            logger.error("Failed to get historical traffic", error=str(e))
+            self.logger.error("Error getting historical traffic data",
+                              lat=latitude, lon=longitude, error=str(e))
             return []
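
With db now optional, callers decide whether results are persisted. A usage sketch, assuming an async context and an open AsyncSession named session; the coordinates and tenant value are example inputs only:

from datetime import datetime, timedelta

async def load_last_month(service: TrafficService, session) -> None:
    end = datetime.now()
    start = end - timedelta(days=30)
    # With a session: the database is checked first and fetched rows are stored.
    stored = await service.get_historical_traffic(
        40.4168, -3.7038, start, end, tenant_id="demo-tenant", db=session
    )
    # Without a session: a pure API fetch, nothing is persisted.
    transient = await service.get_historical_traffic(40.4168, -3.7038, start, end)
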
+    def _convert_db_record_to_dict(self, record: TrafficData) -> Dict[str, Any]:
+        """Convert database record to dictionary format"""
+        return {
+            'date': record.date,
+            'traffic_volume': record.traffic_volume,
+            'pedestrian_count': record.pedestrian_count,
+            'congestion_level': record.congestion_level,
+            'average_speed': record.average_speed,
+            'source': record.source,
+            'location_id': record.location_id,
+            'raw_data': record.raw_data
+        }
+    async def get_traffic_events(
+        self,
+        latitude: float,
+        longitude: float,
+        radius_km: float = 5.0,
+        tenant_id: Optional[str] = None
+    ) -> List[Dict[str, Any]]:
+        """
+        Get traffic events and incidents for any supported location
+        Args:
+            latitude: Query location latitude
+            longitude: Query location longitude
+            radius_km: Search radius in kilometers
+            tenant_id: Optional tenant identifier
+        Returns:
+            List of traffic events
+        """
+        try:
+            self.logger.info("Getting traffic events",
+                             lat=latitude, lon=longitude, radius=radius_km, tenant_id=tenant_id)
+            # Delegate to universal client
+            events = await self.universal_client.get_events(latitude, longitude, radius_km)
+            # Add metadata to events
+            for event in events:
+                event['service_metadata'] = {
+                    'request_timestamp': datetime.now().isoformat(),
+                    'tenant_id': tenant_id,
+                    'service_version': '2.0',
+                    'query_location': {'latitude': latitude, 'longitude': longitude},
+                    'search_radius_km': radius_km
+                }
+            self.logger.info("Retrieved traffic events",
+                             lat=latitude, lon=longitude, events=len(events))
+            return events
+        except Exception as e:
+            self.logger.error("Error getting traffic events",
+                              lat=latitude, lon=longitude, error=str(e))
+            return []
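
How the universal client applies radius_km is not visible here; when events carry coordinates, a great-circle filter is the usual approach. A sketch using the haversine formula, where the latitude/longitude keys on event dicts are assumed for illustration:

import math
from typing import Any, Dict, List

def haversine_km(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
    """Great-circle distance between two points, in kilometers."""
    dlat = math.radians(lat2 - lat1)
    dlon = math.radians(lon2 - lon1)
    a = (math.sin(dlat / 2) ** 2
         + math.cos(math.radians(lat1)) * math.cos(math.radians(lat2))
         * math.sin(dlon / 2) ** 2)
    return 2 * 6371.0 * math.asin(math.sqrt(a))  # 6371 km: mean Earth radius

def filter_by_radius(events: List[Dict[str, Any]], lat: float, lon: float,
                     radius_km: float) -> List[Dict[str, Any]]:
    return [e for e in events
            if haversine_km(lat, lon, e["latitude"], e["longitude"]) <= radius_km]
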
+    def get_location_info(self, latitude: float, longitude: float) -> Dict[str, Any]:
+        """
+        Get information about traffic data availability for location
+        Args:
+            latitude: Query location latitude
+            longitude: Query location longitude
+        Returns:
+            Dict with location support information
+        """
+        try:
+            info = self.universal_client.get_location_info(latitude, longitude)
+            # Add service layer information
+            info['service_layer'] = {
+                'version': '2.0',
+                'abstraction_level': 'universal',
+                'supported_operations': [
+                    'current_traffic',
+                    'historical_traffic',
+                    'traffic_events',
+                    'bulk_requests'
+                ]
+            }
+            return info
+        except Exception as e:
+            self.logger.error("Error getting location info",
+                              lat=latitude, lon=longitude, error=str(e))
+            return {
+                'supported': False,
+                'error': str(e),
+                'service_layer': {'version': '2.0'}
+            }
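
Because get_location_info is synchronous, it works as a cheap pre-flight check before the async fetches. A usage sketch; the 'supported' key on the success path is an assumption based on the error fallback above:

async def fetch_if_supported(service: TrafficService, lat: float, lon: float):
    info = service.get_location_info(lat, lon)  # no await: synchronous capability check
    if not info.get("supported", False):  # assumed key, mirrors the error fallback
        return None
    return await service.get_current_traffic(lat, lon)
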
     async def store_traffic_data(self,
                                  latitude: float,
                                  longitude: float,
@@ -176,7 +337,8 @@ class TrafficService:
             else:
                 existing_dates = set()
-            # Store only new records
+            # Prepare batch of new records for bulk insert
+            batch_records = []
             for data in traffic_data:
                 try:
                     record_date = data.get('date')
@@ -188,32 +350,41 @@ class TrafficService:
logger.warning("Invalid traffic data, skipping", data=data)
continue
traffic_record = TrafficData(
location_id=location_id,
date=record_date,
traffic_volume=data.get('traffic_volume'),
pedestrian_count=data.get('pedestrian_count'),
congestion_level=data.get('congestion_level'),
average_speed=data.get('average_speed'),
source=data.get('source', 'madrid_opendata'),
raw_data=str(data)
)
db.add(traffic_record)
stored_count += 1
# Commit in batches to avoid memory issues
if stored_count % 100 == 0:
await db.commit()
logger.debug(f"Committed batch of {stored_count} records")
# Prepare record data for bulk insert
record_data = {
'location_id': location_id,
'date': record_date,
'traffic_volume': data.get('traffic_volume'),
'pedestrian_count': data.get('pedestrian_count'),
'congestion_level': data.get('congestion_level'),
'average_speed': data.get('average_speed'),
'source': data.get('source', 'madrid_opendata'),
'raw_data': str(data)
}
batch_records.append(record_data)
except Exception as record_error:
logger.warning("Failed to store individual traffic record",
logger.warning("Failed to prepare traffic record",
error=str(record_error), data=data)
continue
# Final commit
await db.commit()
# Use efficient bulk insert instead of individual records
if batch_records:
# Process in chunks to avoid memory issues
chunk_size = 5000
for i in range(0, len(batch_records), chunk_size):
chunk = batch_records[i:i + chunk_size]
# Use SQLAlchemy bulk insert for maximum performance
await db.execute(
TrafficData.__table__.insert(),
chunk
)
await db.commit()
stored_count += len(chunk)
logger.debug(f"Bulk inserted {len(chunk)} records (total: {stored_count})")
logger.info(f"Successfully stored {stored_count} traffic records for location {location_id}")
except Exception as e:
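
The chunked executemany above relies on the existing_dates pre-filter to avoid duplicate rows. On PostgreSQL the same guarantee can be pushed into the statement itself; a sketch of a drop-in replacement for the db.execute call inside the chunk loop, assuming a unique constraint on (location_id, date):

from sqlalchemy.dialects.postgresql import insert as pg_insert

# Inside the chunk loop: rows that collide with existing ones are skipped
# by the database itself, so the in-memory date pre-filter becomes optional.
stmt = pg_insert(TrafficData.__table__).on_conflict_do_nothing(
    index_elements=["location_id", "date"]
)
await db.execute(stmt, chunk)
await db.commit()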