Improve the traffic fetching system
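
Replace the Madrid-specific MadridOpenDataClient with a city-agnostic
UniversalTrafficClient, add response caching and performance monitoring via
app.core.performance, make the database session optional, and switch storage
to chunked SQLAlchemy bulk inserts. Methods now return plain dicts enriched
with service metadata instead of TrafficDataResponse objects.

A minimal sketch of how a caller might use the refactored service follows.
The coordinates, tenant id, and printed fields are illustrative only, and the
congestion_level key assumes the universal client returns the same fields the
service stores:

    import asyncio
    from datetime import datetime, timedelta

    from app.services.traffic_service import TrafficService

    async def main():
        service = TrafficService()

        # Current conditions for an example location (Madrid city centre)
        current = await service.get_current_traffic(40.4168, -3.7038, tenant_id="demo")
        if current:
            print(current["congestion_level"], current["service_metadata"]["service_version"])

        # Last week of history; with db=None the service skips the read/store path
        end = datetime.now()
        history = await service.get_historical_traffic(
            40.4168, -3.7038, start_date=end - timedelta(days=7), end_date=end
        )
        print(f"{len(history)} historical records")

    asyncio.run(main())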
services/data/app/services/traffic_service.py
@@ -1,122 +1,283 @@
 # ================================================================
-# services/data/app/services/traffic_service.py - FIXED VERSION
+# services/data/app/services/traffic_service.py
 # ================================================================
-"""Traffic data service with improved error handling"""
+"""
+Abstracted Traffic Service - Universal interface for traffic data across multiple cities
+"""

-from typing import List, Dict, Any, Optional
-from datetime import datetime, timedelta
+import asyncio
+from datetime import datetime
+from typing import Dict, List, Any, Optional, Tuple
 from sqlalchemy.ext.asyncio import AsyncSession
 from sqlalchemy import select, and_
 import structlog

+from app.external.apis.traffic import UniversalTrafficClient
 from app.models.traffic import TrafficData
-from app.external.madrid_opendata import MadridOpenDataClient
-from app.schemas.external import TrafficDataResponse

+import uuid
+from app.core.performance import (
+    async_cache,
+    monitor_performance,
+    global_connection_pool,
+    global_performance_monitor,
+    batch_process
+)

 logger = structlog.get_logger()


 class TrafficService:
+    """
+    Abstracted traffic service providing unified interface for traffic data
+    Routes requests to appropriate city-specific clients automatically
+    """

     def __init__(self):
-        self.madrid_client = MadridOpenDataClient()
+        self.universal_client = UniversalTrafficClient()
+        self.logger = structlog.get_logger(__name__)

-    async def get_current_traffic(self, latitude: float, longitude: float) -> Optional[TrafficDataResponse]:
-        """Get current traffic data for location"""
+    @async_cache(ttl=300)  # Cache for 5 minutes
+    @monitor_performance(monitor=global_performance_monitor)
+    async def get_current_traffic(
+        self,
+        latitude: float,
+        longitude: float,
+        tenant_id: Optional[str] = None
+    ) -> Optional[Dict[str, Any]]:
+        """
+        Get current traffic data for any supported location
+
+        Args:
+            latitude: Query location latitude
+            longitude: Query location longitude
+            tenant_id: Optional tenant identifier for logging/analytics
+
+        Returns:
+            Dict with current traffic data or None if not available
+        """
         try:
-            logger.debug("Getting current traffic", lat=latitude, lon=longitude)
-            traffic_data = await self.madrid_client.get_current_traffic(latitude, longitude)
+            self.logger.info("Getting current traffic data",
+                             lat=latitude, lon=longitude, tenant_id=tenant_id)
+
+            # Delegate to universal client
+            traffic_data = await self.universal_client.get_current_traffic(latitude, longitude)

             if traffic_data:
-                logger.debug("Traffic data received", source=traffic_data.get('source'))
+                # Add service metadata
+                traffic_data['service_metadata'] = {
+                    'request_timestamp': datetime.now().isoformat(),
+                    'tenant_id': tenant_id,
+                    'service_version': '2.0',
+                    'query_location': {'latitude': latitude, 'longitude': longitude}
+                }

-                # Validate and clean traffic data before creating response
-                # Use keyword arguments instead of unpacking
-                response = TrafficDataResponse(
-                    date=traffic_data.get("date", datetime.now()),
-                    traffic_volume=int(traffic_data.get("traffic_volume", 100)),
-                    pedestrian_count=int(traffic_data.get("pedestrian_count", 150)),
-                    congestion_level=str(traffic_data.get("congestion_level", "medium")),
-                    average_speed=float(traffic_data.get("average_speed", 25.0)),  # Fixed: use float, not int
-                    source=str(traffic_data.get("source", "unknown"))
-                )
+                self.logger.info("Successfully retrieved current traffic data",
+                                 lat=latitude, lon=longitude,
+                                 source=traffic_data.get('source', 'unknown'))

-                logger.debug("Successfully created traffic response",
-                             traffic_volume=response.traffic_volume,
-                             congestion_level=response.congestion_level)
-                return response
+                return traffic_data
             else:
-                logger.warning("No traffic data received from Madrid client")
+                self.logger.warning("No current traffic data available",
+                                    lat=latitude, lon=longitude)
                 return None

         except Exception as e:
-            logger.error("Failed to get current traffic", error=str(e), lat=latitude, lon=longitude)
-            # Log the full traceback for debugging
-            import traceback
-            logger.error("Traffic service traceback", traceback=traceback.format_exc())
+            self.logger.error("Error getting current traffic data",
+                              lat=latitude, lon=longitude, error=str(e))
             return None

-    async def get_historical_traffic(self,
-                                     latitude: float,
-                                     longitude: float,
-                                     start_date: datetime,
-                                     end_date: datetime,
-                                     db: AsyncSession) -> List[TrafficDataResponse]:
-        """Get historical traffic data with enhanced storage for re-training"""
+    @async_cache(ttl=1800)  # Cache for 30 minutes (historical data changes less frequently)
+    @monitor_performance(monitor=global_performance_monitor)
+    async def get_historical_traffic(
+        self,
+        latitude: float,
+        longitude: float,
+        start_date: datetime,
+        end_date: datetime,
+        tenant_id: Optional[str] = None,
+        db: Optional[AsyncSession] = None
+    ) -> List[Dict[str, Any]]:
+        """
+        Get historical traffic data for any supported location with database storage
+
+        Args:
+            latitude: Query location latitude
+            longitude: Query location longitude
+            start_date: Start date for historical data
+            end_date: End date for historical data
+            tenant_id: Optional tenant identifier
+            db: Optional database session for storage
+
+        Returns:
+            List of historical traffic data dictionaries
+        """
         try:
-            logger.debug("Getting historical traffic",
-                         lat=latitude, lon=longitude,
-                         start=start_date, end=end_date)
+            self.logger.info("Getting historical traffic data",
+                             lat=latitude, lon=longitude,
+                             start=start_date, end=end_date, tenant_id=tenant_id)
+
+            # Validate date range
+            if start_date >= end_date:
+                self.logger.warning("Invalid date range", start=start_date, end=end_date)
+                return []

-            # Check database first
             location_id = f"{latitude:.4f},{longitude:.4f}"
-            stmt = select(TrafficData).where(
-                and_(
-                    TrafficData.location_id == location_id,
-                    TrafficData.date >= start_date,
-                    TrafficData.date <= end_date
-                )
-            ).order_by(TrafficData.date)
-
-            result = await db.execute(stmt)
-            db_records = result.scalars().all()
+            # Check database first if session provided
+            if db:
+                stmt = select(TrafficData).where(
+                    and_(
+                        TrafficData.location_id == location_id,
+                        TrafficData.date >= start_date,
+                        TrafficData.date <= end_date
+                    )
+                ).order_by(TrafficData.date)
+
+                result = await db.execute(stmt)
+                db_records = result.scalars().all()
+
+                if db_records:
+                    self.logger.info("Historical traffic data found in database",
+                                     count=len(db_records))
+                    return [self._convert_db_record_to_dict(record) for record in db_records]

-            if db_records:
-                logger.debug("Historical traffic data found in database", count=len(db_records))
-                return [TrafficDataResponse(
-                    date=record.date,
-                    traffic_volume=record.traffic_volume,
-                    pedestrian_count=record.pedestrian_count,
-                    congestion_level=record.congestion_level,
-                    average_speed=record.average_speed,
-                    source=record.source
-                ) for record in db_records]

             # If not in database, fetch from API and store
-            logger.debug("Fetching historical data from MADRID OPEN DATA")
-            traffic_data = await self.madrid_client.get_historical_traffic(
+            # Delegate to universal client
+            traffic_data = await self.universal_client.get_historical_traffic(
                 latitude, longitude, start_date, end_date
             )

             if traffic_data:
-                # Enhanced storage with better error handling and validation
-                stored_count = await self._store_traffic_data_batch(
-                    traffic_data, location_id, db
-                )
-                logger.info("Traffic data stored for re-training",
-                            fetched=len(traffic_data), stored=stored_count, location=location_id)
-
-                return [TrafficDataResponse(**item) for item in traffic_data]
+                # Add service metadata to each record
+                for record in traffic_data:
+                    record['service_metadata'] = {
+                        'request_timestamp': datetime.now().isoformat(),
+                        'tenant_id': tenant_id,
+                        'service_version': '2.0',
+                        'query_location': {'latitude': latitude, 'longitude': longitude},
+                        'date_range': {
+                            'start': start_date.isoformat(),
+                            'end': end_date.isoformat()
+                        }
+                    }
+
+                # Store in database if session provided
+                if db:
+                    stored_count = await self._store_traffic_data_batch(
+                        traffic_data, location_id, db
+                    )
+                    self.logger.info("Traffic data stored for re-training",
+                                     fetched=len(traffic_data), stored=stored_count,
+                                     location=location_id)
+
+                self.logger.info("Successfully retrieved historical traffic data",
+                                 lat=latitude, lon=longitude, records=len(traffic_data))
+
+                return traffic_data
             else:
-                logger.warning("No historical traffic data received")
+                self.logger.info("No historical traffic data available",
+                                 lat=latitude, lon=longitude)
                 return []

         except Exception as e:
-            logger.error("Failed to get historical traffic", error=str(e))
+            self.logger.error("Error getting historical traffic data",
+                              lat=latitude, lon=longitude, error=str(e))
             return []

+    def _convert_db_record_to_dict(self, record: TrafficData) -> Dict[str, Any]:
+        """Convert database record to dictionary format"""
+        return {
+            'date': record.date,
+            'traffic_volume': record.traffic_volume,
+            'pedestrian_count': record.pedestrian_count,
+            'congestion_level': record.congestion_level,
+            'average_speed': record.average_speed,
+            'source': record.source,
+            'location_id': record.location_id,
+            'raw_data': record.raw_data
+        }
+
+    async def get_traffic_events(
+        self,
+        latitude: float,
+        longitude: float,
+        radius_km: float = 5.0,
+        tenant_id: Optional[str] = None
+    ) -> List[Dict[str, Any]]:
+        """
+        Get traffic events and incidents for any supported location
+
+        Args:
+            latitude: Query location latitude
+            longitude: Query location longitude
+            radius_km: Search radius in kilometers
+            tenant_id: Optional tenant identifier
+
+        Returns:
+            List of traffic events
+        """
+        try:
+            self.logger.info("Getting traffic events",
+                             lat=latitude, lon=longitude, radius=radius_km, tenant_id=tenant_id)
+
+            # Delegate to universal client
+            events = await self.universal_client.get_events(latitude, longitude, radius_km)
+
+            # Add metadata to events
+            for event in events:
+                event['service_metadata'] = {
+                    'request_timestamp': datetime.now().isoformat(),
+                    'tenant_id': tenant_id,
+                    'service_version': '2.0',
+                    'query_location': {'latitude': latitude, 'longitude': longitude},
+                    'search_radius_km': radius_km
+                }
+
+            self.logger.info("Retrieved traffic events",
+                             lat=latitude, lon=longitude, events=len(events))
+
+            return events
+
+        except Exception as e:
+            self.logger.error("Error getting traffic events",
+                              lat=latitude, lon=longitude, error=str(e))
+            return []
+
+    def get_location_info(self, latitude: float, longitude: float) -> Dict[str, Any]:
+        """
+        Get information about traffic data availability for location
+
+        Args:
+            latitude: Query location latitude
+            longitude: Query location longitude
+
+        Returns:
+            Dict with location support information
+        """
+        try:
+            info = self.universal_client.get_location_info(latitude, longitude)
+
+            # Add service layer information
+            info['service_layer'] = {
+                'version': '2.0',
+                'abstraction_level': 'universal',
+                'supported_operations': [
+                    'current_traffic',
+                    'historical_traffic',
+                    'traffic_events',
+                    'bulk_requests'
+                ]
+            }
+
+            return info
+
+        except Exception as e:
+            self.logger.error("Error getting location info",
+                              lat=latitude, lon=longitude, error=str(e))
+            return {
+                'supported': False,
+                'error': str(e),
+                'service_layer': {'version': '2.0'}
+            }
+
     async def store_traffic_data(self,
                                  latitude: float,
                                  longitude: float,
@@ -176,7 +337,8 @@ class TrafficService:
             else:
                 existing_dates = set()

-            # Store only new records
+            # Prepare batch of new records for bulk insert
+            batch_records = []
             for data in traffic_data:
                 try:
                     record_date = data.get('date')
@@ -188,32 +350,41 @@ class TrafficService:
                         logger.warning("Invalid traffic data, skipping", data=data)
                         continue

-                    traffic_record = TrafficData(
-                        location_id=location_id,
-                        date=record_date,
-                        traffic_volume=data.get('traffic_volume'),
-                        pedestrian_count=data.get('pedestrian_count'),
-                        congestion_level=data.get('congestion_level'),
-                        average_speed=data.get('average_speed'),
-                        source=data.get('source', 'madrid_opendata'),
-                        raw_data=str(data)
-                    )
-
-                    db.add(traffic_record)
-                    stored_count += 1
-
-                    # Commit in batches to avoid memory issues
-                    if stored_count % 100 == 0:
-                        await db.commit()
-                        logger.debug(f"Committed batch of {stored_count} records")
+                    # Prepare record data for bulk insert
+                    record_data = {
+                        'location_id': location_id,
+                        'date': record_date,
+                        'traffic_volume': data.get('traffic_volume'),
+                        'pedestrian_count': data.get('pedestrian_count'),
+                        'congestion_level': data.get('congestion_level'),
+                        'average_speed': data.get('average_speed'),
+                        'source': data.get('source', 'madrid_opendata'),
+                        'raw_data': str(data)
+                    }
+                    batch_records.append(record_data)

                 except Exception as record_error:
-                    logger.warning("Failed to store individual traffic record",
+                    logger.warning("Failed to prepare traffic record",
                                    error=str(record_error), data=data)
                     continue

-            # Final commit
-            await db.commit()
+            # Use efficient bulk insert instead of individual records
+            if batch_records:
+                # Process in chunks to avoid memory issues
+                chunk_size = 5000
+                for i in range(0, len(batch_records), chunk_size):
+                    chunk = batch_records[i:i + chunk_size]

+                    # Use SQLAlchemy bulk insert for maximum performance
+                    await db.execute(
+                        TrafficData.__table__.insert(),
+                        chunk
+                    )
+                    await db.commit()
+                    stored_count += len(chunk)

+                    logger.debug(f"Bulk inserted {len(chunk)} records (total: {stored_count})")

             logger.info(f"Successfully stored {stored_count} traffic records for location {location_id}")

         except Exception as e:
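
Note on the new decorators: app.core.performance is not part of this diff, so
the exact semantics of async_cache and monitor_performance are not visible
here. As a rough mental model only, an async TTL cache along these lines would
explain the ttl=300/ttl=1800 arguments (the names and behavior below are
assumptions, not the actual module):

    import asyncio
    import time
    from functools import wraps

    def async_cache(ttl: int = 300):
        """Cache coroutine results per argument tuple for `ttl` seconds (sketch)."""
        def decorator(func):
            cache = {}  # key -> (result, timestamp)
            lock = asyncio.Lock()

            @wraps(func)
            async def wrapper(*args, **kwargs):
                # Hashable-argument key; a real implementation would need to
                # special-case unhashable arguments such as a db session
                key = (args, tuple(sorted(kwargs.items())))
                async with lock:
                    hit = cache.get(key)
                    if hit is not None and time.monotonic() - hit[1] < ttl:
                        return hit[0]
                result = await func(*args, **kwargs)
                async with lock:
                    cache[key] = (result, time.monotonic())
                return result
            return wrapper
        return decorator

Because the decorator is applied to bound methods, the service instance ends
up in the cache key, and any production version would also need an eviction
policy so per-location entries do not grow without bound.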