# ================================================================
# services/data/app/services/traffic_service.py
# ================================================================
"""
Abstracted Traffic Service - Universal interface for traffic data across multiple cities
"""
import asyncio
from datetime import datetime
from typing import Dict, List, Any, Optional

from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select, and_
import structlog

from app.external.apis.traffic import UniversalTrafficClient
from app.models.traffic import TrafficData
from app.core.performance import (
    async_cache,
    monitor_performance,
    global_performance_monitor,
)
logger = structlog.get_logger()
class TrafficService:
"""
Abstracted traffic service providing unified interface for traffic data
Routes requests to appropriate city-specific clients automatically
"""
def __init__(self):
self.universal_client = UniversalTrafficClient()
self.logger = structlog.get_logger(__name__)
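
    # Decorator order note: async_cache is the outermost wrapper (decorators
    # apply bottom-up), so cache hits presumably return before
    # monitor_performance records a timing sample.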
@async_cache(ttl=300) # Cache for 5 minutes
@monitor_performance(monitor=global_performance_monitor)
async def get_current_traffic(
self,
latitude: float,
longitude: float,
tenant_id: Optional[str] = None
) -> Optional[Dict[str, Any]]:
"""
Get current traffic data for any supported location
Args:
latitude: Query location latitude
longitude: Query location longitude
tenant_id: Optional tenant identifier for logging/analytics
Returns:
Dict with current traffic data or None if not available
"""
try:
self.logger.info("Getting current traffic data",
lat=latitude, lon=longitude, tenant_id=tenant_id)
# Delegate to universal client
traffic_data = await self.universal_client.get_current_traffic(latitude, longitude)
if traffic_data:
# Add service metadata
traffic_data['service_metadata'] = {
'request_timestamp': datetime.now().isoformat(),
'tenant_id': tenant_id,
'service_version': '2.0',
'query_location': {'latitude': latitude, 'longitude': longitude}
}
self.logger.info("Successfully retrieved current traffic data",
lat=latitude, lon=longitude,
source=traffic_data.get('source', 'unknown'))
return traffic_data
else:
self.logger.warning("No current traffic data available",
lat=latitude, lon=longitude)
return None
except Exception as e:
self.logger.error("Error getting current traffic data",
lat=latitude, lon=longitude, error=str(e))
return None
@async_cache(ttl=1800) # Cache for 30 minutes (historical data changes less frequently)
@monitor_performance(monitor=global_performance_monitor)
async def get_historical_traffic(
self,
latitude: float,
longitude: float,
start_date: datetime,
end_date: datetime,
tenant_id: Optional[str] = None,
db: Optional[AsyncSession] = None
) -> List[Dict[str, Any]]:
"""
Get historical traffic data for any supported location with database storage
Args:
latitude: Query location latitude
longitude: Query location longitude
start_date: Start date for historical data
end_date: End date for historical data
tenant_id: Optional tenant identifier
db: Optional database session for storage
Returns:
List of historical traffic data dictionaries
"""
try:
self.logger.info("Getting historical traffic data",
lat=latitude, lon=longitude,
start=start_date, end=end_date, tenant_id=tenant_id)
# Validate date range
if start_date >= end_date:
self.logger.warning("Invalid date range", start=start_date, end=end_date)
return []
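            # Four decimal places ≈ 11 m of latitude, so nearby queries map to
            # the same location_id and share cached/stored rows.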
location_id = f"{latitude:.4f},{longitude:.4f}"
# Check database first if session provided
if db:
stmt = select(TrafficData).where(
and_(
TrafficData.location_id == location_id,
TrafficData.date >= start_date,
TrafficData.date <= end_date
)
).order_by(TrafficData.date)
result = await db.execute(stmt)
db_records = result.scalars().all()
if db_records:
self.logger.info("Historical traffic data found in database",
count=len(db_records))
return [self._convert_db_record_to_dict(record) for record in db_records]
# Delegate to universal client
traffic_data = await self.universal_client.get_historical_traffic(
latitude, longitude, start_date, end_date
)
if traffic_data:
# Add service metadata to each record
for record in traffic_data:
record['service_metadata'] = {
'request_timestamp': datetime.now().isoformat(),
'tenant_id': tenant_id,
'service_version': '2.0',
'query_location': {'latitude': latitude, 'longitude': longitude},
'date_range': {
'start': start_date.isoformat(),
'end': end_date.isoformat()
}
}
# Store in database if session provided
if db:
stored_count = await self._store_traffic_data_batch(
traffic_data, location_id, db
)
self.logger.info("Traffic data stored for re-training",
fetched=len(traffic_data), stored=stored_count,
location=location_id)
self.logger.info("Successfully retrieved historical traffic data",
lat=latitude, lon=longitude, records=len(traffic_data))
return traffic_data
else:
self.logger.info("No historical traffic data available",
lat=latitude, lon=longitude)
return []
except Exception as e:
self.logger.error("Error getting historical traffic data",
lat=latitude, lon=longitude, error=str(e))
return []
def _convert_db_record_to_dict(self, record: TrafficData) -> Dict[str, Any]:
"""Convert database record to dictionary format"""
return {
'date': record.date,
'traffic_volume': record.traffic_volume,
'pedestrian_count': record.pedestrian_count,
'congestion_level': record.congestion_level,
'average_speed': record.average_speed,
'source': record.source,
'location_id': record.location_id,
'raw_data': record.raw_data
}
async def get_traffic_events(
self,
latitude: float,
longitude: float,
radius_km: float = 5.0,
tenant_id: Optional[str] = None
) -> List[Dict[str, Any]]:
"""
Get traffic events and incidents for any supported location
Args:
latitude: Query location latitude
longitude: Query location longitude
radius_km: Search radius in kilometers
tenant_id: Optional tenant identifier
Returns:
List of traffic events
"""
try:
self.logger.info("Getting traffic events",
lat=latitude, lon=longitude, radius=radius_km, tenant_id=tenant_id)
# Delegate to universal client
events = await self.universal_client.get_events(latitude, longitude, radius_km)
# Add metadata to events
for event in events:
event['service_metadata'] = {
'request_timestamp': datetime.now().isoformat(),
'tenant_id': tenant_id,
'service_version': '2.0',
'query_location': {'latitude': latitude, 'longitude': longitude},
'search_radius_km': radius_km
}
self.logger.info("Retrieved traffic events",
lat=latitude, lon=longitude, events=len(events))
return events
except Exception as e:
self.logger.error("Error getting traffic events",
lat=latitude, lon=longitude, error=str(e))
return []
def get_location_info(self, latitude: float, longitude: float) -> Dict[str, Any]:
"""
Get information about traffic data availability for location
Args:
latitude: Query location latitude
longitude: Query location longitude
Returns:
Dict with location support information
"""
try:
info = self.universal_client.get_location_info(latitude, longitude)
# Add service layer information
info['service_layer'] = {
'version': '2.0',
'abstraction_level': 'universal',
'supported_operations': [
'current_traffic',
'historical_traffic',
'traffic_events',
'bulk_requests'
]
}
return info
except Exception as e:
self.logger.error("Error getting location info",
lat=latitude, lon=longitude, error=str(e))
return {
'supported': False,
'error': str(e),
'service_layer': {'version': '2.0'}
}
async def store_traffic_data(self,
latitude: float,
longitude: float,
traffic_data: Dict[str, Any],
db: AsyncSession) -> bool:
"""Store single traffic data record to database"""
try:
location_id = f"{latitude:.4f},{longitude:.4f}"
traffic_record = TrafficData(
location_id=location_id,
date=traffic_data.get("date", datetime.now()),
traffic_volume=traffic_data.get("traffic_volume"),
pedestrian_count=traffic_data.get("pedestrian_count"),
congestion_level=traffic_data.get("congestion_level"),
average_speed=traffic_data.get("average_speed"),
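                # NOTE: the "madrid_opendata" fallback and the str() serialization
                # of raw_data below appear to be legacy choices; str() is lossy
                # compared to json.dumps.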
source=traffic_data.get("source", "madrid_opendata"),
raw_data=str(traffic_data) if traffic_data else None
)
db.add(traffic_record)
await db.commit()
logger.debug("Traffic data stored successfully", location_id=location_id)
return True
except Exception as e:
logger.error("Failed to store traffic data", error=str(e))
await db.rollback()
return False
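
    # store_traffic_data commits one row per call; historical backfills go
    # through _store_traffic_data_batch below, which dedupes against existing
    # rows and bulk-inserts in chunks.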
async def _store_traffic_data_batch(self,
traffic_data: List[Dict[str, Any]],
location_id: str,
db: AsyncSession) -> int:
"""Store batch of traffic data with enhanced validation and duplicate handling"""
stored_count = 0
try:
# Check for existing records to avoid duplicates
if traffic_data:
dates = [data.get('date') for data in traffic_data if data.get('date')]
if dates:
# Query existing records for this location and date range
existing_stmt = select(TrafficData.date).where(
and_(
TrafficData.location_id == location_id,
TrafficData.date.in_(dates)
)
)
                    result = await db.execute(existing_stmt)
                    existing_dates = set(result.scalars().all())
                    logger.debug("Found existing records",
                                 count=len(existing_dates), location_id=location_id)
else:
existing_dates = set()
else:
existing_dates = set()
# Prepare batch of new records for bulk insert
batch_records = []
for data in traffic_data:
try:
record_date = data.get('date')
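                    # Duplicate detection is exact-match on the timestamp:
                    # dates differing by even a second count as new records.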
if not record_date or record_date in existing_dates:
continue # Skip duplicates
# Validate required fields
if not self._validate_traffic_data(data):
logger.warning("Invalid traffic data, skipping", data=data)
continue
# Prepare record data for bulk insert
record_data = {
'location_id': location_id,
'date': record_date,
'traffic_volume': data.get('traffic_volume'),
'pedestrian_count': data.get('pedestrian_count'),
'congestion_level': data.get('congestion_level'),
'average_speed': data.get('average_speed'),
'source': data.get('source', 'madrid_opendata'),
'raw_data': str(data)
}
batch_records.append(record_data)
except Exception as record_error:
logger.warning("Failed to prepare traffic record",
error=str(record_error), data=data)
continue
# Use efficient bulk insert instead of individual records
if batch_records:
# Process in chunks to avoid memory issues
chunk_size = 5000
for i in range(0, len(batch_records), chunk_size):
chunk = batch_records[i:i + chunk_size]
# Use SQLAlchemy bulk insert for maximum performance
await db.execute(
TrafficData.__table__.insert(),
chunk
)
await db.commit()
stored_count += len(chunk)
logger.debug(f"Bulk inserted {len(chunk)} records (total: {stored_count})")
logger.info(f"Successfully stored {stored_count} traffic records for location {location_id}")
except Exception as e:
logger.error("Failed to store traffic data batch",
error=str(e), location_id=location_id)
await db.rollback()
return stored_count
def _validate_traffic_data(self, data: Dict[str, Any]) -> bool:
"""Validate traffic data before storage"""
required_fields = ['date']
# Check required fields
for field in required_fields:
if not data.get(field):
return False
# Validate data types and ranges
traffic_volume = data.get('traffic_volume')
if traffic_volume is not None and (traffic_volume < 0 or traffic_volume > 10000):
return False
pedestrian_count = data.get('pedestrian_count')
if pedestrian_count is not None and (pedestrian_count < 0 or pedestrian_count > 10000):
return False
average_speed = data.get('average_speed')
if average_speed is not None and (average_speed < 0 or average_speed > 200):
return False
congestion_level = data.get('congestion_level')
if congestion_level and congestion_level not in ['low', 'medium', 'high', 'blocked']:
return False
return True
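
    # Illustrative outcomes for _validate_traffic_data:
    #   {'date': datetime(2024, 1, 1), 'traffic_volume': 120}      -> True
    #   {'traffic_volume': 120}                                    -> False (no date)
    #   {'date': datetime(2024, 1, 1), 'average_speed': 250}       -> False (> 200)
    #   {'date': datetime(2024, 1, 1), 'congestion_level': 'jam'}  -> False (unknown level)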
async def get_stored_traffic_for_training(self,
latitude: float,
longitude: float,
start_date: datetime,
end_date: datetime,
db: AsyncSession) -> List[Dict[str, Any]]:
"""Retrieve stored traffic data specifically for training purposes"""
        # Computed before the try block so the except handler can always log it
        location_id = f"{latitude:.4f},{longitude:.4f}"
        try:
stmt = select(TrafficData).where(
and_(
TrafficData.location_id == location_id,
TrafficData.date >= start_date,
TrafficData.date <= end_date
)
).order_by(TrafficData.date)
result = await db.execute(stmt)
records = result.scalars().all()
# Convert to training format
training_data = []
for record in records:
training_data.append({
'date': record.date,
'traffic_volume': record.traffic_volume,
'pedestrian_count': record.pedestrian_count,
'congestion_level': record.congestion_level,
'average_speed': record.average_speed,
'location_id': record.location_id,
'source': record.source,
                    'raw_data': record.raw_data  # stringified payload; may embed measurement-point metadata
})
logger.info(f"Retrieved {len(training_data)} traffic records for training",
location_id=location_id, start=start_date, end=end_date)
return training_data
except Exception as e:
logger.error("Failed to retrieve traffic data for training",
error=str(e), location_id=location_id)
return []
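

# ----------------------------------------------------------------
# Minimal usage sketch (illustrative only): assumes this module runs in a
# configured environment; the coordinates below (central Madrid) and the
# tenant-less calls are placeholders, not values taken from this module.
# ----------------------------------------------------------------
if __name__ == "__main__":
    async def _demo() -> None:
        service = TrafficService()
        # Current conditions for a point in central Madrid
        current = await service.get_current_traffic(40.4168, -3.7038)
        print("current:", current)
        # Incidents within 2 km of the same point
        events = await service.get_traffic_events(40.4168, -3.7038, radius_km=2.0)
        print(f"{len(events)} events")

    asyncio.run(_demo())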