# ================================================================
# services/data/app/services/traffic_service.py
# ================================================================
"""
Abstracted traffic service: a universal interface for traffic data across multiple cities.
"""

from datetime import datetime
from typing import Any, Dict, List, Optional

import structlog
from sqlalchemy import and_, select
from sqlalchemy.ext.asyncio import AsyncSession

from app.external.apis.traffic import UniversalTrafficClient
from app.models.traffic import TrafficData
from app.core.performance import (
    async_cache,
    monitor_performance,
    global_performance_monitor,
)

logger = structlog.get_logger()


class TrafficService:
    """
    Abstracted traffic service providing a unified interface for traffic data.
    Routes requests to the appropriate city-specific client automatically.
    """

    def __init__(self):
        self.universal_client = UniversalTrafficClient()
        self.logger = structlog.get_logger(__name__)

    @async_cache(ttl=300)  # Cache for 5 minutes
    @monitor_performance(monitor=global_performance_monitor)
    async def get_current_traffic(
        self,
        latitude: float,
        longitude: float,
        tenant_id: Optional[str] = None
    ) -> Optional[Dict[str, Any]]:
        """
        Get current traffic data for any supported location.

        Args:
            latitude: Query location latitude
            longitude: Query location longitude
            tenant_id: Optional tenant identifier for logging/analytics

        Returns:
            Dict with current traffic data, or None if not available
        """
        try:
            self.logger.info("Getting current traffic data",
                             lat=latitude, lon=longitude, tenant_id=tenant_id)

            # Delegate to the universal client, which routes to a city-specific backend
            traffic_data = await self.universal_client.get_current_traffic(latitude, longitude)

            if traffic_data:
                # Annotate the response with service-level metadata
                traffic_data['service_metadata'] = {
                    'request_timestamp': datetime.now().isoformat(),
                    'tenant_id': tenant_id,
                    'service_version': '2.0',
                    'query_location': {'latitude': latitude, 'longitude': longitude}
                }

                self.logger.info("Successfully retrieved current traffic data",
                                 lat=latitude, lon=longitude,
                                 source=traffic_data.get('source', 'unknown'))
                return traffic_data
            else:
                self.logger.warning("No current traffic data available",
                                    lat=latitude, lon=longitude)
                return None

        except Exception as e:
            self.logger.error("Error getting current traffic data",
                              lat=latitude, lon=longitude, error=str(e))
            return None
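
    # Illustrative call pattern for the method above (the coordinates are example
    # values, not a guarantee of a supported city):
    #     service = TrafficService()
    #     current = await service.get_current_traffic(40.4168, -3.7038, tenant_id="demo")
    #     source = (current or {}).get('source')
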
    @async_cache(ttl=1800)  # Cache for 30 minutes (historical data changes less frequently)
    @monitor_performance(monitor=global_performance_monitor)
    async def get_historical_traffic(
        self,
        latitude: float,
        longitude: float,
        start_date: datetime,
        end_date: datetime,
        tenant_id: Optional[str] = None,
        db: Optional[AsyncSession] = None
    ) -> List[Dict[str, Any]]:
        """
        Get historical traffic data for any supported location, with optional database storage.

        Args:
            latitude: Query location latitude
            longitude: Query location longitude
            start_date: Start date for historical data
            end_date: End date for historical data
            tenant_id: Optional tenant identifier
            db: Optional database session for storage

        Returns:
            List of historical traffic data dictionaries
        """
        try:
            self.logger.info("Getting historical traffic data",
                             lat=latitude, lon=longitude,
                             start=start_date, end=end_date, tenant_id=tenant_id)

            # Validate the date range
            if start_date >= end_date:
                self.logger.warning("Invalid date range", start=start_date, end=end_date)
                return []

            location_id = f"{latitude:.4f},{longitude:.4f}"

            # Check the database first if a session was provided
            if db:
                stmt = select(TrafficData).where(
                    and_(
                        TrafficData.location_id == location_id,
                        TrafficData.date >= start_date,
                        TrafficData.date <= end_date
                    )
                ).order_by(TrafficData.date)

                result = await db.execute(stmt)
                db_records = result.scalars().all()

                if db_records:
                    self.logger.info("Historical traffic data found in database",
                                     count=len(db_records))
                    return [self._convert_db_record_to_dict(record) for record in db_records]

            # Delegate to the universal client
            traffic_data = await self.universal_client.get_historical_traffic(
                latitude, longitude, start_date, end_date
            )

            if traffic_data:
                # Add service metadata to each record
                for record in traffic_data:
                    record['service_metadata'] = {
                        'request_timestamp': datetime.now().isoformat(),
                        'tenant_id': tenant_id,
                        'service_version': '2.0',
                        'query_location': {'latitude': latitude, 'longitude': longitude},
                        'date_range': {
                            'start': start_date.isoformat(),
                            'end': end_date.isoformat()
                        }
                    }

                # Store in the database if a session was provided
                if db:
                    stored_count = await self._store_traffic_data_batch(
                        traffic_data, location_id, db
                    )
                    self.logger.info("Traffic data stored for re-training",
                                     fetched=len(traffic_data), stored=stored_count,
                                     location=location_id)

                self.logger.info("Successfully retrieved historical traffic data",
                                 lat=latitude, lon=longitude, records=len(traffic_data))
                return traffic_data
            else:
                self.logger.info("No historical traffic data available",
                                 lat=latitude, lon=longitude)
                return []

        except Exception as e:
            self.logger.error("Error getting historical traffic data",
                              lat=latitude, lon=longitude, error=str(e))
            return []
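
    # Flow summary (as implemented above): serve from the TrafficData table when a
    # session is provided and matching rows exist; otherwise fetch via the universal
    # client, annotate each record, and persist the batch for later model re-training.
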
    def _convert_db_record_to_dict(self, record: TrafficData) -> Dict[str, Any]:
        """Convert a database record to the dictionary format returned by the service."""
        return {
            'date': record.date,
            'traffic_volume': record.traffic_volume,
            'pedestrian_count': record.pedestrian_count,
            'congestion_level': record.congestion_level,
            'average_speed': record.average_speed,
            'source': record.source,
            'location_id': record.location_id,
            'raw_data': record.raw_data
        }
    async def get_traffic_events(
        self,
        latitude: float,
        longitude: float,
        radius_km: float = 5.0,
        tenant_id: Optional[str] = None
    ) -> List[Dict[str, Any]]:
        """
        Get traffic events and incidents for any supported location.

        Args:
            latitude: Query location latitude
            longitude: Query location longitude
            radius_km: Search radius in kilometers
            tenant_id: Optional tenant identifier

        Returns:
            List of traffic events
        """
        try:
            self.logger.info("Getting traffic events",
                             lat=latitude, lon=longitude, radius=radius_km, tenant_id=tenant_id)

            # Delegate to the universal client
            events = await self.universal_client.get_events(latitude, longitude, radius_km)

            # Annotate each event with service-level metadata
            for event in events:
                event['service_metadata'] = {
                    'request_timestamp': datetime.now().isoformat(),
                    'tenant_id': tenant_id,
                    'service_version': '2.0',
                    'query_location': {'latitude': latitude, 'longitude': longitude},
                    'search_radius_km': radius_km
                }

            self.logger.info("Retrieved traffic events",
                             lat=latitude, lon=longitude, events=len(events))
            return events

        except Exception as e:
            self.logger.error("Error getting traffic events",
                              lat=latitude, lon=longitude, error=str(e))
            return []
    def get_location_info(self, latitude: float, longitude: float) -> Dict[str, Any]:
        """
        Get information about traffic data availability for a location.

        Args:
            latitude: Query location latitude
            longitude: Query location longitude

        Returns:
            Dict with location support information
        """
        try:
            info = self.universal_client.get_location_info(latitude, longitude)

            # Add service-layer information
            info['service_layer'] = {
                'version': '2.0',
                'abstraction_level': 'universal',
                'supported_operations': [
                    'current_traffic',
                    'historical_traffic',
                    'traffic_events',
                    'bulk_requests'
                ]
            }
            return info

        except Exception as e:
            self.logger.error("Error getting location info",
                              lat=latitude, lon=longitude, error=str(e))
            return {
                'supported': False,
                'error': str(e),
                'service_layer': {'version': '2.0'}
            }
    async def store_traffic_data(self,
                                 latitude: float,
                                 longitude: float,
                                 traffic_data: Dict[str, Any],
                                 db: AsyncSession) -> bool:
        """Store a single traffic data record in the database."""
        try:
            location_id = f"{latitude:.4f},{longitude:.4f}"

            traffic_record = TrafficData(
                location_id=location_id,
                date=traffic_data.get("date", datetime.now()),
                traffic_volume=traffic_data.get("traffic_volume"),
                pedestrian_count=traffic_data.get("pedestrian_count"),
                congestion_level=traffic_data.get("congestion_level"),
                average_speed=traffic_data.get("average_speed"),
                source=traffic_data.get("source", "madrid_opendata"),
                raw_data=str(traffic_data) if traffic_data else None
            )

            db.add(traffic_record)
            await db.commit()

            logger.debug("Traffic data stored successfully", location_id=location_id)
            return True

        except Exception as e:
            logger.error("Failed to store traffic data", error=str(e))
            await db.rollback()
            return False
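
    # Note: this single-record path commits once per call. For backfills, the batch
    # helper below deduplicates against existing rows and bulk-inserts in chunks,
    # which is substantially cheaper per record.
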
    async def _store_traffic_data_batch(self,
                                        traffic_data: List[Dict[str, Any]],
                                        location_id: str,
                                        db: AsyncSession) -> int:
        """Store a batch of traffic data with validation and duplicate handling."""
        stored_count = 0

        try:
            # Check for existing records to avoid duplicates
            existing_dates = set()
            if traffic_data:
                dates = [data.get('date') for data in traffic_data if data.get('date')]
                if dates:
                    # Query existing records for this location and date range
                    existing_stmt = select(TrafficData.date).where(
                        and_(
                            TrafficData.location_id == location_id,
                            TrafficData.date.in_(dates)
                        )
                    )
                    result = await db.execute(existing_stmt)
                    existing_dates = {row[0] for row in result.fetchall()}

                    logger.debug(f"Found {len(existing_dates)} existing records for location {location_id}")

            # Prepare the batch of new records for bulk insert
            batch_records = []
            for data in traffic_data:
                try:
                    record_date = data.get('date')
                    if not record_date or record_date in existing_dates:
                        continue  # Skip duplicates

                    # Validate required fields and value ranges
                    if not self._validate_traffic_data(data):
                        logger.warning("Invalid traffic data, skipping", data=data)
                        continue

                    # Track the date so duplicates within the same batch are also skipped
                    existing_dates.add(record_date)

                    # Prepare record data for bulk insert
                    batch_records.append({
                        'location_id': location_id,
                        'date': record_date,
                        'traffic_volume': data.get('traffic_volume'),
                        'pedestrian_count': data.get('pedestrian_count'),
                        'congestion_level': data.get('congestion_level'),
                        'average_speed': data.get('average_speed'),
                        'source': data.get('source', 'madrid_opendata'),
                        'raw_data': str(data)
                    })

                except Exception as record_error:
                    logger.warning("Failed to prepare traffic record",
                                   error=str(record_error), data=data)
                    continue

            # Use an efficient bulk insert instead of per-record adds
            if batch_records:
                # Process in chunks to bound memory use and statement size
                chunk_size = 5000
                for i in range(0, len(batch_records), chunk_size):
                    chunk = batch_records[i:i + chunk_size]

                    # Core-level bulk insert for maximum throughput
                    await db.execute(
                        TrafficData.__table__.insert(),
                        chunk
                    )
                    await db.commit()
                    stored_count += len(chunk)

                    logger.debug(f"Bulk inserted {len(chunk)} records (total: {stored_count})")

            logger.info(f"Successfully stored {stored_count} traffic records for location {location_id}")

        except Exception as e:
            logger.error("Failed to store traffic data batch",
                         error=str(e), location_id=location_id)
            await db.rollback()

        return stored_count
    def _validate_traffic_data(self, data: Dict[str, Any]) -> bool:
        """Validate traffic data before storage."""
        required_fields = ['date']

        # Check required fields
        for field in required_fields:
            if not data.get(field):
                return False

        # Validate value types and ranges
        traffic_volume = data.get('traffic_volume')
        if traffic_volume is not None and not 0 <= traffic_volume <= 10000:
            return False

        pedestrian_count = data.get('pedestrian_count')
        if pedestrian_count is not None and not 0 <= pedestrian_count <= 10000:
            return False

        average_speed = data.get('average_speed')
        if average_speed is not None and not 0 <= average_speed <= 200:
            return False

        congestion_level = data.get('congestion_level')
        if congestion_level and congestion_level not in ('low', 'medium', 'high', 'blocked'):
            return False

        return True
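
    # For reference, a record that passes validation (values are illustrative):
    #     {'date': datetime(2024, 1, 1), 'traffic_volume': 1200,
    #      'congestion_level': 'medium', 'average_speed': 45.0}
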
    async def get_stored_traffic_for_training(self,
                                              latitude: float,
                                              longitude: float,
                                              start_date: datetime,
                                              end_date: datetime,
                                              db: AsyncSession) -> List[Dict[str, Any]]:
        """Retrieve stored traffic data specifically for training purposes."""
        # Computed outside the try block so the except handler can always log it
        location_id = f"{latitude:.4f},{longitude:.4f}"

        try:
            stmt = select(TrafficData).where(
                and_(
                    TrafficData.location_id == location_id,
                    TrafficData.date >= start_date,
                    TrafficData.date <= end_date
                )
            ).order_by(TrafficData.date)

            result = await db.execute(stmt)
            records = result.scalars().all()

            # Convert to the training format
            training_data = []
            for record in records:
                training_data.append({
                    'date': record.date,
                    'traffic_volume': record.traffic_volume,
                    'pedestrian_count': record.pedestrian_count,
                    'congestion_level': record.congestion_level,
                    'average_speed': record.average_speed,
                    'location_id': record.location_id,
                    'source': record.source,
                    'measurement_point_id': record.raw_data  # raw_data carries additional metadata
                })

            logger.info(f"Retrieved {len(training_data)} traffic records for training",
                        location_id=location_id, start=start_date, end=end_date)
            return training_data

        except Exception as e:
            logger.error("Failed to retrieve traffic data for training",
                         error=str(e), location_id=location_id)
            return []
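

# A minimal usage sketch (illustrative only; the coordinates are examples, and the
# storage paths additionally require an AsyncSession from the application's session
# factory, which is omitted here):
if __name__ == "__main__":
    import asyncio

    async def _demo() -> None:
        service = TrafficService()
        # Synchronous capability check for a location
        info = service.get_location_info(40.4168, -3.7038)
        print("location info:", info)
        # Async fetch of current conditions (served from cache within the TTL)
        current = await service.get_current_traffic(40.4168, -3.7038, tenant_id="demo")
        print("current traffic:", current)

    asyncio.run(_demo())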