# ================================================================
# services/data/app/external/apis/madrid_traffic_client.py
# ================================================================
"""
2025-08-10 18:32:47 +02:00
Madrid traffic client - Orchestration layer only
Coordinates between HTTP client, data processor, and business logic components
2025-08-10 17:31:38 +02:00
"""
from datetime import datetime, timedelta, timezone
from typing import Dict, List, Any, Optional, Tuple

import structlog

from .traffic import BaseTrafficClient, SupportedCity
from ..base_client import BaseAPIClient
from ..clients.madrid_client import MadridTrafficAPIClient
from ..processors.madrid_processor import MadridTrafficDataProcessor
from ..processors.madrid_business_logic import MadridTrafficAnalyzer
from ..models.madrid_models import TrafficRecord, CongestionLevel
from app.core.performance import (
    rate_limit,
    async_cache,
    monitor_performance,
    global_performance_monitor,
)
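

# Illustrative usage (a sketch; the coordinates are an example chosen for
# this comment, not taken from this file). All public methods are
# coroutines and must be awaited:
#
#     client = MadridTrafficClient()
#     current = await client.get_current_traffic(40.4168, -3.7038)
#     history = await client.get_historical_traffic(
#         40.4168, -3.7038,
#         start_date=datetime(2025, 1, 1, tzinfo=timezone.utc),
#         end_date=datetime(2025, 1, 31, tzinfo=timezone.utc),
#     )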
class MadridTrafficClient(BaseTrafficClient, BaseAPIClient):
    """
    Enhanced Madrid traffic client - orchestration layer.
    Coordinates HTTP, processing, and business-logic components.
    """

    # Madrid geographic bounds
    MADRID_BOUNDS = {
        'lat_min': 40.31, 'lat_max': 40.56,
        'lon_min': -3.89, 'lon_max': -3.51
    }

    # Configuration constants
    MAX_HISTORICAL_DAYS = 1095  # 3 years
    MAX_CSV_PROCESSING_ROWS = 5_000_000
    MEASUREMENT_POINTS_LIMIT = 20
    def __init__(self):
        BaseTrafficClient.__init__(self, SupportedCity.MADRID)
        BaseAPIClient.__init__(self, base_url="https://datos.madrid.es")

        # Initialize components
        self.api_client = MadridTrafficAPIClient()
        self.processor = MadridTrafficDataProcessor()
        self.analyzer = MadridTrafficAnalyzer()

        self.logger = structlog.get_logger()
    def supports_location(self, latitude: float, longitude: float) -> bool:
        """Check if location is within Madrid bounds"""
        return (self.MADRID_BOUNDS['lat_min'] <= latitude <= self.MADRID_BOUNDS['lat_max'] and
                self.MADRID_BOUNDS['lon_min'] <= longitude <= self.MADRID_BOUNDS['lon_max'])
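
    # Note (descriptive, inferred from the decorator arguments below):
    # current-traffic lookups are rate-limited to 30 calls per 60 s,
    # cached for 300 s, and timed via the shared global_performance_monitor.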
    @rate_limit(calls=30, period=60)
    @async_cache(ttl=300)
    @monitor_performance(monitor=global_performance_monitor)
    async def get_current_traffic(self, latitude: float, longitude: float) -> Optional[Dict[str, Any]]:
        """Get current traffic data with enhanced pedestrian inference"""
        try:
            if not self.supports_location(latitude, longitude):
                self.logger.warning("Location outside Madrid bounds", lat=latitude, lon=longitude)
                return None

            # Fetch XML data
            xml_content = await self.api_client.fetch_current_traffic_xml()
            if not xml_content:
                self.logger.warning("No XML content received")
                return None

            # Parse XML data
            traffic_points = self.processor.parse_traffic_xml(xml_content)
            if not traffic_points:
                self.logger.warning("No traffic points found in XML")
                return None

            # Find nearest traffic point
            nearest_point = self.analyzer.find_nearest_traffic_point(traffic_points, latitude, longitude)
            if not nearest_point:
                self.logger.warning("No nearby traffic points found")
                return None

            # Enhance with business logic
            enhanced_data = await self._enhance_traffic_data(nearest_point, latitude, longitude)
            self.logger.info("Current traffic data retrieved",
                             point_id=nearest_point.get('measurement_point_id'),
                             distance=enhanced_data.get('distance_km', 0))
            return enhanced_data

        except Exception as e:
            self.logger.error("Error getting current traffic", error=str(e))
            return None
    @rate_limit(calls=10, period=60)
    @monitor_performance(monitor=global_performance_monitor)
    async def get_historical_traffic(self, latitude: float, longitude: float,
                                     start_date: datetime, end_date: datetime) -> List[Dict[str, Any]]:
        """Get historical traffic data with pedestrian enhancement"""
        try:
            if not self.supports_location(latitude, longitude):
                self.logger.warning("Location outside Madrid bounds", lat=latitude, lon=longitude)
                return []

            # Validate date range
            if (end_date - start_date).days > self.MAX_HISTORICAL_DAYS:
                self.logger.warning("Date range too large, truncating",
                                    requested_days=(end_date - start_date).days,
                                    max_days=self.MAX_HISTORICAL_DAYS)
                start_date = end_date - timedelta(days=self.MAX_HISTORICAL_DAYS)

            # Fetch measurement points registry
            csv_content = await self.api_client.fetch_measurement_points_csv()
            if not csv_content:
                self.logger.error("Failed to fetch measurement points registry")
                return []

            # Parse measurement points
            measurement_points = self.processor.parse_measurement_points_csv(csv_content)
            if not measurement_points:
                self.logger.error("No measurement points found")
                return []

            # Find nearest measurement points
            nearest_points = self.analyzer.find_nearest_measurement_points(
                measurement_points, latitude, longitude, num_points=3
            )
            if not nearest_points:
                self.logger.warning("No nearby measurement points found")
                return []

            # Process historical data
            historical_records = await self._fetch_historical_data_enhanced(
                latitude, longitude, start_date, end_date, nearest_points
            )

            self.logger.info("Historical traffic data retrieved",
                             records_count=len(historical_records),
                             date_range=f"{start_date.date()} to {end_date.date()}")
            return historical_records

        except Exception as e:
            self.logger.error("Error getting historical traffic", error=str(e))
            return []
    async def get_events(self, latitude: float, longitude: float,
                         radius_km: float = 5.0) -> List[Dict[str, Any]]:
        """Get traffic events (incidents, construction, etc.)"""
        # Madrid doesn't provide a separate events endpoint, so heavy
        # congestion in the current traffic data is reported as an event.
        current_data = await self.get_current_traffic(latitude, longitude)
        if current_data and current_data.get('congestion_level') in ['high', 'blocked']:
            return [{
                'type': 'congestion',
                'severity': current_data.get('congestion_level'),
                'description': f"High traffic congestion at {current_data.get('measurement_point_name', 'measurement point')}",
                'location': {
                    'latitude': current_data.get('latitude'),
                    'longitude': current_data.get('longitude')
                },
                'timestamp': current_data.get('timestamp')
            }]
        return []
    async def _enhance_traffic_data(self, traffic_point: Dict[str, Any],
                                    query_lat: float, query_lon: float) -> Dict[str, Any]:
        """Enhance traffic data with business logic and pedestrian inference"""
        # Calculate distance
        distance_km = self.analyzer.calculate_distance(
            query_lat, query_lon,
            traffic_point.get('latitude', 0),
            traffic_point.get('longitude', 0)
        )

        # Classify road type
        road_type = self.analyzer.classify_road_type(
            traffic_point.get('measurement_point_name', '')
        )

        # Get congestion level
        congestion_level = self.analyzer.get_congestion_level(
            traffic_point.get('ocupacion', 0)
        )

        # Create traffic record for pedestrian inference
        traffic_record = TrafficRecord(
            date=datetime.now(timezone.utc),
            traffic_volume=traffic_point.get('intensidad', 0),
            occupation_percentage=int(traffic_point.get('ocupacion', 0)),
            load_percentage=traffic_point.get('carga', 0),
            average_speed=30,  # Default speed
            congestion_level=congestion_level,
            pedestrian_count=0,  # Will be calculated
            measurement_point_id=traffic_point.get('measurement_point_id', ''),
            measurement_point_name=traffic_point.get('measurement_point_name', ''),
            road_type=road_type,
            source='madrid_current_xml'
        )

        # Calculate pedestrian count
        location_context = {
            'latitude': traffic_point.get('latitude'),
            'longitude': traffic_point.get('longitude'),
            'measurement_point_name': traffic_point.get('measurement_point_name')
        }
        pedestrian_count, inference_metadata = self.analyzer.calculate_pedestrian_flow(
            traffic_record, location_context
        )

        # Build enhanced response
        enhanced_data = {
            'timestamp': datetime.now(timezone.utc),
            'latitude': traffic_point.get('latitude'),
            'longitude': traffic_point.get('longitude'),
            'measurement_point_id': traffic_point.get('measurement_point_id'),
            'measurement_point_name': traffic_point.get('measurement_point_name'),
            'traffic_volume': traffic_point.get('intensidad', 0),
            'occupation_percentage': int(traffic_point.get('ocupacion', 0)),
            'load_percentage': traffic_point.get('carga', 0),
            'congestion_level': congestion_level,
            'pedestrian_count': pedestrian_count,
            'road_type': road_type,
            'distance_km': distance_km,
            'source': 'madrid_current_xml',
            'city': 'madrid',
            'inference_metadata': inference_metadata,
            'raw_data': traffic_point
        }
        return enhanced_data
    async def _fetch_historical_data_enhanced(self, latitude: float, longitude: float,
                                              start_date: datetime, end_date: datetime,
                                              nearest_points: List[Tuple[str, Dict[str, Any], float]]) -> List[Dict[str, Any]]:
        """Fetch and process historical traffic data"""
        historical_records = []
        try:
            # Process month by month to avoid memory issues
            current_date = start_date.replace(day=1)  # Start from beginning of month

            while current_date <= end_date:
                year = current_date.year
                month = current_date.month

                # Compute the next month once; used by both the skip and
                # success paths below
                if month == 12:
                    next_date = current_date.replace(year=year + 1, month=1)
                else:
                    next_date = current_date.replace(month=month + 1)

                # Build historical URL
                zip_url = self.api_client._build_historical_url(year, month)
                self.logger.info("Processing historical ZIP file",
                                 year=year, month=month, zip_url=zip_url)

                # Fetch ZIP content
                zip_content = await self.api_client.fetch_historical_zip(zip_url)
                if not zip_content:
                    self.logger.warning("Failed to fetch historical ZIP", url=zip_url)
                    current_date = next_date
                    continue

                # Process ZIP content with enhanced parsing
                month_records = await self._process_historical_zip_enhanced(
                    zip_content, zip_url, latitude, longitude, nearest_points
                )

                # Filter by date range
                filtered_records = [
                    record for record in month_records
                    if start_date <= record.get('date', datetime.min.replace(tzinfo=timezone.utc)) <= end_date
                ]
                historical_records.extend(filtered_records)

                self.logger.info("Month processing completed",
                                 year=year, month=month,
                                 month_records=len(month_records),
                                 filtered_records=len(filtered_records),
                                 total_records=len(historical_records))

                # Move to next month
                current_date = next_date

            return historical_records

        except Exception as e:
            self.logger.error("Error fetching historical data", error=str(e))
            return historical_records  # Return partial results
    async def _process_historical_zip_enhanced(self, zip_content: bytes, zip_url: str,
                                               latitude: float, longitude: float,
                                               nearest_points: List[Tuple[str, Dict[str, Any], float]]) -> List[Dict[str, Any]]:
        """Process historical ZIP file with enhanced parsing"""
        try:
            import zipfile
            import io
            import gc

            historical_records = []
            nearest_ids = {p[0] for p in nearest_points}

            with zipfile.ZipFile(io.BytesIO(zip_content)) as zip_file:
                csv_files = [f for f in zip_file.namelist() if f.lower().endswith('.csv')]

                for csv_filename in csv_files:
                    try:
                        # Read CSV content
                        with zip_file.open(csv_filename) as csv_file:
                            text_content = csv_file.read().decode('utf-8', errors='ignore')

                        # Process CSV in chunks using the processor
                        csv_records = await self.processor.process_csv_content_chunked(
                            text_content, csv_filename, nearest_ids, nearest_points
                        )
                        historical_records.extend(csv_records)

                        # Force garbage collection between large files
                        gc.collect()

                    except Exception as csv_error:
                        self.logger.warning("Error processing CSV file",
                                            filename=csv_filename,
                                            error=str(csv_error))
                        continue

            self.logger.info("Historical ZIP processing completed",
                             zip_url=zip_url,
                             total_records=len(historical_records))
            return historical_records

        except Exception as e:
            self.logger.error("Error processing historical ZIP file",
                              zip_url=zip_url, error=str(e))
            return []
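

# ----------------------------------------------------------------
# Minimal manual smoke test - an illustrative sketch, not part of the
# service wiring. Assumes network access to datos.madrid.es; the
# Puerta del Sol coordinates are an assumption chosen for this example.
# ----------------------------------------------------------------
if __name__ == "__main__":
    import asyncio

    async def _smoke_test() -> None:
        client = MadridTrafficClient()

        # Probe the current-traffic path (returns None outside Madrid
        # bounds or on fetch/parse failure)
        current = await client.get_current_traffic(40.4168, -3.7038)
        print("current traffic:", current)

        # Probe the events path (non-empty only under heavy congestion)
        events = await client.get_events(40.4168, -3.7038)
        print("events:", events)

    asyncio.run(_smoke_test())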