# ================================================================
# services/data/app/external/apis/madrid_traffic_client.py
# ================================================================

"""
Madrid traffic client - Orchestration layer only

Coordinates between HTTP client, data processor, and business logic components
"""

from datetime import datetime, timedelta, timezone
from typing import Dict, List, Any, Optional, Tuple

import structlog

from .traffic import BaseTrafficClient, SupportedCity
from ..base_client import BaseAPIClient
from ..clients.madrid_client import MadridTrafficAPIClient
from ..processors.madrid_processor import MadridTrafficDataProcessor
from ..processors.madrid_business_logic import MadridTrafficAnalyzer
from ..models.madrid_models import TrafficRecord, CongestionLevel
from app.core.performance import (
    rate_limit,
    async_cache,
    monitor_performance,
    global_performance_monitor,
)


class MadridTrafficClient(BaseTrafficClient, BaseAPIClient):
    """
    Enhanced Madrid traffic client - Orchestration layer

    Coordinates HTTP, processing, and business logic components
    """

    # Madrid geographic bounds
    MADRID_BOUNDS = {
        'lat_min': 40.31, 'lat_max': 40.56,
        'lon_min': -3.89, 'lon_max': -3.51
    }

    # Configuration constants
    MAX_HISTORICAL_DAYS = 1095  # 3 years
    MAX_CSV_PROCESSING_ROWS = 5000000
    MEASUREMENT_POINTS_LIMIT = 20

    def __init__(self):
        BaseTrafficClient.__init__(self, SupportedCity.MADRID)
        BaseAPIClient.__init__(self, base_url="https://datos.madrid.es")

        # Initialize components
        self.api_client = MadridTrafficAPIClient()
        self.processor = MadridTrafficDataProcessor()
        self.analyzer = MadridTrafficAnalyzer()

        self.logger = structlog.get_logger()

    def supports_location(self, latitude: float, longitude: float) -> bool:
        """Check if location is within Madrid bounds"""
        return (self.MADRID_BOUNDS['lat_min'] <= latitude <= self.MADRID_BOUNDS['lat_max'] and
                self.MADRID_BOUNDS['lon_min'] <= longitude <= self.MADRID_BOUNDS['lon_max'])

    @rate_limit(calls=30, period=60)
    @async_cache(ttl=300)
    @monitor_performance(monitor=global_performance_monitor)
    async def get_current_traffic(self, latitude: float, longitude: float) -> Optional[Dict[str, Any]]:
        """Get current traffic data with enhanced pedestrian inference"""
        try:
            if not self.supports_location(latitude, longitude):
                self.logger.warning("Location outside Madrid bounds", lat=latitude, lon=longitude)
                return None

            # Fetch XML data
            xml_content = await self.api_client.fetch_current_traffic_xml()
            if not xml_content:
                self.logger.warning("No XML content received")
                return None

            # Parse XML data
            traffic_points = self.processor.parse_traffic_xml(xml_content)
            if not traffic_points:
                self.logger.warning("No traffic points found in XML")
                return None

            # Find nearest traffic point
            nearest_point = self.analyzer.find_nearest_traffic_point(traffic_points, latitude, longitude)
            if not nearest_point:
                self.logger.warning("No nearby traffic points found")
                return None

            # Enhance with business logic
            enhanced_data = await self._enhance_traffic_data(nearest_point, latitude, longitude)

            self.logger.info("Current traffic data retrieved",
                             point_id=nearest_point.get('measurement_point_id'),
                             distance=enhanced_data.get('distance_km', 0))

            return enhanced_data

        except Exception as e:
            self.logger.error("Error getting current traffic", error=str(e))
            return None

    @rate_limit(calls=10, period=60)
    @monitor_performance(monitor=global_performance_monitor)
    async def get_historical_traffic(self, latitude: float, longitude: float,
                                     start_date: datetime, end_date: datetime) -> List[Dict[str, Any]]:
        """Get historical traffic data with pedestrian enhancement"""
        try:
            if not self.supports_location(latitude, longitude):
                self.logger.warning("Location outside Madrid bounds", lat=latitude, lon=longitude)
                return []

            # Validate date range
            if (end_date - start_date).days > self.MAX_HISTORICAL_DAYS:
                self.logger.warning("Date range too large, truncating",
                                    requested_days=(end_date - start_date).days,
                                    max_days=self.MAX_HISTORICAL_DAYS)
                start_date = end_date - timedelta(days=self.MAX_HISTORICAL_DAYS)

            # Fetch measurement points registry
            csv_content = await self.api_client.fetch_measurement_points_csv()
            if not csv_content:
                self.logger.error("Failed to fetch measurement points registry")
                return []

            # Parse measurement points
            measurement_points = self.processor.parse_measurement_points_csv(csv_content)
            if not measurement_points:
                self.logger.error("No measurement points found")
                return []

            # Find nearest measurement points
            nearest_points = self.analyzer.find_nearest_measurement_points(
                measurement_points, latitude, longitude, num_points=3
            )

            if not nearest_points:
                self.logger.warning("No nearby measurement points found")
                return []

            # Process historical data
            historical_records = await self._fetch_historical_data_enhanced(
                latitude, longitude, start_date, end_date, nearest_points
            )

            self.logger.info("Historical traffic data retrieved",
                             records_count=len(historical_records),
                             date_range=f"{start_date.date()} to {end_date.date()}")

            return historical_records

        except Exception as e:
            self.logger.error("Error getting historical traffic", error=str(e))
            return []

    async def get_events(self, latitude: float, longitude: float,
                         radius_km: float = 5.0) -> List[Dict[str, Any]]:
        """Get traffic events (incidents, construction, etc.)"""
        # Madrid doesn't provide a separate events endpoint, so derive
        # events from the enhanced current traffic data instead.
        current_data = await self.get_current_traffic(latitude, longitude)
        if current_data and current_data.get('congestion_level') in ['high', 'blocked']:
            return [{
                'type': 'congestion',
                'severity': current_data.get('congestion_level'),
                'description': f"High traffic congestion at {current_data.get('measurement_point_name', 'measurement point')}",
                'location': {
                    'latitude': current_data.get('latitude'),
                    'longitude': current_data.get('longitude')
                },
                'timestamp': current_data.get('timestamp')
            }]
        return []

    async def _enhance_traffic_data(self, traffic_point: Dict[str, Any],
                                    query_lat: float, query_lon: float) -> Dict[str, Any]:
        """Enhance traffic data with business logic and pedestrian inference"""
        # Calculate distance
        distance_km = self.analyzer.calculate_distance(
            query_lat, query_lon,
            traffic_point.get('latitude', 0),
            traffic_point.get('longitude', 0)
        )

        # Classify road type
        road_type = self.analyzer.classify_road_type(
            traffic_point.get('measurement_point_name', '')
        )

        # Get congestion level
        congestion_level = self.analyzer.get_congestion_level(
            traffic_point.get('ocupacion', 0)
        )

        # Create traffic record for pedestrian inference
        traffic_record = TrafficRecord(
            date=datetime.now(timezone.utc),
            traffic_volume=traffic_point.get('intensidad', 0),
            occupation_percentage=int(traffic_point.get('ocupacion', 0)),
            load_percentage=traffic_point.get('carga', 0),
            average_speed=30,  # Default speed
            congestion_level=congestion_level,
            pedestrian_count=0,  # Will be calculated
            measurement_point_id=traffic_point.get('measurement_point_id', ''),
            measurement_point_name=traffic_point.get('measurement_point_name', ''),
            road_type=road_type,
            source='madrid_current_xml'
        )

        # Calculate pedestrian count
        location_context = {
            'latitude': traffic_point.get('latitude'),
            'longitude': traffic_point.get('longitude'),
            'measurement_point_name': traffic_point.get('measurement_point_name')
        }

        pedestrian_count, inference_metadata = self.analyzer.calculate_pedestrian_flow(
            traffic_record, location_context
        )

        # Build enhanced response
        enhanced_data = {
            'timestamp': datetime.now(timezone.utc),
            'latitude': traffic_point.get('latitude'),
            'longitude': traffic_point.get('longitude'),
            'measurement_point_id': traffic_point.get('measurement_point_id'),
            'measurement_point_name': traffic_point.get('measurement_point_name'),
            'traffic_volume': traffic_point.get('intensidad', 0),
            'occupation_percentage': int(traffic_point.get('ocupacion', 0)),
            'load_percentage': traffic_point.get('carga', 0),
            'congestion_level': congestion_level,
            'pedestrian_count': pedestrian_count,
            'road_type': road_type,
            'distance_km': distance_km,
            'source': 'madrid_current_xml',
            'city': 'madrid',
            'inference_metadata': inference_metadata,
            'raw_data': traffic_point
        }

        return enhanced_data

    async def _fetch_historical_data_enhanced(self, latitude: float, longitude: float,
                                              start_date: datetime, end_date: datetime,
                                              nearest_points: List[Tuple[str, Dict[str, Any], float]]) -> List[Dict[str, Any]]:
        """Fetch and process historical traffic data"""
        historical_records = []

        try:
            # Process by year and month to avoid memory issues
            current_date = start_date.replace(day=1)  # Start from beginning of month

            while current_date <= end_date:
                year = current_date.year
                month = current_date.month

                # Build historical URL
                zip_url = self.api_client._build_historical_url(year, month)

                self.logger.info("Processing historical ZIP file",
                                 year=year, month=month, zip_url=zip_url)

                # Fetch ZIP content
                zip_content = await self.api_client.fetch_historical_zip(zip_url)
                if not zip_content:
                    self.logger.warning("Failed to fetch historical ZIP", url=zip_url)
                    if current_date.month == 12:
                        current_date = current_date.replace(year=current_date.year + 1, month=1)
                    else:
                        current_date = current_date.replace(month=current_date.month + 1)
                    continue

                # Process ZIP content with enhanced parsing
                month_records = await self._process_historical_zip_enhanced(
                    zip_content, zip_url, latitude, longitude, nearest_points
                )

                # Filter by date range
                filtered_records = [
                    record for record in month_records
                    if start_date <= record.get('date', datetime.min.replace(tzinfo=timezone.utc)) <= end_date
                ]

                historical_records.extend(filtered_records)

                self.logger.info("Month processing completed",
                                 year=year, month=month,
                                 month_records=len(month_records),
                                 filtered_records=len(filtered_records),
                                 total_records=len(historical_records))

                # Move to next month
                if current_date.month == 12:
                    current_date = current_date.replace(year=current_date.year + 1, month=1)
                else:
                    current_date = current_date.replace(month=current_date.month + 1)

            return historical_records

        except Exception as e:
            self.logger.error("Error fetching historical data", error=str(e))
            return historical_records  # Return partial results

    async def _process_historical_zip_enhanced(self, zip_content: bytes, zip_url: str,
                                               latitude: float, longitude: float,
                                               nearest_points: List[Tuple[str, Dict[str, Any], float]]) -> List[Dict[str, Any]]:
        """Process historical ZIP file with enhanced parsing"""
        try:
            import zipfile
            import io
            import gc

            historical_records = []
            nearest_ids = {p[0] for p in nearest_points}

            with zipfile.ZipFile(io.BytesIO(zip_content)) as zip_file:
                csv_files = [f for f in zip_file.namelist() if f.lower().endswith('.csv')]

                for csv_filename in csv_files:
                    try:
                        # Read CSV content
                        with zip_file.open(csv_filename) as csv_file:
                            text_content = csv_file.read().decode('utf-8', errors='ignore')

                        # Process CSV in chunks using the processor
                        csv_records = await self.processor.process_csv_content_chunked(
                            text_content, csv_filename, nearest_ids, nearest_points
                        )

                        historical_records.extend(csv_records)

                        # Force garbage collection between large CSV files
                        gc.collect()

                    except Exception as csv_error:
                        self.logger.warning("Error processing CSV file",
                                            filename=csv_filename,
                                            error=str(csv_error))
                        continue

            self.logger.info("Historical ZIP processing completed",
                             zip_url=zip_url,
                             total_records=len(historical_records))

            return historical_records

        except Exception as e:
            self.logger.error("Error processing historical ZIP file",
                              zip_url=zip_url, error=str(e))
            return []
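

# ----------------------------------------------------------------
# Illustrative usage sketch, not part of the service wiring. It
# assumes the package layout shown in the header (run e.g.
# `python -m app.external.apis.madrid_traffic_client` from the
# service root so the relative imports resolve) and that
# datos.madrid.es is reachable. The coordinates below are central
# Madrid (roughly Puerta del Sol) and fall inside MADRID_BOUNDS.
# ----------------------------------------------------------------
if __name__ == "__main__":
    import asyncio

    async def _demo() -> None:
        client = MadridTrafficClient()

        # Current conditions nearest to the query point
        current = await client.get_current_traffic(40.4168, -3.7038)
        print("current traffic:", current)

        # One week of historical records for the same location
        # (note: this may download a large monthly ZIP from the open-data portal)
        end = datetime.now(timezone.utc)
        start = end - timedelta(days=7)
        history = await client.get_historical_traffic(40.4168, -3.7038, start, end)
        print("historical records:", len(history))

        # Congestion-derived events (see get_events above)
        events = await client.get_events(40.4168, -3.7038)
        print("events:", events)

    asyncio.run(_demo())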