Files
bakery-ia/services/data/app/external/clients/madrid_client.py
2025-08-10 18:32:47 +02:00

155 lines
6.3 KiB
Python

# ================================================================
# services/data/app/external/clients/madrid_client.py
# ================================================================
"""
Pure HTTP client for Madrid traffic APIs
Handles only HTTP communication and response decoding
"""
import httpx
import structlog
from datetime import datetime
from typing import Optional, Dict, Any
from ..base_client import BaseAPIClient
class MadridTrafficAPIClient(BaseAPIClient):
    """Pure HTTP client for Madrid traffic APIs.

    The class only performs HTTP requests and decodes the responses; it does
    not parse the XML/CSV/ZIP payloads. Every ``fetch_*`` method logs and
    returns ``None`` on failure instead of raising.
    """

    TRAFFIC_ENDPOINT = "https://datos.madrid.es/egob/catalogo/202468-10-intensidad-trafico.xml"
    MEASUREMENT_POINTS_URL = "https://datos.madrid.es/egob/catalogo/202468-263-intensidad-trafico.csv"

    # Catalogue number of the January archive for each supported year; the
    # following months use consecutive numbers (this may need adjustment
    # based on the actual URLs).
    _HISTORICAL_URL_FIRST_NUMBER = {2023: 116, 2024: 128}
    # Number used when the requested year is not in the table above.
    _HISTORICAL_URL_FALLBACK_NUMBER = 116

    # Encodings tried, in order, when the response cannot decode itself.
    # "latin-1" accepts every byte sequence, so it must come last; anything
    # listed after it could never be selected.
    _FALLBACK_ENCODINGS = ("utf-8", "windows-1252", "latin-1")

    def __init__(self):
        """Initialise the base client for datos.madrid.es and bind a logger."""
        super().__init__(base_url="https://datos.madrid.es")
        self.logger = structlog.get_logger()

    def _decode_response_content(self, response) -> Optional[str]:
        """Decode response content with multiple encoding attempts.

        Args:
            response: Response object exposing ``text`` and ``content``.

        Returns:
            ``response.text`` when it decodes cleanly. Otherwise the first
            fallback decoding of ``response.content`` that yields more than
            100 characters, or ``None`` when no attempt qualifies.
        """
        # NOTE(review): httpx appears to decode ``text`` with replacement
        # characters rather than raising, so this fallback may never run for
        # httpx responses - confirm against the response type in use.
        try:
            return response.text
        except UnicodeDecodeError:
            pass

        # Try manual decoding for Spanish content.
        raw = response.content
        for encoding in self._FALLBACK_ENCODINGS:
            try:
                content = raw.decode(encoding)
            except UnicodeDecodeError:
                continue
            # Very short payloads are rejected on this path.
            if content and len(content) > 100:
                self.logger.debug("Successfully decoded with encoding", encoding=encoding)
                return content
        return None

    def _build_historical_url(self, year: int, month: int) -> str:
        """Build historical ZIP URL for given year and month.

        Args:
            year: Calendar year of the archive.
            month: Calendar month, expected in the range 1-12.

        Returns:
            The archive URL. For a year without a known numbering the
            fallback number is used; a warning is logged in that case and
            when ``month`` is outside 1-12, because the resulting URL then
            points at a different period than the one requested.
        """
        # Madrid historical data URL pattern
        base_url = "https://datos.madrid.es/egob/catalogo/208627"

        first_number = self._HISTORICAL_URL_FIRST_NUMBER.get(year)
        if first_number is None:
            url_number = self._HISTORICAL_URL_FALLBACK_NUMBER
            self.logger.warning("No historical URL numbering for year, using fallback",
                                year=year,
                                month=month,
                                url_number=url_number)
        else:
            url_number = first_number + (month - 1)
            if not 1 <= month <= 12:
                self.logger.warning("Month out of range for historical URL",
                                    year=year,
                                    month=month,
                                    url_number=url_number)

        return f"{base_url}-{url_number}-transporte-ptomedida-historico.zip"

    async def fetch_current_traffic_xml(self, endpoint: Optional[str] = None) -> Optional[str]:
        """Fetch current traffic XML data.

        Args:
            endpoint: URL to fetch; defaults to ``TRAFFIC_ENDPOINT``.

        Returns:
            The decoded XML text, or ``None`` when the request fails, the
            status is not 200, or no content could be decoded.
        """
        endpoint = endpoint or self.TRAFFIC_ENDPOINT
        try:
            headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
                'Accept': 'application/xml,text/xml,*/*',
                'Accept-Language': 'es-ES,es;q=0.9,en;q=0.8',
                # Brotli ("br") is not advertised: decoding it needs an
                # optional package, and without it the body would arrive
                # still compressed.
                'Accept-Encoding': 'gzip, deflate',
                'Cache-Control': 'no-cache',
                'Referer': 'https://datos.madrid.es/'
            }
            response = await self.get(endpoint, headers=headers, timeout=30)

            # Compare against None explicitly so the status code is still
            # logged for response types that are falsy on error statuses.
            if response is None or response.status_code != 200:
                self.logger.warning("Failed to fetch XML data",
                                    endpoint=endpoint,
                                    status=response.status_code if response is not None else None)
                return None

            # Get XML content with encoding handling
            xml_content = self._decode_response_content(response)
            if not xml_content:
                self.logger.debug("No XML content received", endpoint=endpoint)
                return None

            self.logger.debug("Madrid XML content fetched",
                              length=len(xml_content),
                              endpoint=endpoint)
            return xml_content
        except Exception as e:
            # Boundary handler: callers rely on None rather than an exception.
            self.logger.error("Error fetching traffic XML data",
                              endpoint=endpoint,
                              error=str(e))
            return None

    async def fetch_measurement_points_csv(self, url: Optional[str] = None) -> Optional[str]:
        """Fetch measurement points CSV data.

        Args:
            url: URL to fetch; defaults to ``MEASUREMENT_POINTS_URL``.

        Returns:
            The decoded CSV text, or ``None`` when the request fails, the
            status is not 200, or the body cannot be decoded.
        """
        url = url or self.MEASUREMENT_POINTS_URL
        try:
            async with httpx.AsyncClient(
                timeout=30.0,
                headers={
                    'User-Agent': 'MadridTrafficClient/2.0',
                    'Accept': 'text/csv,application/csv,*/*'
                },
                follow_redirects=True
            ) as client:
                self.logger.debug("Fetching measurement points registry", url=url)
                response = await client.get(url)

                if response.status_code != 200:
                    self.logger.warning("Failed to fetch measurement points",
                                        status=response.status_code, url=url)
                    return None

                # Same decoding path as the XML fetch.
                return self._decode_response_content(response)
        except Exception as e:
            # Boundary handler: callers rely on None rather than an exception.
            self.logger.error("Error fetching measurement points registry",
                              url=url, error=str(e))
            return None

    async def fetch_historical_zip(self, zip_url: str) -> Optional[bytes]:
        """Fetch historical traffic ZIP file.

        Args:
            zip_url: URL of the archive to download.

        Returns:
            The raw response body, or ``None`` when the request fails or the
            status is not 200. The whole body is held in memory.
        """
        try:
            async with httpx.AsyncClient(
                timeout=120.0,  # Longer timeout for large files
                headers={
                    'User-Agent': 'MadridTrafficClient/2.0',
                    'Accept': 'application/zip,*/*'
                },
                follow_redirects=True
            ) as client:
                self.logger.debug("Fetching historical ZIP", url=zip_url)
                response = await client.get(zip_url)

                if response.status_code != 200:
                    self.logger.warning("Failed to fetch historical ZIP",
                                        status=response.status_code, url=zip_url)
                    return None

                payload = response.content
                self.logger.debug("Historical ZIP fetched",
                                  url=zip_url,
                                  size=len(payload))
                return payload
        except Exception as e:
            # Boundary handler: callers rely on None rather than an exception.
            self.logger.error("Error fetching historical ZIP",
                              url=zip_url, error=str(e))
            return None