155 lines
6.3 KiB
Python
155 lines
6.3 KiB
Python
# ================================================================
# services/data/app/external/clients/madrid_client.py
# ================================================================
"""
Pure HTTP client for Madrid traffic APIs.

Handles only HTTP communication and response decoding.
"""
|
|
|
|
import httpx
|
|
import structlog
|
|
from datetime import datetime
|
|
from typing import Optional, Dict, Any
|
|
|
|
from ..base_client import BaseAPIClient
|
|
|
|
|
|
class MadridTrafficAPIClient(BaseAPIClient):
    """Pure HTTP client for the Madrid open-data traffic APIs.

    The class only performs HTTP requests and decodes response bodies.
    Every ``fetch_*`` method catches all exceptions, logs them and returns
    ``None`` on failure instead of raising.
    """

    TRAFFIC_ENDPOINT = "https://datos.madrid.es/egob/catalogo/202468-10-intensidad-trafico.xml"
    MEASUREMENT_POINTS_URL = "https://datos.madrid.es/egob/catalogo/202468-263-intensidad-trafico.csv"

    # Prefix shared by the monthly historical ZIP archives.
    HISTORICAL_BASE_URL = "https://datos.madrid.es/egob/catalogo/208627"

    # File number of the January archive for each known year; later months
    # follow consecutively (116-127 for 2023, 128-139 for 2024).
    # NOTE: this numbering may need adjustment based on the actual URLs.
    _HISTORICAL_JANUARY_NUMBERS = {2023: 116, 2024: 128}
    # Number used when the requested year is not in the table above.
    _HISTORICAL_FALLBACK_NUMBER = 116

    # Encodings tried, in order, when the response cannot decode itself.
    # 'latin-1' accepts every byte sequence, so it must come last: anything
    # listed after it could never be selected.
    _FALLBACK_ENCODINGS = ('utf-8', 'windows-1252', 'latin-1')

    def __init__(self):
        """Initialise the base client for datos.madrid.es and bind a logger."""
        super().__init__(base_url="https://datos.madrid.es")
        self.logger = structlog.get_logger()

    def _decode_response_content(self, response) -> Optional[str]:
        """Decode a response body to text, with manual encoding fallbacks.

        Args:
            response: Response object exposing ``text`` and ``content``
                (``content`` is decoded as bytes).

        Returns:
            ``response.text`` when it can be produced. If that raises
            ``UnicodeDecodeError``, the first fallback decoding that yields
            more than 100 characters; ``None`` if no fallback qualifies.
        """
        try:
            return response.text
        except UnicodeDecodeError:
            pass

        # Try manual decoding for Spanish content.
        raw = response.content
        for encoding in self._FALLBACK_ENCODINGS:
            try:
                content = raw.decode(encoding)
            except UnicodeDecodeError:
                continue
            # Very short bodies are not accepted as a successful decode.
            if content and len(content) > 100:
                self.logger.debug("Successfully decoded with encoding", encoding=encoding)
                return content

        return None

    def _build_historical_url(self, year: int, month: int) -> str:
        """Build the historical ZIP URL for the given year and month.

        Args:
            year: Calendar year; only 2023 and 2024 have a known numbering.
            month: Month number, expected to be 1-12. It is not validated, so
                an out-of-range value yields the URL of a different archive.

        Returns:
            The archive URL. For an unknown year the fallback number (116) is
            used regardless of ``month`` and a warning is logged, because the
            resulting URL does not correspond to the requested period.
        """
        january_number = self._HISTORICAL_JANUARY_NUMBERS.get(year)

        if january_number is None:
            url_number = self._HISTORICAL_FALLBACK_NUMBER
            self.logger.warning("No historical URL numbering known for year, using fallback",
                                year=year,
                                month=month,
                                url_number=url_number)
        else:
            url_number = january_number + (month - 1)

        return f"{self.HISTORICAL_BASE_URL}-{url_number}-transporte-ptomedida-historico.zip"

    async def _get_standalone(self, url: str, *, accept: str, timeout: float) -> httpx.Response:
        """GET ``url`` with a short-lived httpx client that follows redirects.

        Args:
            url: Absolute URL to request.
            accept: Value of the ``Accept`` request header.
            timeout: Timeout in seconds passed to ``httpx.AsyncClient``.

        Returns:
            The response returned by ``client.get``; the client is closed
            before this method returns.
        """
        async with httpx.AsyncClient(
            timeout=timeout,
            headers={
                'User-Agent': 'MadridTrafficClient/2.0',
                'Accept': accept
            },
            follow_redirects=True
        ) as client:
            return await client.get(url)

    async def fetch_current_traffic_xml(self, endpoint: Optional[str] = None) -> Optional[str]:
        """Fetch current traffic XML data.

        Args:
            endpoint: URL to request; defaults to ``TRAFFIC_ENDPOINT``.

        Returns:
            The decoded XML text, or ``None`` when the request fails, the
            status is not 200, the body is empty/undecodable, or any
            exception is raised.
        """
        endpoint = endpoint or self.TRAFFIC_ENDPOINT

        try:
            headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
                'Accept': 'application/xml,text/xml,*/*',
                'Accept-Language': 'es-ES,es;q=0.9,en;q=0.8',
                # 'br' is deliberately not advertised: Brotli decoding needs
                # an optional package, and an undecoded Brotli body would be
                # returned as garbage text.
                'Accept-Encoding': 'gzip, deflate',
                'Cache-Control': 'no-cache',
                'Referer': 'https://datos.madrid.es/'
            }

            response = await self.get(endpoint, headers=headers, timeout=30)

            if not response or response.status_code != 200:
                self.logger.warning("Failed to fetch XML data",
                                    endpoint=endpoint,
                                    status=response.status_code if response else None)
                return None

            # Get XML content with encoding handling
            xml_content = self._decode_response_content(response)
            if not xml_content:
                self.logger.debug("No XML content received", endpoint=endpoint)
                return None

            self.logger.debug("Madrid XML content fetched",
                              length=len(xml_content),
                              endpoint=endpoint)

            return xml_content

        except Exception as e:
            # error_type is logged as well because str(e) can be empty.
            self.logger.error("Error fetching traffic XML data",
                              endpoint=endpoint,
                              error=str(e),
                              error_type=type(e).__name__)
            return None

    async def fetch_measurement_points_csv(self, url: Optional[str] = None) -> Optional[str]:
        """Fetch measurement points CSV data.

        Args:
            url: URL to request; defaults to ``MEASUREMENT_POINTS_URL``.

        Returns:
            The decoded CSV text on HTTP 200, otherwise ``None`` (non-200
            status, undecodable body, or any exception).
        """
        url = url or self.MEASUREMENT_POINTS_URL

        try:
            self.logger.debug("Fetching measurement points registry", url=url)
            response = await self._get_standalone(
                url,
                accept='text/csv,application/csv,*/*',
                timeout=30.0,
            )

            if response.status_code != 200:
                self.logger.warning("Failed to fetch measurement points",
                                    status=response.status_code, url=url)
                return None

            # Same decoding path as the XML fetch, for consistent handling.
            return self._decode_response_content(response)

        except Exception as e:
            self.logger.error("Error fetching measurement points registry",
                              url=url, error=str(e),
                              error_type=type(e).__name__)
            return None

    async def fetch_historical_zip(self, zip_url: str) -> Optional[bytes]:
        """Fetch historical traffic ZIP file.

        Args:
            zip_url: Absolute URL of the archive to download.

        Returns:
            The raw response body on HTTP 200, otherwise ``None`` (non-200
            status or any exception).
        """
        try:
            self.logger.debug("Fetching historical ZIP", url=zip_url)
            response = await self._get_standalone(
                zip_url,
                accept='application/zip,*/*',
                timeout=120.0,  # Longer timeout for large files
            )

            if response.status_code != 200:
                self.logger.warning("Failed to fetch historical ZIP",
                                    status=response.status_code, url=zip_url)
                return None

            payload = response.content
            self.logger.debug("Historical ZIP fetched",
                              url=zip_url,
                              size=len(payload))
            return payload

        except Exception as e:
            self.logger.error("Error fetching historical ZIP",
                              url=zip_url, error=str(e),
                              error_type=type(e).__name__)
            return None