# ================================================================
# services/data/app/external/clients/madrid_client.py
# ================================================================
"""
Pure HTTP client for Madrid traffic APIs
Handles only HTTP communication and response decoding
"""

import httpx
import structlog
from datetime import datetime
from typing import Optional, Dict, Any

from ..base_client import BaseAPIClient


class MadridTrafficAPIClient(BaseAPIClient):
    """Pure HTTP client for Madrid traffic APIs.

    Every fetch method is best-effort: on a non-200 status or on any
    exception it logs the problem and returns ``None`` instead of raising.
    """

    # Default URL for the current traffic intensity XML feed.
    TRAFFIC_ENDPOINT = "https://datos.madrid.es/egob/catalogo/202468-10-intensidad-trafico.xml"
    # Default URL for the measurement points registry (CSV).
    MEASUREMENT_POINTS_URL = "https://datos.madrid.es/egob/catalogo/202468-263-intensidad-trafico.csv"

    def __init__(self):
        """Initialise the base client for datos.madrid.es and attach a logger."""
        super().__init__(base_url="https://datos.madrid.es")
        self.logger = structlog.get_logger()

    def _decode_response_content(self, response) -> Optional[str]:
        """Decode a response body to text, trying several encodings.

        ``response.text`` is used when it succeeds. If it raises
        ``UnicodeDecodeError``, the raw ``response.content`` bytes are decoded
        manually with encodings common for Spanish content.

        Args:
            response: Object exposing ``.text`` and ``.content`` (bytes).

        Returns:
            The decoded text, or ``None`` when the manual fallback produced no
            text longer than 100 characters.
        """
        try:
            return response.text
        except UnicodeDecodeError:
            pass

        raw = response.content
        # Order matters: utf-8 is strict, windows-1252 rejects its few
        # undefined bytes, and latin-1 accepts every byte, so it must be last
        # (anything listed after it could never be selected).
        for encoding in ('utf-8', 'windows-1252', 'latin-1'):
            try:
                content = raw.decode(encoding)
            except UnicodeDecodeError:
                continue
            # Very short payloads are not accepted as a successful decode.
            if len(content) > 100:
                self.logger.debug("Successfully decoded with encoding", encoding=encoding)
                return content

        return None

    def _build_historical_url(self, year: int, month: int) -> str:
        """Build historical ZIP URL for given year and month.

        The catalogue number is derived from a fixed offset per year
        (116-127 for 2023, 128-139 for 2024). This numbering pattern may need
        adjustment based on the actual URLs. Any other year falls back to
        number 116, and a month outside 1-12 is not rejected; both cases are
        logged as warnings because the resulting URL does not correspond to
        the requested period.

        Args:
            year: Calendar year of the requested data.
            month: Calendar month, expected in 1-12.

        Returns:
            The URL of the historical ZIP file.
        """
        # Madrid historical data URL pattern
        base_url = "https://datos.madrid.es/egob/catalogo/208627"

        # First catalogue number (January) for each supported year.
        january_numbers = {2023: 116, 2024: 128}

        if year in january_numbers:
            if not 1 <= month <= 12:
                self.logger.warning("Month out of range for historical URL",
                                    year=year, month=month)
            url_number = january_numbers[year] + (month - 1)
        else:
            self.logger.warning("Unsupported year for historical URL, using fallback",
                                year=year, month=month)
            url_number = 116  # Fallback

        return f"{base_url}-{url_number}-transporte-ptomedida-historico.zip"

    async def fetch_current_traffic_xml(self, endpoint: Optional[str] = None) -> Optional[str]:
        """Fetch current traffic XML data.

        Args:
            endpoint: URL to fetch; defaults to ``TRAFFIC_ENDPOINT``.

        Returns:
            The decoded XML text, or ``None`` on a non-200 status, an empty
            body, or any exception (all of which are logged).
        """
        endpoint = endpoint or self.TRAFFIC_ENDPOINT

        try:
            headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
                'Accept': 'application/xml,text/xml,*/*',
                'Accept-Language': 'es-ES,es;q=0.9,en;q=0.8',
                # 'br' is deliberately not advertised: Brotli decoding is an
                # optional extra in Python HTTP clients, and a Brotli body
                # that is not decoded cannot be read as text.
                'Accept-Encoding': 'gzip, deflate',
                'Cache-Control': 'no-cache',
                'Referer': 'https://datos.madrid.es/'
            }

            # NOTE(review): self.get is not defined in this file; presumably
            # inherited from BaseAPIClient - confirm its return type.
            response = await self.get(endpoint, headers=headers, timeout=30)

            # Read the status explicitly instead of relying on the truthiness
            # of the response object, so a failed request is logged with its
            # real status code whenever one is available.
            status = getattr(response, "status_code", None)
            if status != 200:
                self.logger.warning("Failed to fetch XML data",
                                    endpoint=endpoint,
                                    status=status)
                return None

            # Get XML content with encoding handling
            xml_content = self._decode_response_content(response)

            if not xml_content:
                self.logger.debug("No XML content received", endpoint=endpoint)
                return None

            self.logger.debug("Madrid XML content fetched",
                              length=len(xml_content),
                              endpoint=endpoint)

            return xml_content

        except Exception as e:
            # Best-effort boundary: log and signal failure with None.
            self.logger.error("Error fetching traffic XML data",
                              endpoint=endpoint,
                              error=str(e))
            return None

    async def fetch_measurement_points_csv(self, url: Optional[str] = None) -> Optional[str]:
        """Fetch measurement points CSV data.

        Args:
            url: URL to fetch; defaults to ``MEASUREMENT_POINTS_URL``.

        Returns:
            The CSV text, or ``None`` on a non-200 status or any exception
            (both are logged).
        """
        url = url or self.MEASUREMENT_POINTS_URL

        try:
            async with httpx.AsyncClient(
                timeout=30.0,
                headers={
                    'User-Agent': 'MadridTrafficClient/2.0',
                    'Accept': 'text/csv,application/csv,*/*'
                },
                follow_redirects=True
            ) as client:

                self.logger.debug("Fetching measurement points registry", url=url)
                response = await client.get(url)

                if response.status_code != 200:
                    self.logger.warning("Failed to fetch measurement points",
                                        status=response.status_code, url=url)
                    return None

                # Same decoding path as the XML feed, for consistency.
                return self._decode_response_content(response)

        except Exception as e:
            # Best-effort boundary: log and signal failure with None.
            self.logger.error("Error fetching measurement points registry",
                              url=url, error=str(e))
            return None

    async def fetch_historical_zip(self, zip_url: str) -> Optional[bytes]:
        """Fetch historical traffic ZIP file.

        Args:
            zip_url: URL of the ZIP archive to download.

        Returns:
            The raw archive bytes, or ``None`` on a non-200 status or any
            exception (both are logged).
        """
        try:
            async with httpx.AsyncClient(
                timeout=120.0,  # Longer timeout for large files
                headers={
                    'User-Agent': 'MadridTrafficClient/2.0',
                    'Accept': 'application/zip,*/*'
                },
                follow_redirects=True
            ) as client:

                self.logger.debug("Fetching historical ZIP", url=zip_url)
                response = await client.get(zip_url)

                if response.status_code != 200:
                    self.logger.warning("Failed to fetch historical ZIP",
                                        status=response.status_code,
                                        url=zip_url)
                    return None

                payload = response.content
                self.logger.debug("Historical ZIP fetched",
                                  url=zip_url,
                                  size=len(payload))
                return payload

        except Exception as e:
            # Best-effort boundary: log and signal failure with None.
            self.logger.error("Error fetching historical ZIP",
                              url=zip_url, error=str(e))
            return None