Refactor the traffic fetching system
This commit is contained in:
12
services/data/app/external/clients/__init__.py
vendored
Normal file
12
services/data/app/external/clients/__init__.py
vendored
Normal file
@@ -0,0 +1,12 @@
|
||||
# ================================================================
|
||||
# services/data/app/external/clients/__init__.py
|
||||
# ================================================================
|
||||
"""
|
||||
HTTP clients package
|
||||
"""
|
||||
|
||||
from .madrid_client import MadridTrafficAPIClient
|
||||
|
||||
# Names re-exported as the public API of the clients package.
__all__ = ['MadridTrafficAPIClient']
|
||||
155
services/data/app/external/clients/madrid_client.py
vendored
Normal file
155
services/data/app/external/clients/madrid_client.py
vendored
Normal file
@@ -0,0 +1,155 @@
|
||||
# ================================================================
|
||||
# services/data/app/external/clients/madrid_client.py
|
||||
# ================================================================
|
||||
"""
|
||||
Pure HTTP client for Madrid traffic APIs
|
||||
Handles only HTTP communication and response decoding
|
||||
"""
|
||||
|
||||
import httpx
|
||||
import structlog
|
||||
from datetime import datetime
|
||||
from typing import Optional, Dict, Any
|
||||
|
||||
from ..base_client import BaseAPIClient
|
||||
|
||||
|
||||
class MadridTrafficAPIClient(BaseAPIClient):
    """Pure HTTP client for Madrid traffic APIs.

    The class only issues HTTP requests and decodes response bodies. Every
    public ``fetch_*`` coroutine is best-effort: on a non-200 status or on
    any exception it logs the problem and returns ``None`` instead of raising.
    """

    TRAFFIC_ENDPOINT = "https://datos.madrid.es/egob/catalogo/202468-10-intensidad-trafico.xml"
    MEASUREMENT_POINTS_URL = "https://datos.madrid.es/egob/catalogo/202468-263-intensidad-trafico.csv"

    # Encodings tried, in order, when ``response.text`` cannot be decoded.
    # 'latin-1' accepts every byte sequence, so it must come last; otherwise
    # 'windows-1252' is never reached for real payloads. 'iso-8859-1' is an
    # alias of 'latin-1' and is therefore not listed separately.
    _FALLBACK_ENCODINGS = ('utf-8', 'windows-1252', 'latin-1')

    # Decoded text must be longer than this many characters to be accepted
    # by the fallback decoder.
    _MIN_DECODED_LENGTH = 100

    # URL number of the January file for each mapped year; later months
    # follow consecutively (116-127 for 2023, 128-139 for 2024).
    # NOTE: this numbering pattern may need adjustment based on actual URLs.
    _HISTORICAL_JANUARY_URL_NUMBER = {2023: 116, 2024: 128}

    # URL number used when the requested year has no mapping.
    _HISTORICAL_FALLBACK_URL_NUMBER = 116

    def __init__(self):
        """Initialise the base client for datos.madrid.es and a structlog logger."""
        super().__init__(base_url="https://datos.madrid.es")
        self.logger = structlog.get_logger()

    def _decode_response_content(self, response) -> Optional[str]:
        """Return the response body as text, with multiple encoding attempts.

        ``response.text`` is returned when it decodes without error. If it
        raises ``UnicodeDecodeError``, the raw ``response.content`` bytes are
        decoded with each entry of ``_FALLBACK_ENCODINGS`` in turn and the
        first result longer than ``_MIN_DECODED_LENGTH`` characters wins.

        Args:
            response: Object exposing ``text`` and ``content`` attributes.

        Returns:
            The decoded text, or ``None`` when no fallback encoding produced
            a sufficiently long result.
        """
        # NOTE(review): httpx appears to decode ``Response.text`` with
        # errors="replace", in which case the fallback below never runs for
        # httpx responses -- confirm against the type BaseAPIClient.get returns.
        try:
            return response.text
        except UnicodeDecodeError:
            pass

        # Try manual encoding for Spanish content
        raw = response.content
        for encoding in self._FALLBACK_ENCODINGS:
            try:
                content = raw.decode(encoding)
            except UnicodeDecodeError:
                continue
            if len(content) > self._MIN_DECODED_LENGTH:
                self.logger.debug("Successfully decoded with encoding", encoding=encoding)
                return content

        return None

    def _build_historical_url(self, year: int, month: int) -> str:
        """Build historical ZIP URL for given year and month.

        Args:
            year: Calendar year; only 2023 and 2024 are mapped.
            month: Calendar month, expected in the range 1-12.

        Returns:
            The ZIP URL. For an unmapped year the fallback URL number is used
            regardless of ``month``; a warning is logged so the substitution
            is visible. An out-of-range month is also logged, because it
            yields a URL number belonging to a different month or year.
        """
        # Madrid historical data URL pattern
        base_url = "https://datos.madrid.es/egob/catalogo/208627"

        january_number = self._HISTORICAL_JANUARY_URL_NUMBER.get(year)
        if january_number is None:
            url_number = self._HISTORICAL_FALLBACK_URL_NUMBER
            self.logger.warning("No historical URL mapping for year, using fallback",
                                year=year,
                                month=month,
                                url_number=url_number)
        else:
            if not 1 <= month <= 12:
                self.logger.warning("Month out of range for historical URL",
                                    year=year,
                                    month=month)
            url_number = january_number + (month - 1)

        return f"{base_url}-{url_number}-transporte-ptomedida-historico.zip"

    async def _get_with_dedicated_client(self, url: str, *, timeout: float, accept: str):
        """GET ``url`` using a short-lived ``httpx.AsyncClient``.

        Shared by the CSV and ZIP fetchers, which differ only in timeout and
        ``Accept`` header. Redirects are followed. Exceptions propagate to
        the caller.

        Args:
            url: Absolute URL to request.
            timeout: Timeout in seconds passed to the client.
            accept: Value of the ``Accept`` request header.

        Returns:
            The response returned by ``client.get``.
        """
        async with httpx.AsyncClient(
            timeout=timeout,
            headers={
                'User-Agent': 'MadridTrafficClient/2.0',
                'Accept': accept
            },
            follow_redirects=True
        ) as client:
            return await client.get(url)

    async def fetch_current_traffic_xml(self, endpoint: Optional[str] = None) -> Optional[str]:
        """Fetch current traffic XML data.

        Args:
            endpoint: URL to request; defaults to ``TRAFFIC_ENDPOINT``.

        Returns:
            The decoded XML text, or ``None`` when the request fails, the
            status is not 200, the body cannot be decoded or is empty, or any
            exception is raised.
        """
        endpoint = endpoint or self.TRAFFIC_ENDPOINT

        # NOTE(review): 'br' is advertised in Accept-Encoding; if the
        # underlying client cannot decode Brotli the body may arrive
        # undecoded -- confirm Brotli support in the deployment.
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
            'Accept': 'application/xml,text/xml,*/*',
            'Accept-Language': 'es-ES,es;q=0.9,en;q=0.8',
            'Accept-Encoding': 'gzip, deflate, br',
            'Cache-Control': 'no-cache',
            'Referer': 'https://datos.madrid.es/'
        }

        try:
            response = await self.get(endpoint, headers=headers, timeout=30)

            if not response or response.status_code != 200:
                self.logger.warning("Failed to fetch XML data",
                                    endpoint=endpoint,
                                    status=response.status_code if response else None)
                return None

            # Get XML content with encoding handling
            xml_content = self._decode_response_content(response)
            if not xml_content:
                self.logger.debug("No XML content received", endpoint=endpoint)
                return None

            self.logger.debug("Madrid XML content fetched",
                              length=len(xml_content),
                              endpoint=endpoint)
            return xml_content

        except Exception as e:
            # Best-effort boundary: report and signal failure with None.
            # error_type is logged because str(e) can be empty.
            self.logger.error("Error fetching traffic XML data",
                              endpoint=endpoint,
                              error=str(e),
                              error_type=type(e).__name__)
            return None

    async def fetch_measurement_points_csv(self, url: Optional[str] = None) -> Optional[str]:
        """Fetch measurement points CSV data.

        Args:
            url: URL to request; defaults to ``MEASUREMENT_POINTS_URL``.

        Returns:
            The decoded CSV text, or ``None`` when the status is not 200, the
            body cannot be decoded, or any exception is raised.
        """
        url = url or self.MEASUREMENT_POINTS_URL

        try:
            self.logger.debug("Fetching measurement points registry", url=url)
            response = await self._get_with_dedicated_client(
                url,
                timeout=30.0,
                accept='text/csv,application/csv,*/*'
            )

            if response.status_code != 200:
                self.logger.warning("Failed to fetch measurement points",
                                    status=response.status_code, url=url)
                return None

            # Same decoding path as the XML fetcher, for consistent handling
            # of undecodable bodies.
            return self._decode_response_content(response)

        except Exception as e:
            # Best-effort boundary: report and signal failure with None.
            self.logger.error("Error fetching measurement points registry",
                              url=url,
                              error=str(e),
                              error_type=type(e).__name__)
            return None

    async def fetch_historical_zip(self, zip_url: str) -> Optional[bytes]:
        """Fetch historical traffic ZIP file.

        Args:
            zip_url: Absolute URL of the ZIP archive.

        Returns:
            The raw response bytes, or ``None`` when the status is not 200 or
            any exception is raised.
        """
        try:
            self.logger.debug("Fetching historical ZIP", url=zip_url)
            response = await self._get_with_dedicated_client(
                zip_url,
                timeout=120.0,  # Longer timeout for large files
                accept='application/zip,*/*'
            )

            if response.status_code != 200:
                self.logger.warning("Failed to fetch historical ZIP",
                                    status=response.status_code, url=zip_url)
                return None

            payload = response.content
            self.logger.debug("Historical ZIP fetched",
                              url=zip_url,
                              size=len(payload))
            return payload

        except Exception as e:
            # Best-effort boundary: report and signal failure with None.
            self.logger.error("Error fetching historical ZIP",
                              url=zip_url,
                              error=str(e),
                              error_type=type(e).__name__)
            return None
|
||||
Reference in New Issue
Block a user