From ff8a632915876ddf9ce17cb6ae1aa73a68d3ef3b Mon Sep 17 00:00:00 2001 From: Urtzi Alfaro Date: Sat, 19 Jul 2025 11:37:32 +0200 Subject: [PATCH] Fix Madrid traffic API --- services/data/app/external/madrid_opendata.py | 105 ++++++++++++++---- services/data/requirements.txt | 4 +- 2 files changed, 85 insertions(+), 24 deletions(-) diff --git a/services/data/app/external/madrid_opendata.py b/services/data/app/external/madrid_opendata.py index bfccb8d6..d0091ab7 100644 --- a/services/data/app/external/madrid_opendata.py +++ b/services/data/app/external/madrid_opendata.py @@ -13,6 +13,8 @@ import re from app.external.base_client import BaseAPIClient from app.core.config import settings +import pyproj + logger = structlog.get_logger() class MadridOpenDataClient(BaseAPIClient): @@ -192,10 +194,12 @@ class MadridOpenDataClient(BaseAPIClient): return xml_content def _extract_madrid_pm_element(self, pm_element) -> Dict[str, Any]: - """Extract traffic data from Madrid element""" + """Extract traffic data from Madrid element with proper coordinate conversion""" try: # Based on the actual Madrid XML structure shown in logs point_data = {} + utm_x = None + utm_y = None # Extract all child elements for child in pm_element: @@ -215,11 +219,13 @@ class MadridOpenDataClient(BaseAPIClient): elif tag == 'nivelServicio': point_data['nivelServicio'] = self._safe_int(text) elif tag == 'st_x': - # Convert from UTM coordinates to longitude (approximate) - point_data['longitude'] = self._convert_utm_to_lon(text) + # Store UTM X coordinate for later conversion + utm_x = text + point_data['utm_x'] = text # Keep original for debugging elif tag == 'st_y': - # Convert from UTM coordinates to latitude (approximate) - point_data['latitude'] = self._convert_utm_to_lat(text) + # Store UTM Y coordinate for later conversion + utm_y = text + point_data['utm_y'] = text # Keep original for debugging elif tag == 'error': point_data['error'] = text elif tag == 'subarea': @@ -229,33 +235,86 @@ class 
MadridOpenDataClient(BaseAPIClient): elif tag == 'intensidadSat': point_data['intensidadSat'] = self._safe_int(text) + # Convert UTM coordinates to lat/lon if both are available + if utm_x and utm_y: + latitude, longitude = self._convert_utm_coordinates_accurate(utm_x, utm_y) + + if latitude is not None and longitude is not None: + # Validate that coordinates are actually in Madrid area + if self._validate_madrid_coordinates(latitude, longitude): + point_data['latitude'] = latitude + point_data['longitude'] = longitude + + # Log first few successful conversions for verification + if len(getattr(self, '_conversion_log_count', [])) < 3: + if not hasattr(self, '_conversion_log_count'): + self._conversion_log_count = [] + self._conversion_log_count.append(1) + + logger.debug("Successful UTM conversion", + idelem=point_data.get('idelem'), + utm_x=utm_x, + utm_y=utm_y, + latitude=latitude, + longitude=longitude, + descripcion=point_data.get('descripcion')) + else: + # Log invalid coordinates for debugging + logger.debug("Invalid Madrid coordinates after conversion", + idelem=point_data.get('idelem'), + utm_x=utm_x, + utm_y=utm_y, + converted_lat=latitude, + converted_lon=longitude, + descripcion=point_data.get('descripcion')) + # Don't include this point - return empty dict + return {} + else: + # Conversion failed + logger.debug("UTM conversion failed", + idelem=point_data.get('idelem'), + utm_x=utm_x, + utm_y=utm_y) + return {} + else: + # Missing coordinates + logger.debug("Missing UTM coordinates", + idelem=point_data.get('idelem'), + has_utm_x=utm_x is not None, + has_utm_y=utm_y is not None) + return {} + return point_data except Exception as e: logger.debug("Error extracting Madrid PM element", error=str(e)) return {} + - def _convert_utm_to_lon(self, utm_x_str: str) -> Optional[float]: - """Convert UTM X coordinate to longitude (approximate for Madrid Zone 30N)""" + def _convert_utm_coordinates_accurate(self, utm_x_str: str, utm_y_str: str) -> 
tuple[Optional[float], Optional[float]]: + """Convert UTM coordinates to lat/lon using accurate pyproj library""" try: utm_x = float(utm_x_str.replace(',', '.')) - # Approximate conversion for Madrid (UTM Zone 30N) - # This is a simplified conversion for Madrid area - lon = (utm_x - 500000) / 111320.0 - 3.0 # Rough approximation - return round(lon, 6) - except (ValueError, TypeError): - return None - - def _convert_utm_to_lat(self, utm_y_str: str) -> Optional[float]: - """Convert UTM Y coordinate to latitude (approximate for Madrid Zone 30N)""" - try: utm_y = float(utm_y_str.replace(',', '.')) - # Approximate conversion for Madrid (UTM Zone 30N) - # This is a simplified conversion for Madrid area - lat = utm_y / 111320.0 # Rough approximation - return round(lat, 6) - except (ValueError, TypeError): - return None + + # Define UTM Zone 30N projection on the WGS84 ellipsoid (equivalent to EPSG:32630; EPSG:25830 is the ETRS89/GRS80 variant) + utm_proj = pyproj.Proj(proj='utm', zone=30, ellps='WGS84', preserve_units=False) + + # Convert to latitude/longitude + longitude, latitude = utm_proj(utm_x, utm_y, inverse=True) + + return round(latitude, 6), round(longitude, 6) + except (ValueError, TypeError, Exception): + return None, None + + def _validate_madrid_coordinates(self, latitude: float, longitude: float) -> bool: + """Validate that converted coordinates are actually in Madrid area""" + # Madrid bounds (expanded slightly to include metro area) + madrid_lat_min, madrid_lat_max = 40.31, 40.56 + madrid_lon_min, madrid_lon_max = -3.89, -3.51 + + return (madrid_lat_min <= latitude <= madrid_lat_max and + madrid_lon_min <= longitude <= madrid_lon_max) def _safe_int(self, value_str: str) -> int: """Safely convert string to int""" diff --git a/services/data/requirements.txt b/services/data/requirements.txt index 215dff86..5bef0f3a 100644 --- a/services/data/requirements.txt +++ b/services/data/requirements.txt @@ -42,4 +42,6 @@ bcrypt==4.1.2 # Testing pytest==7.4.3 pytest-asyncio==0.21.1 -pytest-cov==4.1.0 \ No newline at end of file
+pytest-cov==4.1.0 + +pyproj==3.4.0 \ No newline at end of file