# ================================================================
# services/data/app/external/apis/madrid_traffic_client.py
# ================================================================
"""
2025-08-10 18:32:47 +02:00
Madrid traffic client - Orchestration layer only
Coordinates between HTTP client, data processor, and business logic components
2025-08-10 17:31:38 +02:00
"""
from datetime import datetime, timedelta, timezone
from typing import Dict, List, Any, Optional, Tuple

import structlog

from .traffic import BaseTrafficClient, SupportedCity
from ..base_client import BaseAPIClient
from ..clients.madrid_client import MadridTrafficAPIClient
from ..processors.madrid_processor import MadridTrafficDataProcessor
from ..processors.madrid_business_logic import MadridTrafficAnalyzer
from ..models.madrid_models import TrafficRecord, CongestionLevel
from app.core.performance import (
    rate_limit,
    async_cache,
    monitor_performance,
    global_performance_monitor,
)
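

# Illustrative usage (a sketch; the coordinates are an example chosen for
# this comment, not taken from this file). All public methods are
# coroutines and must be awaited:
#
#     client = MadridTrafficClient()
#     current = await client.get_current_traffic(40.4168, -3.7038)
#     history = await client.get_historical_traffic(
#         40.4168, -3.7038,
#         start_date=datetime(2025, 1, 1, tzinfo=timezone.utc),
#         end_date=datetime(2025, 1, 31, tzinfo=timezone.utc),
#     )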
class MadridTrafficClient(BaseTrafficClient, BaseAPIClient):
    """
    Enhanced Madrid traffic client - orchestration layer.
    Coordinates HTTP, processing, and business-logic components.
    """

    # Madrid geographic bounds
    MADRID_BOUNDS = {
        'lat_min': 40.31, 'lat_max': 40.56,
        'lon_min': -3.89, 'lon_max': -3.51
    }

    # Configuration constants
    MAX_HISTORICAL_DAYS = 1095  # 3 years
    MAX_CSV_PROCESSING_ROWS = 5_000_000
    MEASUREMENT_POINTS_LIMIT = 20
    def __init__(self):
        BaseTrafficClient.__init__(self, SupportedCity.MADRID)
        BaseAPIClient.__init__(self, base_url="https://datos.madrid.es")

        # Initialize components
        self.api_client = MadridTrafficAPIClient()
        self.processor = MadridTrafficDataProcessor()
        self.analyzer = MadridTrafficAnalyzer()

        self.logger = structlog.get_logger()
    def supports_location(self, latitude: float, longitude: float) -> bool:
        """Check if location is within Madrid bounds"""
        return (self.MADRID_BOUNDS['lat_min'] <= latitude <= self.MADRID_BOUNDS['lat_max'] and
                self.MADRID_BOUNDS['lon_min'] <= longitude <= self.MADRID_BOUNDS['lon_max'])
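
    # Note (descriptive, inferred from the decorator arguments below):
    # current-traffic lookups are rate-limited to 30 calls per 60 s,
    # cached for 300 s, and timed via the shared global_performance_monitor.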
    @rate_limit(calls=30, period=60)
    @async_cache(ttl=300)
    @monitor_performance(monitor=global_performance_monitor)
    async def get_current_traffic(self, latitude: float, longitude: float) -> Optional[Dict[str, Any]]:
        """Get current traffic data with enhanced pedestrian inference"""
        try:
            if not self.supports_location(latitude, longitude):
                self.logger.warning("Location outside Madrid bounds", lat=latitude, lon=longitude)
                return None

            # Fetch XML data
            xml_content = await self.api_client.fetch_current_traffic_xml()
            if not xml_content:
                self.logger.warning("No XML content received")
                return None

            # Parse XML data
            traffic_points = self.processor.parse_traffic_xml(xml_content)
            if not traffic_points:
                self.logger.warning("No traffic points found in XML")
                return None

            # Find nearest traffic point
            nearest_point = self.analyzer.find_nearest_traffic_point(traffic_points, latitude, longitude)
            if not nearest_point:
                self.logger.warning("No nearby traffic points found")
                return None

            # Enhance with business logic
            enhanced_data = await self._enhance_traffic_data(nearest_point, latitude, longitude)
            self.logger.info("Current traffic data retrieved",
                             point_id=nearest_point.get('measurement_point_id'),
                             distance=enhanced_data.get('distance_km', 0))
            return enhanced_data

        except Exception as e:
            self.logger.error("Error getting current traffic", error=str(e))
            return None
    @rate_limit(calls=10, period=60)
    @monitor_performance(monitor=global_performance_monitor)
    async def get_historical_traffic(self, latitude: float, longitude: float,
                                     start_date: datetime, end_date: datetime) -> List[Dict[str, Any]]:
        """Get historical traffic data with pedestrian enhancement"""
        try:
            if not self.supports_location(latitude, longitude):
                self.logger.warning("Location outside Madrid bounds", lat=latitude, lon=longitude)
                return []

            # Validate date range
            if (end_date - start_date).days > self.MAX_HISTORICAL_DAYS:
                self.logger.warning("Date range too large, truncating",
                                    requested_days=(end_date - start_date).days,
                                    max_days=self.MAX_HISTORICAL_DAYS)
                start_date = end_date - timedelta(days=self.MAX_HISTORICAL_DAYS)

            # Fetch measurement points registry
            csv_content = await self.api_client.fetch_measurement_points_csv()
            if not csv_content:
                self.logger.error("Failed to fetch measurement points registry")
                return []

            # Parse measurement points
            measurement_points = self.processor.parse_measurement_points_csv(csv_content)
            if not measurement_points:
                self.logger.error("No measurement points found")
                return []

            # Find nearest measurement points
            nearest_points = self.analyzer.find_nearest_measurement_points(
                measurement_points, latitude, longitude, num_points=3
            )
            if not nearest_points:
                self.logger.warning("No nearby measurement points found")
                return []

            # Process historical data
            historical_records = await self._fetch_historical_data_enhanced(
                latitude, longitude, start_date, end_date, nearest_points
            )

            self.logger.info("Historical traffic data retrieved",
                             records_count=len(historical_records),
                             date_range=f"{start_date.date()} to {end_date.date()}")
            return historical_records

        except Exception as e:
            self.logger.error("Error getting historical traffic", error=str(e))
            return []
    async def get_events(self, latitude: float, longitude: float,
                         radius_km: float = 5.0) -> List[Dict[str, Any]]:
        """Get traffic events (incidents, construction, etc.)"""
        # Madrid doesn't provide a separate events endpoint, so heavy
        # congestion in the current traffic data is reported as an event.
        current_data = await self.get_current_traffic(latitude, longitude)
        if current_data and current_data.get('congestion_level') in ['high', 'blocked']:
            return [{
                'type': 'congestion',
                'severity': current_data.get('congestion_level'),
                'description': f"High traffic congestion at {current_data.get('measurement_point_name', 'measurement point')}",
                'location': {
                    'latitude': current_data.get('latitude'),
                    'longitude': current_data.get('longitude')
                },
                'timestamp': current_data.get('timestamp')
            }]
        return []
    async def _enhance_traffic_data(self, traffic_point: Dict[str, Any],
                                    query_lat: float, query_lon: float) -> Dict[str, Any]:
        """Enhance traffic data with business logic and pedestrian inference"""
        # Calculate distance
        distance_km = self.analyzer.calculate_distance(
            query_lat, query_lon,
            traffic_point.get('latitude', 0),
            traffic_point.get('longitude', 0)
        )

        # Classify road type
        road_type = self.analyzer.classify_road_type(
            traffic_point.get('measurement_point_name', '')
        )

        # Get congestion level
        congestion_level = self.analyzer.get_congestion_level(
            traffic_point.get('ocupacion', 0)
        )

        # Create traffic record for pedestrian inference
        traffic_record = TrafficRecord(
            date=datetime.now(timezone.utc),
            traffic_volume=traffic_point.get('intensidad', 0),
            occupation_percentage=int(traffic_point.get('ocupacion', 0)),
            load_percentage=traffic_point.get('carga', 0),
            average_speed=30,  # Default speed
            congestion_level=congestion_level,
            pedestrian_count=0,  # Will be calculated
            measurement_point_id=traffic_point.get('measurement_point_id', ''),
            measurement_point_name=traffic_point.get('measurement_point_name', ''),
            road_type=road_type,
            source='madrid_current_xml'
        )

        # Calculate pedestrian count
        location_context = {
            'latitude': traffic_point.get('latitude'),
            'longitude': traffic_point.get('longitude'),
            'measurement_point_name': traffic_point.get('measurement_point_name')
        }
        pedestrian_count, inference_metadata = self.analyzer.calculate_pedestrian_flow(
            traffic_record, location_context
        )

        # Build enhanced response
        enhanced_data = {
            'timestamp': datetime.now(timezone.utc),
            'latitude': traffic_point.get('latitude'),
            'longitude': traffic_point.get('longitude'),
            'measurement_point_id': traffic_point.get('measurement_point_id'),
            'measurement_point_name': traffic_point.get('measurement_point_name'),
            'traffic_volume': traffic_point.get('intensidad', 0),
            'occupation_percentage': int(traffic_point.get('ocupacion', 0)),
            'load_percentage': traffic_point.get('carga', 0),
            'congestion_level': congestion_level,
            'pedestrian_count': pedestrian_count,
            'road_type': road_type,
            'distance_km': distance_km,
            'source': 'madrid_current_xml',
            'city': 'madrid',
            'inference_metadata': inference_metadata,
            'raw_data': traffic_point
        }
        return enhanced_data
    async def _fetch_historical_data_enhanced(self, latitude: float, longitude: float,
                                              start_date: datetime, end_date: datetime,
                                              nearest_points: List[Tuple[str, Dict[str, Any], float]]) -> List[Dict[str, Any]]:
        """Fetch and process historical traffic data"""
        historical_records = []
        try:
            # Process month by month to avoid memory issues
            current_date = start_date.replace(day=1)  # Start from beginning of month

            while current_date <= end_date:
                year = current_date.year
                month = current_date.month

                # Compute the next month once; used by both the skip and
                # success paths below
                if month == 12:
                    next_date = current_date.replace(year=year + 1, month=1)
                else:
                    next_date = current_date.replace(month=month + 1)

                # Build historical URL
                zip_url = self.api_client._build_historical_url(year, month)
                self.logger.info("Processing historical ZIP file",
                                 year=year, month=month, zip_url=zip_url)

                # Fetch ZIP content
                zip_content = await self.api_client.fetch_historical_zip(zip_url)
                if not zip_content:
                    self.logger.warning("Failed to fetch historical ZIP", url=zip_url)
                    current_date = next_date
                    continue

                # Process ZIP content with enhanced parsing
                month_records = await self._process_historical_zip_enhanced(
                    zip_content, zip_url, latitude, longitude, nearest_points
                )

                # Filter by date range
                filtered_records = [
                    record for record in month_records
                    if start_date <= record.get('date', datetime.min.replace(tzinfo=timezone.utc)) <= end_date
                ]
                historical_records.extend(filtered_records)

                self.logger.info("Month processing completed",
                                 year=year, month=month,
                                 month_records=len(month_records),
                                 filtered_records=len(filtered_records),
                                 total_records=len(historical_records))

                # Move to next month
                current_date = next_date

            return historical_records

        except Exception as e:
            self.logger.error("Error fetching historical data", error=str(e))
            return historical_records  # Return partial results
    async def _process_historical_zip_enhanced(self, zip_content: bytes, zip_url: str,
                                               latitude: float, longitude: float,
                                               nearest_points: List[Tuple[str, Dict[str, Any], float]]) -> List[Dict[str, Any]]:
        """Process historical ZIP file with enhanced parsing"""
        try:
            import zipfile
            import io
            import gc

            historical_records = []
            nearest_ids = {p[0] for p in nearest_points}

            with zipfile.ZipFile(io.BytesIO(zip_content)) as zip_file:
                csv_files = [f for f in zip_file.namelist() if f.lower().endswith('.csv')]

                for csv_filename in csv_files:
                    try:
                        # Read CSV content
                        with zip_file.open(csv_filename) as csv_file:
                            text_content = csv_file.read().decode('utf-8', errors='ignore')

                        # Process CSV in chunks using the processor
                        csv_records = await self.processor.process_csv_content_chunked(
                            text_content, csv_filename, nearest_ids, nearest_points
                        )
                        historical_records.extend(csv_records)

                        # Force garbage collection between large files
                        gc.collect()

                    except Exception as csv_error:
                        self.logger.warning("Error processing CSV file",
                                            filename=csv_filename,
                                            error=str(csv_error))
                        continue

            self.logger.info("Historical ZIP processing completed",
                             zip_url=zip_url,
                             total_records=len(historical_records))
            return historical_records

        except Exception as e:
            self.logger.error("Error processing historical ZIP file",
                              zip_url=zip_url, error=str(e))
            return []
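

# ----------------------------------------------------------------
# Minimal manual smoke test - an illustrative sketch, not part of the
# service wiring. Assumes network access to datos.madrid.es; the
# Puerta del Sol coordinates are an assumption chosen for this example.
# ----------------------------------------------------------------
if __name__ == "__main__":
    import asyncio

    async def _smoke_test() -> None:
        client = MadridTrafficClient()

        # Probe the current-traffic path (returns None outside Madrid
        # bounds or on fetch/parse failure)
        current = await client.get_current_traffic(40.4168, -3.7038)
        print("current traffic:", current)

        # Probe the events path (non-empty only under heavy congestion)
        events = await client.get_events(40.4168, -3.7038)
        print("events:", events)

    asyncio.run(_smoke_test())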