# ================================================================
# services/data/app/external/apis/madrid_traffic_client.py
# ================================================================
"""
Madrid traffic client - Orchestration layer only
Coordinates between HTTP client, data processor, and business logic components
"""
from datetime import datetime, timedelta, timezone
from typing import Dict, List, Any, Optional, Tuple
import structlog
from .traffic import BaseTrafficClient, SupportedCity
from ..base_client import BaseAPIClient
from ..clients.madrid_client import MadridTrafficAPIClient
from ..processors.madrid_processor import MadridTrafficDataProcessor
from ..processors.madrid_business_logic import MadridTrafficAnalyzer
from ..models.madrid_models import TrafficRecord, CongestionLevel
from app.core.performance import (
    rate_limit,
    async_cache,
    monitor_performance,
    global_performance_monitor
)


class MadridTrafficClient(BaseTrafficClient, BaseAPIClient):
    """
    Enhanced Madrid traffic client - Orchestration layer
    Coordinates HTTP, processing, and business logic components
    """

    # Madrid geographic bounds
    MADRID_BOUNDS = {
        'lat_min': 40.31, 'lat_max': 40.56,
        'lon_min': -3.89, 'lon_max': -3.51
    }

    # Configuration constants
    MAX_HISTORICAL_DAYS = 1095  # 3 years
    MAX_CSV_PROCESSING_ROWS = 5_000_000
    MEASUREMENT_POINTS_LIMIT = 20

    def __init__(self):
        BaseTrafficClient.__init__(self, SupportedCity.MADRID)
        BaseAPIClient.__init__(self, base_url="https://datos.madrid.es")

        # Initialize components
        self.api_client = MadridTrafficAPIClient()
        self.processor = MadridTrafficDataProcessor()
        self.analyzer = MadridTrafficAnalyzer()
        self.logger = structlog.get_logger()

    def supports_location(self, latitude: float, longitude: float) -> bool:
        """Check if location is within Madrid bounds"""
        return (self.MADRID_BOUNDS['lat_min'] <= latitude <= self.MADRID_BOUNDS['lat_max'] and
                self.MADRID_BOUNDS['lon_min'] <= longitude <= self.MADRID_BOUNDS['lon_max'])
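
    # Illustrative checks (the coordinates are assumptions for this sketch,
    # not values from this module):
    #   supports_location(40.4168, -3.7038) -> True   (Puerta del Sol, central Madrid)
    #   supports_location(41.3874,  2.1686) -> False  (Barcelona, outside the box)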

    @rate_limit(calls=30, period=60)
    @async_cache(ttl=300)
    @monitor_performance(monitor=global_performance_monitor)
    async def get_current_traffic(self, latitude: float, longitude: float) -> Optional[Dict[str, Any]]:
        """Get current traffic data with enhanced pedestrian inference"""
        try:
            if not self.supports_location(latitude, longitude):
                self.logger.warning("Location outside Madrid bounds", lat=latitude, lon=longitude)
                return None

            # Fetch XML data
            xml_content = await self.api_client.fetch_current_traffic_xml()
            if not xml_content:
                self.logger.warning("No XML content received")
                return None

            # Parse XML data
            traffic_points = self.processor.parse_traffic_xml(xml_content)
            if not traffic_points:
                self.logger.warning("No traffic points found in XML")
                return None

            # Find nearest traffic point
            nearest_point = self.analyzer.find_nearest_traffic_point(traffic_points, latitude, longitude)
            if not nearest_point:
                self.logger.warning("No nearby traffic points found")
                return None

            # Enhance with business logic
            enhanced_data = await self._enhance_traffic_data(nearest_point, latitude, longitude)

            self.logger.info("Current traffic data retrieved",
                             point_id=nearest_point.get('measurement_point_id'),
                             distance=enhanced_data.get('distance_km', 0))
            return enhanced_data

        except Exception as e:
            self.logger.error("Error getting current traffic", error=str(e))
            return None
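
    # Shape of a successful get_current_traffic() result, as assembled in
    # _enhance_traffic_data() below (example values are illustrative only):
    #   {'congestion_level': 'high', 'pedestrian_count': 140,
    #    'distance_km': 0.4, 'city': 'madrid', 'source': 'madrid_current_xml', ...}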

    @rate_limit(calls=10, period=60)
    @monitor_performance(monitor=global_performance_monitor)
    async def get_historical_traffic(self, latitude: float, longitude: float,
                                     start_date: datetime, end_date: datetime) -> List[Dict[str, Any]]:
        """Get historical traffic data with pedestrian enhancement"""
        try:
            if not self.supports_location(latitude, longitude):
                self.logger.warning("Location outside Madrid bounds", lat=latitude, lon=longitude)
                return []

            # Validate date range
            if (end_date - start_date).days > self.MAX_HISTORICAL_DAYS:
                self.logger.warning("Date range too large, truncating",
                                    requested_days=(end_date - start_date).days,
                                    max_days=self.MAX_HISTORICAL_DAYS)
                start_date = end_date - timedelta(days=self.MAX_HISTORICAL_DAYS)

            # Fetch measurement points registry
            csv_content = await self.api_client.fetch_measurement_points_csv()
            if not csv_content:
                self.logger.error("Failed to fetch measurement points registry")
                return []

            # Parse measurement points
            measurement_points = self.processor.parse_measurement_points_csv(csv_content)
            if not measurement_points:
                self.logger.error("No measurement points found")
                return []

            # Find nearest measurement points
            nearest_points = self.analyzer.find_nearest_measurement_points(
                measurement_points, latitude, longitude, num_points=3
            )
            if not nearest_points:
                self.logger.warning("No nearby measurement points found")
                return []

            # Process historical data
            historical_records = await self._fetch_historical_data_enhanced(
                latitude, longitude, start_date, end_date, nearest_points
            )

            self.logger.info("Historical traffic data retrieved",
                             records_count=len(historical_records),
                             date_range=f"{start_date.date()} to {end_date.date()}")
            return historical_records

        except Exception as e:
            self.logger.error("Error getting historical traffic", error=str(e))
            return []
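
    # Minimal caller sketch (hypothetical; assumes timezone-aware datetimes,
    # which the per-record date filtering in _fetch_historical_data_enhanced
    # relies on):
    #   end = datetime.now(timezone.utc)
    #   records = await client.get_historical_traffic(
    #       40.4168, -3.7038, end - timedelta(days=7), end)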

    async def get_events(self, latitude: float, longitude: float,
                         radius_km: float = 5.0) -> List[Dict[str, Any]]:
        """Get traffic events (incidents, construction, etc.)"""
        # Madrid doesn't provide a separate events endpoint, so synthesize
        # events from the enhanced current traffic data instead
        current_data = await self.get_current_traffic(latitude, longitude)
        if current_data and current_data.get('congestion_level') in ['high', 'blocked']:
            return [{
                'type': 'congestion',
                'severity': current_data.get('congestion_level'),
                'description': f"High traffic congestion at {current_data.get('measurement_point_name', 'measurement point')}",
                'location': {
                    'latitude': current_data.get('latitude'),
                    'longitude': current_data.get('longitude')
                },
                'timestamp': current_data.get('timestamp')
            }]
        return []
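
    # Note: an empty list from get_events() is the normal result under
    # free-flowing traffic, not an error; only 'high' or 'blocked' congestion
    # is surfaced as a synthetic event.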

    async def _enhance_traffic_data(self, traffic_point: Dict[str, Any],
                                    query_lat: float, query_lon: float) -> Dict[str, Any]:
        """Enhance traffic data with business logic and pedestrian inference"""
        # Calculate distance
        distance_km = self.analyzer.calculate_distance(
            query_lat, query_lon,
            traffic_point.get('latitude', 0),
            traffic_point.get('longitude', 0)
        )

        # Classify road type
        road_type = self.analyzer.classify_road_type(
            traffic_point.get('measurement_point_name', '')
        )

        # Get congestion level
        congestion_level = self.analyzer.get_congestion_level(
            traffic_point.get('ocupacion', 0)
        )

        # Create traffic record for pedestrian inference
        traffic_record = TrafficRecord(
            date=datetime.now(timezone.utc),
            traffic_volume=traffic_point.get('intensidad', 0),
            occupation_percentage=int(traffic_point.get('ocupacion', 0)),
            load_percentage=traffic_point.get('carga', 0),
            average_speed=30,  # Default speed
            congestion_level=congestion_level,
            pedestrian_count=0,  # Will be calculated
            measurement_point_id=traffic_point.get('measurement_point_id', ''),
            measurement_point_name=traffic_point.get('measurement_point_name', ''),
            road_type=road_type,
            source='madrid_current_xml'
        )

        # Calculate pedestrian count
        location_context = {
            'latitude': traffic_point.get('latitude'),
            'longitude': traffic_point.get('longitude'),
            'measurement_point_name': traffic_point.get('measurement_point_name')
        }
        pedestrian_count, inference_metadata = self.analyzer.calculate_pedestrian_flow(
            traffic_record, location_context
        )

        # Build enhanced response
        enhanced_data = {
            'timestamp': datetime.now(timezone.utc),
            'latitude': traffic_point.get('latitude'),
            'longitude': traffic_point.get('longitude'),
            'measurement_point_id': traffic_point.get('measurement_point_id'),
            'measurement_point_name': traffic_point.get('measurement_point_name'),
            'traffic_volume': traffic_point.get('intensidad', 0),
            'occupation_percentage': int(traffic_point.get('ocupacion', 0)),
            'load_percentage': traffic_point.get('carga', 0),
            'congestion_level': congestion_level,
            'pedestrian_count': pedestrian_count,
            'road_type': road_type,
            'distance_km': distance_km,
            'source': 'madrid_current_xml',
            'city': 'madrid',
            'inference_metadata': inference_metadata,
            'raw_data': traffic_point
        }
        return enhanced_data
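
    # Field-name note: 'intensidad', 'ocupacion' and 'carga' are the Spanish
    # keys from the Madrid feed (traffic volume, occupation %, load %); the
    # TrafficRecord above uses average_speed=30 and pedestrian_count=0 as
    # placeholders until calculate_pedestrian_flow() supplies the real count.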

    async def _fetch_historical_data_enhanced(self, latitude: float, longitude: float,
                                              start_date: datetime, end_date: datetime,
                                              nearest_points: List[Tuple[str, Dict[str, Any], float]]) -> List[Dict[str, Any]]:
        """Fetch and process historical traffic data"""
        historical_records = []
        try:
            # Process by year and month to avoid memory issues
            current_date = start_date.replace(day=1)  # Start from beginning of month
            while current_date <= end_date:
                year = current_date.year
                month = current_date.month

                # Compute the next month once so the skip path and the normal
                # path advance the loop identically
                if month == 12:
                    next_month = current_date.replace(year=year + 1, month=1)
                else:
                    next_month = current_date.replace(month=month + 1)

                # Build historical URL
                zip_url = self.api_client._build_historical_url(year, month)
                self.logger.info("Processing historical ZIP file",
                                 year=year, month=month, zip_url=zip_url)

                # Fetch ZIP content
                zip_content = await self.api_client.fetch_historical_zip(zip_url)
                if not zip_content:
                    self.logger.warning("Failed to fetch historical ZIP", url=zip_url)
                    current_date = next_month
                    continue

                # Process ZIP content with enhanced parsing
                month_records = await self._process_historical_zip_enhanced(
                    zip_content, zip_url, latitude, longitude, nearest_points
                )

                # Filter by date range
                filtered_records = [
                    record for record in month_records
                    if start_date <= record.get('date', datetime.min.replace(tzinfo=timezone.utc)) <= end_date
                ]
                historical_records.extend(filtered_records)

                self.logger.info("Month processing completed",
                                 year=year, month=month,
                                 month_records=len(month_records),
                                 filtered_records=len(filtered_records),
                                 total_records=len(historical_records))

                # Move to next month
                current_date = next_month

            return historical_records

        except Exception as e:
            self.logger.error("Error fetching historical data", error=str(e))
            return historical_records  # Return partial results

    async def _process_historical_zip_enhanced(self, zip_content: bytes, zip_url: str,
                                               latitude: float, longitude: float,
                                               nearest_points: List[Tuple[str, Dict[str, Any], float]]) -> List[Dict[str, Any]]:
        """Process historical ZIP file with enhanced parsing"""
        try:
            # Imported locally: only needed on the historical path
            import zipfile
            import io
            import gc

            historical_records = []
            nearest_ids = {p[0] for p in nearest_points}

            with zipfile.ZipFile(io.BytesIO(zip_content)) as zip_file:
                csv_files = [f for f in zip_file.namelist() if f.lower().endswith('.csv')]

                for csv_filename in csv_files:
                    try:
                        # Read CSV content
                        with zip_file.open(csv_filename) as csv_file:
                            text_content = csv_file.read().decode('utf-8', errors='ignore')

                        # Process CSV in chunks using processor
                        csv_records = await self.processor.process_csv_content_chunked(
                            text_content, csv_filename, nearest_ids, nearest_points
                        )
                        historical_records.extend(csv_records)

                        # Force garbage collection between files to bound memory
                        gc.collect()

                    except Exception as csv_error:
                        self.logger.warning("Error processing CSV file",
                                            filename=csv_filename,
                                            error=str(csv_error))
                        continue

            self.logger.info("Historical ZIP processing completed",
                             zip_url=zip_url,
                             total_records=len(historical_records))
            return historical_records

        except Exception as e:
            self.logger.error("Error processing historical ZIP file",
                              zip_url=zip_url, error=str(e))
            return []
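

if __name__ == "__main__":
    # Minimal smoke-test sketch (illustrative only, not part of the service
    # wiring; assumes network access to datos.madrid.es and that the imported
    # components are fully configured). Coordinates are an assumption:
    # Puerta del Sol, central Madrid.
    import asyncio

    async def _demo() -> None:
        client = MadridTrafficClient()
        current = await client.get_current_traffic(40.4168, -3.7038)
        if current:
            print(current['congestion_level'], current['pedestrian_count'])
        else:
            print("No current traffic data available")

    asyncio.run(_demo())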