Improve the traffic fetching system

Urtzi Alfaro
2025-08-10 17:31:38 +02:00
parent 312fdc8ef3
commit 3c2acc934a
16 changed files with 3866 additions and 1981 deletions


@@ -1,122 +1,283 @@
 # ================================================================
-# services/data/app/services/traffic_service.py - FIXED VERSION
+# services/data/app/services/traffic_service.py
 # ================================================================
-"""Traffic data service with improved error handling"""
+"""
+Abstracted Traffic Service - Universal interface for traffic data across multiple cities
+"""
-from typing import List, Dict, Any, Optional
-from datetime import datetime, timedelta
+import asyncio
+from datetime import datetime
+from typing import Dict, List, Any, Optional, Tuple
 from sqlalchemy.ext.asyncio import AsyncSession
 from sqlalchemy import select, and_
 import structlog
+from app.external.apis.traffic import UniversalTrafficClient
 from app.models.traffic import TrafficData
-from app.external.madrid_opendata import MadridOpenDataClient
-from app.schemas.external import TrafficDataResponse
-import uuid
+from app.core.performance import (
+    async_cache,
+    monitor_performance,
+    global_connection_pool,
+    global_performance_monitor,
+    batch_process
+)
 logger = structlog.get_logger()
 class TrafficService:
+    """
+    Abstracted traffic service providing unified interface for traffic data
+    Routes requests to appropriate city-specific clients automatically
+    """
     def __init__(self):
-        self.madrid_client = MadridOpenDataClient()
+        self.universal_client = UniversalTrafficClient()
+        self.logger = structlog.get_logger(__name__)
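
The routing itself lives in UniversalTrafficClient (app/external/apis/traffic), which this diff does not show. A minimal sketch of what coordinate-based routing can look like; the bounding-box values and the resolve_city helper are illustrative assumptions, not the client's actual code:

from typing import Dict, Optional, Tuple

# Hypothetical bounding boxes; the real client may use a registry or geocoder.
CITY_BOUNDS: Dict[str, Tuple[float, float, float, float]] = {
    "madrid": (40.31, 40.56, -3.84, -3.52),  # (min_lat, max_lat, min_lon, max_lon)
}

def resolve_city(latitude: float, longitude: float) -> Optional[str]:
    """Return the first city whose bounding box contains the query point."""
    for city, (min_lat, max_lat, min_lon, max_lon) in CITY_BOUNDS.items():
        if min_lat <= latitude <= max_lat and min_lon <= longitude <= max_lon:
            return city
    return None
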
-    async def get_current_traffic(self, latitude: float, longitude: float) -> Optional[TrafficDataResponse]:
-        """Get current traffic data for location"""
+    @async_cache(ttl=300)  # Cache for 5 minutes
+    @monitor_performance(monitor=global_performance_monitor)
+    async def get_current_traffic(
+        self,
+        latitude: float,
+        longitude: float,
+        tenant_id: Optional[str] = None
+    ) -> Optional[Dict[str, Any]]:
+        """
+        Get current traffic data for any supported location
+        Args:
+            latitude: Query location latitude
+            longitude: Query location longitude
+            tenant_id: Optional tenant identifier for logging/analytics
+        Returns:
+            Dict with current traffic data or None if not available
+        """
         try:
-            logger.debug("Getting current traffic", lat=latitude, lon=longitude)
-            traffic_data = await self.madrid_client.get_current_traffic(latitude, longitude)
+            self.logger.info("Getting current traffic data",
+                             lat=latitude, lon=longitude, tenant_id=tenant_id)
+            # Delegate to universal client
+            traffic_data = await self.universal_client.get_current_traffic(latitude, longitude)
             if traffic_data:
-                logger.debug("Traffic data received", source=traffic_data.get('source'))
+                # Add service metadata
+                traffic_data['service_metadata'] = {
+                    'request_timestamp': datetime.now().isoformat(),
+                    'tenant_id': tenant_id,
+                    'service_version': '2.0',
+                    'query_location': {'latitude': latitude, 'longitude': longitude}
+                }
-                # Validate and clean traffic data before creating response
-                # Use keyword arguments instead of unpacking
-                response = TrafficDataResponse(
-                    date=traffic_data.get("date", datetime.now()),
-                    traffic_volume=int(traffic_data.get("traffic_volume", 100)),
-                    pedestrian_count=int(traffic_data.get("pedestrian_count", 150)),
-                    congestion_level=str(traffic_data.get("congestion_level", "medium")),
-                    average_speed=float(traffic_data.get("average_speed", 25.0)),  # Fixed: use float, not int
-                    source=str(traffic_data.get("source", "unknown"))
-                )
+                self.logger.info("Successfully retrieved current traffic data",
+                                 lat=latitude, lon=longitude,
+                                 source=traffic_data.get('source', 'unknown'))
-                logger.debug("Successfully created traffic response",
-                             traffic_volume=response.traffic_volume,
-                             congestion_level=response.congestion_level)
-                return response
+                return traffic_data
             else:
-                logger.warning("No traffic data received from Madrid client")
+                self.logger.warning("No current traffic data available",
+                                    lat=latitude, lon=longitude)
                 return None
         except Exception as e:
-            logger.error("Failed to get current traffic", error=str(e), lat=latitude, lon=longitude)
-            # Log the full traceback for debugging
-            import traceback
-            logger.error("Traffic service traceback", traceback=traceback.format_exc())
+            self.logger.error("Error getting current traffic data",
+                              lat=latitude, lon=longitude, error=str(e))
             return None
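
The async_cache and monitor_performance decorators come from the internal app.core.performance module, which is outside this diff. A minimal sketch of a TTL cache decorator for coroutines, assuming a simple in-process store; the real implementation may differ:

import asyncio
import functools
import time
from typing import Any, Callable, Dict, Tuple

def async_cache(ttl: float = 300):
    """Cache coroutine results in process for ttl seconds (sketch only)."""
    def decorator(func: Callable) -> Callable:
        store: Dict[Tuple, Tuple[float, Any]] = {}
        lock = asyncio.Lock()

        @functools.wraps(func)
        async def wrapper(*args, **kwargs):
            key = (args, tuple(sorted(kwargs.items())))
            async with lock:
                hit = store.get(key)
                if hit is not None and time.monotonic() - hit[0] < ttl:
                    return hit[1]  # entry is still fresh
            result = await func(*args, **kwargs)  # call outside the lock
            async with lock:
                store[key] = (time.monotonic(), result)
            return result
        return wrapper
    return decorator
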
-    async def get_historical_traffic(self,
-                                     latitude: float,
-                                     longitude: float,
-                                     start_date: datetime,
-                                     end_date: datetime,
-                                     db: AsyncSession) -> List[TrafficDataResponse]:
-        """Get historical traffic data with enhanced storage for re-training"""
+    @async_cache(ttl=1800)  # Cache for 30 minutes (historical data changes less frequently)
+    @monitor_performance(monitor=global_performance_monitor)
+    async def get_historical_traffic(
+        self,
+        latitude: float,
+        longitude: float,
+        start_date: datetime,
+        end_date: datetime,
+        tenant_id: Optional[str] = None,
+        db: Optional[AsyncSession] = None
+    ) -> List[Dict[str, Any]]:
+        """
+        Get historical traffic data for any supported location with database storage
+        Args:
+            latitude: Query location latitude
+            longitude: Query location longitude
+            start_date: Start date for historical data
+            end_date: End date for historical data
+            tenant_id: Optional tenant identifier
+            db: Optional database session for storage
+        Returns:
+            List of historical traffic data dictionaries
+        """
         try:
-            logger.debug("Getting historical traffic",
-                         lat=latitude, lon=longitude,
-                         start=start_date, end=end_date)
+            self.logger.info("Getting historical traffic data",
+                             lat=latitude, lon=longitude,
+                             start=start_date, end=end_date, tenant_id=tenant_id)
+            # Validate date range
+            if start_date >= end_date:
+                self.logger.warning("Invalid date range", start=start_date, end=end_date)
+                return []
-            # Check database first
             location_id = f"{latitude:.4f},{longitude:.4f}"
-            stmt = select(TrafficData).where(
-                and_(
-                    TrafficData.location_id == location_id,
-                    TrafficData.date >= start_date,
-                    TrafficData.date <= end_date
-                )
-            ).order_by(TrafficData.date)
-            result = await db.execute(stmt)
-            db_records = result.scalars().all()
+            # Check database first if session provided
+            if db:
+                stmt = select(TrafficData).where(
+                    and_(
+                        TrafficData.location_id == location_id,
+                        TrafficData.date >= start_date,
+                        TrafficData.date <= end_date
+                    )
+                ).order_by(TrafficData.date)
+                result = await db.execute(stmt)
+                db_records = result.scalars().all()
+                if db_records:
+                    self.logger.info("Historical traffic data found in database",
+                                     count=len(db_records))
+                    return [self._convert_db_record_to_dict(record) for record in db_records]
-            if db_records:
-                logger.debug("Historical traffic data found in database", count=len(db_records))
-                return [TrafficDataResponse(
-                    date=record.date,
-                    traffic_volume=record.traffic_volume,
-                    pedestrian_count=record.pedestrian_count,
-                    congestion_level=record.congestion_level,
-                    average_speed=record.average_speed,
-                    source=record.source
-                ) for record in db_records]
-            # If not in database, fetch from API and store
-            logger.debug("Fetching historical data from MADRID OPEN DATA")
-            traffic_data = await self.madrid_client.get_historical_traffic(
+            # Delegate to universal client
+            traffic_data = await self.universal_client.get_historical_traffic(
                 latitude, longitude, start_date, end_date
             )
             if traffic_data:
-                # Enhanced storage with better error handling and validation
-                stored_count = await self._store_traffic_data_batch(
-                    traffic_data, location_id, db
-                )
-                logger.info("Traffic data stored for re-training",
-                            fetched=len(traffic_data), stored=stored_count, location=location_id)
-                return [TrafficDataResponse(**item) for item in traffic_data]
+                # Add service metadata to each record
+                for record in traffic_data:
+                    record['service_metadata'] = {
+                        'request_timestamp': datetime.now().isoformat(),
+                        'tenant_id': tenant_id,
+                        'service_version': '2.0',
+                        'query_location': {'latitude': latitude, 'longitude': longitude},
+                        'date_range': {
+                            'start': start_date.isoformat(),
+                            'end': end_date.isoformat()
+                        }
+                    }
+                # Store in database if session provided
+                if db:
+                    stored_count = await self._store_traffic_data_batch(
+                        traffic_data, location_id, db
+                    )
+                    self.logger.info("Traffic data stored for re-training",
+                                     fetched=len(traffic_data), stored=stored_count,
+                                     location=location_id)
+                self.logger.info("Successfully retrieved historical traffic data",
+                                 lat=latitude, lon=longitude, records=len(traffic_data))
+                return traffic_data
             else:
-                logger.warning("No historical traffic data received")
+                self.logger.info("No historical traffic data available",
+                                 lat=latitude, lon=longitude)
                 return []
         except Exception as e:
-            logger.error("Failed to get historical traffic", error=str(e))
+            self.logger.error("Error getting historical traffic data",
+                              lat=latitude, lon=longitude, error=str(e))
             return []
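
With db now optional, callers decide whether results are persisted. A usage sketch, assuming an async context and an open AsyncSession named session; the coordinates and tenant value are example inputs only:

from datetime import datetime, timedelta

async def load_last_month(service: TrafficService, session) -> None:
    end = datetime.now()
    start = end - timedelta(days=30)
    # With a session: the database is checked first and fetched rows are stored.
    stored = await service.get_historical_traffic(
        40.4168, -3.7038, start, end, tenant_id="demo-tenant", db=session
    )
    # Without a session: a pure API fetch, nothing is persisted.
    transient = await service.get_historical_traffic(40.4168, -3.7038, start, end)
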
+    def _convert_db_record_to_dict(self, record: TrafficData) -> Dict[str, Any]:
+        """Convert database record to dictionary format"""
+        return {
+            'date': record.date,
+            'traffic_volume': record.traffic_volume,
+            'pedestrian_count': record.pedestrian_count,
+            'congestion_level': record.congestion_level,
+            'average_speed': record.average_speed,
+            'source': record.source,
+            'location_id': record.location_id,
+            'raw_data': record.raw_data
+        }
+    async def get_traffic_events(
+        self,
+        latitude: float,
+        longitude: float,
+        radius_km: float = 5.0,
+        tenant_id: Optional[str] = None
+    ) -> List[Dict[str, Any]]:
+        """
+        Get traffic events and incidents for any supported location
+        Args:
+            latitude: Query location latitude
+            longitude: Query location longitude
+            radius_km: Search radius in kilometers
+            tenant_id: Optional tenant identifier
+        Returns:
+            List of traffic events
+        """
+        try:
+            self.logger.info("Getting traffic events",
+                             lat=latitude, lon=longitude, radius=radius_km, tenant_id=tenant_id)
+            # Delegate to universal client
+            events = await self.universal_client.get_events(latitude, longitude, radius_km)
+            # Add metadata to events
+            for event in events:
+                event['service_metadata'] = {
+                    'request_timestamp': datetime.now().isoformat(),
+                    'tenant_id': tenant_id,
+                    'service_version': '2.0',
+                    'query_location': {'latitude': latitude, 'longitude': longitude},
+                    'search_radius_km': radius_km
+                }
+            self.logger.info("Retrieved traffic events",
+                             lat=latitude, lon=longitude, events=len(events))
+            return events
+        except Exception as e:
+            self.logger.error("Error getting traffic events",
+                              lat=latitude, lon=longitude, error=str(e))
+            return []
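
How the universal client applies radius_km is not visible here; when events carry coordinates, a great-circle filter is the usual approach. A sketch using the haversine formula, where the latitude/longitude keys on event dicts are assumed for illustration:

import math
from typing import Any, Dict, List

def haversine_km(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
    """Great-circle distance between two points, in kilometers."""
    dlat = math.radians(lat2 - lat1)
    dlon = math.radians(lon2 - lon1)
    a = (math.sin(dlat / 2) ** 2
         + math.cos(math.radians(lat1)) * math.cos(math.radians(lat2))
         * math.sin(dlon / 2) ** 2)
    return 2 * 6371.0 * math.asin(math.sqrt(a))  # 6371 km: mean Earth radius

def filter_by_radius(events: List[Dict[str, Any]], lat: float, lon: float,
                     radius_km: float) -> List[Dict[str, Any]]:
    return [e for e in events
            if haversine_km(lat, lon, e["latitude"], e["longitude"]) <= radius_km]
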
+    def get_location_info(self, latitude: float, longitude: float) -> Dict[str, Any]:
+        """
+        Get information about traffic data availability for location
+        Args:
+            latitude: Query location latitude
+            longitude: Query location longitude
+        Returns:
+            Dict with location support information
+        """
+        try:
+            info = self.universal_client.get_location_info(latitude, longitude)
+            # Add service layer information
+            info['service_layer'] = {
+                'version': '2.0',
+                'abstraction_level': 'universal',
+                'supported_operations': [
+                    'current_traffic',
+                    'historical_traffic',
+                    'traffic_events',
+                    'bulk_requests'
+                ]
+            }
+            return info
+        except Exception as e:
+            self.logger.error("Error getting location info",
+                              lat=latitude, lon=longitude, error=str(e))
+            return {
+                'supported': False,
+                'error': str(e),
+                'service_layer': {'version': '2.0'}
+            }
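
Because get_location_info is synchronous, it works as a cheap pre-flight check before the async fetches. A usage sketch; the 'supported' key on the success path is an assumption based on the error fallback above:

async def fetch_if_supported(service: TrafficService, lat: float, lon: float):
    info = service.get_location_info(lat, lon)  # no await: synchronous capability check
    if not info.get("supported", False):  # assumed key, mirrors the error fallback
        return None
    return await service.get_current_traffic(lat, lon)
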
     async def store_traffic_data(self,
                                  latitude: float,
                                  longitude: float,
@@ -176,7 +337,8 @@ class TrafficService:
             else:
                 existing_dates = set()
-            # Store only new records
+            # Prepare batch of new records for bulk insert
+            batch_records = []
             for data in traffic_data:
                 try:
                     record_date = data.get('date')
@@ -188,32 +350,41 @@ class TrafficService:
logger.warning("Invalid traffic data, skipping", data=data)
continue
traffic_record = TrafficData(
location_id=location_id,
date=record_date,
traffic_volume=data.get('traffic_volume'),
pedestrian_count=data.get('pedestrian_count'),
congestion_level=data.get('congestion_level'),
average_speed=data.get('average_speed'),
source=data.get('source', 'madrid_opendata'),
raw_data=str(data)
)
db.add(traffic_record)
stored_count += 1
# Commit in batches to avoid memory issues
if stored_count % 100 == 0:
await db.commit()
logger.debug(f"Committed batch of {stored_count} records")
# Prepare record data for bulk insert
record_data = {
'location_id': location_id,
'date': record_date,
'traffic_volume': data.get('traffic_volume'),
'pedestrian_count': data.get('pedestrian_count'),
'congestion_level': data.get('congestion_level'),
'average_speed': data.get('average_speed'),
'source': data.get('source', 'madrid_opendata'),
'raw_data': str(data)
}
batch_records.append(record_data)
except Exception as record_error:
logger.warning("Failed to store individual traffic record",
logger.warning("Failed to prepare traffic record",
error=str(record_error), data=data)
continue
# Final commit
await db.commit()
# Use efficient bulk insert instead of individual records
if batch_records:
# Process in chunks to avoid memory issues
chunk_size = 5000
for i in range(0, len(batch_records), chunk_size):
chunk = batch_records[i:i + chunk_size]
# Use SQLAlchemy bulk insert for maximum performance
await db.execute(
TrafficData.__table__.insert(),
chunk
)
await db.commit()
stored_count += len(chunk)
logger.debug(f"Bulk inserted {len(chunk)} records (total: {stored_count})")
logger.info(f"Successfully stored {stored_count} traffic records for location {location_id}")
except Exception as e:
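
The chunked executemany above relies on the existing_dates pre-filter to avoid duplicate rows. On PostgreSQL the same guarantee can be pushed into the statement itself; a sketch of a drop-in replacement for the db.execute call inside the chunk loop, assuming a unique constraint on (location_id, date):

from sqlalchemy.dialects.postgresql import insert as pg_insert

# Inside the chunk loop: rows that collide with existing ones are skipped
# by the database itself, so the in-memory date pre-filter becomes optional.
stmt = pg_insert(TrafficData.__table__).on_conflict_do_nothing(
    index_elements=["location_id", "date"]
)
await db.execute(stmt, chunk)
await db.commit()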