# ================================================================
# services/data/app/models/traffic.py - Enhanced for Multiple Cities
# ================================================================
"""
Flexible traffic data models supporting multiple cities and extensible schemas
"""
from sqlalchemy import Column, String, DateTime, Float, Integer, Text, Index, Boolean, JSON
from sqlalchemy.dialects.postgresql import UUID
import uuid
from datetime import datetime, timezone
from typing import Dict, Any, Optional
from shared.database.base import Base


def _utcnow() -> datetime:
    """Timezone-aware UTC timestamp used as default/onupdate for audit columns."""
    return datetime.now(timezone.utc)


class TrafficData(Base):
    """
    Flexible traffic data model supporting multiple cities.

    Designed to accommodate varying data structures across different cities:
    a standardized core of metrics shared by every city, plus a JSON column
    (``city_specific_data``) for per-city extensions.
    """
    __tablename__ = "traffic_data"

    # Primary identification
    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)

    # Location and temporal data
    location_id = Column(String(100), nullable=False, index=True)  # "lat,lon" or city-specific ID
    city = Column(String(50), nullable=False, index=True)  # madrid, barcelona, valencia, etc.
    date = Column(DateTime(timezone=True), nullable=False, index=True)

    # Core standardized traffic metrics (common across all cities)
    traffic_volume = Column(Integer, nullable=True)  # Vehicle count or intensity
    congestion_level = Column(String(20), nullable=True)  # low, medium, high, blocked
    average_speed = Column(Float, nullable=True)  # Average speed in km/h

    # Enhanced metrics (may not be available for all cities)
    occupation_percentage = Column(Float, nullable=True)  # Road occupation %
    load_percentage = Column(Float, nullable=True)  # Traffic load %
    pedestrian_count = Column(Integer, nullable=True)  # Estimated pedestrian count

    # Measurement point information
    measurement_point_id = Column(String(100), nullable=True, index=True)
    measurement_point_name = Column(String(500), nullable=True)
    measurement_point_type = Column(String(50), nullable=True)  # URB, M30, A, etc.

    # Geographic data
    latitude = Column(Float, nullable=True)
    longitude = Column(Float, nullable=True)
    district = Column(String(100), nullable=True)  # City district/area
    zone = Column(String(100), nullable=True)  # Traffic zone or sector

    # Data source and quality
    source = Column(String(50), nullable=False, default="unknown")  # madrid_opendata, synthetic, etc.
    data_quality_score = Column(Float, nullable=True)  # Quality score 0-100
    is_synthetic = Column(Boolean, default=False)
    has_pedestrian_inference = Column(Boolean, default=False)

    # City-specific data (flexible JSON storage for fields the core schema lacks)
    city_specific_data = Column(JSON, nullable=True)

    # Raw data backup
    raw_data = Column(Text, nullable=True)  # Original data for debugging

    # Audit fields
    tenant_id = Column(UUID(as_uuid=True), nullable=True, index=True)  # For multi-tenancy
    created_at = Column(DateTime(timezone=True), default=_utcnow)
    updated_at = Column(DateTime(timezone=True), default=_utcnow, onupdate=_utcnow)

    # Performance-optimized indexes
    __table_args__ = (
        # Core query patterns
        Index('idx_traffic_location_date', 'location_id', 'date'),
        Index('idx_traffic_city_date', 'city', 'date'),
        Index('idx_traffic_tenant_date', 'tenant_id', 'date'),
        # Advanced query patterns
        Index('idx_traffic_city_location', 'city', 'location_id'),
        Index('idx_traffic_measurement_point', 'city', 'measurement_point_id'),
        Index('idx_traffic_district_date', 'city', 'district', 'date'),
        # Training data queries
        Index('idx_traffic_training', 'tenant_id', 'city', 'date', 'is_synthetic'),
        Index('idx_traffic_quality', 'city', 'data_quality_score', 'date'),
    )

    def to_dict(self) -> Dict[str, Any]:
        """Convert model to dictionary for API responses."""
        result = {
            'id': str(self.id),
            'location_id': self.location_id,
            'city': self.city,
            'date': self.date.isoformat() if self.date else None,
            'traffic_volume': self.traffic_volume,
            'congestion_level': self.congestion_level,
            'average_speed': self.average_speed,
            'occupation_percentage': self.occupation_percentage,
            'load_percentage': self.load_percentage,
            'pedestrian_count': self.pedestrian_count,
            'measurement_point_id': self.measurement_point_id,
            'measurement_point_name': self.measurement_point_name,
            'measurement_point_type': self.measurement_point_type,
            'latitude': self.latitude,
            'longitude': self.longitude,
            'district': self.district,
            'zone': self.zone,
            'source': self.source,
            'data_quality_score': self.data_quality_score,
            'is_synthetic': self.is_synthetic,
            'has_pedestrian_inference': self.has_pedestrian_inference,
            'created_at': self.created_at.isoformat() if self.created_at else None,
        }
        # Add city-specific data if present
        if self.city_specific_data:
            result['city_specific_data'] = self.city_specific_data
        return result

    def get_city_specific_field(self, field_name: str, default: Any = None) -> Any:
        """Safely get a city-specific field value, returning ``default`` when absent."""
        if self.city_specific_data and isinstance(self.city_specific_data, dict):
            return self.city_specific_data.get(field_name, default)
        return default

    def set_city_specific_field(self, field_name: str, value: Any) -> None:
        """Set a city-specific field value.

        Reassigns a fresh dict instead of mutating in place: SQLAlchemy's
        plain ``JSON`` column type only detects attribute *reassignment*,
        so an in-place ``dict`` mutation would be silently lost on flush
        (unless ``MutableDict`` were configured, which it is not here).
        """
        current = self.city_specific_data
        data = dict(current) if isinstance(current, dict) else {}
        data[field_name] = value
        self.city_specific_data = data


class TrafficMeasurementPoint(Base):
    """
    Registry of traffic measurement points across all cities.

    Supports different city-specific measurement point schemas via the
    ``city_specific_metadata`` JSON column.
    """
    __tablename__ = "traffic_measurement_points"

    # Primary identification
    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)

    # Location and identification
    city = Column(String(50), nullable=False, index=True)
    measurement_point_id = Column(String(100), nullable=False, index=True)  # City-specific ID
    name = Column(String(500), nullable=True)
    description = Column(Text, nullable=True)

    # Geographic information
    latitude = Column(Float, nullable=False)
    longitude = Column(Float, nullable=False)
    district = Column(String(100), nullable=True)
    zone = Column(String(100), nullable=True)

    # Classification
    road_type = Column(String(50), nullable=True)  # URB, M30, A, etc.
    measurement_type = Column(String(50), nullable=True)  # intensity, speed, etc.
    point_category = Column(String(50), nullable=True)  # urban, highway, ring_road

    # Status and metadata
    is_active = Column(Boolean, default=True)
    installation_date = Column(DateTime(timezone=True), nullable=True)
    last_data_received = Column(DateTime(timezone=True), nullable=True)
    data_quality_rating = Column(Float, nullable=True)  # Average quality 0-100

    # City-specific point data
    city_specific_metadata = Column(JSON, nullable=True)

    # Audit fields
    created_at = Column(DateTime(timezone=True), default=_utcnow)
    updated_at = Column(DateTime(timezone=True), default=_utcnow, onupdate=_utcnow)

    __table_args__ = (
        # Ensure unique measurement points per city
        Index('idx_unique_city_point', 'city', 'measurement_point_id', unique=True),
        # Geographic queries
        Index('idx_points_city_location', 'city', 'latitude', 'longitude'),
        Index('idx_points_district', 'city', 'district'),
        Index('idx_points_road_type', 'city', 'road_type'),
        # Status queries
        Index('idx_points_active', 'city', 'is_active', 'last_data_received'),
    )

    def to_dict(self) -> Dict[str, Any]:
        """Convert measurement point to dictionary."""
        return {
            'id': str(self.id),
            'city': self.city,
            'measurement_point_id': self.measurement_point_id,
            'name': self.name,
            'description': self.description,
            'latitude': self.latitude,
            'longitude': self.longitude,
            'district': self.district,
            'zone': self.zone,
            'road_type': self.road_type,
            'measurement_type': self.measurement_type,
            'point_category': self.point_category,
            'is_active': self.is_active,
            'installation_date': self.installation_date.isoformat() if self.installation_date else None,
            'last_data_received': self.last_data_received.isoformat() if self.last_data_received else None,
            'data_quality_rating': self.data_quality_rating,
            'city_specific_metadata': self.city_specific_metadata,
            'created_at': self.created_at.isoformat() if self.created_at else None,
        }


class TrafficDataBackgroundJob(Base):
    """
    Track background data collection jobs for multiple cities.

    Supports scheduling and monitoring of data fetching processes.
    """
    __tablename__ = "traffic_background_jobs"

    # Primary identification
    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)

    # Job configuration
    job_type = Column(String(50), nullable=False)  # historical_fetch, cleanup, etc.
    city = Column(String(50), nullable=False, index=True)
    location_pattern = Column(String(200), nullable=True)  # Location pattern or specific coords

    # Scheduling
    scheduled_at = Column(DateTime(timezone=True), nullable=False)
    started_at = Column(DateTime(timezone=True), nullable=True)
    completed_at = Column(DateTime(timezone=True), nullable=True)

    # Status tracking
    status = Column(String(20), nullable=False, default='pending')  # pending, running, completed, failed
    progress_percentage = Column(Float, default=0.0)
    records_processed = Column(Integer, default=0)
    records_stored = Column(Integer, default=0)

    # Date range for data jobs
    data_start_date = Column(DateTime(timezone=True), nullable=True)
    data_end_date = Column(DateTime(timezone=True), nullable=True)

    # Results and error handling
    success_count = Column(Integer, default=0)
    error_count = Column(Integer, default=0)
    error_message = Column(Text, nullable=True)
    job_metadata = Column(JSON, nullable=True)  # Additional job-specific data

    # Tenant association
    tenant_id = Column(UUID(as_uuid=True), nullable=True, index=True)

    # Audit fields
    created_at = Column(DateTime(timezone=True), default=_utcnow)
    updated_at = Column(DateTime(timezone=True), default=_utcnow, onupdate=_utcnow)

    __table_args__ = (
        # Job monitoring
        Index('idx_jobs_city_status', 'city', 'status', 'scheduled_at'),
        Index('idx_jobs_tenant_status', 'tenant_id', 'status', 'scheduled_at'),
        Index('idx_jobs_type_city', 'job_type', 'city', 'scheduled_at'),
        # Cleanup queries
        Index('idx_jobs_completed', 'status', 'completed_at'),
    )

    def to_dict(self) -> Dict[str, Any]:
        """Convert job to dictionary."""
        return {
            'id': str(self.id),
            'job_type': self.job_type,
            'city': self.city,
            'location_pattern': self.location_pattern,
            'scheduled_at': self.scheduled_at.isoformat() if self.scheduled_at else None,
            'started_at': self.started_at.isoformat() if self.started_at else None,
            'completed_at': self.completed_at.isoformat() if self.completed_at else None,
            'status': self.status,
            'progress_percentage': self.progress_percentage,
            'records_processed': self.records_processed,
            'records_stored': self.records_stored,
            'data_start_date': self.data_start_date.isoformat() if self.data_start_date else None,
            'data_end_date': self.data_end_date.isoformat() if self.data_end_date else None,
            'success_count': self.success_count,
            'error_count': self.error_count,
            'error_message': self.error_message,
            'job_metadata': self.job_metadata,
            'tenant_id': str(self.tenant_id) if self.tenant_id else None,
            'created_at': self.created_at.isoformat() if self.created_at else None,
            'updated_at': self.updated_at.isoformat() if self.updated_at else None,
        }