295 lines
13 KiB
Python
295 lines
13 KiB
Python
# ================================================================
|
|
# services/data/app/models/traffic.py - Enhanced for Multiple Cities
|
|
# ================================================================
|
|
"""
|
|
Flexible traffic data models supporting multiple cities and extensible schemas
|
|
"""
|
|
|
|
from sqlalchemy import Column, String, DateTime, Float, Integer, Text, Index, Boolean, JSON
|
|
from sqlalchemy.dialects.postgresql import UUID
|
|
import uuid
|
|
from datetime import datetime, timezone
|
|
from typing import Dict, Any, Optional
|
|
|
|
from shared.database.base import Base
|
|
|
|
|
|
class TrafficData(Base):
    """
    Flexible traffic data model supporting multiple cities.

    Designed to accommodate varying data structures across different cities:
    a set of standardized columns shared by all cities, optional enhanced
    metrics, and a free-form JSON column (``city_specific_data``) for fields
    that only some cities provide.
    """
    __tablename__ = "traffic_data"

    # Primary identification
    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)

    # Location and temporal data
    location_id = Column(String(100), nullable=False, index=True)  # "lat,lon" or city-specific ID
    city = Column(String(50), nullable=False, index=True)  # madrid, barcelona, valencia, etc.
    date = Column(DateTime(timezone=True), nullable=False, index=True)

    # Core standardized traffic metrics (common across all cities)
    traffic_volume = Column(Integer, nullable=True)  # Vehicle count or intensity
    congestion_level = Column(String(20), nullable=True)  # low, medium, high, blocked
    average_speed = Column(Float, nullable=True)  # Average speed in km/h

    # Enhanced metrics (may not be available for all cities)
    occupation_percentage = Column(Float, nullable=True)  # Road occupation %
    load_percentage = Column(Float, nullable=True)  # Traffic load %
    pedestrian_count = Column(Integer, nullable=True)  # Estimated pedestrian count

    # Measurement point information
    measurement_point_id = Column(String(100), nullable=True, index=True)
    measurement_point_name = Column(String(500), nullable=True)
    measurement_point_type = Column(String(50), nullable=True)  # URB, M30, A, etc.

    # Geographic data
    latitude = Column(Float, nullable=True)
    longitude = Column(Float, nullable=True)
    district = Column(String(100), nullable=True)  # City district/area
    zone = Column(String(100), nullable=True)  # Traffic zone or sector

    # Data source and quality
    source = Column(String(50), nullable=False, default="unknown")  # madrid_opendata, synthetic, etc.
    data_quality_score = Column(Float, nullable=True)  # Quality score 0-100
    is_synthetic = Column(Boolean, default=False)
    has_pedestrian_inference = Column(Boolean, default=False)

    # City-specific data (flexible JSON storage)
    city_specific_data = Column(JSON, nullable=True)  # Store city-specific fields

    # Raw data backup
    raw_data = Column(Text, nullable=True)  # Original data for debugging

    # Audit fields
    tenant_id = Column(UUID(as_uuid=True), nullable=True, index=True)  # For multi-tenancy
    created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
    updated_at = Column(DateTime(timezone=True),
                        default=lambda: datetime.now(timezone.utc),
                        onupdate=lambda: datetime.now(timezone.utc))

    # Performance-optimized indexes
    __table_args__ = (
        # Core query patterns
        Index('idx_traffic_location_date', 'location_id', 'date'),
        Index('idx_traffic_city_date', 'city', 'date'),
        Index('idx_traffic_tenant_date', 'tenant_id', 'date'),

        # Advanced query patterns
        Index('idx_traffic_city_location', 'city', 'location_id'),
        Index('idx_traffic_measurement_point', 'city', 'measurement_point_id'),
        Index('idx_traffic_district_date', 'city', 'district', 'date'),

        # Training data queries
        Index('idx_traffic_training', 'tenant_id', 'city', 'date', 'is_synthetic'),
        Index('idx_traffic_quality', 'city', 'data_quality_score', 'date'),
    )

    def to_dict(self) -> Dict[str, Any]:
        """Convert model to dictionary for API responses.

        Returns:
            A dict of the standardized columns. ``id`` is rendered with
            ``str()``; ``date`` and ``created_at`` are ISO-8601 strings (or
            ``None`` when unset). ``raw_data``, ``tenant_id`` and
            ``updated_at`` are not included. The ``city_specific_data`` key is
            present only when that column holds a truthy value.
        """
        result = {
            'id': str(self.id),
            'location_id': self.location_id,
            'city': self.city,
            'date': self.date.isoformat() if self.date else None,
            'traffic_volume': self.traffic_volume,
            'congestion_level': self.congestion_level,
            'average_speed': self.average_speed,
            'occupation_percentage': self.occupation_percentage,
            'load_percentage': self.load_percentage,
            'pedestrian_count': self.pedestrian_count,
            'measurement_point_id': self.measurement_point_id,
            'measurement_point_name': self.measurement_point_name,
            'measurement_point_type': self.measurement_point_type,
            'latitude': self.latitude,
            'longitude': self.longitude,
            'district': self.district,
            'zone': self.zone,
            'source': self.source,
            'data_quality_score': self.data_quality_score,
            'is_synthetic': self.is_synthetic,
            'has_pedestrian_inference': self.has_pedestrian_inference,
            'created_at': self.created_at.isoformat() if self.created_at else None,
        }

        # Add city-specific data if present
        if self.city_specific_data:
            result['city_specific_data'] = self.city_specific_data

        return result

    def get_city_specific_field(self, field_name: str, default: Any = None) -> Any:
        """Safely get a city-specific field value.

        Args:
            field_name: Key to look up inside ``city_specific_data``.
            default: Value returned when the key is absent, or when
                ``city_specific_data`` is empty or not a dict.

        Returns:
            The stored value for ``field_name``, or ``default``.
        """
        if self.city_specific_data and isinstance(self.city_specific_data, dict):
            return self.city_specific_data.get(field_name, default)
        return default

    def set_city_specific_field(self, field_name: str, value: Any) -> None:
        """Set a city-specific field value.

        Any existing non-dict (or empty) ``city_specific_data`` is discarded
        and replaced by a dict containing only the new entry.

        Args:
            field_name: Key to store inside ``city_specific_data``.
            value: Value to associate with ``field_name``.
        """
        current = self.city_specific_data
        # Build a fresh dict and reassign the attribute instead of mutating in
        # place: a plain JSON column is not mutation-tracked by SQLAlchemy, so
        # an in-place ``dict[key] = value`` on a loaded row would not mark the
        # column dirty and the change could be silently dropped at flush time.
        updated = dict(current) if isinstance(current, dict) else {}
        updated[field_name] = value
        self.city_specific_data = updated
|
|
|
|
|
|
class TrafficMeasurementPoint(Base):
    """
    Registry of traffic measurement points across all cities.

    Each row describes one measurement point, identified by the pair
    (``city``, ``measurement_point_id``); city-specific extras live in the
    ``city_specific_metadata`` JSON column.
    """
    __tablename__ = "traffic_measurement_points"

    # --- Primary key -------------------------------------------------------
    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)

    # --- Identity within a city --------------------------------------------
    city = Column(String(50), nullable=False, index=True)
    measurement_point_id = Column(String(100), nullable=False, index=True)  # City-specific ID
    name = Column(String(500), nullable=True)
    description = Column(Text, nullable=True)

    # --- Geography (coordinates are mandatory here) ------------------------
    latitude = Column(Float, nullable=False)
    longitude = Column(Float, nullable=False)
    district = Column(String(100), nullable=True)
    zone = Column(String(100), nullable=True)

    # --- Classification ----------------------------------------------------
    road_type = Column(String(50), nullable=True)  # URB, M30, A, etc.
    measurement_type = Column(String(50), nullable=True)  # intensity, speed, etc.
    point_category = Column(String(50), nullable=True)  # urban, highway, ring_road

    # --- Status and metadata -----------------------------------------------
    is_active = Column(Boolean, default=True)
    installation_date = Column(DateTime(timezone=True), nullable=True)
    last_data_received = Column(DateTime(timezone=True), nullable=True)
    data_quality_rating = Column(Float, nullable=True)  # Average quality 0-100

    # --- City-specific point data ------------------------------------------
    city_specific_metadata = Column(JSON, nullable=True)

    # --- Audit timestamps (timezone-aware UTC, set on the Python side) ------
    created_at = Column(
        DateTime(timezone=True),
        default=lambda: datetime.now(timezone.utc),
    )
    updated_at = Column(
        DateTime(timezone=True),
        default=lambda: datetime.now(timezone.utc),
        onupdate=lambda: datetime.now(timezone.utc),
    )

    __table_args__ = (
        # Ensure unique measurement points per city
        Index('idx_unique_city_point', 'city', 'measurement_point_id', unique=True),

        # Geographic queries
        Index('idx_points_city_location', 'city', 'latitude', 'longitude'),
        Index('idx_points_district', 'city', 'district'),
        Index('idx_points_road_type', 'city', 'road_type'),

        # Status queries
        Index('idx_points_active', 'city', 'is_active', 'last_data_received'),
    )

    def to_dict(self) -> Dict[str, Any]:
        """Convert measurement point to dictionary.

        Returns:
            A dict with ``id`` rendered via ``str()``, datetime columns as
            ISO-8601 strings (``None`` when unset) and all other listed
            columns passed through unchanged. ``updated_at`` is not included.
        """
        def iso(moment):
            # Unset timestamps serialize as None rather than raising.
            return moment.isoformat() if moment else None

        payload: Dict[str, Any] = {'id': str(self.id)}

        # Columns copied verbatim, in the order they appear in the output.
        for attr in (
            'city',
            'measurement_point_id',
            'name',
            'description',
            'latitude',
            'longitude',
            'district',
            'zone',
            'road_type',
            'measurement_type',
            'point_category',
            'is_active',
        ):
            payload[attr] = getattr(self, attr)

        payload['installation_date'] = iso(self.installation_date)
        payload['last_data_received'] = iso(self.last_data_received)
        payload['data_quality_rating'] = self.data_quality_rating
        payload['city_specific_metadata'] = self.city_specific_metadata
        payload['created_at'] = iso(self.created_at)
        return payload
|
|
|
|
|
|
class TrafficDataBackgroundJob(Base):
    """
    Track background data collection jobs for multiple cities.

    One row per job: what it should do (type, city, location pattern, data
    date range), when it was scheduled/started/completed, and how far it got
    (status, progress, counters, error message).
    """
    __tablename__ = "traffic_background_jobs"

    # --- Primary key -------------------------------------------------------
    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)

    # --- Job configuration -------------------------------------------------
    job_type = Column(String(50), nullable=False)  # historical_fetch, cleanup, etc.
    city = Column(String(50), nullable=False, index=True)
    location_pattern = Column(String(200), nullable=True)  # Location pattern or specific coords

    # --- Scheduling --------------------------------------------------------
    scheduled_at = Column(DateTime(timezone=True), nullable=False)
    started_at = Column(DateTime(timezone=True), nullable=True)
    completed_at = Column(DateTime(timezone=True), nullable=True)

    # --- Status tracking ---------------------------------------------------
    status = Column(String(20), nullable=False, default='pending')  # pending, running, completed, failed
    progress_percentage = Column(Float, default=0.0)
    records_processed = Column(Integer, default=0)
    records_stored = Column(Integer, default=0)

    # --- Date range for data jobs ------------------------------------------
    data_start_date = Column(DateTime(timezone=True), nullable=True)
    data_end_date = Column(DateTime(timezone=True), nullable=True)

    # --- Results and error handling ----------------------------------------
    success_count = Column(Integer, default=0)
    error_count = Column(Integer, default=0)
    error_message = Column(Text, nullable=True)
    job_metadata = Column(JSON, nullable=True)  # Additional job-specific data

    # --- Tenant association ------------------------------------------------
    tenant_id = Column(UUID(as_uuid=True), nullable=True, index=True)

    # --- Audit timestamps (timezone-aware UTC, set on the Python side) ------
    created_at = Column(
        DateTime(timezone=True),
        default=lambda: datetime.now(timezone.utc),
    )
    updated_at = Column(
        DateTime(timezone=True),
        default=lambda: datetime.now(timezone.utc),
        onupdate=lambda: datetime.now(timezone.utc),
    )

    __table_args__ = (
        # Job monitoring
        Index('idx_jobs_city_status', 'city', 'status', 'scheduled_at'),
        Index('idx_jobs_tenant_status', 'tenant_id', 'status', 'scheduled_at'),
        Index('idx_jobs_type_city', 'job_type', 'city', 'scheduled_at'),

        # Cleanup queries
        Index('idx_jobs_completed', 'status', 'completed_at'),
    )

    def to_dict(self) -> Dict[str, Any]:
        """Convert job to dictionary.

        Returns:
            A dict of every column on the job. ``id`` is rendered via
            ``str()``; ``tenant_id`` via ``str()`` or ``None`` when unset;
            datetime columns become ISO-8601 strings or ``None`` when unset.
        """
        def stamp(value):
            # Unset timestamps serialize as None rather than raising.
            return value.isoformat() if value else None

        return {
            'id': str(self.id),
            'job_type': self.job_type,
            'city': self.city,
            'location_pattern': self.location_pattern,
            'scheduled_at': stamp(self.scheduled_at),
            'started_at': stamp(self.started_at),
            'completed_at': stamp(self.completed_at),
            'status': self.status,
            'progress_percentage': self.progress_percentage,
            'records_processed': self.records_processed,
            'records_stored': self.records_stored,
            'data_start_date': stamp(self.data_start_date),
            'data_end_date': stamp(self.data_end_date),
            'success_count': self.success_count,
            'error_count': self.error_count,
            'error_message': self.error_message,
            'job_metadata': self.job_metadata,
            'tenant_id': str(self.tenant_id) if self.tenant_id else None,
            'created_at': stamp(self.created_at),
            'updated_at': stamp(self.updated_at),
        }
|