REFACTOR data service
This commit is contained in:
1
services/external/app/models/__init__.py
vendored
Normal file
1
services/external/app/models/__init__.py
vendored
Normal file
@@ -0,0 +1 @@
|
||||
# services/external/app/models/__init__.py
|
||||
294
services/external/app/models/traffic.py
vendored
Normal file
294
services/external/app/models/traffic.py
vendored
Normal file
@@ -0,0 +1,294 @@
|
||||
# ================================================================
|
||||
# services/data/app/models/traffic.py - Enhanced for Multiple Cities
|
||||
# ================================================================
|
||||
"""
|
||||
Flexible traffic data models supporting multiple cities and extensible schemas
|
||||
"""
|
||||
|
||||
from sqlalchemy import Column, String, DateTime, Float, Integer, Text, Index, Boolean, JSON
|
||||
from sqlalchemy.dialects.postgresql import UUID
|
||||
import uuid
|
||||
from datetime import datetime, timezone
|
||||
from typing import Dict, Any, Optional
|
||||
|
||||
from shared.database.base import Base
|
||||
|
||||
|
||||
class TrafficData(Base):
    """
    Flexible traffic data model supporting multiple cities.

    A common core of standardized metrics (volume, congestion, speed) is
    shared by every city; per-city extras live in the free-form JSON
    column ``city_specific_data`` so new cities don't require schema
    migrations.
    """
    __tablename__ = "traffic_data"

    # Primary identification
    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)

    # Location and temporal data
    location_id = Column(String(100), nullable=False, index=True)  # "lat,lon" or city-specific ID
    city = Column(String(50), nullable=False, index=True)  # madrid, barcelona, valencia, etc.
    date = Column(DateTime(timezone=True), nullable=False, index=True)

    # Core standardized traffic metrics (common across all cities)
    traffic_volume = Column(Integer, nullable=True)  # Vehicle count or intensity
    congestion_level = Column(String(20), nullable=True)  # low, medium, high, blocked
    average_speed = Column(Float, nullable=True)  # Average speed in km/h

    # Enhanced metrics (may not be available for all cities)
    occupation_percentage = Column(Float, nullable=True)  # Road occupation %
    load_percentage = Column(Float, nullable=True)  # Traffic load %
    pedestrian_count = Column(Integer, nullable=True)  # Estimated pedestrian count

    # Measurement point information
    measurement_point_id = Column(String(100), nullable=True, index=True)
    measurement_point_name = Column(String(500), nullable=True)
    measurement_point_type = Column(String(50), nullable=True)  # URB, M30, A, etc.

    # Geographic data
    latitude = Column(Float, nullable=True)
    longitude = Column(Float, nullable=True)
    district = Column(String(100), nullable=True)  # City district/area
    zone = Column(String(100), nullable=True)  # Traffic zone or sector

    # Data source and quality
    source = Column(String(50), nullable=False, default="unknown")  # madrid_opendata, synthetic, etc.
    data_quality_score = Column(Float, nullable=True)  # Quality score 0-100
    is_synthetic = Column(Boolean, default=False)
    has_pedestrian_inference = Column(Boolean, default=False)

    # City-specific data (flexible JSON storage)
    city_specific_data = Column(JSON, nullable=True)  # Store city-specific fields

    # Raw data backup
    raw_data = Column(Text, nullable=True)  # Original data for debugging

    # Audit fields
    tenant_id = Column(UUID(as_uuid=True), nullable=True, index=True)  # For multi-tenancy
    created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
    updated_at = Column(DateTime(timezone=True),
                        default=lambda: datetime.now(timezone.utc),
                        onupdate=lambda: datetime.now(timezone.utc))

    # Performance-optimized indexes
    __table_args__ = (
        # Core query patterns
        Index('idx_traffic_location_date', 'location_id', 'date'),
        Index('idx_traffic_city_date', 'city', 'date'),
        Index('idx_traffic_tenant_date', 'tenant_id', 'date'),

        # Advanced query patterns
        Index('idx_traffic_city_location', 'city', 'location_id'),
        Index('idx_traffic_measurement_point', 'city', 'measurement_point_id'),
        Index('idx_traffic_district_date', 'city', 'district', 'date'),

        # Training data queries
        Index('idx_traffic_training', 'tenant_id', 'city', 'date', 'is_synthetic'),
        Index('idx_traffic_quality', 'city', 'data_quality_score', 'date'),
    )

    def to_dict(self) -> Dict[str, Any]:
        """Convert model to dictionary for API responses.

        Returns a JSON-serializable dict: the UUID is stringified, dates
        are ISO-8601 strings (or ``None``), and ``city_specific_data`` is
        included only when present. Note: ``updated_at`` and ``tenant_id``
        are intentionally not exposed here.
        """
        result = {
            'id': str(self.id),
            'location_id': self.location_id,
            'city': self.city,
            'date': self.date.isoformat() if self.date else None,
            'traffic_volume': self.traffic_volume,
            'congestion_level': self.congestion_level,
            'average_speed': self.average_speed,
            'occupation_percentage': self.occupation_percentage,
            'load_percentage': self.load_percentage,
            'pedestrian_count': self.pedestrian_count,
            'measurement_point_id': self.measurement_point_id,
            'measurement_point_name': self.measurement_point_name,
            'measurement_point_type': self.measurement_point_type,
            'latitude': self.latitude,
            'longitude': self.longitude,
            'district': self.district,
            'zone': self.zone,
            'source': self.source,
            'data_quality_score': self.data_quality_score,
            'is_synthetic': self.is_synthetic,
            'has_pedestrian_inference': self.has_pedestrian_inference,
            'created_at': self.created_at.isoformat() if self.created_at else None
        }

        # Add city-specific data if present
        if self.city_specific_data:
            result['city_specific_data'] = self.city_specific_data

        return result

    def get_city_specific_field(self, field_name: str, default: Any = None) -> Any:
        """Safely get a city-specific field value.

        Returns ``default`` when the JSON column is unset, not a dict, or
        does not contain ``field_name``.
        """
        if self.city_specific_data and isinstance(self.city_specific_data, dict):
            return self.city_specific_data.get(field_name, default)
        return default

    def set_city_specific_field(self, field_name: str, value: Any) -> None:
        """Set a city-specific field value.

        Builds a fresh dict and reassigns the attribute instead of mutating
        in place: SQLAlchemy's plain ``JSON`` column type does not track
        in-place mutations, so ``self.city_specific_data[k] = v`` on an
        already-loaded row would never be flushed to the database.
        Reassignment guarantees the change is detected on commit.
        """
        # Normalize any non-dict/None current value to an empty dict.
        if isinstance(self.city_specific_data, dict):
            data = dict(self.city_specific_data)
        else:
            data = {}
        data[field_name] = value
        self.city_specific_data = data
|
||||
|
||||
|
||||
class TrafficMeasurementPoint(Base):
    """
    Registry of traffic measurement points (sensors/stations) across all cities.

    One row per physical measurement point; city-specific attributes that
    do not fit the common schema go into ``city_specific_metadata``.
    Uniqueness is enforced per ``(city, measurement_point_id)`` pair.
    """
    __tablename__ = "traffic_measurement_points"

    # Primary identification (surrogate key; the natural key is city + measurement_point_id)
    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)

    # Location and identification
    city = Column(String(50), nullable=False, index=True)
    measurement_point_id = Column(String(100), nullable=False, index=True)  # City-specific ID
    name = Column(String(500), nullable=True)
    description = Column(Text, nullable=True)

    # Geographic information (coordinates are mandatory, unlike TrafficData rows)
    latitude = Column(Float, nullable=False)
    longitude = Column(Float, nullable=False)
    district = Column(String(100), nullable=True)
    zone = Column(String(100), nullable=True)

    # Classification
    road_type = Column(String(50), nullable=True)  # URB, M30, A, etc.
    measurement_type = Column(String(50), nullable=True)  # intensity, speed, etc.
    point_category = Column(String(50), nullable=True)  # urban, highway, ring_road

    # Status and metadata
    is_active = Column(Boolean, default=True)
    installation_date = Column(DateTime(timezone=True), nullable=True)
    last_data_received = Column(DateTime(timezone=True), nullable=True)  # freshness marker for monitoring
    data_quality_rating = Column(Float, nullable=True)  # Average quality 0-100

    # City-specific point data (free-form JSON, schema varies by city)
    city_specific_metadata = Column(JSON, nullable=True)

    # Audit fields (UTC, set/refreshed automatically)
    created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
    updated_at = Column(DateTime(timezone=True),
                        default=lambda: datetime.now(timezone.utc),
                        onupdate=lambda: datetime.now(timezone.utc))

    __table_args__ = (
        # Ensure unique measurement points per city
        Index('idx_unique_city_point', 'city', 'measurement_point_id', unique=True),

        # Geographic queries
        Index('idx_points_city_location', 'city', 'latitude', 'longitude'),
        Index('idx_points_district', 'city', 'district'),
        Index('idx_points_road_type', 'city', 'road_type'),

        # Status queries
        Index('idx_points_active', 'city', 'is_active', 'last_data_received'),
    )

    def to_dict(self) -> Dict[str, Any]:
        """Convert measurement point to a JSON-serializable dictionary.

        UUIDs are stringified and datetimes rendered as ISO-8601 (or
        ``None``); ``updated_at`` is intentionally not exposed.
        """
        return {
            'id': str(self.id),
            'city': self.city,
            'measurement_point_id': self.measurement_point_id,
            'name': self.name,
            'description': self.description,
            'latitude': self.latitude,
            'longitude': self.longitude,
            'district': self.district,
            'zone': self.zone,
            'road_type': self.road_type,
            'measurement_type': self.measurement_type,
            'point_category': self.point_category,
            'is_active': self.is_active,
            'installation_date': self.installation_date.isoformat() if self.installation_date else None,
            'last_data_received': self.last_data_received.isoformat() if self.last_data_received else None,
            'data_quality_rating': self.data_quality_rating,
            'city_specific_metadata': self.city_specific_metadata,
            'created_at': self.created_at.isoformat() if self.created_at else None
        }
|
||||
|
||||
|
||||
class TrafficDataBackgroundJob(Base):
    """
    Track background data collection jobs for multiple cities.

    Supports scheduling and monitoring of data fetching processes:
    lifecycle timestamps (scheduled/started/completed), a coarse status
    string, progress counters, and free-form ``job_metadata`` JSON.
    """
    __tablename__ = "traffic_background_jobs"

    # Primary identification
    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)

    # Job configuration
    job_type = Column(String(50), nullable=False)  # historical_fetch, cleanup, etc.
    city = Column(String(50), nullable=False, index=True)
    location_pattern = Column(String(200), nullable=True)  # Location pattern or specific coords

    # Scheduling (started_at/completed_at stay NULL until the transition happens)
    scheduled_at = Column(DateTime(timezone=True), nullable=False)
    started_at = Column(DateTime(timezone=True), nullable=True)
    completed_at = Column(DateTime(timezone=True), nullable=True)

    # Status tracking
    status = Column(String(20), nullable=False, default='pending')  # pending, running, completed, failed
    progress_percentage = Column(Float, default=0.0)
    records_processed = Column(Integer, default=0)
    records_stored = Column(Integer, default=0)  # may be < records_processed if rows are filtered/deduped

    # Date range for data jobs (the window of source data being fetched)
    data_start_date = Column(DateTime(timezone=True), nullable=True)
    data_end_date = Column(DateTime(timezone=True), nullable=True)

    # Results and error handling
    success_count = Column(Integer, default=0)
    error_count = Column(Integer, default=0)
    error_message = Column(Text, nullable=True)
    job_metadata = Column(JSON, nullable=True)  # Additional job-specific data

    # Tenant association
    tenant_id = Column(UUID(as_uuid=True), nullable=True, index=True)

    # Audit fields (UTC, set/refreshed automatically)
    created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
    updated_at = Column(DateTime(timezone=True),
                        default=lambda: datetime.now(timezone.utc),
                        onupdate=lambda: datetime.now(timezone.utc))

    __table_args__ = (
        # Job monitoring
        Index('idx_jobs_city_status', 'city', 'status', 'scheduled_at'),
        Index('idx_jobs_tenant_status', 'tenant_id', 'status', 'scheduled_at'),
        Index('idx_jobs_type_city', 'job_type', 'city', 'scheduled_at'),

        # Cleanup queries
        Index('idx_jobs_completed', 'status', 'completed_at'),
    )

    def to_dict(self) -> Dict[str, Any]:
        """Convert job to a JSON-serializable dictionary.

        UUIDs are stringified and all datetimes rendered as ISO-8601
        strings (or ``None``); unlike the other models in this file this
        one does include ``updated_at`` and ``tenant_id``.
        """
        return {
            'id': str(self.id),
            'job_type': self.job_type,
            'city': self.city,
            'location_pattern': self.location_pattern,
            'scheduled_at': self.scheduled_at.isoformat() if self.scheduled_at else None,
            'started_at': self.started_at.isoformat() if self.started_at else None,
            'completed_at': self.completed_at.isoformat() if self.completed_at else None,
            'status': self.status,
            'progress_percentage': self.progress_percentage,
            'records_processed': self.records_processed,
            'records_stored': self.records_stored,
            'data_start_date': self.data_start_date.isoformat() if self.data_start_date else None,
            'data_end_date': self.data_end_date.isoformat() if self.data_end_date else None,
            'success_count': self.success_count,
            'error_count': self.error_count,
            'error_message': self.error_message,
            'job_metadata': self.job_metadata,
            'tenant_id': str(self.tenant_id) if self.tenant_id else None,
            'created_at': self.created_at.isoformat() if self.created_at else None,
            'updated_at': self.updated_at.isoformat() if self.updated_at else None
        }
|
||||
74
services/external/app/models/weather.py
vendored
Normal file
74
services/external/app/models/weather.py
vendored
Normal file
@@ -0,0 +1,74 @@
|
||||
# ================================================================
|
||||
# services/data/app/models/weather.py
|
||||
# ================================================================
|
||||
"""Weather data models"""
|
||||
|
||||
from sqlalchemy import Column, String, DateTime, Float, Integer, Text, Index, Boolean
|
||||
from sqlalchemy.dialects.postgresql import UUID, JSON
|
||||
import uuid
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from shared.database.base import Base
|
||||
|
||||
class WeatherData(Base):
    """
    Weather record (observation or forecast) for a single location and time.

    A row can represent either observed or forecast data; ``is_forecast``
    and ``data_type`` distinguish them (exact ``data_type`` values are set
    by the ingestion code — NOTE(review): not visible here, confirm against
    the fetcher).
    """
    __tablename__ = "weather_data"

    # Primary key
    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    # Location / station identification
    location_id = Column(String(100), nullable=False, index=True)
    city = Column(String(50), nullable=False)
    station_name = Column(String(200), nullable=True)
    latitude = Column(Float, nullable=True)
    longitude = Column(Float, nullable=True)
    # Temporal: `date` is when the value applies; `forecast_date` only for forecasts
    date = Column(DateTime(timezone=True), nullable=False, index=True)
    forecast_date = Column(DateTime(timezone=True), nullable=True)
    # Temperature metrics
    temperature = Column(Float, nullable=True)  # Celsius
    temperature_min = Column(Float, nullable=True)
    temperature_max = Column(Float, nullable=True)
    feels_like = Column(Float, nullable=True)
    # Precipitation
    precipitation = Column(Float, nullable=True)  # mm
    precipitation_probability = Column(Float, nullable=True)
    humidity = Column(Float, nullable=True)  # percentage
    # Wind
    wind_speed = Column(Float, nullable=True)  # km/h
    wind_direction = Column(Float, nullable=True)
    wind_gust = Column(Float, nullable=True)
    # Atmospheric conditions
    pressure = Column(Float, nullable=True)  # hPa
    visibility = Column(Float, nullable=True)
    uv_index = Column(Float, nullable=True)
    cloud_cover = Column(Float, nullable=True)
    # Textual description / provider code
    condition = Column(String(100), nullable=True)
    description = Column(String(200), nullable=True)
    weather_code = Column(String(20), nullable=True)
    # Provenance and quality
    source = Column(String(50), nullable=False, default="aemet")
    data_type = Column(String(20), nullable=False)
    is_forecast = Column(Boolean, nullable=True)
    data_quality_score = Column(Float, nullable=True)
    # Raw provider payload and post-processed form (JSON blobs)
    raw_data = Column(JSON, nullable=True)
    processed_data = Column(JSON, nullable=True)
    # Multi-tenancy + audit fields (UTC, set/refreshed automatically)
    tenant_id = Column(UUID(as_uuid=True), nullable=True, index=True)
    created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
    updated_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc), onupdate=lambda: datetime.now(timezone.utc))

    __table_args__ = (
        # Primary lookup pattern: all records for a location within a date range
        Index('idx_weather_location_date', 'location_id', 'date'),
    )
|
||||
|
||||
class WeatherForecast(Base):
    """
    Point-in-time weather forecast for a location.

    Slimmer than ``WeatherData``: only the core forecast metrics are kept,
    with ``generated_at`` recording when the forecast was produced so
    successive forecast runs for the same ``forecast_date`` can coexist.
    """
    __tablename__ = "weather_forecasts"

    # Primary key
    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    location_id = Column(String(100), nullable=False, index=True)
    # When the forecast applies vs. when it was produced
    forecast_date = Column(DateTime(timezone=True), nullable=False)
    generated_at = Column(DateTime(timezone=True), nullable=False, default=lambda: datetime.now(timezone.utc))
    # Forecast metrics (units presumably match WeatherData: °C, mm, %, km/h — confirm with ingestion code)
    temperature = Column(Float, nullable=True)
    precipitation = Column(Float, nullable=True)
    humidity = Column(Float, nullable=True)
    wind_speed = Column(Float, nullable=True)
    description = Column(String(200), nullable=True)
    # Provenance; raw payload kept as text here (unlike WeatherData's JSON column)
    source = Column(String(50), nullable=False, default="aemet")
    raw_data = Column(Text, nullable=True)
    # Audit fields (UTC, set/refreshed automatically)
    created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
    updated_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc), onupdate=lambda: datetime.now(timezone.utc))

    __table_args__ = (
        # Primary lookup pattern: forecasts for a location within a date range
        Index('idx_forecast_location_date', 'location_id', 'forecast_date'),
    )
|
||||
Reference in New Issue
Block a user