Initial commit - production deployment

This commit is contained in:
2026-01-21 17:17:16 +01:00
commit c23d00dd92
2289 changed files with 638440 additions and 0 deletions

View File

View File

@@ -0,0 +1,329 @@
# services/external/app/repositories/calendar_repository.py
"""
Calendar Repository - Manages school calendars and tenant location contexts
"""
from typing import List, Dict, Any, Optional
from datetime import datetime
from sqlalchemy import select, and_, or_
from sqlalchemy.ext.asyncio import AsyncSession
import structlog
import uuid
from app.models.calendar import SchoolCalendar, TenantLocationContext
logger = structlog.get_logger()
class CalendarRepository:
    """Repository for school calendar and tenant location data.

    Wraps an externally owned ``AsyncSession``. Every mutating method commits
    on success and rolls back (then re-raises) on failure, so each call is an
    independent transaction from the caller's point of view.
    """

    def __init__(self, session: AsyncSession):
        # The session's lifecycle (creation, closing) is managed by the caller.
        self.session = session

    # ===== School Calendar Operations =====

    async def create_school_calendar(
        self,
        city_id: str,
        calendar_name: str,
        school_type: str,
        academic_year: str,
        holiday_periods: List[Dict[str, Any]],
        school_hours: Dict[str, Any],
        source: Optional[str] = None,
        enabled: bool = True
    ) -> SchoolCalendar:
        """Create a new school calendar.

        Args:
            city_id: City the calendar belongs to.
            calendar_name: Human-readable calendar name.
            school_type: School type label (free-form string here).
            academic_year: Academic year label (e.g. "2025-2026").
            holiday_periods: JSON-serializable list of holiday-period dicts.
            school_hours: JSON-serializable dict describing school hours.
            source: Optional provenance of the calendar data.
            enabled: Whether the calendar is active on creation.

        Returns:
            The persisted SchoolCalendar, refreshed from the database.

        Raises:
            Exception: re-raises any database error after rolling back.
        """
        try:
            calendar = SchoolCalendar(
                id=uuid.uuid4(),  # id generated client-side, not by the DB
                city_id=city_id,
                calendar_name=calendar_name,
                school_type=school_type,
                academic_year=academic_year,
                holiday_periods=holiday_periods,
                school_hours=school_hours,
                source=source,
                enabled=enabled
            )
            self.session.add(calendar)
            await self.session.commit()
            # refresh() pulls server-side defaults (e.g. created_at) back in.
            await self.session.refresh(calendar)
            logger.info(
                "School calendar created",
                calendar_id=str(calendar.id),
                city_id=city_id,
                school_type=school_type
            )
            return calendar
        except Exception as e:
            await self.session.rollback()
            logger.error(
                "Error creating school calendar",
                city_id=city_id,
                error=str(e)
            )
            raise

    async def get_calendar_by_id(
        self,
        calendar_id: uuid.UUID
    ) -> Optional[SchoolCalendar]:
        """Get school calendar by primary key; None if not found."""
        stmt = select(SchoolCalendar).where(SchoolCalendar.id == calendar_id)
        result = await self.session.execute(stmt)
        return result.scalar_one_or_none()

    async def get_calendars_by_city(
        self,
        city_id: str,
        enabled_only: bool = True
    ) -> List[SchoolCalendar]:
        """Get all school calendars for a city.

        Args:
            city_id: City to filter on.
            enabled_only: When True (default), exclude disabled calendars.

        Returns:
            Calendars ordered by academic year (newest first), then type.
        """
        stmt = select(SchoolCalendar).where(SchoolCalendar.city_id == city_id)
        if enabled_only:
            # '==' (not 'is') is required to build a SQLAlchemy column expression.
            stmt = stmt.where(SchoolCalendar.enabled == True)
        stmt = stmt.order_by(SchoolCalendar.academic_year.desc(), SchoolCalendar.school_type)
        result = await self.session.execute(stmt)
        return list(result.scalars().all())

    async def get_calendar_by_city_type_year(
        self,
        city_id: str,
        school_type: str,
        academic_year: str
    ) -> Optional[SchoolCalendar]:
        """Get the enabled calendar matching (city, school type, academic year).

        Returns None when there is no enabled match. Assumes at most one row
        satisfies the triple; raises if several do (scalar_one_or_none).
        """
        stmt = select(SchoolCalendar).where(
            and_(
                SchoolCalendar.city_id == city_id,
                SchoolCalendar.school_type == school_type,
                SchoolCalendar.academic_year == academic_year,
                SchoolCalendar.enabled == True
            )
        )
        result = await self.session.execute(stmt)
        return result.scalar_one_or_none()

    async def update_calendar(
        self,
        calendar_id: uuid.UUID,
        **kwargs
    ) -> Optional[SchoolCalendar]:
        """Update arbitrary fields of a school calendar.

        Unknown keyword names are silently ignored (hasattr guard), so a typo
        in a field name will not raise.

        Returns:
            The updated calendar, or None when the id does not exist.
        """
        try:
            calendar = await self.get_calendar_by_id(calendar_id)
            if not calendar:
                return None
            for key, value in kwargs.items():
                if hasattr(calendar, key):
                    setattr(calendar, key, value)
            # NOTE(review): naive UTC timestamp; datetime.utcnow() is
            # deprecated in py3.12 — consider datetime.now(timezone.utc).
            calendar.updated_at = datetime.utcnow()
            await self.session.commit()
            await self.session.refresh(calendar)
            logger.info(
                "School calendar updated",
                calendar_id=str(calendar_id),
                fields=list(kwargs.keys())
            )
            return calendar
        except Exception as e:
            await self.session.rollback()
            logger.error(
                "Error updating school calendar",
                calendar_id=str(calendar_id),
                error=str(e)
            )
            raise

    async def delete_calendar(self, calendar_id: uuid.UUID) -> bool:
        """Delete a school calendar.

        Returns:
            True when a row was deleted, False when the id was not found.
        """
        try:
            calendar = await self.get_calendar_by_id(calendar_id)
            if not calendar:
                return False
            await self.session.delete(calendar)
            await self.session.commit()
            logger.info("School calendar deleted", calendar_id=str(calendar_id))
            return True
        except Exception as e:
            await self.session.rollback()
            logger.error(
                "Error deleting school calendar",
                calendar_id=str(calendar_id),
                error=str(e)
            )
            raise

    # ===== Tenant Location Context Operations =====

    async def create_or_update_tenant_location_context(
        self,
        tenant_id: uuid.UUID,
        city_id: str,
        school_calendar_id: Optional[uuid.UUID] = None,
        neighborhood: Optional[str] = None,
        local_events: Optional[List[Dict[str, Any]]] = None,
        notes: Optional[str] = None
    ) -> TenantLocationContext:
        """Create or update the (single) location context for a tenant.

        On update, ``city_id`` is always overwritten while the optional fields
        are only overwritten when explicitly provided (None means "keep").
        On create, ``local_events`` defaults to an empty list.

        Returns:
            The persisted TenantLocationContext.
        """
        try:
            # Check if context exists — one context per tenant.
            existing = await self.get_tenant_location_context(tenant_id)
            if existing:
                # Update existing; None-valued args leave fields untouched.
                existing.city_id = city_id
                if school_calendar_id is not None:
                    existing.school_calendar_id = school_calendar_id
                if neighborhood is not None:
                    existing.neighborhood = neighborhood
                if local_events is not None:
                    existing.local_events = local_events
                if notes is not None:
                    existing.notes = notes
                existing.updated_at = datetime.utcnow()
                await self.session.commit()
                await self.session.refresh(existing)
                logger.info(
                    "Tenant location context updated",
                    tenant_id=str(tenant_id)
                )
                return existing
            else:
                # Create new
                context = TenantLocationContext(
                    tenant_id=tenant_id,
                    city_id=city_id,
                    school_calendar_id=school_calendar_id,
                    neighborhood=neighborhood,
                    local_events=local_events or [],
                    notes=notes
                )
                self.session.add(context)
                await self.session.commit()
                await self.session.refresh(context)
                logger.info(
                    "Tenant location context created",
                    tenant_id=str(tenant_id),
                    city_id=city_id
                )
                return context
        except Exception as e:
            await self.session.rollback()
            logger.error(
                "Error creating/updating tenant location context",
                tenant_id=str(tenant_id),
                error=str(e)
            )
            raise

    async def get_tenant_location_context(
        self,
        tenant_id: uuid.UUID
    ) -> Optional[TenantLocationContext]:
        """Get the tenant's location context; None if not configured."""
        stmt = select(TenantLocationContext).where(
            TenantLocationContext.tenant_id == tenant_id
        )
        result = await self.session.execute(stmt)
        return result.scalar_one_or_none()

    async def get_tenant_with_calendar(
        self,
        tenant_id: uuid.UUID
    ) -> Optional[Dict[str, Any]]:
        """Get tenant location context with full calendar details.

        Returns:
            A plain dict (JSON-friendly: UUIDs stringified) with a nested
            "calendar" dict, or "calendar": None when the tenant has no
            linked calendar or the linked calendar no longer exists.
            Returns None when the tenant has no context at all.
        """
        context = await self.get_tenant_location_context(tenant_id)
        if not context:
            return None
        result = {
            "tenant_id": str(context.tenant_id),
            "city_id": context.city_id,
            "neighborhood": context.neighborhood,
            "local_events": context.local_events,
            "notes": context.notes,
            "calendar": None
        }
        if context.school_calendar_id:
            # Dangling calendar ids (deleted calendar) degrade to calendar=None.
            calendar = await self.get_calendar_by_id(context.school_calendar_id)
            if calendar:
                result["calendar"] = {
                    "calendar_id": str(calendar.id),
                    "calendar_name": calendar.calendar_name,
                    "school_type": calendar.school_type,
                    "academic_year": calendar.academic_year,
                    "holiday_periods": calendar.holiday_periods,
                    "school_hours": calendar.school_hours,
                    "source": calendar.source
                }
        return result

    async def delete_tenant_location_context(
        self,
        tenant_id: uuid.UUID
    ) -> bool:
        """Delete the tenant's location context.

        Returns:
            True when a row was deleted, False when none existed.
        """
        try:
            context = await self.get_tenant_location_context(tenant_id)
            if not context:
                return False
            await self.session.delete(context)
            await self.session.commit()
            logger.info(
                "Tenant location context deleted",
                tenant_id=str(tenant_id)
            )
            return True
        except Exception as e:
            await self.session.rollback()
            logger.error(
                "Error deleting tenant location context",
                tenant_id=str(tenant_id),
                error=str(e)
            )
            raise

    # ===== Helper Methods =====

    async def get_all_tenants_for_calendar(
        self,
        calendar_id: uuid.UUID
    ) -> List[TenantLocationContext]:
        """Get all tenant contexts referencing a specific calendar (unordered)."""
        stmt = select(TenantLocationContext).where(
            TenantLocationContext.school_calendar_id == calendar_id
        )
        result = await self.session.execute(stmt)
        return list(result.scalars().all())

View File

@@ -0,0 +1,249 @@
# services/external/app/repositories/city_data_repository.py
"""
City Data Repository - Manages shared city-based data storage
"""
from typing import List, Dict, Any, Optional
from datetime import datetime
from sqlalchemy import select, delete, and_
from sqlalchemy.ext.asyncio import AsyncSession
import structlog
from app.models.city_weather import CityWeatherData
from app.models.city_traffic import CityTrafficData
logger = structlog.get_logger()
class CityDataRepository:
    """Repository for city-scoped historical weather and traffic data.

    Rows are shared across tenants and keyed by ``city_id``. Bulk writes
    commit immediately and roll back (then re-raise) on failure; the injected
    ``AsyncSession`` is owned by the caller.
    """

    def __init__(self, session: AsyncSession):
        # Session lifecycle (creation/closing) is managed by the caller.
        self.session = session

    async def bulk_store_weather(
        self,
        city_id: str,
        weather_records: List[Dict[str, Any]]
    ) -> int:
        """Bulk insert weather records for a city.

        Args:
            city_id: Target city identifier.
            weather_records: Dicts whose keys match CityWeatherData columns;
                missing keys default to None ('source' defaults to 'ingestion').

        Returns:
            Number of records inserted (0 for empty input).

        Raises:
            Exception: re-raises any database error after rolling back.
        """
        if not weather_records:
            return 0
        try:
            objects = [
                CityWeatherData(
                    city_id=city_id,
                    date=record.get('date'),
                    temperature=record.get('temperature'),
                    precipitation=record.get('precipitation'),
                    humidity=record.get('humidity'),
                    wind_speed=record.get('wind_speed'),
                    pressure=record.get('pressure'),
                    description=record.get('description'),
                    source=record.get('source', 'ingestion'),
                    raw_data=record.get('raw_data')
                )
                for record in weather_records
            ]
            self.session.add_all(objects)
            await self.session.commit()
            logger.info(
                "Weather data stored",
                city_id=city_id,
                records=len(objects)
            )
            return len(objects)
        except Exception as e:
            await self.session.rollback()
            logger.error(
                "Error storing weather data",
                city_id=city_id,
                error=str(e)
            )
            raise

    async def get_weather_by_city_and_range(
        self,
        city_id: str,
        start_date: datetime,
        end_date: datetime
    ) -> List[CityWeatherData]:
        """Get weather rows for a city within [start_date, end_date], date-ordered."""
        stmt = select(CityWeatherData).where(
            and_(
                CityWeatherData.city_id == city_id,
                CityWeatherData.date >= start_date,
                CityWeatherData.date <= end_date
            )
        ).order_by(CityWeatherData.date)
        result = await self.session.execute(stmt)
        # Materialize to a real list so the annotation holds for callers.
        return list(result.scalars().all())

    async def delete_weather_before(
        self,
        city_id: str,
        cutoff_date: datetime
    ) -> int:
        """Delete weather records strictly older than cutoff_date.

        Returns:
            Number of rows deleted.
        """
        stmt = delete(CityWeatherData).where(
            and_(
                CityWeatherData.city_id == city_id,
                CityWeatherData.date < cutoff_date
            )
        )
        result = await self.session.execute(stmt)
        await self.session.commit()
        return result.rowcount

    async def bulk_store_traffic(
        self,
        city_id: str,
        traffic_records: List[Dict[str, Any]]
    ) -> int:
        """Bulk insert traffic records for a city.

        Mirrors bulk_store_weather: returns the number of rows inserted,
        rolls back and re-raises on failure.
        """
        if not traffic_records:
            return 0
        try:
            objects = [
                CityTrafficData(
                    city_id=city_id,
                    date=record.get('date'),
                    traffic_volume=record.get('traffic_volume'),
                    pedestrian_count=record.get('pedestrian_count'),
                    congestion_level=record.get('congestion_level'),
                    average_speed=record.get('average_speed'),
                    source=record.get('source', 'ingestion'),
                    raw_data=record.get('raw_data')
                )
                for record in traffic_records
            ]
            self.session.add_all(objects)
            await self.session.commit()
            logger.info(
                "Traffic data stored",
                city_id=city_id,
                records=len(objects)
            )
            return len(objects)
        except Exception as e:
            await self.session.rollback()
            logger.error(
                "Error storing traffic data",
                city_id=city_id,
                error=str(e)
            )
            raise

    async def get_traffic_by_city_and_range(
        self,
        city_id: str,
        start_date: datetime,
        end_date: datetime
    ) -> List[CityTrafficData]:
        """Get traffic data for a city within a date range, aggregated daily.

        Hourly rows are averaged per calendar day in SQL to avoid loading
        hundreds of thousands of records into Python.

        Returns:
            Transient (non-persisted) CityTrafficData objects, one per day,
            dated at midnight, with congestion_level fixed to 'medium'
            (a level cannot be meaningfully averaged).
        """
        from sqlalchemy import func, cast, Date
        day = cast(CityTrafficData.date, Date)
        stmt = select(
            day.label('date'),
            func.avg(CityTrafficData.traffic_volume).label('traffic_volume'),
            func.avg(CityTrafficData.pedestrian_count).label('pedestrian_count'),
            func.avg(CityTrafficData.average_speed).label('average_speed'),
            func.max(CityTrafficData.source).label('source')
        ).where(
            and_(
                CityTrafficData.city_id == city_id,
                CityTrafficData.date >= start_date,
                CityTrafficData.date <= end_date
            )
        ).group_by(day).order_by(day)
        result = await self.session.execute(stmt)
        # Convert aggregated rows to CityTrafficData objects. Compare against
        # None explicitly: a legitimate average of 0 must not become None.
        traffic_records = []
        for row in result:
            record = CityTrafficData(
                city_id=city_id,
                date=datetime.combine(row.date, datetime.min.time()),
                traffic_volume=int(row.traffic_volume) if row.traffic_volume is not None else None,
                pedestrian_count=int(row.pedestrian_count) if row.pedestrian_count is not None else None,
                congestion_level='medium',  # Default since we're averaging
                average_speed=float(row.average_speed) if row.average_speed is not None else None,
                source=row.source or 'aggregated'
            )
            traffic_records.append(record)
        return traffic_records

    async def delete_traffic_before(
        self,
        city_id: str,
        cutoff_date: datetime
    ) -> int:
        """Delete traffic records strictly older than cutoff_date.

        Returns:
            Number of rows deleted.
        """
        stmt = delete(CityTrafficData).where(
            and_(
                CityTrafficData.city_id == city_id,
                CityTrafficData.date < cutoff_date
            )
        )
        result = await self.session.execute(stmt)
        await self.session.commit()
        return result.rowcount

    async def get_data_coverage(
        self,
        city_id: str,
        start_date: datetime,
        end_date: datetime
    ) -> Dict[str, int]:
        """
        Check how much data exists for a city in a date range.

        Counts are computed with SQL COUNT(*) instead of fetching every row,
        so the cost is independent of the number of matching records.

        Returns:
            Dict with counts: {'weather': X, 'traffic': Y}
        """
        from sqlalchemy import func  # local import, as in get_traffic_by_city_and_range

        weather_stmt = select(func.count()).select_from(CityWeatherData).where(
            and_(
                CityWeatherData.city_id == city_id,
                CityWeatherData.date >= start_date,
                CityWeatherData.date <= end_date
            )
        )
        weather_count = (await self.session.execute(weather_stmt)).scalar_one()

        traffic_stmt = select(func.count()).select_from(CityTrafficData).where(
            and_(
                CityTrafficData.city_id == city_id,
                CityTrafficData.date >= start_date,
                CityTrafficData.date <= end_date
            )
        )
        traffic_count = (await self.session.execute(traffic_stmt)).scalar_one()

        return {
            'weather': weather_count,
            'traffic': traffic_count
        }

View File

@@ -0,0 +1,271 @@
"""
POI Context Repository
Data access layer for TenantPOIContext model.
Handles CRUD operations for POI detection results and ML features.
"""
from typing import Optional, List
from datetime import datetime, timezone
from sqlalchemy import select, update, delete
from sqlalchemy.ext.asyncio import AsyncSession
import structlog
import uuid
from app.models.poi_context import TenantPOIContext
logger = structlog.get_logger()
class POIContextRepository:
    """
    Repository for POI context data access.

    Manages storage and retrieval of POI detection results
    and ML features for tenant locations (one context row per tenant).

    NOTE(review): unlike the other repositories in this service, the write
    methods here do not roll back on failure — confirm this is intended.
    """

    def __init__(self, session: AsyncSession):
        """
        Initialize repository.

        Args:
            session: SQLAlchemy async session (lifecycle owned by the caller)
        """
        self.session = session

    async def create(self, poi_context_data: dict) -> TenantPOIContext:
        """
        Create new POI context record.

        Args:
            poi_context_data: Dictionary with POI context data. Required keys:
                "tenant_id", "latitude", "longitude"; all other keys fall back
                to defaults (empty containers, "overpass_api", "completed",
                a 180-day refresh interval, and "now" for the detection time).

        Returns:
            Created TenantPOIContext instance (refreshed from the DB)
        """
        poi_context = TenantPOIContext(
            tenant_id=poi_context_data["tenant_id"],
            latitude=poi_context_data["latitude"],
            longitude=poi_context_data["longitude"],
            poi_detection_results=poi_context_data.get("poi_detection_results", {}),
            ml_features=poi_context_data.get("ml_features", {}),
            total_pois_detected=poi_context_data.get("total_pois_detected", 0),
            high_impact_categories=poi_context_data.get("high_impact_categories", []),
            relevant_categories=poi_context_data.get("relevant_categories", []),
            detection_timestamp=poi_context_data.get(
                "detection_timestamp",
                datetime.now(timezone.utc)
            ),
            detection_source=poi_context_data.get("detection_source", "overpass_api"),
            detection_status=poi_context_data.get("detection_status", "completed"),
            detection_error=poi_context_data.get("detection_error"),
            refresh_interval_days=poi_context_data.get("refresh_interval_days", 180)
        )
        # Calculate next refresh date before persisting, so the scheduled
        # refresh is stored together with the new row.
        poi_context.next_refresh_date = poi_context.calculate_next_refresh()
        self.session.add(poi_context)
        await self.session.commit()
        await self.session.refresh(poi_context)
        logger.info(
            "POI context created",
            tenant_id=str(poi_context.tenant_id),
            total_pois=poi_context.total_pois_detected
        )
        return poi_context

    async def get_by_tenant_id(self, tenant_id: str | uuid.UUID) -> Optional[TenantPOIContext]:
        """
        Get POI context by tenant ID.

        Args:
            tenant_id: Tenant UUID (accepts a UUID string for convenience;
                raises ValueError when the string is not a valid UUID)

        Returns:
            TenantPOIContext or None if not found
        """
        if isinstance(tenant_id, str):
            tenant_id = uuid.UUID(tenant_id)
        stmt = select(TenantPOIContext).where(
            TenantPOIContext.tenant_id == tenant_id
        )
        result = await self.session.execute(stmt)
        return result.scalar_one_or_none()

    async def get_by_id(self, poi_context_id: str | uuid.UUID) -> Optional[TenantPOIContext]:
        """
        Get POI context by ID.

        Args:
            poi_context_id: POI context UUID (string form accepted)

        Returns:
            TenantPOIContext or None if not found
        """
        if isinstance(poi_context_id, str):
            poi_context_id = uuid.UUID(poi_context_id)
        stmt = select(TenantPOIContext).where(
            TenantPOIContext.id == poi_context_id
        )
        result = await self.session.execute(stmt)
        return result.scalar_one_or_none()

    async def update(
        self,
        tenant_id: str | uuid.UUID,
        update_data: dict
    ) -> Optional[TenantPOIContext]:
        """
        Update POI context for tenant.

        Unknown keys in update_data are silently skipped (hasattr guard).

        Args:
            tenant_id: Tenant UUID (string form accepted)
            update_data: Dictionary with fields to update

        Returns:
            Updated TenantPOIContext or None if not found
        """
        if isinstance(tenant_id, str):
            tenant_id = uuid.UUID(tenant_id)
        poi_context = await self.get_by_tenant_id(tenant_id)
        if not poi_context:
            return None
        # Update fields
        for key, value in update_data.items():
            if hasattr(poi_context, key):
                setattr(poi_context, key, value)
        # Update timestamp
        poi_context.updated_at = datetime.now(timezone.utc)
        await self.session.commit()
        await self.session.refresh(poi_context)
        logger.info(
            "POI context updated",
            tenant_id=str(tenant_id),
            updated_fields=list(update_data.keys())
        )
        return poi_context

    async def create_or_update(
        self,
        tenant_id: str | uuid.UUID,
        poi_detection_results: dict
    ) -> TenantPOIContext:
        """
        Create new POI context or update existing one.

        Maps the raw detection payload onto column values; an ISO-8601
        "detection_timestamp" string is parsed (with a trailing "Z" normalized
        to "+00:00" for fromisoformat), any other value falls back to "now".

        Args:
            tenant_id: Tenant UUID (string form accepted)
            poi_detection_results: Full POI detection results; must contain
                "location" with "latitude"/"longitude" (KeyError otherwise)

        Returns:
            Created or updated TenantPOIContext
        """
        if isinstance(tenant_id, str):
            tenant_id = uuid.UUID(tenant_id)
        existing = await self.get_by_tenant_id(tenant_id)
        poi_context_data = {
            "tenant_id": tenant_id,
            "latitude": poi_detection_results["location"]["latitude"],
            "longitude": poi_detection_results["location"]["longitude"],
            "poi_detection_results": poi_detection_results.get("poi_categories", {}),
            "ml_features": poi_detection_results.get("ml_features", {}),
            "total_pois_detected": poi_detection_results.get("summary", {}).get("total_pois_detected", 0),
            "high_impact_categories": poi_detection_results.get("summary", {}).get("high_impact_categories", []),
            "relevant_categories": poi_detection_results.get("relevant_categories", []),
            "detection_timestamp": datetime.fromisoformat(
                poi_detection_results["detection_timestamp"].replace("Z", "+00:00")
            ) if isinstance(poi_detection_results.get("detection_timestamp"), str)
            else datetime.now(timezone.utc),
            "detection_status": poi_detection_results.get("detection_status", "completed"),
            "detection_error": None if poi_detection_results.get("detection_status") == "completed"
            else str(poi_detection_results.get("detection_errors"))
        }
        if existing:
            # Update existing
            update_data = {
                **poi_context_data,
                "last_refreshed_at": datetime.now(timezone.utc)
            }
            # mark_refreshed() mutates the already-loaded instance (updates
            # next_refresh_date); the change is persisted by update()'s commit.
            existing.mark_refreshed()  # Update next_refresh_date
            return await self.update(tenant_id, update_data)
        else:
            # Create new
            return await self.create(poi_context_data)

    async def delete_by_tenant_id(self, tenant_id: str | uuid.UUID) -> bool:
        """
        Delete POI context for tenant.

        Args:
            tenant_id: Tenant UUID (string form accepted)

        Returns:
            True if deleted, False if not found
        """
        if isinstance(tenant_id, str):
            tenant_id = uuid.UUID(tenant_id)
        stmt = delete(TenantPOIContext).where(
            TenantPOIContext.tenant_id == tenant_id
        )
        result = await self.session.execute(stmt)
        await self.session.commit()
        deleted = result.rowcount > 0
        if deleted:
            logger.info("POI context deleted", tenant_id=str(tenant_id))
        return deleted

    async def get_stale_contexts(self, limit: int = 100) -> List[TenantPOIContext]:
        """
        Get POI contexts that need refresh (next_refresh_date in the past).

        Args:
            limit: Maximum number of contexts to return

        Returns:
            List of stale TenantPOIContext instances (no defined ordering)
        """
        now = datetime.now(timezone.utc)
        stmt = (
            select(TenantPOIContext)
            .where(TenantPOIContext.next_refresh_date <= now)
            .limit(limit)
        )
        result = await self.session.execute(stmt)
        return list(result.scalars().all())

    async def count_by_status(self) -> dict:
        """
        Count POI contexts by detection status.

        Returns:
            Dictionary mapping detection_status value -> row count
        """
        from sqlalchemy import func
        stmt = select(
            TenantPOIContext.detection_status,
            func.count(TenantPOIContext.id)
        ).group_by(TenantPOIContext.detection_status)
        result = await self.session.execute(stmt)
        rows = result.all()
        return {status: count for status, count in rows}

View File

@@ -0,0 +1,226 @@
# ================================================================
# services/data/app/repositories/traffic_repository.py
# ================================================================
"""
Traffic Repository - Enhanced for multiple cities with comprehensive data access patterns
Follows existing repository architecture while adding city-specific functionality
"""
from typing import Optional, List, Dict, Any, Type, Tuple
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select, and_, or_, func, desc, asc, text, update, delete
from sqlalchemy.orm import selectinload
from datetime import datetime, timezone, timedelta
import structlog
from app.models.traffic import TrafficData
from app.schemas.traffic import TrafficDataCreate, TrafficDataResponse
from shared.database.exceptions import DatabaseError, ValidationError
logger = structlog.get_logger()
class TrafficRepository:
    """
    Enhanced repository for traffic data operations across multiple cities.

    Provides city-aware queries and batch ingestion with duplicate handling.
    All failures are wrapped in DatabaseError with the original exception
    chained (``raise ... from e``) so tracebacks are preserved.
    """

    def __init__(self, session: AsyncSession):
        # Session lifecycle is owned by the caller.
        self.session = session
        self.model = TrafficData

    # ================================================================
    # CORE TRAFFIC DATA OPERATIONS
    # ================================================================

    async def get_by_location_and_date_range(
        self,
        latitude: float,
        longitude: float,
        start_date: datetime,
        end_date: datetime,
        tenant_id: Optional[str] = None
    ) -> List[TrafficData]:
        """Get traffic data by location and date range.

        The location key is the lat/lon pair rounded to 4 decimals, matching
        how records are stored by store_traffic_data_batch.

        Args:
            latitude / longitude: Coordinates identifying the location.
            start_date / end_date: Inclusive date bounds (skipped if falsy).
            tenant_id: Optional tenant filter.

        Returns:
            Matching rows ordered by date ascending.

        Raises:
            DatabaseError: on any query failure (original exception chained).
        """
        # Key computed outside try so the except block can always log it.
        location_id = f"{latitude:.4f},{longitude:.4f}"
        try:
            query = select(self.model).where(self.model.location_id == location_id)
            if tenant_id:
                query = query.where(self.model.tenant_id == tenant_id)
            if start_date:
                query = query.where(self.model.date >= start_date)
            if end_date:
                query = query.where(self.model.date <= end_date)
            query = query.order_by(self.model.date)
            result = await self.session.execute(query)
            return list(result.scalars().all())
        except Exception as e:
            logger.error("Failed to get traffic data by location and date range",
                        latitude=latitude, longitude=longitude,
                        error=str(e))
            raise DatabaseError(f"Failed to get traffic data: {str(e)}") from e

    async def store_traffic_data_batch(
        self,
        traffic_data_list: List[Dict[str, Any]],
        location_id: str,
        tenant_id: Optional[str] = None
    ) -> int:
        """Store a batch of traffic data records with validation and duplicate handling.

        Existing (location_id, date) pairs are skipped; invalid records (see
        _validate_traffic_data) are silently dropped.

        Args:
            traffic_data_list: Raw record dicts; 'date' is required per record.
            location_id: Pre-computed location key for all records.
            tenant_id: Optional tenant to attach to every inserted row.

        Returns:
            Number of rows actually inserted.

        Raises:
            DatabaseError: on any storage failure (after rollback).
        """
        stored_count = 0
        try:
            if not traffic_data_list:
                return 0
            # Find existing dates to avoid duplicates — batch the IN() queries
            # because PostgreSQL caps a statement at 32767 bind parameters.
            dates = [data.get('date') for data in traffic_data_list if data.get('date')]
            existing_dates = set()
            if dates:
                batch_size = 30000  # Safe batch size under the limit
                for i in range(0, len(dates), batch_size):
                    date_batch = dates[i:i + batch_size]
                    existing_stmt = select(TrafficData.date).where(
                        and_(
                            TrafficData.location_id == location_id,
                            TrafficData.date.in_(date_batch)
                        )
                    )
                    result = await self.session.execute(existing_stmt)
                    existing_dates.update({row[0] for row in result.fetchall()})
                logger.debug(f"Found {len(existing_dates)} existing records for location {location_id}")
            batch_records = []
            for data in traffic_data_list:
                record_date = data.get('date')
                if not record_date or record_date in existing_dates:
                    continue  # Skip duplicates
                if self._validate_traffic_data(data):
                    batch_records.append({
                        'location_id': location_id,
                        'city': data.get('city', 'madrid'),  # Default to madrid for historical data
                        'tenant_id': tenant_id,  # Include tenant_id in batch insert
                        'date': record_date,
                        'traffic_volume': data.get('traffic_volume'),
                        'pedestrian_count': data.get('pedestrian_count'),
                        'congestion_level': data.get('congestion_level'),
                        'average_speed': data.get('average_speed'),
                        # NOTE(review): stores the Python repr, not JSON —
                        # kept for compatibility with existing rows.
                        'raw_data': str(data)
                    })
            if batch_records:
                # Core bulk insert for performance (bypasses the ORM unit of work).
                await self.session.execute(
                    TrafficData.__table__.insert(),
                    batch_records
                )
                await self.session.commit()
                stored_count = len(batch_records)
                logger.info(f"Successfully stored {stored_count} traffic records for location {location_id}")
        except Exception as e:
            logger.error("Failed to store traffic data batch",
                        error=str(e), location_id=location_id)
            await self.session.rollback()
            raise DatabaseError(f"Batch store failed: {str(e)}") from e
        return stored_count

    def _validate_traffic_data(self, data: Dict[str, Any]) -> bool:
        """Validate a single traffic record before storage.

        Rules: 'date' must be present and truthy; numeric fields, when
        present, must lie in plausible ranges (volumes/counts 0..10000,
        speed 0..200); congestion_level, when present, must be one of the
        known levels.

        Returns:
            True when the record may be stored, False otherwise.
        """
        required_fields = ['date']
        for field in required_fields:
            if not data.get(field):
                return False
        traffic_volume = data.get('traffic_volume')
        if traffic_volume is not None and (traffic_volume < 0 or traffic_volume > 10000):
            return False
        pedestrian_count = data.get('pedestrian_count')
        if pedestrian_count is not None and (pedestrian_count < 0 or pedestrian_count > 10000):
            return False
        average_speed = data.get('average_speed')
        if average_speed is not None and (average_speed < 0 or average_speed > 200):
            return False
        congestion_level = data.get('congestion_level')
        if congestion_level and congestion_level not in ['low', 'medium', 'high', 'blocked']:
            return False
        return True

    async def get_historical_traffic_for_training(self,
                                                latitude: float,
                                                longitude: float,
                                                start_date: datetime,
                                                end_date: datetime) -> List[TrafficData]:
        """Retrieve stored traffic data for training ML models.

        Returns:
            Rows for the rounded lat/lon key within [start_date, end_date],
            ordered by date ascending.

        Raises:
            DatabaseError: on any query failure (original exception chained).
        """
        # Computed before try so the except block can safely reference it.
        location_id = f"{latitude:.4f},{longitude:.4f}"
        try:
            stmt = select(TrafficData).where(
                and_(
                    TrafficData.location_id == location_id,
                    TrafficData.date >= start_date,
                    TrafficData.date <= end_date
                )
            ).order_by(TrafficData.date)
            result = await self.session.execute(stmt)
            return list(result.scalars().all())
        except Exception as e:
            logger.error("Failed to retrieve traffic data for training",
                        error=str(e), location_id=location_id)
            raise DatabaseError(f"Training data retrieval failed: {str(e)}") from e

    async def get_recent_by_location(
        self,
        latitude: float,
        longitude: float,
        cutoff_datetime: datetime,
        tenant_id: Optional[str] = None
    ) -> List[TrafficData]:
        """Get recent traffic data by location after a cutoff datetime.

        Args:
            latitude / longitude: Coordinates identifying the location.
            cutoff_datetime: Only rows dated at or after this are returned.
            tenant_id: Optional tenant filter (previously accepted but
                ignored; now applied, consistent with
                get_by_location_and_date_range).

        Returns:
            Matching rows ordered by date descending (newest first).

        Raises:
            DatabaseError: on any query failure (original exception chained).
        """
        location_id = f"{latitude:.4f},{longitude:.4f}"
        try:
            conditions = [
                TrafficData.location_id == location_id,
                TrafficData.date >= cutoff_datetime
            ]
            if tenant_id:
                conditions.append(TrafficData.tenant_id == tenant_id)
            stmt = select(TrafficData).where(and_(*conditions)).order_by(TrafficData.date.desc())
            result = await self.session.execute(stmt)
            records = list(result.scalars().all())
            logger.info("Retrieved recent traffic data",
                       location_id=location_id, count=len(records),
                       cutoff=cutoff_datetime.isoformat())
            return records
        except Exception as e:
            logger.error("Failed to retrieve recent traffic data",
                        error=str(e), location_id=location_id)
            raise DatabaseError(f"Recent traffic data retrieval failed: {str(e)}") from e

View File

@@ -0,0 +1,138 @@
# services/external/app/repositories/weather_repository.py
from typing import List, Dict, Any, Optional
from datetime import datetime
from sqlalchemy import select, and_
from sqlalchemy.ext.asyncio import AsyncSession
import structlog
import json
from app.models.weather import WeatherData
logger = structlog.get_logger()
class WeatherRepository:
    """
    Repository for weather data operations, adapted for WeatherService.

    Write operations commit immediately and roll back (then re-raise) on
    failure; the injected AsyncSession is owned by the caller.
    """

    def __init__(self, session: AsyncSession):
        # Session lifecycle is managed by the caller.
        self.session = session

    async def get_historical_weather(self,
                                   location_id: str,
                                   start_date: datetime,
                                   end_date: datetime) -> List[WeatherData]:
        """
        Retrieve historical weather data for a location and date range.

        Directly supports the data retrieval logic in WeatherService.

        Args:
            location_id: Stored location key.
            start_date / end_date: Inclusive date bounds.

        Returns:
            Matching rows ordered by date ascending.

        Raises:
            Re-raises any database error after logging.
        """
        try:
            stmt = select(WeatherData).where(
                and_(
                    WeatherData.location_id == location_id,
                    WeatherData.date >= start_date,
                    WeatherData.date <= end_date
                )
            ).order_by(WeatherData.date)
            result = await self.session.execute(stmt)
            records = result.scalars().all()
            logger.debug(f"Retrieved {len(records)} historical records for location {location_id}")
            return list(records)
        except Exception as e:
            logger.error(
                "Failed to get historical weather from repository",
                error=str(e),
                location_id=location_id
            )
            raise

    @staticmethod
    def _json_safe_field(value: Any, field_name: str) -> Any:
        """Make one JSON column value safe for storage.

        Strings are assumed to be pre-serialized and returned untouched.
        For dicts, top-level datetime-like values (anything with isoformat())
        are converted to ISO-8601 strings; other values — including non-dict,
        non-string inputs — pass through unchanged. If conversion fails the
        value is stringified as a last resort.

        NOTE(review): only top-level dict values are converted; datetimes
        nested deeper are passed through as-is (pre-existing behavior).
        """
        if isinstance(value, str):
            return value
        try:
            if isinstance(value, dict):
                return {
                    k: (v.isoformat() if hasattr(v, 'isoformat') else v)
                    for k, v in value.items()
                }
            return value
        except Exception as e:
            logger.warning(f"Could not serialize {field_name}, storing as string: {e}")
            return str(value)

    def _serialize_json_fields(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Serialize JSON fields (raw_data, processed_data) for proper JSON storage.

        Returns a shallow copy of `data`; the input dict is not mutated.
        Both JSON columns are handled by the same helper, removing the
        previous copy-pasted per-field logic.
        """
        serialized = data.copy()
        for field_name in ('raw_data', 'processed_data'):
            if serialized.get(field_name) is not None:
                serialized[field_name] = self._json_safe_field(
                    serialized[field_name], field_name
                )
        return serialized

    async def bulk_create_weather_data(self, weather_records: List[Dict[str, Any]]) -> None:
        """
        Bulk insert new weather records into the database.

        Used by WeatherService after fetching new historical data from an
        external API. No-op for an empty list.

        Raises:
            Re-raises any database error after rolling back.
        """
        try:
            if not weather_records:
                return
            # Serialize JSON fields before creating model instances
            serialized_records = [self._serialize_json_fields(data) for data in weather_records]
            records = [WeatherData(**data) for data in serialized_records]
            self.session.add_all(records)
            await self.session.commit()
            logger.info(f"Successfully bulk inserted {len(records)} weather records")
        except Exception as e:
            await self.session.rollback()
            logger.error(
                "Failed to bulk create weather records",
                error=str(e),
                count=len(weather_records)
            )
            raise

    async def create_weather_data(self, data: Dict[str, Any]) -> WeatherData:
        """
        Create a single new weather data record.

        Returns:
            The persisted WeatherData, refreshed from the database.

        Raises:
            Re-raises any database error after rolling back.
        """
        try:
            # Serialize JSON fields before creating the model instance
            serialized_data = self._serialize_json_fields(data)
            new_record = WeatherData(**serialized_data)
            self.session.add(new_record)
            await self.session.commit()
            await self.session.refresh(new_record)
            logger.info(f"Created new weather record with ID {new_record.id}")
            return new_record
        except Exception as e:
            await self.session.rollback()
            logger.error("Failed to create single weather record", error=str(e))
            raise