REFACTOR external service and improve websocket training
This commit is contained in:
249
services/external/app/repositories/city_data_repository.py
vendored
Normal file
249
services/external/app/repositories/city_data_repository.py
vendored
Normal file
@@ -0,0 +1,249 @@
|
||||
# services/external/app/repositories/city_data_repository.py
|
||||
"""
|
||||
City Data Repository - Manages shared city-based data storage
|
||||
"""
|
||||
|
||||
from typing import List, Dict, Any, Optional
|
||||
from datetime import datetime
|
||||
from sqlalchemy import select, delete, and_
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
import structlog
|
||||
|
||||
from app.models.city_weather import CityWeatherData
|
||||
from app.models.city_traffic import CityTrafficData
|
||||
|
||||
# Module-level structlog logger for this repository module.
logger = structlog.get_logger()
|
||||
|
||||
|
||||
class CityDataRepository:
    """Repository for city-based historical data.

    Wraps an ``AsyncSession`` and offers bulk insert, date-range query,
    cutoff-based delete and coverage counting for ``CityWeatherData`` and
    ``CityTrafficData``. Every write method commits on the session it was
    given and rolls the session back before re-raising on failure.
    """

    def __init__(self, session: AsyncSession):
        """Store the session used by every query in this repository."""
        self.session = session

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _cast_or_none(value, caster):
        """Return ``caster(value)``, or ``None`` only when ``value`` is None.

        A plain truthiness test would turn a legitimate aggregate of ``0``
        into ``None``; only a missing (NULL) aggregate should map to ``None``.
        """
        return None if value is None else caster(value)

    @staticmethod
    def _city_range_filter(model, city_id, start_date, end_date):
        """Build the ``city_id`` + inclusive ``date`` range predicate for ``model``."""
        return and_(
            model.city_id == city_id,
            model.date >= start_date,
            model.date <= end_date,
        )

    async def _count_in_range(self, model, city_id, start_date, end_date) -> int:
        """Count ``model`` rows for a city in an inclusive date range via SQL COUNT."""
        from sqlalchemy import func

        stmt = (
            select(func.count())
            .select_from(model)
            .where(self._city_range_filter(model, city_id, start_date, end_date))
        )
        result = await self.session.execute(stmt)
        return result.scalar_one()

    async def _delete_before(self, model, city_id, cutoff_date, error_event) -> int:
        """Delete ``model`` rows for a city strictly older than ``cutoff_date``.

        Commits on success. On failure the session is rolled back, the error
        is logged under ``error_event`` and the exception is re-raised.
        """
        stmt = delete(model).where(
            and_(
                model.city_id == city_id,
                model.date < cutoff_date,
            )
        )
        try:
            result = await self.session.execute(stmt)
            await self.session.commit()
        except Exception as e:
            await self.session.rollback()
            logger.error(error_event, city_id=city_id, error=str(e))
            raise
        return result.rowcount

    # ------------------------------------------------------------------
    # Weather
    # ------------------------------------------------------------------

    async def bulk_store_weather(
        self,
        city_id: str,
        weather_records: List[Dict[str, Any]]
    ) -> int:
        """Bulk insert weather records for a city.

        Args:
            city_id: City identifier stamped on every inserted row.
            weather_records: Dicts read with ``.get`` for the keys ``date``,
                ``temperature``, ``precipitation``, ``humidity``,
                ``wind_speed``, ``pressure``, ``description``, ``source``
                (defaults to ``'ingestion'``) and ``raw_data``.

        Returns:
            Number of rows inserted; ``0`` when ``weather_records`` is empty
            (the session is not touched in that case).

        Raises:
            Exception: Any error from building or committing the rows is
                re-raised after the session has been rolled back.
        """
        if not weather_records:
            return 0

        try:
            objects = [
                CityWeatherData(
                    city_id=city_id,
                    date=record.get('date'),
                    temperature=record.get('temperature'),
                    precipitation=record.get('precipitation'),
                    humidity=record.get('humidity'),
                    wind_speed=record.get('wind_speed'),
                    pressure=record.get('pressure'),
                    description=record.get('description'),
                    source=record.get('source', 'ingestion'),
                    raw_data=record.get('raw_data')
                )
                for record in weather_records
            ]

            self.session.add_all(objects)
            await self.session.commit()

            logger.info(
                "Weather data stored",
                city_id=city_id,
                records=len(objects)
            )

            return len(objects)

        except Exception as e:
            await self.session.rollback()
            logger.error(
                "Error storing weather data",
                city_id=city_id,
                error=str(e)
            )
            raise

    async def get_weather_by_city_and_range(
        self,
        city_id: str,
        start_date: datetime,
        end_date: datetime
    ) -> List[CityWeatherData]:
        """Get weather rows for a city within an inclusive date range.

        Rows are ordered by ``date`` ascending.
        """
        stmt = select(CityWeatherData).where(
            self._city_range_filter(CityWeatherData, city_id, start_date, end_date)
        ).order_by(CityWeatherData.date)

        result = await self.session.execute(stmt)
        # Materialise as a real list so the return value matches the annotation.
        return list(result.scalars().all())

    async def delete_weather_before(
        self,
        city_id: str,
        cutoff_date: datetime
    ) -> int:
        """Delete weather records older than cutoff date.

        Returns:
            The ``rowcount`` reported for the DELETE statement.

        Raises:
            Exception: Re-raised after rollback if the delete or commit fails.
        """
        return await self._delete_before(
            CityWeatherData, city_id, cutoff_date, "Error deleting weather data"
        )

    # ------------------------------------------------------------------
    # Traffic
    # ------------------------------------------------------------------

    async def bulk_store_traffic(
        self,
        city_id: str,
        traffic_records: List[Dict[str, Any]]
    ) -> int:
        """Bulk insert traffic records for a city.

        Args:
            city_id: City identifier stamped on every inserted row.
            traffic_records: Dicts read with ``.get`` for the keys ``date``,
                ``traffic_volume``, ``pedestrian_count``, ``congestion_level``,
                ``average_speed``, ``source`` (defaults to ``'ingestion'``)
                and ``raw_data``.

        Returns:
            Number of rows inserted; ``0`` when ``traffic_records`` is empty
            (the session is not touched in that case).

        Raises:
            Exception: Any error from building or committing the rows is
                re-raised after the session has been rolled back.
        """
        if not traffic_records:
            return 0

        try:
            objects = [
                CityTrafficData(
                    city_id=city_id,
                    date=record.get('date'),
                    traffic_volume=record.get('traffic_volume'),
                    pedestrian_count=record.get('pedestrian_count'),
                    congestion_level=record.get('congestion_level'),
                    average_speed=record.get('average_speed'),
                    source=record.get('source', 'ingestion'),
                    raw_data=record.get('raw_data')
                )
                for record in traffic_records
            ]

            self.session.add_all(objects)
            await self.session.commit()

            logger.info(
                "Traffic data stored",
                city_id=city_id,
                records=len(objects)
            )

            return len(objects)

        except Exception as e:
            await self.session.rollback()
            logger.error(
                "Error storing traffic data",
                city_id=city_id,
                error=str(e)
            )
            raise

    async def get_traffic_by_city_and_range(
        self,
        city_id: str,
        start_date: datetime,
        end_date: datetime
    ) -> List[CityTrafficData]:
        """Get traffic data for city within date range - aggregated daily.

        The query groups rows by calendar day and averages ``traffic_volume``,
        ``pedestrian_count`` and ``average_speed``; ``source`` is the SQL
        ``max`` of the day's source values.

        Returns:
            One new ``CityTrafficData`` object per day, ordered by day. The
            objects are built from the aggregate rows and are not added to
            the session. ``date`` is midnight of that day and
            ``congestion_level`` is always ``'medium'``, a placeholder since
            the level is not aggregated.
        """
        from sqlalchemy import func, cast, Date

        # Aggregate hourly data to daily averages to avoid loading hundreds of thousands of records
        day = cast(CityTrafficData.date, Date)
        stmt = select(
            day.label('date'),
            func.avg(CityTrafficData.traffic_volume).label('traffic_volume'),
            func.avg(CityTrafficData.pedestrian_count).label('pedestrian_count'),
            func.avg(CityTrafficData.average_speed).label('average_speed'),
            func.max(CityTrafficData.source).label('source')
        ).where(
            self._city_range_filter(CityTrafficData, city_id, start_date, end_date)
        ).group_by(
            day
        ).order_by(
            day
        )

        result = await self.session.execute(stmt)

        # Convert aggregated rows to CityTrafficData objects.
        # Averages are mapped to None only when NULL, so a genuine 0 survives.
        return [
            CityTrafficData(
                city_id=city_id,
                date=datetime.combine(row.date, datetime.min.time()),
                traffic_volume=self._cast_or_none(row.traffic_volume, int),
                pedestrian_count=self._cast_or_none(row.pedestrian_count, int),
                congestion_level='medium',  # Default since we're averaging
                average_speed=self._cast_or_none(row.average_speed, float),
                source=row.source or 'aggregated'
            )
            for row in result
        ]

    async def delete_traffic_before(
        self,
        city_id: str,
        cutoff_date: datetime
    ) -> int:
        """Delete traffic records older than cutoff date.

        Returns:
            The ``rowcount`` reported for the DELETE statement.

        Raises:
            Exception: Re-raised after rollback if the delete or commit fails.
        """
        return await self._delete_before(
            CityTrafficData, city_id, cutoff_date, "Error deleting traffic data"
        )

    # ------------------------------------------------------------------
    # Coverage
    # ------------------------------------------------------------------

    async def get_data_coverage(
        self,
        city_id: str,
        start_date: datetime,
        end_date: datetime
    ) -> Dict[str, int]:
        """
        Check how much data exists for a city in a date range

        Counting is done in the database (``SELECT count(*)``) instead of
        loading every matching row into memory.

        Returns dict with counts: {'weather': X, 'traffic': Y}
        """
        weather_count = await self._count_in_range(
            CityWeatherData, city_id, start_date, end_date
        )
        traffic_count = await self._count_in_range(
            CityTrafficData, city_id, start_date, end_date
        )

        return {
            'weather': weather_count,
            'traffic': traffic_count
        }
|
||||
Reference in New Issue
Block a user