Initial commit - production deployment
services/external/Dockerfile (vendored, new file, 59 lines)
@@ -0,0 +1,59 @@
# =============================================================================
# External Service Dockerfile - Environment-Configurable Base Images
# =============================================================================
# Build arguments for registry configuration:
# - BASE_REGISTRY: Registry URL (default: docker.io for Docker Hub)
# - PYTHON_IMAGE: Python image name and tag (default: python:3.11-slim)
# =============================================================================

ARG BASE_REGISTRY=docker.io
ARG PYTHON_IMAGE=python:3.11-slim

FROM ${BASE_REGISTRY}/${PYTHON_IMAGE} AS shared
WORKDIR /shared
COPY shared/ /shared/

ARG BASE_REGISTRY=docker.io
ARG PYTHON_IMAGE=python:3.11-slim
FROM ${BASE_REGISTRY}/${PYTHON_IMAGE}

WORKDIR /app

# Install system dependencies
RUN apt-get update && apt-get install -y \
    gcc \
    g++ \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements
COPY shared/requirements-tracing.txt /tmp/

COPY services/external/requirements.txt .

# Install Python dependencies
RUN pip install --no-cache-dir -r /tmp/requirements-tracing.txt

RUN pip install --no-cache-dir -r requirements.txt

# Copy shared libraries from the shared stage
COPY --from=shared /shared /app/shared

# Copy application code
COPY services/external/ .


# Add shared libraries to Python path
ENV PYTHONPATH="/app:/app/shared:${PYTHONPATH:-}"

# Expose port
EXPOSE 8000

# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD curl -f http://localhost:8000/health || exit 1

# Run application
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
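For reference, a minimal sketch (not part of this commit) of how the BASE_REGISTRY and PYTHON_IMAGE build arguments above could be overridden from a CI script; the registry host and image tag are hypothetical examples, and the build context must be the repository root so that shared/ and services/external/ are visible to the COPY instructions.

# build_external_image.py -- illustrative sketch, assumes Docker is installed locally
import subprocess

subprocess.run(
    [
        "docker", "build",
        "-f", "services/external/Dockerfile",
        "--build-arg", "BASE_REGISTRY=registry.example.internal",  # hypothetical private registry
        "--build-arg", "PYTHON_IMAGE=python:3.11-slim",
        "-t", "external-service:local",
        ".",  # repository root as build context
    ],
    check=True,
)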
services/external/README.md (vendored, new file, 1049 lines): diff suppressed because it is too large
services/external/alembic.ini (vendored, new file, 84 lines)
@@ -0,0 +1,84 @@
# ================================================================
# services/external/alembic.ini - Alembic Configuration
# ================================================================
[alembic]
# path to migration scripts
script_location = migrations

# template used to generate migration file names
file_template = %%(year)d%%(month).2d%%(day).2d_%%(hour).2d%%(minute).2d_%%(rev)s_%%(slug)s

# sys.path path, will be prepended to sys.path if present.
prepend_sys_path = .

# timezone to use when rendering the date within the migration file
# as well as the filename.
timezone = Europe/Madrid

# max length of characters to apply to the
# "slug" field
truncate_slug_length = 40

# set to 'true' to run the environment during
# the 'revision' command, regardless of autogenerate
revision_environment = false

# set to 'true' to allow .pyc and .pyo files without
# a source .py file to be detected as revisions in the
# versions/ directory
sourceless = false

# version of a migration file's filename format
version_num_format = %%s

# version path separator
version_path_separator = os

# set to 'true' to search source files recursively
# in each "version_locations" directory
recursive_version_locations = false

# the output encoding used when revision files
# are written from script.py.mako
output_encoding = utf-8

# Database URL - will be overridden by environment variable or settings
sqlalchemy.url = postgresql+asyncpg://external_user:password@external-db-service:5432/external_db

[post_write_hooks]
# post_write_hooks defines scripts or Python functions that are run
# on newly generated revision scripts.

[loggers]
keys = root,sqlalchemy,alembic

[handlers]
keys = console

[formatters]
keys = generic

[logger_root]
level = WARN
handlers = console
qualname =

[logger_sqlalchemy]
level = WARN
handlers =
qualname = sqlalchemy.engine

[logger_alembic]
level = INFO
handlers =
qualname = alembic

[handler_console]
class = StreamHandler
args = (sys.stderr,)
level = NOTSET
formatter = generic

[formatter_generic]
format = %(levelname)-5.5s [%(name)s] %(message)s
datefmt = %H:%M:%S
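A minimal sketch (not in this commit) of the override mentioned in the comment above the placeholder sqlalchemy.url; the EXTERNAL_DATABASE_URL variable name is a hypothetical example, the service's real settings module is not shown here, and the project's migrations/env.py is assumed to support the asyncpg driver.

# run_migrations.py -- illustrative sketch
import os

from alembic import command
from alembic.config import Config

cfg = Config("services/external/alembic.ini")
db_url = os.environ.get("EXTERNAL_DATABASE_URL")
if db_url:
    # Takes precedence over the placeholder URL hard-coded in the ini file
    cfg.set_main_option("sqlalchemy.url", db_url)
command.upgrade(cfg, "head")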
services/external/app/__init__.py (vendored, new file, 1 line)
@@ -0,0 +1 @@
# services/external/app/__init__.py
services/external/app/api/__init__.py (vendored, new file, 1 line)
@@ -0,0 +1 @@
# services/external/app/api/__init__.py
services/external/app/api/audit.py (vendored, new file, 237 lines)
@@ -0,0 +1,237 @@
# services/external/app/api/audit.py
"""
Audit Logs API - Retrieve audit trail for external service
"""

from fastapi import APIRouter, Depends, HTTPException, Query, Path, status
from typing import Optional, Dict, Any
from uuid import UUID
from datetime import datetime
import structlog
from sqlalchemy import select, func, and_
from sqlalchemy.ext.asyncio import AsyncSession

from app.models import AuditLog
from shared.auth.decorators import get_current_user_dep
from shared.auth.access_control import require_user_role
from shared.routing import RouteBuilder
from shared.models.audit_log_schemas import (
    AuditLogResponse,
    AuditLogListResponse,
    AuditLogStatsResponse
)
from app.core.database import database_manager

route_builder = RouteBuilder('external')
router = APIRouter(tags=["audit-logs"])
logger = structlog.get_logger()


async def get_db():
    """Database session dependency"""
    async with database_manager.get_session() as session:
        yield session


@router.get(
    route_builder.build_base_route("audit-logs"),
    response_model=AuditLogListResponse
)
@require_user_role(['admin', 'owner'])
async def get_audit_logs(
    tenant_id: UUID = Path(..., description="Tenant ID"),
    start_date: Optional[datetime] = Query(None, description="Filter logs from this date"),
    end_date: Optional[datetime] = Query(None, description="Filter logs until this date"),
    user_id: Optional[UUID] = Query(None, description="Filter by user ID"),
    action: Optional[str] = Query(None, description="Filter by action type"),
    resource_type: Optional[str] = Query(None, description="Filter by resource type"),
    severity: Optional[str] = Query(None, description="Filter by severity level"),
    search: Optional[str] = Query(None, description="Search in description field"),
    limit: int = Query(100, ge=1, le=1000, description="Number of records to return"),
    offset: int = Query(0, ge=0, description="Number of records to skip"),
    current_user: Dict[str, Any] = Depends(get_current_user_dep),
    db: AsyncSession = Depends(get_db)
):
    """
    Get audit logs for external service.
    Requires admin or owner role.
    """
    try:
        logger.info(
            "Retrieving audit logs",
            tenant_id=tenant_id,
            user_id=current_user.get("user_id"),
            filters={
                "start_date": start_date,
                "end_date": end_date,
                "action": action,
                "resource_type": resource_type,
                "severity": severity
            }
        )

        # Build query filters
        filters = [AuditLog.tenant_id == tenant_id]

        if start_date:
            filters.append(AuditLog.created_at >= start_date)
        if end_date:
            filters.append(AuditLog.created_at <= end_date)
        if user_id:
            filters.append(AuditLog.user_id == user_id)
        if action:
            filters.append(AuditLog.action == action)
        if resource_type:
            filters.append(AuditLog.resource_type == resource_type)
        if severity:
            filters.append(AuditLog.severity == severity)
        if search:
            filters.append(AuditLog.description.ilike(f"%{search}%"))

        # Count total matching records
        count_query = select(func.count()).select_from(AuditLog).where(and_(*filters))
        total_result = await db.execute(count_query)
        total = total_result.scalar() or 0

        # Fetch paginated results
        query = (
            select(AuditLog)
            .where(and_(*filters))
            .order_by(AuditLog.created_at.desc())
            .limit(limit)
            .offset(offset)
        )

        result = await db.execute(query)
        audit_logs = result.scalars().all()

        # Convert to response models
        items = [AuditLogResponse.from_orm(log) for log in audit_logs]

        logger.info(
            "Successfully retrieved audit logs",
            tenant_id=tenant_id,
            total=total,
            returned=len(items)
        )

        return AuditLogListResponse(
            items=items,
            total=total,
            limit=limit,
            offset=offset,
            has_more=(offset + len(items)) < total
        )

    except Exception as e:
        logger.error(
            "Failed to retrieve audit logs",
            error=str(e),
            tenant_id=tenant_id
        )
        raise HTTPException(
            status_code=500,
            detail=f"Failed to retrieve audit logs: {str(e)}"
        )


@router.get(
    route_builder.build_base_route("audit-logs/stats"),
    response_model=AuditLogStatsResponse
)
@require_user_role(['admin', 'owner'])
async def get_audit_log_stats(
    tenant_id: UUID = Path(..., description="Tenant ID"),
    start_date: Optional[datetime] = Query(None, description="Filter logs from this date"),
    end_date: Optional[datetime] = Query(None, description="Filter logs until this date"),
    current_user: Dict[str, Any] = Depends(get_current_user_dep),
    db: AsyncSession = Depends(get_db)
):
    """
    Get audit log statistics for external service.
    Requires admin or owner role.
    """
    try:
        logger.info(
            "Retrieving audit log statistics",
            tenant_id=tenant_id,
            user_id=current_user.get("user_id")
        )

        # Build base filters
        filters = [AuditLog.tenant_id == tenant_id]
        if start_date:
            filters.append(AuditLog.created_at >= start_date)
        if end_date:
            filters.append(AuditLog.created_at <= end_date)

        # Total events
        count_query = select(func.count()).select_from(AuditLog).where(and_(*filters))
        total_result = await db.execute(count_query)
        total_events = total_result.scalar() or 0

        # Events by action
        action_query = (
            select(AuditLog.action, func.count().label('count'))
            .where(and_(*filters))
            .group_by(AuditLog.action)
        )
        action_result = await db.execute(action_query)
        events_by_action = {row.action: row.count for row in action_result}

        # Events by severity
        severity_query = (
            select(AuditLog.severity, func.count().label('count'))
            .where(and_(*filters))
            .group_by(AuditLog.severity)
        )
        severity_result = await db.execute(severity_query)
        events_by_severity = {row.severity: row.count for row in severity_result}

        # Events by resource type
        resource_query = (
            select(AuditLog.resource_type, func.count().label('count'))
            .where(and_(*filters))
            .group_by(AuditLog.resource_type)
        )
        resource_result = await db.execute(resource_query)
        events_by_resource_type = {row.resource_type: row.count for row in resource_result}

        # Date range
        date_range_query = (
            select(
                func.min(AuditLog.created_at).label('min_date'),
                func.max(AuditLog.created_at).label('max_date')
            )
            .where(and_(*filters))
        )
        date_result = await db.execute(date_range_query)
        date_row = date_result.one()

        logger.info(
            "Successfully retrieved audit log statistics",
            tenant_id=tenant_id,
            total_events=total_events
        )

        return AuditLogStatsResponse(
            total_events=total_events,
            events_by_action=events_by_action,
            events_by_severity=events_by_severity,
            events_by_resource_type=events_by_resource_type,
            date_range={
                "min": date_row.min_date,
                "max": date_row.max_date
            }
        )

    except Exception as e:
        logger.error(
            "Failed to retrieve audit log statistics",
            error=str(e),
            tenant_id=tenant_id
        )
        raise HTTPException(
            status_code=500,
            detail=f"Failed to retrieve audit log statistics: {str(e)}"
        )
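A hypothetical client-side sketch (not part of this commit) of querying the audit-log endpoint above. The concrete path produced by RouteBuilder('external') is not visible in this diff, so the URL, host, and port below are assumed placeholders; the query parameters map directly to the Query arguments of get_audit_logs.

# audit_client_sketch.py -- illustrative only
import asyncio

import httpx


async def fetch_recent_errors(tenant_id: str, token: str) -> dict:
    params = {"severity": "error", "limit": 50, "offset": 0}
    headers = {"Authorization": f"Bearer {token}"}
    async with httpx.AsyncClient(base_url="http://external-service:8000") as client:
        resp = await client.get(
            f"/api/v1/tenants/{tenant_id}/audit-logs",  # placeholder path
            params=params,
            headers=headers,
        )
        resp.raise_for_status()
        # AuditLogListResponse: items, total, limit, offset, has_more
        return resp.json()

# asyncio.run(fetch_recent_errors("<tenant-uuid>", "<jwt>"))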
services/external/app/api/calendar_operations.py (vendored, new file, 488 lines)
@@ -0,0 +1,488 @@
# services/external/app/api/calendar_operations.py
"""
Calendar Operations API - School calendars and tenant location context endpoints
"""

from fastapi import APIRouter, Depends, HTTPException, Query, Path, Body
from typing import List, Optional
from uuid import UUID
import structlog

from app.schemas.calendar import (
    SchoolCalendarResponse,
    SchoolCalendarListResponse,
    TenantLocationContextResponse,
    TenantLocationContextCreateRequest,
    CalendarCheckResponse
)
from app.registry.calendar_registry import CalendarRegistry, SchoolType
from app.repositories.calendar_repository import CalendarRepository
from app.cache.redis_wrapper import ExternalDataCache
from shared.routing.route_builder import RouteBuilder
from shared.auth.decorators import get_current_user_dep
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.database import get_db
from datetime import datetime, date

route_builder = RouteBuilder('external')
router = APIRouter(tags=["calendar-operations"])
logger = structlog.get_logger()

# Initialize cache
cache = ExternalDataCache()


# ===== School Calendar Endpoints =====

@router.get(
    route_builder.build_operations_route("cities/{city_id}/school-calendars"),
    response_model=SchoolCalendarListResponse
)
async def list_school_calendars_for_city(
    city_id: str = Path(..., description="City ID (e.g., 'madrid')"),
    school_type: Optional[str] = Query(None, description="Filter by school type"),
    academic_year: Optional[str] = Query(None, description="Filter by academic year"),
    db: AsyncSession = Depends(get_db)
):
    """List all available school calendars for a city"""
    try:
        repo = CalendarRepository(db)
        calendars = await repo.get_calendars_by_city(city_id, enabled_only=True)

        # Apply filters if provided
        if school_type:
            calendars = [c for c in calendars if c.school_type == school_type]
        if academic_year:
            calendars = [c for c in calendars if c.academic_year == academic_year]

        calendar_responses = [
            SchoolCalendarResponse(
                calendar_id=str(c.id),
                calendar_name=c.calendar_name,
                city_id=c.city_id,
                school_type=c.school_type,
                academic_year=c.academic_year,
                holiday_periods=c.holiday_periods,
                school_hours=c.school_hours,
                source=c.source,
                enabled=c.enabled
            )
            for c in calendars
        ]

        return SchoolCalendarListResponse(
            city_id=city_id,
            calendars=calendar_responses,
            total=len(calendar_responses)
        )

    except Exception as e:
        logger.error(
            "Error listing school calendars",
            city_id=city_id,
            error=str(e)
        )
        raise HTTPException(
            status_code=500,
            detail=f"Error retrieving school calendars: {str(e)}"
        )


@router.get(
    route_builder.build_operations_route("school-calendars/{calendar_id}"),
    response_model=SchoolCalendarResponse
)
async def get_school_calendar(
    calendar_id: UUID = Path(..., description="School calendar ID"),
    db: AsyncSession = Depends(get_db)
):
    """Get detailed information about a specific school calendar (cached)"""
    try:
        calendar_id_str = str(calendar_id)

        # Check cache first
        cached = await cache.get_cached_calendar(calendar_id_str)
        if cached:
            logger.debug("Returning cached calendar", calendar_id=calendar_id_str)
            return SchoolCalendarResponse(**cached)

        # Cache miss - fetch from database
        repo = CalendarRepository(db)
        calendar = await repo.get_calendar_by_id(calendar_id)

        if not calendar:
            raise HTTPException(status_code=404, detail="School calendar not found")

        response_data = {
            "calendar_id": str(calendar.id),
            "calendar_name": calendar.calendar_name,
            "city_id": calendar.city_id,
            "school_type": calendar.school_type,
            "academic_year": calendar.academic_year,
            "holiday_periods": calendar.holiday_periods,
            "school_hours": calendar.school_hours,
            "source": calendar.source,
            "enabled": calendar.enabled
        }

        # Cache the result
        await cache.set_cached_calendar(calendar_id_str, response_data)

        return SchoolCalendarResponse(**response_data)

    except HTTPException:
        raise
    except Exception as e:
        logger.error(
            "Error retrieving school calendar",
            calendar_id=str(calendar_id),
            error=str(e)
        )
        raise HTTPException(
            status_code=500,
            detail=f"Error retrieving school calendar: {str(e)}"
        )


@router.get(
    route_builder.build_operations_route("school-calendars/{calendar_id}/is-holiday"),
    response_model=CalendarCheckResponse
)
async def check_is_school_holiday(
    calendar_id: UUID = Path(..., description="School calendar ID"),
    check_date: str = Query(..., description="Date to check (ISO format: YYYY-MM-DD)"),
    db: AsyncSession = Depends(get_db)
):
    """Check if a specific date is a school holiday"""
    try:
        repo = CalendarRepository(db)
        calendar = await repo.get_calendar_by_id(calendar_id)

        if not calendar:
            raise HTTPException(status_code=404, detail="School calendar not found")

        # Parse the date
        try:
            date_obj = datetime.strptime(check_date, "%Y-%m-%d").date()
        except ValueError:
            raise HTTPException(
                status_code=400,
                detail="Invalid date format. Use YYYY-MM-DD"
            )

        # Check if date falls within any holiday period
        is_holiday = False
        holiday_name = None

        for period in calendar.holiday_periods:
            start = datetime.strptime(period["start_date"], "%Y-%m-%d").date()
            end = datetime.strptime(period["end_date"], "%Y-%m-%d").date()

            if start <= date_obj <= end:
                is_holiday = True
                holiday_name = period["name"]
                break

        return CalendarCheckResponse(
            date=check_date,
            is_holiday=is_holiday,
            holiday_name=holiday_name,
            calendar_id=str(calendar_id),
            calendar_name=calendar.calendar_name
        )

    except HTTPException:
        raise
    except Exception as e:
        logger.error(
            "Error checking holiday status",
            calendar_id=str(calendar_id),
            date=check_date,
            error=str(e)
        )
        raise HTTPException(
            status_code=500,
            detail=f"Error checking holiday status: {str(e)}"
        )


# ===== Tenant Location Context Endpoints =====

@router.get(
    route_builder.build_base_route("location-context"),
    response_model=TenantLocationContextResponse
)
async def get_tenant_location_context(
    tenant_id: str = Path(..., description="Tenant ID"),
    current_user: dict = Depends(get_current_user_dep),
    db: AsyncSession = Depends(get_db)
):
    """Get location context for a tenant including school calendar assignment (cached)"""
    try:
        # Check cache first
        cached = await cache.get_cached_tenant_context(tenant_id)
        if cached:
            logger.debug("Returning cached tenant context", tenant_id=tenant_id)
            return TenantLocationContextResponse(**cached)

        # Cache miss - fetch from database
        repo = CalendarRepository(db)
        context = await repo.get_tenant_with_calendar(tenant_id)

        if not context:
            raise HTTPException(
                status_code=404,
                detail="Location context not found for this tenant"
            )

        # Cache the result (tenant_id is already a string here)
        await cache.set_cached_tenant_context(tenant_id, context)

        return TenantLocationContextResponse(**context)

    except HTTPException:
        raise
    except Exception as e:
        logger.error(
            "Error retrieving tenant location context",
            tenant_id=str(tenant_id),
            error=str(e)
        )
        raise HTTPException(
            status_code=500,
            detail=f"Error retrieving location context: {str(e)}"
        )


@router.post(
    route_builder.build_base_route("location-context"),
    response_model=TenantLocationContextResponse
)
async def create_or_update_tenant_location_context(
    request: TenantLocationContextCreateRequest,
    tenant_id: str = Path(..., description="Tenant ID"),
    current_user: dict = Depends(get_current_user_dep),
    db: AsyncSession = Depends(get_db)
):
    """Create or update tenant location context"""
    try:
        # Convert to UUID for use with repository
        tenant_uuid = UUID(tenant_id)

        repo = CalendarRepository(db)

        # Validate calendar_id if provided
        if request.school_calendar_id:
            calendar = await repo.get_calendar_by_id(request.school_calendar_id)
            if not calendar:
                raise HTTPException(
                    status_code=400,
                    detail="Invalid school_calendar_id"
                )

        # Create or update context
        context_obj = await repo.create_or_update_tenant_location_context(
            tenant_id=tenant_uuid,
            city_id=request.city_id,
            school_calendar_id=request.school_calendar_id,
            neighborhood=request.neighborhood,
            local_events=request.local_events,
            notes=request.notes
        )

        # Invalidate cache since context was updated
        await cache.invalidate_tenant_context(tenant_id)

        # Get full context with calendar details
        context = await repo.get_tenant_with_calendar(tenant_uuid)

        # Cache the new context
        await cache.set_cached_tenant_context(tenant_id, context)

        return TenantLocationContextResponse(**context)

    except HTTPException:
        raise
    except Exception as e:
        logger.error(
            "Error creating/updating tenant location context",
            tenant_id=str(tenant_id),
            error=str(e)
        )
        raise HTTPException(
            status_code=500,
            detail=f"Error creating/updating location context: {str(e)}"
        )


@router.delete(
    route_builder.build_base_route("location-context"),
    status_code=204
)
async def delete_tenant_location_context(
    tenant_id: str = Path(..., description="Tenant ID"),
    current_user: dict = Depends(get_current_user_dep),
    db: AsyncSession = Depends(get_db)
):
    """Delete tenant location context"""
    try:
        # Convert to UUID for use with repository
        tenant_uuid = UUID(tenant_id)

        repo = CalendarRepository(db)
        deleted = await repo.delete_tenant_location_context(tenant_uuid)

        if not deleted:
            raise HTTPException(
                status_code=404,
                detail="Location context not found"
            )

        return None

    except HTTPException:
        raise
    except Exception as e:
        logger.error(
            "Error deleting tenant location context",
            tenant_id=str(tenant_id),
            error=str(e)
        )
        raise HTTPException(
            status_code=500,
            detail=f"Error deleting location context: {str(e)}"
        )


# ===== Calendar Suggestion Endpoint =====

@router.post(
    route_builder.build_base_route("location-context/suggest-calendar")
)
async def suggest_calendar_for_tenant(
    tenant_id: str = Path(..., description="Tenant ID"),
    current_user: dict = Depends(get_current_user_dep),
    db: AsyncSession = Depends(get_db)
):
    """
    Suggest an appropriate school calendar for a tenant based on location and POI data.

    This endpoint analyzes:
    - Tenant's city location
    - Detected schools nearby (from POI detection)
    - Available calendars for the city
    - Bakery-specific heuristics (primary schools = stronger morning rush)

    Returns a suggestion with confidence score and reasoning.
    Does NOT automatically assign - requires admin approval.
    """
    try:
        from app.utils.calendar_suggester import CalendarSuggester
        from app.repositories.poi_context_repository import POIContextRepository

        tenant_uuid = UUID(tenant_id)

        # Get tenant's location context
        calendar_repo = CalendarRepository(db)
        location_context = await calendar_repo.get_tenant_location_context(tenant_uuid)

        if not location_context:
            raise HTTPException(
                status_code=404,
                detail="Location context not found. Create location context first."
            )

        city_id = location_context.city_id

        # Get available calendars for city
        calendars_result = await calendar_repo.get_calendars_by_city(city_id, enabled_only=True)
        calendars = calendars_result.get("calendars", []) if calendars_result else []

        # Get POI context if available
        poi_repo = POIContextRepository(db)
        poi_context = await poi_repo.get_by_tenant_id(tenant_uuid)
        poi_data = poi_context.to_dict() if poi_context else None

        # Generate suggestion
        suggester = CalendarSuggester()
        suggestion = suggester.suggest_calendar_for_tenant(
            city_id=city_id,
            available_calendars=calendars,
            poi_context=poi_data,
            tenant_data=None  # Could include tenant info if needed
        )

        # Format for admin display
        admin_message = suggester.format_suggestion_for_admin(suggestion)

        logger.info(
            "Calendar suggestion generated",
            tenant_id=tenant_id,
            city_id=city_id,
            suggested_calendar=suggestion.get("suggested_calendar_id"),
            confidence=suggestion.get("confidence")
        )

        return {
            **suggestion,
            "admin_message": admin_message,
            "tenant_id": tenant_id,
            "current_calendar_id": str(location_context.school_calendar_id) if location_context.school_calendar_id else None
        }

    except HTTPException:
        raise
    except Exception as e:
        logger.error(
            "Error generating calendar suggestion",
            tenant_id=tenant_id,
            error=str(e),
            exc_info=True
        )
        raise HTTPException(
            status_code=500,
            detail=f"Error generating calendar suggestion: {str(e)}"
        )


# ===== Helper Endpoints =====

@router.get(
    route_builder.build_operations_route("calendars/registry"),
    response_model=List[SchoolCalendarResponse]
)
async def list_registry_calendars():
    """List all calendars from the CalendarRegistry (static configuration)"""
    calendars = CalendarRegistry.get_enabled_calendars()

    return [
        SchoolCalendarResponse(
            calendar_id=cal.calendar_id,
            calendar_name=cal.calendar_name,
            city_id=cal.city_id,
            school_type=cal.school_type.value,
            academic_year=cal.academic_year,
            holiday_periods=[
                {
                    "name": hp.name,
                    "start_date": hp.start_date,
                    "end_date": hp.end_date,
                    "description": hp.description
                }
                for hp in cal.holiday_periods
            ],
            school_hours={
                "morning_start": cal.school_hours.morning_start,
                "morning_end": cal.school_hours.morning_end,
                "has_afternoon_session": cal.school_hours.has_afternoon_session,
                "afternoon_start": cal.school_hours.afternoon_start,
                "afternoon_end": cal.school_hours.afternoon_end
            },
            source=cal.source,
            enabled=cal.enabled
        )
        for cal in calendars
    ]
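The is-holiday endpoint above reduces to a date-range containment test over the calendar's holiday_periods entries. A standalone sketch of that check, with illustrative (not real) holiday data:

# holiday_check_sketch.py -- illustrative only
from datetime import datetime

holiday_periods = [
    {"name": "Christmas break", "start_date": "2024-12-21", "end_date": "2025-01-07"},
    {"name": "Easter break", "start_date": "2025-04-11", "end_date": "2025-04-21"},
]


def is_school_holiday(check_date: str) -> tuple[bool, str | None]:
    # Same logic as check_is_school_holiday: parse the date, then test each period
    date_obj = datetime.strptime(check_date, "%Y-%m-%d").date()
    for period in holiday_periods:
        start = datetime.strptime(period["start_date"], "%Y-%m-%d").date()
        end = datetime.strptime(period["end_date"], "%Y-%m-%d").date()
        if start <= date_obj <= end:
            return True, period["name"]
    return False, None


print(is_school_holiday("2024-12-25"))  # (True, 'Christmas break')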
services/external/app/api/city_operations.py (vendored, new file, 510 lines)
@@ -0,0 +1,510 @@
# services/external/app/api/city_operations.py
"""
City Operations API - New endpoints for city-based data access
"""

from fastapi import APIRouter, Depends, HTTPException, Query, Path
from typing import List
from datetime import datetime
from uuid import UUID
import structlog

from app.schemas.city_data import CityInfoResponse, DataAvailabilityResponse
from app.schemas.weather import WeatherDataResponse, WeatherForecastResponse, WeatherForecastAPIResponse
from app.schemas.traffic import TrafficDataResponse
from app.registry.city_registry import CityRegistry
from app.registry.geolocation_mapper import GeolocationMapper
from app.repositories.city_data_repository import CityDataRepository
from app.cache.redis_wrapper import ExternalDataCache
from app.services.weather_service import WeatherService
from app.services.traffic_service import TrafficService
from app.services.tenant_deletion_service import ExternalTenantDeletionService
from shared.routing.route_builder import RouteBuilder
from shared.auth.decorators import get_current_user_dep
from shared.auth.access_control import service_only_access
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.database import get_db

route_builder = RouteBuilder('external')
router = APIRouter(tags=["city-operations"])
logger = structlog.get_logger()


@router.get(
    route_builder.build_base_route("cities"),
    response_model=List[CityInfoResponse]
)
async def list_supported_cities():
    """List all enabled cities with data availability"""
    registry = CityRegistry()
    cities = registry.get_enabled_cities()

    return [
        CityInfoResponse(
            city_id=city.city_id,
            name=city.name,
            country=city.country.value,
            latitude=city.latitude,
            longitude=city.longitude,
            radius_km=city.radius_km,
            weather_provider=city.weather_provider.value,
            traffic_provider=city.traffic_provider.value,
            enabled=city.enabled
        )
        for city in cities
    ]


@router.get(
    route_builder.build_operations_route("cities/{city_id}/availability"),
    response_model=DataAvailabilityResponse
)
async def get_city_data_availability(
    city_id: str = Path(..., description="City ID"),
    db: AsyncSession = Depends(get_db)
):
    """Get data availability for a specific city"""
    registry = CityRegistry()
    city = registry.get_city(city_id)

    if not city:
        raise HTTPException(status_code=404, detail="City not found")

    from sqlalchemy import text

    weather_stmt = text(
        "SELECT MIN(date), MAX(date), COUNT(*) FROM city_weather_data WHERE city_id = :city_id"
    )
    weather_result = await db.execute(weather_stmt, {"city_id": city_id})
    weather_row = weather_result.fetchone()
    weather_min, weather_max, weather_count = weather_row if weather_row else (None, None, 0)

    traffic_stmt = text(
        "SELECT MIN(date), MAX(date), COUNT(*) FROM city_traffic_data WHERE city_id = :city_id"
    )
    traffic_result = await db.execute(traffic_stmt, {"city_id": city_id})
    traffic_row = traffic_result.fetchone()
    traffic_min, traffic_max, traffic_count = traffic_row if traffic_row else (None, None, 0)

    return DataAvailabilityResponse(
        city_id=city_id,
        city_name=city.name,
        weather_available=weather_count > 0,
        weather_start_date=weather_min.isoformat() if weather_min else None,
        weather_end_date=weather_max.isoformat() if weather_max else None,
        weather_record_count=weather_count or 0,
        traffic_available=traffic_count > 0,
        traffic_start_date=traffic_min.isoformat() if traffic_min else None,
        traffic_end_date=traffic_max.isoformat() if traffic_max else None,
        traffic_record_count=traffic_count or 0
    )


@router.get(
    route_builder.build_operations_route("historical-weather-optimized"),
    response_model=List[WeatherDataResponse]
)
async def get_historical_weather_optimized(
    tenant_id: UUID = Path(..., description="Tenant ID"),
    latitude: float = Query(..., description="Latitude"),
    longitude: float = Query(..., description="Longitude"),
    start_date: datetime = Query(..., description="Start date"),
    end_date: datetime = Query(..., description="End date"),
    db: AsyncSession = Depends(get_db)
):
    """
    Get historical weather data using city-based cached data
    This is the FAST endpoint for training service
    """
    try:
        mapper = GeolocationMapper()
        mapping = mapper.map_tenant_to_city(latitude, longitude)

        if not mapping:
            raise HTTPException(
                status_code=404,
                detail="No supported city found for this location"
            )

        city, distance = mapping

        logger.info(
            "Fetching historical weather from cache",
            tenant_id=tenant_id,
            city=city.name,
            distance_km=round(distance, 2)
        )

        cache = ExternalDataCache()
        cached_data = await cache.get_cached_weather(
            city.city_id, start_date, end_date
        )

        if cached_data:
            logger.info("Weather cache hit", records=len(cached_data))
            return cached_data

        repo = CityDataRepository(db)
        db_records = await repo.get_weather_by_city_and_range(
            city.city_id, start_date, end_date
        )

        response_data = [
            WeatherDataResponse(
                id=str(record.id),
                location_id=f"{city.city_id}_{record.date.date()}",
                date=record.date,
                temperature=record.temperature,
                precipitation=record.precipitation,
                humidity=record.humidity,
                wind_speed=record.wind_speed,
                pressure=record.pressure,
                description=record.description,
                source=record.source,
                raw_data=None,
                created_at=record.created_at,
                updated_at=record.updated_at
            )
            for record in db_records
        ]

        await cache.set_cached_weather(
            city.city_id, start_date, end_date, response_data
        )

        logger.info(
            "Historical weather data retrieved",
            records=len(response_data),
            source="database"
        )

        return response_data

    except HTTPException:
        raise
    except Exception as e:
        logger.error("Error fetching historical weather", error=str(e))
        raise HTTPException(status_code=500, detail="Internal server error")


@router.get(
    route_builder.build_operations_route("historical-traffic-optimized"),
    response_model=List[TrafficDataResponse]
)
async def get_historical_traffic_optimized(
    tenant_id: UUID = Path(..., description="Tenant ID"),
    latitude: float = Query(..., description="Latitude"),
    longitude: float = Query(..., description="Longitude"),
    start_date: datetime = Query(..., description="Start date"),
    end_date: datetime = Query(..., description="End date"),
    db: AsyncSession = Depends(get_db)
):
    """
    Get historical traffic data using city-based cached data
    This is the FAST endpoint for training service
    """
    try:
        mapper = GeolocationMapper()
        mapping = mapper.map_tenant_to_city(latitude, longitude)

        if not mapping:
            raise HTTPException(
                status_code=404,
                detail="No supported city found for this location"
            )

        city, distance = mapping

        logger.info(
            "Fetching historical traffic from cache",
            tenant_id=tenant_id,
            city=city.name,
            distance_km=round(distance, 2)
        )

        cache = ExternalDataCache()
        cached_data = await cache.get_cached_traffic(
            city.city_id, start_date, end_date
        )

        if cached_data:
            logger.info("Traffic cache hit", records=len(cached_data))
            return cached_data

        logger.debug("Starting DB query for traffic", city_id=city.city_id)
        repo = CityDataRepository(db)
        db_records = await repo.get_traffic_by_city_and_range(
            city.city_id, start_date, end_date
        )
        logger.debug("DB query completed", records=len(db_records))

        logger.debug("Creating response objects")
        response_data = [
            TrafficDataResponse(
                date=record.date,
                traffic_volume=record.traffic_volume,
                pedestrian_count=record.pedestrian_count,
                congestion_level=record.congestion_level,
                average_speed=record.average_speed,
                source=record.source
            )
            for record in db_records
        ]
        logger.debug("Response objects created", count=len(response_data))

        logger.debug("Caching traffic data")
        await cache.set_cached_traffic(
            city.city_id, start_date, end_date, response_data
        )
        logger.debug("Caching completed")

        logger.info(
            "Historical traffic data retrieved",
            records=len(response_data),
            source="database"
        )

        return response_data

    except HTTPException:
        raise
    except Exception as e:
        logger.error("Error fetching historical traffic", error=str(e))
        raise HTTPException(status_code=500, detail="Internal server error")


# ================================================================
# REAL-TIME & FORECAST ENDPOINTS
# ================================================================

@router.get(
    route_builder.build_operations_route("weather/current"),
    response_model=WeatherDataResponse
)
async def get_current_weather(
    tenant_id: UUID = Path(..., description="Tenant ID"),
    latitude: float = Query(..., description="Latitude"),
    longitude: float = Query(..., description="Longitude")
):
    """
    Get current weather for a location (real-time data from AEMET)
    """
    try:
        weather_service = WeatherService()
        weather_data = await weather_service.get_current_weather(latitude, longitude)

        if not weather_data:
            raise HTTPException(
                status_code=404,
                detail="No weather data available for this location"
            )

        logger.info(
            "Current weather retrieved",
            tenant_id=tenant_id,
            latitude=latitude,
            longitude=longitude
        )

        return weather_data

    except HTTPException:
        raise
    except Exception as e:
        logger.error("Error fetching current weather", error=str(e))
        raise HTTPException(status_code=500, detail="Internal server error")


@router.get(
    route_builder.build_operations_route("weather/forecast")
)
async def get_weather_forecast(
    tenant_id: UUID = Path(..., description="Tenant ID"),
    latitude: float = Query(..., description="Latitude"),
    longitude: float = Query(..., description="Longitude"),
    days: int = Query(7, ge=1, le=14, description="Number of days to forecast")
):
    """
    Get weather forecast for a location (from AEMET)
    Returns list of forecast objects with: forecast_date, generated_at, temperature, precipitation, humidity, wind_speed, description, source
    """
    try:
        weather_service = WeatherService()
        forecast_data = await weather_service.get_weather_forecast(latitude, longitude, days)

        if not forecast_data:
            raise HTTPException(
                status_code=404,
                detail="No forecast data available for this location"
            )

        logger.info(
            "Weather forecast retrieved",
            tenant_id=tenant_id,
            latitude=latitude,
            longitude=longitude,
            days=days,
            count=len(forecast_data)
        )

        return forecast_data

    except HTTPException:
        raise
    except Exception as e:
        logger.error("Error fetching weather forecast", error=str(e))
        raise HTTPException(status_code=500, detail="Internal server error")


@router.get(
    route_builder.build_operations_route("traffic/current"),
    response_model=TrafficDataResponse
)
async def get_current_traffic(
    tenant_id: UUID = Path(..., description="Tenant ID"),
    latitude: float = Query(..., description="Latitude"),
    longitude: float = Query(..., description="Longitude")
):
    """
    Get current traffic conditions for a location (real-time data from Madrid OpenData)
    """
    try:
        traffic_service = TrafficService()
        traffic_data = await traffic_service.get_current_traffic(latitude, longitude)

        if not traffic_data:
            raise HTTPException(
                status_code=404,
                detail="No traffic data available for this location"
            )

        logger.info(
            "Current traffic retrieved",
            tenant_id=tenant_id,
            latitude=latitude,
            longitude=longitude
        )

        return traffic_data

    except HTTPException:
        raise
    except Exception as e:
        logger.error("Error fetching current traffic", error=str(e))
        raise HTTPException(status_code=500, detail="Internal server error")


# ============================================================================
# Tenant Data Deletion Operations (Internal Service Only)
# ============================================================================

@router.delete(
    route_builder.build_base_route("tenant/{tenant_id}", include_tenant_prefix=False),
    response_model=dict
)
@service_only_access
async def delete_tenant_data(
    tenant_id: str = Path(..., description="Tenant ID to delete data for"),
    current_user: dict = Depends(get_current_user_dep),
    db: AsyncSession = Depends(get_db)
):
    """
    Delete tenant-specific external data (Internal service only)

    IMPORTANT NOTE:
    The External service primarily stores SHARED city-wide data that is used
    by ALL tenants. This endpoint only deletes tenant-specific data:
    - Tenant-specific audit logs
    - Tenant-specific weather data (if any)

    City-wide data (CityWeatherData, CityTrafficData, TrafficData, etc.)
    is intentionally PRESERVED as it's shared across all tenants.

    **WARNING**: This operation is irreversible!

    Returns:
        Deletion summary with counts of deleted records and note about preserved data
    """
    try:
        logger.info("external.tenant_deletion.api_called", tenant_id=tenant_id)

        deletion_service = ExternalTenantDeletionService(db)
        result = await deletion_service.safe_delete_tenant_data(tenant_id)

        if not result.success:
            raise HTTPException(
                status_code=500,
                detail=f"Tenant data deletion failed: {', '.join(result.errors)}"
            )

        return {
            "message": "Tenant-specific data deletion completed successfully",
            "note": "City-wide shared data (weather, traffic) has been preserved",
            "summary": result.to_dict()
        }

    except HTTPException:
        raise
    except Exception as e:
        logger.error("external.tenant_deletion.api_error",
                     tenant_id=tenant_id,
                     error=str(e),
                     exc_info=True)
        raise HTTPException(
            status_code=500,
            detail=f"Failed to delete tenant data: {str(e)}"
        )


@router.get(
    route_builder.build_base_route("tenant/{tenant_id}/deletion-preview", include_tenant_prefix=False),
    response_model=dict
)
@service_only_access
async def preview_tenant_data_deletion(
    tenant_id: str = Path(..., description="Tenant ID to preview deletion for"),
    current_user: dict = Depends(get_current_user_dep),
    db: AsyncSession = Depends(get_db)
):
    """
    Preview what tenant-specific data would be deleted (dry-run)

    This shows counts of tenant-specific data only. City-wide shared data
    (CityWeatherData, CityTrafficData, TrafficData, etc.) will NOT be deleted.

    Returns:
        Dictionary with entity names and their counts
    """
    try:
        logger.info("external.tenant_deletion.preview_called", tenant_id=tenant_id)

        deletion_service = ExternalTenantDeletionService(db)
        preview = await deletion_service.get_tenant_data_preview(tenant_id)

        total_records = sum(v for k, v in preview.items() if not k.startswith("_"))

        return {
            "tenant_id": tenant_id,
            "service": "external",
            "preview": preview,
            "total_records": total_records,
            "note": "City-wide data (weather, traffic) is shared and will NOT be deleted",
            "preserved_data": [
                "CityWeatherData (city-wide)",
                "CityTrafficData (city-wide)",
                "TrafficData (city-wide)",
                "TrafficMeasurementPoint (reference data)",
                "WeatherForecast (city-wide)"
            ],
            "warning": "Only tenant-specific records will be permanently deleted"
        }

    except Exception as e:
        logger.error("external.tenant_deletion.preview_error",
                     tenant_id=tenant_id,
                     error=str(e),
                     exc_info=True)
        raise HTTPException(
            status_code=500,
            detail=f"Failed to preview tenant data deletion: {str(e)}"
        )
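A hypothetical sketch (not in this commit) of how the training service might call the optimized historical-weather endpoint above; the exact path produced by route_builder.build_operations_route(...) is not visible in this diff, so the URL, host, and coordinates are placeholders.

# training_client_sketch.py -- illustrative only
import asyncio
from datetime import datetime

import httpx


async def fetch_weather_history(tenant_id: str) -> list[dict]:
    params = {
        "latitude": 40.4168,   # Madrid city centre, for illustration
        "longitude": -3.7038,
        "start_date": datetime(2024, 1, 1).isoformat(),
        "end_date": datetime(2024, 3, 31).isoformat(),
    }
    async with httpx.AsyncClient(base_url="http://external-service:8000") as client:
        resp = await client.get(
            f"/api/v1/tenants/{tenant_id}/operations/historical-weather-optimized",  # placeholder path
            params=params,
        )
        resp.raise_for_status()
        return resp.json()  # list of WeatherDataResponse objects

# asyncio.run(fetch_weather_history("<tenant-uuid>"))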
services/external/app/api/geocoding.py (vendored, new file, 302 lines)
@@ -0,0 +1,302 @@
"""
Geocoding API Endpoints

Provides address search, autocomplete, and geocoding via Nominatim.
"""

from fastapi import APIRouter, Query, HTTPException
from typing import List, Optional
from pydantic import BaseModel, Field
import structlog

from app.services.nominatim_service import NominatimService

logger = structlog.get_logger()
router = APIRouter(prefix="/api/v1/geocoding", tags=["Geocoding"])

# Initialize Nominatim service
# In production, override with environment variable for self-hosted instance
nominatim_service = NominatimService()


# Response Models
class AddressResult(BaseModel):
    """Address search result"""
    display_name: str = Field(..., description="Full formatted address")
    lat: float = Field(..., description="Latitude")
    lon: float = Field(..., description="Longitude")
    osm_type: str = Field(..., description="OSM object type")
    osm_id: int = Field(..., description="OSM object ID")
    place_id: int = Field(..., description="Nominatim place ID")
    type: str = Field(..., description="Place type")
    class_: str = Field(..., alias="class", description="OSM class")
    address: dict = Field(..., description="Parsed address components")
    boundingbox: List[str] = Field(..., description="Bounding box coordinates")


class GeocodeResult(BaseModel):
    """Geocoding result"""
    display_name: str = Field(..., description="Full formatted address")
    lat: float = Field(..., description="Latitude")
    lon: float = Field(..., description="Longitude")
    address: dict = Field(..., description="Parsed address components")


class CoordinateValidation(BaseModel):
    """Coordinate validation result"""
    valid: bool = Field(..., description="Whether coordinates are valid")
    address: Optional[str] = Field(None, description="Address at coordinates if valid")


# Endpoints
@router.get(
    "/search",
    response_model=List[AddressResult],
    summary="Search for addresses",
    description="Search for addresses matching query (autocomplete). Minimum 3 characters required."
)
async def search_addresses(
    q: str = Query(..., min_length=3, description="Search query (minimum 3 characters)"),
    country_code: str = Query("es", description="ISO country code to restrict search"),
    limit: int = Query(10, ge=1, le=50, description="Maximum number of results")
):
    """
    Search for addresses matching the query.

    This endpoint provides autocomplete functionality for address input.
    Results are restricted to the specified country and sorted by relevance.

    Example:
        GET /api/v1/geocoding/search?q=Gran%20Via%20Madrid&limit=5
    """
    try:
        results = await nominatim_service.search_address(
            query=q,
            country_code=country_code,
            limit=limit
        )

        logger.info(
            "Address search request",
            query=q,
            country=country_code,
            result_count=len(results)
        )

        return results

    except Exception as e:
        logger.error(
            "Address search failed",
            query=q,
            error=str(e),
            exc_info=True
        )
        raise HTTPException(
            status_code=500,
            detail=f"Address search failed: {str(e)}"
        )


@router.get(
    "/geocode",
    response_model=GeocodeResult,
    summary="Geocode an address",
    description="Convert an address string to coordinates (lat/lon)"
)
async def geocode_address(
    address: str = Query(..., min_length=5, description="Full address to geocode"),
    country_code: str = Query("es", description="ISO country code")
):
    """
    Geocode an address to get coordinates.

    Returns the best matching location for the given address.

    Example:
        GET /api/v1/geocoding/geocode?address=Gran%20Via%2028,%20Madrid
    """
    try:
        result = await nominatim_service.geocode_address(
            address=address,
            country_code=country_code
        )

        if not result:
            raise HTTPException(
                status_code=404,
                detail=f"Address not found: {address}"
            )

        logger.info(
            "Geocoding request",
            address=address,
            lat=result["lat"],
            lon=result["lon"]
        )

        return result

    except HTTPException:
        raise
    except Exception as e:
        logger.error(
            "Geocoding failed",
            address=address,
            error=str(e),
            exc_info=True
        )
        raise HTTPException(
            status_code=500,
            detail=f"Geocoding failed: {str(e)}"
        )


@router.get(
    "/reverse",
    response_model=GeocodeResult,
    summary="Reverse geocode coordinates",
    description="Convert coordinates (lat/lon) to an address"
)
async def reverse_geocode(
    lat: float = Query(..., ge=-90, le=90, description="Latitude"),
    lon: float = Query(..., ge=-180, le=180, description="Longitude")
):
    """
    Reverse geocode coordinates to get address.

    Returns the address at the specified coordinates.

    Example:
        GET /api/v1/geocoding/reverse?lat=40.4168&lon=-3.7038
    """
    try:
        result = await nominatim_service.reverse_geocode(
            latitude=lat,
            longitude=lon
        )

        if not result:
            raise HTTPException(
                status_code=404,
                detail=f"No address found at coordinates: {lat}, {lon}"
            )

        logger.info(
            "Reverse geocoding request",
            lat=lat,
            lon=lon,
            address=result["display_name"]
        )

        return result

    except HTTPException:
        raise
    except Exception as e:
        logger.error(
            "Reverse geocoding failed",
            lat=lat,
            lon=lon,
            error=str(e),
            exc_info=True
        )
        raise HTTPException(
            status_code=500,
            detail=f"Reverse geocoding failed: {str(e)}"
        )


@router.get(
    "/validate",
    response_model=CoordinateValidation,
    summary="Validate coordinates",
    description="Check if coordinates point to a valid location"
)
async def validate_coordinates(
    lat: float = Query(..., ge=-90, le=90, description="Latitude"),
    lon: float = Query(..., ge=-180, le=180, description="Longitude")
):
    """
    Validate that coordinates point to a real location.

    Returns validation result with address if valid.

    Example:
        GET /api/v1/geocoding/validate?lat=40.4168&lon=-3.7038
    """
    try:
        is_valid = await nominatim_service.validate_coordinates(
            latitude=lat,
            longitude=lon
        )

        result = {"valid": is_valid, "address": None}

        if is_valid:
            geocode_result = await nominatim_service.reverse_geocode(lat, lon)
            if geocode_result:
                result["address"] = geocode_result["display_name"]

        logger.info(
            "Coordinate validation request",
            lat=lat,
            lon=lon,
            valid=is_valid
        )

        return result

    except Exception as e:
        logger.error(
            "Coordinate validation failed",
            lat=lat,
            lon=lon,
            error=str(e),
            exc_info=True
        )
        raise HTTPException(
            status_code=500,
            detail=f"Coordinate validation failed: {str(e)}"
        )


@router.get(
    "/health",
    summary="Check geocoding service health",
    description="Check if Nominatim service is accessible"
)
async def health_check():
    """
    Check if Nominatim service is accessible.

    Returns service health status.
    """
    try:
        is_healthy = await nominatim_service.health_check()

        if not is_healthy:
            raise HTTPException(
                status_code=503,
                detail="Nominatim service is unavailable"
            )

        return {
            "status": "healthy",
            "service": "nominatim",
            "base_url": nominatim_service.base_url,
            "is_public_api": nominatim_service.is_public_api
        }

    except HTTPException:
        raise
    except Exception as e:
        logger.error(
            "Health check failed",
            error=str(e),
            exc_info=True
        )
        raise HTTPException(
            status_code=503,
            detail=f"Health check failed: {str(e)}"
        )
532
services/external/app/api/poi_context.py
vendored
Normal file
532
services/external/app/api/poi_context.py
vendored
Normal file
@@ -0,0 +1,532 @@
|
||||
"""
|
||||
POI Context API Endpoints
|
||||
|
||||
REST API for POI detection, retrieval, and management.
|
||||
"""
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from typing import Optional
|
||||
import structlog
|
||||
import uuid
|
||||
|
||||
from app.core.database import get_db
|
||||
from app.services.poi_detection_service import POIDetectionService
|
||||
from app.services.poi_feature_selector import POIFeatureSelector
|
||||
from app.services.competitor_analyzer import CompetitorAnalyzer
|
||||
from app.services.poi_refresh_service import POIRefreshService
|
||||
from app.repositories.poi_context_repository import POIContextRepository
|
||||
from app.cache.poi_cache_service import POICacheService
|
||||
from app.core.redis_client import get_redis_client
|
||||
from shared.routing.route_builder import RouteBuilder
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
route_builder = RouteBuilder('external')
|
||||
router = APIRouter(tags=["POI Context"])
|
||||
|
||||
|
||||
@router.post(
|
||||
route_builder.build_base_route("poi-context/detect")
|
||||
)
|
||||
async def detect_pois_for_tenant(
|
||||
tenant_id: str,
|
||||
latitude: float = Query(..., description="Bakery latitude"),
|
||||
longitude: float = Query(..., description="Bakery longitude"),
|
||||
force_refresh: bool = Query(False, description="Force refresh, skip cache"),
|
||||
db: AsyncSession = Depends(get_db)
|
||||
):
|
||||
"""
|
||||
Detect POIs for a tenant's bakery location.
|
||||
|
||||
Performs automated POI detection using Overpass API, calculates ML features,
|
||||
and stores results for demand forecasting.
|
||||
"""
|
||||
try:
|
||||
tenant_uuid = uuid.UUID(tenant_id)
|
||||
except ValueError:
|
||||
raise HTTPException(status_code=400, detail="Invalid tenant_id format")
|
||||
|
||||
logger.info(
|
||||
"POI detection requested",
|
||||
tenant_id=tenant_id,
|
||||
location=(latitude, longitude),
|
||||
force_refresh=force_refresh
|
||||
)
|
||||
|
||||
try:
|
||||
# Initialize services
|
||||
poi_service = POIDetectionService()
|
||||
feature_selector = POIFeatureSelector()
|
||||
competitor_analyzer = CompetitorAnalyzer()
|
||||
poi_repo = POIContextRepository(db)
|
||||
redis_client = await get_redis_client()
|
||||
cache_service = POICacheService(redis_client)
|
||||
|
||||
# Check cache first (unless force refresh)
|
||||
if not force_refresh:
|
||||
cached_result = await cache_service.get_cached_pois(latitude, longitude)
|
||||
if cached_result:
|
||||
logger.info("Using cached POI results", tenant_id=tenant_id)
|
||||
# Still save to database for this tenant
|
||||
poi_context = await poi_repo.create_or_update(tenant_uuid, cached_result)
|
||||
return {
|
||||
"status": "success",
|
||||
"source": "cache",
|
||||
"poi_context": poi_context.to_dict()
|
||||
}
|
||||
|
||||
# Detect POIs
|
||||
poi_results = await poi_service.detect_pois_for_bakery(
|
||||
latitude, longitude, tenant_id
|
||||
)
|
||||
|
||||
# Select relevant features
|
||||
try:
|
||||
feature_selection = feature_selector.select_relevant_features(
|
||||
poi_results["poi_categories"],
|
||||
tenant_id
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"Feature selection failed",
|
||||
tenant_id=tenant_id,
|
||||
error=str(e),
|
||||
exc_info=True
|
||||
)
|
||||
# Provide default feature selection to continue
|
||||
feature_selection = {
|
||||
"features": {},
|
||||
"relevant_categories": [],
|
||||
"relevance_report": [],
|
||||
"total_features": 0,
|
||||
"total_relevant_categories": 0
|
||||
}
|
||||
|
||||
# Analyze competitors specifically
|
||||
try:
|
||||
competitors_data = poi_results["poi_categories"].get("competitors", {})
|
||||
competitor_pois = competitors_data.get("pois", [])
|
||||
competitor_analysis = competitor_analyzer.analyze_competitive_landscape(
|
||||
competitor_pois,
|
||||
(latitude, longitude),
|
||||
tenant_id
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"Competitor analysis failed",
|
||||
tenant_id=tenant_id,
|
||||
error=str(e),
|
||||
exc_info=True
|
||||
)
|
||||
# Provide default competitor analysis to continue
|
||||
competitor_analysis = {
|
||||
"competitive_pressure_score": 0.0,
|
||||
"direct_competitors_count": 0,
|
||||
"nearby_competitors_count": 0,
|
||||
"market_competitors_count": 0,
|
||||
"total_competitors_count": 0,
|
||||
"competitive_zone": "low_competition",
|
||||
"market_type": "underserved",
|
||||
"competitive_advantage": "first_mover",
|
||||
"ml_feature_competitive_pressure": 0.0,
|
||||
"ml_feature_has_direct_competitor": 0,
|
||||
"ml_feature_competitor_density_500m": 0,
|
||||
"competitor_details": [],
|
||||
"nearest_competitor": None
|
||||
}
|
||||
|
||||
# Generate competitive insights
|
||||
try:
|
||||
competitive_insights = competitor_analyzer.get_competitive_insights(
|
||||
competitor_analysis
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"Failed to generate competitive insights",
|
||||
tenant_id=tenant_id,
|
||||
error=str(e)
|
||||
)
|
||||
competitive_insights = []
|
||||
|
||||
# Combine results
|
||||
enhanced_results = {
|
||||
**poi_results,
|
||||
"ml_features": feature_selection.get("features", {}),
|
||||
"relevant_categories": feature_selection.get("relevant_categories", []),
|
||||
"relevance_report": feature_selection.get("relevance_report", []),
|
||||
"competitor_analysis": competitor_analysis,
|
||||
"competitive_insights": competitive_insights
|
||||
}
|
||||
|
||||
# Cache results
|
||||
try:
|
||||
await cache_service.cache_poi_results(latitude, longitude, enhanced_results)
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"Failed to cache POI results",
|
||||
tenant_id=tenant_id,
|
||||
error=str(e)
|
||||
)
|
||||
|
||||
# Save to database
|
||||
try:
|
||||
poi_context = await poi_repo.create_or_update(tenant_uuid, enhanced_results)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"Failed to save POI context to database",
|
||||
tenant_id=tenant_id,
|
||||
error=str(e),
|
||||
exc_info=True
|
||||
)
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"Failed to save POI context: {str(e)}"
|
||||
)
|
||||
|
||||
# Schedule automatic refresh job (180 days from now)
|
||||
try:
|
||||
poi_refresh_service = POIRefreshService()
|
||||
refresh_job = await poi_refresh_service.schedule_refresh_job(
|
||||
tenant_id=tenant_id,
|
||||
latitude=latitude,
|
||||
longitude=longitude,
|
||||
session=db
|
||||
)
|
||||
logger.info(
|
||||
"POI refresh job scheduled",
|
||||
tenant_id=tenant_id,
|
||||
job_id=str(refresh_job.id),
|
||||
scheduled_at=refresh_job.scheduled_at
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"Failed to schedule POI refresh job",
|
||||
tenant_id=tenant_id,
|
||||
error=str(e)
|
||||
)
|
||||
|
||||
logger.info(
|
||||
"POI detection completed",
|
||||
tenant_id=tenant_id,
|
||||
total_pois=poi_context.total_pois_detected,
|
||||
relevant_categories=len(feature_selection.get("relevant_categories", []))
|
||||
)
|
||||
|
||||
# Phase 3: Auto-trigger calendar suggestion after POI detection
|
||||
# This helps admins by providing intelligent calendar recommendations
|
||||
calendar_suggestion = None
|
||||
try:
|
||||
from app.utils.calendar_suggester import CalendarSuggester
|
||||
from app.repositories.calendar_repository import CalendarRepository
|
||||
|
||||
# Get tenant's location context
|
||||
calendar_repo = CalendarRepository(db)
|
||||
location_context = await calendar_repo.get_tenant_location_context(tenant_uuid)
|
||||
|
||||
if location_context and location_context.school_calendar_id is None:
|
||||
# Only suggest if no calendar assigned yet
|
||||
city_id = location_context.city_id
|
||||
|
||||
# Get available calendars for city
|
||||
calendars_result = await calendar_repo.get_calendars_by_city(city_id, enabled_only=True)
|
||||
calendars = calendars_result.get("calendars", []) if calendars_result else []
|
||||
|
||||
if calendars:
|
||||
# Generate suggestion using POI data
|
||||
suggester = CalendarSuggester()
|
||||
calendar_suggestion = suggester.suggest_calendar_for_tenant(
|
||||
city_id=city_id,
|
||||
available_calendars=calendars,
|
||||
poi_context=poi_context.to_dict(),
|
||||
tenant_data=None
|
||||
)
|
||||
|
||||
logger.info(
|
||||
"Calendar suggestion auto-generated after POI detection",
|
||||
tenant_id=tenant_id,
|
||||
suggested_calendar=calendar_suggestion.get("calendar_name"),
|
||||
confidence=calendar_suggestion.get("confidence_percentage"),
|
||||
should_auto_assign=calendar_suggestion.get("should_auto_assign")
|
||||
)
|
||||
|
||||
# TODO: Send notification to admin about available suggestion
|
||||
# This will be implemented when notification service is integrated
|
||||
else:
|
||||
logger.info(
|
||||
"No calendars available for city, skipping suggestion",
|
||||
tenant_id=tenant_id,
|
||||
city_id=city_id
|
||||
)
|
||||
elif location_context and location_context.school_calendar_id:
|
||||
logger.info(
|
||||
"Calendar already assigned, skipping suggestion",
|
||||
tenant_id=tenant_id,
|
||||
calendar_id=str(location_context.school_calendar_id)
|
||||
)
|
||||
else:
|
||||
logger.warning(
|
||||
"No location context found, skipping calendar suggestion",
|
||||
tenant_id=tenant_id
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
# Non-blocking: POI detection should succeed even if suggestion fails
|
||||
logger.warning(
|
||||
"Failed to auto-generate calendar suggestion (non-blocking)",
|
||||
tenant_id=tenant_id,
|
||||
error=str(e)
|
||||
)
|
||||
|
||||
return {
|
||||
"status": "success",
|
||||
"source": "detection",
|
||||
"poi_context": poi_context.to_dict(),
|
||||
"feature_selection": feature_selection,
|
||||
"competitor_analysis": competitor_analysis,
|
||||
"competitive_insights": competitive_insights,
|
||||
"calendar_suggestion": calendar_suggestion # Include suggestion in response
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"POI detection failed",
|
||||
tenant_id=tenant_id,
|
||||
error=str(e),
|
||||
exc_info=True
|
||||
)
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"POI detection failed: {str(e)}"
|
||||
)
|
||||
|
||||
|
||||
@router.get(
|
||||
route_builder.build_base_route("poi-context")
|
||||
)
|
||||
async def get_poi_context(
|
||||
tenant_id: str,
|
||||
db: AsyncSession = Depends(get_db)
|
||||
):
|
||||
"""
|
||||
Get POI context for a tenant.
|
||||
|
||||
Returns stored POI detection results and ML features.
|
||||
"""
|
||||
try:
|
||||
tenant_uuid = uuid.UUID(tenant_id)
|
||||
except ValueError:
|
||||
raise HTTPException(status_code=400, detail="Invalid tenant_id format")
|
||||
|
||||
poi_repo = POIContextRepository(db)
|
||||
poi_context = await poi_repo.get_by_tenant_id(tenant_uuid)
|
||||
|
||||
if not poi_context:
|
||||
raise HTTPException(
|
||||
status_code=404,
|
||||
detail=f"POI context not found for tenant {tenant_id}"
|
||||
)
|
||||
|
||||
# Check if stale
|
||||
is_stale = poi_context.is_stale()
|
||||
|
||||
return {
|
||||
"poi_context": poi_context.to_dict(),
|
||||
"is_stale": is_stale,
|
||||
"needs_refresh": is_stale
|
||||
}
|
||||
|
||||
|
||||
@router.post(
|
||||
route_builder.build_base_route("poi-context/refresh")
|
||||
)
|
||||
async def refresh_poi_context(
|
||||
tenant_id: str,
|
||||
db: AsyncSession = Depends(get_db)
|
||||
):
|
||||
"""
|
||||
Refresh POI context for a tenant.
|
||||
|
||||
Re-detects POIs and updates stored data.
|
||||
"""
|
||||
try:
|
||||
tenant_uuid = uuid.UUID(tenant_id)
|
||||
except ValueError:
|
||||
raise HTTPException(status_code=400, detail="Invalid tenant_id format")
|
||||
|
||||
poi_repo = POIContextRepository(db)
|
||||
existing_context = await poi_repo.get_by_tenant_id(tenant_uuid)
|
||||
|
||||
if not existing_context:
|
||||
raise HTTPException(
|
||||
status_code=404,
|
||||
detail=f"POI context not found for tenant {tenant_id}. Use detect endpoint first."
|
||||
)
|
||||
|
||||
# Perform detection with force_refresh=True
|
||||
return await detect_pois_for_tenant(
|
||||
tenant_id=tenant_id,
|
||||
latitude=existing_context.latitude,
|
||||
longitude=existing_context.longitude,
|
||||
force_refresh=True,
|
||||
db=db
|
||||
)
|
||||
|
||||
|
||||
@router.delete(
|
||||
route_builder.build_base_route("poi-context")
|
||||
)
|
||||
async def delete_poi_context(
|
||||
tenant_id: str,
|
||||
db: AsyncSession = Depends(get_db)
|
||||
):
|
||||
"""
|
||||
Delete POI context for a tenant.
|
||||
"""
|
||||
try:
|
||||
tenant_uuid = uuid.UUID(tenant_id)
|
||||
except ValueError:
|
||||
raise HTTPException(status_code=400, detail="Invalid tenant_id format")
|
||||
|
||||
poi_repo = POIContextRepository(db)
|
||||
deleted = await poi_repo.delete_by_tenant_id(tenant_uuid)
|
||||
|
||||
if not deleted:
|
||||
raise HTTPException(
|
||||
status_code=404,
|
||||
detail=f"POI context not found for tenant {tenant_id}"
|
||||
)
|
||||
|
||||
return {
|
||||
"status": "success",
|
||||
"message": f"POI context deleted for tenant {tenant_id}"
|
||||
}
|
||||
|
||||
|
||||
@router.get(
|
||||
route_builder.build_base_route("poi-context/feature-importance")
|
||||
)
|
||||
async def get_feature_importance(
|
||||
tenant_id: str,
|
||||
db: AsyncSession = Depends(get_db)
|
||||
):
|
||||
"""
|
||||
Get feature importance summary for tenant's POI context.
|
||||
|
||||
Shows which POI categories are relevant and their impact scores.
|
||||
"""
|
||||
try:
|
||||
tenant_uuid = uuid.UUID(tenant_id)
|
||||
except ValueError:
|
||||
raise HTTPException(status_code=400, detail="Invalid tenant_id format")
|
||||
|
||||
poi_repo = POIContextRepository(db)
|
||||
poi_context = await poi_repo.get_by_tenant_id(tenant_uuid)
|
||||
|
||||
if not poi_context:
|
||||
raise HTTPException(
|
||||
status_code=404,
|
||||
detail=f"POI context not found for tenant {tenant_id}"
|
||||
)
|
||||
|
||||
feature_selector = POIFeatureSelector()
|
||||
importance_summary = feature_selector.get_feature_importance_summary(
|
||||
poi_context.poi_detection_results
|
||||
)
|
||||
|
||||
return {
|
||||
"tenant_id": tenant_id,
|
||||
"feature_importance": importance_summary,
|
||||
"total_categories": len(importance_summary),
|
||||
"relevant_categories": sum(1 for cat in importance_summary if cat["is_relevant"])
|
||||
}
|
||||
|
||||
|
||||
@router.get(
|
||||
route_builder.build_base_route("poi-context/competitor-analysis")
|
||||
)
|
||||
async def get_competitor_analysis(
|
||||
tenant_id: str,
|
||||
db: AsyncSession = Depends(get_db)
|
||||
):
|
||||
"""
|
||||
Get detailed competitor analysis for tenant location.
|
||||
"""
|
||||
try:
|
||||
tenant_uuid = uuid.UUID(tenant_id)
|
||||
except ValueError:
|
||||
raise HTTPException(status_code=400, detail="Invalid tenant_id format")
|
||||
|
||||
poi_repo = POIContextRepository(db)
|
||||
poi_context = await poi_repo.get_by_tenant_id(tenant_uuid)
|
||||
|
||||
if not poi_context:
|
||||
raise HTTPException(
|
||||
status_code=404,
|
||||
detail=f"POI context not found for tenant {tenant_id}"
|
||||
)
|
||||
|
||||
competitor_analyzer = CompetitorAnalyzer()
|
||||
competitors = poi_context.poi_detection_results.get("competitors", {}).get("pois", [])
|
||||
|
||||
analysis = competitor_analyzer.analyze_competitive_landscape(
|
||||
competitors,
|
||||
(poi_context.latitude, poi_context.longitude),
|
||||
tenant_id
|
||||
)
|
||||
|
||||
insights = competitor_analyzer.get_competitive_insights(analysis)
|
||||
|
||||
return {
|
||||
"tenant_id": tenant_id,
|
||||
"location": {
|
||||
"latitude": poi_context.latitude,
|
||||
"longitude": poi_context.longitude
|
||||
},
|
||||
"competitor_analysis": analysis,
|
||||
"insights": insights
|
||||
}
|
||||
|
||||
|
||||
@router.get("/health")
|
||||
async def poi_health_check():
|
||||
"""
|
||||
Check POI detection service health.
|
||||
|
||||
Verifies Overpass API accessibility.
|
||||
"""
|
||||
poi_service = POIDetectionService()
|
||||
health = await poi_service.health_check()
|
||||
|
||||
if not health["healthy"]:
|
||||
raise HTTPException(
|
||||
status_code=503,
|
||||
detail=f"POI detection service unhealthy: {health.get('error', 'Unknown error')}"
|
||||
)
|
||||
|
||||
return {
|
||||
"status": "healthy",
|
||||
"overpass_api": health
|
||||
}
|
||||
|
||||
|
||||
@router.get("/cache/stats")
|
||||
async def get_cache_stats():
|
||||
"""
|
||||
Get POI cache statistics.
|
||||
"""
|
||||
try:
|
||||
redis_client = await get_redis_client()
|
||||
cache_service = POICacheService(redis_client)
|
||||
stats = await cache_service.get_cache_stats()
|
||||
|
||||
return {
|
||||
"status": "success",
|
||||
"cache_stats": stats
|
||||
}
|
||||
except Exception as e:
|
||||
logger.error("Failed to get cache stats", error=str(e))
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"Failed to get cache stats: {str(e)}"
|
||||
)
|
||||
441
services/external/app/api/poi_refresh_jobs.py
vendored
Normal file
441
services/external/app/api/poi_refresh_jobs.py
vendored
Normal file
@@ -0,0 +1,441 @@
|
||||
"""
|
||||
POI Refresh Jobs API Endpoints
|
||||
|
||||
REST API for managing POI refresh background jobs.
|
||||
"""
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy import select, desc
|
||||
from typing import List, Optional
|
||||
from datetime import datetime, timezone
|
||||
from pydantic import BaseModel, Field
|
||||
import structlog
|
||||
import uuid
|
||||
|
||||
from app.core.database import get_db
|
||||
from app.services.poi_refresh_service import POIRefreshService
|
||||
from app.services.poi_scheduler import get_scheduler
|
||||
from app.models.poi_refresh_job import POIRefreshJob
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
router = APIRouter(prefix="/poi-refresh-jobs", tags=["POI Refresh Jobs"])
|
||||
|
||||
|
||||
# Response Models
|
||||
class POIRefreshJobResponse(BaseModel):
|
||||
"""POI refresh job response"""
|
||||
id: str
|
||||
tenant_id: str
|
||||
status: str
|
||||
scheduled_at: datetime
|
||||
started_at: Optional[datetime] = None
|
||||
completed_at: Optional[datetime] = None
|
||||
attempt_count: int
|
||||
max_attempts: int
|
||||
pois_detected: Optional[int] = None
|
||||
changes_detected: bool = False
|
||||
change_summary: Optional[dict] = None
|
||||
error_message: Optional[str] = None
|
||||
next_scheduled_at: Optional[datetime] = None
|
||||
duration_seconds: Optional[float] = None
|
||||
is_overdue: bool
|
||||
can_retry: bool
|
||||
|
||||
class Config:
|
||||
from_attributes = True
|
||||
|
||||
|
||||
class ScheduleJobRequest(BaseModel):
|
||||
"""Schedule POI refresh job request"""
|
||||
tenant_id: str = Field(..., description="Tenant UUID")
|
||||
latitude: float = Field(..., ge=-90, le=90, description="Bakery latitude")
|
||||
longitude: float = Field(..., ge=-180, le=180, description="Bakery longitude")
|
||||
scheduled_at: Optional[datetime] = Field(None, description="When to run (default: 180 days from now)")
|
||||
|
||||
|
||||
class JobExecutionResult(BaseModel):
|
||||
"""Job execution result"""
|
||||
status: str
|
||||
job_id: str
|
||||
message: Optional[str] = None
|
||||
pois_detected: Optional[int] = None
|
||||
changes_detected: Optional[bool] = None
|
||||
change_summary: Optional[dict] = None
|
||||
duration_seconds: Optional[float] = None
|
||||
next_scheduled_at: Optional[str] = None
|
||||
error: Optional[str] = None
|
||||
attempt: Optional[int] = None
|
||||
can_retry: Optional[bool] = None
|
||||
|
||||
|
||||
# Endpoints
|
||||
@router.post(
|
||||
"/schedule",
|
||||
response_model=POIRefreshJobResponse,
|
||||
summary="Schedule POI refresh job",
|
||||
description="Schedule a background job to refresh POI context for a tenant"
|
||||
)
|
||||
async def schedule_refresh_job(
|
||||
request: ScheduleJobRequest,
|
||||
db: AsyncSession = Depends(get_db)
|
||||
):
|
||||
"""
|
||||
Schedule a POI refresh job for a tenant.
|
||||
|
||||
Creates a background job that will detect POIs for the tenant's location
|
||||
at the scheduled time. Default schedule is 180 days from now.
|
||||
"""
|
||||
try:
|
||||
tenant_uuid = uuid.UUID(request.tenant_id)
|
||||
except ValueError:
|
||||
raise HTTPException(status_code=400, detail="Invalid tenant_id format")
|
||||
|
||||
try:
|
||||
poi_refresh_service = POIRefreshService()
|
||||
job = await poi_refresh_service.schedule_refresh_job(
|
||||
tenant_id=request.tenant_id,
|
||||
latitude=request.latitude,
|
||||
longitude=request.longitude,
|
||||
scheduled_at=request.scheduled_at,
|
||||
session=db
|
||||
)
|
||||
|
||||
logger.info(
|
||||
"POI refresh job scheduled via API",
|
||||
tenant_id=request.tenant_id,
|
||||
job_id=str(job.id),
|
||||
scheduled_at=job.scheduled_at
|
||||
)
|
||||
|
||||
return POIRefreshJobResponse(
|
||||
id=str(job.id),
|
||||
tenant_id=str(job.tenant_id),
|
||||
status=job.status,
|
||||
scheduled_at=job.scheduled_at,
|
||||
started_at=job.started_at,
|
||||
completed_at=job.completed_at,
|
||||
attempt_count=job.attempt_count,
|
||||
max_attempts=job.max_attempts,
|
||||
pois_detected=job.pois_detected,
|
||||
changes_detected=job.changes_detected,
|
||||
change_summary=job.change_summary,
|
||||
error_message=job.error_message,
|
||||
next_scheduled_at=job.next_scheduled_at,
|
||||
duration_seconds=job.duration_seconds,
|
||||
is_overdue=job.is_overdue,
|
||||
can_retry=job.can_retry
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"Failed to schedule POI refresh job",
|
||||
tenant_id=request.tenant_id,
|
||||
error=str(e),
|
||||
exc_info=True
|
||||
)
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"Failed to schedule refresh job: {str(e)}"
|
||||
)
|
||||
|
||||
|
||||
@router.get(
|
||||
"/{job_id}",
|
||||
response_model=POIRefreshJobResponse,
|
||||
summary="Get refresh job by ID",
|
||||
description="Retrieve details of a specific POI refresh job"
|
||||
)
|
||||
async def get_refresh_job(
|
||||
job_id: str,
|
||||
db: AsyncSession = Depends(get_db)
|
||||
):
|
||||
"""Get POI refresh job by ID"""
|
||||
try:
|
||||
job_uuid = uuid.UUID(job_id)
|
||||
except ValueError:
|
||||
raise HTTPException(status_code=400, detail="Invalid job_id format")
|
||||
|
||||
result = await db.execute(
|
||||
select(POIRefreshJob).where(POIRefreshJob.id == job_uuid)
|
||||
)
|
||||
job = result.scalar_one_or_none()
|
||||
|
||||
if not job:
|
||||
raise HTTPException(status_code=404, detail=f"Job not found: {job_id}")
|
||||
|
||||
return POIRefreshJobResponse(
|
||||
id=str(job.id),
|
||||
tenant_id=str(job.tenant_id),
|
||||
status=job.status,
|
||||
scheduled_at=job.scheduled_at,
|
||||
started_at=job.started_at,
|
||||
completed_at=job.completed_at,
|
||||
attempt_count=job.attempt_count,
|
||||
max_attempts=job.max_attempts,
|
||||
pois_detected=job.pois_detected,
|
||||
changes_detected=job.changes_detected,
|
||||
change_summary=job.change_summary,
|
||||
error_message=job.error_message,
|
||||
next_scheduled_at=job.next_scheduled_at,
|
||||
duration_seconds=job.duration_seconds,
|
||||
is_overdue=job.is_overdue,
|
||||
can_retry=job.can_retry
|
||||
)
|
||||
|
||||
|
||||
@router.get(
|
||||
"/tenant/{tenant_id}",
|
||||
response_model=List[POIRefreshJobResponse],
|
||||
summary="Get refresh jobs for tenant",
|
||||
description="Retrieve all POI refresh jobs for a specific tenant"
|
||||
)
|
||||
async def get_tenant_refresh_jobs(
|
||||
tenant_id: str,
|
||||
status: Optional[str] = Query(None, description="Filter by status"),
|
||||
limit: int = Query(50, ge=1, le=200, description="Maximum number of results"),
|
||||
db: AsyncSession = Depends(get_db)
|
||||
):
|
||||
"""Get all POI refresh jobs for a tenant"""
|
||||
try:
|
||||
tenant_uuid = uuid.UUID(tenant_id)
|
||||
except ValueError:
|
||||
raise HTTPException(status_code=400, detail="Invalid tenant_id format")
|
||||
|
||||
query = select(POIRefreshJob).where(POIRefreshJob.tenant_id == tenant_uuid)
|
||||
|
||||
if status:
|
||||
query = query.where(POIRefreshJob.status == status)
|
||||
|
||||
query = query.order_by(desc(POIRefreshJob.scheduled_at)).limit(limit)
|
||||
|
||||
result = await db.execute(query)
|
||||
jobs = result.scalars().all()
|
||||
|
||||
return [
|
||||
POIRefreshJobResponse(
|
||||
id=str(job.id),
|
||||
tenant_id=str(job.tenant_id),
|
||||
status=job.status,
|
||||
scheduled_at=job.scheduled_at,
|
||||
started_at=job.started_at,
|
||||
completed_at=job.completed_at,
|
||||
attempt_count=job.attempt_count,
|
||||
max_attempts=job.max_attempts,
|
||||
pois_detected=job.pois_detected,
|
||||
changes_detected=job.changes_detected,
|
||||
change_summary=job.change_summary,
|
||||
error_message=job.error_message,
|
||||
next_scheduled_at=job.next_scheduled_at,
|
||||
duration_seconds=job.duration_seconds,
|
||||
is_overdue=job.is_overdue,
|
||||
can_retry=job.can_retry
|
||||
)
|
||||
for job in jobs
|
||||
]
|
||||
|
||||
|
||||
@router.post(
|
||||
"/{job_id}/execute",
|
||||
response_model=JobExecutionResult,
|
||||
summary="Execute refresh job",
|
||||
description="Manually trigger execution of a pending POI refresh job"
|
||||
)
|
||||
async def execute_refresh_job(
|
||||
job_id: str,
|
||||
db: AsyncSession = Depends(get_db)
|
||||
):
|
||||
"""Manually execute a POI refresh job"""
|
||||
try:
|
||||
job_uuid = uuid.UUID(job_id)
|
||||
except ValueError:
|
||||
raise HTTPException(status_code=400, detail="Invalid job_id format")
|
||||
|
||||
try:
|
||||
poi_refresh_service = POIRefreshService()
|
||||
result = await poi_refresh_service.execute_refresh_job(
|
||||
job_id=job_id,
|
||||
session=db
|
||||
)
|
||||
|
||||
logger.info(
|
||||
"POI refresh job executed via API",
|
||||
job_id=job_id,
|
||||
status=result["status"]
|
||||
)
|
||||
|
||||
return JobExecutionResult(**result)
|
||||
|
||||
except ValueError as e:
|
||||
raise HTTPException(status_code=404, detail=str(e))
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"Failed to execute POI refresh job",
|
||||
job_id=job_id,
|
||||
error=str(e),
|
||||
exc_info=True
|
||||
)
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"Failed to execute refresh job: {str(e)}"
|
||||
)
|
||||
|
||||
|
||||
@router.post(
|
||||
"/process-pending",
|
||||
summary="Process all pending jobs",
|
||||
description="Manually trigger processing of all pending POI refresh jobs"
|
||||
)
|
||||
async def process_pending_jobs(
|
||||
max_concurrent: int = Query(5, ge=1, le=20, description="Max concurrent executions"),
|
||||
db: AsyncSession = Depends(get_db)
|
||||
):
|
||||
"""Process all pending POI refresh jobs"""
|
||||
try:
|
||||
poi_refresh_service = POIRefreshService()
|
||||
result = await poi_refresh_service.process_pending_jobs(
|
||||
max_concurrent=max_concurrent,
|
||||
session=db
|
||||
)
|
||||
|
||||
logger.info(
|
||||
"Pending POI refresh jobs processed via API",
|
||||
total_jobs=result["total_jobs"],
|
||||
successful=result["successful"],
|
||||
failed=result["failed"]
|
||||
)
|
||||
|
||||
return result
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"Failed to process pending POI refresh jobs",
|
||||
error=str(e),
|
||||
exc_info=True
|
||||
)
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"Failed to process pending jobs: {str(e)}"
|
||||
)
|
||||
|
||||
|
||||
@router.get(
|
||||
"/pending",
|
||||
response_model=List[POIRefreshJobResponse],
|
||||
summary="Get pending jobs",
|
||||
description="Retrieve all pending POI refresh jobs that are due for execution"
|
||||
)
|
||||
async def get_pending_jobs(
|
||||
limit: int = Query(100, ge=1, le=500, description="Maximum number of results"),
|
||||
db: AsyncSession = Depends(get_db)
|
||||
):
|
||||
"""Get all pending POI refresh jobs"""
|
||||
try:
|
||||
poi_refresh_service = POIRefreshService()
|
||||
jobs = await poi_refresh_service.get_pending_jobs(
|
||||
limit=limit,
|
||||
session=db
|
||||
)
|
||||
|
||||
return [
|
||||
POIRefreshJobResponse(
|
||||
id=str(job.id),
|
||||
tenant_id=str(job.tenant_id),
|
||||
status=job.status,
|
||||
scheduled_at=job.scheduled_at,
|
||||
started_at=job.started_at,
|
||||
completed_at=job.completed_at,
|
||||
attempt_count=job.attempt_count,
|
||||
max_attempts=job.max_attempts,
|
||||
pois_detected=job.pois_detected,
|
||||
changes_detected=job.changes_detected,
|
||||
change_summary=job.change_summary,
|
||||
error_message=job.error_message,
|
||||
next_scheduled_at=job.next_scheduled_at,
|
||||
duration_seconds=job.duration_seconds,
|
||||
is_overdue=job.is_overdue,
|
||||
can_retry=job.can_retry
|
||||
)
|
||||
for job in jobs
|
||||
]
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"Failed to get pending POI refresh jobs",
|
||||
error=str(e),
|
||||
exc_info=True
|
||||
)
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"Failed to get pending jobs: {str(e)}"
|
||||
)
|
||||
|
||||
|
||||
@router.post(
|
||||
"/trigger-scheduler",
|
||||
summary="Trigger scheduler immediately",
|
||||
description="Trigger an immediate check for pending jobs (bypasses schedule)"
|
||||
)
|
||||
async def trigger_scheduler():
|
||||
"""Trigger POI refresh scheduler immediately"""
|
||||
try:
|
||||
scheduler = get_scheduler()
|
||||
|
||||
if not scheduler.is_running:
|
||||
raise HTTPException(
|
||||
status_code=503,
|
||||
detail="POI refresh scheduler is not running"
|
||||
)
|
||||
|
||||
result = await scheduler.trigger_immediate_check()
|
||||
|
||||
logger.info(
|
||||
"POI refresh scheduler triggered via API",
|
||||
total_jobs=result["total_jobs"],
|
||||
successful=result["successful"],
|
||||
failed=result["failed"]
|
||||
)
|
||||
|
||||
return result
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"Failed to trigger POI refresh scheduler",
|
||||
error=str(e),
|
||||
exc_info=True
|
||||
)
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"Failed to trigger scheduler: {str(e)}"
|
||||
)
|
||||
|
||||
|
||||
@router.get(
|
||||
"/scheduler/status",
|
||||
summary="Get scheduler status",
|
||||
description="Check if POI refresh scheduler is running"
|
||||
)
|
||||
async def get_scheduler_status():
|
||||
"""Get POI refresh scheduler status"""
|
||||
try:
|
||||
scheduler = get_scheduler()
|
||||
|
||||
return {
|
||||
"is_running": scheduler.is_running,
|
||||
"check_interval_seconds": scheduler.check_interval_seconds,
|
||||
"max_concurrent_jobs": scheduler.max_concurrent_jobs
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"Failed to get scheduler status",
|
||||
error=str(e),
|
||||
exc_info=True
|
||||
)
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"Failed to get scheduler status: {str(e)}"
|
||||
)
|
||||
129
services/external/app/api/traffic_data.py
vendored
Normal file
129
services/external/app/api/traffic_data.py
vendored
Normal file
@@ -0,0 +1,129 @@
|
||||
# services/external/app/api/traffic_data.py
|
||||
"""
|
||||
Traffic Data API - Atomic CRUD operations on TrafficData model
|
||||
"""
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query, Path
|
||||
from typing import List, Optional
|
||||
from datetime import date
|
||||
from uuid import UUID
|
||||
import structlog
|
||||
|
||||
from app.schemas.traffic import TrafficDataResponse
|
||||
from app.services.traffic_service import TrafficService
|
||||
from shared.routing.route_builder import RouteBuilder
|
||||
from shared.auth.decorators import get_current_user_dep
|
||||
from shared.auth.access_control import analytics_tier_required
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from app.core.database import get_db
|
||||
|
||||
route_builder = RouteBuilder('external')
|
||||
router = APIRouter(tags=["traffic-data"])
|
||||
logger = structlog.get_logger()
|
||||
|
||||
|
||||
def get_traffic_service():
|
||||
"""Dependency injection for TrafficService"""
|
||||
return TrafficService()
|
||||
|
||||
|
||||
@router.get(
|
||||
route_builder.build_base_route("traffic-data"),
|
||||
response_model=List[TrafficDataResponse]
|
||||
)
|
||||
@analytics_tier_required
|
||||
async def list_traffic_data(
|
||||
tenant_id: UUID = Path(..., description="Tenant ID"),
|
||||
start_date: Optional[date] = Query(None),
|
||||
end_date: Optional[date] = Query(None),
|
||||
latitude: Optional[float] = Query(None),
|
||||
longitude: Optional[float] = Query(None),
|
||||
limit: int = Query(100, ge=1, le=1000),
|
||||
current_user: dict = Depends(get_current_user_dep),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
traffic_service: TrafficService = Depends(get_traffic_service)
|
||||
):
|
||||
"""List stored traffic data records (Professional+ tier required)"""
|
||||
try:
|
||||
logger.info("Listing traffic data", tenant_id=tenant_id)
|
||||
|
||||
traffic_records = await traffic_service.get_stored_traffic_data(
|
||||
tenant_id=tenant_id,
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
latitude=latitude,
|
||||
longitude=longitude,
|
||||
limit=limit,
|
||||
db=db
|
||||
)
|
||||
|
||||
return traffic_records
|
||||
|
||||
except Exception as e:
|
||||
logger.error("Failed to list traffic data", error=str(e), tenant_id=tenant_id)
|
||||
raise HTTPException(status_code=500, detail="Failed to retrieve traffic data")
|
||||
|
||||
|
||||
@router.get(
|
||||
route_builder.build_resource_detail_route("traffic-data", "traffic_id"),
|
||||
response_model=TrafficDataResponse
|
||||
)
|
||||
@analytics_tier_required
|
||||
async def get_traffic_data(
|
||||
tenant_id: UUID = Path(..., description="Tenant ID"),
|
||||
traffic_id: UUID = Path(..., description="Traffic data ID"),
|
||||
current_user: dict = Depends(get_current_user_dep),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
traffic_service: TrafficService = Depends(get_traffic_service)
|
||||
):
|
||||
"""Get a specific traffic data record"""
|
||||
try:
|
||||
logger.info("Getting traffic data", tenant_id=tenant_id, traffic_id=traffic_id)
|
||||
|
||||
traffic_record = await traffic_service.get_traffic_data_by_id(
|
||||
tenant_id=tenant_id,
|
||||
traffic_id=traffic_id,
|
||||
db=db
|
||||
)
|
||||
|
||||
if not traffic_record:
|
||||
raise HTTPException(status_code=404, detail="Traffic data not found")
|
||||
|
||||
return traffic_record
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error("Failed to get traffic data", error=str(e), tenant_id=tenant_id)
|
||||
raise HTTPException(status_code=500, detail="Failed to retrieve traffic data")
|
||||
|
||||
|
||||
@router.delete(
|
||||
route_builder.build_resource_detail_route("traffic-data", "traffic_id")
|
||||
)
|
||||
async def delete_traffic_data(
|
||||
tenant_id: UUID = Path(..., description="Tenant ID"),
|
||||
traffic_id: UUID = Path(..., description="Traffic data ID"),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
traffic_service: TrafficService = Depends(get_traffic_service)
|
||||
):
|
||||
"""Delete a traffic data record"""
|
||||
try:
|
||||
logger.info("Deleting traffic data", tenant_id=tenant_id, traffic_id=traffic_id)
|
||||
|
||||
success = await traffic_service.delete_traffic_data(
|
||||
tenant_id=tenant_id,
|
||||
traffic_id=traffic_id,
|
||||
db=db
|
||||
)
|
||||
|
||||
if not success:
|
||||
raise HTTPException(status_code=404, detail="Traffic data not found")
|
||||
|
||||
return {"message": "Traffic data deleted successfully"}
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error("Failed to delete traffic data", error=str(e), tenant_id=tenant_id)
|
||||
raise HTTPException(status_code=500, detail="Failed to delete traffic data")
|
||||
129
services/external/app/api/weather_data.py
vendored
Normal file
129
services/external/app/api/weather_data.py
vendored
Normal file
@@ -0,0 +1,129 @@
|
||||
# services/external/app/api/weather_data.py
|
||||
"""
|
||||
Weather Data API - Atomic CRUD operations on WeatherData model
|
||||
"""
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query, Path
|
||||
from typing import List, Optional
|
||||
from datetime import date
|
||||
from uuid import UUID
|
||||
import structlog
|
||||
|
||||
from app.schemas.weather import WeatherDataResponse
|
||||
from app.services.weather_service import WeatherService
|
||||
from shared.routing.route_builder import RouteBuilder
|
||||
from shared.auth.decorators import get_current_user_dep
|
||||
from shared.auth.access_control import analytics_tier_required
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from app.core.database import get_db
|
||||
|
||||
route_builder = RouteBuilder('external')
|
||||
router = APIRouter(tags=["weather-data"])
|
||||
logger = structlog.get_logger()
|
||||
|
||||
|
||||
def get_weather_service():
|
||||
"""Dependency injection for WeatherService"""
|
||||
return WeatherService()
|
||||
|
||||
|
||||
@router.get(
|
||||
route_builder.build_base_route("weather-data"),
|
||||
response_model=List[WeatherDataResponse]
|
||||
)
|
||||
@analytics_tier_required
|
||||
async def list_weather_data(
|
||||
tenant_id: UUID = Path(..., description="Tenant ID"),
|
||||
start_date: Optional[date] = Query(None),
|
||||
end_date: Optional[date] = Query(None),
|
||||
latitude: Optional[float] = Query(None),
|
||||
longitude: Optional[float] = Query(None),
|
||||
limit: int = Query(100, ge=1, le=1000),
|
||||
current_user: dict = Depends(get_current_user_dep),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
weather_service: WeatherService = Depends(get_weather_service)
|
||||
):
|
||||
"""List stored weather data records (Professional+ tier required)"""
|
||||
try:
|
||||
logger.info("Listing weather data", tenant_id=tenant_id)
|
||||
|
||||
weather_records = await weather_service.get_stored_weather_data(
|
||||
tenant_id=tenant_id,
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
latitude=latitude,
|
||||
longitude=longitude,
|
||||
limit=limit,
|
||||
db=db
|
||||
)
|
||||
|
||||
return weather_records
|
||||
|
||||
except Exception as e:
|
||||
logger.error("Failed to list weather data", error=str(e), tenant_id=tenant_id)
|
||||
raise HTTPException(status_code=500, detail="Failed to retrieve weather data")
|
||||
|
||||
|
||||
@router.get(
|
||||
route_builder.build_resource_detail_route("weather-data", "weather_id"),
|
||||
response_model=WeatherDataResponse
|
||||
)
|
||||
@analytics_tier_required
|
||||
async def get_weather_data(
|
||||
tenant_id: UUID = Path(..., description="Tenant ID"),
|
||||
weather_id: UUID = Path(..., description="Weather data ID"),
|
||||
current_user: dict = Depends(get_current_user_dep),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
weather_service: WeatherService = Depends(get_weather_service)
|
||||
):
|
||||
"""Get a specific weather data record"""
|
||||
try:
|
||||
logger.info("Getting weather data", tenant_id=tenant_id, weather_id=weather_id)
|
||||
|
||||
weather_record = await weather_service.get_weather_data_by_id(
|
||||
tenant_id=tenant_id,
|
||||
weather_id=weather_id,
|
||||
db=db
|
||||
)
|
||||
|
||||
if not weather_record:
|
||||
raise HTTPException(status_code=404, detail="Weather data not found")
|
||||
|
||||
return weather_record
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error("Failed to get weather data", error=str(e), tenant_id=tenant_id)
|
||||
raise HTTPException(status_code=500, detail="Failed to retrieve weather data")
|
||||
|
||||
|
||||
@router.delete(
|
||||
route_builder.build_resource_detail_route("weather-data", "weather_id")
|
||||
)
|
||||
async def delete_weather_data(
|
||||
tenant_id: UUID = Path(..., description="Tenant ID"),
|
||||
weather_id: UUID = Path(..., description="Weather data ID"),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
weather_service: WeatherService = Depends(get_weather_service)
|
||||
):
|
||||
"""Delete a weather data record"""
|
||||
try:
|
||||
logger.info("Deleting weather data", tenant_id=tenant_id, weather_id=weather_id)
|
||||
|
||||
success = await weather_service.delete_weather_data(
|
||||
tenant_id=tenant_id,
|
||||
weather_id=weather_id,
|
||||
db=db
|
||||
)
|
||||
|
||||
if not success:
|
||||
raise HTTPException(status_code=404, detail="Weather data not found")
|
||||
|
||||
return {"message": "Weather data deleted successfully"}
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error("Failed to delete weather data", error=str(e), tenant_id=tenant_id)
|
||||
raise HTTPException(status_code=500, detail="Failed to delete weather data")
|
||||
1
services/external/app/cache/__init__.py
vendored
Normal file
1
services/external/app/cache/__init__.py
vendored
Normal file
@@ -0,0 +1 @@
|
||||
"""Cache module for external data service"""
|
||||
208
services/external/app/cache/poi_cache_service.py
vendored
Normal file
208
services/external/app/cache/poi_cache_service.py
vendored
Normal file
@@ -0,0 +1,208 @@
|
||||
"""
|
||||
POI Cache Service
|
||||
|
||||
Caches POI detection results to avoid hammering Overpass API.
|
||||
POI data doesn't change frequently, so aggressive caching is appropriate.
|
||||
"""
|
||||
|
||||
from typing import Optional, Dict, Any
|
||||
import json
|
||||
import structlog
|
||||
from datetime import timedelta
|
||||
|
||||
from app.core.poi_config import (
|
||||
POI_CACHE_TTL_DAYS,
|
||||
POI_COORDINATE_PRECISION
|
||||
)
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
|
||||
class POICacheService:
|
||||
"""
|
||||
Redis-based cache for POI detection results.
|
||||
|
||||
Caches results by rounded coordinates to allow reuse for nearby locations.
|
||||
Reduces load on Overpass API and improves onboarding performance.
|
||||
"""
|
||||
|
||||
def __init__(self, redis_client):
|
||||
"""
|
||||
Initialize cache service.
|
||||
|
||||
Args:
|
||||
redis_client: Redis client instance
|
||||
"""
|
||||
self.redis = redis_client
|
||||
self.cache_ttl_days = POI_CACHE_TTL_DAYS
|
||||
self.coordinate_precision = POI_COORDINATE_PRECISION
|
||||
|
||||
def _generate_cache_key(self, latitude: float, longitude: float) -> str:
|
||||
"""
|
||||
Generate cache key from coordinates.
|
||||
|
||||
Rounds coordinates to specified precision (default 4 decimals ≈ 10m).
|
||||
This allows cache reuse for bakeries in very close proximity.
|
||||
|
||||
Args:
|
||||
latitude: Bakery latitude
|
||||
longitude: Bakery longitude
|
||||
|
||||
Returns:
|
||||
Redis cache key
|
||||
"""
|
||||
lat_rounded = round(latitude, self.coordinate_precision)
|
||||
lon_rounded = round(longitude, self.coordinate_precision)
|
||||
return f"poi_cache:{lat_rounded}:{lon_rounded}"
|
||||
|
||||
async def get_cached_pois(
|
||||
self,
|
||||
latitude: float,
|
||||
longitude: float
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
"""
|
||||
Get cached POI results for location.
|
||||
|
||||
Args:
|
||||
latitude: Bakery latitude
|
||||
longitude: Bakery longitude
|
||||
|
||||
Returns:
|
||||
Cached POI detection results or None if not cached
|
||||
"""
|
||||
cache_key = self._generate_cache_key(latitude, longitude)
|
||||
|
||||
try:
|
||||
cached_data = await self.redis.get(cache_key)
|
||||
if cached_data:
|
||||
logger.info(
|
||||
"POI cache hit",
|
||||
cache_key=cache_key,
|
||||
location=(latitude, longitude)
|
||||
)
|
||||
return json.loads(cached_data)
|
||||
else:
|
||||
logger.debug(
|
||||
"POI cache miss",
|
||||
cache_key=cache_key,
|
||||
location=(latitude, longitude)
|
||||
)
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"Failed to retrieve POI cache",
|
||||
error=str(e),
|
||||
cache_key=cache_key
|
||||
)
|
||||
return None
|
||||
|
||||
async def cache_poi_results(
|
||||
self,
|
||||
latitude: float,
|
||||
longitude: float,
|
||||
poi_data: Dict[str, Any]
|
||||
) -> bool:
|
||||
"""
|
||||
Cache POI detection results.
|
||||
|
||||
Args:
|
||||
latitude: Bakery latitude
|
||||
longitude: Bakery longitude
|
||||
poi_data: Complete POI detection results
|
||||
|
||||
Returns:
|
||||
True if cached successfully, False otherwise
|
||||
"""
|
||||
cache_key = self._generate_cache_key(latitude, longitude)
|
||||
ttl_seconds = self.cache_ttl_days * 24 * 60 * 60
|
||||
|
||||
try:
|
||||
await self.redis.setex(
|
||||
cache_key,
|
||||
ttl_seconds,
|
||||
json.dumps(poi_data)
|
||||
)
|
||||
logger.info(
|
||||
"POI results cached",
|
||||
cache_key=cache_key,
|
||||
ttl_days=self.cache_ttl_days,
|
||||
location=(latitude, longitude)
|
||||
)
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"Failed to cache POI results",
|
||||
error=str(e),
|
||||
cache_key=cache_key
|
||||
)
|
||||
return False
|
||||
|
||||
async def invalidate_cache(
|
||||
self,
|
||||
latitude: float,
|
||||
longitude: float
|
||||
) -> bool:
|
||||
"""
|
||||
Invalidate cached POI results for location.
|
||||
|
||||
Useful for manual refresh or data corrections.
|
||||
|
||||
Args:
|
||||
latitude: Bakery latitude
|
||||
longitude: Bakery longitude
|
||||
|
||||
Returns:
|
||||
True if invalidated successfully
|
||||
"""
|
||||
cache_key = self._generate_cache_key(latitude, longitude)
|
||||
|
||||
try:
|
||||
deleted = await self.redis.delete(cache_key)
|
||||
if deleted:
|
||||
logger.info(
|
||||
"POI cache invalidated",
|
||||
cache_key=cache_key,
|
||||
location=(latitude, longitude)
|
||||
)
|
||||
return bool(deleted)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"Failed to invalidate POI cache",
|
||||
error=str(e),
|
||||
cache_key=cache_key
|
||||
)
|
||||
return False
|
||||
|
||||
async def get_cache_stats(self) -> Dict[str, Any]:
|
||||
"""
|
||||
Get cache statistics.
|
||||
|
||||
Returns:
|
||||
Dictionary with cache stats (key count, memory usage, etc.)
|
||||
"""
|
||||
try:
|
||||
# Count POI cache keys
|
||||
pattern = "poi_cache:*"
|
||||
cursor = 0
|
||||
key_count = 0
|
||||
|
||||
while True:
|
||||
cursor, keys = await self.redis.scan(
|
||||
cursor=cursor,
|
||||
match=pattern,
|
||||
count=100
|
||||
)
|
||||
key_count += len(keys)
|
||||
if cursor == 0:
|
||||
break
|
||||
|
||||
return {
|
||||
"total_cached_locations": key_count,
|
||||
"cache_ttl_days": self.cache_ttl_days,
|
||||
"coordinate_precision": self.coordinate_precision
|
||||
}
|
||||
except Exception as e:
|
||||
logger.error("Failed to get cache stats", error=str(e))
|
||||
return {
|
||||
"error": str(e)
|
||||
}
|
||||
298
services/external/app/cache/redis_wrapper.py
vendored
Normal file
298
services/external/app/cache/redis_wrapper.py
vendored
Normal file
@@ -0,0 +1,298 @@
|
||||
# services/external/app/cache/redis_wrapper.py
|
||||
"""
|
||||
Redis cache layer for fast training data access using shared Redis implementation
|
||||
"""
|
||||
|
||||
from typing import List, Dict, Any, Optional
|
||||
import json
|
||||
from datetime import datetime, timedelta
|
||||
import structlog
|
||||
from shared.redis_utils import get_redis_client
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
|
||||
class ExternalDataCache:
|
||||
"""Redis cache for external data service"""
|
||||
|
||||
def __init__(self):
|
||||
self.ttl = 86400 * 7 # 7 days
|
||||
|
||||
async def _get_client(self):
|
||||
"""Get the shared Redis client"""
|
||||
return await get_redis_client()
|
||||
|
||||
def _weather_cache_key(
|
||||
self,
|
||||
city_id: str,
|
||||
start_date: datetime,
|
||||
end_date: datetime
|
||||
) -> str:
|
||||
"""Generate cache key for weather data"""
|
||||
return f"weather:{city_id}:{start_date.date()}:{end_date.date()}"
|
||||
|
||||
async def get_cached_weather(
|
||||
self,
|
||||
city_id: str,
|
||||
start_date: datetime,
|
||||
end_date: datetime
|
||||
) -> Optional[List[Dict[str, Any]]]:
|
||||
"""Get cached weather data"""
|
||||
try:
|
||||
key = self._weather_cache_key(city_id, start_date, end_date)
|
||||
client = await self._get_client()
|
||||
cached = await client.get(key)
|
||||
|
||||
if cached:
|
||||
logger.debug("Weather cache hit", city_id=city_id, key=key)
|
||||
return json.loads(cached)
|
||||
|
||||
logger.debug("Weather cache miss", city_id=city_id, key=key)
|
||||
return None
|
||||
|
||||
except Exception as e:
|
||||
logger.error("Error reading weather cache", error=str(e))
|
||||
return None
|
||||
|
||||
async def set_cached_weather(
|
||||
self,
|
||||
city_id: str,
|
||||
start_date: datetime,
|
||||
end_date: datetime,
|
||||
data: List[Dict[str, Any]]
|
||||
):
|
||||
"""Set cached weather data"""
|
||||
try:
|
||||
key = self._weather_cache_key(city_id, start_date, end_date)
|
||||
|
||||
serializable_data = []
|
||||
for record in data:
|
||||
# Handle both dict and Pydantic model objects
|
||||
if hasattr(record, 'model_dump'):
|
||||
record_dict = record.model_dump()
|
||||
elif hasattr(record, 'dict'):
|
||||
record_dict = record.dict()
|
||||
else:
|
||||
record_dict = record.copy() if isinstance(record, dict) else dict(record)
|
||||
|
||||
# Convert any datetime fields to ISO format strings
|
||||
for key_name, value in record_dict.items():
|
||||
if isinstance(value, datetime):
|
||||
record_dict[key_name] = value.isoformat()
|
||||
|
||||
serializable_data.append(record_dict)
|
||||
|
||||
client = await self._get_client()
|
||||
await client.setex(
|
||||
key,
|
||||
self.ttl,
|
||||
json.dumps(serializable_data)
|
||||
)
|
||||
|
||||
logger.debug("Weather data cached", city_id=city_id, records=len(data))
|
||||
|
||||
except Exception as e:
|
||||
logger.error("Error caching weather data", error=str(e))
|
||||
|
||||
def _traffic_cache_key(
|
||||
self,
|
||||
city_id: str,
|
||||
start_date: datetime,
|
||||
end_date: datetime
|
||||
) -> str:
|
||||
"""Generate cache key for traffic data"""
|
||||
return f"traffic:{city_id}:{start_date.date()}:{end_date.date()}"
|
||||
|
||||
async def get_cached_traffic(
|
||||
self,
|
||||
city_id: str,
|
||||
start_date: datetime,
|
||||
end_date: datetime
|
||||
) -> Optional[List[Dict[str, Any]]]:
|
||||
"""Get cached traffic data"""
|
||||
try:
|
||||
key = self._traffic_cache_key(city_id, start_date, end_date)
|
||||
client = await self._get_client()
|
||||
cached = await client.get(key)
|
||||
|
||||
if cached:
|
||||
logger.debug("Traffic cache hit", city_id=city_id, key=key)
|
||||
return json.loads(cached)
|
||||
|
||||
logger.debug("Traffic cache miss", city_id=city_id, key=key)
|
||||
return None
|
||||
|
||||
except Exception as e:
|
||||
logger.error("Error reading traffic cache", error=str(e))
|
||||
return None
|
||||
|
||||
async def set_cached_traffic(
|
||||
self,
|
||||
city_id: str,
|
||||
start_date: datetime,
|
||||
end_date: datetime,
|
||||
data: List[Dict[str, Any]]
|
||||
):
|
||||
"""Set cached traffic data"""
|
||||
try:
|
||||
key = self._traffic_cache_key(city_id, start_date, end_date)
|
||||
|
||||
serializable_data = []
|
||||
for record in data:
|
||||
# Handle both dict and Pydantic model objects
|
||||
if hasattr(record, 'model_dump'):
|
||||
record_dict = record.model_dump()
|
||||
elif hasattr(record, 'dict'):
|
||||
record_dict = record.dict()
|
||||
else:
|
||||
record_dict = record.copy() if isinstance(record, dict) else dict(record)
|
||||
|
||||
# Convert any datetime fields to ISO format strings
|
||||
for key_name, value in record_dict.items():
|
||||
if isinstance(value, datetime):
|
||||
record_dict[key_name] = value.isoformat()
|
||||
|
||||
serializable_data.append(record_dict)
|
||||
|
||||
client = await self._get_client()
|
||||
await client.setex(
|
||||
key,
|
||||
self.ttl,
|
||||
json.dumps(serializable_data)
|
||||
)
|
||||
|
||||
logger.debug("Traffic data cached", city_id=city_id, records=len(data))
|
||||
|
||||
except Exception as e:
|
||||
logger.error("Error caching traffic data", error=str(e))
|
||||
|
||||
async def invalidate_city_cache(self, city_id: str):
|
||||
"""Invalidate all cache entries for a city"""
|
||||
try:
|
||||
client = await self._get_client()
|
||||
pattern = f"*:{city_id}:*"
|
||||
|
||||
# Use scan_iter for safer key pattern matching
|
||||
keys_to_delete = []
|
||||
async for key in client.scan_iter(match=pattern):
|
||||
keys_to_delete.append(key)
|
||||
|
||||
if keys_to_delete:
|
||||
await client.delete(*keys_to_delete)
|
||||
|
||||
logger.info("City cache invalidated", city_id=city_id, keys_deleted=len(keys_to_delete))
|
||||
|
||||
except Exception as e:
|
||||
logger.error("Error invalidating cache", error=str(e))
|
||||
|
||||
# ===== Calendar Caching Methods =====
|
||||
|
||||
def _calendar_cache_key(self, calendar_id: str) -> str:
|
||||
"""Generate cache key for school calendar"""
|
||||
return f"calendar:{calendar_id}"
|
||||
|
||||
def _tenant_context_cache_key(self, tenant_id: str) -> str:
|
||||
"""Generate cache key for tenant location context"""
|
||||
return f"tenant_context:{tenant_id}"
|
||||
|
||||
async def get_cached_calendar(
|
||||
self,
|
||||
calendar_id: str
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
"""Get cached school calendar by ID"""
|
||||
try:
|
||||
key = self._calendar_cache_key(calendar_id)
|
||||
client = await self._get_client()
|
||||
cached = await client.get(key)
|
||||
|
||||
if cached:
|
||||
logger.debug("Calendar cache hit", calendar_id=calendar_id)
|
||||
return json.loads(cached)
|
||||
|
||||
logger.debug("Calendar cache miss", calendar_id=calendar_id)
|
||||
return None
|
||||
|
||||
except Exception as e:
|
||||
logger.error("Error reading calendar cache", error=str(e))
|
||||
return None
|
||||
|
||||
async def set_cached_calendar(
|
||||
self,
|
||||
calendar_id: str,
|
||||
calendar_data: Dict[str, Any]
|
||||
):
|
||||
"""Cache school calendar data (7 days TTL)"""
|
||||
try:
|
||||
key = self._calendar_cache_key(calendar_id)
|
||||
client = await self._get_client()
|
||||
|
||||
# Calendars change rarely, use 7-day TTL
|
||||
ttl = 86400 * 7
|
||||
|
||||
await client.setex(
|
||||
key,
|
||||
ttl,
|
||||
json.dumps(calendar_data)
|
||||
)
|
||||
|
||||
logger.debug("Calendar cached", calendar_id=calendar_id)
|
||||
|
||||
except Exception as e:
|
||||
logger.error("Error caching calendar", error=str(e))
|
||||
|
||||
async def get_cached_tenant_context(
|
||||
self,
|
||||
tenant_id: str
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
"""Get cached tenant location context"""
|
||||
try:
|
||||
key = self._tenant_context_cache_key(tenant_id)
|
||||
client = await self._get_client()
|
||||
cached = await client.get(key)
|
||||
|
||||
if cached:
|
||||
logger.debug("Tenant context cache hit", tenant_id=tenant_id)
|
||||
return json.loads(cached)
|
||||
|
||||
logger.debug("Tenant context cache miss", tenant_id=tenant_id)
|
||||
return None
|
||||
|
||||
except Exception as e:
|
||||
logger.error("Error reading tenant context cache", error=str(e))
|
||||
return None
|
||||
|
||||
async def set_cached_tenant_context(
|
||||
self,
|
||||
tenant_id: str,
|
||||
context_data: Dict[str, Any]
|
||||
):
|
||||
"""Cache tenant location context (24 hours TTL)"""
|
||||
try:
|
||||
key = self._tenant_context_cache_key(tenant_id)
|
||||
client = await self._get_client()
|
||||
|
||||
# Tenant context changes less frequently, 24-hour TTL
|
||||
ttl = 86400
|
||||
|
||||
await client.setex(
|
||||
key,
|
||||
ttl,
|
||||
json.dumps(context_data)
|
||||
)
|
||||
|
||||
logger.debug("Tenant context cached", tenant_id=tenant_id)
|
||||
|
||||
except Exception as e:
|
||||
logger.error("Error caching tenant context", error=str(e))
|
||||
|
||||
async def invalidate_tenant_context(self, tenant_id: str):
|
||||
"""Invalidate tenant context cache (called when context is updated)"""
|
||||
try:
|
||||
key = self._tenant_context_cache_key(tenant_id)
|
||||
client = await self._get_client()
|
||||
await client.delete(key)
|
||||
|
||||
logger.info("Tenant context cache invalidated", tenant_id=tenant_id)
|
||||
|
||||
except Exception as e:
|
||||
logger.error("Error invalidating tenant context cache", error=str(e))
|
||||
1
services/external/app/core/__init__.py
vendored
Normal file
1
services/external/app/core/__init__.py
vendored
Normal file
@@ -0,0 +1 @@
|
||||
# services/external/app/core/__init__.py
|
||||
77
services/external/app/core/config.py
vendored
Normal file
77
services/external/app/core/config.py
vendored
Normal file
@@ -0,0 +1,77 @@
# services/external/app/core/config.py

from shared.config.base import BaseServiceSettings
import os
from pydantic import Field

class DataSettings(BaseServiceSettings):
    """Data service specific settings"""

    # Service Identity
    SERVICE_NAME: str = "external-service"
    VERSION: str = "1.0.0"
    APP_NAME: str = "Bakery External Data Service"
    DESCRIPTION: str = "External data collection service for weather and traffic data"

    # API Configuration
    API_V1_STR: str = "/api/v1"

    # Database configuration (secure approach - build from components)
    @property
    def DATABASE_URL(self) -> str:
        """Build database URL from secure components"""
        # Try complete URL first (for backward compatibility)
        complete_url = os.getenv("EXTERNAL_DATABASE_URL")
        if complete_url:
            return complete_url

        # Build from components (secure approach)
        user = os.getenv("EXTERNAL_DB_USER", "external_user")
        password = os.getenv("EXTERNAL_DB_PASSWORD", "external_pass123")
        host = os.getenv("EXTERNAL_DB_HOST", "localhost")
        port = os.getenv("EXTERNAL_DB_PORT", "5432")
        name = os.getenv("EXTERNAL_DB_NAME", "external_db")

        return f"postgresql+asyncpg://{user}:{password}@{host}:{port}/{name}"

    # External API Configuration
    AEMET_API_KEY: str = os.getenv("AEMET_API_KEY", "")
    AEMET_BASE_URL: str = "https://opendata.aemet.es/opendata"
    AEMET_TIMEOUT: int = int(os.getenv("AEMET_TIMEOUT", "90"))  # Increased for unstable API
    AEMET_RETRY_ATTEMPTS: int = int(os.getenv("AEMET_RETRY_ATTEMPTS", "5"))  # More retries for connection issues
    AEMET_ENABLED: bool = os.getenv("AEMET_ENABLED", "true").lower() == "true"  # Allow disabling AEMET

    MADRID_OPENDATA_API_KEY: str = os.getenv("MADRID_OPENDATA_API_KEY", "")
    MADRID_OPENDATA_BASE_URL: str = "https://datos.madrid.es"
    MADRID_OPENDATA_TIMEOUT: int = int(os.getenv("MADRID_OPENDATA_TIMEOUT", "30"))

    # Data Collection Configuration
    WEATHER_COLLECTION_INTERVAL_HOURS: int = int(os.getenv("WEATHER_COLLECTION_INTERVAL_HOURS", "1"))
    TRAFFIC_COLLECTION_INTERVAL_HOURS: int = int(os.getenv("TRAFFIC_COLLECTION_INTERVAL_HOURS", "1"))
    EVENTS_COLLECTION_INTERVAL_HOURS: int = int(os.getenv("EVENTS_COLLECTION_INTERVAL_HOURS", "6"))

    # Cache TTL Configuration
    WEATHER_CACHE_TTL_HOURS: int = int(os.getenv("WEATHER_CACHE_TTL_HOURS", "1"))
    TRAFFIC_CACHE_TTL_HOURS: int = int(os.getenv("TRAFFIC_CACHE_TTL_HOURS", "1"))
    EVENTS_CACHE_TTL_HOURS: int = int(os.getenv("EVENTS_CACHE_TTL_HOURS", "6"))

    # Data Quality Configuration
    DATA_VALIDATION_ENABLED: bool = os.getenv("DATA_VALIDATION_ENABLED", "true").lower() == "true"
    OUTLIER_DETECTION_ENABLED: bool = os.getenv("OUTLIER_DETECTION_ENABLED", "true").lower() == "true"
    DATA_COMPLETENESS_THRESHOLD: float = float(os.getenv("DATA_COMPLETENESS_THRESHOLD", "0.8"))

    # Geolocation Settings (Madrid focus)
    DEFAULT_LATITUDE: float = float(os.getenv("DEFAULT_LATITUDE", "40.4168"))  # Madrid
    DEFAULT_LONGITUDE: float = float(os.getenv("DEFAULT_LONGITUDE", "-3.7038"))  # Madrid
    LOCATION_RADIUS_KM: float = float(os.getenv("LOCATION_RADIUS_KM", "50.0"))

    # Data Retention
    RAW_DATA_RETENTION_DAYS: int = int(os.getenv("RAW_DATA_RETENTION_DAYS", "90"))
    PROCESSED_DATA_RETENTION_DAYS: int = int(os.getenv("PROCESSED_DATA_RETENTION_DAYS", "365"))

    # Batch Processing
    BATCH_PROCESSING_ENABLED: bool = os.getenv("BATCH_PROCESSING_ENABLED", "true").lower() == "true"
    BATCH_SIZE: int = int(os.getenv("BATCH_SIZE", "1000"))
    PARALLEL_PROCESSING_WORKERS: int = int(os.getenv("PARALLEL_PROCESSING_WORKERS", "4"))

settings = DataSettings()
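The DATABASE_URL property above prefers a complete EXTERNAL_DATABASE_URL and otherwise assembles the DSN from individual components. The standalone sketch below mirrors that fallback logic for clarity; it copies the env-var names from config.py but is not an import of the real module.

# Standalone sketch of the DATABASE_URL fallback logic above.
import os

def build_external_db_url() -> str:
    complete_url = os.getenv("EXTERNAL_DATABASE_URL")
    if complete_url:
        return complete_url
    user = os.getenv("EXTERNAL_DB_USER", "external_user")
    password = os.getenv("EXTERNAL_DB_PASSWORD", "external_pass123")
    host = os.getenv("EXTERNAL_DB_HOST", "localhost")
    port = os.getenv("EXTERNAL_DB_PORT", "5432")
    name = os.getenv("EXTERNAL_DB_NAME", "external_db")
    return f"postgresql+asyncpg://{user}:{password}@{host}:{port}/{name}"

# With only EXTERNAL_DB_HOST=db set, this yields:
# postgresql+asyncpg://external_user:external_pass123@db:5432/external_db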
81
services/external/app/core/database.py
vendored
Normal file
81
services/external/app/core/database.py
vendored
Normal file
@@ -0,0 +1,81 @@
|
||||
# services/external/app/core/database.py
|
||||
"""
|
||||
External Service Database Configuration using shared database manager
|
||||
"""
|
||||
|
||||
import structlog
|
||||
from contextlib import asynccontextmanager
|
||||
from typing import AsyncGenerator
|
||||
|
||||
from app.core.config import settings
|
||||
from shared.database.base import DatabaseManager, Base
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
# Create database manager instance
|
||||
database_manager = DatabaseManager(
|
||||
database_url=settings.DATABASE_URL,
|
||||
service_name="external-service"
|
||||
)
|
||||
|
||||
async def get_db():
|
||||
"""
|
||||
Database dependency for FastAPI - using shared database manager
|
||||
"""
|
||||
async for session in database_manager.get_db():
|
||||
yield session
|
||||
|
||||
|
||||
async def init_db():
|
||||
"""Initialize database tables using shared database manager"""
|
||||
try:
|
||||
logger.info("Initializing External Service database...")
|
||||
|
||||
# Import all models to ensure they're registered
|
||||
from app.models import weather, traffic # noqa: F401
|
||||
|
||||
# Create all tables using database manager
|
||||
await database_manager.create_tables(Base.metadata)
|
||||
|
||||
logger.info("External Service database initialized successfully")
|
||||
|
||||
except Exception as e:
|
||||
logger.error("Failed to initialize database", error=str(e))
|
||||
raise
|
||||
|
||||
|
||||
async def close_db():
|
||||
"""Close database connections using shared database manager"""
|
||||
try:
|
||||
await database_manager.close_connections()
|
||||
logger.info("Database connections closed")
|
||||
except Exception as e:
|
||||
logger.error("Error closing database connections", error=str(e))
|
||||
|
||||
|
||||
@asynccontextmanager
|
||||
async def get_db_transaction():
|
||||
"""
|
||||
Context manager for database transactions using shared database manager
|
||||
"""
|
||||
async with database_manager.get_session() as session:
|
||||
try:
|
||||
async with session.begin():
|
||||
yield session
|
||||
except Exception as e:
|
||||
logger.error("Transaction error", error=str(e))
|
||||
raise
|
||||
|
||||
|
||||
@asynccontextmanager
|
||||
async def get_background_session():
|
||||
"""
|
||||
Context manager for background tasks using shared database manager
|
||||
"""
|
||||
async with database_manager.get_background_session() as session:
|
||||
yield session
|
||||
|
||||
|
||||
async def health_check():
|
||||
"""Database health check using shared database manager"""
|
||||
return await database_manager.health_check()
|
||||
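The database module above exposes get_db as a request-scoped dependency and get_db_transaction for atomic writes. A minimal consumption sketch, assuming the shared manager yields SQLAlchemy AsyncSession objects; the route path and the records argument are illustrative only.

# Illustrative consumption of the dependencies above (assumptions noted).
from fastapi import APIRouter, Depends
from sqlalchemy.ext.asyncio import AsyncSession

from app.core.database import get_db, get_db_transaction

router = APIRouter()

@router.get("/health/db")
async def db_health(db: AsyncSession = Depends(get_db)):
    # Request-scoped session provided by the shared database manager
    return {"db": "ok"}

async def store_records(records: list) -> None:
    # get_db_transaction() wraps the block in session.begin(), so all
    # inserts commit or roll back together
    async with get_db_transaction() as session:
        session.add_all(records)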
181
services/external/app/core/poi_config.py
vendored
Normal file
181
services/external/app/core/poi_config.py
vendored
Normal file
@@ -0,0 +1,181 @@
|
||||
"""
|
||||
POI Detection Configuration
|
||||
|
||||
Defines POI categories, search parameters, and relevance thresholds
|
||||
for automated Point of Interest detection and feature engineering.
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Dict
|
||||
|
||||
|
||||
@dataclass
|
||||
class POICategory:
|
||||
"""POI category definition with OSM query and ML parameters"""
|
||||
name: str
|
||||
osm_query: str
|
||||
search_radius_m: int
|
||||
weight: float # Importance weight for ML model (positive or negative)
|
||||
description: str
|
||||
|
||||
|
||||
# POI Category Definitions based on OpenStreetMap tags
|
||||
# Research-based search radii and weights for bakery demand forecasting
|
||||
POI_CATEGORIES: Dict[str, POICategory] = {
|
||||
"schools": POICategory(
|
||||
name="schools",
|
||||
osm_query='["amenity"~"school|kindergarten|university|college"]',
|
||||
search_radius_m=500,
|
||||
weight=1.5, # High positive impact - morning drop-off rush
|
||||
description="Educational institutions causing morning/afternoon rush patterns"
|
||||
),
|
||||
"offices": POICategory(
|
||||
name="offices",
|
||||
osm_query='["office"]',
|
||||
search_radius_m=800,
|
||||
weight=1.3, # Positive impact - weekday lunch/breakfast demand
|
||||
description="Office buildings and business centers"
|
||||
),
|
||||
"gyms_sports": POICategory(
|
||||
name="gyms_sports",
|
||||
osm_query='["leisure"~"fitness_centre|sports_centre|stadium"]',
|
||||
search_radius_m=600,
|
||||
weight=0.8, # Moderate impact - morning/evening activity
|
||||
description="Fitness centers and sports facilities"
|
||||
),
|
||||
"residential": POICategory(
|
||||
name="residential",
|
||||
osm_query='["building"~"residential|apartments|house"]',
|
||||
search_radius_m=400,
|
||||
weight=1.0, # Base demand from residents
|
||||
description="Residential buildings and housing"
|
||||
),
|
||||
"tourism": POICategory(
|
||||
name="tourism",
|
||||
osm_query='["tourism"~"attraction|museum|hotel|hostel|guest_house"]',
|
||||
search_radius_m=1000,
|
||||
weight=1.2, # Positive impact - tourist foot traffic
|
||||
description="Tourist attractions, hotels, and points of interest"
|
||||
),
|
||||
"competitors": POICategory(
|
||||
name="competitors",
|
||||
osm_query='["shop"~"bakery|pastry|confectionery"]',
|
||||
search_radius_m=1000,
|
||||
weight=-0.5, # Negative impact - competition pressure
|
||||
description="Competing bakeries and pastry shops"
|
||||
),
|
||||
"transport_hubs": POICategory(
|
||||
name="transport_hubs",
|
||||
osm_query='["public_transport"~"station|stop"]["railway"~"station|subway_entrance|tram_stop"]',
|
||||
search_radius_m=800,
|
||||
weight=1.4, # High impact - commuter foot traffic
|
||||
description="Public transport stations and hubs"
|
||||
),
|
||||
"coworking": POICategory(
|
||||
name="coworking",
|
||||
osm_query='["amenity"="coworking_space"]',
|
||||
search_radius_m=600,
|
||||
weight=1.1, # Moderate-high impact - flexible workers
|
||||
description="Coworking spaces and shared offices"
|
||||
),
|
||||
"retail": POICategory(
|
||||
name="retail",
|
||||
osm_query='["shop"]',
|
||||
search_radius_m=500,
|
||||
weight=0.9, # Moderate impact - general foot traffic
|
||||
description="Retail shops and commercial areas"
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
# Feature Relevance Thresholds
|
||||
# Determines which POI features are significant enough to include in ML models
|
||||
# Based on retail gravity model research and distance decay patterns
|
||||
RELEVANCE_THRESHOLDS: Dict[str, Dict[str, float]] = {
|
||||
"schools": {
|
||||
"min_proximity_score": 0.5, # At least moderate proximity required
|
||||
"max_distance_to_nearest_m": 500, # Must be within 500m
|
||||
"min_count": 1 # At least 1 school
|
||||
},
|
||||
"offices": {
|
||||
"min_proximity_score": 0.3,
|
||||
"max_distance_to_nearest_m": 800,
|
||||
"min_count": 2 # Offices are common; need multiple for impact
|
||||
},
|
||||
"gyms_sports": {
|
||||
"min_proximity_score": 0.4,
|
||||
"max_distance_to_nearest_m": 600,
|
||||
"min_count": 1
|
||||
},
|
||||
"residential": {
|
||||
"min_proximity_score": 1.0, # High threshold; residential is everywhere in cities
|
||||
"max_distance_to_nearest_m": 400,
|
||||
"min_count": 5 # Need significant residential density
|
||||
},
|
||||
"tourism": {
|
||||
"min_proximity_score": 0.2, # Lower threshold; tourism is high-impact even at distance
|
||||
"max_distance_to_nearest_m": 1000,
|
||||
"min_count": 1
|
||||
},
|
||||
"competitors": {
|
||||
"min_proximity_score": 0.1, # Any competition is relevant (even distant)
|
||||
"max_distance_to_nearest_m": 1000,
|
||||
"min_count": 1
|
||||
},
|
||||
"transport_hubs": {
|
||||
"min_proximity_score": 0.4,
|
||||
"max_distance_to_nearest_m": 800,
|
||||
"min_count": 1
|
||||
},
|
||||
"coworking": {
|
||||
"min_proximity_score": 0.3,
|
||||
"max_distance_to_nearest_m": 600,
|
||||
"min_count": 1
|
||||
},
|
||||
"retail": {
|
||||
"min_proximity_score": 0.8, # Retail is common; higher bar for relevance
|
||||
"max_distance_to_nearest_m": 500,
|
||||
"min_count": 3
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
# Overpass API Configuration
|
||||
OVERPASS_API_URL = "https://overpass-api.de/api/interpreter"
|
||||
OVERPASS_TIMEOUT_SECONDS = 30
|
||||
OVERPASS_MAX_RETRIES = 4 # Increased from 3 to 4 for better resilience
|
||||
OVERPASS_RETRY_DELAY_SECONDS = 2 # Base delay (will use exponential backoff)
|
||||
|
||||
|
||||
# POI Cache Configuration
|
||||
POI_CACHE_TTL_DAYS = 90 # Cache POI results for 90 days
|
||||
POI_REFRESH_INTERVAL_DAYS = 180 # Refresh every 6 months
|
||||
POI_COORDINATE_PRECISION = 4 # Decimal places for cache key (≈10m precision)
|
||||
|
||||
|
||||
# Distance Bands for Feature Engineering (meters)
|
||||
DISTANCE_BANDS = [
|
||||
(0, 100), # Immediate proximity
|
||||
(100, 300), # Primary catchment (walking distance)
|
||||
(300, 500), # Secondary catchment
|
||||
(500, 1000) # Tertiary catchment
|
||||
]
|
||||
|
||||
|
||||
# Competitive Pressure Zones
|
||||
COMPETITOR_ZONES = {
|
||||
"direct": {
|
||||
"max_distance_m": 100,
|
||||
"pressure_multiplier": -1.0 # Strong negative impact
|
||||
},
|
||||
"nearby": {
|
||||
"max_distance_m": 500,
|
||||
"pressure_multiplier": -0.5 # Moderate negative impact
|
||||
},
|
||||
"market": {
|
||||
"max_distance_m": 1000,
|
||||
"min_count_for_district": 5, # If 5+ bakeries = bakery district
|
||||
"district_multiplier": 0.3, # Positive impact (destination area)
|
||||
"normal_multiplier": -0.2 # Slight negative (competitive market)
|
||||
}
|
||||
}
|
||||
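RELEVANCE_THRESHOLDS above gates which POI categories are significant enough to become ML features. A small sketch of such a gate follows; the function name and the aggregated inputs (count, nearest distance, proximity score) are illustrative assumptions, while the threshold keys come from this file.

# Sketch of a relevance gate built on RELEVANCE_THRESHOLDS above.
from app.core.poi_config import RELEVANCE_THRESHOLDS

def is_category_relevant(category: str, count: int,
                         nearest_distance_m: float,
                         proximity_score: float) -> bool:
    """Return True if a POI category clears all three thresholds."""
    limits = RELEVANCE_THRESHOLDS.get(category)
    if limits is None:
        return False
    return (count >= limits["min_count"]
            and nearest_distance_m <= limits["max_distance_to_nearest_m"]
            and proximity_score >= limits["min_proximity_score"])

# Example: 2 competing bakeries with the nearest 350 m away and a proximity
# score of 0.4 clears the "competitors" thresholds (>=1, <=1000 m, >=0.1).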
16
services/external/app/core/redis_client.py
vendored
Normal file
16
services/external/app/core/redis_client.py
vendored
Normal file
@@ -0,0 +1,16 @@
"""
Redis Client for POI Service

Provides access to shared Redis client for POI caching.
"""

from shared.redis_utils import get_redis_client as get_shared_redis_client


async def get_redis_client():
    """
    Get Redis client for POI service.

    Uses shared Redis infrastructure from shared utilities.
    """
    return await get_shared_redis_client()
0
services/external/app/external/__init__.py
vendored
Normal file
0
services/external/app/external/__init__.py
vendored
Normal file
1004
services/external/app/external/aemet.py
vendored
Normal file
1004
services/external/app/external/aemet.py
vendored
Normal file
File diff suppressed because it is too large
Load Diff
10
services/external/app/external/apis/__init__.py
vendored
Normal file
10
services/external/app/external/apis/__init__.py
vendored
Normal file
@@ -0,0 +1,10 @@
# ================================================================
# services/external/app/external/apis/__init__.py
# ================================================================
"""
External API clients module - Scalable architecture for multiple cities
"""

from .traffic import TrafficAPIClientFactory

__all__ = ["TrafficAPIClientFactory"]
410
services/external/app/external/apis/madrid_traffic_client.py
vendored
Normal file
410
services/external/app/external/apis/madrid_traffic_client.py
vendored
Normal file
@@ -0,0 +1,410 @@
|
||||
# ================================================================
|
||||
# services/data/app/external/apis/madrid_traffic_client.py
|
||||
# ================================================================
|
||||
"""
|
||||
Madrid traffic client - Orchestration layer only
|
||||
Coordinates between HTTP client, data processor, and business logic components
|
||||
"""
|
||||
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from typing import Dict, List, Any, Optional, Tuple
|
||||
import structlog
|
||||
|
||||
from .traffic import BaseTrafficClient, SupportedCity
|
||||
from ..base_client import BaseAPIClient
|
||||
from ..clients.madrid_client import MadridTrafficAPIClient
|
||||
from ..processors.madrid_processor import MadridTrafficDataProcessor
|
||||
from ..processors.madrid_business_logic import MadridTrafficAnalyzer
|
||||
from ..models.madrid_models import TrafficRecord, CongestionLevel
|
||||
|
||||
class MadridTrafficClient(BaseTrafficClient, BaseAPIClient):
|
||||
"""
|
||||
Enhanced Madrid traffic client - Orchestration layer
|
||||
Coordinates HTTP, processing, and business logic components
|
||||
"""
|
||||
|
||||
# Madrid geographic bounds
|
||||
MADRID_BOUNDS = {
|
||||
'lat_min': 40.31, 'lat_max': 40.56,
|
||||
'lon_min': -3.89, 'lon_max': -3.51
|
||||
}
|
||||
|
||||
# Configuration constants
|
||||
MAX_HISTORICAL_DAYS = 1095 # 3 years
|
||||
MAX_CSV_PROCESSING_ROWS = 5000000
|
||||
MEASUREMENT_POINTS_LIMIT = 20
|
||||
|
||||
def __init__(self):
|
||||
BaseTrafficClient.__init__(self, SupportedCity.MADRID)
|
||||
BaseAPIClient.__init__(self, base_url="https://datos.madrid.es")
|
||||
|
||||
# Initialize components
|
||||
self.api_client = MadridTrafficAPIClient()
|
||||
self.processor = MadridTrafficDataProcessor()
|
||||
self.analyzer = MadridTrafficAnalyzer()
|
||||
|
||||
self.logger = structlog.get_logger()
|
||||
|
||||
def supports_location(self, latitude: float, longitude: float) -> bool:
|
||||
"""Check if location is within Madrid bounds"""
|
||||
return (self.MADRID_BOUNDS['lat_min'] <= latitude <= self.MADRID_BOUNDS['lat_max'] and
|
||||
self.MADRID_BOUNDS['lon_min'] <= longitude <= self.MADRID_BOUNDS['lon_max'])
|
||||
|
||||
async def get_current_traffic(self, latitude: float, longitude: float) -> Optional[Dict[str, Any]]:
|
||||
"""Get current traffic data with enhanced pedestrian inference"""
|
||||
try:
|
||||
if not self.supports_location(latitude, longitude):
|
||||
self.logger.warning("Location outside Madrid bounds", lat=latitude, lon=longitude)
|
||||
return None
|
||||
|
||||
# Fetch XML data
|
||||
xml_content = await self.api_client.fetch_current_traffic_xml()
|
||||
if not xml_content:
|
||||
self.logger.warning("No XML content received")
|
||||
return None
|
||||
|
||||
# Parse XML data
|
||||
traffic_points = self.processor.parse_traffic_xml(xml_content)
|
||||
if not traffic_points:
|
||||
self.logger.warning("No traffic points found in XML - API may be temporarily unavailable")
|
||||
return None
|
||||
|
||||
# Find nearest traffic point
|
||||
nearest_point = self.analyzer.find_nearest_traffic_point(traffic_points, latitude, longitude)
|
||||
if not nearest_point:
|
||||
self.logger.warning("No nearby traffic points found")
|
||||
return None
|
||||
|
||||
# Enhance with business logic
|
||||
enhanced_data = await self._enhance_traffic_data(nearest_point, latitude, longitude)
|
||||
|
||||
self.logger.info("Current traffic data retrieved",
|
||||
point_id=nearest_point.get('measurement_point_id'),
|
||||
distance=enhanced_data.get('distance_km', 0))
|
||||
|
||||
return enhanced_data
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error("Error getting current traffic", error=str(e))
|
||||
return None
|
||||
|
||||
async def get_historical_traffic(self, latitude: float, longitude: float,
|
||||
start_date: datetime, end_date: datetime) -> List[Dict[str, Any]]:
|
||||
"""Get historical traffic data with pedestrian enhancement"""
|
||||
try:
|
||||
if not self.supports_location(latitude, longitude):
|
||||
self.logger.warning("Location outside Madrid bounds", lat=latitude, lon=longitude)
|
||||
return []
|
||||
|
||||
# Validate date range
|
||||
if (end_date - start_date).days > self.MAX_HISTORICAL_DAYS:
|
||||
self.logger.warning("Date range too large, truncating",
|
||||
requested_days=(end_date - start_date).days,
|
||||
max_days=self.MAX_HISTORICAL_DAYS)
|
||||
start_date = end_date - timedelta(days=self.MAX_HISTORICAL_DAYS)
|
||||
|
||||
# Fetch measurement points registry
|
||||
csv_content = await self.api_client.fetch_measurement_points_csv()
|
||||
if not csv_content:
|
||||
self.logger.error("Failed to fetch measurement points registry")
|
||||
return []
|
||||
|
||||
# Parse measurement points
|
||||
measurement_points = self.processor.parse_measurement_points_csv(csv_content)
|
||||
if not measurement_points:
|
||||
self.logger.error("No measurement points found")
|
||||
return []
|
||||
|
||||
# Find nearest measurement points
|
||||
nearest_points = self.analyzer.find_nearest_measurement_points(
|
||||
measurement_points, latitude, longitude, num_points=3
|
||||
)
|
||||
|
||||
if not nearest_points:
|
||||
self.logger.warning("No nearby measurement points found")
|
||||
return []
|
||||
|
||||
# Process historical data
|
||||
historical_records = await self._fetch_historical_data_enhanced(
|
||||
latitude, longitude, start_date, end_date, nearest_points
|
||||
)
|
||||
|
||||
self.logger.info("Historical traffic data retrieved",
|
||||
records_count=len(historical_records),
|
||||
date_range=f"{start_date.date()} to {end_date.date()}")
|
||||
|
||||
return historical_records
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error("Error getting historical traffic", error=str(e))
|
||||
return []
|
||||
|
||||
async def get_events(self, latitude: float, longitude: float,
|
||||
radius_km: float = 5.0) -> List[Dict[str, Any]]:
|
||||
"""Get traffic events (incidents, construction, etc.)"""
|
||||
# Madrid doesn't provide separate events endpoint
|
||||
# Return enhanced current traffic data as events
|
||||
current_data = await self.get_current_traffic(latitude, longitude)
|
||||
if current_data and current_data.get('congestion_level') in ['high', 'blocked']:
|
||||
return [{
|
||||
'type': 'congestion',
|
||||
'severity': current_data.get('congestion_level'),
|
||||
'description': f"High traffic congestion at {current_data.get('measurement_point_name', 'measurement point')}",
|
||||
'location': {
|
||||
'latitude': current_data.get('latitude'),
|
||||
'longitude': current_data.get('longitude')
|
||||
},
|
||||
'timestamp': current_data.get('timestamp')
|
||||
}]
|
||||
return []
|
||||
|
||||
|
||||
async def _enhance_traffic_data(self, traffic_point: Dict[str, Any],
|
||||
query_lat: float, query_lon: float) -> Dict[str, Any]:
|
||||
"""Enhance traffic data with business logic and pedestrian inference"""
|
||||
# Calculate distance
|
||||
distance_km = self.analyzer.calculate_distance(
|
||||
query_lat, query_lon,
|
||||
traffic_point.get('latitude', 0),
|
||||
traffic_point.get('longitude', 0)
|
||||
)
|
||||
|
||||
# Classify road type
|
||||
road_type = self.analyzer.classify_road_type(
|
||||
traffic_point.get('measurement_point_name', '')
|
||||
)
|
||||
|
||||
# Get congestion level
|
||||
congestion_level = self.analyzer.get_congestion_level(
|
||||
traffic_point.get('ocupacion', 0)
|
||||
)
|
||||
|
||||
# Create traffic record for pedestrian inference
|
||||
traffic_record = TrafficRecord(
|
||||
date=datetime.now(timezone.utc),
|
||||
traffic_volume=traffic_point.get('intensidad', 0),
|
||||
occupation_percentage=int(traffic_point.get('ocupacion', 0)),
|
||||
load_percentage=traffic_point.get('carga', 0),
|
||||
average_speed=30, # Default speed
|
||||
congestion_level=congestion_level,
|
||||
pedestrian_count=0, # Will be calculated
|
||||
measurement_point_id=traffic_point.get('measurement_point_id', ''),
|
||||
measurement_point_name=traffic_point.get('measurement_point_name', ''),
|
||||
road_type=road_type,
|
||||
source='madrid_current_xml'
|
||||
)
|
||||
|
||||
# Calculate pedestrian count
|
||||
location_context = {
|
||||
'latitude': traffic_point.get('latitude'),
|
||||
'longitude': traffic_point.get('longitude'),
|
||||
'measurement_point_name': traffic_point.get('measurement_point_name')
|
||||
}
|
||||
|
||||
pedestrian_count, inference_metadata = self.analyzer.calculate_pedestrian_flow(
|
||||
traffic_record, location_context
|
||||
)
|
||||
|
||||
# Calculate average speed based on congestion level
|
||||
if congestion_level == 'high':
|
||||
average_speed = 15.0
|
||||
elif congestion_level == 'medium':
|
||||
average_speed = 35.0
|
||||
elif congestion_level == 'low':
|
||||
average_speed = 50.0
|
||||
else:
|
||||
average_speed = 30.0 # default
|
||||
|
||||
# Build enhanced response with required API fields
|
||||
enhanced_data = {
|
||||
'date': datetime.now(timezone.utc), # Required API field
|
||||
'timestamp': datetime.now(timezone.utc),
|
||||
'latitude': traffic_point.get('latitude'),
|
||||
'longitude': traffic_point.get('longitude'),
|
||||
'measurement_point_id': traffic_point.get('measurement_point_id'),
|
||||
'measurement_point_name': traffic_point.get('measurement_point_name'),
|
||||
'traffic_volume': traffic_point.get('intensidad', 0),
|
||||
'pedestrian_count': pedestrian_count,
|
||||
'congestion_level': congestion_level,
|
||||
'average_speed': average_speed, # Required API field
|
||||
'occupation_percentage': int(traffic_point.get('ocupacion', 0)),
|
||||
'load_percentage': traffic_point.get('carga', 0),
|
||||
'road_type': road_type,
|
||||
'distance_km': distance_km,
|
||||
'source': 'madrid_current_xml',
|
||||
'city': 'madrid',
|
||||
'inference_metadata': inference_metadata,
|
||||
'raw_data': traffic_point
|
||||
}
|
||||
|
||||
return enhanced_data
|
||||
|
||||
async def _fetch_historical_data_enhanced(self, latitude: float, longitude: float,
|
||||
start_date: datetime, end_date: datetime,
|
||||
nearest_points: List[Tuple[str, Dict[str, Any], float]]) -> List[Dict[str, Any]]:
|
||||
"""Fetch and process historical traffic data"""
|
||||
historical_records = []
|
||||
|
||||
try:
|
||||
# Process by year and month to avoid memory issues
|
||||
current_date = start_date.replace(day=1) # Start from beginning of month
|
||||
now = datetime.now()
|
||||
|
||||
while current_date <= end_date:
|
||||
year = current_date.year
|
||||
month = current_date.month
|
||||
|
||||
# Skip current month and future months (no historical data available yet)
|
||||
if (year == now.year and month >= now.month) or year > now.year:
|
||||
self.logger.info("Skipping current/future month - no historical data available",
|
||||
year=year, month=month)
|
||||
current_date = self._next_month(current_date)
|
||||
continue
|
||||
|
||||
# Build historical URL
|
||||
zip_url = self.api_client._build_historical_url(year, month)
|
||||
|
||||
self.logger.info("Processing historical ZIP file",
|
||||
year=year, month=month, zip_url=zip_url)
|
||||
|
||||
# Fetch ZIP content
|
||||
zip_content = await self.api_client.fetch_historical_zip(zip_url)
|
||||
if not zip_content:
|
||||
self.logger.warning("Failed to fetch historical ZIP", url=zip_url)
|
||||
current_date = self._next_month(current_date)
|
||||
continue
|
||||
|
||||
# Process ZIP content with enhanced parsing
|
||||
month_records = await self._process_historical_zip_enhanced(
|
||||
zip_content, zip_url, latitude, longitude, nearest_points
|
||||
)
|
||||
|
||||
# Filter by date range - ensure timezone consistency
|
||||
# Make sure start_date and end_date have timezone info for comparison
|
||||
start_tz = start_date if start_date.tzinfo else start_date.replace(tzinfo=timezone.utc)
|
||||
end_tz = end_date if end_date.tzinfo else end_date.replace(tzinfo=timezone.utc)
|
||||
|
||||
filtered_records = []
|
||||
for record in month_records:
|
||||
record_date = record.get('date')
|
||||
if not record_date:
|
||||
continue
|
||||
|
||||
# Ensure record date has timezone info
|
||||
if not record_date.tzinfo:
|
||||
record_date = record_date.replace(tzinfo=timezone.utc)
|
||||
|
||||
# Now compare with consistent timezone info
|
||||
if start_tz <= record_date <= end_tz:
|
||||
filtered_records.append(record)
|
||||
|
||||
historical_records.extend(filtered_records)
|
||||
|
||||
self.logger.info("Month processing completed",
|
||||
year=year, month=month,
|
||||
month_records=len(month_records),
|
||||
filtered_records=len(filtered_records),
|
||||
total_records=len(historical_records))
|
||||
|
||||
# Move to next month - extracted to helper method
|
||||
current_date = self._next_month(current_date)
|
||||
|
||||
return historical_records
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error("Error fetching historical data", error=str(e))
|
||||
return historical_records # Return partial results
|
||||
|
||||
async def _process_historical_zip_enhanced(self, zip_content: bytes, zip_url: str,
|
||||
latitude: float, longitude: float,
|
||||
nearest_points: List[Tuple[str, Dict[str, Any], float]]) -> List[Dict[str, Any]]:
|
||||
"""Process historical ZIP file with memory-efficient streaming"""
|
||||
try:
|
||||
import zipfile
|
||||
import io
|
||||
import csv
|
||||
import gc
|
||||
|
||||
historical_records = []
|
||||
nearest_ids = {p[0] for p in nearest_points}
|
||||
|
||||
with zipfile.ZipFile(io.BytesIO(zip_content)) as zip_file:
|
||||
csv_files = [f for f in zip_file.namelist() if f.lower().endswith('.csv')]
|
||||
|
||||
for csv_filename in csv_files:
|
||||
try:
|
||||
# Stream CSV file line-by-line to avoid loading entire file into memory
|
||||
with zip_file.open(csv_filename) as csv_file:
|
||||
# Use TextIOWrapper for efficient line-by-line reading
|
||||
import codecs
|
||||
text_wrapper = codecs.iterdecode(csv_file, 'utf-8', errors='ignore')
|
||||
csv_reader = csv.DictReader(text_wrapper, delimiter=';')
|
||||
|
||||
# Process in small batches
|
||||
batch_size = 5000
|
||||
batch_records = []
|
||||
row_count = 0
|
||||
|
||||
for row in csv_reader:
|
||||
row_count += 1
|
||||
measurement_point_id = row.get('id', '').strip()
|
||||
|
||||
# Skip rows we don't need
|
||||
if measurement_point_id not in nearest_ids:
|
||||
continue
|
||||
|
||||
try:
|
||||
record_data = await self.processor.parse_historical_csv_row(row, nearest_points)
|
||||
if record_data:
|
||||
batch_records.append(record_data)
|
||||
|
||||
# Store and clear batch when full
|
||||
if len(batch_records) >= batch_size:
|
||||
historical_records.extend(batch_records)
|
||||
batch_records = []
|
||||
gc.collect()
|
||||
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
# Store remaining records
|
||||
if batch_records:
|
||||
historical_records.extend(batch_records)
|
||||
batch_records = []
|
||||
|
||||
self.logger.info("CSV file processed",
|
||||
filename=csv_filename,
|
||||
rows_scanned=row_count,
|
||||
records_extracted=len(historical_records))
|
||||
|
||||
# Aggressive garbage collection after each CSV
|
||||
gc.collect()
|
||||
|
||||
except Exception as csv_error:
|
||||
self.logger.warning("Error processing CSV file",
|
||||
filename=csv_filename,
|
||||
error=str(csv_error))
|
||||
continue
|
||||
|
||||
self.logger.info("Historical ZIP processing completed",
|
||||
zip_url=zip_url,
|
||||
total_records=len(historical_records))
|
||||
|
||||
# Final cleanup
|
||||
del zip_content
|
||||
gc.collect()
|
||||
|
||||
return historical_records
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error("Error processing historical ZIP file",
|
||||
zip_url=zip_url, error=str(e))
|
||||
return []
|
||||
|
||||
def _next_month(self, current_date: datetime) -> datetime:
|
||||
"""Helper method to move to next month"""
|
||||
if current_date.month == 12:
|
||||
return current_date.replace(year=current_date.year + 1, month=1)
|
||||
else:
|
||||
return current_date.replace(month=current_date.month + 1)
|
||||
|
||||
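When filtering monthly historical records, the client above normalises naive datetimes to UTC before comparing them, so aware and naive values are never mixed. A minimal standalone sketch of that normalisation, with illustrative helper names:

# Sketch of the timezone normalisation used in the historical filter above.
from datetime import datetime, timezone

def as_utc(dt: datetime) -> datetime:
    """Attach UTC to naive datetimes; leave aware ones untouched."""
    return dt if dt.tzinfo else dt.replace(tzinfo=timezone.utc)

def in_range(record_date: datetime, start: datetime, end: datetime) -> bool:
    return as_utc(start) <= as_utc(record_date) <= as_utc(end)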
257
services/external/app/external/apis/traffic.py
vendored
Normal file
257
services/external/app/external/apis/traffic.py
vendored
Normal file
@@ -0,0 +1,257 @@
|
||||
# ================================================================
|
||||
# services/data/app/external/apis/traffic.py
|
||||
# ================================================================
|
||||
"""
|
||||
Traffic API abstraction layer for multiple cities
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
from abc import ABC, abstractmethod
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from typing import Dict, List, Any, Optional, Tuple
|
||||
import structlog
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
|
||||
class SupportedCity(Enum):
|
||||
"""Supported cities for traffic data collection"""
|
||||
MADRID = "madrid"
|
||||
BARCELONA = "barcelona"
|
||||
VALENCIA = "valencia"
|
||||
|
||||
|
||||
class BaseTrafficClient(ABC):
|
||||
"""
|
||||
Abstract base class for city-specific traffic clients
|
||||
Defines the contract that all traffic clients must implement
|
||||
"""
|
||||
|
||||
def __init__(self, city: SupportedCity):
|
||||
self.city = city
|
||||
self.logger = structlog.get_logger().bind(city=city.value)
|
||||
|
||||
@abstractmethod
|
||||
async def get_current_traffic(self, latitude: float, longitude: float) -> Optional[Dict[str, Any]]:
|
||||
"""Get current traffic data for location"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def get_historical_traffic(self, latitude: float, longitude: float,
|
||||
start_date: datetime, end_date: datetime) -> List[Dict[str, Any]]:
|
||||
"""Get historical traffic data"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def get_events(self, latitude: float, longitude: float, radius_km: float = 5.0) -> List[Dict[str, Any]]:
|
||||
"""Get traffic incidents and events"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def supports_location(self, latitude: float, longitude: float) -> bool:
|
||||
"""Check if this client supports the given location"""
|
||||
pass
|
||||
|
||||
|
||||
class TrafficAPIClientFactory:
|
||||
"""
|
||||
Factory class to create appropriate traffic clients based on location
|
||||
"""
|
||||
|
||||
# City geographical bounds
|
||||
CITY_BOUNDS = {
|
||||
SupportedCity.MADRID: {
|
||||
'lat_min': 40.31, 'lat_max': 40.56,
|
||||
'lon_min': -3.89, 'lon_max': -3.51
|
||||
},
|
||||
SupportedCity.BARCELONA: {
|
||||
'lat_min': 41.32, 'lat_max': 41.47,
|
||||
'lon_min': 2.05, 'lon_max': 2.25
|
||||
},
|
||||
SupportedCity.VALENCIA: {
|
||||
'lat_min': 39.42, 'lat_max': 39.52,
|
||||
'lon_min': -0.42, 'lon_max': -0.32
|
||||
}
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def get_client_for_location(cls, latitude: float, longitude: float) -> Optional[BaseTrafficClient]:
|
||||
"""
|
||||
Get appropriate traffic client for given location
|
||||
|
||||
Args:
|
||||
latitude: Query location latitude
|
||||
longitude: Query location longitude
|
||||
|
||||
Returns:
|
||||
BaseTrafficClient instance or None if location not supported
|
||||
"""
|
||||
try:
|
||||
# Check each city's bounds
|
||||
for city, bounds in cls.CITY_BOUNDS.items():
|
||||
if (bounds['lat_min'] <= latitude <= bounds['lat_max'] and
|
||||
bounds['lon_min'] <= longitude <= bounds['lon_max']):
|
||||
|
||||
logger.info("Location matched to city",
|
||||
city=city.value, lat=latitude, lon=longitude)
|
||||
return cls._create_client(city)
|
||||
|
||||
# If no specific city matches, try to find closest supported city
|
||||
closest_city = cls._find_closest_city(latitude, longitude)
|
||||
if closest_city:
|
||||
logger.info("Using closest city for location",
|
||||
closest_city=closest_city.value, lat=latitude, lon=longitude)
|
||||
return cls._create_client(closest_city)
|
||||
|
||||
logger.warning("No traffic client available for location",
|
||||
lat=latitude, lon=longitude)
|
||||
return None
|
||||
|
||||
except Exception as e:
|
||||
logger.error("Error getting traffic client for location",
|
||||
lat=latitude, lon=longitude, error=str(e))
|
||||
return None
|
||||
|
||||
@classmethod
|
||||
def _create_client(cls, city: SupportedCity) -> BaseTrafficClient:
|
||||
"""Create traffic client for specific city"""
|
||||
if city == SupportedCity.MADRID:
|
||||
from .madrid_traffic_client import MadridTrafficClient
|
||||
return MadridTrafficClient()
|
||||
elif city == SupportedCity.BARCELONA:
|
||||
# Future implementation
|
||||
raise NotImplementedError(f"Traffic client for {city.value} not yet implemented")
|
||||
elif city == SupportedCity.VALENCIA:
|
||||
# Future implementation
|
||||
raise NotImplementedError(f"Traffic client for {city.value} not yet implemented")
|
||||
else:
|
||||
raise ValueError(f"Unsupported city: {city}")
|
||||
|
||||
@classmethod
|
||||
def _find_closest_city(cls, latitude: float, longitude: float) -> Optional[SupportedCity]:
|
||||
"""Find closest supported city to given coordinates"""
|
||||
import math
|
||||
|
||||
def distance(lat1, lon1, lat2, lon2):
|
||||
"""Calculate distance between two coordinates"""
|
||||
R = 6371 # Earth's radius in km
|
||||
dlat = math.radians(lat2 - lat1)
|
||||
dlon = math.radians(lon2 - lon1)
|
||||
a = (math.sin(dlat/2) * math.sin(dlat/2) +
|
||||
math.cos(math.radians(lat1)) * math.cos(math.radians(lat2)) *
|
||||
math.sin(dlon/2) * math.sin(dlon/2))
|
||||
c = 2 * math.atan2(math.sqrt(a), math.sqrt(1-a))
|
||||
return R * c
|
||||
|
||||
min_distance = float('inf')
|
||||
closest_city = None
|
||||
|
||||
# City centers for distance calculation
|
||||
city_centers = {
|
||||
SupportedCity.MADRID: (40.4168, -3.7038),
|
||||
SupportedCity.BARCELONA: (41.3851, 2.1734),
|
||||
SupportedCity.VALENCIA: (39.4699, -0.3763)
|
||||
}
|
||||
|
||||
for city, (city_lat, city_lon) in city_centers.items():
|
||||
dist = distance(latitude, longitude, city_lat, city_lon)
|
||||
if dist < min_distance and dist < 100: # Within 100km
|
||||
min_distance = dist
|
||||
closest_city = city
|
||||
|
||||
return closest_city
|
||||
|
||||
@classmethod
|
||||
def get_supported_cities(cls) -> List[Dict[str, Any]]:
|
||||
"""Get list of supported cities with their bounds"""
|
||||
cities = []
|
||||
for city, bounds in cls.CITY_BOUNDS.items():
|
||||
cities.append({
|
||||
"city": city.value,
|
||||
"bounds": bounds,
|
||||
"status": "active" if city == SupportedCity.MADRID else "planned"
|
||||
})
|
||||
return cities
|
||||
|
||||
|
||||
class UniversalTrafficClient:
|
||||
"""
|
||||
Universal traffic client that delegates to appropriate city-specific clients
|
||||
This is the main interface that external services should use
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self.factory = TrafficAPIClientFactory()
|
||||
self.client_cache = {} # Cache clients for performance
|
||||
|
||||
async def get_current_traffic(self, latitude: float, longitude: float) -> Optional[Dict[str, Any]]:
|
||||
"""Get current traffic data for any supported location"""
|
||||
try:
|
||||
client = self._get_client_for_location(latitude, longitude)
|
||||
if client:
|
||||
return await client.get_current_traffic(latitude, longitude)
|
||||
else:
|
||||
logger.warning("No traffic data available for location",
|
||||
lat=latitude, lon=longitude)
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.error("Error getting current traffic",
|
||||
lat=latitude, lon=longitude, error=str(e))
|
||||
return None
|
||||
|
||||
async def get_historical_traffic(self, latitude: float, longitude: float,
|
||||
start_date: datetime, end_date: datetime) -> List[Dict[str, Any]]:
|
||||
"""Get historical traffic data for any supported location"""
|
||||
try:
|
||||
client = self._get_client_for_location(latitude, longitude)
|
||||
if client:
|
||||
return await client.get_historical_traffic(latitude, longitude, start_date, end_date)
|
||||
else:
|
||||
logger.warning("No historical traffic data available for location",
|
||||
lat=latitude, lon=longitude)
|
||||
return []
|
||||
except Exception as e:
|
||||
logger.error("Error getting historical traffic",
|
||||
lat=latitude, lon=longitude, error=str(e))
|
||||
return []
|
||||
|
||||
async def get_events(self, latitude: float, longitude: float, radius_km: float = 5.0) -> List[Dict[str, Any]]:
|
||||
"""Get traffic events for any supported location"""
|
||||
try:
|
||||
client = self._get_client_for_location(latitude, longitude)
|
||||
if client:
|
||||
return await client.get_events(latitude, longitude, radius_km)
|
||||
else:
|
||||
return []
|
||||
except Exception as e:
|
||||
logger.error("Error getting traffic events",
|
||||
lat=latitude, lon=longitude, error=str(e))
|
||||
return []
|
||||
|
||||
def _get_client_for_location(self, latitude: float, longitude: float) -> Optional[BaseTrafficClient]:
|
||||
"""Get cached or create new client for location"""
|
||||
cache_key = f"{latitude:.4f},{longitude:.4f}"
|
||||
|
||||
if cache_key not in self.client_cache:
|
||||
client = self.factory.get_client_for_location(latitude, longitude)
|
||||
self.client_cache[cache_key] = client
|
||||
|
||||
return self.client_cache[cache_key]
|
||||
|
||||
def get_location_info(self, latitude: float, longitude: float) -> Dict[str, Any]:
|
||||
"""Get information about traffic data availability for location"""
|
||||
client = self._get_client_for_location(latitude, longitude)
|
||||
if client:
|
||||
return {
|
||||
"supported": True,
|
||||
"city": client.city.value,
|
||||
"features": ["current_traffic", "historical_traffic", "events"]
|
||||
}
|
||||
else:
|
||||
return {
|
||||
"supported": False,
|
||||
"city": None,
|
||||
"features": [],
|
||||
"message": "No traffic data available for this location"
|
||||
}
|
||||
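The factory above falls back to the closest supported city within 100 km using a haversine distance on fixed city centres. The sketch below reproduces that calculation standalone; the helper names are illustrative, the coordinates and cut-off are taken from _find_closest_city.

# Standalone sketch of the closest-city fallback above.
import math

CITY_CENTERS = {
    "madrid": (40.4168, -3.7038),
    "barcelona": (41.3851, 2.1734),
    "valencia": (39.4699, -0.3763),
}

def haversine_km(lat1, lon1, lat2, lon2):
    r = 6371  # Earth's radius in km
    dlat = math.radians(lat2 - lat1)
    dlon = math.radians(lon2 - lon1)
    a = (math.sin(dlat / 2) ** 2
         + math.cos(math.radians(lat1)) * math.cos(math.radians(lat2))
         * math.sin(dlon / 2) ** 2)
    return r * 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))

def closest_city(lat, lon, max_km=100):
    city, dist = min(((c, haversine_km(lat, lon, clat, clon))
                      for c, (clat, clon) in CITY_CENTERS.items()),
                     key=lambda item: item[1])
    return city if dist < max_km else None

# closest_city(40.45, -3.70) -> "madrid"; closest_city(37.38, -5.97) -> None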
204
services/external/app/external/base_client.py
vendored
Normal file
204
services/external/app/external/base_client.py
vendored
Normal file
@@ -0,0 +1,204 @@
|
||||
# ================================================================
|
||||
# services/data/app/external/base_client.py
|
||||
# ================================================================
|
||||
"""Base HTTP client for external APIs - Enhanced for AEMET"""
|
||||
|
||||
import httpx
|
||||
from typing import Dict, Any, Optional
|
||||
import structlog
|
||||
from datetime import datetime
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
class BaseAPIClient:
|
||||
|
||||
def __init__(self, base_url: str, api_key: Optional[str] = None):
|
||||
self.base_url = base_url
|
||||
self.api_key = api_key
|
||||
# Increase timeout and add connection retries for unstable AEMET API
|
||||
self.timeout = httpx.Timeout(60.0) # Increased from 30s
|
||||
self.retries = 3
|
||||
|
||||
async def _get(self, endpoint: str, params: Optional[Dict] = None, headers: Optional[Dict] = None) -> Optional[Dict[str, Any]]:
|
||||
"""Make GET request with retry logic for unstable APIs"""
|
||||
url = f"{self.base_url}{endpoint}"
|
||||
|
||||
# Add API key to params for AEMET (not headers)
|
||||
request_params = params or {}
|
||||
if self.api_key:
|
||||
request_params["api_key"] = self.api_key
|
||||
|
||||
# Add headers if provided
|
||||
request_headers = headers or {}
|
||||
|
||||
logger.debug("Making API request", url=url, params=request_params)
|
||||
|
||||
# Retry logic for unstable AEMET API
|
||||
for attempt in range(self.retries):
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=self.timeout) as client:
|
||||
response = await client.get(url, params=request_params, headers=request_headers)
|
||||
response.raise_for_status()
|
||||
|
||||
# Log response for debugging
|
||||
response_data = response.json()
|
||||
logger.debug("API response received",
|
||||
status_code=response.status_code,
|
||||
response_keys=list(response_data.keys()) if isinstance(response_data, dict) else "non-dict",
|
||||
attempt=attempt + 1)
|
||||
|
||||
return response_data
|
||||
|
||||
except httpx.HTTPStatusError as e:
|
||||
logger.error("HTTP error", status_code=e.response.status_code, url=url,
|
||||
response_text=e.response.text[:200], attempt=attempt + 1)
|
||||
|
||||
# Handle rate limiting (429) with longer backoff
|
||||
if e.response.status_code == 429:
|
||||
import asyncio
|
||||
# Exponential backoff: 5s, 15s, 45s for rate limits
|
||||
wait_time = 5 * (3 ** attempt)
|
||||
logger.warning(f"Rate limit hit, waiting {wait_time}s before retry",
|
||||
attempt=attempt + 1, max_attempts=self.retries)
|
||||
await asyncio.sleep(wait_time)
|
||||
if attempt < self.retries - 1:
|
||||
continue
|
||||
|
||||
if attempt == self.retries - 1: # Last attempt
|
||||
return None
|
||||
except httpx.RequestError as e:
|
||||
logger.error("Request error", error=str(e), url=url, attempt=attempt + 1)
|
||||
if attempt == self.retries - 1: # Last attempt
|
||||
return None
|
||||
|
||||
# Wait before retry (exponential backoff)
|
||||
import asyncio
|
||||
wait_time = 2 ** attempt # 1s, 2s, 4s
|
||||
logger.info(f"Retrying AEMET API in {wait_time}s", attempt=attempt + 1, max_attempts=self.retries)
|
||||
await asyncio.sleep(wait_time)
|
||||
except Exception as e:
|
||||
logger.error("Unexpected error", error=str(e), url=url, attempt=attempt + 1)
|
||||
if attempt == self.retries - 1: # Last attempt
|
||||
return None
|
||||
|
||||
return None
|
||||
|
||||
async def _fetch_url_directly(self, url: str, headers: Optional[Dict] = None) -> Optional[Dict[str, Any]]:
|
||||
"""Fetch data directly from a full URL (for AEMET datos URLs) with retry logic"""
|
||||
request_headers = headers or {}
|
||||
|
||||
logger.debug("Making direct URL request", url=url)
|
||||
|
||||
# Retry logic for unstable AEMET datos URLs
|
||||
for attempt in range(self.retries):
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=self.timeout) as client:
|
||||
response = await client.get(url, headers=request_headers)
|
||||
response.raise_for_status()
|
||||
|
||||
# Handle encoding issues common with Spanish data sources
|
||||
try:
|
||||
response_data = response.json()
|
||||
except UnicodeDecodeError:
|
||||
logger.warning("UTF-8 decode failed, trying alternative encodings", url=url)
|
||||
# Try common Spanish encodings
|
||||
for encoding in ['latin-1', 'windows-1252', 'iso-8859-1']:
|
||||
try:
|
||||
text_content = response.content.decode(encoding)
|
||||
import json
|
||||
response_data = json.loads(text_content)
|
||||
logger.info("Successfully decoded with encoding", encoding=encoding)
|
||||
break
|
||||
except (UnicodeDecodeError, json.JSONDecodeError):
|
||||
continue
|
||||
else:
|
||||
logger.error("Failed to decode response with any encoding", url=url)
|
||||
if attempt < self.retries - 1:
|
||||
continue
|
||||
return None
|
||||
|
||||
logger.debug("Direct URL response received",
|
||||
status_code=response.status_code,
|
||||
data_type=type(response_data),
|
||||
data_length=len(response_data) if isinstance(response_data, (list, dict)) else "unknown")
|
||||
|
||||
return response_data
|
||||
|
||||
except httpx.HTTPStatusError as e:
|
||||
logger.error("HTTP error in direct fetch",
|
||||
status_code=e.response.status_code,
|
||||
url=url,
|
||||
attempt=attempt + 1)
|
||||
|
||||
# On last attempt, return None
|
||||
if attempt == self.retries - 1:
|
||||
return None
|
||||
|
||||
# Wait before retry
|
||||
import asyncio
|
||||
wait_time = 2 ** attempt # 1s, 2s, 4s
|
||||
logger.info(f"Retrying datos URL in {wait_time}s",
|
||||
attempt=attempt + 1, max_attempts=self.retries)
|
||||
await asyncio.sleep(wait_time)
|
||||
|
||||
except httpx.RequestError as e:
|
||||
logger.error("Request error in direct fetch",
|
||||
error=str(e), url=url, attempt=attempt + 1)
|
||||
|
||||
# On last attempt, return None
|
||||
if attempt == self.retries - 1:
|
||||
return None
|
||||
|
||||
# Wait before retry
|
||||
import asyncio
|
||||
wait_time = 2 ** attempt # 1s, 2s, 4s
|
||||
logger.info(f"Retrying datos URL in {wait_time}s",
|
||||
attempt=attempt + 1, max_attempts=self.retries)
|
||||
await asyncio.sleep(wait_time)
|
||||
|
||||
except Exception as e:
|
||||
logger.error("Unexpected error in direct fetch",
|
||||
error=str(e), url=url, attempt=attempt + 1)
|
||||
|
||||
# On last attempt, return None
|
||||
if attempt == self.retries - 1:
|
||||
return None
|
||||
|
||||
return None
|
||||
|
||||
async def _post(self, endpoint: str, data: Optional[Dict] = None, headers: Optional[Dict] = None) -> Optional[Dict[str, Any]]:
|
||||
"""Make POST request"""
|
||||
try:
|
||||
url = f"{self.base_url}{endpoint}"
|
||||
|
||||
request_headers = headers or {}
|
||||
if self.api_key:
|
||||
request_headers["Authorization"] = f"Bearer {self.api_key}"
|
||||
|
||||
async with httpx.AsyncClient(timeout=self.timeout) as client:
|
||||
response = await client.post(url, json=data, headers=request_headers)
|
||||
response.raise_for_status()
|
||||
return response.json()
|
||||
|
||||
except httpx.HTTPStatusError as e:
|
||||
logger.error("HTTP error", status_code=e.response.status_code, url=url)
|
||||
return None
|
||||
except httpx.RequestError as e:
|
||||
logger.error("Request error", error=str(e), url=url)
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.error("Unexpected error", error=str(e), url=url)
|
||||
return None
|
||||
|
||||
async def get_direct(self, url: str, headers: Optional[Dict] = None, timeout: Optional[int] = None) -> httpx.Response:
|
||||
"""
|
||||
Public GET method for direct HTTP requests
|
||||
Returns the raw httpx Response object for maximum flexibility
|
||||
"""
|
||||
request_headers = headers or {}
|
||||
request_timeout = httpx.Timeout(timeout if timeout else 30.0)
|
||||
|
||||
async with httpx.AsyncClient(timeout=request_timeout, follow_redirects=True) as client:
|
||||
response = await client.get(url, headers=request_headers)
|
||||
response.raise_for_status()
|
||||
return response
|
||||
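BaseAPIClient above retries with two backoff schedules: 1 s / 2 s / 4 s for transient failures and 5 s / 15 s / 45 s when the API answers 429. The generic helper below illustrates that schedule only; the real client inlines the logic per request rather than using such a wrapper.

# Illustrative retry helper mirroring the backoff schedules above.
import asyncio
from typing import Awaitable, Callable, Optional, TypeVar

T = TypeVar("T")

async def retry_with_backoff(call: Callable[[], Awaitable[T]],
                             retries: int = 3,
                             rate_limited: bool = False) -> Optional[T]:
    for attempt in range(retries):
        try:
            return await call()
        except Exception:
            if attempt == retries - 1:
                return None
            wait = 5 * (3 ** attempt) if rate_limited else 2 ** attempt
            await asyncio.sleep(wait)
    return None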
12
services/external/app/external/clients/__init__.py
vendored
Normal file
12
services/external/app/external/clients/__init__.py
vendored
Normal file
@@ -0,0 +1,12 @@
# ================================================================
# services/external/app/external/clients/__init__.py
# ================================================================
"""
HTTP clients package
"""

from .madrid_client import MadridTrafficAPIClient

__all__ = [
    'MadridTrafficAPIClient'
]
146
services/external/app/external/clients/madrid_client.py
vendored
Normal file
146
services/external/app/external/clients/madrid_client.py
vendored
Normal file
@@ -0,0 +1,146 @@
|
||||
# ================================================================
|
||||
# services/data/app/external/clients/madrid_client.py
|
||||
# ================================================================
|
||||
"""
|
||||
Pure HTTP client for Madrid traffic APIs
|
||||
Handles only HTTP communication and response decoding
|
||||
"""
|
||||
|
||||
import httpx
|
||||
import structlog
|
||||
from datetime import datetime
|
||||
from typing import Optional, Dict, Any
|
||||
|
||||
from ..base_client import BaseAPIClient
|
||||
|
||||
|
||||
class MadridTrafficAPIClient(BaseAPIClient):
|
||||
"""Pure HTTP client for Madrid traffic APIs"""
|
||||
|
||||
TRAFFIC_ENDPOINT = "https://informo.madrid.es/informo/tmadrid/pm.xml"
|
||||
MEASUREMENT_POINTS_URL = "https://datos.madrid.es/egob/catalogo/202468-263-intensidad-trafico.csv"
|
||||
|
||||
def __init__(self):
|
||||
super().__init__(base_url="https://datos.madrid.es")
|
||||
self.logger = structlog.get_logger()
|
||||
|
||||
def _decode_response_content(self, response) -> Optional[str]:
|
||||
"""Decode response content with multiple encoding attempts"""
|
||||
try:
|
||||
return response.text
|
||||
except UnicodeDecodeError:
|
||||
# Try manual encoding for Spanish content
|
||||
for encoding in ['utf-8', 'latin-1', 'windows-1252', 'iso-8859-1']:
|
||||
try:
|
||||
content = response.content.decode(encoding)
|
||||
if content and len(content) > 100:
|
||||
self.logger.debug("Successfully decoded with encoding", encoding=encoding)
|
||||
return content
|
||||
except UnicodeDecodeError:
|
||||
continue
|
||||
return None
|
||||
|
||||
def _build_historical_url(self, year: int, month: int) -> str:
|
||||
"""Build historical ZIP URL for given year and month"""
|
||||
# Madrid uses a direct file pattern now: https://datos.madrid.es/egobfiles/MANUAL/208627/MM-YYYY.zip
|
||||
# Only historical data is available (not current month)
|
||||
return f"https://datos.madrid.es/egobfiles/MANUAL/208627/{month:02d}-{year}.zip"
|
||||
|
||||
async def fetch_current_traffic_xml(self, endpoint: Optional[str] = None) -> Optional[str]:
|
||||
"""Fetch current traffic XML data"""
|
||||
endpoint = endpoint or self.TRAFFIC_ENDPOINT
|
||||
|
||||
try:
|
||||
headers = {
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
|
||||
'Accept': 'application/xml,text/xml,*/*',
|
||||
'Accept-Language': 'es-ES,es;q=0.9,en;q=0.8',
|
||||
'Accept-Encoding': 'gzip, deflate, br',
|
||||
'Cache-Control': 'no-cache',
|
||||
'Referer': 'https://datos.madrid.es/'
|
||||
}
|
||||
|
||||
response = await self.get_direct(endpoint, headers=headers, timeout=30)
|
||||
|
||||
if not response or response.status_code != 200:
|
||||
self.logger.warning("Failed to fetch XML data",
|
||||
endpoint=endpoint,
|
||||
status=response.status_code if response else None)
|
||||
return None
|
||||
|
||||
# Get XML content with encoding handling
|
||||
xml_content = self._decode_response_content(response)
|
||||
if not xml_content:
|
||||
self.logger.debug("No XML content received", endpoint=endpoint)
|
||||
return None
|
||||
|
||||
self.logger.debug("Madrid XML content fetched",
|
||||
length=len(xml_content),
|
||||
endpoint=endpoint)
|
||||
|
||||
return xml_content
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error("Error fetching traffic XML data",
|
||||
endpoint=endpoint,
|
||||
error=str(e))
|
||||
return None
|
||||
|
||||
async def fetch_measurement_points_csv(self, url: Optional[str] = None) -> Optional[str]:
|
||||
"""Fetch measurement points CSV data"""
|
||||
url = url or self.MEASUREMENT_POINTS_URL
|
||||
|
||||
try:
|
||||
async with httpx.AsyncClient(
|
||||
timeout=30.0,
|
||||
headers={
|
||||
'User-Agent': 'MadridTrafficClient/2.0',
|
||||
'Accept': 'text/csv,application/csv,*/*'
|
||||
},
|
||||
follow_redirects=True
|
||||
) as client:
|
||||
|
||||
self.logger.debug("Fetching measurement points registry", url=url)
|
||||
response = await client.get(url)
|
||||
|
||||
if response.status_code == 200:
|
||||
return response.text
|
||||
else:
|
||||
self.logger.warning("Failed to fetch measurement points",
|
||||
status=response.status_code, url=url)
|
||||
return None
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error("Error fetching measurement points registry",
|
||||
url=url, error=str(e))
|
||||
return None
|
||||
|
||||
async def fetch_historical_zip(self, zip_url: str) -> Optional[bytes]:
|
||||
"""Fetch historical traffic ZIP file"""
|
||||
try:
|
||||
async with httpx.AsyncClient(
|
||||
timeout=120.0, # Longer timeout for large files
|
||||
headers={
|
||||
'User-Agent': 'MadridTrafficClient/2.0',
|
||||
'Accept': 'application/zip,*/*'
|
||||
},
|
||||
follow_redirects=True
|
||||
) as client:
|
||||
|
||||
self.logger.debug("Fetching historical ZIP", url=zip_url)
|
||||
response = await client.get(zip_url)
|
||||
|
||||
if response.status_code == 200:
|
||||
self.logger.debug("Historical ZIP fetched",
|
||||
url=zip_url,
|
||||
size=len(response.content))
|
||||
return response.content
|
||||
else:
|
||||
self.logger.warning("Failed to fetch historical ZIP",
|
||||
status=response.status_code, url=zip_url)
|
||||
return None
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error("Error fetching historical ZIP",
|
||||
url=zip_url, error=str(e))
|
||||
return None
|
||||
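Madrid publishes one historical ZIP per month under the MM-YYYY.zip pattern built by _build_historical_url above. A small sketch enumerating the URLs for a date range, with an illustrative helper name:

# Sketch of the monthly URL pattern used by _build_historical_url above.
from datetime import date

def historical_zip_urls(start: date, end: date) -> list[str]:
    """List one ZIP URL per month between start and end (inclusive)."""
    urls = []
    year, month = start.year, start.month
    while (year, month) <= (end.year, end.month):
        urls.append(
            f"https://datos.madrid.es/egobfiles/MANUAL/208627/{month:02d}-{year}.zip"
        )
        month, year = (1, year + 1) if month == 12 else (month + 1, year)
    return urls

# historical_zip_urls(date(2023, 11, 1), date(2024, 1, 1)) yields the
# 11-2023, 12-2023 and 01-2024 ZIP URLs.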
20
services/external/app/external/models/__init__.py
vendored
Normal file
20
services/external/app/external/models/__init__.py
vendored
Normal file
@@ -0,0 +1,20 @@
# ================================================================
# services/external/app/external/models/__init__.py
# ================================================================
"""
Madrid traffic models package
"""

from .madrid_models import (
    TrafficServiceLevel,
    CongestionLevel,
    MeasurementPoint,
    TrafficRecord
)

__all__ = [
    'TrafficServiceLevel',
    'CongestionLevel',
    'MeasurementPoint',
    'TrafficRecord'
]
66
services/external/app/external/models/madrid_models.py
vendored
Normal file
66
services/external/app/external/models/madrid_models.py
vendored
Normal file
@@ -0,0 +1,66 @@
# ================================================================
# services/external/app/external/models/madrid_models.py
# ================================================================
"""
Data structures, enums, and dataclasses for the Madrid traffic system
"""

from dataclasses import dataclass
from datetime import datetime
from enum import Enum
from typing import Optional


class TrafficServiceLevel(Enum):
    """Madrid traffic service levels"""
    FLUID = 0
    DENSE = 1
    CONGESTED = 2
    BLOCKED = 3


class CongestionLevel(Enum):
    """Standardized congestion levels"""
    LOW = "low"
    MEDIUM = "medium"
    HIGH = "high"
    BLOCKED = "blocked"


@dataclass
class MeasurementPoint:
    """Madrid measurement point data structure"""
    id: str
    latitude: float
    longitude: float
    distance: float
    name: str
    type: str


@dataclass
class TrafficRecord:
    """Standardized traffic record with pedestrian inference"""
    date: datetime
    traffic_volume: int
    occupation_percentage: int
    load_percentage: int
    average_speed: int
    congestion_level: str
    pedestrian_count: int
    measurement_point_id: str
    measurement_point_name: str
    road_type: str
    source: str
    district: Optional[str] = None

    # Madrid-specific data
    intensidad_raw: Optional[int] = None
    ocupacion_raw: Optional[int] = None
    carga_raw: Optional[int] = None
    vmed_raw: Optional[int] = None

    # Pedestrian inference metadata
    pedestrian_multiplier: Optional[float] = None
    time_pattern_factor: Optional[float] = None
    district_factor: Optional[float] = None
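A construction sketch for TrafficRecord showing the expected field types; the values and the measurement point below are invented, and the import path follows the package layout in this commit.

from datetime import datetime, timezone
from app.external.models import TrafficRecord

record = TrafficRecord(
    date=datetime(2024, 5, 14, 8, 0, tzinfo=timezone.utc),
    traffic_volume=1200,
    occupation_percentage=35,
    load_percentage=45,
    average_speed=28,
    congestion_level="medium",
    pedestrian_count=0,            # filled in later by the analyzer
    measurement_point_id="3840",   # illustrative ID
    measurement_point_name="GRAN VIA - CALLE HORTALEZA",
    road_type="URB",
    source="madrid_opendata_xml",
    district="Centro",
    intensidad_raw=1200,
    ocupacion_raw=35,
)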
14
services/external/app/external/processors/__init__.py
vendored
Normal file
14
services/external/app/external/processors/__init__.py
vendored
Normal file
@@ -0,0 +1,14 @@
# ================================================================
# services/external/app/external/processors/__init__.py
# ================================================================
"""
Data processors package
"""

from .madrid_processor import MadridTrafficDataProcessor
from .madrid_business_logic import MadridTrafficAnalyzer

__all__ = [
    'MadridTrafficDataProcessor',
    'MadridTrafficAnalyzer'
]
346
services/external/app/external/processors/madrid_business_logic.py
vendored
Normal file
346
services/external/app/external/processors/madrid_business_logic.py
vendored
Normal file
@@ -0,0 +1,346 @@
# ================================================================
# services/external/app/external/processors/madrid_business_logic.py
# ================================================================
"""
Business rules, inference, and domain logic for Madrid traffic data
Handles pedestrian inference, district mapping, road classification, and validation
"""

import math
import re
from datetime import datetime
from typing import Dict, List, Any, Optional, Tuple
import structlog

from ..models.madrid_models import TrafficRecord, CongestionLevel


class MadridTrafficAnalyzer:
    """Handles business logic for Madrid traffic analysis"""

    # Madrid district characteristics for pedestrian patterns
    DISTRICT_MULTIPLIERS = {
        'Centro': 2.5,              # Historic center, high pedestrian activity
        'Salamanca': 2.0,           # Shopping area, high foot traffic
        'Chamberí': 1.8,            # Business district
        'Retiro': 2.2,              # Near park, high leisure activity
        'Chamartín': 1.6,           # Business/residential
        'Tetuán': 1.4,              # Mixed residential/commercial
        'Fuencarral': 1.3,          # Residential with commercial areas
        'Moncloa': 1.7,             # University area
        'Latina': 1.5,              # Residential area
        'Carabanchel': 1.2,         # Residential periphery
        'Usera': 1.1,               # Industrial/residential
        'Villaverde': 1.0,          # Industrial area
        'Villa de Vallecas': 1.0,   # Peripheral residential
        'Vicálvaro': 0.9,           # Peripheral
        'San Blas': 1.1,            # Residential
        'Barajas': 0.8,             # Airport area, low pedestrian activity
        'Hortaleza': 1.2,           # Mixed area
        'Ciudad Lineal': 1.3,       # Linear development
        'Puente de Vallecas': 1.2,  # Working-class area
        'Moratalaz': 1.1,           # Residential
        'Arganzuela': 1.6,          # Near center, growing area
    }

    # Time-based patterns (hour of day)
    TIME_PATTERNS = {
        'morning_peak': {'hours': [7, 8, 9], 'multiplier': 2.0},
        'lunch_peak': {'hours': [12, 13, 14], 'multiplier': 2.5},
        'evening_peak': {'hours': [18, 19, 20], 'multiplier': 2.2},
        'afternoon': {'hours': [15, 16, 17], 'multiplier': 1.8},
        'late_evening': {'hours': [21, 22], 'multiplier': 1.5},
        'night': {'hours': [23, 0, 1, 2, 3, 4, 5, 6], 'multiplier': 0.3},
        'morning': {'hours': [10, 11], 'multiplier': 1.4}
    }

    # Road type specific patterns
    ROAD_TYPE_BASE = {
        'URB': 250,  # Urban streets - high pedestrian activity
        'M30': 50,   # Ring road - minimal pedestrians
        'C30': 75,   # Secondary ring - some pedestrian access
        'A': 25,     # Highways - very low pedestrians
        'R': 40      # Radial roads - low to moderate
    }

    # Weather impact on pedestrian activity
    WEATHER_IMPACT = {
        'rain': 0.6,          # 40% reduction in rain
        'hot_weather': 0.8,   # 20% reduction when very hot
        'cold_weather': 0.7,  # 30% reduction when very cold
        'normal': 1.0         # No impact
    }

    def __init__(self):
        self.logger = structlog.get_logger()

    def calculate_pedestrian_flow(
        self,
        traffic_record: TrafficRecord,
        location_context: Optional[Dict[str, Any]] = None
    ) -> Tuple[int, Dict[str, float]]:
        """
        Calculate a pedestrian flow estimate with detailed metadata

        Returns:
            Tuple of (pedestrian_count, inference_metadata)
        """
        # Base calculation from road type
        road_type = traffic_record.road_type or 'URB'
        base_pedestrians = self.ROAD_TYPE_BASE.get(road_type, 200)

        # Time pattern adjustment
        hour = traffic_record.date.hour
        time_factor = self._get_time_pattern_factor(hour)

        # District adjustment (if available)
        district_factor = 1.0
        district = traffic_record.district or self.infer_district_from_location(location_context)
        if district:
            district_factor = self.DISTRICT_MULTIPLIERS.get(district, 1.0)

        # Traffic correlation adjustment
        traffic_factor = self._calculate_traffic_correlation(traffic_record)

        # Weather adjustment (if data available)
        weather_factor = self._get_weather_factor(traffic_record.date, location_context)

        # Weekend adjustment
        weekend_factor = self._get_weekend_factor(traffic_record.date)

        # Combined calculation
        pedestrian_count = int(
            base_pedestrians *
            time_factor *
            district_factor *
            traffic_factor *
            weather_factor *
            weekend_factor
        )

        # Ensure reasonable bounds
        pedestrian_count = max(10, min(2000, pedestrian_count))

        # Metadata for model training
        inference_metadata = {
            'base_pedestrians': base_pedestrians,
            'time_factor': time_factor,
            'district_factor': district_factor,
            'traffic_factor': traffic_factor,
            'weather_factor': weather_factor,
            'weekend_factor': weekend_factor,
            'inferred_district': district,
            'hour': hour,
            'road_type': road_type
        }

        return pedestrian_count, inference_metadata

    def _get_time_pattern_factor(self, hour: int) -> float:
        """Get the time-based pedestrian activity multiplier"""
        for pattern, config in self.TIME_PATTERNS.items():
            if hour in config['hours']:
                return config['multiplier']
        return 1.0  # Default multiplier

    def _calculate_traffic_correlation(self, traffic_record: TrafficRecord) -> float:
        """
        Calculate pedestrian correlation with traffic patterns
        Higher traffic in urban areas often correlates with more pedestrians
        """
        if traffic_record.road_type == 'URB':
            # Urban areas: moderate traffic indicates commercial activity
            if 30 <= traffic_record.load_percentage <= 70:
                return 1.3  # Sweet spot for pedestrian activity
            elif traffic_record.load_percentage > 70:
                return 0.9  # Too congested, pedestrians avoid
            else:
                return 1.0  # Normal correlation
        else:
            # Highway/ring roads: more traffic = fewer pedestrians
            if traffic_record.load_percentage > 60:
                return 0.5
            else:
                return 0.8

    def _get_weather_factor(self, date: datetime, location_context: Optional[Dict] = None) -> float:
        """Estimate weather impact on pedestrian activity"""
        # Simplified weather inference based on season and typical Madrid patterns
        month = date.month

        # Madrid seasonal patterns
        if month in [12, 1, 2]:  # Winter - cold weather impact
            return self.WEATHER_IMPACT['cold_weather']
        elif month in [7, 8]:  # Summer - hot weather impact
            return self.WEATHER_IMPACT['hot_weather']
        elif month in [10, 11, 3, 4]:  # Rainy seasons - moderate impact
            return 0.85
        else:  # Spring/early summer - optimal weather
            return 1.1

    def _get_weekend_factor(self, date: datetime) -> float:
        """Weekend vs weekday pedestrian patterns"""
        weekday = date.weekday()
        hour = date.hour

        if weekday >= 5:  # Weekend
            if 11 <= hour <= 16:  # Weekend shopping/leisure hours
                return 1.4
            elif 20 <= hour <= 23:  # Weekend evening activity
                return 1.3
            else:
                return 0.9
        else:  # Weekday
            return 1.0

    def infer_district_from_location(self, location_context: Optional[Dict] = None) -> Optional[str]:
        """
        Infer the Madrid district from location context or coordinates
        """
        if not location_context:
            return None

        lat = location_context.get('latitude')
        lon = location_context.get('longitude')

        if not (lat and lon):
            return None

        # Madrid district boundaries (simplified boundaries for inference)
        districts = {
            # Central districts
            'Centro': {'lat_min': 40.405, 'lat_max': 40.425, 'lon_min': -3.720, 'lon_max': -3.690},
            'Arganzuela': {'lat_min': 40.385, 'lat_max': 40.410, 'lon_min': -3.720, 'lon_max': -3.680},
            'Retiro': {'lat_min': 40.405, 'lat_max': 40.425, 'lon_min': -3.690, 'lon_max': -3.660},
            'Salamanca': {'lat_min': 40.420, 'lat_max': 40.445, 'lon_min': -3.690, 'lon_max': -3.660},
            'Chamartín': {'lat_min': 40.445, 'lat_max': 40.480, 'lon_min': -3.690, 'lon_max': -3.660},
            'Tetuán': {'lat_min': 40.445, 'lat_max': 40.470, 'lon_min': -3.720, 'lon_max': -3.690},
            'Chamberí': {'lat_min': 40.425, 'lat_max': 40.450, 'lon_min': -3.720, 'lon_max': -3.690},
            'Fuencarral-El Pardo': {'lat_min': 40.470, 'lat_max': 40.540, 'lon_min': -3.750, 'lon_max': -3.650},
            'Moncloa-Aravaca': {'lat_min': 40.430, 'lat_max': 40.480, 'lon_min': -3.750, 'lon_max': -3.720},
            'Latina': {'lat_min': 40.380, 'lat_max': 40.420, 'lon_min': -3.750, 'lon_max': -3.720},
            'Carabanchel': {'lat_min': 40.350, 'lat_max': 40.390, 'lon_min': -3.750, 'lon_max': -3.720},
            'Usera': {'lat_min': 40.350, 'lat_max': 40.385, 'lon_min': -3.720, 'lon_max': -3.690},
            'Puente de Vallecas': {'lat_min': 40.370, 'lat_max': 40.410, 'lon_min': -3.680, 'lon_max': -3.640},
            'Moratalaz': {'lat_min': 40.400, 'lat_max': 40.430, 'lon_min': -3.650, 'lon_max': -3.620},
            'Ciudad Lineal': {'lat_min': 40.430, 'lat_max': 40.460, 'lon_min': -3.650, 'lon_max': -3.620},
            'Hortaleza': {'lat_min': 40.460, 'lat_max': 40.500, 'lon_min': -3.650, 'lon_max': -3.620},
            'Villaverde': {'lat_min': 40.320, 'lat_max': 40.360, 'lon_min': -3.720, 'lon_max': -3.680},
        }

        # Find matching district
        for district_name, bounds in districts.items():
            if (bounds['lat_min'] <= lat <= bounds['lat_max'] and
                    bounds['lon_min'] <= lon <= bounds['lon_max']):
                return district_name

        # Default for coordinates in Madrid but not matching specific districts
        if 40.3 <= lat <= 40.6 and -3.8 <= lon <= -3.5:
            return 'Other Madrid'

        return None

    def classify_road_type(self, measurement_point_name: str) -> str:
        """Classify road type based on the measurement point name"""
        if not measurement_point_name:
            return 'URB'  # Default to urban

        name_upper = measurement_point_name.upper()

        # Highway patterns
        if any(pattern in name_upper for pattern in ['A-', 'AP-', 'AUTOPISTA', 'AUTOVIA']):
            return 'A'

        # M-30 ring road
        if 'M-30' in name_upper or 'M30' in name_upper:
            return 'M30'

        # Other M roads (ring roads)
        if re.search(r'M-[0-9]', name_upper) or re.search(r'M[0-9]', name_upper):
            return 'C30'

        # Radial roads (R-1, R-2, etc.)
        if re.search(r'R-[0-9]', name_upper) or 'RADIAL' in name_upper:
            return 'R'

        # Default to urban street
        return 'URB'

    def validate_madrid_coordinates(self, lat: float, lon: float) -> bool:
        """Validate that coordinates are within Madrid bounds"""
        # Madrid metropolitan area bounds
        return 40.3 <= lat <= 40.6 and -3.8 <= lon <= -3.5

    def get_congestion_level(self, occupation_pct: float) -> str:
        """Convert occupation percentage to a congestion level"""
        if occupation_pct >= 80:
            return CongestionLevel.BLOCKED.value
        elif occupation_pct >= 50:
            return CongestionLevel.HIGH.value
        elif occupation_pct >= 25:
            return CongestionLevel.MEDIUM.value
        else:
            return CongestionLevel.LOW.value

    def calculate_distance(self, lat1: float, lon1: float, lat2: float, lon2: float) -> float:
        """Calculate the distance between two points in kilometers using the Haversine formula"""
        R = 6371  # Earth's radius in kilometers

        dlat = math.radians(lat2 - lat1)
        dlon = math.radians(lon2 - lon1)
        a = (math.sin(dlat / 2) * math.sin(dlat / 2) +
             math.cos(math.radians(lat1)) * math.cos(math.radians(lat2)) *
             math.sin(dlon / 2) * math.sin(dlon / 2))
        c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))

        return R * c

    def find_nearest_traffic_point(self, traffic_points: List[Dict[str, Any]],
                                   latitude: float, longitude: float) -> Optional[Dict[str, Any]]:
        """Find the nearest traffic point to the given coordinates"""
        if not traffic_points:
            return None

        min_distance = float('inf')
        nearest_point = None

        for point in traffic_points:
            point_lat = point.get('latitude')
            point_lon = point.get('longitude')

            if point_lat and point_lon:
                distance = self.calculate_distance(latitude, longitude, point_lat, point_lon)
                if distance < min_distance:
                    min_distance = distance
                    nearest_point = point

        return nearest_point

    def find_nearest_measurement_points(self, measurement_points: Dict[str, Dict[str, Any]],
                                        latitude: float, longitude: float,
                                        num_points: int = 3,
                                        max_distance_km: Optional[float] = 5.0) -> List[Tuple[str, Dict[str, Any], float]]:
        """Find the nearest measurement points for historical data"""
        distances = []

        for point_id, point_data in measurement_points.items():
            point_lat = point_data.get('latitude')
            point_lon = point_data.get('longitude')

            if point_lat and point_lon:
                distance_km = self.calculate_distance(latitude, longitude, point_lat, point_lon)
                distances.append((point_id, point_data, distance_km))

        # Sort by distance and take the nearest points
        distances.sort(key=lambda x: x[2])

        # Apply distance filter if specified
        if max_distance_km is not None:
            distances = [p for p in distances if p[2] <= max_distance_km]

        nearest = distances[:num_points]

        self.logger.info("Found nearest measurement points",
                         count=len(nearest),
                         nearest_distance_km=nearest[0][2] if nearest else None)

        return nearest
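A sketch of how the analyzer is expected to be driven, reusing the TrafficRecord built in the madrid_models example above; the coordinates are illustrative and the import path follows the package layout in this commit.

from app.external.processors import MadridTrafficAnalyzer

analyzer = MadridTrafficAnalyzer()
count, meta = analyzer.calculate_pedestrian_flow(
    record,
    location_context={"latitude": 40.420, "longitude": -3.701},  # central Madrid
)
print(count)                      # always bounded to [10, 2000]
print(meta["inferred_district"])  # 'Centro' (from the record, or inferred from the coordinates)
print(analyzer.get_congestion_level(record.occupation_percentage))  # 'medium' for 35%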
493
services/external/app/external/processors/madrid_processor.py
vendored
Normal file
493
services/external/app/external/processors/madrid_processor.py
vendored
Normal file
@@ -0,0 +1,493 @@
|
||||
# ================================================================
|
||||
# services/data/app/external/processors/madrid_processor.py
|
||||
# ================================================================
|
||||
"""
|
||||
Data transformation and parsing for Madrid traffic data
|
||||
Handles XML parsing, CSV processing, coordinate conversion, and data quality scoring
|
||||
"""
|
||||
|
||||
import csv
|
||||
import io
|
||||
import math
|
||||
import re
|
||||
import xml.etree.ElementTree as ET
|
||||
import zipfile
|
||||
from datetime import datetime, timezone
|
||||
from typing import Dict, List, Any, Optional, Tuple
|
||||
import structlog
|
||||
import pyproj
|
||||
|
||||
from ..models.madrid_models import TrafficRecord, MeasurementPoint, CongestionLevel
|
||||
|
||||
|
||||
class MadridTrafficDataProcessor:
|
||||
"""Handles all data transformation and parsing for Madrid traffic data"""
|
||||
|
||||
def __init__(self):
|
||||
self.logger = structlog.get_logger()
|
||||
# UTM Zone 30N (Madrid's coordinate system) - using modern pyproj API
|
||||
self.transformer = pyproj.Transformer.from_crs("EPSG:25830", "EPSG:4326", always_xy=True)
|
||||
|
||||
def safe_int(self, value: str) -> int:
|
||||
"""Safely convert string to int"""
|
||||
try:
|
||||
return int(float(value.replace(',', '.')))
|
||||
except (ValueError, TypeError):
|
||||
return 0
|
||||
|
||||
def _safe_float(self, value: str) -> float:
|
||||
"""Safely convert string to float"""
|
||||
try:
|
||||
return float(value.replace(',', '.'))
|
||||
except (ValueError, TypeError):
|
||||
return 0.0
|
||||
|
||||
def clean_madrid_xml(self, xml_content: str) -> str:
|
||||
"""Clean and prepare Madrid XML content for parsing"""
|
||||
if not xml_content:
|
||||
return ""
|
||||
|
||||
# Remove BOM and extra whitespace
|
||||
cleaned = xml_content.strip()
|
||||
if cleaned.startswith('\ufeff'):
|
||||
cleaned = cleaned[1:]
|
||||
|
||||
# Fix common XML issues
|
||||
cleaned = re.sub(r'&(?!amp;|lt;|gt;|quot;|apos;)', '&', cleaned)
|
||||
|
||||
# Ensure proper encoding declaration
|
||||
if not cleaned.startswith('<?xml'):
|
||||
cleaned = '<?xml version="1.0" encoding="UTF-8"?>\n' + cleaned
|
||||
|
||||
return cleaned
|
||||
|
||||
def convert_utm_to_latlon(self, utm_x: str, utm_y: str) -> Tuple[Optional[float], Optional[float]]:
|
||||
"""Convert UTM coordinates to latitude/longitude"""
|
||||
try:
|
||||
utm_x_float = float(utm_x.replace(',', '.'))
|
||||
utm_y_float = float(utm_y.replace(',', '.'))
|
||||
|
||||
# Convert from UTM Zone 30N to WGS84 using modern pyproj API
|
||||
longitude, latitude = self.transformer.transform(utm_x_float, utm_y_float)
|
||||
|
||||
# Validate coordinates are in Madrid area
|
||||
if 40.3 <= latitude <= 40.6 and -3.8 <= longitude <= -3.5:
|
||||
return latitude, longitude
|
||||
else:
|
||||
self.logger.debug("Coordinates outside Madrid bounds",
|
||||
lat=latitude, lon=longitude, utm_x=utm_x, utm_y=utm_y)
|
||||
return None, None
|
||||
|
||||
except Exception as e:
|
||||
self.logger.debug("UTM conversion error",
|
||||
utm_x=utm_x, utm_y=utm_y, error=str(e))
|
||||
return None, None
|
||||
|
||||
def parse_traffic_xml(self, xml_content: str) -> List[Dict[str, Any]]:
|
||||
"""Parse Madrid traffic XML data"""
|
||||
traffic_points = []
|
||||
|
||||
try:
|
||||
cleaned_xml = self.clean_madrid_xml(xml_content)
|
||||
root = ET.fromstring(cleaned_xml)
|
||||
|
||||
self.logger.debug("Madrid XML structure", root_tag=root.tag, children_count=len(list(root)))
|
||||
|
||||
if root.tag == 'pms':
|
||||
pm_elements = root.findall('pm')
|
||||
self.logger.debug("Found PM elements", count=len(pm_elements))
|
||||
|
||||
for pm in pm_elements:
|
||||
try:
|
||||
traffic_point = self._extract_madrid_pm_element(pm)
|
||||
|
||||
if self._is_valid_traffic_point(traffic_point):
|
||||
traffic_points.append(traffic_point)
|
||||
|
||||
# Log first few points for debugging
|
||||
if len(traffic_points) <= 3:
|
||||
self.logger.debug("Sample traffic point",
|
||||
id=traffic_point['idelem'],
|
||||
lat=traffic_point['latitude'],
|
||||
lon=traffic_point['longitude'],
|
||||
intensity=traffic_point.get('intensidad'))
|
||||
|
||||
except Exception as e:
|
||||
self.logger.debug("Error parsing PM element", error=str(e))
|
||||
continue
|
||||
else:
|
||||
self.logger.warning("Unexpected XML root tag", root_tag=root.tag)
|
||||
|
||||
self.logger.debug("Madrid traffic XML parsing completed", valid_points=len(traffic_points))
|
||||
return traffic_points
|
||||
|
||||
except ET.ParseError as e:
|
||||
self.logger.warning("Failed to parse Madrid XML", error=str(e))
|
||||
return self._extract_traffic_data_regex(xml_content)
|
||||
except Exception as e:
|
||||
self.logger.error("Error in Madrid traffic XML parsing", error=str(e))
|
||||
return []
|
||||
|
||||
def _extract_madrid_pm_element(self, pm_element) -> Dict[str, Any]:
|
||||
"""Extract traffic data from Madrid <pm> element with coordinate conversion"""
|
||||
try:
|
||||
point_data = {}
|
||||
utm_x = utm_y = None
|
||||
|
||||
# Extract all child elements
|
||||
for child in pm_element:
|
||||
tag, text = child.tag, child.text.strip() if child.text else ''
|
||||
|
||||
if tag == 'idelem':
|
||||
point_data['idelem'] = text
|
||||
elif tag == 'descripcion':
|
||||
point_data['descripcion'] = text
|
||||
elif tag == 'intensidad':
|
||||
point_data['intensidad'] = self.safe_int(text)
|
||||
elif tag == 'ocupacion':
|
||||
point_data['ocupacion'] = self._safe_float(text)
|
||||
elif tag == 'carga':
|
||||
point_data['carga'] = self.safe_int(text)
|
||||
elif tag == 'nivelServicio':
|
||||
point_data['nivelServicio'] = self.safe_int(text)
|
||||
elif tag == 'st_x': # UTM X coordinate
|
||||
utm_x = text
|
||||
point_data['utm_x'] = text
|
||||
elif tag == 'st_y': # UTM Y coordinate
|
||||
utm_y = text
|
||||
point_data['utm_y'] = text
|
||||
elif tag == 'error':
|
||||
point_data['error'] = text
|
||||
elif tag in ['subarea', 'accesoAsociado', 'intensidadSat']:
|
||||
point_data[tag] = text
|
||||
|
||||
# Convert coordinates
|
||||
if utm_x and utm_y:
|
||||
latitude, longitude = self.convert_utm_to_latlon(utm_x, utm_y)
|
||||
|
||||
if latitude and longitude:
|
||||
point_data.update({
|
||||
'latitude': latitude,
|
||||
'longitude': longitude,
|
||||
'measurement_point_id': point_data.get('idelem'),
|
||||
'measurement_point_name': point_data.get('descripcion'),
|
||||
'timestamp': datetime.now(timezone.utc),
|
||||
'source': 'madrid_opendata_xml'
|
||||
})
|
||||
|
||||
return point_data
|
||||
else:
|
||||
self.logger.debug("Invalid coordinates after conversion",
|
||||
idelem=point_data.get('idelem'), utm_x=utm_x, utm_y=utm_y)
|
||||
return {}
|
||||
else:
|
||||
self.logger.debug("Missing UTM coordinates", idelem=point_data.get('idelem'))
|
||||
return {}
|
||||
|
||||
except Exception as e:
|
||||
self.logger.debug("Error extracting PM element", error=str(e))
|
||||
return {}
|
||||
|
||||
def _is_valid_traffic_point(self, traffic_point: Dict[str, Any]) -> bool:
|
||||
"""Validate traffic point data"""
|
||||
required_fields = ['idelem', 'latitude', 'longitude']
|
||||
return all(field in traffic_point and traffic_point[field] for field in required_fields)
|
||||
|
||||
def _extract_traffic_data_regex(self, xml_content: str) -> List[Dict[str, Any]]:
|
||||
"""Fallback regex-based extraction if XML parsing fails"""
|
||||
traffic_points = []
|
||||
|
||||
try:
|
||||
# Pattern to match PM elements
|
||||
pm_pattern = r'<pm>(.*?)</pm>'
|
||||
pm_matches = re.findall(pm_pattern, xml_content, re.DOTALL)
|
||||
|
||||
for pm_content in pm_matches:
|
||||
traffic_point = {}
|
||||
|
||||
# Extract key fields
|
||||
patterns = {
|
||||
'idelem': r'<idelem>(.*?)</idelem>',
|
||||
'descripcion': r'<descripcion>(.*?)</descripcion>',
|
||||
'intensidad': r'<intensidad>(.*?)</intensidad>',
|
||||
'ocupacion': r'<ocupacion>(.*?)</ocupacion>',
|
||||
'st_x': r'<st_x>(.*?)</st_x>',
|
||||
'st_y': r'<st_y>(.*?)</st_y>'
|
||||
}
|
||||
|
||||
for field, pattern in patterns.items():
|
||||
match = re.search(pattern, pm_content)
|
||||
if match:
|
||||
traffic_point[field] = match.group(1).strip()
|
||||
|
||||
# Convert coordinates
|
||||
if 'st_x' in traffic_point and 'st_y' in traffic_point:
|
||||
latitude, longitude = self.convert_utm_to_latlon(
|
||||
traffic_point['st_x'], traffic_point['st_y']
|
||||
)
|
||||
|
||||
if latitude and longitude:
|
||||
traffic_point.update({
|
||||
'latitude': latitude,
|
||||
'longitude': longitude,
|
||||
'intensidad': self.safe_int(traffic_point.get('intensidad', '0')),
|
||||
'ocupacion': self._safe_float(traffic_point.get('ocupacion', '0')),
|
||||
'measurement_point_id': traffic_point.get('idelem'),
|
||||
'measurement_point_name': traffic_point.get('descripcion'),
|
||||
'timestamp': datetime.now(timezone.utc),
|
||||
'source': 'madrid_opendata_xml_regex'
|
||||
})
|
||||
|
||||
traffic_points.append(traffic_point)
|
||||
|
||||
self.logger.debug("Regex extraction completed", points=len(traffic_points))
|
||||
return traffic_points
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error("Error in regex extraction", error=str(e))
|
||||
return []
|
||||
|
||||
def parse_measurement_points_csv(self, csv_content: str) -> Dict[str, Dict[str, Any]]:
|
||||
"""Parse measurement points CSV into lookup dictionary"""
|
||||
measurement_points = {}
|
||||
|
||||
try:
|
||||
# Parse CSV with semicolon delimiter
|
||||
csv_reader = csv.DictReader(io.StringIO(csv_content), delimiter=';')
|
||||
|
||||
processed_count = 0
|
||||
for row in csv_reader:
|
||||
try:
|
||||
# Extract point ID and coordinates
|
||||
point_id = row.get('id', '').strip()
|
||||
if not point_id:
|
||||
continue
|
||||
|
||||
processed_count += 1
|
||||
|
||||
# Try different coordinate field names
|
||||
lat_str = ''
|
||||
lon_str = ''
|
||||
|
||||
# Common coordinate field patterns
|
||||
lat_fields = ['lat', 'latitude', 'latitud', 'y', 'utm_y']
|
||||
lon_fields = ['lon', 'lng', 'longitude', 'longitud', 'x', 'utm_x']
|
||||
|
||||
for field in lat_fields:
|
||||
if field in row and row[field].strip():
|
||||
lat_str = row[field].strip()
|
||||
break
|
||||
|
||||
for field in lon_fields:
|
||||
if field in row and row[field].strip():
|
||||
lon_str = row[field].strip()
|
||||
break
|
||||
|
||||
if lat_str and lon_str:
|
||||
try:
|
||||
# Try direct lat/lon first
|
||||
latitude = self._safe_float(lat_str)
|
||||
longitude = self._safe_float(lon_str)
|
||||
|
||||
# If values look like UTM coordinates, convert them
|
||||
if latitude > 1000 or longitude > 1000:
|
||||
latitude, longitude = self.convert_utm_to_latlon(lon_str, lat_str)
|
||||
if not latitude or not longitude:
|
||||
continue
|
||||
|
||||
# Validate Madrid area
|
||||
if not (40.3 <= latitude <= 40.6 and -3.8 <= longitude <= -3.5):
|
||||
continue
|
||||
|
||||
measurement_points[point_id] = {
|
||||
'id': point_id,
|
||||
'latitude': latitude,
|
||||
'longitude': longitude,
|
||||
'name': row.get('nombre', row.get('descripcion', f"Point {point_id}")),
|
||||
'type': row.get('tipo', 'traffic'),
|
||||
'raw_data': dict(row) # Keep original data
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
self.logger.debug("Error processing point coordinates",
|
||||
point_id=point_id, error=str(e))
|
||||
continue
|
||||
|
||||
except Exception as e:
|
||||
self.logger.debug("Error processing CSV row", error=str(e))
|
||||
continue
|
||||
|
||||
self.logger.info("Parsed measurement points registry",
|
||||
total_points=len(measurement_points))
|
||||
return measurement_points
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error("Error parsing measurement points CSV", error=str(e))
|
||||
return {}
|
||||
|
||||
def calculate_data_quality_score(self, row: Dict[str, str]) -> float:
|
||||
"""Calculate data quality score for a traffic record"""
|
||||
try:
|
||||
score = 1.0
|
||||
|
||||
# Check for missing or invalid values
|
||||
intensidad = row.get('intensidad', '').strip()
|
||||
if not intensidad or intensidad in ['N', '', '0']:
|
||||
score *= 0.7
|
||||
|
||||
ocupacion = row.get('ocupacion', '').strip()
|
||||
if not ocupacion or ocupacion in ['N', '', '0']:
|
||||
score *= 0.8
|
||||
|
||||
error_status = row.get('error', '').strip()
|
||||
if error_status and error_status != 'N':
|
||||
score *= 0.6
|
||||
|
||||
# Check for reasonable value ranges
|
||||
try:
|
||||
intensidad_val = self.safe_int(intensidad)
|
||||
if intensidad_val < 0 or intensidad_val > 5000: # Unrealistic traffic volume
|
||||
score *= 0.7
|
||||
|
||||
ocupacion_val = self.safe_int(ocupacion)
|
||||
if ocupacion_val < 0 or ocupacion_val > 100: # Invalid percentage
|
||||
score *= 0.5
|
||||
|
||||
except Exception:
|
||||
score *= 0.6
|
||||
|
||||
return max(0.1, score) # Minimum quality score
|
||||
|
||||
except Exception as e:
|
||||
self.logger.debug("Error calculating quality score", error=str(e))
|
||||
return 0.5 # Default medium quality
|
||||
|
||||
async def process_csv_content_chunked(self, text_content: str, csv_filename: str,
|
||||
nearest_ids: set, nearest_points: list) -> list:
|
||||
"""Process CSV content in chunks to prevent memory issues"""
|
||||
import csv
|
||||
import io
|
||||
import gc
|
||||
|
||||
try:
|
||||
csv_reader = csv.DictReader(io.StringIO(text_content), delimiter=';')
|
||||
|
||||
chunk_size = 10000
|
||||
chunk_records = []
|
||||
all_records = []
|
||||
processed_count = 0
|
||||
total_rows_seen = 0
|
||||
|
||||
for row in csv_reader:
|
||||
total_rows_seen += 1
|
||||
measurement_point_id = row.get('id', '').strip()
|
||||
|
||||
if measurement_point_id not in nearest_ids:
|
||||
continue
|
||||
|
||||
try:
|
||||
record_data = await self.parse_historical_csv_row(row, nearest_points)
|
||||
|
||||
if record_data:
|
||||
chunk_records.append(record_data)
|
||||
processed_count += 1
|
||||
|
||||
if len(chunk_records) >= chunk_size:
|
||||
all_records.extend(chunk_records)
|
||||
chunk_records = []
|
||||
gc.collect()
|
||||
|
||||
except Exception as e:
|
||||
if processed_count < 5:
|
||||
self.logger.error("Row parsing exception",
|
||||
row_num=total_rows_seen,
|
||||
measurement_point_id=measurement_point_id,
|
||||
error=str(e))
|
||||
continue
|
||||
|
||||
# Process remaining records
|
||||
if chunk_records:
|
||||
all_records.extend(chunk_records)
|
||||
chunk_records = []
|
||||
gc.collect()
|
||||
|
||||
self.logger.info("Processed CSV file",
|
||||
filename=csv_filename,
|
||||
total_rows_read=total_rows_seen,
|
||||
processed_records=processed_count)
|
||||
|
||||
return all_records
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error("Error processing CSV content",
|
||||
filename=csv_filename, error=str(e))
|
||||
return []
|
||||
|
||||
async def parse_historical_csv_row(self, row: dict, nearest_points: list) -> dict:
|
||||
"""Parse a single row from Madrid's historical traffic CSV"""
|
||||
try:
|
||||
# Extract date
|
||||
fecha_str = row.get('fecha', '').strip()
|
||||
if not fecha_str:
|
||||
return None
|
||||
|
||||
try:
|
||||
from datetime import datetime, timezone
|
||||
date_obj = datetime.strptime(fecha_str, '%Y-%m-%d %H:%M:%S')
|
||||
date_obj = date_obj.replace(tzinfo=timezone.utc)
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
measurement_point_id = row.get('id', '').strip()
|
||||
|
||||
# Find point data
|
||||
point_match = next((p for p in nearest_points if p[0] == measurement_point_id), None)
|
||||
if not point_match:
|
||||
return None
|
||||
|
||||
point_data = point_match[1]
|
||||
distance_km = point_match[2]
|
||||
|
||||
# Extract traffic data
|
||||
intensidad = self.safe_int(row.get('intensidad', '0'))
|
||||
ocupacion = self.safe_int(row.get('ocupacion', '0'))
|
||||
carga = self.safe_int(row.get('carga', '0'))
|
||||
vmed = self.safe_int(row.get('vmed', '0'))
|
||||
|
||||
# Calculate average speed (vmed is in km/h, use it if available)
|
||||
average_speed = float(vmed) if vmed > 0 else 30.0 # Default speed
|
||||
|
||||
# Determine congestion level based on occupation percentage
|
||||
if ocupacion > 75:
|
||||
congestion_level = 'high'
|
||||
elif ocupacion > 40:
|
||||
congestion_level = 'medium'
|
||||
else:
|
||||
congestion_level = 'low'
|
||||
|
||||
# Build result with API-compatible fields
|
||||
result = {
|
||||
'date': date_obj, # Required API field
|
||||
'traffic_volume': intensidad, # Required API field
|
||||
'pedestrian_count': max(1, int(intensidad * 0.1)), # Estimated pedestrian count
|
||||
'congestion_level': congestion_level, # Required API field
|
||||
'average_speed': average_speed, # Required API field
|
||||
'source': 'madrid_historical_csv', # Required API field
|
||||
'measurement_point_id': measurement_point_id,
|
||||
'point_data': point_data,
|
||||
'distance_km': distance_km,
|
||||
'traffic_data': {
|
||||
'intensidad': intensidad,
|
||||
'ocupacion': ocupacion,
|
||||
'carga': carga,
|
||||
'vmed': vmed
|
||||
},
|
||||
'data_quality_score': self.calculate_data_quality_score(row),
|
||||
'raw_row': row
|
||||
}
|
||||
|
||||
return result
|
||||
|
||||
except Exception as e:
|
||||
self.logger.debug("Error parsing historical CSV row", error=str(e))
|
||||
return None
|
||||
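A standalone check of the coordinate transform the processor relies on: ETRS89 / UTM zone 30N (EPSG:25830) to WGS84 (EPSG:4326) via the modern pyproj API. The sample UTM pair is illustrative, roughly central Madrid.

import pyproj

transformer = pyproj.Transformer.from_crs("EPSG:25830", "EPSG:4326", always_xy=True)
lon, lat = transformer.transform(440500.0, 4474500.0)  # x (easting), y (northing)
print(round(lat, 4), round(lon, 4))  # should fall inside the 40.3-40.6 / -3.8 to -3.5 Madrid bounds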
1
services/external/app/ingestion/__init__.py
vendored
Normal file
1
services/external/app/ingestion/__init__.py
vendored
Normal file
@@ -0,0 +1 @@
"""Data ingestion module for multi-city external data"""
20
services/external/app/ingestion/adapters/__init__.py
vendored
Normal file
20
services/external/app/ingestion/adapters/__init__.py
vendored
Normal file
@@ -0,0 +1,20 @@
# services/external/app/ingestion/adapters/__init__.py
"""
Adapter registry - Maps city IDs to adapter implementations
"""

from typing import Dict, Type
from ..base_adapter import CityDataAdapter
from .madrid_adapter import MadridAdapter

ADAPTER_REGISTRY: Dict[str, Type[CityDataAdapter]] = {
    "madrid": MadridAdapter,
}


def get_adapter(city_id: str, config: Dict) -> CityDataAdapter:
    """Factory to instantiate the appropriate adapter"""
    adapter_class = ADAPTER_REGISTRY.get(city_id)
    if not adapter_class:
        raise ValueError(f"No adapter registered for city: {city_id}")
    return adapter_class(city_id, config)
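A hypothetical extension sketch showing how a second city would be registered. BarcelonaAdapter does not exist in this commit; it stands in for any future CityDataAdapter implementation dropped into this package.

from .barcelona_adapter import BarcelonaAdapter  # hypothetical module

ADAPTER_REGISTRY["barcelona"] = BarcelonaAdapter

# get_adapter("barcelona", {...}) then resolves it exactly like the Madrid entry,
# and unknown city IDs still raise ValueError.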
152
services/external/app/ingestion/adapters/madrid_adapter.py
vendored
Normal file
152
services/external/app/ingestion/adapters/madrid_adapter.py
vendored
Normal file
@@ -0,0 +1,152 @@
# services/external/app/ingestion/adapters/madrid_adapter.py
"""
Madrid city data adapter - Uses existing AEMET and Madrid OpenData clients
"""

from typing import List, Dict, Any
from datetime import datetime
import structlog

from ..base_adapter import CityDataAdapter
from app.external.aemet import AEMETClient
from app.external.apis.madrid_traffic_client import MadridTrafficClient

logger = structlog.get_logger()


class MadridAdapter(CityDataAdapter):
    """Adapter for Madrid using AEMET + Madrid OpenData"""

    def __init__(self, city_id: str, config: Dict[str, Any]):
        super().__init__(city_id, config)
        self.aemet_client = AEMETClient()
        self.traffic_client = MadridTrafficClient()

        self.madrid_lat = 40.4168
        self.madrid_lon = -3.7038

    async def fetch_historical_weather(
        self,
        start_date: datetime,
        end_date: datetime
    ) -> List[Dict[str, Any]]:
        """Fetch historical weather from AEMET"""
        try:
            logger.info(
                "Fetching Madrid historical weather",
                start=start_date.isoformat(),
                end=end_date.isoformat()
            )

            weather_data = await self.aemet_client.get_historical_weather(
                self.madrid_lat,
                self.madrid_lon,
                start_date,
                end_date
            )

            for record in weather_data:
                record['city_id'] = self.city_id
                record['city_name'] = 'Madrid'

            logger.info(
                "Madrid weather data fetched",
                records=len(weather_data)
            )

            return weather_data

        except Exception as e:
            logger.error("Error fetching Madrid weather", error=str(e))
            return []

    async def fetch_historical_traffic(
        self,
        start_date: datetime,
        end_date: datetime
    ) -> List[Dict[str, Any]]:
        """Fetch historical traffic from Madrid OpenData"""
        try:
            logger.info(
                "Fetching Madrid historical traffic",
                start=start_date.isoformat(),
                end=end_date.isoformat()
            )

            traffic_data = await self.traffic_client.get_historical_traffic(
                self.madrid_lat,
                self.madrid_lon,
                start_date,
                end_date
            )

            for record in traffic_data:
                record['city_id'] = self.city_id
                record['city_name'] = 'Madrid'

            logger.info(
                "Madrid traffic data fetched",
                records=len(traffic_data)
            )

            return traffic_data

        except Exception as e:
            logger.error("Error fetching Madrid traffic", error=str(e))
            return []

    async def validate_connection(self) -> bool:
        """Validate connection to AEMET and Madrid OpenData

        Note: Validation is lenient - it allows partial failures caused by temporary API issues.
        AEMET rate limits may cause weather validation to fail during initialization, and
        Madrid traffic API outages should not block validation entirely.
        """
        try:
            traffic_validation_passed = False
            weather_validation_passed = False

            # Try the traffic API first
            try:
                test_traffic = await self.traffic_client.get_current_traffic(
                    self.madrid_lat,
                    self.madrid_lon
                )

                if test_traffic is not None and len(test_traffic) > 0:
                    traffic_validation_passed = True
                    logger.info("Traffic API validation successful")
                else:
                    logger.warning("Traffic API validation failed - temporary unavailability (proceeding anyway)")
            except Exception as traffic_error:
                logger.warning("Traffic API validation error (temporary unavailability) - proceeding anyway", error=str(traffic_error))

            # Try the weather API
            try:
                test_weather = await self.aemet_client.get_current_weather(
                    self.madrid_lat,
                    self.madrid_lon
                )

                if test_weather is not None:
                    weather_validation_passed = True
                    logger.info("Weather API validation successful")
                else:
                    logger.warning("Weather API validation failed (likely rate limited) - proceeding anyway")
            except Exception as weather_error:
                logger.warning("Weather API validation error - proceeding anyway", error=str(weather_error))

            # At least one validation should pass for basic connectivity
            if not traffic_validation_passed and not weather_validation_passed:
                logger.error("Both traffic and weather API validations failed - no connectivity")
                return False

            # Return success if at least one API is accessible
            logger.info("Adapter connection validation passed",
                        traffic_valid=traffic_validation_passed,
                        weather_valid=weather_validation_passed)
            return True

        except Exception as e:
            logger.error("Madrid adapter connection validation failed", error=str(e))
            return False
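A sketch of how this adapter is exercised through the factory, mirroring the calls made by the ingestion layer; the config keys follow the ones passed by ingestion_manager.py and the date range is illustrative.

import asyncio
from datetime import datetime, timedelta
from app.ingestion.adapters import get_adapter

async def smoke_test() -> None:
    adapter = get_adapter("madrid", {"weather_config": {}, "traffic_config": {}})
    if not await adapter.validate_connection():
        print("No connectivity to AEMET or Madrid OpenData")
        return
    end = datetime.now()
    weather = await adapter.fetch_historical_weather(end - timedelta(days=7), end)
    print("weather records:", len(weather))

asyncio.run(smoke_test())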
43
services/external/app/ingestion/base_adapter.py
vendored
Normal file
43
services/external/app/ingestion/base_adapter.py
vendored
Normal file
@@ -0,0 +1,43 @@
# services/external/app/ingestion/base_adapter.py
"""
Base adapter interface for city-specific data sources
"""

from abc import ABC, abstractmethod
from typing import List, Dict, Any
from datetime import datetime


class CityDataAdapter(ABC):
    """Abstract base class for city-specific data adapters"""

    def __init__(self, city_id: str, config: Dict[str, Any]):
        self.city_id = city_id
        self.config = config

    @abstractmethod
    async def fetch_historical_weather(
        self,
        start_date: datetime,
        end_date: datetime
    ) -> List[Dict[str, Any]]:
        """Fetch historical weather data for date range"""
        pass

    @abstractmethod
    async def fetch_historical_traffic(
        self,
        start_date: datetime,
        end_date: datetime
    ) -> List[Dict[str, Any]]:
        """Fetch historical traffic data for date range"""
        pass

    @abstractmethod
    async def validate_connection(self) -> bool:
        """Validate connection to data source"""
        pass

    def get_city_id(self) -> str:
        """Get city identifier"""
        return self.city_id
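A minimal sketch of implementing this interface for a new data source; the adapter below is hypothetical and returns canned records rather than calling any real client from this repository.

from typing import List, Dict, Any
from datetime import datetime

from app.ingestion.base_adapter import CityDataAdapter


class StaticFixtureAdapter(CityDataAdapter):
    """Hypothetical adapter that serves canned records, useful for local testing."""

    async def fetch_historical_weather(self, start_date: datetime, end_date: datetime) -> List[Dict[str, Any]]:
        return [{"city_id": self.city_id, "timestamp": start_date, "temperature": 18.5}]

    async def fetch_historical_traffic(self, start_date: datetime, end_date: datetime) -> List[Dict[str, Any]]:
        return [{"city_id": self.city_id, "timestamp": start_date, "traffic_volume": 900}]

    async def validate_connection(self) -> bool:
        return True  # no external dependency to validate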
408
services/external/app/ingestion/ingestion_manager.py
vendored
Normal file
408
services/external/app/ingestion/ingestion_manager.py
vendored
Normal file
@@ -0,0 +1,408 @@
|
||||
# services/external/app/ingestion/ingestion_manager.py
|
||||
"""
|
||||
Data Ingestion Manager - Coordinates multi-city data collection
|
||||
"""
|
||||
|
||||
from typing import List, Dict, Any
|
||||
from datetime import datetime, timedelta
|
||||
import structlog
|
||||
import asyncio
|
||||
|
||||
from app.registry.city_registry import CityRegistry
|
||||
from app.registry.calendar_registry import CalendarRegistry
|
||||
from .adapters import get_adapter
|
||||
from app.repositories.city_data_repository import CityDataRepository
|
||||
from app.repositories.calendar_repository import CalendarRepository
|
||||
from app.core.database import database_manager
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
|
||||
class DataIngestionManager:
|
||||
"""Orchestrates data ingestion across all cities"""
|
||||
|
||||
def __init__(self):
|
||||
self.registry = CityRegistry()
|
||||
self.database_manager = database_manager
|
||||
|
||||
async def initialize_all_cities(self, months: int = 24):
|
||||
"""
|
||||
Initialize historical data for all enabled cities
|
||||
Called by Kubernetes Init Job
|
||||
"""
|
||||
enabled_cities = self.registry.get_enabled_cities()
|
||||
|
||||
logger.info(
|
||||
"Starting full data initialization",
|
||||
cities=len(enabled_cities),
|
||||
months=months
|
||||
)
|
||||
|
||||
end_date = datetime.now()
|
||||
start_date = end_date - timedelta(days=months * 30)
|
||||
|
||||
tasks = [
|
||||
self.initialize_city(city.city_id, start_date, end_date)
|
||||
for city in enabled_cities
|
||||
]
|
||||
|
||||
results = await asyncio.gather(*tasks, return_exceptions=True)
|
||||
|
||||
successes = sum(1 for r in results if r is True)
|
||||
failures = len(results) - successes
|
||||
|
||||
logger.info(
|
||||
"Data initialization complete",
|
||||
total=len(results),
|
||||
successes=successes,
|
||||
failures=failures
|
||||
)
|
||||
|
||||
# Consider success if we have at least some cities initialized (majority success)
|
||||
# This allows the system to continue even if some external APIs are temporarily unavailable
|
||||
if successes > 0:
|
||||
logger.info(
|
||||
"Partial success achieved - continuing with available data",
|
||||
success_ratio=f"{successes}/{len(results)}"
|
||||
)
|
||||
return True
|
||||
else:
|
||||
logger.error("All city initializations failed - system cannot proceed")
|
||||
return False
|
||||
|
||||
async def initialize_city(
|
||||
self,
|
||||
city_id: str,
|
||||
start_date: datetime,
|
||||
end_date: datetime
|
||||
) -> bool:
|
||||
"""Initialize historical data for a single city (idempotent)"""
|
||||
try:
|
||||
city = self.registry.get_city(city_id)
|
||||
if not city:
|
||||
logger.error("City not found", city_id=city_id)
|
||||
return False
|
||||
|
||||
logger.info(
|
||||
"Initializing city data",
|
||||
city=city.name,
|
||||
start=start_date.date(),
|
||||
end=end_date.date()
|
||||
)
|
||||
|
||||
# Check if data already exists (idempotency)
|
||||
async with self.database_manager.get_session() as session:
|
||||
repo = CityDataRepository(session)
|
||||
coverage = await repo.get_data_coverage(city_id, start_date, end_date)
|
||||
|
||||
days_in_range = (end_date - start_date).days
|
||||
expected_records = days_in_range # One record per day minimum
|
||||
|
||||
# If we have >= 90% coverage, skip initialization
|
||||
threshold = expected_records * 0.9
|
||||
weather_sufficient = coverage['weather'] >= threshold
|
||||
traffic_sufficient = coverage['traffic'] >= threshold
|
||||
|
||||
if weather_sufficient and traffic_sufficient:
|
||||
logger.info(
|
||||
"City data already initialized, skipping",
|
||||
city=city.name,
|
||||
weather_records=coverage['weather'],
|
||||
traffic_records=coverage['traffic'],
|
||||
threshold=int(threshold)
|
||||
)
|
||||
return True
|
||||
|
||||
logger.info(
|
||||
"Insufficient data coverage, proceeding with initialization",
|
||||
city=city.name,
|
||||
existing_weather=coverage['weather'],
|
||||
existing_traffic=coverage['traffic'],
|
||||
expected=expected_records
|
||||
)
|
||||
|
||||
adapter = get_adapter(
|
||||
city_id,
|
||||
{
|
||||
"weather_config": city.weather_config,
|
||||
"traffic_config": city.traffic_config
|
||||
}
|
||||
)
|
||||
|
||||
if not await adapter.validate_connection():
|
||||
logger.error("Adapter validation failed", city=city.name)
|
||||
return False
|
||||
|
||||
# Fetch data with error handling to allow partial success
|
||||
weather_data = []
|
||||
traffic_data = []
|
||||
|
||||
# Fetch weather data
|
||||
try:
|
||||
weather_data = await adapter.fetch_historical_weather(
|
||||
start_date, end_date
|
||||
)
|
||||
logger.info("Weather data fetched successfully",
|
||||
records=len(weather_data), city=city.name)
|
||||
except Exception as weather_error:
|
||||
logger.error("Failed to fetch weather data",
|
||||
city=city.name, error=str(weather_error))
|
||||
# Don't return False here - continue with whatever data we can get
|
||||
|
||||
# Fetch traffic data
|
||||
try:
|
||||
traffic_data = await adapter.fetch_historical_traffic(
|
||||
start_date, end_date
|
||||
)
|
||||
logger.info("Traffic data fetched successfully",
|
||||
records=len(traffic_data), city=city.name)
|
||||
except Exception as traffic_error:
|
||||
logger.error("Failed to fetch traffic data",
|
||||
city=city.name, error=str(traffic_error))
|
||||
# Don't return False here - continue with weather data only if available
|
||||
|
||||
# Store available data (at least one type should be available for partial success)
|
||||
async with self.database_manager.get_session() as session:
|
||||
repo = CityDataRepository(session)
|
||||
|
||||
weather_stored = 0
|
||||
traffic_stored = 0
|
||||
|
||||
if weather_data:
|
||||
weather_stored = await repo.bulk_store_weather(
|
||||
city_id, weather_data
|
||||
)
|
||||
|
||||
if traffic_data:
|
||||
traffic_stored = await repo.bulk_store_traffic(
|
||||
city_id, traffic_data
|
||||
)
|
||||
|
||||
# Only fail if both data types failed to fetch
|
||||
if not weather_data and not traffic_data:
|
||||
logger.error("Both weather and traffic data fetch failed", city=city.name)
|
||||
return False
|
||||
|
||||
logger.info(
|
||||
"City initialization complete",
|
||||
city=city.name,
|
||||
weather_records=weather_stored,
|
||||
traffic_records=traffic_stored
|
||||
)
|
||||
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"City initialization failed",
|
||||
city_id=city_id,
|
||||
error=str(e)
|
||||
)
|
||||
return False
|
||||
|
||||
async def rotate_monthly_data(self):
|
||||
"""
|
||||
Rotate 24-month window: delete old, ingest new
|
||||
Called by Kubernetes CronJob monthly
|
||||
"""
|
||||
enabled_cities = self.registry.get_enabled_cities()
|
||||
|
||||
logger.info("Starting monthly data rotation", cities=len(enabled_cities))
|
||||
|
||||
now = datetime.now()
|
||||
cutoff_date = now - timedelta(days=24 * 30)
|
||||
|
||||
last_month_end = now.replace(day=1) - timedelta(days=1)
|
||||
last_month_start = last_month_end.replace(day=1)
|
||||
|
||||
tasks = []
|
||||
for city in enabled_cities:
|
||||
tasks.append(
|
||||
self._rotate_city_data(
|
||||
city.city_id,
|
||||
cutoff_date,
|
||||
last_month_start,
|
||||
last_month_end
|
||||
)
|
||||
)
|
||||
|
||||
results = await asyncio.gather(*tasks, return_exceptions=True)
|
||||
|
||||
successes = sum(1 for r in results if r is True)
|
||||
logger.info(
|
||||
"Monthly rotation complete",
|
||||
total=len(results),
|
||||
successes=successes
|
||||
)
|
||||
|
||||
async def _rotate_city_data(
|
||||
self,
|
||||
city_id: str,
|
||||
cutoff_date: datetime,
|
||||
new_start: datetime,
|
||||
new_end: datetime
|
||||
) -> bool:
|
||||
"""Rotate data for a single city"""
|
||||
try:
|
||||
city = self.registry.get_city(city_id)
|
||||
if not city:
|
||||
return False
|
||||
|
||||
logger.info(
|
||||
"Rotating city data",
|
||||
city=city.name,
|
||||
cutoff=cutoff_date.date(),
|
||||
new_month=new_start.strftime("%Y-%m")
|
||||
)
|
||||
|
||||
async with self.database_manager.get_session() as session:
|
||||
repo = CityDataRepository(session)
|
||||
|
||||
deleted_weather = await repo.delete_weather_before(
|
||||
city_id, cutoff_date
|
||||
)
|
||||
deleted_traffic = await repo.delete_traffic_before(
|
||||
city_id, cutoff_date
|
||||
)
|
||||
|
||||
logger.info(
|
||||
"Old data deleted",
|
||||
city=city.name,
|
||||
weather_deleted=deleted_weather,
|
||||
traffic_deleted=deleted_traffic
|
||||
)
|
||||
|
||||
adapter = get_adapter(city_id, {
|
||||
"weather_config": city.weather_config,
|
||||
"traffic_config": city.traffic_config
|
||||
})
|
||||
|
||||
new_weather = await adapter.fetch_historical_weather(
|
||||
new_start, new_end
|
||||
)
|
||||
new_traffic = await adapter.fetch_historical_traffic(
|
||||
new_start, new_end
|
||||
)
|
||||
|
||||
async with self.database_manager.get_session() as session:
|
||||
repo = CityDataRepository(session)
|
||||
|
||||
weather_stored = await repo.bulk_store_weather(
|
||||
city_id, new_weather
|
||||
)
|
||||
traffic_stored = await repo.bulk_store_traffic(
|
||||
city_id, new_traffic
|
||||
)
|
||||
|
||||
logger.info(
|
||||
"New data ingested",
|
||||
city=city.name,
|
||||
weather_added=weather_stored,
|
||||
traffic_added=traffic_stored
|
||||
)
|
||||
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"City rotation failed",
|
||||
city_id=city_id,
|
||||
error=str(e)
|
||||
)
|
||||
return False
|
||||
|
||||
async def seed_school_calendars(self) -> bool:
|
||||
"""
|
||||
Seed school calendars from CalendarRegistry into database
|
||||
Called during initialization - idempotent
|
||||
"""
|
||||
try:
|
||||
logger.info("Starting school calendar seeding...")
|
||||
|
||||
# Get all calendars from registry
|
||||
calendars = CalendarRegistry.get_all_calendars()
|
||||
logger.info(f"Found {len(calendars)} calendars in registry")
|
||||
|
||||
async with self.database_manager.get_session() as session:
|
||||
repo = CalendarRepository(session)
|
||||
|
||||
seeded_count = 0
|
||||
skipped_count = 0
|
||||
|
||||
for cal_def in calendars:
|
||||
logger.info(
|
||||
"Processing calendar",
|
||||
calendar_id=cal_def.calendar_id,
|
||||
city=cal_def.city_id,
|
||||
type=cal_def.school_type.value,
|
||||
year=cal_def.academic_year
|
||||
)
|
||||
|
||||
# Check if calendar already exists (idempotency)
|
||||
existing = await repo.get_calendar_by_city_type_year(
|
||||
city_id=cal_def.city_id,
|
||||
school_type=cal_def.school_type.value,
|
||||
academic_year=cal_def.academic_year
|
||||
)
|
||||
|
||||
if existing:
|
||||
logger.info(
|
||||
"Calendar already exists, skipping",
|
||||
calendar_id=cal_def.calendar_id
|
||||
)
|
||||
skipped_count += 1
|
||||
continue
|
||||
|
||||
# Convert holiday periods to dict format
|
||||
holiday_periods = [
|
||||
{
|
||||
"name": hp.name,
|
||||
"start_date": hp.start_date,
|
||||
"end_date": hp.end_date,
|
||||
"description": hp.description
|
||||
}
|
||||
for hp in cal_def.holiday_periods
|
||||
]
|
||||
|
||||
# Convert school hours to dict format
|
||||
school_hours = {
|
||||
"morning_start": cal_def.school_hours.morning_start,
|
||||
"morning_end": cal_def.school_hours.morning_end,
|
||||
"has_afternoon_session": cal_def.school_hours.has_afternoon_session,
|
||||
"afternoon_start": cal_def.school_hours.afternoon_start,
|
||||
"afternoon_end": cal_def.school_hours.afternoon_end
|
||||
}
|
||||
|
||||
# Create calendar in database
|
||||
created_calendar = await repo.create_school_calendar(
|
||||
city_id=cal_def.city_id,
|
||||
calendar_name=cal_def.calendar_name,
|
||||
school_type=cal_def.school_type.value,
|
||||
academic_year=cal_def.academic_year,
|
||||
holiday_periods=holiday_periods,
|
||||
school_hours=school_hours,
|
||||
source=cal_def.source,
|
||||
enabled=cal_def.enabled
|
||||
)
|
||||
|
||||
logger.info(
|
||||
"Calendar seeded successfully",
|
||||
calendar_id=str(created_calendar.id),
|
||||
city=cal_def.city_id,
|
||||
type=cal_def.school_type.value,
|
||||
year=cal_def.academic_year
|
||||
)
|
||||
seeded_count += 1
|
||||
|
||||
logger.info(
|
||||
"School calendar seeding completed",
|
||||
seeded=seeded_count,
|
||||
skipped=skipped_count,
|
||||
total=len(calendars)
|
||||
)
|
||||
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error("Error seeding school calendars", error=str(e))
|
||||
return False
|
||||
1
services/external/app/jobs/__init__.py
vendored
Normal file
1
services/external/app/jobs/__init__.py
vendored
Normal file
@@ -0,0 +1 @@
"""Kubernetes job scripts for data initialization and rotation"""
69
services/external/app/jobs/initialize_data.py
vendored
Normal file
69
services/external/app/jobs/initialize_data.py
vendored
Normal file
@@ -0,0 +1,69 @@
# services/external/app/jobs/initialize_data.py
"""
Kubernetes Init Job - Initialize 24-month historical data
"""

import asyncio
import argparse
import sys
import logging
import structlog

from app.ingestion.ingestion_manager import DataIngestionManager
from app.core.database import database_manager

logger = structlog.get_logger()


async def main(months: int = 24):
    """Initialize historical data for all enabled cities and seed calendars"""
    logger.info("Starting data initialization job", months=months)

    try:
        manager = DataIngestionManager()

        # Initialize weather and traffic data
        weather_traffic_success = await manager.initialize_all_cities(months=months)

        # Seed school calendars
        logger.info("Proceeding to seed school calendars...")
        calendar_success = await manager.seed_school_calendars()

        # Calendar seeding is critical, but weather/traffic can have partial success
        overall_success = calendar_success and weather_traffic_success

        if overall_success:
            logger.info("✅ Data initialization completed successfully (weather, traffic, calendars)")
            sys.exit(0)
        else:
            if not calendar_success:
                logger.error("❌ Calendar seeding failed - this is critical")
                sys.exit(1)
            elif not weather_traffic_success:
                # Log as warning instead of error if some data was retrieved
                logger.warning("⚠️ Weather/traffic initialization had partial failures, but system can continue")
                logger.info("✅ Calendar seeding completed - system can operate with available data")
                sys.exit(0)  # Allow partial success for weather/traffic

    except Exception as e:
        logger.error("❌ Fatal error during initialization", error=str(e))
        sys.exit(1)
    finally:
        await database_manager.close_connections()


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Initialize historical data")
    parser.add_argument("--months", type=int, default=24, help="Number of months to load")
    parser.add_argument("--log-level", default="INFO", help="Log level")

    args = parser.parse_args()

    # Convert string log level to logging constant
    log_level = getattr(logging, args.log_level.upper(), logging.INFO)

    structlog.configure(
        wrapper_class=structlog.make_filtering_bound_logger(log_level)
    )

    asyncio.run(main(months=args.months))
50
services/external/app/jobs/rotate_data.py
vendored
Normal file
@@ -0,0 +1,50 @@
# services/external/app/jobs/rotate_data.py
"""
Kubernetes CronJob - Monthly data rotation (24-month window)
"""

import asyncio
import argparse
import sys
import logging
import structlog

from app.ingestion.ingestion_manager import DataIngestionManager
from app.core.database import database_manager

logger = structlog.get_logger()


async def main():
    """Rotate 24-month data window"""
    logger.info("Starting monthly data rotation job")

    try:
        manager = DataIngestionManager()
        await manager.rotate_monthly_data()

        logger.info("✅ Data rotation completed successfully")
        sys.exit(0)

    except Exception as e:
        logger.error("❌ Fatal error during rotation", error=str(e))
        sys.exit(1)
    finally:
        await database_manager.close_connections()


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Rotate historical data")
    parser.add_argument("--log-level", default="INFO", help="Log level")
    # Boolean flag: argparse's type=bool would treat any non-empty string as True,
    # so expose this as a store_true switch instead.
    parser.add_argument("--notify-slack", action="store_true", help="Send Slack notification")

    args = parser.parse_args()

    # Convert string log level to logging constant
    log_level = getattr(logging, args.log_level.upper(), logging.INFO)

    structlog.configure(
        wrapper_class=structlog.make_filtering_bound_logger(log_level)
    )

    asyncio.run(main())
207
services/external/app/main.py
vendored
Normal file
@@ -0,0 +1,207 @@
# services/external/app/main.py
"""
External Service Main Application
"""

from fastapi import FastAPI
from sqlalchemy import text
from app.core.config import settings
from app.core.database import database_manager
# Removed import of non-existent messaging module
# External service will use unified messaging from base class
from shared.service_base import StandardFastAPIService
from shared.redis_utils import initialize_redis, close_redis
# Router modules (registered at the bottom of this file)
from app.api import weather_data, traffic_data, city_operations, calendar_operations, audit, poi_context, geocoding, poi_refresh_jobs
from app.services.poi_scheduler import start_scheduler, stop_scheduler


class ExternalService(StandardFastAPIService):
    """External Data Service with standardized setup"""

    expected_migration_version = "00001"

    async def verify_migrations(self):
        """Verify database schema matches the latest migrations."""
        try:
            async with self.database_manager.get_session() as session:
                result = await session.execute(text("SELECT version_num FROM alembic_version"))
                version = result.scalar()
                if version != self.expected_migration_version:
                    self.logger.error(
                        f"Migration version mismatch: expected {self.expected_migration_version}, got {version}"
                    )
                    raise RuntimeError(
                        f"Migration version mismatch: expected {self.expected_migration_version}, got {version}"
                    )
                self.logger.info(f"Migration verification successful: {version}")
        except Exception as e:
            self.logger.error(f"Migration verification failed: {e}")
            raise

    def __init__(self):
        # Define expected database tables for health checks
        external_expected_tables = [
            'weather_data', 'weather_forecasts', 'traffic_data',
            'traffic_measurement_points', 'traffic_background_jobs',
            'tenant_poi_contexts', 'poi_refresh_jobs'
        ]

        # Define custom API checks
        async def check_weather_api():
            """Check weather API configuration"""
            try:
                return bool(settings.AEMET_API_KEY)
            except Exception as e:
                self.logger.error("Weather API check failed", error=str(e))
                return False

        async def check_traffic_api():
            """Check traffic API configuration"""
            try:
                return bool(settings.MADRID_OPENDATA_API_KEY)
            except Exception as e:
                self.logger.error("Traffic API check failed", error=str(e))
                return False

        # Define custom metrics for external service
        external_custom_metrics = {
            "weather_api_calls_total": {
                "type": "counter",
                "description": "Total weather API calls"
            },
            "weather_api_success_total": {
                "type": "counter",
                "description": "Successful weather API calls"
            },
            "weather_api_failures_total": {
                "type": "counter",
                "description": "Failed weather API calls"
            },
            "traffic_api_calls_total": {
                "type": "counter",
                "description": "Total traffic API calls"
            },
            "traffic_api_success_total": {
                "type": "counter",
                "description": "Successful traffic API calls"
            },
            "traffic_api_failures_total": {
                "type": "counter",
                "description": "Failed traffic API calls"
            },
            "data_collection_jobs_total": {
                "type": "counter",
                "description": "Data collection jobs"
            },
            "data_records_stored_total": {
                "type": "counter",
                "description": "Data records stored"
            },
            "data_quality_issues_total": {
                "type": "counter",
                "description": "Data quality issues detected"
            },
            "weather_api_duration_seconds": {
                "type": "histogram",
                "description": "Weather API call duration"
            },
            "traffic_api_duration_seconds": {
                "type": "histogram",
                "description": "Traffic API call duration"
            },
            "data_collection_duration_seconds": {
                "type": "histogram",
                "description": "Data collection job duration"
            },
            "data_processing_duration_seconds": {
                "type": "histogram",
                "description": "Data processing duration"
            }
        }

        super().__init__(
            service_name="external-service",
            app_name="Bakery External Data Service",
            description="External data collection service for weather, traffic, and events data",
            version="1.0.0",
            log_level=settings.LOG_LEVEL,
            cors_origins=settings.CORS_ORIGINS,
            api_prefix="",  # Empty because RouteBuilder already includes /api/v1
            database_manager=database_manager,
            expected_tables=external_expected_tables,
            custom_health_checks={
                "weather_api": check_weather_api,
                "traffic_api": check_traffic_api
            },
            custom_metrics=external_custom_metrics,
            enable_messaging=True
        )

    async def _setup_messaging(self):
        """Setup messaging for external service using unified messaging"""
        # The base class handles the unified messaging setup;
        # no additional setup is needed for the external service.
        self.logger.info("External service unified messaging initialized")

    async def _cleanup_messaging(self):
        """Cleanup messaging for external service"""
        # The base class handles the unified messaging cleanup
        self.logger.info("External service unified messaging cleaned up")

    async def on_startup(self, app: FastAPI):
        """Custom startup logic for external service"""
        # Verify database migrations before completing startup
        await self.verify_migrations()
        await super().on_startup(app)

        # Initialize Redis connection
        await initialize_redis(settings.REDIS_URL, db=0, max_connections=50)
        self.logger.info("Redis initialized for external service")

        # Start POI refresh scheduler
        await start_scheduler()
        self.logger.info("POI refresh scheduler started")

    async def on_shutdown(self, app: FastAPI):
        """Custom shutdown logic for external service"""
        # Stop POI refresh scheduler
        await stop_scheduler()
        self.logger.info("POI refresh scheduler stopped")

        # Close Redis connection
        await close_redis()
        self.logger.info("Redis connection closed")

        # Database cleanup is handled by the base class

    def get_service_features(self):
        """Return external-specific features"""
        return [
            "weather_data_collection",
            "traffic_data_collection",
            "aemet_integration",
            "madrid_opendata_integration",
            "data_quality_monitoring",
            "scheduled_collection_jobs",
            "external_api_monitoring"
        ]


# Create service instance
service = ExternalService()

# Create FastAPI app with standardized setup
app = service.create_app()

# Setup standard endpoints
service.setup_standard_endpoints()

# Include routers
# IMPORTANT: Register audit router FIRST to avoid route matching conflicts
service.add_router(audit.router)
service.add_router(weather_data.router)
service.add_router(traffic_data.router)
service.add_router(city_operations.router)  # New v2.0 city-based optimized endpoints
service.add_router(calendar_operations.router)  # School calendars and hyperlocal data
service.add_router(poi_context.router)  # POI detection and location-based features
service.add_router(geocoding.router)  # Address search and geocoding
service.add_router(poi_refresh_jobs.router)  # POI refresh background jobs
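For reference, a minimal smoke test against a locally running instance might look like the sketch below. It assumes the standard health endpoint exposed by setup_standard_endpoints() is mounted at /health on port 8000 and that httpx is available; neither is guaranteed by this module alone.

# Hypothetical smoke test (endpoint path and port are assumptions, not defined in this module)
import httpx

response = httpx.get("http://localhost:8000/health", timeout=5.0)
print(response.status_code, response.json())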
46
services/external/app/models/__init__.py
vendored
Normal file
46
services/external/app/models/__init__.py
vendored
Normal file
@@ -0,0 +1,46 @@
|
||||
"""
|
||||
External Service Models Package
|
||||
|
||||
Import all models to ensure they are registered with SQLAlchemy Base.
|
||||
"""
|
||||
|
||||
# Import AuditLog model for this service
|
||||
from shared.security import create_audit_log_model
|
||||
from shared.database.base import Base
|
||||
|
||||
# Create audit log model for this service
|
||||
AuditLog = create_audit_log_model(Base)
|
||||
|
||||
# Import all models to register them with the Base metadata
|
||||
from .traffic import (
|
||||
TrafficData,
|
||||
TrafficMeasurementPoint,
|
||||
TrafficDataBackgroundJob,
|
||||
)
|
||||
|
||||
from .weather import (
|
||||
WeatherData,
|
||||
WeatherForecast,
|
||||
)
|
||||
|
||||
from .city_weather import CityWeatherData
|
||||
from .city_traffic import CityTrafficData
|
||||
from .calendar import SchoolCalendar, TenantLocationContext
|
||||
|
||||
# List all models for easier access
|
||||
__all__ = [
|
||||
# Traffic models
|
||||
"TrafficData",
|
||||
"TrafficMeasurementPoint",
|
||||
"TrafficDataBackgroundJob",
|
||||
# Weather models
|
||||
"WeatherData",
|
||||
"WeatherForecast",
|
||||
# City-based models (new)
|
||||
"CityWeatherData",
|
||||
"CityTrafficData",
|
||||
# Calendar models (hyperlocal)
|
||||
"SchoolCalendar",
|
||||
"TenantLocationContext",
|
||||
"AuditLog",
|
||||
]
|
||||
86
services/external/app/models/calendar.py
vendored
Normal file
86
services/external/app/models/calendar.py
vendored
Normal file
@@ -0,0 +1,86 @@
|
||||
# services/external/app/models/calendar.py
|
||||
"""
|
||||
School Calendar and Tenant Location Context Models
|
||||
Hyperlocal data for demand forecasting
|
||||
"""
|
||||
|
||||
from sqlalchemy import Column, String, DateTime, Index, Boolean
|
||||
from sqlalchemy.dialects.postgresql import UUID, JSONB
|
||||
from datetime import datetime
|
||||
import uuid
|
||||
|
||||
from app.core.database import Base
|
||||
|
||||
|
||||
class SchoolCalendar(Base):
|
||||
"""City-based school calendar data for forecasting"""
|
||||
|
||||
__tablename__ = "school_calendars"
|
||||
|
||||
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
|
||||
city_id = Column(String(50), nullable=False, index=True)
|
||||
calendar_name = Column(String(100), nullable=False)
|
||||
school_type = Column(String(20), nullable=False) # primary, secondary, university
|
||||
academic_year = Column(String(10), nullable=False) # e.g., "2024-2025"
|
||||
|
||||
# Holiday periods as array of date ranges
|
||||
# Example: [
|
||||
# {"name": "Christmas", "start": "2024-12-20", "end": "2025-01-08"},
|
||||
# {"name": "Easter", "start": "2025-04-10", "end": "2025-04-21"},
|
||||
# {"name": "Summer", "start": "2025-06-23", "end": "2025-09-09"}
|
||||
# ]
|
||||
holiday_periods = Column(JSONB, nullable=False, default=list)
|
||||
|
||||
# School hours configuration
|
||||
# Example: {
|
||||
# "morning_start": "09:00",
|
||||
# "morning_end": "14:00",
|
||||
# "afternoon_start": "15:00", # if applicable
|
||||
# "afternoon_end": "17:00",
|
||||
# "has_afternoon_session": false
|
||||
# }
|
||||
school_hours = Column(JSONB, nullable=False, default=dict)
|
||||
|
||||
# Metadata
|
||||
source = Column(String(100), nullable=True) # e.g., "madrid_education_dept"
|
||||
enabled = Column(Boolean, default=True, nullable=False)
|
||||
|
||||
created_at = Column(DateTime(timezone=True), default=datetime.utcnow)
|
||||
updated_at = Column(DateTime(timezone=True), default=datetime.utcnow, onupdate=datetime.utcnow)
|
||||
|
||||
__table_args__ = (
|
||||
Index('idx_school_calendar_city_year', 'city_id', 'academic_year'),
|
||||
Index('idx_school_calendar_city_type', 'city_id', 'school_type'),
|
||||
)
|
||||
|
||||
|
||||
class TenantLocationContext(Base):
|
||||
"""Tenant-specific location context for hyperlocal forecasting"""
|
||||
|
||||
__tablename__ = "tenant_location_contexts"
|
||||
|
||||
tenant_id = Column(UUID(as_uuid=True), primary_key=True)
|
||||
city_id = Column(String(50), nullable=False, index=True)
|
||||
|
||||
# School calendar assignment
|
||||
school_calendar_id = Column(UUID(as_uuid=True), nullable=True, index=True)
|
||||
|
||||
# Hyperlocal context
|
||||
neighborhood = Column(String(100), nullable=True)
|
||||
|
||||
# Custom local events specific to this tenant's location
|
||||
# Example: [
|
||||
# {"name": "Neighborhood Festival", "date": "2025-06-15", "impact": "high"},
|
||||
# {"name": "Local Market Day", "date": "2025-05-20", "impact": "medium"}
|
||||
# ]
|
||||
local_events = Column(JSONB, nullable=True, default=list)
|
||||
|
||||
# Additional metadata
|
||||
notes = Column(String(500), nullable=True)
|
||||
|
||||
created_at = Column(DateTime(timezone=True), default=datetime.utcnow)
|
||||
updated_at = Column(DateTime(timezone=True), default=datetime.utcnow, onupdate=datetime.utcnow)
|
||||
|
||||
__table_args__ = (
|
||||
Index('idx_tenant_location_calendar', 'school_calendar_id'),
|
||||
)
|
||||
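As an illustration only, a SchoolCalendar row matching the JSONB shapes documented in the column comments above could be built as follows; the session/commit wiring is omitted and the values are invented.

# Sketch: constructing a SchoolCalendar record (values are illustrative)
from app.models.calendar import SchoolCalendar

example_calendar = SchoolCalendar(
    city_id="madrid",
    calendar_name="Madrid Primary School Calendar 2024-2025",
    school_type="primary",
    academic_year="2024-2025",
    holiday_periods=[
        {"name": "Christmas", "start": "2024-12-20", "end": "2025-01-08"},
    ],
    school_hours={
        "morning_start": "09:00",
        "morning_end": "14:00",
        "has_afternoon_session": False,
    },
    source="madrid_education_dept",
    enabled=True,
)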
36
services/external/app/models/city_traffic.py
vendored
Normal file
36
services/external/app/models/city_traffic.py
vendored
Normal file
@@ -0,0 +1,36 @@
|
||||
# services/external/app/models/city_traffic.py
|
||||
"""
|
||||
City Traffic Data Model - Shared city-based traffic storage
|
||||
"""
|
||||
|
||||
from sqlalchemy import Column, String, Integer, Float, DateTime, Text, Index
|
||||
from sqlalchemy.dialects.postgresql import UUID, JSONB
|
||||
from datetime import datetime
|
||||
import uuid
|
||||
|
||||
from app.core.database import Base
|
||||
|
||||
|
||||
class CityTrafficData(Base):
|
||||
"""City-based historical traffic data"""
|
||||
|
||||
__tablename__ = "city_traffic_data"
|
||||
|
||||
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
|
||||
city_id = Column(String(50), nullable=False, index=True)
|
||||
date = Column(DateTime(timezone=True), nullable=False, index=True)
|
||||
|
||||
traffic_volume = Column(Integer, nullable=True)
|
||||
pedestrian_count = Column(Integer, nullable=True)
|
||||
congestion_level = Column(String(20), nullable=True)
|
||||
average_speed = Column(Float, nullable=True)
|
||||
|
||||
source = Column(String(50), nullable=False)
|
||||
raw_data = Column(JSONB, nullable=True)
|
||||
|
||||
created_at = Column(DateTime(timezone=True), default=datetime.utcnow)
|
||||
updated_at = Column(DateTime(timezone=True), default=datetime.utcnow, onupdate=datetime.utcnow)
|
||||
|
||||
__table_args__ = (
|
||||
Index('idx_city_traffic_lookup', 'city_id', 'date'),
|
||||
)
|
||||
38
services/external/app/models/city_weather.py
vendored
Normal file
38
services/external/app/models/city_weather.py
vendored
Normal file
@@ -0,0 +1,38 @@
|
||||
# services/external/app/models/city_weather.py
|
||||
"""
|
||||
City Weather Data Model - Shared city-based weather storage
|
||||
"""
|
||||
|
||||
from sqlalchemy import Column, String, Float, DateTime, Text, Index
|
||||
from sqlalchemy.dialects.postgresql import UUID, JSONB
|
||||
from datetime import datetime
|
||||
import uuid
|
||||
|
||||
from app.core.database import Base
|
||||
|
||||
|
||||
class CityWeatherData(Base):
|
||||
"""City-based historical weather data"""
|
||||
|
||||
__tablename__ = "city_weather_data"
|
||||
|
||||
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
|
||||
city_id = Column(String(50), nullable=False, index=True)
|
||||
date = Column(DateTime(timezone=True), nullable=False, index=True)
|
||||
|
||||
temperature = Column(Float, nullable=True)
|
||||
precipitation = Column(Float, nullable=True)
|
||||
humidity = Column(Float, nullable=True)
|
||||
wind_speed = Column(Float, nullable=True)
|
||||
pressure = Column(Float, nullable=True)
|
||||
description = Column(String(200), nullable=True)
|
||||
|
||||
source = Column(String(50), nullable=False)
|
||||
raw_data = Column(JSONB, nullable=True)
|
||||
|
||||
created_at = Column(DateTime(timezone=True), default=datetime.utcnow)
|
||||
updated_at = Column(DateTime(timezone=True), default=datetime.utcnow, onupdate=datetime.utcnow)
|
||||
|
||||
__table_args__ = (
|
||||
Index('idx_city_weather_lookup', 'city_id', 'date'),
|
||||
)
|
||||
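A sketch of how the idx_city_weather_lookup index is meant to be used, e.g. fetching one city's history over a date window with an async SQLAlchemy session; the session itself is assumed to come from database_manager.

# Sketch: range query over (city_id, date), matching idx_city_weather_lookup
from sqlalchemy import select

from app.models.city_weather import CityWeatherData

async def load_city_weather(session, city_id: str, start, end):
    stmt = (
        select(CityWeatherData)
        .where(CityWeatherData.city_id == city_id)
        .where(CityWeatherData.date >= start)
        .where(CityWeatherData.date <= end)
        .order_by(CityWeatherData.date)
    )
    result = await session.execute(stmt)
    return result.scalars().all()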
123
services/external/app/models/poi_context.py
vendored
Normal file
123
services/external/app/models/poi_context.py
vendored
Normal file
@@ -0,0 +1,123 @@
|
||||
"""
|
||||
POI Context Model
|
||||
|
||||
Stores Point of Interest detection results and ML features for bakery locations.
|
||||
Used for location-based demand forecasting with contextual features.
|
||||
"""
|
||||
|
||||
from sqlalchemy import Column, String, DateTime, Float, Index, Integer
|
||||
from sqlalchemy.dialects.postgresql import UUID, JSONB
|
||||
from datetime import datetime, timezone, timedelta
|
||||
import uuid
|
||||
|
||||
from app.core.database import Base
|
||||
|
||||
|
||||
class TenantPOIContext(Base):
|
||||
"""
|
||||
POI (Point of Interest) context for bakery location.
|
||||
|
||||
Stores detected POIs around bakery and calculated ML features
|
||||
for demand forecasting with location-specific context.
|
||||
"""
|
||||
|
||||
__tablename__ = "tenant_poi_contexts"
|
||||
|
||||
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
|
||||
tenant_id = Column(UUID(as_uuid=True), nullable=False, unique=True, index=True)
|
||||
|
||||
# Location (denormalized for quick reference and spatial queries)
|
||||
latitude = Column(Float, nullable=False)
|
||||
longitude = Column(Float, nullable=False)
|
||||
|
||||
# POI Detection Results (full raw data)
|
||||
# Structure: {
|
||||
# "schools": {
|
||||
# "pois": [{"osm_id": "...", "name": "...", "lat": ..., "lon": ...}],
|
||||
# "features": {"proximity_score": 3.45, "count_0_100m": 2, ...},
|
||||
# "count": 5
|
||||
# },
|
||||
# "offices": {...},
|
||||
# ...
|
||||
# }
|
||||
poi_detection_results = Column(JSONB, nullable=False, default=dict)
|
||||
|
||||
# ML Features (flat structure for easy model ingestion)
|
||||
# Structure: {
|
||||
# "poi_schools_proximity_score": 3.45,
|
||||
# "poi_schools_weighted_proximity_score": 5.18,
|
||||
# "poi_schools_count_0_100m": 2,
|
||||
# "poi_offices_proximity_score": 1.23,
|
||||
# ...
|
||||
# }
|
||||
ml_features = Column(JSONB, nullable=False, default=dict)
|
||||
|
||||
# Summary Statistics
|
||||
total_pois_detected = Column(Integer, default=0)
|
||||
high_impact_categories = Column(JSONB, default=list) # Categories with significant POI presence
|
||||
relevant_categories = Column(JSONB, default=list) # Categories that passed relevance thresholds
|
||||
|
||||
# Detection Metadata
|
||||
detection_timestamp = Column(DateTime(timezone=True), nullable=False)
|
||||
detection_source = Column(String(50), default="overpass_api")
|
||||
detection_status = Column(String(20), default="completed") # completed, failed, partial
|
||||
detection_error = Column(String(500), nullable=True) # Error message if detection failed
|
||||
|
||||
# Data Freshness Strategy
|
||||
# POIs don't change frequently, refresh every 6 months
|
||||
next_refresh_date = Column(DateTime(timezone=True), nullable=True)
|
||||
refresh_interval_days = Column(Integer, default=180) # 6 months default
|
||||
last_refreshed_at = Column(DateTime(timezone=True), nullable=True)
|
||||
|
||||
# Timestamps
|
||||
created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
|
||||
updated_at = Column(
|
||||
DateTime(timezone=True),
|
||||
default=lambda: datetime.now(timezone.utc),
|
||||
onupdate=lambda: datetime.now(timezone.utc)
|
||||
)
|
||||
|
||||
__table_args__ = (
|
||||
Index('idx_tenant_poi_location', 'latitude', 'longitude'),
|
||||
Index('idx_tenant_poi_refresh', 'next_refresh_date'),
|
||||
Index('idx_tenant_poi_status', 'detection_status'),
|
||||
)
|
||||
|
||||
def to_dict(self):
|
||||
"""Convert to dictionary for API responses"""
|
||||
return {
|
||||
"id": str(self.id),
|
||||
"tenant_id": str(self.tenant_id),
|
||||
"location": {
|
||||
"latitude": self.latitude,
|
||||
"longitude": self.longitude
|
||||
},
|
||||
"poi_detection_results": self.poi_detection_results,
|
||||
"ml_features": self.ml_features,
|
||||
"total_pois_detected": self.total_pois_detected,
|
||||
"high_impact_categories": self.high_impact_categories,
|
||||
"relevant_categories": self.relevant_categories,
|
||||
"detection_timestamp": self.detection_timestamp.isoformat() if self.detection_timestamp else None,
|
||||
"detection_source": self.detection_source,
|
||||
"detection_status": self.detection_status,
|
||||
"detection_error": self.detection_error,
|
||||
"next_refresh_date": self.next_refresh_date.isoformat() if self.next_refresh_date else None,
|
||||
"last_refreshed_at": self.last_refreshed_at.isoformat() if self.last_refreshed_at else None,
|
||||
"created_at": self.created_at.isoformat() if self.created_at else None,
|
||||
"updated_at": self.updated_at.isoformat() if self.updated_at else None
|
||||
}
|
||||
|
||||
def is_stale(self) -> bool:
|
||||
"""Check if POI data needs refresh"""
|
||||
if not self.next_refresh_date:
|
||||
return True
|
||||
return datetime.now(timezone.utc) > self.next_refresh_date
|
||||
|
||||
def calculate_next_refresh(self) -> datetime:
|
||||
"""Calculate next refresh date based on interval"""
|
||||
return datetime.now(timezone.utc) + timedelta(days=self.refresh_interval_days)
|
||||
|
||||
def mark_refreshed(self):
|
||||
"""Mark as refreshed and calculate next refresh date"""
|
||||
self.last_refreshed_at = datetime.now(timezone.utc)
|
||||
self.next_refresh_date = self.calculate_next_refresh()
|
||||
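The refresh helpers above can be combined as in this sketch; it is illustrative only, and persisting the updated row is left to the caller.

# Sketch: refresh-if-stale flow using the model's helpers
from app.models.poi_context import TenantPOIContext

def refresh_if_stale(context: TenantPOIContext) -> bool:
    """Return True if a refresh was recorded because the POI data was stale."""
    if not context.is_stale():
        return False
    # ... run POI detection here (e.g. via the Overpass-based detector) ...
    context.mark_refreshed()  # sets last_refreshed_at and next_refresh_date
    return True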
154
services/external/app/models/poi_refresh_job.py
vendored
Normal file
154
services/external/app/models/poi_refresh_job.py
vendored
Normal file
@@ -0,0 +1,154 @@
|
||||
"""
|
||||
POI Refresh Job Model
|
||||
|
||||
Tracks background jobs for periodic POI context refresh.
|
||||
"""
|
||||
|
||||
from sqlalchemy import Column, String, DateTime, Integer, Boolean, Text, Float
|
||||
from sqlalchemy.dialects.postgresql import UUID, JSONB
|
||||
from datetime import datetime, timezone
|
||||
import uuid
|
||||
|
||||
from app.core.database import Base
|
||||
|
||||
|
||||
class POIRefreshJob(Base):
|
||||
"""
|
||||
POI Refresh Background Job Model
|
||||
|
||||
Tracks periodic POI context refresh jobs for all tenants.
|
||||
Jobs run on a configurable schedule (default: 180 days).
|
||||
"""
|
||||
|
||||
__tablename__ = "poi_refresh_jobs"
|
||||
|
||||
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
|
||||
tenant_id = Column(UUID(as_uuid=True), nullable=False, index=True)
|
||||
|
||||
# Job scheduling
|
||||
scheduled_at = Column(
|
||||
DateTime(timezone=True),
|
||||
nullable=False,
|
||||
index=True,
|
||||
comment="When this job was scheduled"
|
||||
)
|
||||
started_at = Column(
|
||||
DateTime(timezone=True),
|
||||
nullable=True,
|
||||
comment="When job execution started"
|
||||
)
|
||||
completed_at = Column(
|
||||
DateTime(timezone=True),
|
||||
nullable=True,
|
||||
comment="When job execution completed"
|
||||
)
|
||||
|
||||
# Job status
|
||||
status = Column(
|
||||
String(50),
|
||||
nullable=False,
|
||||
default="pending",
|
||||
index=True,
|
||||
comment="Job status: pending, running, completed, failed"
|
||||
)
|
||||
|
||||
# Job execution details
|
||||
attempt_count = Column(
|
||||
Integer,
|
||||
nullable=False,
|
||||
default=0,
|
||||
comment="Number of execution attempts"
|
||||
)
|
||||
max_attempts = Column(
|
||||
Integer,
|
||||
nullable=False,
|
||||
default=3,
|
||||
comment="Maximum number of retry attempts"
|
||||
)
|
||||
|
||||
# Location data (cached for job execution)
|
||||
latitude = Column(
|
||||
Float,
|
||||
nullable=False,
|
||||
comment="Bakery latitude for POI detection"
|
||||
)
|
||||
longitude = Column(
|
||||
Float,
|
||||
nullable=False,
|
||||
comment="Bakery longitude for POI detection"
|
||||
)
|
||||
|
||||
# Results
|
||||
pois_detected = Column(
|
||||
Integer,
|
||||
nullable=True,
|
||||
comment="Number of POIs detected in this refresh"
|
||||
)
|
||||
changes_detected = Column(
|
||||
Boolean,
|
||||
default=False,
|
||||
comment="Whether significant changes were detected"
|
||||
)
|
||||
change_summary = Column(
|
||||
JSONB,
|
||||
nullable=True,
|
||||
comment="Summary of changes detected"
|
||||
)
|
||||
|
||||
# Error handling
|
||||
error_message = Column(
|
||||
Text,
|
||||
nullable=True,
|
||||
comment="Error message if job failed"
|
||||
)
|
||||
error_details = Column(
|
||||
JSONB,
|
||||
nullable=True,
|
||||
comment="Detailed error information"
|
||||
)
|
||||
|
||||
# Next execution
|
||||
next_scheduled_at = Column(
|
||||
DateTime(timezone=True),
|
||||
nullable=True,
|
||||
index=True,
|
||||
comment="When next refresh should be scheduled"
|
||||
)
|
||||
|
||||
# Metadata
|
||||
created_at = Column(
|
||||
DateTime(timezone=True),
|
||||
nullable=False,
|
||||
default=lambda: datetime.now(timezone.utc)
|
||||
)
|
||||
updated_at = Column(
|
||||
DateTime(timezone=True),
|
||||
nullable=False,
|
||||
default=lambda: datetime.now(timezone.utc),
|
||||
onupdate=lambda: datetime.now(timezone.utc)
|
||||
)
|
||||
|
||||
def __repr__(self):
|
||||
return (
|
||||
f"<POIRefreshJob(id={self.id}, tenant_id={self.tenant_id}, "
|
||||
f"status={self.status}, scheduled_at={self.scheduled_at})>"
|
||||
)
|
||||
|
||||
@property
|
||||
def is_overdue(self) -> bool:
|
||||
"""Check if job is overdue for execution"""
|
||||
if self.status in ("completed", "running"):
|
||||
return False
|
||||
return datetime.now(timezone.utc) > self.scheduled_at
|
||||
|
||||
@property
|
||||
def can_retry(self) -> bool:
|
||||
"""Check if job can be retried"""
|
||||
return self.attempt_count < self.max_attempts
|
||||
|
||||
@property
|
||||
def duration_seconds(self) -> float | None:
|
||||
"""Calculate job duration in seconds"""
|
||||
if self.started_at and self.completed_at:
|
||||
return (self.completed_at - self.started_at).total_seconds()
|
||||
return None
|
||||
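A small sketch of how a worker might use the job properties defined above when deciding whether to pick up or retry a job; scheduling and persistence are assumed to live elsewhere.

# Sketch: worker-side checks built on POIRefreshJob's properties
from app.models.poi_refresh_job import POIRefreshJob

def should_execute(job: POIRefreshJob) -> bool:
    """Pick up jobs that are overdue and still within their retry budget."""
    return job.is_overdue and job.can_retry

def log_duration(job: POIRefreshJob) -> None:
    seconds = job.duration_seconds
    if seconds is not None:
        print(f"POI refresh job {job.id} took {seconds:.1f}s")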
294
services/external/app/models/traffic.py
vendored
Normal file
294
services/external/app/models/traffic.py
vendored
Normal file
@@ -0,0 +1,294 @@
|
||||
# ================================================================
# services/external/app/models/traffic.py - Enhanced for Multiple Cities
# ================================================================
|
||||
"""
|
||||
Flexible traffic data models supporting multiple cities and extensible schemas
|
||||
"""
|
||||
|
||||
from sqlalchemy import Column, String, DateTime, Float, Integer, Text, Index, Boolean, JSON
|
||||
from sqlalchemy.dialects.postgresql import UUID
|
||||
import uuid
|
||||
from datetime import datetime, timezone
|
||||
from typing import Dict, Any, Optional
|
||||
|
||||
from shared.database.base import Base
|
||||
|
||||
|
||||
class TrafficData(Base):
|
||||
"""
|
||||
Flexible traffic data model supporting multiple cities
|
||||
Designed to accommodate varying data structures across different cities
|
||||
"""
|
||||
__tablename__ = "traffic_data"
|
||||
|
||||
# Primary identification
|
||||
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
|
||||
|
||||
# Location and temporal data
|
||||
location_id = Column(String(100), nullable=False, index=True) # "lat,lon" or city-specific ID
|
||||
city = Column(String(50), nullable=False, index=True) # madrid, barcelona, valencia, etc.
|
||||
date = Column(DateTime(timezone=True), nullable=False, index=True)
|
||||
|
||||
# Core standardized traffic metrics (common across all cities)
|
||||
traffic_volume = Column(Integer, nullable=True) # Vehicle count or intensity
|
||||
congestion_level = Column(String(20), nullable=True) # low, medium, high, blocked
|
||||
average_speed = Column(Float, nullable=True) # Average speed in km/h
|
||||
|
||||
# Enhanced metrics (may not be available for all cities)
|
||||
occupation_percentage = Column(Float, nullable=True) # Road occupation %
|
||||
load_percentage = Column(Float, nullable=True) # Traffic load %
|
||||
pedestrian_count = Column(Integer, nullable=True) # Estimated pedestrian count
|
||||
|
||||
# Measurement point information
|
||||
measurement_point_id = Column(String(100), nullable=True, index=True)
|
||||
measurement_point_name = Column(String(500), nullable=True)
|
||||
measurement_point_type = Column(String(50), nullable=True) # URB, M30, A, etc.
|
||||
|
||||
# Geographic data
|
||||
latitude = Column(Float, nullable=True)
|
||||
longitude = Column(Float, nullable=True)
|
||||
district = Column(String(100), nullable=True) # City district/area
|
||||
zone = Column(String(100), nullable=True) # Traffic zone or sector
|
||||
|
||||
# Data source and quality
|
||||
source = Column(String(50), nullable=False, default="unknown") # madrid_opendata, synthetic, etc.
|
||||
data_quality_score = Column(Float, nullable=True) # Quality score 0-100
|
||||
is_synthetic = Column(Boolean, default=False)
|
||||
has_pedestrian_inference = Column(Boolean, default=False)
|
||||
|
||||
# City-specific data (flexible JSON storage)
|
||||
city_specific_data = Column(JSON, nullable=True) # Store city-specific fields
|
||||
|
||||
# Raw data backup
|
||||
raw_data = Column(Text, nullable=True) # Original data for debugging
|
||||
|
||||
# Audit fields
|
||||
tenant_id = Column(UUID(as_uuid=True), nullable=True, index=True) # For multi-tenancy
|
||||
created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
|
||||
updated_at = Column(DateTime(timezone=True),
|
||||
default=lambda: datetime.now(timezone.utc),
|
||||
onupdate=lambda: datetime.now(timezone.utc))
|
||||
|
||||
# Performance-optimized indexes
|
||||
__table_args__ = (
|
||||
# Core query patterns
|
||||
Index('idx_traffic_location_date', 'location_id', 'date'),
|
||||
Index('idx_traffic_city_date', 'city', 'date'),
|
||||
Index('idx_traffic_tenant_date', 'tenant_id', 'date'),
|
||||
|
||||
# Advanced query patterns
|
||||
Index('idx_traffic_city_location', 'city', 'location_id'),
|
||||
Index('idx_traffic_measurement_point', 'city', 'measurement_point_id'),
|
||||
Index('idx_traffic_district_date', 'city', 'district', 'date'),
|
||||
|
||||
# Training data queries
|
||||
Index('idx_traffic_training', 'tenant_id', 'city', 'date', 'is_synthetic'),
|
||||
Index('idx_traffic_quality', 'city', 'data_quality_score', 'date'),
|
||||
)
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
"""Convert model to dictionary for API responses"""
|
||||
result = {
|
||||
'id': str(self.id),
|
||||
'location_id': self.location_id,
|
||||
'city': self.city,
|
||||
'date': self.date.isoformat() if self.date else None,
|
||||
'traffic_volume': self.traffic_volume,
|
||||
'congestion_level': self.congestion_level,
|
||||
'average_speed': self.average_speed,
|
||||
'occupation_percentage': self.occupation_percentage,
|
||||
'load_percentage': self.load_percentage,
|
||||
'pedestrian_count': self.pedestrian_count,
|
||||
'measurement_point_id': self.measurement_point_id,
|
||||
'measurement_point_name': self.measurement_point_name,
|
||||
'measurement_point_type': self.measurement_point_type,
|
||||
'latitude': self.latitude,
|
||||
'longitude': self.longitude,
|
||||
'district': self.district,
|
||||
'zone': self.zone,
|
||||
'source': self.source,
|
||||
'data_quality_score': self.data_quality_score,
|
||||
'is_synthetic': self.is_synthetic,
|
||||
'has_pedestrian_inference': self.has_pedestrian_inference,
|
||||
'created_at': self.created_at.isoformat() if self.created_at else None
|
||||
}
|
||||
|
||||
# Add city-specific data if present
|
||||
if self.city_specific_data:
|
||||
result['city_specific_data'] = self.city_specific_data
|
||||
|
||||
return result
|
||||
|
||||
def get_city_specific_field(self, field_name: str, default: Any = None) -> Any:
|
||||
"""Safely get city-specific field value"""
|
||||
if self.city_specific_data and isinstance(self.city_specific_data, dict):
|
||||
return self.city_specific_data.get(field_name, default)
|
||||
return default
|
||||
|
||||
def set_city_specific_field(self, field_name: str, value: Any) -> None:
|
||||
"""Set city-specific field value"""
|
||||
if not self.city_specific_data:
|
||||
self.city_specific_data = {}
|
||||
if not isinstance(self.city_specific_data, dict):
|
||||
self.city_specific_data = {}
|
||||
self.city_specific_data[field_name] = value
|
||||
|
||||
|
||||
class TrafficMeasurementPoint(Base):
|
||||
"""
|
||||
Registry of traffic measurement points across all cities
|
||||
Supports different city-specific measurement point schemas
|
||||
"""
|
||||
__tablename__ = "traffic_measurement_points"
|
||||
|
||||
# Primary identification
|
||||
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
|
||||
|
||||
# Location and identification
|
||||
city = Column(String(50), nullable=False, index=True)
|
||||
measurement_point_id = Column(String(100), nullable=False, index=True) # City-specific ID
|
||||
name = Column(String(500), nullable=True)
|
||||
description = Column(Text, nullable=True)
|
||||
|
||||
# Geographic information
|
||||
latitude = Column(Float, nullable=False)
|
||||
longitude = Column(Float, nullable=False)
|
||||
district = Column(String(100), nullable=True)
|
||||
zone = Column(String(100), nullable=True)
|
||||
|
||||
# Classification
|
||||
road_type = Column(String(50), nullable=True) # URB, M30, A, etc.
|
||||
measurement_type = Column(String(50), nullable=True) # intensity, speed, etc.
|
||||
point_category = Column(String(50), nullable=True) # urban, highway, ring_road
|
||||
|
||||
# Status and metadata
|
||||
is_active = Column(Boolean, default=True)
|
||||
installation_date = Column(DateTime(timezone=True), nullable=True)
|
||||
last_data_received = Column(DateTime(timezone=True), nullable=True)
|
||||
data_quality_rating = Column(Float, nullable=True) # Average quality 0-100
|
||||
|
||||
# City-specific point data
|
||||
city_specific_metadata = Column(JSON, nullable=True)
|
||||
|
||||
# Audit fields
|
||||
created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
|
||||
updated_at = Column(DateTime(timezone=True),
|
||||
default=lambda: datetime.now(timezone.utc),
|
||||
onupdate=lambda: datetime.now(timezone.utc))
|
||||
|
||||
__table_args__ = (
|
||||
# Ensure unique measurement points per city
|
||||
Index('idx_unique_city_point', 'city', 'measurement_point_id', unique=True),
|
||||
|
||||
# Geographic queries
|
||||
Index('idx_points_city_location', 'city', 'latitude', 'longitude'),
|
||||
Index('idx_points_district', 'city', 'district'),
|
||||
Index('idx_points_road_type', 'city', 'road_type'),
|
||||
|
||||
# Status queries
|
||||
Index('idx_points_active', 'city', 'is_active', 'last_data_received'),
|
||||
)
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
"""Convert measurement point to dictionary"""
|
||||
return {
|
||||
'id': str(self.id),
|
||||
'city': self.city,
|
||||
'measurement_point_id': self.measurement_point_id,
|
||||
'name': self.name,
|
||||
'description': self.description,
|
||||
'latitude': self.latitude,
|
||||
'longitude': self.longitude,
|
||||
'district': self.district,
|
||||
'zone': self.zone,
|
||||
'road_type': self.road_type,
|
||||
'measurement_type': self.measurement_type,
|
||||
'point_category': self.point_category,
|
||||
'is_active': self.is_active,
|
||||
'installation_date': self.installation_date.isoformat() if self.installation_date else None,
|
||||
'last_data_received': self.last_data_received.isoformat() if self.last_data_received else None,
|
||||
'data_quality_rating': self.data_quality_rating,
|
||||
'city_specific_metadata': self.city_specific_metadata,
|
||||
'created_at': self.created_at.isoformat() if self.created_at else None
|
||||
}
|
||||
|
||||
|
||||
class TrafficDataBackgroundJob(Base):
|
||||
"""
|
||||
Track background data collection jobs for multiple cities
|
||||
Supports scheduling and monitoring of data fetching processes
|
||||
"""
|
||||
__tablename__ = "traffic_background_jobs"
|
||||
|
||||
# Primary identification
|
||||
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
|
||||
|
||||
# Job configuration
|
||||
job_type = Column(String(50), nullable=False) # historical_fetch, cleanup, etc.
|
||||
city = Column(String(50), nullable=False, index=True)
|
||||
location_pattern = Column(String(200), nullable=True) # Location pattern or specific coords
|
||||
|
||||
# Scheduling
|
||||
scheduled_at = Column(DateTime(timezone=True), nullable=False)
|
||||
started_at = Column(DateTime(timezone=True), nullable=True)
|
||||
completed_at = Column(DateTime(timezone=True), nullable=True)
|
||||
|
||||
# Status tracking
|
||||
status = Column(String(20), nullable=False, default='pending') # pending, running, completed, failed
|
||||
progress_percentage = Column(Float, default=0.0)
|
||||
records_processed = Column(Integer, default=0)
|
||||
records_stored = Column(Integer, default=0)
|
||||
|
||||
# Date range for data jobs
|
||||
data_start_date = Column(DateTime(timezone=True), nullable=True)
|
||||
data_end_date = Column(DateTime(timezone=True), nullable=True)
|
||||
|
||||
# Results and error handling
|
||||
success_count = Column(Integer, default=0)
|
||||
error_count = Column(Integer, default=0)
|
||||
error_message = Column(Text, nullable=True)
|
||||
job_metadata = Column(JSON, nullable=True) # Additional job-specific data
|
||||
|
||||
# Tenant association
|
||||
tenant_id = Column(UUID(as_uuid=True), nullable=True, index=True)
|
||||
|
||||
# Audit fields
|
||||
created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
|
||||
updated_at = Column(DateTime(timezone=True),
|
||||
default=lambda: datetime.now(timezone.utc),
|
||||
onupdate=lambda: datetime.now(timezone.utc))
|
||||
|
||||
__table_args__ = (
|
||||
# Job monitoring
|
||||
Index('idx_jobs_city_status', 'city', 'status', 'scheduled_at'),
|
||||
Index('idx_jobs_tenant_status', 'tenant_id', 'status', 'scheduled_at'),
|
||||
Index('idx_jobs_type_city', 'job_type', 'city', 'scheduled_at'),
|
||||
|
||||
# Cleanup queries
|
||||
Index('idx_jobs_completed', 'status', 'completed_at'),
|
||||
)
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
"""Convert job to dictionary"""
|
||||
return {
|
||||
'id': str(self.id),
|
||||
'job_type': self.job_type,
|
||||
'city': self.city,
|
||||
'location_pattern': self.location_pattern,
|
||||
'scheduled_at': self.scheduled_at.isoformat() if self.scheduled_at else None,
|
||||
'started_at': self.started_at.isoformat() if self.started_at else None,
|
||||
'completed_at': self.completed_at.isoformat() if self.completed_at else None,
|
||||
'status': self.status,
|
||||
'progress_percentage': self.progress_percentage,
|
||||
'records_processed': self.records_processed,
|
||||
'records_stored': self.records_stored,
|
||||
'data_start_date': self.data_start_date.isoformat() if self.data_start_date else None,
|
||||
'data_end_date': self.data_end_date.isoformat() if self.data_end_date else None,
|
||||
'success_count': self.success_count,
|
||||
'error_count': self.error_count,
|
||||
'error_message': self.error_message,
|
||||
'job_metadata': self.job_metadata,
|
||||
'tenant_id': str(self.tenant_id) if self.tenant_id else None,
|
||||
'created_at': self.created_at.isoformat() if self.created_at else None,
|
||||
'updated_at': self.updated_at.isoformat() if self.updated_at else None
|
||||
}
|
||||
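The city_specific_data helpers are intended to be used as in the following sketch; the field name and values are made up for illustration.

# Sketch: storing a provider-specific field without widening the schema
from app.models.traffic import TrafficData

record = TrafficData(
    location_id="40.4168,-3.7038",
    city="madrid",
    source="madrid_opendata",
)
record.set_city_specific_field("carga", 37.5)

assert record.get_city_specific_field("carga") == 37.5
assert record.get_city_specific_field("missing_field", default=None) is None

payload = record.to_dict()  # includes 'city_specific_data' because it is non-empty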
74
services/external/app/models/weather.py
vendored
Normal file
74
services/external/app/models/weather.py
vendored
Normal file
@@ -0,0 +1,74 @@
|
||||
# ================================================================
# services/external/app/models/weather.py
# ================================================================
|
||||
"""Weather data models"""
|
||||
|
||||
from sqlalchemy import Column, String, DateTime, Float, Integer, Text, Index, Boolean
|
||||
from sqlalchemy.dialects.postgresql import UUID, JSON
|
||||
import uuid
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from shared.database.base import Base
|
||||
|
||||
class WeatherData(Base):
|
||||
__tablename__ = "weather_data"
|
||||
|
||||
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
|
||||
location_id = Column(String(100), nullable=False, index=True)
|
||||
city = Column(String(50), nullable=False)
|
||||
station_name = Column(String(200), nullable=True)
|
||||
latitude = Column(Float, nullable=True)
|
||||
longitude = Column(Float, nullable=True)
|
||||
date = Column(DateTime(timezone=True), nullable=False, index=True)
|
||||
forecast_date = Column(DateTime(timezone=True), nullable=True)
|
||||
temperature = Column(Float, nullable=True) # Celsius
|
||||
temperature_min = Column(Float, nullable=True)
|
||||
temperature_max = Column(Float, nullable=True)
|
||||
feels_like = Column(Float, nullable=True)
|
||||
precipitation = Column(Float, nullable=True) # mm
|
||||
precipitation_probability = Column(Float, nullable=True)
|
||||
humidity = Column(Float, nullable=True) # percentage
|
||||
wind_speed = Column(Float, nullable=True) # km/h
|
||||
wind_direction = Column(Float, nullable=True)
|
||||
wind_gust = Column(Float, nullable=True)
|
||||
pressure = Column(Float, nullable=True) # hPa
|
||||
visibility = Column(Float, nullable=True)
|
||||
uv_index = Column(Float, nullable=True)
|
||||
cloud_cover = Column(Float, nullable=True)
|
||||
condition = Column(String(100), nullable=True)
|
||||
description = Column(String(200), nullable=True)
|
||||
weather_code = Column(String(20), nullable=True)
|
||||
source = Column(String(50), nullable=False, default="aemet")
|
||||
data_type = Column(String(20), nullable=False)
|
||||
is_forecast = Column(Boolean, nullable=True)
|
||||
data_quality_score = Column(Float, nullable=True)
|
||||
raw_data = Column(JSON, nullable=True)
|
||||
processed_data = Column(JSON, nullable=True)
|
||||
tenant_id = Column(UUID(as_uuid=True), nullable=True, index=True)
|
||||
created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
|
||||
updated_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc), onupdate=lambda: datetime.now(timezone.utc))
|
||||
|
||||
__table_args__ = (
|
||||
Index('idx_weather_location_date', 'location_id', 'date'),
|
||||
)
|
||||
|
||||
class WeatherForecast(Base):
|
||||
__tablename__ = "weather_forecasts"
|
||||
|
||||
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
|
||||
location_id = Column(String(100), nullable=False, index=True)
|
||||
forecast_date = Column(DateTime(timezone=True), nullable=False)
|
||||
generated_at = Column(DateTime(timezone=True), nullable=False, default=lambda: datetime.now(timezone.utc))
|
||||
temperature = Column(Float, nullable=True)
|
||||
precipitation = Column(Float, nullable=True)
|
||||
humidity = Column(Float, nullable=True)
|
||||
wind_speed = Column(Float, nullable=True)
|
||||
description = Column(String(200), nullable=True)
|
||||
source = Column(String(50), nullable=False, default="aemet")
|
||||
raw_data = Column(Text, nullable=True)
|
||||
created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
|
||||
updated_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc), onupdate=lambda: datetime.now(timezone.utc))
|
||||
|
||||
__table_args__ = (
|
||||
Index('idx_forecast_location_date', 'location_id', 'forecast_date'),
|
||||
)
|
||||
1
services/external/app/registry/__init__.py
vendored
Normal file
1
services/external/app/registry/__init__.py
vendored
Normal file
@@ -0,0 +1 @@
|
||||
"""City registry module for multi-city support"""
|
||||
377
services/external/app/registry/calendar_registry.py
vendored
Normal file
377
services/external/app/registry/calendar_registry.py
vendored
Normal file
@@ -0,0 +1,377 @@
|
||||
# services/external/app/registry/calendar_registry.py
|
||||
"""
|
||||
Calendar Registry - Pre-configured school calendars and local events
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import List, Optional, Dict, Any
|
||||
from datetime import date
|
||||
from enum import Enum
|
||||
|
||||
|
||||
class SchoolType(str, Enum):
|
||||
PRIMARY = "primary"
|
||||
SECONDARY = "secondary"
|
||||
UNIVERSITY = "university"
|
||||
|
||||
|
||||
@dataclass
|
||||
class HolidayPeriod:
|
||||
"""School holiday period definition"""
|
||||
name: str
|
||||
start_date: str # ISO format: "2024-12-20"
|
||||
end_date: str # ISO format: "2025-01-08"
|
||||
description: Optional[str] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class SchoolHours:
|
||||
"""School operating hours configuration"""
|
||||
morning_start: str # "09:00"
|
||||
morning_end: str # "14:00"
|
||||
has_afternoon_session: bool # True/False
|
||||
afternoon_start: Optional[str] = None # "15:00" if has_afternoon_session
|
||||
afternoon_end: Optional[str] = None # "17:00" if has_afternoon_session
|
||||
|
||||
|
||||
@dataclass
|
||||
class CalendarDefinition:
|
||||
"""School calendar configuration for a specific city and school type"""
|
||||
calendar_id: str
|
||||
calendar_name: str
|
||||
city_id: str
|
||||
school_type: SchoolType
|
||||
academic_year: str # "2024-2025"
|
||||
holiday_periods: List[HolidayPeriod]
|
||||
school_hours: SchoolHours
|
||||
source: str
|
||||
enabled: bool = True
|
||||
|
||||
|
||||
class CalendarRegistry:
|
||||
"""Central registry of school calendars for forecasting"""
|
||||
|
||||
# Madrid Primary School Calendar 2024-2025 (Official Comunidad de Madrid - ORDEN 1177/2024)
|
||||
MADRID_PRIMARY_2024_2025 = CalendarDefinition(
|
||||
calendar_id="madrid_primary_2024_2025",
|
||||
calendar_name="Madrid Primary School Calendar 2024-2025",
|
||||
city_id="madrid",
|
||||
school_type=SchoolType.PRIMARY,
|
||||
academic_year="2024-2025",
|
||||
holiday_periods=[
|
||||
HolidayPeriod(
|
||||
name="Christmas Holiday",
|
||||
start_date="2024-12-21",
|
||||
end_date="2025-01-07",
|
||||
description="Official Christmas break - Comunidad de Madrid (Dec 21 - Jan 7)"
|
||||
),
|
||||
HolidayPeriod(
|
||||
name="Easter Holiday (Semana Santa)",
|
||||
start_date="2025-04-11",
|
||||
end_date="2025-04-21",
|
||||
description="Official Easter break - Comunidad de Madrid (Apr 11-21)"
|
||||
),
|
||||
HolidayPeriod(
|
||||
name="Summer Holiday",
|
||||
start_date="2025-06-21",
|
||||
end_date="2025-09-08",
|
||||
description="Summer vacation (Last day Jun 20, classes resume Sep 9)"
|
||||
),
|
||||
HolidayPeriod(
|
||||
name="All Saints Long Weekend",
|
||||
start_date="2024-10-31",
|
||||
end_date="2024-11-03",
|
||||
description="October 31 - November 3 non-working days"
|
||||
),
|
||||
HolidayPeriod(
|
||||
name="February Long Weekend",
|
||||
start_date="2025-02-28",
|
||||
end_date="2025-03-03",
|
||||
description="February 28 - March 3 non-working days"
|
||||
),
|
||||
],
|
||||
school_hours=SchoolHours(
|
||||
morning_start="09:00",
|
||||
morning_end="14:00",
|
||||
has_afternoon_session=False
|
||||
),
|
||||
source="comunidad_madrid_orden_1177_2024",
|
||||
enabled=True
|
||||
)
|
||||
|
||||
# Madrid Secondary School Calendar 2024-2025 (Official Comunidad de Madrid - ORDEN 1177/2024)
|
||||
MADRID_SECONDARY_2024_2025 = CalendarDefinition(
|
||||
calendar_id="madrid_secondary_2024_2025",
|
||||
calendar_name="Madrid Secondary School Calendar 2024-2025",
|
||||
city_id="madrid",
|
||||
school_type=SchoolType.SECONDARY,
|
||||
academic_year="2024-2025",
|
||||
holiday_periods=[
|
||||
HolidayPeriod(
|
||||
name="Christmas Holiday",
|
||||
start_date="2024-12-21",
|
||||
end_date="2025-01-07",
|
||||
description="Official Christmas break - Comunidad de Madrid (Dec 21 - Jan 7)"
|
||||
),
|
||||
HolidayPeriod(
|
||||
name="Easter Holiday (Semana Santa)",
|
||||
start_date="2025-04-11",
|
||||
end_date="2025-04-21",
|
||||
description="Official Easter break - Comunidad de Madrid (Apr 11-21)"
|
||||
),
|
||||
HolidayPeriod(
|
||||
name="Summer Holiday",
|
||||
start_date="2025-06-21",
|
||||
end_date="2025-09-09",
|
||||
description="Summer vacation (Last day Jun 20, classes resume Sep 10)"
|
||||
),
|
||||
HolidayPeriod(
|
||||
name="All Saints Long Weekend",
|
||||
start_date="2024-10-31",
|
||||
end_date="2024-11-03",
|
||||
description="October 31 - November 3 non-working days"
|
||||
),
|
||||
HolidayPeriod(
|
||||
name="February Long Weekend",
|
||||
start_date="2025-02-28",
|
||||
end_date="2025-03-03",
|
||||
description="February 28 - March 3 non-working days"
|
||||
),
|
||||
],
|
||||
school_hours=SchoolHours(
|
||||
morning_start="09:00",
|
||||
morning_end="14:00",
|
||||
has_afternoon_session=False
|
||||
),
|
||||
source="comunidad_madrid_orden_1177_2024",
|
||||
enabled=True
|
||||
)
|
||||
|
||||
# Madrid Primary School Calendar 2025-2026 (Official Comunidad de Madrid - ORDEN 1476/2025)
|
||||
MADRID_PRIMARY_2025_2026 = CalendarDefinition(
|
||||
calendar_id="madrid_primary_2025_2026",
|
||||
calendar_name="Madrid Primary School Calendar 2025-2026",
|
||||
city_id="madrid",
|
||||
school_type=SchoolType.PRIMARY,
|
||||
academic_year="2025-2026",
|
||||
holiday_periods=[
|
||||
HolidayPeriod(
|
||||
name="Christmas Holiday",
|
||||
start_date="2025-12-20",
|
||||
end_date="2026-01-07",
|
||||
description="Official Christmas break - Comunidad de Madrid (Dec 20 - Jan 7)"
|
||||
),
|
||||
HolidayPeriod(
|
||||
name="Easter Holiday (Semana Santa)",
|
||||
start_date="2026-03-27",
|
||||
end_date="2026-04-06",
|
||||
description="Official Easter break - Comunidad de Madrid (Mar 27 - Apr 6)"
|
||||
),
|
||||
HolidayPeriod(
|
||||
name="Summer Holiday",
|
||||
start_date="2026-06-21",
|
||||
end_date="2026-09-08",
|
||||
description="Summer vacation (classes resume Sep 9)"
|
||||
),
|
||||
HolidayPeriod(
|
||||
name="October Long Weekend",
|
||||
start_date="2025-10-13",
|
||||
end_date="2025-10-13",
|
||||
description="October 13 non-working day (after Día de la Hispanidad)"
|
||||
),
|
||||
HolidayPeriod(
|
||||
name="All Saints Long Weekend",
|
||||
start_date="2025-11-03",
|
||||
end_date="2025-11-03",
|
||||
description="November 3 non-working day (after All Saints)"
|
||||
),
|
||||
],
|
||||
school_hours=SchoolHours(
|
||||
morning_start="09:00",
|
||||
morning_end="14:00",
|
||||
has_afternoon_session=False
|
||||
),
|
||||
source="comunidad_madrid_orden_1476_2025",
|
||||
enabled=True
|
||||
)
|
||||
|
||||
# Madrid Secondary School Calendar 2025-2026 (Official Comunidad de Madrid - ORDEN 1476/2025)
|
||||
MADRID_SECONDARY_2025_2026 = CalendarDefinition(
|
||||
calendar_id="madrid_secondary_2025_2026",
|
||||
calendar_name="Madrid Secondary School Calendar 2025-2026",
|
||||
city_id="madrid",
|
||||
school_type=SchoolType.SECONDARY,
|
||||
academic_year="2025-2026",
|
||||
holiday_periods=[
|
||||
HolidayPeriod(
|
||||
name="Christmas Holiday",
|
||||
start_date="2025-12-20",
|
||||
end_date="2026-01-07",
|
||||
description="Official Christmas break - Comunidad de Madrid (Dec 20 - Jan 7)"
|
||||
),
|
||||
HolidayPeriod(
|
||||
name="Easter Holiday (Semana Santa)",
|
||||
start_date="2026-03-27",
|
||||
end_date="2026-04-06",
|
||||
description="Official Easter break - Comunidad de Madrid (Mar 27 - Apr 6)"
|
||||
),
|
||||
HolidayPeriod(
|
||||
name="Summer Holiday",
|
||||
start_date="2026-06-21",
|
||||
end_date="2026-09-09",
|
||||
description="Summer vacation (classes resume Sep 10)"
|
||||
),
|
||||
HolidayPeriod(
|
||||
name="October Long Weekend",
|
||||
start_date="2025-10-13",
|
||||
end_date="2025-10-13",
|
||||
description="October 13 non-working day (after Día de la Hispanidad)"
|
||||
),
|
||||
HolidayPeriod(
|
||||
name="All Saints Long Weekend",
|
||||
start_date="2025-11-03",
|
||||
end_date="2025-11-03",
|
||||
description="November 3 non-working day (after All Saints)"
|
||||
),
|
||||
],
|
||||
school_hours=SchoolHours(
|
||||
morning_start="09:00",
|
||||
morning_end="14:00",
|
||||
has_afternoon_session=False
|
||||
),
|
||||
source="comunidad_madrid_orden_1476_2025",
|
||||
enabled=True
|
||||
)
|
||||
|
||||
# Registry of all calendars
|
||||
CALENDARS: List[CalendarDefinition] = [
|
||||
MADRID_PRIMARY_2024_2025,
|
||||
MADRID_SECONDARY_2024_2025,
|
||||
MADRID_PRIMARY_2025_2026,
|
||||
MADRID_SECONDARY_2025_2026,
|
||||
]
|
||||
|
||||
@classmethod
|
||||
def get_all_calendars(cls) -> List[CalendarDefinition]:
|
||||
"""Get all calendars"""
|
||||
return cls.CALENDARS
|
||||
|
||||
@classmethod
|
||||
def get_enabled_calendars(cls) -> List[CalendarDefinition]:
|
||||
"""Get all enabled calendars"""
|
||||
return [cal for cal in cls.CALENDARS if cal.enabled]
|
||||
|
||||
@classmethod
|
||||
def get_calendar(cls, calendar_id: str) -> Optional[CalendarDefinition]:
|
||||
"""Get calendar by ID"""
|
||||
for cal in cls.CALENDARS:
|
||||
if cal.calendar_id == calendar_id:
|
||||
return cal
|
||||
return None
|
||||
|
||||
@classmethod
|
||||
def get_calendars_for_city(cls, city_id: str) -> List[CalendarDefinition]:
|
||||
"""Get all enabled calendars for a specific city"""
|
||||
return [
|
||||
cal for cal in cls.CALENDARS
|
||||
if cal.city_id == city_id and cal.enabled
|
||||
]
|
||||
|
||||
@classmethod
|
||||
def get_calendar_for_city_and_type(
|
||||
cls,
|
||||
city_id: str,
|
||||
school_type: SchoolType,
|
||||
academic_year: Optional[str] = None
|
||||
) -> Optional[CalendarDefinition]:
|
||||
"""Get specific calendar for city, type, and optionally year"""
|
||||
for cal in cls.CALENDARS:
|
||||
if (cal.city_id == city_id and
|
||||
cal.school_type == school_type and
|
||||
cal.enabled and
|
||||
(academic_year is None or cal.academic_year == academic_year)):
|
||||
return cal
|
||||
return None
|
||||
|
||||
@classmethod
|
||||
def to_dict(cls, calendar: CalendarDefinition) -> Dict[str, Any]:
|
||||
"""Convert calendar definition to dictionary for JSON serialization"""
|
||||
return {
|
||||
"calendar_id": calendar.calendar_id,
|
||||
"calendar_name": calendar.calendar_name,
|
||||
"city_id": calendar.city_id,
|
||||
"school_type": calendar.school_type.value,
|
||||
"academic_year": calendar.academic_year,
|
||||
"holiday_periods": [
|
||||
{
|
||||
"name": hp.name,
|
||||
"start_date": hp.start_date,
|
||||
"end_date": hp.end_date,
|
||||
"description": hp.description
|
||||
}
|
||||
for hp in calendar.holiday_periods
|
||||
],
|
||||
"school_hours": {
|
||||
"morning_start": calendar.school_hours.morning_start,
|
||||
"morning_end": calendar.school_hours.morning_end,
|
||||
"has_afternoon_session": calendar.school_hours.has_afternoon_session,
|
||||
"afternoon_start": calendar.school_hours.afternoon_start,
|
||||
"afternoon_end": calendar.school_hours.afternoon_end,
|
||||
},
|
||||
"source": calendar.source,
|
||||
"enabled": calendar.enabled
|
||||
}
|
||||
|
||||
|
||||
# Local Events Registry for Madrid
|
||||
@dataclass
|
||||
class LocalEventDefinition:
|
||||
"""Local event that impacts demand"""
|
||||
event_id: str
|
||||
name: str
|
||||
city_id: str
|
||||
date: str # ISO format or "annual-MM-DD" for recurring
|
||||
impact_level: str # "low", "medium", "high"
|
||||
description: Optional[str] = None
|
||||
recurring: bool = False # True for annual events
|
||||
|
||||
|
||||
class LocalEventsRegistry:
|
||||
"""Registry of local events and festivals"""
|
||||
|
||||
MADRID_EVENTS = [
|
||||
LocalEventDefinition(
|
||||
event_id="madrid_san_isidro",
|
||||
name="San Isidro Festival",
|
||||
city_id="madrid",
|
||||
date="annual-05-15",
|
||||
impact_level="high",
|
||||
description="Madrid's patron saint festival - major citywide celebration",
|
||||
recurring=True
|
||||
),
|
||||
LocalEventDefinition(
|
||||
event_id="madrid_dos_de_mayo",
|
||||
name="Dos de Mayo",
|
||||
city_id="madrid",
|
||||
date="annual-05-02",
|
||||
impact_level="medium",
|
||||
description="Madrid regional holiday",
|
||||
recurring=True
|
||||
),
|
||||
LocalEventDefinition(
|
||||
event_id="madrid_almudena",
|
||||
name="Virgen de la Almudena",
|
||||
city_id="madrid",
|
||||
date="annual-11-09",
|
||||
impact_level="medium",
|
||||
description="Madrid patron saint day",
|
||||
recurring=True
|
||||
),
|
||||
]
|
||||
|
||||
@classmethod
|
||||
def get_events_for_city(cls, city_id: str) -> List[LocalEventDefinition]:
|
||||
"""Get all local events for a city"""
|
||||
if city_id == "madrid":
|
||||
return cls.MADRID_EVENTS
|
||||
return []
|
||||
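A minimal usage sketch for the two registries above. The enclosing calendar-registry class and its module path are not visible in this hunk, so SchoolCalendarRegistry, app.registry.school_calendar_registry, and SchoolType.PRIMARY below are assumptions for illustration; the classmethods themselves are the ones defined above.

# Illustrative sketch only -- class/module names are assumed, the methods are from above.
from app.registry.school_calendar_registry import (  # hypothetical module path
    SchoolCalendarRegistry,   # hypothetical name for the registry class above
    SchoolType,               # enum referenced above; the PRIMARY member is assumed
    LocalEventsRegistry,
)

calendar = SchoolCalendarRegistry.get_calendar_for_city_and_type(
    city_id="madrid",
    school_type=SchoolType.PRIMARY,
    academic_year="2025-2026",
)
if calendar is not None:
    payload = SchoolCalendarRegistry.to_dict(calendar)  # JSON-serializable dict
    print(payload["calendar_id"], len(payload["holiday_periods"]))

# Local events that may affect demand forecasting for Madrid tenants
for event in LocalEventsRegistry.get_events_for_city("madrid"):
    print(event.event_id, event.date, event.impact_level)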
163
services/external/app/registry/city_registry.py
vendored
Normal file
@@ -0,0 +1,163 @@
|
||||
# services/external/app/registry/city_registry.py
|
||||
"""
|
||||
City Registry - Configuration-driven multi-city support
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import List, Optional, Dict, Any
|
||||
from enum import Enum
|
||||
import math
|
||||
|
||||
|
||||
class Country(str, Enum):
|
||||
SPAIN = "ES"
|
||||
FRANCE = "FR"
|
||||
|
||||
|
||||
class WeatherProvider(str, Enum):
|
||||
AEMET = "aemet"
|
||||
METEO_FRANCE = "meteo_france"
|
||||
OPEN_WEATHER = "open_weather"
|
||||
|
||||
|
||||
class TrafficProvider(str, Enum):
|
||||
MADRID_OPENDATA = "madrid_opendata"
|
||||
VALENCIA_OPENDATA = "valencia_opendata"
|
||||
BARCELONA_OPENDATA = "barcelona_opendata"
|
||||
|
||||
|
||||
@dataclass
|
||||
class CityDefinition:
|
||||
"""City configuration with data source specifications"""
|
||||
city_id: str
|
||||
name: str
|
||||
country: Country
|
||||
latitude: float
|
||||
longitude: float
|
||||
radius_km: float
|
||||
|
||||
weather_provider: WeatherProvider
|
||||
weather_config: Dict[str, Any]
|
||||
traffic_provider: TrafficProvider
|
||||
traffic_config: Dict[str, Any]
|
||||
|
||||
timezone: str
|
||||
population: int
|
||||
enabled: bool = True
|
||||
|
||||
|
||||
class CityRegistry:
|
||||
"""Central registry of supported cities"""
|
||||
|
||||
CITIES: List[CityDefinition] = [
|
||||
CityDefinition(
|
||||
city_id="madrid",
|
||||
name="Madrid",
|
||||
country=Country.SPAIN,
|
||||
latitude=40.4168,
|
||||
longitude=-3.7038,
|
||||
radius_km=30.0,
|
||||
weather_provider=WeatherProvider.AEMET,
|
||||
weather_config={
|
||||
"station_ids": ["3195", "3129", "3197"],
|
||||
"municipality_code": "28079"
|
||||
},
|
||||
traffic_provider=TrafficProvider.MADRID_OPENDATA,
|
||||
traffic_config={
|
||||
"current_xml_url": "https://datos.madrid.es/egob/catalogo/...",
|
||||
"historical_base_url": "https://datos.madrid.es/...",
|
||||
"measurement_points_csv": "https://datos.madrid.es/..."
|
||||
},
|
||||
timezone="Europe/Madrid",
|
||||
population=3_200_000
|
||||
),
|
||||
CityDefinition(
|
||||
city_id="valencia",
|
||||
name="Valencia",
|
||||
country=Country.SPAIN,
|
||||
latitude=39.4699,
|
||||
longitude=-0.3763,
|
||||
radius_km=25.0,
|
||||
weather_provider=WeatherProvider.AEMET,
|
||||
weather_config={
|
||||
"station_ids": ["8416"],
|
||||
"municipality_code": "46250"
|
||||
},
|
||||
traffic_provider=TrafficProvider.VALENCIA_OPENDATA,
|
||||
traffic_config={
|
||||
"api_endpoint": "https://valencia.opendatasoft.com/api/..."
|
||||
},
|
||||
timezone="Europe/Madrid",
|
||||
population=800_000,
|
||||
enabled=False
|
||||
),
|
||||
CityDefinition(
|
||||
city_id="barcelona",
|
||||
name="Barcelona",
|
||||
country=Country.SPAIN,
|
||||
latitude=41.3851,
|
||||
longitude=2.1734,
|
||||
radius_km=30.0,
|
||||
weather_provider=WeatherProvider.AEMET,
|
||||
weather_config={
|
||||
"station_ids": ["0076"],
|
||||
"municipality_code": "08019"
|
||||
},
|
||||
traffic_provider=TrafficProvider.BARCELONA_OPENDATA,
|
||||
traffic_config={
|
||||
"api_endpoint": "https://opendata-ajuntament.barcelona.cat/..."
|
||||
},
|
||||
timezone="Europe/Madrid",
|
||||
population=1_600_000,
|
||||
enabled=False
|
||||
)
|
||||
]
|
||||
|
||||
@classmethod
|
||||
def get_enabled_cities(cls) -> List[CityDefinition]:
|
||||
"""Get all enabled cities"""
|
||||
return [city for city in cls.CITIES if city.enabled]
|
||||
|
||||
@classmethod
|
||||
def get_city(cls, city_id: str) -> Optional[CityDefinition]:
|
||||
"""Get city by ID"""
|
||||
for city in cls.CITIES:
|
||||
if city.city_id == city_id:
|
||||
return city
|
||||
return None
|
||||
|
||||
@classmethod
|
||||
def find_nearest_city(cls, latitude: float, longitude: float) -> Optional[CityDefinition]:
|
||||
"""Find nearest enabled city to coordinates"""
|
||||
enabled_cities = cls.get_enabled_cities()
|
||||
if not enabled_cities:
|
||||
return None
|
||||
|
||||
min_distance = float('inf')
|
||||
nearest_city = None
|
||||
|
||||
for city in enabled_cities:
|
||||
distance = cls._haversine_distance(
|
||||
latitude, longitude,
|
||||
city.latitude, city.longitude
|
||||
)
|
||||
if distance <= city.radius_km and distance < min_distance:
|
||||
min_distance = distance
|
||||
nearest_city = city
|
||||
|
||||
return nearest_city
|
||||
|
||||
@staticmethod
|
||||
def _haversine_distance(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
|
||||
"""Calculate distance in km between two coordinates"""
|
||||
R = 6371
|
||||
|
||||
dlat = math.radians(lat2 - lat1)
|
||||
dlon = math.radians(lon2 - lon1)
|
||||
|
||||
a = (math.sin(dlat/2) ** 2 +
|
||||
math.cos(math.radians(lat1)) * math.cos(math.radians(lat2)) *
|
||||
math.sin(dlon/2) ** 2)
|
||||
|
||||
c = 2 * math.atan2(math.sqrt(a), math.sqrt(1-a))
|
||||
return R * c
|
||||
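A short sketch of how coordinate resolution works with the registry above; the coordinates are arbitrary test values, everything else is taken from city_registry.py.

# Only Madrid is enabled above, so any point outside its 30 km radius resolves to None.
from app.registry.city_registry import CityRegistry

city = CityRegistry.find_nearest_city(40.42, -3.70)    # central Madrid
if city is not None:
    print(city.city_id, city.weather_provider.value, city.traffic_provider.value)

print(CityRegistry.find_nearest_city(48.85, 2.35))     # Paris -> None (no enabled city in range)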
58
services/external/app/registry/geolocation_mapper.py
vendored
Normal file
@@ -0,0 +1,58 @@
|
||||
# services/external/app/registry/geolocation_mapper.py
|
||||
"""
|
||||
Geolocation Mapper - Maps tenant locations to cities
|
||||
"""
|
||||
|
||||
from typing import Optional, Tuple
|
||||
import structlog
|
||||
from .city_registry import CityRegistry, CityDefinition
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
|
||||
class GeolocationMapper:
|
||||
"""Maps tenant coordinates to nearest supported city"""
|
||||
|
||||
def __init__(self):
|
||||
self.registry = CityRegistry()
|
||||
|
||||
def map_tenant_to_city(
|
||||
self,
|
||||
latitude: float,
|
||||
longitude: float
|
||||
) -> Optional[Tuple[CityDefinition, float]]:
|
||||
"""
|
||||
Map tenant coordinates to nearest city
|
||||
|
||||
Returns:
|
||||
Tuple of (CityDefinition, distance_km) or None if no match
|
||||
"""
|
||||
nearest_city = self.registry.find_nearest_city(latitude, longitude)
|
||||
|
||||
if not nearest_city:
|
||||
logger.warning(
|
||||
"No supported city found for coordinates",
|
||||
lat=latitude,
|
||||
lon=longitude
|
||||
)
|
||||
return None
|
||||
|
||||
distance = self.registry._haversine_distance(
|
||||
latitude, longitude,
|
||||
nearest_city.latitude, nearest_city.longitude
|
||||
)
|
||||
|
||||
logger.info(
|
||||
"Mapped tenant to city",
|
||||
lat=latitude,
|
||||
lon=longitude,
|
||||
city=nearest_city.name,
|
||||
distance_km=round(distance, 2)
|
||||
)
|
||||
|
||||
return (nearest_city, distance)
|
||||
|
||||
def validate_location_support(self, latitude: float, longitude: float) -> bool:
|
||||
"""Check if coordinates are supported"""
|
||||
result = self.map_tenant_to_city(latitude, longitude)
|
||||
return result is not None
|
||||
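The mapper above is a thin wrapper over CityRegistry.find_nearest_city; a brief usage sketch with arbitrary coordinates:

from app.registry.geolocation_mapper import GeolocationMapper

mapper = GeolocationMapper()
match = mapper.map_tenant_to_city(40.44, -3.69)          # a point inside Madrid's radius
if match is not None:
    city, distance_km = match
    print(f"{city.name}: {distance_km:.1f} km from the city centre")

# Barcelona is disabled in the registry, so its coordinates are not supported yet.
print(mapper.validate_location_support(41.39, 2.17))     # False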
0
services/external/app/repositories/__init__.py
vendored
Normal file
329
services/external/app/repositories/calendar_repository.py
vendored
Normal file
@@ -0,0 +1,329 @@
|
||||
# services/external/app/repositories/calendar_repository.py
|
||||
"""
|
||||
Calendar Repository - Manages school calendars and tenant location contexts
|
||||
"""
|
||||
|
||||
from typing import List, Dict, Any, Optional
|
||||
from datetime import datetime
|
||||
from sqlalchemy import select, and_, or_
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
import structlog
|
||||
import uuid
|
||||
|
||||
from app.models.calendar import SchoolCalendar, TenantLocationContext
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
|
||||
class CalendarRepository:
|
||||
"""Repository for school calendar and tenant location data"""
|
||||
|
||||
def __init__(self, session: AsyncSession):
|
||||
self.session = session
|
||||
|
||||
# ===== School Calendar Operations =====
|
||||
|
||||
async def create_school_calendar(
|
||||
self,
|
||||
city_id: str,
|
||||
calendar_name: str,
|
||||
school_type: str,
|
||||
academic_year: str,
|
||||
holiday_periods: List[Dict[str, Any]],
|
||||
school_hours: Dict[str, Any],
|
||||
source: Optional[str] = None,
|
||||
enabled: bool = True
|
||||
) -> SchoolCalendar:
|
||||
"""Create a new school calendar"""
|
||||
try:
|
||||
calendar = SchoolCalendar(
|
||||
id=uuid.uuid4(),
|
||||
city_id=city_id,
|
||||
calendar_name=calendar_name,
|
||||
school_type=school_type,
|
||||
academic_year=academic_year,
|
||||
holiday_periods=holiday_periods,
|
||||
school_hours=school_hours,
|
||||
source=source,
|
||||
enabled=enabled
|
||||
)
|
||||
|
||||
self.session.add(calendar)
|
||||
await self.session.commit()
|
||||
await self.session.refresh(calendar)
|
||||
|
||||
logger.info(
|
||||
"School calendar created",
|
||||
calendar_id=str(calendar.id),
|
||||
city_id=city_id,
|
||||
school_type=school_type
|
||||
)
|
||||
|
||||
return calendar
|
||||
|
||||
except Exception as e:
|
||||
await self.session.rollback()
|
||||
logger.error(
|
||||
"Error creating school calendar",
|
||||
city_id=city_id,
|
||||
error=str(e)
|
||||
)
|
||||
raise
|
||||
|
||||
async def get_calendar_by_id(
|
||||
self,
|
||||
calendar_id: uuid.UUID
|
||||
) -> Optional[SchoolCalendar]:
|
||||
"""Get school calendar by ID"""
|
||||
stmt = select(SchoolCalendar).where(SchoolCalendar.id == calendar_id)
|
||||
result = await self.session.execute(stmt)
|
||||
return result.scalar_one_or_none()
|
||||
|
||||
async def get_calendars_by_city(
|
||||
self,
|
||||
city_id: str,
|
||||
enabled_only: bool = True
|
||||
) -> List[SchoolCalendar]:
|
||||
"""Get all school calendars for a city"""
|
||||
stmt = select(SchoolCalendar).where(SchoolCalendar.city_id == city_id)
|
||||
|
||||
if enabled_only:
|
||||
stmt = stmt.where(SchoolCalendar.enabled == True)
|
||||
|
||||
stmt = stmt.order_by(SchoolCalendar.academic_year.desc(), SchoolCalendar.school_type)
|
||||
|
||||
result = await self.session.execute(stmt)
|
||||
return list(result.scalars().all())
|
||||
|
||||
async def get_calendar_by_city_type_year(
|
||||
self,
|
||||
city_id: str,
|
||||
school_type: str,
|
||||
academic_year: str
|
||||
) -> Optional[SchoolCalendar]:
|
||||
"""Get specific calendar by city, type, and year"""
|
||||
stmt = select(SchoolCalendar).where(
|
||||
and_(
|
||||
SchoolCalendar.city_id == city_id,
|
||||
SchoolCalendar.school_type == school_type,
|
||||
SchoolCalendar.academic_year == academic_year,
|
||||
SchoolCalendar.enabled == True
|
||||
)
|
||||
)
|
||||
|
||||
result = await self.session.execute(stmt)
|
||||
return result.scalar_one_or_none()
|
||||
|
||||
async def update_calendar(
|
||||
self,
|
||||
calendar_id: uuid.UUID,
|
||||
**kwargs
|
||||
) -> Optional[SchoolCalendar]:
|
||||
"""Update school calendar"""
|
||||
try:
|
||||
calendar = await self.get_calendar_by_id(calendar_id)
|
||||
if not calendar:
|
||||
return None
|
||||
|
||||
for key, value in kwargs.items():
|
||||
if hasattr(calendar, key):
|
||||
setattr(calendar, key, value)
|
||||
|
||||
calendar.updated_at = datetime.utcnow()
|
||||
|
||||
await self.session.commit()
|
||||
await self.session.refresh(calendar)
|
||||
|
||||
logger.info(
|
||||
"School calendar updated",
|
||||
calendar_id=str(calendar_id),
|
||||
fields=list(kwargs.keys())
|
||||
)
|
||||
|
||||
return calendar
|
||||
|
||||
except Exception as e:
|
||||
await self.session.rollback()
|
||||
logger.error(
|
||||
"Error updating school calendar",
|
||||
calendar_id=str(calendar_id),
|
||||
error=str(e)
|
||||
)
|
||||
raise
|
||||
|
||||
async def delete_calendar(self, calendar_id: uuid.UUID) -> bool:
|
||||
"""Delete school calendar"""
|
||||
try:
|
||||
calendar = await self.get_calendar_by_id(calendar_id)
|
||||
if not calendar:
|
||||
return False
|
||||
|
||||
await self.session.delete(calendar)
|
||||
await self.session.commit()
|
||||
|
||||
logger.info("School calendar deleted", calendar_id=str(calendar_id))
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
await self.session.rollback()
|
||||
logger.error(
|
||||
"Error deleting school calendar",
|
||||
calendar_id=str(calendar_id),
|
||||
error=str(e)
|
||||
)
|
||||
raise
|
||||
|
||||
# ===== Tenant Location Context Operations =====
|
||||
|
||||
async def create_or_update_tenant_location_context(
|
||||
self,
|
||||
tenant_id: uuid.UUID,
|
||||
city_id: str,
|
||||
school_calendar_id: Optional[uuid.UUID] = None,
|
||||
neighborhood: Optional[str] = None,
|
||||
local_events: Optional[List[Dict[str, Any]]] = None,
|
||||
notes: Optional[str] = None
|
||||
) -> TenantLocationContext:
|
||||
"""Create or update tenant location context"""
|
||||
try:
|
||||
# Check if context exists
|
||||
existing = await self.get_tenant_location_context(tenant_id)
|
||||
|
||||
if existing:
|
||||
# Update existing
|
||||
existing.city_id = city_id
|
||||
if school_calendar_id is not None:
|
||||
existing.school_calendar_id = school_calendar_id
|
||||
if neighborhood is not None:
|
||||
existing.neighborhood = neighborhood
|
||||
if local_events is not None:
|
||||
existing.local_events = local_events
|
||||
if notes is not None:
|
||||
existing.notes = notes
|
||||
existing.updated_at = datetime.utcnow()
|
||||
|
||||
await self.session.commit()
|
||||
await self.session.refresh(existing)
|
||||
|
||||
logger.info(
|
||||
"Tenant location context updated",
|
||||
tenant_id=str(tenant_id)
|
||||
)
|
||||
|
||||
return existing
|
||||
else:
|
||||
# Create new
|
||||
context = TenantLocationContext(
|
||||
tenant_id=tenant_id,
|
||||
city_id=city_id,
|
||||
school_calendar_id=school_calendar_id,
|
||||
neighborhood=neighborhood,
|
||||
local_events=local_events or [],
|
||||
notes=notes
|
||||
)
|
||||
|
||||
self.session.add(context)
|
||||
await self.session.commit()
|
||||
await self.session.refresh(context)
|
||||
|
||||
logger.info(
|
||||
"Tenant location context created",
|
||||
tenant_id=str(tenant_id),
|
||||
city_id=city_id
|
||||
)
|
||||
|
||||
return context
|
||||
|
||||
except Exception as e:
|
||||
await self.session.rollback()
|
||||
logger.error(
|
||||
"Error creating/updating tenant location context",
|
||||
tenant_id=str(tenant_id),
|
||||
error=str(e)
|
||||
)
|
||||
raise
|
||||
|
||||
async def get_tenant_location_context(
|
||||
self,
|
||||
tenant_id: uuid.UUID
|
||||
) -> Optional[TenantLocationContext]:
|
||||
"""Get tenant location context"""
|
||||
stmt = select(TenantLocationContext).where(
|
||||
TenantLocationContext.tenant_id == tenant_id
|
||||
)
|
||||
result = await self.session.execute(stmt)
|
||||
return result.scalar_one_or_none()
|
||||
|
||||
async def get_tenant_with_calendar(
|
||||
self,
|
||||
tenant_id: uuid.UUID
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
"""Get tenant location context with full calendar details"""
|
||||
context = await self.get_tenant_location_context(tenant_id)
|
||||
if not context:
|
||||
return None
|
||||
|
||||
result = {
|
||||
"tenant_id": str(context.tenant_id),
|
||||
"city_id": context.city_id,
|
||||
"neighborhood": context.neighborhood,
|
||||
"local_events": context.local_events,
|
||||
"notes": context.notes,
|
||||
"calendar": None
|
||||
}
|
||||
|
||||
if context.school_calendar_id:
|
||||
calendar = await self.get_calendar_by_id(context.school_calendar_id)
|
||||
if calendar:
|
||||
result["calendar"] = {
|
||||
"calendar_id": str(calendar.id),
|
||||
"calendar_name": calendar.calendar_name,
|
||||
"school_type": calendar.school_type,
|
||||
"academic_year": calendar.academic_year,
|
||||
"holiday_periods": calendar.holiday_periods,
|
||||
"school_hours": calendar.school_hours,
|
||||
"source": calendar.source
|
||||
}
|
||||
|
||||
return result
|
||||
|
||||
async def delete_tenant_location_context(
|
||||
self,
|
||||
tenant_id: uuid.UUID
|
||||
) -> bool:
|
||||
"""Delete tenant location context"""
|
||||
try:
|
||||
context = await self.get_tenant_location_context(tenant_id)
|
||||
if not context:
|
||||
return False
|
||||
|
||||
await self.session.delete(context)
|
||||
await self.session.commit()
|
||||
|
||||
logger.info(
|
||||
"Tenant location context deleted",
|
||||
tenant_id=str(tenant_id)
|
||||
)
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
await self.session.rollback()
|
||||
logger.error(
|
||||
"Error deleting tenant location context",
|
||||
tenant_id=str(tenant_id),
|
||||
error=str(e)
|
||||
)
|
||||
raise
|
||||
|
||||
# ===== Helper Methods =====
|
||||
|
||||
async def get_all_tenants_for_calendar(
|
||||
self,
|
||||
calendar_id: uuid.UUID
|
||||
) -> List[TenantLocationContext]:
|
||||
"""Get all tenants using a specific calendar"""
|
||||
stmt = select(TenantLocationContext).where(
|
||||
TenantLocationContext.school_calendar_id == calendar_id
|
||||
)
|
||||
result = await self.session.execute(stmt)
|
||||
return list(result.scalars().all())
|
||||
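A sketch of seeding the repository above from the static registry data and linking the result to a tenant. The DSN, session factory, and tenant id are placeholders, not project configuration; SQLAlchemy 2.x async (async_sessionmaker) is assumed since the repository takes an AsyncSession.

# Placeholder wiring -- only the repository calls mirror the code above.
import asyncio
import uuid
from sqlalchemy.ext.asyncio import create_async_engine, async_sessionmaker

from app.repositories.calendar_repository import CalendarRepository

async def seed_example() -> None:
    engine = create_async_engine("postgresql+asyncpg://user:pass@localhost/external")  # placeholder DSN
    async with async_sessionmaker(engine, expire_on_commit=False)() as session:
        repo = CalendarRepository(session)
        calendar = await repo.create_school_calendar(
            city_id="madrid",
            calendar_name="Madrid Primary School Calendar 2025-2026",
            school_type="primary",
            academic_year="2025-2026",
            holiday_periods=[{"name": "Summer Holiday",
                              "start_date": "2026-06-21", "end_date": "2026-09-09"}],
            school_hours={"morning_start": "09:00", "morning_end": "14:00",
                          "has_afternoon_session": False},
            source="comunidad_madrid_orden_1476_2025",
        )
        await repo.create_or_update_tenant_location_context(
            tenant_id=uuid.uuid4(),            # stand-in tenant id
            city_id="madrid",
            school_calendar_id=calendar.id,
            neighborhood="Chamberí",
        )

asyncio.run(seed_example())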
249
services/external/app/repositories/city_data_repository.py
vendored
Normal file
@@ -0,0 +1,249 @@
|
||||
# services/external/app/repositories/city_data_repository.py
|
||||
"""
|
||||
City Data Repository - Manages shared city-based data storage
|
||||
"""
|
||||
|
||||
from typing import List, Dict, Any, Optional
|
||||
from datetime import datetime
|
||||
from sqlalchemy import select, delete, and_
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
import structlog
|
||||
|
||||
from app.models.city_weather import CityWeatherData
|
||||
from app.models.city_traffic import CityTrafficData
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
|
||||
class CityDataRepository:
|
||||
"""Repository for city-based historical data"""
|
||||
|
||||
def __init__(self, session: AsyncSession):
|
||||
self.session = session
|
||||
|
||||
async def bulk_store_weather(
|
||||
self,
|
||||
city_id: str,
|
||||
weather_records: List[Dict[str, Any]]
|
||||
) -> int:
|
||||
"""Bulk insert weather records for a city"""
|
||||
if not weather_records:
|
||||
return 0
|
||||
|
||||
try:
|
||||
objects = []
|
||||
for record in weather_records:
|
||||
obj = CityWeatherData(
|
||||
city_id=city_id,
|
||||
date=record.get('date'),
|
||||
temperature=record.get('temperature'),
|
||||
precipitation=record.get('precipitation'),
|
||||
humidity=record.get('humidity'),
|
||||
wind_speed=record.get('wind_speed'),
|
||||
pressure=record.get('pressure'),
|
||||
description=record.get('description'),
|
||||
source=record.get('source', 'ingestion'),
|
||||
raw_data=record.get('raw_data')
|
||||
)
|
||||
objects.append(obj)
|
||||
|
||||
self.session.add_all(objects)
|
||||
await self.session.commit()
|
||||
|
||||
logger.info(
|
||||
"Weather data stored",
|
||||
city_id=city_id,
|
||||
records=len(objects)
|
||||
)
|
||||
|
||||
return len(objects)
|
||||
|
||||
except Exception as e:
|
||||
await self.session.rollback()
|
||||
logger.error(
|
||||
"Error storing weather data",
|
||||
city_id=city_id,
|
||||
error=str(e)
|
||||
)
|
||||
raise
|
||||
|
||||
async def get_weather_by_city_and_range(
|
||||
self,
|
||||
city_id: str,
|
||||
start_date: datetime,
|
||||
end_date: datetime
|
||||
) -> List[CityWeatherData]:
|
||||
"""Get weather data for city within date range"""
|
||||
stmt = select(CityWeatherData).where(
|
||||
and_(
|
||||
CityWeatherData.city_id == city_id,
|
||||
CityWeatherData.date >= start_date,
|
||||
CityWeatherData.date <= end_date
|
||||
)
|
||||
).order_by(CityWeatherData.date)
|
||||
|
||||
result = await self.session.execute(stmt)
|
||||
return list(result.scalars().all())
|
||||
|
||||
async def delete_weather_before(
|
||||
self,
|
||||
city_id: str,
|
||||
cutoff_date: datetime
|
||||
) -> int:
|
||||
"""Delete weather records older than cutoff date"""
|
||||
stmt = delete(CityWeatherData).where(
|
||||
and_(
|
||||
CityWeatherData.city_id == city_id,
|
||||
CityWeatherData.date < cutoff_date
|
||||
)
|
||||
)
|
||||
|
||||
result = await self.session.execute(stmt)
|
||||
await self.session.commit()
|
||||
|
||||
return result.rowcount
|
||||
|
||||
async def bulk_store_traffic(
|
||||
self,
|
||||
city_id: str,
|
||||
traffic_records: List[Dict[str, Any]]
|
||||
) -> int:
|
||||
"""Bulk insert traffic records for a city"""
|
||||
if not traffic_records:
|
||||
return 0
|
||||
|
||||
try:
|
||||
objects = []
|
||||
for record in traffic_records:
|
||||
obj = CityTrafficData(
|
||||
city_id=city_id,
|
||||
date=record.get('date'),
|
||||
traffic_volume=record.get('traffic_volume'),
|
||||
pedestrian_count=record.get('pedestrian_count'),
|
||||
congestion_level=record.get('congestion_level'),
|
||||
average_speed=record.get('average_speed'),
|
||||
source=record.get('source', 'ingestion'),
|
||||
raw_data=record.get('raw_data')
|
||||
)
|
||||
objects.append(obj)
|
||||
|
||||
self.session.add_all(objects)
|
||||
await self.session.commit()
|
||||
|
||||
logger.info(
|
||||
"Traffic data stored",
|
||||
city_id=city_id,
|
||||
records=len(objects)
|
||||
)
|
||||
|
||||
return len(objects)
|
||||
|
||||
except Exception as e:
|
||||
await self.session.rollback()
|
||||
logger.error(
|
||||
"Error storing traffic data",
|
||||
city_id=city_id,
|
||||
error=str(e)
|
||||
)
|
||||
raise
|
||||
|
||||
async def get_traffic_by_city_and_range(
|
||||
self,
|
||||
city_id: str,
|
||||
start_date: datetime,
|
||||
end_date: datetime
|
||||
) -> List[CityTrafficData]:
|
||||
"""Get traffic data for city within date range - aggregated daily"""
|
||||
from sqlalchemy import func, cast, Date
|
||||
|
||||
# Aggregate hourly data to daily averages to avoid loading hundreds of thousands of records
|
||||
stmt = select(
|
||||
cast(CityTrafficData.date, Date).label('date'),
|
||||
func.avg(CityTrafficData.traffic_volume).label('traffic_volume'),
|
||||
func.avg(CityTrafficData.pedestrian_count).label('pedestrian_count'),
|
||||
func.avg(CityTrafficData.average_speed).label('average_speed'),
|
||||
func.max(CityTrafficData.source).label('source')
|
||||
).where(
|
||||
and_(
|
||||
CityTrafficData.city_id == city_id,
|
||||
CityTrafficData.date >= start_date,
|
||||
CityTrafficData.date <= end_date
|
||||
)
|
||||
).group_by(
|
||||
cast(CityTrafficData.date, Date)
|
||||
).order_by(
|
||||
cast(CityTrafficData.date, Date)
|
||||
)
|
||||
|
||||
result = await self.session.execute(stmt)
|
||||
|
||||
# Convert aggregated rows to CityTrafficData objects
|
||||
traffic_records = []
|
||||
for row in result:
|
||||
record = CityTrafficData(
|
||||
city_id=city_id,
|
||||
date=datetime.combine(row.date, datetime.min.time()),
|
||||
traffic_volume=int(row.traffic_volume) if row.traffic_volume is not None else None,
|
||||
pedestrian_count=int(row.pedestrian_count) if row.pedestrian_count is not None else None,
|
||||
congestion_level='medium', # Default since we're averaging
|
||||
average_speed=float(row.average_speed) if row.average_speed is not None else None,
|
||||
source=row.source or 'aggregated'
|
||||
)
|
||||
traffic_records.append(record)
|
||||
|
||||
return traffic_records
|
||||
|
||||
async def delete_traffic_before(
|
||||
self,
|
||||
city_id: str,
|
||||
cutoff_date: datetime
|
||||
) -> int:
|
||||
"""Delete traffic records older than cutoff date"""
|
||||
stmt = delete(CityTrafficData).where(
|
||||
and_(
|
||||
CityTrafficData.city_id == city_id,
|
||||
CityTrafficData.date < cutoff_date
|
||||
)
|
||||
)
|
||||
|
||||
result = await self.session.execute(stmt)
|
||||
await self.session.commit()
|
||||
|
||||
return result.rowcount
|
||||
|
||||
async def get_data_coverage(
|
||||
self,
|
||||
city_id: str,
|
||||
start_date: datetime,
|
||||
end_date: datetime
|
||||
) -> Dict[str, int]:
|
||||
"""
|
||||
Check how much data exists for a city in a date range
|
||||
Returns dict with counts: {'weather': X, 'traffic': Y}
|
||||
"""
|
||||
# Count weather records
|
||||
weather_stmt = select(CityWeatherData).where(
|
||||
and_(
|
||||
CityWeatherData.city_id == city_id,
|
||||
CityWeatherData.date >= start_date,
|
||||
CityWeatherData.date <= end_date
|
||||
)
|
||||
)
|
||||
weather_result = await self.session.execute(weather_stmt)
|
||||
weather_count = len(weather_result.scalars().all())
|
||||
|
||||
# Count traffic records
|
||||
traffic_stmt = select(CityTrafficData).where(
|
||||
and_(
|
||||
CityTrafficData.city_id == city_id,
|
||||
CityTrafficData.date >= start_date,
|
||||
CityTrafficData.date <= end_date
|
||||
)
|
||||
)
|
||||
traffic_result = await self.session.execute(traffic_stmt)
|
||||
traffic_count = len(traffic_result.scalars().all())
|
||||
|
||||
return {
|
||||
'weather': weather_count,
|
||||
'traffic': traffic_count
|
||||
}
|
||||
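A sketch of the typical write/check/prune cycle against the repository above. The session is assumed to be an AsyncSession provided elsewhere; the weather values and the retention cutoff are invented.

from datetime import datetime

from app.repositories.city_data_repository import CityDataRepository

async def weather_maintenance(session) -> None:
    repo = CityDataRepository(session)
    stored = await repo.bulk_store_weather(
        city_id="madrid",
        weather_records=[{
            "date": datetime(2025, 10, 1),
            "temperature": 21.4,
            "precipitation": 0.0,
            "humidity": 48,
            "source": "aemet",
        }],
    )
    coverage = await repo.get_data_coverage(
        "madrid", datetime(2025, 9, 1), datetime(2025, 10, 1)
    )
    deleted = await repo.delete_weather_before(
        "madrid", datetime(2023, 10, 1)          # arbitrary retention cutoff
    )
    print(stored, coverage, deleted)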
271
services/external/app/repositories/poi_context_repository.py
vendored
Normal file
@@ -0,0 +1,271 @@
|
||||
"""
|
||||
POI Context Repository
|
||||
|
||||
Data access layer for TenantPOIContext model.
|
||||
Handles CRUD operations for POI detection results and ML features.
|
||||
"""
|
||||
|
||||
from typing import Optional, List
|
||||
from datetime import datetime, timezone
|
||||
from sqlalchemy import select, update, delete
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
import structlog
|
||||
import uuid
|
||||
|
||||
from app.models.poi_context import TenantPOIContext
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
|
||||
class POIContextRepository:
|
||||
"""
|
||||
Repository for POI context data access.
|
||||
|
||||
Manages storage and retrieval of POI detection results
|
||||
and ML features for tenant locations.
|
||||
"""
|
||||
|
||||
def __init__(self, session: AsyncSession):
|
||||
"""
|
||||
Initialize repository.
|
||||
|
||||
Args:
|
||||
session: SQLAlchemy async session
|
||||
"""
|
||||
self.session = session
|
||||
|
||||
async def create(self, poi_context_data: dict) -> TenantPOIContext:
|
||||
"""
|
||||
Create new POI context record.
|
||||
|
||||
Args:
|
||||
poi_context_data: Dictionary with POI context data
|
||||
|
||||
Returns:
|
||||
Created TenantPOIContext instance
|
||||
"""
|
||||
poi_context = TenantPOIContext(
|
||||
tenant_id=poi_context_data["tenant_id"],
|
||||
latitude=poi_context_data["latitude"],
|
||||
longitude=poi_context_data["longitude"],
|
||||
poi_detection_results=poi_context_data.get("poi_detection_results", {}),
|
||||
ml_features=poi_context_data.get("ml_features", {}),
|
||||
total_pois_detected=poi_context_data.get("total_pois_detected", 0),
|
||||
high_impact_categories=poi_context_data.get("high_impact_categories", []),
|
||||
relevant_categories=poi_context_data.get("relevant_categories", []),
|
||||
detection_timestamp=poi_context_data.get(
|
||||
"detection_timestamp",
|
||||
datetime.now(timezone.utc)
|
||||
),
|
||||
detection_source=poi_context_data.get("detection_source", "overpass_api"),
|
||||
detection_status=poi_context_data.get("detection_status", "completed"),
|
||||
detection_error=poi_context_data.get("detection_error"),
|
||||
refresh_interval_days=poi_context_data.get("refresh_interval_days", 180)
|
||||
)
|
||||
|
||||
# Calculate next refresh date
|
||||
poi_context.next_refresh_date = poi_context.calculate_next_refresh()
|
||||
|
||||
self.session.add(poi_context)
|
||||
await self.session.commit()
|
||||
await self.session.refresh(poi_context)
|
||||
|
||||
logger.info(
|
||||
"POI context created",
|
||||
tenant_id=str(poi_context.tenant_id),
|
||||
total_pois=poi_context.total_pois_detected
|
||||
)
|
||||
|
||||
return poi_context
|
||||
|
||||
async def get_by_tenant_id(self, tenant_id: str | uuid.UUID) -> Optional[TenantPOIContext]:
|
||||
"""
|
||||
Get POI context by tenant ID.
|
||||
|
||||
Args:
|
||||
tenant_id: Tenant UUID
|
||||
|
||||
Returns:
|
||||
TenantPOIContext or None if not found
|
||||
"""
|
||||
if isinstance(tenant_id, str):
|
||||
tenant_id = uuid.UUID(tenant_id)
|
||||
|
||||
stmt = select(TenantPOIContext).where(
|
||||
TenantPOIContext.tenant_id == tenant_id
|
||||
)
|
||||
result = await self.session.execute(stmt)
|
||||
return result.scalar_one_or_none()
|
||||
|
||||
async def get_by_id(self, poi_context_id: str | uuid.UUID) -> Optional[TenantPOIContext]:
|
||||
"""
|
||||
Get POI context by ID.
|
||||
|
||||
Args:
|
||||
poi_context_id: POI context UUID
|
||||
|
||||
Returns:
|
||||
TenantPOIContext or None if not found
|
||||
"""
|
||||
if isinstance(poi_context_id, str):
|
||||
poi_context_id = uuid.UUID(poi_context_id)
|
||||
|
||||
stmt = select(TenantPOIContext).where(
|
||||
TenantPOIContext.id == poi_context_id
|
||||
)
|
||||
result = await self.session.execute(stmt)
|
||||
return result.scalar_one_or_none()
|
||||
|
||||
async def update(
|
||||
self,
|
||||
tenant_id: str | uuid.UUID,
|
||||
update_data: dict
|
||||
) -> Optional[TenantPOIContext]:
|
||||
"""
|
||||
Update POI context for tenant.
|
||||
|
||||
Args:
|
||||
tenant_id: Tenant UUID
|
||||
update_data: Dictionary with fields to update
|
||||
|
||||
Returns:
|
||||
Updated TenantPOIContext or None if not found
|
||||
"""
|
||||
if isinstance(tenant_id, str):
|
||||
tenant_id = uuid.UUID(tenant_id)
|
||||
|
||||
poi_context = await self.get_by_tenant_id(tenant_id)
|
||||
if not poi_context:
|
||||
return None
|
||||
|
||||
# Update fields
|
||||
for key, value in update_data.items():
|
||||
if hasattr(poi_context, key):
|
||||
setattr(poi_context, key, value)
|
||||
|
||||
# Update timestamp
|
||||
poi_context.updated_at = datetime.now(timezone.utc)
|
||||
|
||||
await self.session.commit()
|
||||
await self.session.refresh(poi_context)
|
||||
|
||||
logger.info(
|
||||
"POI context updated",
|
||||
tenant_id=str(tenant_id),
|
||||
updated_fields=list(update_data.keys())
|
||||
)
|
||||
|
||||
return poi_context
|
||||
|
||||
async def create_or_update(
|
||||
self,
|
||||
tenant_id: str | uuid.UUID,
|
||||
poi_detection_results: dict
|
||||
) -> TenantPOIContext:
|
||||
"""
|
||||
Create new POI context or update existing one.
|
||||
|
||||
Args:
|
||||
tenant_id: Tenant UUID
|
||||
poi_detection_results: Full POI detection results
|
||||
|
||||
Returns:
|
||||
Created or updated TenantPOIContext
|
||||
"""
|
||||
if isinstance(tenant_id, str):
|
||||
tenant_id = uuid.UUID(tenant_id)
|
||||
|
||||
existing = await self.get_by_tenant_id(tenant_id)
|
||||
|
||||
poi_context_data = {
|
||||
"tenant_id": tenant_id,
|
||||
"latitude": poi_detection_results["location"]["latitude"],
|
||||
"longitude": poi_detection_results["location"]["longitude"],
|
||||
"poi_detection_results": poi_detection_results.get("poi_categories", {}),
|
||||
"ml_features": poi_detection_results.get("ml_features", {}),
|
||||
"total_pois_detected": poi_detection_results.get("summary", {}).get("total_pois_detected", 0),
|
||||
"high_impact_categories": poi_detection_results.get("summary", {}).get("high_impact_categories", []),
|
||||
"relevant_categories": poi_detection_results.get("relevant_categories", []),
|
||||
"detection_timestamp": datetime.fromisoformat(
|
||||
poi_detection_results["detection_timestamp"].replace("Z", "+00:00")
|
||||
) if isinstance(poi_detection_results.get("detection_timestamp"), str)
|
||||
else datetime.now(timezone.utc),
|
||||
"detection_status": poi_detection_results.get("detection_status", "completed"),
|
||||
"detection_error": None if poi_detection_results.get("detection_status") == "completed"
|
||||
else str(poi_detection_results.get("detection_errors"))
|
||||
}
|
||||
|
||||
if existing:
|
||||
# Update existing
|
||||
update_data = {
|
||||
**poi_context_data,
|
||||
"last_refreshed_at": datetime.now(timezone.utc)
|
||||
}
|
||||
existing.mark_refreshed() # Update next_refresh_date
|
||||
return await self.update(tenant_id, update_data)
|
||||
else:
|
||||
# Create new
|
||||
return await self.create(poi_context_data)
|
||||
|
||||
async def delete_by_tenant_id(self, tenant_id: str | uuid.UUID) -> bool:
|
||||
"""
|
||||
Delete POI context for tenant.
|
||||
|
||||
Args:
|
||||
tenant_id: Tenant UUID
|
||||
|
||||
Returns:
|
||||
True if deleted, False if not found
|
||||
"""
|
||||
if isinstance(tenant_id, str):
|
||||
tenant_id = uuid.UUID(tenant_id)
|
||||
|
||||
stmt = delete(TenantPOIContext).where(
|
||||
TenantPOIContext.tenant_id == tenant_id
|
||||
)
|
||||
result = await self.session.execute(stmt)
|
||||
await self.session.commit()
|
||||
|
||||
deleted = result.rowcount > 0
|
||||
if deleted:
|
||||
logger.info("POI context deleted", tenant_id=str(tenant_id))
|
||||
|
||||
return deleted
|
||||
|
||||
async def get_stale_contexts(self, limit: int = 100) -> List[TenantPOIContext]:
|
||||
"""
|
||||
Get POI contexts that need refresh.
|
||||
|
||||
Args:
|
||||
limit: Maximum number of contexts to return
|
||||
|
||||
Returns:
|
||||
List of stale TenantPOIContext instances
|
||||
"""
|
||||
now = datetime.now(timezone.utc)
|
||||
stmt = (
|
||||
select(TenantPOIContext)
|
||||
.where(TenantPOIContext.next_refresh_date <= now)
|
||||
.limit(limit)
|
||||
)
|
||||
result = await self.session.execute(stmt)
|
||||
return list(result.scalars().all())
|
||||
|
||||
async def count_by_status(self) -> dict:
|
||||
"""
|
||||
Count POI contexts by detection status.
|
||||
|
||||
Returns:
|
||||
Dictionary with counts by status
|
||||
"""
|
||||
from sqlalchemy import func
|
||||
|
||||
stmt = select(
|
||||
TenantPOIContext.detection_status,
|
||||
func.count(TenantPOIContext.id)
|
||||
).group_by(TenantPOIContext.detection_status)
|
||||
|
||||
result = await self.session.execute(stmt)
|
||||
rows = result.all()
|
||||
|
||||
return {status: count for status, count in rows}
|
||||
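A sketch of storing one detection payload through create_or_update() above. The payload values are invented; only the keys mirror the dict shape the method reads, and the tenant id must be a UUID string.

from datetime import datetime, timezone

from app.repositories.poi_context_repository import POIContextRepository

async def store_poi_result(session, tenant_id: str) -> None:
    repo = POIContextRepository(session)
    detection = {
        "location": {"latitude": 40.4300, "longitude": -3.7000},
        "poi_categories": {"schools": {"count": 3}},
        "ml_features": {"poi_school_density": 0.7},
        "summary": {"total_pois_detected": 3, "high_impact_categories": ["schools"]},
        "relevant_categories": ["schools"],
        "detection_timestamp": datetime.now(timezone.utc).isoformat(),
        "detection_status": "completed",
    }
    context = await repo.create_or_update(tenant_id, detection)   # creates or refreshes
    print(context.total_pois_detected, context.next_refresh_date)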
226
services/external/app/repositories/traffic_repository.py
vendored
Normal file
@@ -0,0 +1,226 @@
|
||||
# ================================================================
|
||||
# services/external/app/repositories/traffic_repository.py
|
||||
# ================================================================
|
||||
"""
|
||||
Traffic Repository - Enhanced for multiple cities with comprehensive data access patterns
|
||||
Follows existing repository architecture while adding city-specific functionality
|
||||
"""
|
||||
|
||||
from typing import Optional, List, Dict, Any, Type, Tuple
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy import select, and_, or_, func, desc, asc, text, update, delete
|
||||
from sqlalchemy.orm import selectinload
|
||||
from datetime import datetime, timezone, timedelta
|
||||
import structlog
|
||||
|
||||
from app.models.traffic import TrafficData
|
||||
from app.schemas.traffic import TrafficDataCreate, TrafficDataResponse
|
||||
from shared.database.exceptions import DatabaseError, ValidationError
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
|
||||
class TrafficRepository:
|
||||
"""
|
||||
Enhanced repository for traffic data operations across multiple cities
|
||||
Provides city-aware queries and advanced traffic analytics
|
||||
"""
|
||||
|
||||
def __init__(self, session: AsyncSession):
|
||||
self.session = session
|
||||
self.model = TrafficData
|
||||
|
||||
# ================================================================
|
||||
# CORE TRAFFIC DATA OPERATIONS
|
||||
# ================================================================
|
||||
|
||||
async def get_by_location_and_date_range(
|
||||
self,
|
||||
latitude: float,
|
||||
longitude: float,
|
||||
start_date: datetime,
|
||||
end_date: datetime,
|
||||
tenant_id: Optional[str] = None
|
||||
) -> List[TrafficData]:
|
||||
"""Get traffic data by location and date range"""
|
||||
try:
|
||||
location_id = f"{latitude:.4f},{longitude:.4f}"
|
||||
|
||||
# Build base query
|
||||
query = select(self.model).where(self.model.location_id == location_id)
|
||||
|
||||
# Add tenant filter if specified
|
||||
if tenant_id:
|
||||
query = query.where(self.model.tenant_id == tenant_id)
|
||||
|
||||
# Add date range filters
|
||||
if start_date:
|
||||
query = query.where(self.model.date >= start_date)
|
||||
|
||||
if end_date:
|
||||
query = query.where(self.model.date <= end_date)
|
||||
|
||||
# Order by date
|
||||
query = query.order_by(self.model.date)
|
||||
|
||||
result = await self.session.execute(query)
|
||||
return result.scalars().all()
|
||||
|
||||
except Exception as e:
|
||||
logger.error("Failed to get traffic data by location and date range",
|
||||
latitude=latitude, longitude=longitude,
|
||||
error=str(e))
|
||||
raise DatabaseError(f"Failed to get traffic data: {str(e)}")
|
||||
|
||||
async def store_traffic_data_batch(
|
||||
self,
|
||||
traffic_data_list: List[Dict[str, Any]],
|
||||
location_id: str,
|
||||
tenant_id: Optional[str] = None
|
||||
) -> int:
|
||||
"""Store a batch of traffic data records with enhanced validation and duplicate handling."""
|
||||
stored_count = 0
|
||||
try:
|
||||
if not traffic_data_list:
|
||||
return 0
|
||||
|
||||
# Check for existing records to avoid duplicates - batch the queries to avoid parameter limit
|
||||
dates = [data.get('date') for data in traffic_data_list if data.get('date')]
|
||||
existing_dates = set()
|
||||
if dates:
|
||||
# PostgreSQL has a limit of 32767 parameters, so batch the queries
|
||||
batch_size = 30000 # Safe batch size under the limit
|
||||
for i in range(0, len(dates), batch_size):
|
||||
date_batch = dates[i:i + batch_size]
|
||||
existing_stmt = select(TrafficData.date).where(
|
||||
and_(
|
||||
TrafficData.location_id == location_id,
|
||||
TrafficData.date.in_(date_batch)
|
||||
)
|
||||
)
|
||||
result = await self.session.execute(existing_stmt)
|
||||
existing_dates.update({row[0] for row in result.fetchall()})
|
||||
logger.debug(f"Found {len(existing_dates)} existing records for location {location_id}")
|
||||
|
||||
batch_records = []
|
||||
for data in traffic_data_list:
|
||||
record_date = data.get('date')
|
||||
if not record_date or record_date in existing_dates:
|
||||
continue # Skip duplicates
|
||||
|
||||
# Validate data before preparing for insertion
|
||||
if self._validate_traffic_data(data):
|
||||
batch_records.append({
|
||||
'location_id': location_id,
|
||||
'city': data.get('city', 'madrid'), # Default to madrid for historical data
|
||||
'tenant_id': tenant_id, # Include tenant_id in batch insert
|
||||
'date': record_date,
|
||||
'traffic_volume': data.get('traffic_volume'),
|
||||
'pedestrian_count': data.get('pedestrian_count'),
|
||||
'congestion_level': data.get('congestion_level'),
|
||||
'average_speed': data.get('average_speed'),
|
||||
'source': data.get('source', 'unknown'),
|
||||
'raw_data': str(data)
|
||||
})
|
||||
|
||||
if batch_records:
|
||||
# Use bulk insert for performance
|
||||
await self.session.execute(
|
||||
TrafficData.__table__.insert(),
|
||||
batch_records
|
||||
)
|
||||
await self.session.commit()
|
||||
stored_count = len(batch_records)
|
||||
logger.info(f"Successfully stored {stored_count} traffic records for location {location_id}")
|
||||
|
||||
except Exception as e:
|
||||
logger.error("Failed to store traffic data batch",
|
||||
error=str(e), location_id=location_id)
|
||||
await self.session.rollback()
|
||||
raise DatabaseError(f"Batch store failed: {str(e)}")
|
||||
|
||||
return stored_count
|
||||
|
||||
def _validate_traffic_data(self, data: Dict[str, Any]) -> bool:
|
||||
"""Validate traffic data before storage"""
|
||||
required_fields = ['date']
|
||||
|
||||
# Check required fields
|
||||
for field in required_fields:
|
||||
if not data.get(field):
|
||||
return False
|
||||
|
||||
# Validate data types and ranges
|
||||
traffic_volume = data.get('traffic_volume')
|
||||
if traffic_volume is not None and (traffic_volume < 0 or traffic_volume > 10000):
|
||||
return False
|
||||
|
||||
pedestrian_count = data.get('pedestrian_count')
|
||||
if pedestrian_count is not None and (pedestrian_count < 0 or pedestrian_count > 10000):
|
||||
return False
|
||||
|
||||
average_speed = data.get('average_speed')
|
||||
if average_speed is not None and (average_speed < 0 or average_speed > 200):
|
||||
return False
|
||||
|
||||
congestion_level = data.get('congestion_level')
|
||||
if congestion_level and congestion_level not in ['low', 'medium', 'high', 'blocked']:
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
async def get_historical_traffic_for_training(self,
|
||||
latitude: float,
|
||||
longitude: float,
|
||||
start_date: datetime,
|
||||
end_date: datetime) -> List[TrafficData]:
|
||||
"""Retrieve stored traffic data for training ML models."""
|
||||
try:
|
||||
location_id = f"{latitude:.4f},{longitude:.4f}"
|
||||
|
||||
stmt = select(TrafficData).where(
|
||||
and_(
|
||||
TrafficData.location_id == location_id,
|
||||
TrafficData.date >= start_date,
|
||||
TrafficData.date <= end_date
|
||||
)
|
||||
).order_by(TrafficData.date)
|
||||
|
||||
result = await self.session.execute(stmt)
|
||||
return result.scalars().all()
|
||||
|
||||
except Exception as e:
|
||||
logger.error("Failed to retrieve traffic data for training",
|
||||
error=str(e), location_id=location_id)
|
||||
raise DatabaseError(f"Training data retrieval failed: {str(e)}")
|
||||
|
||||
async def get_recent_by_location(
|
||||
self,
|
||||
latitude: float,
|
||||
longitude: float,
|
||||
cutoff_datetime: datetime,
|
||||
tenant_id: Optional[str] = None
|
||||
) -> List[TrafficData]:
|
||||
"""Get recent traffic data by location after a cutoff datetime"""
|
||||
try:
|
||||
location_id = f"{latitude:.4f},{longitude:.4f}"
|
||||
|
||||
stmt = select(TrafficData).where(
|
||||
and_(
|
||||
TrafficData.location_id == location_id,
|
||||
TrafficData.date >= cutoff_datetime
|
||||
)
|
||||
).order_by(TrafficData.date.desc())
|
||||
|
||||
result = await self.session.execute(stmt)
|
||||
records = result.scalars().all()
|
||||
|
||||
logger.info("Retrieved recent traffic data",
|
||||
location_id=location_id, count=len(records),
|
||||
cutoff=cutoff_datetime.isoformat())
|
||||
return records
|
||||
|
||||
except Exception as e:
|
||||
logger.error("Failed to retrieve recent traffic data",
|
||||
error=str(e), location_id=f"{latitude:.4f},{longitude:.4f}")
|
||||
raise DatabaseError(f"Recent traffic data retrieval failed: {str(e)}")
|
||||
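A sketch of the write/read convention used by the repository above: rows are keyed by latitude/longitude rounded to four decimals, so callers must build location_id the same way on both sides. The data values are invented.

from datetime import datetime

from app.repositories.traffic_repository import TrafficRepository

async def store_and_read_traffic(session) -> None:
    repo = TrafficRepository(session)
    lat, lon = 40.4168, -3.7038
    location_id = f"{lat:.4f},{lon:.4f}"          # same rounding as the repository uses
    stored = await repo.store_traffic_data_batch(
        traffic_data_list=[{
            "date": datetime(2025, 10, 1, 8, 0),
            "traffic_volume": 1200,
            "congestion_level": "medium",
            "average_speed": 32.5,
            "source": "madrid_opendata",
        }],
        location_id=location_id,
    )
    rows = await repo.get_by_location_and_date_range(
        lat, lon, datetime(2025, 10, 1), datetime(2025, 10, 2)
    )
    print(stored, len(rows))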
138
services/external/app/repositories/weather_repository.py
vendored
Normal file
@@ -0,0 +1,138 @@
|
||||
# services/external/app/repositories/weather_repository.py
|
||||
|
||||
from typing import List, Dict, Any, Optional
|
||||
from datetime import datetime
|
||||
from sqlalchemy import select, and_
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
import structlog
|
||||
import json
|
||||
|
||||
from app.models.weather import WeatherData
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
class WeatherRepository:
|
||||
"""
|
||||
Repository for weather data operations, adapted for WeatherService.
|
||||
"""
|
||||
|
||||
def __init__(self, session: AsyncSession):
|
||||
self.session = session
|
||||
|
||||
async def get_historical_weather(self,
|
||||
location_id: str,
|
||||
start_date: datetime,
|
||||
end_date: datetime) -> List[WeatherData]:
|
||||
"""
|
||||
Retrieves historical weather data for a specific location and date range.
|
||||
This method directly supports the data retrieval logic in WeatherService.
|
||||
"""
|
||||
try:
|
||||
stmt = select(WeatherData).where(
|
||||
and_(
|
||||
WeatherData.location_id == location_id,
|
||||
WeatherData.date >= start_date,
|
||||
WeatherData.date <= end_date
|
||||
)
|
||||
).order_by(WeatherData.date)
|
||||
|
||||
result = await self.session.execute(stmt)
|
||||
records = result.scalars().all()
|
||||
logger.debug(f"Retrieved {len(records)} historical records for location {location_id}")
|
||||
return list(records)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"Failed to get historical weather from repository",
|
||||
error=str(e),
|
||||
location_id=location_id
|
||||
)
|
||||
raise
|
||||
|
||||
def _serialize_json_fields(self, data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""
|
||||
Serialize JSON fields (raw_data, processed_data) to ensure proper JSON storage
|
||||
"""
|
||||
serialized = data.copy()
|
||||
|
||||
# Serialize raw_data if present
|
||||
if 'raw_data' in serialized and serialized['raw_data'] is not None:
|
||||
if not isinstance(serialized['raw_data'], str):
|
||||
try:
|
||||
# Convert datetime objects to strings for JSON serialization
|
||||
raw_data = serialized['raw_data']
|
||||
if isinstance(raw_data, dict):
|
||||
# Handle datetime objects in the dict
|
||||
json_safe_data = {}
|
||||
for k, v in raw_data.items():
|
||||
if hasattr(v, 'isoformat'): # datetime-like object
|
||||
json_safe_data[k] = v.isoformat()
|
||||
else:
|
||||
json_safe_data[k] = v
|
||||
serialized['raw_data'] = json_safe_data
|
||||
except Exception as e:
|
||||
logger.warning(f"Could not serialize raw_data, storing as string: {e}")
|
||||
serialized['raw_data'] = str(raw_data)
|
||||
|
||||
# Serialize processed_data if present
|
||||
if 'processed_data' in serialized and serialized['processed_data'] is not None:
|
||||
if not isinstance(serialized['processed_data'], str):
|
||||
try:
|
||||
processed_data = serialized['processed_data']
|
||||
if isinstance(processed_data, dict):
|
||||
json_safe_data = {}
|
||||
for k, v in processed_data.items():
|
||||
if hasattr(v, 'isoformat'): # datetime-like object
|
||||
json_safe_data[k] = v.isoformat()
|
||||
else:
|
||||
json_safe_data[k] = v
|
||||
serialized['processed_data'] = json_safe_data
|
||||
except Exception as e:
|
||||
logger.warning(f"Could not serialize processed_data, storing as string: {e}")
|
||||
serialized['processed_data'] = str(processed_data)
|
||||
|
||||
return serialized
|
||||
|
||||
async def bulk_create_weather_data(self, weather_records: List[Dict[str, Any]]) -> None:
|
||||
"""
|
||||
Bulk inserts new weather records into the database.
|
||||
Used by WeatherService after fetching new historical data from an external API.
|
||||
"""
|
||||
try:
|
||||
if not weather_records:
|
||||
return
|
||||
|
||||
# Serialize JSON fields before creating model instances
|
||||
serialized_records = [self._serialize_json_fields(data) for data in weather_records]
|
||||
records = [WeatherData(**data) for data in serialized_records]
|
||||
self.session.add_all(records)
|
||||
await self.session.commit()
|
||||
logger.info(f"Successfully bulk inserted {len(records)} weather records")
|
||||
|
||||
except Exception as e:
|
||||
await self.session.rollback()
|
||||
logger.error(
|
||||
"Failed to bulk create weather records",
|
||||
error=str(e),
|
||||
count=len(weather_records)
|
||||
)
|
||||
raise
|
||||
|
||||
async def create_weather_data(self, data: Dict[str, Any]) -> WeatherData:
|
||||
"""
|
||||
Creates a single new weather data record.
|
||||
"""
|
||||
try:
|
||||
# Serialize JSON fields before creating model instance
|
||||
serialized_data = self._serialize_json_fields(data)
|
||||
new_record = WeatherData(**serialized_data)
|
||||
self.session.add(new_record)
|
||||
await self.session.commit()
|
||||
await self.session.refresh(new_record)
|
||||
logger.info(f"Created new weather record with ID {new_record.id}")
|
||||
return new_record
|
||||
|
||||
except Exception as e:
|
||||
await self.session.rollback()
|
||||
logger.error("Failed to create single weather record", error=str(e))
|
||||
raise
|
||||
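A sketch of a bulk insert through the repository above; raw_data containing datetime values is converted to ISO strings by _serialize_json_fields() before the models are built. The WeatherData model itself is not shown in this hunk, so field names beyond location_id, date, and raw_data are deliberately omitted.

from datetime import datetime

from app.repositories.weather_repository import WeatherRepository

async def store_weather(session) -> None:
    repo = WeatherRepository(session)
    await repo.bulk_create_weather_data([{
        "location_id": "40.4168,-3.7038",
        "date": datetime(2025, 10, 1, 12, 0),
        "raw_data": {"observed_at": datetime(2025, 10, 1, 12, 0), "temp": 21.4},
    }])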
1
services/external/app/schemas/__init__.py
vendored
Normal file
@@ -0,0 +1 @@
|
||||
# services/external/app/schemas/__init__.py
|
||||
134
services/external/app/schemas/calendar.py
vendored
Normal file
@@ -0,0 +1,134 @@
|
||||
# services/external/app/schemas/calendar.py
|
||||
"""
|
||||
Calendar Schemas - Request/Response types for school calendars and location context
|
||||
"""
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
from typing import Optional, List, Dict, Any
|
||||
from uuid import UUID
|
||||
|
||||
|
||||
class SchoolCalendarResponse(BaseModel):
|
||||
"""School calendar information"""
|
||||
calendar_id: str
|
||||
calendar_name: str
|
||||
city_id: str
|
||||
school_type: str
|
||||
academic_year: str
|
||||
holiday_periods: List[Dict[str, Any]]
|
||||
school_hours: Dict[str, Any]
|
||||
source: Optional[str] = None
|
||||
enabled: bool = True
|
||||
|
||||
class Config:
|
||||
json_schema_extra = {
|
||||
"example": {
|
||||
"calendar_id": "madrid_primary_2024_2025",
|
||||
"calendar_name": "Madrid Primary School Calendar 2024-2025",
|
||||
"city_id": "madrid",
|
||||
"school_type": "primary",
|
||||
"academic_year": "2024-2025",
|
||||
"holiday_periods": [
|
||||
{
|
||||
"name": "Christmas Holiday",
|
||||
"start_date": "2024-12-23",
|
||||
"end_date": "2025-01-07",
|
||||
"description": "Christmas and New Year break"
|
||||
}
|
||||
],
|
||||
"school_hours": {
|
||||
"morning_start": "09:00",
|
||||
"morning_end": "14:00",
|
||||
"has_afternoon_session": False
|
||||
},
|
||||
"source": "madrid_education_dept_2024",
|
||||
"enabled": True
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
class SchoolCalendarListResponse(BaseModel):
|
||||
"""List of school calendars for a city"""
|
||||
city_id: str
|
||||
calendars: List[SchoolCalendarResponse]
|
||||
total: int
|
||||
|
||||
|
||||
class CalendarCheckResponse(BaseModel):
|
||||
"""Response for holiday check"""
|
||||
date: str = Field(..., description="Date checked (ISO format)")
|
||||
is_holiday: bool = Field(..., description="Whether the date is a school holiday")
|
||||
holiday_name: Optional[str] = Field(None, description="Name of the holiday if applicable")
|
||||
calendar_id: str
|
||||
calendar_name: str
|
||||
|
||||
|
||||
class TenantLocationContextResponse(BaseModel):
|
||||
"""Tenant location context with calendar details"""
|
||||
tenant_id: str
|
||||
city_id: str
|
||||
neighborhood: Optional[str] = None
|
||||
local_events: Optional[List[Dict[str, Any]]] = None
|
||||
notes: Optional[str] = None
|
||||
calendar: Optional[Dict[str, Any]] = Field(
|
||||
None,
|
||||
description="Full calendar details if assigned"
|
||||
)
|
||||
|
||||
class Config:
|
||||
json_schema_extra = {
|
||||
"example": {
|
||||
"tenant_id": "fbffcf18-d02a-4104-b6e3-0b32006e3e47",
|
||||
"city_id": "madrid",
|
||||
"neighborhood": "Chamberí",
|
||||
"local_events": [
|
||||
{
|
||||
"name": "Neighborhood Festival",
|
||||
"date": "2025-06-15",
|
||||
"impact": "high"
|
||||
}
|
||||
],
|
||||
"notes": "Bakery near primary school",
|
||||
"calendar": {
|
||||
"calendar_id": "uuid",
|
||||
"calendar_name": "Madrid Primary School Calendar 2024-2025",
|
||||
"school_type": "primary",
|
||||
"academic_year": "2024-2025",
|
||||
"holiday_periods": [],
|
||||
"school_hours": {},
|
||||
"source": "madrid_education_dept_2024"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
class TenantLocationContextCreateRequest(BaseModel):
|
||||
"""Request to create/update tenant location context"""
|
||||
city_id: str = Field(..., description="City ID (e.g., 'madrid')")
|
||||
school_calendar_id: Optional[UUID] = Field(
|
||||
None,
|
||||
description="School calendar ID to assign"
|
||||
)
|
||||
neighborhood: Optional[str] = Field(None, description="Neighborhood name")
|
||||
local_events: Optional[List[Dict[str, Any]]] = Field(
|
||||
None,
|
||||
description="Local events specific to this location"
|
||||
)
|
||||
notes: Optional[str] = Field(None, description="Additional notes")
|
||||
|
||||
class Config:
|
||||
json_schema_extra = {
|
||||
"example": {
|
||||
"city_id": "madrid",
|
||||
"school_calendar_id": "123e4567-e89b-12d3-a456-426614174000",
|
||||
"neighborhood": "Chamberí",
|
||||
"local_events": [
|
||||
{
|
||||
"name": "Local Market Day",
|
||||
"date": "2025-05-20",
|
||||
"impact": "medium"
|
||||
}
|
||||
],
|
||||
"notes": "Bakery located near primary school entrance"
|
||||
}
|
||||
}
|
||||
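A sketch of validating an incoming payload with the request schema above (assuming Pydantic v2, which the json_schema_extra and field_validator usage in these schemas suggests):

from app.schemas.calendar import TenantLocationContextCreateRequest

req = TenantLocationContextCreateRequest(
    city_id="madrid",
    neighborhood="Chamberí",
    local_events=[{"name": "Local Market Day", "date": "2025-05-20", "impact": "medium"}],
)
print(req.model_dump(exclude_none=True))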
36
services/external/app/schemas/city_data.py
vendored
Normal file
@@ -0,0 +1,36 @@
|
||||
# services/external/app/schemas/city_data.py
|
||||
"""
|
||||
City Data Schemas - New response types for city-based operations
|
||||
"""
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
from typing import Optional
|
||||
|
||||
|
||||
class CityInfoResponse(BaseModel):
|
||||
"""Information about a supported city"""
|
||||
city_id: str
|
||||
name: str
|
||||
country: str
|
||||
latitude: float
|
||||
longitude: float
|
||||
radius_km: float
|
||||
weather_provider: str
|
||||
traffic_provider: str
|
||||
enabled: bool
|
||||
|
||||
|
||||
class DataAvailabilityResponse(BaseModel):
|
||||
"""Data availability for a city"""
|
||||
city_id: str
|
||||
city_name: str
|
||||
|
||||
weather_available: bool
|
||||
weather_start_date: Optional[str] = None
|
||||
weather_end_date: Optional[str] = None
|
||||
weather_record_count: int = 0
|
||||
|
||||
traffic_available: bool
|
||||
traffic_start_date: Optional[str] = None
|
||||
traffic_end_date: Optional[str] = None
|
||||
traffic_record_count: int = 0
|
||||
106
services/external/app/schemas/traffic.py
vendored
Normal file
@@ -0,0 +1,106 @@
|
||||
# services/external/app/schemas/traffic.py
|
||||
"""
|
||||
Traffic Service Pydantic Schemas
|
||||
"""
|
||||
|
||||
from pydantic import BaseModel, Field, field_validator
|
||||
from datetime import datetime
|
||||
from typing import Optional, List
|
||||
from uuid import UUID
|
||||
|
||||
class TrafficDataBase(BaseModel):
|
||||
"""Base traffic data schema"""
|
||||
location_id: str = Field(..., max_length=100, description="Traffic monitoring location ID")
|
||||
date: datetime = Field(..., description="Date and time of traffic measurement")
|
||||
traffic_volume: Optional[int] = Field(None, ge=0, description="Vehicles per hour")
|
||||
pedestrian_count: Optional[int] = Field(None, ge=0, description="Pedestrians per hour")
|
||||
congestion_level: Optional[str] = Field(None, pattern="^(low|medium|high)$", description="Traffic congestion level")
|
||||
average_speed: Optional[float] = Field(None, ge=0, le=200, description="Average speed in km/h")
|
||||
source: str = Field("madrid_opendata", max_length=50, description="Data source")
|
||||
raw_data: Optional[str] = Field(None, description="Raw data from source")
|
||||
|
||||
class TrafficDataCreate(TrafficDataBase):
|
||||
"""Schema for creating traffic data"""
|
||||
pass
|
||||
|
||||
class TrafficDataUpdate(BaseModel):
|
||||
"""Schema for updating traffic data"""
|
||||
traffic_volume: Optional[int] = Field(None, ge=0)
|
||||
pedestrian_count: Optional[int] = Field(None, ge=0)
|
||||
congestion_level: Optional[str] = Field(None, pattern="^(low|medium|high)$")
|
||||
average_speed: Optional[float] = Field(None, ge=0, le=200)
|
||||
raw_data: Optional[str] = None
|
||||
|
||||
class TrafficDataResponseDB(TrafficDataBase):
|
||||
"""Schema for traffic data responses from database"""
|
||||
id: str = Field(..., description="Unique identifier")
|
||||
created_at: datetime = Field(..., description="Creation timestamp")
|
||||
updated_at: datetime = Field(..., description="Last update timestamp")
|
||||
|
||||
@field_validator('id', mode='before')
|
||||
@classmethod
|
||||
def convert_uuid_to_string(cls, v):
|
||||
if isinstance(v, UUID):
|
||||
return str(v)
|
||||
return v
|
||||
|
||||
class Config:
|
||||
from_attributes = True
|
||||
json_encoders = {
|
||||
datetime: lambda v: v.isoformat()
|
||||
}
|
||||
|
||||
class TrafficDataList(BaseModel):
|
||||
"""Schema for paginated traffic data responses"""
|
||||
data: List[TrafficDataResponseDB]
|
||||
total: int = Field(..., description="Total number of records")
|
||||
page: int = Field(..., description="Current page number")
|
||||
per_page: int = Field(..., description="Records per page")
|
||||
has_next: bool = Field(..., description="Whether there are more pages")
|
||||
has_prev: bool = Field(..., description="Whether there are previous pages")
|
||||
|
||||
class TrafficAnalytics(BaseModel):
|
||||
"""Schema for traffic analytics"""
|
||||
location_id: str
|
||||
period_start: datetime
|
||||
period_end: datetime
|
||||
avg_traffic_volume: Optional[float] = None
|
||||
avg_pedestrian_count: Optional[float] = None
|
||||
peak_traffic_hour: Optional[int] = None
|
||||
peak_pedestrian_hour: Optional[int] = None
|
||||
congestion_distribution: dict = Field(default_factory=dict)
|
||||
avg_speed: Optional[float] = None
|
||||
|
||||
class TrafficDataResponse(BaseModel):
|
||||
"""Schema for API traffic data responses"""
|
||||
date: datetime = Field(..., description="Date and time of traffic measurement")
|
||||
traffic_volume: Optional[int] = Field(None, ge=0, description="Vehicles per hour")
|
||||
pedestrian_count: Optional[int] = Field(None, ge=0, description="Pedestrians per hour")
|
||||
congestion_level: Optional[str] = Field(None, pattern="^(low|medium|high)$", description="Traffic congestion level")
|
||||
average_speed: Optional[float] = Field(None, ge=0, le=200, description="Average speed in km/h")
|
||||
source: str = Field(..., description="Data source")
|
||||
|
||||
class Config:
|
||||
json_encoders = {
|
||||
datetime: lambda v: v.isoformat()
|
||||
}
|
||||
|
||||
class LocationRequest(BaseModel):
|
||||
latitude: float
|
||||
longitude: float
|
||||
address: Optional[str] = None
|
||||
|
||||
class DateRangeRequest(BaseModel):
|
||||
start_date: datetime
|
||||
end_date: datetime
|
||||
|
||||
class HistoricalTrafficRequest(BaseModel):
|
||||
latitude: float
|
||||
longitude: float
|
||||
start_date: datetime
|
||||
end_date: datetime
|
||||
|
||||
class TrafficForecastRequest(BaseModel):
|
||||
latitude: float
|
||||
longitude: float
|
||||
hours: int = 24
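# Usage sketch (illustrative, not part of the module): how these schemas
# validate input, assuming the module is importable as app.schemas.traffic
# inside the service container; all values below are made up.
if __name__ == "__main__":
    from datetime import datetime, timezone

    from pydantic import ValidationError

    # Valid payload: congestion_level must match ^(low|medium|high)$ and
    # average_speed must fall within [0, 200].
    record = TrafficDataCreate(
        location_id="PM10001",  # hypothetical sensor id
        date=datetime(2025, 5, 20, 8, 0, tzinfo=timezone.utc),
        traffic_volume=1200,
        pedestrian_count=340,
        congestion_level="medium",
        average_speed=32.5,
    )
    print(record.model_dump())

    # An out-of-vocabulary congestion level is rejected by the pattern check.
    try:
        TrafficDataUpdate(congestion_level="gridlock")
    except ValidationError as exc:
        print(exc.errors()[0]["type"])  # string_pattern_mismatch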
|
||||
173
services/external/app/schemas/weather.py
vendored
Normal file
173
services/external/app/schemas/weather.py
vendored
Normal file
@@ -0,0 +1,173 @@
|
||||
# services/external/app/schemas/weather.py
|
||||
"""Weather data schemas"""
|
||||
|
||||
from pydantic import BaseModel, Field, field_validator
|
||||
from datetime import datetime
|
||||
from typing import Optional, List
|
||||
from uuid import UUID
|
||||
|
||||
class WeatherDataBase(BaseModel):
|
||||
"""Base weather data schema"""
|
||||
location_id: str = Field(..., max_length=100, description="Weather monitoring location ID")
|
||||
date: datetime = Field(..., description="Date and time of weather measurement")
|
||||
temperature: Optional[float] = Field(None, ge=-50, le=60, description="Temperature in Celsius")
|
||||
precipitation: Optional[float] = Field(None, ge=0, description="Precipitation in mm")
|
||||
humidity: Optional[float] = Field(None, ge=0, le=100, description="Humidity percentage")
|
||||
wind_speed: Optional[float] = Field(None, ge=0, le=200, description="Wind speed in km/h")
|
||||
pressure: Optional[float] = Field(None, ge=800, le=1200, description="Atmospheric pressure in hPa")
|
||||
description: Optional[str] = Field(None, max_length=200, description="Weather description")
|
||||
source: str = Field("aemet", max_length=50, description="Data source")
|
||||
raw_data: Optional[str] = Field(None, description="Raw data from source")
|
||||
|
||||
class WeatherDataCreate(WeatherDataBase):
|
||||
"""Schema for creating weather data"""
|
||||
pass
|
||||
|
||||
class WeatherDataUpdate(BaseModel):
|
||||
"""Schema for updating weather data"""
|
||||
temperature: Optional[float] = Field(None, ge=-50, le=60)
|
||||
precipitation: Optional[float] = Field(None, ge=0)
|
||||
humidity: Optional[float] = Field(None, ge=0, le=100)
|
||||
wind_speed: Optional[float] = Field(None, ge=0, le=200)
|
||||
pressure: Optional[float] = Field(None, ge=800, le=1200)
|
||||
description: Optional[str] = Field(None, max_length=200)
|
||||
raw_data: Optional[str] = None
|
||||
|
||||
class WeatherDataResponse(WeatherDataBase):
|
||||
"""Schema for weather data responses"""
|
||||
id: str = Field(..., description="Unique identifier")
|
||||
created_at: datetime = Field(..., description="Creation timestamp")
|
||||
updated_at: datetime = Field(..., description="Last update timestamp")
|
||||
|
||||
@field_validator('id', mode='before')
|
||||
@classmethod
|
||||
def convert_uuid_to_string(cls, v):
|
||||
if isinstance(v, UUID):
|
||||
return str(v)
|
||||
return v
|
||||
|
||||
class Config:
|
||||
from_attributes = True
|
||||
json_encoders = {
|
||||
datetime: lambda v: v.isoformat()
|
||||
}
|
||||
|
||||
class WeatherForecastBase(BaseModel):
|
||||
"""Base weather forecast schema"""
|
||||
location_id: str = Field(..., max_length=100, description="Location ID")
|
||||
forecast_date: datetime = Field(..., description="Date for forecast")
|
||||
temperature: Optional[float] = Field(None, ge=-50, le=60, description="Forecasted temperature")
|
||||
precipitation: Optional[float] = Field(None, ge=0, description="Forecasted precipitation")
|
||||
humidity: Optional[float] = Field(None, ge=0, le=100, description="Forecasted humidity")
|
||||
wind_speed: Optional[float] = Field(None, ge=0, le=200, description="Forecasted wind speed")
|
||||
description: Optional[str] = Field(None, max_length=200, description="Forecast description")
|
||||
source: str = Field("aemet", max_length=50, description="Data source")
|
||||
raw_data: Optional[str] = Field(None, description="Raw forecast data")
|
||||
|
||||
class WeatherForecastCreate(WeatherForecastBase):
|
||||
"""Schema for creating weather forecasts"""
|
||||
pass
|
||||
|
||||
class WeatherForecastResponse(WeatherForecastBase):
|
||||
"""Schema for weather forecast responses"""
|
||||
id: str = Field(..., description="Unique identifier")
|
||||
generated_at: datetime = Field(..., description="When forecast was generated")
|
||||
created_at: datetime = Field(..., description="Creation timestamp")
|
||||
updated_at: datetime = Field(..., description="Last update timestamp")
|
||||
|
||||
@field_validator('id', mode='before')
|
||||
@classmethod
|
||||
def convert_uuid_to_string(cls, v):
|
||||
if isinstance(v, UUID):
|
||||
return str(v)
|
||||
return v
|
||||
|
||||
class Config:
|
||||
from_attributes = True
|
||||
json_encoders = {
|
||||
datetime: lambda v: v.isoformat()
|
||||
}
|
||||
|
||||
class WeatherDataList(BaseModel):
|
||||
"""Schema for paginated weather data responses"""
|
||||
data: List[WeatherDataResponse]
|
||||
total: int = Field(..., description="Total number of records")
|
||||
page: int = Field(..., description="Current page number")
|
||||
per_page: int = Field(..., description="Records per page")
|
||||
has_next: bool = Field(..., description="Whether there are more pages")
|
||||
has_prev: bool = Field(..., description="Whether there are previous pages")
|
||||
|
||||
class WeatherForecastList(BaseModel):
|
||||
"""Schema for paginated weather forecast responses"""
|
||||
forecasts: List[WeatherForecastResponse]
|
||||
total: int = Field(..., description="Total number of forecasts")
|
||||
page: int = Field(..., description="Current page number")
|
||||
per_page: int = Field(..., description="Forecasts per page")
|
||||
|
||||
class WeatherAnalytics(BaseModel):
|
||||
"""Schema for weather analytics"""
|
||||
location_id: str
|
||||
period_start: datetime
|
||||
period_end: datetime
|
||||
avg_temperature: Optional[float] = None
|
||||
min_temperature: Optional[float] = None
|
||||
max_temperature: Optional[float] = None
|
||||
total_precipitation: Optional[float] = None
|
||||
avg_humidity: Optional[float] = None
|
||||
avg_wind_speed: Optional[float] = None
|
||||
avg_pressure: Optional[float] = None
|
||||
weather_conditions: dict = Field(default_factory=dict)
|
||||
rainy_days: int = 0
|
||||
sunny_days: int = 0
|
||||
|
||||
class LocationRequest(BaseModel):
|
||||
latitude: float
|
||||
longitude: float
|
||||
address: Optional[str] = None
|
||||
|
||||
class DateRangeRequest(BaseModel):
|
||||
start_date: datetime
|
||||
end_date: datetime
|
||||
|
||||
class HistoricalWeatherRequest(BaseModel):
|
||||
latitude: float
|
||||
longitude: float
|
||||
start_date: datetime
|
||||
end_date: datetime
|
||||
|
||||
class WeatherForecastRequest(BaseModel):
|
||||
latitude: float
|
||||
longitude: float
|
||||
days: int
|
||||
|
||||
class HourlyForecastRequest(BaseModel):
|
||||
latitude: float
|
||||
longitude: float
|
||||
hours: int = Field(default=48, ge=1, le=48, description="Number of hours to forecast (1-48)")
|
||||
|
||||
class HourlyForecastResponse(BaseModel):
|
||||
forecast_datetime: datetime
|
||||
generated_at: datetime
|
||||
temperature: Optional[float]
|
||||
precipitation: Optional[float]
|
||||
humidity: Optional[float]
|
||||
wind_speed: Optional[float]
|
||||
description: Optional[str]
|
||||
source: str
|
||||
hour: int
|
||||
|
||||
class WeatherForecastAPIResponse(BaseModel):
|
||||
"""Simplified schema for API weather forecast responses (without database fields)"""
|
||||
forecast_date: datetime = Field(..., description="Date for forecast")
|
||||
generated_at: datetime = Field(..., description="When forecast was generated")
|
||||
temperature: Optional[float] = Field(None, ge=-50, le=60, description="Forecasted temperature")
|
||||
precipitation: Optional[float] = Field(None, ge=0, description="Forecasted precipitation")
|
||||
humidity: Optional[float] = Field(None, ge=0, le=100, description="Forecasted humidity")
|
||||
wind_speed: Optional[float] = Field(None, ge=0, le=200, description="Forecasted wind speed")
|
||||
description: Optional[str] = Field(None, max_length=200, description="Forecast description")
|
||||
source: str = Field("aemet", max_length=50, description="Data source")
|
||||
|
||||
class Config:
|
||||
json_encoders = {
|
||||
datetime: lambda v: v.isoformat()
|
||||
}
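# Usage sketch (illustrative, not part of the module): the hours field of
# HourlyForecastRequest is restricted to 1-48 by its Field constraints.
if __name__ == "__main__":
    from pydantic import ValidationError

    req = HourlyForecastRequest(latitude=40.4168, longitude=-3.7038)
    print(req.hours)  # 48 (default)

    try:
        HourlyForecastRequest(latitude=40.4168, longitude=-3.7038, hours=72)
    except ValidationError as exc:
        print(exc.errors()[0]["type"])  # less_than_equal, since hours > 48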
|
||||
1
services/external/app/services/__init__.py
vendored
Normal file
1
services/external/app/services/__init__.py
vendored
Normal file
@@ -0,0 +1 @@
# services/external/app/services/__init__.py
269
services/external/app/services/competitor_analyzer.py
vendored
Normal file
269
services/external/app/services/competitor_analyzer.py
vendored
Normal file
@@ -0,0 +1,269 @@
|
||||
"""
|
||||
Competitor Analyzer
|
||||
|
||||
Specialized analysis for competitor bakeries with competitive pressure modeling.
|
||||
Treats competitor proximity differently than other POIs, considering market dynamics.
|
||||
"""
|
||||
|
||||
from typing import Dict, List, Any, Tuple
|
||||
import structlog
|
||||
from math import radians, sin, cos, sqrt, atan2
|
||||
|
||||
from app.core.poi_config import COMPETITOR_ZONES
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
|
||||
class CompetitorAnalyzer:
|
||||
"""
|
||||
Competitive landscape analyzer for bakery locations.
|
||||
|
||||
Models competitive pressure considering:
|
||||
- Direct competition (<100m): Strong negative impact
|
||||
- Nearby competition (100-500m): Moderate negative impact
|
||||
- Market saturation (500-1000m): Can be positive (bakery district)
|
||||
or negative (competitive market)
|
||||
"""
|
||||
|
||||
def analyze_competitive_landscape(
|
||||
self,
|
||||
competitor_pois: List[Dict[str, Any]],
|
||||
bakery_location: Tuple[float, float],
|
||||
tenant_id: str = None
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Analyze competitive pressure from nearby bakeries.
|
||||
|
||||
Args:
|
||||
competitor_pois: List of detected competitor POIs
|
||||
bakery_location: Tuple of (latitude, longitude)
|
||||
tenant_id: Optional tenant ID for logging
|
||||
|
||||
Returns:
|
||||
Competitive analysis with pressure scores and market classification
|
||||
"""
|
||||
if not competitor_pois:
|
||||
logger.info(
|
||||
"No competitors detected - underserved market",
|
||||
tenant_id=tenant_id
|
||||
)
|
||||
return {
|
||||
"competitive_pressure_score": 0.0,
|
||||
"direct_competitors_count": 0,
|
||||
"nearby_competitors_count": 0,
|
||||
"market_competitors_count": 0,
|
||||
"competitive_zone": "low_competition",
|
||||
"market_type": "underserved",
|
||||
"competitive_advantage": "first_mover",
|
||||
"ml_feature_competitive_pressure": 0.0,
|
||||
"ml_feature_has_direct_competitor": 0,
|
||||
"ml_feature_competitor_density_500m": 0,
|
||||
"competitor_details": []
|
||||
}
|
||||
|
||||
# Categorize competitors by distance
|
||||
direct_competitors = [] # <100m
|
||||
nearby_competitors = [] # 100-500m
|
||||
market_competitors = [] # 500-1000m
|
||||
competitor_details = []
|
||||
|
||||
for poi in competitor_pois:
|
||||
distance_m = self._calculate_distance(
|
||||
bakery_location, (poi["lat"], poi["lon"])
|
||||
) * 1000
|
||||
|
||||
competitor_info = {
|
||||
"name": poi.get("name", "Unnamed"),
|
||||
"osm_id": poi.get("osm_id"),
|
||||
"distance_m": round(distance_m, 1),
|
||||
"lat": poi["lat"],
|
||||
"lon": poi["lon"]
|
||||
}
|
||||
|
||||
if distance_m < COMPETITOR_ZONES["direct"]["max_distance_m"]:
|
||||
direct_competitors.append(poi)
|
||||
competitor_info["zone"] = "direct"
|
||||
elif distance_m < COMPETITOR_ZONES["nearby"]["max_distance_m"]:
|
||||
nearby_competitors.append(poi)
|
||||
competitor_info["zone"] = "nearby"
|
||||
elif distance_m < COMPETITOR_ZONES["market"]["max_distance_m"]:
|
||||
market_competitors.append(poi)
|
||||
competitor_info["zone"] = "market"
|
||||
|
||||
competitor_details.append(competitor_info)
|
||||
|
||||
# Calculate competitive pressure score
|
||||
direct_pressure = (
|
||||
len(direct_competitors) *
|
||||
COMPETITOR_ZONES["direct"]["pressure_multiplier"]
|
||||
)
|
||||
nearby_pressure = (
|
||||
len(nearby_competitors) *
|
||||
COMPETITOR_ZONES["nearby"]["pressure_multiplier"]
|
||||
)
|
||||
|
||||
# Market saturation analysis
|
||||
min_for_district = COMPETITOR_ZONES["market"]["min_count_for_district"]
|
||||
if len(market_competitors) >= min_for_district:
|
||||
# Many bakeries = destination area (bakery district)
|
||||
market_pressure = COMPETITOR_ZONES["market"]["district_multiplier"]
|
||||
market_type = "bakery_district"
|
||||
elif len(market_competitors) > 2:
|
||||
market_pressure = COMPETITOR_ZONES["market"]["normal_multiplier"]
|
||||
market_type = "competitive_market"
|
||||
else:
|
||||
market_pressure = 0.0
|
||||
market_type = "normal_market"
|
||||
|
||||
competitive_pressure_score = (
|
||||
direct_pressure + nearby_pressure + market_pressure
|
||||
)
|
||||
|
||||
# Determine competitive zone classification
|
||||
if len(direct_competitors) > 0:
|
||||
competitive_zone = "high_competition"
|
||||
competitive_advantage = "differentiation_required"
|
||||
elif len(nearby_competitors) > 2:
|
||||
competitive_zone = "moderate_competition"
|
||||
competitive_advantage = "quality_focused"
|
||||
else:
|
||||
competitive_zone = "low_competition"
|
||||
competitive_advantage = "local_leader"
|
||||
|
||||
# Sort competitors by distance
|
||||
competitor_details.sort(key=lambda x: x["distance_m"])
|
||||
|
||||
logger.info(
|
||||
"Competitive analysis complete",
|
||||
tenant_id=tenant_id,
|
||||
competitive_zone=competitive_zone,
|
||||
market_type=market_type,
|
||||
total_competitors=len(competitor_pois),
|
||||
direct=len(direct_competitors),
|
||||
nearby=len(nearby_competitors),
|
||||
market=len(market_competitors),
|
||||
pressure_score=competitive_pressure_score
|
||||
)
|
||||
|
||||
return {
|
||||
# Summary scores
|
||||
"competitive_pressure_score": round(competitive_pressure_score, 2),
|
||||
|
||||
# Competitor counts by zone
|
||||
"direct_competitors_count": len(direct_competitors),
|
||||
"nearby_competitors_count": len(nearby_competitors),
|
||||
"market_competitors_count": len(market_competitors),
|
||||
"total_competitors_count": len(competitor_pois),
|
||||
|
||||
# Market classification
|
||||
"competitive_zone": competitive_zone,
|
||||
"market_type": market_type,
|
||||
"competitive_advantage": competitive_advantage,
|
||||
|
||||
# ML features (for model integration)
|
||||
"ml_feature_competitive_pressure": round(competitive_pressure_score, 2),
|
||||
"ml_feature_has_direct_competitor": 1 if len(direct_competitors) > 0 else 0,
|
||||
"ml_feature_competitor_density_500m": (
|
||||
len(direct_competitors) + len(nearby_competitors)
|
||||
),
|
||||
|
||||
# Detailed competitor information
|
||||
"competitor_details": competitor_details,
|
||||
|
||||
# Nearest competitor
|
||||
"nearest_competitor": competitor_details[0] if competitor_details else None
|
||||
}
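# Worked sketch of the pressure arithmetic above, using a hypothetical zone
# configuration; the real multipliers come from
# app.core.poi_config.COMPETITOR_ZONES and may differ.
EXAMPLE_ZONES = {
    "direct": {"pressure_multiplier": -1.0},
    "nearby": {"pressure_multiplier": -0.3},
    "market": {
        "min_count_for_district": 5,
        "district_multiplier": 0.5,
        "normal_multiplier": -0.1,
    },
}

def example_pressure_score(direct: int, nearby: int, market: int) -> float:
    """Combine zone counts into a single competitive pressure score."""
    score = direct * EXAMPLE_ZONES["direct"]["pressure_multiplier"]
    score += nearby * EXAMPLE_ZONES["nearby"]["pressure_multiplier"]
    if market >= EXAMPLE_ZONES["market"]["min_count_for_district"]:
        score += EXAMPLE_ZONES["market"]["district_multiplier"]  # bakery district
    elif market > 2:
        score += EXAMPLE_ZONES["market"]["normal_multiplier"]    # competitive market
    return round(score, 2)

# 1 direct + 2 nearby + 6 within 1 km  ->  -1.0 - 0.6 + 0.5 = -1.1
print(example_pressure_score(direct=1, nearby=2, market=6))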
|
||||
|
||||
def _calculate_distance(
|
||||
self,
|
||||
coord1: Tuple[float, float],
|
||||
coord2: Tuple[float, float]
|
||||
) -> float:
|
||||
"""
|
||||
Calculate Haversine distance in kilometers.
|
||||
|
||||
Args:
|
||||
coord1: Tuple of (latitude, longitude)
|
||||
coord2: Tuple of (latitude, longitude)
|
||||
|
||||
Returns:
|
||||
Distance in kilometers
|
||||
"""
|
||||
lat1, lon1 = coord1
|
||||
lat2, lon2 = coord2
|
||||
|
||||
R = 6371 # Earth radius in km
|
||||
|
||||
dlat = radians(lat2 - lat1)
|
||||
dlon = radians(lon2 - lon1)
|
||||
|
||||
a = (sin(dlat/2)**2 +
|
||||
cos(radians(lat1)) * cos(radians(lat2)) * sin(dlon/2)**2)
|
||||
c = 2 * atan2(sqrt(a), sqrt(1-a))
|
||||
|
||||
return R * c
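# Quick sanity check of the Haversine computation above as a standalone
# function; the coordinates are illustrative points in central Madrid.
from math import atan2, cos, radians, sin, sqrt

def haversine_km(coord1, coord2):
    """Great-circle distance between two (lat, lon) pairs, in kilometres."""
    lat1, lon1 = coord1
    lat2, lon2 = coord2
    r = 6371  # mean Earth radius in km
    dlat = radians(lat2 - lat1)
    dlon = radians(lon2 - lon1)
    a = sin(dlat / 2) ** 2 + cos(radians(lat1)) * cos(radians(lat2)) * sin(dlon / 2) ** 2
    return r * 2 * atan2(sqrt(a), sqrt(1 - a))

# Roughly 360 m between two nearby street corners.
print(round(haversine_km((40.4168, -3.7038), (40.4200, -3.7030)) * 1000), "m")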
|
||||
|
||||
def get_competitive_insights(
|
||||
self,
|
||||
analysis_result: Dict[str, Any]
|
||||
) -> List[str]:
|
||||
"""
|
||||
Generate human-readable competitive insights.
|
||||
|
||||
Args:
|
||||
analysis_result: Result from analyze_competitive_landscape
|
||||
|
||||
Returns:
|
||||
List of insight strings for business intelligence
|
||||
"""
|
||||
insights = []
|
||||
|
||||
zone = analysis_result["competitive_zone"]
|
||||
market = analysis_result["market_type"]
|
||||
pressure = analysis_result["competitive_pressure_score"]
|
||||
direct = analysis_result["direct_competitors_count"]
|
||||
nearby = analysis_result["nearby_competitors_count"]
|
||||
|
||||
# Zone-specific insights
|
||||
if zone == "high_competition":
|
||||
insights.append(
|
||||
f"⚠️ High competition: {direct} direct competitor(s) within 100m. "
|
||||
"Focus on differentiation and quality."
|
||||
)
|
||||
elif zone == "moderate_competition":
|
||||
insights.append(
|
||||
f"Moderate competition: {nearby} nearby competitor(s) within 500m. "
|
||||
"Good opportunity for market share."
|
||||
)
|
||||
else:
|
||||
insights.append(
|
||||
"✅ Low competition: Local market leader opportunity."
|
||||
)
|
||||
|
||||
# Market type insights
|
||||
if market == "bakery_district":
|
||||
insights.append(
|
||||
"📍 Bakery district: High foot traffic area with multiple bakeries. "
|
||||
"Customers actively seek bakery products here."
|
||||
)
|
||||
elif market == "competitive_market":
|
||||
insights.append(
|
||||
"Market has multiple bakeries. Quality and customer service critical."
|
||||
)
|
||||
elif market == "underserved":
|
||||
insights.append(
|
||||
"🎯 Underserved market: Potential for strong customer base growth."
|
||||
)
|
||||
|
||||
# Pressure score insight
|
||||
if pressure < -1.5:
|
||||
insights.append(
|
||||
"Strong competitive pressure expected to impact demand. "
|
||||
"Marketing and differentiation essential."
|
||||
)
|
||||
elif pressure > 0:
|
||||
insights.append(
|
||||
"Positive market dynamics: Location benefits from bakery destination traffic."
|
||||
)
|
||||
|
||||
return insights
|
||||
282
services/external/app/services/nominatim_service.py
vendored
Normal file
282
services/external/app/services/nominatim_service.py
vendored
Normal file
@@ -0,0 +1,282 @@
|
||||
"""
|
||||
Nominatim Geocoding Service
|
||||
|
||||
Provides address search and geocoding using OpenStreetMap Nominatim API.
|
||||
For development: uses public API (rate-limited)
|
||||
For production: should point to self-hosted Nominatim instance
|
||||
"""
|
||||
|
||||
import httpx
|
||||
from typing import List, Dict, Any, Optional
|
||||
import structlog
|
||||
from asyncio import sleep
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
|
||||
class NominatimService:
|
||||
"""
|
||||
Nominatim geocoding and address search service.
|
||||
|
||||
Uses OpenStreetMap Nominatim API for address autocomplete and geocoding.
|
||||
Respects rate limits and usage policy.
|
||||
"""
|
||||
|
||||
# For development: public API (rate-limited to 1 req/sec)
|
||||
# For production: should be overridden with self-hosted instance
|
||||
DEFAULT_BASE_URL = "https://nominatim.openstreetmap.org"
|
||||
|
||||
def __init__(self, base_url: Optional[str] = None, user_agent: str = "BakeryIA-Forecasting/1.0"):
|
||||
"""
|
||||
Initialize Nominatim service.
|
||||
|
||||
Args:
|
||||
base_url: Nominatim server URL (defaults to public API)
|
||||
user_agent: User agent for API requests (required by Nominatim policy)
|
||||
"""
|
||||
self.base_url = (base_url or self.DEFAULT_BASE_URL).rstrip("/")
|
||||
self.user_agent = user_agent
|
||||
self.headers = {
|
||||
"User-Agent": self.user_agent
|
||||
}
|
||||
|
||||
# Rate limiting for public API (1 request per second)
|
||||
self.is_public_api = self.base_url == self.DEFAULT_BASE_URL
|
||||
self.min_request_interval = 1.0 if self.is_public_api else 0.0
|
||||
|
||||
logger.info(
|
||||
"Nominatim service initialized",
|
||||
base_url=self.base_url,
|
||||
is_public_api=self.is_public_api,
|
||||
rate_limit=f"{self.min_request_interval}s" if self.is_public_api else "none"
|
||||
)
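# Alternative throttling sketch (not what this class does): instead of an
# unconditional 1 s sleep before every public-API call, wait only for the
# remainder of the interval since the previous request.
import asyncio
import time

class IntervalThrottle:
    """Guarantee at least `interval` seconds between successive calls."""

    def __init__(self, interval: float = 1.0):
        self.interval = interval
        self._last = 0.0
        self._lock = asyncio.Lock()

    async def wait(self) -> None:
        async with self._lock:
            elapsed = time.monotonic() - self._last
            if elapsed < self.interval:
                await asyncio.sleep(self.interval - elapsed)
            self._last = time.monotonic()

# Inside a request method this would replace the unconditional sleep:
#     await self._throttle.wait()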
|
||||
|
||||
async def search_address(
|
||||
self,
|
||||
query: str,
|
||||
country_code: str = "es",
|
||||
limit: int = 10
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Search for addresses matching query (autocomplete).
|
||||
|
||||
Args:
|
||||
query: Address search query
|
||||
country_code: ISO country code to restrict search (default: Spain)
|
||||
limit: Maximum number of results
|
||||
|
||||
Returns:
|
||||
List of address suggestions with display_name, lat, lon, osm_id, etc.
|
||||
"""
|
||||
if not query or len(query.strip()) < 3:
|
||||
logger.warning("Search query too short", query=query)
|
||||
return []
|
||||
|
||||
try:
|
||||
# Rate limiting for public API
|
||||
if self.is_public_api:
|
||||
await sleep(self.min_request_interval)
|
||||
|
||||
async with httpx.AsyncClient(timeout=10.0) as client:
|
||||
response = await client.get(
|
||||
f"{self.base_url}/search",
|
||||
params={
|
||||
"q": query,
|
||||
"format": "json",
|
||||
"addressdetails": 1,
|
||||
"countrycodes": country_code,
|
||||
"limit": limit,
|
||||
"accept-language": "es"
|
||||
},
|
||||
headers=self.headers
|
||||
)
|
||||
response.raise_for_status()
|
||||
results = response.json()
|
||||
|
||||
# Parse and enrich results
|
||||
addresses = []
|
||||
for result in results:
|
||||
addresses.append({
|
||||
"display_name": result.get("display_name"),
|
||||
"lat": float(result.get("lat")),
|
||||
"lon": float(result.get("lon")),
|
||||
"osm_type": result.get("osm_type"),
|
||||
"osm_id": result.get("osm_id"),
|
||||
"place_id": result.get("place_id"),
|
||||
"type": result.get("type"),
|
||||
"class": result.get("class"),
|
||||
"address": result.get("address", {}),
|
||||
"boundingbox": result.get("boundingbox", [])
|
||||
})
|
||||
|
||||
logger.info(
|
||||
"Address search completed",
|
||||
query=query,
|
||||
result_count=len(addresses)
|
||||
)
|
||||
|
||||
return addresses
|
||||
|
||||
except httpx.HTTPError as e:
|
||||
logger.error(
|
||||
"Nominatim API request failed",
|
||||
query=query,
|
||||
error=str(e)
|
||||
)
|
||||
return []
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"Unexpected error in address search",
|
||||
query=query,
|
||||
error=str(e),
|
||||
exc_info=True
|
||||
)
|
||||
return []
|
||||
|
||||
async def geocode_address(
|
||||
self,
|
||||
address: str,
|
||||
country_code: str = "es"
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
"""
|
||||
Geocode an address to get coordinates.
|
||||
|
||||
Args:
|
||||
address: Full address string
|
||||
country_code: ISO country code
|
||||
|
||||
Returns:
|
||||
Dictionary with lat, lon, display_name, address components or None
|
||||
"""
|
||||
results = await self.search_address(address, country_code, limit=1)
|
||||
|
||||
if not results:
|
||||
logger.warning("No geocoding results found", address=address)
|
||||
return None
|
||||
|
||||
result = results[0]
|
||||
|
||||
logger.info(
|
||||
"Address geocoded successfully",
|
||||
address=address,
|
||||
lat=result["lat"],
|
||||
lon=result["lon"]
|
||||
)
|
||||
|
||||
return result
|
||||
|
||||
async def reverse_geocode(
|
||||
self,
|
||||
latitude: float,
|
||||
longitude: float
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
"""
|
||||
Reverse geocode coordinates to get address.
|
||||
|
||||
Args:
|
||||
latitude: Latitude coordinate
|
||||
longitude: Longitude coordinate
|
||||
|
||||
Returns:
|
||||
Dictionary with address information or None
|
||||
"""
|
||||
try:
|
||||
# Rate limiting for public API
|
||||
if self.is_public_api:
|
||||
await sleep(self.min_request_interval)
|
||||
|
||||
async with httpx.AsyncClient(timeout=10.0) as client:
|
||||
response = await client.get(
|
||||
f"{self.base_url}/reverse",
|
||||
params={
|
||||
"lat": latitude,
|
||||
"lon": longitude,
|
||||
"format": "json",
|
||||
"addressdetails": 1,
|
||||
"accept-language": "es"
|
||||
},
|
||||
headers=self.headers
|
||||
)
|
||||
response.raise_for_status()
|
||||
result = response.json()
|
||||
|
||||
address_info = {
|
||||
"display_name": result.get("display_name"),
|
||||
"lat": float(result.get("lat")),
|
||||
"lon": float(result.get("lon")),
|
||||
"osm_type": result.get("osm_type"),
|
||||
"osm_id": result.get("osm_id"),
|
||||
"place_id": result.get("place_id"),
|
||||
"address": result.get("address", {}),
|
||||
"boundingbox": result.get("boundingbox", [])
|
||||
}
|
||||
|
||||
logger.info(
|
||||
"Reverse geocoding completed",
|
||||
lat=latitude,
|
||||
lon=longitude,
|
||||
address=address_info["display_name"]
|
||||
)
|
||||
|
||||
return address_info
|
||||
|
||||
except httpx.HTTPError as e:
|
||||
logger.error(
|
||||
"Nominatim reverse geocoding failed",
|
||||
lat=latitude,
|
||||
lon=longitude,
|
||||
error=str(e)
|
||||
)
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"Unexpected error in reverse geocoding",
|
||||
lat=latitude,
|
||||
lon=longitude,
|
||||
error=str(e),
|
||||
exc_info=True
|
||||
)
|
||||
return None
|
||||
|
||||
async def validate_coordinates(
|
||||
self,
|
||||
latitude: float,
|
||||
longitude: float
|
||||
) -> bool:
|
||||
"""
|
||||
Validate that coordinates point to a real location.
|
||||
|
||||
Args:
|
||||
latitude: Latitude to validate
|
||||
longitude: Longitude to validate
|
||||
|
||||
Returns:
|
||||
True if coordinates are valid, False otherwise
|
||||
"""
|
||||
if not (-90 <= latitude <= 90 and -180 <= longitude <= 180):
|
||||
return False
|
||||
|
||||
result = await self.reverse_geocode(latitude, longitude)
|
||||
return result is not None
|
||||
|
||||
async def health_check(self) -> bool:
|
||||
"""
|
||||
Check if Nominatim service is accessible.
|
||||
|
||||
Returns:
|
||||
True if service is healthy, False otherwise
|
||||
"""
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=5.0) as client:
|
||||
response = await client.get(
|
||||
f"{self.base_url}/status",
|
||||
params={"format": "json"},
|
||||
headers=self.headers
|
||||
)
|
||||
return response.status_code == 200
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"Nominatim health check failed",
|
||||
error=str(e)
|
||||
)
|
||||
return False
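# Minimal driver sketch for the service above (hits the public Nominatim
# endpoint, so it is rate-limited; the address is illustrative).
import asyncio

async def _demo() -> None:
    service = NominatimService()  # or NominatimService(base_url="http://nominatim.internal")
    matches = await service.search_address("Calle de Fuencarral 45, Madrid")
    if matches:
        top = matches[0]
        print(top["display_name"], top["lat"], top["lon"])
    print("healthy:", await service.health_check())

if __name__ == "__main__":
    asyncio.run(_demo())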
|
||||
466
services/external/app/services/poi_detection_service.py
vendored
Normal file
466
services/external/app/services/poi_detection_service.py
vendored
Normal file
@@ -0,0 +1,466 @@
|
||||
"""
|
||||
POI Detection Service
|
||||
|
||||
Automated Point of Interest detection using Overpass API (OpenStreetMap).
|
||||
Detects nearby POIs around bakery locations and generates ML features
|
||||
for location-based demand forecasting.
|
||||
"""
|
||||
|
||||
import overpy
|
||||
from typing import List, Dict, Any, Tuple, Optional
|
||||
from datetime import datetime, timezone, timedelta
|
||||
import asyncio
|
||||
import structlog
|
||||
import httpx
|
||||
from math import radians, sin, cos, sqrt, atan2
|
||||
import random
|
||||
|
||||
from app.core.poi_config import (
|
||||
POI_CATEGORIES,
|
||||
OVERPASS_API_URL,
|
||||
OVERPASS_TIMEOUT_SECONDS,
|
||||
OVERPASS_MAX_RETRIES,
|
||||
OVERPASS_RETRY_DELAY_SECONDS,
|
||||
DISTANCE_BANDS
|
||||
)
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
|
||||
class POIDetectionService:
|
||||
"""
|
||||
Automated POI detection using Overpass API (OpenStreetMap).
|
||||
|
||||
Detects points of interest near bakery locations and calculates
|
||||
ML features for demand forecasting with location-specific context.
|
||||
"""
|
||||
|
||||
def __init__(self, overpass_url: str = OVERPASS_API_URL):
|
||||
self.overpass_url = overpass_url
|
||||
self.api = overpy.Overpass(url=overpass_url)
|
||||
self.timeout = OVERPASS_TIMEOUT_SECONDS
|
||||
|
||||
async def detect_pois_for_bakery(
|
||||
self,
|
||||
latitude: float,
|
||||
longitude: float,
|
||||
tenant_id: str
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Detect all POIs around a bakery location.
|
||||
|
||||
Args:
|
||||
latitude: Bakery latitude
|
||||
longitude: Bakery longitude
|
||||
tenant_id: Tenant identifier for logging
|
||||
|
||||
Returns:
|
||||
Complete POI detection results with ML features
|
||||
"""
|
||||
logger.info(
|
||||
"Starting POI detection",
|
||||
tenant_id=tenant_id,
|
||||
location=(latitude, longitude)
|
||||
)
|
||||
|
||||
poi_results = {}
|
||||
detection_errors = []
|
||||
|
||||
# Query each POI category with inter-query delays
|
||||
category_items = list(POI_CATEGORIES.items())
|
||||
for idx, (category_key, category) in enumerate(category_items):
|
||||
try:
|
||||
pois = await self._query_pois_with_retry(
|
||||
latitude,
|
||||
longitude,
|
||||
category.osm_query,
|
||||
category.search_radius_m,
|
||||
category_key
|
||||
)
|
||||
|
||||
# Calculate features for this category
|
||||
features = self._calculate_poi_features(
|
||||
pois,
|
||||
(latitude, longitude),
|
||||
category
|
||||
)
|
||||
|
||||
poi_results[category_key] = {
|
||||
"pois": pois,
|
||||
"features": features,
|
||||
"count": len(pois)
|
||||
}
|
||||
|
||||
logger.info(
|
||||
f"Detected {category_key}",
|
||||
count=len(pois),
|
||||
proximity_score=features["proximity_score"]
|
||||
)
|
||||
|
||||
# Add delay between categories to respect rate limits
|
||||
# (except after the last category)
|
||||
if idx < len(category_items) - 1:
|
||||
inter_query_delay = 2.0 + random.uniform(0.5, 1.5)
|
||||
await asyncio.sleep(inter_query_delay)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"Failed to detect {category_key}",
|
||||
error=str(e),
|
||||
tenant_id=tenant_id
|
||||
)
|
||||
detection_errors.append({
|
||||
"category": category_key,
|
||||
"error": str(e)
|
||||
})
|
||||
poi_results[category_key] = {
|
||||
"pois": [],
|
||||
"features": self._get_empty_features(),
|
||||
"count": 0,
|
||||
"error": str(e)
|
||||
}
|
||||
|
||||
# Add a longer delay after an error before continuing
|
||||
if idx < len(category_items) - 1:
|
||||
error_recovery_delay = 3.0 + random.uniform(1.0, 2.0)
|
||||
await asyncio.sleep(error_recovery_delay)
|
||||
|
||||
# Generate combined ML features
|
||||
ml_features = self._generate_ml_features(poi_results)
|
||||
|
||||
# Generate summary
|
||||
summary = self._generate_summary(poi_results)
|
||||
|
||||
detection_status = "completed" if not detection_errors else (
|
||||
"partial" if len(detection_errors) < len(POI_CATEGORIES) else "failed"
|
||||
)
|
||||
|
||||
return {
|
||||
"tenant_id": tenant_id,
|
||||
"location": {"latitude": latitude, "longitude": longitude},
|
||||
"detection_timestamp": datetime.now(timezone.utc).isoformat(),
|
||||
"detection_status": detection_status,
|
||||
"detection_errors": detection_errors if detection_errors else None,
|
||||
"poi_categories": poi_results,
|
||||
"ml_features": ml_features,
|
||||
"summary": summary
|
||||
}
|
||||
|
||||
async def _query_pois_with_retry(
|
||||
self,
|
||||
latitude: float,
|
||||
longitude: float,
|
||||
osm_query: str,
|
||||
radius_m: int,
|
||||
category_key: str
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Query Overpass API with exponential backoff retry logic.
|
||||
|
||||
Implements:
|
||||
- Exponential backoff with jitter
|
||||
- Extended delays for rate limiting errors
|
||||
- Proper error type detection
|
||||
"""
|
||||
last_error = None
|
||||
base_delay = OVERPASS_RETRY_DELAY_SECONDS
|
||||
|
||||
for attempt in range(OVERPASS_MAX_RETRIES):
|
||||
try:
|
||||
return await self._query_pois(
|
||||
latitude, longitude, osm_query, radius_m
|
||||
)
|
||||
except Exception as e:
|
||||
last_error = e
|
||||
error_message = str(e).lower()
|
||||
|
||||
# Determine if this is a rate limiting error
|
||||
is_rate_limit = any(phrase in error_message for phrase in [
|
||||
'too many requests',
|
||||
'rate limit',
|
||||
'server load too high',
|
||||
'quota exceeded',
|
||||
'retry later',
|
||||
'429',
|
||||
'503',
|
||||
'504'
|
||||
])
|
||||
|
||||
if attempt < OVERPASS_MAX_RETRIES - 1:
|
||||
# Calculate exponential backoff with jitter
|
||||
# For rate limiting: use longer delays (10-30 seconds)
|
||||
# For other errors: use standard backoff (2-8 seconds)
|
||||
if is_rate_limit:
|
||||
delay = base_delay * (3 ** attempt) + random.uniform(1, 5)
|
||||
delay = min(delay, 30) # Cap at 30 seconds
|
||||
else:
|
||||
delay = base_delay * (2 ** attempt) + random.uniform(0.5, 1.5)
|
||||
delay = min(delay, 10) # Cap at 10 seconds
|
||||
|
||||
logger.warning(
|
||||
f"POI query retry {attempt + 1}/{OVERPASS_MAX_RETRIES}",
|
||||
category=category_key,
|
||||
error=str(e),
|
||||
is_rate_limit=is_rate_limit,
|
||||
retry_delay=f"{delay:.1f}s"
|
||||
)
|
||||
await asyncio.sleep(delay)
|
||||
else:
|
||||
logger.error(
|
||||
"POI query failed after all retries",
|
||||
category=category_key,
|
||||
error=str(e),
|
||||
is_rate_limit=is_rate_limit
|
||||
)
|
||||
|
||||
raise last_error
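# The retry schedule above in isolation: exponential backoff with jitter,
# assuming a base delay of 2 s (the real value comes from
# OVERPASS_RETRY_DELAY_SECONDS) and the same 10 s / 30 s caps.
import random

def example_retry_delays(base_delay: float = 2.0, max_retries: int = 3, rate_limited: bool = False):
    """Yield the delay slept before each retry attempt."""
    for attempt in range(max_retries - 1):  # no sleep after the final attempt
        if rate_limited:
            delay = min(base_delay * (3 ** attempt) + random.uniform(1, 5), 30)
        else:
            delay = min(base_delay * (2 ** attempt) + random.uniform(0.5, 1.5), 10)
        yield round(delay, 1)

print(list(example_retry_delays()))                   # e.g. [2.9, 5.1]
print(list(example_retry_delays(rate_limited=True)))  # e.g. [4.6, 9.8]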
|
||||
|
||||
async def _query_pois(
|
||||
self,
|
||||
latitude: float,
|
||||
longitude: float,
|
||||
osm_query: str,
|
||||
radius_m: int
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Query Overpass API for POIs in radius.
|
||||
|
||||
Raises:
|
||||
Exception: With descriptive error message from Overpass API
|
||||
"""
|
||||
|
||||
# Build Overpass QL query
|
||||
query = f"""
|
||||
[out:json][timeout:{self.timeout}];
|
||||
(
|
||||
node{osm_query}(around:{radius_m},{latitude},{longitude});
|
||||
way{osm_query}(around:{radius_m},{latitude},{longitude});
|
||||
);
|
||||
out center;
|
||||
"""
|
||||
|
||||
# Execute query (use asyncio thread pool for blocking overpy)
|
||||
loop = asyncio.get_event_loop()
|
||||
try:
|
||||
result = await loop.run_in_executor(
|
||||
None,
|
||||
self.api.query,
|
||||
query
|
||||
)
|
||||
except overpy.exception.OverpassTooManyRequests as e:
|
||||
# Explicitly handle rate limiting
|
||||
raise Exception("Too many requests - Overpass API rate limit exceeded") from e
|
||||
except overpy.exception.OverpassGatewayTimeout as e:
|
||||
# Query took too long
|
||||
raise Exception("Gateway timeout - query too complex or server busy") from e
|
||||
except overpy.exception.OverpassBadRequest as e:
|
||||
# Query syntax error
|
||||
raise Exception(f"Bad request - invalid query syntax: {str(e)}") from e
|
||||
except Exception as e:
|
||||
# Check if it's an HTTP error with status code
|
||||
error_msg = str(e).lower()
|
||||
if '429' in error_msg or 'too many' in error_msg:
|
||||
raise Exception("Too many requests - rate limit exceeded") from e
|
||||
elif '503' in error_msg or 'load too high' in error_msg:
|
||||
raise Exception("Server load too high - Overpass API overloaded") from e
|
||||
elif '504' in error_msg or 'timeout' in error_msg:
|
||||
raise Exception("Gateway timeout - server busy") from e
|
||||
else:
|
||||
# Re-raise with original message
|
||||
raise
|
||||
|
||||
# Parse results
|
||||
pois = []
|
||||
|
||||
# Process nodes
|
||||
for node in result.nodes:
|
||||
pois.append({
|
||||
"osm_id": str(node.id),
|
||||
"type": "node",
|
||||
"lat": float(node.lat),
|
||||
"lon": float(node.lon),
|
||||
"tags": dict(node.tags),
|
||||
"name": node.tags.get("name", "Unnamed")
|
||||
})
|
||||
|
||||
# Process ways (buildings, areas)
|
||||
for way in result.ways:
|
||||
# Get center point
|
||||
if hasattr(way, 'center_lat') and way.center_lat:
|
||||
lat, lon = float(way.center_lat), float(way.center_lon)
|
||||
else:
|
||||
# Calculate centroid from nodes
|
||||
if way.nodes:
|
||||
lats = [float(node.lat) for node in way.nodes]
|
||||
lons = [float(node.lon) for node in way.nodes]
|
||||
lat = sum(lats) / len(lats)
|
||||
lon = sum(lons) / len(lons)
|
||||
else:
|
||||
continue
|
||||
|
||||
pois.append({
|
||||
"osm_id": str(way.id),
|
||||
"type": "way",
|
||||
"lat": lat,
|
||||
"lon": lon,
|
||||
"tags": dict(way.tags),
|
||||
"name": way.tags.get("name", "Unnamed")
|
||||
})
|
||||
|
||||
return pois
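# For reference, the Overpass QL the method above renders for a hypothetical
# category whose osm_query is ["amenity"="school"] within 500 m of an
# illustrative Madrid location (the real tag filters come from POI_CATEGORIES).
osm_query = '["amenity"="school"]'
radius_m, lat, lon, timeout = 500, 40.4168, -3.7038, 25

rendered = f"""
[out:json][timeout:{timeout}];
(
  node{osm_query}(around:{radius_m},{lat},{lon});
  way{osm_query}(around:{radius_m},{lat},{lon});
);
out center;
"""
print(rendered)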
|
||||
|
||||
def _calculate_poi_features(
|
||||
self,
|
||||
pois: List[Dict[str, Any]],
|
||||
bakery_location: Tuple[float, float],
|
||||
category
|
||||
) -> Dict[str, float]:
|
||||
"""Calculate ML features for POI category"""
|
||||
|
||||
if not pois:
|
||||
return self._get_empty_features()
|
||||
|
||||
# Calculate distances
|
||||
distances = []
|
||||
for poi in pois:
|
||||
dist_km = self._haversine_distance(
|
||||
bakery_location,
|
||||
(poi["lat"], poi["lon"])
|
||||
)
|
||||
distances.append(dist_km * 1000) # Convert to meters
|
||||
|
||||
# Feature Tier 1: Proximity Scores (PRIMARY)
|
||||
proximity_score = sum(1.0 / (1.0 + d/1000) for d in distances)
|
||||
weighted_proximity_score = proximity_score * category.weight
|
||||
|
||||
# Feature Tier 2: Distance Band Counts
|
||||
count_0_100m = sum(1 for d in distances if d <= 100)
|
||||
count_100_300m = sum(1 for d in distances if 100 < d <= 300)
|
||||
count_300_500m = sum(1 for d in distances if 300 < d <= 500)
|
||||
count_500_1000m = sum(1 for d in distances if 500 < d <= 1000)
|
||||
|
||||
# Feature Tier 3: Distance to Nearest
|
||||
distance_to_nearest_m = min(distances) if distances else 9999.0
|
||||
|
||||
# Feature Tier 4: Binary Flags
|
||||
has_within_100m = any(d <= 100 for d in distances)
|
||||
has_within_300m = any(d <= 300 for d in distances)
|
||||
has_within_500m = any(d <= 500 for d in distances)
|
||||
|
||||
return {
|
||||
# Tier 1: Proximity scores (PRIMARY for ML)
|
||||
"proximity_score": round(proximity_score, 4),
|
||||
"weighted_proximity_score": round(weighted_proximity_score, 4),
|
||||
|
||||
# Tier 2: Distance bands
|
||||
"count_0_100m": count_0_100m,
|
||||
"count_100_300m": count_100_300m,
|
||||
"count_300_500m": count_300_500m,
|
||||
"count_500_1000m": count_500_1000m,
|
||||
"total_count": len(pois),
|
||||
|
||||
# Tier 3: Distance to nearest
|
||||
"distance_to_nearest_m": round(distance_to_nearest_m, 1),
|
||||
|
||||
# Tier 4: Binary flags
|
||||
"has_within_100m": has_within_100m,
|
||||
"has_within_300m": has_within_300m,
|
||||
"has_within_500m": has_within_500m
|
||||
}
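# Worked example of the tiered features above for three hypothetical POIs
# detected at 80 m, 250 m and 900 m from the bakery.
distances = [80.0, 250.0, 900.0]

# Tier 1: each POI contributes 1 / (1 + distance_km).
proximity_score = sum(1.0 / (1.0 + d / 1000) for d in distances)
print(round(proximity_score, 4))                     # 0.9259 + 0.8 + 0.5263 = 2.2522

# Tier 2: distance-band counts.
print(sum(1 for d in distances if d <= 100))         # 1
print(sum(1 for d in distances if 100 < d <= 300))   # 1
print(sum(1 for d in distances if 500 < d <= 1000))  # 1

# Tiers 3 and 4: nearest distance and binary flags.
print(min(distances))                                # 80.0
print(any(d <= 100 for d in distances))              # True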
|
||||
|
||||
def _generate_ml_features(self, poi_results: Dict[str, Any]) -> Dict[str, float]:
|
||||
"""
|
||||
Generate flat feature dictionary for ML model ingestion.
|
||||
|
||||
These features will be added to Prophet/XGBoost as regressors.
|
||||
"""
|
||||
ml_features = {}
|
||||
|
||||
for category_key, data in poi_results.items():
|
||||
features = data.get("features", {})
|
||||
|
||||
# Flatten with category prefix
|
||||
for feature_name, value in features.items():
|
||||
ml_feature_name = f"poi_{category_key}_{feature_name}"
|
||||
# Convert boolean to int for ML
|
||||
if isinstance(value, bool):
|
||||
value = 1 if value else 0
|
||||
ml_features[ml_feature_name] = value
|
||||
|
||||
return ml_features
|
||||
|
||||
def _get_empty_features(self) -> Dict[str, float]:
|
||||
"""Return zero features when no POIs found"""
|
||||
return {
|
||||
"proximity_score": 0.0,
|
||||
"weighted_proximity_score": 0.0,
|
||||
"count_0_100m": 0,
|
||||
"count_100_300m": 0,
|
||||
"count_300_500m": 0,
|
||||
"count_500_1000m": 0,
|
||||
"total_count": 0,
|
||||
"distance_to_nearest_m": 9999.0,
|
||||
"has_within_100m": False,
|
||||
"has_within_300m": False,
|
||||
"has_within_500m": False
|
||||
}
|
||||
|
||||
def _haversine_distance(
|
||||
self,
|
||||
coord1: Tuple[float, float],
|
||||
coord2: Tuple[float, float]
|
||||
) -> float:
|
||||
"""
|
||||
Calculate distance between two coordinates in kilometers.
|
||||
|
||||
Uses Haversine formula for great-circle distance.
|
||||
"""
|
||||
lat1, lon1 = coord1
|
||||
lat2, lon2 = coord2
|
||||
|
||||
R = 6371 # Earth radius in km
|
||||
|
||||
dlat = radians(lat2 - lat1)
|
||||
dlon = radians(lon2 - lon1)
|
||||
|
||||
a = (sin(dlat/2)**2 +
|
||||
cos(radians(lat1)) * cos(radians(lat2)) * sin(dlon/2)**2)
|
||||
c = 2 * atan2(sqrt(a), sqrt(1-a))
|
||||
|
||||
return R * c
|
||||
|
||||
def _generate_summary(self, poi_results: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Generate human-readable summary"""
|
||||
total_pois = sum(r["count"] for r in poi_results.values())
|
||||
categories_with_pois = [
|
||||
k for k, v in poi_results.items() if v["count"] > 0
|
||||
]
|
||||
high_impact_categories = [
|
||||
k for k, v in poi_results.items()
|
||||
if v["features"]["proximity_score"] > 2.0
|
||||
]
|
||||
|
||||
return {
|
||||
"total_pois_detected": total_pois,
|
||||
"categories_with_pois": categories_with_pois,
|
||||
"high_impact_categories": high_impact_categories,
|
||||
"categories_count": len(categories_with_pois)
|
||||
}
|
||||
|
||||
async def health_check(self) -> Dict[str, Any]:
|
||||
"""Check if Overpass API is accessible"""
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=5) as client:
|
||||
response = await client.get(f"{self.overpass_url}/status")
|
||||
is_healthy = response.status_code == 200
|
||||
return {
|
||||
"healthy": is_healthy,
|
||||
"status_code": response.status_code,
|
||||
"url": self.overpass_url
|
||||
}
|
||||
except Exception as e:
|
||||
return {
|
||||
"healthy": False,
|
||||
"error": str(e),
|
||||
"url": self.overpass_url
|
||||
}
|
||||
184
services/external/app/services/poi_feature_selector.py
vendored
Normal file
184
services/external/app/services/poi_feature_selector.py
vendored
Normal file
@@ -0,0 +1,184 @@
|
||||
"""
|
||||
POI Feature Selector
|
||||
|
||||
Determines which POI features are relevant for ML model inclusion.
|
||||
Filters out low-signal features to prevent model noise and overfitting.
|
||||
"""
|
||||
|
||||
from typing import Dict, List, Any
|
||||
import structlog
|
||||
|
||||
from app.core.poi_config import RELEVANCE_THRESHOLDS
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
|
||||
class POIFeatureSelector:
|
||||
"""
|
||||
Feature relevance engine for POI-based ML features.
|
||||
|
||||
Applies research-based thresholds to filter out irrelevant POI features
|
||||
that would add noise to bakery-specific demand forecasting models.
|
||||
"""
|
||||
|
||||
def __init__(self, thresholds: Dict[str, Dict[str, float]] = None):
|
||||
"""
|
||||
Initialize feature selector.
|
||||
|
||||
Args:
|
||||
thresholds: Custom relevance thresholds (defaults to RELEVANCE_THRESHOLDS)
|
||||
"""
|
||||
self.thresholds = thresholds or RELEVANCE_THRESHOLDS
|
||||
|
||||
def select_relevant_features(
|
||||
self,
|
||||
poi_detection_results: Dict[str, Any],
|
||||
tenant_id: str = None
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Filter POI features based on relevance thresholds.
|
||||
|
||||
Only includes features for POI categories that pass relevance tests.
|
||||
This prevents adding noise to ML models for bakeries where certain
|
||||
POI categories are not significant.
|
||||
|
||||
Args:
|
||||
poi_detection_results: Full POI detection results
|
||||
tenant_id: Optional tenant ID for logging
|
||||
|
||||
Returns:
|
||||
Dictionary with relevant features and detailed relevance report
|
||||
"""
|
||||
relevant_features = {}
|
||||
relevance_report = []
|
||||
relevant_categories = []
|
||||
|
||||
for category_key, data in poi_detection_results.items():
|
||||
features = data.get("features", {})
|
||||
thresholds = self.thresholds.get(category_key, {})
|
||||
|
||||
if not thresholds:
|
||||
logger.warning(
|
||||
f"No thresholds defined for category {category_key}",
|
||||
tenant_id=tenant_id
|
||||
)
|
||||
continue
|
||||
|
||||
# Check relevance criteria
|
||||
is_relevant, rejection_reason = self._check_relevance(
|
||||
features, thresholds, category_key
|
||||
)
|
||||
|
||||
if is_relevant:
|
||||
# Include features with category prefix
|
||||
for feature_name, value in features.items():
|
||||
ml_feature_name = f"poi_{category_key}_{feature_name}"
|
||||
# Convert boolean to int for ML
|
||||
if isinstance(value, bool):
|
||||
value = 1 if value else 0
|
||||
relevant_features[ml_feature_name] = value
|
||||
|
||||
relevant_categories.append(category_key)
|
||||
relevance_report.append({
|
||||
"category": category_key,
|
||||
"relevant": True,
|
||||
"reason": "Passes all relevance thresholds",
|
||||
"proximity_score": features.get("proximity_score", 0),
|
||||
"count": features.get("total_count", 0),
|
||||
"distance_to_nearest_m": features.get("distance_to_nearest_m", 9999)
|
||||
})
|
||||
else:
|
||||
relevance_report.append({
|
||||
"category": category_key,
|
||||
"relevant": False,
|
||||
"reason": rejection_reason,
|
||||
"proximity_score": features.get("proximity_score", 0),
|
||||
"count": features.get("total_count", 0),
|
||||
"distance_to_nearest_m": features.get("distance_to_nearest_m", 9999)
|
||||
})
|
||||
|
||||
logger.info(
|
||||
"POI feature selection complete",
|
||||
tenant_id=tenant_id,
|
||||
total_categories=len(poi_detection_results),
|
||||
relevant_categories=len(relevant_categories),
|
||||
rejected_categories=len(poi_detection_results) - len(relevant_categories)
|
||||
)
|
||||
|
||||
return {
|
||||
"features": relevant_features,
|
||||
"relevant_categories": relevant_categories,
|
||||
"relevance_report": relevance_report,
|
||||
"total_features": len(relevant_features),
|
||||
"total_relevant_categories": len(relevant_categories)
|
||||
}
|
||||
|
||||
def _check_relevance(
|
||||
self,
|
||||
features: Dict[str, Any],
|
||||
thresholds: Dict[str, float],
|
||||
category_key: str
|
||||
) -> tuple[bool, str]:
|
||||
"""
|
||||
Check if POI category passes relevance thresholds.
|
||||
|
||||
Returns:
|
||||
Tuple of (is_relevant, rejection_reason)
|
||||
"""
|
||||
# Criterion 1: Proximity score
|
||||
min_proximity = thresholds.get("min_proximity_score", 0)
|
||||
actual_proximity = features.get("proximity_score", 0)
|
||||
if actual_proximity < min_proximity:
|
||||
return False, f"Proximity score too low ({actual_proximity:.2f} < {min_proximity})"
|
||||
|
||||
# Criterion 2: Distance to nearest
|
||||
max_distance = thresholds.get("max_distance_to_nearest_m", 9999)
|
||||
actual_distance = features.get("distance_to_nearest_m", 9999)
|
||||
if actual_distance > max_distance:
|
||||
return False, f"Nearest POI too far ({actual_distance:.0f}m > {max_distance}m)"
|
||||
|
||||
# Criterion 3: Count threshold
|
||||
min_count = thresholds.get("min_count", 0)
|
||||
actual_count = features.get("total_count", 0)
|
||||
if actual_count < min_count:
|
||||
return False, f"Count too low ({actual_count} < {min_count})"
|
||||
|
||||
return True, "Passes all thresholds"
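# Usage sketch with hypothetical thresholds and detection output shaped like
# the structures the selector expects; real values come from
# app.core.poi_config.RELEVANCE_THRESHOLDS. Assumes it runs alongside the
# POIFeatureSelector class above.
example_thresholds = {
    "schools": {"min_proximity_score": 0.5, "max_distance_to_nearest_m": 500, "min_count": 1},
    "offices": {"min_proximity_score": 1.0, "max_distance_to_nearest_m": 300, "min_count": 2},
}
example_detection = {
    "schools": {"features": {"proximity_score": 0.9, "distance_to_nearest_m": 220.0,
                             "total_count": 1, "has_within_300m": True}},
    "offices": {"features": {"proximity_score": 0.4, "distance_to_nearest_m": 650.0,
                             "total_count": 1, "has_within_300m": False}},
}

selector = POIFeatureSelector(thresholds=example_thresholds)
selection = selector.select_relevant_features(example_detection, tenant_id="demo")
print(selection["relevant_categories"])  # ['schools'] (offices fails the proximity threshold)
print(selection["features"])             # only poi_schools_* features, booleans cast to 0/1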
|
||||
|
||||
def get_feature_importance_summary(
|
||||
self,
|
||||
poi_detection_results: Dict[str, Any]
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Generate summary of feature importance for all categories.
|
||||
|
||||
Useful for understanding POI landscape around a bakery.
|
||||
"""
|
||||
summary = []
|
||||
|
||||
for category_key, data in poi_detection_results.items():
|
||||
features = data.get("features", {})
|
||||
thresholds = self.thresholds.get(category_key, {})
|
||||
|
||||
is_relevant, reason = self._check_relevance(
|
||||
features, thresholds, category_key
|
||||
) if thresholds else (False, "No thresholds defined")
|
||||
|
||||
summary.append({
|
||||
"category": category_key,
|
||||
"is_relevant": is_relevant,
|
||||
"proximity_score": features.get("proximity_score", 0),
|
||||
"weighted_score": features.get("weighted_proximity_score", 0),
|
||||
"total_count": features.get("total_count", 0),
|
||||
"distance_to_nearest_m": features.get("distance_to_nearest_m", 9999),
|
||||
"has_within_100m": features.get("has_within_100m", False),
|
||||
"rejection_reason": None if is_relevant else reason
|
||||
})
|
||||
|
||||
# Sort by relevance and proximity score
|
||||
summary.sort(
|
||||
key=lambda x: (x["is_relevant"], x["proximity_score"]),
|
||||
reverse=True
|
||||
)
|
||||
|
||||
return summary
|
||||
468
services/external/app/services/poi_refresh_service.py
vendored
Normal file
468
services/external/app/services/poi_refresh_service.py
vendored
Normal file
@@ -0,0 +1,468 @@
|
||||
"""
|
||||
POI Refresh Service
|
||||
|
||||
Manages periodic POI context refresh jobs.
|
||||
Detects changes in POI landscape and updates tenant POI contexts.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
from datetime import datetime, timezone, timedelta
|
||||
from typing import Optional, Dict, Any, List
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy import select, and_, or_
|
||||
import structlog
|
||||
|
||||
from app.models.poi_refresh_job import POIRefreshJob
|
||||
from app.models.poi_context import TenantPOIContext
|
||||
from app.services.poi_detection_service import POIDetectionService
|
||||
from app.core.database import database_manager
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
|
||||
class POIRefreshService:
|
||||
"""
|
||||
POI Refresh Service
|
||||
|
||||
Manages background jobs for periodic POI context refresh.
|
||||
Default refresh cycle: 180 days (6 months).
|
||||
"""
|
||||
|
||||
DEFAULT_REFRESH_INTERVAL_DAYS = 180
|
||||
DEFAULT_MAX_ATTEMPTS = 3
|
||||
STALE_THRESHOLD_DAYS = 180
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
poi_detection_service: Optional[POIDetectionService] = None,
|
||||
refresh_interval_days: int = DEFAULT_REFRESH_INTERVAL_DAYS
|
||||
):
|
||||
"""
|
||||
Initialize POI refresh service.
|
||||
|
||||
Args:
|
||||
poi_detection_service: POI detection service instance
|
||||
refresh_interval_days: Days between POI refreshes (default: 180)
|
||||
"""
|
||||
self.poi_detection_service = poi_detection_service or POIDetectionService()
|
||||
self.refresh_interval_days = refresh_interval_days
|
||||
|
||||
logger.info(
|
||||
"POI Refresh Service initialized",
|
||||
refresh_interval_days=refresh_interval_days
|
||||
)
|
||||
|
||||
async def schedule_refresh_job(
|
||||
self,
|
||||
tenant_id: str,
|
||||
latitude: float,
|
||||
longitude: float,
|
||||
scheduled_at: Optional[datetime] = None,
|
||||
session: Optional[AsyncSession] = None
|
||||
) -> POIRefreshJob:
|
||||
"""
|
||||
Schedule a POI refresh job for a tenant.
|
||||
|
||||
Args:
|
||||
tenant_id: Tenant UUID
|
||||
latitude: Bakery latitude
|
||||
longitude: Bakery longitude
|
||||
scheduled_at: When to run the job (default: now + refresh_interval)
|
||||
session: Database session
|
||||
|
||||
Returns:
|
||||
Created POIRefreshJob
|
||||
"""
|
||||
if scheduled_at is None:
|
||||
scheduled_at = datetime.now(timezone.utc) + timedelta(
|
||||
days=self.refresh_interval_days
|
||||
)
|
||||
|
||||
async def _create_job(db_session: AsyncSession):
|
||||
# Check if pending job already exists
|
||||
result = await db_session.execute(
|
||||
select(POIRefreshJob).where(
|
||||
and_(
|
||||
POIRefreshJob.tenant_id == tenant_id,
|
||||
POIRefreshJob.status.in_(["pending", "running"])
|
||||
)
|
||||
)
|
||||
)
|
||||
existing_job = result.scalar_one_or_none()
|
||||
|
||||
if existing_job:
|
||||
logger.info(
|
||||
"POI refresh job already scheduled",
|
||||
tenant_id=tenant_id,
|
||||
job_id=str(existing_job.id),
|
||||
scheduled_at=existing_job.scheduled_at
|
||||
)
|
||||
return existing_job
|
||||
|
||||
# Create new job
|
||||
job = POIRefreshJob(
|
||||
tenant_id=tenant_id,
|
||||
latitude=latitude,
|
||||
longitude=longitude,
|
||||
scheduled_at=scheduled_at,
|
||||
status="pending",
|
||||
max_attempts=self.DEFAULT_MAX_ATTEMPTS
|
||||
)
|
||||
|
||||
db_session.add(job)
|
||||
await db_session.commit()
|
||||
await db_session.refresh(job)
|
||||
|
||||
logger.info(
|
||||
"POI refresh job scheduled",
|
||||
tenant_id=tenant_id,
|
||||
job_id=str(job.id),
|
||||
scheduled_at=scheduled_at
|
||||
)
|
||||
|
||||
return job
|
||||
|
||||
if session:
|
||||
return await _create_job(session)
|
||||
else:
|
||||
async with database_manager.get_session() as db_session:
|
||||
return await _create_job(db_session)
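# Caller-side sketch (illustrative): schedule a refresh job, then run it.
# Requires a configured database via app.core.database; the coordinates are
# placeholders, so the run call is left commented out.
async def refresh_tenant_pois(tenant_id: str) -> None:
    service = POIRefreshService(refresh_interval_days=180)

    # Queue the job; without scheduled_at it lands one interval from now.
    job = await service.schedule_refresh_job(
        tenant_id=tenant_id,
        latitude=40.4168,
        longitude=-3.7038,
    )

    # Later, a worker picks the job up by id and executes it.
    result = await service.execute_refresh_job(job_id=str(job.id))
    print(result["status"], result.get("pois_detected"))

# asyncio.run(refresh_tenant_pois("123e4567-e89b-12d3-a456-426614174000"))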
|
||||
|
||||
async def execute_refresh_job(
|
||||
self,
|
||||
job_id: str,
|
||||
session: Optional[AsyncSession] = None
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Execute a POI refresh job.
|
||||
|
||||
Args:
|
||||
job_id: Job UUID
|
||||
session: Database session
|
||||
|
||||
Returns:
|
||||
Execution result with status and details
|
||||
"""
|
||||
async def _execute(db_session: AsyncSession):
|
||||
# Load job
|
||||
result = await db_session.execute(
|
||||
select(POIRefreshJob).where(POIRefreshJob.id == job_id)
|
||||
)
|
||||
job = result.scalar_one_or_none()
|
||||
|
||||
if not job:
|
||||
raise ValueError(f"Job not found: {job_id}")
|
||||
|
||||
if job.status == "running":
|
||||
return {
|
||||
"status": "already_running",
|
||||
"job_id": str(job.id),
|
||||
"message": "Job is already running"
|
||||
}
|
||||
|
||||
if job.status == "completed":
|
||||
return {
|
||||
"status": "already_completed",
|
||||
"job_id": str(job.id),
|
||||
"message": "Job already completed"
|
||||
}
|
||||
|
||||
if not job.can_retry:
|
||||
return {
|
||||
"status": "max_attempts_reached",
|
||||
"job_id": str(job.id),
|
||||
"message": f"Max attempts ({job.max_attempts}) reached"
|
||||
}
|
||||
|
||||
# Update job status
|
||||
job.status = "running"
|
||||
job.started_at = datetime.now(timezone.utc)
|
||||
job.attempt_count += 1
|
||||
await db_session.commit()
|
||||
|
||||
logger.info(
|
||||
"Executing POI refresh job",
|
||||
job_id=str(job.id),
|
||||
tenant_id=str(job.tenant_id),
|
||||
attempt=job.attempt_count
|
||||
)
|
||||
|
||||
try:
|
||||
# Get existing POI context
|
||||
poi_result = await db_session.execute(
|
||||
select(TenantPOIContext).where(
|
||||
TenantPOIContext.tenant_id == job.tenant_id
|
||||
)
|
||||
)
|
||||
existing_context = poi_result.scalar_one_or_none()
|
||||
|
||||
# Perform POI detection
|
||||
# detect_pois_for_bakery() accepts latitude, longitude and tenant_id only,
# so the stray force_refresh argument is dropped here.
detection_result = await self.poi_detection_service.detect_pois_for_bakery(
    latitude=job.latitude,
    longitude=job.longitude,
    tenant_id=str(job.tenant_id)
)
|
||||
|
||||
# Analyze changes
|
||||
changes = self._analyze_changes(
|
||||
existing_context.poi_detection_results if existing_context else {},
|
||||
detection_result
|
||||
)
|
||||
|
||||
# Update job with results
|
||||
job.status = "completed"
|
||||
job.completed_at = datetime.now(timezone.utc)
|
||||
job.pois_detected = sum(
    data.get("count", 0)
    # iterate the per-category results; the top-level dict also carries
    # metadata keys such as "summary" and "ml_features"
    for data in detection_result.get("poi_categories", {}).values()
)
|
||||
job.changes_detected = changes["has_significant_changes"]
|
||||
job.change_summary = changes
|
||||
|
||||
# Schedule next refresh
|
||||
job.next_scheduled_at = datetime.now(timezone.utc) + timedelta(
|
||||
days=self.refresh_interval_days
|
||||
)
|
||||
|
||||
await db_session.commit()
|
||||
|
||||
logger.info(
|
||||
"POI refresh job completed",
|
||||
job_id=str(job.id),
|
||||
tenant_id=str(job.tenant_id),
|
||||
pois_detected=job.pois_detected,
|
||||
changes_detected=job.changes_detected,
|
||||
duration_seconds=job.duration_seconds
|
||||
)
|
||||
|
||||
# Schedule next job
|
||||
await self.schedule_refresh_job(
|
||||
tenant_id=str(job.tenant_id),
|
||||
latitude=job.latitude,
|
||||
longitude=job.longitude,
|
||||
scheduled_at=job.next_scheduled_at,
|
||||
session=db_session
|
||||
)
|
||||
|
||||
return {
|
||||
"status": "success",
|
||||
"job_id": str(job.id),
|
||||
"pois_detected": job.pois_detected,
|
||||
"changes_detected": job.changes_detected,
|
||||
"change_summary": changes,
|
||||
"duration_seconds": job.duration_seconds,
|
||||
"next_scheduled_at": job.next_scheduled_at.isoformat()
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
# Job failed
|
||||
job.status = "failed"
|
||||
job.completed_at = datetime.now(timezone.utc)
|
||||
job.error_message = str(e)
|
||||
job.error_details = {
|
||||
"error_type": type(e).__name__,
|
||||
"error_message": str(e),
|
||||
"attempt": job.attempt_count
|
||||
}
|
||||
|
||||
# Schedule retry if attempts remaining
|
||||
if job.can_retry:
|
||||
job.next_scheduled_at = datetime.now(timezone.utc) + timedelta(hours=1)
|
||||
logger.warning(
|
||||
"POI refresh job failed, will retry",
|
||||
job_id=str(job.id),
|
||||
tenant_id=str(job.tenant_id),
|
||||
attempt=job.attempt_count,
|
||||
max_attempts=job.max_attempts,
|
||||
error=str(e)
|
||||
)
|
||||
else:
|
||||
logger.error(
|
||||
"POI refresh job failed permanently",
|
||||
job_id=str(job.id),
|
||||
tenant_id=str(job.tenant_id),
|
||||
attempt=job.attempt_count,
|
||||
error=str(e),
|
||||
exc_info=True
|
||||
)
|
||||
|
||||
await db_session.commit()
|
||||
|
||||
return {
|
||||
"status": "failed",
|
||||
"job_id": str(job.id),
|
||||
"error": str(e),
|
||||
"attempt": job.attempt_count,
|
||||
"can_retry": job.can_retry
|
||||
}
|
||||
|
||||
if session:
|
||||
return await _execute(session)
|
||||
else:
|
||||
async with database_manager.get_session() as db_session:
|
||||
return await _execute(db_session)
|
||||
|
||||
def _analyze_changes(
|
||||
self,
|
||||
old_results: Dict[str, Any],
|
||||
new_results: Dict[str, Any]
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Analyze changes between old and new POI detection results.
|
||||
|
||||
Args:
|
||||
old_results: Previous POI detection results
|
||||
new_results: New POI detection results
|
||||
|
||||
Returns:
|
||||
Change analysis with significance flag
|
||||
"""
|
||||
changes = {
|
||||
"has_significant_changes": False,
|
||||
"category_changes": {},
|
||||
"total_poi_change": 0,
|
||||
"new_categories": [],
|
||||
"removed_categories": []
|
||||
}
|
||||
|
||||
old_categories = set(old_results.keys())
|
||||
new_categories = set(new_results.keys())
|
||||
|
||||
# New categories
|
||||
changes["new_categories"] = list(new_categories - old_categories)
|
||||
|
||||
# Removed categories
|
||||
changes["removed_categories"] = list(old_categories - new_categories)
|
||||
|
||||
# Analyze changes per category
|
||||
for category in new_categories:
|
||||
old_count = old_results.get(category, {}).get("count", 0)
|
||||
new_count = new_results.get(category, {}).get("count", 0)
|
||||
change = new_count - old_count
|
||||
|
||||
if abs(change) > 0:
|
||||
changes["category_changes"][category] = {
|
||||
"old_count": old_count,
|
||||
"new_count": new_count,
|
||||
"change": change,
|
||||
"change_percent": (change / old_count * 100) if old_count > 0 else 100
|
||||
}
|
||||
|
||||
changes["total_poi_change"] += abs(change)
|
||||
|
||||
# Determine if changes are significant
|
||||
# Significant if: 10+ POIs changed OR 20%+ change OR new/removed categories
|
||||
total_old_pois = sum(data.get("count", 0) for data in old_results.values())
|
||||
if total_old_pois > 0:
|
||||
change_percent = (changes["total_poi_change"] / total_old_pois) * 100
|
||||
changes["total_change_percent"] = change_percent
|
||||
|
||||
changes["has_significant_changes"] = (
|
||||
changes["total_poi_change"] >= 10
|
||||
or change_percent >= 20
|
||||
or len(changes["new_categories"]) > 0
|
||||
or len(changes["removed_categories"]) > 0
|
||||
)
|
||||
else:
|
||||
changes["has_significant_changes"] = changes["total_poi_change"] > 0
|
||||
|
||||
return changes
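# Worked example (illustrative numbers, not real data): with
# old_results = {"schools": {"count": 5}} and new_results = {"schools": {"count": 12}},
# total_poi_change is 7 and the relative change is 140%. The 10-POI threshold is not
# met, but 140% >= 20%, so has_significant_changes ends up True.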
|
||||
|
||||
async def get_pending_jobs(
|
||||
self,
|
||||
limit: int = 100,
|
||||
session: Optional[AsyncSession] = None
|
||||
) -> List[POIRefreshJob]:
|
||||
"""
|
||||
Get pending jobs that are due for execution.
|
||||
|
||||
Args:
|
||||
limit: Maximum number of jobs to return
|
||||
session: Database session
|
||||
|
||||
Returns:
|
||||
List of pending jobs
|
||||
"""
|
||||
async def _get_jobs(db_session: AsyncSession):
|
||||
result = await db_session.execute(
|
||||
select(POIRefreshJob)
|
||||
.where(
|
||||
and_(
|
||||
POIRefreshJob.status == "pending",
|
||||
POIRefreshJob.scheduled_at <= datetime.now(timezone.utc)
|
||||
)
|
||||
)
|
||||
.order_by(POIRefreshJob.scheduled_at)
|
||||
.limit(limit)
|
||||
)
|
||||
return result.scalars().all()
|
||||
|
||||
if session:
|
||||
return await _get_jobs(session)
|
||||
else:
|
||||
async with database_manager.get_session() as db_session:
|
||||
return await _get_jobs(db_session)
|
||||
|
||||
async def process_pending_jobs(
|
||||
self,
|
||||
max_concurrent: int = 5,
|
||||
session: Optional[AsyncSession] = None
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Process all pending jobs concurrently.
|
||||
|
||||
Args:
|
||||
max_concurrent: Maximum concurrent job executions
|
||||
session: Database session
|
||||
|
||||
Returns:
|
||||
Processing summary
|
||||
"""
|
||||
pending_jobs = await self.get_pending_jobs(session=session)
|
||||
|
||||
if not pending_jobs:
|
||||
logger.info("No pending POI refresh jobs")
|
||||
return {
|
||||
"total_jobs": 0,
|
||||
"successful": 0,
|
||||
"failed": 0,
|
||||
"results": []
|
||||
}
|
||||
|
||||
logger.info(
|
||||
"Processing pending POI refresh jobs",
|
||||
count=len(pending_jobs),
|
||||
max_concurrent=max_concurrent
|
||||
)
|
||||
|
||||
# Process jobs with concurrency limit
|
||||
semaphore = asyncio.Semaphore(max_concurrent)
|
||||
|
||||
async def process_job(job: POIRefreshJob):
|
||||
async with semaphore:
|
||||
return await self.execute_refresh_job(str(job.id))
|
||||
|
||||
results = await asyncio.gather(
|
||||
*[process_job(job) for job in pending_jobs],
|
||||
return_exceptions=True
|
||||
)
|
||||
|
||||
# Summarize results
|
||||
successful = sum(1 for r in results if isinstance(r, dict) and r.get("status") == "success")
|
||||
failed = sum(1 for r in results if isinstance(r, dict) and r.get("status") == "failed")
|
||||
errors = sum(1 for r in results if isinstance(r, Exception))
|
||||
|
||||
summary = {
|
||||
"total_jobs": len(pending_jobs),
|
||||
"successful": successful,
|
||||
"failed": failed + errors,
|
||||
"results": [r if not isinstance(r, Exception) else {"status": "error", "error": str(r)} for r in results]
|
||||
}
|
||||
|
||||
logger.info(
|
||||
"POI refresh jobs processing completed",
|
||||
**summary
|
||||
)
|
||||
|
||||
return summary
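The concurrency cap above is the usual semaphore-plus-gather idiom. A minimal, self-contained sketch of the same pattern, with placeholder names that are not part of this service:

import asyncio

async def run_bounded(coros, max_concurrent: int = 5):
    """Run coroutine objects concurrently, never more than max_concurrent at a time."""
    semaphore = asyncio.Semaphore(max_concurrent)

    async def guarded(coro):
        async with semaphore:
            return await coro

    # return_exceptions=True keeps one failure from cancelling the rest,
    # mirroring how process_pending_jobs collects per-job results.
    return await asyncio.gather(*(guarded(c) for c in coros), return_exceptions=True)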
|
||||
187
services/external/app/services/poi_scheduler.py
vendored
Normal file
187
services/external/app/services/poi_scheduler.py
vendored
Normal file
@@ -0,0 +1,187 @@
|
||||
"""
|
||||
POI Refresh Scheduler
|
||||
|
||||
Background scheduler for periodic POI context refresh.
|
||||
Runs every hour to check for and execute pending POI refresh jobs.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
from typing import Optional
|
||||
from datetime import datetime, timezone
|
||||
import structlog
|
||||
|
||||
from app.services.poi_refresh_service import POIRefreshService
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
|
||||
class POIRefreshScheduler:
|
||||
"""
|
||||
POI Refresh Scheduler
|
||||
|
||||
Background task that periodically checks for and executes
|
||||
pending POI refresh jobs.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
poi_refresh_service: Optional[POIRefreshService] = None,
|
||||
check_interval_seconds: int = 3600, # 1 hour
|
||||
max_concurrent_jobs: int = 5
|
||||
):
|
||||
"""
|
||||
Initialize POI refresh scheduler.
|
||||
|
||||
Args:
|
||||
poi_refresh_service: POI refresh service instance
|
||||
check_interval_seconds: Seconds between checks (default: 3600 = 1 hour)
|
||||
max_concurrent_jobs: Max concurrent job executions (default: 5)
|
||||
"""
|
||||
self.poi_refresh_service = poi_refresh_service or POIRefreshService()
|
||||
self.check_interval_seconds = check_interval_seconds
|
||||
self.max_concurrent_jobs = max_concurrent_jobs
|
||||
|
||||
self._task: Optional[asyncio.Task] = None
|
||||
self._running = False
|
||||
|
||||
logger.info(
|
||||
"POI Refresh Scheduler initialized",
|
||||
check_interval_seconds=check_interval_seconds,
|
||||
max_concurrent_jobs=max_concurrent_jobs
|
||||
)
|
||||
|
||||
async def start(self):
|
||||
"""Start the scheduler background task"""
|
||||
if self._running:
|
||||
logger.warning("POI Refresh Scheduler already running")
|
||||
return
|
||||
|
||||
self._running = True
|
||||
self._task = asyncio.create_task(self._run_scheduler())
|
||||
|
||||
logger.info("POI Refresh Scheduler started")
|
||||
|
||||
async def stop(self):
|
||||
"""Stop the scheduler background task"""
|
||||
if not self._running:
|
||||
return
|
||||
|
||||
self._running = False
|
||||
|
||||
if self._task:
|
||||
self._task.cancel()
|
||||
try:
|
||||
await self._task
|
||||
except asyncio.CancelledError:
|
||||
pass
|
||||
|
||||
logger.info("POI Refresh Scheduler stopped")
|
||||
|
||||
async def _run_scheduler(self):
|
||||
"""Main scheduler loop"""
|
||||
logger.info("POI Refresh Scheduler loop started")
|
||||
|
||||
while self._running:
|
||||
try:
|
||||
await self._process_cycle()
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"POI refresh scheduler cycle failed",
|
||||
error=str(e),
|
||||
exc_info=True
|
||||
)
|
||||
|
||||
# Wait for next cycle
|
||||
try:
|
||||
await asyncio.sleep(self.check_interval_seconds)
|
||||
except asyncio.CancelledError:
|
||||
break
|
||||
|
||||
logger.info("POI Refresh Scheduler loop ended")
|
||||
|
||||
async def _process_cycle(self):
|
||||
"""Process one scheduler cycle"""
|
||||
cycle_start = datetime.now(timezone.utc)
|
||||
|
||||
logger.debug(
|
||||
"POI refresh scheduler cycle started",
|
||||
timestamp=cycle_start.isoformat()
|
||||
)
|
||||
|
||||
# Process pending jobs
|
||||
result = await self.poi_refresh_service.process_pending_jobs(
|
||||
max_concurrent=self.max_concurrent_jobs
|
||||
)
|
||||
|
||||
cycle_end = datetime.now(timezone.utc)
|
||||
cycle_duration = (cycle_end - cycle_start).total_seconds()
|
||||
|
||||
if result["total_jobs"] > 0:
|
||||
logger.info(
|
||||
"POI refresh scheduler cycle completed",
|
||||
total_jobs=result["total_jobs"],
|
||||
successful=result["successful"],
|
||||
failed=result["failed"],
|
||||
cycle_duration_seconds=cycle_duration
|
||||
)
|
||||
else:
|
||||
logger.debug(
|
||||
"POI refresh scheduler cycle completed (no jobs)",
|
||||
cycle_duration_seconds=cycle_duration
|
||||
)
|
||||
|
||||
async def trigger_immediate_check(self):
|
||||
"""Trigger an immediate check for pending jobs (bypasses schedule)"""
|
||||
logger.info("POI refresh scheduler immediate check triggered")
|
||||
|
||||
try:
|
||||
result = await self.poi_refresh_service.process_pending_jobs(
|
||||
max_concurrent=self.max_concurrent_jobs
|
||||
)
|
||||
|
||||
logger.info(
|
||||
"POI refresh scheduler immediate check completed",
|
||||
total_jobs=result["total_jobs"],
|
||||
successful=result["successful"],
|
||||
failed=result["failed"]
|
||||
)
|
||||
|
||||
return result
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"POI refresh scheduler immediate check failed",
|
||||
error=str(e),
|
||||
exc_info=True
|
||||
)
|
||||
raise
|
||||
|
||||
@property
|
||||
def is_running(self) -> bool:
|
||||
"""Check if scheduler is running"""
|
||||
return self._running
|
||||
|
||||
|
||||
# Global scheduler instance
|
||||
_scheduler_instance: Optional[POIRefreshScheduler] = None
|
||||
|
||||
|
||||
def get_scheduler() -> POIRefreshScheduler:
|
||||
"""Get global scheduler instance (singleton)"""
|
||||
global _scheduler_instance
|
||||
|
||||
if _scheduler_instance is None:
|
||||
_scheduler_instance = POIRefreshScheduler()
|
||||
|
||||
return _scheduler_instance
|
||||
|
||||
|
||||
async def start_scheduler():
|
||||
"""Start global POI refresh scheduler"""
|
||||
scheduler = get_scheduler()
|
||||
await scheduler.start()
|
||||
|
||||
|
||||
async def stop_scheduler():
|
||||
"""Stop global POI refresh scheduler"""
|
||||
scheduler = get_scheduler()
|
||||
await scheduler.stop()
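One plausible way to wire the scheduler into the service lifecycle, assuming the service exposes a FastAPI application; FastAPI and the module layout shown here are assumptions, not confirmed by this file:

from contextlib import asynccontextmanager

from fastapi import FastAPI

from app.services.poi_scheduler import start_scheduler, stop_scheduler


@asynccontextmanager
async def lifespan(app: FastAPI):
    # Start the hourly background check when the service boots...
    await start_scheduler()
    try:
        yield
    finally:
        # ...and cancel the background task cleanly on shutdown.
        await stop_scheduler()


app = FastAPI(lifespan=lifespan)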
|
||||
190
services/external/app/services/tenant_deletion_service.py
vendored
Normal file
190
services/external/app/services/tenant_deletion_service.py
vendored
Normal file
@@ -0,0 +1,190 @@
|
||||
# services/external/app/services/tenant_deletion_service.py
|
||||
"""
|
||||
Tenant Data Deletion Service for External Service
|
||||
Handles deletion of tenant-specific data for the External service
|
||||
"""
|
||||
|
||||
from typing import Dict
|
||||
from sqlalchemy import select, func, delete
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy.dialects.postgresql import UUID
|
||||
import structlog
|
||||
|
||||
from shared.services.tenant_deletion import (
|
||||
BaseTenantDataDeletionService,
|
||||
TenantDataDeletionResult
|
||||
)
|
||||
from app.models import AuditLog, WeatherData
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
|
||||
|
||||
class ExternalTenantDeletionService(BaseTenantDataDeletionService):
|
||||
"""
|
||||
Service for deleting tenant-specific external data
|
||||
|
||||
IMPORTANT NOTE:
|
||||
The External service primarily stores SHARED city-wide data (weather, traffic)
|
||||
that is NOT tenant-specific. This data is used by ALL tenants and should
|
||||
NOT be deleted when a single tenant is removed.
|
||||
|
||||
Tenant-specific data in this service:
|
||||
- Audit logs (tenant_id)
|
||||
- Tenant-specific weather data (if any exists with tenant_id)
|
||||
|
||||
City-wide data that is NOT deleted (shared across all tenants):
|
||||
- CityWeatherData (no tenant_id - city-wide data)
|
||||
- CityTrafficData (no tenant_id - city-wide data)
|
||||
- TrafficData (no tenant_id - city-wide data)
|
||||
- TrafficMeasurementPoint (no tenant_id - reference data)
|
||||
- WeatherForecast (no tenant_id - city-wide forecasts)
|
||||
"""
|
||||
|
||||
def __init__(self, db: AsyncSession):
|
||||
self.db = db
|
||||
self.service_name = "external"
|
||||
|
||||
async def get_tenant_data_preview(self, tenant_id: str) -> Dict[str, int]:
|
||||
"""
|
||||
Get counts of what would be deleted for a tenant (dry-run)
|
||||
|
||||
Args:
|
||||
tenant_id: The tenant ID to preview deletion for
|
||||
|
||||
Returns:
|
||||
Dictionary with entity names and their counts
|
||||
"""
|
||||
logger.info("external.tenant_deletion.preview", tenant_id=tenant_id)
|
||||
preview = {}
|
||||
|
||||
try:
|
||||
# Count tenant-specific weather data (if any)
|
||||
weather_count = await self.db.scalar(
|
||||
select(func.count(WeatherData.id)).where(
|
||||
WeatherData.tenant_id == tenant_id
|
||||
)
|
||||
)
|
||||
preview["tenant_weather_data"] = weather_count or 0
|
||||
|
||||
# Count audit logs
|
||||
audit_count = await self.db.scalar(
|
||||
select(func.count(AuditLog.id)).where(
|
||||
AuditLog.tenant_id == tenant_id
|
||||
)
|
||||
)
|
||||
preview["audit_logs"] = audit_count or 0
|
||||
|
||||
# Add informational message about shared data
|
||||
logger.info(
|
||||
"external.tenant_deletion.preview_complete",
|
||||
tenant_id=tenant_id,
|
||||
preview=preview,
|
||||
note="City-wide data (traffic, weather) is shared and will NOT be deleted"
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"external.tenant_deletion.preview_error",
|
||||
tenant_id=tenant_id,
|
||||
error=str(e),
|
||||
exc_info=True
|
||||
)
|
||||
raise
|
||||
|
||||
return preview
|
||||
|
||||
async def delete_tenant_data(self, tenant_id: str) -> TenantDataDeletionResult:
|
||||
"""
|
||||
Permanently delete tenant-specific external data
|
||||
|
||||
NOTE: This only deletes tenant-specific data. City-wide shared data
|
||||
(CityWeatherData, CityTrafficData, TrafficData, etc.) is intentionally
|
||||
preserved as it's used by all tenants.
|
||||
|
||||
Args:
|
||||
tenant_id: The tenant ID to delete data for
|
||||
|
||||
Returns:
|
||||
TenantDataDeletionResult with deletion counts and any errors
|
||||
"""
|
||||
logger.info(
|
||||
"external.tenant_deletion.started",
|
||||
tenant_id=tenant_id,
|
||||
note="Only deleting tenant-specific data; city-wide data preserved"
|
||||
)
|
||||
result = TenantDataDeletionResult(tenant_id=tenant_id, service_name=self.service_name)
|
||||
|
||||
try:
|
||||
# Step 1: Delete tenant-specific weather data (if any exists)
|
||||
logger.info("external.tenant_deletion.deleting_weather_data", tenant_id=tenant_id)
|
||||
weather_result = await self.db.execute(
|
||||
delete(WeatherData).where(
|
||||
WeatherData.tenant_id == tenant_id
|
||||
)
|
||||
)
|
||||
result.deleted_counts["tenant_weather_data"] = weather_result.rowcount
|
||||
logger.info(
|
||||
"external.tenant_deletion.weather_data_deleted",
|
||||
tenant_id=tenant_id,
|
||||
count=weather_result.rowcount
|
||||
)
|
||||
|
||||
# Step 2: Delete audit logs
|
||||
logger.info("external.tenant_deletion.deleting_audit_logs", tenant_id=tenant_id)
|
||||
audit_result = await self.db.execute(
|
||||
delete(AuditLog).where(
|
||||
AuditLog.tenant_id == tenant_id
|
||||
)
|
||||
)
|
||||
result.deleted_counts["audit_logs"] = audit_result.rowcount
|
||||
logger.info(
|
||||
"external.tenant_deletion.audit_logs_deleted",
|
||||
tenant_id=tenant_id,
|
||||
count=audit_result.rowcount
|
||||
)
|
||||
|
||||
# Commit the transaction
|
||||
await self.db.commit()
|
||||
|
||||
# Calculate total deleted
|
||||
total_deleted = sum(result.deleted_counts.values())
|
||||
|
||||
# Add informational note about preserved data
|
||||
result.deleted_counts["_note"] = "City-wide data preserved (shared across tenants)"
|
||||
|
||||
logger.info(
|
||||
"external.tenant_deletion.completed",
|
||||
tenant_id=tenant_id,
|
||||
total_deleted=total_deleted,
|
||||
breakdown=result.deleted_counts,
|
||||
preserved_data="CityWeatherData, CityTrafficData, TrafficData (shared)"
|
||||
)
|
||||
|
||||
result.success = True
|
||||
|
||||
except Exception as e:
|
||||
await self.db.rollback()
|
||||
error_msg = f"Failed to delete external data for tenant {tenant_id}: {str(e)}"
|
||||
logger.error(
|
||||
"external.tenant_deletion.failed",
|
||||
tenant_id=tenant_id,
|
||||
error=str(e),
|
||||
exc_info=True
|
||||
)
|
||||
result.errors.append(error_msg)
|
||||
result.success = False
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def get_external_tenant_deletion_service(db: AsyncSession) -> ExternalTenantDeletionService:
|
||||
"""
|
||||
Factory function to create ExternalTenantDeletionService instance
|
||||
|
||||
Args:
|
||||
db: AsyncSession database session
|
||||
|
||||
Returns:
|
||||
ExternalTenantDeletionService instance
|
||||
"""
|
||||
return ExternalTenantDeletionService(db)
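A hypothetical call site for the factory above, shown only to illustrate the preview-then-delete flow; database_manager is assumed to be the same session helper used by the other services in this package:

from app.core.database import database_manager
from app.services.tenant_deletion_service import get_external_tenant_deletion_service


async def purge_tenant(tenant_id: str) -> bool:
    async with database_manager.get_session() as session:
        service = get_external_tenant_deletion_service(session)

        # Dry run: count the tenant-specific rows that would be removed.
        preview = await service.get_tenant_data_preview(tenant_id)
        print("would delete:", preview)

        # Permanent, tenant-scoped deletion; shared city-wide data stays intact.
        result = await service.delete_tenant_data(tenant_id)
        return result.success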
|
||||
411
services/external/app/services/traffic_service.py
vendored
Normal file
411
services/external/app/services/traffic_service.py
vendored
Normal file
@@ -0,0 +1,411 @@
|
||||
# ================================================================
|
||||
# services/external/app/services/traffic_service.py
|
||||
# ================================================================
|
||||
"""
|
||||
Abstracted Traffic Service - Universal interface for traffic data across multiple cities
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
from datetime import datetime
|
||||
from typing import Dict, List, Any, Optional, Tuple
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
import structlog
|
||||
|
||||
from app.external.apis.traffic import UniversalTrafficClient
|
||||
from app.models.traffic import TrafficData
|
||||
from app.repositories.traffic_repository import TrafficRepository
|
||||
|
||||
logger = structlog.get_logger()
|
||||
from app.core.database import database_manager
|
||||
|
||||
class TrafficService:
|
||||
"""
|
||||
Abstracted traffic service providing unified interface for traffic data
|
||||
Routes requests to appropriate city-specific clients automatically
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self.universal_client = UniversalTrafficClient()
|
||||
self.database_manager = database_manager
|
||||
|
||||
async def get_current_traffic(
|
||||
self,
|
||||
latitude: float,
|
||||
longitude: float,
|
||||
tenant_id: Optional[str] = None,
|
||||
force_refresh: bool = False,
|
||||
cache_duration_minutes: int = 5
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
"""
|
||||
Get current traffic data with intelligent cache-first strategy
|
||||
|
||||
Args:
|
||||
latitude: Query location latitude
|
||||
longitude: Query location longitude
|
||||
tenant_id: Optional tenant identifier for logging/analytics
|
||||
force_refresh: If True, bypass cache and fetch fresh data
|
||||
cache_duration_minutes: How long to consider cached data valid (default: 5 minutes)
|
||||
|
||||
Returns:
|
||||
Dict with current traffic data or None if not available
|
||||
"""
|
||||
try:
|
||||
logger.info("Getting current traffic data",
|
||||
lat=latitude, lon=longitude, tenant_id=tenant_id,
|
||||
force_refresh=force_refresh, cache_duration=cache_duration_minutes)
|
||||
|
||||
location_id = f"{latitude:.4f},{longitude:.4f}"
|
||||
|
||||
# Step 1: Check database cache first (unless force_refresh)
|
||||
if not force_refresh:
|
||||
async with self.database_manager.get_session() as session:
|
||||
traffic_repo = TrafficRepository(session)
|
||||
# Get recent traffic data (within cache_duration_minutes)
|
||||
from datetime import timedelta
|
||||
cache_cutoff = datetime.now() - timedelta(minutes=cache_duration_minutes)
|
||||
|
||||
cached_records = await traffic_repo.get_recent_by_location(
|
||||
latitude, longitude, cache_cutoff, tenant_id
|
||||
)
|
||||
|
||||
if cached_records:
|
||||
logger.info("Current traffic data found in cache",
|
||||
count=len(cached_records), cache_age_minutes=cache_duration_minutes)
|
||||
# Return the most recent cached record
|
||||
latest_record = max(cached_records, key=lambda x: x.date)
|
||||
cached_data = self._convert_db_record_to_dict(latest_record)
|
||||
|
||||
# Add cache metadata
|
||||
cached_data['service_metadata'] = {
|
||||
'request_timestamp': datetime.now().isoformat(),
|
||||
'tenant_id': tenant_id,
|
||||
'service_version': '2.0',
|
||||
'query_location': {'latitude': latitude, 'longitude': longitude},
|
||||
'data_source': 'cache',
|
||||
'cache_age_minutes': (datetime.now() - latest_record.date).total_seconds() / 60
|
||||
}
|
||||
|
||||
return cached_data
|
||||
|
||||
# Step 2: Fetch fresh data from external API
|
||||
logger.info("Fetching fresh current traffic data" +
|
||||
(" (force refresh)" if force_refresh else " (no valid cache)"))
|
||||
|
||||
traffic_data = await self.universal_client.get_current_traffic(latitude, longitude)
|
||||
|
||||
if traffic_data:
|
||||
# Add service metadata
|
||||
traffic_data['service_metadata'] = {
|
||||
'request_timestamp': datetime.now().isoformat(),
|
||||
'tenant_id': tenant_id,
|
||||
'service_version': '2.0',
|
||||
'query_location': {'latitude': latitude, 'longitude': longitude},
|
||||
'data_source': 'fresh_api'
|
||||
}
|
||||
|
||||
# Step 3: Store fresh data in cache for future requests
|
||||
try:
|
||||
async with self.database_manager.get_session() as session:
|
||||
traffic_repo = TrafficRepository(session)
|
||||
# Store the fresh data as a single record
|
||||
stored_count = await traffic_repo.store_traffic_data_batch(
|
||||
[traffic_data], location_id, tenant_id
|
||||
)
|
||||
logger.info("Stored fresh current traffic data in cache",
|
||||
stored_records=stored_count)
|
||||
except Exception as cache_error:
|
||||
logger.warning("Failed to cache current traffic data", error=str(cache_error))
|
||||
|
||||
logger.info("Successfully retrieved fresh current traffic data",
|
||||
lat=latitude, lon=longitude,
|
||||
source=traffic_data.get('source', 'unknown'))
|
||||
|
||||
return traffic_data
|
||||
else:
|
||||
logger.warning("No current traffic data available",
|
||||
lat=latitude, lon=longitude)
|
||||
return None
|
||||
|
||||
except Exception as e:
|
||||
logger.error("Error getting current traffic data",
|
||||
lat=latitude, lon=longitude, error=str(e))
|
||||
return None
|
||||
|
||||
async def get_historical_traffic(
|
||||
self,
|
||||
latitude: float,
|
||||
longitude: float,
|
||||
start_date: datetime,
|
||||
end_date: datetime,
|
||||
tenant_id: Optional[str] = None
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Get historical traffic data for any supported location with database storage
|
||||
|
||||
Args:
|
||||
latitude: Query location latitude
|
||||
longitude: Query location longitude
|
||||
start_date: Start date for historical data
|
||||
end_date: End date for historical data
|
||||
tenant_id: Optional tenant identifier
|
||||
|
||||
Returns:
|
||||
List of historical traffic data dictionaries
|
||||
"""
|
||||
try:
|
||||
logger.info("Getting historical traffic data",
|
||||
lat=latitude, lon=longitude,
|
||||
start=start_date, end=end_date, tenant_id=tenant_id)
|
||||
|
||||
# Validate date range
|
||||
if start_date >= end_date:
|
||||
logger.warning("Invalid date range", start=start_date, end=end_date)
|
||||
return []
|
||||
|
||||
location_id = f"{latitude:.4f},{longitude:.4f}"
|
||||
|
||||
async with self.database_manager.get_session() as session:
|
||||
traffic_repo = TrafficRepository(session)
|
||||
# Check database first using the repository
|
||||
db_records = await traffic_repo.get_by_location_and_date_range(
|
||||
latitude, longitude, start_date, end_date, tenant_id
|
||||
)
|
||||
|
||||
if db_records:
|
||||
logger.info("Historical traffic data found in database",
|
||||
count=len(db_records))
|
||||
return [self._convert_db_record_to_dict(record) for record in db_records]
|
||||
|
||||
# Delegate to universal client if not in DB
|
||||
traffic_data = await self.universal_client.get_historical_traffic(
|
||||
latitude, longitude, start_date, end_date
|
||||
)
|
||||
|
||||
if traffic_data:
|
||||
# Add service metadata to each record
|
||||
for record in traffic_data:
|
||||
record['service_metadata'] = {
|
||||
'request_timestamp': datetime.now().isoformat(),
|
||||
'tenant_id': tenant_id,
|
||||
'service_version': '2.0',
|
||||
'query_location': {'latitude': latitude, 'longitude': longitude},
|
||||
'date_range': {
|
||||
'start': start_date.isoformat(),
|
||||
'end': end_date.isoformat()
|
||||
}
|
||||
}
|
||||
|
||||
async with self.database_manager.get_session() as session:
|
||||
traffic_repo = TrafficRepository(session)
|
||||
# Store in database using the repository
|
||||
stored_count = await traffic_repo.store_traffic_data_batch(
|
||||
traffic_data, location_id, tenant_id
|
||||
)
|
||||
logger.info("Traffic data stored for re-training",
|
||||
fetched=len(traffic_data), stored=stored_count,
|
||||
location=location_id)
|
||||
|
||||
logger.info("Successfully retrieved historical traffic data",
|
||||
lat=latitude, lon=longitude, records=len(traffic_data))
|
||||
|
||||
return traffic_data
|
||||
else:
|
||||
logger.info("No historical traffic data available",
|
||||
lat=latitude, lon=longitude)
|
||||
return []
|
||||
|
||||
except Exception as e:
|
||||
logger.error("Error getting historical traffic data",
|
||||
lat=latitude, lon=longitude, error=str(e))
|
||||
return []
|
||||
|
||||
def _convert_db_record_to_dict(self, record: TrafficData) -> Dict[str, Any]:
|
||||
"""Convert database record to dictionary format"""
|
||||
return {
|
||||
'date': record.date,
|
||||
'traffic_volume': record.traffic_volume,
|
||||
'pedestrian_count': record.pedestrian_count,
|
||||
'congestion_level': record.congestion_level,
|
||||
'average_speed': record.average_speed,
|
||||
'source': record.source,
|
||||
'location_id': record.location_id,
|
||||
'raw_data': record.raw_data
|
||||
}
|
||||
|
||||
async def get_traffic_events(
|
||||
self,
|
||||
latitude: float,
|
||||
longitude: float,
|
||||
radius_km: float = 5.0,
|
||||
tenant_id: Optional[str] = None
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Get traffic events and incidents for any supported location
|
||||
|
||||
Args:
|
||||
latitude: Query location latitude
|
||||
longitude: Query location longitude
|
||||
radius_km: Search radius in kilometers
|
||||
tenant_id: Optional tenant identifier
|
||||
|
||||
Returns:
|
||||
List of traffic events
|
||||
"""
|
||||
try:
|
||||
logger.info("Getting traffic events",
|
||||
lat=latitude, lon=longitude, radius=radius_km, tenant_id=tenant_id)
|
||||
|
||||
# Delegate to universal client
|
||||
events = await self.universal_client.get_events(latitude, longitude, radius_km)
|
||||
|
||||
# Add metadata to events
|
||||
for event in events:
|
||||
event['service_metadata'] = {
|
||||
'request_timestamp': datetime.now().isoformat(),
|
||||
'tenant_id': tenant_id,
|
||||
'service_version': '2.0',
|
||||
'query_location': {'latitude': latitude, 'longitude': longitude},
|
||||
'search_radius_km': radius_km
|
||||
}
|
||||
|
||||
logger.info("Retrieved traffic events",
|
||||
lat=latitude, lon=longitude, events=len(events))
|
||||
|
||||
return events
|
||||
|
||||
except Exception as e:
|
||||
logger.error("Error getting traffic events",
|
||||
lat=latitude, lon=longitude, error=str(e))
|
||||
return []
|
||||
|
||||
def get_location_info(self, latitude: float, longitude: float) -> Dict[str, Any]:
|
||||
"""
|
||||
Get information about traffic data availability for location
|
||||
|
||||
Args:
|
||||
latitude: Query location latitude
|
||||
longitude: Query location longitude
|
||||
|
||||
Returns:
|
||||
Dict with location support information
|
||||
"""
|
||||
try:
|
||||
info = self.universal_client.get_location_info(latitude, longitude)
|
||||
|
||||
# Add service layer information
|
||||
info['service_layer'] = {
|
||||
'version': '2.0',
|
||||
'abstraction_level': 'universal',
|
||||
'supported_operations': [
|
||||
'current_traffic',
|
||||
'historical_traffic',
|
||||
'traffic_events',
|
||||
'bulk_requests'
|
||||
]
|
||||
}
|
||||
|
||||
return info
|
||||
|
||||
except Exception as e:
|
||||
logger.error("Error getting location info",
|
||||
lat=latitude, lon=longitude, error=str(e))
|
||||
return {
|
||||
'supported': False,
|
||||
'error': str(e),
|
||||
'service_layer': {'version': '2.0'}
|
||||
}
|
||||
|
||||
async def get_stored_traffic_for_training(self,
|
||||
latitude: float,
|
||||
longitude: float,
|
||||
start_date: datetime,
|
||||
end_date: datetime) -> List[Dict[str, Any]]:
|
||||
"""Retrieve stored traffic data specifically for training purposes"""
|
||||
try:
|
||||
async with self.database_manager.get_session() as session:
|
||||
traffic_repo = TrafficRepository(session)
|
||||
records = await traffic_repo.get_historical_traffic_for_training(
|
||||
latitude, longitude, start_date, end_date
|
||||
)
|
||||
|
||||
# Convert to training format
|
||||
training_data = []
|
||||
for record in records:
|
||||
training_data.append({
|
||||
'date': record.date,
|
||||
'traffic_volume': record.traffic_volume,
|
||||
'pedestrian_count': record.pedestrian_count,
|
||||
'congestion_level': record.congestion_level,
|
||||
'average_speed': record.average_speed,
|
||||
'location_id': record.location_id,
|
||||
'source': record.source,
|
||||
'measurement_point_id': record.raw_data  # raw_data is reused here; it contains additional measurement metadata
|
||||
})
|
||||
|
||||
logger.info(f"Retrieved {len(training_data)} traffic records for training",
|
||||
location_id=f"{latitude:.4f},{longitude:.4f}", start=start_date, end=end_date)
|
||||
|
||||
return training_data
|
||||
|
||||
except Exception as e:
|
||||
logger.error("Failed to retrieve traffic data for training",
|
||||
error=str(e), location_id=f"{latitude:.4f},{longitude:.4f}")
|
||||
return []
|
||||
|
||||
# ============= UNIFIED CONVENIENCE METHODS =============
|
||||
|
||||
async def get_current_traffic_fresh(
|
||||
self,
|
||||
latitude: float,
|
||||
longitude: float,
|
||||
tenant_id: Optional[str] = None
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
"""Get current traffic data, forcing fresh API call (bypass cache)"""
|
||||
return await self.get_current_traffic(
|
||||
latitude=latitude,
|
||||
longitude=longitude,
|
||||
tenant_id=tenant_id,
|
||||
force_refresh=True
|
||||
)
|
||||
|
||||
async def get_historical_traffic_fresh(
|
||||
self,
|
||||
latitude: float,
|
||||
longitude: float,
|
||||
start_date: datetime,
|
||||
end_date: datetime,
|
||||
tenant_id: Optional[str] = None
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Get historical traffic data, forcing fresh API call (bypass cache)"""
|
||||
# For historical data, we can implement force_refresh logic
|
||||
# For now, historical already has good cache-first logic
|
||||
return await self.get_historical_traffic(
|
||||
latitude=latitude,
|
||||
longitude=longitude,
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
tenant_id=tenant_id
|
||||
)
|
||||
|
||||
async def clear_traffic_cache(
|
||||
self,
|
||||
latitude: float,
|
||||
longitude: float,
|
||||
tenant_id: Optional[str] = None
|
||||
) -> bool:
|
||||
"""Clear cached traffic data for a specific location"""
|
||||
try:
|
||||
location_id = f"{latitude:.4f},{longitude:.4f}"
|
||||
|
||||
async with self.database_manager.get_session() as session:
|
||||
traffic_repo = TrafficRepository(session)
|
||||
# This would need a new repository method to delete by location
|
||||
# For now, just log the intent
|
||||
logger.info("Traffic cache clear requested",
|
||||
location_id=location_id, tenant_id=tenant_id)
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error("Error clearing traffic cache",
|
||||
lat=latitude, lon=longitude, error=str(e))
|
||||
return False
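A brief usage sketch of the cache-first flow above; the coordinates and tenant id are placeholders:

from app.services.traffic_service import TrafficService


async def example() -> None:
    service = TrafficService()

    # First call may hit the external API and warm the database cache.
    current = await service.get_current_traffic(40.4168, -3.7038, tenant_id="demo")
    if current:
        print(current["service_metadata"]["data_source"])  # "cache" or "fresh_api"

    # Calls within cache_duration_minutes are served from the cache;
    # force_refresh=True (or get_current_traffic_fresh) bypasses it.
    fresh = await service.get_current_traffic_fresh(40.4168, -3.7038, tenant_id="demo")
    if fresh:
        print(fresh["service_metadata"]["data_source"])  # always "fresh_api"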
|
||||
219
services/external/app/services/weather_service.py
vendored
Normal file
219
services/external/app/services/weather_service.py
vendored
Normal file
@@ -0,0 +1,219 @@
|
||||
# services/external/app/services/weather_service.py
|
||||
|
||||
"""Weather data service with repository pattern"""
|
||||
|
||||
from typing import List, Dict, Any, Optional
|
||||
from datetime import datetime, timedelta
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
import structlog
|
||||
|
||||
from app.models.weather import WeatherData, WeatherForecast
|
||||
from app.external.aemet import AEMETClient
|
||||
from app.schemas.weather import WeatherDataResponse, WeatherForecastResponse, WeatherForecastAPIResponse, HourlyForecastResponse
|
||||
from app.repositories.weather_repository import WeatherRepository
|
||||
|
||||
logger = structlog.get_logger()
|
||||
from app.core.database import database_manager
|
||||
|
||||
class WeatherService:
|
||||
|
||||
def __init__(self):
|
||||
self.aemet_client = AEMETClient()
|
||||
self.database_manager = database_manager
|
||||
|
||||
async def get_current_weather(self, latitude: float, longitude: float) -> Optional[WeatherDataResponse]:
|
||||
"""Get current weather for location with graceful failure handling"""
|
||||
try:
|
||||
logger.debug("Getting current weather", lat=latitude, lon=longitude)
|
||||
weather_data = await self.aemet_client.get_current_weather(latitude, longitude)
|
||||
|
||||
if weather_data:
|
||||
logger.debug("Weather data received", source=weather_data.get('source'))
|
||||
return WeatherDataResponse(**weather_data)
|
||||
else:
|
||||
logger.warning("No weather data received from AEMET client - providing service unavailable response")
|
||||
# Return a response indicating service unavailable rather than None
|
||||
return WeatherDataResponse(
|
||||
date=datetime.utcnow().isoformat(),
|
||||
temperature=None,
|
||||
precipitation=None,
|
||||
humidity=None,
|
||||
wind_speed=None,
|
||||
pressure=None,
|
||||
description="Servicio meteorológico temporalmente no disponible",
|
||||
source="unavailable"
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error("Failed to get current weather", error=str(e), lat=latitude, lon=longitude)
|
||||
# Return error response rather than None to prevent 404
|
||||
return WeatherDataResponse(
|
||||
date=datetime.utcnow().isoformat(),
|
||||
temperature=None,
|
||||
precipitation=None,
|
||||
humidity=None,
|
||||
wind_speed=None,
|
||||
pressure=None,
|
||||
description="Error al obtener datos meteorológicos",
|
||||
source="error"
|
||||
)
|
||||
|
||||
async def get_weather_forecast(self, latitude: float, longitude: float, days: int = 7) -> List[Dict[str, Any]]:
|
||||
"""Get weather forecast for location - returns plain dicts"""
|
||||
try:
|
||||
logger.debug("Getting weather forecast", lat=latitude, lon=longitude, days=days)
|
||||
forecast_data = await self.aemet_client.get_forecast(latitude, longitude, days)
|
||||
|
||||
if forecast_data:
|
||||
logger.debug("Forecast data received", count=len(forecast_data))
|
||||
# Validate and normalize each forecast item
|
||||
valid_forecasts = []
|
||||
for item in forecast_data:
|
||||
try:
|
||||
if isinstance(item, dict):
|
||||
# Ensure required fields are present and convert to serializable format
|
||||
forecast_date = item.get("forecast_date", datetime.now())
|
||||
generated_at = item.get("generated_at", datetime.now())
|
||||
|
||||
forecast_item = {
|
||||
"forecast_date": forecast_date.isoformat() if isinstance(forecast_date, datetime) else str(forecast_date),
|
||||
"generated_at": generated_at.isoformat() if isinstance(generated_at, datetime) else str(generated_at),
|
||||
"temperature": float(item.get("temperature", 15.0)),
|
||||
"precipitation": float(item.get("precipitation", 0.0)),
|
||||
"humidity": float(item.get("humidity", 50.0)),
|
||||
"wind_speed": float(item.get("wind_speed", 10.0)),
|
||||
"description": str(item.get("description", "Variable")),
|
||||
"source": str(item.get("source", "unknown"))
|
||||
}
|
||||
valid_forecasts.append(forecast_item)
|
||||
else:
|
||||
logger.warning("Invalid forecast item type", item_type=type(item))
|
||||
except Exception as item_error:
|
||||
logger.warning("Error processing forecast item", error=str(item_error), item=item)
|
||||
continue
|
||||
|
||||
logger.debug("Valid forecasts processed", count=len(valid_forecasts))
|
||||
return valid_forecasts
|
||||
else:
|
||||
logger.warning("No forecast data received from AEMET client")
|
||||
return []
|
||||
|
||||
except Exception as e:
|
||||
logger.error("Failed to get weather forecast", error=str(e), lat=latitude, lon=longitude)
|
||||
return []
|
||||
|
||||
async def get_hourly_forecast(self, latitude: float, longitude: float, hours: int = 48) -> List[HourlyForecastResponse]:
|
||||
"""Get hourly weather forecast for location"""
|
||||
try:
|
||||
logger.debug("Getting hourly weather forecast", lat=latitude, lon=longitude, hours=hours)
|
||||
hourly_data = await self.aemet_client.get_hourly_forecast(latitude, longitude, hours)
|
||||
|
||||
if hourly_data:
|
||||
logger.debug("Hourly forecast data received", count=len(hourly_data))
|
||||
# Validate each hourly forecast item before creating response
|
||||
valid_forecasts = []
|
||||
for item in hourly_data:
|
||||
try:
|
||||
if isinstance(item, dict):
|
||||
# Ensure required fields are present
|
||||
hourly_item = {
|
||||
"forecast_datetime": item.get("forecast_datetime", datetime.now()),
|
||||
"generated_at": item.get("generated_at", datetime.now()),
|
||||
"temperature": float(item.get("temperature", 15.0)),
|
||||
"precipitation": float(item.get("precipitation", 0.0)),
|
||||
"humidity": float(item.get("humidity", 50.0)),
|
||||
"wind_speed": float(item.get("wind_speed", 10.0)),
|
||||
"description": str(item.get("description", "Variable")),
|
||||
"source": str(item.get("source", "unknown")),
|
||||
"hour": int(item.get("hour", 0))
|
||||
}
|
||||
valid_forecasts.append(HourlyForecastResponse(**hourly_item))
|
||||
else:
|
||||
logger.warning("Invalid hourly forecast item type", item_type=type(item))
|
||||
except Exception as item_error:
|
||||
logger.warning("Error processing hourly forecast item", error=str(item_error), item=item)
|
||||
continue
|
||||
|
||||
logger.debug("Valid hourly forecasts processed", count=len(valid_forecasts))
|
||||
return valid_forecasts
|
||||
else:
|
||||
logger.warning("No hourly forecast data received from AEMET client")
|
||||
return []
|
||||
|
||||
except Exception as e:
|
||||
logger.error("Failed to get hourly weather forecast", error=str(e), lat=latitude, lon=longitude)
|
||||
return []
|
||||
|
||||
async def get_historical_weather(self,
|
||||
latitude: float,
|
||||
longitude: float,
|
||||
start_date: datetime,
|
||||
end_date: datetime) -> List[WeatherDataResponse]:
|
||||
"""Get historical weather data"""
|
||||
try:
|
||||
logger.debug("Getting historical weather",
|
||||
lat=latitude, lon=longitude,
|
||||
start=start_date, end=end_date)
|
||||
|
||||
location_id = f"{latitude:.4f},{longitude:.4f}"
|
||||
|
||||
async with self.database_manager.get_session() as session:
|
||||
weather_repository = WeatherRepository(session)
|
||||
# Use the repository to get data from the database
|
||||
db_records = await weather_repository.get_historical_weather(
|
||||
location_id,
|
||||
start_date,
|
||||
end_date
|
||||
)
|
||||
|
||||
if db_records:
|
||||
logger.debug("Historical data found in database", count=len(db_records))
|
||||
return [WeatherDataResponse(
|
||||
date=record.date,
|
||||
temperature=record.temperature,
|
||||
precipitation=record.precipitation,
|
||||
humidity=record.humidity,
|
||||
wind_speed=record.wind_speed,
|
||||
pressure=record.pressure,
|
||||
description=record.description,
|
||||
source=record.source
|
||||
) for record in db_records]
|
||||
|
||||
# If not in database, fetch from API and store
|
||||
logger.debug("Fetching historical data from AEMET API")
|
||||
weather_data = await self.aemet_client.get_historical_weather(
|
||||
latitude, longitude, start_date, end_date
|
||||
)
|
||||
|
||||
if weather_data:
|
||||
# Use the repository to store the new data
|
||||
records_to_store = [{
|
||||
"location_id": location_id,
|
||||
"city": "Madrid", # Default city for AEMET data
|
||||
"date": data.get('date', datetime.now()),
|
||||
"temperature": data.get('temperature'),
|
||||
"precipitation": data.get('precipitation'),
|
||||
"humidity": data.get('humidity'),
|
||||
"wind_speed": data.get('wind_speed'),
|
||||
"pressure": data.get('pressure'),
|
||||
"description": data.get('description'),
|
||||
"source": "aemet",
|
||||
"data_type": "historical",
|
||||
"raw_data": data, # Pass as dict, not string
|
||||
"tenant_id": None
|
||||
} for data in weather_data]
|
||||
|
||||
async with self.database_manager.get_session() as session:
|
||||
weather_repository = WeatherRepository(session)
|
||||
await weather_repository.bulk_create_weather_data(records_to_store)
|
||||
|
||||
logger.debug("Historical data stored in database", count=len(weather_data))
|
||||
|
||||
return [WeatherDataResponse(**item) for item in weather_data]
|
||||
else:
|
||||
logger.warning("No historical weather data received")
|
||||
return []
|
||||
|
||||
except Exception as e:
|
||||
logger.error("Failed to get historical weather", error=str(e))
|
||||
return []
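A minimal usage sketch for the service above; the coordinates are placeholders:

from app.services.weather_service import WeatherService


async def example() -> None:
    service = WeatherService()

    # Current conditions: returns a WeatherDataResponse even on failure,
    # with source="unavailable" or source="error" instead of None.
    current = await service.get_current_weather(40.4168, -3.7038)
    if current:
        print(current.source, current.temperature)

    # 7-day forecast as plain, already-normalized dicts.
    forecast = await service.get_weather_forecast(40.4168, -3.7038, days=7)
    print(len(forecast))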
|
||||
342
services/external/app/utils/calendar_suggester.py
vendored
Normal file
342
services/external/app/utils/calendar_suggester.py
vendored
Normal file
@@ -0,0 +1,342 @@
|
||||
"""
|
||||
Calendar Suggester Utility
|
||||
|
||||
Provides intelligent school calendar suggestions based on POI detection data,
|
||||
tenant location, and heuristics optimized for bakery demand forecasting.
|
||||
"""
|
||||
|
||||
from typing import Optional, Dict, List, Any, Tuple
|
||||
from datetime import datetime, date, timezone
|
||||
import structlog
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
|
||||
class CalendarSuggester:
|
||||
"""
|
||||
Suggests appropriate school calendars for tenants based on location context.
|
||||
|
||||
Uses POI detection data, proximity analysis, and bakery-specific heuristics
|
||||
to provide intelligent calendar recommendations with confidence scores.
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self.logger = logger
|
||||
|
||||
def suggest_calendar_for_tenant(
|
||||
self,
|
||||
city_id: str,
|
||||
available_calendars: List[Dict[str, Any]],
|
||||
poi_context: Optional[Dict[str, Any]] = None,
|
||||
tenant_data: Optional[Dict[str, Any]] = None
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Suggest the most appropriate calendar for a tenant.
|
||||
|
||||
Args:
|
||||
city_id: Normalized city ID (e.g., "madrid")
|
||||
available_calendars: List of available school calendars for the city
|
||||
poi_context: Optional POI detection results including school data
|
||||
tenant_data: Optional tenant information (location, etc.)
|
||||
|
||||
Returns:
|
||||
Dict with:
|
||||
- suggested_calendar_id: UUID of suggested calendar or None
|
||||
- calendar_name: Name of suggested calendar
|
||||
- confidence: Float 0.0-1.0 confidence score
|
||||
- reasoning: List of reasoning steps
|
||||
- fallback_calendars: Alternative suggestions
|
||||
- should_assign: Boolean recommendation to auto-assign
|
||||
"""
|
||||
if not available_calendars:
|
||||
return self._no_calendars_available(city_id)
|
||||
|
||||
# Get current academic year
|
||||
academic_year = self._get_current_academic_year()
|
||||
|
||||
# Filter calendars for current academic year
|
||||
current_year_calendars = [
|
||||
cal for cal in available_calendars
|
||||
if cal.get("academic_year") == academic_year
|
||||
]
|
||||
|
||||
if not current_year_calendars:
|
||||
# Fallback to any calendar if current year not available
|
||||
current_year_calendars = available_calendars
|
||||
self.logger.warning(
|
||||
"No calendars for current academic year, using all available",
|
||||
city_id=city_id,
|
||||
academic_year=academic_year
|
||||
)
|
||||
|
||||
# Analyze POI context if available
|
||||
school_analysis = self._analyze_schools_from_poi(poi_context) if poi_context else None
|
||||
|
||||
# Apply bakery-specific heuristics
|
||||
suggestion = self._apply_suggestion_heuristics(
|
||||
current_year_calendars,
|
||||
school_analysis,
|
||||
city_id
|
||||
)
|
||||
|
||||
return suggestion
|
||||
|
||||
def _get_current_academic_year(self) -> str:
|
||||
"""
|
||||
Determine current academic year based on date.
|
||||
|
||||
Academic year runs September to June (Spain):
|
||||
- Jan-Aug: Previous year (e.g., 2024-2025)
|
||||
- Sep-Dec: Current year (e.g., 2025-2026)
|
||||
|
||||
Returns:
|
||||
Academic year string (e.g., "2024-2025")
|
||||
"""
|
||||
today = date.today()
|
||||
year = today.year
|
||||
|
||||
# Academic year starts in September
|
||||
if today.month >= 9: # September onwards
|
||||
return f"{year}-{year + 1}"
|
||||
else: # January-August
|
||||
return f"{year - 1}-{year}"
|
||||
|
||||
def _analyze_schools_from_poi(
|
||||
self,
|
||||
poi_context: Dict[str, Any]
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
"""
|
||||
Analyze school POIs to infer school type preferences.
|
||||
|
||||
Args:
|
||||
poi_context: POI detection results
|
||||
|
||||
Returns:
|
||||
Dict with:
|
||||
- has_schools_nearby: Boolean
|
||||
- school_count: Int count of schools
|
||||
- nearest_distance: Float distance to nearest school (meters)
|
||||
- proximity_score: Float proximity score
|
||||
- school_names: List of detected school names
|
||||
"""
|
||||
try:
|
||||
poi_results = poi_context.get("poi_detection_results", {})
|
||||
schools_data = poi_results.get("schools", {})
|
||||
|
||||
if not schools_data:
|
||||
return None
|
||||
|
||||
school_pois = schools_data.get("pois", [])
|
||||
school_count = len(school_pois)
|
||||
|
||||
if school_count == 0:
|
||||
return None
|
||||
|
||||
# Extract school details
|
||||
school_names = [
|
||||
poi.get("name", "Unknown School")
|
||||
for poi in school_pois
|
||||
if poi.get("name")
|
||||
]
|
||||
|
||||
# Get proximity metrics
|
||||
features = schools_data.get("features", {})
|
||||
proximity_score = features.get("proximity_score", 0.0)
|
||||
|
||||
# Calculate nearest distance (approximate from POI data)
|
||||
nearest_distance = None
|
||||
if school_pois:
|
||||
# If we have POIs, estimate nearest distance
|
||||
# This is approximate - exact calculation would require tenant coords
|
||||
nearest_distance = 100.0 # Default assumption if schools detected
|
||||
|
||||
return {
|
||||
"has_schools_nearby": True,
|
||||
"school_count": school_count,
|
||||
"nearest_distance": nearest_distance,
|
||||
"proximity_score": proximity_score,
|
||||
"school_names": school_names
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
self.logger.warning(
|
||||
"Failed to analyze schools from POI",
|
||||
error=str(e)
|
||||
)
|
||||
return None
|
||||
|
||||
def _apply_suggestion_heuristics(
|
||||
self,
|
||||
calendars: List[Dict[str, Any]],
|
||||
school_analysis: Optional[Dict[str, Any]],
|
||||
city_id: str
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Apply heuristics to suggest best calendar.
|
||||
|
||||
Bakery-specific heuristics:
|
||||
1. If schools detected nearby -> Prefer primary (stronger morning rush)
|
||||
2. If no schools detected -> Still suggest primary (more common, safer default)
|
||||
3. Primary schools have stronger impact on bakery traffic
|
||||
|
||||
Args:
|
||||
calendars: List of available calendars
|
||||
school_analysis: Analysis of nearby schools
|
||||
city_id: City identifier
|
||||
|
||||
Returns:
|
||||
Suggestion dict with confidence and reasoning
|
||||
"""
|
||||
reasoning = []
|
||||
confidence = 0.0
|
||||
|
||||
# Separate calendars by type
|
||||
primary_calendars = [c for c in calendars if c.get("school_type") == "primary"]
|
||||
secondary_calendars = [c for c in calendars if c.get("school_type") == "secondary"]
|
||||
other_calendars = [c for c in calendars if c.get("school_type") not in ["primary", "secondary"]]
|
||||
|
||||
# Heuristic 1: Schools detected nearby
|
||||
if school_analysis and school_analysis.get("has_schools_nearby"):
|
||||
school_count = school_analysis.get("school_count", 0)
|
||||
proximity_score = school_analysis.get("proximity_score", 0.0)
|
||||
|
||||
reasoning.append(f"Detected {school_count} schools nearby (proximity score: {proximity_score:.2f})")
|
||||
|
||||
if primary_calendars:
|
||||
suggested = primary_calendars[0]
|
||||
confidence = min(0.85, 0.65 + (proximity_score * 0.1)) # 65-85% confidence
|
||||
reasoning.append("Primary schools create strong morning rush (7:30-9am drop-off)")
|
||||
reasoning.append("Primary calendars recommended for bakeries near schools")
|
||||
elif secondary_calendars:
|
||||
suggested = secondary_calendars[0]
|
||||
confidence = 0.70
|
||||
reasoning.append("Secondary school calendars available (later morning start)")
|
||||
else:
|
||||
suggested = calendars[0]
|
||||
confidence = 0.50
|
||||
reasoning.append("Using available calendar (school type not specified)")
|
||||
|
||||
# Heuristic 2: No schools detected
|
||||
else:
|
||||
reasoning.append("No schools detected within 500m radius")
|
||||
|
||||
if primary_calendars:
|
||||
suggested = primary_calendars[0]
|
||||
confidence = 0.60 # Lower confidence without detected schools
|
||||
reasoning.append("Defaulting to primary calendar (more common, safer choice)")
|
||||
reasoning.append("Primary school holidays still affect general foot traffic")
|
||||
elif secondary_calendars:
|
||||
suggested = secondary_calendars[0]
|
||||
confidence = 0.55
|
||||
reasoning.append("Secondary calendar available as default")
|
||||
elif other_calendars:
|
||||
suggested = other_calendars[0]
|
||||
confidence = 0.50
|
||||
reasoning.append("Using available calendar")
|
||||
else:
|
||||
suggested = calendars[0]
|
||||
confidence = 0.45
|
||||
reasoning.append("No preferred calendar type available")
|
||||
|
||||
# Confidence adjustment based on school analysis quality
|
||||
if school_analysis:
|
||||
if school_analysis.get("school_count", 0) >= 3:
|
||||
confidence = min(1.0, confidence + 0.05) # Boost for multiple schools
|
||||
reasoning.append("High confidence: Multiple schools detected")
|
||||
|
||||
proximity = school_analysis.get("proximity_score", 0.0)
|
||||
if proximity > 2.0:
|
||||
confidence = min(1.0, confidence + 0.05) # Boost for close proximity
|
||||
reasoning.append("High confidence: Schools very close to bakery")
|
||||
|
||||
# Determine if we should auto-assign
|
||||
# Only auto-assign if confidence >= 75% AND schools detected
|
||||
should_auto_assign = (
|
||||
confidence >= 0.75 and
|
||||
school_analysis is not None and
|
||||
school_analysis.get("has_schools_nearby", False)
|
||||
)
|
||||
|
||||
# Build fallback suggestions
|
||||
fallback_calendars = []
|
||||
for cal in calendars:
|
||||
if cal.get("id") != suggested.get("id"):
|
||||
fallback_calendars.append({
|
||||
"calendar_id": str(cal.get("id")),
|
||||
"calendar_name": cal.get("name"),
|
||||
"school_type": cal.get("school_type"),
|
||||
"academic_year": cal.get("academic_year")
|
||||
})
|
||||
|
||||
return {
|
||||
"suggested_calendar_id": str(suggested.get("id")),
|
||||
"calendar_name": suggested.get("name"),
|
||||
"school_type": suggested.get("school_type"),
|
||||
"academic_year": suggested.get("academic_year"),
|
||||
"confidence": round(confidence, 2),
|
||||
"confidence_percentage": round(confidence * 100, 1),
|
||||
"reasoning": reasoning,
|
||||
"fallback_calendars": fallback_calendars[:2], # Top 2 alternatives
|
||||
"should_auto_assign": should_auto_assign,
|
||||
"school_analysis": school_analysis,
|
||||
"city_id": city_id
|
||||
}
|
||||
|
||||
def _no_calendars_available(self, city_id: str) -> Dict[str, Any]:
|
||||
"""Return response when no calendars available for city."""
|
||||
return {
|
||||
"suggested_calendar_id": None,
|
||||
"calendar_name": None,
|
||||
"school_type": None,
|
||||
"academic_year": None,
|
||||
"confidence": 0.0,
|
||||
"confidence_percentage": 0.0,
|
||||
"reasoning": [
|
||||
f"No school calendars configured for city: {city_id}",
|
||||
"Calendar assignment not possible at this time",
|
||||
"Location context created without calendar (can be added later)"
|
||||
],
|
||||
"fallback_calendars": [],
|
||||
"should_auto_assign": False,
|
||||
"school_analysis": None,
|
||||
"city_id": city_id
|
||||
}
|
||||
|
||||
def format_suggestion_for_admin(self, suggestion: Dict[str, Any]) -> str:
|
||||
"""
|
||||
Format suggestion as human-readable text for admin UI.
|
||||
|
||||
Args:
|
||||
suggestion: Suggestion dict from suggest_calendar_for_tenant
|
||||
|
||||
Returns:
|
||||
Formatted string for display
|
||||
"""
|
||||
if not suggestion.get("suggested_calendar_id"):
|
||||
return f"⚠️ No calendars available for {suggestion.get('city_id', 'this city')}"
|
||||
|
||||
confidence_pct = suggestion.get("confidence_percentage", 0)
|
||||
calendar_name = suggestion.get("calendar_name", "Unknown")
|
||||
school_type = suggestion.get("school_type", "").capitalize()
|
||||
|
||||
# Confidence emoji
|
||||
if confidence_pct >= 80:
|
||||
emoji = "✅"
|
||||
elif confidence_pct >= 60:
|
||||
emoji = "📊"
|
||||
else:
|
||||
emoji = "💡"
|
||||
|
||||
text = f"{emoji} **Suggested**: {calendar_name}\n"
|
||||
text += f"**Type**: {school_type} | **Confidence**: {confidence_pct}%\n\n"
|
||||
text += "**Reasoning**:\n"
|
||||
|
||||
for reason in suggestion.get("reasoning", []):
|
||||
text += f"• {reason}\n"
|
||||
|
||||
if suggestion.get("fallback_calendars"):
|
||||
text += "\n**Alternatives**:\n"
|
||||
for alt in suggestion.get("fallback_calendars", [])[:2]:
|
||||
text += f"• {alt.get('calendar_name')} ({alt.get('school_type')})\n"
|
||||
|
||||
return text
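A hedged example of driving the suggester end to end; the calendar rows and POI payload below are illustrative sample data, not real records:

from app.utils.calendar_suggester import CalendarSuggester

suggester = CalendarSuggester()

calendars = [
    {"id": "11111111-1111-1111-1111-111111111111", "name": "Madrid Primary 2024-2025",
     "school_type": "primary", "academic_year": "2024-2025"},
    {"id": "22222222-2222-2222-2222-222222222222", "name": "Madrid Secondary 2024-2025",
     "school_type": "secondary", "academic_year": "2024-2025"},
]

poi_context = {
    "poi_detection_results": {
        "schools": {
            "pois": [{"name": "CEIP Ejemplo"}, {"name": "Colegio Demo"}],
            "features": {"proximity_score": 1.5},
        }
    }
}

suggestion = suggester.suggest_calendar_for_tenant(
    city_id="madrid",
    available_calendars=calendars,
    poi_context=poi_context,
)
print(suggester.format_suggestion_for_admin(suggestion))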
|
||||
141
services/external/migrations/env.py
vendored
Normal file
141
services/external/migrations/env.py
vendored
Normal file
@@ -0,0 +1,141 @@
"""Alembic environment configuration for external service"""

import asyncio
import os
import sys
from logging.config import fileConfig
from sqlalchemy import pool
from sqlalchemy.engine import Connection
from sqlalchemy.ext.asyncio import async_engine_from_config
from alembic import context

# Add the service directory to the Python path
service_path = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
if service_path not in sys.path:
    sys.path.insert(0, service_path)

# Add shared modules to path
shared_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "shared"))
if shared_path not in sys.path:
    sys.path.insert(0, shared_path)

try:
    from app.core.config import settings
    from shared.database.base import Base

    # Import all models to ensure they are registered with Base.metadata
    from app.models import *  # noqa: F401, F403

except ImportError as e:
    print(f"Import error in migrations env.py: {e}")
    print(f"Current Python path: {sys.path}")
    raise

# this is the Alembic Config object
config = context.config

# Determine service name from file path
service_name = os.path.basename(os.path.dirname(os.path.dirname(__file__)))
service_name_upper = service_name.upper().replace('-', '_')

# Set database URL from environment variables with multiple fallback strategies
database_url = (
    os.getenv(f'{service_name_upper}_DATABASE_URL') or  # Service-specific
    os.getenv('DATABASE_URL')  # Generic fallback
)

# If DATABASE_URL is not set, construct from individual components
if not database_url:
    # Try generic PostgreSQL environment variables first
    postgres_host = os.getenv('POSTGRES_HOST')
    postgres_port = os.getenv('POSTGRES_PORT', '5432')
    postgres_db = os.getenv('POSTGRES_DB')
    postgres_user = os.getenv('POSTGRES_USER')
    postgres_password = os.getenv('POSTGRES_PASSWORD')

    if all([postgres_host, postgres_db, postgres_user, postgres_password]):
        database_url = f"postgresql+asyncpg://{postgres_user}:{postgres_password}@{postgres_host}:{postgres_port}/{postgres_db}"
    else:
        # Try service-specific environment variables
        db_host = os.getenv(f'{service_name_upper}_DB_HOST', f'{service_name}-db-service')
        db_port = os.getenv(f'{service_name_upper}_DB_PORT', '5432')
        db_name = os.getenv(f'{service_name_upper}_DB_NAME', f'{service_name.replace("-", "_")}_db')
        db_user = os.getenv(f'{service_name_upper}_DB_USER', f'{service_name.replace("-", "_")}_user')
        db_password = os.getenv(f'{service_name_upper}_DB_PASSWORD')

        if db_password:
            database_url = f"postgresql+asyncpg://{db_user}:{db_password}@{db_host}:{db_port}/{db_name}"
        else:
            # Final fallback: try to get from settings object
            try:
                database_url = getattr(settings, 'DATABASE_URL', None)
            except Exception:
                pass

if not database_url:
    error_msg = f"ERROR: No database URL configured for {service_name} service"
    print(error_msg)
    raise Exception(error_msg)

config.set_main_option("sqlalchemy.url", database_url)

# Interpret the config file for Python logging
if config.config_file_name is not None:
    fileConfig(config.config_file_name)

# Set target metadata
target_metadata = Base.metadata


def run_migrations_offline() -> None:
    """Run migrations in 'offline' mode."""
    url = config.get_main_option("sqlalchemy.url")
    context.configure(
        url=url,
        target_metadata=target_metadata,
        literal_binds=True,
        dialect_opts={"paramstyle": "named"},
        compare_type=True,
        compare_server_default=True,
    )

    with context.begin_transaction():
        context.run_migrations()


def do_run_migrations(connection: Connection) -> None:
    """Execute migrations with the given connection."""
    context.configure(
        connection=connection,
        target_metadata=target_metadata,
        compare_type=True,
        compare_server_default=True,
    )

    with context.begin_transaction():
        context.run_migrations()


async def run_async_migrations() -> None:
    """Run migrations in 'online' mode with async support."""
    connectable = async_engine_from_config(
        config.get_section(config.config_ini_section, {}),
        prefix="sqlalchemy.",
        poolclass=pool.NullPool,
    )

    async with connectable.connect() as connection:
        await connection.run_sync(do_run_migrations)

    await connectable.dispose()


def run_migrations_online() -> None:
    """Run migrations in 'online' mode."""
    asyncio.run(run_async_migrations())


if context.is_offline_mode():
    run_migrations_offline()
else:
    run_migrations_online()
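# Note (illustrative, not part of the original env.py): this service lives in
# services/external, so service_name_upper resolves to "EXTERNAL" and the URL
# lookup order above is EXTERNAL_DATABASE_URL, then DATABASE_URL, then the
# POSTGRES_* components, then EXTERNAL_DB_*, and finally settings.DATABASE_URL.
# A minimal sketch of running the migrations with the first option
# (the DSN values are placeholders):
#
#   export EXTERNAL_DATABASE_URL="postgresql+asyncpg://user:pass@db-host:5432/external_db"
#   alembic upgrade head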
26
services/external/migrations/script.py.mako
vendored
Normal file
@@ -0,0 +1,26 @@
"""${message}

Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}

"""
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa
${imports if imports else ""}

# revision identifiers, used by Alembic.
revision: str = ${repr(up_revision)}
down_revision: Union[str, None] = ${repr(down_revision)}
branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}


def upgrade() -> None:
    ${upgrades if upgrades else "pass"}


def downgrade() -> None:
    ${downgrades if downgrades else "pass"}
464
services/external/migrations/versions/20251110_1900_unified_initial_schema.py
vendored
Normal file
@@ -0,0 +1,464 @@
|
||||
"""unified_initial_schema
|
||||
|
||||
Revision ID: 00001
|
||||
Revises:
|
||||
Create Date: 2025-11-10 19:00:00.000000+01:00
|
||||
|
||||
Complete unified initial schema for External Service including:
|
||||
- Weather data collection (weather_data, weather_forecasts, city_weather_data)
|
||||
- Traffic data collection (traffic_data, traffic_measurement_points, traffic_background_jobs, city_traffic_data)
|
||||
- School calendars and location context (school_calendars, tenant_location_contexts)
|
||||
- POI detection system (tenant_poi_contexts, poi_refresh_jobs)
|
||||
- Audit logging (audit_logs)
|
||||
"""
|
||||
from typing import Sequence, Union
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.dialects.postgresql import UUID, JSONB
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision: str = '00001'
|
||||
down_revision: Union[str, None] = None
|
||||
branch_labels: Union[str, Sequence[str], None] = None
|
||||
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
"""Create all tables for External Service"""
|
||||
|
||||
# ============================================================================
|
||||
# AUDIT LOGS
|
||||
# ============================================================================
|
||||
op.create_table(
|
||||
'audit_logs',
|
||||
sa.Column('id', UUID(as_uuid=True), nullable=False),
|
||||
sa.Column('tenant_id', UUID(as_uuid=True), nullable=False),
|
||||
sa.Column('user_id', UUID(as_uuid=True), nullable=False),
|
||||
sa.Column('action', sa.String(length=100), nullable=False),
|
||||
sa.Column('resource_type', sa.String(length=100), nullable=False),
|
||||
sa.Column('resource_id', sa.String(length=255), nullable=True),
|
||||
sa.Column('severity', sa.String(length=20), nullable=False),
|
||||
sa.Column('service_name', sa.String(length=100), nullable=False),
|
||||
sa.Column('description', sa.Text(), nullable=True),
|
||||
sa.Column('changes', JSONB, nullable=True),
|
||||
sa.Column('audit_metadata', JSONB, nullable=True),
|
||||
sa.Column('ip_address', sa.String(length=45), nullable=True),
|
||||
sa.Column('user_agent', sa.Text(), nullable=True),
|
||||
sa.Column('endpoint', sa.String(length=255), nullable=True),
|
||||
sa.Column('method', sa.String(length=10), nullable=True),
|
||||
sa.Column('created_at', sa.DateTime(timezone=True), nullable=False),
|
||||
sa.PrimaryKeyConstraint('id')
|
||||
)
|
||||
op.create_index('idx_audit_resource_type_action', 'audit_logs', ['resource_type', 'action'])
|
||||
op.create_index('idx_audit_service_created', 'audit_logs', ['service_name', 'created_at'])
|
||||
op.create_index('idx_audit_severity_created', 'audit_logs', ['severity', 'created_at'])
|
||||
op.create_index('idx_audit_tenant_created', 'audit_logs', ['tenant_id', 'created_at'])
|
||||
op.create_index('idx_audit_user_created', 'audit_logs', ['user_id', 'created_at'])
|
||||
op.create_index(op.f('ix_audit_logs_action'), 'audit_logs', ['action'])
|
||||
op.create_index(op.f('ix_audit_logs_created_at'), 'audit_logs', ['created_at'])
|
||||
op.create_index(op.f('ix_audit_logs_resource_id'), 'audit_logs', ['resource_id'])
|
||||
op.create_index(op.f('ix_audit_logs_resource_type'), 'audit_logs', ['resource_type'])
|
||||
op.create_index(op.f('ix_audit_logs_service_name'), 'audit_logs', ['service_name'])
|
||||
op.create_index(op.f('ix_audit_logs_severity'), 'audit_logs', ['severity'])
|
||||
op.create_index(op.f('ix_audit_logs_tenant_id'), 'audit_logs', ['tenant_id'])
|
||||
op.create_index(op.f('ix_audit_logs_user_id'), 'audit_logs', ['user_id'])
|
||||
|
||||
# ============================================================================
|
||||
# WEATHER DATA
|
||||
# ============================================================================
|
||||
op.create_table(
|
||||
'city_weather_data',
|
||||
sa.Column('id', UUID(as_uuid=True), nullable=False),
|
||||
sa.Column('city_id', sa.String(length=50), nullable=False),
|
||||
sa.Column('date', sa.DateTime(timezone=True), nullable=False),
|
||||
sa.Column('temperature', sa.Float(), nullable=True),
|
||||
sa.Column('precipitation', sa.Float(), nullable=True),
|
||||
sa.Column('humidity', sa.Float(), nullable=True),
|
||||
sa.Column('wind_speed', sa.Float(), nullable=True),
|
||||
sa.Column('pressure', sa.Float(), nullable=True),
|
||||
sa.Column('description', sa.String(length=200), nullable=True),
|
||||
sa.Column('source', sa.String(length=50), nullable=False),
|
||||
sa.Column('raw_data', JSONB, nullable=True),
|
||||
sa.Column('created_at', sa.DateTime(timezone=True), nullable=True),
|
||||
sa.Column('updated_at', sa.DateTime(timezone=True), nullable=True),
|
||||
sa.PrimaryKeyConstraint('id')
|
||||
)
|
||||
op.create_index('idx_city_weather_lookup', 'city_weather_data', ['city_id', 'date'])
|
||||
op.create_index(op.f('ix_city_weather_data_city_id'), 'city_weather_data', ['city_id'])
|
||||
op.create_index(op.f('ix_city_weather_data_date'), 'city_weather_data', ['date'])
|
||||
|
||||
op.create_table(
|
||||
'weather_data',
|
||||
sa.Column('id', UUID(as_uuid=True), nullable=False),
|
||||
sa.Column('location_id', sa.String(length=100), nullable=False),
|
||||
sa.Column('city', sa.String(length=50), nullable=False),
|
||||
sa.Column('station_name', sa.String(length=200), nullable=True),
|
||||
sa.Column('latitude', sa.Float(), nullable=True),
|
||||
sa.Column('longitude', sa.Float(), nullable=True),
|
||||
sa.Column('date', sa.DateTime(timezone=True), nullable=False),
|
||||
sa.Column('forecast_date', sa.DateTime(timezone=True), nullable=True),
|
||||
sa.Column('temperature', sa.Float(), nullable=True),
|
||||
sa.Column('temperature_min', sa.Float(), nullable=True),
|
||||
sa.Column('temperature_max', sa.Float(), nullable=True),
|
||||
sa.Column('feels_like', sa.Float(), nullable=True),
|
||||
sa.Column('precipitation', sa.Float(), nullable=True),
|
||||
sa.Column('precipitation_probability', sa.Float(), nullable=True),
|
||||
sa.Column('humidity', sa.Float(), nullable=True),
|
||||
sa.Column('wind_speed', sa.Float(), nullable=True),
|
||||
sa.Column('wind_direction', sa.Float(), nullable=True),
|
||||
sa.Column('wind_gust', sa.Float(), nullable=True),
|
||||
sa.Column('pressure', sa.Float(), nullable=True),
|
||||
sa.Column('visibility', sa.Float(), nullable=True),
|
||||
sa.Column('uv_index', sa.Float(), nullable=True),
|
||||
sa.Column('cloud_cover', sa.Float(), nullable=True),
|
||||
sa.Column('condition', sa.String(length=100), nullable=True),
|
||||
sa.Column('description', sa.String(length=200), nullable=True),
|
||||
sa.Column('weather_code', sa.String(length=20), nullable=True),
|
||||
sa.Column('source', sa.String(length=50), nullable=False),
|
||||
sa.Column('data_type', sa.String(length=20), nullable=False),
|
||||
sa.Column('is_forecast', sa.Boolean(), nullable=True),
|
||||
sa.Column('data_quality_score', sa.Float(), nullable=True),
|
||||
sa.Column('raw_data', JSONB, nullable=True),
|
||||
sa.Column('processed_data', JSONB, nullable=True),
|
||||
sa.Column('tenant_id', UUID(as_uuid=True), nullable=True),
|
||||
sa.Column('created_at', sa.DateTime(timezone=True), nullable=True),
|
||||
sa.Column('updated_at', sa.DateTime(timezone=True), nullable=True),
|
||||
sa.PrimaryKeyConstraint('id')
|
||||
)
|
||||
op.create_index('idx_weather_location_date', 'weather_data', ['location_id', 'date'])
|
||||
op.create_index(op.f('ix_weather_data_date'), 'weather_data', ['date'])
|
||||
op.create_index(op.f('ix_weather_data_location_id'), 'weather_data', ['location_id'])
|
||||
op.create_index(op.f('ix_weather_data_tenant_id'), 'weather_data', ['tenant_id'])
|
||||
|
||||
op.create_table(
|
||||
'weather_forecasts',
|
||||
sa.Column('id', UUID(as_uuid=True), nullable=False),
|
||||
sa.Column('location_id', sa.String(length=100), nullable=False),
|
||||
sa.Column('forecast_date', sa.DateTime(timezone=True), nullable=False),
|
||||
sa.Column('generated_at', sa.DateTime(timezone=True), nullable=False),
|
||||
sa.Column('temperature', sa.Float(), nullable=True),
|
||||
sa.Column('precipitation', sa.Float(), nullable=True),
|
||||
sa.Column('humidity', sa.Float(), nullable=True),
|
||||
sa.Column('wind_speed', sa.Float(), nullable=True),
|
||||
sa.Column('description', sa.String(length=200), nullable=True),
|
||||
sa.Column('source', sa.String(length=50), nullable=False),
|
||||
sa.Column('raw_data', sa.Text(), nullable=True),
|
||||
sa.Column('created_at', sa.DateTime(timezone=True), nullable=True),
|
||||
sa.Column('updated_at', sa.DateTime(timezone=True), nullable=True),
|
||||
sa.PrimaryKeyConstraint('id')
|
||||
)
|
||||
op.create_index('idx_forecast_location_date', 'weather_forecasts', ['location_id', 'forecast_date'])
|
||||
op.create_index(op.f('ix_weather_forecasts_location_id'), 'weather_forecasts', ['location_id'])
|
||||
|
||||
# ============================================================================
|
||||
# TRAFFIC DATA
|
||||
# ============================================================================
|
||||
op.create_table(
|
||||
'city_traffic_data',
|
||||
sa.Column('id', UUID(as_uuid=True), nullable=False),
|
||||
sa.Column('city_id', sa.String(length=50), nullable=False),
|
||||
sa.Column('date', sa.DateTime(timezone=True), nullable=False),
|
||||
sa.Column('traffic_volume', sa.Integer(), nullable=True),
|
||||
sa.Column('pedestrian_count', sa.Integer(), nullable=True),
|
||||
sa.Column('congestion_level', sa.String(length=20), nullable=True),
|
||||
sa.Column('average_speed', sa.Float(), nullable=True),
|
||||
sa.Column('source', sa.String(length=50), nullable=False),
|
||||
sa.Column('raw_data', JSONB, nullable=True),
|
||||
sa.Column('created_at', sa.DateTime(timezone=True), nullable=True),
|
||||
sa.Column('updated_at', sa.DateTime(timezone=True), nullable=True),
|
||||
sa.PrimaryKeyConstraint('id')
|
||||
)
|
||||
op.create_index('idx_city_traffic_lookup', 'city_traffic_data', ['city_id', 'date'])
|
||||
op.create_index(op.f('ix_city_traffic_data_city_id'), 'city_traffic_data', ['city_id'])
|
||||
op.create_index(op.f('ix_city_traffic_data_date'), 'city_traffic_data', ['date'])
|
||||
|
||||
op.create_table(
|
||||
'traffic_measurement_points',
|
||||
sa.Column('id', UUID(as_uuid=True), nullable=False),
|
||||
sa.Column('city', sa.String(length=50), nullable=False),
|
||||
sa.Column('measurement_point_id', sa.String(length=100), nullable=False),
|
||||
sa.Column('name', sa.String(length=500), nullable=True),
|
||||
sa.Column('description', sa.Text(), nullable=True),
|
||||
sa.Column('latitude', sa.Float(), nullable=False),
|
||||
sa.Column('longitude', sa.Float(), nullable=False),
|
||||
sa.Column('district', sa.String(length=100), nullable=True),
|
||||
sa.Column('zone', sa.String(length=100), nullable=True),
|
||||
sa.Column('road_type', sa.String(length=50), nullable=True),
|
||||
sa.Column('measurement_type', sa.String(length=50), nullable=True),
|
||||
sa.Column('point_category', sa.String(length=50), nullable=True),
|
||||
sa.Column('is_active', sa.Boolean(), nullable=True),
|
||||
sa.Column('installation_date', sa.DateTime(timezone=True), nullable=True),
|
||||
sa.Column('last_data_received', sa.DateTime(timezone=True), nullable=True),
|
||||
sa.Column('data_quality_rating', sa.Float(), nullable=True),
|
||||
sa.Column('city_specific_metadata', JSONB, nullable=True),
|
||||
sa.Column('created_at', sa.DateTime(timezone=True), nullable=True),
|
||||
sa.Column('updated_at', sa.DateTime(timezone=True), nullable=True),
|
||||
sa.PrimaryKeyConstraint('id')
|
||||
)
|
||||
op.create_index('idx_points_active', 'traffic_measurement_points', ['city', 'is_active', 'last_data_received'])
|
||||
op.create_index('idx_points_city_location', 'traffic_measurement_points', ['city', 'latitude', 'longitude'])
|
||||
op.create_index('idx_points_district', 'traffic_measurement_points', ['city', 'district'])
|
||||
op.create_index('idx_points_road_type', 'traffic_measurement_points', ['city', 'road_type'])
|
||||
op.create_index('idx_unique_city_point', 'traffic_measurement_points', ['city', 'measurement_point_id'], unique=True)
|
||||
op.create_index(op.f('ix_traffic_measurement_points_city'), 'traffic_measurement_points', ['city'])
|
||||
op.create_index(op.f('ix_traffic_measurement_points_measurement_point_id'), 'traffic_measurement_points', ['measurement_point_id'])
|
||||
|
||||
op.create_table(
|
||||
'traffic_data',
|
||||
sa.Column('id', UUID(as_uuid=True), nullable=False),
|
||||
sa.Column('location_id', sa.String(length=100), nullable=False),
|
||||
sa.Column('city', sa.String(length=50), nullable=False),
|
||||
sa.Column('date', sa.DateTime(timezone=True), nullable=False),
|
||||
sa.Column('traffic_volume', sa.Integer(), nullable=True),
|
||||
sa.Column('congestion_level', sa.String(length=20), nullable=True),
|
||||
sa.Column('average_speed', sa.Float(), nullable=True),
|
||||
sa.Column('occupation_percentage', sa.Float(), nullable=True),
|
||||
sa.Column('load_percentage', sa.Float(), nullable=True),
|
||||
sa.Column('pedestrian_count', sa.Integer(), nullable=True),
|
||||
sa.Column('measurement_point_id', sa.String(length=100), nullable=True),
|
||||
sa.Column('measurement_point_name', sa.String(length=500), nullable=True),
|
||||
sa.Column('measurement_point_type', sa.String(length=50), nullable=True),
|
||||
sa.Column('latitude', sa.Float(), nullable=True),
|
||||
sa.Column('longitude', sa.Float(), nullable=True),
|
||||
sa.Column('district', sa.String(length=100), nullable=True),
|
||||
sa.Column('zone', sa.String(length=100), nullable=True),
|
||||
sa.Column('source', sa.String(length=50), nullable=False),
|
||||
sa.Column('data_quality_score', sa.Float(), nullable=True),
|
||||
sa.Column('is_synthetic', sa.Boolean(), nullable=True),
|
||||
sa.Column('has_pedestrian_inference', sa.Boolean(), nullable=True),
|
||||
sa.Column('city_specific_data', JSONB, nullable=True),
|
||||
sa.Column('raw_data', sa.Text(), nullable=True),
|
||||
sa.Column('tenant_id', UUID(as_uuid=True), nullable=True),
|
||||
sa.Column('created_at', sa.DateTime(timezone=True), nullable=True),
|
||||
sa.Column('updated_at', sa.DateTime(timezone=True), nullable=True),
|
||||
sa.PrimaryKeyConstraint('id')
|
||||
)
|
||||
op.create_index('idx_traffic_city_date', 'traffic_data', ['city', 'date'])
|
||||
op.create_index('idx_traffic_city_location', 'traffic_data', ['city', 'location_id'])
|
||||
op.create_index('idx_traffic_district_date', 'traffic_data', ['city', 'district', 'date'])
|
||||
op.create_index('idx_traffic_location_date', 'traffic_data', ['location_id', 'date'])
|
||||
op.create_index('idx_traffic_measurement_point', 'traffic_data', ['city', 'measurement_point_id'])
|
||||
op.create_index('idx_traffic_quality', 'traffic_data', ['city', 'data_quality_score', 'date'])
|
||||
op.create_index('idx_traffic_tenant_date', 'traffic_data', ['tenant_id', 'date'])
|
||||
op.create_index('idx_traffic_training', 'traffic_data', ['tenant_id', 'city', 'date', 'is_synthetic'])
|
||||
op.create_index(op.f('ix_traffic_data_city'), 'traffic_data', ['city'])
|
||||
op.create_index(op.f('ix_traffic_data_date'), 'traffic_data', ['date'])
|
||||
op.create_index(op.f('ix_traffic_data_location_id'), 'traffic_data', ['location_id'])
|
||||
op.create_index(op.f('ix_traffic_data_measurement_point_id'), 'traffic_data', ['measurement_point_id'])
|
||||
op.create_index(op.f('ix_traffic_data_tenant_id'), 'traffic_data', ['tenant_id'])
|
||||
|
||||
op.create_table(
|
||||
'traffic_background_jobs',
|
||||
sa.Column('id', UUID(as_uuid=True), nullable=False),
|
||||
sa.Column('job_type', sa.String(length=50), nullable=False),
|
||||
sa.Column('city', sa.String(length=50), nullable=False),
|
||||
sa.Column('location_pattern', sa.String(length=200), nullable=True),
|
||||
sa.Column('scheduled_at', sa.DateTime(timezone=True), nullable=False),
|
||||
sa.Column('started_at', sa.DateTime(timezone=True), nullable=True),
|
||||
sa.Column('completed_at', sa.DateTime(timezone=True), nullable=True),
|
||||
sa.Column('status', sa.String(length=20), nullable=False),
|
||||
sa.Column('progress_percentage', sa.Float(), nullable=True),
|
||||
sa.Column('records_processed', sa.Integer(), nullable=True),
|
||||
sa.Column('records_stored', sa.Integer(), nullable=True),
|
||||
sa.Column('data_start_date', sa.DateTime(timezone=True), nullable=True),
|
||||
sa.Column('data_end_date', sa.DateTime(timezone=True), nullable=True),
|
||||
sa.Column('success_count', sa.Integer(), nullable=True),
|
||||
sa.Column('error_count', sa.Integer(), nullable=True),
|
||||
sa.Column('error_message', sa.Text(), nullable=True),
|
||||
sa.Column('job_metadata', JSONB, nullable=True),
|
||||
sa.Column('tenant_id', UUID(as_uuid=True), nullable=True),
|
||||
sa.Column('created_at', sa.DateTime(timezone=True), nullable=True),
|
||||
sa.Column('updated_at', sa.DateTime(timezone=True), nullable=True),
|
||||
sa.PrimaryKeyConstraint('id')
|
||||
)
|
||||
op.create_index('idx_jobs_city_status', 'traffic_background_jobs', ['city', 'status', 'scheduled_at'])
|
||||
op.create_index('idx_jobs_completed', 'traffic_background_jobs', ['status', 'completed_at'])
|
||||
op.create_index('idx_jobs_tenant_status', 'traffic_background_jobs', ['tenant_id', 'status', 'scheduled_at'])
|
||||
op.create_index('idx_jobs_type_city', 'traffic_background_jobs', ['job_type', 'city', 'scheduled_at'])
|
||||
op.create_index(op.f('ix_traffic_background_jobs_city'), 'traffic_background_jobs', ['city'])
|
||||
op.create_index(op.f('ix_traffic_background_jobs_tenant_id'), 'traffic_background_jobs', ['tenant_id'])
|
||||
|
||||
# ============================================================================
|
||||
# SCHOOL CALENDARS & LOCATION CONTEXT
|
||||
# ============================================================================
|
||||
op.create_table(
|
||||
'school_calendars',
|
||||
sa.Column('id', UUID(as_uuid=True), nullable=False),
|
||||
sa.Column('city_id', sa.String(length=50), nullable=False),
|
||||
sa.Column('calendar_name', sa.String(length=100), nullable=False),
|
||||
sa.Column('school_type', sa.String(length=20), nullable=False),
|
||||
sa.Column('academic_year', sa.String(length=10), nullable=False),
|
||||
sa.Column('holiday_periods', JSONB, nullable=False),
|
||||
sa.Column('school_hours', JSONB, nullable=False),
|
||||
sa.Column('source', sa.String(length=100), nullable=True),
|
||||
sa.Column('enabled', sa.Boolean(), nullable=False),
|
||||
sa.Column('created_at', sa.DateTime(timezone=True), nullable=True),
|
||||
sa.Column('updated_at', sa.DateTime(timezone=True), nullable=True),
|
||||
sa.PrimaryKeyConstraint('id')
|
||||
)
|
||||
op.create_index('idx_school_calendar_city_year', 'school_calendars', ['city_id', 'academic_year'])
|
||||
op.create_index('idx_school_calendar_city_type', 'school_calendars', ['city_id', 'school_type'])
|
||||
op.create_index(op.f('ix_school_calendars_city_id'), 'school_calendars', ['city_id'])
|
||||
|
||||
op.create_table(
|
||||
'tenant_location_contexts',
|
||||
sa.Column('tenant_id', UUID(as_uuid=True), nullable=False),
|
||||
sa.Column('city_id', sa.String(length=50), nullable=False),
|
||||
sa.Column('school_calendar_id', UUID(as_uuid=True), nullable=True),
|
||||
sa.Column('neighborhood', sa.String(length=100), nullable=True),
|
||||
sa.Column('local_events', JSONB, nullable=True),
|
||||
sa.Column('notes', sa.String(length=500), nullable=True),
|
||||
sa.Column('created_at', sa.DateTime(timezone=True), nullable=True),
|
||||
sa.Column('updated_at', sa.DateTime(timezone=True), nullable=True),
|
||||
sa.PrimaryKeyConstraint('tenant_id')
|
||||
)
|
||||
op.create_index('idx_tenant_location_calendar', 'tenant_location_contexts', ['school_calendar_id'])
|
||||
op.create_index(op.f('ix_tenant_location_contexts_city_id'), 'tenant_location_contexts', ['city_id'])
|
||||
|
||||
# ============================================================================
|
||||
# POI DETECTION SYSTEM
|
||||
# ============================================================================
|
||||
op.create_table(
|
||||
'tenant_poi_contexts',
|
||||
sa.Column('id', UUID(as_uuid=True), primary_key=True),
|
||||
sa.Column('tenant_id', UUID(as_uuid=True), nullable=False, unique=True, index=True),
|
||||
sa.Column('latitude', sa.Float(), nullable=False),
|
||||
sa.Column('longitude', sa.Float(), nullable=False),
|
||||
sa.Column('poi_detection_results', JSONB, nullable=False, server_default='{}'),
|
||||
sa.Column('ml_features', JSONB, nullable=False, server_default='{}'),
|
||||
sa.Column('total_pois_detected', sa.Integer(), default=0),
|
||||
sa.Column('high_impact_categories', JSONB, server_default='[]'),
|
||||
sa.Column('relevant_categories', JSONB, server_default='[]'),
|
||||
sa.Column('detection_timestamp', sa.DateTime(timezone=True), nullable=False),
|
||||
sa.Column('detection_source', sa.String(50), default='overpass_api'),
|
||||
sa.Column('detection_status', sa.String(20), default='completed'),
|
||||
sa.Column('detection_error', sa.String(500), nullable=True),
|
||||
sa.Column('next_refresh_date', sa.DateTime(timezone=True), nullable=True),
|
||||
sa.Column('refresh_interval_days', sa.Integer(), default=180),
|
||||
sa.Column('last_refreshed_at', sa.DateTime(timezone=True), nullable=True),
|
||||
sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.func.now()),
|
||||
sa.Column('updated_at', sa.DateTime(timezone=True), server_default=sa.func.now(), onupdate=sa.func.now())
|
||||
)
|
||||
op.create_index('idx_tenant_poi_location', 'tenant_poi_contexts', ['latitude', 'longitude'])
|
||||
op.create_index('idx_tenant_poi_refresh', 'tenant_poi_contexts', ['next_refresh_date'])
|
||||
op.create_index('idx_tenant_poi_status', 'tenant_poi_contexts', ['detection_status'])
|
||||
|
||||
op.create_table(
|
||||
'poi_refresh_jobs',
|
||||
sa.Column('id', UUID(as_uuid=True), primary_key=True),
|
||||
sa.Column('tenant_id', UUID(as_uuid=True), nullable=False, index=True),
|
||||
sa.Column('scheduled_at', sa.DateTime(timezone=True), nullable=False, index=True),
|
||||
sa.Column('started_at', sa.DateTime(timezone=True), nullable=True),
|
||||
sa.Column('completed_at', sa.DateTime(timezone=True), nullable=True),
|
||||
sa.Column('status', sa.String(50), nullable=False, default='pending', index=True),
|
||||
sa.Column('attempt_count', sa.Integer, nullable=False, default=0),
|
||||
sa.Column('max_attempts', sa.Integer, nullable=False, default=3),
|
||||
sa.Column('latitude', sa.Float, nullable=False),
|
||||
sa.Column('longitude', sa.Float, nullable=False),
|
||||
sa.Column('pois_detected', sa.Integer, nullable=True),
|
||||
sa.Column('changes_detected', sa.Boolean, default=False),
|
||||
sa.Column('change_summary', JSONB, nullable=True),
|
||||
sa.Column('error_message', sa.Text, nullable=True),
|
||||
sa.Column('error_details', JSONB, nullable=True),
|
||||
sa.Column('next_scheduled_at', sa.DateTime(timezone=True), nullable=True, index=True),
|
||||
sa.Column('created_at', sa.DateTime(timezone=True), nullable=False, server_default=sa.func.now()),
|
||||
sa.Column('updated_at', sa.DateTime(timezone=True), nullable=False, server_default=sa.func.now(), onupdate=sa.func.now())
|
||||
)
|
||||
op.create_index('idx_poi_refresh_jobs_tenant_status', 'poi_refresh_jobs', ['tenant_id', 'status'])
|
||||
op.create_index('idx_poi_refresh_jobs_status_scheduled', 'poi_refresh_jobs', ['status', 'scheduled_at'])
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
"""Drop all tables"""
|
||||
|
||||
# POI Detection System
|
||||
op.drop_index('idx_poi_refresh_jobs_status_scheduled', table_name='poi_refresh_jobs')
|
||||
op.drop_index('idx_poi_refresh_jobs_tenant_status', table_name='poi_refresh_jobs')
|
||||
op.drop_table('poi_refresh_jobs')
|
||||
|
||||
op.drop_index('idx_tenant_poi_status', table_name='tenant_poi_contexts')
|
||||
op.drop_index('idx_tenant_poi_refresh', table_name='tenant_poi_contexts')
|
||||
op.drop_index('idx_tenant_poi_location', table_name='tenant_poi_contexts')
|
||||
op.drop_table('tenant_poi_contexts')
|
||||
|
||||
# School Calendars & Location Context
|
||||
op.drop_index(op.f('ix_tenant_location_contexts_city_id'), table_name='tenant_location_contexts')
|
||||
op.drop_index('idx_tenant_location_calendar', table_name='tenant_location_contexts')
|
||||
op.drop_table('tenant_location_contexts')
|
||||
|
||||
op.drop_index(op.f('ix_school_calendars_city_id'), table_name='school_calendars')
|
||||
op.drop_index('idx_school_calendar_city_type', table_name='school_calendars')
|
||||
op.drop_index('idx_school_calendar_city_year', table_name='school_calendars')
|
||||
op.drop_table('school_calendars')
|
||||
|
||||
# Traffic Data
|
||||
op.drop_index(op.f('ix_traffic_background_jobs_tenant_id'), table_name='traffic_background_jobs')
|
||||
op.drop_index(op.f('ix_traffic_background_jobs_city'), table_name='traffic_background_jobs')
|
||||
op.drop_index('idx_jobs_type_city', table_name='traffic_background_jobs')
|
||||
op.drop_index('idx_jobs_tenant_status', table_name='traffic_background_jobs')
|
||||
op.drop_index('idx_jobs_completed', table_name='traffic_background_jobs')
|
||||
op.drop_index('idx_jobs_city_status', table_name='traffic_background_jobs')
|
||||
op.drop_table('traffic_background_jobs')
|
||||
|
||||
op.drop_index(op.f('ix_traffic_data_tenant_id'), table_name='traffic_data')
|
||||
op.drop_index(op.f('ix_traffic_data_measurement_point_id'), table_name='traffic_data')
|
||||
op.drop_index(op.f('ix_traffic_data_location_id'), table_name='traffic_data')
|
||||
op.drop_index(op.f('ix_traffic_data_date'), table_name='traffic_data')
|
||||
op.drop_index(op.f('ix_traffic_data_city'), table_name='traffic_data')
|
||||
op.drop_index('idx_traffic_training', table_name='traffic_data')
|
||||
op.drop_index('idx_traffic_tenant_date', table_name='traffic_data')
|
||||
op.drop_index('idx_traffic_quality', table_name='traffic_data')
|
||||
op.drop_index('idx_traffic_measurement_point', table_name='traffic_data')
|
||||
op.drop_index('idx_traffic_location_date', table_name='traffic_data')
|
||||
op.drop_index('idx_traffic_district_date', table_name='traffic_data')
|
||||
op.drop_index('idx_traffic_city_location', table_name='traffic_data')
|
||||
op.drop_index('idx_traffic_city_date', table_name='traffic_data')
|
||||
op.drop_table('traffic_data')
|
||||
|
||||
op.drop_index(op.f('ix_traffic_measurement_points_measurement_point_id'), table_name='traffic_measurement_points')
|
||||
op.drop_index(op.f('ix_traffic_measurement_points_city'), table_name='traffic_measurement_points')
|
||||
op.drop_index('idx_unique_city_point', table_name='traffic_measurement_points')
|
||||
op.drop_index('idx_points_road_type', table_name='traffic_measurement_points')
|
||||
op.drop_index('idx_points_district', table_name='traffic_measurement_points')
|
||||
op.drop_index('idx_points_city_location', table_name='traffic_measurement_points')
|
||||
op.drop_index('idx_points_active', table_name='traffic_measurement_points')
|
||||
op.drop_table('traffic_measurement_points')
|
||||
|
||||
op.drop_index(op.f('ix_city_traffic_data_date'), table_name='city_traffic_data')
|
||||
op.drop_index(op.f('ix_city_traffic_data_city_id'), table_name='city_traffic_data')
|
||||
op.drop_index('idx_city_traffic_lookup', table_name='city_traffic_data')
|
||||
op.drop_table('city_traffic_data')
|
||||
|
||||
# Weather Data
|
||||
op.drop_index(op.f('ix_weather_forecasts_location_id'), table_name='weather_forecasts')
|
||||
op.drop_index('idx_forecast_location_date', table_name='weather_forecasts')
|
||||
op.drop_table('weather_forecasts')
|
||||
|
||||
op.drop_index(op.f('ix_weather_data_tenant_id'), table_name='weather_data')
|
||||
op.drop_index(op.f('ix_weather_data_location_id'), table_name='weather_data')
|
||||
op.drop_index(op.f('ix_weather_data_date'), table_name='weather_data')
|
||||
op.drop_index('idx_weather_location_date', table_name='weather_data')
|
||||
op.drop_table('weather_data')
|
||||
|
||||
op.drop_index(op.f('ix_city_weather_data_date'), table_name='city_weather_data')
|
||||
op.drop_index(op.f('ix_city_weather_data_city_id'), table_name='city_weather_data')
|
||||
op.drop_index('idx_city_weather_lookup', table_name='city_weather_data')
|
||||
op.drop_table('city_weather_data')
|
||||
|
||||
# Audit Logs
|
||||
op.drop_index(op.f('ix_audit_logs_user_id'), table_name='audit_logs')
|
||||
op.drop_index(op.f('ix_audit_logs_tenant_id'), table_name='audit_logs')
|
||||
op.drop_index(op.f('ix_audit_logs_severity'), table_name='audit_logs')
|
||||
op.drop_index(op.f('ix_audit_logs_service_name'), table_name='audit_logs')
|
||||
op.drop_index(op.f('ix_audit_logs_resource_type'), table_name='audit_logs')
|
||||
op.drop_index(op.f('ix_audit_logs_resource_id'), table_name='audit_logs')
|
||||
op.drop_index(op.f('ix_audit_logs_created_at'), table_name='audit_logs')
|
||||
op.drop_index(op.f('ix_audit_logs_action'), table_name='audit_logs')
|
||||
op.drop_index('idx_audit_user_created', table_name='audit_logs')
|
||||
op.drop_index('idx_audit_tenant_created', table_name='audit_logs')
|
||||
op.drop_index('idx_audit_severity_created', table_name='audit_logs')
|
||||
op.drop_index('idx_audit_service_created', table_name='audit_logs')
|
||||
op.drop_index('idx_audit_resource_type_action', table_name='audit_logs')
|
||||
op.drop_table('audit_logs')
|
||||
19
services/external/pytest.ini
vendored
Normal file
@@ -0,0 +1,19 @@
[tool:pytest]
testpaths = tests
asyncio_mode = auto
python_files = test_*.py
python_classes = Test*
python_functions = test_*
addopts =
    -v
    --tb=short
    --strict-markers
    --disable-warnings
    --cov=app
    --cov-report=term-missing
    --cov-report=html:htmlcov
markers =
    unit: Unit tests
    integration: Integration tests
    slow: Slow running tests
    external: Tests requiring external services
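# Example invocations (illustrative, using the markers defined above):
#   pytest -m unit              # run only unit tests
#   pytest -m "not external"    # skip tests that need live external services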
59
services/external/requirements.txt
vendored
Normal file
@@ -0,0 +1,59 @@
# services/external/requirements.txt
# FastAPI and web framework
fastapi==0.119.0
uvicorn[standard]==0.32.1

# Database
sqlalchemy==2.0.44
psycopg2-binary==2.9.10
asyncpg==0.30.0
aiosqlite==0.20.0
alembic==1.17.0

# HTTP clients for external APIs
httpx==0.28.1
aiofiles==24.1.0
requests==2.32.3

# Data processing and time series
pandas==2.2.3
numpy==2.2.2

# Validation and serialization
pydantic==2.12.3
pydantic-settings==2.7.1
email-validator==2.2.0

# Authentication and security
python-jose[cryptography]==3.3.0
cryptography==44.0.0

# Logging and monitoring
structlog==25.4.0
psutil==5.9.8

# Message queues
aio-pika==9.4.3

# Background job processing
redis==6.4.0

# Date and time handling
pytz==2024.2
python-dateutil==2.9.0.post0

# XML parsing (for some APIs)
lxml==5.3.0

# Geospatial processing
pyproj==3.7.1

# OpenStreetMap / POI detection
overpy==0.7

# Development
python-multipart==0.0.6

# External API specific
beautifulsoup4==4.12.3
xmltodict==0.14.2
119
services/external/scripts/seed_school_calendars.py
vendored
Executable file
@@ -0,0 +1,119 @@
#!/usr/bin/env python3
"""
Seed School Calendars Script
Loads school calendars from CalendarRegistry into the database
"""

import asyncio
import sys
import os

# Add parent directory to path to allow imports
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from app.core.database import database_manager
from app.repositories.calendar_repository import CalendarRepository
from app.registry.calendar_registry import CalendarRegistry
import structlog

logger = structlog.get_logger()


async def seed_calendars():
    """Seed school calendars from registry into database"""

    logger.info("Starting school calendar seeding...")

    # Get all calendars from registry
    calendars = CalendarRegistry.get_all_calendars()
    logger.info(f"Found {len(calendars)} calendars in registry")

    # Initialize database
    await database_manager.initialize()

    try:
        async with database_manager.get_session() as session:
            repo = CalendarRepository(session)

            seeded_count = 0
            skipped_count = 0

            for cal_def in calendars:
                logger.info(
                    "Processing calendar",
                    calendar_id=cal_def.calendar_id,
                    city=cal_def.city_id,
                    type=cal_def.school_type.value
                )

                # Check if calendar already exists
                existing = await repo.get_calendar_by_city_type_year(
                    city_id=cal_def.city_id,
                    school_type=cal_def.school_type.value,
                    academic_year=cal_def.academic_year
                )

                if existing:
                    logger.info(
                        "Calendar already exists, skipping",
                        calendar_id=cal_def.calendar_id
                    )
                    skipped_count += 1
                    continue

                # Convert holiday periods to dict format
                holiday_periods = [
                    {
                        "name": hp.name,
                        "start_date": hp.start_date,
                        "end_date": hp.end_date,
                        "description": hp.description
                    }
                    for hp in cal_def.holiday_periods
                ]

                # Convert school hours to dict format
                school_hours = {
                    "morning_start": cal_def.school_hours.morning_start,
                    "morning_end": cal_def.school_hours.morning_end,
                    "has_afternoon_session": cal_def.school_hours.has_afternoon_session,
                    "afternoon_start": cal_def.school_hours.afternoon_start,
                    "afternoon_end": cal_def.school_hours.afternoon_end
                }

                # Create calendar in database
                created_calendar = await repo.create_school_calendar(
                    city_id=cal_def.city_id,
                    calendar_name=cal_def.calendar_name,
                    school_type=cal_def.school_type.value,
                    academic_year=cal_def.academic_year,
                    holiday_periods=holiday_periods,
                    school_hours=school_hours,
                    source=cal_def.source,
                    enabled=cal_def.enabled
                )

                logger.info(
                    "Calendar seeded successfully",
                    calendar_id=str(created_calendar.id),
                    city=cal_def.city_id,
                    type=cal_def.school_type.value
                )
                seeded_count += 1

            logger.info(
                "Calendar seeding completed",
                seeded=seeded_count,
                skipped=skipped_count,
                total=len(calendars)
            )

    except Exception as e:
        logger.error("Error seeding calendars", error=str(e))
        raise
    finally:
        await database_manager.close()


if __name__ == "__main__":
    asyncio.run(seed_calendars())
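# Usage sketch (assumes the service's database environment variables are set,
# e.g. the same ones resolved by the Alembic env.py above):
#   python services/external/scripts/seed_school_calendars.py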
314
services/external/tests/conftest.py
vendored
Normal file
@@ -0,0 +1,314 @@
|
||||
# services/external/tests/conftest.py
|
||||
"""
|
||||
Pytest configuration and fixtures for External Service tests
|
||||
"""
|
||||
|
||||
import pytest
|
||||
import asyncio
|
||||
from datetime import datetime, timezone
|
||||
from typing import AsyncGenerator
|
||||
from uuid import uuid4, UUID
|
||||
|
||||
from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession, async_sessionmaker
|
||||
from sqlalchemy.pool import StaticPool
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
from app.main import app
|
||||
from app.core.config import settings
|
||||
from app.core.database import Base, get_db
|
||||
from app.models.weather import WeatherData, WeatherStation
|
||||
from app.models.traffic import TrafficData, TrafficMeasurementPoint
|
||||
|
||||
|
||||
# Test database configuration
|
||||
TEST_DATABASE_URL = "sqlite+aiosqlite:///:memory:"
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def event_loop():
|
||||
"""Create event loop for the test session"""
|
||||
loop = asyncio.new_event_loop()
|
||||
yield loop
|
||||
loop.close()
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
async def test_engine():
|
||||
"""Create test database engine"""
|
||||
engine = create_async_engine(
|
||||
TEST_DATABASE_URL,
|
||||
poolclass=StaticPool,
|
||||
connect_args={"check_same_thread": False}
|
||||
)
|
||||
|
||||
# Create tables
|
||||
async with engine.begin() as conn:
|
||||
await conn.run_sync(Base.metadata.create_all)
|
||||
|
||||
yield engine
|
||||
|
||||
await engine.dispose()
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
async def test_db_session(test_engine) -> AsyncGenerator[AsyncSession, None]:
|
||||
"""Create test database session"""
|
||||
async_session = async_sessionmaker(
|
||||
test_engine, class_=AsyncSession, expire_on_commit=False
|
||||
)
|
||||
|
||||
async with async_session() as session:
|
||||
yield session
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def test_client():
|
||||
"""Create test client"""
|
||||
return TestClient(app)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
async def override_get_db(test_db_session):
|
||||
"""Override get_db dependency for testing"""
|
||||
async def _override_get_db():
|
||||
yield test_db_session
|
||||
|
||||
app.dependency_overrides[get_db] = _override_get_db
|
||||
yield
|
||||
app.dependency_overrides.clear()
|
||||
|
||||
|
||||
# Test data fixtures
|
||||
@pytest.fixture
|
||||
def sample_tenant_id() -> UUID:
|
||||
"""Sample tenant ID for testing"""
|
||||
return uuid4()
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def sample_weather_data() -> dict:
|
||||
"""Sample weather data for testing"""
|
||||
return {
|
||||
"city": "madrid",
|
||||
"location_id": "40.4168,-3.7038",
|
||||
"date": datetime.now(timezone.utc),
|
||||
"temperature": 18.5,
|
||||
"humidity": 65.0,
|
||||
"pressure": 1013.2,
|
||||
"wind_speed": 10.2,
|
||||
"condition": "partly_cloudy",
|
||||
"description": "Parcialmente nublado",
|
||||
"source": "aemet",
|
||||
"data_type": "current",
|
||||
"is_forecast": False,
|
||||
"data_quality_score": 95.0
|
||||
}
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def sample_traffic_data() -> dict:
|
||||
"""Sample traffic data for testing"""
|
||||
return {
|
||||
"city": "madrid",
|
||||
"location_id": "PM_M30_001",
|
||||
"date": datetime.now(timezone.utc),
|
||||
"measurement_point_id": "PM_M30_001",
|
||||
"measurement_point_name": "M-30 Norte - Nudo Norte",
|
||||
"measurement_point_type": "M30",
|
||||
"traffic_volume": 850,
|
||||
"average_speed": 65.2,
|
||||
"congestion_level": "medium",
|
||||
"occupation_percentage": 45.8,
|
||||
"latitude": 40.4501,
|
||||
"longitude": -3.6919,
|
||||
"district": "Chamartín",
|
||||
"source": "madrid_opendata",
|
||||
"data_quality_score": 92.0,
|
||||
"is_synthetic": False
|
||||
}
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def sample_weather_forecast() -> list[dict]:
|
||||
"""Sample weather forecast data"""
|
||||
base_date = datetime.now(timezone.utc)
|
||||
return [
|
||||
{
|
||||
"city": "madrid",
|
||||
"location_id": "40.4168,-3.7038",
|
||||
"date": base_date,
|
||||
"forecast_date": base_date,
|
||||
"temperature": 20.0,
|
||||
"temperature_min": 15.0,
|
||||
"temperature_max": 25.0,
|
||||
"precipitation": 0.0,
|
||||
"humidity": 60.0,
|
||||
"wind_speed": 12.0,
|
||||
"condition": "sunny",
|
||||
"description": "Soleado",
|
||||
"source": "aemet",
|
||||
"data_type": "forecast",
|
||||
"is_forecast": True,
|
||||
"data_quality_score": 85.0
|
||||
}
|
||||
]
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
async def populated_weather_db(test_db_session: AsyncSession, sample_weather_data: dict):
|
||||
"""Database populated with weather test data"""
|
||||
weather_record = WeatherData(**sample_weather_data)
|
||||
test_db_session.add(weather_record)
|
||||
await test_db_session.commit()
|
||||
yield test_db_session
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
async def populated_traffic_db(test_db_session: AsyncSession, sample_traffic_data: dict):
|
||||
"""Database populated with traffic test data"""
|
||||
traffic_record = TrafficData(**sample_traffic_data)
|
||||
test_db_session.add(traffic_record)
|
||||
await test_db_session.commit()
|
||||
yield test_db_session
|
||||
|
||||
|
||||
# Mock external API fixtures
|
||||
@pytest.fixture
|
||||
def mock_aemet_response():
|
||||
"""Mock AEMET API response"""
|
||||
return {
|
||||
"date": datetime.now(timezone.utc),
|
||||
"temperature": 18.5,
|
||||
"humidity": 65.0,
|
||||
"pressure": 1013.2,
|
||||
"wind_speed": 10.2,
|
||||
"description": "Parcialmente nublado",
|
||||
"source": "aemet"
|
||||
}
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_madrid_traffic_xml():
|
||||
"""Mock Madrid Open Data traffic XML"""
|
||||
return """<?xml version="1.0" encoding="UTF-8"?>
|
||||
<pms>
|
||||
<pm codigo="PM_M30_001" nombre="M-30 Norte - Nudo Norte">
|
||||
<intensidad>850</intensidad>
|
||||
<ocupacion>45</ocupacion>
|
||||
<velocidad>65</velocidad>
|
||||
<fechahora>2024-01-15T10:30:00</fechahora>
|
||||
</pm>
|
||||
<pm codigo="PM_URB_002" nombre="Gran Vía - Plaza España">
|
||||
<intensidad>320</intensidad>
|
||||
<ocupacion>78</ocupacion>
|
||||
<velocidad>25</velocidad>
|
||||
<fechahora>2024-01-15T10:30:00</fechahora>
|
||||
</pm>
|
||||
</pms>"""
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_messaging():
|
||||
"""Mock messaging service"""
|
||||
class MockMessaging:
|
||||
def __init__(self):
|
||||
self.published_events = []
|
||||
|
||||
async def publish_weather_updated(self, data):
|
||||
self.published_events.append(("weather_updated", data))
|
||||
return True
|
||||
|
||||
async def publish_traffic_updated(self, data):
|
||||
self.published_events.append(("traffic_updated", data))
|
||||
return True
|
||||
|
||||
async def publish_collection_job_started(self, data):
|
||||
self.published_events.append(("job_started", data))
|
||||
return True
|
||||
|
||||
async def publish_collection_job_completed(self, data):
|
||||
self.published_events.append(("job_completed", data))
|
||||
return True
|
||||
|
||||
return MockMessaging()
|
||||
|
||||
|
||||
# Mock external clients
|
||||
@pytest.fixture
|
||||
def mock_aemet_client():
|
||||
"""Mock AEMET client"""
|
||||
class MockAEMETClient:
|
||||
async def get_current_weather(self, lat, lon):
|
||||
return {
|
||||
"date": datetime.now(timezone.utc),
|
||||
"temperature": 18.5,
|
||||
"humidity": 65.0,
|
||||
"pressure": 1013.2,
|
||||
"wind_speed": 10.2,
|
||||
"description": "Parcialmente nublado",
|
||||
"source": "aemet"
|
||||
}
|
||||
|
||||
async def get_forecast(self, lat, lon, days):
|
||||
return [
|
||||
{
|
||||
"forecast_date": datetime.now(timezone.utc),
|
||||
"temperature": 20.0,
|
||||
"temperature_min": 15.0,
|
||||
"temperature_max": 25.0,
|
||||
"precipitation": 0.0,
|
||||
"humidity": 60.0,
|
||||
"wind_speed": 12.0,
|
||||
"description": "Soleado",
|
||||
"source": "aemet"
|
||||
}
|
||||
]
|
||||
|
||||
return MockAEMETClient()
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_madrid_client():
|
||||
"""Mock Madrid traffic client"""
|
||||
class MockMadridClient:
|
||||
async def fetch_current_traffic_xml(self):
|
||||
return """<?xml version="1.0" encoding="UTF-8"?>
|
||||
<pms>
|
||||
<pm codigo="PM_TEST_001" nombre="Test Point">
|
||||
<intensidad>500</intensidad>
|
||||
<ocupacion>50</ocupacion>
|
||||
<velocidad>50</velocidad>
|
||||
<fechahora>2024-01-15T10:30:00</fechahora>
|
||||
</pm>
|
||||
</pms>"""
|
||||
|
||||
return MockMadridClient()
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_madrid_processor():
|
||||
"""Mock Madrid traffic processor"""
|
||||
class MockMadridProcessor:
|
||||
async def process_current_traffic_xml(self, xml_content):
|
||||
return [
|
||||
{
|
||||
"city": "madrid",
|
||||
"location_id": "PM_TEST_001",
|
||||
"date": datetime.now(timezone.utc),
|
||||
"measurement_point_id": "PM_TEST_001",
|
||||
"measurement_point_name": "Test Point",
|
||||
"measurement_point_type": "TEST",
|
||||
"traffic_volume": 500,
|
||||
"average_speed": 50.0,
|
||||
"congestion_level": "medium",
|
||||
"occupation_percentage": 50.0,
|
||||
"latitude": 40.4168,
|
||||
"longitude": -3.7038,
|
||||
"district": "Centro",
|
||||
"source": "madrid_opendata",
|
||||
"data_quality_score": 90.0,
|
||||
"is_synthetic": False
|
||||
}
|
||||
]
|
||||
|
||||
return MockMadridProcessor()
|
||||
9
services/external/tests/requirements.txt
vendored
Normal file
@@ -0,0 +1,9 @@
# Testing dependencies for External Service
pytest==7.4.3
pytest-asyncio==0.21.1
pytest-mock==3.12.0
httpx==0.25.2
fastapi[all]==0.104.1
sqlalchemy[asyncio]==2.0.23
aiosqlite==0.19.0
coverage==7.3.2
393
services/external/tests/unit/test_repositories.py
vendored
Normal file
@@ -0,0 +1,393 @@
|
||||
# services/external/tests/unit/test_repositories.py
|
||||
"""
|
||||
Unit tests for External Service Repositories
|
||||
"""
|
||||
|
||||
import pytest
|
||||
from datetime import datetime, timezone, timedelta
|
||||
from uuid import uuid4
|
||||
|
||||
from app.repositories.weather_repository import WeatherRepository
|
||||
from app.repositories.traffic_repository import TrafficRepository
|
||||
from app.models.weather import WeatherData, WeatherStation, WeatherDataJob
|
||||
from app.models.traffic import TrafficData, TrafficMeasurementPoint, TrafficDataJob
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
class TestWeatherRepository:
|
||||
"""Test Weather Repository operations"""
|
||||
|
||||
async def test_create_weather_data(self, test_db_session, sample_weather_data):
|
||||
"""Test creating weather data"""
|
||||
repository = WeatherRepository(test_db_session)
|
||||
|
||||
record = await repository.create_weather_data(sample_weather_data)
|
||||
|
||||
assert record is not None
|
||||
assert record.id is not None
|
||||
assert record.city == sample_weather_data["city"]
|
||||
assert record.temperature == sample_weather_data["temperature"]
|
||||
|
||||
async def test_get_current_weather(self, populated_weather_db, sample_weather_data):
|
||||
"""Test getting current weather data"""
|
||||
repository = WeatherRepository(populated_weather_db)
|
||||
|
||||
result = await repository.get_current_weather("madrid")
|
||||
|
||||
assert result is not None
|
||||
assert result.city == "madrid"
|
||||
assert result.temperature == sample_weather_data["temperature"]
|
||||
|
||||
async def test_get_weather_forecast(self, test_db_session, sample_weather_forecast):
|
||||
"""Test getting weather forecast"""
|
||||
repository = WeatherRepository(test_db_session)
|
||||
|
||||
# Create forecast data
|
||||
for forecast_item in sample_weather_forecast:
|
||||
await repository.create_weather_data(forecast_item)
|
||||
|
||||
result = await repository.get_weather_forecast("madrid", 7)
|
||||
|
||||
assert len(result) == 1
|
||||
assert result[0].is_forecast is True
|
||||
|
||||
async def test_get_historical_weather(self, test_db_session, sample_weather_data):
|
||||
"""Test getting historical weather data"""
|
||||
repository = WeatherRepository(test_db_session)
|
||||
|
||||
# Create historical data
|
||||
historical_data = sample_weather_data.copy()
|
||||
historical_data["date"] = datetime.now(timezone.utc) - timedelta(days=1)
|
||||
await repository.create_weather_data(historical_data)
|
||||
|
||||
start_date = datetime.now(timezone.utc) - timedelta(days=2)
|
||||
end_date = datetime.now(timezone.utc)
|
||||
|
||||
result = await repository.get_historical_weather("madrid", start_date, end_date)
|
||||
|
||||
assert len(result) >= 1
|
||||
|
||||
async def test_create_weather_station(self, test_db_session):
|
||||
"""Test creating weather station"""
|
||||
repository = WeatherRepository(test_db_session)
|
||||
|
||||
station_data = {
|
||||
"station_id": "TEST_001",
|
||||
"name": "Test Station",
|
||||
"city": "madrid",
|
||||
"latitude": 40.4168,
|
||||
"longitude": -3.7038,
|
||||
"altitude": 650.0,
|
||||
"is_active": True
|
||||
}
|
||||
|
||||
station = await repository.create_weather_station(station_data)
|
||||
|
||||
assert station is not None
|
||||
assert station.station_id == "TEST_001"
|
||||
assert station.name == "Test Station"
|
||||
|
||||
async def test_get_weather_stations(self, test_db_session):
|
||||
"""Test getting weather stations"""
|
||||
repository = WeatherRepository(test_db_session)
|
||||
|
||||
# Create test station
|
||||
station_data = {
|
||||
"station_id": "TEST_001",
|
||||
"name": "Test Station",
|
||||
"city": "madrid",
|
||||
"latitude": 40.4168,
|
||||
"longitude": -3.7038,
|
||||
"is_active": True
|
||||
}
|
||||
await repository.create_weather_station(station_data)
|
||||
|
||||
stations = await repository.get_weather_stations("madrid")
|
||||
|
||||
assert len(stations) == 1
|
||||
assert stations[0].station_id == "TEST_001"
|
||||
|
||||
    async def test_create_weather_job(self, test_db_session, sample_tenant_id):
        """Test creating weather data collection job"""
        repository = WeatherRepository(test_db_session)

        job_data = {
            "job_type": "current",
            "city": "madrid",
            "status": "pending",
            "scheduled_at": datetime.utcnow(),
            "tenant_id": sample_tenant_id
        }

        job = await repository.create_weather_job(job_data)

        assert job is not None
        assert job.job_type == "current"
        assert job.status == "pending"

    async def test_update_weather_job(self, test_db_session, sample_tenant_id):
        """Test updating weather job"""
        repository = WeatherRepository(test_db_session)

        # Create job first
        job_data = {
            "job_type": "current",
            "city": "madrid",
            "status": "pending",
            "scheduled_at": datetime.utcnow(),
            "tenant_id": sample_tenant_id
        }
        job = await repository.create_weather_job(job_data)

        # Update job
        update_data = {
            "status": "completed",
            "completed_at": datetime.utcnow(),
            "success_count": 1
        }

        success = await repository.update_weather_job(job.id, update_data)

        assert success is True

    async def test_get_weather_jobs(self, test_db_session, sample_tenant_id):
        """Test getting weather jobs"""
        repository = WeatherRepository(test_db_session)

        # Create test job
        job_data = {
            "job_type": "forecast",
            "city": "madrid",
            "status": "completed",
            "scheduled_at": datetime.utcnow(),
            "tenant_id": sample_tenant_id
        }
        await repository.create_weather_job(job_data)

        jobs = await repository.get_weather_jobs()

        assert len(jobs) >= 1
        assert any(job.job_type == "forecast" for job in jobs)
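The fixtures these repository tests lean on (test_db_session, sample_weather_data, sample_weather_forecast, sample_tenant_id, populated_traffic_db) are not part of this excerpt; they would normally be defined in tests/conftest.py. A minimal sketch of what two of them could look like, assuming SQLAlchemy 2.0 async sessions, an in-memory SQLite database and an app.models.base declarative Base (all assumptions, not code from this commit):

# Hypothetical tests/conftest.py excerpt -- illustrative only
import pytest
import pytest_asyncio
from uuid import uuid4
from sqlalchemy.ext.asyncio import create_async_engine, async_sessionmaker

from app.models.base import Base  # assumed location of the declarative base


@pytest_asyncio.fixture
async def test_db_session():
    """Fresh in-memory database session per test."""
    engine = create_async_engine("sqlite+aiosqlite:///:memory:")
    async with engine.begin() as conn:
        await conn.run_sync(Base.metadata.create_all)
    session_factory = async_sessionmaker(engine, expire_on_commit=False)
    async with session_factory() as session:
        yield session
    await engine.dispose()


@pytest.fixture
def sample_tenant_id():
    """Random tenant UUID shared by the job tests."""
    return uuid4()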
@pytest.mark.asyncio
class TestTrafficRepository:
    """Test Traffic Repository operations"""

    async def test_create_traffic_data(self, test_db_session, sample_traffic_data):
        """Test creating traffic data"""
        repository = TrafficRepository(test_db_session)

        # Convert sample data to list for bulk create
        traffic_list = [sample_traffic_data]

        count = await repository.bulk_create_traffic_data(traffic_list)

        assert count == 1

    async def test_get_current_traffic(self, populated_traffic_db, sample_traffic_data):
        """Test getting current traffic data"""
        repository = TrafficRepository(populated_traffic_db)

        result = await repository.get_current_traffic("madrid")

        assert len(result) >= 1
        assert result[0].city == "madrid"

    async def test_get_current_traffic_with_filters(self, populated_traffic_db):
        """Test getting current traffic with filters"""
        repository = TrafficRepository(populated_traffic_db)

        result = await repository.get_current_traffic("madrid", district="Chamartín")

        # Should return results based on filter
        assert isinstance(result, list)

    async def test_get_historical_traffic(self, test_db_session, sample_traffic_data):
        """Test getting historical traffic data"""
        repository = TrafficRepository(test_db_session)

        # Create historical data
        historical_data = sample_traffic_data.copy()
        historical_data["date"] = datetime.now(timezone.utc) - timedelta(days=1)
        await repository.bulk_create_traffic_data([historical_data])

        start_date = datetime.now(timezone.utc) - timedelta(days=2)
        end_date = datetime.now(timezone.utc)

        result = await repository.get_historical_traffic("madrid", start_date, end_date)

        assert len(result) >= 1
    async def test_create_measurement_point(self, test_db_session):
        """Test creating traffic measurement point"""
        repository = TrafficRepository(test_db_session)

        point_data = {
            "point_id": "TEST_POINT_001",
            "name": "Test Measurement Point",
            "city": "madrid",
            "point_type": "TEST",
            "latitude": 40.4168,
            "longitude": -3.7038,
            "district": "Centro",
            "road_name": "Test Road",
            "is_active": True
        }

        point = await repository.create_measurement_point(point_data)

        assert point is not None
        assert point.point_id == "TEST_POINT_001"
        assert point.name == "Test Measurement Point"

    async def test_get_measurement_points(self, test_db_session):
        """Test getting measurement points"""
        repository = TrafficRepository(test_db_session)

        # Create test point
        point_data = {
            "point_id": "TEST_POINT_001",
            "name": "Test Point",
            "city": "madrid",
            "point_type": "TEST",
            "latitude": 40.4168,
            "longitude": -3.7038,
            "is_active": True
        }
        await repository.create_measurement_point(point_data)

        points = await repository.get_measurement_points("madrid")

        assert len(points) == 1
        assert points[0].point_id == "TEST_POINT_001"

    async def test_get_measurement_points_with_filters(self, test_db_session):
        """Test getting measurement points with filters"""
        repository = TrafficRepository(test_db_session)

        # Create test points with different types
        for i, point_type in enumerate(["M30", "URB", "TEST"]):
            point_data = {
                "point_id": f"TEST_POINT_{i:03d}",
                "name": f"Test Point {i}",
                "city": "madrid",
                "point_type": point_type,
                "latitude": 40.4168,
                "longitude": -3.7038,
                "is_active": True
            }
            await repository.create_measurement_point(point_data)

        # Filter by type
        points = await repository.get_measurement_points("madrid", road_type="M30")

        assert len(points) == 1
        assert points[0].point_type == "M30"
    async def test_get_traffic_analytics(self, populated_traffic_db):
        """Test getting traffic analytics"""
        repository = TrafficRepository(populated_traffic_db)

        analytics = await repository.get_traffic_analytics("madrid")

        assert isinstance(analytics, dict)
        assert "total_measurements" in analytics
        assert "average_volume" in analytics
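The analytics test only pins down the returned keys; an aggregation along the following lines would satisfy it. This is a sketch built on the assumption that TrafficData is a SQLAlchemy model with id, city and traffic_volume columns and that the caller passes an async session -- it is not the repository code from this commit:

# Hypothetical aggregation behind get_traffic_analytics -- sketch only
from sqlalchemy import select, func
from app.models.traffic import TrafficData

async def traffic_analytics_sketch(session, city: str) -> dict:
    stmt = select(
        func.count(TrafficData.id).label("total_measurements"),
        func.avg(TrafficData.traffic_volume).label("average_volume"),
    ).where(TrafficData.city == city)
    row = (await session.execute(stmt)).one()
    return {
        "total_measurements": row.total_measurements or 0,
        "average_volume": float(row.average_volume or 0.0),
    }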
    async def test_create_traffic_job(self, test_db_session, sample_tenant_id):
        """Test creating traffic collection job"""
        repository = TrafficRepository(test_db_session)

        job_data = {
            "job_type": "current",
            "city": "madrid",
            "status": "pending",
            "scheduled_at": datetime.utcnow(),
            "tenant_id": sample_tenant_id
        }

        job = await repository.create_traffic_job(job_data)

        assert job is not None
        assert job.job_type == "current"
        assert job.status == "pending"

    async def test_update_traffic_job(self, test_db_session, sample_tenant_id):
        """Test updating traffic job"""
        repository = TrafficRepository(test_db_session)

        # Create job first
        job_data = {
            "job_type": "current",
            "city": "madrid",
            "status": "pending",
            "scheduled_at": datetime.utcnow(),
            "tenant_id": sample_tenant_id
        }
        job = await repository.create_traffic_job(job_data)

        # Update job
        update_data = {
            "status": "completed",
            "completed_at": datetime.utcnow(),
            "success_count": 10
        }

        success = await repository.update_traffic_job(job.id, update_data)

        assert success is True

    async def test_get_traffic_jobs(self, test_db_session, sample_tenant_id):
        """Test getting traffic jobs"""
        repository = TrafficRepository(test_db_session)

        # Create test job
        job_data = {
            "job_type": "historical",
            "city": "madrid",
            "status": "completed",
            "scheduled_at": datetime.utcnow(),
            "tenant_id": sample_tenant_id
        }
        await repository.create_traffic_job(job_data)

        jobs = await repository.get_traffic_jobs()

        assert len(jobs) >= 1
        assert any(job.job_type == "historical" for job in jobs)
    async def test_bulk_create_performance(self, test_db_session):
        """Test bulk create performance"""
        repository = TrafficRepository(test_db_session)

        # Create large dataset
        bulk_data = []
        for i in range(100):
            data = {
                "city": "madrid",
                "location_id": f"PM_TEST_{i:03d}",
                "date": datetime.now(timezone.utc),
                "measurement_point_id": f"PM_TEST_{i:03d}",
                "measurement_point_name": f"Test Point {i}",
                "measurement_point_type": "TEST",
                "traffic_volume": 100 + i,
                "average_speed": 50.0,
                "congestion_level": "medium",
                "occupation_percentage": 50.0,
                "latitude": 40.4168,
                "longitude": -3.7038,
                "source": "test"
            }
            bulk_data.append(data)

        import time
        start_time = time.time()

        count = await repository.bulk_create_traffic_data(bulk_data)

        end_time = time.time()
        execution_time = end_time - start_time

        assert count == 100
        assert execution_time < 3.0  # Should complete in under 3 seconds
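Meeting the 3-second budget above usually comes down to inserting the 100 rows in a single statement rather than one at a time. A minimal sketch of such a bulk insert, assuming a SQLAlchemy 2.0 async session and the TrafficData model (the helper name and flush strategy are assumptions, not the shipped repository code):

# Hypothetical single-statement bulk insert -- sketch only
from sqlalchemy import insert
from app.models.traffic import TrafficData

async def bulk_create_traffic_data_sketch(session, records: list[dict]) -> int:
    if not records:
        return 0
    # one INSERT with many parameter sets instead of 100 round trips
    await session.execute(insert(TrafficData), records)
    await session.flush()
    return len(records)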
445
services/external/tests/unit/test_services.py
vendored
Normal file
445
services/external/tests/unit/test_services.py
vendored
Normal file
@@ -0,0 +1,445 @@
# services/external/tests/unit/test_services.py
"""
Unit tests for the External Service service layer
"""

import pytest
from datetime import datetime, timezone, timedelta
from unittest.mock import AsyncMock, patch
from uuid import uuid4

from app.services.weather_service import WeatherService
from app.services.traffic_service import TrafficService


@pytest.mark.asyncio
class TestWeatherService:
    """Test Weather Service business logic"""

    @pytest.fixture
    def weather_service(self):
        """Create weather service instance"""
        return WeatherService()

    async def test_get_current_weather_from_cache(self, weather_service):
        """Test getting current weather from cache"""
        with patch('app.services.weather_service.get_db_transaction') as mock_get_db:
            mock_db = AsyncMock()
            mock_get_db.return_value.__aenter__.return_value = mock_db

            mock_repository = AsyncMock()
            mock_weather = AsyncMock()
            mock_weather.date = datetime.now(timezone.utc) - timedelta(minutes=30)  # Fresh data
            mock_weather.to_dict.return_value = {"temperature": 18.5, "city": "madrid"}
            mock_repository.get_current_weather.return_value = mock_weather

            with patch('app.services.weather_service.WeatherRepository', return_value=mock_repository):
                result = await weather_service.get_current_weather("madrid")

                assert result is not None
                assert result["temperature"] == 18.5
                assert result["city"] == "madrid"
    async def test_get_current_weather_fetch_from_api(self, weather_service, mock_aemet_response):
        """Test getting current weather from API when cache is stale"""
        with patch('app.services.weather_service.get_db_transaction') as mock_get_db:
            mock_db = AsyncMock()
            mock_get_db.return_value.__aenter__.return_value = mock_db

            mock_repository = AsyncMock()
            # No cached data or stale data
            mock_repository.get_current_weather.return_value = None
            mock_stored = AsyncMock()
            mock_stored.to_dict.return_value = {"temperature": 20.0}
            mock_repository.create_weather_data.return_value = mock_stored

            # Mock AEMET client
            mock_client = AsyncMock()
            mock_client.get_current_weather.return_value = mock_aemet_response

            with patch('app.services.weather_service.WeatherRepository', return_value=mock_repository):
                weather_service.aemet_client = mock_client

                result = await weather_service.get_current_weather("madrid")

                assert result is not None
                assert result["temperature"] == 20.0
                mock_client.get_current_weather.assert_called_once()
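Taken together, the two tests above describe a read-through cache: serve the stored reading when it is recent, otherwise call AEMET and persist what comes back. A condensed sketch of that control flow -- the one-hour freshness window, argument shapes and return types are assumptions, not the actual WeatherService code:

# Condensed cache-or-fetch flow implied by the two tests above -- sketch only
from datetime import datetime, timezone, timedelta

async def get_current_weather_sketch(repository, aemet_client, city: str):
    cached = await repository.get_current_weather(city)
    if cached is not None:
        age = datetime.now(timezone.utc) - cached.date
        if age < timedelta(hours=1):  # assumed freshness window
            return cached.to_dict()   # cache hit: no external call
    raw = await aemet_client.get_current_weather(city)
    if raw is None:
        return None
    stored = await repository.create_weather_data(raw)
    return stored.to_dict()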
    async def test_get_weather_forecast_from_cache(self, weather_service):
        """Test getting weather forecast from cache"""
        with patch('app.services.weather_service.get_db_transaction') as mock_get_db:
            mock_db = AsyncMock()
            mock_get_db.return_value.__aenter__.return_value = mock_db

            mock_repository = AsyncMock()
            mock_forecast = [AsyncMock(), AsyncMock()]
            for item in mock_forecast:
                item.created_at = datetime.now(timezone.utc) - timedelta(hours=1)  # Fresh
                item.to_dict.return_value = {"temperature": 22.0}
            mock_repository.get_weather_forecast.return_value = mock_forecast

            with patch('app.services.weather_service.WeatherRepository', return_value=mock_repository):
                result = await weather_service.get_weather_forecast("madrid", 7)

                assert len(result) == 2
                assert all(item["temperature"] == 22.0 for item in result)

    async def test_get_weather_forecast_fetch_from_api(self, weather_service):
        """Test getting weather forecast from API when cache is stale"""
        with patch('app.services.weather_service.get_db_transaction') as mock_get_db:
            mock_db = AsyncMock()
            mock_get_db.return_value.__aenter__.return_value = mock_db

            mock_repository = AsyncMock()
            # No cached data
            mock_repository.get_weather_forecast.return_value = []
            mock_stored = AsyncMock()
            mock_stored.to_dict.return_value = {"temperature": 25.0}
            mock_repository.create_weather_data.return_value = mock_stored

            # Mock AEMET client
            mock_client = AsyncMock()
            mock_client.get_forecast.return_value = [
                {"forecast_date": datetime.now(), "temperature": 25.0}
            ]

            with patch('app.services.weather_service.WeatherRepository', return_value=mock_repository):
                weather_service.aemet_client = mock_client

                result = await weather_service.get_weather_forecast("madrid", 7)

                assert len(result) == 1
                assert result[0]["temperature"] == 25.0
                mock_client.get_forecast.assert_called_once()

    async def test_get_historical_weather(self, weather_service, sample_tenant_id):
        """Test getting historical weather data"""
        start_date = datetime.now(timezone.utc) - timedelta(days=7)
        end_date = datetime.now(timezone.utc)

        with patch('app.services.weather_service.get_db_transaction') as mock_get_db:
            mock_db = AsyncMock()
            mock_get_db.return_value.__aenter__.return_value = mock_db

            mock_repository = AsyncMock()
            mock_historical = [AsyncMock(), AsyncMock()]
            for item in mock_historical:
                item.to_dict.return_value = {"temperature": 18.0}
            mock_repository.get_historical_weather.return_value = mock_historical

            with patch('app.services.weather_service.WeatherRepository', return_value=mock_repository):
                result = await weather_service.get_historical_weather(
                    "madrid", start_date, end_date, sample_tenant_id
                )

                assert len(result) == 2
                assert all(item["temperature"] == 18.0 for item in result)

    async def test_get_weather_stations(self, weather_service):
        """Test getting weather stations"""
        with patch('app.services.weather_service.get_db_transaction') as mock_get_db:
            mock_db = AsyncMock()
            mock_get_db.return_value.__aenter__.return_value = mock_db

            mock_repository = AsyncMock()
            mock_stations = [AsyncMock()]
            mock_stations[0].to_dict.return_value = {"station_id": "TEST_001"}
            mock_repository.get_weather_stations.return_value = mock_stations

            with patch('app.services.weather_service.WeatherRepository', return_value=mock_repository):
                result = await weather_service.get_weather_stations("madrid")

                assert len(result) == 1
                assert result[0]["station_id"] == "TEST_001"
    async def test_trigger_weather_collection(self, weather_service, sample_tenant_id):
        """Test triggering weather data collection"""
        with patch('app.services.weather_service.get_db_transaction') as mock_get_db:
            mock_db = AsyncMock()
            mock_get_db.return_value.__aenter__.return_value = mock_db

            mock_repository = AsyncMock()
            mock_job = AsyncMock()
            mock_job.id = uuid4()
            mock_job.to_dict.return_value = {"id": str(mock_job.id), "status": "pending"}
            mock_repository.create_weather_job.return_value = mock_job

            with patch('app.services.weather_service.WeatherRepository', return_value=mock_repository):
                result = await weather_service.trigger_weather_collection(
                    "madrid", "current", sample_tenant_id
                )

                assert result["status"] == "pending"
                mock_repository.create_weather_job.assert_called_once()

    async def test_process_weather_collection_job(self, weather_service):
        """Test processing weather collection job"""
        job_id = uuid4()

        with patch('app.services.weather_service.get_db_transaction') as mock_get_db:
            mock_db = AsyncMock()
            mock_get_db.return_value.__aenter__.return_value = mock_db

            mock_repository = AsyncMock()

            # Mock job
            mock_job = AsyncMock()
            mock_job.id = job_id
            mock_job.job_type = "current"
            mock_job.city = "madrid"

            mock_repository.update_weather_job.return_value = True

            # Mock updated job after completion
            mock_updated_job = AsyncMock()
            mock_updated_job.to_dict.return_value = {"id": str(job_id), "status": "completed"}

            # get_weather_jobs is queried twice: before and after processing
            mock_repository.get_weather_jobs.side_effect = [
                [mock_job],         # First call returns pending job
                [mock_updated_job]  # Second call returns completed job
            ]

            with patch('app.services.weather_service.WeatherRepository', return_value=mock_repository):
                with patch.object(weather_service, '_collect_current_weather', return_value=1):
                    result = await weather_service.process_weather_collection_job(job_id)

                    assert result["status"] == "completed"

    async def test_map_weather_condition(self, weather_service):
        """Test weather condition mapping"""
        test_cases = [
            ("Soleado", "clear"),
            ("Nublado", "cloudy"),
            ("Parcialmente nublado", "partly_cloudy"),
            ("Lluvioso", "rainy"),
            ("Nevando", "snowy"),
            ("Tormenta", "stormy"),
            ("Desconocido", "unknown")
        ]

        for description, expected in test_cases:
            result = weather_service._map_weather_condition(description)
            assert result == expected
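The mapping test enumerates the Spanish descriptions the service is expected to normalise. A keyword lookup along these lines would satisfy exactly those cases; it is a sketch of one possible _map_weather_condition, not necessarily the implementation shipped in weather_service.py:

# One keyword-based mapping that satisfies the cases above -- sketch only
def map_weather_condition_sketch(description: str) -> str:
    text = (description or "").lower()
    keyword_map = [
        ("parcialmente nublado", "partly_cloudy"),  # match the more specific phrase first
        ("nublado", "cloudy"),
        ("soleado", "clear"),
        ("lluv", "rainy"),
        ("nev", "snowy"),
        ("torment", "stormy"),
    ]
    for keyword, condition in keyword_map:
        if keyword in text:
            return condition
    return "unknown"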
@pytest.mark.asyncio
class TestTrafficService:
    """Test Traffic Service business logic"""

    @pytest.fixture
    def traffic_service(self):
        """Create traffic service instance"""
        return TrafficService()

    async def test_get_current_traffic_from_cache(self, traffic_service):
        """Test getting current traffic from cache"""
        with patch('app.services.traffic_service.get_db_transaction') as mock_get_db:
            mock_db = AsyncMock()
            mock_get_db.return_value.__aenter__.return_value = mock_db

            mock_repository = AsyncMock()
            mock_traffic = [AsyncMock()]
            mock_traffic[0].date = datetime.now(timezone.utc) - timedelta(minutes=5)  # Fresh
            mock_traffic[0].to_dict.return_value = {"traffic_volume": 850}
            mock_repository.get_current_traffic.return_value = mock_traffic

            with patch('app.services.traffic_service.TrafficRepository', return_value=mock_repository):
                result = await traffic_service.get_current_traffic("madrid")

                assert len(result) == 1
                assert result[0]["traffic_volume"] == 850

    async def test_get_current_traffic_fetch_from_api(self, traffic_service, mock_madrid_traffic_xml):
        """Test getting current traffic from API when cache is stale"""
        with patch('app.services.traffic_service.get_db_transaction') as mock_get_db:
            mock_db = AsyncMock()
            mock_get_db.return_value.__aenter__.return_value = mock_db

            mock_repository = AsyncMock()
            # No cached data
            mock_repository.get_current_traffic.return_value = []
            mock_repository.bulk_create_traffic_data.return_value = 2

            # Mock clients
            mock_client = AsyncMock()
            mock_client.fetch_current_traffic_xml.return_value = mock_madrid_traffic_xml

            mock_processor = AsyncMock()
            mock_processor.process_current_traffic_xml.return_value = [
                {"traffic_volume": 850, "measurement_point_id": "PM_M30_001"},
                {"traffic_volume": 320, "measurement_point_id": "PM_URB_002"}
            ]

            with patch('app.services.traffic_service.TrafficRepository', return_value=mock_repository):
                traffic_service.madrid_client = mock_client
                traffic_service.madrid_processor = mock_processor

                result = await traffic_service.get_current_traffic("madrid")

                assert len(result) == 2
                assert result[0]["traffic_volume"] == 850
                mock_client.fetch_current_traffic_xml.assert_called_once()
    async def test_get_historical_traffic(self, traffic_service, sample_tenant_id):
        """Test getting historical traffic data"""
        start_date = datetime.now(timezone.utc) - timedelta(days=7)
        end_date = datetime.now(timezone.utc)

        with patch('app.services.traffic_service.get_db_transaction') as mock_get_db:
            mock_db = AsyncMock()
            mock_get_db.return_value.__aenter__.return_value = mock_db

            mock_repository = AsyncMock()
            mock_historical = [AsyncMock(), AsyncMock()]
            for item in mock_historical:
                item.to_dict.return_value = {"traffic_volume": 500}
            mock_repository.get_historical_traffic.return_value = mock_historical

            with patch('app.services.traffic_service.TrafficRepository', return_value=mock_repository):
                result = await traffic_service.get_historical_traffic(
                    "madrid", start_date, end_date, tenant_id=sample_tenant_id
                )

                assert len(result) == 2
                assert all(item["traffic_volume"] == 500 for item in result)

    async def test_get_measurement_points(self, traffic_service):
        """Test getting measurement points"""
        with patch('app.services.traffic_service.get_db_transaction') as mock_get_db:
            mock_db = AsyncMock()
            mock_get_db.return_value.__aenter__.return_value = mock_db

            mock_repository = AsyncMock()
            mock_points = [AsyncMock()]
            mock_points[0].to_dict.return_value = {"point_id": "PM_TEST_001"}
            mock_repository.get_measurement_points.return_value = mock_points

            with patch('app.services.traffic_service.TrafficRepository', return_value=mock_repository):
                result = await traffic_service.get_measurement_points("madrid")

                assert len(result) == 1
                assert result[0]["point_id"] == "PM_TEST_001"

    async def test_get_traffic_analytics(self, traffic_service):
        """Test getting traffic analytics"""
        start_date = datetime.now(timezone.utc) - timedelta(days=30)
        end_date = datetime.now(timezone.utc)

        with patch('app.services.traffic_service.get_db_transaction') as mock_get_db:
            mock_db = AsyncMock()
            mock_get_db.return_value.__aenter__.return_value = mock_db

            mock_repository = AsyncMock()
            mock_analytics = {
                "total_measurements": 1000,
                "average_volume": 650.5,
                "peak_hour": "08:00"
            }
            mock_repository.get_traffic_analytics.return_value = mock_analytics

            with patch('app.services.traffic_service.TrafficRepository', return_value=mock_repository):
                result = await traffic_service.get_traffic_analytics(
                    "madrid", start_date, end_date
                )

                assert result["total_measurements"] == 1000
                assert result["average_volume"] == 650.5
                assert "generated_at" in result

    async def test_trigger_traffic_collection(self, traffic_service, sample_tenant_id):
        """Test triggering traffic data collection"""
        with patch('app.services.traffic_service.get_db_transaction') as mock_get_db:
            mock_db = AsyncMock()
            mock_get_db.return_value.__aenter__.return_value = mock_db

            mock_repository = AsyncMock()
            mock_job = AsyncMock()
            mock_job.id = uuid4()
            mock_job.to_dict.return_value = {"id": str(mock_job.id), "status": "pending"}
            mock_repository.create_traffic_job.return_value = mock_job

            with patch('app.services.traffic_service.TrafficRepository', return_value=mock_repository):
                result = await traffic_service.trigger_traffic_collection(
                    "madrid", "current", user_id=sample_tenant_id
                )

                assert result["status"] == "pending"
                mock_repository.create_traffic_job.assert_called_once()
    async def test_process_traffic_collection_job(self, traffic_service):
        """Test processing traffic collection job"""
        job_id = uuid4()

        with patch('app.services.traffic_service.get_db_transaction') as mock_get_db:
            mock_db = AsyncMock()
            mock_get_db.return_value.__aenter__.return_value = mock_db

            mock_repository = AsyncMock()

            # Mock job
            mock_job = AsyncMock()
            mock_job.id = job_id
            mock_job.job_type = "current"
            mock_job.city = "madrid"
            mock_job.location_pattern = None

            mock_repository.update_traffic_job.return_value = True

            # Mock updated job after completion
            mock_updated_job = AsyncMock()
            mock_updated_job.to_dict.return_value = {"id": str(job_id), "status": "completed"}

            mock_repository.get_traffic_jobs.side_effect = [
                [mock_job],         # First call returns pending job
                [mock_updated_job]  # Second call returns completed job
            ]

            with patch('app.services.traffic_service.TrafficRepository', return_value=mock_repository):
                with patch.object(traffic_service, '_collect_current_traffic', return_value=125):
                    result = await traffic_service.process_traffic_collection_job(job_id)

                    assert result["status"] == "completed"
    async def test_is_traffic_data_fresh(self, traffic_service):
        """Test traffic data freshness check"""
        from app.models.traffic import TrafficData

        # Fresh data (5 minutes old)
        fresh_data = [AsyncMock()]
        fresh_data[0].date = datetime.utcnow() - timedelta(minutes=5)

        result = traffic_service._is_traffic_data_fresh(fresh_data)
        assert result is True

        # Stale data (15 minutes old)
        stale_data = [AsyncMock()]
        stale_data[0].date = datetime.utcnow() - timedelta(minutes=15)

        result = traffic_service._is_traffic_data_fresh(stale_data)
        assert result is False

        # Empty data
        result = traffic_service._is_traffic_data_fresh([])
        assert result is False
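The freshness test brackets the cutoff: five-minute-old readings must pass and fifteen-minute-old readings must fail, over naive UTC timestamps. A helper consistent with that behaviour, with a ten-minute threshold chosen as an assumption (the real default lives in traffic_service.py):

# A freshness check consistent with the test above -- sketch only
from datetime import datetime, timedelta

def is_traffic_data_fresh_sketch(traffic_data, max_age_minutes: int = 10) -> bool:
    if not traffic_data:
        return False
    newest = max(item.date for item in traffic_data)  # naive UTC, matching the test
    return datetime.utcnow() - newest < timedelta(minutes=max_age_minutes)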
    async def test_collect_current_traffic(self, traffic_service):
        """Test current traffic collection"""
        with patch('app.services.traffic_service.get_db_transaction') as mock_get_db:
            mock_db = AsyncMock()
            mock_get_db.return_value.__aenter__.return_value = mock_db

            mock_repository = AsyncMock()
            mock_repository.bulk_create_traffic_data.return_value = 10

            with patch('app.services.traffic_service.TrafficRepository', return_value=mock_repository):
                with patch.object(traffic_service, '_fetch_current_traffic_from_api', return_value=[{} for _ in range(10)]):
                    result = await traffic_service._collect_current_traffic("madrid", None)

                    assert result == 10
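This last test only asserts that the stored-row count is passed through, which a fetch-then-bulk-insert step inside a single transaction would satisfy. The names get_db_transaction, TrafficRepository and _fetch_current_traffic_from_api are taken from the patch targets above; the rest of the sketch is an assumption rather than the shipped _collect_current_traffic:

# Rough shape of the collection step exercised above -- sketch only
from app.services.traffic_service import TrafficRepository, get_db_transaction

async def collect_current_traffic_sketch(service, city: str, location_pattern=None) -> int:
    # fetching and parsing are delegated to the client/processor pair mocked in the tests
    records = await service._fetch_current_traffic_from_api(city, location_pattern)
    if not records:
        return 0
    async with get_db_transaction() as db:
        repository = TrafficRepository(db)
        return await repository.bulk_create_traffic_data(records)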