Initial commit - production deployment

2026-01-21 17:17:16 +01:00
commit c23d00dd92
2289 changed files with 638440 additions and 0 deletions

services/external/Dockerfile (new file, 59 lines)

@@ -0,0 +1,59 @@
# =============================================================================
# External Service Dockerfile - Environment-Configurable Base Images
# =============================================================================
# Build arguments for registry configuration:
# - BASE_REGISTRY: Registry URL (default: docker.io for Docker Hub)
# - PYTHON_IMAGE: Python image name and tag (default: python:3.11-slim)
# =============================================================================
ARG BASE_REGISTRY=docker.io
ARG PYTHON_IMAGE=python:3.11-slim
FROM ${BASE_REGISTRY}/${PYTHON_IMAGE} AS shared
WORKDIR /shared
COPY shared/ /shared/
ARG BASE_REGISTRY=docker.io
ARG PYTHON_IMAGE=python:3.11-slim
FROM ${BASE_REGISTRY}/${PYTHON_IMAGE}
WORKDIR /app
# Install system dependencies
RUN apt-get update && apt-get install -y \
gcc \
g++ \
curl \
&& rm -rf /var/lib/apt/lists/*
# Copy requirements
COPY shared/requirements-tracing.txt /tmp/
COPY services/external/requirements.txt .
# Install Python dependencies
RUN pip install --no-cache-dir -r /tmp/requirements-tracing.txt
RUN pip install --no-cache-dir -r requirements.txt
# Copy shared libraries from the shared stage
COPY --from=shared /shared /app/shared
# Copy application code
COPY services/external/ .
# Add shared libraries to Python path
ENV PYTHONPATH="/app:/app/shared:${PYTHONPATH:-}"
# Expose port
EXPOSE 8000
# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
CMD curl -f http://localhost:8000/health || exit 1
# Run application
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]

services/external/README.md (new file, 1049 lines)

File diff suppressed because it is too large.

services/external/alembic.ini (new file, 84 lines)

@@ -0,0 +1,84 @@
# ================================================================
# services/external/alembic.ini - Alembic Configuration
# ================================================================
[alembic]
# path to migration scripts
script_location = migrations
# template used to generate migration file names
file_template = %%(year)d%%(month).2d%%(day).2d_%%(hour).2d%%(minute).2d_%%(rev)s_%%(slug)s
# sys.path path, will be prepended to sys.path if present.
prepend_sys_path = .
# timezone to use when rendering the date within the migration file
# as well as the filename.
timezone = Europe/Madrid
# max length of characters to apply to the
# "slug" field
truncate_slug_length = 40
# set to 'true' to run the environment during
# the 'revision' command, regardless of autogenerate
revision_environment = false
# set to 'true' to allow .pyc and .pyo files without
# a source .py file to be detected as revisions in the
# versions/ directory
sourceless = false
# version of a migration file's filename format
version_num_format = %%s
# version path separator
version_path_separator = os
# set to 'true' to search source files recursively
# in each "version_locations" directory
recursive_version_locations = false
# the output encoding used when revision files
# are written from script.py.mako
output_encoding = utf-8
# Database URL - will be overridden by environment variable or settings
sqlalchemy.url = postgresql+asyncpg://external_user:password@external-db-service:5432/external_db
[post_write_hooks]
# post_write_hooks defines scripts or Python functions that are run
# on newly generated revision scripts.
[loggers]
keys = root,sqlalchemy,alembic
[handlers]
keys = console
[formatters]
keys = generic
[logger_root]
level = WARN
handlers = console
qualname =
[logger_sqlalchemy]
level = WARN
handlers =
qualname = sqlalchemy.engine
[logger_alembic]
level = INFO
handlers =
qualname = alembic
[handler_console]
class = StreamHandler
args = (sys.stderr,)
level = NOTSET
formatter = generic
[formatter_generic]
format = %(levelname)-5.5s [%(name)s] %(message)s
datefmt = %H:%M:%S
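
Because script_location is relative and sqlalchemy.url is normally overridden at runtime, migrations are usually driven through Alembic's command API or CLI from the service directory. A minimal sketch (not part of this commit; the DATABASE_URL override is an assumption about how env.py is wired):

# Sketch: apply migrations programmatically with Alembic's command API.
# Run from services/external/ so the relative script_location resolves.
# Overriding sqlalchemy.url from DATABASE_URL is an assumption; env.py may
# already read the URL from service settings.
import os
from alembic import command
from alembic.config import Config

cfg = Config("alembic.ini")
db_url = os.getenv("DATABASE_URL")
if db_url:
    cfg.set_main_option("sqlalchemy.url", db_url)

command.upgrade(cfg, "head")  # apply all pending revisions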

services/external/app/__init__.py (new file, 1 line)

@@ -0,0 +1 @@
# services/external/app/__init__.py

services/external/app/api/__init__.py (new file, 1 line)

@@ -0,0 +1 @@
# services/external/app/api/__init__.py

services/external/app/api/audit.py (new file, 237 lines)

@@ -0,0 +1,237 @@
# services/external/app/api/audit.py
"""
Audit Logs API - Retrieve audit trail for external service
"""
from fastapi import APIRouter, Depends, HTTPException, Query, Path, status
from typing import Optional, Dict, Any
from uuid import UUID
from datetime import datetime
import structlog
from sqlalchemy import select, func, and_
from sqlalchemy.ext.asyncio import AsyncSession
from app.models import AuditLog
from shared.auth.decorators import get_current_user_dep
from shared.auth.access_control import require_user_role
from shared.routing import RouteBuilder
from shared.models.audit_log_schemas import (
AuditLogResponse,
AuditLogListResponse,
AuditLogStatsResponse
)
from app.core.database import database_manager
route_builder = RouteBuilder('external')
router = APIRouter(tags=["audit-logs"])
logger = structlog.get_logger()
async def get_db():
"""Database session dependency"""
async with database_manager.get_session() as session:
yield session
@router.get(
route_builder.build_base_route("audit-logs"),
response_model=AuditLogListResponse
)
@require_user_role(['admin', 'owner'])
async def get_audit_logs(
tenant_id: UUID = Path(..., description="Tenant ID"),
start_date: Optional[datetime] = Query(None, description="Filter logs from this date"),
end_date: Optional[datetime] = Query(None, description="Filter logs until this date"),
user_id: Optional[UUID] = Query(None, description="Filter by user ID"),
action: Optional[str] = Query(None, description="Filter by action type"),
resource_type: Optional[str] = Query(None, description="Filter by resource type"),
severity: Optional[str] = Query(None, description="Filter by severity level"),
search: Optional[str] = Query(None, description="Search in description field"),
limit: int = Query(100, ge=1, le=1000, description="Number of records to return"),
offset: int = Query(0, ge=0, description="Number of records to skip"),
current_user: Dict[str, Any] = Depends(get_current_user_dep),
db: AsyncSession = Depends(get_db)
):
"""
Get audit logs for external service.
Requires admin or owner role.
"""
try:
logger.info(
"Retrieving audit logs",
tenant_id=tenant_id,
user_id=current_user.get("user_id"),
filters={
"start_date": start_date,
"end_date": end_date,
"action": action,
"resource_type": resource_type,
"severity": severity
}
)
# Build query filters
filters = [AuditLog.tenant_id == tenant_id]
if start_date:
filters.append(AuditLog.created_at >= start_date)
if end_date:
filters.append(AuditLog.created_at <= end_date)
if user_id:
filters.append(AuditLog.user_id == user_id)
if action:
filters.append(AuditLog.action == action)
if resource_type:
filters.append(AuditLog.resource_type == resource_type)
if severity:
filters.append(AuditLog.severity == severity)
if search:
filters.append(AuditLog.description.ilike(f"%{search}%"))
# Count total matching records
count_query = select(func.count()).select_from(AuditLog).where(and_(*filters))
total_result = await db.execute(count_query)
total = total_result.scalar() or 0
# Fetch paginated results
query = (
select(AuditLog)
.where(and_(*filters))
.order_by(AuditLog.created_at.desc())
.limit(limit)
.offset(offset)
)
result = await db.execute(query)
audit_logs = result.scalars().all()
# Convert to response models
items = [AuditLogResponse.from_orm(log) for log in audit_logs]
logger.info(
"Successfully retrieved audit logs",
tenant_id=tenant_id,
total=total,
returned=len(items)
)
return AuditLogListResponse(
items=items,
total=total,
limit=limit,
offset=offset,
has_more=(offset + len(items)) < total
)
except Exception as e:
logger.error(
"Failed to retrieve audit logs",
error=str(e),
tenant_id=tenant_id
)
raise HTTPException(
status_code=500,
detail=f"Failed to retrieve audit logs: {str(e)}"
)
@router.get(
route_builder.build_base_route("audit-logs/stats"),
response_model=AuditLogStatsResponse
)
@require_user_role(['admin', 'owner'])
async def get_audit_log_stats(
tenant_id: UUID = Path(..., description="Tenant ID"),
start_date: Optional[datetime] = Query(None, description="Filter logs from this date"),
end_date: Optional[datetime] = Query(None, description="Filter logs until this date"),
current_user: Dict[str, Any] = Depends(get_current_user_dep),
db: AsyncSession = Depends(get_db)
):
"""
Get audit log statistics for external service.
Requires admin or owner role.
"""
try:
logger.info(
"Retrieving audit log statistics",
tenant_id=tenant_id,
user_id=current_user.get("user_id")
)
# Build base filters
filters = [AuditLog.tenant_id == tenant_id]
if start_date:
filters.append(AuditLog.created_at >= start_date)
if end_date:
filters.append(AuditLog.created_at <= end_date)
# Total events
count_query = select(func.count()).select_from(AuditLog).where(and_(*filters))
total_result = await db.execute(count_query)
total_events = total_result.scalar() or 0
# Events by action
action_query = (
select(AuditLog.action, func.count().label('count'))
.where(and_(*filters))
.group_by(AuditLog.action)
)
action_result = await db.execute(action_query)
events_by_action = {row.action: row.count for row in action_result}
# Events by severity
severity_query = (
select(AuditLog.severity, func.count().label('count'))
.where(and_(*filters))
.group_by(AuditLog.severity)
)
severity_result = await db.execute(severity_query)
events_by_severity = {row.severity: row.count for row in severity_result}
# Events by resource type
resource_query = (
select(AuditLog.resource_type, func.count().label('count'))
.where(and_(*filters))
.group_by(AuditLog.resource_type)
)
resource_result = await db.execute(resource_query)
events_by_resource_type = {row.resource_type: row.count for row in resource_result}
# Date range
date_range_query = (
select(
func.min(AuditLog.created_at).label('min_date'),
func.max(AuditLog.created_at).label('max_date')
)
.where(and_(*filters))
)
date_result = await db.execute(date_range_query)
date_row = date_result.one()
logger.info(
"Successfully retrieved audit log statistics",
tenant_id=tenant_id,
total_events=total_events
)
return AuditLogStatsResponse(
total_events=total_events,
events_by_action=events_by_action,
events_by_severity=events_by_severity,
events_by_resource_type=events_by_resource_type,
date_range={
"min": date_row.min_date,
"max": date_row.max_date
}
)
except Exception as e:
logger.error(
"Failed to retrieve audit log statistics",
error=str(e),
tenant_id=tenant_id
)
raise HTTPException(
status_code=500,
detail=f"Failed to retrieve audit log statistics: {str(e)}"
)
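
For reference, a hedged client sketch of the listing endpoint (not part of this commit). The exact path produced by RouteBuilder('external').build_base_route("audit-logs") is not visible in this diff, so the URL shape and bearer-token auth below are assumptions:

# Sketch: page through audit logs with a severity filter.
# The route prefix and auth header are assumptions, not confirmed by this diff.
import httpx

BASE_URL = "http://localhost:8000"  # service port from the Dockerfile

async def fetch_audit_logs(tenant_id: str, token: str) -> dict:
    async with httpx.AsyncClient(base_url=BASE_URL) as client:
        resp = await client.get(
            # Assumed path shape for the tenant-scoped audit-logs route
            f"/api/v1/tenants/{tenant_id}/external/audit-logs",
            params={"severity": "error", "limit": 50, "offset": 0},
            headers={"Authorization": f"Bearer {token}"},
        )
        resp.raise_for_status()
        return resp.json()  # items, total, limit, offset, has_more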

services/external/app/api/calendar_operations.py (new file, 488 lines)

@@ -0,0 +1,488 @@
# services/external/app/api/calendar_operations.py
"""
Calendar Operations API - School calendars and tenant location context endpoints
"""
from fastapi import APIRouter, Depends, HTTPException, Query, Path, Body
from typing import List, Optional
from uuid import UUID
import structlog
from app.schemas.calendar import (
SchoolCalendarResponse,
SchoolCalendarListResponse,
TenantLocationContextResponse,
TenantLocationContextCreateRequest,
CalendarCheckResponse
)
from app.registry.calendar_registry import CalendarRegistry, SchoolType
from app.repositories.calendar_repository import CalendarRepository
from app.cache.redis_wrapper import ExternalDataCache
from shared.routing.route_builder import RouteBuilder
from shared.auth.decorators import get_current_user_dep
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.database import get_db
from datetime import datetime, date
route_builder = RouteBuilder('external')
router = APIRouter(tags=["calendar-operations"])
logger = structlog.get_logger()
# Initialize cache
cache = ExternalDataCache()
# ===== School Calendar Endpoints =====
@router.get(
route_builder.build_operations_route("cities/{city_id}/school-calendars"),
response_model=SchoolCalendarListResponse
)
async def list_school_calendars_for_city(
city_id: str = Path(..., description="City ID (e.g., 'madrid')"),
school_type: Optional[str] = Query(None, description="Filter by school type"),
academic_year: Optional[str] = Query(None, description="Filter by academic year"),
db: AsyncSession = Depends(get_db)
):
"""List all available school calendars for a city"""
try:
repo = CalendarRepository(db)
calendars = await repo.get_calendars_by_city(city_id, enabled_only=True)
# Apply filters if provided
if school_type:
calendars = [c for c in calendars if c.school_type == school_type]
if academic_year:
calendars = [c for c in calendars if c.academic_year == academic_year]
calendar_responses = [
SchoolCalendarResponse(
calendar_id=str(c.id),
calendar_name=c.calendar_name,
city_id=c.city_id,
school_type=c.school_type,
academic_year=c.academic_year,
holiday_periods=c.holiday_periods,
school_hours=c.school_hours,
source=c.source,
enabled=c.enabled
)
for c in calendars
]
return SchoolCalendarListResponse(
city_id=city_id,
calendars=calendar_responses,
total=len(calendar_responses)
)
except Exception as e:
logger.error(
"Error listing school calendars",
city_id=city_id,
error=str(e)
)
raise HTTPException(
status_code=500,
detail=f"Error retrieving school calendars: {str(e)}"
)
@router.get(
route_builder.build_operations_route("school-calendars/{calendar_id}"),
response_model=SchoolCalendarResponse
)
async def get_school_calendar(
calendar_id: UUID = Path(..., description="School calendar ID"),
db: AsyncSession = Depends(get_db)
):
"""Get detailed information about a specific school calendar (cached)"""
try:
calendar_id_str = str(calendar_id)
# Check cache first
cached = await cache.get_cached_calendar(calendar_id_str)
if cached:
logger.debug("Returning cached calendar", calendar_id=calendar_id_str)
return SchoolCalendarResponse(**cached)
# Cache miss - fetch from database
repo = CalendarRepository(db)
calendar = await repo.get_calendar_by_id(calendar_id)
if not calendar:
raise HTTPException(status_code=404, detail="School calendar not found")
response_data = {
"calendar_id": str(calendar.id),
"calendar_name": calendar.calendar_name,
"city_id": calendar.city_id,
"school_type": calendar.school_type,
"academic_year": calendar.academic_year,
"holiday_periods": calendar.holiday_periods,
"school_hours": calendar.school_hours,
"source": calendar.source,
"enabled": calendar.enabled
}
# Cache the result
await cache.set_cached_calendar(calendar_id_str, response_data)
return SchoolCalendarResponse(**response_data)
except HTTPException:
raise
except Exception as e:
logger.error(
"Error retrieving school calendar",
calendar_id=str(calendar_id),
error=str(e)
)
raise HTTPException(
status_code=500,
detail=f"Error retrieving school calendar: {str(e)}"
)
@router.get(
route_builder.build_operations_route("school-calendars/{calendar_id}/is-holiday"),
response_model=CalendarCheckResponse
)
async def check_is_school_holiday(
calendar_id: UUID = Path(..., description="School calendar ID"),
check_date: str = Query(..., description="Date to check (ISO format: YYYY-MM-DD)"),
db: AsyncSession = Depends(get_db)
):
"""Check if a specific date is a school holiday"""
try:
repo = CalendarRepository(db)
calendar = await repo.get_calendar_by_id(calendar_id)
if not calendar:
raise HTTPException(status_code=404, detail="School calendar not found")
# Parse the date
try:
date_obj = datetime.strptime(check_date, "%Y-%m-%d").date()
except ValueError:
raise HTTPException(
status_code=400,
detail="Invalid date format. Use YYYY-MM-DD"
)
# Check if date falls within any holiday period
is_holiday = False
holiday_name = None
for period in calendar.holiday_periods:
start = datetime.strptime(period["start_date"], "%Y-%m-%d").date()
end = datetime.strptime(period["end_date"], "%Y-%m-%d").date()
if start <= date_obj <= end:
is_holiday = True
holiday_name = period["name"]
break
return CalendarCheckResponse(
date=check_date,
is_holiday=is_holiday,
holiday_name=holiday_name,
calendar_id=str(calendar_id),
calendar_name=calendar.calendar_name
)
except HTTPException:
raise
except Exception as e:
logger.error(
"Error checking holiday status",
calendar_id=str(calendar_id),
date=check_date,
error=str(e)
)
raise HTTPException(
status_code=500,
detail=f"Error checking holiday status: {str(e)}"
)
# ===== Tenant Location Context Endpoints =====
@router.get(
route_builder.build_base_route("location-context"),
response_model=TenantLocationContextResponse
)
async def get_tenant_location_context(
tenant_id: str = Path(..., description="Tenant ID"),
current_user: dict = Depends(get_current_user_dep),
db: AsyncSession = Depends(get_db)
):
"""Get location context for a tenant including school calendar assignment (cached)"""
try:
# Check cache first
cached = await cache.get_cached_tenant_context(tenant_id)
if cached:
logger.debug("Returning cached tenant context", tenant_id=tenant_id)
return TenantLocationContextResponse(**cached)
# Cache miss - fetch from database
repo = CalendarRepository(db)
context = await repo.get_tenant_with_calendar(tenant_id)
if not context:
raise HTTPException(
status_code=404,
detail="Location context not found for this tenant"
)
# Cache the result
        await cache.set_cached_tenant_context(tenant_id, context)
return TenantLocationContextResponse(**context)
except HTTPException:
raise
except Exception as e:
logger.error(
"Error retrieving tenant location context",
tenant_id=str(tenant_id),
error=str(e)
)
raise HTTPException(
status_code=500,
detail=f"Error retrieving location context: {str(e)}"
)
@router.post(
route_builder.build_base_route("location-context"),
response_model=TenantLocationContextResponse
)
async def create_or_update_tenant_location_context(
request: TenantLocationContextCreateRequest,
tenant_id: str = Path(..., description="Tenant ID"),
current_user: dict = Depends(get_current_user_dep),
db: AsyncSession = Depends(get_db)
):
"""Create or update tenant location context"""
try:
# Convert to UUID for use with repository
tenant_uuid = UUID(tenant_id)
repo = CalendarRepository(db)
# Validate calendar_id if provided
if request.school_calendar_id:
calendar = await repo.get_calendar_by_id(request.school_calendar_id)
if not calendar:
raise HTTPException(
status_code=400,
detail="Invalid school_calendar_id"
)
# Create or update context
context_obj = await repo.create_or_update_tenant_location_context(
tenant_id=tenant_uuid,
city_id=request.city_id,
school_calendar_id=request.school_calendar_id,
neighborhood=request.neighborhood,
local_events=request.local_events,
notes=request.notes
)
# Invalidate cache since context was updated
await cache.invalidate_tenant_context(tenant_id)
# Get full context with calendar details
context = await repo.get_tenant_with_calendar(tenant_uuid)
# Cache the new context
await cache.set_cached_tenant_context(tenant_id, context)
return TenantLocationContextResponse(**context)
except HTTPException:
raise
except Exception as e:
logger.error(
"Error creating/updating tenant location context",
tenant_id=str(tenant_id),
error=str(e)
)
raise HTTPException(
status_code=500,
detail=f"Error creating/updating location context: {str(e)}"
)
@router.delete(
route_builder.build_base_route("location-context"),
status_code=204
)
async def delete_tenant_location_context(
tenant_id: str = Path(..., description="Tenant ID"),
current_user: dict = Depends(get_current_user_dep),
db: AsyncSession = Depends(get_db)
):
"""Delete tenant location context"""
try:
# Convert to UUID for use with repository
tenant_uuid = UUID(tenant_id)
repo = CalendarRepository(db)
deleted = await repo.delete_tenant_location_context(tenant_uuid)
if not deleted:
raise HTTPException(
status_code=404,
detail="Location context not found"
)
return None
except HTTPException:
raise
except Exception as e:
logger.error(
"Error deleting tenant location context",
tenant_id=str(tenant_id),
error=str(e)
)
raise HTTPException(
status_code=500,
detail=f"Error deleting location context: {str(e)}"
)
# ===== Calendar Suggestion Endpoint =====
@router.post(
route_builder.build_base_route("location-context/suggest-calendar")
)
async def suggest_calendar_for_tenant(
tenant_id: str = Path(..., description="Tenant ID"),
current_user: dict = Depends(get_current_user_dep),
db: AsyncSession = Depends(get_db)
):
"""
Suggest an appropriate school calendar for a tenant based on location and POI data.
This endpoint analyzes:
- Tenant's city location
- Detected schools nearby (from POI detection)
- Available calendars for the city
- Bakery-specific heuristics (primary schools = stronger morning rush)
Returns a suggestion with confidence score and reasoning.
Does NOT automatically assign - requires admin approval.
"""
try:
from app.utils.calendar_suggester import CalendarSuggester
from app.repositories.poi_context_repository import POIContextRepository
tenant_uuid = UUID(tenant_id)
# Get tenant's location context
calendar_repo = CalendarRepository(db)
location_context = await calendar_repo.get_tenant_location_context(tenant_uuid)
if not location_context:
raise HTTPException(
status_code=404,
detail="Location context not found. Create location context first."
)
city_id = location_context.city_id
# Get available calendars for city
calendars_result = await calendar_repo.get_calendars_by_city(city_id, enabled_only=True)
calendars = calendars_result.get("calendars", []) if calendars_result else []
# Get POI context if available
poi_repo = POIContextRepository(db)
poi_context = await poi_repo.get_by_tenant_id(tenant_uuid)
poi_data = poi_context.to_dict() if poi_context else None
# Generate suggestion
suggester = CalendarSuggester()
suggestion = suggester.suggest_calendar_for_tenant(
city_id=city_id,
available_calendars=calendars,
poi_context=poi_data,
tenant_data=None # Could include tenant info if needed
)
# Format for admin display
admin_message = suggester.format_suggestion_for_admin(suggestion)
logger.info(
"Calendar suggestion generated",
tenant_id=tenant_id,
city_id=city_id,
suggested_calendar=suggestion.get("suggested_calendar_id"),
confidence=suggestion.get("confidence")
)
return {
**suggestion,
"admin_message": admin_message,
"tenant_id": tenant_id,
"current_calendar_id": str(location_context.school_calendar_id) if location_context.school_calendar_id else None
}
except HTTPException:
raise
except Exception as e:
logger.error(
"Error generating calendar suggestion",
tenant_id=tenant_id,
error=str(e),
exc_info=True
)
raise HTTPException(
status_code=500,
detail=f"Error generating calendar suggestion: {str(e)}"
)
# ===== Helper Endpoints =====
@router.get(
route_builder.build_operations_route("calendars/registry"),
response_model=List[SchoolCalendarResponse]
)
async def list_registry_calendars():
"""List all calendars from the CalendarRegistry (static configuration)"""
calendars = CalendarRegistry.get_enabled_calendars()
return [
SchoolCalendarResponse(
calendar_id=cal.calendar_id,
calendar_name=cal.calendar_name,
city_id=cal.city_id,
school_type=cal.school_type.value,
academic_year=cal.academic_year,
holiday_periods=[
{
"name": hp.name,
"start_date": hp.start_date,
"end_date": hp.end_date,
"description": hp.description
}
for hp in cal.holiday_periods
],
school_hours={
"morning_start": cal.school_hours.morning_start,
"morning_end": cal.school_hours.morning_end,
"has_afternoon_session": cal.school_hours.has_afternoon_session,
"afternoon_start": cal.school_hours.afternoon_start,
"afternoon_end": cal.school_hours.afternoon_end
},
source=cal.source,
enabled=cal.enabled
)
for cal in calendars
]
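
The is-holiday endpoint boils down to a date-range scan over holiday_periods entries shaped like {"name", "start_date", "end_date"}. A standalone sketch of that check (not part of this commit), handy for unit tests; the example period is illustrative:

# Sketch: the same range check performed by the is-holiday endpoint,
# extracted as a pure helper (period dates use the ISO YYYY-MM-DD format).
from datetime import date, datetime
from typing import Optional

def find_holiday(check_date: date, holiday_periods: list[dict]) -> Optional[str]:
    """Return the holiday name covering check_date, or None."""
    for period in holiday_periods:
        start = datetime.strptime(period["start_date"], "%Y-%m-%d").date()
        end = datetime.strptime(period["end_date"], "%Y-%m-%d").date()
        if start <= check_date <= end:
            return period["name"]
    return None

# Example with an illustrative (not real) holiday period:
periods = [{"name": "Christmas break", "start_date": "2024-12-23", "end_date": "2025-01-07"}]
assert find_holiday(date(2024, 12, 25), periods) == "Christmas break"
assert find_holiday(date(2024, 11, 2), periods) is None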

services/external/app/api/city_operations.py (new file, 510 lines)

@@ -0,0 +1,510 @@
# services/external/app/api/city_operations.py
"""
City Operations API - New endpoints for city-based data access
"""
from fastapi import APIRouter, Depends, HTTPException, Query, Path
from typing import List
from datetime import datetime
from uuid import UUID
import structlog
from app.schemas.city_data import CityInfoResponse, DataAvailabilityResponse
from app.schemas.weather import WeatherDataResponse, WeatherForecastResponse, WeatherForecastAPIResponse
from app.schemas.traffic import TrafficDataResponse
from app.registry.city_registry import CityRegistry
from app.registry.geolocation_mapper import GeolocationMapper
from app.repositories.city_data_repository import CityDataRepository
from app.cache.redis_wrapper import ExternalDataCache
from app.services.weather_service import WeatherService
from app.services.traffic_service import TrafficService
from app.services.tenant_deletion_service import ExternalTenantDeletionService
from shared.routing.route_builder import RouteBuilder
from shared.auth.decorators import get_current_user_dep
from shared.auth.access_control import service_only_access
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.database import get_db
route_builder = RouteBuilder('external')
router = APIRouter(tags=["city-operations"])
logger = structlog.get_logger()
@router.get(
route_builder.build_base_route("cities"),
response_model=List[CityInfoResponse]
)
async def list_supported_cities():
"""List all enabled cities with data availability"""
registry = CityRegistry()
cities = registry.get_enabled_cities()
return [
CityInfoResponse(
city_id=city.city_id,
name=city.name,
country=city.country.value,
latitude=city.latitude,
longitude=city.longitude,
radius_km=city.radius_km,
weather_provider=city.weather_provider.value,
traffic_provider=city.traffic_provider.value,
enabled=city.enabled
)
for city in cities
]
@router.get(
route_builder.build_operations_route("cities/{city_id}/availability"),
response_model=DataAvailabilityResponse
)
async def get_city_data_availability(
city_id: str = Path(..., description="City ID"),
db: AsyncSession = Depends(get_db)
):
"""Get data availability for a specific city"""
registry = CityRegistry()
city = registry.get_city(city_id)
if not city:
raise HTTPException(status_code=404, detail="City not found")
from sqlalchemy import text
weather_stmt = text(
"SELECT MIN(date), MAX(date), COUNT(*) FROM city_weather_data WHERE city_id = :city_id"
)
weather_result = await db.execute(weather_stmt, {"city_id": city_id})
weather_row = weather_result.fetchone()
weather_min, weather_max, weather_count = weather_row if weather_row else (None, None, 0)
traffic_stmt = text(
"SELECT MIN(date), MAX(date), COUNT(*) FROM city_traffic_data WHERE city_id = :city_id"
)
traffic_result = await db.execute(traffic_stmt, {"city_id": city_id})
traffic_row = traffic_result.fetchone()
traffic_min, traffic_max, traffic_count = traffic_row if traffic_row else (None, None, 0)
return DataAvailabilityResponse(
city_id=city_id,
city_name=city.name,
weather_available=weather_count > 0,
weather_start_date=weather_min.isoformat() if weather_min else None,
weather_end_date=weather_max.isoformat() if weather_max else None,
weather_record_count=weather_count or 0,
traffic_available=traffic_count > 0,
traffic_start_date=traffic_min.isoformat() if traffic_min else None,
traffic_end_date=traffic_max.isoformat() if traffic_max else None,
traffic_record_count=traffic_count or 0
)
@router.get(
route_builder.build_operations_route("historical-weather-optimized"),
response_model=List[WeatherDataResponse]
)
async def get_historical_weather_optimized(
tenant_id: UUID = Path(..., description="Tenant ID"),
latitude: float = Query(..., description="Latitude"),
longitude: float = Query(..., description="Longitude"),
start_date: datetime = Query(..., description="Start date"),
end_date: datetime = Query(..., description="End date"),
db: AsyncSession = Depends(get_db)
):
"""
    Get historical weather data using city-based cached data.
    This is the FAST endpoint intended for the training service.
"""
try:
mapper = GeolocationMapper()
mapping = mapper.map_tenant_to_city(latitude, longitude)
if not mapping:
raise HTTPException(
status_code=404,
detail="No supported city found for this location"
)
city, distance = mapping
logger.info(
"Fetching historical weather from cache",
tenant_id=tenant_id,
city=city.name,
distance_km=round(distance, 2)
)
cache = ExternalDataCache()
cached_data = await cache.get_cached_weather(
city.city_id, start_date, end_date
)
if cached_data:
logger.info("Weather cache hit", records=len(cached_data))
return cached_data
repo = CityDataRepository(db)
db_records = await repo.get_weather_by_city_and_range(
city.city_id, start_date, end_date
)
response_data = [
WeatherDataResponse(
id=str(record.id),
location_id=f"{city.city_id}_{record.date.date()}",
date=record.date,
temperature=record.temperature,
precipitation=record.precipitation,
humidity=record.humidity,
wind_speed=record.wind_speed,
pressure=record.pressure,
description=record.description,
source=record.source,
raw_data=None,
created_at=record.created_at,
updated_at=record.updated_at
)
for record in db_records
]
await cache.set_cached_weather(
city.city_id, start_date, end_date, response_data
)
logger.info(
"Historical weather data retrieved",
records=len(response_data),
source="database"
)
return response_data
except HTTPException:
raise
except Exception as e:
logger.error("Error fetching historical weather", error=str(e))
raise HTTPException(status_code=500, detail="Internal server error")
@router.get(
route_builder.build_operations_route("historical-traffic-optimized"),
response_model=List[TrafficDataResponse]
)
async def get_historical_traffic_optimized(
tenant_id: UUID = Path(..., description="Tenant ID"),
latitude: float = Query(..., description="Latitude"),
longitude: float = Query(..., description="Longitude"),
start_date: datetime = Query(..., description="Start date"),
end_date: datetime = Query(..., description="End date"),
db: AsyncSession = Depends(get_db)
):
"""
    Get historical traffic data using city-based cached data.
    This is the FAST endpoint intended for the training service.
"""
try:
mapper = GeolocationMapper()
mapping = mapper.map_tenant_to_city(latitude, longitude)
if not mapping:
raise HTTPException(
status_code=404,
detail="No supported city found for this location"
)
city, distance = mapping
logger.info(
"Fetching historical traffic from cache",
tenant_id=tenant_id,
city=city.name,
distance_km=round(distance, 2)
)
cache = ExternalDataCache()
cached_data = await cache.get_cached_traffic(
city.city_id, start_date, end_date
)
if cached_data:
logger.info("Traffic cache hit", records=len(cached_data))
return cached_data
logger.debug("Starting DB query for traffic", city_id=city.city_id)
repo = CityDataRepository(db)
db_records = await repo.get_traffic_by_city_and_range(
city.city_id, start_date, end_date
)
logger.debug("DB query completed", records=len(db_records))
logger.debug("Creating response objects")
response_data = [
TrafficDataResponse(
date=record.date,
traffic_volume=record.traffic_volume,
pedestrian_count=record.pedestrian_count,
congestion_level=record.congestion_level,
average_speed=record.average_speed,
source=record.source
)
for record in db_records
]
logger.debug("Response objects created", count=len(response_data))
logger.debug("Caching traffic data")
await cache.set_cached_traffic(
city.city_id, start_date, end_date, response_data
)
logger.debug("Caching completed")
logger.info(
"Historical traffic data retrieved",
records=len(response_data),
source="database"
)
return response_data
except HTTPException:
raise
except Exception as e:
logger.error("Error fetching historical traffic", error=str(e))
raise HTTPException(status_code=500, detail="Internal server error")
# ================================================================
# REAL-TIME & FORECAST ENDPOINTS
# ================================================================
@router.get(
route_builder.build_operations_route("weather/current"),
response_model=WeatherDataResponse
)
async def get_current_weather(
tenant_id: UUID = Path(..., description="Tenant ID"),
latitude: float = Query(..., description="Latitude"),
longitude: float = Query(..., description="Longitude")
):
"""
Get current weather for a location (real-time data from AEMET)
"""
try:
weather_service = WeatherService()
weather_data = await weather_service.get_current_weather(latitude, longitude)
if not weather_data:
raise HTTPException(
status_code=404,
detail="No weather data available for this location"
)
logger.info(
"Current weather retrieved",
tenant_id=tenant_id,
latitude=latitude,
longitude=longitude
)
return weather_data
except HTTPException:
raise
except Exception as e:
logger.error("Error fetching current weather", error=str(e))
raise HTTPException(status_code=500, detail="Internal server error")
@router.get(
route_builder.build_operations_route("weather/forecast")
)
async def get_weather_forecast(
tenant_id: UUID = Path(..., description="Tenant ID"),
latitude: float = Query(..., description="Latitude"),
longitude: float = Query(..., description="Longitude"),
days: int = Query(7, ge=1, le=14, description="Number of days to forecast")
):
"""
Get weather forecast for a location (from AEMET)
Returns list of forecast objects with: forecast_date, generated_at, temperature, precipitation, humidity, wind_speed, description, source
"""
try:
weather_service = WeatherService()
forecast_data = await weather_service.get_weather_forecast(latitude, longitude, days)
if not forecast_data:
raise HTTPException(
status_code=404,
detail="No forecast data available for this location"
)
logger.info(
"Weather forecast retrieved",
tenant_id=tenant_id,
latitude=latitude,
longitude=longitude,
days=days,
count=len(forecast_data)
)
return forecast_data
except HTTPException:
raise
except Exception as e:
logger.error("Error fetching weather forecast", error=str(e))
raise HTTPException(status_code=500, detail="Internal server error")
@router.get(
route_builder.build_operations_route("traffic/current"),
response_model=TrafficDataResponse
)
async def get_current_traffic(
tenant_id: UUID = Path(..., description="Tenant ID"),
latitude: float = Query(..., description="Latitude"),
longitude: float = Query(..., description="Longitude")
):
"""
Get current traffic conditions for a location (real-time data from Madrid OpenData)
"""
try:
traffic_service = TrafficService()
traffic_data = await traffic_service.get_current_traffic(latitude, longitude)
if not traffic_data:
raise HTTPException(
status_code=404,
detail="No traffic data available for this location"
)
logger.info(
"Current traffic retrieved",
tenant_id=tenant_id,
latitude=latitude,
longitude=longitude
)
return traffic_data
except HTTPException:
raise
except Exception as e:
logger.error("Error fetching current traffic", error=str(e))
raise HTTPException(status_code=500, detail="Internal server error")
# ============================================================================
# Tenant Data Deletion Operations (Internal Service Only)
# ============================================================================
@router.delete(
route_builder.build_base_route("tenant/{tenant_id}", include_tenant_prefix=False),
response_model=dict
)
@service_only_access
async def delete_tenant_data(
tenant_id: str = Path(..., description="Tenant ID to delete data for"),
current_user: dict = Depends(get_current_user_dep),
db: AsyncSession = Depends(get_db)
):
"""
Delete tenant-specific external data (Internal service only)
IMPORTANT NOTE:
The External service primarily stores SHARED city-wide data that is used
by ALL tenants. This endpoint only deletes tenant-specific data:
- Tenant-specific audit logs
- Tenant-specific weather data (if any)
City-wide data (CityWeatherData, CityTrafficData, TrafficData, etc.)
is intentionally PRESERVED as it's shared across all tenants.
**WARNING**: This operation is irreversible!
Returns:
Deletion summary with counts of deleted records and note about preserved data
"""
try:
logger.info("external.tenant_deletion.api_called", tenant_id=tenant_id)
deletion_service = ExternalTenantDeletionService(db)
result = await deletion_service.safe_delete_tenant_data(tenant_id)
if not result.success:
raise HTTPException(
status_code=500,
detail=f"Tenant data deletion failed: {', '.join(result.errors)}"
)
return {
"message": "Tenant-specific data deletion completed successfully",
"note": "City-wide shared data (weather, traffic) has been preserved",
"summary": result.to_dict()
}
except HTTPException:
raise
except Exception as e:
logger.error("external.tenant_deletion.api_error",
tenant_id=tenant_id,
error=str(e),
exc_info=True)
raise HTTPException(
status_code=500,
detail=f"Failed to delete tenant data: {str(e)}"
)
@router.get(
route_builder.build_base_route("tenant/{tenant_id}/deletion-preview", include_tenant_prefix=False),
response_model=dict
)
@service_only_access
async def preview_tenant_data_deletion(
tenant_id: str = Path(..., description="Tenant ID to preview deletion for"),
current_user: dict = Depends(get_current_user_dep),
db: AsyncSession = Depends(get_db)
):
"""
Preview what tenant-specific data would be deleted (dry-run)
This shows counts of tenant-specific data only. City-wide shared data
(CityWeatherData, CityTrafficData, TrafficData, etc.) will NOT be deleted.
Returns:
Dictionary with entity names and their counts
"""
try:
logger.info("external.tenant_deletion.preview_called", tenant_id=tenant_id)
deletion_service = ExternalTenantDeletionService(db)
preview = await deletion_service.get_tenant_data_preview(tenant_id)
total_records = sum(v for k, v in preview.items() if not k.startswith("_"))
return {
"tenant_id": tenant_id,
"service": "external",
"preview": preview,
"total_records": total_records,
"note": "City-wide data (weather, traffic) is shared and will NOT be deleted",
"preserved_data": [
"CityWeatherData (city-wide)",
"CityTrafficData (city-wide)",
"TrafficData (city-wide)",
"TrafficMeasurementPoint (reference data)",
"WeatherForecast (city-wide)"
],
"warning": "Only tenant-specific records will be permanently deleted"
}
except Exception as e:
logger.error("external.tenant_deletion.preview_error",
tenant_id=tenant_id,
error=str(e),
exc_info=True)
raise HTTPException(
status_code=500,
detail=f"Failed to preview tenant data deletion: {str(e)}"
)
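
Both optimized history endpoints follow the same cache-aside flow: try Redis, fall back to the repository, then backfill the cache. A generic sketch of that flow (not part of this commit; the three callables are hypothetical stand-ins for the ExternalDataCache and CityDataRepository methods used above):

# Sketch: the cache-aside flow used by the optimized history endpoints.
# get_cached / load_from_db / set_cached are hypothetical stand-ins for the
# ExternalDataCache and CityDataRepository methods in this module.
from typing import Awaitable, Callable, Optional, Sequence

async def cached_fetch(
    get_cached: Callable[[], Awaitable[Optional[Sequence[dict]]]],
    load_from_db: Callable[[], Awaitable[Sequence[dict]]],
    set_cached: Callable[[Sequence[dict]], Awaitable[None]],
) -> Sequence[dict]:
    cached = await get_cached()
    if cached:                      # cache hit: skip the database entirely
        return cached
    records = await load_from_db()  # cache miss: read from Postgres
    await set_cached(records)       # backfill so the next call is a hit
    return records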

services/external/app/api/geocoding.py (new file, 302 lines)

@@ -0,0 +1,302 @@
"""
Geocoding API Endpoints
Provides address search, autocomplete, and geocoding via Nominatim.
"""
from fastapi import APIRouter, Query, HTTPException
from typing import List, Optional
from pydantic import BaseModel, Field
import structlog
from app.services.nominatim_service import NominatimService
logger = structlog.get_logger()
router = APIRouter(prefix="/api/v1/geocoding", tags=["Geocoding"])
# Initialize Nominatim service
# In production, override with environment variable for self-hosted instance
nominatim_service = NominatimService()
# Response Models
class AddressResult(BaseModel):
"""Address search result"""
display_name: str = Field(..., description="Full formatted address")
lat: float = Field(..., description="Latitude")
lon: float = Field(..., description="Longitude")
osm_type: str = Field(..., description="OSM object type")
osm_id: int = Field(..., description="OSM object ID")
place_id: int = Field(..., description="Nominatim place ID")
type: str = Field(..., description="Place type")
class_: str = Field(..., alias="class", description="OSM class")
address: dict = Field(..., description="Parsed address components")
boundingbox: List[str] = Field(..., description="Bounding box coordinates")
class GeocodeResult(BaseModel):
"""Geocoding result"""
display_name: str = Field(..., description="Full formatted address")
lat: float = Field(..., description="Latitude")
lon: float = Field(..., description="Longitude")
address: dict = Field(..., description="Parsed address components")
class CoordinateValidation(BaseModel):
"""Coordinate validation result"""
valid: bool = Field(..., description="Whether coordinates are valid")
address: Optional[str] = Field(None, description="Address at coordinates if valid")
# Endpoints
@router.get(
"/search",
response_model=List[AddressResult],
summary="Search for addresses",
description="Search for addresses matching query (autocomplete). Minimum 3 characters required."
)
async def search_addresses(
q: str = Query(..., min_length=3, description="Search query (minimum 3 characters)"),
country_code: str = Query("es", description="ISO country code to restrict search"),
limit: int = Query(10, ge=1, le=50, description="Maximum number of results")
):
"""
Search for addresses matching the query.
This endpoint provides autocomplete functionality for address input.
Results are restricted to the specified country and sorted by relevance.
Example:
GET /api/v1/geocoding/search?q=Gran%20Via%20Madrid&limit=5
"""
try:
results = await nominatim_service.search_address(
query=q,
country_code=country_code,
limit=limit
)
logger.info(
"Address search request",
query=q,
country=country_code,
result_count=len(results)
)
return results
except Exception as e:
logger.error(
"Address search failed",
query=q,
error=str(e),
exc_info=True
)
raise HTTPException(
status_code=500,
detail=f"Address search failed: {str(e)}"
)
@router.get(
"/geocode",
response_model=GeocodeResult,
summary="Geocode an address",
description="Convert an address string to coordinates (lat/lon)"
)
async def geocode_address(
address: str = Query(..., min_length=5, description="Full address to geocode"),
country_code: str = Query("es", description="ISO country code")
):
"""
Geocode an address to get coordinates.
Returns the best matching location for the given address.
Example:
GET /api/v1/geocoding/geocode?address=Gran%20Via%2028,%20Madrid
"""
try:
result = await nominatim_service.geocode_address(
address=address,
country_code=country_code
)
if not result:
raise HTTPException(
status_code=404,
detail=f"Address not found: {address}"
)
logger.info(
"Geocoding request",
address=address,
lat=result["lat"],
lon=result["lon"]
)
return result
except HTTPException:
raise
except Exception as e:
logger.error(
"Geocoding failed",
address=address,
error=str(e),
exc_info=True
)
raise HTTPException(
status_code=500,
detail=f"Geocoding failed: {str(e)}"
)
@router.get(
"/reverse",
response_model=GeocodeResult,
summary="Reverse geocode coordinates",
description="Convert coordinates (lat/lon) to an address"
)
async def reverse_geocode(
lat: float = Query(..., ge=-90, le=90, description="Latitude"),
lon: float = Query(..., ge=-180, le=180, description="Longitude")
):
"""
Reverse geocode coordinates to get address.
Returns the address at the specified coordinates.
Example:
GET /api/v1/geocoding/reverse?lat=40.4168&lon=-3.7038
"""
try:
result = await nominatim_service.reverse_geocode(
latitude=lat,
longitude=lon
)
if not result:
raise HTTPException(
status_code=404,
detail=f"No address found at coordinates: {lat}, {lon}"
)
logger.info(
"Reverse geocoding request",
lat=lat,
lon=lon,
address=result["display_name"]
)
return result
except HTTPException:
raise
except Exception as e:
logger.error(
"Reverse geocoding failed",
lat=lat,
lon=lon,
error=str(e),
exc_info=True
)
raise HTTPException(
status_code=500,
detail=f"Reverse geocoding failed: {str(e)}"
)
@router.get(
"/validate",
response_model=CoordinateValidation,
summary="Validate coordinates",
description="Check if coordinates point to a valid location"
)
async def validate_coordinates(
lat: float = Query(..., ge=-90, le=90, description="Latitude"),
lon: float = Query(..., ge=-180, le=180, description="Longitude")
):
"""
Validate that coordinates point to a real location.
Returns validation result with address if valid.
Example:
GET /api/v1/geocoding/validate?lat=40.4168&lon=-3.7038
"""
try:
is_valid = await nominatim_service.validate_coordinates(
latitude=lat,
longitude=lon
)
result = {"valid": is_valid, "address": None}
if is_valid:
geocode_result = await nominatim_service.reverse_geocode(lat, lon)
if geocode_result:
result["address"] = geocode_result["display_name"]
logger.info(
"Coordinate validation request",
lat=lat,
lon=lon,
valid=is_valid
)
return result
except Exception as e:
logger.error(
"Coordinate validation failed",
lat=lat,
lon=lon,
error=str(e),
exc_info=True
)
raise HTTPException(
status_code=500,
detail=f"Coordinate validation failed: {str(e)}"
)
@router.get(
"/health",
summary="Check geocoding service health",
description="Check if Nominatim service is accessible"
)
async def health_check():
"""
Check if Nominatim service is accessible.
Returns service health status.
"""
try:
is_healthy = await nominatim_service.health_check()
if not is_healthy:
raise HTTPException(
status_code=503,
detail="Nominatim service is unavailable"
)
return {
"status": "healthy",
"service": "nominatim",
"base_url": nominatim_service.base_url,
"is_public_api": nominatim_service.is_public_api
}
except HTTPException:
raise
except Exception as e:
logger.error(
"Health check failed",
error=str(e),
exc_info=True
)
raise HTTPException(
status_code=503,
detail=f"Health check failed: {str(e)}"
)
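
The geocoding router mounts at /api/v1/geocoding, so a search round-trip needs no route builder. A minimal client sketch against a locally running instance (not part of this commit; host and port assumed from the Dockerfile's EXPOSE 8000):

# Sketch: address autocomplete via the /search endpoint shown above.
import asyncio
import httpx

async def search(query: str) -> list[dict]:
    async with httpx.AsyncClient(base_url="http://localhost:8000") as client:
        resp = await client.get(
            "/api/v1/geocoding/search",
            params={"q": query, "country_code": "es", "limit": 5},
        )
        resp.raise_for_status()
        return resp.json()  # list of AddressResult objects

if __name__ == "__main__":
    for hit in asyncio.run(search("Gran Via Madrid")):
        print(hit["display_name"], hit["lat"], hit["lon"])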

services/external/app/api/poi_context.py (new file, 532 lines)

@@ -0,0 +1,532 @@
"""
POI Context API Endpoints
REST API for POI detection, retrieval, and management.
"""
from fastapi import APIRouter, Depends, HTTPException, Query
from sqlalchemy.ext.asyncio import AsyncSession
from typing import Optional
import structlog
import uuid
from app.core.database import get_db
from app.services.poi_detection_service import POIDetectionService
from app.services.poi_feature_selector import POIFeatureSelector
from app.services.competitor_analyzer import CompetitorAnalyzer
from app.services.poi_refresh_service import POIRefreshService
from app.repositories.poi_context_repository import POIContextRepository
from app.cache.poi_cache_service import POICacheService
from app.core.redis_client import get_redis_client
from shared.routing.route_builder import RouteBuilder
logger = structlog.get_logger()
route_builder = RouteBuilder('external')
router = APIRouter(tags=["POI Context"])
@router.post(
route_builder.build_base_route("poi-context/detect")
)
async def detect_pois_for_tenant(
tenant_id: str,
latitude: float = Query(..., description="Bakery latitude"),
longitude: float = Query(..., description="Bakery longitude"),
force_refresh: bool = Query(False, description="Force refresh, skip cache"),
db: AsyncSession = Depends(get_db)
):
"""
Detect POIs for a tenant's bakery location.
Performs automated POI detection using Overpass API, calculates ML features,
and stores results for demand forecasting.
"""
try:
tenant_uuid = uuid.UUID(tenant_id)
except ValueError:
raise HTTPException(status_code=400, detail="Invalid tenant_id format")
logger.info(
"POI detection requested",
tenant_id=tenant_id,
location=(latitude, longitude),
force_refresh=force_refresh
)
try:
# Initialize services
poi_service = POIDetectionService()
feature_selector = POIFeatureSelector()
competitor_analyzer = CompetitorAnalyzer()
poi_repo = POIContextRepository(db)
redis_client = await get_redis_client()
cache_service = POICacheService(redis_client)
# Check cache first (unless force refresh)
if not force_refresh:
cached_result = await cache_service.get_cached_pois(latitude, longitude)
if cached_result:
logger.info("Using cached POI results", tenant_id=tenant_id)
# Still save to database for this tenant
poi_context = await poi_repo.create_or_update(tenant_uuid, cached_result)
return {
"status": "success",
"source": "cache",
"poi_context": poi_context.to_dict()
}
# Detect POIs
poi_results = await poi_service.detect_pois_for_bakery(
latitude, longitude, tenant_id
)
# Select relevant features
try:
feature_selection = feature_selector.select_relevant_features(
poi_results["poi_categories"],
tenant_id
)
except Exception as e:
logger.error(
"Feature selection failed",
tenant_id=tenant_id,
error=str(e),
exc_info=True
)
# Provide default feature selection to continue
feature_selection = {
"features": {},
"relevant_categories": [],
"relevance_report": [],
"total_features": 0,
"total_relevant_categories": 0
}
# Analyze competitors specifically
try:
competitors_data = poi_results["poi_categories"].get("competitors", {})
competitor_pois = competitors_data.get("pois", [])
competitor_analysis = competitor_analyzer.analyze_competitive_landscape(
competitor_pois,
(latitude, longitude),
tenant_id
)
except Exception as e:
logger.error(
"Competitor analysis failed",
tenant_id=tenant_id,
error=str(e),
exc_info=True
)
# Provide default competitor analysis to continue
competitor_analysis = {
"competitive_pressure_score": 0.0,
"direct_competitors_count": 0,
"nearby_competitors_count": 0,
"market_competitors_count": 0,
"total_competitors_count": 0,
"competitive_zone": "low_competition",
"market_type": "underserved",
"competitive_advantage": "first_mover",
"ml_feature_competitive_pressure": 0.0,
"ml_feature_has_direct_competitor": 0,
"ml_feature_competitor_density_500m": 0,
"competitor_details": [],
"nearest_competitor": None
}
# Generate competitive insights
try:
competitive_insights = competitor_analyzer.get_competitive_insights(
competitor_analysis
)
except Exception as e:
logger.warning(
"Failed to generate competitive insights",
tenant_id=tenant_id,
error=str(e)
)
competitive_insights = []
# Combine results
enhanced_results = {
**poi_results,
"ml_features": feature_selection.get("features", {}),
"relevant_categories": feature_selection.get("relevant_categories", []),
"relevance_report": feature_selection.get("relevance_report", []),
"competitor_analysis": competitor_analysis,
"competitive_insights": competitive_insights
}
# Cache results
try:
await cache_service.cache_poi_results(latitude, longitude, enhanced_results)
except Exception as e:
logger.warning(
"Failed to cache POI results",
tenant_id=tenant_id,
error=str(e)
)
# Save to database
try:
poi_context = await poi_repo.create_or_update(tenant_uuid, enhanced_results)
except Exception as e:
logger.error(
"Failed to save POI context to database",
tenant_id=tenant_id,
error=str(e),
exc_info=True
)
raise HTTPException(
status_code=500,
detail=f"Failed to save POI context: {str(e)}"
)
# Schedule automatic refresh job (180 days from now)
try:
poi_refresh_service = POIRefreshService()
refresh_job = await poi_refresh_service.schedule_refresh_job(
tenant_id=tenant_id,
latitude=latitude,
longitude=longitude,
session=db
)
logger.info(
"POI refresh job scheduled",
tenant_id=tenant_id,
job_id=str(refresh_job.id),
scheduled_at=refresh_job.scheduled_at
)
except Exception as e:
logger.warning(
"Failed to schedule POI refresh job",
tenant_id=tenant_id,
error=str(e)
)
logger.info(
"POI detection completed",
tenant_id=tenant_id,
total_pois=poi_context.total_pois_detected,
relevant_categories=len(feature_selection.get("relevant_categories", []))
)
# Phase 3: Auto-trigger calendar suggestion after POI detection
# This helps admins by providing intelligent calendar recommendations
calendar_suggestion = None
try:
from app.utils.calendar_suggester import CalendarSuggester
from app.repositories.calendar_repository import CalendarRepository
# Get tenant's location context
calendar_repo = CalendarRepository(db)
location_context = await calendar_repo.get_tenant_location_context(tenant_uuid)
if location_context and location_context.school_calendar_id is None:
# Only suggest if no calendar assigned yet
city_id = location_context.city_id
# Get available calendars for city
calendars_result = await calendar_repo.get_calendars_by_city(city_id, enabled_only=True)
calendars = calendars_result.get("calendars", []) if calendars_result else []
if calendars:
# Generate suggestion using POI data
suggester = CalendarSuggester()
calendar_suggestion = suggester.suggest_calendar_for_tenant(
city_id=city_id,
available_calendars=calendars,
poi_context=poi_context.to_dict(),
tenant_data=None
)
logger.info(
"Calendar suggestion auto-generated after POI detection",
tenant_id=tenant_id,
suggested_calendar=calendar_suggestion.get("calendar_name"),
confidence=calendar_suggestion.get("confidence_percentage"),
should_auto_assign=calendar_suggestion.get("should_auto_assign")
)
# TODO: Send notification to admin about available suggestion
# This will be implemented when notification service is integrated
else:
logger.info(
"No calendars available for city, skipping suggestion",
tenant_id=tenant_id,
city_id=city_id
)
elif location_context and location_context.school_calendar_id:
logger.info(
"Calendar already assigned, skipping suggestion",
tenant_id=tenant_id,
calendar_id=str(location_context.school_calendar_id)
)
else:
logger.warning(
"No location context found, skipping calendar suggestion",
tenant_id=tenant_id
)
except Exception as e:
# Non-blocking: POI detection should succeed even if suggestion fails
logger.warning(
"Failed to auto-generate calendar suggestion (non-blocking)",
tenant_id=tenant_id,
error=str(e)
)
return {
"status": "success",
"source": "detection",
"poi_context": poi_context.to_dict(),
"feature_selection": feature_selection,
"competitor_analysis": competitor_analysis,
"competitive_insights": competitive_insights,
"calendar_suggestion": calendar_suggestion # Include suggestion in response
}
except Exception as e:
logger.error(
"POI detection failed",
tenant_id=tenant_id,
error=str(e),
exc_info=True
)
raise HTTPException(
status_code=500,
detail=f"POI detection failed: {str(e)}"
)
@router.get(
route_builder.build_base_route("poi-context")
)
async def get_poi_context(
tenant_id: str,
db: AsyncSession = Depends(get_db)
):
"""
Get POI context for a tenant.
Returns stored POI detection results and ML features.
"""
try:
tenant_uuid = uuid.UUID(tenant_id)
except ValueError:
raise HTTPException(status_code=400, detail="Invalid tenant_id format")
poi_repo = POIContextRepository(db)
poi_context = await poi_repo.get_by_tenant_id(tenant_uuid)
if not poi_context:
raise HTTPException(
status_code=404,
detail=f"POI context not found for tenant {tenant_id}"
)
# Check if stale
is_stale = poi_context.is_stale()
return {
"poi_context": poi_context.to_dict(),
"is_stale": is_stale,
"needs_refresh": is_stale
}
@router.post(
route_builder.build_base_route("poi-context/refresh")
)
async def refresh_poi_context(
tenant_id: str,
db: AsyncSession = Depends(get_db)
):
"""
Refresh POI context for a tenant.
Re-detects POIs and updates stored data.
"""
try:
tenant_uuid = uuid.UUID(tenant_id)
except ValueError:
raise HTTPException(status_code=400, detail="Invalid tenant_id format")
poi_repo = POIContextRepository(db)
existing_context = await poi_repo.get_by_tenant_id(tenant_uuid)
if not existing_context:
raise HTTPException(
status_code=404,
detail=f"POI context not found for tenant {tenant_id}. Use detect endpoint first."
)
# Perform detection with force_refresh=True
return await detect_pois_for_tenant(
tenant_id=tenant_id,
latitude=existing_context.latitude,
longitude=existing_context.longitude,
force_refresh=True,
db=db
)
@router.delete(
route_builder.build_base_route("poi-context")
)
async def delete_poi_context(
tenant_id: str,
db: AsyncSession = Depends(get_db)
):
"""
Delete POI context for a tenant.
"""
try:
tenant_uuid = uuid.UUID(tenant_id)
except ValueError:
raise HTTPException(status_code=400, detail="Invalid tenant_id format")
poi_repo = POIContextRepository(db)
deleted = await poi_repo.delete_by_tenant_id(tenant_uuid)
if not deleted:
raise HTTPException(
status_code=404,
detail=f"POI context not found for tenant {tenant_id}"
)
return {
"status": "success",
"message": f"POI context deleted for tenant {tenant_id}"
}
@router.get(
route_builder.build_base_route("poi-context/feature-importance")
)
async def get_feature_importance(
tenant_id: str,
db: AsyncSession = Depends(get_db)
):
"""
Get feature importance summary for tenant's POI context.
Shows which POI categories are relevant and their impact scores.
"""
try:
tenant_uuid = uuid.UUID(tenant_id)
except ValueError:
raise HTTPException(status_code=400, detail="Invalid tenant_id format")
poi_repo = POIContextRepository(db)
poi_context = await poi_repo.get_by_tenant_id(tenant_uuid)
if not poi_context:
raise HTTPException(
status_code=404,
detail=f"POI context not found for tenant {tenant_id}"
)
feature_selector = POIFeatureSelector()
importance_summary = feature_selector.get_feature_importance_summary(
poi_context.poi_detection_results
)
return {
"tenant_id": tenant_id,
"feature_importance": importance_summary,
"total_categories": len(importance_summary),
"relevant_categories": sum(1 for cat in importance_summary if cat["is_relevant"])
}
@router.get(
route_builder.build_base_route("poi-context/competitor-analysis")
)
async def get_competitor_analysis(
tenant_id: str,
db: AsyncSession = Depends(get_db)
):
"""
Get detailed competitor analysis for tenant location.
"""
try:
tenant_uuid = uuid.UUID(tenant_id)
except ValueError:
raise HTTPException(status_code=400, detail="Invalid tenant_id format")
poi_repo = POIContextRepository(db)
poi_context = await poi_repo.get_by_tenant_id(tenant_uuid)
if not poi_context:
raise HTTPException(
status_code=404,
detail=f"POI context not found for tenant {tenant_id}"
)
competitor_analyzer = CompetitorAnalyzer()
competitors = poi_context.poi_detection_results.get("competitors", {}).get("pois", [])
analysis = competitor_analyzer.analyze_competitive_landscape(
competitors,
(poi_context.latitude, poi_context.longitude),
tenant_id
)
insights = competitor_analyzer.get_competitive_insights(analysis)
return {
"tenant_id": tenant_id,
"location": {
"latitude": poi_context.latitude,
"longitude": poi_context.longitude
},
"competitor_analysis": analysis,
"insights": insights
}
@router.get("/health")
async def poi_health_check():
"""
Check POI detection service health.
Verifies Overpass API accessibility.
"""
poi_service = POIDetectionService()
health = await poi_service.health_check()
if not health["healthy"]:
raise HTTPException(
status_code=503,
detail=f"POI detection service unhealthy: {health.get('error', 'Unknown error')}"
)
return {
"status": "healthy",
"overpass_api": health
}
@router.get("/cache/stats")
async def get_cache_stats():
"""
Get POI cache statistics.
"""
try:
redis_client = await get_redis_client()
cache_service = POICacheService(redis_client)
stats = await cache_service.get_cache_stats()
return {
"status": "success",
"cache_stats": stats
}
except Exception as e:
logger.error("Failed to get cache stats", error=str(e))
raise HTTPException(
status_code=500,
detail=f"Failed to get cache stats: {str(e)}"
)
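# ================================================================
# Minimal usage sketch for the operational endpoints above, using httpx.
# The base URL and the prefix under which this router is mounted are
# illustrative assumptions; adjust them to the actual deployment.
# ================================================================
import asyncio
import httpx

async def check_poi_service(base_url: str = "http://localhost:8000") -> None:
    async with httpx.AsyncClient(base_url=base_url, timeout=10.0) as client:
        health = await client.get("/health")       # 503 with detail if Overpass is unreachable
        stats = await client.get("/cache/stats")   # cached location count and TTL settings
        print(health.status_code, health.json())
        print(stats.status_code, stats.json())

if __name__ == "__main__":
    asyncio.run(check_poi_service())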

View File

@@ -0,0 +1,441 @@
"""
POI Refresh Jobs API Endpoints
REST API for managing POI refresh background jobs.
"""
from fastapi import APIRouter, Depends, HTTPException, Query
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select, desc
from typing import List, Optional
from datetime import datetime, timezone
from pydantic import BaseModel, Field
import structlog
import uuid
from app.core.database import get_db
from app.services.poi_refresh_service import POIRefreshService
from app.services.poi_scheduler import get_scheduler
from app.models.poi_refresh_job import POIRefreshJob
logger = structlog.get_logger()
router = APIRouter(prefix="/poi-refresh-jobs", tags=["POI Refresh Jobs"])
# Response Models
class POIRefreshJobResponse(BaseModel):
"""POI refresh job response"""
id: str
tenant_id: str
status: str
scheduled_at: datetime
started_at: Optional[datetime] = None
completed_at: Optional[datetime] = None
attempt_count: int
max_attempts: int
pois_detected: Optional[int] = None
changes_detected: bool = False
change_summary: Optional[dict] = None
error_message: Optional[str] = None
next_scheduled_at: Optional[datetime] = None
duration_seconds: Optional[float] = None
is_overdue: bool
can_retry: bool
class Config:
from_attributes = True
class ScheduleJobRequest(BaseModel):
"""Schedule POI refresh job request"""
tenant_id: str = Field(..., description="Tenant UUID")
latitude: float = Field(..., ge=-90, le=90, description="Bakery latitude")
longitude: float = Field(..., ge=-180, le=180, description="Bakery longitude")
scheduled_at: Optional[datetime] = Field(None, description="When to run (default: 180 days from now)")
class JobExecutionResult(BaseModel):
"""Job execution result"""
status: str
job_id: str
message: Optional[str] = None
pois_detected: Optional[int] = None
changes_detected: Optional[bool] = None
change_summary: Optional[dict] = None
duration_seconds: Optional[float] = None
next_scheduled_at: Optional[str] = None
error: Optional[str] = None
attempt: Optional[int] = None
can_retry: Optional[bool] = None
# Endpoints
@router.post(
"/schedule",
response_model=POIRefreshJobResponse,
summary="Schedule POI refresh job",
description="Schedule a background job to refresh POI context for a tenant"
)
async def schedule_refresh_job(
request: ScheduleJobRequest,
db: AsyncSession = Depends(get_db)
):
"""
Schedule a POI refresh job for a tenant.
Creates a background job that will detect POIs for the tenant's location
at the scheduled time. Default schedule is 180 days from now.
"""
try:
tenant_uuid = uuid.UUID(request.tenant_id)
except ValueError:
raise HTTPException(status_code=400, detail="Invalid tenant_id format")
try:
poi_refresh_service = POIRefreshService()
job = await poi_refresh_service.schedule_refresh_job(
tenant_id=request.tenant_id,
latitude=request.latitude,
longitude=request.longitude,
scheduled_at=request.scheduled_at,
session=db
)
logger.info(
"POI refresh job scheduled via API",
tenant_id=request.tenant_id,
job_id=str(job.id),
scheduled_at=job.scheduled_at
)
return POIRefreshJobResponse(
id=str(job.id),
tenant_id=str(job.tenant_id),
status=job.status,
scheduled_at=job.scheduled_at,
started_at=job.started_at,
completed_at=job.completed_at,
attempt_count=job.attempt_count,
max_attempts=job.max_attempts,
pois_detected=job.pois_detected,
changes_detected=job.changes_detected,
change_summary=job.change_summary,
error_message=job.error_message,
next_scheduled_at=job.next_scheduled_at,
duration_seconds=job.duration_seconds,
is_overdue=job.is_overdue,
can_retry=job.can_retry
)
except Exception as e:
logger.error(
"Failed to schedule POI refresh job",
tenant_id=request.tenant_id,
error=str(e),
exc_info=True
)
raise HTTPException(
status_code=500,
detail=f"Failed to schedule refresh job: {str(e)}"
)
@router.get(
"/{job_id}",
response_model=POIRefreshJobResponse,
summary="Get refresh job by ID",
description="Retrieve details of a specific POI refresh job"
)
async def get_refresh_job(
job_id: str,
db: AsyncSession = Depends(get_db)
):
"""Get POI refresh job by ID"""
try:
job_uuid = uuid.UUID(job_id)
except ValueError:
raise HTTPException(status_code=400, detail="Invalid job_id format")
result = await db.execute(
select(POIRefreshJob).where(POIRefreshJob.id == job_uuid)
)
job = result.scalar_one_or_none()
if not job:
raise HTTPException(status_code=404, detail=f"Job not found: {job_id}")
return POIRefreshJobResponse(
id=str(job.id),
tenant_id=str(job.tenant_id),
status=job.status,
scheduled_at=job.scheduled_at,
started_at=job.started_at,
completed_at=job.completed_at,
attempt_count=job.attempt_count,
max_attempts=job.max_attempts,
pois_detected=job.pois_detected,
changes_detected=job.changes_detected,
change_summary=job.change_summary,
error_message=job.error_message,
next_scheduled_at=job.next_scheduled_at,
duration_seconds=job.duration_seconds,
is_overdue=job.is_overdue,
can_retry=job.can_retry
)
@router.get(
"/tenant/{tenant_id}",
response_model=List[POIRefreshJobResponse],
summary="Get refresh jobs for tenant",
description="Retrieve all POI refresh jobs for a specific tenant"
)
async def get_tenant_refresh_jobs(
tenant_id: str,
status: Optional[str] = Query(None, description="Filter by status"),
limit: int = Query(50, ge=1, le=200, description="Maximum number of results"),
db: AsyncSession = Depends(get_db)
):
"""Get all POI refresh jobs for a tenant"""
try:
tenant_uuid = uuid.UUID(tenant_id)
except ValueError:
raise HTTPException(status_code=400, detail="Invalid tenant_id format")
query = select(POIRefreshJob).where(POIRefreshJob.tenant_id == tenant_uuid)
if status:
query = query.where(POIRefreshJob.status == status)
query = query.order_by(desc(POIRefreshJob.scheduled_at)).limit(limit)
result = await db.execute(query)
jobs = result.scalars().all()
return [
POIRefreshJobResponse(
id=str(job.id),
tenant_id=str(job.tenant_id),
status=job.status,
scheduled_at=job.scheduled_at,
started_at=job.started_at,
completed_at=job.completed_at,
attempt_count=job.attempt_count,
max_attempts=job.max_attempts,
pois_detected=job.pois_detected,
changes_detected=job.changes_detected,
change_summary=job.change_summary,
error_message=job.error_message,
next_scheduled_at=job.next_scheduled_at,
duration_seconds=job.duration_seconds,
is_overdue=job.is_overdue,
can_retry=job.can_retry
)
for job in jobs
]
@router.post(
"/{job_id}/execute",
response_model=JobExecutionResult,
summary="Execute refresh job",
description="Manually trigger execution of a pending POI refresh job"
)
async def execute_refresh_job(
job_id: str,
db: AsyncSession = Depends(get_db)
):
"""Manually execute a POI refresh job"""
try:
job_uuid = uuid.UUID(job_id)
except ValueError:
raise HTTPException(status_code=400, detail="Invalid job_id format")
try:
poi_refresh_service = POIRefreshService()
result = await poi_refresh_service.execute_refresh_job(
job_id=job_id,
session=db
)
logger.info(
"POI refresh job executed via API",
job_id=job_id,
status=result["status"]
)
return JobExecutionResult(**result)
except ValueError as e:
raise HTTPException(status_code=404, detail=str(e))
except Exception as e:
logger.error(
"Failed to execute POI refresh job",
job_id=job_id,
error=str(e),
exc_info=True
)
raise HTTPException(
status_code=500,
detail=f"Failed to execute refresh job: {str(e)}"
)
@router.post(
"/process-pending",
summary="Process all pending jobs",
description="Manually trigger processing of all pending POI refresh jobs"
)
async def process_pending_jobs(
max_concurrent: int = Query(5, ge=1, le=20, description="Max concurrent executions"),
db: AsyncSession = Depends(get_db)
):
"""Process all pending POI refresh jobs"""
try:
poi_refresh_service = POIRefreshService()
result = await poi_refresh_service.process_pending_jobs(
max_concurrent=max_concurrent,
session=db
)
logger.info(
"Pending POI refresh jobs processed via API",
total_jobs=result["total_jobs"],
successful=result["successful"],
failed=result["failed"]
)
return result
except Exception as e:
logger.error(
"Failed to process pending POI refresh jobs",
error=str(e),
exc_info=True
)
raise HTTPException(
status_code=500,
detail=f"Failed to process pending jobs: {str(e)}"
)
@router.get(
"/pending",
response_model=List[POIRefreshJobResponse],
summary="Get pending jobs",
description="Retrieve all pending POI refresh jobs that are due for execution"
)
async def get_pending_jobs(
limit: int = Query(100, ge=1, le=500, description="Maximum number of results"),
db: AsyncSession = Depends(get_db)
):
"""Get all pending POI refresh jobs"""
try:
poi_refresh_service = POIRefreshService()
jobs = await poi_refresh_service.get_pending_jobs(
limit=limit,
session=db
)
return [
POIRefreshJobResponse(
id=str(job.id),
tenant_id=str(job.tenant_id),
status=job.status,
scheduled_at=job.scheduled_at,
started_at=job.started_at,
completed_at=job.completed_at,
attempt_count=job.attempt_count,
max_attempts=job.max_attempts,
pois_detected=job.pois_detected,
changes_detected=job.changes_detected,
change_summary=job.change_summary,
error_message=job.error_message,
next_scheduled_at=job.next_scheduled_at,
duration_seconds=job.duration_seconds,
is_overdue=job.is_overdue,
can_retry=job.can_retry
)
for job in jobs
]
except Exception as e:
logger.error(
"Failed to get pending POI refresh jobs",
error=str(e),
exc_info=True
)
raise HTTPException(
status_code=500,
detail=f"Failed to get pending jobs: {str(e)}"
)
@router.post(
"/trigger-scheduler",
summary="Trigger scheduler immediately",
description="Trigger an immediate check for pending jobs (bypasses schedule)"
)
async def trigger_scheduler():
"""Trigger POI refresh scheduler immediately"""
try:
scheduler = get_scheduler()
if not scheduler.is_running:
raise HTTPException(
status_code=503,
detail="POI refresh scheduler is not running"
)
result = await scheduler.trigger_immediate_check()
logger.info(
"POI refresh scheduler triggered via API",
total_jobs=result["total_jobs"],
successful=result["successful"],
failed=result["failed"]
)
return result
except HTTPException:
raise
except Exception as e:
logger.error(
"Failed to trigger POI refresh scheduler",
error=str(e),
exc_info=True
)
raise HTTPException(
status_code=500,
detail=f"Failed to trigger scheduler: {str(e)}"
)
@router.get(
"/scheduler/status",
summary="Get scheduler status",
description="Check if POI refresh scheduler is running"
)
async def get_scheduler_status():
"""Get POI refresh scheduler status"""
try:
scheduler = get_scheduler()
return {
"is_running": scheduler.is_running,
"check_interval_seconds": scheduler.check_interval_seconds,
"max_concurrent_jobs": scheduler.max_concurrent_jobs
}
except Exception as e:
logger.error(
"Failed to get scheduler status",
error=str(e),
exc_info=True
)
raise HTTPException(
status_code=500,
detail=f"Failed to get scheduler status: {str(e)}"
)
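# ================================================================
# Minimal usage sketch for the endpoints above: schedule a refresh job, then
# trigger it immediately instead of waiting for the scheduler. The host/port,
# any API prefix in front of "/poi-refresh-jobs", and the tenant UUID are
# illustrative assumptions.
# ================================================================
import asyncio
import httpx

async def schedule_and_run(tenant_id: str, lat: float, lon: float) -> None:
    async with httpx.AsyncClient(base_url="http://localhost:8000", timeout=60.0) as client:
        created = await client.post(
            "/poi-refresh-jobs/schedule",
            json={"tenant_id": tenant_id, "latitude": lat, "longitude": lon},
        )
        created.raise_for_status()
        job = created.json()
        executed = await client.post(f"/poi-refresh-jobs/{job['id']}/execute")
        executed.raise_for_status()
        print(executed.json())

if __name__ == "__main__":
    asyncio.run(schedule_and_run("11111111-1111-1111-1111-111111111111", 40.4168, -3.7038))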

View File

@@ -0,0 +1,129 @@
# services/external/app/api/traffic_data.py
"""
Traffic Data API - Atomic CRUD operations on TrafficData model
"""
from fastapi import APIRouter, Depends, HTTPException, Query, Path
from typing import List, Optional
from datetime import date
from uuid import UUID
import structlog
from app.schemas.traffic import TrafficDataResponse
from app.services.traffic_service import TrafficService
from shared.routing.route_builder import RouteBuilder
from shared.auth.decorators import get_current_user_dep
from shared.auth.access_control import analytics_tier_required
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.database import get_db
route_builder = RouteBuilder('external')
router = APIRouter(tags=["traffic-data"])
logger = structlog.get_logger()
def get_traffic_service():
"""Dependency injection for TrafficService"""
return TrafficService()
@router.get(
route_builder.build_base_route("traffic-data"),
response_model=List[TrafficDataResponse]
)
@analytics_tier_required
async def list_traffic_data(
tenant_id: UUID = Path(..., description="Tenant ID"),
start_date: Optional[date] = Query(None),
end_date: Optional[date] = Query(None),
latitude: Optional[float] = Query(None),
longitude: Optional[float] = Query(None),
limit: int = Query(100, ge=1, le=1000),
current_user: dict = Depends(get_current_user_dep),
db: AsyncSession = Depends(get_db),
traffic_service: TrafficService = Depends(get_traffic_service)
):
"""List stored traffic data records (Professional+ tier required)"""
try:
logger.info("Listing traffic data", tenant_id=tenant_id)
traffic_records = await traffic_service.get_stored_traffic_data(
tenant_id=tenant_id,
start_date=start_date,
end_date=end_date,
latitude=latitude,
longitude=longitude,
limit=limit,
db=db
)
return traffic_records
except Exception as e:
logger.error("Failed to list traffic data", error=str(e), tenant_id=tenant_id)
raise HTTPException(status_code=500, detail="Failed to retrieve traffic data")
@router.get(
route_builder.build_resource_detail_route("traffic-data", "traffic_id"),
response_model=TrafficDataResponse
)
@analytics_tier_required
async def get_traffic_data(
tenant_id: UUID = Path(..., description="Tenant ID"),
traffic_id: UUID = Path(..., description="Traffic data ID"),
current_user: dict = Depends(get_current_user_dep),
db: AsyncSession = Depends(get_db),
traffic_service: TrafficService = Depends(get_traffic_service)
):
"""Get a specific traffic data record"""
try:
logger.info("Getting traffic data", tenant_id=tenant_id, traffic_id=traffic_id)
traffic_record = await traffic_service.get_traffic_data_by_id(
tenant_id=tenant_id,
traffic_id=traffic_id,
db=db
)
if not traffic_record:
raise HTTPException(status_code=404, detail="Traffic data not found")
return traffic_record
except HTTPException:
raise
except Exception as e:
logger.error("Failed to get traffic data", error=str(e), tenant_id=tenant_id)
raise HTTPException(status_code=500, detail="Failed to retrieve traffic data")
@router.delete(
route_builder.build_resource_detail_route("traffic-data", "traffic_id")
)
async def delete_traffic_data(
tenant_id: UUID = Path(..., description="Tenant ID"),
traffic_id: UUID = Path(..., description="Traffic data ID"),
db: AsyncSession = Depends(get_db),
traffic_service: TrafficService = Depends(get_traffic_service)
):
"""Delete a traffic data record"""
try:
logger.info("Deleting traffic data", tenant_id=tenant_id, traffic_id=traffic_id)
success = await traffic_service.delete_traffic_data(
tenant_id=tenant_id,
traffic_id=traffic_id,
db=db
)
if not success:
raise HTTPException(status_code=404, detail="Traffic data not found")
return {"message": "Traffic data deleted successfully"}
except HTTPException:
raise
except Exception as e:
logger.error("Failed to delete traffic data", error=str(e), tenant_id=tenant_id)
raise HTTPException(status_code=500, detail="Failed to delete traffic data")

View File

@@ -0,0 +1,129 @@
# services/external/app/api/weather_data.py
"""
Weather Data API - Atomic CRUD operations on WeatherData model
"""
from fastapi import APIRouter, Depends, HTTPException, Query, Path
from typing import List, Optional
from datetime import date
from uuid import UUID
import structlog
from app.schemas.weather import WeatherDataResponse
from app.services.weather_service import WeatherService
from shared.routing.route_builder import RouteBuilder
from shared.auth.decorators import get_current_user_dep
from shared.auth.access_control import analytics_tier_required
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.database import get_db
route_builder = RouteBuilder('external')
router = APIRouter(tags=["weather-data"])
logger = structlog.get_logger()
def get_weather_service():
"""Dependency injection for WeatherService"""
return WeatherService()
@router.get(
route_builder.build_base_route("weather-data"),
response_model=List[WeatherDataResponse]
)
@analytics_tier_required
async def list_weather_data(
tenant_id: UUID = Path(..., description="Tenant ID"),
start_date: Optional[date] = Query(None),
end_date: Optional[date] = Query(None),
latitude: Optional[float] = Query(None),
longitude: Optional[float] = Query(None),
limit: int = Query(100, ge=1, le=1000),
current_user: dict = Depends(get_current_user_dep),
db: AsyncSession = Depends(get_db),
weather_service: WeatherService = Depends(get_weather_service)
):
"""List stored weather data records (Professional+ tier required)"""
try:
logger.info("Listing weather data", tenant_id=tenant_id)
weather_records = await weather_service.get_stored_weather_data(
tenant_id=tenant_id,
start_date=start_date,
end_date=end_date,
latitude=latitude,
longitude=longitude,
limit=limit,
db=db
)
return weather_records
except Exception as e:
logger.error("Failed to list weather data", error=str(e), tenant_id=tenant_id)
raise HTTPException(status_code=500, detail="Failed to retrieve weather data")
@router.get(
route_builder.build_resource_detail_route("weather-data", "weather_id"),
response_model=WeatherDataResponse
)
@analytics_tier_required
async def get_weather_data(
tenant_id: UUID = Path(..., description="Tenant ID"),
weather_id: UUID = Path(..., description="Weather data ID"),
current_user: dict = Depends(get_current_user_dep),
db: AsyncSession = Depends(get_db),
weather_service: WeatherService = Depends(get_weather_service)
):
"""Get a specific weather data record"""
try:
logger.info("Getting weather data", tenant_id=tenant_id, weather_id=weather_id)
weather_record = await weather_service.get_weather_data_by_id(
tenant_id=tenant_id,
weather_id=weather_id,
db=db
)
if not weather_record:
raise HTTPException(status_code=404, detail="Weather data not found")
return weather_record
except HTTPException:
raise
except Exception as e:
logger.error("Failed to get weather data", error=str(e), tenant_id=tenant_id)
raise HTTPException(status_code=500, detail="Failed to retrieve weather data")
@router.delete(
route_builder.build_resource_detail_route("weather-data", "weather_id")
)
async def delete_weather_data(
tenant_id: UUID = Path(..., description="Tenant ID"),
weather_id: UUID = Path(..., description="Weather data ID"),
db: AsyncSession = Depends(get_db),
weather_service: WeatherService = Depends(get_weather_service)
):
"""Delete a weather data record"""
try:
logger.info("Deleting weather data", tenant_id=tenant_id, weather_id=weather_id)
success = await weather_service.delete_weather_data(
tenant_id=tenant_id,
weather_id=weather_id,
db=db
)
if not success:
raise HTTPException(status_code=404, detail="Weather data not found")
return {"message": "Weather data deleted successfully"}
except HTTPException:
raise
except Exception as e:
logger.error("Failed to delete weather data", error=str(e), tenant_id=tenant_id)
raise HTTPException(status_code=500, detail="Failed to delete weather data")

View File

@@ -0,0 +1 @@
"""Cache module for external data service"""

View File

@@ -0,0 +1,208 @@
"""
POI Cache Service
Caches POI detection results to avoid hammering the Overpass API.
POI data doesn't change frequently, so aggressive caching is appropriate.
"""
from typing import Optional, Dict, Any
import json
import structlog
from datetime import timedelta
from app.core.poi_config import (
POI_CACHE_TTL_DAYS,
POI_COORDINATE_PRECISION
)
logger = structlog.get_logger()
class POICacheService:
"""
Redis-based cache for POI detection results.
Caches results by rounded coordinates to allow reuse for nearby locations.
Reduces load on the Overpass API and improves onboarding performance.
"""
def __init__(self, redis_client):
"""
Initialize cache service.
Args:
redis_client: Redis client instance
"""
self.redis = redis_client
self.cache_ttl_days = POI_CACHE_TTL_DAYS
self.coordinate_precision = POI_COORDINATE_PRECISION
def _generate_cache_key(self, latitude: float, longitude: float) -> str:
"""
Generate cache key from coordinates.
Rounds coordinates to the specified precision (default 4 decimals ≈ 10 m).
This allows cache reuse for bakeries in very close proximity.
Args:
latitude: Bakery latitude
longitude: Bakery longitude
Returns:
Redis cache key
"""
lat_rounded = round(latitude, self.coordinate_precision)
lon_rounded = round(longitude, self.coordinate_precision)
return f"poi_cache:{lat_rounded}:{lon_rounded}"
async def get_cached_pois(
self,
latitude: float,
longitude: float
) -> Optional[Dict[str, Any]]:
"""
Get cached POI results for location.
Args:
latitude: Bakery latitude
longitude: Bakery longitude
Returns:
Cached POI detection results or None if not cached
"""
cache_key = self._generate_cache_key(latitude, longitude)
try:
cached_data = await self.redis.get(cache_key)
if cached_data:
logger.info(
"POI cache hit",
cache_key=cache_key,
location=(latitude, longitude)
)
return json.loads(cached_data)
else:
logger.debug(
"POI cache miss",
cache_key=cache_key,
location=(latitude, longitude)
)
return None
except Exception as e:
logger.warning(
"Failed to retrieve POI cache",
error=str(e),
cache_key=cache_key
)
return None
async def cache_poi_results(
self,
latitude: float,
longitude: float,
poi_data: Dict[str, Any]
) -> bool:
"""
Cache POI detection results.
Args:
latitude: Bakery latitude
longitude: Bakery longitude
poi_data: Complete POI detection results
Returns:
True if cached successfully, False otherwise
"""
cache_key = self._generate_cache_key(latitude, longitude)
ttl_seconds = self.cache_ttl_days * 24 * 60 * 60
try:
await self.redis.setex(
cache_key,
ttl_seconds,
json.dumps(poi_data)
)
logger.info(
"POI results cached",
cache_key=cache_key,
ttl_days=self.cache_ttl_days,
location=(latitude, longitude)
)
return True
except Exception as e:
logger.error(
"Failed to cache POI results",
error=str(e),
cache_key=cache_key
)
return False
async def invalidate_cache(
self,
latitude: float,
longitude: float
) -> bool:
"""
Invalidate cached POI results for location.
Useful for manual refresh or data corrections.
Args:
latitude: Bakery latitude
longitude: Bakery longitude
Returns:
True if invalidated successfully
"""
cache_key = self._generate_cache_key(latitude, longitude)
try:
deleted = await self.redis.delete(cache_key)
if deleted:
logger.info(
"POI cache invalidated",
cache_key=cache_key,
location=(latitude, longitude)
)
return bool(deleted)
except Exception as e:
logger.error(
"Failed to invalidate POI cache",
error=str(e),
cache_key=cache_key
)
return False
async def get_cache_stats(self) -> Dict[str, Any]:
"""
Get cache statistics.
Returns:
Dictionary with cache stats (key count, memory usage, etc.)
"""
try:
# Count POI cache keys
pattern = "poi_cache:*"
cursor = 0
key_count = 0
while True:
cursor, keys = await self.redis.scan(
cursor=cursor,
match=pattern,
count=100
)
key_count += len(keys)
if cursor == 0:
break
return {
"total_cached_locations": key_count,
"cache_ttl_days": self.cache_ttl_days,
"coordinate_precision": self.coordinate_precision
}
except Exception as e:
logger.error("Failed to get cache stats", error=str(e))
return {
"error": str(e)
}
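# ================================================================
# Minimal sketch of the coordinate-rounding behaviour, using a tiny in-memory
# stand-in for the Redis client (an assumption for illustration; production
# code injects the real async Redis client). Assumes it runs alongside the
# POICacheService defined above.
# ================================================================
import asyncio

class _InMemoryRedis:
    """Just enough of the async Redis interface for this sketch."""
    def __init__(self):
        self._store = {}
    async def get(self, key):
        return self._store.get(key)
    async def setex(self, key, ttl_seconds, value):
        self._store[key] = value
    async def delete(self, key):
        return 1 if self._store.pop(key, None) is not None else 0

async def _demo() -> None:
    cache = POICacheService(_InMemoryRedis())
    # Two points roughly 10 m apart round to the same cache key and share results.
    assert cache._generate_cache_key(40.41681, -3.70379) == cache._generate_cache_key(40.41683, -3.70381)
    await cache.cache_poi_results(40.41681, -3.70379, {"competitors": {"pois": []}})
    print(await cache.get_cached_pois(40.41683, -3.70381))

if __name__ == "__main__":
    asyncio.run(_demo())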

View File

@@ -0,0 +1,298 @@
# services/external/app/cache/redis_wrapper.py
"""
Redis cache layer for fast access to external data (weather, traffic, calendars, tenant context) using the shared Redis implementation
"""
from typing import List, Dict, Any, Optional
import json
from datetime import datetime, timedelta
import structlog
from shared.redis_utils import get_redis_client
logger = structlog.get_logger()
class ExternalDataCache:
"""Redis cache for external data service"""
def __init__(self):
self.ttl = 86400 * 7 # 7 days
async def _get_client(self):
"""Get the shared Redis client"""
return await get_redis_client()
def _weather_cache_key(
self,
city_id: str,
start_date: datetime,
end_date: datetime
) -> str:
"""Generate cache key for weather data"""
return f"weather:{city_id}:{start_date.date()}:{end_date.date()}"
async def get_cached_weather(
self,
city_id: str,
start_date: datetime,
end_date: datetime
) -> Optional[List[Dict[str, Any]]]:
"""Get cached weather data"""
try:
key = self._weather_cache_key(city_id, start_date, end_date)
client = await self._get_client()
cached = await client.get(key)
if cached:
logger.debug("Weather cache hit", city_id=city_id, key=key)
return json.loads(cached)
logger.debug("Weather cache miss", city_id=city_id, key=key)
return None
except Exception as e:
logger.error("Error reading weather cache", error=str(e))
return None
async def set_cached_weather(
self,
city_id: str,
start_date: datetime,
end_date: datetime,
data: List[Dict[str, Any]]
):
"""Set cached weather data"""
try:
key = self._weather_cache_key(city_id, start_date, end_date)
serializable_data = []
for record in data:
# Handle both dict and Pydantic model objects
if hasattr(record, 'model_dump'):
record_dict = record.model_dump()
elif hasattr(record, 'dict'):
record_dict = record.dict()
else:
record_dict = record.copy() if isinstance(record, dict) else dict(record)
# Convert any datetime fields to ISO format strings
for key_name, value in record_dict.items():
if isinstance(value, datetime):
record_dict[key_name] = value.isoformat()
serializable_data.append(record_dict)
client = await self._get_client()
await client.setex(
key,
self.ttl,
json.dumps(serializable_data)
)
logger.debug("Weather data cached", city_id=city_id, records=len(data))
except Exception as e:
logger.error("Error caching weather data", error=str(e))
def _traffic_cache_key(
self,
city_id: str,
start_date: datetime,
end_date: datetime
) -> str:
"""Generate cache key for traffic data"""
return f"traffic:{city_id}:{start_date.date()}:{end_date.date()}"
async def get_cached_traffic(
self,
city_id: str,
start_date: datetime,
end_date: datetime
) -> Optional[List[Dict[str, Any]]]:
"""Get cached traffic data"""
try:
key = self._traffic_cache_key(city_id, start_date, end_date)
client = await self._get_client()
cached = await client.get(key)
if cached:
logger.debug("Traffic cache hit", city_id=city_id, key=key)
return json.loads(cached)
logger.debug("Traffic cache miss", city_id=city_id, key=key)
return None
except Exception as e:
logger.error("Error reading traffic cache", error=str(e))
return None
async def set_cached_traffic(
self,
city_id: str,
start_date: datetime,
end_date: datetime,
data: List[Dict[str, Any]]
):
"""Set cached traffic data"""
try:
key = self._traffic_cache_key(city_id, start_date, end_date)
serializable_data = []
for record in data:
# Handle both dict and Pydantic model objects
if hasattr(record, 'model_dump'):
record_dict = record.model_dump()
elif hasattr(record, 'dict'):
record_dict = record.dict()
else:
record_dict = record.copy() if isinstance(record, dict) else dict(record)
# Convert any datetime fields to ISO format strings
for key_name, value in record_dict.items():
if isinstance(value, datetime):
record_dict[key_name] = value.isoformat()
serializable_data.append(record_dict)
client = await self._get_client()
await client.setex(
key,
self.ttl,
json.dumps(serializable_data)
)
logger.debug("Traffic data cached", city_id=city_id, records=len(data))
except Exception as e:
logger.error("Error caching traffic data", error=str(e))
async def invalidate_city_cache(self, city_id: str):
"""Invalidate all cache entries for a city"""
try:
client = await self._get_client()
pattern = f"*:{city_id}:*"
# Use scan_iter for safer key pattern matching
keys_to_delete = []
async for key in client.scan_iter(match=pattern):
keys_to_delete.append(key)
if keys_to_delete:
await client.delete(*keys_to_delete)
logger.info("City cache invalidated", city_id=city_id, keys_deleted=len(keys_to_delete))
except Exception as e:
logger.error("Error invalidating cache", error=str(e))
# ===== Calendar Caching Methods =====
def _calendar_cache_key(self, calendar_id: str) -> str:
"""Generate cache key for school calendar"""
return f"calendar:{calendar_id}"
def _tenant_context_cache_key(self, tenant_id: str) -> str:
"""Generate cache key for tenant location context"""
return f"tenant_context:{tenant_id}"
async def get_cached_calendar(
self,
calendar_id: str
) -> Optional[Dict[str, Any]]:
"""Get cached school calendar by ID"""
try:
key = self._calendar_cache_key(calendar_id)
client = await self._get_client()
cached = await client.get(key)
if cached:
logger.debug("Calendar cache hit", calendar_id=calendar_id)
return json.loads(cached)
logger.debug("Calendar cache miss", calendar_id=calendar_id)
return None
except Exception as e:
logger.error("Error reading calendar cache", error=str(e))
return None
async def set_cached_calendar(
self,
calendar_id: str,
calendar_data: Dict[str, Any]
):
"""Cache school calendar data (7 days TTL)"""
try:
key = self._calendar_cache_key(calendar_id)
client = await self._get_client()
# Calendars change rarely, use 7-day TTL
ttl = 86400 * 7
await client.setex(
key,
ttl,
json.dumps(calendar_data)
)
logger.debug("Calendar cached", calendar_id=calendar_id)
except Exception as e:
logger.error("Error caching calendar", error=str(e))
async def get_cached_tenant_context(
self,
tenant_id: str
) -> Optional[Dict[str, Any]]:
"""Get cached tenant location context"""
try:
key = self._tenant_context_cache_key(tenant_id)
client = await self._get_client()
cached = await client.get(key)
if cached:
logger.debug("Tenant context cache hit", tenant_id=tenant_id)
return json.loads(cached)
logger.debug("Tenant context cache miss", tenant_id=tenant_id)
return None
except Exception as e:
logger.error("Error reading tenant context cache", error=str(e))
return None
async def set_cached_tenant_context(
self,
tenant_id: str,
context_data: Dict[str, Any]
):
"""Cache tenant location context (24 hours TTL)"""
try:
key = self._tenant_context_cache_key(tenant_id)
client = await self._get_client()
# Tenant context changes less frequently, 24-hour TTL
ttl = 86400
await client.setex(
key,
ttl,
json.dumps(context_data)
)
logger.debug("Tenant context cached", tenant_id=tenant_id)
except Exception as e:
logger.error("Error caching tenant context", error=str(e))
async def invalidate_tenant_context(self, tenant_id: str):
"""Invalidate tenant context cache (called when context is updated)"""
try:
key = self._tenant_context_cache_key(tenant_id)
client = await self._get_client()
await client.delete(key)
logger.info("Tenant context cache invalidated", tenant_id=tenant_id)
except Exception as e:
logger.error("Error invalidating tenant context cache", error=str(e))

View File

@@ -0,0 +1 @@
# services/external/app/core/__init__.py

77
services/external/app/core/config.py vendored Normal file
View File

@@ -0,0 +1,77 @@
# services/external/app/core/config.py
from shared.config.base import BaseServiceSettings
import os
from pydantic import Field
class DataSettings(BaseServiceSettings):
"""Data service specific settings"""
# Service Identity
SERVICE_NAME: str = "external-service"
VERSION: str = "1.0.0"
APP_NAME: str = "Bakery External Data Service"
DESCRIPTION: str = "External data collection service for weather and traffic data"
# API Configuration
API_V1_STR: str = "/api/v1"
# Database configuration (secure approach - build from components)
@property
def DATABASE_URL(self) -> str:
"""Build database URL from secure components"""
# Try complete URL first (for backward compatibility)
complete_url = os.getenv("EXTERNAL_DATABASE_URL")
if complete_url:
return complete_url
# Build from components (secure approach)
user = os.getenv("EXTERNAL_DB_USER", "external_user")
password = os.getenv("EXTERNAL_DB_PASSWORD", "external_pass123")
host = os.getenv("EXTERNAL_DB_HOST", "localhost")
port = os.getenv("EXTERNAL_DB_PORT", "5432")
name = os.getenv("EXTERNAL_DB_NAME", "external_db")
return f"postgresql+asyncpg://{user}:{password}@{host}:{port}/{name}"
# External API Configuration
AEMET_API_KEY: str = os.getenv("AEMET_API_KEY", "")
AEMET_BASE_URL: str = "https://opendata.aemet.es/opendata"
AEMET_TIMEOUT: int = int(os.getenv("AEMET_TIMEOUT", "90")) # Increased for unstable API
AEMET_RETRY_ATTEMPTS: int = int(os.getenv("AEMET_RETRY_ATTEMPTS", "5")) # More retries for connection issues
AEMET_ENABLED: bool = os.getenv("AEMET_ENABLED", "true").lower() == "true" # Allow disabling AEMET
MADRID_OPENDATA_API_KEY: str = os.getenv("MADRID_OPENDATA_API_KEY", "")
MADRID_OPENDATA_BASE_URL: str = "https://datos.madrid.es"
MADRID_OPENDATA_TIMEOUT: int = int(os.getenv("MADRID_OPENDATA_TIMEOUT", "30"))
# Data Collection Configuration
WEATHER_COLLECTION_INTERVAL_HOURS: int = int(os.getenv("WEATHER_COLLECTION_INTERVAL_HOURS", "1"))
TRAFFIC_COLLECTION_INTERVAL_HOURS: int = int(os.getenv("TRAFFIC_COLLECTION_INTERVAL_HOURS", "1"))
EVENTS_COLLECTION_INTERVAL_HOURS: int = int(os.getenv("EVENTS_COLLECTION_INTERVAL_HOURS", "6"))
# Cache TTL Configuration
WEATHER_CACHE_TTL_HOURS: int = int(os.getenv("WEATHER_CACHE_TTL_HOURS", "1"))
TRAFFIC_CACHE_TTL_HOURS: int = int(os.getenv("TRAFFIC_CACHE_TTL_HOURS", "1"))
EVENTS_CACHE_TTL_HOURS: int = int(os.getenv("EVENTS_CACHE_TTL_HOURS", "6"))
# Data Quality Configuration
DATA_VALIDATION_ENABLED: bool = os.getenv("DATA_VALIDATION_ENABLED", "true").lower() == "true"
OUTLIER_DETECTION_ENABLED: bool = os.getenv("OUTLIER_DETECTION_ENABLED", "true").lower() == "true"
DATA_COMPLETENESS_THRESHOLD: float = float(os.getenv("DATA_COMPLETENESS_THRESHOLD", "0.8"))
# Geolocation Settings (Madrid focus)
DEFAULT_LATITUDE: float = float(os.getenv("DEFAULT_LATITUDE", "40.4168")) # Madrid
DEFAULT_LONGITUDE: float = float(os.getenv("DEFAULT_LONGITUDE", "-3.7038")) # Madrid
LOCATION_RADIUS_KM: float = float(os.getenv("LOCATION_RADIUS_KM", "50.0"))
# Data Retention
RAW_DATA_RETENTION_DAYS: int = int(os.getenv("RAW_DATA_RETENTION_DAYS", "90"))
PROCESSED_DATA_RETENTION_DAYS: int = int(os.getenv("PROCESSED_DATA_RETENTION_DAYS", "365"))
# Batch Processing
BATCH_PROCESSING_ENABLED: bool = os.getenv("BATCH_PROCESSING_ENABLED", "true").lower() == "true"
BATCH_SIZE: int = int(os.getenv("BATCH_SIZE", "1000"))
PARALLEL_PROCESSING_WORKERS: int = int(os.getenv("PARALLEL_PROCESSING_WORKERS", "4"))
settings = DataSettings()
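# ================================================================
# Sketch of how DATABASE_URL is resolved when no EXTERNAL_DATABASE_URL is set:
# the URL is assembled from the individual EXTERNAL_DB_* components. Assumes
# it runs alongside the settings object above; the values are placeholders.
# ================================================================
import os

if __name__ == "__main__":
    os.environ.pop("EXTERNAL_DATABASE_URL", None)
    os.environ.update({
        "EXTERNAL_DB_USER": "external_user",
        "EXTERNAL_DB_PASSWORD": "change-me",
        "EXTERNAL_DB_HOST": "external-db-service",
        "EXTERNAL_DB_PORT": "5432",
        "EXTERNAL_DB_NAME": "external_db",
    })
    # DATABASE_URL is a property that reads the environment on every access.
    print(settings.DATABASE_URL)
    # -> postgresql+asyncpg://external_user:change-me@external-db-service:5432/external_db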

81
services/external/app/core/database.py vendored Normal file
View File

@@ -0,0 +1,81 @@
# services/external/app/core/database.py
"""
External Service Database Configuration using shared database manager
"""
import structlog
from contextlib import asynccontextmanager
from typing import AsyncGenerator
from app.core.config import settings
from shared.database.base import DatabaseManager, Base
logger = structlog.get_logger()
# Create database manager instance
database_manager = DatabaseManager(
database_url=settings.DATABASE_URL,
service_name="external-service"
)
async def get_db():
"""
Database dependency for FastAPI - using shared database manager
"""
async for session in database_manager.get_db():
yield session
async def init_db():
"""Initialize database tables using shared database manager"""
try:
logger.info("Initializing External Service database...")
# Import all models to ensure they're registered
from app.models import weather, traffic # noqa: F401
# Create all tables using database manager
await database_manager.create_tables(Base.metadata)
logger.info("External Service database initialized successfully")
except Exception as e:
logger.error("Failed to initialize database", error=str(e))
raise
async def close_db():
"""Close database connections using shared database manager"""
try:
await database_manager.close_connections()
logger.info("Database connections closed")
except Exception as e:
logger.error("Error closing database connections", error=str(e))
@asynccontextmanager
async def get_db_transaction():
"""
Context manager for database transactions using shared database manager
"""
async with database_manager.get_session() as session:
try:
async with session.begin():
yield session
except Exception as e:
logger.error("Transaction error", error=str(e))
raise
@asynccontextmanager
async def get_background_session():
"""
Context manager for background tasks using shared database manager
"""
async with database_manager.get_background_session() as session:
yield session
async def health_check():
"""Database health check using shared database manager"""
return await database_manager.health_check()
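# ================================================================
# Usage sketch for the helpers above, assuming the external database is
# reachable with the configured DATABASE_URL and that this runs alongside
# this module. The raw SELECT is illustrative; application code normally
# goes through the repositories.
# ================================================================
import asyncio
from sqlalchemy import text

async def _demo_db() -> None:
    async with get_db_transaction() as session:
        value = (await session.execute(text("SELECT 1"))).scalar_one()
        print("transaction ok:", value)
    print("health:", await health_check())

if __name__ == "__main__":
    asyncio.run(_demo_db())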

181
services/external/app/core/poi_config.py vendored Normal file
View File

@@ -0,0 +1,181 @@
"""
POI Detection Configuration
Defines POI categories, search parameters, and relevance thresholds
for automated Point of Interest detection and feature engineering.
"""
from dataclasses import dataclass
from typing import Dict
@dataclass
class POICategory:
"""POI category definition with OSM query and ML parameters"""
name: str
osm_query: str
search_radius_m: int
weight: float # Importance weight for ML model (positive or negative)
description: str
# POI Category Definitions based on OpenStreetMap tags
# Research-based search radii and weights for bakery demand forecasting
POI_CATEGORIES: Dict[str, POICategory] = {
"schools": POICategory(
name="schools",
osm_query='["amenity"~"school|kindergarten|university|college"]',
search_radius_m=500,
weight=1.5, # High positive impact - morning drop-off rush
description="Educational institutions causing morning/afternoon rush patterns"
),
"offices": POICategory(
name="offices",
osm_query='["office"]',
search_radius_m=800,
weight=1.3, # Positive impact - weekday lunch/breakfast demand
description="Office buildings and business centers"
),
"gyms_sports": POICategory(
name="gyms_sports",
osm_query='["leisure"~"fitness_centre|sports_centre|stadium"]',
search_radius_m=600,
weight=0.8, # Moderate impact - morning/evening activity
description="Fitness centers and sports facilities"
),
"residential": POICategory(
name="residential",
osm_query='["building"~"residential|apartments|house"]',
search_radius_m=400,
weight=1.0, # Base demand from residents
description="Residential buildings and housing"
),
"tourism": POICategory(
name="tourism",
osm_query='["tourism"~"attraction|museum|hotel|hostel|guest_house"]',
search_radius_m=1000,
weight=1.2, # Positive impact - tourist foot traffic
description="Tourist attractions, hotels, and points of interest"
),
"competitors": POICategory(
name="competitors",
osm_query='["shop"~"bakery|pastry|confectionery"]',
search_radius_m=1000,
weight=-0.5, # Negative impact - competition pressure
description="Competing bakeries and pastry shops"
),
"transport_hubs": POICategory(
name="transport_hubs",
osm_query='["public_transport"~"station|stop"]["railway"~"station|subway_entrance|tram_stop"]',
search_radius_m=800,
weight=1.4, # High impact - commuter foot traffic
description="Public transport stations and hubs"
),
"coworking": POICategory(
name="coworking",
osm_query='["amenity"="coworking_space"]',
search_radius_m=600,
weight=1.1, # Moderate-high impact - flexible workers
description="Coworking spaces and shared offices"
),
"retail": POICategory(
name="retail",
osm_query='["shop"]',
search_radius_m=500,
weight=0.9, # Moderate impact - general foot traffic
description="Retail shops and commercial areas"
)
}
# Feature Relevance Thresholds
# Determines which POI features are significant enough to include in ML models
# Based on retail gravity model research and distance decay patterns
RELEVANCE_THRESHOLDS: Dict[str, Dict[str, float]] = {
"schools": {
"min_proximity_score": 0.5, # At least moderate proximity required
"max_distance_to_nearest_m": 500, # Must be within 500m
"min_count": 1 # At least 1 school
},
"offices": {
"min_proximity_score": 0.3,
"max_distance_to_nearest_m": 800,
"min_count": 2 # Offices are common; need multiple for impact
},
"gyms_sports": {
"min_proximity_score": 0.4,
"max_distance_to_nearest_m": 600,
"min_count": 1
},
"residential": {
"min_proximity_score": 1.0, # High threshold; residential is everywhere in cities
"max_distance_to_nearest_m": 400,
"min_count": 5 # Need significant residential density
},
"tourism": {
"min_proximity_score": 0.2, # Lower threshold; tourism is high-impact even at distance
"max_distance_to_nearest_m": 1000,
"min_count": 1
},
"competitors": {
"min_proximity_score": 0.1, # Any competition is relevant (even distant)
"max_distance_to_nearest_m": 1000,
"min_count": 1
},
"transport_hubs": {
"min_proximity_score": 0.4,
"max_distance_to_nearest_m": 800,
"min_count": 1
},
"coworking": {
"min_proximity_score": 0.3,
"max_distance_to_nearest_m": 600,
"min_count": 1
},
"retail": {
"min_proximity_score": 0.8, # Retail is common; higher bar for relevance
"max_distance_to_nearest_m": 500,
"min_count": 3
}
}
# Overpass API Configuration
OVERPASS_API_URL = "https://overpass-api.de/api/interpreter"
OVERPASS_TIMEOUT_SECONDS = 30
OVERPASS_MAX_RETRIES = 4 # Increased from 3 to 4 for better resilience
OVERPASS_RETRY_DELAY_SECONDS = 2 # Base delay (will use exponential backoff)
# POI Cache Configuration
POI_CACHE_TTL_DAYS = 90 # Cache POI results for 90 days
POI_REFRESH_INTERVAL_DAYS = 180 # Refresh every 6 months
POI_COORDINATE_PRECISION = 4 # Decimal places for cache key (≈10m precision)
# Distance Bands for Feature Engineering (meters)
DISTANCE_BANDS = [
(0, 100), # Immediate proximity
(100, 300), # Primary catchment (walking distance)
(300, 500), # Secondary catchment
(500, 1000) # Tertiary catchment
]
# Competitive Pressure Zones
COMPETITOR_ZONES = {
"direct": {
"max_distance_m": 100,
"pressure_multiplier": -1.0 # Strong negative impact
},
"nearby": {
"max_distance_m": 500,
"pressure_multiplier": -0.5 # Moderate negative impact
},
"market": {
"max_distance_m": 1000,
"min_count_for_district": 5, # If 5+ bakeries = bakery district
"district_multiplier": 0.3, # Positive impact (destination area)
"normal_multiplier": -0.2 # Slight negative (competitive market)
}
}
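# ================================================================
# Minimal sketch of how RELEVANCE_THRESHOLDS might be applied to one detected
# category. The category_stats shape (count, proximity_score,
# distance_to_nearest_m) is an assumption for illustration; the real feature
# selection lives in the POI feature selector, not in this config module.
# ================================================================
from typing import Any, Dict

def is_category_relevant(category: str, category_stats: Dict[str, Any]) -> bool:
    thresholds = RELEVANCE_THRESHOLDS.get(category)
    if thresholds is None:
        return False
    return (
        category_stats.get("count", 0) >= thresholds["min_count"]
        and category_stats.get("proximity_score", 0.0) >= thresholds["min_proximity_score"]
        and category_stats.get("distance_to_nearest_m", float("inf")) <= thresholds["max_distance_to_nearest_m"]
    )

if __name__ == "__main__":
    print(is_category_relevant("schools", {"count": 2, "proximity_score": 0.7, "distance_to_nearest_m": 320}))   # True
    print(is_category_relevant("retail", {"count": 1, "proximity_score": 0.9, "distance_to_nearest_m": 150}))    # False: needs 3+ shops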

View File

@@ -0,0 +1,16 @@
"""
Redis Client for POI Service
Provides access to shared Redis client for POI caching.
"""
from shared.redis_utils import get_redis_client as get_shared_redis_client
async def get_redis_client():
"""
Get Redis client for POI service.
Uses shared Redis infrastructure from shared utilities.
"""
return await get_shared_redis_client()

View File

1004
services/external/app/external/aemet.py vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,10 @@
# ================================================================
# services/external/app/external/apis/__init__.py
# ================================================================
"""
External API clients module - Scalable architecture for multiple cities
"""
from .traffic import TrafficAPIClientFactory
__all__ = ["TrafficAPIClientFactory"]

View File

@@ -0,0 +1,410 @@
# ================================================================
# services/external/app/external/apis/madrid_traffic_client.py
# ================================================================
"""
Madrid traffic client - Orchestration layer only
Coordinates between HTTP client, data processor, and business logic components
"""
from datetime import datetime, timedelta, timezone
from typing import Dict, List, Any, Optional, Tuple
import structlog
from .traffic import BaseTrafficClient, SupportedCity
from ..base_client import BaseAPIClient
from ..clients.madrid_client import MadridTrafficAPIClient
from ..processors.madrid_processor import MadridTrafficDataProcessor
from ..processors.madrid_business_logic import MadridTrafficAnalyzer
from ..models.madrid_models import TrafficRecord, CongestionLevel
class MadridTrafficClient(BaseTrafficClient, BaseAPIClient):
"""
Enhanced Madrid traffic client - Orchestration layer
Coordinates HTTP, processing, and business logic components
"""
# Madrid geographic bounds
MADRID_BOUNDS = {
'lat_min': 40.31, 'lat_max': 40.56,
'lon_min': -3.89, 'lon_max': -3.51
}
# Configuration constants
MAX_HISTORICAL_DAYS = 1095 # 3 years
MAX_CSV_PROCESSING_ROWS = 5000000
MEASUREMENT_POINTS_LIMIT = 20
def __init__(self):
BaseTrafficClient.__init__(self, SupportedCity.MADRID)
BaseAPIClient.__init__(self, base_url="https://datos.madrid.es")
# Initialize components
self.api_client = MadridTrafficAPIClient()
self.processor = MadridTrafficDataProcessor()
self.analyzer = MadridTrafficAnalyzer()
self.logger = structlog.get_logger()
def supports_location(self, latitude: float, longitude: float) -> bool:
"""Check if location is within Madrid bounds"""
return (self.MADRID_BOUNDS['lat_min'] <= latitude <= self.MADRID_BOUNDS['lat_max'] and
self.MADRID_BOUNDS['lon_min'] <= longitude <= self.MADRID_BOUNDS['lon_max'])
async def get_current_traffic(self, latitude: float, longitude: float) -> Optional[Dict[str, Any]]:
"""Get current traffic data with enhanced pedestrian inference"""
try:
if not self.supports_location(latitude, longitude):
self.logger.warning("Location outside Madrid bounds", lat=latitude, lon=longitude)
return None
# Fetch XML data
xml_content = await self.api_client.fetch_current_traffic_xml()
if not xml_content:
self.logger.warning("No XML content received")
return None
# Parse XML data
traffic_points = self.processor.parse_traffic_xml(xml_content)
if not traffic_points:
self.logger.warning("No traffic points found in XML - API may be temporarily unavailable")
return None
# Find nearest traffic point
nearest_point = self.analyzer.find_nearest_traffic_point(traffic_points, latitude, longitude)
if not nearest_point:
self.logger.warning("No nearby traffic points found")
return None
# Enhance with business logic
enhanced_data = await self._enhance_traffic_data(nearest_point, latitude, longitude)
self.logger.info("Current traffic data retrieved",
point_id=nearest_point.get('measurement_point_id'),
distance=enhanced_data.get('distance_km', 0))
return enhanced_data
except Exception as e:
self.logger.error("Error getting current traffic", error=str(e))
return None
async def get_historical_traffic(self, latitude: float, longitude: float,
start_date: datetime, end_date: datetime) -> List[Dict[str, Any]]:
"""Get historical traffic data with pedestrian enhancement"""
try:
if not self.supports_location(latitude, longitude):
self.logger.warning("Location outside Madrid bounds", lat=latitude, lon=longitude)
return []
# Validate date range
if (end_date - start_date).days > self.MAX_HISTORICAL_DAYS:
self.logger.warning("Date range too large, truncating",
requested_days=(end_date - start_date).days,
max_days=self.MAX_HISTORICAL_DAYS)
start_date = end_date - timedelta(days=self.MAX_HISTORICAL_DAYS)
# Fetch measurement points registry
csv_content = await self.api_client.fetch_measurement_points_csv()
if not csv_content:
self.logger.error("Failed to fetch measurement points registry")
return []
# Parse measurement points
measurement_points = self.processor.parse_measurement_points_csv(csv_content)
if not measurement_points:
self.logger.error("No measurement points found")
return []
# Find nearest measurement points
nearest_points = self.analyzer.find_nearest_measurement_points(
measurement_points, latitude, longitude, num_points=3
)
if not nearest_points:
self.logger.warning("No nearby measurement points found")
return []
# Process historical data
historical_records = await self._fetch_historical_data_enhanced(
latitude, longitude, start_date, end_date, nearest_points
)
self.logger.info("Historical traffic data retrieved",
records_count=len(historical_records),
date_range=f"{start_date.date()} to {end_date.date()}")
return historical_records
except Exception as e:
self.logger.error("Error getting historical traffic", error=str(e))
return []
async def get_events(self, latitude: float, longitude: float,
radius_km: float = 5.0) -> List[Dict[str, Any]]:
"""Get traffic events (incidents, construction, etc.)"""
# Madrid does not provide a separate traffic events endpoint,
# so high-congestion readings from the current feed are returned as events
current_data = await self.get_current_traffic(latitude, longitude)
if current_data and current_data.get('congestion_level') in ['high', 'blocked']:
return [{
'type': 'congestion',
'severity': current_data.get('congestion_level'),
'description': f"High traffic congestion at {current_data.get('measurement_point_name', 'measurement point')}",
'location': {
'latitude': current_data.get('latitude'),
'longitude': current_data.get('longitude')
},
'timestamp': current_data.get('timestamp')
}]
return []
async def _enhance_traffic_data(self, traffic_point: Dict[str, Any],
query_lat: float, query_lon: float) -> Dict[str, Any]:
"""Enhance traffic data with business logic and pedestrian inference"""
# Calculate distance
distance_km = self.analyzer.calculate_distance(
query_lat, query_lon,
traffic_point.get('latitude', 0),
traffic_point.get('longitude', 0)
)
# Classify road type
road_type = self.analyzer.classify_road_type(
traffic_point.get('measurement_point_name', '')
)
# Get congestion level
congestion_level = self.analyzer.get_congestion_level(
traffic_point.get('ocupacion', 0)
)
# Create traffic record for pedestrian inference
traffic_record = TrafficRecord(
date=datetime.now(timezone.utc),
traffic_volume=traffic_point.get('intensidad', 0),
occupation_percentage=int(traffic_point.get('ocupacion', 0)),
load_percentage=traffic_point.get('carga', 0),
average_speed=30, # Default speed
congestion_level=congestion_level,
pedestrian_count=0, # Will be calculated
measurement_point_id=traffic_point.get('measurement_point_id', ''),
measurement_point_name=traffic_point.get('measurement_point_name', ''),
road_type=road_type,
source='madrid_current_xml'
)
# Calculate pedestrian count
location_context = {
'latitude': traffic_point.get('latitude'),
'longitude': traffic_point.get('longitude'),
'measurement_point_name': traffic_point.get('measurement_point_name')
}
pedestrian_count, inference_metadata = self.analyzer.calculate_pedestrian_flow(
traffic_record, location_context
)
# Calculate average speed based on congestion level
if congestion_level == 'high':
average_speed = 15.0
elif congestion_level == 'medium':
average_speed = 35.0
elif congestion_level == 'low':
average_speed = 50.0
else:
average_speed = 30.0 # default
# Build enhanced response with required API fields
enhanced_data = {
'date': datetime.now(timezone.utc), # Required API field
'timestamp': datetime.now(timezone.utc),
'latitude': traffic_point.get('latitude'),
'longitude': traffic_point.get('longitude'),
'measurement_point_id': traffic_point.get('measurement_point_id'),
'measurement_point_name': traffic_point.get('measurement_point_name'),
'traffic_volume': traffic_point.get('intensidad', 0),
'pedestrian_count': pedestrian_count,
'congestion_level': congestion_level,
'average_speed': average_speed, # Required API field
'occupation_percentage': int(traffic_point.get('ocupacion', 0)),
'load_percentage': traffic_point.get('carga', 0),
'road_type': road_type,
'distance_km': distance_km,
'source': 'madrid_current_xml',
'city': 'madrid',
'inference_metadata': inference_metadata,
'raw_data': traffic_point
}
return enhanced_data
async def _fetch_historical_data_enhanced(self, latitude: float, longitude: float,
start_date: datetime, end_date: datetime,
nearest_points: List[Tuple[str, Dict[str, Any], float]]) -> List[Dict[str, Any]]:
"""Fetch and process historical traffic data"""
historical_records = []
try:
# Process by year and month to avoid memory issues
current_date = start_date.replace(day=1) # Start from beginning of month
now = datetime.now()
while current_date <= end_date:
year = current_date.year
month = current_date.month
# Skip current month and future months (no historical data available yet)
if (year == now.year and month >= now.month) or year > now.year:
self.logger.info("Skipping current/future month - no historical data available",
year=year, month=month)
current_date = self._next_month(current_date)
continue
# Build historical URL
zip_url = self.api_client._build_historical_url(year, month)
self.logger.info("Processing historical ZIP file",
year=year, month=month, zip_url=zip_url)
# Fetch ZIP content
zip_content = await self.api_client.fetch_historical_zip(zip_url)
if not zip_content:
self.logger.warning("Failed to fetch historical ZIP", url=zip_url)
current_date = self._next_month(current_date)
continue
# Process ZIP content with enhanced parsing
month_records = await self._process_historical_zip_enhanced(
zip_content, zip_url, latitude, longitude, nearest_points
)
# Filter by date range - ensure timezone consistency
# Make sure start_date and end_date have timezone info for comparison
start_tz = start_date if start_date.tzinfo else start_date.replace(tzinfo=timezone.utc)
end_tz = end_date if end_date.tzinfo else end_date.replace(tzinfo=timezone.utc)
filtered_records = []
for record in month_records:
record_date = record.get('date')
if not record_date:
continue
# Ensure record date has timezone info
if not record_date.tzinfo:
record_date = record_date.replace(tzinfo=timezone.utc)
# Now compare with consistent timezone info
if start_tz <= record_date <= end_tz:
filtered_records.append(record)
historical_records.extend(filtered_records)
self.logger.info("Month processing completed",
year=year, month=month,
month_records=len(month_records),
filtered_records=len(filtered_records),
total_records=len(historical_records))
# Move to next month - extracted to helper method
current_date = self._next_month(current_date)
return historical_records
except Exception as e:
self.logger.error("Error fetching historical data", error=str(e))
return historical_records # Return partial results
async def _process_historical_zip_enhanced(self, zip_content: bytes, zip_url: str,
latitude: float, longitude: float,
nearest_points: List[Tuple[str, Dict[str, Any], float]]) -> List[Dict[str, Any]]:
"""Process historical ZIP file with memory-efficient streaming"""
try:
import zipfile
import io
import csv
import gc
historical_records = []
nearest_ids = {p[0] for p in nearest_points}
with zipfile.ZipFile(io.BytesIO(zip_content)) as zip_file:
csv_files = [f for f in zip_file.namelist() if f.lower().endswith('.csv')]
for csv_filename in csv_files:
try:
# Stream CSV file line-by-line to avoid loading entire file into memory
with zip_file.open(csv_filename) as csv_file:
# Decode the CSV stream incrementally for line-by-line reading
import codecs
text_wrapper = codecs.iterdecode(csv_file, 'utf-8', errors='ignore')
csv_reader = csv.DictReader(text_wrapper, delimiter=';')
# Process in small batches
batch_size = 5000
batch_records = []
row_count = 0
for row in csv_reader:
row_count += 1
measurement_point_id = row.get('id', '').strip()
# Skip rows we don't need
if measurement_point_id not in nearest_ids:
continue
try:
record_data = await self.processor.parse_historical_csv_row(row, nearest_points)
if record_data:
batch_records.append(record_data)
# Store and clear batch when full
if len(batch_records) >= batch_size:
historical_records.extend(batch_records)
batch_records = []
gc.collect()
except Exception:
continue
# Store remaining records
if batch_records:
historical_records.extend(batch_records)
batch_records = []
self.logger.info("CSV file processed",
filename=csv_filename,
rows_scanned=row_count,
records_extracted=len(historical_records))
# Aggressive garbage collection after each CSV
gc.collect()
except Exception as csv_error:
self.logger.warning("Error processing CSV file",
filename=csv_filename,
error=str(csv_error))
continue
self.logger.info("Historical ZIP processing completed",
zip_url=zip_url,
total_records=len(historical_records))
# Final cleanup
del zip_content
gc.collect()
return historical_records
except Exception as e:
self.logger.error("Error processing historical ZIP file",
zip_url=zip_url, error=str(e))
return []
def _next_month(self, current_date: datetime) -> datetime:
"""Helper method to move to next month"""
if current_date.month == 12:
return current_date.replace(year=current_date.year + 1, month=1)
else:
return current_date.replace(month=current_date.month + 1)
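# ================================================================
# Minimal standalone sketch of the month-by-month walk used by the historical
# fetch above, so the stepping logic is easy to verify without constructing
# the full MadridTrafficClient (the date range below is illustrative).
# ================================================================
from datetime import datetime

def _walk_months(start: datetime, end: datetime):
    cursor = start.replace(day=1)
    while cursor <= end:
        yield cursor.year, cursor.month
        if cursor.month == 12:
            cursor = cursor.replace(year=cursor.year + 1, month=1)
        else:
            cursor = cursor.replace(month=cursor.month + 1)

if __name__ == "__main__":
    print(list(_walk_months(datetime(2024, 11, 15), datetime(2025, 2, 28))))
    # -> [(2024, 11), (2024, 12), (2025, 1), (2025, 2)]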

View File

@@ -0,0 +1,257 @@
# ================================================================
# services/external/app/external/apis/traffic.py
# ================================================================
"""
Traffic API abstraction layer for multiple cities
"""
import asyncio
from abc import ABC, abstractmethod
from datetime import datetime
from enum import Enum
from typing import Dict, List, Any, Optional, Tuple
import structlog
logger = structlog.get_logger()
class SupportedCity(Enum):
"""Supported cities for traffic data collection"""
MADRID = "madrid"
BARCELONA = "barcelona"
VALENCIA = "valencia"
class BaseTrafficClient(ABC):
"""
Abstract base class for city-specific traffic clients
Defines the contract that all traffic clients must implement
"""
def __init__(self, city: SupportedCity):
self.city = city
self.logger = structlog.get_logger().bind(city=city.value)
@abstractmethod
async def get_current_traffic(self, latitude: float, longitude: float) -> Optional[Dict[str, Any]]:
"""Get current traffic data for location"""
pass
@abstractmethod
async def get_historical_traffic(self, latitude: float, longitude: float,
start_date: datetime, end_date: datetime) -> List[Dict[str, Any]]:
"""Get historical traffic data"""
pass
@abstractmethod
async def get_events(self, latitude: float, longitude: float, radius_km: float = 5.0) -> List[Dict[str, Any]]:
"""Get traffic incidents and events"""
pass
@abstractmethod
def supports_location(self, latitude: float, longitude: float) -> bool:
"""Check if this client supports the given location"""
pass
class TrafficAPIClientFactory:
"""
Factory class to create appropriate traffic clients based on location
"""
# City geographical bounds
CITY_BOUNDS = {
SupportedCity.MADRID: {
'lat_min': 40.31, 'lat_max': 40.56,
'lon_min': -3.89, 'lon_max': -3.51
},
SupportedCity.BARCELONA: {
'lat_min': 41.32, 'lat_max': 41.47,
'lon_min': 2.05, 'lon_max': 2.25
},
SupportedCity.VALENCIA: {
'lat_min': 39.42, 'lat_max': 39.52,
'lon_min': -0.42, 'lon_max': -0.32
}
}
@classmethod
def get_client_for_location(cls, latitude: float, longitude: float) -> Optional[BaseTrafficClient]:
"""
Get appropriate traffic client for given location
Args:
latitude: Query location latitude
longitude: Query location longitude
Returns:
BaseTrafficClient instance or None if location not supported
"""
try:
# Check each city's bounds
for city, bounds in cls.CITY_BOUNDS.items():
if (bounds['lat_min'] <= latitude <= bounds['lat_max'] and
bounds['lon_min'] <= longitude <= bounds['lon_max']):
logger.info("Location matched to city",
city=city.value, lat=latitude, lon=longitude)
return cls._create_client(city)
# If no specific city matches, try to find closest supported city
closest_city = cls._find_closest_city(latitude, longitude)
if closest_city:
logger.info("Using closest city for location",
closest_city=closest_city.value, lat=latitude, lon=longitude)
return cls._create_client(closest_city)
logger.warning("No traffic client available for location",
lat=latitude, lon=longitude)
return None
except Exception as e:
logger.error("Error getting traffic client for location",
lat=latitude, lon=longitude, error=str(e))
return None
@classmethod
def _create_client(cls, city: SupportedCity) -> BaseTrafficClient:
"""Create traffic client for specific city"""
if city == SupportedCity.MADRID:
from .madrid_traffic_client import MadridTrafficClient
return MadridTrafficClient()
elif city == SupportedCity.BARCELONA:
# Future implementation
raise NotImplementedError(f"Traffic client for {city.value} not yet implemented")
elif city == SupportedCity.VALENCIA:
# Future implementation
raise NotImplementedError(f"Traffic client for {city.value} not yet implemented")
else:
raise ValueError(f"Unsupported city: {city}")
@classmethod
def _find_closest_city(cls, latitude: float, longitude: float) -> Optional[SupportedCity]:
"""Find closest supported city to given coordinates"""
import math
def distance(lat1, lon1, lat2, lon2):
"""Calculate distance between two coordinates"""
R = 6371 # Earth's radius in km
dlat = math.radians(lat2 - lat1)
dlon = math.radians(lon2 - lon1)
a = (math.sin(dlat/2) * math.sin(dlat/2) +
math.cos(math.radians(lat1)) * math.cos(math.radians(lat2)) *
math.sin(dlon/2) * math.sin(dlon/2))
c = 2 * math.atan2(math.sqrt(a), math.sqrt(1-a))
return R * c
min_distance = float('inf')
closest_city = None
# City centers for distance calculation
city_centers = {
SupportedCity.MADRID: (40.4168, -3.7038),
SupportedCity.BARCELONA: (41.3851, 2.1734),
SupportedCity.VALENCIA: (39.4699, -0.3763)
}
for city, (city_lat, city_lon) in city_centers.items():
dist = distance(latitude, longitude, city_lat, city_lon)
if dist < min_distance and dist < 100: # Within 100km
min_distance = dist
closest_city = city
return closest_city
@classmethod
def get_supported_cities(cls) -> List[Dict[str, Any]]:
"""Get list of supported cities with their bounds"""
cities = []
for city, bounds in cls.CITY_BOUNDS.items():
cities.append({
"city": city.value,
"bounds": bounds,
"status": "active" if city == SupportedCity.MADRID else "planned"
})
return cities
class UniversalTrafficClient:
"""
Universal traffic client that delegates to appropriate city-specific clients
This is the main interface that external services should use
"""
def __init__(self):
self.factory = TrafficAPIClientFactory()
self.client_cache = {} # Cache clients for performance
async def get_current_traffic(self, latitude: float, longitude: float) -> Optional[Dict[str, Any]]:
"""Get current traffic data for any supported location"""
try:
client = self._get_client_for_location(latitude, longitude)
if client:
return await client.get_current_traffic(latitude, longitude)
else:
logger.warning("No traffic data available for location",
lat=latitude, lon=longitude)
return None
except Exception as e:
logger.error("Error getting current traffic",
lat=latitude, lon=longitude, error=str(e))
return None
async def get_historical_traffic(self, latitude: float, longitude: float,
start_date: datetime, end_date: datetime) -> List[Dict[str, Any]]:
"""Get historical traffic data for any supported location"""
try:
client = self._get_client_for_location(latitude, longitude)
if client:
return await client.get_historical_traffic(latitude, longitude, start_date, end_date)
else:
logger.warning("No historical traffic data available for location",
lat=latitude, lon=longitude)
return []
except Exception as e:
logger.error("Error getting historical traffic",
lat=latitude, lon=longitude, error=str(e))
return []
async def get_events(self, latitude: float, longitude: float, radius_km: float = 5.0) -> List[Dict[str, Any]]:
"""Get traffic events for any supported location"""
try:
client = self._get_client_for_location(latitude, longitude)
if client:
return await client.get_events(latitude, longitude, radius_km)
else:
return []
except Exception as e:
logger.error("Error getting traffic events",
lat=latitude, lon=longitude, error=str(e))
return []
def _get_client_for_location(self, latitude: float, longitude: float) -> Optional[BaseTrafficClient]:
"""Get cached or create new client for location"""
cache_key = f"{latitude:.4f},{longitude:.4f}"
if cache_key not in self.client_cache:
client = self.factory.get_client_for_location(latitude, longitude)
self.client_cache[cache_key] = client
return self.client_cache[cache_key]
def get_location_info(self, latitude: float, longitude: float) -> Dict[str, Any]:
"""Get information about traffic data availability for location"""
client = self._get_client_for_location(latitude, longitude)
if client:
return {
"supported": True,
"city": client.city.value,
"features": ["current_traffic", "historical_traffic", "events"]
}
else:
return {
"supported": False,
"city": None,
"features": [],
"message": "No traffic data available for this location"
}
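
A short usage sketch for the factory and universal client above. The import path is taken from the file header, and the calls shown avoid fetching remote data; treat it as an illustration rather than a test:

import asyncio
from app.external.apis.traffic import TrafficAPIClientFactory, UniversalTrafficClient

async def main():
    # List the configured cities and their rollout status
    for entry in TrafficAPIClientFactory.get_supported_cities():
        print(entry["city"], entry["status"])

    client = UniversalTrafficClient()
    # Coordinates inside the Madrid bounding box resolve to the Madrid client
    info = client.get_location_info(40.4168, -3.7038)
    print(info["supported"], info.get("city"))

asyncio.run(main())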

View File

@@ -0,0 +1,204 @@
# ================================================================
# services/data/app/external/base_client.py
# ================================================================
"""Base HTTP client for external APIs - Enhanced for AEMET"""
import httpx
from typing import Dict, Any, Optional
import structlog
from datetime import datetime
logger = structlog.get_logger()
class BaseAPIClient:
def __init__(self, base_url: str, api_key: Optional[str] = None):
self.base_url = base_url
self.api_key = api_key
# Increase timeout and add connection retries for unstable AEMET API
self.timeout = httpx.Timeout(60.0) # Increased from 30s
self.retries = 3
async def _get(self, endpoint: str, params: Optional[Dict] = None, headers: Optional[Dict] = None) -> Optional[Dict[str, Any]]:
"""Make GET request with retry logic for unstable APIs"""
url = f"{self.base_url}{endpoint}"
# Add API key to params for AEMET (not headers)
request_params = params or {}
if self.api_key:
request_params["api_key"] = self.api_key
# Add headers if provided
request_headers = headers or {}
logger.debug("Making API request", url=url, params=request_params)
# Retry logic for unstable AEMET API
for attempt in range(self.retries):
try:
async with httpx.AsyncClient(timeout=self.timeout) as client:
response = await client.get(url, params=request_params, headers=request_headers)
response.raise_for_status()
# Log response for debugging
response_data = response.json()
logger.debug("API response received",
status_code=response.status_code,
response_keys=list(response_data.keys()) if isinstance(response_data, dict) else "non-dict",
attempt=attempt + 1)
return response_data
except httpx.HTTPStatusError as e:
logger.error("HTTP error", status_code=e.response.status_code, url=url,
response_text=e.response.text[:200], attempt=attempt + 1)
# Handle rate limiting (429) with longer backoff
if e.response.status_code == 429:
import asyncio
# Exponential backoff: 5s, 15s, 45s for rate limits
wait_time = 5 * (3 ** attempt)
logger.warning(f"Rate limit hit, waiting {wait_time}s before retry",
attempt=attempt + 1, max_attempts=self.retries)
await asyncio.sleep(wait_time)
if attempt < self.retries - 1:
continue
if attempt == self.retries - 1: # Last attempt
return None
except httpx.RequestError as e:
logger.error("Request error", error=str(e), url=url, attempt=attempt + 1)
if attempt == self.retries - 1: # Last attempt
return None
# Wait before retry (exponential backoff)
import asyncio
wait_time = 2 ** attempt # 1s, 2s, 4s
logger.info(f"Retrying AEMET API in {wait_time}s", attempt=attempt + 1, max_attempts=self.retries)
await asyncio.sleep(wait_time)
except Exception as e:
logger.error("Unexpected error", error=str(e), url=url, attempt=attempt + 1)
if attempt == self.retries - 1: # Last attempt
return None
return None
async def _fetch_url_directly(self, url: str, headers: Optional[Dict] = None) -> Optional[Dict[str, Any]]:
"""Fetch data directly from a full URL (for AEMET datos URLs) with retry logic"""
request_headers = headers or {}
logger.debug("Making direct URL request", url=url)
# Retry logic for unstable AEMET datos URLs
for attempt in range(self.retries):
try:
async with httpx.AsyncClient(timeout=self.timeout) as client:
response = await client.get(url, headers=request_headers)
response.raise_for_status()
# Handle encoding issues common with Spanish data sources
try:
response_data = response.json()
except UnicodeDecodeError:
logger.warning("UTF-8 decode failed, trying alternative encodings", url=url)
                        # Try common Spanish encodings
                        import json
                        for encoding in ['latin-1', 'windows-1252', 'iso-8859-1']:
                            try:
                                text_content = response.content.decode(encoding)
                                response_data = json.loads(text_content)
                                logger.info("Successfully decoded with encoding", encoding=encoding)
                                break
                            except (UnicodeDecodeError, json.JSONDecodeError):
                                continue
else:
logger.error("Failed to decode response with any encoding", url=url)
if attempt < self.retries - 1:
continue
return None
logger.debug("Direct URL response received",
status_code=response.status_code,
data_type=type(response_data),
data_length=len(response_data) if isinstance(response_data, (list, dict)) else "unknown")
return response_data
except httpx.HTTPStatusError as e:
logger.error("HTTP error in direct fetch",
status_code=e.response.status_code,
url=url,
attempt=attempt + 1)
# On last attempt, return None
if attempt == self.retries - 1:
return None
# Wait before retry
import asyncio
wait_time = 2 ** attempt # 1s, 2s, 4s
logger.info(f"Retrying datos URL in {wait_time}s",
attempt=attempt + 1, max_attempts=self.retries)
await asyncio.sleep(wait_time)
except httpx.RequestError as e:
logger.error("Request error in direct fetch",
error=str(e), url=url, attempt=attempt + 1)
# On last attempt, return None
if attempt == self.retries - 1:
return None
# Wait before retry
import asyncio
wait_time = 2 ** attempt # 1s, 2s, 4s
logger.info(f"Retrying datos URL in {wait_time}s",
attempt=attempt + 1, max_attempts=self.retries)
await asyncio.sleep(wait_time)
except Exception as e:
logger.error("Unexpected error in direct fetch",
error=str(e), url=url, attempt=attempt + 1)
# On last attempt, return None
if attempt == self.retries - 1:
return None
return None
async def _post(self, endpoint: str, data: Optional[Dict] = None, headers: Optional[Dict] = None) -> Optional[Dict[str, Any]]:
"""Make POST request"""
try:
url = f"{self.base_url}{endpoint}"
request_headers = headers or {}
if self.api_key:
request_headers["Authorization"] = f"Bearer {self.api_key}"
async with httpx.AsyncClient(timeout=self.timeout) as client:
response = await client.post(url, json=data, headers=request_headers)
response.raise_for_status()
return response.json()
except httpx.HTTPStatusError as e:
logger.error("HTTP error", status_code=e.response.status_code, url=url)
return None
except httpx.RequestError as e:
logger.error("Request error", error=str(e), url=url)
return None
except Exception as e:
logger.error("Unexpected error", error=str(e), url=url)
return None
async def get_direct(self, url: str, headers: Optional[Dict] = None, timeout: Optional[int] = None) -> httpx.Response:
"""
Public GET method for direct HTTP requests
Returns the raw httpx Response object for maximum flexibility
"""
request_headers = headers or {}
request_timeout = httpx.Timeout(timeout if timeout else 30.0)
async with httpx.AsyncClient(timeout=request_timeout, follow_redirects=True) as client:
response = await client.get(url, headers=request_headers)
response.raise_for_status()
return response
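
A minimal sketch of how a concrete client might build on BaseAPIClient's retry-aware helpers. The base URL, endpoint, and ExampleClient class are placeholders, not part of the real services:

import asyncio
from app.external.base_client import BaseAPIClient

class ExampleClient(BaseAPIClient):
    def __init__(self, api_key: str):
        # Base URL and key handling follow the pattern used for AEMET above
        super().__init__(base_url="https://api.example.org", api_key=api_key)

    async def get_station_summary(self, station_id: str):
        # _get() adds the api_key as a query parameter and retries with backoff
        return await self._get(f"/stations/{station_id}/summary")

async def main():
    client = ExampleClient(api_key="demo-key")
    print(await client.get_station_summary("3195"))

asyncio.run(main())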

View File

@@ -0,0 +1,12 @@
# ================================================================
# services/data/app/external/clients/__init__.py
# ================================================================
"""
HTTP clients package
"""
from .madrid_client import MadridTrafficAPIClient
__all__ = [
'MadridTrafficAPIClient'
]

View File

@@ -0,0 +1,146 @@
# ================================================================
# services/data/app/external/clients/madrid_client.py
# ================================================================
"""
Pure HTTP client for Madrid traffic APIs
Handles only HTTP communication and response decoding
"""
import httpx
import structlog
from datetime import datetime
from typing import Optional, Dict, Any
from ..base_client import BaseAPIClient
class MadridTrafficAPIClient(BaseAPIClient):
"""Pure HTTP client for Madrid traffic APIs"""
TRAFFIC_ENDPOINT = "https://informo.madrid.es/informo/tmadrid/pm.xml"
MEASUREMENT_POINTS_URL = "https://datos.madrid.es/egob/catalogo/202468-263-intensidad-trafico.csv"
def __init__(self):
super().__init__(base_url="https://datos.madrid.es")
self.logger = structlog.get_logger()
def _decode_response_content(self, response) -> Optional[str]:
"""Decode response content with multiple encoding attempts"""
try:
return response.text
except UnicodeDecodeError:
# Try manual encoding for Spanish content
for encoding in ['utf-8', 'latin-1', 'windows-1252', 'iso-8859-1']:
try:
content = response.content.decode(encoding)
if content and len(content) > 100:
self.logger.debug("Successfully decoded with encoding", encoding=encoding)
return content
except UnicodeDecodeError:
continue
return None
def _build_historical_url(self, year: int, month: int) -> str:
"""Build historical ZIP URL for given year and month"""
# Madrid uses a direct file pattern now: https://datos.madrid.es/egobfiles/MANUAL/208627/MM-YYYY.zip
# Only historical data is available (not current month)
return f"https://datos.madrid.es/egobfiles/MANUAL/208627/{month:02d}-{year}.zip"
async def fetch_current_traffic_xml(self, endpoint: Optional[str] = None) -> Optional[str]:
"""Fetch current traffic XML data"""
endpoint = endpoint or self.TRAFFIC_ENDPOINT
try:
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
'Accept': 'application/xml,text/xml,*/*',
'Accept-Language': 'es-ES,es;q=0.9,en;q=0.8',
'Accept-Encoding': 'gzip, deflate, br',
'Cache-Control': 'no-cache',
'Referer': 'https://datos.madrid.es/'
}
response = await self.get_direct(endpoint, headers=headers, timeout=30)
if not response or response.status_code != 200:
self.logger.warning("Failed to fetch XML data",
endpoint=endpoint,
status=response.status_code if response else None)
return None
# Get XML content with encoding handling
xml_content = self._decode_response_content(response)
if not xml_content:
self.logger.debug("No XML content received", endpoint=endpoint)
return None
self.logger.debug("Madrid XML content fetched",
length=len(xml_content),
endpoint=endpoint)
return xml_content
except Exception as e:
self.logger.error("Error fetching traffic XML data",
endpoint=endpoint,
error=str(e))
return None
async def fetch_measurement_points_csv(self, url: Optional[str] = None) -> Optional[str]:
"""Fetch measurement points CSV data"""
url = url or self.MEASUREMENT_POINTS_URL
try:
async with httpx.AsyncClient(
timeout=30.0,
headers={
'User-Agent': 'MadridTrafficClient/2.0',
'Accept': 'text/csv,application/csv,*/*'
},
follow_redirects=True
) as client:
self.logger.debug("Fetching measurement points registry", url=url)
response = await client.get(url)
if response.status_code == 200:
return response.text
else:
self.logger.warning("Failed to fetch measurement points",
status=response.status_code, url=url)
return None
except Exception as e:
self.logger.error("Error fetching measurement points registry",
url=url, error=str(e))
return None
async def fetch_historical_zip(self, zip_url: str) -> Optional[bytes]:
"""Fetch historical traffic ZIP file"""
try:
async with httpx.AsyncClient(
timeout=120.0, # Longer timeout for large files
headers={
'User-Agent': 'MadridTrafficClient/2.0',
'Accept': 'application/zip,*/*'
},
follow_redirects=True
) as client:
self.logger.debug("Fetching historical ZIP", url=zip_url)
response = await client.get(zip_url)
if response.status_code == 200:
self.logger.debug("Historical ZIP fetched",
url=zip_url,
size=len(response.content))
return response.content
else:
self.logger.warning("Failed to fetch historical ZIP",
status=response.status_code, url=zip_url)
return None
except Exception as e:
self.logger.error("Error fetching historical ZIP",
url=zip_url, error=str(e))
return None
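
The historical URL convention encoded in _build_historical_url is easy to illustrate in isolation (same pattern, reproduced here only as an example):

def build_historical_url(year: int, month: int) -> str:
    # Same pattern as _build_historical_url above: zero-padded month, then year
    return f"https://datos.madrid.es/egobfiles/MANUAL/208627/{month:02d}-{year}.zip"

print(build_historical_url(2024, 3))
# -> https://datos.madrid.es/egobfiles/MANUAL/208627/03-2024.zip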

View File

@@ -0,0 +1,20 @@
# ================================================================
# services/data/app/external/models/__init__.py
# ================================================================
"""
Madrid traffic models package
"""
from .madrid_models import (
TrafficServiceLevel,
CongestionLevel,
MeasurementPoint,
TrafficRecord
)
__all__ = [
'TrafficServiceLevel',
'CongestionLevel',
'MeasurementPoint',
'TrafficRecord'
]

View File

@@ -0,0 +1,66 @@
# ================================================================
# services/data/app/external/models/madrid_models.py
# ================================================================
"""
Data structures, enums, and dataclasses for Madrid traffic system
"""
from dataclasses import dataclass
from datetime import datetime
from enum import Enum
from typing import Optional
class TrafficServiceLevel(Enum):
"""Madrid traffic service levels"""
FLUID = 0
DENSE = 1
CONGESTED = 2
BLOCKED = 3
class CongestionLevel(Enum):
"""Standardized congestion levels"""
LOW = "low"
MEDIUM = "medium"
HIGH = "high"
BLOCKED = "blocked"
@dataclass
class MeasurementPoint:
"""Madrid measurement point data structure"""
id: str
latitude: float
longitude: float
distance: float
name: str
type: str
@dataclass
class TrafficRecord:
"""Standardized traffic record with pedestrian inference"""
date: datetime
traffic_volume: int
occupation_percentage: int
load_percentage: int
average_speed: int
congestion_level: str
pedestrian_count: int
measurement_point_id: str
measurement_point_name: str
road_type: str
source: str
district: Optional[str] = None
# Madrid-specific data
intensidad_raw: Optional[int] = None
ocupacion_raw: Optional[int] = None
carga_raw: Optional[int] = None
vmed_raw: Optional[int] = None
# Pedestrian inference metadata
pedestrian_multiplier: Optional[float] = None
time_pattern_factor: Optional[float] = None
district_factor: Optional[float] = None
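
An illustrative construction of the TrafficRecord dataclass defined above; the values are made up and the import path is assumed from the file header:

from datetime import datetime, timezone
from app.external.models.madrid_models import TrafficRecord, CongestionLevel

record = TrafficRecord(
    date=datetime(2024, 3, 1, 8, 0, tzinfo=timezone.utc),
    traffic_volume=820,
    occupation_percentage=45,
    load_percentage=55,
    average_speed=32,
    congestion_level=CongestionLevel.MEDIUM.value,
    pedestrian_count=310,
    measurement_point_id="PM1001",
    measurement_point_name="Gran Via - Callao",
    road_type="URB",
    source="example",
    district="Centro",
)
# Raw Madrid fields and inference metadata default to None
print(record.congestion_level, record.pedestrian_count)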

View File

@@ -0,0 +1,14 @@
# ================================================================
# services/data/app/external/processors/__init__.py
# ================================================================
"""
Data processors package
"""
from .madrid_processor import MadridTrafficDataProcessor
from .madrid_business_logic import MadridTrafficAnalyzer
__all__ = [
'MadridTrafficDataProcessor',
'MadridTrafficAnalyzer'
]

View File

@@ -0,0 +1,346 @@
# ================================================================
# services/data/app/external/processors/madrid_business_logic.py
# ================================================================
"""
Business rules, inference, and domain logic for Madrid traffic data
Handles pedestrian inference, district mapping, road classification, and validation
"""
import math
import re
from datetime import datetime
from typing import Dict, List, Any, Optional, Tuple
import structlog
from ..models.madrid_models import TrafficRecord, CongestionLevel
class MadridTrafficAnalyzer:
"""Handles business logic for Madrid traffic analysis"""
# Madrid district characteristics for pedestrian patterns
DISTRICT_MULTIPLIERS = {
'Centro': 2.5, # Historic center, high pedestrian activity
'Salamanca': 2.0, # Shopping area, high foot traffic
'Chamberí': 1.8, # Business district
'Retiro': 2.2, # Near park, high leisure activity
'Chamartín': 1.6, # Business/residential
'Tetuán': 1.4, # Mixed residential/commercial
'Fuencarral': 1.3, # Residential with commercial areas
'Moncloa': 1.7, # University area
'Latina': 1.5, # Residential area
'Carabanchel': 1.2, # Residential periphery
'Usera': 1.1, # Industrial/residential
'Villaverde': 1.0, # Industrial area
'Villa de Vallecas': 1.0, # Peripheral residential
'Vicálvaro': 0.9, # Peripheral
'San Blas': 1.1, # Residential
'Barajas': 0.8, # Airport area, low pedestrian activity
'Hortaleza': 1.2, # Mixed area
'Ciudad Lineal': 1.3, # Linear development
'Puente de Vallecas': 1.2, # Working class area
'Moratalaz': 1.1, # Residential
'Arganzuela': 1.6, # Near center, growing area
}
# Time-based patterns (hour of day)
TIME_PATTERNS = {
'morning_peak': {'hours': [7, 8, 9], 'multiplier': 2.0},
'lunch_peak': {'hours': [12, 13, 14], 'multiplier': 2.5},
'evening_peak': {'hours': [18, 19, 20], 'multiplier': 2.2},
'afternoon': {'hours': [15, 16, 17], 'multiplier': 1.8},
'late_evening': {'hours': [21, 22], 'multiplier': 1.5},
'night': {'hours': [23, 0, 1, 2, 3, 4, 5, 6], 'multiplier': 0.3},
'morning': {'hours': [10, 11], 'multiplier': 1.4}
}
# Road type specific patterns
ROAD_TYPE_BASE = {
'URB': 250, # Urban streets - high pedestrian activity
'M30': 50, # Ring road - minimal pedestrians
'C30': 75, # Secondary ring - some pedestrian access
'A': 25, # Highways - very low pedestrians
'R': 40 # Radial roads - low to moderate
}
# Weather impact on pedestrian activity
WEATHER_IMPACT = {
'rain': 0.6, # 40% reduction in rain
'hot_weather': 0.8, # 20% reduction when very hot
'cold_weather': 0.7, # 30% reduction when very cold
'normal': 1.0 # No impact
}
def __init__(self):
self.logger = structlog.get_logger()
def calculate_pedestrian_flow(
self,
traffic_record: TrafficRecord,
location_context: Optional[Dict[str, Any]] = None
) -> Tuple[int, Dict[str, float]]:
"""
Calculate pedestrian flow estimate with detailed metadata
Returns:
Tuple of (pedestrian_count, inference_metadata)
"""
# Base calculation from road type
road_type = traffic_record.road_type or 'URB'
base_pedestrians = self.ROAD_TYPE_BASE.get(road_type, 200)
# Time pattern adjustment
hour = traffic_record.date.hour
time_factor = self._get_time_pattern_factor(hour)
# District adjustment (if available)
district_factor = 1.0
district = traffic_record.district or self.infer_district_from_location(location_context)
if district:
district_factor = self.DISTRICT_MULTIPLIERS.get(district, 1.0)
# Traffic correlation adjustment
traffic_factor = self._calculate_traffic_correlation(traffic_record)
# Weather adjustment (if data available)
weather_factor = self._get_weather_factor(traffic_record.date, location_context)
# Weekend adjustment
weekend_factor = self._get_weekend_factor(traffic_record.date)
# Combined calculation
pedestrian_count = int(
base_pedestrians *
time_factor *
district_factor *
traffic_factor *
weather_factor *
weekend_factor
)
# Ensure reasonable bounds
pedestrian_count = max(10, min(2000, pedestrian_count))
# Metadata for model training
inference_metadata = {
'base_pedestrians': base_pedestrians,
'time_factor': time_factor,
'district_factor': district_factor,
'traffic_factor': traffic_factor,
'weather_factor': weather_factor,
'weekend_factor': weekend_factor,
'inferred_district': district,
'hour': hour,
'road_type': road_type
}
return pedestrian_count, inference_metadata
def _get_time_pattern_factor(self, hour: int) -> float:
"""Get time-based pedestrian activity multiplier"""
for pattern, config in self.TIME_PATTERNS.items():
if hour in config['hours']:
return config['multiplier']
return 1.0 # Default multiplier
def _calculate_traffic_correlation(self, traffic_record: TrafficRecord) -> float:
"""
Calculate pedestrian correlation with traffic patterns
Higher traffic in urban areas often correlates with more pedestrians
"""
if traffic_record.road_type == 'URB':
# Urban areas: moderate traffic indicates commercial activity
if 30 <= traffic_record.load_percentage <= 70:
return 1.3 # Sweet spot for pedestrian activity
elif traffic_record.load_percentage > 70:
return 0.9 # Too congested, pedestrians avoid
else:
return 1.0 # Normal correlation
else:
# Highway/ring roads: more traffic = fewer pedestrians
if traffic_record.load_percentage > 60:
return 0.5
else:
return 0.8
def _get_weather_factor(self, date: datetime, location_context: Optional[Dict] = None) -> float:
"""Estimate weather impact on pedestrian activity"""
# Simplified weather inference based on season and typical Madrid patterns
month = date.month
# Madrid seasonal patterns
if month in [12, 1, 2]: # Winter - cold weather impact
return self.WEATHER_IMPACT['cold_weather']
elif month in [7, 8]: # Summer - hot weather impact
return self.WEATHER_IMPACT['hot_weather']
elif month in [10, 11, 3, 4]: # Rainy seasons - moderate impact
return 0.85
else: # Spring/early summer - optimal weather
return 1.1
def _get_weekend_factor(self, date: datetime) -> float:
"""Weekend vs weekday pedestrian patterns"""
weekday = date.weekday()
hour = date.hour
if weekday >= 5: # Weekend
if 11 <= hour <= 16: # Weekend shopping/leisure hours
return 1.4
elif 20 <= hour <= 23: # Weekend evening activity
return 1.3
else:
return 0.9
else: # Weekday
return 1.0
def infer_district_from_location(self, location_context: Optional[Dict] = None) -> Optional[str]:
"""
Infer Madrid district from location context or coordinates
"""
if not location_context:
return None
lat = location_context.get('latitude')
lon = location_context.get('longitude')
if not (lat and lon):
return None
# Madrid district boundaries (simplified boundaries for inference)
districts = {
# Central districts
'Centro': {'lat_min': 40.405, 'lat_max': 40.425, 'lon_min': -3.720, 'lon_max': -3.690},
'Arganzuela': {'lat_min': 40.385, 'lat_max': 40.410, 'lon_min': -3.720, 'lon_max': -3.680},
'Retiro': {'lat_min': 40.405, 'lat_max': 40.425, 'lon_min': -3.690, 'lon_max': -3.660},
'Salamanca': {'lat_min': 40.420, 'lat_max': 40.445, 'lon_min': -3.690, 'lon_max': -3.660},
'Chamartín': {'lat_min': 40.445, 'lat_max': 40.480, 'lon_min': -3.690, 'lon_max': -3.660},
'Tetuán': {'lat_min': 40.445, 'lat_max': 40.470, 'lon_min': -3.720, 'lon_max': -3.690},
'Chamberí': {'lat_min': 40.425, 'lat_max': 40.450, 'lon_min': -3.720, 'lon_max': -3.690},
'Fuencarral-El Pardo': {'lat_min': 40.470, 'lat_max': 40.540, 'lon_min': -3.750, 'lon_max': -3.650},
'Moncloa-Aravaca': {'lat_min': 40.430, 'lat_max': 40.480, 'lon_min': -3.750, 'lon_max': -3.720},
'Latina': {'lat_min': 40.380, 'lat_max': 40.420, 'lon_min': -3.750, 'lon_max': -3.720},
'Carabanchel': {'lat_min': 40.350, 'lat_max': 40.390, 'lon_min': -3.750, 'lon_max': -3.720},
'Usera': {'lat_min': 40.350, 'lat_max': 40.385, 'lon_min': -3.720, 'lon_max': -3.690},
'Puente de Vallecas': {'lat_min': 40.370, 'lat_max': 40.410, 'lon_min': -3.680, 'lon_max': -3.640},
'Moratalaz': {'lat_min': 40.400, 'lat_max': 40.430, 'lon_min': -3.650, 'lon_max': -3.620},
'Ciudad Lineal': {'lat_min': 40.430, 'lat_max': 40.460, 'lon_min': -3.650, 'lon_max': -3.620},
'Hortaleza': {'lat_min': 40.460, 'lat_max': 40.500, 'lon_min': -3.650, 'lon_max': -3.620},
'Villaverde': {'lat_min': 40.320, 'lat_max': 40.360, 'lon_min': -3.720, 'lon_max': -3.680},
}
# Find matching district
for district_name, bounds in districts.items():
if (bounds['lat_min'] <= lat <= bounds['lat_max'] and
bounds['lon_min'] <= lon <= bounds['lon_max']):
return district_name
# Default for coordinates in Madrid but not matching specific districts
if 40.3 <= lat <= 40.6 and -3.8 <= lon <= -3.5:
return 'Other Madrid'
return None
def classify_road_type(self, measurement_point_name: str) -> str:
"""Classify road type based on measurement point name"""
if not measurement_point_name:
return 'URB' # Default to urban
name_upper = measurement_point_name.upper()
# Highway patterns
if any(pattern in name_upper for pattern in ['A-', 'AP-', 'AUTOPISTA', 'AUTOVIA']):
return 'A'
# M-30 Ring road
if 'M-30' in name_upper or 'M30' in name_upper:
return 'M30'
# Other M roads (ring roads)
if re.search(r'M-[0-9]', name_upper) or re.search(r'M[0-9]', name_upper):
return 'C30'
# Radial roads (R-1, R-2, etc.)
if re.search(r'R-[0-9]', name_upper) or 'RADIAL' in name_upper:
return 'R'
# Default to urban street
return 'URB'
def validate_madrid_coordinates(self, lat: float, lon: float) -> bool:
"""Validate coordinates are within Madrid bounds"""
# Madrid metropolitan area bounds
return 40.3 <= lat <= 40.6 and -3.8 <= lon <= -3.5
def get_congestion_level(self, occupation_pct: float) -> str:
"""Convert occupation percentage to congestion level"""
if occupation_pct >= 80:
return CongestionLevel.BLOCKED.value
elif occupation_pct >= 50:
return CongestionLevel.HIGH.value
elif occupation_pct >= 25:
return CongestionLevel.MEDIUM.value
else:
return CongestionLevel.LOW.value
def calculate_distance(self, lat1: float, lon1: float, lat2: float, lon2: float) -> float:
"""Calculate distance between two points in kilometers using Haversine formula"""
R = 6371 # Earth's radius in kilometers
dlat = math.radians(lat2 - lat1)
dlon = math.radians(lon2 - lon1)
a = (math.sin(dlat/2) * math.sin(dlat/2) +
math.cos(math.radians(lat1)) * math.cos(math.radians(lat2)) *
math.sin(dlon/2) * math.sin(dlon/2))
c = 2 * math.atan2(math.sqrt(a), math.sqrt(1-a))
return R * c
def find_nearest_traffic_point(self, traffic_points: List[Dict[str, Any]],
latitude: float, longitude: float) -> Optional[Dict[str, Any]]:
"""Find the nearest traffic point to given coordinates"""
if not traffic_points:
return None
min_distance = float('inf')
nearest_point = None
for point in traffic_points:
point_lat = point.get('latitude')
point_lon = point.get('longitude')
if point_lat and point_lon:
distance = self.calculate_distance(latitude, longitude, point_lat, point_lon)
if distance < min_distance:
min_distance = distance
nearest_point = point
return nearest_point
def find_nearest_measurement_points(self, measurement_points: Dict[str, Dict[str, Any]],
latitude: float, longitude: float,
num_points: int = 3, max_distance_km: Optional[float] = 5.0) -> List[Tuple[str, Dict[str, Any], float]]:
"""Find nearest measurement points for historical data"""
distances = []
for point_id, point_data in measurement_points.items():
point_lat = point_data.get('latitude')
point_lon = point_data.get('longitude')
if point_lat and point_lon:
distance_km = self.calculate_distance(latitude, longitude, point_lat, point_lon)
distances.append((point_id, point_data, distance_km))
# Sort by distance and take nearest points
distances.sort(key=lambda x: x[2])
# Apply distance filter if specified
if max_distance_km is not None:
distances = [p for p in distances if p[2] <= max_distance_km]
nearest = distances[:num_points]
self.logger.info("Found nearest measurement points",
count=len(nearest),
nearest_distance_km=nearest[0][2] if nearest else None)
return nearest
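
A small sketch exercising the analyzer's pure helpers (no I/O involved); the import path is assumed from the file header and the coordinates are illustrative:

from app.external.processors.madrid_business_logic import MadridTrafficAnalyzer

analyzer = MadridTrafficAnalyzer()

# Haversine distance between Puerta del Sol and Plaza de Castilla (roughly 5-6 km)
dist_km = analyzer.calculate_distance(40.4168, -3.7038, 40.4669, -3.6883)
print(f"{dist_km:.2f} km")

# Occupation percentage mapped to a standardized congestion level
print(analyzer.get_congestion_level(62))   # "high"
print(analyzer.get_congestion_level(18))   # "low"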

View File

@@ -0,0 +1,493 @@
# ================================================================
# services/data/app/external/processors/madrid_processor.py
# ================================================================
"""
Data transformation and parsing for Madrid traffic data
Handles XML parsing, CSV processing, coordinate conversion, and data quality scoring
"""
import csv
import io
import math
import re
import xml.etree.ElementTree as ET
import zipfile
from datetime import datetime, timezone
from typing import Dict, List, Any, Optional, Tuple
import structlog
import pyproj
from ..models.madrid_models import TrafficRecord, MeasurementPoint, CongestionLevel
class MadridTrafficDataProcessor:
"""Handles all data transformation and parsing for Madrid traffic data"""
def __init__(self):
self.logger = structlog.get_logger()
# UTM Zone 30N (Madrid's coordinate system) - using modern pyproj API
self.transformer = pyproj.Transformer.from_crs("EPSG:25830", "EPSG:4326", always_xy=True)
def safe_int(self, value: str) -> int:
"""Safely convert string to int"""
try:
return int(float(value.replace(',', '.')))
except (ValueError, TypeError):
return 0
def _safe_float(self, value: str) -> float:
"""Safely convert string to float"""
try:
return float(value.replace(',', '.'))
except (ValueError, TypeError):
return 0.0
def clean_madrid_xml(self, xml_content: str) -> str:
"""Clean and prepare Madrid XML content for parsing"""
if not xml_content:
return ""
# Remove BOM and extra whitespace
cleaned = xml_content.strip()
if cleaned.startswith('\ufeff'):
cleaned = cleaned[1:]
# Fix common XML issues
cleaned = re.sub(r'&(?!amp;|lt;|gt;|quot;|apos;)', '&amp;', cleaned)
# Ensure proper encoding declaration
if not cleaned.startswith('<?xml'):
cleaned = '<?xml version="1.0" encoding="UTF-8"?>\n' + cleaned
return cleaned
def convert_utm_to_latlon(self, utm_x: str, utm_y: str) -> Tuple[Optional[float], Optional[float]]:
"""Convert UTM coordinates to latitude/longitude"""
try:
utm_x_float = float(utm_x.replace(',', '.'))
utm_y_float = float(utm_y.replace(',', '.'))
# Convert from UTM Zone 30N to WGS84 using modern pyproj API
longitude, latitude = self.transformer.transform(utm_x_float, utm_y_float)
# Validate coordinates are in Madrid area
if 40.3 <= latitude <= 40.6 and -3.8 <= longitude <= -3.5:
return latitude, longitude
else:
self.logger.debug("Coordinates outside Madrid bounds",
lat=latitude, lon=longitude, utm_x=utm_x, utm_y=utm_y)
return None, None
except Exception as e:
self.logger.debug("UTM conversion error",
utm_x=utm_x, utm_y=utm_y, error=str(e))
return None, None
def parse_traffic_xml(self, xml_content: str) -> List[Dict[str, Any]]:
"""Parse Madrid traffic XML data"""
traffic_points = []
try:
cleaned_xml = self.clean_madrid_xml(xml_content)
root = ET.fromstring(cleaned_xml)
self.logger.debug("Madrid XML structure", root_tag=root.tag, children_count=len(list(root)))
if root.tag == 'pms':
pm_elements = root.findall('pm')
self.logger.debug("Found PM elements", count=len(pm_elements))
for pm in pm_elements:
try:
traffic_point = self._extract_madrid_pm_element(pm)
if self._is_valid_traffic_point(traffic_point):
traffic_points.append(traffic_point)
# Log first few points for debugging
if len(traffic_points) <= 3:
self.logger.debug("Sample traffic point",
id=traffic_point['idelem'],
lat=traffic_point['latitude'],
lon=traffic_point['longitude'],
intensity=traffic_point.get('intensidad'))
except Exception as e:
self.logger.debug("Error parsing PM element", error=str(e))
continue
else:
self.logger.warning("Unexpected XML root tag", root_tag=root.tag)
self.logger.debug("Madrid traffic XML parsing completed", valid_points=len(traffic_points))
return traffic_points
except ET.ParseError as e:
self.logger.warning("Failed to parse Madrid XML", error=str(e))
return self._extract_traffic_data_regex(xml_content)
except Exception as e:
self.logger.error("Error in Madrid traffic XML parsing", error=str(e))
return []
def _extract_madrid_pm_element(self, pm_element) -> Dict[str, Any]:
"""Extract traffic data from Madrid <pm> element with coordinate conversion"""
try:
point_data = {}
utm_x = utm_y = None
# Extract all child elements
for child in pm_element:
tag, text = child.tag, child.text.strip() if child.text else ''
if tag == 'idelem':
point_data['idelem'] = text
elif tag == 'descripcion':
point_data['descripcion'] = text
elif tag == 'intensidad':
point_data['intensidad'] = self.safe_int(text)
elif tag == 'ocupacion':
point_data['ocupacion'] = self._safe_float(text)
elif tag == 'carga':
point_data['carga'] = self.safe_int(text)
elif tag == 'nivelServicio':
point_data['nivelServicio'] = self.safe_int(text)
elif tag == 'st_x': # UTM X coordinate
utm_x = text
point_data['utm_x'] = text
elif tag == 'st_y': # UTM Y coordinate
utm_y = text
point_data['utm_y'] = text
elif tag == 'error':
point_data['error'] = text
elif tag in ['subarea', 'accesoAsociado', 'intensidadSat']:
point_data[tag] = text
# Convert coordinates
if utm_x and utm_y:
latitude, longitude = self.convert_utm_to_latlon(utm_x, utm_y)
if latitude and longitude:
point_data.update({
'latitude': latitude,
'longitude': longitude,
'measurement_point_id': point_data.get('idelem'),
'measurement_point_name': point_data.get('descripcion'),
'timestamp': datetime.now(timezone.utc),
'source': 'madrid_opendata_xml'
})
return point_data
else:
self.logger.debug("Invalid coordinates after conversion",
idelem=point_data.get('idelem'), utm_x=utm_x, utm_y=utm_y)
return {}
else:
self.logger.debug("Missing UTM coordinates", idelem=point_data.get('idelem'))
return {}
except Exception as e:
self.logger.debug("Error extracting PM element", error=str(e))
return {}
def _is_valid_traffic_point(self, traffic_point: Dict[str, Any]) -> bool:
"""Validate traffic point data"""
required_fields = ['idelem', 'latitude', 'longitude']
return all(field in traffic_point and traffic_point[field] for field in required_fields)
def _extract_traffic_data_regex(self, xml_content: str) -> List[Dict[str, Any]]:
"""Fallback regex-based extraction if XML parsing fails"""
traffic_points = []
try:
# Pattern to match PM elements
pm_pattern = r'<pm>(.*?)</pm>'
pm_matches = re.findall(pm_pattern, xml_content, re.DOTALL)
for pm_content in pm_matches:
traffic_point = {}
# Extract key fields
patterns = {
'idelem': r'<idelem>(.*?)</idelem>',
'descripcion': r'<descripcion>(.*?)</descripcion>',
'intensidad': r'<intensidad>(.*?)</intensidad>',
'ocupacion': r'<ocupacion>(.*?)</ocupacion>',
'st_x': r'<st_x>(.*?)</st_x>',
'st_y': r'<st_y>(.*?)</st_y>'
}
for field, pattern in patterns.items():
match = re.search(pattern, pm_content)
if match:
traffic_point[field] = match.group(1).strip()
# Convert coordinates
if 'st_x' in traffic_point and 'st_y' in traffic_point:
latitude, longitude = self.convert_utm_to_latlon(
traffic_point['st_x'], traffic_point['st_y']
)
if latitude and longitude:
traffic_point.update({
'latitude': latitude,
'longitude': longitude,
'intensidad': self.safe_int(traffic_point.get('intensidad', '0')),
'ocupacion': self._safe_float(traffic_point.get('ocupacion', '0')),
'measurement_point_id': traffic_point.get('idelem'),
'measurement_point_name': traffic_point.get('descripcion'),
'timestamp': datetime.now(timezone.utc),
'source': 'madrid_opendata_xml_regex'
})
traffic_points.append(traffic_point)
self.logger.debug("Regex extraction completed", points=len(traffic_points))
return traffic_points
except Exception as e:
self.logger.error("Error in regex extraction", error=str(e))
return []
def parse_measurement_points_csv(self, csv_content: str) -> Dict[str, Dict[str, Any]]:
"""Parse measurement points CSV into lookup dictionary"""
measurement_points = {}
try:
# Parse CSV with semicolon delimiter
csv_reader = csv.DictReader(io.StringIO(csv_content), delimiter=';')
processed_count = 0
for row in csv_reader:
try:
# Extract point ID and coordinates
point_id = row.get('id', '').strip()
if not point_id:
continue
processed_count += 1
# Try different coordinate field names
lat_str = ''
lon_str = ''
# Common coordinate field patterns
lat_fields = ['lat', 'latitude', 'latitud', 'y', 'utm_y']
lon_fields = ['lon', 'lng', 'longitude', 'longitud', 'x', 'utm_x']
for field in lat_fields:
if field in row and row[field].strip():
lat_str = row[field].strip()
break
for field in lon_fields:
if field in row and row[field].strip():
lon_str = row[field].strip()
break
if lat_str and lon_str:
try:
# Try direct lat/lon first
latitude = self._safe_float(lat_str)
longitude = self._safe_float(lon_str)
# If values look like UTM coordinates, convert them
if latitude > 1000 or longitude > 1000:
latitude, longitude = self.convert_utm_to_latlon(lon_str, lat_str)
if not latitude or not longitude:
continue
# Validate Madrid area
if not (40.3 <= latitude <= 40.6 and -3.8 <= longitude <= -3.5):
continue
measurement_points[point_id] = {
'id': point_id,
'latitude': latitude,
'longitude': longitude,
'name': row.get('nombre', row.get('descripcion', f"Point {point_id}")),
'type': row.get('tipo', 'traffic'),
'raw_data': dict(row) # Keep original data
}
except Exception as e:
self.logger.debug("Error processing point coordinates",
point_id=point_id, error=str(e))
continue
except Exception as e:
self.logger.debug("Error processing CSV row", error=str(e))
continue
self.logger.info("Parsed measurement points registry",
total_points=len(measurement_points))
return measurement_points
except Exception as e:
self.logger.error("Error parsing measurement points CSV", error=str(e))
return {}
def calculate_data_quality_score(self, row: Dict[str, str]) -> float:
"""Calculate data quality score for a traffic record"""
try:
score = 1.0
# Check for missing or invalid values
intensidad = row.get('intensidad', '').strip()
if not intensidad or intensidad in ['N', '', '0']:
score *= 0.7
ocupacion = row.get('ocupacion', '').strip()
if not ocupacion or ocupacion in ['N', '', '0']:
score *= 0.8
error_status = row.get('error', '').strip()
if error_status and error_status != 'N':
score *= 0.6
# Check for reasonable value ranges
try:
intensidad_val = self.safe_int(intensidad)
if intensidad_val < 0 or intensidad_val > 5000: # Unrealistic traffic volume
score *= 0.7
ocupacion_val = self.safe_int(ocupacion)
if ocupacion_val < 0 or ocupacion_val > 100: # Invalid percentage
score *= 0.5
            except Exception:
score *= 0.6
return max(0.1, score) # Minimum quality score
except Exception as e:
self.logger.debug("Error calculating quality score", error=str(e))
return 0.5 # Default medium quality
async def process_csv_content_chunked(self, text_content: str, csv_filename: str,
nearest_ids: set, nearest_points: list) -> list:
"""Process CSV content in chunks to prevent memory issues"""
import csv
import io
import gc
try:
csv_reader = csv.DictReader(io.StringIO(text_content), delimiter=';')
chunk_size = 10000
chunk_records = []
all_records = []
processed_count = 0
total_rows_seen = 0
for row in csv_reader:
total_rows_seen += 1
measurement_point_id = row.get('id', '').strip()
if measurement_point_id not in nearest_ids:
continue
try:
record_data = await self.parse_historical_csv_row(row, nearest_points)
if record_data:
chunk_records.append(record_data)
processed_count += 1
if len(chunk_records) >= chunk_size:
all_records.extend(chunk_records)
chunk_records = []
gc.collect()
except Exception as e:
if processed_count < 5:
self.logger.error("Row parsing exception",
row_num=total_rows_seen,
measurement_point_id=measurement_point_id,
error=str(e))
continue
# Process remaining records
if chunk_records:
all_records.extend(chunk_records)
chunk_records = []
gc.collect()
self.logger.info("Processed CSV file",
filename=csv_filename,
total_rows_read=total_rows_seen,
processed_records=processed_count)
return all_records
except Exception as e:
self.logger.error("Error processing CSV content",
filename=csv_filename, error=str(e))
return []
    async def parse_historical_csv_row(self, row: dict, nearest_points: list) -> Optional[dict]:
"""Parse a single row from Madrid's historical traffic CSV"""
try:
# Extract date
fecha_str = row.get('fecha', '').strip()
if not fecha_str:
return None
try:
from datetime import datetime, timezone
date_obj = datetime.strptime(fecha_str, '%Y-%m-%d %H:%M:%S')
date_obj = date_obj.replace(tzinfo=timezone.utc)
except Exception:
return None
measurement_point_id = row.get('id', '').strip()
# Find point data
point_match = next((p for p in nearest_points if p[0] == measurement_point_id), None)
if not point_match:
return None
point_data = point_match[1]
distance_km = point_match[2]
# Extract traffic data
intensidad = self.safe_int(row.get('intensidad', '0'))
ocupacion = self.safe_int(row.get('ocupacion', '0'))
carga = self.safe_int(row.get('carga', '0'))
vmed = self.safe_int(row.get('vmed', '0'))
# Calculate average speed (vmed is in km/h, use it if available)
average_speed = float(vmed) if vmed > 0 else 30.0 # Default speed
# Determine congestion level based on occupation percentage
if ocupacion > 75:
congestion_level = 'high'
elif ocupacion > 40:
congestion_level = 'medium'
else:
congestion_level = 'low'
# Build result with API-compatible fields
result = {
'date': date_obj, # Required API field
'traffic_volume': intensidad, # Required API field
'pedestrian_count': max(1, int(intensidad * 0.1)), # Estimated pedestrian count
'congestion_level': congestion_level, # Required API field
'average_speed': average_speed, # Required API field
'source': 'madrid_historical_csv', # Required API field
'measurement_point_id': measurement_point_id,
'point_data': point_data,
'distance_km': distance_km,
'traffic_data': {
'intensidad': intensidad,
'ocupacion': ocupacion,
'carga': carga,
'vmed': vmed
},
'data_quality_score': self.calculate_data_quality_score(row),
'raw_row': row
}
return result
except Exception as e:
self.logger.debug("Error parsing historical CSV row", error=str(e))
return None
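
A standalone sketch of the UTM-to-WGS84 conversion used by the processor, assuming pyproj is installed; the sample easting/northing are roughly central Madrid in ETRS89 / UTM zone 30N:

import pyproj

# Same transformer setup as in MadridTrafficDataProcessor.__init__
transformer = pyproj.Transformer.from_crs("EPSG:25830", "EPSG:4326", always_xy=True)
lon, lat = transformer.transform(440000.0, 4474000.0)
print(f"lat={lat:.4f}, lon={lon:.4f}")   # approximately 40.4 N, -3.7 E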

View File

@@ -0,0 +1 @@
"""Data ingestion module for multi-city external data"""

View File

@@ -0,0 +1,20 @@
# services/external/app/ingestion/adapters/__init__.py
"""
Adapter registry - Maps city IDs to adapter implementations
"""
from typing import Dict, Type
from ..base_adapter import CityDataAdapter
from .madrid_adapter import MadridAdapter
ADAPTER_REGISTRY: Dict[str, Type[CityDataAdapter]] = {
"madrid": MadridAdapter,
}
def get_adapter(city_id: str, config: Dict) -> CityDataAdapter:
"""Factory to instantiate appropriate adapter"""
adapter_class = ADAPTER_REGISTRY.get(city_id)
if not adapter_class:
raise ValueError(f"No adapter registered for city: {city_id}")
return adapter_class(city_id, config)
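
A quick illustration of the adapter registry and factory above; the import path is assumed from the file header:

from app.ingestion.adapters import ADAPTER_REGISTRY, get_adapter

# The registry maps city ids to adapter classes
print(list(ADAPTER_REGISTRY.keys()))    # ['madrid']

# Unknown cities fail fast with a clear error
try:
    get_adapter("sevilla", {})
except ValueError as exc:
    print(exc)                          # No adapter registered for city: sevilla

# get_adapter("madrid", {...}) would instantiate MadridAdapter with the
# weather_config / traffic_config dicts passed by the ingestion manager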

View File

@@ -0,0 +1,152 @@
# services/external/app/ingestion/adapters/madrid_adapter.py
"""
Madrid city data adapter - Uses existing AEMET and Madrid OpenData clients
"""
from typing import List, Dict, Any
from datetime import datetime
import structlog
from ..base_adapter import CityDataAdapter
from app.external.aemet import AEMETClient
from app.external.apis.madrid_traffic_client import MadridTrafficClient
logger = structlog.get_logger()
class MadridAdapter(CityDataAdapter):
"""Adapter for Madrid using AEMET + Madrid OpenData"""
def __init__(self, city_id: str, config: Dict[str, Any]):
super().__init__(city_id, config)
self.aemet_client = AEMETClient()
self.traffic_client = MadridTrafficClient()
self.madrid_lat = 40.4168
self.madrid_lon = -3.7038
async def fetch_historical_weather(
self,
start_date: datetime,
end_date: datetime
) -> List[Dict[str, Any]]:
"""Fetch historical weather from AEMET"""
try:
logger.info(
"Fetching Madrid historical weather",
start=start_date.isoformat(),
end=end_date.isoformat()
)
weather_data = await self.aemet_client.get_historical_weather(
self.madrid_lat,
self.madrid_lon,
start_date,
end_date
)
for record in weather_data:
record['city_id'] = self.city_id
record['city_name'] = 'Madrid'
logger.info(
"Madrid weather data fetched",
records=len(weather_data)
)
return weather_data
except Exception as e:
logger.error("Error fetching Madrid weather", error=str(e))
return []
async def fetch_historical_traffic(
self,
start_date: datetime,
end_date: datetime
) -> List[Dict[str, Any]]:
"""Fetch historical traffic from Madrid OpenData"""
try:
logger.info(
"Fetching Madrid historical traffic",
start=start_date.isoformat(),
end=end_date.isoformat()
)
traffic_data = await self.traffic_client.get_historical_traffic(
self.madrid_lat,
self.madrid_lon,
start_date,
end_date
)
for record in traffic_data:
record['city_id'] = self.city_id
record['city_name'] = 'Madrid'
logger.info(
"Madrid traffic data fetched",
records=len(traffic_data)
)
return traffic_data
except Exception as e:
logger.error("Error fetching Madrid traffic", error=str(e))
return []
async def validate_connection(self) -> bool:
"""Validate connection to AEMET and Madrid OpenData
Note: Validation is lenient - allows partial failures for temporary API issues.
AEMET rate limits may cause weather validation to fail during initialization.
Madrid traffic API outages should not block validation entirely.
"""
try:
traffic_validation_passed = False
weather_validation_passed = False
# Try traffic API first
try:
test_traffic = await self.traffic_client.get_current_traffic(
self.madrid_lat,
self.madrid_lon
)
if test_traffic is not None and len(test_traffic) > 0:
traffic_validation_passed = True
logger.info("Traffic API validation successful")
else:
logger.warning("Traffic API validation failed - temporary unavailability (proceeding anyway)")
except Exception as traffic_error:
logger.warning("Traffic API validation error (temporary unavailability) - proceeding anyway", error=str(traffic_error))
# Try weather API
try:
test_weather = await self.aemet_client.get_current_weather(
self.madrid_lat,
self.madrid_lon
)
if test_weather is not None:
weather_validation_passed = True
logger.info("Weather API validation successful")
else:
logger.warning("Weather API validation failed (likely rate limited) - proceeding anyway")
except Exception as weather_error:
logger.warning("Weather API validation error - proceeding anyway", error=str(weather_error))
# At least one validation should pass for basic connectivity
if not traffic_validation_passed and not weather_validation_passed:
logger.error("Both traffic and weather API validations failed - no connectivity")
return False
# Return success if at least one API is accessible
logger.info("Adapter connection validation passed",
traffic_valid=traffic_validation_passed,
weather_valid=weather_validation_passed)
return True
except Exception as e:
logger.error("Madrid adapter connection validation failed", error=str(e))
return False
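
An illustrative driver for the Madrid adapter's lenient connectivity check. Running it for real requires AEMET credentials, Madrid OpenData access, and the service configuration to be in place, so treat it strictly as a sketch:

import asyncio
from app.ingestion.adapters.madrid_adapter import MadridAdapter

async def main():
    adapter = MadridAdapter("madrid", {"weather_config": {}, "traffic_config": {}})
    # Passes when at least one of the two upstream APIs responds
    ok = await adapter.validate_connection()
    print("connectivity:", ok)

asyncio.run(main())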

View File

@@ -0,0 +1,43 @@
# services/external/app/ingestion/base_adapter.py
"""
Base adapter interface for city-specific data sources
"""
from abc import ABC, abstractmethod
from typing import List, Dict, Any
from datetime import datetime
class CityDataAdapter(ABC):
"""Abstract base class for city-specific data adapters"""
def __init__(self, city_id: str, config: Dict[str, Any]):
self.city_id = city_id
self.config = config
@abstractmethod
async def fetch_historical_weather(
self,
start_date: datetime,
end_date: datetime
) -> List[Dict[str, Any]]:
"""Fetch historical weather data for date range"""
pass
@abstractmethod
async def fetch_historical_traffic(
self,
start_date: datetime,
end_date: datetime
) -> List[Dict[str, Any]]:
"""Fetch historical traffic data for date range"""
pass
@abstractmethod
async def validate_connection(self) -> bool:
"""Validate connection to data source"""
pass
def get_city_id(self) -> str:
"""Get city identifier"""
return self.city_id
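
A minimal sketch of what a new city adapter must implement to satisfy the contract above; the DemoAdapter class and its hard-coded return values are purely illustrative:

from datetime import datetime
from typing import List, Dict, Any
from app.ingestion.base_adapter import CityDataAdapter

class DemoAdapter(CityDataAdapter):
    async def fetch_historical_weather(self, start_date: datetime, end_date: datetime) -> List[Dict[str, Any]]:
        # A real adapter would call the city's weather API here
        return [{"city_id": self.city_id, "date": start_date, "temperature": 18.5}]

    async def fetch_historical_traffic(self, start_date: datetime, end_date: datetime) -> List[Dict[str, Any]]:
        # A real adapter would call the city's traffic API here
        return [{"city_id": self.city_id, "date": start_date, "traffic_volume": 400}]

    async def validate_connection(self) -> bool:
        return True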

View File

@@ -0,0 +1,408 @@
# services/external/app/ingestion/ingestion_manager.py
"""
Data Ingestion Manager - Coordinates multi-city data collection
"""
from typing import List, Dict, Any
from datetime import datetime, timedelta
import structlog
import asyncio
from app.registry.city_registry import CityRegistry
from app.registry.calendar_registry import CalendarRegistry
from .adapters import get_adapter
from app.repositories.city_data_repository import CityDataRepository
from app.repositories.calendar_repository import CalendarRepository
from app.core.database import database_manager
logger = structlog.get_logger()
class DataIngestionManager:
"""Orchestrates data ingestion across all cities"""
def __init__(self):
self.registry = CityRegistry()
self.database_manager = database_manager
async def initialize_all_cities(self, months: int = 24):
"""
Initialize historical data for all enabled cities
Called by Kubernetes Init Job
"""
enabled_cities = self.registry.get_enabled_cities()
logger.info(
"Starting full data initialization",
cities=len(enabled_cities),
months=months
)
end_date = datetime.now()
start_date = end_date - timedelta(days=months * 30)
tasks = [
self.initialize_city(city.city_id, start_date, end_date)
for city in enabled_cities
]
results = await asyncio.gather(*tasks, return_exceptions=True)
successes = sum(1 for r in results if r is True)
failures = len(results) - successes
logger.info(
"Data initialization complete",
total=len(results),
successes=successes,
failures=failures
)
        # Consider the run successful if at least one city initialized successfully;
        # this allows the system to continue even if some external APIs are temporarily unavailable
if successes > 0:
logger.info(
"Partial success achieved - continuing with available data",
success_ratio=f"{successes}/{len(results)}"
)
return True
else:
logger.error("All city initializations failed - system cannot proceed")
return False
async def initialize_city(
self,
city_id: str,
start_date: datetime,
end_date: datetime
) -> bool:
"""Initialize historical data for a single city (idempotent)"""
try:
city = self.registry.get_city(city_id)
if not city:
logger.error("City not found", city_id=city_id)
return False
logger.info(
"Initializing city data",
city=city.name,
start=start_date.date(),
end=end_date.date()
)
# Check if data already exists (idempotency)
async with self.database_manager.get_session() as session:
repo = CityDataRepository(session)
coverage = await repo.get_data_coverage(city_id, start_date, end_date)
days_in_range = (end_date - start_date).days
expected_records = days_in_range # One record per day minimum
# If we have >= 90% coverage, skip initialization
threshold = expected_records * 0.9
weather_sufficient = coverage['weather'] >= threshold
traffic_sufficient = coverage['traffic'] >= threshold
if weather_sufficient and traffic_sufficient:
logger.info(
"City data already initialized, skipping",
city=city.name,
weather_records=coverage['weather'],
traffic_records=coverage['traffic'],
threshold=int(threshold)
)
return True
logger.info(
"Insufficient data coverage, proceeding with initialization",
city=city.name,
existing_weather=coverage['weather'],
existing_traffic=coverage['traffic'],
expected=expected_records
)
adapter = get_adapter(
city_id,
{
"weather_config": city.weather_config,
"traffic_config": city.traffic_config
}
)
if not await adapter.validate_connection():
logger.error("Adapter validation failed", city=city.name)
return False
# Fetch data with error handling to allow partial success
weather_data = []
traffic_data = []
# Fetch weather data
try:
weather_data = await adapter.fetch_historical_weather(
start_date, end_date
)
logger.info("Weather data fetched successfully",
records=len(weather_data), city=city.name)
except Exception as weather_error:
logger.error("Failed to fetch weather data",
city=city.name, error=str(weather_error))
# Don't return False here - continue with whatever data we can get
# Fetch traffic data
try:
traffic_data = await adapter.fetch_historical_traffic(
start_date, end_date
)
logger.info("Traffic data fetched successfully",
records=len(traffic_data), city=city.name)
except Exception as traffic_error:
logger.error("Failed to fetch traffic data",
city=city.name, error=str(traffic_error))
# Don't return False here - continue with weather data only if available
# Store available data (at least one type should be available for partial success)
async with self.database_manager.get_session() as session:
repo = CityDataRepository(session)
weather_stored = 0
traffic_stored = 0
if weather_data:
weather_stored = await repo.bulk_store_weather(
city_id, weather_data
)
if traffic_data:
traffic_stored = await repo.bulk_store_traffic(
city_id, traffic_data
)
# Only fail if both data types failed to fetch
if not weather_data and not traffic_data:
logger.error("Both weather and traffic data fetch failed", city=city.name)
return False
logger.info(
"City initialization complete",
city=city.name,
weather_records=weather_stored,
traffic_records=traffic_stored
)
return True
except Exception as e:
logger.error(
"City initialization failed",
city_id=city_id,
error=str(e)
)
return False
async def rotate_monthly_data(self):
"""
Rotate 24-month window: delete old, ingest new
Called by Kubernetes CronJob monthly
"""
enabled_cities = self.registry.get_enabled_cities()
logger.info("Starting monthly data rotation", cities=len(enabled_cities))
now = datetime.now()
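# Retain a rolling 24-month window, approximated as 24 * 30 = 720 days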
cutoff_date = now - timedelta(days=24 * 30)
last_month_end = now.replace(day=1) - timedelta(days=1)
last_month_start = last_month_end.replace(day=1)
tasks = []
for city in enabled_cities:
tasks.append(
self._rotate_city_data(
city.city_id,
cutoff_date,
last_month_start,
last_month_end
)
)
results = await asyncio.gather(*tasks, return_exceptions=True)
successes = sum(1 for r in results if r is True)
logger.info(
"Monthly rotation complete",
total=len(results),
successes=successes
)
async def _rotate_city_data(
self,
city_id: str,
cutoff_date: datetime,
new_start: datetime,
new_end: datetime
) -> bool:
"""Rotate data for a single city"""
try:
city = self.registry.get_city(city_id)
if not city:
return False
logger.info(
"Rotating city data",
city=city.name,
cutoff=cutoff_date.date(),
new_month=new_start.strftime("%Y-%m")
)
async with self.database_manager.get_session() as session:
repo = CityDataRepository(session)
deleted_weather = await repo.delete_weather_before(
city_id, cutoff_date
)
deleted_traffic = await repo.delete_traffic_before(
city_id, cutoff_date
)
logger.info(
"Old data deleted",
city=city.name,
weather_deleted=deleted_weather,
traffic_deleted=deleted_traffic
)
adapter = get_adapter(city_id, {
"weather_config": city.weather_config,
"traffic_config": city.traffic_config
})
new_weather = await adapter.fetch_historical_weather(
new_start, new_end
)
new_traffic = await adapter.fetch_historical_traffic(
new_start, new_end
)
async with self.database_manager.get_session() as session:
repo = CityDataRepository(session)
weather_stored = await repo.bulk_store_weather(
city_id, new_weather
)
traffic_stored = await repo.bulk_store_traffic(
city_id, new_traffic
)
logger.info(
"New data ingested",
city=city.name,
weather_added=weather_stored,
traffic_added=traffic_stored
)
return True
except Exception as e:
logger.error(
"City rotation failed",
city_id=city_id,
error=str(e)
)
return False
async def seed_school_calendars(self) -> bool:
"""
Seed school calendars from CalendarRegistry into database
Called during initialization - idempotent
"""
try:
logger.info("Starting school calendar seeding...")
# Get all calendars from registry
calendars = CalendarRegistry.get_all_calendars()
logger.info(f"Found {len(calendars)} calendars in registry")
async with self.database_manager.get_session() as session:
repo = CalendarRepository(session)
seeded_count = 0
skipped_count = 0
for cal_def in calendars:
logger.info(
"Processing calendar",
calendar_id=cal_def.calendar_id,
city=cal_def.city_id,
type=cal_def.school_type.value,
year=cal_def.academic_year
)
# Check if calendar already exists (idempotency)
existing = await repo.get_calendar_by_city_type_year(
city_id=cal_def.city_id,
school_type=cal_def.school_type.value,
academic_year=cal_def.academic_year
)
if existing:
logger.info(
"Calendar already exists, skipping",
calendar_id=cal_def.calendar_id
)
skipped_count += 1
continue
# Convert holiday periods to dict format
holiday_periods = [
{
"name": hp.name,
"start_date": hp.start_date,
"end_date": hp.end_date,
"description": hp.description
}
for hp in cal_def.holiday_periods
]
# Convert school hours to dict format
school_hours = {
"morning_start": cal_def.school_hours.morning_start,
"morning_end": cal_def.school_hours.morning_end,
"has_afternoon_session": cal_def.school_hours.has_afternoon_session,
"afternoon_start": cal_def.school_hours.afternoon_start,
"afternoon_end": cal_def.school_hours.afternoon_end
}
# Create calendar in database
created_calendar = await repo.create_school_calendar(
city_id=cal_def.city_id,
calendar_name=cal_def.calendar_name,
school_type=cal_def.school_type.value,
academic_year=cal_def.academic_year,
holiday_periods=holiday_periods,
school_hours=school_hours,
source=cal_def.source,
enabled=cal_def.enabled
)
logger.info(
"Calendar seeded successfully",
calendar_id=str(created_calendar.id),
city=cal_def.city_id,
type=cal_def.school_type.value,
year=cal_def.academic_year
)
seeded_count += 1
logger.info(
"School calendar seeding completed",
seeded=seeded_count,
skipped=skipped_count,
total=len(calendars)
)
return True
except Exception as e:
logger.error("Error seeding school calendars", error=str(e))
return False
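# Illustrative sketch only: the dict shapes passed to CalendarRepository.create_school_calendar()
# above (values are examples taken from the registry data):
#
#   holiday_periods = [
#       {"name": "Christmas Holiday", "start_date": "2024-12-21",
#        "end_date": "2025-01-07", "description": "Official Christmas break"},
#   ]
#   school_hours = {
#       "morning_start": "09:00", "morning_end": "14:00",
#       "has_afternoon_session": False,
#       "afternoon_start": None, "afternoon_end": None,
#   }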

View File

@@ -0,0 +1 @@
"""Kubernetes job scripts for data initialization and rotation"""

69
services/external/app/jobs/initialize_data.py vendored Normal file
View File

@@ -0,0 +1,69 @@
# services/external/app/jobs/initialize_data.py
"""
Kubernetes Init Job - Initialize 24-month historical data
"""
import asyncio
import argparse
import sys
import logging
import structlog
from app.ingestion.ingestion_manager import DataIngestionManager
from app.core.database import database_manager
logger = structlog.get_logger()
async def main(months: int = 24):
"""Initialize historical data for all enabled cities and seed calendars"""
logger.info("Starting data initialization job", months=months)
try:
manager = DataIngestionManager()
# Initialize weather and traffic data
weather_traffic_success = await manager.initialize_all_cities(months=months)
# Seed school calendars
logger.info("Proceeding to seed school calendars...")
calendar_success = await manager.seed_school_calendars()
# Calendar seeding is critical, but weather/traffic can have partial success
overall_success = calendar_success and weather_traffic_success
if overall_success:
logger.info("✅ Data initialization completed successfully (weather, traffic, calendars)")
sys.exit(0)
else:
if not calendar_success:
logger.error("❌ Calendar seeding failed - this is critical")
sys.exit(1)
elif not weather_traffic_success:
# Weather/traffic initialization failed for every city, but calendars are seeded, so warn and continue
logger.warning("⚠️ Weather/traffic initialization failed, but system can continue in degraded mode")
logger.info("✅ Calendar seeding completed - system can operate with available data")
sys.exit(0) # Allow partial success for weather/traffic
except Exception as e:
logger.error("❌ Fatal error during initialization", error=str(e))
sys.exit(1)
finally:
await database_manager.close_connections()
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Initialize historical data")
parser.add_argument("--months", type=int, default=24, help="Number of months to load")
parser.add_argument("--log-level", default="INFO", help="Log level")
args = parser.parse_args()
# Convert string log level to logging constant
log_level = getattr(logging, args.log_level.upper(), logging.INFO)
structlog.configure(
wrapper_class=structlog.make_filtering_bound_logger(log_level)
)
asyncio.run(main(months=args.months))
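# Example invocation (assuming the module path shown in the header comment):
#   python -m app.jobs.initialize_data --months 24 --log-level DEBUG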

50
services/external/app/jobs/rotate_data.py vendored Normal file
View File

@@ -0,0 +1,50 @@
# services/external/app/jobs/rotate_data.py
"""
Kubernetes CronJob - Monthly data rotation (24-month window)
"""
import asyncio
import argparse
import sys
import logging
import structlog
from app.ingestion.ingestion_manager import DataIngestionManager
from app.core.database import database_manager
logger = structlog.get_logger()
async def main():
"""Rotate 24-month data window"""
logger.info("Starting monthly data rotation job")
try:
manager = DataIngestionManager()
await manager.rotate_monthly_data()
logger.info("✅ Data rotation completed successfully")
sys.exit(0)
except Exception as e:
logger.error("❌ Fatal error during rotation", error=str(e))
sys.exit(1)
finally:
await database_manager.close_connections()
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Rotate historical data")
parser.add_argument("--log-level", default="INFO", help="Log level")
parser.add_argument("--notify-slack", type=bool, default=False, help="Send Slack notification")
args = parser.parse_args()
# Convert string log level to logging constant
log_level = getattr(logging, args.log_level.upper(), logging.INFO)
structlog.configure(
wrapper_class=structlog.make_filtering_bound_logger(log_level)
)
asyncio.run(main())
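# Example invocation (assuming the module path shown in the header comment):
#   python -m app.jobs.rotate_data --log-level INFO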

207
services/external/app/main.py vendored Normal file
View File

@@ -0,0 +1,207 @@
# services/external/app/main.py
"""
External Service Main Application
"""
from fastapi import FastAPI
from sqlalchemy import text
from app.core.config import settings
from app.core.database import database_manager
# Removed import of non-existent messaging module
# External service will use unified messaging from base class
from shared.service_base import StandardFastAPIService
from shared.redis_utils import initialize_redis, close_redis
# Include routers
from app.api import weather_data, traffic_data, city_operations, calendar_operations, audit, poi_context, geocoding, poi_refresh_jobs
from app.services.poi_scheduler import start_scheduler, stop_scheduler
class ExternalService(StandardFastAPIService):
"""External Data Service with standardized setup"""
expected_migration_version = "00001"
async def verify_migrations(self):
"""Verify database schema matches the latest migrations."""
try:
async with self.database_manager.get_session() as session:
result = await session.execute(text("SELECT version_num FROM alembic_version"))
version = result.scalar()
if version != self.expected_migration_version:
self.logger.error(f"Migration version mismatch: expected {self.expected_migration_version}, got {version}")
raise RuntimeError(f"Migration version mismatch: expected {self.expected_migration_version}, got {version}")
self.logger.info(f"Migration verification successful: {version}")
except Exception as e:
self.logger.error(f"Migration verification failed: {e}")
raise
def __init__(self):
# Define expected database tables for health checks
external_expected_tables = [
'weather_data', 'weather_forecasts', 'traffic_data',
'traffic_measurement_points', 'traffic_background_jobs',
'tenant_poi_contexts', 'poi_refresh_jobs'
]
# Define custom API checks
async def check_weather_api():
"""Check weather API configuration"""
try:
return bool(settings.AEMET_API_KEY)
except Exception as e:
self.logger.error("Weather API check failed", error=str(e))
return False
async def check_traffic_api():
"""Check traffic API configuration"""
try:
return bool(settings.MADRID_OPENDATA_API_KEY)
except Exception as e:
self.logger.error("Traffic API check failed", error=str(e))
return False
# Define custom metrics for external service
external_custom_metrics = {
"weather_api_calls_total": {
"type": "counter",
"description": "Total weather API calls"
},
"weather_api_success_total": {
"type": "counter",
"description": "Successful weather API calls"
},
"weather_api_failures_total": {
"type": "counter",
"description": "Failed weather API calls"
},
"traffic_api_calls_total": {
"type": "counter",
"description": "Total traffic API calls"
},
"traffic_api_success_total": {
"type": "counter",
"description": "Successful traffic API calls"
},
"traffic_api_failures_total": {
"type": "counter",
"description": "Failed traffic API calls"
},
"data_collection_jobs_total": {
"type": "counter",
"description": "Data collection jobs"
},
"data_records_stored_total": {
"type": "counter",
"description": "Data records stored"
},
"data_quality_issues_total": {
"type": "counter",
"description": "Data quality issues detected"
},
"weather_api_duration_seconds": {
"type": "histogram",
"description": "Weather API call duration"
},
"traffic_api_duration_seconds": {
"type": "histogram",
"description": "Traffic API call duration"
},
"data_collection_duration_seconds": {
"type": "histogram",
"description": "Data collection job duration"
},
"data_processing_duration_seconds": {
"type": "histogram",
"description": "Data processing duration"
}
}
super().__init__(
service_name="external-service",
app_name="Bakery External Data Service",
description="External data collection service for weather, traffic, and events data",
version="1.0.0",
log_level=settings.LOG_LEVEL,
cors_origins=settings.CORS_ORIGINS,
api_prefix="", # Empty because RouteBuilder already includes /api/v1
database_manager=database_manager,
expected_tables=external_expected_tables,
custom_health_checks={
"weather_api": check_weather_api,
"traffic_api": check_traffic_api
},
custom_metrics=external_custom_metrics,
enable_messaging=True
)
async def _setup_messaging(self):
"""Setup messaging for external service using unified messaging"""
# The base class will handle the unified messaging setup
# For external service, no additional setup is needed
self.logger.info("External service unified messaging initialized")
async def _cleanup_messaging(self):
"""Cleanup messaging for external service"""
# The base class will handle the unified messaging cleanup
self.logger.info("External service unified messaging cleaned up")
async def on_startup(self, app: FastAPI):
"""Custom startup logic for external service"""
# Verify database schema before accepting traffic
await self.verify_migrations()
# Initialize Redis connection
await initialize_redis(settings.REDIS_URL, db=0, max_connections=50)
self.logger.info("Redis initialized for external service")
# Start POI refresh scheduler
await start_scheduler()
self.logger.info("POI refresh scheduler started")
async def on_shutdown(self, app: FastAPI):
"""Custom shutdown logic for external service"""
# Stop POI refresh scheduler
await stop_scheduler()
self.logger.info("POI refresh scheduler stopped")
# Close Redis connection
await close_redis()
self.logger.info("Redis connection closed")
# Database cleanup is handled by the base class
def get_service_features(self):
"""Return external-specific features"""
return [
"weather_data_collection",
"traffic_data_collection",
"aemet_integration",
"madrid_opendata_integration",
"data_quality_monitoring",
"scheduled_collection_jobs",
"external_api_monitoring"
]
# Create service instance
service = ExternalService()
# Create FastAPI app with standardized setup
app = service.create_app()
# Setup standard endpoints
service.setup_standard_endpoints()
# Include routers
# IMPORTANT: Register audit router FIRST to avoid route matching conflicts
service.add_router(audit.router)
service.add_router(weather_data.router)
service.add_router(traffic_data.router)
service.add_router(city_operations.router) # New v2.0 city-based optimized endpoints
service.add_router(calendar_operations.router) # School calendars and hyperlocal data
service.add_router(poi_context.router) # POI detection and location-based features
service.add_router(geocoding.router) # Address search and geocoding
service.add_router(poi_refresh_jobs.router) # POI refresh background jobs

View File

@@ -0,0 +1,46 @@
"""
External Service Models Package
Import all models to ensure they are registered with SQLAlchemy Base.
"""
# Import AuditLog model for this service
from shared.security import create_audit_log_model
from shared.database.base import Base
# Create audit log model for this service
AuditLog = create_audit_log_model(Base)
# Import all models to register them with the Base metadata
from .traffic import (
TrafficData,
TrafficMeasurementPoint,
TrafficDataBackgroundJob,
)
from .weather import (
WeatherData,
WeatherForecast,
)
from .city_weather import CityWeatherData
from .city_traffic import CityTrafficData
from .calendar import SchoolCalendar, TenantLocationContext
# List all models for easier access
__all__ = [
# Traffic models
"TrafficData",
"TrafficMeasurementPoint",
"TrafficDataBackgroundJob",
# Weather models
"WeatherData",
"WeatherForecast",
# City-based models (new)
"CityWeatherData",
"CityTrafficData",
# Calendar models (hyperlocal)
"SchoolCalendar",
"TenantLocationContext",
"AuditLog",
]

86
services/external/app/models/calendar.py vendored Normal file
View File

@@ -0,0 +1,86 @@
# services/external/app/models/calendar.py
"""
School Calendar and Tenant Location Context Models
Hyperlocal data for demand forecasting
"""
from sqlalchemy import Column, String, DateTime, Index, Boolean
from sqlalchemy.dialects.postgresql import UUID, JSONB
from datetime import datetime
import uuid
from app.core.database import Base
class SchoolCalendar(Base):
"""City-based school calendar data for forecasting"""
__tablename__ = "school_calendars"
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
city_id = Column(String(50), nullable=False, index=True)
calendar_name = Column(String(100), nullable=False)
school_type = Column(String(20), nullable=False) # primary, secondary, university
academic_year = Column(String(10), nullable=False) # e.g., "2024-2025"
# Holiday periods as array of date ranges
# Example: [
# {"name": "Christmas", "start": "2024-12-20", "end": "2025-01-08"},
# {"name": "Easter", "start": "2025-04-10", "end": "2025-04-21"},
# {"name": "Summer", "start": "2025-06-23", "end": "2025-09-09"}
# ]
holiday_periods = Column(JSONB, nullable=False, default=list)
# School hours configuration
# Example: {
# "morning_start": "09:00",
# "morning_end": "14:00",
# "afternoon_start": "15:00", # if applicable
# "afternoon_end": "17:00",
# "has_afternoon_session": false
# }
school_hours = Column(JSONB, nullable=False, default=dict)
# Metadata
source = Column(String(100), nullable=True) # e.g., "madrid_education_dept"
enabled = Column(Boolean, default=True, nullable=False)
created_at = Column(DateTime(timezone=True), default=datetime.utcnow)
updated_at = Column(DateTime(timezone=True), default=datetime.utcnow, onupdate=datetime.utcnow)
__table_args__ = (
Index('idx_school_calendar_city_year', 'city_id', 'academic_year'),
Index('idx_school_calendar_city_type', 'city_id', 'school_type'),
)
class TenantLocationContext(Base):
"""Tenant-specific location context for hyperlocal forecasting"""
__tablename__ = "tenant_location_contexts"
tenant_id = Column(UUID(as_uuid=True), primary_key=True)
city_id = Column(String(50), nullable=False, index=True)
# School calendar assignment
school_calendar_id = Column(UUID(as_uuid=True), nullable=True, index=True)
# Hyperlocal context
neighborhood = Column(String(100), nullable=True)
# Custom local events specific to this tenant's location
# Example: [
# {"name": "Neighborhood Festival", "date": "2025-06-15", "impact": "high"},
# {"name": "Local Market Day", "date": "2025-05-20", "impact": "medium"}
# ]
local_events = Column(JSONB, nullable=True, default=list)
# Additional metadata
notes = Column(String(500), nullable=True)
created_at = Column(DateTime(timezone=True), default=datetime.utcnow)
updated_at = Column(DateTime(timezone=True), default=datetime.utcnow, onupdate=datetime.utcnow)
__table_args__ = (
Index('idx_tenant_location_calendar', 'school_calendar_id'),
)

36
services/external/app/models/city_traffic.py vendored Normal file
View File

@@ -0,0 +1,36 @@
# services/external/app/models/city_traffic.py
"""
City Traffic Data Model - Shared city-based traffic storage
"""
from sqlalchemy import Column, String, Integer, Float, DateTime, Text, Index
from sqlalchemy.dialects.postgresql import UUID, JSONB
from datetime import datetime
import uuid
from app.core.database import Base
class CityTrafficData(Base):
"""City-based historical traffic data"""
__tablename__ = "city_traffic_data"
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
city_id = Column(String(50), nullable=False, index=True)
date = Column(DateTime(timezone=True), nullable=False, index=True)
traffic_volume = Column(Integer, nullable=True)
pedestrian_count = Column(Integer, nullable=True)
congestion_level = Column(String(20), nullable=True)
average_speed = Column(Float, nullable=True)
source = Column(String(50), nullable=False)
raw_data = Column(JSONB, nullable=True)
created_at = Column(DateTime(timezone=True), default=datetime.utcnow)
updated_at = Column(DateTime(timezone=True), default=datetime.utcnow, onupdate=datetime.utcnow)
__table_args__ = (
Index('idx_city_traffic_lookup', 'city_id', 'date'),
)

38
services/external/app/models/city_weather.py vendored Normal file
View File

@@ -0,0 +1,38 @@
# services/external/app/models/city_weather.py
"""
City Weather Data Model - Shared city-based weather storage
"""
from sqlalchemy import Column, String, Float, DateTime, Text, Index
from sqlalchemy.dialects.postgresql import UUID, JSONB
from datetime import datetime
import uuid
from app.core.database import Base
class CityWeatherData(Base):
"""City-based historical weather data"""
__tablename__ = "city_weather_data"
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
city_id = Column(String(50), nullable=False, index=True)
date = Column(DateTime(timezone=True), nullable=False, index=True)
temperature = Column(Float, nullable=True)
precipitation = Column(Float, nullable=True)
humidity = Column(Float, nullable=True)
wind_speed = Column(Float, nullable=True)
pressure = Column(Float, nullable=True)
description = Column(String(200), nullable=True)
source = Column(String(50), nullable=False)
raw_data = Column(JSONB, nullable=True)
created_at = Column(DateTime(timezone=True), default=datetime.utcnow)
updated_at = Column(DateTime(timezone=True), default=datetime.utcnow, onupdate=datetime.utcnow)
__table_args__ = (
Index('idx_city_weather_lookup', 'city_id', 'date'),
)

View File

@@ -0,0 +1,123 @@
"""
POI Context Model
Stores Point of Interest detection results and ML features for bakery locations.
Used for location-based demand forecasting with contextual features.
"""
from sqlalchemy import Column, String, DateTime, Float, Index, Integer
from sqlalchemy.dialects.postgresql import UUID, JSONB
from datetime import datetime, timezone, timedelta
import uuid
from app.core.database import Base
class TenantPOIContext(Base):
"""
POI (Point of Interest) context for bakery location.
Stores detected POIs around bakery and calculated ML features
for demand forecasting with location-specific context.
"""
__tablename__ = "tenant_poi_contexts"
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
tenant_id = Column(UUID(as_uuid=True), nullable=False, unique=True, index=True)
# Location (denormalized for quick reference and spatial queries)
latitude = Column(Float, nullable=False)
longitude = Column(Float, nullable=False)
# POI Detection Results (full raw data)
# Structure: {
# "schools": {
# "pois": [{"osm_id": "...", "name": "...", "lat": ..., "lon": ...}],
# "features": {"proximity_score": 3.45, "count_0_100m": 2, ...},
# "count": 5
# },
# "offices": {...},
# ...
# }
poi_detection_results = Column(JSONB, nullable=False, default=dict)
# ML Features (flat structure for easy model ingestion)
# Structure: {
# "poi_schools_proximity_score": 3.45,
# "poi_schools_weighted_proximity_score": 5.18,
# "poi_schools_count_0_100m": 2,
# "poi_offices_proximity_score": 1.23,
# ...
# }
ml_features = Column(JSONB, nullable=False, default=dict)
# Summary Statistics
total_pois_detected = Column(Integer, default=0)
high_impact_categories = Column(JSONB, default=list) # Categories with significant POI presence
relevant_categories = Column(JSONB, default=list) # Categories that passed relevance thresholds
# Detection Metadata
detection_timestamp = Column(DateTime(timezone=True), nullable=False)
detection_source = Column(String(50), default="overpass_api")
detection_status = Column(String(20), default="completed") # completed, failed, partial
detection_error = Column(String(500), nullable=True) # Error message if detection failed
# Data Freshness Strategy
# POIs don't change frequently, refresh every 6 months
next_refresh_date = Column(DateTime(timezone=True), nullable=True)
refresh_interval_days = Column(Integer, default=180) # 6 months default
last_refreshed_at = Column(DateTime(timezone=True), nullable=True)
# Timestamps
created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
updated_at = Column(
DateTime(timezone=True),
default=lambda: datetime.now(timezone.utc),
onupdate=lambda: datetime.now(timezone.utc)
)
__table_args__ = (
Index('idx_tenant_poi_location', 'latitude', 'longitude'),
Index('idx_tenant_poi_refresh', 'next_refresh_date'),
Index('idx_tenant_poi_status', 'detection_status'),
)
def to_dict(self):
"""Convert to dictionary for API responses"""
return {
"id": str(self.id),
"tenant_id": str(self.tenant_id),
"location": {
"latitude": self.latitude,
"longitude": self.longitude
},
"poi_detection_results": self.poi_detection_results,
"ml_features": self.ml_features,
"total_pois_detected": self.total_pois_detected,
"high_impact_categories": self.high_impact_categories,
"relevant_categories": self.relevant_categories,
"detection_timestamp": self.detection_timestamp.isoformat() if self.detection_timestamp else None,
"detection_source": self.detection_source,
"detection_status": self.detection_status,
"detection_error": self.detection_error,
"next_refresh_date": self.next_refresh_date.isoformat() if self.next_refresh_date else None,
"last_refreshed_at": self.last_refreshed_at.isoformat() if self.last_refreshed_at else None,
"created_at": self.created_at.isoformat() if self.created_at else None,
"updated_at": self.updated_at.isoformat() if self.updated_at else None
}
def is_stale(self) -> bool:
"""Check if POI data needs refresh"""
if not self.next_refresh_date:
return True
return datetime.now(timezone.utc) > self.next_refresh_date
def calculate_next_refresh(self) -> datetime:
"""Calculate next refresh date based on interval"""
return datetime.now(timezone.utc) + timedelta(days=self.refresh_interval_days)
def mark_refreshed(self):
"""Mark as refreshed and calculate next refresh date"""
self.last_refreshed_at = datetime.now(timezone.utc)
self.next_refresh_date = self.calculate_next_refresh()
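# Illustrative refresh-lifecycle sketch (assumes `ctx` is a loaded TenantPOIContext and
# that POI re-detection happens elsewhere):
#
#   if ctx.is_stale():
#       ...                   # re-run POI detection, update poi_detection_results / ml_features
#       ctx.mark_refreshed()  # stamps last_refreshed_at and schedules next_refresh_date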

View File

@@ -0,0 +1,154 @@
"""
POI Refresh Job Model
Tracks background jobs for periodic POI context refresh.
"""
from sqlalchemy import Column, String, DateTime, Integer, Boolean, Text, Float
from sqlalchemy.dialects.postgresql import UUID, JSONB
from datetime import datetime, timezone
import uuid
from app.core.database import Base
class POIRefreshJob(Base):
"""
POI Refresh Background Job Model
Tracks periodic POI context refresh jobs for all tenants.
Jobs run on a configurable schedule (default: 180 days).
"""
__tablename__ = "poi_refresh_jobs"
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
tenant_id = Column(UUID(as_uuid=True), nullable=False, index=True)
# Job scheduling
scheduled_at = Column(
DateTime(timezone=True),
nullable=False,
index=True,
comment="When this job was scheduled"
)
started_at = Column(
DateTime(timezone=True),
nullable=True,
comment="When job execution started"
)
completed_at = Column(
DateTime(timezone=True),
nullable=True,
comment="When job execution completed"
)
# Job status
status = Column(
String(50),
nullable=False,
default="pending",
index=True,
comment="Job status: pending, running, completed, failed"
)
# Job execution details
attempt_count = Column(
Integer,
nullable=False,
default=0,
comment="Number of execution attempts"
)
max_attempts = Column(
Integer,
nullable=False,
default=3,
comment="Maximum number of retry attempts"
)
# Location data (cached for job execution)
latitude = Column(
Float,
nullable=False,
comment="Bakery latitude for POI detection"
)
longitude = Column(
Float,
nullable=False,
comment="Bakery longitude for POI detection"
)
# Results
pois_detected = Column(
Integer,
nullable=True,
comment="Number of POIs detected in this refresh"
)
changes_detected = Column(
Boolean,
default=False,
comment="Whether significant changes were detected"
)
change_summary = Column(
JSONB,
nullable=True,
comment="Summary of changes detected"
)
# Error handling
error_message = Column(
Text,
nullable=True,
comment="Error message if job failed"
)
error_details = Column(
JSONB,
nullable=True,
comment="Detailed error information"
)
# Next execution
next_scheduled_at = Column(
DateTime(timezone=True),
nullable=True,
index=True,
comment="When next refresh should be scheduled"
)
# Metadata
created_at = Column(
DateTime(timezone=True),
nullable=False,
default=lambda: datetime.now(timezone.utc)
)
updated_at = Column(
DateTime(timezone=True),
nullable=False,
default=lambda: datetime.now(timezone.utc),
onupdate=lambda: datetime.now(timezone.utc)
)
def __repr__(self):
return (
f"<POIRefreshJob(id={self.id}, tenant_id={self.tenant_id}, "
f"status={self.status}, scheduled_at={self.scheduled_at})>"
)
@property
def is_overdue(self) -> bool:
"""Check if job is overdue for execution"""
if self.status in ("completed", "running"):
return False
return datetime.now(timezone.utc) > self.scheduled_at
@property
def can_retry(self) -> bool:
"""Check if job can be retried"""
return self.attempt_count < self.max_attempts
@property
def duration_seconds(self) -> float | None:
"""Calculate job duration in seconds"""
if self.started_at and self.completed_at:
return (self.completed_at - self.started_at).total_seconds()
return None
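# Illustrative sketch of the retry gate a scheduler could apply (assumed usage, not shown here):
#
#   if job.is_overdue and job.can_retry:
#       job.attempt_count += 1
#       job.status = "running"
#       job.started_at = datetime.now(timezone.utc)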

294
services/external/app/models/traffic.py vendored Normal file
View File

@@ -0,0 +1,294 @@
# ================================================================
# services/external/app/models/traffic.py - Enhanced for Multiple Cities
# ================================================================
"""
Flexible traffic data models supporting multiple cities and extensible schemas
"""
from sqlalchemy import Column, String, DateTime, Float, Integer, Text, Index, Boolean, JSON
from sqlalchemy.dialects.postgresql import UUID
import uuid
from datetime import datetime, timezone
from typing import Dict, Any, Optional
from shared.database.base import Base
class TrafficData(Base):
"""
Flexible traffic data model supporting multiple cities
Designed to accommodate varying data structures across different cities
"""
__tablename__ = "traffic_data"
# Primary identification
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
# Location and temporal data
location_id = Column(String(100), nullable=False, index=True) # "lat,lon" or city-specific ID
city = Column(String(50), nullable=False, index=True) # madrid, barcelona, valencia, etc.
date = Column(DateTime(timezone=True), nullable=False, index=True)
# Core standardized traffic metrics (common across all cities)
traffic_volume = Column(Integer, nullable=True) # Vehicle count or intensity
congestion_level = Column(String(20), nullable=True) # low, medium, high, blocked
average_speed = Column(Float, nullable=True) # Average speed in km/h
# Enhanced metrics (may not be available for all cities)
occupation_percentage = Column(Float, nullable=True) # Road occupation %
load_percentage = Column(Float, nullable=True) # Traffic load %
pedestrian_count = Column(Integer, nullable=True) # Estimated pedestrian count
# Measurement point information
measurement_point_id = Column(String(100), nullable=True, index=True)
measurement_point_name = Column(String(500), nullable=True)
measurement_point_type = Column(String(50), nullable=True) # URB, M30, A, etc.
# Geographic data
latitude = Column(Float, nullable=True)
longitude = Column(Float, nullable=True)
district = Column(String(100), nullable=True) # City district/area
zone = Column(String(100), nullable=True) # Traffic zone or sector
# Data source and quality
source = Column(String(50), nullable=False, default="unknown") # madrid_opendata, synthetic, etc.
data_quality_score = Column(Float, nullable=True) # Quality score 0-100
is_synthetic = Column(Boolean, default=False)
has_pedestrian_inference = Column(Boolean, default=False)
# City-specific data (flexible JSON storage)
city_specific_data = Column(JSON, nullable=True) # Store city-specific fields
# Raw data backup
raw_data = Column(Text, nullable=True) # Original data for debugging
# Audit fields
tenant_id = Column(UUID(as_uuid=True), nullable=True, index=True) # For multi-tenancy
created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
updated_at = Column(DateTime(timezone=True),
default=lambda: datetime.now(timezone.utc),
onupdate=lambda: datetime.now(timezone.utc))
# Performance-optimized indexes
__table_args__ = (
# Core query patterns
Index('idx_traffic_location_date', 'location_id', 'date'),
Index('idx_traffic_city_date', 'city', 'date'),
Index('idx_traffic_tenant_date', 'tenant_id', 'date'),
# Advanced query patterns
Index('idx_traffic_city_location', 'city', 'location_id'),
Index('idx_traffic_measurement_point', 'city', 'measurement_point_id'),
Index('idx_traffic_district_date', 'city', 'district', 'date'),
# Training data queries
Index('idx_traffic_training', 'tenant_id', 'city', 'date', 'is_synthetic'),
Index('idx_traffic_quality', 'city', 'data_quality_score', 'date'),
)
def to_dict(self) -> Dict[str, Any]:
"""Convert model to dictionary for API responses"""
result = {
'id': str(self.id),
'location_id': self.location_id,
'city': self.city,
'date': self.date.isoformat() if self.date else None,
'traffic_volume': self.traffic_volume,
'congestion_level': self.congestion_level,
'average_speed': self.average_speed,
'occupation_percentage': self.occupation_percentage,
'load_percentage': self.load_percentage,
'pedestrian_count': self.pedestrian_count,
'measurement_point_id': self.measurement_point_id,
'measurement_point_name': self.measurement_point_name,
'measurement_point_type': self.measurement_point_type,
'latitude': self.latitude,
'longitude': self.longitude,
'district': self.district,
'zone': self.zone,
'source': self.source,
'data_quality_score': self.data_quality_score,
'is_synthetic': self.is_synthetic,
'has_pedestrian_inference': self.has_pedestrian_inference,
'created_at': self.created_at.isoformat() if self.created_at else None
}
# Add city-specific data if present
if self.city_specific_data:
result['city_specific_data'] = self.city_specific_data
return result
def get_city_specific_field(self, field_name: str, default: Any = None) -> Any:
"""Safely get city-specific field value"""
if self.city_specific_data and isinstance(self.city_specific_data, dict):
return self.city_specific_data.get(field_name, default)
return default
def set_city_specific_field(self, field_name: str, value: Any) -> None:
"""Set city-specific field value"""
if not self.city_specific_data:
self.city_specific_data = {}
if not isinstance(self.city_specific_data, dict):
self.city_specific_data = {}
self.city_specific_data[field_name] = value
class TrafficMeasurementPoint(Base):
"""
Registry of traffic measurement points across all cities
Supports different city-specific measurement point schemas
"""
__tablename__ = "traffic_measurement_points"
# Primary identification
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
# Location and identification
city = Column(String(50), nullable=False, index=True)
measurement_point_id = Column(String(100), nullable=False, index=True) # City-specific ID
name = Column(String(500), nullable=True)
description = Column(Text, nullable=True)
# Geographic information
latitude = Column(Float, nullable=False)
longitude = Column(Float, nullable=False)
district = Column(String(100), nullable=True)
zone = Column(String(100), nullable=True)
# Classification
road_type = Column(String(50), nullable=True) # URB, M30, A, etc.
measurement_type = Column(String(50), nullable=True) # intensity, speed, etc.
point_category = Column(String(50), nullable=True) # urban, highway, ring_road
# Status and metadata
is_active = Column(Boolean, default=True)
installation_date = Column(DateTime(timezone=True), nullable=True)
last_data_received = Column(DateTime(timezone=True), nullable=True)
data_quality_rating = Column(Float, nullable=True) # Average quality 0-100
# City-specific point data
city_specific_metadata = Column(JSON, nullable=True)
# Audit fields
created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
updated_at = Column(DateTime(timezone=True),
default=lambda: datetime.now(timezone.utc),
onupdate=lambda: datetime.now(timezone.utc))
__table_args__ = (
# Ensure unique measurement points per city
Index('idx_unique_city_point', 'city', 'measurement_point_id', unique=True),
# Geographic queries
Index('idx_points_city_location', 'city', 'latitude', 'longitude'),
Index('idx_points_district', 'city', 'district'),
Index('idx_points_road_type', 'city', 'road_type'),
# Status queries
Index('idx_points_active', 'city', 'is_active', 'last_data_received'),
)
def to_dict(self) -> Dict[str, Any]:
"""Convert measurement point to dictionary"""
return {
'id': str(self.id),
'city': self.city,
'measurement_point_id': self.measurement_point_id,
'name': self.name,
'description': self.description,
'latitude': self.latitude,
'longitude': self.longitude,
'district': self.district,
'zone': self.zone,
'road_type': self.road_type,
'measurement_type': self.measurement_type,
'point_category': self.point_category,
'is_active': self.is_active,
'installation_date': self.installation_date.isoformat() if self.installation_date else None,
'last_data_received': self.last_data_received.isoformat() if self.last_data_received else None,
'data_quality_rating': self.data_quality_rating,
'city_specific_metadata': self.city_specific_metadata,
'created_at': self.created_at.isoformat() if self.created_at else None
}
class TrafficDataBackgroundJob(Base):
"""
Track background data collection jobs for multiple cities
Supports scheduling and monitoring of data fetching processes
"""
__tablename__ = "traffic_background_jobs"
# Primary identification
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
# Job configuration
job_type = Column(String(50), nullable=False) # historical_fetch, cleanup, etc.
city = Column(String(50), nullable=False, index=True)
location_pattern = Column(String(200), nullable=True) # Location pattern or specific coords
# Scheduling
scheduled_at = Column(DateTime(timezone=True), nullable=False)
started_at = Column(DateTime(timezone=True), nullable=True)
completed_at = Column(DateTime(timezone=True), nullable=True)
# Status tracking
status = Column(String(20), nullable=False, default='pending') # pending, running, completed, failed
progress_percentage = Column(Float, default=0.0)
records_processed = Column(Integer, default=0)
records_stored = Column(Integer, default=0)
# Date range for data jobs
data_start_date = Column(DateTime(timezone=True), nullable=True)
data_end_date = Column(DateTime(timezone=True), nullable=True)
# Results and error handling
success_count = Column(Integer, default=0)
error_count = Column(Integer, default=0)
error_message = Column(Text, nullable=True)
job_metadata = Column(JSON, nullable=True) # Additional job-specific data
# Tenant association
tenant_id = Column(UUID(as_uuid=True), nullable=True, index=True)
# Audit fields
created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
updated_at = Column(DateTime(timezone=True),
default=lambda: datetime.now(timezone.utc),
onupdate=lambda: datetime.now(timezone.utc))
__table_args__ = (
# Job monitoring
Index('idx_jobs_city_status', 'city', 'status', 'scheduled_at'),
Index('idx_jobs_tenant_status', 'tenant_id', 'status', 'scheduled_at'),
Index('idx_jobs_type_city', 'job_type', 'city', 'scheduled_at'),
# Cleanup queries
Index('idx_jobs_completed', 'status', 'completed_at'),
)
def to_dict(self) -> Dict[str, Any]:
"""Convert job to dictionary"""
return {
'id': str(self.id),
'job_type': self.job_type,
'city': self.city,
'location_pattern': self.location_pattern,
'scheduled_at': self.scheduled_at.isoformat() if self.scheduled_at else None,
'started_at': self.started_at.isoformat() if self.started_at else None,
'completed_at': self.completed_at.isoformat() if self.completed_at else None,
'status': self.status,
'progress_percentage': self.progress_percentage,
'records_processed': self.records_processed,
'records_stored': self.records_stored,
'data_start_date': self.data_start_date.isoformat() if self.data_start_date else None,
'data_end_date': self.data_end_date.isoformat() if self.data_end_date else None,
'success_count': self.success_count,
'error_count': self.error_count,
'error_message': self.error_message,
'job_metadata': self.job_metadata,
'tenant_id': str(self.tenant_id) if self.tenant_id else None,
'created_at': self.created_at.isoformat() if self.created_at else None,
'updated_at': self.updated_at.isoformat() if self.updated_at else None
}
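# Illustrative sketch: round-tripping a provider-specific field on an in-memory
# TrafficData instance (example values only, no database session involved).
if __name__ == "__main__":
    sample = TrafficData(
        location_id="40.4168,-3.7038",
        city="madrid",
        date=datetime.now(timezone.utc),
        source="madrid_opendata",
    )
    sample.set_city_specific_field("error_code", "N")
    print(sample.get_city_specific_field("error_code"))  # -> N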

74
services/external/app/models/weather.py vendored Normal file
View File

@@ -0,0 +1,74 @@
# ================================================================
# services/external/app/models/weather.py
# ================================================================
"""Weather data models"""
from sqlalchemy import Column, String, DateTime, Float, Integer, Text, Index, Boolean
from sqlalchemy.dialects.postgresql import UUID, JSON
import uuid
from datetime import datetime, timezone
from shared.database.base import Base
class WeatherData(Base):
__tablename__ = "weather_data"
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
location_id = Column(String(100), nullable=False, index=True)
city = Column(String(50), nullable=False)
station_name = Column(String(200), nullable=True)
latitude = Column(Float, nullable=True)
longitude = Column(Float, nullable=True)
date = Column(DateTime(timezone=True), nullable=False, index=True)
forecast_date = Column(DateTime(timezone=True), nullable=True)
temperature = Column(Float, nullable=True) # Celsius
temperature_min = Column(Float, nullable=True)
temperature_max = Column(Float, nullable=True)
feels_like = Column(Float, nullable=True)
precipitation = Column(Float, nullable=True) # mm
precipitation_probability = Column(Float, nullable=True)
humidity = Column(Float, nullable=True) # percentage
wind_speed = Column(Float, nullable=True) # km/h
wind_direction = Column(Float, nullable=True)
wind_gust = Column(Float, nullable=True)
pressure = Column(Float, nullable=True) # hPa
visibility = Column(Float, nullable=True)
uv_index = Column(Float, nullable=True)
cloud_cover = Column(Float, nullable=True)
condition = Column(String(100), nullable=True)
description = Column(String(200), nullable=True)
weather_code = Column(String(20), nullable=True)
source = Column(String(50), nullable=False, default="aemet")
data_type = Column(String(20), nullable=False)
is_forecast = Column(Boolean, nullable=True)
data_quality_score = Column(Float, nullable=True)
raw_data = Column(JSON, nullable=True)
processed_data = Column(JSON, nullable=True)
tenant_id = Column(UUID(as_uuid=True), nullable=True, index=True)
created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
updated_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc), onupdate=lambda: datetime.now(timezone.utc))
__table_args__ = (
Index('idx_weather_location_date', 'location_id', 'date'),
)
class WeatherForecast(Base):
__tablename__ = "weather_forecasts"
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
location_id = Column(String(100), nullable=False, index=True)
forecast_date = Column(DateTime(timezone=True), nullable=False)
generated_at = Column(DateTime(timezone=True), nullable=False, default=lambda: datetime.now(timezone.utc))
temperature = Column(Float, nullable=True)
precipitation = Column(Float, nullable=True)
humidity = Column(Float, nullable=True)
wind_speed = Column(Float, nullable=True)
description = Column(String(200), nullable=True)
source = Column(String(50), nullable=False, default="aemet")
raw_data = Column(Text, nullable=True)
created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
updated_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc), onupdate=lambda: datetime.now(timezone.utc))
__table_args__ = (
Index('idx_forecast_location_date', 'location_id', 'forecast_date'),
)

View File

@@ -0,0 +1 @@
"""City registry module for multi-city support"""

377
services/external/app/registry/calendar_registry.py vendored Normal file
View File

@@ -0,0 +1,377 @@
# services/external/app/registry/calendar_registry.py
"""
Calendar Registry - Pre-configured school calendars and local events
"""
from dataclasses import dataclass
from typing import List, Optional, Dict, Any
from datetime import date
from enum import Enum
class SchoolType(str, Enum):
PRIMARY = "primary"
SECONDARY = "secondary"
UNIVERSITY = "university"
@dataclass
class HolidayPeriod:
"""School holiday period definition"""
name: str
start_date: str # ISO format: "2024-12-20"
end_date: str # ISO format: "2025-01-08"
description: Optional[str] = None
@dataclass
class SchoolHours:
"""School operating hours configuration"""
morning_start: str # "09:00"
morning_end: str # "14:00"
has_afternoon_session: bool # True/False
afternoon_start: Optional[str] = None # "15:00" if has_afternoon_session
afternoon_end: Optional[str] = None # "17:00" if has_afternoon_session
@dataclass
class CalendarDefinition:
"""School calendar configuration for a specific city and school type"""
calendar_id: str
calendar_name: str
city_id: str
school_type: SchoolType
academic_year: str # "2024-2025"
holiday_periods: List[HolidayPeriod]
school_hours: SchoolHours
source: str
enabled: bool = True
class CalendarRegistry:
"""Central registry of school calendars for forecasting"""
# Madrid Primary School Calendar 2024-2025 (Official Comunidad de Madrid - ORDEN 1177/2024)
MADRID_PRIMARY_2024_2025 = CalendarDefinition(
calendar_id="madrid_primary_2024_2025",
calendar_name="Madrid Primary School Calendar 2024-2025",
city_id="madrid",
school_type=SchoolType.PRIMARY,
academic_year="2024-2025",
holiday_periods=[
HolidayPeriod(
name="Christmas Holiday",
start_date="2024-12-21",
end_date="2025-01-07",
description="Official Christmas break - Comunidad de Madrid (Dec 21 - Jan 7)"
),
HolidayPeriod(
name="Easter Holiday (Semana Santa)",
start_date="2025-04-11",
end_date="2025-04-21",
description="Official Easter break - Comunidad de Madrid (Apr 11-21)"
),
HolidayPeriod(
name="Summer Holiday",
start_date="2025-06-21",
end_date="2025-09-08",
description="Summer vacation (Last day Jun 20, classes resume Sep 9)"
),
HolidayPeriod(
name="All Saints Long Weekend",
start_date="2024-10-31",
end_date="2024-11-03",
description="October 31 - November 3 non-working days"
),
HolidayPeriod(
name="February Long Weekend",
start_date="2025-02-28",
end_date="2025-03-03",
description="February 28 - March 3 non-working days"
),
],
school_hours=SchoolHours(
morning_start="09:00",
morning_end="14:00",
has_afternoon_session=False
),
source="comunidad_madrid_orden_1177_2024",
enabled=True
)
# Madrid Secondary School Calendar 2024-2025 (Official Comunidad de Madrid - ORDEN 1177/2024)
MADRID_SECONDARY_2024_2025 = CalendarDefinition(
calendar_id="madrid_secondary_2024_2025",
calendar_name="Madrid Secondary School Calendar 2024-2025",
city_id="madrid",
school_type=SchoolType.SECONDARY,
academic_year="2024-2025",
holiday_periods=[
HolidayPeriod(
name="Christmas Holiday",
start_date="2024-12-21",
end_date="2025-01-07",
description="Official Christmas break - Comunidad de Madrid (Dec 21 - Jan 7)"
),
HolidayPeriod(
name="Easter Holiday (Semana Santa)",
start_date="2025-04-11",
end_date="2025-04-21",
description="Official Easter break - Comunidad de Madrid (Apr 11-21)"
),
HolidayPeriod(
name="Summer Holiday",
start_date="2025-06-21",
end_date="2025-09-09",
description="Summer vacation (Last day Jun 20, classes resume Sep 10)"
),
HolidayPeriod(
name="All Saints Long Weekend",
start_date="2024-10-31",
end_date="2024-11-03",
description="October 31 - November 3 non-working days"
),
HolidayPeriod(
name="February Long Weekend",
start_date="2025-02-28",
end_date="2025-03-03",
description="February 28 - March 3 non-working days"
),
],
school_hours=SchoolHours(
morning_start="09:00",
morning_end="14:00",
has_afternoon_session=False
),
source="comunidad_madrid_orden_1177_2024",
enabled=True
)
# Madrid Primary School Calendar 2025-2026 (Official Comunidad de Madrid - ORDEN 1476/2025)
MADRID_PRIMARY_2025_2026 = CalendarDefinition(
calendar_id="madrid_primary_2025_2026",
calendar_name="Madrid Primary School Calendar 2025-2026",
city_id="madrid",
school_type=SchoolType.PRIMARY,
academic_year="2025-2026",
holiday_periods=[
HolidayPeriod(
name="Christmas Holiday",
start_date="2025-12-20",
end_date="2026-01-07",
description="Official Christmas break - Comunidad de Madrid (Dec 20 - Jan 7)"
),
HolidayPeriod(
name="Easter Holiday (Semana Santa)",
start_date="2026-03-27",
end_date="2026-04-06",
description="Official Easter break - Comunidad de Madrid (Mar 27 - Apr 6)"
),
HolidayPeriod(
name="Summer Holiday",
start_date="2026-06-21",
end_date="2026-09-08",
description="Summer vacation (classes resume Sep 9)"
),
HolidayPeriod(
name="October Long Weekend",
start_date="2025-10-13",
end_date="2025-10-13",
description="October 13 non-working day (after Día de la Hispanidad)"
),
HolidayPeriod(
name="All Saints Long Weekend",
start_date="2025-11-03",
end_date="2025-11-03",
description="November 3 non-working day (after All Saints)"
),
],
school_hours=SchoolHours(
morning_start="09:00",
morning_end="14:00",
has_afternoon_session=False
),
source="comunidad_madrid_orden_1476_2025",
enabled=True
)
# Madrid Secondary School Calendar 2025-2026 (Official Comunidad de Madrid - ORDEN 1476/2025)
MADRID_SECONDARY_2025_2026 = CalendarDefinition(
calendar_id="madrid_secondary_2025_2026",
calendar_name="Madrid Secondary School Calendar 2025-2026",
city_id="madrid",
school_type=SchoolType.SECONDARY,
academic_year="2025-2026",
holiday_periods=[
HolidayPeriod(
name="Christmas Holiday",
start_date="2025-12-20",
end_date="2026-01-07",
description="Official Christmas break - Comunidad de Madrid (Dec 20 - Jan 7)"
),
HolidayPeriod(
name="Easter Holiday (Semana Santa)",
start_date="2026-03-27",
end_date="2026-04-06",
description="Official Easter break - Comunidad de Madrid (Mar 27 - Apr 6)"
),
HolidayPeriod(
name="Summer Holiday",
start_date="2026-06-21",
end_date="2026-09-09",
description="Summer vacation (classes resume Sep 10)"
),
HolidayPeriod(
name="October Long Weekend",
start_date="2025-10-13",
end_date="2025-10-13",
description="October 13 non-working day (after Día de la Hispanidad)"
),
HolidayPeriod(
name="All Saints Long Weekend",
start_date="2025-11-03",
end_date="2025-11-03",
description="November 3 non-working day (after All Saints)"
),
],
school_hours=SchoolHours(
morning_start="09:00",
morning_end="14:00",
has_afternoon_session=False
),
source="comunidad_madrid_orden_1476_2025",
enabled=True
)
# Registry of all calendars
CALENDARS: List[CalendarDefinition] = [
MADRID_PRIMARY_2024_2025,
MADRID_SECONDARY_2024_2025,
MADRID_PRIMARY_2025_2026,
MADRID_SECONDARY_2025_2026,
]
@classmethod
def get_all_calendars(cls) -> List[CalendarDefinition]:
"""Get all calendars"""
return cls.CALENDARS
@classmethod
def get_enabled_calendars(cls) -> List[CalendarDefinition]:
"""Get all enabled calendars"""
return [cal for cal in cls.CALENDARS if cal.enabled]
@classmethod
def get_calendar(cls, calendar_id: str) -> Optional[CalendarDefinition]:
"""Get calendar by ID"""
for cal in cls.CALENDARS:
if cal.calendar_id == calendar_id:
return cal
return None
@classmethod
def get_calendars_for_city(cls, city_id: str) -> List[CalendarDefinition]:
"""Get all enabled calendars for a specific city"""
return [
cal for cal in cls.CALENDARS
if cal.city_id == city_id and cal.enabled
]
@classmethod
def get_calendar_for_city_and_type(
cls,
city_id: str,
school_type: SchoolType,
academic_year: Optional[str] = None
) -> Optional[CalendarDefinition]:
"""Get specific calendar for city, type, and optionally year"""
for cal in cls.CALENDARS:
if (cal.city_id == city_id and
cal.school_type == school_type and
cal.enabled and
(academic_year is None or cal.academic_year == academic_year)):
return cal
return None
@classmethod
def to_dict(cls, calendar: CalendarDefinition) -> Dict[str, Any]:
"""Convert calendar definition to dictionary for JSON serialization"""
return {
"calendar_id": calendar.calendar_id,
"calendar_name": calendar.calendar_name,
"city_id": calendar.city_id,
"school_type": calendar.school_type.value,
"academic_year": calendar.academic_year,
"holiday_periods": [
{
"name": hp.name,
"start_date": hp.start_date,
"end_date": hp.end_date,
"description": hp.description
}
for hp in calendar.holiday_periods
],
"school_hours": {
"morning_start": calendar.school_hours.morning_start,
"morning_end": calendar.school_hours.morning_end,
"has_afternoon_session": calendar.school_hours.has_afternoon_session,
"afternoon_start": calendar.school_hours.afternoon_start,
"afternoon_end": calendar.school_hours.afternoon_end,
},
"source": calendar.source,
"enabled": calendar.enabled
}
# Local Events Registry for Madrid
@dataclass
class LocalEventDefinition:
"""Local event that impacts demand"""
event_id: str
name: str
city_id: str
date: str # ISO format or "annual-MM-DD" for recurring
impact_level: str # "low", "medium", "high"
description: Optional[str] = None
recurring: bool = False # True for annual events
class LocalEventsRegistry:
"""Registry of local events and festivals"""
MADRID_EVENTS = [
LocalEventDefinition(
event_id="madrid_san_isidro",
name="San Isidro Festival",
city_id="madrid",
date="annual-05-15",
impact_level="high",
description="Madrid's patron saint festival - major citywide celebration",
recurring=True
),
LocalEventDefinition(
event_id="madrid_dos_de_mayo",
name="Dos de Mayo",
city_id="madrid",
date="annual-05-02",
impact_level="medium",
description="Madrid regional holiday",
recurring=True
),
LocalEventDefinition(
event_id="madrid_almudena",
name="Virgen de la Almudena",
city_id="madrid",
date="annual-11-09",
impact_level="medium",
description="Madrid patron saint day",
recurring=True
),
]
@classmethod
def get_events_for_city(cls, city_id: str) -> List[LocalEventDefinition]:
"""Get all local events for a city"""
if city_id == "madrid":
return cls.MADRID_EVENTS
return []
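# Illustrative sketch: checking whether a date falls inside a holiday period of the
# Madrid primary calendar (dates are stored as ISO strings, so date.fromisoformat applies).
if __name__ == "__main__":
    cal = CalendarRegistry.get_calendar_for_city_and_type(
        "madrid", SchoolType.PRIMARY, academic_year="2024-2025"
    )
    check = date(2024, 12, 25)
    on_holiday = any(
        date.fromisoformat(hp.start_date) <= check <= date.fromisoformat(hp.end_date)
        for hp in cal.holiday_periods
    )
    print(f"{check}: school holiday = {on_holiday}")  # True during the Christmas break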

163
services/external/app/registry/city_registry.py vendored Normal file
View File

@@ -0,0 +1,163 @@
# services/external/app/registry/city_registry.py
"""
City Registry - Configuration-driven multi-city support
"""
from dataclasses import dataclass
from typing import List, Optional, Dict, Any
from enum import Enum
import math
class Country(str, Enum):
SPAIN = "ES"
FRANCE = "FR"
class WeatherProvider(str, Enum):
AEMET = "aemet"
METEO_FRANCE = "meteo_france"
OPEN_WEATHER = "open_weather"
class TrafficProvider(str, Enum):
MADRID_OPENDATA = "madrid_opendata"
VALENCIA_OPENDATA = "valencia_opendata"
BARCELONA_OPENDATA = "barcelona_opendata"
@dataclass
class CityDefinition:
"""City configuration with data source specifications"""
city_id: str
name: str
country: Country
latitude: float
longitude: float
radius_km: float
weather_provider: WeatherProvider
weather_config: Dict[str, Any]
traffic_provider: TrafficProvider
traffic_config: Dict[str, Any]
timezone: str
population: int
enabled: bool = True
class CityRegistry:
"""Central registry of supported cities"""
CITIES: List[CityDefinition] = [
CityDefinition(
city_id="madrid",
name="Madrid",
country=Country.SPAIN,
latitude=40.4168,
longitude=-3.7038,
radius_km=30.0,
weather_provider=WeatherProvider.AEMET,
weather_config={
"station_ids": ["3195", "3129", "3197"],
"municipality_code": "28079"
},
traffic_provider=TrafficProvider.MADRID_OPENDATA,
traffic_config={
"current_xml_url": "https://datos.madrid.es/egob/catalogo/...",
"historical_base_url": "https://datos.madrid.es/...",
"measurement_points_csv": "https://datos.madrid.es/..."
},
timezone="Europe/Madrid",
population=3_200_000
),
CityDefinition(
city_id="valencia",
name="Valencia",
country=Country.SPAIN,
latitude=39.4699,
longitude=-0.3763,
radius_km=25.0,
weather_provider=WeatherProvider.AEMET,
weather_config={
"station_ids": ["8416"],
"municipality_code": "46250"
},
traffic_provider=TrafficProvider.VALENCIA_OPENDATA,
traffic_config={
"api_endpoint": "https://valencia.opendatasoft.com/api/..."
},
timezone="Europe/Madrid",
population=800_000,
enabled=False
),
CityDefinition(
city_id="barcelona",
name="Barcelona",
country=Country.SPAIN,
latitude=41.3851,
longitude=2.1734,
radius_km=30.0,
weather_provider=WeatherProvider.AEMET,
weather_config={
"station_ids": ["0076"],
"municipality_code": "08019"
},
traffic_provider=TrafficProvider.BARCELONA_OPENDATA,
traffic_config={
"api_endpoint": "https://opendata-ajuntament.barcelona.cat/..."
},
timezone="Europe/Madrid",
population=1_600_000,
enabled=False
)
]
@classmethod
def get_enabled_cities(cls) -> List[CityDefinition]:
"""Get all enabled cities"""
return [city for city in cls.CITIES if city.enabled]
@classmethod
def get_city(cls, city_id: str) -> Optional[CityDefinition]:
"""Get city by ID"""
for city in cls.CITIES:
if city.city_id == city_id:
return city
return None
@classmethod
def find_nearest_city(cls, latitude: float, longitude: float) -> Optional[CityDefinition]:
"""Find nearest enabled city to coordinates"""
enabled_cities = cls.get_enabled_cities()
if not enabled_cities:
return None
min_distance = float('inf')
nearest_city = None
for city in enabled_cities:
distance = cls._haversine_distance(
latitude, longitude,
city.latitude, city.longitude
)
if distance <= city.radius_km and distance < min_distance:
min_distance = distance
nearest_city = city
return nearest_city
@staticmethod
def _haversine_distance(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
"""Calculate distance in km between two coordinates"""
R = 6371
dlat = math.radians(lat2 - lat1)
dlon = math.radians(lon2 - lon1)
a = (math.sin(dlat/2) ** 2 +
math.cos(math.radians(lat1)) * math.cos(math.radians(lat2)) *
math.sin(dlon/2) ** 2)
c = 2 * math.atan2(math.sqrt(a), math.sqrt(1-a))
return R * c
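
Because the registry is pure class-level configuration, it can be queried without instantiation or a database. A minimal usage sketch (import path taken from the file header comment; coordinates are illustrative):

from app.registry.city_registry import CityRegistry

# A point near central Madrid falls inside the 30 km radius of the only enabled city
city = CityRegistry.find_nearest_city(40.42, -3.70)
if city:
    print(city.city_id, city.weather_provider.value, city.timezone)  # madrid aemet Europe/Madrid

# Paris is not covered by any enabled city, so no match is returned
assert CityRegistry.find_nearest_city(48.85, 2.35) is None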

View File

@@ -0,0 +1,58 @@
# services/external/app/registry/geolocation_mapper.py
"""
Geolocation Mapper - Maps tenant locations to cities
"""
from typing import Optional, Tuple
import structlog
from .city_registry import CityRegistry, CityDefinition
logger = structlog.get_logger()
class GeolocationMapper:
"""Maps tenant coordinates to nearest supported city"""
def __init__(self):
self.registry = CityRegistry()
def map_tenant_to_city(
self,
latitude: float,
longitude: float
) -> Optional[Tuple[CityDefinition, float]]:
"""
Map tenant coordinates to nearest city
Returns:
Tuple of (CityDefinition, distance_km) or None if no match
"""
nearest_city = self.registry.find_nearest_city(latitude, longitude)
if not nearest_city:
logger.warning(
"No supported city found for coordinates",
lat=latitude,
lon=longitude
)
return None
distance = self.registry._haversine_distance(
latitude, longitude,
nearest_city.latitude, nearest_city.longitude
)
logger.info(
"Mapped tenant to city",
lat=latitude,
lon=longitude,
city=nearest_city.name,
distance_km=round(distance, 2)
)
return (nearest_city, distance)
def validate_location_support(self, latitude: float, longitude: float) -> bool:
"""Check if coordinates are supported"""
result = self.map_tenant_to_city(latitude, longitude)
return result is not None
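
A short sketch of how the mapper is used; logging goes through the module-level structlog logger, so no extra setup is strictly required:

from app.registry.geolocation_mapper import GeolocationMapper

mapper = GeolocationMapper()
match = mapper.map_tenant_to_city(40.45, -3.69)  # a tenant in central Madrid
if match:
    city, distance_km = match
    print(f"{city.name}: {distance_km:.1f} km from the city reference point")

print(mapper.validate_location_support(48.85, 2.35))  # False: Paris is unsupported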


View File

@@ -0,0 +1,329 @@
# services/external/app/repositories/calendar_repository.py
"""
Calendar Repository - Manages school calendars and tenant location contexts
"""
from typing import List, Dict, Any, Optional
from datetime import datetime
from sqlalchemy import select, and_, or_
from sqlalchemy.ext.asyncio import AsyncSession
import structlog
import uuid
from app.models.calendar import SchoolCalendar, TenantLocationContext
logger = structlog.get_logger()
class CalendarRepository:
"""Repository for school calendar and tenant location data"""
def __init__(self, session: AsyncSession):
self.session = session
# ===== School Calendar Operations =====
async def create_school_calendar(
self,
city_id: str,
calendar_name: str,
school_type: str,
academic_year: str,
holiday_periods: List[Dict[str, Any]],
school_hours: Dict[str, Any],
source: Optional[str] = None,
enabled: bool = True
) -> SchoolCalendar:
"""Create a new school calendar"""
try:
calendar = SchoolCalendar(
id=uuid.uuid4(),
city_id=city_id,
calendar_name=calendar_name,
school_type=school_type,
academic_year=academic_year,
holiday_periods=holiday_periods,
school_hours=school_hours,
source=source,
enabled=enabled
)
self.session.add(calendar)
await self.session.commit()
await self.session.refresh(calendar)
logger.info(
"School calendar created",
calendar_id=str(calendar.id),
city_id=city_id,
school_type=school_type
)
return calendar
except Exception as e:
await self.session.rollback()
logger.error(
"Error creating school calendar",
city_id=city_id,
error=str(e)
)
raise
async def get_calendar_by_id(
self,
calendar_id: uuid.UUID
) -> Optional[SchoolCalendar]:
"""Get school calendar by ID"""
stmt = select(SchoolCalendar).where(SchoolCalendar.id == calendar_id)
result = await self.session.execute(stmt)
return result.scalar_one_or_none()
async def get_calendars_by_city(
self,
city_id: str,
enabled_only: bool = True
) -> List[SchoolCalendar]:
"""Get all school calendars for a city"""
stmt = select(SchoolCalendar).where(SchoolCalendar.city_id == city_id)
if enabled_only:
stmt = stmt.where(SchoolCalendar.enabled == True)
stmt = stmt.order_by(SchoolCalendar.academic_year.desc(), SchoolCalendar.school_type)
result = await self.session.execute(stmt)
return list(result.scalars().all())
async def get_calendar_by_city_type_year(
self,
city_id: str,
school_type: str,
academic_year: str
) -> Optional[SchoolCalendar]:
"""Get specific calendar by city, type, and year"""
stmt = select(SchoolCalendar).where(
and_(
SchoolCalendar.city_id == city_id,
SchoolCalendar.school_type == school_type,
SchoolCalendar.academic_year == academic_year,
SchoolCalendar.enabled == True
)
)
result = await self.session.execute(stmt)
return result.scalar_one_or_none()
async def update_calendar(
self,
calendar_id: uuid.UUID,
**kwargs
) -> Optional[SchoolCalendar]:
"""Update school calendar"""
try:
calendar = await self.get_calendar_by_id(calendar_id)
if not calendar:
return None
for key, value in kwargs.items():
if hasattr(calendar, key):
setattr(calendar, key, value)
calendar.updated_at = datetime.utcnow()
await self.session.commit()
await self.session.refresh(calendar)
logger.info(
"School calendar updated",
calendar_id=str(calendar_id),
fields=list(kwargs.keys())
)
return calendar
except Exception as e:
await self.session.rollback()
logger.error(
"Error updating school calendar",
calendar_id=str(calendar_id),
error=str(e)
)
raise
async def delete_calendar(self, calendar_id: uuid.UUID) -> bool:
"""Delete school calendar"""
try:
calendar = await self.get_calendar_by_id(calendar_id)
if not calendar:
return False
await self.session.delete(calendar)
await self.session.commit()
logger.info("School calendar deleted", calendar_id=str(calendar_id))
return True
except Exception as e:
await self.session.rollback()
logger.error(
"Error deleting school calendar",
calendar_id=str(calendar_id),
error=str(e)
)
raise
# ===== Tenant Location Context Operations =====
async def create_or_update_tenant_location_context(
self,
tenant_id: uuid.UUID,
city_id: str,
school_calendar_id: Optional[uuid.UUID] = None,
neighborhood: Optional[str] = None,
local_events: Optional[List[Dict[str, Any]]] = None,
notes: Optional[str] = None
) -> TenantLocationContext:
"""Create or update tenant location context"""
try:
# Check if context exists
existing = await self.get_tenant_location_context(tenant_id)
if existing:
# Update existing
existing.city_id = city_id
if school_calendar_id is not None:
existing.school_calendar_id = school_calendar_id
if neighborhood is not None:
existing.neighborhood = neighborhood
if local_events is not None:
existing.local_events = local_events
if notes is not None:
existing.notes = notes
existing.updated_at = datetime.utcnow()
await self.session.commit()
await self.session.refresh(existing)
logger.info(
"Tenant location context updated",
tenant_id=str(tenant_id)
)
return existing
else:
# Create new
context = TenantLocationContext(
tenant_id=tenant_id,
city_id=city_id,
school_calendar_id=school_calendar_id,
neighborhood=neighborhood,
local_events=local_events or [],
notes=notes
)
self.session.add(context)
await self.session.commit()
await self.session.refresh(context)
logger.info(
"Tenant location context created",
tenant_id=str(tenant_id),
city_id=city_id
)
return context
except Exception as e:
await self.session.rollback()
logger.error(
"Error creating/updating tenant location context",
tenant_id=str(tenant_id),
error=str(e)
)
raise
async def get_tenant_location_context(
self,
tenant_id: uuid.UUID
) -> Optional[TenantLocationContext]:
"""Get tenant location context"""
stmt = select(TenantLocationContext).where(
TenantLocationContext.tenant_id == tenant_id
)
result = await self.session.execute(stmt)
return result.scalar_one_or_none()
async def get_tenant_with_calendar(
self,
tenant_id: uuid.UUID
) -> Optional[Dict[str, Any]]:
"""Get tenant location context with full calendar details"""
context = await self.get_tenant_location_context(tenant_id)
if not context:
return None
result = {
"tenant_id": str(context.tenant_id),
"city_id": context.city_id,
"neighborhood": context.neighborhood,
"local_events": context.local_events,
"notes": context.notes,
"calendar": None
}
if context.school_calendar_id:
calendar = await self.get_calendar_by_id(context.school_calendar_id)
if calendar:
result["calendar"] = {
"calendar_id": str(calendar.id),
"calendar_name": calendar.calendar_name,
"school_type": calendar.school_type,
"academic_year": calendar.academic_year,
"holiday_periods": calendar.holiday_periods,
"school_hours": calendar.school_hours,
"source": calendar.source
}
return result
async def delete_tenant_location_context(
self,
tenant_id: uuid.UUID
) -> bool:
"""Delete tenant location context"""
try:
context = await self.get_tenant_location_context(tenant_id)
if not context:
return False
await self.session.delete(context)
await self.session.commit()
logger.info(
"Tenant location context deleted",
tenant_id=str(tenant_id)
)
return True
except Exception as e:
await self.session.rollback()
logger.error(
"Error deleting tenant location context",
tenant_id=str(tenant_id),
error=str(e)
)
raise
# ===== Helper Methods =====
async def get_all_tenants_for_calendar(
self,
calendar_id: uuid.UUID
) -> List[TenantLocationContext]:
"""Get all tenants using a specific calendar"""
stmt = select(TenantLocationContext).where(
TenantLocationContext.school_calendar_id == calendar_id
)
result = await self.session.execute(stmt)
return list(result.scalars().all())
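
A hedged usage sketch of the repository. The connection URL, the session factory and the pre-existing tables are assumptions made only for illustration; the real service obtains its session through its own database layer:

import asyncio
from sqlalchemy.ext.asyncio import create_async_engine, async_sessionmaker

from app.repositories.calendar_repository import CalendarRepository

# Hypothetical local URL for illustration; production settings provide the real DSN.
engine = create_async_engine("postgresql+asyncpg://external_user:password@localhost:5432/external_db")
SessionLocal = async_sessionmaker(engine, expire_on_commit=False)

async def demo() -> None:
    async with SessionLocal() as session:
        repo = CalendarRepository(session)
        await repo.create_school_calendar(
            city_id="madrid",
            calendar_name="Madrid Primary School Calendar 2024-2025",
            school_type="primary",
            academic_year="2024-2025",
            holiday_periods=[{"name": "Christmas Holiday",
                              "start_date": "2024-12-23", "end_date": "2025-01-07"}],
            school_hours={"morning_start": "09:00", "morning_end": "14:00"},
            source="madrid_education_dept_2024",
        )
        calendars = await repo.get_calendars_by_city("madrid")
        print([c.calendar_name for c in calendars])

asyncio.run(demo())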

View File

@@ -0,0 +1,249 @@
# services/external/app/repositories/city_data_repository.py
"""
City Data Repository - Manages shared city-based data storage
"""
from typing import List, Dict, Any, Optional
from datetime import datetime
from sqlalchemy import select, delete, and_
from sqlalchemy.ext.asyncio import AsyncSession
import structlog
from app.models.city_weather import CityWeatherData
from app.models.city_traffic import CityTrafficData
logger = structlog.get_logger()
class CityDataRepository:
"""Repository for city-based historical data"""
def __init__(self, session: AsyncSession):
self.session = session
async def bulk_store_weather(
self,
city_id: str,
weather_records: List[Dict[str, Any]]
) -> int:
"""Bulk insert weather records for a city"""
if not weather_records:
return 0
try:
objects = []
for record in weather_records:
obj = CityWeatherData(
city_id=city_id,
date=record.get('date'),
temperature=record.get('temperature'),
precipitation=record.get('precipitation'),
humidity=record.get('humidity'),
wind_speed=record.get('wind_speed'),
pressure=record.get('pressure'),
description=record.get('description'),
source=record.get('source', 'ingestion'),
raw_data=record.get('raw_data')
)
objects.append(obj)
self.session.add_all(objects)
await self.session.commit()
logger.info(
"Weather data stored",
city_id=city_id,
records=len(objects)
)
return len(objects)
except Exception as e:
await self.session.rollback()
logger.error(
"Error storing weather data",
city_id=city_id,
error=str(e)
)
raise
async def get_weather_by_city_and_range(
self,
city_id: str,
start_date: datetime,
end_date: datetime
) -> List[CityWeatherData]:
"""Get weather data for city within date range"""
stmt = select(CityWeatherData).where(
and_(
CityWeatherData.city_id == city_id,
CityWeatherData.date >= start_date,
CityWeatherData.date <= end_date
)
).order_by(CityWeatherData.date)
result = await self.session.execute(stmt)
return result.scalars().all()
async def delete_weather_before(
self,
city_id: str,
cutoff_date: datetime
) -> int:
"""Delete weather records older than cutoff date"""
stmt = delete(CityWeatherData).where(
and_(
CityWeatherData.city_id == city_id,
CityWeatherData.date < cutoff_date
)
)
result = await self.session.execute(stmt)
await self.session.commit()
return result.rowcount
async def bulk_store_traffic(
self,
city_id: str,
traffic_records: List[Dict[str, Any]]
) -> int:
"""Bulk insert traffic records for a city"""
if not traffic_records:
return 0
try:
objects = []
for record in traffic_records:
obj = CityTrafficData(
city_id=city_id,
date=record.get('date'),
traffic_volume=record.get('traffic_volume'),
pedestrian_count=record.get('pedestrian_count'),
congestion_level=record.get('congestion_level'),
average_speed=record.get('average_speed'),
source=record.get('source', 'ingestion'),
raw_data=record.get('raw_data')
)
objects.append(obj)
self.session.add_all(objects)
await self.session.commit()
logger.info(
"Traffic data stored",
city_id=city_id,
records=len(objects)
)
return len(objects)
except Exception as e:
await self.session.rollback()
logger.error(
"Error storing traffic data",
city_id=city_id,
error=str(e)
)
raise
async def get_traffic_by_city_and_range(
self,
city_id: str,
start_date: datetime,
end_date: datetime
) -> List[CityTrafficData]:
"""Get traffic data for city within date range - aggregated daily"""
from sqlalchemy import func, cast, Date
# Aggregate hourly data to daily averages to avoid loading hundreds of thousands of records
stmt = select(
cast(CityTrafficData.date, Date).label('date'),
func.avg(CityTrafficData.traffic_volume).label('traffic_volume'),
func.avg(CityTrafficData.pedestrian_count).label('pedestrian_count'),
func.avg(CityTrafficData.average_speed).label('average_speed'),
func.max(CityTrafficData.source).label('source')
).where(
and_(
CityTrafficData.city_id == city_id,
CityTrafficData.date >= start_date,
CityTrafficData.date <= end_date
)
).group_by(
cast(CityTrafficData.date, Date)
).order_by(
cast(CityTrafficData.date, Date)
)
result = await self.session.execute(stmt)
# Convert aggregated rows to CityTrafficData objects
traffic_records = []
for row in result:
record = CityTrafficData(
city_id=city_id,
date=datetime.combine(row.date, datetime.min.time()),
                traffic_volume=int(row.traffic_volume) if row.traffic_volume is not None else None,
                pedestrian_count=int(row.pedestrian_count) if row.pedestrian_count is not None else None,
                congestion_level='medium',  # Default since we're averaging
                average_speed=float(row.average_speed) if row.average_speed is not None else None,
source=row.source or 'aggregated'
)
traffic_records.append(record)
return traffic_records
async def delete_traffic_before(
self,
city_id: str,
cutoff_date: datetime
) -> int:
"""Delete traffic records older than cutoff date"""
stmt = delete(CityTrafficData).where(
and_(
CityTrafficData.city_id == city_id,
CityTrafficData.date < cutoff_date
)
)
result = await self.session.execute(stmt)
await self.session.commit()
return result.rowcount
async def get_data_coverage(
self,
city_id: str,
start_date: datetime,
end_date: datetime
) -> Dict[str, int]:
"""
Check how much data exists for a city in a date range
Returns dict with counts: {'weather': X, 'traffic': Y}
"""
        from sqlalchemy import func
        # Count weather records with COUNT(*) instead of loading every row
        weather_stmt = select(func.count()).select_from(CityWeatherData).where(
            and_(
                CityWeatherData.city_id == city_id,
                CityWeatherData.date >= start_date,
                CityWeatherData.date <= end_date
            )
        )
        weather_count = (await self.session.execute(weather_stmt)).scalar_one()
        # Count traffic records the same way
        traffic_stmt = select(func.count()).select_from(CityTrafficData).where(
            and_(
                CityTrafficData.city_id == city_id,
                CityTrafficData.date >= start_date,
                CityTrafficData.date <= end_date
            )
        )
        traffic_count = (await self.session.execute(traffic_stmt)).scalar_one()
return {
'weather': weather_count,
'traffic': traffic_count
}
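
A sketch showing the record shape expected by bulk_store_weather and how coverage can be checked afterwards; the connection URL is a placeholder and the tables are assumed to already exist:

import asyncio
from datetime import datetime
from sqlalchemy.ext.asyncio import create_async_engine, async_sessionmaker

from app.repositories.city_data_repository import CityDataRepository

# Hypothetical DSN for illustration only.
engine = create_async_engine("postgresql+asyncpg://external_user:password@localhost:5432/external_db")
SessionLocal = async_sessionmaker(engine, expire_on_commit=False)

async def demo() -> None:
    async with SessionLocal() as session:
        repo = CityDataRepository(session)
        stored = await repo.bulk_store_weather("madrid", [{
            "date": datetime(2025, 1, 10, 12, 0),
            "temperature": 8.4,
            "precipitation": 0.0,
            "humidity": 62.0,
            "source": "aemet",
        }])
        coverage = await repo.get_data_coverage(
            "madrid", datetime(2025, 1, 1), datetime(2025, 1, 31)
        )
        print(stored, coverage)  # e.g. 1 {'weather': 1, 'traffic': 0}

asyncio.run(demo())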

View File

@@ -0,0 +1,271 @@
"""
POI Context Repository
Data access layer for TenantPOIContext model.
Handles CRUD operations for POI detection results and ML features.
"""
from typing import Optional, List
from datetime import datetime, timezone
from sqlalchemy import select, update, delete
from sqlalchemy.ext.asyncio import AsyncSession
import structlog
import uuid
from app.models.poi_context import TenantPOIContext
logger = structlog.get_logger()
class POIContextRepository:
"""
Repository for POI context data access.
Manages storage and retrieval of POI detection results
and ML features for tenant locations.
"""
def __init__(self, session: AsyncSession):
"""
Initialize repository.
Args:
session: SQLAlchemy async session
"""
self.session = session
async def create(self, poi_context_data: dict) -> TenantPOIContext:
"""
Create new POI context record.
Args:
poi_context_data: Dictionary with POI context data
Returns:
Created TenantPOIContext instance
"""
poi_context = TenantPOIContext(
tenant_id=poi_context_data["tenant_id"],
latitude=poi_context_data["latitude"],
longitude=poi_context_data["longitude"],
poi_detection_results=poi_context_data.get("poi_detection_results", {}),
ml_features=poi_context_data.get("ml_features", {}),
total_pois_detected=poi_context_data.get("total_pois_detected", 0),
high_impact_categories=poi_context_data.get("high_impact_categories", []),
relevant_categories=poi_context_data.get("relevant_categories", []),
detection_timestamp=poi_context_data.get(
"detection_timestamp",
datetime.now(timezone.utc)
),
detection_source=poi_context_data.get("detection_source", "overpass_api"),
detection_status=poi_context_data.get("detection_status", "completed"),
detection_error=poi_context_data.get("detection_error"),
refresh_interval_days=poi_context_data.get("refresh_interval_days", 180)
)
# Calculate next refresh date
poi_context.next_refresh_date = poi_context.calculate_next_refresh()
self.session.add(poi_context)
await self.session.commit()
await self.session.refresh(poi_context)
logger.info(
"POI context created",
tenant_id=str(poi_context.tenant_id),
total_pois=poi_context.total_pois_detected
)
return poi_context
async def get_by_tenant_id(self, tenant_id: str | uuid.UUID) -> Optional[TenantPOIContext]:
"""
Get POI context by tenant ID.
Args:
tenant_id: Tenant UUID
Returns:
TenantPOIContext or None if not found
"""
if isinstance(tenant_id, str):
tenant_id = uuid.UUID(tenant_id)
stmt = select(TenantPOIContext).where(
TenantPOIContext.tenant_id == tenant_id
)
result = await self.session.execute(stmt)
return result.scalar_one_or_none()
async def get_by_id(self, poi_context_id: str | uuid.UUID) -> Optional[TenantPOIContext]:
"""
Get POI context by ID.
Args:
poi_context_id: POI context UUID
Returns:
TenantPOIContext or None if not found
"""
if isinstance(poi_context_id, str):
poi_context_id = uuid.UUID(poi_context_id)
stmt = select(TenantPOIContext).where(
TenantPOIContext.id == poi_context_id
)
result = await self.session.execute(stmt)
return result.scalar_one_or_none()
async def update(
self,
tenant_id: str | uuid.UUID,
update_data: dict
) -> Optional[TenantPOIContext]:
"""
Update POI context for tenant.
Args:
tenant_id: Tenant UUID
update_data: Dictionary with fields to update
Returns:
Updated TenantPOIContext or None if not found
"""
if isinstance(tenant_id, str):
tenant_id = uuid.UUID(tenant_id)
poi_context = await self.get_by_tenant_id(tenant_id)
if not poi_context:
return None
# Update fields
for key, value in update_data.items():
if hasattr(poi_context, key):
setattr(poi_context, key, value)
# Update timestamp
poi_context.updated_at = datetime.now(timezone.utc)
await self.session.commit()
await self.session.refresh(poi_context)
logger.info(
"POI context updated",
tenant_id=str(tenant_id),
updated_fields=list(update_data.keys())
)
return poi_context
async def create_or_update(
self,
tenant_id: str | uuid.UUID,
poi_detection_results: dict
) -> TenantPOIContext:
"""
Create new POI context or update existing one.
Args:
tenant_id: Tenant UUID
poi_detection_results: Full POI detection results
Returns:
Created or updated TenantPOIContext
"""
if isinstance(tenant_id, str):
tenant_id = uuid.UUID(tenant_id)
existing = await self.get_by_tenant_id(tenant_id)
poi_context_data = {
"tenant_id": tenant_id,
"latitude": poi_detection_results["location"]["latitude"],
"longitude": poi_detection_results["location"]["longitude"],
"poi_detection_results": poi_detection_results.get("poi_categories", {}),
"ml_features": poi_detection_results.get("ml_features", {}),
"total_pois_detected": poi_detection_results.get("summary", {}).get("total_pois_detected", 0),
"high_impact_categories": poi_detection_results.get("summary", {}).get("high_impact_categories", []),
"relevant_categories": poi_detection_results.get("relevant_categories", []),
"detection_timestamp": datetime.fromisoformat(
poi_detection_results["detection_timestamp"].replace("Z", "+00:00")
) if isinstance(poi_detection_results.get("detection_timestamp"), str)
else datetime.now(timezone.utc),
"detection_status": poi_detection_results.get("detection_status", "completed"),
"detection_error": None if poi_detection_results.get("detection_status") == "completed"
else str(poi_detection_results.get("detection_errors"))
}
if existing:
# Update existing
update_data = {
**poi_context_data,
"last_refreshed_at": datetime.now(timezone.utc)
}
existing.mark_refreshed() # Update next_refresh_date
return await self.update(tenant_id, update_data)
else:
# Create new
return await self.create(poi_context_data)
async def delete_by_tenant_id(self, tenant_id: str | uuid.UUID) -> bool:
"""
Delete POI context for tenant.
Args:
tenant_id: Tenant UUID
Returns:
True if deleted, False if not found
"""
if isinstance(tenant_id, str):
tenant_id = uuid.UUID(tenant_id)
stmt = delete(TenantPOIContext).where(
TenantPOIContext.tenant_id == tenant_id
)
result = await self.session.execute(stmt)
await self.session.commit()
deleted = result.rowcount > 0
if deleted:
logger.info("POI context deleted", tenant_id=str(tenant_id))
return deleted
async def get_stale_contexts(self, limit: int = 100) -> List[TenantPOIContext]:
"""
Get POI contexts that need refresh.
Args:
limit: Maximum number of contexts to return
Returns:
List of stale TenantPOIContext instances
"""
now = datetime.now(timezone.utc)
stmt = (
select(TenantPOIContext)
.where(TenantPOIContext.next_refresh_date <= now)
.limit(limit)
)
result = await self.session.execute(stmt)
return list(result.scalars().all())
async def count_by_status(self) -> dict:
"""
Count POI contexts by detection status.
Returns:
Dictionary with counts by status
"""
from sqlalchemy import func
stmt = select(
TenantPOIContext.detection_status,
func.count(TenantPOIContext.id)
).group_by(TenantPOIContext.detection_status)
result = await self.session.execute(stmt)
rows = result.all()
return {status: count for status, count in rows}
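
The create_or_update method expects the detector's result dict in a particular shape (location, poi_categories, ml_features, summary, relevant_categories, detection_timestamp, detection_status). A minimal illustration of that contract with placeholder values:

from datetime import datetime, timezone

# Placeholder payload; keys follow the parsing in create_or_update above, values are illustrative.
poi_detection_results = {
    "location": {"latitude": 40.4168, "longitude": -3.7038},
    "poi_categories": {"schools": [{"name": "CEIP Example", "distance_m": 140.0}]},
    "ml_features": {"poi_school_within_300m": 1},
    "summary": {"total_pois_detected": 1, "high_impact_categories": ["schools"]},
    "relevant_categories": ["schools"],
    "detection_timestamp": datetime.now(timezone.utc).isoformat(),
    "detection_status": "completed",
}
# Inside an async session context:
#     await POIContextRepository(session).create_or_update(tenant_id, poi_detection_results)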

View File

@@ -0,0 +1,226 @@
# ================================================================
# services/data/app/repositories/traffic_repository.py
# ================================================================
"""
Traffic Repository - Enhanced for multiple cities with comprehensive data access patterns
Follows existing repository architecture while adding city-specific functionality
"""
from typing import Optional, List, Dict, Any, Type, Tuple
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select, and_, or_, func, desc, asc, text, update, delete
from sqlalchemy.orm import selectinload
from datetime import datetime, timezone, timedelta
import structlog
from app.models.traffic import TrafficData
from app.schemas.traffic import TrafficDataCreate, TrafficDataResponse
from shared.database.exceptions import DatabaseError, ValidationError
logger = structlog.get_logger()
class TrafficRepository:
"""
Enhanced repository for traffic data operations across multiple cities
Provides city-aware queries and advanced traffic analytics
"""
def __init__(self, session: AsyncSession):
self.session = session
self.model = TrafficData
# ================================================================
# CORE TRAFFIC DATA OPERATIONS
# ================================================================
async def get_by_location_and_date_range(
self,
latitude: float,
longitude: float,
start_date: datetime,
end_date: datetime,
tenant_id: Optional[str] = None
) -> List[TrafficData]:
"""Get traffic data by location and date range"""
try:
location_id = f"{latitude:.4f},{longitude:.4f}"
# Build base query
query = select(self.model).where(self.model.location_id == location_id)
# Add tenant filter if specified
if tenant_id:
query = query.where(self.model.tenant_id == tenant_id)
# Add date range filters
if start_date:
query = query.where(self.model.date >= start_date)
if end_date:
query = query.where(self.model.date <= end_date)
# Order by date
query = query.order_by(self.model.date)
result = await self.session.execute(query)
return result.scalars().all()
except Exception as e:
logger.error("Failed to get traffic data by location and date range",
latitude=latitude, longitude=longitude,
error=str(e))
raise DatabaseError(f"Failed to get traffic data: {str(e)}")
async def store_traffic_data_batch(
self,
traffic_data_list: List[Dict[str, Any]],
location_id: str,
tenant_id: Optional[str] = None
) -> int:
"""Store a batch of traffic data records with enhanced validation and duplicate handling."""
stored_count = 0
try:
if not traffic_data_list:
return 0
# Check for existing records to avoid duplicates - batch the queries to avoid parameter limit
dates = [data.get('date') for data in traffic_data_list if data.get('date')]
existing_dates = set()
if dates:
# PostgreSQL has a limit of 32767 parameters, so batch the queries
batch_size = 30000 # Safe batch size under the limit
for i in range(0, len(dates), batch_size):
date_batch = dates[i:i + batch_size]
existing_stmt = select(TrafficData.date).where(
and_(
TrafficData.location_id == location_id,
TrafficData.date.in_(date_batch)
)
)
result = await self.session.execute(existing_stmt)
existing_dates.update({row[0] for row in result.fetchall()})
logger.debug(f"Found {len(existing_dates)} existing records for location {location_id}")
batch_records = []
for data in traffic_data_list:
record_date = data.get('date')
if not record_date or record_date in existing_dates:
continue # Skip duplicates
# Validate data before preparing for insertion
if self._validate_traffic_data(data):
batch_records.append({
'location_id': location_id,
'city': data.get('city', 'madrid'), # Default to madrid for historical data
'tenant_id': tenant_id, # Include tenant_id in batch insert
'date': record_date,
'traffic_volume': data.get('traffic_volume'),
'pedestrian_count': data.get('pedestrian_count'),
'congestion_level': data.get('congestion_level'),
'average_speed': data.get('average_speed'),
'source': data.get('source', 'unknown'),
'raw_data': str(data)
})
if batch_records:
# Use bulk insert for performance
await self.session.execute(
TrafficData.__table__.insert(),
batch_records
)
await self.session.commit()
stored_count = len(batch_records)
logger.info(f"Successfully stored {stored_count} traffic records for location {location_id}")
except Exception as e:
logger.error("Failed to store traffic data batch",
error=str(e), location_id=location_id)
await self.session.rollback()
raise DatabaseError(f"Batch store failed: {str(e)}")
return stored_count
def _validate_traffic_data(self, data: Dict[str, Any]) -> bool:
"""Validate traffic data before storage"""
required_fields = ['date']
# Check required fields
for field in required_fields:
if not data.get(field):
return False
# Validate data types and ranges
traffic_volume = data.get('traffic_volume')
if traffic_volume is not None and (traffic_volume < 0 or traffic_volume > 10000):
return False
pedestrian_count = data.get('pedestrian_count')
if pedestrian_count is not None and (pedestrian_count < 0 or pedestrian_count > 10000):
return False
average_speed = data.get('average_speed')
if average_speed is not None and (average_speed < 0 or average_speed > 200):
return False
congestion_level = data.get('congestion_level')
if congestion_level and congestion_level not in ['low', 'medium', 'high', 'blocked']:
return False
return True
async def get_historical_traffic_for_training(self,
latitude: float,
longitude: float,
start_date: datetime,
end_date: datetime) -> List[TrafficData]:
"""Retrieve stored traffic data for training ML models."""
try:
location_id = f"{latitude:.4f},{longitude:.4f}"
stmt = select(TrafficData).where(
and_(
TrafficData.location_id == location_id,
TrafficData.date >= start_date,
TrafficData.date <= end_date
)
).order_by(TrafficData.date)
result = await self.session.execute(stmt)
return result.scalars().all()
except Exception as e:
logger.error("Failed to retrieve traffic data for training",
error=str(e), location_id=location_id)
raise DatabaseError(f"Training data retrieval failed: {str(e)}")
async def get_recent_by_location(
self,
latitude: float,
longitude: float,
cutoff_datetime: datetime,
tenant_id: Optional[str] = None
) -> List[TrafficData]:
"""Get recent traffic data by location after a cutoff datetime"""
try:
location_id = f"{latitude:.4f},{longitude:.4f}"
stmt = select(TrafficData).where(
and_(
TrafficData.location_id == location_id,
TrafficData.date >= cutoff_datetime
)
).order_by(TrafficData.date.desc())
result = await self.session.execute(stmt)
records = result.scalars().all()
logger.info("Retrieved recent traffic data",
location_id=location_id, count=len(records),
cutoff=cutoff_datetime.isoformat())
return records
except Exception as e:
logger.error("Failed to retrieve recent traffic data",
error=str(e), location_id=f"{latitude:.4f},{longitude:.4f}")
raise DatabaseError(f"Recent traffic data retrieval failed: {str(e)}")

View File

@@ -0,0 +1,138 @@
# services/external/app/repositories/weather_repository.py
from typing import List, Dict, Any, Optional
from datetime import datetime
from sqlalchemy import select, and_
from sqlalchemy.ext.asyncio import AsyncSession
import structlog
import json
from app.models.weather import WeatherData
logger = structlog.get_logger()
class WeatherRepository:
"""
Repository for weather data operations, adapted for WeatherService.
"""
def __init__(self, session: AsyncSession):
self.session = session
async def get_historical_weather(self,
location_id: str,
start_date: datetime,
end_date: datetime) -> List[WeatherData]:
"""
Retrieves historical weather data for a specific location and date range.
This method directly supports the data retrieval logic in WeatherService.
"""
try:
stmt = select(WeatherData).where(
and_(
WeatherData.location_id == location_id,
WeatherData.date >= start_date,
WeatherData.date <= end_date
)
).order_by(WeatherData.date)
result = await self.session.execute(stmt)
records = result.scalars().all()
logger.debug(f"Retrieved {len(records)} historical records for location {location_id}")
return list(records)
except Exception as e:
logger.error(
"Failed to get historical weather from repository",
error=str(e),
location_id=location_id
)
raise
def _serialize_json_fields(self, data: Dict[str, Any]) -> Dict[str, Any]:
"""
Serialize JSON fields (raw_data, processed_data) to ensure proper JSON storage
"""
serialized = data.copy()
# Serialize raw_data if present
if 'raw_data' in serialized and serialized['raw_data'] is not None:
if not isinstance(serialized['raw_data'], str):
try:
# Convert datetime objects to strings for JSON serialization
raw_data = serialized['raw_data']
if isinstance(raw_data, dict):
# Handle datetime objects in the dict
json_safe_data = {}
for k, v in raw_data.items():
if hasattr(v, 'isoformat'): # datetime-like object
json_safe_data[k] = v.isoformat()
else:
json_safe_data[k] = v
serialized['raw_data'] = json_safe_data
except Exception as e:
logger.warning(f"Could not serialize raw_data, storing as string: {e}")
serialized['raw_data'] = str(raw_data)
# Serialize processed_data if present
if 'processed_data' in serialized and serialized['processed_data'] is not None:
if not isinstance(serialized['processed_data'], str):
try:
processed_data = serialized['processed_data']
if isinstance(processed_data, dict):
json_safe_data = {}
for k, v in processed_data.items():
if hasattr(v, 'isoformat'): # datetime-like object
json_safe_data[k] = v.isoformat()
else:
json_safe_data[k] = v
serialized['processed_data'] = json_safe_data
except Exception as e:
logger.warning(f"Could not serialize processed_data, storing as string: {e}")
serialized['processed_data'] = str(processed_data)
return serialized
async def bulk_create_weather_data(self, weather_records: List[Dict[str, Any]]) -> None:
"""
Bulk inserts new weather records into the database.
Used by WeatherService after fetching new historical data from an external API.
"""
try:
if not weather_records:
return
# Serialize JSON fields before creating model instances
serialized_records = [self._serialize_json_fields(data) for data in weather_records]
records = [WeatherData(**data) for data in serialized_records]
self.session.add_all(records)
await self.session.commit()
logger.info(f"Successfully bulk inserted {len(records)} weather records")
except Exception as e:
await self.session.rollback()
logger.error(
"Failed to bulk create weather records",
error=str(e),
count=len(weather_records)
)
raise
async def create_weather_data(self, data: Dict[str, Any]) -> WeatherData:
"""
Creates a single new weather data record.
"""
try:
# Serialize JSON fields before creating model instance
serialized_data = self._serialize_json_fields(data)
new_record = WeatherData(**serialized_data)
self.session.add(new_record)
await self.session.commit()
await self.session.refresh(new_record)
logger.info(f"Created new weather record with ID {new_record.id}")
return new_record
except Exception as e:
await self.session.rollback()
logger.error("Failed to create single weather record", error=str(e))
raise
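
A quick sketch of what _serialize_json_fields does to datetime values before persistence. Passing session=None is only acceptable here because the serializer never touches the session; real calls go through an AsyncSession:

from datetime import datetime

from app.repositories.weather_repository import WeatherRepository

repo = WeatherRepository(session=None)  # safe only because _serialize_json_fields ignores the session
record = {
    "location_id": "40.4168,-3.7038",
    "date": datetime(2025, 1, 10, 12, 0),
    "temperature": 8.4,
    "raw_data": {"observed_at": datetime(2025, 1, 10, 12, 0), "station": "3195"},
}
clean = repo._serialize_json_fields(record)
print(clean["raw_data"])  # {'observed_at': '2025-01-10T12:00:00', 'station': '3195'}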

View File

@@ -0,0 +1 @@
# services/external/app/schemas/__init__.py

View File

@@ -0,0 +1,134 @@
# services/external/app/schemas/calendar.py
"""
Calendar Schemas - Request/Response types for school calendars and location context
"""
from pydantic import BaseModel, Field
from typing import Optional, List, Dict, Any
from uuid import UUID
class SchoolCalendarResponse(BaseModel):
"""School calendar information"""
calendar_id: str
calendar_name: str
city_id: str
school_type: str
academic_year: str
holiday_periods: List[Dict[str, Any]]
school_hours: Dict[str, Any]
source: Optional[str] = None
enabled: bool = True
class Config:
json_schema_extra = {
"example": {
"calendar_id": "madrid_primary_2024_2025",
"calendar_name": "Madrid Primary School Calendar 2024-2025",
"city_id": "madrid",
"school_type": "primary",
"academic_year": "2024-2025",
"holiday_periods": [
{
"name": "Christmas Holiday",
"start_date": "2024-12-23",
"end_date": "2025-01-07",
"description": "Christmas and New Year break"
}
],
"school_hours": {
"morning_start": "09:00",
"morning_end": "14:00",
"has_afternoon_session": False
},
"source": "madrid_education_dept_2024",
"enabled": True
}
}
class SchoolCalendarListResponse(BaseModel):
"""List of school calendars for a city"""
city_id: str
calendars: List[SchoolCalendarResponse]
total: int
class CalendarCheckResponse(BaseModel):
"""Response for holiday check"""
date: str = Field(..., description="Date checked (ISO format)")
is_holiday: bool = Field(..., description="Whether the date is a school holiday")
holiday_name: Optional[str] = Field(None, description="Name of the holiday if applicable")
calendar_id: str
calendar_name: str
class TenantLocationContextResponse(BaseModel):
"""Tenant location context with calendar details"""
tenant_id: str
city_id: str
neighborhood: Optional[str] = None
local_events: Optional[List[Dict[str, Any]]] = None
notes: Optional[str] = None
calendar: Optional[Dict[str, Any]] = Field(
None,
description="Full calendar details if assigned"
)
class Config:
json_schema_extra = {
"example": {
"tenant_id": "fbffcf18-d02a-4104-b6e3-0b32006e3e47",
"city_id": "madrid",
"neighborhood": "Chamberí",
"local_events": [
{
"name": "Neighborhood Festival",
"date": "2025-06-15",
"impact": "high"
}
],
"notes": "Bakery near primary school",
"calendar": {
"calendar_id": "uuid",
"calendar_name": "Madrid Primary School Calendar 2024-2025",
"school_type": "primary",
"academic_year": "2024-2025",
"holiday_periods": [],
"school_hours": {},
"source": "madrid_education_dept_2024"
}
}
}
class TenantLocationContextCreateRequest(BaseModel):
"""Request to create/update tenant location context"""
city_id: str = Field(..., description="City ID (e.g., 'madrid')")
school_calendar_id: Optional[UUID] = Field(
None,
description="School calendar ID to assign"
)
neighborhood: Optional[str] = Field(None, description="Neighborhood name")
local_events: Optional[List[Dict[str, Any]]] = Field(
None,
description="Local events specific to this location"
)
notes: Optional[str] = Field(None, description="Additional notes")
class Config:
json_schema_extra = {
"example": {
"city_id": "madrid",
"school_calendar_id": "123e4567-e89b-12d3-a456-426614174000",
"neighborhood": "Chamberí",
"local_events": [
{
"name": "Local Market Day",
"date": "2025-05-20",
"impact": "medium"
}
],
"notes": "Bakery located near primary school entrance"
}
}

View File

@@ -0,0 +1,36 @@
# services/external/app/schemas/city_data.py
"""
City Data Schemas - New response types for city-based operations
"""
from pydantic import BaseModel, Field
from typing import Optional
class CityInfoResponse(BaseModel):
"""Information about a supported city"""
city_id: str
name: str
country: str
latitude: float
longitude: float
radius_km: float
weather_provider: str
traffic_provider: str
enabled: bool
class DataAvailabilityResponse(BaseModel):
"""Data availability for a city"""
city_id: str
city_name: str
weather_available: bool
weather_start_date: Optional[str] = None
weather_end_date: Optional[str] = None
weather_record_count: int = 0
traffic_available: bool
traffic_start_date: Optional[str] = None
traffic_end_date: Optional[str] = None
traffic_record_count: int = 0
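
One plausible way the API layer could populate these responses from the city registry (the actual router code is not shown in this file, so treat this as a sketch):

from app.registry.city_registry import CityRegistry
from app.schemas.city_data import CityInfoResponse

madrid = CityRegistry.get_city("madrid")
info = CityInfoResponse(
    city_id=madrid.city_id,
    name=madrid.name,
    country=madrid.country.value,
    latitude=madrid.latitude,
    longitude=madrid.longitude,
    radius_km=madrid.radius_km,
    weather_provider=madrid.weather_provider.value,
    traffic_provider=madrid.traffic_provider.value,
    enabled=madrid.enabled,
)
print(info.model_dump())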

106
services/external/app/schemas/traffic.py vendored Normal file
View File

@@ -0,0 +1,106 @@
# services/external/app/schemas/traffic.py
"""
Traffic Service Pydantic Schemas
"""
from pydantic import BaseModel, Field, field_validator
from datetime import datetime
from typing import Optional, List
from uuid import UUID
class TrafficDataBase(BaseModel):
"""Base traffic data schema"""
location_id: str = Field(..., max_length=100, description="Traffic monitoring location ID")
date: datetime = Field(..., description="Date and time of traffic measurement")
traffic_volume: Optional[int] = Field(None, ge=0, description="Vehicles per hour")
pedestrian_count: Optional[int] = Field(None, ge=0, description="Pedestrians per hour")
congestion_level: Optional[str] = Field(None, pattern="^(low|medium|high)$", description="Traffic congestion level")
average_speed: Optional[float] = Field(None, ge=0, le=200, description="Average speed in km/h")
source: str = Field("madrid_opendata", max_length=50, description="Data source")
raw_data: Optional[str] = Field(None, description="Raw data from source")
class TrafficDataCreate(TrafficDataBase):
"""Schema for creating traffic data"""
pass
class TrafficDataUpdate(BaseModel):
"""Schema for updating traffic data"""
traffic_volume: Optional[int] = Field(None, ge=0)
pedestrian_count: Optional[int] = Field(None, ge=0)
congestion_level: Optional[str] = Field(None, pattern="^(low|medium|high)$")
average_speed: Optional[float] = Field(None, ge=0, le=200)
raw_data: Optional[str] = None
class TrafficDataResponseDB(TrafficDataBase):
"""Schema for traffic data responses from database"""
id: str = Field(..., description="Unique identifier")
created_at: datetime = Field(..., description="Creation timestamp")
updated_at: datetime = Field(..., description="Last update timestamp")
@field_validator('id', mode='before')
@classmethod
def convert_uuid_to_string(cls, v):
if isinstance(v, UUID):
return str(v)
return v
class Config:
from_attributes = True
json_encoders = {
datetime: lambda v: v.isoformat()
}
class TrafficDataList(BaseModel):
"""Schema for paginated traffic data responses"""
data: List[TrafficDataResponseDB]
total: int = Field(..., description="Total number of records")
page: int = Field(..., description="Current page number")
per_page: int = Field(..., description="Records per page")
has_next: bool = Field(..., description="Whether there are more pages")
has_prev: bool = Field(..., description="Whether there are previous pages")
class TrafficAnalytics(BaseModel):
"""Schema for traffic analytics"""
location_id: str
period_start: datetime
period_end: datetime
avg_traffic_volume: Optional[float] = None
avg_pedestrian_count: Optional[float] = None
peak_traffic_hour: Optional[int] = None
peak_pedestrian_hour: Optional[int] = None
congestion_distribution: dict = Field(default_factory=dict)
avg_speed: Optional[float] = None
class TrafficDataResponse(BaseModel):
"""Schema for API traffic data responses"""
date: datetime = Field(..., description="Date and time of traffic measurement")
traffic_volume: Optional[int] = Field(None, ge=0, description="Vehicles per hour")
pedestrian_count: Optional[int] = Field(None, ge=0, description="Pedestrians per hour")
congestion_level: Optional[str] = Field(None, pattern="^(low|medium|high)$", description="Traffic congestion level")
average_speed: Optional[float] = Field(None, ge=0, le=200, description="Average speed in km/h")
source: str = Field(..., description="Data source")
class Config:
json_encoders = {
datetime: lambda v: v.isoformat()
}
class LocationRequest(BaseModel):
latitude: float
longitude: float
address: Optional[str] = None
class DateRangeRequest(BaseModel):
start_date: datetime
end_date: datetime
class HistoricalTrafficRequest(BaseModel):
latitude: float
longitude: float
start_date: datetime
end_date: datetime
class TrafficForecastRequest(BaseModel):
latitude: float
longitude: float
hours: int = 24
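
A short validation sketch. The congestion_level pattern here accepts only low, medium and high, while the repository-level validator earlier also allows 'blocked'; the example shows the schema rejecting that value:

from datetime import datetime

from pydantic import ValidationError

from app.schemas.traffic import TrafficDataBase

ok = TrafficDataBase(
    location_id="40.4168,-3.7038",
    date=datetime(2025, 1, 10, 8, 0),
    traffic_volume=420,
    congestion_level="medium",
)
print(ok.congestion_level)  # medium

try:
    TrafficDataBase(
        location_id="40.4168,-3.7038",
        date=datetime(2025, 1, 10, 8, 0),
        congestion_level="blocked",  # not matched by ^(low|medium|high)$
    )
except ValidationError as exc:
    print(exc.errors()[0]["loc"])  # ('congestion_level',)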

173
services/external/app/schemas/weather.py vendored Normal file
View File

@@ -0,0 +1,173 @@
# services/external/app/schemas/weather.py
"""Weather data schemas"""
from pydantic import BaseModel, Field, field_validator
from datetime import datetime
from typing import Optional, List
from uuid import UUID
class WeatherDataBase(BaseModel):
"""Base weather data schema"""
location_id: str = Field(..., max_length=100, description="Weather monitoring location ID")
date: datetime = Field(..., description="Date and time of weather measurement")
temperature: Optional[float] = Field(None, ge=-50, le=60, description="Temperature in Celsius")
precipitation: Optional[float] = Field(None, ge=0, description="Precipitation in mm")
humidity: Optional[float] = Field(None, ge=0, le=100, description="Humidity percentage")
wind_speed: Optional[float] = Field(None, ge=0, le=200, description="Wind speed in km/h")
pressure: Optional[float] = Field(None, ge=800, le=1200, description="Atmospheric pressure in hPa")
description: Optional[str] = Field(None, max_length=200, description="Weather description")
source: str = Field("aemet", max_length=50, description="Data source")
raw_data: Optional[str] = Field(None, description="Raw data from source")
class WeatherDataCreate(WeatherDataBase):
"""Schema for creating weather data"""
pass
class WeatherDataUpdate(BaseModel):
"""Schema for updating weather data"""
temperature: Optional[float] = Field(None, ge=-50, le=60)
precipitation: Optional[float] = Field(None, ge=0)
humidity: Optional[float] = Field(None, ge=0, le=100)
wind_speed: Optional[float] = Field(None, ge=0, le=200)
pressure: Optional[float] = Field(None, ge=800, le=1200)
description: Optional[str] = Field(None, max_length=200)
raw_data: Optional[str] = None
class WeatherDataResponse(WeatherDataBase):
"""Schema for weather data responses"""
id: str = Field(..., description="Unique identifier")
created_at: datetime = Field(..., description="Creation timestamp")
updated_at: datetime = Field(..., description="Last update timestamp")
@field_validator('id', mode='before')
@classmethod
def convert_uuid_to_string(cls, v):
if isinstance(v, UUID):
return str(v)
return v
class Config:
from_attributes = True
json_encoders = {
datetime: lambda v: v.isoformat()
}
class WeatherForecastBase(BaseModel):
"""Base weather forecast schema"""
location_id: str = Field(..., max_length=100, description="Location ID")
forecast_date: datetime = Field(..., description="Date for forecast")
temperature: Optional[float] = Field(None, ge=-50, le=60, description="Forecasted temperature")
precipitation: Optional[float] = Field(None, ge=0, description="Forecasted precipitation")
humidity: Optional[float] = Field(None, ge=0, le=100, description="Forecasted humidity")
wind_speed: Optional[float] = Field(None, ge=0, le=200, description="Forecasted wind speed")
description: Optional[str] = Field(None, max_length=200, description="Forecast description")
source: str = Field("aemet", max_length=50, description="Data source")
raw_data: Optional[str] = Field(None, description="Raw forecast data")
class WeatherForecastCreate(WeatherForecastBase):
"""Schema for creating weather forecasts"""
pass
class WeatherForecastResponse(WeatherForecastBase):
"""Schema for weather forecast responses"""
id: str = Field(..., description="Unique identifier")
generated_at: datetime = Field(..., description="When forecast was generated")
created_at: datetime = Field(..., description="Creation timestamp")
updated_at: datetime = Field(..., description="Last update timestamp")
@field_validator('id', mode='before')
@classmethod
def convert_uuid_to_string(cls, v):
if isinstance(v, UUID):
return str(v)
return v
class Config:
from_attributes = True
json_encoders = {
datetime: lambda v: v.isoformat()
}
class WeatherDataList(BaseModel):
"""Schema for paginated weather data responses"""
data: List[WeatherDataResponse]
total: int = Field(..., description="Total number of records")
page: int = Field(..., description="Current page number")
per_page: int = Field(..., description="Records per page")
has_next: bool = Field(..., description="Whether there are more pages")
has_prev: bool = Field(..., description="Whether there are previous pages")
class WeatherForecastList(BaseModel):
"""Schema for paginated weather forecast responses"""
forecasts: List[WeatherForecastResponse]
total: int = Field(..., description="Total number of forecasts")
page: int = Field(..., description="Current page number")
per_page: int = Field(..., description="Forecasts per page")
class WeatherAnalytics(BaseModel):
"""Schema for weather analytics"""
location_id: str
period_start: datetime
period_end: datetime
avg_temperature: Optional[float] = None
min_temperature: Optional[float] = None
max_temperature: Optional[float] = None
total_precipitation: Optional[float] = None
avg_humidity: Optional[float] = None
avg_wind_speed: Optional[float] = None
avg_pressure: Optional[float] = None
weather_conditions: dict = Field(default_factory=dict)
rainy_days: int = 0
sunny_days: int = 0
class LocationRequest(BaseModel):
latitude: float
longitude: float
address: Optional[str] = None
class DateRangeRequest(BaseModel):
start_date: datetime
end_date: datetime
class HistoricalWeatherRequest(BaseModel):
latitude: float
longitude: float
start_date: datetime
end_date: datetime
class WeatherForecastRequest(BaseModel):
latitude: float
longitude: float
days: int
class HourlyForecastRequest(BaseModel):
latitude: float
longitude: float
hours: int = Field(default=48, ge=1, le=48, description="Number of hours to forecast (1-48)")
class HourlyForecastResponse(BaseModel):
forecast_datetime: datetime
generated_at: datetime
temperature: Optional[float]
precipitation: Optional[float]
humidity: Optional[float]
wind_speed: Optional[float]
description: Optional[str]
source: str
hour: int
class WeatherForecastAPIResponse(BaseModel):
"""Simplified schema for API weather forecast responses (without database fields)"""
forecast_date: datetime = Field(..., description="Date for forecast")
generated_at: datetime = Field(..., description="When forecast was generated")
temperature: Optional[float] = Field(None, ge=-50, le=60, description="Forecasted temperature")
precipitation: Optional[float] = Field(None, ge=0, description="Forecasted precipitation")
humidity: Optional[float] = Field(None, ge=0, le=100, description="Forecasted humidity")
wind_speed: Optional[float] = Field(None, ge=0, le=200, description="Forecasted wind speed")
description: Optional[str] = Field(None, max_length=200, description="Forecast description")
source: str = Field("aemet", max_length=50, description="Data source")
class Config:
json_encoders = {
datetime: lambda v: v.isoformat()
}

View File

@@ -0,0 +1 @@
# services/external/app/services/__init__.py

View File

@@ -0,0 +1,269 @@
"""
Competitor Analyzer
Specialized analysis for competitor bakeries with competitive pressure modeling.
Treats competitor proximity differently from other POIs, taking market dynamics into account.
"""
from typing import Dict, List, Any, Tuple
import structlog
from math import radians, sin, cos, sqrt, atan2
from app.core.poi_config import COMPETITOR_ZONES
logger = structlog.get_logger()
class CompetitorAnalyzer:
"""
Competitive landscape analyzer for bakery locations.
Models competitive pressure considering:
- Direct competition (<100m): Strong negative impact
- Nearby competition (100-500m): Moderate negative impact
- Market saturation (500-1000m): Can be positive (bakery district)
or negative (competitive market)
"""
def analyze_competitive_landscape(
self,
competitor_pois: List[Dict[str, Any]],
bakery_location: Tuple[float, float],
        tenant_id: str | None = None
) -> Dict[str, Any]:
"""
Analyze competitive pressure from nearby bakeries.
Args:
competitor_pois: List of detected competitor POIs
bakery_location: Tuple of (latitude, longitude)
tenant_id: Optional tenant ID for logging
Returns:
Competitive analysis with pressure scores and market classification
"""
if not competitor_pois:
logger.info(
"No competitors detected - underserved market",
tenant_id=tenant_id
)
return {
"competitive_pressure_score": 0.0,
"direct_competitors_count": 0,
"nearby_competitors_count": 0,
"market_competitors_count": 0,
"competitive_zone": "low_competition",
"market_type": "underserved",
"competitive_advantage": "first_mover",
"ml_feature_competitive_pressure": 0.0,
"ml_feature_has_direct_competitor": 0,
"ml_feature_competitor_density_500m": 0,
"competitor_details": []
}
# Categorize competitors by distance
direct_competitors = [] # <100m
nearby_competitors = [] # 100-500m
market_competitors = [] # 500-1000m
competitor_details = []
for poi in competitor_pois:
distance_m = self._calculate_distance(
bakery_location, (poi["lat"], poi["lon"])
) * 1000
competitor_info = {
"name": poi.get("name", "Unnamed"),
"osm_id": poi.get("osm_id"),
"distance_m": round(distance_m, 1),
"lat": poi["lat"],
"lon": poi["lon"]
}
if distance_m < COMPETITOR_ZONES["direct"]["max_distance_m"]:
direct_competitors.append(poi)
competitor_info["zone"] = "direct"
elif distance_m < COMPETITOR_ZONES["nearby"]["max_distance_m"]:
nearby_competitors.append(poi)
competitor_info["zone"] = "nearby"
elif distance_m < COMPETITOR_ZONES["market"]["max_distance_m"]:
market_competitors.append(poi)
competitor_info["zone"] = "market"
competitor_details.append(competitor_info)
# Calculate competitive pressure score
direct_pressure = (
len(direct_competitors) *
COMPETITOR_ZONES["direct"]["pressure_multiplier"]
)
nearby_pressure = (
len(nearby_competitors) *
COMPETITOR_ZONES["nearby"]["pressure_multiplier"]
)
# Market saturation analysis
min_for_district = COMPETITOR_ZONES["market"]["min_count_for_district"]
if len(market_competitors) >= min_for_district:
# Many bakeries = destination area (bakery district)
market_pressure = COMPETITOR_ZONES["market"]["district_multiplier"]
market_type = "bakery_district"
elif len(market_competitors) > 2:
market_pressure = COMPETITOR_ZONES["market"]["normal_multiplier"]
market_type = "competitive_market"
else:
market_pressure = 0.0
market_type = "normal_market"
competitive_pressure_score = (
direct_pressure + nearby_pressure + market_pressure
)
# Determine competitive zone classification
if len(direct_competitors) > 0:
competitive_zone = "high_competition"
competitive_advantage = "differentiation_required"
elif len(nearby_competitors) > 2:
competitive_zone = "moderate_competition"
competitive_advantage = "quality_focused"
else:
competitive_zone = "low_competition"
competitive_advantage = "local_leader"
# Sort competitors by distance
competitor_details.sort(key=lambda x: x["distance_m"])
logger.info(
"Competitive analysis complete",
tenant_id=tenant_id,
competitive_zone=competitive_zone,
market_type=market_type,
total_competitors=len(competitor_pois),
direct=len(direct_competitors),
nearby=len(nearby_competitors),
market=len(market_competitors),
pressure_score=competitive_pressure_score
)
return {
# Summary scores
"competitive_pressure_score": round(competitive_pressure_score, 2),
# Competitor counts by zone
"direct_competitors_count": len(direct_competitors),
"nearby_competitors_count": len(nearby_competitors),
"market_competitors_count": len(market_competitors),
"total_competitors_count": len(competitor_pois),
# Market classification
"competitive_zone": competitive_zone,
"market_type": market_type,
"competitive_advantage": competitive_advantage,
# ML features (for model integration)
"ml_feature_competitive_pressure": round(competitive_pressure_score, 2),
"ml_feature_has_direct_competitor": 1 if len(direct_competitors) > 0 else 0,
"ml_feature_competitor_density_500m": (
len(direct_competitors) + len(nearby_competitors)
),
# Detailed competitor information
"competitor_details": competitor_details,
# Nearest competitor
"nearest_competitor": competitor_details[0] if competitor_details else None
}
def _calculate_distance(
self,
coord1: Tuple[float, float],
coord2: Tuple[float, float]
) -> float:
"""
Calculate Haversine distance in kilometers.
Args:
coord1: Tuple of (latitude, longitude)
coord2: Tuple of (latitude, longitude)
Returns:
Distance in kilometers
"""
lat1, lon1 = coord1
lat2, lon2 = coord2
R = 6371 # Earth radius in km
dlat = radians(lat2 - lat1)
dlon = radians(lon2 - lon1)
a = (sin(dlat/2)**2 +
cos(radians(lat1)) * cos(radians(lat2)) * sin(dlon/2)**2)
c = 2 * atan2(sqrt(a), sqrt(1-a))
return R * c
def get_competitive_insights(
self,
analysis_result: Dict[str, Any]
) -> List[str]:
"""
Generate human-readable competitive insights.
Args:
analysis_result: Result from analyze_competitive_landscape
Returns:
List of insight strings for business intelligence
"""
insights = []
zone = analysis_result["competitive_zone"]
market = analysis_result["market_type"]
pressure = analysis_result["competitive_pressure_score"]
direct = analysis_result["direct_competitors_count"]
nearby = analysis_result["nearby_competitors_count"]
# Zone-specific insights
if zone == "high_competition":
insights.append(
f"⚠️ High competition: {direct} direct competitor(s) within 100m. "
"Focus on differentiation and quality."
)
elif zone == "moderate_competition":
insights.append(
f"Moderate competition: {nearby} nearby competitor(s) within 500m. "
"Good opportunity for market share."
)
else:
insights.append(
"✅ Low competition: Local market leader opportunity."
)
# Market type insights
if market == "bakery_district":
insights.append(
"📍 Bakery district: High foot traffic area with multiple bakeries. "
"Customers actively seek bakery products here."
)
elif market == "competitive_market":
insights.append(
"Market has multiple bakeries. Quality and customer service critical."
)
elif market == "underserved":
insights.append(
"🎯 Underserved market: Potential for strong customer base growth."
)
# Pressure score insight
if pressure < -1.5:
insights.append(
"Strong competitive pressure expected to impact demand. "
"Marketing and differentiation essential."
)
elif pressure > 0:
insights.append(
"Positive market dynamics: Location benefits from bakery destination traffic."
)
return insights
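
A usage sketch; the import path is assumed from the repository layout, and the exact zone thresholds and multipliers come from COMPETITOR_ZONES in app.core.poi_config, which is not shown here:

from app.services.competitor_analyzer import CompetitorAnalyzer  # path assumed

analyzer = CompetitorAnalyzer()
competitors = [
    {"name": "Panadería Sol", "osm_id": 111, "lat": 40.4175, "lon": -3.7040},
    {"name": "Horno Luna", "osm_id": 222, "lat": 40.4200, "lon": -3.7100},
]
analysis = analyzer.analyze_competitive_landscape(
    competitor_pois=competitors,
    bakery_location=(40.4168, -3.7038),
    tenant_id="demo-tenant",
)
print(analysis["competitive_zone"], analysis["nearest_competitor"]["name"])
for insight in analyzer.get_competitive_insights(analysis):
    print(insight)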

View File

@@ -0,0 +1,282 @@
"""
Nominatim Geocoding Service
Provides address search and geocoding using OpenStreetMap Nominatim API.
For development: uses the public Nominatim API (rate-limited)
For production: should point to a self-hosted Nominatim instance
"""
import httpx
from typing import List, Dict, Any, Optional
import structlog
from asyncio import sleep
logger = structlog.get_logger()
class NominatimService:
"""
Nominatim geocoding and address search service.
Uses OpenStreetMap Nominatim API for address autocomplete and geocoding.
Respects rate limits and usage policy.
"""
# For development: public API (rate-limited to 1 req/sec)
# For production: should be overridden with self-hosted instance
DEFAULT_BASE_URL = "https://nominatim.openstreetmap.org"
def __init__(self, base_url: Optional[str] = None, user_agent: str = "BakeryIA-Forecasting/1.0"):
"""
Initialize Nominatim service.
Args:
base_url: Nominatim server URL (defaults to public API)
user_agent: User agent for API requests (required by Nominatim policy)
"""
self.base_url = (base_url or self.DEFAULT_BASE_URL).rstrip("/")
self.user_agent = user_agent
self.headers = {
"User-Agent": self.user_agent
}
# Rate limiting for public API (1 request per second)
self.is_public_api = self.base_url == self.DEFAULT_BASE_URL
self.min_request_interval = 1.0 if self.is_public_api else 0.0
logger.info(
"Nominatim service initialized",
base_url=self.base_url,
is_public_api=self.is_public_api,
rate_limit=f"{self.min_request_interval}s" if self.is_public_api else "none"
)
async def search_address(
self,
query: str,
country_code: str = "es",
limit: int = 10
) -> List[Dict[str, Any]]:
"""
Search for addresses matching query (autocomplete).
Args:
query: Address search query
country_code: ISO country code to restrict search (default: Spain)
limit: Maximum number of results
Returns:
List of address suggestions with display_name, lat, lon, osm_id, etc.
"""
if not query or len(query.strip()) < 3:
logger.warning("Search query too short", query=query)
return []
try:
# Rate limiting for public API
if self.is_public_api:
await sleep(self.min_request_interval)
async with httpx.AsyncClient(timeout=10.0) as client:
response = await client.get(
f"{self.base_url}/search",
params={
"q": query,
"format": "json",
"addressdetails": 1,
"countrycodes": country_code,
"limit": limit,
"accept-language": "es"
},
headers=self.headers
)
response.raise_for_status()
results = response.json()
# Parse and enrich results
addresses = []
for result in results:
addresses.append({
"display_name": result.get("display_name"),
"lat": float(result.get("lat")),
"lon": float(result.get("lon")),
"osm_type": result.get("osm_type"),
"osm_id": result.get("osm_id"),
"place_id": result.get("place_id"),
"type": result.get("type"),
"class": result.get("class"),
"address": result.get("address", {}),
"boundingbox": result.get("boundingbox", [])
})
logger.info(
"Address search completed",
query=query,
result_count=len(addresses)
)
return addresses
except httpx.HTTPError as e:
logger.error(
"Nominatim API request failed",
query=query,
error=str(e)
)
return []
except Exception as e:
logger.error(
"Unexpected error in address search",
query=query,
error=str(e),
exc_info=True
)
return []
async def geocode_address(
self,
address: str,
country_code: str = "es"
) -> Optional[Dict[str, Any]]:
"""
Geocode an address to get coordinates.
Args:
address: Full address string
country_code: ISO country code
Returns:
Dictionary with lat, lon, display_name, address components or None
"""
results = await self.search_address(address, country_code, limit=1)
if not results:
logger.warning("No geocoding results found", address=address)
return None
result = results[0]
logger.info(
"Address geocoded successfully",
address=address,
lat=result["lat"],
lon=result["lon"]
)
return result
async def reverse_geocode(
self,
latitude: float,
longitude: float
) -> Optional[Dict[str, Any]]:
"""
Reverse geocode coordinates to get address.
Args:
latitude: Latitude coordinate
longitude: Longitude coordinate
Returns:
Dictionary with address information or None
"""
try:
# Rate limiting for public API
if self.is_public_api:
await sleep(self.min_request_interval)
async with httpx.AsyncClient(timeout=10.0) as client:
response = await client.get(
f"{self.base_url}/reverse",
params={
"lat": latitude,
"lon": longitude,
"format": "json",
"addressdetails": 1,
"accept-language": "es"
},
headers=self.headers
)
response.raise_for_status()
result = response.json()
address_info = {
"display_name": result.get("display_name"),
"lat": float(result.get("lat")),
"lon": float(result.get("lon")),
"osm_type": result.get("osm_type"),
"osm_id": result.get("osm_id"),
"place_id": result.get("place_id"),
"address": result.get("address", {}),
"boundingbox": result.get("boundingbox", [])
}
logger.info(
"Reverse geocoding completed",
lat=latitude,
lon=longitude,
address=address_info["display_name"]
)
return address_info
except httpx.HTTPError as e:
logger.error(
"Nominatim reverse geocoding failed",
lat=latitude,
lon=longitude,
error=str(e)
)
return None
except Exception as e:
logger.error(
"Unexpected error in reverse geocoding",
lat=latitude,
lon=longitude,
error=str(e),
exc_info=True
)
return None
async def validate_coordinates(
self,
latitude: float,
longitude: float
) -> bool:
"""
Validate that coordinates point to a real location.
Args:
latitude: Latitude to validate
longitude: Longitude to validate
Returns:
True if coordinates are valid, False otherwise
"""
if not (-90 <= latitude <= 90 and -180 <= longitude <= 180):
return False
result = await self.reverse_geocode(latitude, longitude)
return result is not None
async def health_check(self) -> bool:
"""
Check if Nominatim service is accessible.
Returns:
True if service is healthy, False otherwise
"""
try:
async with httpx.AsyncClient(timeout=5.0) as client:
response = await client.get(
f"{self.base_url}/status",
params={"format": "json"},
headers=self.headers
)
return response.status_code == 200
except Exception as e:
logger.error(
"Nominatim health check failed",
error=str(e)
)
return False
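# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative; the query string is an assumption and the
# public Nominatim API is rate-limited, so run this only for manual testing).
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    import asyncio

    async def _demo():
        service = NominatimService()
        matches = await service.search_address("Gran Via 1, Madrid", limit=3)
        for match in matches:
            print(match["display_name"], match["lat"], match["lon"])
        # Reverse geocode the first match, if any
        if matches:
            place = await service.reverse_geocode(matches[0]["lat"], matches[0]["lon"])
            print(place["display_name"] if place else "No reverse result")

    asyncio.run(_demo())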

View File

@@ -0,0 +1,466 @@
"""
POI Detection Service
Automated Point of Interest detection using Overpass API (OpenStreetMap).
Detects nearby POIs around bakery locations and generates ML features
for location-based demand forecasting.
"""
import overpy
from typing import List, Dict, Any, Tuple, Optional
from datetime import datetime, timezone, timedelta
import asyncio
import structlog
import httpx
from math import radians, sin, cos, sqrt, atan2
import random
from app.core.poi_config import (
POI_CATEGORIES,
OVERPASS_API_URL,
OVERPASS_TIMEOUT_SECONDS,
OVERPASS_MAX_RETRIES,
OVERPASS_RETRY_DELAY_SECONDS,
DISTANCE_BANDS
)
logger = structlog.get_logger()
class POIDetectionService:
"""
Automated POI detection using Overpass API (OpenStreetMap).
Detects points of interest near bakery locations and calculates
ML features for demand forecasting with location-specific context.
"""
def __init__(self, overpass_url: str = OVERPASS_API_URL):
self.overpass_url = overpass_url
self.api = overpy.Overpass(url=overpass_url)
self.timeout = OVERPASS_TIMEOUT_SECONDS
async def detect_pois_for_bakery(
self,
latitude: float,
longitude: float,
tenant_id: str
) -> Dict[str, Any]:
"""
Detect all POIs around a bakery location.
Args:
latitude: Bakery latitude
longitude: Bakery longitude
tenant_id: Tenant identifier for logging
Returns:
Complete POI detection results with ML features
"""
logger.info(
"Starting POI detection",
tenant_id=tenant_id,
location=(latitude, longitude)
)
poi_results = {}
detection_errors = []
# Query each POI category with inter-query delays
category_items = list(POI_CATEGORIES.items())
for idx, (category_key, category) in enumerate(category_items):
try:
pois = await self._query_pois_with_retry(
latitude,
longitude,
category.osm_query,
category.search_radius_m,
category_key
)
# Calculate features for this category
features = self._calculate_poi_features(
pois,
(latitude, longitude),
category
)
poi_results[category_key] = {
"pois": pois,
"features": features,
"count": len(pois)
}
logger.info(
f"Detected {category_key}",
count=len(pois),
proximity_score=features["proximity_score"]
)
# Add delay between categories to respect rate limits
# (except after the last category)
if idx < len(category_items) - 1:
inter_query_delay = 2.0 + random.uniform(0.5, 1.5)
await asyncio.sleep(inter_query_delay)
except Exception as e:
logger.error(
f"Failed to detect {category_key}",
error=str(e),
tenant_id=tenant_id
)
detection_errors.append({
"category": category_key,
"error": str(e)
})
poi_results[category_key] = {
"pois": [],
"features": self._get_empty_features(),
"count": 0,
"error": str(e)
}
# Add a longer delay after an error before continuing
if idx < len(category_items) - 1:
error_recovery_delay = 3.0 + random.uniform(1.0, 2.0)
await asyncio.sleep(error_recovery_delay)
# Generate combined ML features
ml_features = self._generate_ml_features(poi_results)
# Generate summary
summary = self._generate_summary(poi_results)
detection_status = "completed" if not detection_errors else (
"partial" if len(detection_errors) < len(POI_CATEGORIES) else "failed"
)
return {
"tenant_id": tenant_id,
"location": {"latitude": latitude, "longitude": longitude},
"detection_timestamp": datetime.now(timezone.utc).isoformat(),
"detection_status": detection_status,
"detection_errors": detection_errors if detection_errors else None,
"poi_categories": poi_results,
"ml_features": ml_features,
"summary": summary
}
async def _query_pois_with_retry(
self,
latitude: float,
longitude: float,
osm_query: str,
radius_m: int,
category_key: str
) -> List[Dict[str, Any]]:
"""
Query Overpass API with exponential backoff retry logic.
Implements:
- Exponential backoff with jitter
- Extended delays for rate limiting errors
- Proper error type detection
"""
last_error = None
base_delay = OVERPASS_RETRY_DELAY_SECONDS
for attempt in range(OVERPASS_MAX_RETRIES):
try:
return await self._query_pois(
latitude, longitude, osm_query, radius_m
)
except Exception as e:
last_error = e
error_message = str(e).lower()
# Determine if this is a rate limiting error
is_rate_limit = any(phrase in error_message for phrase in [
'too many requests',
'rate limit',
'server load too high',
'quota exceeded',
'retry later',
'429',
'503',
'504'
])
if attempt < OVERPASS_MAX_RETRIES - 1:
# Calculate exponential backoff with jitter
# For rate limiting: use longer delays (10-30 seconds)
# For other errors: use standard backoff (2-8 seconds)
if is_rate_limit:
delay = base_delay * (3 ** attempt) + random.uniform(1, 5)
delay = min(delay, 30) # Cap at 30 seconds
else:
delay = base_delay * (2 ** attempt) + random.uniform(0.5, 1.5)
delay = min(delay, 10) # Cap at 10 seconds
logger.warning(
f"POI query retry {attempt + 1}/{OVERPASS_MAX_RETRIES}",
category=category_key,
error=str(e),
is_rate_limit=is_rate_limit,
retry_delay=f"{delay:.1f}s"
)
await asyncio.sleep(delay)
else:
logger.error(
"POI query failed after all retries",
category=category_key,
error=str(e),
is_rate_limit=is_rate_limit
)
raise last_error
async def _query_pois(
self,
latitude: float,
longitude: float,
osm_query: str,
radius_m: int
) -> List[Dict[str, Any]]:
"""
Query Overpass API for POIs in radius.
Raises:
Exception: With descriptive error message from Overpass API
"""
# Build Overpass QL query
query = f"""
[out:json][timeout:{self.timeout}];
(
node{osm_query}(around:{radius_m},{latitude},{longitude});
way{osm_query}(around:{radius_m},{latitude},{longitude});
);
out center;
"""
# Execute query (use asyncio thread pool for blocking overpy)
loop = asyncio.get_event_loop()
try:
result = await loop.run_in_executor(
None,
self.api.query,
query
)
except overpy.exception.OverpassTooManyRequests as e:
# Explicitly handle rate limiting
raise Exception("Too many requests - Overpass API rate limit exceeded") from e
except overpy.exception.OverpassGatewayTimeout as e:
# Query took too long
raise Exception("Gateway timeout - query too complex or server busy") from e
except overpy.exception.OverpassBadRequest as e:
# Query syntax error
raise Exception(f"Bad request - invalid query syntax: {str(e)}") from e
except Exception as e:
# Check if it's an HTTP error with status code
error_msg = str(e).lower()
if '429' in error_msg or 'too many' in error_msg:
raise Exception("Too many requests - rate limit exceeded") from e
elif '503' in error_msg or 'load too high' in error_msg:
raise Exception("Server load too high - Overpass API overloaded") from e
elif '504' in error_msg or 'timeout' in error_msg:
raise Exception("Gateway timeout - server busy") from e
else:
# Re-raise with original message
raise
# Parse results
pois = []
# Process nodes
for node in result.nodes:
pois.append({
"osm_id": str(node.id),
"type": "node",
"lat": float(node.lat),
"lon": float(node.lon),
"tags": dict(node.tags),
"name": node.tags.get("name", "Unnamed")
})
# Process ways (buildings, areas)
for way in result.ways:
# Get center point
if hasattr(way, 'center_lat') and way.center_lat:
lat, lon = float(way.center_lat), float(way.center_lon)
else:
# Calculate centroid from nodes
if way.nodes:
lats = [float(node.lat) for node in way.nodes]
lons = [float(node.lon) for node in way.nodes]
lat = sum(lats) / len(lats)
lon = sum(lons) / len(lons)
else:
continue
pois.append({
"osm_id": str(way.id),
"type": "way",
"lat": lat,
"lon": lon,
"tags": dict(way.tags),
"name": way.tags.get("name", "Unnamed")
})
return pois
def _calculate_poi_features(
self,
pois: List[Dict[str, Any]],
bakery_location: Tuple[float, float],
category
) -> Dict[str, float]:
"""Calculate ML features for POI category"""
if not pois:
return self._get_empty_features()
# Calculate distances
distances = []
for poi in pois:
dist_km = self._haversine_distance(
bakery_location,
(poi["lat"], poi["lon"])
)
distances.append(dist_km * 1000) # Convert to meters
# Feature Tier 1: Proximity Scores (PRIMARY)
proximity_score = sum(1.0 / (1.0 + d/1000) for d in distances)
weighted_proximity_score = proximity_score * category.weight
# Feature Tier 2: Distance Band Counts
count_0_100m = sum(1 for d in distances if d <= 100)
count_100_300m = sum(1 for d in distances if 100 < d <= 300)
count_300_500m = sum(1 for d in distances if 300 < d <= 500)
count_500_1000m = sum(1 for d in distances if 500 < d <= 1000)
# Feature Tier 3: Distance to Nearest
distance_to_nearest_m = min(distances) if distances else 9999.0
# Feature Tier 4: Binary Flags
has_within_100m = any(d <= 100 for d in distances)
has_within_300m = any(d <= 300 for d in distances)
has_within_500m = any(d <= 500 for d in distances)
return {
# Tier 1: Proximity scores (PRIMARY for ML)
"proximity_score": round(proximity_score, 4),
"weighted_proximity_score": round(weighted_proximity_score, 4),
# Tier 2: Distance bands
"count_0_100m": count_0_100m,
"count_100_300m": count_100_300m,
"count_300_500m": count_300_500m,
"count_500_1000m": count_500_1000m,
"total_count": len(pois),
# Tier 3: Distance to nearest
"distance_to_nearest_m": round(distance_to_nearest_m, 1),
# Tier 4: Binary flags
"has_within_100m": has_within_100m,
"has_within_300m": has_within_300m,
"has_within_500m": has_within_500m
}
def _generate_ml_features(self, poi_results: Dict[str, Any]) -> Dict[str, float]:
"""
Generate flat feature dictionary for ML model ingestion.
These features will be added to Prophet/XGBoost as regressors.
"""
ml_features = {}
for category_key, data in poi_results.items():
features = data.get("features", {})
# Flatten with category prefix
for feature_name, value in features.items():
ml_feature_name = f"poi_{category_key}_{feature_name}"
# Convert boolean to int for ML
if isinstance(value, bool):
value = 1 if value else 0
ml_features[ml_feature_name] = value
return ml_features
def _get_empty_features(self) -> Dict[str, float]:
"""Return zero features when no POIs found"""
return {
"proximity_score": 0.0,
"weighted_proximity_score": 0.0,
"count_0_100m": 0,
"count_100_300m": 0,
"count_300_500m": 0,
"count_500_1000m": 0,
"total_count": 0,
"distance_to_nearest_m": 9999.0,
"has_within_100m": False,
"has_within_300m": False,
"has_within_500m": False
}
def _haversine_distance(
self,
coord1: Tuple[float, float],
coord2: Tuple[float, float]
) -> float:
"""
Calculate distance between two coordinates in kilometers.
Uses Haversine formula for great-circle distance.
"""
lat1, lon1 = coord1
lat2, lon2 = coord2
R = 6371 # Earth radius in km
dlat = radians(lat2 - lat1)
dlon = radians(lon2 - lon1)
a = (sin(dlat/2)**2 +
cos(radians(lat1)) * cos(radians(lat2)) * sin(dlon/2)**2)
c = 2 * atan2(sqrt(a), sqrt(1-a))
return R * c
def _generate_summary(self, poi_results: Dict[str, Any]) -> Dict[str, Any]:
"""Generate human-readable summary"""
total_pois = sum(r["count"] for r in poi_results.values())
categories_with_pois = [
k for k, v in poi_results.items() if v["count"] > 0
]
high_impact_categories = [
k for k, v in poi_results.items()
if v["features"]["proximity_score"] > 2.0
]
return {
"total_pois_detected": total_pois,
"categories_with_pois": categories_with_pois,
"high_impact_categories": high_impact_categories,
"categories_count": len(categories_with_pois)
}
async def health_check(self) -> Dict[str, Any]:
"""Check if Overpass API is accessible"""
try:
async with httpx.AsyncClient(timeout=5) as client:
response = await client.get(f"{self.overpass_url}/status")
is_healthy = response.status_code == 200
return {
"healthy": is_healthy,
"status_code": response.status_code,
"url": self.overpass_url
}
except Exception as e:
return {
"healthy": False,
"error": str(e),
"url": self.overpass_url
}
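# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative; the coordinates and tenant id are
# assumptions, and running it issues real Overpass API queries, so use it
# sparingly against the public endpoint).
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    async def _demo():
        service = POIDetectionService()
        result = await service.detect_pois_for_bakery(
            latitude=40.4168,
            longitude=-3.7038,
            tenant_id="demo-tenant"
        )
        print(result["detection_status"], result["summary"]["total_pois_detected"])
        # Flat regressor features, e.g. poi_<category>_proximity_score
        for name, value in sorted(result["ml_features"].items())[:10]:
            print(name, value)

    asyncio.run(_demo())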

View File

@@ -0,0 +1,184 @@
"""
POI Feature Selector
Determines which POI features are relevant for ML model inclusion.
Filters out low-signal features to prevent model noise and overfitting.
"""
from typing import Dict, List, Any
import structlog
from app.core.poi_config import RELEVANCE_THRESHOLDS
logger = structlog.get_logger()
class POIFeatureSelector:
"""
Feature relevance engine for POI-based ML features.
Applies research-based thresholds to filter out irrelevant POI features
that would add noise to bakery-specific demand forecasting models.
"""
def __init__(self, thresholds: Dict[str, Dict[str, float]] = None):
"""
Initialize feature selector.
Args:
thresholds: Custom relevance thresholds (defaults to RELEVANCE_THRESHOLDS)
"""
self.thresholds = thresholds or RELEVANCE_THRESHOLDS
def select_relevant_features(
self,
poi_detection_results: Dict[str, Any],
tenant_id: str = None
) -> Dict[str, Any]:
"""
Filter POI features based on relevance thresholds.
Only includes features for POI categories that pass relevance tests.
This prevents adding noise to ML models for bakeries where certain
POI categories are not significant.
Args:
poi_detection_results: Per-category POI results (the "poi_categories" mapping produced by POIDetectionService)
tenant_id: Optional tenant ID for logging
Returns:
Dictionary with relevant features and detailed relevance report
"""
relevant_features = {}
relevance_report = []
relevant_categories = []
for category_key, data in poi_detection_results.items():
features = data.get("features", {})
thresholds = self.thresholds.get(category_key, {})
if not thresholds:
logger.warning(
f"No thresholds defined for category {category_key}",
tenant_id=tenant_id
)
continue
# Check relevance criteria
is_relevant, rejection_reason = self._check_relevance(
features, thresholds, category_key
)
if is_relevant:
# Include features with category prefix
for feature_name, value in features.items():
ml_feature_name = f"poi_{category_key}_{feature_name}"
# Convert boolean to int for ML
if isinstance(value, bool):
value = 1 if value else 0
relevant_features[ml_feature_name] = value
relevant_categories.append(category_key)
relevance_report.append({
"category": category_key,
"relevant": True,
"reason": "Passes all relevance thresholds",
"proximity_score": features.get("proximity_score", 0),
"count": features.get("total_count", 0),
"distance_to_nearest_m": features.get("distance_to_nearest_m", 9999)
})
else:
relevance_report.append({
"category": category_key,
"relevant": False,
"reason": rejection_reason,
"proximity_score": features.get("proximity_score", 0),
"count": features.get("total_count", 0),
"distance_to_nearest_m": features.get("distance_to_nearest_m", 9999)
})
logger.info(
"POI feature selection complete",
tenant_id=tenant_id,
total_categories=len(poi_detection_results),
relevant_categories=len(relevant_categories),
rejected_categories=len(poi_detection_results) - len(relevant_categories)
)
return {
"features": relevant_features,
"relevant_categories": relevant_categories,
"relevance_report": relevance_report,
"total_features": len(relevant_features),
"total_relevant_categories": len(relevant_categories)
}
def _check_relevance(
self,
features: Dict[str, Any],
thresholds: Dict[str, float],
category_key: str
) -> tuple[bool, str]:
"""
Check if POI category passes relevance thresholds.
Returns:
Tuple of (is_relevant, rejection_reason)
"""
# Criterion 1: Proximity score
min_proximity = thresholds.get("min_proximity_score", 0)
actual_proximity = features.get("proximity_score", 0)
if actual_proximity < min_proximity:
return False, f"Proximity score too low ({actual_proximity:.2f} < {min_proximity})"
# Criterion 2: Distance to nearest
max_distance = thresholds.get("max_distance_to_nearest_m", 9999)
actual_distance = features.get("distance_to_nearest_m", 9999)
if actual_distance > max_distance:
return False, f"Nearest POI too far ({actual_distance:.0f}m > {max_distance}m)"
# Criterion 3: Count threshold
min_count = thresholds.get("min_count", 0)
actual_count = features.get("total_count", 0)
if actual_count < min_count:
return False, f"Count too low ({actual_count} < {min_count})"
return True, "Passes all thresholds"
def get_feature_importance_summary(
self,
poi_detection_results: Dict[str, Any]
) -> List[Dict[str, Any]]:
"""
Generate summary of feature importance for all categories.
Useful for understanding POI landscape around a bakery.
"""
summary = []
for category_key, data in poi_detection_results.items():
features = data.get("features", {})
thresholds = self.thresholds.get(category_key, {})
is_relevant, reason = self._check_relevance(
features, thresholds, category_key
) if thresholds else (False, "No thresholds defined")
summary.append({
"category": category_key,
"is_relevant": is_relevant,
"proximity_score": features.get("proximity_score", 0),
"weighted_score": features.get("weighted_proximity_score", 0),
"total_count": features.get("total_count", 0),
"distance_to_nearest_m": features.get("distance_to_nearest_m", 9999),
"has_within_100m": features.get("has_within_100m", False),
"rejection_reason": None if is_relevant else reason
})
# Sort by relevance and proximity score
summary.sort(
key=lambda x: (x["is_relevant"], x["proximity_score"]),
reverse=True
)
return summary
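# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative; the "schools" category key and feature
# values below are made-up inputs shaped like POIDetectionService's
# "poi_categories" output, and the outcome depends on RELEVANCE_THRESHOLDS).
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    selector = POIFeatureSelector()
    sample_categories = {
        "schools": {
            "features": {
                "proximity_score": 2.4,
                "weighted_proximity_score": 3.1,
                "count_0_100m": 1,
                "count_100_300m": 2,
                "count_300_500m": 0,
                "count_500_1000m": 1,
                "total_count": 4,
                "distance_to_nearest_m": 85.0,
                "has_within_100m": True,
                "has_within_300m": True,
                "has_within_500m": True,
            },
            "count": 4,
        }
    }
    selection = selector.select_relevant_features(sample_categories, tenant_id="demo-tenant")
    print(selection["total_relevant_categories"], selection["total_features"])
    print(selection["relevance_report"])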

View File

@@ -0,0 +1,468 @@
"""
POI Refresh Service
Manages periodic POI context refresh jobs.
Detects changes in POI landscape and updates tenant POI contexts.
"""
import asyncio
from datetime import datetime, timezone, timedelta
from typing import Optional, Dict, Any, List
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select, and_, or_
import structlog
from app.models.poi_refresh_job import POIRefreshJob
from app.models.poi_context import TenantPOIContext
from app.services.poi_detection_service import POIDetectionService
from app.core.database import database_manager
logger = structlog.get_logger()
class POIRefreshService:
"""
POI Refresh Service
Manages background jobs for periodic POI context refresh.
Default refresh cycle: 180 days (6 months).
"""
DEFAULT_REFRESH_INTERVAL_DAYS = 180
DEFAULT_MAX_ATTEMPTS = 3
STALE_THRESHOLD_DAYS = 180
def __init__(
self,
poi_detection_service: Optional[POIDetectionService] = None,
refresh_interval_days: int = DEFAULT_REFRESH_INTERVAL_DAYS
):
"""
Initialize POI refresh service.
Args:
poi_detection_service: POI detection service instance
refresh_interval_days: Days between POI refreshes (default: 180)
"""
self.poi_detection_service = poi_detection_service or POIDetectionService()
self.refresh_interval_days = refresh_interval_days
logger.info(
"POI Refresh Service initialized",
refresh_interval_days=refresh_interval_days
)
async def schedule_refresh_job(
self,
tenant_id: str,
latitude: float,
longitude: float,
scheduled_at: Optional[datetime] = None,
session: Optional[AsyncSession] = None
) -> POIRefreshJob:
"""
Schedule a POI refresh job for a tenant.
Args:
tenant_id: Tenant UUID
latitude: Bakery latitude
longitude: Bakery longitude
scheduled_at: When to run the job (default: now + refresh_interval)
session: Database session
Returns:
Created POIRefreshJob
"""
if scheduled_at is None:
scheduled_at = datetime.now(timezone.utc) + timedelta(
days=self.refresh_interval_days
)
async def _create_job(db_session: AsyncSession):
# Check if pending job already exists
result = await db_session.execute(
select(POIRefreshJob).where(
and_(
POIRefreshJob.tenant_id == tenant_id,
POIRefreshJob.status.in_(["pending", "running"])
)
)
)
existing_job = result.scalar_one_or_none()
if existing_job:
logger.info(
"POI refresh job already scheduled",
tenant_id=tenant_id,
job_id=str(existing_job.id),
scheduled_at=existing_job.scheduled_at
)
return existing_job
# Create new job
job = POIRefreshJob(
tenant_id=tenant_id,
latitude=latitude,
longitude=longitude,
scheduled_at=scheduled_at,
status="pending",
max_attempts=self.DEFAULT_MAX_ATTEMPTS
)
db_session.add(job)
await db_session.commit()
await db_session.refresh(job)
logger.info(
"POI refresh job scheduled",
tenant_id=tenant_id,
job_id=str(job.id),
scheduled_at=scheduled_at
)
return job
if session:
return await _create_job(session)
else:
async with database_manager.get_session() as db_session:
return await _create_job(db_session)
async def execute_refresh_job(
self,
job_id: str,
session: Optional[AsyncSession] = None
) -> Dict[str, Any]:
"""
Execute a POI refresh job.
Args:
job_id: Job UUID
session: Database session
Returns:
Execution result with status and details
"""
async def _execute(db_session: AsyncSession):
# Load job
result = await db_session.execute(
select(POIRefreshJob).where(POIRefreshJob.id == job_id)
)
job = result.scalar_one_or_none()
if not job:
raise ValueError(f"Job not found: {job_id}")
if job.status == "running":
return {
"status": "already_running",
"job_id": str(job.id),
"message": "Job is already running"
}
if job.status == "completed":
return {
"status": "already_completed",
"job_id": str(job.id),
"message": "Job already completed"
}
if not job.can_retry:
return {
"status": "max_attempts_reached",
"job_id": str(job.id),
"message": f"Max attempts ({job.max_attempts}) reached"
}
# Update job status
job.status = "running"
job.started_at = datetime.now(timezone.utc)
job.attempt_count += 1
await db_session.commit()
logger.info(
"Executing POI refresh job",
job_id=str(job.id),
tenant_id=str(job.tenant_id),
attempt=job.attempt_count
)
try:
# Get existing POI context
poi_result = await db_session.execute(
select(TenantPOIContext).where(
TenantPOIContext.tenant_id == job.tenant_id
)
)
existing_context = poi_result.scalar_one_or_none()
# Perform POI detection (the detection service always queries Overpass
# directly and does not take a force_refresh flag)
detection_result = await self.poi_detection_service.detect_pois_for_bakery(
latitude=job.latitude,
longitude=job.longitude,
tenant_id=str(job.tenant_id)
)
# Analyze changes using the per-category results
# (detect_pois_for_bakery nests them under "poi_categories")
old_categories = (existing_context.poi_detection_results or {}) if existing_context else {}
old_categories = old_categories.get("poi_categories", old_categories)
changes = self._analyze_changes(
old_categories,
detection_result.get("poi_categories", {})
)
# Update job with results
job.status = "completed"
job.completed_at = datetime.now(timezone.utc)
job.pois_detected = sum(
data.get("count", 0)
for data in detection_result.get("poi_categories", {}).values()
)
job.changes_detected = changes["has_significant_changes"]
job.change_summary = changes
# Schedule next refresh
job.next_scheduled_at = datetime.now(timezone.utc) + timedelta(
days=self.refresh_interval_days
)
await db_session.commit()
logger.info(
"POI refresh job completed",
job_id=str(job.id),
tenant_id=str(job.tenant_id),
pois_detected=job.pois_detected,
changes_detected=job.changes_detected,
duration_seconds=job.duration_seconds
)
# Schedule next job
await self.schedule_refresh_job(
tenant_id=str(job.tenant_id),
latitude=job.latitude,
longitude=job.longitude,
scheduled_at=job.next_scheduled_at,
session=db_session
)
return {
"status": "success",
"job_id": str(job.id),
"pois_detected": job.pois_detected,
"changes_detected": job.changes_detected,
"change_summary": changes,
"duration_seconds": job.duration_seconds,
"next_scheduled_at": job.next_scheduled_at.isoformat()
}
except Exception as e:
# Job failed
job.status = "failed"
job.completed_at = datetime.now(timezone.utc)
job.error_message = str(e)
job.error_details = {
"error_type": type(e).__name__,
"error_message": str(e),
"attempt": job.attempt_count
}
# Schedule retry if attempts remaining
if job.can_retry:
job.next_scheduled_at = datetime.now(timezone.utc) + timedelta(hours=1)
logger.warning(
"POI refresh job failed, will retry",
job_id=str(job.id),
tenant_id=str(job.tenant_id),
attempt=job.attempt_count,
max_attempts=job.max_attempts,
error=str(e)
)
else:
logger.error(
"POI refresh job failed permanently",
job_id=str(job.id),
tenant_id=str(job.tenant_id),
attempt=job.attempt_count,
error=str(e),
exc_info=True
)
await db_session.commit()
return {
"status": "failed",
"job_id": str(job.id),
"error": str(e),
"attempt": job.attempt_count,
"can_retry": job.can_retry
}
if session:
return await _execute(session)
else:
async with database_manager.get_session() as db_session:
return await _execute(db_session)
def _analyze_changes(
self,
old_results: Dict[str, Any],
new_results: Dict[str, Any]
) -> Dict[str, Any]:
"""
Analyze changes between old and new POI detection results.
Args:
old_results: Previous POI detection results
new_results: New POI detection results
Returns:
Change analysis with significance flag
"""
changes = {
"has_significant_changes": False,
"category_changes": {},
"total_poi_change": 0,
"new_categories": [],
"removed_categories": []
}
old_categories = set(old_results.keys())
new_categories = set(new_results.keys())
# New categories
changes["new_categories"] = list(new_categories - old_categories)
# Removed categories
changes["removed_categories"] = list(old_categories - new_categories)
# Analyze changes per category
for category in new_categories:
old_count = old_results.get(category, {}).get("count", 0)
new_count = new_results.get(category, {}).get("count", 0)
change = new_count - old_count
if abs(change) > 0:
changes["category_changes"][category] = {
"old_count": old_count,
"new_count": new_count,
"change": change,
"change_percent": (change / old_count * 100) if old_count > 0 else 100
}
changes["total_poi_change"] += abs(change)
# Determine if changes are significant
# Significant if: 10+ POIs changed OR 20%+ change OR new/removed categories
total_old_pois = sum(data.get("count", 0) for data in old_results.values())
if total_old_pois > 0:
change_percent = (changes["total_poi_change"] / total_old_pois) * 100
changes["total_change_percent"] = change_percent
changes["has_significant_changes"] = (
changes["total_poi_change"] >= 10
or change_percent >= 20
or len(changes["new_categories"]) > 0
or len(changes["removed_categories"]) > 0
)
else:
changes["has_significant_changes"] = changes["total_poi_change"] > 0
return changes
async def get_pending_jobs(
self,
limit: int = 100,
session: Optional[AsyncSession] = None
) -> List[POIRefreshJob]:
"""
Get pending jobs that are due for execution.
Args:
limit: Maximum number of jobs to return
session: Database session
Returns:
List of pending jobs
"""
async def _get_jobs(db_session: AsyncSession):
result = await db_session.execute(
select(POIRefreshJob)
.where(
and_(
POIRefreshJob.status == "pending",
POIRefreshJob.scheduled_at <= datetime.now(timezone.utc)
)
)
.order_by(POIRefreshJob.scheduled_at)
.limit(limit)
)
return result.scalars().all()
if session:
return await _get_jobs(session)
else:
async with database_manager.get_session() as db_session:
return await _get_jobs(db_session)
async def process_pending_jobs(
self,
max_concurrent: int = 5,
session: Optional[AsyncSession] = None
) -> Dict[str, Any]:
"""
Process all pending jobs concurrently.
Args:
max_concurrent: Maximum concurrent job executions
session: Database session
Returns:
Processing summary
"""
pending_jobs = await self.get_pending_jobs(session=session)
if not pending_jobs:
logger.info("No pending POI refresh jobs")
return {
"total_jobs": 0,
"successful": 0,
"failed": 0,
"results": []
}
logger.info(
"Processing pending POI refresh jobs",
count=len(pending_jobs),
max_concurrent=max_concurrent
)
# Process jobs with concurrency limit
semaphore = asyncio.Semaphore(max_concurrent)
async def process_job(job: POIRefreshJob):
async with semaphore:
return await self.execute_refresh_job(str(job.id))
results = await asyncio.gather(
*[process_job(job) for job in pending_jobs],
return_exceptions=True
)
# Summarize results
successful = sum(1 for r in results if isinstance(r, dict) and r.get("status") == "success")
failed = sum(1 for r in results if isinstance(r, dict) and r.get("status") == "failed")
errors = sum(1 for r in results if isinstance(r, Exception))
summary = {
"total_jobs": len(pending_jobs),
"successful": successful,
"failed": failed + errors,
"results": [r if not isinstance(r, Exception) else {"status": "error", "error": str(r)} for r in results]
}
logger.info(
"POI refresh jobs processing completed",
**summary
)
return summary
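# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative; requires the service's database to be
# configured and reachable, and the tenant id / coordinates are assumptions).
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    async def _demo():
        service = POIRefreshService()
        job = await service.schedule_refresh_job(
            tenant_id="00000000-0000-0000-0000-000000000000",
            latitude=40.4168,
            longitude=-3.7038,
        )
        print("scheduled job", job.id, "at", job.scheduled_at)
        # Execute whatever is currently due (normally done by the scheduler).
        # New jobs default to now + refresh_interval_days, so this may report
        # zero due jobs.
        summary = await service.process_pending_jobs(max_concurrent=1)
        print(summary["total_jobs"], summary["successful"], summary["failed"])

    asyncio.run(_demo())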

View File

@@ -0,0 +1,187 @@
"""
POI Refresh Scheduler
Background scheduler for periodic POI context refresh.
Runs every hour to check for and execute pending POI refresh jobs.
"""
import asyncio
from typing import Optional
from datetime import datetime, timezone
import structlog
from app.services.poi_refresh_service import POIRefreshService
logger = structlog.get_logger()
class POIRefreshScheduler:
"""
POI Refresh Scheduler
Background task that periodically checks for and executes
pending POI refresh jobs.
"""
def __init__(
self,
poi_refresh_service: Optional[POIRefreshService] = None,
check_interval_seconds: int = 3600, # 1 hour
max_concurrent_jobs: int = 5
):
"""
Initialize POI refresh scheduler.
Args:
poi_refresh_service: POI refresh service instance
check_interval_seconds: Seconds between checks (default: 3600 = 1 hour)
max_concurrent_jobs: Max concurrent job executions (default: 5)
"""
self.poi_refresh_service = poi_refresh_service or POIRefreshService()
self.check_interval_seconds = check_interval_seconds
self.max_concurrent_jobs = max_concurrent_jobs
self._task: Optional[asyncio.Task] = None
self._running = False
logger.info(
"POI Refresh Scheduler initialized",
check_interval_seconds=check_interval_seconds,
max_concurrent_jobs=max_concurrent_jobs
)
async def start(self):
"""Start the scheduler background task"""
if self._running:
logger.warning("POI Refresh Scheduler already running")
return
self._running = True
self._task = asyncio.create_task(self._run_scheduler())
logger.info("POI Refresh Scheduler started")
async def stop(self):
"""Stop the scheduler background task"""
if not self._running:
return
self._running = False
if self._task:
self._task.cancel()
try:
await self._task
except asyncio.CancelledError:
pass
logger.info("POI Refresh Scheduler stopped")
async def _run_scheduler(self):
"""Main scheduler loop"""
logger.info("POI Refresh Scheduler loop started")
while self._running:
try:
await self._process_cycle()
except Exception as e:
logger.error(
"POI refresh scheduler cycle failed",
error=str(e),
exc_info=True
)
# Wait for next cycle
try:
await asyncio.sleep(self.check_interval_seconds)
except asyncio.CancelledError:
break
logger.info("POI Refresh Scheduler loop ended")
async def _process_cycle(self):
"""Process one scheduler cycle"""
cycle_start = datetime.now(timezone.utc)
logger.debug(
"POI refresh scheduler cycle started",
timestamp=cycle_start.isoformat()
)
# Process pending jobs
result = await self.poi_refresh_service.process_pending_jobs(
max_concurrent=self.max_concurrent_jobs
)
cycle_end = datetime.now(timezone.utc)
cycle_duration = (cycle_end - cycle_start).total_seconds()
if result["total_jobs"] > 0:
logger.info(
"POI refresh scheduler cycle completed",
total_jobs=result["total_jobs"],
successful=result["successful"],
failed=result["failed"],
cycle_duration_seconds=cycle_duration
)
else:
logger.debug(
"POI refresh scheduler cycle completed (no jobs)",
cycle_duration_seconds=cycle_duration
)
async def trigger_immediate_check(self):
"""Trigger an immediate check for pending jobs (bypasses schedule)"""
logger.info("POI refresh scheduler immediate check triggered")
try:
result = await self.poi_refresh_service.process_pending_jobs(
max_concurrent=self.max_concurrent_jobs
)
logger.info(
"POI refresh scheduler immediate check completed",
total_jobs=result["total_jobs"],
successful=result["successful"],
failed=result["failed"]
)
return result
except Exception as e:
logger.error(
"POI refresh scheduler immediate check failed",
error=str(e),
exc_info=True
)
raise
@property
def is_running(self) -> bool:
"""Check if scheduler is running"""
return self._running
# Global scheduler instance
_scheduler_instance: Optional[POIRefreshScheduler] = None
def get_scheduler() -> POIRefreshScheduler:
"""Get global scheduler instance (singleton)"""
global _scheduler_instance
if _scheduler_instance is None:
_scheduler_instance = POIRefreshScheduler()
return _scheduler_instance
async def start_scheduler():
"""Start global POI refresh scheduler"""
scheduler = get_scheduler()
await scheduler.start()
async def stop_scheduler():
"""Stop global POI refresh scheduler"""
scheduler = get_scheduler()
await scheduler.stop()
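# ---------------------------------------------------------------------------
# Minimal wiring sketch (illustrative; assumes the service exposes a FastAPI
# app in app.main and that its lifespan hooks are where background tasks are
# started -- adapt to the actual application entrypoint).
# ---------------------------------------------------------------------------
#
# from contextlib import asynccontextmanager
# from fastapi import FastAPI
#
# @asynccontextmanager
# async def lifespan(app: FastAPI):
#     await start_scheduler()
#     try:
#         yield
#     finally:
#         await stop_scheduler()
#
# app = FastAPI(lifespan=lifespan)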

View File

@@ -0,0 +1,190 @@
# services/external/app/services/tenant_deletion_service.py
"""
Tenant Data Deletion Service for External Service
Handles deletion of tenant-specific data for the External service
"""
from typing import Dict
from sqlalchemy import select, func, delete
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.dialects.postgresql import UUID
import structlog
from shared.services.tenant_deletion import (
BaseTenantDataDeletionService,
TenantDataDeletionResult
)
from app.models import AuditLog, WeatherData
logger = structlog.get_logger(__name__)
class ExternalTenantDeletionService(BaseTenantDataDeletionService):
"""
Service for deleting tenant-specific external data
IMPORTANT NOTE:
The External service primarily stores SHARED city-wide data (weather, traffic)
that is NOT tenant-specific. This data is used by ALL tenants and should
NOT be deleted when a single tenant is removed.
Tenant-specific data in this service:
- Audit logs (tenant_id)
- Tenant-specific weather data (if any exists with tenant_id)
City-wide data that is NOT deleted (shared across all tenants):
- CityWeatherData (no tenant_id - city-wide data)
- CityTrafficData (no tenant_id - city-wide data)
- TrafficData (no tenant_id - city-wide data)
- TrafficMeasurementPoint (no tenant_id - reference data)
- WeatherForecast (no tenant_id - city-wide forecasts)
"""
def __init__(self, db: AsyncSession):
self.db = db
self.service_name = "external"
async def get_tenant_data_preview(self, tenant_id: str) -> Dict[str, int]:
"""
Get counts of what would be deleted for a tenant (dry-run)
Args:
tenant_id: The tenant ID to preview deletion for
Returns:
Dictionary with entity names and their counts
"""
logger.info("external.tenant_deletion.preview", tenant_id=tenant_id)
preview = {}
try:
# Count tenant-specific weather data (if any)
weather_count = await self.db.scalar(
select(func.count(WeatherData.id)).where(
WeatherData.tenant_id == tenant_id
)
)
preview["tenant_weather_data"] = weather_count or 0
# Count audit logs
audit_count = await self.db.scalar(
select(func.count(AuditLog.id)).where(
AuditLog.tenant_id == tenant_id
)
)
preview["audit_logs"] = audit_count or 0
# Add informational message about shared data
logger.info(
"external.tenant_deletion.preview_complete",
tenant_id=tenant_id,
preview=preview,
note="City-wide data (traffic, weather) is shared and will NOT be deleted"
)
except Exception as e:
logger.error(
"external.tenant_deletion.preview_error",
tenant_id=tenant_id,
error=str(e),
exc_info=True
)
raise
return preview
async def delete_tenant_data(self, tenant_id: str) -> TenantDataDeletionResult:
"""
Permanently delete tenant-specific external data
NOTE: This only deletes tenant-specific data. City-wide shared data
(CityWeatherData, CityTrafficData, TrafficData, etc.) is intentionally
preserved as it's used by all tenants.
Args:
tenant_id: The tenant ID to delete data for
Returns:
TenantDataDeletionResult with deletion counts and any errors
"""
logger.info(
"external.tenant_deletion.started",
tenant_id=tenant_id,
note="Only deleting tenant-specific data; city-wide data preserved"
)
result = TenantDataDeletionResult(tenant_id=tenant_id, service_name=self.service_name)
try:
# Step 1: Delete tenant-specific weather data (if any exists)
logger.info("external.tenant_deletion.deleting_weather_data", tenant_id=tenant_id)
weather_result = await self.db.execute(
delete(WeatherData).where(
WeatherData.tenant_id == tenant_id
)
)
result.deleted_counts["tenant_weather_data"] = weather_result.rowcount
logger.info(
"external.tenant_deletion.weather_data_deleted",
tenant_id=tenant_id,
count=weather_result.rowcount
)
# Step 2: Delete audit logs
logger.info("external.tenant_deletion.deleting_audit_logs", tenant_id=tenant_id)
audit_result = await self.db.execute(
delete(AuditLog).where(
AuditLog.tenant_id == tenant_id
)
)
result.deleted_counts["audit_logs"] = audit_result.rowcount
logger.info(
"external.tenant_deletion.audit_logs_deleted",
tenant_id=tenant_id,
count=audit_result.rowcount
)
# Commit the transaction
await self.db.commit()
# Calculate total deleted
total_deleted = sum(result.deleted_counts.values())
# Add informational note about preserved data
result.deleted_counts["_note"] = "City-wide data preserved (shared across tenants)"
logger.info(
"external.tenant_deletion.completed",
tenant_id=tenant_id,
total_deleted=total_deleted,
breakdown=result.deleted_counts,
preserved_data="CityWeatherData, CityTrafficData, TrafficData (shared)"
)
result.success = True
except Exception as e:
await self.db.rollback()
error_msg = f"Failed to delete external data for tenant {tenant_id}: {str(e)}"
logger.error(
"external.tenant_deletion.failed",
tenant_id=tenant_id,
error=str(e),
exc_info=True
)
result.errors.append(error_msg)
result.success = False
return result
def get_external_tenant_deletion_service(db: AsyncSession) -> ExternalTenantDeletionService:
"""
Factory function to create ExternalTenantDeletionService instance
Args:
db: AsyncSession database session
Returns:
ExternalTenantDeletionService instance
"""
return ExternalTenantDeletionService(db)
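# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative; assumes an AsyncSession obtained from the
# service's session factory and an existing tenant id).
# ---------------------------------------------------------------------------
#
# async def _example(session: AsyncSession, tenant_id: str) -> None:
#     service = get_external_tenant_deletion_service(session)
#     preview = await service.get_tenant_data_preview(tenant_id)  # dry-run counts
#     print(preview)
#     result = await service.delete_tenant_data(tenant_id)        # permanent delete
#     print(result.success, result.deleted_counts, result.errors)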

View File

@@ -0,0 +1,411 @@
# ================================================================
# services/data/app/services/traffic_service.py
# ================================================================
"""
Abstracted Traffic Service - Universal interface for traffic data across multiple cities
"""
import asyncio
from datetime import datetime
from typing import Dict, List, Any, Optional, Tuple
from sqlalchemy.ext.asyncio import AsyncSession
import structlog
from app.external.apis.traffic import UniversalTrafficClient
from app.models.traffic import TrafficData
from app.repositories.traffic_repository import TrafficRepository
logger = structlog.get_logger()
from app.core.database import database_manager
class TrafficService:
"""
Abstracted traffic service providing unified interface for traffic data
Routes requests to appropriate city-specific clients automatically
"""
def __init__(self):
self.universal_client = UniversalTrafficClient()
self.database_manager = database_manager
async def get_current_traffic(
self,
latitude: float,
longitude: float,
tenant_id: Optional[str] = None,
force_refresh: bool = False,
cache_duration_minutes: int = 5
) -> Optional[Dict[str, Any]]:
"""
Get current traffic data with intelligent cache-first strategy
Args:
latitude: Query location latitude
longitude: Query location longitude
tenant_id: Optional tenant identifier for logging/analytics
force_refresh: If True, bypass cache and fetch fresh data
cache_duration_minutes: How long to consider cached data valid (default: 5 minutes)
Returns:
Dict with current traffic data or None if not available
"""
try:
logger.info("Getting current traffic data",
lat=latitude, lon=longitude, tenant_id=tenant_id,
force_refresh=force_refresh, cache_duration=cache_duration_minutes)
location_id = f"{latitude:.4f},{longitude:.4f}"
# Step 1: Check database cache first (unless force_refresh)
if not force_refresh:
async with self.database_manager.get_session() as session:
traffic_repo = TrafficRepository(session)
# Get recent traffic data (within cache_duration_minutes)
from datetime import timedelta
cache_cutoff = datetime.now() - timedelta(minutes=cache_duration_minutes)
cached_records = await traffic_repo.get_recent_by_location(
latitude, longitude, cache_cutoff, tenant_id
)
if cached_records:
logger.info("Current traffic data found in cache",
count=len(cached_records), cache_age_minutes=cache_duration_minutes)
# Return the most recent cached record
latest_record = max(cached_records, key=lambda x: x.date)
cached_data = self._convert_db_record_to_dict(latest_record)
# Add cache metadata
cached_data['service_metadata'] = {
'request_timestamp': datetime.now().isoformat(),
'tenant_id': tenant_id,
'service_version': '2.0',
'query_location': {'latitude': latitude, 'longitude': longitude},
'data_source': 'cache',
'cache_age_minutes': (datetime.now() - latest_record.date).total_seconds() / 60
}
return cached_data
# Step 2: Fetch fresh data from external API
logger.info("Fetching fresh current traffic data" +
(" (force refresh)" if force_refresh else " (no valid cache)"))
traffic_data = await self.universal_client.get_current_traffic(latitude, longitude)
if traffic_data:
# Add service metadata
traffic_data['service_metadata'] = {
'request_timestamp': datetime.now().isoformat(),
'tenant_id': tenant_id,
'service_version': '2.0',
'query_location': {'latitude': latitude, 'longitude': longitude},
'data_source': 'fresh_api'
}
# Step 3: Store fresh data in cache for future requests
try:
async with self.database_manager.get_session() as session:
traffic_repo = TrafficRepository(session)
# Store the fresh data as a single record
stored_count = await traffic_repo.store_traffic_data_batch(
[traffic_data], location_id, tenant_id
)
logger.info("Stored fresh current traffic data in cache",
stored_records=stored_count)
except Exception as cache_error:
logger.warning("Failed to cache current traffic data", error=str(cache_error))
logger.info("Successfully retrieved fresh current traffic data",
lat=latitude, lon=longitude,
source=traffic_data.get('source', 'unknown'))
return traffic_data
else:
logger.warning("No current traffic data available",
lat=latitude, lon=longitude)
return None
except Exception as e:
logger.error("Error getting current traffic data",
lat=latitude, lon=longitude, error=str(e))
return None
async def get_historical_traffic(
self,
latitude: float,
longitude: float,
start_date: datetime,
end_date: datetime,
tenant_id: Optional[str] = None
) -> List[Dict[str, Any]]:
"""
Get historical traffic data for any supported location with database storage
Args:
latitude: Query location latitude
longitude: Query location longitude
start_date: Start date for historical data
end_date: End date for historical data
tenant_id: Optional tenant identifier
Returns:
List of historical traffic data dictionaries
"""
try:
logger.info("Getting historical traffic data",
lat=latitude, lon=longitude,
start=start_date, end=end_date, tenant_id=tenant_id)
# Validate date range
if start_date >= end_date:
logger.warning("Invalid date range", start=start_date, end=end_date)
return []
location_id = f"{latitude:.4f},{longitude:.4f}"
async with self.database_manager.get_session() as session:
traffic_repo = TrafficRepository(session)
# Check database first using the repository
db_records = await traffic_repo.get_by_location_and_date_range(
latitude, longitude, start_date, end_date, tenant_id
)
if db_records:
logger.info("Historical traffic data found in database",
count=len(db_records))
return [self._convert_db_record_to_dict(record) for record in db_records]
# Delegate to universal client if not in DB
traffic_data = await self.universal_client.get_historical_traffic(
latitude, longitude, start_date, end_date
)
if traffic_data:
# Add service metadata to each record
for record in traffic_data:
record['service_metadata'] = {
'request_timestamp': datetime.now().isoformat(),
'tenant_id': tenant_id,
'service_version': '2.0',
'query_location': {'latitude': latitude, 'longitude': longitude},
'date_range': {
'start': start_date.isoformat(),
'end': end_date.isoformat()
}
}
async with self.database_manager.get_session() as session:
traffic_repo = TrafficRepository(session)
# Store in database using the repository
stored_count = await traffic_repo.store_traffic_data_batch(
traffic_data, location_id, tenant_id
)
logger.info("Traffic data stored for re-training",
fetched=len(traffic_data), stored=stored_count,
location=location_id)
logger.info("Successfully retrieved historical traffic data",
lat=latitude, lon=longitude, records=len(traffic_data))
return traffic_data
else:
logger.info("No historical traffic data available",
lat=latitude, lon=longitude)
return []
except Exception as e:
logger.error("Error getting historical traffic data",
lat=latitude, lon=longitude, error=str(e))
return []
def _convert_db_record_to_dict(self, record: TrafficData) -> Dict[str, Any]:
"""Convert database record to dictionary format"""
return {
'date': record.date,
'traffic_volume': record.traffic_volume,
'pedestrian_count': record.pedestrian_count,
'congestion_level': record.congestion_level,
'average_speed': record.average_speed,
'source': record.source,
'location_id': record.location_id,
'raw_data': record.raw_data
}
async def get_traffic_events(
self,
latitude: float,
longitude: float,
radius_km: float = 5.0,
tenant_id: Optional[str] = None
) -> List[Dict[str, Any]]:
"""
Get traffic events and incidents for any supported location
Args:
latitude: Query location latitude
longitude: Query location longitude
radius_km: Search radius in kilometers
tenant_id: Optional tenant identifier
Returns:
List of traffic events
"""
try:
logger.info("Getting traffic events",
lat=latitude, lon=longitude, radius=radius_km, tenant_id=tenant_id)
# Delegate to universal client
events = await self.universal_client.get_events(latitude, longitude, radius_km)
# Add metadata to events
for event in events:
event['service_metadata'] = {
'request_timestamp': datetime.now().isoformat(),
'tenant_id': tenant_id,
'service_version': '2.0',
'query_location': {'latitude': latitude, 'longitude': longitude},
'search_radius_km': radius_km
}
logger.info("Retrieved traffic events",
lat=latitude, lon=longitude, events=len(events))
return events
except Exception as e:
logger.error("Error getting traffic events",
lat=latitude, lon=longitude, error=str(e))
return []
def get_location_info(self, latitude: float, longitude: float) -> Dict[str, Any]:
"""
Get information about traffic data availability for location
Args:
latitude: Query location latitude
longitude: Query location longitude
Returns:
Dict with location support information
"""
try:
info = self.universal_client.get_location_info(latitude, longitude)
# Add service layer information
info['service_layer'] = {
'version': '2.0',
'abstraction_level': 'universal',
'supported_operations': [
'current_traffic',
'historical_traffic',
'traffic_events',
'bulk_requests'
]
}
return info
except Exception as e:
logger.error("Error getting location info",
lat=latitude, lon=longitude, error=str(e))
return {
'supported': False,
'error': str(e),
'service_layer': {'version': '2.0'}
}
async def get_stored_traffic_for_training(self,
latitude: float,
longitude: float,
start_date: datetime,
end_date: datetime) -> List[Dict[str, Any]]:
"""Retrieve stored traffic data specifically for training purposes"""
try:
async with self.database_manager.get_session() as session:
traffic_repo = TrafficRepository(session)
records = await traffic_repo.get_historical_traffic_for_training(
latitude, longitude, start_date, end_date
)
# Convert to training format
training_data = []
for record in records:
training_data.append({
'date': record.date,
'traffic_volume': record.traffic_volume,
'pedestrian_count': record.pedestrian_count,
'congestion_level': record.congestion_level,
'average_speed': record.average_speed,
'location_id': record.location_id,
'source': record.source,
'measurement_point_id': record.raw_data # Contains additional metadata
})
logger.info(f"Retrieved {len(training_data)} traffic records for training",
location_id=f"{latitude:.4f},{longitude:.4f}", start=start_date, end=end_date)
return training_data
except Exception as e:
logger.error("Failed to retrieve traffic data for training",
error=str(e), location_id=f"{latitude:.4f},{longitude:.4f}")
return []
# ============= UNIFIED CONVENIENCE METHODS =============
async def get_current_traffic_fresh(
self,
latitude: float,
longitude: float,
tenant_id: Optional[str] = None
) -> Optional[Dict[str, Any]]:
"""Get current traffic data, forcing fresh API call (bypass cache)"""
return await self.get_current_traffic(
latitude=latitude,
longitude=longitude,
tenant_id=tenant_id,
force_refresh=True
)
async def get_historical_traffic_fresh(
self,
latitude: float,
longitude: float,
start_date: datetime,
end_date: datetime,
tenant_id: Optional[str] = None
) -> List[Dict[str, Any]]:
"""Get historical traffic data, forcing fresh API call (bypass cache)"""
# For historical data, we can implement force_refresh logic
# For now, historical already has good cache-first logic
return await self.get_historical_traffic(
latitude=latitude,
longitude=longitude,
start_date=start_date,
end_date=end_date,
tenant_id=tenant_id
)
async def clear_traffic_cache(
self,
latitude: float,
longitude: float,
tenant_id: Optional[str] = None
) -> bool:
"""Clear cached traffic data for a specific location"""
try:
location_id = f"{latitude:.4f},{longitude:.4f}"
async with self.database_manager.get_session() as session:
traffic_repo = TrafficRepository(session)
# This would need a new repository method to delete by location
# For now, just log the intent
logger.info("Traffic cache clear requested",
location_id=location_id, tenant_id=tenant_id)
return True
except Exception as e:
logger.error("Error clearing traffic cache",
lat=latitude, lon=longitude, error=str(e))
return False
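# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative; the coordinates are assumptions and the
# call requires the configured traffic providers and database to be reachable).
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    async def _demo():
        service = TrafficService()
        current = await service.get_current_traffic(40.4168, -3.7038, tenant_id="demo-tenant")
        if current:
            print(current.get("congestion_level"), current["service_metadata"]["data_source"])
        else:
            print("No traffic data available for this location")

    asyncio.run(_demo())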

View File

@@ -0,0 +1,219 @@
# services/data/app/services/weather_service.py - REVISED VERSION
"""Weather data service with repository pattern"""
from typing import List, Dict, Any, Optional
from datetime import datetime, timedelta
from sqlalchemy.ext.asyncio import AsyncSession
import structlog
from app.models.weather import WeatherData, WeatherForecast
from app.external.aemet import AEMETClient
from app.schemas.weather import WeatherDataResponse, WeatherForecastResponse, WeatherForecastAPIResponse, HourlyForecastResponse
from app.repositories.weather_repository import WeatherRepository
logger = structlog.get_logger()
from app.core.database import database_manager
class WeatherService:
def __init__(self):
self.aemet_client = AEMETClient()
self.database_manager = database_manager
async def get_current_weather(self, latitude: float, longitude: float) -> Optional[WeatherDataResponse]:
"""Get current weather for location with graceful failure handling"""
try:
logger.debug("Getting current weather", lat=latitude, lon=longitude)
weather_data = await self.aemet_client.get_current_weather(latitude, longitude)
if weather_data:
logger.debug("Weather data received", source=weather_data.get('source'))
return WeatherDataResponse(**weather_data)
else:
logger.warning("No weather data received from AEMET client - providing service unavailable response")
# Return a response indicating service unavailable rather than None
return WeatherDataResponse(
date=datetime.utcnow().isoformat(),
temperature=None,
precipitation=None,
humidity=None,
wind_speed=None,
pressure=None,
description="Servicio meteorológico temporalmente no disponible",
source="unavailable"
)
except Exception as e:
logger.error("Failed to get current weather", error=str(e), lat=latitude, lon=longitude)
# Return error response rather than None to prevent 404
return WeatherDataResponse(
date=datetime.utcnow().isoformat(),
temperature=None,
precipitation=None,
humidity=None,
wind_speed=None,
pressure=None,
description="Error al obtener datos meteorológicos",
source="error"
)
async def get_weather_forecast(self, latitude: float, longitude: float, days: int = 7) -> List[Dict[str, Any]]:
"""Get weather forecast for location - returns plain dicts"""
try:
logger.debug("Getting weather forecast", lat=latitude, lon=longitude, days=days)
forecast_data = await self.aemet_client.get_forecast(latitude, longitude, days)
if forecast_data:
logger.debug("Forecast data received", count=len(forecast_data))
# Validate and normalize each forecast item
valid_forecasts = []
for item in forecast_data:
try:
if isinstance(item, dict):
# Ensure required fields are present and convert to serializable format
forecast_date = item.get("forecast_date", datetime.now())
generated_at = item.get("generated_at", datetime.now())
forecast_item = {
"forecast_date": forecast_date.isoformat() if isinstance(forecast_date, datetime) else str(forecast_date),
"generated_at": generated_at.isoformat() if isinstance(generated_at, datetime) else str(generated_at),
"temperature": float(item.get("temperature", 15.0)),
"precipitation": float(item.get("precipitation", 0.0)),
"humidity": float(item.get("humidity", 50.0)),
"wind_speed": float(item.get("wind_speed", 10.0)),
"description": str(item.get("description", "Variable")),
"source": str(item.get("source", "unknown"))
}
valid_forecasts.append(forecast_item)
else:
logger.warning("Invalid forecast item type", item_type=type(item))
except Exception as item_error:
logger.warning("Error processing forecast item", error=str(item_error), item=item)
continue
logger.debug("Valid forecasts processed", count=len(valid_forecasts))
return valid_forecasts
else:
logger.warning("No forecast data received from AEMET client")
return []
except Exception as e:
logger.error("Failed to get weather forecast", error=str(e), lat=latitude, lon=longitude)
return []
async def get_hourly_forecast(self, latitude: float, longitude: float, hours: int = 48) -> List[HourlyForecastResponse]:
"""Get hourly weather forecast for location"""
try:
logger.debug("Getting hourly weather forecast", lat=latitude, lon=longitude, hours=hours)
hourly_data = await self.aemet_client.get_hourly_forecast(latitude, longitude, hours)
if hourly_data:
logger.debug("Hourly forecast data received", count=len(hourly_data))
# Validate each hourly forecast item before creating response
valid_forecasts = []
for item in hourly_data:
try:
if isinstance(item, dict):
# Ensure required fields are present
hourly_item = {
"forecast_datetime": item.get("forecast_datetime", datetime.now()),
"generated_at": item.get("generated_at", datetime.now()),
"temperature": float(item.get("temperature", 15.0)),
"precipitation": float(item.get("precipitation", 0.0)),
"humidity": float(item.get("humidity", 50.0)),
"wind_speed": float(item.get("wind_speed", 10.0)),
"description": str(item.get("description", "Variable")),
"source": str(item.get("source", "unknown")),
"hour": int(item.get("hour", 0))
}
valid_forecasts.append(HourlyForecastResponse(**hourly_item))
else:
logger.warning("Invalid hourly forecast item type", item_type=type(item))
except Exception as item_error:
logger.warning("Error processing hourly forecast item", error=str(item_error), item=item)
continue
logger.debug("Valid hourly forecasts processed", count=len(valid_forecasts))
return valid_forecasts
else:
logger.warning("No hourly forecast data received from AEMET client")
return []
except Exception as e:
logger.error("Failed to get hourly weather forecast", error=str(e), lat=latitude, lon=longitude)
return []
async def get_historical_weather(self,
latitude: float,
longitude: float,
start_date: datetime,
end_date: datetime) -> List[WeatherDataResponse]:
"""Get historical weather data"""
try:
logger.debug("Getting historical weather",
lat=latitude, lon=longitude,
start=start_date, end=end_date)
location_id = f"{latitude:.4f},{longitude:.4f}"
async with self.database_manager.get_session() as session:
weather_repository = WeatherRepository(session)
# Use the repository to get data from the database
db_records = await weather_repository.get_historical_weather(
location_id,
start_date,
end_date
)
if db_records:
logger.debug("Historical data found in database", count=len(db_records))
return [WeatherDataResponse(
date=record.date,
temperature=record.temperature,
precipitation=record.precipitation,
humidity=record.humidity,
wind_speed=record.wind_speed,
pressure=record.pressure,
description=record.description,
source=record.source
) for record in db_records]
# If not in database, fetch from API and store
logger.debug("Fetching historical data from AEMET API")
weather_data = await self.aemet_client.get_historical_weather(
latitude, longitude, start_date, end_date
)
if weather_data:
# Use the repository to store the new data
records_to_store = [{
"location_id": location_id,
"city": "Madrid", # Default city for AEMET data
"date": data.get('date', datetime.now()),
"temperature": data.get('temperature'),
"precipitation": data.get('precipitation'),
"humidity": data.get('humidity'),
"wind_speed": data.get('wind_speed'),
"pressure": data.get('pressure'),
"description": data.get('description'),
"source": "aemet",
"data_type": "historical",
"raw_data": data, # Pass as dict, not string
"tenant_id": None
} for data in weather_data]
async with self.database_manager.get_session() as session:
weather_repository = WeatherRepository(session)
await weather_repository.bulk_create_weather_data(records_to_store)
logger.debug("Historical data stored in database", count=len(weather_data))
return [WeatherDataResponse(**item) for item in weather_data]
else:
logger.warning("No historical weather data received")
return []
except Exception as e:
logger.error("Failed to get historical weather", error=str(e))
return []
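# --- Illustrative usage sketch (not part of the original file) ---
# A minimal example of how a FastAPI route might call WeatherService; the
# router module and endpoint path below are assumptions for illustration only.
#
#   from fastapi import APIRouter
#
#   router = APIRouter()
#   weather_service = WeatherService()
#
#   @router.get("/weather/current")
#   async def current_weather(latitude: float, longitude: float):
#       # get_current_weather always returns a WeatherDataResponse, even when
#       # AEMET is unavailable, so callers never see a 404 for upstream outages.
#       return await weather_service.get_current_weather(latitude, longitude)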


@@ -0,0 +1,342 @@
"""
Calendar Suggester Utility
Provides intelligent school calendar suggestions based on POI detection data,
tenant location, and heuristics optimized for bakery demand forecasting.
"""
from typing import Optional, Dict, List, Any, Tuple
from datetime import datetime, date, timezone
import structlog
logger = structlog.get_logger()
class CalendarSuggester:
"""
Suggests appropriate school calendars for tenants based on location context.
Uses POI detection data, proximity analysis, and bakery-specific heuristics
to provide intelligent calendar recommendations with confidence scores.
"""
def __init__(self):
self.logger = logger
def suggest_calendar_for_tenant(
self,
city_id: str,
available_calendars: List[Dict[str, Any]],
poi_context: Optional[Dict[str, Any]] = None,
tenant_data: Optional[Dict[str, Any]] = None
) -> Dict[str, Any]:
"""
Suggest the most appropriate calendar for a tenant.
Args:
city_id: Normalized city ID (e.g., "madrid")
available_calendars: List of available school calendars for the city
poi_context: Optional POI detection results including school data
tenant_data: Optional tenant information (location, etc.)
Returns:
Dict with:
- suggested_calendar_id: UUID of suggested calendar or None
- calendar_name: Name of suggested calendar
- confidence: Float 0.0-1.0 confidence score
- reasoning: List of reasoning steps
- fallback_calendars: Alternative suggestions
- should_assign: Boolean recommendation to auto-assign
"""
if not available_calendars:
return self._no_calendars_available(city_id)
# Get current academic year
academic_year = self._get_current_academic_year()
# Filter calendars for current academic year
current_year_calendars = [
cal for cal in available_calendars
if cal.get("academic_year") == academic_year
]
if not current_year_calendars:
# Fallback to any calendar if current year not available
current_year_calendars = available_calendars
self.logger.warning(
"No calendars for current academic year, using all available",
city_id=city_id,
academic_year=academic_year
)
# Analyze POI context if available
school_analysis = self._analyze_schools_from_poi(poi_context) if poi_context else None
# Apply bakery-specific heuristics
suggestion = self._apply_suggestion_heuristics(
current_year_calendars,
school_analysis,
city_id
)
return suggestion
def _get_current_academic_year(self) -> str:
"""
Determine current academic year based on date.
Academic year runs September to June (Spain):
- Jan-Aug: Previous year (e.g., 2024-2025)
- Sep-Dec: Current year (e.g., 2025-2026)
Returns:
Academic year string (e.g., "2024-2025")
"""
today = date.today()
year = today.year
# Academic year starts in September
if today.month >= 9: # September onwards
return f"{year}-{year + 1}"
else: # January-August
return f"{year - 1}-{year}"
def _analyze_schools_from_poi(
self,
poi_context: Dict[str, Any]
) -> Optional[Dict[str, Any]]:
"""
Analyze school POIs to infer school type preferences.
Args:
poi_context: POI detection results
Returns:
Dict with:
- has_schools_nearby: Boolean
- school_count: Int count of schools
- nearest_distance: Float distance to nearest school (meters)
- proximity_score: Float proximity score
- school_names: List of detected school names
"""
try:
poi_results = poi_context.get("poi_detection_results", {})
schools_data = poi_results.get("schools", {})
if not schools_data:
return None
school_pois = schools_data.get("pois", [])
school_count = len(school_pois)
if school_count == 0:
return None
# Extract school details
school_names = [
poi.get("name", "Unknown School")
for poi in school_pois
if poi.get("name")
]
# Get proximity metrics
features = schools_data.get("features", {})
proximity_score = features.get("proximity_score", 0.0)
# Calculate nearest distance (approximate from POI data)
nearest_distance = None
if school_pois:
# If we have POIs, estimate nearest distance
# This is approximate - exact calculation would require tenant coords
nearest_distance = 100.0 # Default assumption if schools detected
return {
"has_schools_nearby": True,
"school_count": school_count,
"nearest_distance": nearest_distance,
"proximity_score": proximity_score,
"school_names": school_names
}
except Exception as e:
self.logger.warning(
"Failed to analyze schools from POI",
error=str(e)
)
return None
def _apply_suggestion_heuristics(
self,
calendars: List[Dict[str, Any]],
school_analysis: Optional[Dict[str, Any]],
city_id: str
) -> Dict[str, Any]:
"""
Apply heuristics to suggest best calendar.
Bakery-specific heuristics:
1. If schools detected nearby -> Prefer primary (stronger morning rush)
2. If no schools detected -> Still suggest primary (more common, safer default)
3. Primary schools have stronger impact on bakery traffic
Args:
calendars: List of available calendars
school_analysis: Analysis of nearby schools
city_id: City identifier
Returns:
Suggestion dict with confidence and reasoning
"""
reasoning = []
confidence = 0.0
# Separate calendars by type
primary_calendars = [c for c in calendars if c.get("school_type") == "primary"]
secondary_calendars = [c for c in calendars if c.get("school_type") == "secondary"]
other_calendars = [c for c in calendars if c.get("school_type") not in ["primary", "secondary"]]
# Heuristic 1: Schools detected nearby
if school_analysis and school_analysis.get("has_schools_nearby"):
school_count = school_analysis.get("school_count", 0)
proximity_score = school_analysis.get("proximity_score", 0.0)
reasoning.append(f"Detected {school_count} schools nearby (proximity score: {proximity_score:.2f})")
if primary_calendars:
suggested = primary_calendars[0]
confidence = min(0.85, 0.65 + (proximity_score * 0.1)) # 65-85% confidence
reasoning.append("Primary schools create strong morning rush (7:30-9am drop-off)")
reasoning.append("Primary calendars recommended for bakeries near schools")
elif secondary_calendars:
suggested = secondary_calendars[0]
confidence = 0.70
reasoning.append("Secondary school calendars available (later morning start)")
else:
suggested = calendars[0]
confidence = 0.50
reasoning.append("Using available calendar (school type not specified)")
# Heuristic 2: No schools detected
else:
reasoning.append("No schools detected within 500m radius")
if primary_calendars:
suggested = primary_calendars[0]
confidence = 0.60 # Lower confidence without detected schools
reasoning.append("Defaulting to primary calendar (more common, safer choice)")
reasoning.append("Primary school holidays still affect general foot traffic")
elif secondary_calendars:
suggested = secondary_calendars[0]
confidence = 0.55
reasoning.append("Secondary calendar available as default")
elif other_calendars:
suggested = other_calendars[0]
confidence = 0.50
reasoning.append("Using available calendar")
else:
suggested = calendars[0]
confidence = 0.45
reasoning.append("No preferred calendar type available")
# Confidence adjustment based on school analysis quality
if school_analysis:
if school_analysis.get("school_count", 0) >= 3:
confidence = min(1.0, confidence + 0.05) # Boost for multiple schools
reasoning.append("High confidence: Multiple schools detected")
proximity = school_analysis.get("proximity_score", 0.0)
if proximity > 2.0:
confidence = min(1.0, confidence + 0.05) # Boost for close proximity
reasoning.append("High confidence: Schools very close to bakery")
# Determine if we should auto-assign
# Only auto-assign if confidence >= 75% AND schools detected
should_auto_assign = (
confidence >= 0.75 and
school_analysis is not None and
school_analysis.get("has_schools_nearby", False)
)
# Build fallback suggestions
fallback_calendars = []
for cal in calendars:
if cal.get("id") != suggested.get("id"):
fallback_calendars.append({
"calendar_id": str(cal.get("id")),
"calendar_name": cal.get("name"),
"school_type": cal.get("school_type"),
"academic_year": cal.get("academic_year")
})
return {
"suggested_calendar_id": str(suggested.get("id")),
"calendar_name": suggested.get("name"),
"school_type": suggested.get("school_type"),
"academic_year": suggested.get("academic_year"),
"confidence": round(confidence, 2),
"confidence_percentage": round(confidence * 100, 1),
"reasoning": reasoning,
"fallback_calendars": fallback_calendars[:2], # Top 2 alternatives
"should_auto_assign": should_auto_assign,
"school_analysis": school_analysis,
"city_id": city_id
}
def _no_calendars_available(self, city_id: str) -> Dict[str, Any]:
"""Return response when no calendars available for city."""
return {
"suggested_calendar_id": None,
"calendar_name": None,
"school_type": None,
"academic_year": None,
"confidence": 0.0,
"confidence_percentage": 0.0,
"reasoning": [
f"No school calendars configured for city: {city_id}",
"Calendar assignment not possible at this time",
"Location context created without calendar (can be added later)"
],
"fallback_calendars": [],
"should_auto_assign": False,
"school_analysis": None,
"city_id": city_id
}
def format_suggestion_for_admin(self, suggestion: Dict[str, Any]) -> str:
"""
Format suggestion as human-readable text for admin UI.
Args:
suggestion: Suggestion dict from suggest_calendar_for_tenant
Returns:
Formatted string for display
"""
if not suggestion.get("suggested_calendar_id"):
return f"⚠️ No calendars available for {suggestion.get('city_id', 'this city')}"
confidence_pct = suggestion.get("confidence_percentage", 0)
calendar_name = suggestion.get("calendar_name", "Unknown")
school_type = suggestion.get("school_type", "").capitalize()
# Confidence emoji
if confidence_pct >= 80:
emoji = ""
elif confidence_pct >= 60:
emoji = "📊"
else:
emoji = "💡"
text = f"{emoji} **Suggested**: {calendar_name}\n"
text += f"**Type**: {school_type} | **Confidence**: {confidence_pct}%\n\n"
text += "**Reasoning**:\n"
for reason in suggestion.get("reasoning", []):
text += f"{reason}\n"
if suggestion.get("fallback_calendars"):
text += "\n**Alternatives**:\n"
for alt in suggestion.get("fallback_calendars", [])[:2]:
text += f"{alt.get('calendar_name')} ({alt.get('school_type')})\n"
return text
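# --- Illustrative usage sketch (not part of the original file) ---
# Shows the expected input/output shape of suggest_calendar_for_tenant; the
# calendar and POI dictionaries below are fabricated sample data.
#
#   suggester = CalendarSuggester()
#   suggestion = suggester.suggest_calendar_for_tenant(
#       city_id="madrid",
#       available_calendars=[{
#           "id": "00000000-0000-0000-0000-000000000001",
#           "name": "Madrid Primary 2024-2025",
#           "school_type": "primary",
#           "academic_year": "2024-2025",
#       }],
#       poi_context={"poi_detection_results": {"schools": {
#           "pois": [{"name": "CEIP Ejemplo"}],
#           "features": {"proximity_score": 2.5},
#       }}},
#   )
#   # With a primary calendar and schools detected nearby, confidence comes out
#   # around 0.85-0.95 and should_auto_assign is True (threshold: >= 0.75).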

141
services/external/migrations/env.py vendored Normal file

@@ -0,0 +1,141 @@
"""Alembic environment configuration for external service"""
import asyncio
import os
import sys
from logging.config import fileConfig
from sqlalchemy import pool
from sqlalchemy.engine import Connection
from sqlalchemy.ext.asyncio import async_engine_from_config
from alembic import context
# Add the service directory to the Python path
service_path = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
if service_path not in sys.path:
sys.path.insert(0, service_path)
# Add shared modules to path
shared_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "shared"))
if shared_path not in sys.path:
sys.path.insert(0, shared_path)
try:
from app.core.config import settings
from shared.database.base import Base
# Import all models to ensure they are registered with Base.metadata
from app.models import * # noqa: F401, F403
except ImportError as e:
print(f"Import error in migrations env.py: {e}")
print(f"Current Python path: {sys.path}")
raise
# this is the Alembic Config object
config = context.config
# Determine service name from file path
service_name = os.path.basename(os.path.dirname(os.path.dirname(__file__)))
service_name_upper = service_name.upper().replace('-', '_')
# Set database URL from environment variables with multiple fallback strategies
database_url = (
os.getenv(f'{service_name_upper}_DATABASE_URL') or # Service-specific
os.getenv('DATABASE_URL') # Generic fallback
)
# If DATABASE_URL is not set, construct from individual components
if not database_url:
# Try generic PostgreSQL environment variables first
postgres_host = os.getenv('POSTGRES_HOST')
postgres_port = os.getenv('POSTGRES_PORT', '5432')
postgres_db = os.getenv('POSTGRES_DB')
postgres_user = os.getenv('POSTGRES_USER')
postgres_password = os.getenv('POSTGRES_PASSWORD')
if all([postgres_host, postgres_db, postgres_user, postgres_password]):
database_url = f"postgresql+asyncpg://{postgres_user}:{postgres_password}@{postgres_host}:{postgres_port}/{postgres_db}"
else:
# Try service-specific environment variables
db_host = os.getenv(f'{service_name_upper}_DB_HOST', f'{service_name}-db-service')
db_port = os.getenv(f'{service_name_upper}_DB_PORT', '5432')
db_name = os.getenv(f'{service_name_upper}_DB_NAME', f'{service_name.replace("-", "_")}_db')
db_user = os.getenv(f'{service_name_upper}_DB_USER', f'{service_name.replace("-", "_")}_user')
db_password = os.getenv(f'{service_name_upper}_DB_PASSWORD')
if db_password:
database_url = f"postgresql+asyncpg://{db_user}:{db_password}@{db_host}:{db_port}/{db_name}"
else:
# Final fallback: try to get from settings object
try:
database_url = getattr(settings, 'DATABASE_URL', None)
except Exception:
pass
if not database_url:
error_msg = f"ERROR: No database URL configured for {service_name} service"
print(error_msg)
raise Exception(error_msg)
config.set_main_option("sqlalchemy.url", database_url)
# Interpret the config file for Python logging
if config.config_file_name is not None:
fileConfig(config.config_file_name)
# Set target metadata
target_metadata = Base.metadata
def run_migrations_offline() -> None:
"""Run migrations in 'offline' mode."""
url = config.get_main_option("sqlalchemy.url")
context.configure(
url=url,
target_metadata=target_metadata,
literal_binds=True,
dialect_opts={"paramstyle": "named"},
compare_type=True,
compare_server_default=True,
)
with context.begin_transaction():
context.run_migrations()
def do_run_migrations(connection: Connection) -> None:
"""Execute migrations with the given connection."""
context.configure(
connection=connection,
target_metadata=target_metadata,
compare_type=True,
compare_server_default=True,
)
with context.begin_transaction():
context.run_migrations()
async def run_async_migrations() -> None:
"""Run migrations in 'online' mode with async support."""
connectable = async_engine_from_config(
config.get_section(config.config_ini_section, {}),
prefix="sqlalchemy.",
poolclass=pool.NullPool,
)
async with connectable.connect() as connection:
await connection.run_sync(do_run_migrations)
await connectable.dispose()
def run_migrations_online() -> None:
"""Run migrations in 'online' mode."""
asyncio.run(run_async_migrations())
if context.is_offline_mode():
run_migrations_offline()
else:
run_migrations_online()
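# --- Usage note (not part of the original file) ---
# With one of the database URL sources above available (e.g. DATABASE_URL or
# EXTERNAL_DATABASE_URL), migrations are run with the standard Alembic CLI from
# the service directory, for example:
#   alembic upgrade head                          # apply all pending revisions
#   alembic downgrade -1                          # roll back the latest revision
#   alembic revision --autogenerate -m "message"  # generate a new revision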


@@ -0,0 +1,26 @@
"""${message}
Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
${imports if imports else ""}
# revision identifiers, used by Alembic.
revision: str = ${repr(up_revision)}
down_revision: Union[str, None] = ${repr(down_revision)}
branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
def upgrade() -> None:
${upgrades if upgrades else "pass"}
def downgrade() -> None:
${downgrades if downgrades else "pass"}


@@ -0,0 +1,464 @@
"""unified_initial_schema
Revision ID: 00001
Revises:
Create Date: 2025-11-10 19:00:00.000000+01:00
Complete unified initial schema for External Service including:
- Weather data collection (weather_data, weather_forecasts, city_weather_data)
- Traffic data collection (traffic_data, traffic_measurement_points, traffic_background_jobs, city_traffic_data)
- School calendars and location context (school_calendars, tenant_location_contexts)
- POI detection system (tenant_poi_contexts, poi_refresh_jobs)
- Audit logging (audit_logs)
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects.postgresql import UUID, JSONB
# revision identifiers, used by Alembic.
revision: str = '00001'
down_revision: Union[str, None] = None
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
"""Create all tables for External Service"""
# ============================================================================
# AUDIT LOGS
# ============================================================================
op.create_table(
'audit_logs',
sa.Column('id', UUID(as_uuid=True), nullable=False),
sa.Column('tenant_id', UUID(as_uuid=True), nullable=False),
sa.Column('user_id', UUID(as_uuid=True), nullable=False),
sa.Column('action', sa.String(length=100), nullable=False),
sa.Column('resource_type', sa.String(length=100), nullable=False),
sa.Column('resource_id', sa.String(length=255), nullable=True),
sa.Column('severity', sa.String(length=20), nullable=False),
sa.Column('service_name', sa.String(length=100), nullable=False),
sa.Column('description', sa.Text(), nullable=True),
sa.Column('changes', JSONB, nullable=True),
sa.Column('audit_metadata', JSONB, nullable=True),
sa.Column('ip_address', sa.String(length=45), nullable=True),
sa.Column('user_agent', sa.Text(), nullable=True),
sa.Column('endpoint', sa.String(length=255), nullable=True),
sa.Column('method', sa.String(length=10), nullable=True),
sa.Column('created_at', sa.DateTime(timezone=True), nullable=False),
sa.PrimaryKeyConstraint('id')
)
op.create_index('idx_audit_resource_type_action', 'audit_logs', ['resource_type', 'action'])
op.create_index('idx_audit_service_created', 'audit_logs', ['service_name', 'created_at'])
op.create_index('idx_audit_severity_created', 'audit_logs', ['severity', 'created_at'])
op.create_index('idx_audit_tenant_created', 'audit_logs', ['tenant_id', 'created_at'])
op.create_index('idx_audit_user_created', 'audit_logs', ['user_id', 'created_at'])
op.create_index(op.f('ix_audit_logs_action'), 'audit_logs', ['action'])
op.create_index(op.f('ix_audit_logs_created_at'), 'audit_logs', ['created_at'])
op.create_index(op.f('ix_audit_logs_resource_id'), 'audit_logs', ['resource_id'])
op.create_index(op.f('ix_audit_logs_resource_type'), 'audit_logs', ['resource_type'])
op.create_index(op.f('ix_audit_logs_service_name'), 'audit_logs', ['service_name'])
op.create_index(op.f('ix_audit_logs_severity'), 'audit_logs', ['severity'])
op.create_index(op.f('ix_audit_logs_tenant_id'), 'audit_logs', ['tenant_id'])
op.create_index(op.f('ix_audit_logs_user_id'), 'audit_logs', ['user_id'])
# ============================================================================
# WEATHER DATA
# ============================================================================
op.create_table(
'city_weather_data',
sa.Column('id', UUID(as_uuid=True), nullable=False),
sa.Column('city_id', sa.String(length=50), nullable=False),
sa.Column('date', sa.DateTime(timezone=True), nullable=False),
sa.Column('temperature', sa.Float(), nullable=True),
sa.Column('precipitation', sa.Float(), nullable=True),
sa.Column('humidity', sa.Float(), nullable=True),
sa.Column('wind_speed', sa.Float(), nullable=True),
sa.Column('pressure', sa.Float(), nullable=True),
sa.Column('description', sa.String(length=200), nullable=True),
sa.Column('source', sa.String(length=50), nullable=False),
sa.Column('raw_data', JSONB, nullable=True),
sa.Column('created_at', sa.DateTime(timezone=True), nullable=True),
sa.Column('updated_at', sa.DateTime(timezone=True), nullable=True),
sa.PrimaryKeyConstraint('id')
)
op.create_index('idx_city_weather_lookup', 'city_weather_data', ['city_id', 'date'])
op.create_index(op.f('ix_city_weather_data_city_id'), 'city_weather_data', ['city_id'])
op.create_index(op.f('ix_city_weather_data_date'), 'city_weather_data', ['date'])
op.create_table(
'weather_data',
sa.Column('id', UUID(as_uuid=True), nullable=False),
sa.Column('location_id', sa.String(length=100), nullable=False),
sa.Column('city', sa.String(length=50), nullable=False),
sa.Column('station_name', sa.String(length=200), nullable=True),
sa.Column('latitude', sa.Float(), nullable=True),
sa.Column('longitude', sa.Float(), nullable=True),
sa.Column('date', sa.DateTime(timezone=True), nullable=False),
sa.Column('forecast_date', sa.DateTime(timezone=True), nullable=True),
sa.Column('temperature', sa.Float(), nullable=True),
sa.Column('temperature_min', sa.Float(), nullable=True),
sa.Column('temperature_max', sa.Float(), nullable=True),
sa.Column('feels_like', sa.Float(), nullable=True),
sa.Column('precipitation', sa.Float(), nullable=True),
sa.Column('precipitation_probability', sa.Float(), nullable=True),
sa.Column('humidity', sa.Float(), nullable=True),
sa.Column('wind_speed', sa.Float(), nullable=True),
sa.Column('wind_direction', sa.Float(), nullable=True),
sa.Column('wind_gust', sa.Float(), nullable=True),
sa.Column('pressure', sa.Float(), nullable=True),
sa.Column('visibility', sa.Float(), nullable=True),
sa.Column('uv_index', sa.Float(), nullable=True),
sa.Column('cloud_cover', sa.Float(), nullable=True),
sa.Column('condition', sa.String(length=100), nullable=True),
sa.Column('description', sa.String(length=200), nullable=True),
sa.Column('weather_code', sa.String(length=20), nullable=True),
sa.Column('source', sa.String(length=50), nullable=False),
sa.Column('data_type', sa.String(length=20), nullable=False),
sa.Column('is_forecast', sa.Boolean(), nullable=True),
sa.Column('data_quality_score', sa.Float(), nullable=True),
sa.Column('raw_data', JSONB, nullable=True),
sa.Column('processed_data', JSONB, nullable=True),
sa.Column('tenant_id', UUID(as_uuid=True), nullable=True),
sa.Column('created_at', sa.DateTime(timezone=True), nullable=True),
sa.Column('updated_at', sa.DateTime(timezone=True), nullable=True),
sa.PrimaryKeyConstraint('id')
)
op.create_index('idx_weather_location_date', 'weather_data', ['location_id', 'date'])
op.create_index(op.f('ix_weather_data_date'), 'weather_data', ['date'])
op.create_index(op.f('ix_weather_data_location_id'), 'weather_data', ['location_id'])
op.create_index(op.f('ix_weather_data_tenant_id'), 'weather_data', ['tenant_id'])
op.create_table(
'weather_forecasts',
sa.Column('id', UUID(as_uuid=True), nullable=False),
sa.Column('location_id', sa.String(length=100), nullable=False),
sa.Column('forecast_date', sa.DateTime(timezone=True), nullable=False),
sa.Column('generated_at', sa.DateTime(timezone=True), nullable=False),
sa.Column('temperature', sa.Float(), nullable=True),
sa.Column('precipitation', sa.Float(), nullable=True),
sa.Column('humidity', sa.Float(), nullable=True),
sa.Column('wind_speed', sa.Float(), nullable=True),
sa.Column('description', sa.String(length=200), nullable=True),
sa.Column('source', sa.String(length=50), nullable=False),
sa.Column('raw_data', sa.Text(), nullable=True),
sa.Column('created_at', sa.DateTime(timezone=True), nullable=True),
sa.Column('updated_at', sa.DateTime(timezone=True), nullable=True),
sa.PrimaryKeyConstraint('id')
)
op.create_index('idx_forecast_location_date', 'weather_forecasts', ['location_id', 'forecast_date'])
op.create_index(op.f('ix_weather_forecasts_location_id'), 'weather_forecasts', ['location_id'])
# ============================================================================
# TRAFFIC DATA
# ============================================================================
op.create_table(
'city_traffic_data',
sa.Column('id', UUID(as_uuid=True), nullable=False),
sa.Column('city_id', sa.String(length=50), nullable=False),
sa.Column('date', sa.DateTime(timezone=True), nullable=False),
sa.Column('traffic_volume', sa.Integer(), nullable=True),
sa.Column('pedestrian_count', sa.Integer(), nullable=True),
sa.Column('congestion_level', sa.String(length=20), nullable=True),
sa.Column('average_speed', sa.Float(), nullable=True),
sa.Column('source', sa.String(length=50), nullable=False),
sa.Column('raw_data', JSONB, nullable=True),
sa.Column('created_at', sa.DateTime(timezone=True), nullable=True),
sa.Column('updated_at', sa.DateTime(timezone=True), nullable=True),
sa.PrimaryKeyConstraint('id')
)
op.create_index('idx_city_traffic_lookup', 'city_traffic_data', ['city_id', 'date'])
op.create_index(op.f('ix_city_traffic_data_city_id'), 'city_traffic_data', ['city_id'])
op.create_index(op.f('ix_city_traffic_data_date'), 'city_traffic_data', ['date'])
op.create_table(
'traffic_measurement_points',
sa.Column('id', UUID(as_uuid=True), nullable=False),
sa.Column('city', sa.String(length=50), nullable=False),
sa.Column('measurement_point_id', sa.String(length=100), nullable=False),
sa.Column('name', sa.String(length=500), nullable=True),
sa.Column('description', sa.Text(), nullable=True),
sa.Column('latitude', sa.Float(), nullable=False),
sa.Column('longitude', sa.Float(), nullable=False),
sa.Column('district', sa.String(length=100), nullable=True),
sa.Column('zone', sa.String(length=100), nullable=True),
sa.Column('road_type', sa.String(length=50), nullable=True),
sa.Column('measurement_type', sa.String(length=50), nullable=True),
sa.Column('point_category', sa.String(length=50), nullable=True),
sa.Column('is_active', sa.Boolean(), nullable=True),
sa.Column('installation_date', sa.DateTime(timezone=True), nullable=True),
sa.Column('last_data_received', sa.DateTime(timezone=True), nullable=True),
sa.Column('data_quality_rating', sa.Float(), nullable=True),
sa.Column('city_specific_metadata', JSONB, nullable=True),
sa.Column('created_at', sa.DateTime(timezone=True), nullable=True),
sa.Column('updated_at', sa.DateTime(timezone=True), nullable=True),
sa.PrimaryKeyConstraint('id')
)
op.create_index('idx_points_active', 'traffic_measurement_points', ['city', 'is_active', 'last_data_received'])
op.create_index('idx_points_city_location', 'traffic_measurement_points', ['city', 'latitude', 'longitude'])
op.create_index('idx_points_district', 'traffic_measurement_points', ['city', 'district'])
op.create_index('idx_points_road_type', 'traffic_measurement_points', ['city', 'road_type'])
op.create_index('idx_unique_city_point', 'traffic_measurement_points', ['city', 'measurement_point_id'], unique=True)
op.create_index(op.f('ix_traffic_measurement_points_city'), 'traffic_measurement_points', ['city'])
op.create_index(op.f('ix_traffic_measurement_points_measurement_point_id'), 'traffic_measurement_points', ['measurement_point_id'])
op.create_table(
'traffic_data',
sa.Column('id', UUID(as_uuid=True), nullable=False),
sa.Column('location_id', sa.String(length=100), nullable=False),
sa.Column('city', sa.String(length=50), nullable=False),
sa.Column('date', sa.DateTime(timezone=True), nullable=False),
sa.Column('traffic_volume', sa.Integer(), nullable=True),
sa.Column('congestion_level', sa.String(length=20), nullable=True),
sa.Column('average_speed', sa.Float(), nullable=True),
sa.Column('occupation_percentage', sa.Float(), nullable=True),
sa.Column('load_percentage', sa.Float(), nullable=True),
sa.Column('pedestrian_count', sa.Integer(), nullable=True),
sa.Column('measurement_point_id', sa.String(length=100), nullable=True),
sa.Column('measurement_point_name', sa.String(length=500), nullable=True),
sa.Column('measurement_point_type', sa.String(length=50), nullable=True),
sa.Column('latitude', sa.Float(), nullable=True),
sa.Column('longitude', sa.Float(), nullable=True),
sa.Column('district', sa.String(length=100), nullable=True),
sa.Column('zone', sa.String(length=100), nullable=True),
sa.Column('source', sa.String(length=50), nullable=False),
sa.Column('data_quality_score', sa.Float(), nullable=True),
sa.Column('is_synthetic', sa.Boolean(), nullable=True),
sa.Column('has_pedestrian_inference', sa.Boolean(), nullable=True),
sa.Column('city_specific_data', JSONB, nullable=True),
sa.Column('raw_data', sa.Text(), nullable=True),
sa.Column('tenant_id', UUID(as_uuid=True), nullable=True),
sa.Column('created_at', sa.DateTime(timezone=True), nullable=True),
sa.Column('updated_at', sa.DateTime(timezone=True), nullable=True),
sa.PrimaryKeyConstraint('id')
)
op.create_index('idx_traffic_city_date', 'traffic_data', ['city', 'date'])
op.create_index('idx_traffic_city_location', 'traffic_data', ['city', 'location_id'])
op.create_index('idx_traffic_district_date', 'traffic_data', ['city', 'district', 'date'])
op.create_index('idx_traffic_location_date', 'traffic_data', ['location_id', 'date'])
op.create_index('idx_traffic_measurement_point', 'traffic_data', ['city', 'measurement_point_id'])
op.create_index('idx_traffic_quality', 'traffic_data', ['city', 'data_quality_score', 'date'])
op.create_index('idx_traffic_tenant_date', 'traffic_data', ['tenant_id', 'date'])
op.create_index('idx_traffic_training', 'traffic_data', ['tenant_id', 'city', 'date', 'is_synthetic'])
op.create_index(op.f('ix_traffic_data_city'), 'traffic_data', ['city'])
op.create_index(op.f('ix_traffic_data_date'), 'traffic_data', ['date'])
op.create_index(op.f('ix_traffic_data_location_id'), 'traffic_data', ['location_id'])
op.create_index(op.f('ix_traffic_data_measurement_point_id'), 'traffic_data', ['measurement_point_id'])
op.create_index(op.f('ix_traffic_data_tenant_id'), 'traffic_data', ['tenant_id'])
op.create_table(
'traffic_background_jobs',
sa.Column('id', UUID(as_uuid=True), nullable=False),
sa.Column('job_type', sa.String(length=50), nullable=False),
sa.Column('city', sa.String(length=50), nullable=False),
sa.Column('location_pattern', sa.String(length=200), nullable=True),
sa.Column('scheduled_at', sa.DateTime(timezone=True), nullable=False),
sa.Column('started_at', sa.DateTime(timezone=True), nullable=True),
sa.Column('completed_at', sa.DateTime(timezone=True), nullable=True),
sa.Column('status', sa.String(length=20), nullable=False),
sa.Column('progress_percentage', sa.Float(), nullable=True),
sa.Column('records_processed', sa.Integer(), nullable=True),
sa.Column('records_stored', sa.Integer(), nullable=True),
sa.Column('data_start_date', sa.DateTime(timezone=True), nullable=True),
sa.Column('data_end_date', sa.DateTime(timezone=True), nullable=True),
sa.Column('success_count', sa.Integer(), nullable=True),
sa.Column('error_count', sa.Integer(), nullable=True),
sa.Column('error_message', sa.Text(), nullable=True),
sa.Column('job_metadata', JSONB, nullable=True),
sa.Column('tenant_id', UUID(as_uuid=True), nullable=True),
sa.Column('created_at', sa.DateTime(timezone=True), nullable=True),
sa.Column('updated_at', sa.DateTime(timezone=True), nullable=True),
sa.PrimaryKeyConstraint('id')
)
op.create_index('idx_jobs_city_status', 'traffic_background_jobs', ['city', 'status', 'scheduled_at'])
op.create_index('idx_jobs_completed', 'traffic_background_jobs', ['status', 'completed_at'])
op.create_index('idx_jobs_tenant_status', 'traffic_background_jobs', ['tenant_id', 'status', 'scheduled_at'])
op.create_index('idx_jobs_type_city', 'traffic_background_jobs', ['job_type', 'city', 'scheduled_at'])
op.create_index(op.f('ix_traffic_background_jobs_city'), 'traffic_background_jobs', ['city'])
op.create_index(op.f('ix_traffic_background_jobs_tenant_id'), 'traffic_background_jobs', ['tenant_id'])
# ============================================================================
# SCHOOL CALENDARS & LOCATION CONTEXT
# ============================================================================
op.create_table(
'school_calendars',
sa.Column('id', UUID(as_uuid=True), nullable=False),
sa.Column('city_id', sa.String(length=50), nullable=False),
sa.Column('calendar_name', sa.String(length=100), nullable=False),
sa.Column('school_type', sa.String(length=20), nullable=False),
sa.Column('academic_year', sa.String(length=10), nullable=False),
sa.Column('holiday_periods', JSONB, nullable=False),
sa.Column('school_hours', JSONB, nullable=False),
sa.Column('source', sa.String(length=100), nullable=True),
sa.Column('enabled', sa.Boolean(), nullable=False),
sa.Column('created_at', sa.DateTime(timezone=True), nullable=True),
sa.Column('updated_at', sa.DateTime(timezone=True), nullable=True),
sa.PrimaryKeyConstraint('id')
)
op.create_index('idx_school_calendar_city_year', 'school_calendars', ['city_id', 'academic_year'])
op.create_index('idx_school_calendar_city_type', 'school_calendars', ['city_id', 'school_type'])
op.create_index(op.f('ix_school_calendars_city_id'), 'school_calendars', ['city_id'])
op.create_table(
'tenant_location_contexts',
sa.Column('tenant_id', UUID(as_uuid=True), nullable=False),
sa.Column('city_id', sa.String(length=50), nullable=False),
sa.Column('school_calendar_id', UUID(as_uuid=True), nullable=True),
sa.Column('neighborhood', sa.String(length=100), nullable=True),
sa.Column('local_events', JSONB, nullable=True),
sa.Column('notes', sa.String(length=500), nullable=True),
sa.Column('created_at', sa.DateTime(timezone=True), nullable=True),
sa.Column('updated_at', sa.DateTime(timezone=True), nullable=True),
sa.PrimaryKeyConstraint('tenant_id')
)
op.create_index('idx_tenant_location_calendar', 'tenant_location_contexts', ['school_calendar_id'])
op.create_index(op.f('ix_tenant_location_contexts_city_id'), 'tenant_location_contexts', ['city_id'])
# ============================================================================
# POI DETECTION SYSTEM
# ============================================================================
op.create_table(
'tenant_poi_contexts',
sa.Column('id', UUID(as_uuid=True), primary_key=True),
sa.Column('tenant_id', UUID(as_uuid=True), nullable=False, unique=True, index=True),
sa.Column('latitude', sa.Float(), nullable=False),
sa.Column('longitude', sa.Float(), nullable=False),
sa.Column('poi_detection_results', JSONB, nullable=False, server_default='{}'),
sa.Column('ml_features', JSONB, nullable=False, server_default='{}'),
sa.Column('total_pois_detected', sa.Integer(), default=0),
sa.Column('high_impact_categories', JSONB, server_default='[]'),
sa.Column('relevant_categories', JSONB, server_default='[]'),
sa.Column('detection_timestamp', sa.DateTime(timezone=True), nullable=False),
sa.Column('detection_source', sa.String(50), default='overpass_api'),
sa.Column('detection_status', sa.String(20), default='completed'),
sa.Column('detection_error', sa.String(500), nullable=True),
sa.Column('next_refresh_date', sa.DateTime(timezone=True), nullable=True),
sa.Column('refresh_interval_days', sa.Integer(), default=180),
sa.Column('last_refreshed_at', sa.DateTime(timezone=True), nullable=True),
sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.func.now()),
sa.Column('updated_at', sa.DateTime(timezone=True), server_default=sa.func.now(), onupdate=sa.func.now())
)
op.create_index('idx_tenant_poi_location', 'tenant_poi_contexts', ['latitude', 'longitude'])
op.create_index('idx_tenant_poi_refresh', 'tenant_poi_contexts', ['next_refresh_date'])
op.create_index('idx_tenant_poi_status', 'tenant_poi_contexts', ['detection_status'])
op.create_table(
'poi_refresh_jobs',
sa.Column('id', UUID(as_uuid=True), primary_key=True),
sa.Column('tenant_id', UUID(as_uuid=True), nullable=False, index=True),
sa.Column('scheduled_at', sa.DateTime(timezone=True), nullable=False, index=True),
sa.Column('started_at', sa.DateTime(timezone=True), nullable=True),
sa.Column('completed_at', sa.DateTime(timezone=True), nullable=True),
sa.Column('status', sa.String(50), nullable=False, default='pending', index=True),
sa.Column('attempt_count', sa.Integer, nullable=False, default=0),
sa.Column('max_attempts', sa.Integer, nullable=False, default=3),
sa.Column('latitude', sa.Float, nullable=False),
sa.Column('longitude', sa.Float, nullable=False),
sa.Column('pois_detected', sa.Integer, nullable=True),
sa.Column('changes_detected', sa.Boolean, default=False),
sa.Column('change_summary', JSONB, nullable=True),
sa.Column('error_message', sa.Text, nullable=True),
sa.Column('error_details', JSONB, nullable=True),
sa.Column('next_scheduled_at', sa.DateTime(timezone=True), nullable=True, index=True),
sa.Column('created_at', sa.DateTime(timezone=True), nullable=False, server_default=sa.func.now()),
sa.Column('updated_at', sa.DateTime(timezone=True), nullable=False, server_default=sa.func.now(), onupdate=sa.func.now())
)
op.create_index('idx_poi_refresh_jobs_tenant_status', 'poi_refresh_jobs', ['tenant_id', 'status'])
op.create_index('idx_poi_refresh_jobs_status_scheduled', 'poi_refresh_jobs', ['status', 'scheduled_at'])
def downgrade() -> None:
"""Drop all tables"""
# POI Detection System
op.drop_index('idx_poi_refresh_jobs_status_scheduled', table_name='poi_refresh_jobs')
op.drop_index('idx_poi_refresh_jobs_tenant_status', table_name='poi_refresh_jobs')
op.drop_table('poi_refresh_jobs')
op.drop_index('idx_tenant_poi_status', table_name='tenant_poi_contexts')
op.drop_index('idx_tenant_poi_refresh', table_name='tenant_poi_contexts')
op.drop_index('idx_tenant_poi_location', table_name='tenant_poi_contexts')
op.drop_table('tenant_poi_contexts')
# School Calendars & Location Context
op.drop_index(op.f('ix_tenant_location_contexts_city_id'), table_name='tenant_location_contexts')
op.drop_index('idx_tenant_location_calendar', table_name='tenant_location_contexts')
op.drop_table('tenant_location_contexts')
op.drop_index(op.f('ix_school_calendars_city_id'), table_name='school_calendars')
op.drop_index('idx_school_calendar_city_type', table_name='school_calendars')
op.drop_index('idx_school_calendar_city_year', table_name='school_calendars')
op.drop_table('school_calendars')
# Traffic Data
op.drop_index(op.f('ix_traffic_background_jobs_tenant_id'), table_name='traffic_background_jobs')
op.drop_index(op.f('ix_traffic_background_jobs_city'), table_name='traffic_background_jobs')
op.drop_index('idx_jobs_type_city', table_name='traffic_background_jobs')
op.drop_index('idx_jobs_tenant_status', table_name='traffic_background_jobs')
op.drop_index('idx_jobs_completed', table_name='traffic_background_jobs')
op.drop_index('idx_jobs_city_status', table_name='traffic_background_jobs')
op.drop_table('traffic_background_jobs')
op.drop_index(op.f('ix_traffic_data_tenant_id'), table_name='traffic_data')
op.drop_index(op.f('ix_traffic_data_measurement_point_id'), table_name='traffic_data')
op.drop_index(op.f('ix_traffic_data_location_id'), table_name='traffic_data')
op.drop_index(op.f('ix_traffic_data_date'), table_name='traffic_data')
op.drop_index(op.f('ix_traffic_data_city'), table_name='traffic_data')
op.drop_index('idx_traffic_training', table_name='traffic_data')
op.drop_index('idx_traffic_tenant_date', table_name='traffic_data')
op.drop_index('idx_traffic_quality', table_name='traffic_data')
op.drop_index('idx_traffic_measurement_point', table_name='traffic_data')
op.drop_index('idx_traffic_location_date', table_name='traffic_data')
op.drop_index('idx_traffic_district_date', table_name='traffic_data')
op.drop_index('idx_traffic_city_location', table_name='traffic_data')
op.drop_index('idx_traffic_city_date', table_name='traffic_data')
op.drop_table('traffic_data')
op.drop_index(op.f('ix_traffic_measurement_points_measurement_point_id'), table_name='traffic_measurement_points')
op.drop_index(op.f('ix_traffic_measurement_points_city'), table_name='traffic_measurement_points')
op.drop_index('idx_unique_city_point', table_name='traffic_measurement_points')
op.drop_index('idx_points_road_type', table_name='traffic_measurement_points')
op.drop_index('idx_points_district', table_name='traffic_measurement_points')
op.drop_index('idx_points_city_location', table_name='traffic_measurement_points')
op.drop_index('idx_points_active', table_name='traffic_measurement_points')
op.drop_table('traffic_measurement_points')
op.drop_index(op.f('ix_city_traffic_data_date'), table_name='city_traffic_data')
op.drop_index(op.f('ix_city_traffic_data_city_id'), table_name='city_traffic_data')
op.drop_index('idx_city_traffic_lookup', table_name='city_traffic_data')
op.drop_table('city_traffic_data')
# Weather Data
op.drop_index(op.f('ix_weather_forecasts_location_id'), table_name='weather_forecasts')
op.drop_index('idx_forecast_location_date', table_name='weather_forecasts')
op.drop_table('weather_forecasts')
op.drop_index(op.f('ix_weather_data_tenant_id'), table_name='weather_data')
op.drop_index(op.f('ix_weather_data_location_id'), table_name='weather_data')
op.drop_index(op.f('ix_weather_data_date'), table_name='weather_data')
op.drop_index('idx_weather_location_date', table_name='weather_data')
op.drop_table('weather_data')
op.drop_index(op.f('ix_city_weather_data_date'), table_name='city_weather_data')
op.drop_index(op.f('ix_city_weather_data_city_id'), table_name='city_weather_data')
op.drop_index('idx_city_weather_lookup', table_name='city_weather_data')
op.drop_table('city_weather_data')
# Audit Logs
op.drop_index(op.f('ix_audit_logs_user_id'), table_name='audit_logs')
op.drop_index(op.f('ix_audit_logs_tenant_id'), table_name='audit_logs')
op.drop_index(op.f('ix_audit_logs_severity'), table_name='audit_logs')
op.drop_index(op.f('ix_audit_logs_service_name'), table_name='audit_logs')
op.drop_index(op.f('ix_audit_logs_resource_type'), table_name='audit_logs')
op.drop_index(op.f('ix_audit_logs_resource_id'), table_name='audit_logs')
op.drop_index(op.f('ix_audit_logs_created_at'), table_name='audit_logs')
op.drop_index(op.f('ix_audit_logs_action'), table_name='audit_logs')
op.drop_index('idx_audit_user_created', table_name='audit_logs')
op.drop_index('idx_audit_tenant_created', table_name='audit_logs')
op.drop_index('idx_audit_severity_created', table_name='audit_logs')
op.drop_index('idx_audit_service_created', table_name='audit_logs')
op.drop_index('idx_audit_resource_type_action', table_name='audit_logs')
op.drop_table('audit_logs')
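# --- Usage note (not part of the original file) ---
# This is the root revision of the chain (down_revision = None), so
# "alembic upgrade head" creates the full External Service schema above,
# and "alembic downgrade base" removes every table created by upgrade().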

19
services/external/pytest.ini vendored Normal file

@@ -0,0 +1,19 @@
[tool:pytest]
testpaths = tests
asyncio_mode = auto
python_files = test_*.py
python_classes = Test*
python_functions = test_*
addopts =
-v
--tb=short
--strict-markers
--disable-warnings
--cov=app
--cov-report=term-missing
--cov-report=html:htmlcov
markers =
unit: Unit tests
integration: Integration tests
slow: Slow running tests
external: Tests requiring external services
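# --- Usage examples (not part of the original config) ---
# The markers above enable selective runs, e.g.:
#   pytest -m unit                      # unit tests only
#   pytest -m "not external"            # skip tests needing external services
#   pytest -m "integration and not slow"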

59
services/external/requirements.txt vendored Normal file

@@ -0,0 +1,59 @@
# services/external/requirements.txt
# FastAPI and web framework
fastapi==0.119.0
uvicorn[standard]==0.32.1
# Database
sqlalchemy==2.0.44
psycopg2-binary==2.9.10
asyncpg==0.30.0
aiosqlite==0.20.0
alembic==1.17.0
# HTTP clients for external APIs
httpx==0.28.1
aiofiles==24.1.0
requests==2.32.3
# Data processing and time series
pandas==2.2.3
numpy==2.2.2
# Validation and serialization
pydantic==2.12.3
pydantic-settings==2.7.1
email-validator==2.2.0
# Authentication and security
python-jose[cryptography]==3.3.0
cryptography==44.0.0
# Logging and monitoring
structlog==25.4.0
psutil==5.9.8
# Message queues
aio-pika==9.4.3
# Background job processing
redis==6.4.0
# Date and time handling
pytz==2024.2
python-dateutil==2.9.0.post0
# XML parsing (for some APIs)
lxml==5.3.0
# Geospatial processing
pyproj==3.7.1
# OpenStreetMap / POI detection
overpy==0.7
# Development
python-multipart==0.0.6
# External API specific
beautifulsoup4==4.12.3
xmltodict==0.14.2


@@ -0,0 +1,119 @@
#!/usr/bin/env python3
"""
Seed School Calendars Script
Loads school calendars from CalendarRegistry into the database
"""
import asyncio
import sys
import os
# Add parent directory to path to allow imports
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from app.core.database import database_manager
from app.repositories.calendar_repository import CalendarRepository
from app.registry.calendar_registry import CalendarRegistry
import structlog
logger = structlog.get_logger()
async def seed_calendars():
"""Seed school calendars from registry into database"""
logger.info("Starting school calendar seeding...")
# Get all calendars from registry
calendars = CalendarRegistry.get_all_calendars()
logger.info(f"Found {len(calendars)} calendars in registry")
# Initialize database
await database_manager.initialize()
try:
async with database_manager.get_session() as session:
repo = CalendarRepository(session)
seeded_count = 0
skipped_count = 0
for cal_def in calendars:
logger.info(
"Processing calendar",
calendar_id=cal_def.calendar_id,
city=cal_def.city_id,
type=cal_def.school_type.value
)
# Check if calendar already exists
existing = await repo.get_calendar_by_city_type_year(
city_id=cal_def.city_id,
school_type=cal_def.school_type.value,
academic_year=cal_def.academic_year
)
if existing:
logger.info(
"Calendar already exists, skipping",
calendar_id=cal_def.calendar_id
)
skipped_count += 1
continue
# Convert holiday periods to dict format
holiday_periods = [
{
"name": hp.name,
"start_date": hp.start_date,
"end_date": hp.end_date,
"description": hp.description
}
for hp in cal_def.holiday_periods
]
# Convert school hours to dict format
school_hours = {
"morning_start": cal_def.school_hours.morning_start,
"morning_end": cal_def.school_hours.morning_end,
"has_afternoon_session": cal_def.school_hours.has_afternoon_session,
"afternoon_start": cal_def.school_hours.afternoon_start,
"afternoon_end": cal_def.school_hours.afternoon_end
}
# Create calendar in database
created_calendar = await repo.create_school_calendar(
city_id=cal_def.city_id,
calendar_name=cal_def.calendar_name,
school_type=cal_def.school_type.value,
academic_year=cal_def.academic_year,
holiday_periods=holiday_periods,
school_hours=school_hours,
source=cal_def.source,
enabled=cal_def.enabled
)
logger.info(
"Calendar seeded successfully",
calendar_id=str(created_calendar.id),
city=cal_def.city_id,
type=cal_def.school_type.value
)
seeded_count += 1
logger.info(
"Calendar seeding completed",
seeded=seeded_count,
skipped=skipped_count,
total=len(calendars)
)
except Exception as e:
logger.error("Error seeding calendars", error=str(e))
raise
finally:
await database_manager.close()
if __name__ == "__main__":
asyncio.run(seed_calendars())
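# --- Usage note (not part of the original file) ---
# Intended to run once per environment, inside the service container, after the
# Alembic migrations have been applied:
#   python <path/to/this/script>.py
# Re-running is safe: calendars that already exist for a given city, school type
# and academic year are detected and skipped.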

314
services/external/tests/conftest.py vendored Normal file

@@ -0,0 +1,314 @@
# services/external/tests/conftest.py
"""
Pytest configuration and fixtures for External Service tests
"""
import pytest
import asyncio
from datetime import datetime, timezone
from typing import AsyncGenerator
from uuid import uuid4, UUID
from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession, async_sessionmaker
from sqlalchemy.pool import StaticPool
from fastapi.testclient import TestClient
from app.main import app
from app.core.config import settings
from app.core.database import Base, get_db
from app.models.weather import WeatherData, WeatherStation
from app.models.traffic import TrafficData, TrafficMeasurementPoint
# Test database configuration
TEST_DATABASE_URL = "sqlite+aiosqlite:///:memory:"
@pytest.fixture(scope="session")
def event_loop():
"""Create event loop for the test session"""
loop = asyncio.new_event_loop()
yield loop
loop.close()
@pytest.fixture
async def test_engine():
"""Create test database engine"""
engine = create_async_engine(
TEST_DATABASE_URL,
poolclass=StaticPool,
connect_args={"check_same_thread": False}
)
# Create tables
async with engine.begin() as conn:
await conn.run_sync(Base.metadata.create_all)
yield engine
await engine.dispose()
@pytest.fixture
async def test_db_session(test_engine) -> AsyncGenerator[AsyncSession, None]:
"""Create test database session"""
async_session = async_sessionmaker(
test_engine, class_=AsyncSession, expire_on_commit=False
)
async with async_session() as session:
yield session
@pytest.fixture
def test_client():
"""Create test client"""
return TestClient(app)
@pytest.fixture
async def override_get_db(test_db_session):
"""Override get_db dependency for testing"""
async def _override_get_db():
yield test_db_session
app.dependency_overrides[get_db] = _override_get_db
yield
app.dependency_overrides.clear()
# Test data fixtures
@pytest.fixture
def sample_tenant_id() -> UUID:
"""Sample tenant ID for testing"""
return uuid4()
@pytest.fixture
def sample_weather_data() -> dict:
"""Sample weather data for testing"""
return {
"city": "madrid",
"location_id": "40.4168,-3.7038",
"date": datetime.now(timezone.utc),
"temperature": 18.5,
"humidity": 65.0,
"pressure": 1013.2,
"wind_speed": 10.2,
"condition": "partly_cloudy",
"description": "Parcialmente nublado",
"source": "aemet",
"data_type": "current",
"is_forecast": False,
"data_quality_score": 95.0
}
@pytest.fixture
def sample_traffic_data() -> dict:
"""Sample traffic data for testing"""
return {
"city": "madrid",
"location_id": "PM_M30_001",
"date": datetime.now(timezone.utc),
"measurement_point_id": "PM_M30_001",
"measurement_point_name": "M-30 Norte - Nudo Norte",
"measurement_point_type": "M30",
"traffic_volume": 850,
"average_speed": 65.2,
"congestion_level": "medium",
"occupation_percentage": 45.8,
"latitude": 40.4501,
"longitude": -3.6919,
"district": "Chamartín",
"source": "madrid_opendata",
"data_quality_score": 92.0,
"is_synthetic": False
}
@pytest.fixture
def sample_weather_forecast() -> list[dict]:
"""Sample weather forecast data"""
base_date = datetime.now(timezone.utc)
return [
{
"city": "madrid",
"location_id": "40.4168,-3.7038",
"date": base_date,
"forecast_date": base_date,
"temperature": 20.0,
"temperature_min": 15.0,
"temperature_max": 25.0,
"precipitation": 0.0,
"humidity": 60.0,
"wind_speed": 12.0,
"condition": "sunny",
"description": "Soleado",
"source": "aemet",
"data_type": "forecast",
"is_forecast": True,
"data_quality_score": 85.0
}
]
@pytest.fixture
async def populated_weather_db(test_db_session: AsyncSession, sample_weather_data: dict):
"""Database populated with weather test data"""
weather_record = WeatherData(**sample_weather_data)
test_db_session.add(weather_record)
await test_db_session.commit()
yield test_db_session
@pytest.fixture
async def populated_traffic_db(test_db_session: AsyncSession, sample_traffic_data: dict):
"""Database populated with traffic test data"""
traffic_record = TrafficData(**sample_traffic_data)
test_db_session.add(traffic_record)
await test_db_session.commit()
yield test_db_session
# Mock external API fixtures
@pytest.fixture
def mock_aemet_response():
"""Mock AEMET API response"""
return {
"date": datetime.now(timezone.utc),
"temperature": 18.5,
"humidity": 65.0,
"pressure": 1013.2,
"wind_speed": 10.2,
"description": "Parcialmente nublado",
"source": "aemet"
}
@pytest.fixture
def mock_madrid_traffic_xml():
"""Mock Madrid Open Data traffic XML"""
return """<?xml version="1.0" encoding="UTF-8"?>
<pms>
<pm codigo="PM_M30_001" nombre="M-30 Norte - Nudo Norte">
<intensidad>850</intensidad>
<ocupacion>45</ocupacion>
<velocidad>65</velocidad>
<fechahora>2024-01-15T10:30:00</fechahora>
</pm>
<pm codigo="PM_URB_002" nombre="Gran Vía - Plaza España">
<intensidad>320</intensidad>
<ocupacion>78</ocupacion>
<velocidad>25</velocidad>
<fechahora>2024-01-15T10:30:00</fechahora>
</pm>
</pms>"""
@pytest.fixture
def mock_messaging():
"""Mock messaging service"""
class MockMessaging:
def __init__(self):
self.published_events = []
async def publish_weather_updated(self, data):
self.published_events.append(("weather_updated", data))
return True
async def publish_traffic_updated(self, data):
self.published_events.append(("traffic_updated", data))
return True
async def publish_collection_job_started(self, data):
self.published_events.append(("job_started", data))
return True
async def publish_collection_job_completed(self, data):
self.published_events.append(("job_completed", data))
return True
return MockMessaging()
# Mock external clients
@pytest.fixture
def mock_aemet_client():
"""Mock AEMET client"""
class MockAEMETClient:
async def get_current_weather(self, lat, lon):
return {
"date": datetime.now(timezone.utc),
"temperature": 18.5,
"humidity": 65.0,
"pressure": 1013.2,
"wind_speed": 10.2,
"description": "Parcialmente nublado",
"source": "aemet"
}
async def get_forecast(self, lat, lon, days):
return [
{
"forecast_date": datetime.now(timezone.utc),
"temperature": 20.0,
"temperature_min": 15.0,
"temperature_max": 25.0,
"precipitation": 0.0,
"humidity": 60.0,
"wind_speed": 12.0,
"description": "Soleado",
"source": "aemet"
}
]
return MockAEMETClient()
@pytest.fixture
def mock_madrid_client():
"""Mock Madrid traffic client"""
class MockMadridClient:
async def fetch_current_traffic_xml(self):
return """<?xml version="1.0" encoding="UTF-8"?>
<pms>
<pm codigo="PM_TEST_001" nombre="Test Point">
<intensidad>500</intensidad>
<ocupacion>50</ocupacion>
<velocidad>50</velocidad>
<fechahora>2024-01-15T10:30:00</fechahora>
</pm>
</pms>"""
return MockMadridClient()
@pytest.fixture
def mock_madrid_processor():
"""Mock Madrid traffic processor"""
class MockMadridProcessor:
async def process_current_traffic_xml(self, xml_content):
return [
{
"city": "madrid",
"location_id": "PM_TEST_001",
"date": datetime.now(timezone.utc),
"measurement_point_id": "PM_TEST_001",
"measurement_point_name": "Test Point",
"measurement_point_type": "TEST",
"traffic_volume": 500,
"average_speed": 50.0,
"congestion_level": "medium",
"occupation_percentage": 50.0,
"latitude": 40.4168,
"longitude": -3.7038,
"district": "Centro",
"source": "madrid_opendata",
"data_quality_score": 90.0,
"is_synthetic": False
}
]
return MockMadridProcessor()
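# --- Illustrative fixture usage (not part of the original file) ---
# A test module might combine the fixtures above like this; the module path and
# test body are assumptions for illustration (asyncio_mode = auto in pytest.ini
# picks up the async test without an explicit marker).
#
#   # tests/unit/test_example.py
#   import pytest
#   from app.repositories.weather_repository import WeatherRepository
#
#   @pytest.mark.unit
#   async def test_store_weather(test_db_session, sample_weather_data):
#       repo = WeatherRepository(test_db_session)
#       record = await repo.create_weather_data(sample_weather_data)
#       assert record.city == "madrid"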


@@ -0,0 +1,9 @@
# Testing dependencies for External Service
pytest==7.4.3
pytest-asyncio==0.21.1
pytest-mock==3.12.0
httpx==0.25.2
fastapi[all]==0.104.1
sqlalchemy[asyncio]==2.0.23
aiosqlite==0.19.0
coverage==7.3.2

View File

@@ -0,0 +1,393 @@
# services/external/tests/unit/test_repositories.py
"""
Unit tests for External Service Repositories
"""
import pytest
from datetime import datetime, timezone, timedelta
from uuid import uuid4
from app.repositories.weather_repository import WeatherRepository
from app.repositories.traffic_repository import TrafficRepository
from app.models.weather import WeatherData, WeatherStation, WeatherDataJob
from app.models.traffic import TrafficData, TrafficMeasurementPoint, TrafficDataJob
@pytest.mark.asyncio
class TestWeatherRepository:
"""Test Weather Repository operations"""
async def test_create_weather_data(self, test_db_session, sample_weather_data):
"""Test creating weather data"""
repository = WeatherRepository(test_db_session)
record = await repository.create_weather_data(sample_weather_data)
assert record is not None
assert record.id is not None
assert record.city == sample_weather_data["city"]
assert record.temperature == sample_weather_data["temperature"]
async def test_get_current_weather(self, populated_weather_db, sample_weather_data):
"""Test getting current weather data"""
repository = WeatherRepository(populated_weather_db)
result = await repository.get_current_weather("madrid")
assert result is not None
assert result.city == "madrid"
assert result.temperature == sample_weather_data["temperature"]
async def test_get_weather_forecast(self, test_db_session, sample_weather_forecast):
"""Test getting weather forecast"""
repository = WeatherRepository(test_db_session)
# Create forecast data
for forecast_item in sample_weather_forecast:
await repository.create_weather_data(forecast_item)
result = await repository.get_weather_forecast("madrid", 7)
assert len(result) == 1
assert result[0].is_forecast is True
async def test_get_historical_weather(self, test_db_session, sample_weather_data):
"""Test getting historical weather data"""
repository = WeatherRepository(test_db_session)
# Create historical data
historical_data = sample_weather_data.copy()
historical_data["date"] = datetime.now(timezone.utc) - timedelta(days=1)
await repository.create_weather_data(historical_data)
start_date = datetime.now(timezone.utc) - timedelta(days=2)
end_date = datetime.now(timezone.utc)
result = await repository.get_historical_weather("madrid", start_date, end_date)
assert len(result) >= 1
async def test_create_weather_station(self, test_db_session):
"""Test creating weather station"""
repository = WeatherRepository(test_db_session)
station_data = {
"station_id": "TEST_001",
"name": "Test Station",
"city": "madrid",
"latitude": 40.4168,
"longitude": -3.7038,
"altitude": 650.0,
"is_active": True
}
station = await repository.create_weather_station(station_data)
assert station is not None
assert station.station_id == "TEST_001"
assert station.name == "Test Station"
async def test_get_weather_stations(self, test_db_session):
"""Test getting weather stations"""
repository = WeatherRepository(test_db_session)
# Create test station
station_data = {
"station_id": "TEST_001",
"name": "Test Station",
"city": "madrid",
"latitude": 40.4168,
"longitude": -3.7038,
"is_active": True
}
await repository.create_weather_station(station_data)
stations = await repository.get_weather_stations("madrid")
assert len(stations) == 1
assert stations[0].station_id == "TEST_001"
async def test_create_weather_job(self, test_db_session, sample_tenant_id):
"""Test creating weather data collection job"""
repository = WeatherRepository(test_db_session)
job_data = {
"job_type": "current",
"city": "madrid",
"status": "pending",
"scheduled_at": datetime.utcnow(),
"tenant_id": sample_tenant_id
}
job = await repository.create_weather_job(job_data)
assert job is not None
assert job.job_type == "current"
assert job.status == "pending"
async def test_update_weather_job(self, test_db_session, sample_tenant_id):
"""Test updating weather job"""
repository = WeatherRepository(test_db_session)
# Create job first
job_data = {
"job_type": "current",
"city": "madrid",
"status": "pending",
"scheduled_at": datetime.utcnow(),
"tenant_id": sample_tenant_id
}
job = await repository.create_weather_job(job_data)
# Update job
update_data = {
"status": "completed",
"completed_at": datetime.utcnow(),
"success_count": 1
}
success = await repository.update_weather_job(job.id, update_data)
assert success is True
async def test_get_weather_jobs(self, test_db_session, sample_tenant_id):
"""Test getting weather jobs"""
repository = WeatherRepository(test_db_session)
# Create test job
job_data = {
"job_type": "forecast",
"city": "madrid",
"status": "completed",
"scheduled_at": datetime.utcnow(),
"tenant_id": sample_tenant_id
}
await repository.create_weather_job(job_data)
jobs = await repository.get_weather_jobs()
assert len(jobs) >= 1
assert any(job.job_type == "forecast" for job in jobs)
@pytest.mark.asyncio
class TestTrafficRepository:
"""Test Traffic Repository operations"""
async def test_create_traffic_data(self, test_db_session, sample_traffic_data):
"""Test creating traffic data"""
repository = TrafficRepository(test_db_session)
# Convert sample data to list for bulk create
traffic_list = [sample_traffic_data]
count = await repository.bulk_create_traffic_data(traffic_list)
assert count == 1
async def test_get_current_traffic(self, populated_traffic_db, sample_traffic_data):
"""Test getting current traffic data"""
repository = TrafficRepository(populated_traffic_db)
result = await repository.get_current_traffic("madrid")
assert len(result) >= 1
assert result[0].city == "madrid"
async def test_get_current_traffic_with_filters(self, populated_traffic_db):
"""Test getting current traffic with filters"""
repository = TrafficRepository(populated_traffic_db)
result = await repository.get_current_traffic("madrid", district="Chamartín")
        # The filter may or may not match the sample data's district; only verify a list is returned
assert isinstance(result, list)
async def test_get_historical_traffic(self, test_db_session, sample_traffic_data):
"""Test getting historical traffic data"""
repository = TrafficRepository(test_db_session)
# Create historical data
historical_data = sample_traffic_data.copy()
historical_data["date"] = datetime.now(timezone.utc) - timedelta(days=1)
await repository.bulk_create_traffic_data([historical_data])
start_date = datetime.now(timezone.utc) - timedelta(days=2)
end_date = datetime.now(timezone.utc)
result = await repository.get_historical_traffic("madrid", start_date, end_date)
assert len(result) >= 1
async def test_create_measurement_point(self, test_db_session):
"""Test creating traffic measurement point"""
repository = TrafficRepository(test_db_session)
point_data = {
"point_id": "TEST_POINT_001",
"name": "Test Measurement Point",
"city": "madrid",
"point_type": "TEST",
"latitude": 40.4168,
"longitude": -3.7038,
"district": "Centro",
"road_name": "Test Road",
"is_active": True
}
point = await repository.create_measurement_point(point_data)
assert point is not None
assert point.point_id == "TEST_POINT_001"
assert point.name == "Test Measurement Point"
async def test_get_measurement_points(self, test_db_session):
"""Test getting measurement points"""
repository = TrafficRepository(test_db_session)
# Create test point
point_data = {
"point_id": "TEST_POINT_001",
"name": "Test Point",
"city": "madrid",
"point_type": "TEST",
"latitude": 40.4168,
"longitude": -3.7038,
"is_active": True
}
await repository.create_measurement_point(point_data)
points = await repository.get_measurement_points("madrid")
assert len(points) == 1
assert points[0].point_id == "TEST_POINT_001"
async def test_get_measurement_points_with_filters(self, test_db_session):
"""Test getting measurement points with filters"""
repository = TrafficRepository(test_db_session)
# Create test points with different types
for i, point_type in enumerate(["M30", "URB", "TEST"]):
point_data = {
"point_id": f"TEST_POINT_{i:03d}",
"name": f"Test Point {i}",
"city": "madrid",
"point_type": point_type,
"latitude": 40.4168,
"longitude": -3.7038,
"is_active": True
}
await repository.create_measurement_point(point_data)
# Filter by type
points = await repository.get_measurement_points("madrid", road_type="M30")
assert len(points) == 1
assert points[0].point_type == "M30"
async def test_get_traffic_analytics(self, populated_traffic_db):
"""Test getting traffic analytics"""
repository = TrafficRepository(populated_traffic_db)
analytics = await repository.get_traffic_analytics("madrid")
assert isinstance(analytics, dict)
assert "total_measurements" in analytics
assert "average_volume" in analytics
async def test_create_traffic_job(self, test_db_session, sample_tenant_id):
"""Test creating traffic collection job"""
repository = TrafficRepository(test_db_session)
job_data = {
"job_type": "current",
"city": "madrid",
"status": "pending",
"scheduled_at": datetime.utcnow(),
"tenant_id": sample_tenant_id
}
job = await repository.create_traffic_job(job_data)
assert job is not None
assert job.job_type == "current"
assert job.status == "pending"
async def test_update_traffic_job(self, test_db_session, sample_tenant_id):
"""Test updating traffic job"""
repository = TrafficRepository(test_db_session)
# Create job first
job_data = {
"job_type": "current",
"city": "madrid",
"status": "pending",
"scheduled_at": datetime.utcnow(),
"tenant_id": sample_tenant_id
}
job = await repository.create_traffic_job(job_data)
# Update job
update_data = {
"status": "completed",
"completed_at": datetime.utcnow(),
"success_count": 10
}
success = await repository.update_traffic_job(job.id, update_data)
assert success is True
async def test_get_traffic_jobs(self, test_db_session, sample_tenant_id):
"""Test getting traffic jobs"""
repository = TrafficRepository(test_db_session)
# Create test job
job_data = {
"job_type": "historical",
"city": "madrid",
"status": "completed",
"scheduled_at": datetime.utcnow(),
"tenant_id": sample_tenant_id
}
await repository.create_traffic_job(job_data)
jobs = await repository.get_traffic_jobs()
assert len(jobs) >= 1
assert any(job.job_type == "historical" for job in jobs)
async def test_bulk_create_performance(self, test_db_session):
"""Test bulk create performance"""
repository = TrafficRepository(test_db_session)
# Create large dataset
bulk_data = []
for i in range(100):
data = {
"city": "madrid",
"location_id": f"PM_TEST_{i:03d}",
"date": datetime.now(timezone.utc),
"measurement_point_id": f"PM_TEST_{i:03d}",
"measurement_point_name": f"Test Point {i}",
"measurement_point_type": "TEST",
"traffic_volume": 100 + i,
"average_speed": 50.0,
"congestion_level": "medium",
"occupation_percentage": 50.0,
"latitude": 40.4168,
"longitude": -3.7038,
"source": "test"
}
bulk_data.append(data)
import time
start_time = time.time()
count = await repository.bulk_create_traffic_data(bulk_data)
end_time = time.time()
execution_time = end_time - start_time
assert count == 100
assert execution_time < 3.0 # Should complete in under 3 seconds

View File

@@ -0,0 +1,445 @@
# services/external/tests/unit/test_services.py
"""
Unit tests for External Service Services
"""
import pytest
from datetime import datetime, timezone, timedelta
from unittest.mock import AsyncMock, patch
from uuid import uuid4
from app.services.weather_service import WeatherService
from app.services.traffic_service import TrafficService
@pytest.mark.asyncio
class TestWeatherService:
"""Test Weather Service business logic"""
@pytest.fixture
def weather_service(self):
"""Create weather service instance"""
return WeatherService()
async def test_get_current_weather_from_cache(self, weather_service):
"""Test getting current weather from cache"""
with patch('app.services.weather_service.get_db_transaction') as mock_get_db:
mock_db = AsyncMock()
mock_get_db.return_value.__aenter__.return_value = mock_db
mock_repository = AsyncMock()
mock_weather = AsyncMock()
mock_weather.date = datetime.now(timezone.utc) - timedelta(minutes=30) # Fresh data
mock_weather.to_dict.return_value = {"temperature": 18.5, "city": "madrid"}
mock_repository.get_current_weather.return_value = mock_weather
with patch('app.services.weather_service.WeatherRepository', return_value=mock_repository):
result = await weather_service.get_current_weather("madrid")
assert result is not None
assert result["temperature"] == 18.5
assert result["city"] == "madrid"
async def test_get_current_weather_fetch_from_api(self, weather_service, mock_aemet_response):
"""Test getting current weather from API when cache is stale"""
with patch('app.services.weather_service.get_db_transaction') as mock_get_db:
mock_db = AsyncMock()
mock_get_db.return_value.__aenter__.return_value = mock_db
mock_repository = AsyncMock()
# No cached data or stale data
mock_repository.get_current_weather.return_value = None
mock_stored = AsyncMock()
mock_stored.to_dict.return_value = {"temperature": 20.0}
mock_repository.create_weather_data.return_value = mock_stored
# Mock AEMET client
mock_client = AsyncMock()
mock_client.get_current_weather.return_value = mock_aemet_response
with patch('app.services.weather_service.WeatherRepository', return_value=mock_repository):
weather_service.aemet_client = mock_client
result = await weather_service.get_current_weather("madrid")
assert result is not None
assert result["temperature"] == 20.0
mock_client.get_current_weather.assert_called_once()
async def test_get_weather_forecast_from_cache(self, weather_service):
"""Test getting weather forecast from cache"""
with patch('app.services.weather_service.get_db_transaction') as mock_get_db:
mock_db = AsyncMock()
mock_get_db.return_value.__aenter__.return_value = mock_db
mock_repository = AsyncMock()
mock_forecast = [AsyncMock(), AsyncMock()]
for item in mock_forecast:
item.created_at = datetime.now(timezone.utc) - timedelta(hours=1) # Fresh
item.to_dict.return_value = {"temperature": 22.0}
mock_repository.get_weather_forecast.return_value = mock_forecast
with patch('app.services.weather_service.WeatherRepository', return_value=mock_repository):
result = await weather_service.get_weather_forecast("madrid", 7)
assert len(result) == 2
assert all(item["temperature"] == 22.0 for item in result)
async def test_get_weather_forecast_fetch_from_api(self, weather_service):
"""Test getting weather forecast from API when cache is stale"""
with patch('app.services.weather_service.get_db_transaction') as mock_get_db:
mock_db = AsyncMock()
mock_get_db.return_value.__aenter__.return_value = mock_db
mock_repository = AsyncMock()
# No cached data
mock_repository.get_weather_forecast.return_value = []
mock_stored = AsyncMock()
mock_stored.to_dict.return_value = {"temperature": 25.0}
mock_repository.create_weather_data.return_value = mock_stored
# Mock AEMET client
mock_client = AsyncMock()
mock_client.get_forecast.return_value = [
{"forecast_date": datetime.now(), "temperature": 25.0}
]
with patch('app.services.weather_service.WeatherRepository', return_value=mock_repository):
weather_service.aemet_client = mock_client
result = await weather_service.get_weather_forecast("madrid", 7)
assert len(result) == 1
assert result[0]["temperature"] == 25.0
mock_client.get_forecast.assert_called_once()
async def test_get_historical_weather(self, weather_service, sample_tenant_id):
"""Test getting historical weather data"""
start_date = datetime.now(timezone.utc) - timedelta(days=7)
end_date = datetime.now(timezone.utc)
with patch('app.services.weather_service.get_db_transaction') as mock_get_db:
mock_db = AsyncMock()
mock_get_db.return_value.__aenter__.return_value = mock_db
mock_repository = AsyncMock()
mock_historical = [AsyncMock(), AsyncMock()]
for item in mock_historical:
item.to_dict.return_value = {"temperature": 18.0}
mock_repository.get_historical_weather.return_value = mock_historical
with patch('app.services.weather_service.WeatherRepository', return_value=mock_repository):
result = await weather_service.get_historical_weather(
"madrid", start_date, end_date, sample_tenant_id
)
assert len(result) == 2
assert all(item["temperature"] == 18.0 for item in result)
async def test_get_weather_stations(self, weather_service):
"""Test getting weather stations"""
with patch('app.services.weather_service.get_db_transaction') as mock_get_db:
mock_db = AsyncMock()
mock_get_db.return_value.__aenter__.return_value = mock_db
mock_repository = AsyncMock()
mock_stations = [AsyncMock()]
mock_stations[0].to_dict.return_value = {"station_id": "TEST_001"}
mock_repository.get_weather_stations.return_value = mock_stations
with patch('app.services.weather_service.WeatherRepository', return_value=mock_repository):
result = await weather_service.get_weather_stations("madrid")
assert len(result) == 1
assert result[0]["station_id"] == "TEST_001"
async def test_trigger_weather_collection(self, weather_service, sample_tenant_id):
"""Test triggering weather data collection"""
with patch('app.services.weather_service.get_db_transaction') as mock_get_db:
mock_db = AsyncMock()
mock_get_db.return_value.__aenter__.return_value = mock_db
mock_repository = AsyncMock()
mock_job = AsyncMock()
mock_job.id = uuid4()
mock_job.to_dict.return_value = {"id": str(mock_job.id), "status": "pending"}
mock_repository.create_weather_job.return_value = mock_job
with patch('app.services.weather_service.WeatherRepository', return_value=mock_repository):
result = await weather_service.trigger_weather_collection(
"madrid", "current", sample_tenant_id
)
assert result["status"] == "pending"
mock_repository.create_weather_job.assert_called_once()
async def test_process_weather_collection_job(self, weather_service):
"""Test processing weather collection job"""
job_id = uuid4()
with patch('app.services.weather_service.get_db_transaction') as mock_get_db:
mock_db = AsyncMock()
mock_get_db.return_value.__aenter__.return_value = mock_db
mock_repository = AsyncMock()
# Mock job
mock_job = AsyncMock()
mock_job.id = job_id
mock_job.job_type = "current"
mock_job.city = "madrid"
mock_repository.get_weather_jobs.return_value = [mock_job]
mock_repository.update_weather_job.return_value = True
# Mock updated job after completion
mock_updated_job = AsyncMock()
mock_updated_job.to_dict.return_value = {"id": str(job_id), "status": "completed"}
            # get_weather_jobs is called twice; configure sequential return values via side_effect
mock_repository.get_weather_jobs.side_effect = [
[mock_job], # First call returns pending job
[mock_updated_job] # Second call returns completed job
]
with patch('app.services.weather_service.WeatherRepository', return_value=mock_repository):
with patch.object(weather_service, '_collect_current_weather', return_value=1):
result = await weather_service.process_weather_collection_job(job_id)
assert result["status"] == "completed"
async def test_map_weather_condition(self, weather_service):
"""Test weather condition mapping"""
test_cases = [
("Soleado", "clear"),
("Nublado", "cloudy"),
("Parcialmente nublado", "partly_cloudy"),
("Lluvioso", "rainy"),
("Nevando", "snowy"),
("Tormenta", "stormy"),
("Desconocido", "unknown")
]
for description, expected in test_cases:
result = weather_service._map_weather_condition(description)
assert result == expected
@pytest.mark.asyncio
class TestTrafficService:
"""Test Traffic Service business logic"""
@pytest.fixture
def traffic_service(self):
"""Create traffic service instance"""
return TrafficService()
async def test_get_current_traffic_from_cache(self, traffic_service):
"""Test getting current traffic from cache"""
with patch('app.services.traffic_service.get_db_transaction') as mock_get_db:
mock_db = AsyncMock()
mock_get_db.return_value.__aenter__.return_value = mock_db
mock_repository = AsyncMock()
mock_traffic = [AsyncMock()]
mock_traffic[0].date = datetime.now(timezone.utc) - timedelta(minutes=5) # Fresh
mock_traffic[0].to_dict.return_value = {"traffic_volume": 850}
mock_repository.get_current_traffic.return_value = mock_traffic
with patch('app.services.traffic_service.TrafficRepository', return_value=mock_repository):
result = await traffic_service.get_current_traffic("madrid")
assert len(result) == 1
assert result[0]["traffic_volume"] == 850
async def test_get_current_traffic_fetch_from_api(self, traffic_service, mock_madrid_traffic_xml):
"""Test getting current traffic from API when cache is stale"""
with patch('app.services.traffic_service.get_db_transaction') as mock_get_db:
mock_db = AsyncMock()
mock_get_db.return_value.__aenter__.return_value = mock_db
mock_repository = AsyncMock()
# No cached data
mock_repository.get_current_traffic.return_value = []
mock_repository.bulk_create_traffic_data.return_value = 2
# Mock clients
mock_client = AsyncMock()
mock_client.fetch_current_traffic_xml.return_value = mock_madrid_traffic_xml
mock_processor = AsyncMock()
mock_processor.process_current_traffic_xml.return_value = [
{"traffic_volume": 850, "measurement_point_id": "PM_M30_001"},
{"traffic_volume": 320, "measurement_point_id": "PM_URB_002"}
]
with patch('app.services.traffic_service.TrafficRepository', return_value=mock_repository):
traffic_service.madrid_client = mock_client
traffic_service.madrid_processor = mock_processor
result = await traffic_service.get_current_traffic("madrid")
assert len(result) == 2
assert result[0]["traffic_volume"] == 850
mock_client.fetch_current_traffic_xml.assert_called_once()
async def test_get_historical_traffic(self, traffic_service, sample_tenant_id):
"""Test getting historical traffic data"""
start_date = datetime.now(timezone.utc) - timedelta(days=7)
end_date = datetime.now(timezone.utc)
with patch('app.services.traffic_service.get_db_transaction') as mock_get_db:
mock_db = AsyncMock()
mock_get_db.return_value.__aenter__.return_value = mock_db
mock_repository = AsyncMock()
mock_historical = [AsyncMock(), AsyncMock()]
for item in mock_historical:
item.to_dict.return_value = {"traffic_volume": 500}
mock_repository.get_historical_traffic.return_value = mock_historical
with patch('app.services.traffic_service.TrafficRepository', return_value=mock_repository):
result = await traffic_service.get_historical_traffic(
"madrid", start_date, end_date, tenant_id=sample_tenant_id
)
assert len(result) == 2
assert all(item["traffic_volume"] == 500 for item in result)
async def test_get_measurement_points(self, traffic_service):
"""Test getting measurement points"""
with patch('app.services.traffic_service.get_db_transaction') as mock_get_db:
mock_db = AsyncMock()
mock_get_db.return_value.__aenter__.return_value = mock_db
mock_repository = AsyncMock()
mock_points = [AsyncMock()]
mock_points[0].to_dict.return_value = {"point_id": "PM_TEST_001"}
mock_repository.get_measurement_points.return_value = mock_points
with patch('app.services.traffic_service.TrafficRepository', return_value=mock_repository):
result = await traffic_service.get_measurement_points("madrid")
assert len(result) == 1
assert result[0]["point_id"] == "PM_TEST_001"
async def test_get_traffic_analytics(self, traffic_service):
"""Test getting traffic analytics"""
start_date = datetime.now(timezone.utc) - timedelta(days=30)
end_date = datetime.now(timezone.utc)
with patch('app.services.traffic_service.get_db_transaction') as mock_get_db:
mock_db = AsyncMock()
mock_get_db.return_value.__aenter__.return_value = mock_db
mock_repository = AsyncMock()
mock_analytics = {
"total_measurements": 1000,
"average_volume": 650.5,
"peak_hour": "08:00"
}
mock_repository.get_traffic_analytics.return_value = mock_analytics
with patch('app.services.traffic_service.TrafficRepository', return_value=mock_repository):
result = await traffic_service.get_traffic_analytics(
"madrid", start_date, end_date
)
assert result["total_measurements"] == 1000
assert result["average_volume"] == 650.5
assert "generated_at" in result
async def test_trigger_traffic_collection(self, traffic_service, sample_tenant_id):
"""Test triggering traffic data collection"""
with patch('app.services.traffic_service.get_db_transaction') as mock_get_db:
mock_db = AsyncMock()
mock_get_db.return_value.__aenter__.return_value = mock_db
mock_repository = AsyncMock()
mock_job = AsyncMock()
mock_job.id = uuid4()
mock_job.to_dict.return_value = {"id": str(mock_job.id), "status": "pending"}
mock_repository.create_traffic_job.return_value = mock_job
with patch('app.services.traffic_service.TrafficRepository', return_value=mock_repository):
result = await traffic_service.trigger_traffic_collection(
"madrid", "current", user_id=sample_tenant_id
)
assert result["status"] == "pending"
mock_repository.create_traffic_job.assert_called_once()
async def test_process_traffic_collection_job(self, traffic_service):
"""Test processing traffic collection job"""
job_id = uuid4()
with patch('app.services.traffic_service.get_db_transaction') as mock_get_db:
mock_db = AsyncMock()
mock_get_db.return_value.__aenter__.return_value = mock_db
mock_repository = AsyncMock()
# Mock job
mock_job = AsyncMock()
mock_job.id = job_id
mock_job.job_type = "current"
mock_job.city = "madrid"
mock_job.location_pattern = None
mock_repository.get_traffic_jobs.return_value = [mock_job]
mock_repository.update_traffic_job.return_value = True
# Mock updated job after completion
mock_updated_job = AsyncMock()
mock_updated_job.to_dict.return_value = {"id": str(job_id), "status": "completed"}
mock_repository.get_traffic_jobs.side_effect = [
[mock_job], # First call returns pending job
[mock_updated_job] # Second call returns completed job
]
with patch('app.services.traffic_service.TrafficRepository', return_value=mock_repository):
with patch.object(traffic_service, '_collect_current_traffic', return_value=125):
result = await traffic_service.process_traffic_collection_job(job_id)
assert result["status"] == "completed"
async def test_is_traffic_data_fresh(self, traffic_service):
"""Test traffic data freshness check"""
from app.models.traffic import TrafficData
# Fresh data (5 minutes old)
fresh_data = [AsyncMock()]
        fresh_data[0].date = datetime.now(timezone.utc) - timedelta(minutes=5)
result = traffic_service._is_traffic_data_fresh(fresh_data)
assert result is True
# Stale data (15 minutes old)
stale_data = [AsyncMock()]
        stale_data[0].date = datetime.now(timezone.utc) - timedelta(minutes=15)
result = traffic_service._is_traffic_data_fresh(stale_data)
assert result is False
# Empty data
result = traffic_service._is_traffic_data_fresh([])
assert result is False
async def test_collect_current_traffic(self, traffic_service):
"""Test current traffic collection"""
with patch('app.services.traffic_service.get_db_transaction') as mock_get_db:
mock_db = AsyncMock()
mock_get_db.return_value.__aenter__.return_value = mock_db
mock_repository = AsyncMock()
mock_repository.bulk_create_traffic_data.return_value = 10
with patch('app.services.traffic_service.TrafficRepository', return_value=mock_repository):
with patch.object(traffic_service, '_fetch_current_traffic_from_api', return_value=[{} for _ in range(10)]):
result = await traffic_service._collect_current_traffic("madrid", None)
assert result == 10