Initial commit - production deployment

2026-01-21 17:17:16 +01:00
commit c23d00dd92
2289 changed files with 638440 additions and 0 deletions

services/external/app/core/__init__.py

@@ -0,0 +1 @@
# services/external/app/core/__init__.py

services/external/app/core/config.py

@@ -0,0 +1,77 @@
# services/external/app/core/config.py
from shared.config.base import BaseServiceSettings
import os
from pydantic import Field


class DataSettings(BaseServiceSettings):
    """Data service specific settings"""

    # Service Identity
    SERVICE_NAME: str = "external-service"
    VERSION: str = "1.0.0"
    APP_NAME: str = "Bakery External Data Service"
    DESCRIPTION: str = "External data collection service for weather and traffic data"

    # API Configuration
    API_V1_STR: str = "/api/v1"

    # Database configuration (secure approach - build from components)
    @property
    def DATABASE_URL(self) -> str:
        """Build database URL from secure components"""
        # Try complete URL first (for backward compatibility)
        complete_url = os.getenv("EXTERNAL_DATABASE_URL")
        if complete_url:
            return complete_url

        # Build from components (secure approach)
        user = os.getenv("EXTERNAL_DB_USER", "external_user")
        password = os.getenv("EXTERNAL_DB_PASSWORD", "external_pass123")
        host = os.getenv("EXTERNAL_DB_HOST", "localhost")
        port = os.getenv("EXTERNAL_DB_PORT", "5432")
        name = os.getenv("EXTERNAL_DB_NAME", "external_db")
        return f"postgresql+asyncpg://{user}:{password}@{host}:{port}/{name}"

    # External API Configuration
    AEMET_API_KEY: str = os.getenv("AEMET_API_KEY", "")
    AEMET_BASE_URL: str = "https://opendata.aemet.es/opendata"
    AEMET_TIMEOUT: int = int(os.getenv("AEMET_TIMEOUT", "90"))  # Increased for unstable API
    AEMET_RETRY_ATTEMPTS: int = int(os.getenv("AEMET_RETRY_ATTEMPTS", "5"))  # More retries for connection issues
    AEMET_ENABLED: bool = os.getenv("AEMET_ENABLED", "true").lower() == "true"  # Allow disabling AEMET

    MADRID_OPENDATA_API_KEY: str = os.getenv("MADRID_OPENDATA_API_KEY", "")
    MADRID_OPENDATA_BASE_URL: str = "https://datos.madrid.es"
    MADRID_OPENDATA_TIMEOUT: int = int(os.getenv("MADRID_OPENDATA_TIMEOUT", "30"))

    # Data Collection Configuration
    WEATHER_COLLECTION_INTERVAL_HOURS: int = int(os.getenv("WEATHER_COLLECTION_INTERVAL_HOURS", "1"))
    TRAFFIC_COLLECTION_INTERVAL_HOURS: int = int(os.getenv("TRAFFIC_COLLECTION_INTERVAL_HOURS", "1"))
    EVENTS_COLLECTION_INTERVAL_HOURS: int = int(os.getenv("EVENTS_COLLECTION_INTERVAL_HOURS", "6"))

    # Cache TTL Configuration
    WEATHER_CACHE_TTL_HOURS: int = int(os.getenv("WEATHER_CACHE_TTL_HOURS", "1"))
    TRAFFIC_CACHE_TTL_HOURS: int = int(os.getenv("TRAFFIC_CACHE_TTL_HOURS", "1"))
    EVENTS_CACHE_TTL_HOURS: int = int(os.getenv("EVENTS_CACHE_TTL_HOURS", "6"))

    # Data Quality Configuration
    DATA_VALIDATION_ENABLED: bool = os.getenv("DATA_VALIDATION_ENABLED", "true").lower() == "true"
    OUTLIER_DETECTION_ENABLED: bool = os.getenv("OUTLIER_DETECTION_ENABLED", "true").lower() == "true"
    DATA_COMPLETENESS_THRESHOLD: float = float(os.getenv("DATA_COMPLETENESS_THRESHOLD", "0.8"))

    # Geolocation Settings (Madrid focus)
    DEFAULT_LATITUDE: float = float(os.getenv("DEFAULT_LATITUDE", "40.4168"))    # Madrid
    DEFAULT_LONGITUDE: float = float(os.getenv("DEFAULT_LONGITUDE", "-3.7038"))  # Madrid
    LOCATION_RADIUS_KM: float = float(os.getenv("LOCATION_RADIUS_KM", "50.0"))

    # Data Retention
    RAW_DATA_RETENTION_DAYS: int = int(os.getenv("RAW_DATA_RETENTION_DAYS", "90"))
    PROCESSED_DATA_RETENTION_DAYS: int = int(os.getenv("PROCESSED_DATA_RETENTION_DAYS", "365"))

    # Batch Processing
    BATCH_PROCESSING_ENABLED: bool = os.getenv("BATCH_PROCESSING_ENABLED", "true").lower() == "true"
    BATCH_SIZE: int = int(os.getenv("BATCH_SIZE", "1000"))
    PARALLEL_PROCESSING_WORKERS: int = int(os.getenv("PARALLEL_PROCESSING_WORKERS", "4"))


settings = DataSettings()
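
A minimal sketch (not part of this commit) of how the DATABASE_URL property above resolves its value. It assumes BaseServiceSettings can be instantiated without additional required environment variables; the host and name values are illustrative only.

# Hypothetical usage sketch - illustrates DATABASE_URL resolution only
import os

from app.core.config import DataSettings

# With only individual components set, the URL is assembled from the parts
os.environ["EXTERNAL_DB_HOST"] = "db.internal"
os.environ["EXTERNAL_DB_NAME"] = "external_db"
print(DataSettings().DATABASE_URL)
# postgresql+asyncpg://external_user:external_pass123@db.internal:5432/external_db

# A complete EXTERNAL_DATABASE_URL, if present, takes precedence over the components
os.environ["EXTERNAL_DATABASE_URL"] = "postgresql+asyncpg://svc:secret@db.internal:5432/external_db"
print(DataSettings().DATABASE_URL)
# postgresql+asyncpg://svc:secret@db.internal:5432/external_db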

services/external/app/core/database.py

@@ -0,0 +1,81 @@
# services/external/app/core/database.py
"""
External Service Database Configuration using shared database manager
"""
import structlog
from contextlib import asynccontextmanager
from typing import AsyncGenerator

from app.core.config import settings
from shared.database.base import DatabaseManager, Base

logger = structlog.get_logger()

# Create database manager instance
database_manager = DatabaseManager(
    database_url=settings.DATABASE_URL,
    service_name="external-service"
)


async def get_db():
    """
    Database dependency for FastAPI - using shared database manager
    """
    async for session in database_manager.get_db():
        yield session


async def init_db():
    """Initialize database tables using shared database manager"""
    try:
        logger.info("Initializing External Service database...")

        # Import all models to ensure they're registered
        from app.models import weather, traffic  # noqa: F401

        # Create all tables using database manager
        await database_manager.create_tables(Base.metadata)

        logger.info("External Service database initialized successfully")
    except Exception as e:
        logger.error("Failed to initialize database", error=str(e))
        raise


async def close_db():
    """Close database connections using shared database manager"""
    try:
        await database_manager.close_connections()
        logger.info("Database connections closed")
    except Exception as e:
        logger.error("Error closing database connections", error=str(e))


@asynccontextmanager
async def get_db_transaction():
    """
    Context manager for database transactions using shared database manager
    """
    async with database_manager.get_session() as session:
        try:
            async with session.begin():
                yield session
        except Exception as e:
            logger.error("Transaction error", error=str(e))
            raise


@asynccontextmanager
async def get_background_session():
    """
    Context manager for background tasks using shared database manager
    """
    async with database_manager.get_background_session() as session:
        yield session


async def health_check():
    """Database health check using shared database manager"""
    return await database_manager.health_check()
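
A hedged sketch (not in this commit) of how these helpers would typically be wired into a FastAPI app. Only the functions defined above and standard FastAPI/SQLAlchemy APIs are used; the route paths are illustrative, and the AsyncSession annotation assumes the shared manager yields SQLAlchemy async sessions, which the asyncpg URL suggests but this file does not prove.

# Hypothetical wiring sketch for the helpers above (not part of this file)
from contextlib import asynccontextmanager

from fastapi import Depends, FastAPI
from sqlalchemy.ext.asyncio import AsyncSession

from app.core.database import close_db, get_db, health_check, init_db


@asynccontextmanager
async def lifespan(app: FastAPI):
    # Create tables on startup, release connections on shutdown
    await init_db()
    yield
    await close_db()


app = FastAPI(lifespan=lifespan)


@app.get("/health/db")
async def db_health():
    # Delegates to the shared database manager's health check
    return await health_check()


@app.get("/api/v1/items")
async def list_items(db: AsyncSession = Depends(get_db)):
    # `db` is the session yielded by get_db(); run queries with it here
    return {"status": "ok"}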

services/external/app/core/poi_config.py

@@ -0,0 +1,181 @@
"""
POI Detection Configuration
Defines POI categories, search parameters, and relevance thresholds
for automated Point of Interest detection and feature engineering.
"""
from dataclasses import dataclass
from typing import Dict
@dataclass
class POICategory:
"""POI category definition with OSM query and ML parameters"""
name: str
osm_query: str
search_radius_m: int
weight: float # Importance weight for ML model (positive or negative)
description: str
# POI Category Definitions based on OpenStreetMap tags
# Research-based search radii and weights for bakery demand forecasting
POI_CATEGORIES: Dict[str, POICategory] = {
"schools": POICategory(
name="schools",
osm_query='["amenity"~"school|kindergarten|university|college"]',
search_radius_m=500,
weight=1.5, # High positive impact - morning drop-off rush
description="Educational institutions causing morning/afternoon rush patterns"
),
"offices": POICategory(
name="offices",
osm_query='["office"]',
search_radius_m=800,
weight=1.3, # Positive impact - weekday lunch/breakfast demand
description="Office buildings and business centers"
),
"gyms_sports": POICategory(
name="gyms_sports",
osm_query='["leisure"~"fitness_centre|sports_centre|stadium"]',
search_radius_m=600,
weight=0.8, # Moderate impact - morning/evening activity
description="Fitness centers and sports facilities"
),
"residential": POICategory(
name="residential",
osm_query='["building"~"residential|apartments|house"]',
search_radius_m=400,
weight=1.0, # Base demand from residents
description="Residential buildings and housing"
),
"tourism": POICategory(
name="tourism",
osm_query='["tourism"~"attraction|museum|hotel|hostel|guest_house"]',
search_radius_m=1000,
weight=1.2, # Positive impact - tourist foot traffic
description="Tourist attractions, hotels, and points of interest"
),
"competitors": POICategory(
name="competitors",
osm_query='["shop"~"bakery|pastry|confectionery"]',
search_radius_m=1000,
weight=-0.5, # Negative impact - competition pressure
description="Competing bakeries and pastry shops"
),
"transport_hubs": POICategory(
name="transport_hubs",
osm_query='["public_transport"~"station|stop"]["railway"~"station|subway_entrance|tram_stop"]',
search_radius_m=800,
weight=1.4, # High impact - commuter foot traffic
description="Public transport stations and hubs"
),
"coworking": POICategory(
name="coworking",
osm_query='["amenity"="coworking_space"]',
search_radius_m=600,
weight=1.1, # Moderate-high impact - flexible workers
description="Coworking spaces and shared offices"
),
"retail": POICategory(
name="retail",
osm_query='["shop"]',
search_radius_m=500,
weight=0.9, # Moderate impact - general foot traffic
description="Retail shops and commercial areas"
)
}
# Feature Relevance Thresholds
# Determines which POI features are significant enough to include in ML models
# Based on retail gravity model research and distance decay patterns
RELEVANCE_THRESHOLDS: Dict[str, Dict[str, float]] = {
"schools": {
"min_proximity_score": 0.5, # At least moderate proximity required
"max_distance_to_nearest_m": 500, # Must be within 500m
"min_count": 1 # At least 1 school
},
"offices": {
"min_proximity_score": 0.3,
"max_distance_to_nearest_m": 800,
"min_count": 2 # Offices are common; need multiple for impact
},
"gyms_sports": {
"min_proximity_score": 0.4,
"max_distance_to_nearest_m": 600,
"min_count": 1
},
"residential": {
"min_proximity_score": 1.0, # High threshold; residential is everywhere in cities
"max_distance_to_nearest_m": 400,
"min_count": 5 # Need significant residential density
},
"tourism": {
"min_proximity_score": 0.2, # Lower threshold; tourism is high-impact even at distance
"max_distance_to_nearest_m": 1000,
"min_count": 1
},
"competitors": {
"min_proximity_score": 0.1, # Any competition is relevant (even distant)
"max_distance_to_nearest_m": 1000,
"min_count": 1
},
"transport_hubs": {
"min_proximity_score": 0.4,
"max_distance_to_nearest_m": 800,
"min_count": 1
},
"coworking": {
"min_proximity_score": 0.3,
"max_distance_to_nearest_m": 600,
"min_count": 1
},
"retail": {
"min_proximity_score": 0.8, # Retail is common; higher bar for relevance
"max_distance_to_nearest_m": 500,
"min_count": 3
}
}
# Overpass API Configuration
OVERPASS_API_URL = "https://overpass-api.de/api/interpreter"
OVERPASS_TIMEOUT_SECONDS = 30
OVERPASS_MAX_RETRIES = 4 # Increased from 3 to 4 for better resilience
OVERPASS_RETRY_DELAY_SECONDS = 2 # Base delay (will use exponential backoff)
# POI Cache Configuration
POI_CACHE_TTL_DAYS = 90 # Cache POI results for 90 days
POI_REFRESH_INTERVAL_DAYS = 180 # Refresh every 6 months
POI_COORDINATE_PRECISION = 4 # Decimal places for cache key (≈10m precision)
# Distance Bands for Feature Engineering (meters)
DISTANCE_BANDS = [
(0, 100), # Immediate proximity
(100, 300), # Primary catchment (walking distance)
(300, 500), # Secondary catchment
(500, 1000) # Tertiary catchment
]
# Competitive Pressure Zones
COMPETITOR_ZONES = {
"direct": {
"max_distance_m": 100,
"pressure_multiplier": -1.0 # Strong negative impact
},
"nearby": {
"max_distance_m": 500,
"pressure_multiplier": -0.5 # Moderate negative impact
},
"market": {
"max_distance_m": 1000,
"min_count_for_district": 5, # If 5+ bakeries = bakery district
"district_multiplier": 0.3, # Positive impact (destination area)
"normal_multiplier": -0.2 # Slight negative (competitive market)
}
}
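
One plausible way the weights and thresholds above get consumed downstream. The actual scoring code lives elsewhere in the service; this is a hedged illustration using a hypothetical linear distance-decay score, with only names defined in poi_config.py plus the standard library.

# Hypothetical consumer of POI_CATEGORIES / RELEVANCE_THRESHOLDS (illustration only)
from typing import List

from app.core.poi_config import POI_CATEGORIES, RELEVANCE_THRESHOLDS


def proximity_score(category: str, distances_m: List[float]) -> float:
    """Sum a linear distance-decay contribution for each POI inside the search radius."""
    cfg = POI_CATEGORIES[category]
    in_radius = [d for d in distances_m if d <= cfg.search_radius_m]
    # Each POI contributes weight * (1 - d / radius): full weight at 0 m, nothing at the radius edge
    return sum(cfg.weight * (1.0 - d / cfg.search_radius_m) for d in in_radius)


def is_relevant(category: str, distances_m: List[float]) -> bool:
    """Apply the per-category relevance thresholds to a list of POI distances."""
    thresholds = RELEVANCE_THRESHOLDS[category]
    in_radius = [d for d in distances_m if d <= POI_CATEGORIES[category].search_radius_m]
    if len(in_radius) < thresholds["min_count"]:
        return False
    if min(in_radius, default=float("inf")) > thresholds["max_distance_to_nearest_m"]:
        return False
    return abs(proximity_score(category, distances_m)) >= thresholds["min_proximity_score"]


# Example: three schools at 120 m, 340 m and 700 m from the bakery
print(is_relevant("schools", [120.0, 340.0, 700.0]))  # True - two inside the 500 m radius, nearest well within 500 m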


@@ -0,0 +1,16 @@
"""
Redis Client for POI Service
Provides access to shared Redis client for POI caching.
"""
from shared.redis_utils import get_redis_client as get_shared_redis_client
async def get_redis_client():
"""
Get Redis client for POI service.
Uses shared Redis infrastructure from shared utilities.
"""
return await get_shared_redis_client()
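
A hedged caching sketch (not part of this commit) showing how this client could be combined with the cache settings from poi_config.py. It assumes the shared client exposes a redis.asyncio-style async API (get/set with an `ex` TTL argument), and the import path for this module is a guess since the file name is not shown above; the key format is also hypothetical.

# Hypothetical caching sketch (illustration only)
import json

from app.core.poi_config import POI_CACHE_TTL_DAYS, POI_COORDINATE_PRECISION
# Assumed module path for the file above; adjust to wherever get_redis_client actually lives
from app.core.redis_client import get_redis_client


async def cache_poi_result(lat: float, lon: float, payload: dict) -> str:
    """Cache a POI lookup result keyed by rounded coordinates."""
    client = await get_redis_client()
    # Round coordinates so nearby lookups (~10 m apart) share one cache entry
    key = f"poi:{round(lat, POI_COORDINATE_PRECISION)}:{round(lon, POI_COORDINATE_PRECISION)}"
    await client.set(key, json.dumps(payload), ex=POI_CACHE_TTL_DAYS * 24 * 3600)
    return key


async def get_cached_poi_result(lat: float, lon: float):
    """Return the cached POI payload for these coordinates, or None on a miss."""
    client = await get_redis_client()
    key = f"poi:{round(lat, POI_COORDINATE_PRECISION)}:{round(lon, POI_COORDINATE_PRECISION)}"
    raw = await client.get(key)
    return json.loads(raw) if raw else None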