Initial commit - production deployment
This commit is contained in:
1
services/external/app/core/__init__.py
vendored
Normal file
1
services/external/app/core/__init__.py
vendored
Normal file
@@ -0,0 +1 @@
|
||||
# services/external/app/core/__init__.py
|
||||
77
services/external/app/core/config.py
vendored
Normal file
77
services/external/app/core/config.py
vendored
Normal file
@@ -0,0 +1,77 @@
|
||||
# services/external/app/core/config.py
|
||||
|
||||
from shared.config.base import BaseServiceSettings
|
||||
import os
|
||||
from pydantic import Field
|
||||
|
||||
class DataSettings(BaseServiceSettings):
    """Settings for the external data service (weather and traffic collection).

    Most defaults below are computed with ``os.getenv`` while the class body
    executes, i.e. once at import time; a non-numeric value in one of the
    numeric variables therefore raises ``ValueError`` on import.

    NOTE(review): how ``BaseServiceSettings`` further loads or overrides
    these fields is not visible here -- confirm against the shared base.
    """

    # Service Identity
    SERVICE_NAME: str = "external-service"
    VERSION: str = "1.0.0"
    APP_NAME: str = "Bakery External Data Service"
    DESCRIPTION: str = "External data collection service for weather and traffic data"

    # API Configuration
    API_V1_STR: str = "/api/v1"

    # Database configuration (secure approach - build from components)
    @property
    def DATABASE_URL(self) -> str:
        """Return the async PostgreSQL connection URL for this service.

        ``EXTERNAL_DATABASE_URL`` is returned verbatim when it is set
        (backward compatibility). Otherwise the URL is assembled from the
        ``EXTERNAL_DB_*`` variables, with the user and password
        percent-encoded so that characters such as ``@``, ``:``, ``/`` or
        ``#`` in a credential cannot corrupt the URL structure.
        """
        # Local import keeps this block self-contained.
        from urllib.parse import quote

        # Try complete URL first (for backward compatibility)
        complete_url = os.getenv("EXTERNAL_DATABASE_URL")
        if complete_url:
            return complete_url

        # Build from components (secure approach)
        user = quote(os.getenv("EXTERNAL_DB_USER", "external_user"), safe="")
        # NOTE(review): hard-coded fallback credential; deployments should
        # always provide EXTERNAL_DB_PASSWORD rather than rely on it.
        password = quote(os.getenv("EXTERNAL_DB_PASSWORD", "external_pass123"), safe="")
        host = os.getenv("EXTERNAL_DB_HOST", "localhost")
        port = os.getenv("EXTERNAL_DB_PORT", "5432")
        name = os.getenv("EXTERNAL_DB_NAME", "external_db")

        return f"postgresql+asyncpg://{user}:{password}@{host}:{port}/{name}"

    # External API Configuration
    AEMET_API_KEY: str = os.getenv("AEMET_API_KEY", "")
    AEMET_BASE_URL: str = "https://opendata.aemet.es/opendata"
    AEMET_TIMEOUT: int = int(os.getenv("AEMET_TIMEOUT", "90"))  # Increased for unstable API
    AEMET_RETRY_ATTEMPTS: int = int(os.getenv("AEMET_RETRY_ATTEMPTS", "5"))  # More retries for connection issues
    # Only the (case-insensitive) string "true" enables a boolean flag.
    AEMET_ENABLED: bool = os.getenv("AEMET_ENABLED", "true").lower() == "true"  # Allow disabling AEMET

    MADRID_OPENDATA_API_KEY: str = os.getenv("MADRID_OPENDATA_API_KEY", "")
    MADRID_OPENDATA_BASE_URL: str = "https://datos.madrid.es"
    MADRID_OPENDATA_TIMEOUT: int = int(os.getenv("MADRID_OPENDATA_TIMEOUT", "30"))

    # Data Collection Configuration
    WEATHER_COLLECTION_INTERVAL_HOURS: int = int(os.getenv("WEATHER_COLLECTION_INTERVAL_HOURS", "1"))
    TRAFFIC_COLLECTION_INTERVAL_HOURS: int = int(os.getenv("TRAFFIC_COLLECTION_INTERVAL_HOURS", "1"))
    EVENTS_COLLECTION_INTERVAL_HOURS: int = int(os.getenv("EVENTS_COLLECTION_INTERVAL_HOURS", "6"))

    # Cache TTL Configuration
    WEATHER_CACHE_TTL_HOURS: int = int(os.getenv("WEATHER_CACHE_TTL_HOURS", "1"))
    TRAFFIC_CACHE_TTL_HOURS: int = int(os.getenv("TRAFFIC_CACHE_TTL_HOURS", "1"))
    EVENTS_CACHE_TTL_HOURS: int = int(os.getenv("EVENTS_CACHE_TTL_HOURS", "6"))

    # Data Quality Configuration
    DATA_VALIDATION_ENABLED: bool = os.getenv("DATA_VALIDATION_ENABLED", "true").lower() == "true"
    OUTLIER_DETECTION_ENABLED: bool = os.getenv("OUTLIER_DETECTION_ENABLED", "true").lower() == "true"
    DATA_COMPLETENESS_THRESHOLD: float = float(os.getenv("DATA_COMPLETENESS_THRESHOLD", "0.8"))

    # Geolocation Settings (Madrid focus)
    DEFAULT_LATITUDE: float = float(os.getenv("DEFAULT_LATITUDE", "40.4168"))  # Madrid
    DEFAULT_LONGITUDE: float = float(os.getenv("DEFAULT_LONGITUDE", "-3.7038"))  # Madrid
    LOCATION_RADIUS_KM: float = float(os.getenv("LOCATION_RADIUS_KM", "50.0"))

    # Data Retention
    RAW_DATA_RETENTION_DAYS: int = int(os.getenv("RAW_DATA_RETENTION_DAYS", "90"))
    PROCESSED_DATA_RETENTION_DAYS: int = int(os.getenv("PROCESSED_DATA_RETENTION_DAYS", "365"))

    # Batch Processing
    BATCH_PROCESSING_ENABLED: bool = os.getenv("BATCH_PROCESSING_ENABLED", "true").lower() == "true"
    BATCH_SIZE: int = int(os.getenv("BATCH_SIZE", "1000"))
    PARALLEL_PROCESSING_WORKERS: int = int(os.getenv("PARALLEL_PROCESSING_WORKERS", "4"))
|
||||
|
||||
# Module-level settings instance, created once when this module is imported.
settings = DataSettings()
|
||||
81
services/external/app/core/database.py
vendored
Normal file
81
services/external/app/core/database.py
vendored
Normal file
@@ -0,0 +1,81 @@
|
||||
# services/external/app/core/database.py
|
||||
"""
|
||||
External Service Database Configuration using shared database manager
|
||||
"""
|
||||
|
||||
import structlog
|
||||
from contextlib import asynccontextmanager
|
||||
from typing import AsyncGenerator
|
||||
|
||||
from app.core.config import settings
|
||||
from shared.database.base import DatabaseManager, Base
|
||||
|
||||
# Module-level structured logger used by the helpers in this module.
logger = structlog.get_logger()

# Create database manager instance.
# Built at import time from the service settings; every helper below
# delegates to this single instance.
database_manager = DatabaseManager(
    database_url=settings.DATABASE_URL,
    service_name="external-service",
)
|
||||
|
||||
async def get_db():
    """Database dependency for FastAPI - using shared database manager.

    Async generator that re-yields every session produced by
    ``database_manager.get_db()``; session setup and cleanup are left to
    the shared manager.
    """
    async for session in database_manager.get_db():
        yield session
|
||||
|
||||
|
||||
async def init_db():
    """Initialize database tables using shared database manager.

    Imports the model modules so their tables are registered on
    ``Base.metadata``, then asks the database manager to create them.

    Raises:
        Exception: any failure is logged and re-raised unchanged.
    """
    try:
        logger.info("Initializing External Service database...")

        # Import all models to ensure they're registered
        from app.models import weather, traffic  # noqa: F401

        # Create all tables using database manager
        await database_manager.create_tables(Base.metadata)

        logger.info("External Service database initialized successfully")

    except Exception as e:
        logger.error("Failed to initialize database", error=str(e))
        raise
|
||||
|
||||
|
||||
async def close_db():
    """Close database connections using shared database manager.

    Best-effort: a failure while closing is logged and deliberately not
    re-raised, so the caller never sees an exception from this function.
    """
    try:
        await database_manager.close_connections()
        logger.info("Database connections closed")
    except Exception as e:
        logger.error("Error closing database connections", error=str(e))
|
||||
|
||||
|
||||
@asynccontextmanager
async def get_db_transaction():
    """Context manager for database transactions using shared database manager.

    Opens a session via ``database_manager.get_session()`` and yields it
    from inside ``session.begin()``. Any exception raised while the
    transaction block is active is logged and re-raised.

    NOTE(review): commit/rollback semantics come from ``session.begin()``
    (presumably an SQLAlchemy ``AsyncSession``) -- confirm in the shared
    database manager.
    """
    async with database_manager.get_session() as session:
        try:
            async with session.begin():
                yield session
        except Exception as e:
            logger.error("Transaction error", error=str(e))
            raise
|
||||
|
||||
|
||||
@asynccontextmanager
async def get_background_session():
    """Context manager for background tasks using shared database manager.

    Thin wrapper that yields the session provided by
    ``database_manager.get_background_session()``.
    """
    async with database_manager.get_background_session() as session:
        yield session
|
||||
|
||||
|
||||
async def health_check():
    """Database health check using shared database manager.

    Returns whatever ``database_manager.health_check()`` returns.
    """
    return await database_manager.health_check()
|
||||
181
services/external/app/core/poi_config.py
vendored
Normal file
181
services/external/app/core/poi_config.py
vendored
Normal file
@@ -0,0 +1,181 @@
|
||||
"""
|
||||
POI Detection Configuration
|
||||
|
||||
Defines POI categories, search parameters, and relevance thresholds
|
||||
for automated Point of Interest detection and feature engineering.
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Dict
|
||||
|
||||
|
||||
@dataclass
class POICategory:
    """POI category definition with OSM query and ML parameters."""

    # Category identifier.
    name: str
    # OpenStreetMap tag-filter expression used to select POIs of this category.
    osm_query: str
    # Search radius in metres.
    search_radius_m: int
    # Importance weight for ML model (positive or negative).
    weight: float
    # Human-readable description of the category.
    description: str
|
||||
|
||||
|
||||
# POI Category Definitions based on OpenStreetMap tags
# Research-based search radii and weights for bakery demand forecasting
# Each dictionary key equals the ``name`` of the POICategory it maps to.
POI_CATEGORIES: Dict[str, POICategory] = {
    "schools": POICategory(
        name="schools",
        osm_query='["amenity"~"school|kindergarten|university|college"]',
        search_radius_m=500,
        weight=1.5,  # High positive impact - morning drop-off rush
        description="Educational institutions causing morning/afternoon rush patterns",
    ),
    "offices": POICategory(
        name="offices",
        osm_query='["office"]',
        search_radius_m=800,
        weight=1.3,  # Positive impact - weekday lunch/breakfast demand
        description="Office buildings and business centers",
    ),
    "gyms_sports": POICategory(
        name="gyms_sports",
        osm_query='["leisure"~"fitness_centre|sports_centre|stadium"]',
        search_radius_m=600,
        weight=0.8,  # Moderate impact - morning/evening activity
        description="Fitness centers and sports facilities",
    ),
    "residential": POICategory(
        name="residential",
        osm_query='["building"~"residential|apartments|house"]',
        search_radius_m=400,
        weight=1.0,  # Base demand from residents
        description="Residential buildings and housing",
    ),
    "tourism": POICategory(
        name="tourism",
        osm_query='["tourism"~"attraction|museum|hotel|hostel|guest_house"]',
        search_radius_m=1000,
        weight=1.2,  # Positive impact - tourist foot traffic
        description="Tourist attractions, hotels, and points of interest",
    ),
    "competitors": POICategory(
        name="competitors",
        osm_query='["shop"~"bakery|pastry|confectionery"]',
        search_radius_m=1000,
        weight=-0.5,  # Negative impact - competition pressure
        description="Competing bakeries and pastry shops",
    ),
    "transport_hubs": POICategory(
        name="transport_hubs",
        # NOTE(review): consecutive Overpass tag filters are ANDed, so this
        # only matches elements carrying BOTH tags -- confirm that is intended.
        osm_query='["public_transport"~"station|stop"]["railway"~"station|subway_entrance|tram_stop"]',
        search_radius_m=800,
        weight=1.4,  # High impact - commuter foot traffic
        description="Public transport stations and hubs",
    ),
    "coworking": POICategory(
        name="coworking",
        osm_query='["amenity"="coworking_space"]',
        search_radius_m=600,
        weight=1.1,  # Moderate-high impact - flexible workers
        description="Coworking spaces and shared offices",
    ),
    "retail": POICategory(
        name="retail",
        osm_query='["shop"]',
        search_radius_m=500,
        weight=0.9,  # Moderate impact - general foot traffic
        description="Retail shops and commercial areas",
    ),
}
|
||||
|
||||
|
||||
# Feature Relevance Thresholds
# Determines which POI features are significant enough to include in ML models
# Based on retail gravity model research and distance decay patterns
# Every entry carries the same three keys: "min_proximity_score",
# "max_distance_to_nearest_m" (metres) and "min_count".
RELEVANCE_THRESHOLDS: Dict[str, Dict[str, float]] = {
    "schools": {
        "min_proximity_score": 0.5,  # At least moderate proximity required
        "max_distance_to_nearest_m": 500,  # Must be within 500m
        "min_count": 1,  # At least 1 school
    },
    "offices": {
        "min_proximity_score": 0.3,
        "max_distance_to_nearest_m": 800,
        "min_count": 2,  # Offices are common; need multiple for impact
    },
    "gyms_sports": {
        "min_proximity_score": 0.4,
        "max_distance_to_nearest_m": 600,
        "min_count": 1,
    },
    "residential": {
        "min_proximity_score": 1.0,  # High threshold; residential is everywhere in cities
        "max_distance_to_nearest_m": 400,
        "min_count": 5,  # Need significant residential density
    },
    "tourism": {
        "min_proximity_score": 0.2,  # Lower threshold; tourism is high-impact even at distance
        "max_distance_to_nearest_m": 1000,
        "min_count": 1,
    },
    "competitors": {
        "min_proximity_score": 0.1,  # Any competition is relevant (even distant)
        "max_distance_to_nearest_m": 1000,
        "min_count": 1,
    },
    "transport_hubs": {
        "min_proximity_score": 0.4,
        "max_distance_to_nearest_m": 800,
        "min_count": 1,
    },
    "coworking": {
        "min_proximity_score": 0.3,
        "max_distance_to_nearest_m": 600,
        "min_count": 1,
    },
    "retail": {
        "min_proximity_score": 0.8,  # Retail is common; higher bar for relevance
        "max_distance_to_nearest_m": 500,
        "min_count": 3,
    },
}
|
||||
|
||||
|
||||
# Overpass API Configuration
OVERPASS_API_URL = "https://overpass-api.de/api/interpreter"
OVERPASS_TIMEOUT_SECONDS = 30
OVERPASS_MAX_RETRIES = 4  # Increased from 3 to 4 for better resilience
OVERPASS_RETRY_DELAY_SECONDS = 2  # Base delay (will use exponential backoff)


# POI Cache Configuration
# NOTE(review): the cache TTL (90 days) is shorter than the refresh
# interval (180 days) -- confirm how the two are meant to interact.
POI_CACHE_TTL_DAYS = 90  # Cache POI results for 90 days
POI_REFRESH_INTERVAL_DAYS = 180  # Refresh every 6 months
POI_COORDINATE_PRECISION = 4  # Decimal places for cache key (≈10m precision)
|
||||
|
||||
|
||||
# Distance Bands for Feature Engineering (meters)
# Contiguous (lower, upper) pairs: each band starts where the previous ends.
DISTANCE_BANDS = [
    (0, 100),  # Immediate proximity
    (100, 300),  # Primary catchment (walking distance)
    (300, 500),  # Secondary catchment
    (500, 1000),  # Tertiary catchment
]
|
||||
|
||||
|
||||
# Competitive Pressure Zones
# "direct" and "nearby" carry a single pressure multiplier; "market" instead
# carries a count threshold plus two alternative multipliers.
COMPETITOR_ZONES = {
    "direct": {
        "max_distance_m": 100,
        "pressure_multiplier": -1.0,  # Strong negative impact
    },
    "nearby": {
        "max_distance_m": 500,
        "pressure_multiplier": -0.5,  # Moderate negative impact
    },
    "market": {
        "max_distance_m": 1000,
        "min_count_for_district": 5,  # If 5+ bakeries = bakery district
        "district_multiplier": 0.3,  # Positive impact (destination area)
        "normal_multiplier": -0.2,  # Slight negative (competitive market)
    },
}
|
||||
16
services/external/app/core/redis_client.py
vendored
Normal file
16
services/external/app/core/redis_client.py
vendored
Normal file
@@ -0,0 +1,16 @@
|
||||
"""
|
||||
Redis Client for POI Service
|
||||
|
||||
Provides access to shared Redis client for POI caching.
|
||||
"""
|
||||
|
||||
from shared.redis_utils import get_redis_client as get_shared_redis_client
|
||||
|
||||
|
||||
async def get_redis_client():
    """Get Redis client for POI service.

    Uses shared Redis infrastructure from shared utilities: awaits
    ``shared.redis_utils.get_redis_client`` and returns its result
    unchanged.
    """
    return await get_shared_redis_client()
|
||||
Reference in New Issue
Block a user