# Source: bakery-ia/services/external/app/core/poi_config.py (Python, 182 lines, 6.1 KiB)

"""
POI Detection Configuration
Defines POI categories, search parameters, and relevance thresholds
for automated Point of Interest detection and feature engineering.
"""
from dataclasses import dataclass
from typing import Dict
@dataclass
class POICategory:
    """POI category definition with OSM query and ML parameters.

    Attributes:
        name: Identifier of the category.
        osm_query: OpenStreetMap tag filter text selecting the category's
            features (bracketed filter syntax, e.g. '["shop"]').
        search_radius_m: Search radius; the ``_m`` suffix indicates metres.
        weight: Importance weight for the ML model. May be positive or
            negative (a negative weight marks a demand-reducing category).
        description: Human-readable summary of the category.
    """

    name: str
    osm_query: str
    search_radius_m: int
    weight: float  # Importance weight for ML model (positive or negative)
    description: str
# POI category catalogue, built from OpenStreetMap tag filters.
# Search radii and weights are the research-based values used for bakery
# demand forecasting. The mapping is keyed by each category's own ``name``,
# so the identifier is written only once per entry.
POI_CATEGORIES: Dict[str, POICategory] = {
    category.name: category
    for category in (
        POICategory(
            name="schools",
            osm_query='["amenity"~"school|kindergarten|university|college"]',
            search_radius_m=500,
            weight=1.5,  # strong positive effect: morning drop-off rush
            description="Educational institutions causing morning/afternoon rush patterns",
        ),
        POICategory(
            name="offices",
            osm_query='["office"]',
            search_radius_m=800,
            weight=1.3,  # positive effect: weekday breakfast/lunch demand
            description="Office buildings and business centers",
        ),
        POICategory(
            name="gyms_sports",
            osm_query='["leisure"~"fitness_centre|sports_centre|stadium"]',
            search_radius_m=600,
            weight=0.8,  # moderate effect: morning/evening activity
            description="Fitness centers and sports facilities",
        ),
        POICategory(
            name="residential",
            osm_query='["building"~"residential|apartments|house"]',
            search_radius_m=400,
            weight=1.0,  # baseline demand coming from residents
            description="Residential buildings and housing",
        ),
        POICategory(
            name="tourism",
            osm_query='["tourism"~"attraction|museum|hotel|hostel|guest_house"]',
            search_radius_m=1000,
            weight=1.2,  # positive effect: tourist foot traffic
            description="Tourist attractions, hotels, and points of interest",
        ),
        POICategory(
            name="competitors",
            osm_query='["shop"~"bakery|pastry|confectionery"]',
            search_radius_m=1000,
            weight=-0.5,  # negative effect: competition pressure
            description="Competing bakeries and pastry shops",
        ),
        POICategory(
            name="transport_hubs",
            # NOTE(review): two tag filters written back to back. In Overpass
            # QL consecutive filters are ANDed, so this presumably matches
            # only elements carrying both tags - confirm that is intended.
            osm_query='["public_transport"~"station|stop"]["railway"~"station|subway_entrance|tram_stop"]',
            search_radius_m=800,
            weight=1.4,  # strong effect: commuter foot traffic
            description="Public transport stations and hubs",
        ),
        POICategory(
            name="coworking",
            osm_query='["amenity"="coworking_space"]',
            search_radius_m=600,
            weight=1.1,  # moderate-to-high effect: flexible workers
            description="Coworking spaces and shared offices",
        ),
        POICategory(
            name="retail",
            # NOTE(review): no value constraint on "shop", so this presumably
            # also matches the shops counted under "competitors" - confirm
            # that the overlap is acceptable.
            osm_query='["shop"]',
            search_radius_m=500,
            weight=0.9,  # moderate effect: general foot traffic
            description="Retail shops and commercial areas",
        ),
    )
}
# Relevance thresholds per POI category.
# They decide which POI features are significant enough to be included in the
# ML models (stated basis: retail gravity model research and distance decay
# patterns). Each table row below expands into one threshold dict.
RELEVANCE_THRESHOLDS: Dict[str, Dict[str, float]] = {
    category: {
        "min_proximity_score": min_score,
        "max_distance_to_nearest_m": max_distance_m,
        "min_count": min_count,
    }
    for category, min_score, max_distance_m, min_count in (
        # (category, min proximity score, max distance to nearest [m], min count)
        ("schools", 0.5, 500, 1),  # moderate proximity, within 500m, at least 1 school
        ("offices", 0.3, 800, 2),  # offices are common; several are needed for impact
        ("gyms_sports", 0.4, 600, 1),
        ("residential", 1.0, 400, 5),  # high bar: residential is everywhere; needs real density
        ("tourism", 0.2, 1000, 1),  # low bar: tourism is high-impact even at distance
        ("competitors", 0.1, 1000, 1),  # any competition is relevant, even distant
        ("transport_hubs", 0.4, 800, 1),
        ("coworking", 0.3, 600, 1),
        ("retail", 0.8, 500, 3),  # retail is common; higher bar for relevance
    )
}
# Overpass API client settings
OVERPASS_API_URL: str = "https://overpass-api.de/api/interpreter"
OVERPASS_TIMEOUT_SECONDS: int = 30
# Raised from 3 to 4 for better resilience.
OVERPASS_MAX_RETRIES: int = 4
# Base delay only; the retry logic is expected to apply exponential backoff.
OVERPASS_RETRY_DELAY_SECONDS: int = 2
# POI cache settings
# Cached POI results are kept for 90 days.
POI_CACHE_TTL_DAYS: int = 90
# Refresh cadence: every 6 months.
# NOTE(review): the TTL (90 days) is shorter than the refresh interval
# (180 days) - confirm this gap is intended.
POI_REFRESH_INTERVAL_DAYS: int = 180
# Decimal places kept in the cache key (≈10m precision).
POI_COORDINATE_PRECISION: int = 4
# Distance bands for feature engineering, as (lower, upper) pairs in meters.
# Adjacent bands share an endpoint; which side is inclusive is decided by the
# code that consumes this list.
DISTANCE_BANDS = [
    # Immediate proximity
    (0, 100),
    # Primary catchment (walking distance)
    (100, 300),
    # Secondary catchment
    (300, 500),
    # Tertiary catchment
    (500, 1000),
]
# Competitive pressure zones, ordered from closest to widest.
# Negative multipliers represent demand lost to nearby competitors.
COMPETITOR_ZONES = {
    # Competitors within 100 m: strong negative impact.
    "direct": {
        "max_distance_m": 100,
        "pressure_multiplier": -1.0,
    },
    # Competitors within 500 m: moderate negative impact.
    "nearby": {
        "max_distance_m": 500,
        "pressure_multiplier": -0.5,
    },
    # Wider market within 1000 m: the sign depends on how many bakeries exist.
    "market": {
        "max_distance_m": 1000,
        # 5 or more bakeries are treated as a bakery district.
        "min_count_for_district": 5,
        # District case: positive impact (destination area).
        "district_multiplier": 0.3,
        # Otherwise: slight negative impact (competitive market).
        "normal_multiplier": -0.2,
    },
}