182 lines
6.1 KiB
Python
182 lines
6.1 KiB
Python
"""
|
|
POI Detection Configuration
|
|
|
|
Defines POI categories, search parameters, and relevance thresholds
|
|
for automated Point of Interest detection and feature engineering.
|
|
"""
|
|
|
|
from dataclasses import dataclass
|
|
from typing import Dict
|
|
|
|
|
|
@dataclass
class POICategory:
    """POI category definition with OSM query and ML parameters.

    Attributes:
        name: Category identifier.
        osm_query: OpenStreetMap tag filter(s) written in bracket syntax,
            e.g. ``'["amenity"="coworking_space"]'``.
        search_radius_m: Search radius in meters.
        weight: Importance weight for the ML model; may be positive or
            negative.
        description: Human-readable summary of the category.
    """

    name: str
    osm_query: str
    search_radius_m: int
    weight: float  # Importance weight for ML model (positive or negative)
    description: str
|
|
|
|
|
|
# POI Category Definitions based on OpenStreetMap tags
# Research-based search radii and weights for bakery demand forecasting
#
# Overpass QL matches a `~` regex anywhere inside the tag value, so a list of
# alternatives has to be anchored with ^(...)$ to act as an exact-value match.
POI_CATEGORIES: Dict[str, POICategory] = {
    "schools": POICategory(
        name="schools",
        # Anchored: an unanchored "school" alternative also matches amenity
        # values such as driving_school, music_school and language_school.
        osm_query='["amenity"~"^(school|kindergarten|university|college)$"]',
        search_radius_m=500,
        weight=1.5,  # High positive impact - morning drop-off rush
        description="Educational institutions causing morning/afternoon rush patterns",
    ),
    "offices": POICategory(
        name="offices",
        osm_query='["office"]',
        search_radius_m=800,
        weight=1.3,  # Positive impact - weekday lunch/breakfast demand
        description="Office buildings and business centers",
    ),
    "gyms_sports": POICategory(
        name="gyms_sports",
        osm_query='["leisure"~"fitness_centre|sports_centre|stadium"]',
        search_radius_m=600,
        weight=0.8,  # Moderate impact - morning/evening activity
        description="Fitness centers and sports facilities",
    ),
    "residential": POICategory(
        name="residential",
        # Anchored: an unanchored "house" alternative also matches
        # non-residential values such as warehouse and greenhouse.
        # semidetached_house is listed explicitly so it keeps matching.
        osm_query='["building"~"^(residential|apartments|house|semidetached_house)$"]',
        search_radius_m=400,
        weight=1.0,  # Base demand from residents
        description="Residential buildings and housing",
    ),
    "tourism": POICategory(
        name="tourism",
        osm_query='["tourism"~"attraction|museum|hotel|hostel|guest_house"]',
        search_radius_m=1000,
        weight=1.2,  # Positive impact - tourist foot traffic
        description="Tourist attractions, hotels, and points of interest",
    ),
    "competitors": POICategory(
        name="competitors",
        osm_query='["shop"~"bakery|pastry|confectionery"]',
        search_radius_m=1000,
        weight=-0.5,  # Negative impact - competition pressure
        description="Competing bakeries and pastry shops",
    ),
    "transport_hubs": POICategory(
        name="transport_hubs",
        # NOTE(review): consecutive Overpass filters are ANDed, so this only
        # matches elements that carry BOTH a public_transport tag and a
        # railway tag; elements with only one of the two (e.g. bus-only
        # stops) are excluded. Confirm that this is the intended selection.
        osm_query='["public_transport"~"station|stop"]["railway"~"station|subway_entrance|tram_stop"]',
        search_radius_m=800,
        weight=1.4,  # High impact - commuter foot traffic
        description="Public transport stations and hubs",
    ),
    "coworking": POICategory(
        name="coworking",
        osm_query='["amenity"="coworking_space"]',
        search_radius_m=600,
        weight=1.1,  # Moderate-high impact - flexible workers
        description="Coworking spaces and shared offices",
    ),
    "retail": POICategory(
        name="retail",
        # NOTE(review): any element with a "shop" tag matches, which includes
        # the shop=bakery/pastry/confectionery elements already selected by
        # "competitors" - confirm the overlap is intended.
        osm_query='["shop"]',
        search_radius_m=500,
        weight=0.9,  # Moderate impact - general foot traffic
        description="Retail shops and commercial areas",
    ),
}
|
|
|
|
|
|
# Feature Relevance Thresholds
# Determines which POI features are significant enough to include in ML models
# Based on retail gravity model research and distance decay patterns
#
# One entry per POI category key; every entry carries the same three keys:
#   min_proximity_score        - lowest proximity score considered relevant
#   max_distance_to_nearest_m  - largest distance (meters) to the nearest POI
#   min_count                  - fewest POIs required
RELEVANCE_THRESHOLDS: Dict[str, Dict[str, float]] = {
    "schools": {
        "min_proximity_score": 0.5,  # At least moderate proximity required
        "max_distance_to_nearest_m": 500,  # Must be within 500m
        "min_count": 1,  # At least 1 school
    },
    "offices": {
        "min_proximity_score": 0.3,
        "max_distance_to_nearest_m": 800,
        "min_count": 2,  # Offices are common; need multiple for impact
    },
    "gyms_sports": {
        "min_proximity_score": 0.4,
        "max_distance_to_nearest_m": 600,
        "min_count": 1,
    },
    "residential": {
        "min_proximity_score": 1.0,  # High threshold; residential is everywhere in cities
        "max_distance_to_nearest_m": 400,
        "min_count": 5,  # Need significant residential density
    },
    "tourism": {
        "min_proximity_score": 0.2,  # Lower threshold; tourism is high-impact even at distance
        "max_distance_to_nearest_m": 1000,
        "min_count": 1,
    },
    "competitors": {
        "min_proximity_score": 0.1,  # Any competition is relevant (even distant)
        "max_distance_to_nearest_m": 1000,
        "min_count": 1,
    },
    "transport_hubs": {
        "min_proximity_score": 0.4,
        "max_distance_to_nearest_m": 800,
        "min_count": 1,
    },
    "coworking": {
        "min_proximity_score": 0.3,
        "max_distance_to_nearest_m": 600,
        "min_count": 1,
    },
    "retail": {
        "min_proximity_score": 0.8,  # Retail is common; higher bar for relevance
        "max_distance_to_nearest_m": 500,
        "min_count": 3,
    },
}
|
|
|
|
|
|
# Overpass API Configuration
OVERPASS_API_URL = "https://overpass-api.de/api/interpreter"
OVERPASS_TIMEOUT_SECONDS = 30
OVERPASS_MAX_RETRIES = 4  # Increased from 3 to 4 for better resilience
OVERPASS_RETRY_DELAY_SECONDS = 2  # Base delay (will use exponential backoff)
|
|
|
|
|
|
# POI Cache Configuration
# NOTE(review): the cache TTL (90 days) is shorter than the refresh interval
# (180 days) - confirm how the consumer expects these two values to interact.
POI_CACHE_TTL_DAYS = 90  # Cache POI results for 90 days
POI_REFRESH_INTERVAL_DAYS = 180  # Refresh every 6 months
POI_COORDINATE_PRECISION = 4  # Decimal places for cache key (≈10m precision)
|
|
|
|
|
|
# Distance Bands for Feature Engineering (meters)
# Each entry is a (lower, upper) pair; the bands are contiguous from 0 to 1000.
# NOTE(review): adjacent bands share their boundary value (100, 300, 500) -
# confirm which end the consumer treats as inclusive.
DISTANCE_BANDS = [
    (0, 100),     # Immediate proximity
    (100, 300),   # Primary catchment (walking distance)
    (300, 500),   # Secondary catchment
    (500, 1000),  # Tertiary catchment
]
|
|
|
|
|
|
# Competitive Pressure Zones
# Zones are keyed by name and bounded by "max_distance_m" (meters). The
# "direct" and "nearby" zones carry a single pressure multiplier; the "market"
# zone instead carries two multipliers plus the competitor count that
# distinguishes a bakery district from a normal competitive market.
COMPETITOR_ZONES = {
    "direct": {
        "max_distance_m": 100,
        "pressure_multiplier": -1.0,  # Strong negative impact
    },
    "nearby": {
        "max_distance_m": 500,
        "pressure_multiplier": -0.5,  # Moderate negative impact
    },
    "market": {
        "max_distance_m": 1000,
        "min_count_for_district": 5,  # If 5+ bakeries = bakery district
        "district_multiplier": 0.3,  # Positive impact (destination area)
        "normal_multiplier": -0.2,  # Slight negative (competitive market)
    },
}
|