"""
|
|
POI Detection Service
|
|
|
|
Automated Point of Interest detection using Overpass API (OpenStreetMap).
|
|
Detects nearby POIs around bakery locations and generates ML features
|
|
for location-based demand forecasting.
|
|
"""

import overpy
from typing import List, Dict, Any, Tuple
from datetime import datetime, timezone
import asyncio
import structlog
import httpx
from math import radians, sin, cos, sqrt, atan2
import random

from app.core.poi_config import (
    POI_CATEGORIES,
    OVERPASS_API_URL,
    OVERPASS_TIMEOUT_SECONDS,
    OVERPASS_MAX_RETRIES,
    OVERPASS_RETRY_DELAY_SECONDS,
    DISTANCE_BANDS
)

logger = structlog.get_logger()


class POIDetectionService:
    """
    Automated POI detection using Overpass API (OpenStreetMap).

    Detects points of interest near bakery locations and calculates
    ML features for demand forecasting with location-specific context.
    """

    def __init__(self, overpass_url: str = OVERPASS_API_URL):
        self.overpass_url = overpass_url
        self.api = overpy.Overpass(url=overpass_url)
        self.timeout = OVERPASS_TIMEOUT_SECONDS
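
    # A minimal usage sketch (coordinates and tenant ID are hypothetical;
    # the call must be awaited inside a running event loop):
    #
    #     service = POIDetectionService()
    #     result = await service.detect_pois_for_bakery(
    #         latitude=52.52, longitude=13.405, tenant_id="tenant-123"
    #     )
    #     result["summary"]["total_pois_detected"]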

    async def detect_pois_for_bakery(
        self,
        latitude: float,
        longitude: float,
        tenant_id: str
    ) -> Dict[str, Any]:
        """
        Detect all POIs around a bakery location.

        Args:
            latitude: Bakery latitude
            longitude: Bakery longitude
            tenant_id: Tenant identifier for logging

        Returns:
            Complete POI detection results with ML features
        """
        logger.info(
            "Starting POI detection",
            tenant_id=tenant_id,
            location=(latitude, longitude)
        )

        poi_results = {}
        detection_errors = []

        # Query each POI category with inter-query delays
        category_items = list(POI_CATEGORIES.items())
        for idx, (category_key, category) in enumerate(category_items):
            try:
                pois = await self._query_pois_with_retry(
                    latitude,
                    longitude,
                    category.osm_query,
                    category.search_radius_m,
                    category_key
                )

                # Calculate features for this category
                features = self._calculate_poi_features(
                    pois,
                    (latitude, longitude),
                    category
                )

                poi_results[category_key] = {
                    "pois": pois,
                    "features": features,
                    "count": len(pois)
                }

                logger.info(
                    f"Detected {category_key}",
                    count=len(pois),
                    proximity_score=features["proximity_score"]
                )

                # Add delay between categories to respect rate limits
                # (except after the last category)
                if idx < len(category_items) - 1:
                    inter_query_delay = 2.0 + random.uniform(0.5, 1.5)
                    await asyncio.sleep(inter_query_delay)
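                    # (Public Overpass instances typically enforce per-IP rate
                    # limits, so this jittered ~2.5-3.5 s pause between category
                    # queries is deliberately conservative; tune it when running
                    # against a self-hosted instance.)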

            except Exception as e:
                logger.error(
                    f"Failed to detect {category_key}",
                    error=str(e),
                    tenant_id=tenant_id
                )
                detection_errors.append({
                    "category": category_key,
                    "error": str(e)
                })
                poi_results[category_key] = {
                    "pois": [],
                    "features": self._get_empty_features(),
                    "count": 0,
                    "error": str(e)
                }

                # Add a longer delay after an error before continuing
                if idx < len(category_items) - 1:
                    error_recovery_delay = 3.0 + random.uniform(1.0, 2.0)
                    await asyncio.sleep(error_recovery_delay)

        # Generate combined ML features
        ml_features = self._generate_ml_features(poi_results)

        # Generate summary
        summary = self._generate_summary(poi_results)

        detection_status = "completed" if not detection_errors else (
            "partial" if len(detection_errors) < len(POI_CATEGORIES) else "failed"
        )

        return {
            "tenant_id": tenant_id,
            "location": {"latitude": latitude, "longitude": longitude},
            "detection_timestamp": datetime.now(timezone.utc).isoformat(),
            "detection_status": detection_status,
            "detection_errors": detection_errors if detection_errors else None,
            "poi_categories": poi_results,
            "ml_features": ml_features,
            "summary": summary
        }

    async def _query_pois_with_retry(
        self,
        latitude: float,
        longitude: float,
        osm_query: str,
        radius_m: int,
        category_key: str
    ) -> List[Dict[str, Any]]:
        """
        Query Overpass API with exponential backoff retry logic.

        Implements:
        - Exponential backoff with jitter
        - Extended delays for rate-limiting errors
        - Error type detection from the exception message
        """
        last_error = None
        base_delay = OVERPASS_RETRY_DELAY_SECONDS

        for attempt in range(OVERPASS_MAX_RETRIES):
            try:
                return await self._query_pois(
                    latitude, longitude, osm_query, radius_m
                )
            except Exception as e:
                last_error = e
                error_message = str(e).lower()

                # Determine if this is a rate-limiting error
                is_rate_limit = any(phrase in error_message for phrase in [
                    'too many requests',
                    'rate limit',
                    'server load too high',
                    'quota exceeded',
                    'retry later',
                    '429',
                    '503',
                    '504'
                ])

                if attempt < OVERPASS_MAX_RETRIES - 1:
                    # Exponential backoff with jitter; rate-limiting errors
                    # get a steeper curve (base 3) and a higher cap than
                    # other errors (base 2)
                    if is_rate_limit:
                        delay = base_delay * (3 ** attempt) + random.uniform(1, 5)
                        delay = min(delay, 30)  # Cap at 30 seconds
                    else:
                        delay = base_delay * (2 ** attempt) + random.uniform(0.5, 1.5)
                        delay = min(delay, 10)  # Cap at 10 seconds
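
                    # Worked example (illustrative, assuming base_delay = 2 s):
                    # rate-limited retries wait ~3-7 s, ~7-11 s, ~19-23 s
                    # (2 * 3**attempt + 1-5 s jitter, capped at 30 s); other
                    # errors wait ~2.5-3.5 s, ~4.5-5.5 s, ~8.5-9.5 s.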

                    logger.warning(
                        f"POI query retry {attempt + 1}/{OVERPASS_MAX_RETRIES}",
                        category=category_key,
                        error=str(e),
                        is_rate_limit=is_rate_limit,
                        retry_delay=f"{delay:.1f}s"
                    )
                    await asyncio.sleep(delay)
                else:
                    logger.error(
                        "POI query failed after all retries",
                        category=category_key,
                        error=str(e),
                        is_rate_limit=is_rate_limit
                    )

        raise last_error

    async def _query_pois(
        self,
        latitude: float,
        longitude: float,
        osm_query: str,
        radius_m: int
    ) -> List[Dict[str, Any]]:
        """
        Query Overpass API for POIs within a radius.

        Raises:
            Exception: With a descriptive error message from the Overpass API
        """

        # Build Overpass QL query
        query = f"""
        [out:json][timeout:{self.timeout}];
        (
          node{osm_query}(around:{radius_m},{latitude},{longitude});
          way{osm_query}(around:{radius_m},{latitude},{longitude});
        );
        out center;
        """

        # Execute query (run blocking overpy in a thread pool so the
        # event loop stays responsive)
        loop = asyncio.get_running_loop()
        try:
            result = await loop.run_in_executor(
                None,
                self.api.query,
                query
            )
        except overpy.exception.OverpassTooManyRequests as e:
            # Explicitly handle rate limiting
            raise Exception("Too many requests - Overpass API rate limit exceeded") from e
        except overpy.exception.OverpassGatewayTimeout as e:
            # Query took too long
            raise Exception("Gateway timeout - query too complex or server busy") from e
        except overpy.exception.OverpassBadRequest as e:
            # Query syntax error
            raise Exception(f"Bad request - invalid query syntax: {str(e)}") from e
        except Exception as e:
            # Check if it's an HTTP error with a status code
            error_msg = str(e).lower()
            if '429' in error_msg or 'too many' in error_msg:
                raise Exception("Too many requests - rate limit exceeded") from e
            elif '503' in error_msg or 'load too high' in error_msg:
                raise Exception("Server load too high - Overpass API overloaded") from e
            elif '504' in error_msg or 'timeout' in error_msg:
                raise Exception("Gateway timeout - server busy") from e
            else:
                # Re-raise with the original message
                raise

        # Parse results
        pois = []

        # Process nodes
        for node in result.nodes:
            pois.append({
                "osm_id": str(node.id),
                "type": "node",
                "lat": float(node.lat),
                "lon": float(node.lon),
                "tags": dict(node.tags),
                "name": node.tags.get("name", "Unnamed")
            })

        # Process ways (buildings, areas)
        for way in result.ways:
            # Get center point ("out center;" populates center_lat/center_lon)
            if hasattr(way, 'center_lat') and way.center_lat:
                lat, lon = float(way.center_lat), float(way.center_lon)
            else:
                # Fall back to the centroid of the way's member nodes. With
                # "out center;" those nodes are usually absent from the
                # response, so guard against overpy raising on incomplete data.
                try:
                    way_nodes = way.get_nodes(resolve_missing=False)
                except overpy.exception.DataIncomplete:
                    way_nodes = []
                if way_nodes:
                    lats = [float(node.lat) for node in way_nodes]
                    lons = [float(node.lon) for node in way_nodes]
                    lat = sum(lats) / len(lats)
                    lon = sum(lons) / len(lons)
                else:
                    continue

            pois.append({
                "osm_id": str(way.id),
                "type": "way",
                "lat": lat,
                "lon": lon,
                "tags": dict(way.tags),
                "name": way.tags.get("name", "Unnamed")
            })

        return pois

    def _calculate_poi_features(
        self,
        pois: List[Dict[str, Any]],
        bakery_location: Tuple[float, float],
        category
    ) -> Dict[str, float]:
        """Calculate ML features for a POI category"""

        if not pois:
            return self._get_empty_features()

        # Calculate distances
        distances = []
        for poi in pois:
            dist_km = self._haversine_distance(
                bakery_location,
                (poi["lat"], poi["lon"])
            )
            distances.append(dist_km * 1000)  # Convert to meters

        # Feature Tier 1: Proximity Scores (PRIMARY)
        proximity_score = sum(1.0 / (1.0 + d / 1000) for d in distances)
        weighted_proximity_score = proximity_score * category.weight
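
        # Worked example: POIs at 100 m, 500 m and 2 km contribute
        # 1/1.1 + 1/1.5 + 1/3 ≈ 0.909 + 0.667 + 0.333 ≈ 1.91, so nearby
        # POIs dominate the score while distant ones add little.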

        # Feature Tier 2: Distance Band Counts
        count_0_100m = sum(1 for d in distances if d <= 100)
        count_100_300m = sum(1 for d in distances if 100 < d <= 300)
        count_300_500m = sum(1 for d in distances if 300 < d <= 500)
        count_500_1000m = sum(1 for d in distances if 500 < d <= 1000)

        # Feature Tier 3: Distance to Nearest
        distance_to_nearest_m = min(distances) if distances else 9999.0

        # Feature Tier 4: Binary Flags
        has_within_100m = any(d <= 100 for d in distances)
        has_within_300m = any(d <= 300 for d in distances)
        has_within_500m = any(d <= 500 for d in distances)
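
        # Example: distances of [80, 250, 450, 900] m fall into bands
        # (1, 1, 1, 1), give distance_to_nearest_m = 80, and set all
        # three binary flags to True.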

        return {
            # Tier 1: Proximity scores (PRIMARY for ML)
            "proximity_score": round(proximity_score, 4),
            "weighted_proximity_score": round(weighted_proximity_score, 4),

            # Tier 2: Distance bands
            "count_0_100m": count_0_100m,
            "count_100_300m": count_100_300m,
            "count_300_500m": count_300_500m,
            "count_500_1000m": count_500_1000m,
            "total_count": len(pois),

            # Tier 3: Distance to nearest
            "distance_to_nearest_m": round(distance_to_nearest_m, 1),

            # Tier 4: Binary flags
            "has_within_100m": has_within_100m,
            "has_within_300m": has_within_300m,
            "has_within_500m": has_within_500m
        }

    def _generate_ml_features(self, poi_results: Dict[str, Any]) -> Dict[str, float]:
        """
        Generate flat feature dictionary for ML model ingestion.

        These features will be added to Prophet/XGBoost as regressors.
        """
        ml_features = {}

        for category_key, data in poi_results.items():
            features = data.get("features", {})

            # Flatten with category prefix
            for feature_name, value in features.items():
                ml_feature_name = f"poi_{category_key}_{feature_name}"
                # Convert boolean to int for ML
                if isinstance(value, bool):
                    value = 1 if value else 0
                ml_features[ml_feature_name] = value
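                # e.g. a hypothetical "cafes" category produces keys such as
                # "poi_cafes_proximity_score" or "poi_cafes_count_0_100m",
                # with booleans already cast to 0/1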

        return ml_features

    def _get_empty_features(self) -> Dict[str, float]:
        """Return zero features when no POIs found"""
        return {
            "proximity_score": 0.0,
            "weighted_proximity_score": 0.0,
            "count_0_100m": 0,
            "count_100_300m": 0,
            "count_300_500m": 0,
            "count_500_1000m": 0,
            "total_count": 0,
            "distance_to_nearest_m": 9999.0,
            "has_within_100m": False,
            "has_within_300m": False,
            "has_within_500m": False
        }

    def _haversine_distance(
        self,
        coord1: Tuple[float, float],
        coord2: Tuple[float, float]
    ) -> float:
        """
        Calculate distance between two coordinates in kilometers.

        Uses Haversine formula for great-circle distance.
        """
        lat1, lon1 = coord1
        lat2, lon2 = coord2

        R = 6371  # Earth radius in km
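
        # Haversine: a = sin^2(dlat/2) + cos(lat1)*cos(lat2)*sin^2(dlon/2),
        # c = 2*atan2(sqrt(a), sqrt(1 - a)), distance = R*c. Sanity check:
        # two points 0.01 degrees of latitude apart yield ~1.11 km.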

        dlat = radians(lat2 - lat1)
        dlon = radians(lon2 - lon1)

        a = (sin(dlat / 2) ** 2 +
             cos(radians(lat1)) * cos(radians(lat2)) * sin(dlon / 2) ** 2)
        c = 2 * atan2(sqrt(a), sqrt(1 - a))

        return R * c

    def _generate_summary(self, poi_results: Dict[str, Any]) -> Dict[str, Any]:
        """Generate human-readable summary"""
        total_pois = sum(r["count"] for r in poi_results.values())
        categories_with_pois = [
            k for k, v in poi_results.items() if v["count"] > 0
        ]
        high_impact_categories = [
            k for k, v in poi_results.items()
            if v["features"]["proximity_score"] > 2.0
        ]

        return {
            "total_pois_detected": total_pois,
            "categories_with_pois": categories_with_pois,
            "high_impact_categories": high_impact_categories,
            "categories_count": len(categories_with_pois)
        }
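
    # Illustrative summary shape (values hypothetical):
    #     {"total_pois_detected": 42,
    #      "categories_with_pois": ["cafes", "offices"],
    #      "high_impact_categories": ["offices"],  # proximity_score > 2.0
    #      "categories_count": 2}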

    async def health_check(self) -> Dict[str, Any]:
        """Check if Overpass API is accessible"""
        try:
            async with httpx.AsyncClient(timeout=5) as client:
                response = await client.get(f"{self.overpass_url}/status")
                is_healthy = response.status_code == 200
                return {
                    "healthy": is_healthy,
                    "status_code": response.status_code,
                    "url": self.overpass_url
                }
        except Exception as e:
            return {
                "healthy": False,
                "error": str(e),
                "url": self.overpass_url
            }
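

# Minimal smoke test: a sketch that assumes network access to the configured
# Overpass endpoint; illustrative only, not part of the service API.
if __name__ == "__main__":
    service = POIDetectionService()
    print(asyncio.run(service.health_check()))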