Add POI feature and improve the overall backend implementation
services/external/app/services/competitor_analyzer.py (vendored, new file, 269 lines added)
@@ -0,0 +1,269 @@
"""
Competitor Analyzer

Specialized analysis for competitor bakeries with competitive pressure modeling.
Treats competitor proximity differently than other POIs, considering market dynamics.
"""

from typing import Dict, List, Any, Tuple
import structlog
from math import radians, sin, cos, sqrt, atan2

from app.core.poi_config import COMPETITOR_ZONES

logger = structlog.get_logger()


class CompetitorAnalyzer:
    """
    Competitive landscape analyzer for bakery locations.

    Models competitive pressure considering:
    - Direct competition (<100m): Strong negative impact
    - Nearby competition (100-500m): Moderate negative impact
    - Market saturation (500-1000m): Can be positive (bakery district)
      or negative (competitive market)
    """

    def analyze_competitive_landscape(
        self,
        competitor_pois: List[Dict[str, Any]],
        bakery_location: Tuple[float, float],
        tenant_id: str = None
    ) -> Dict[str, Any]:
        """
        Analyze competitive pressure from nearby bakeries.

        Args:
            competitor_pois: List of detected competitor POIs
            bakery_location: Tuple of (latitude, longitude)
            tenant_id: Optional tenant ID for logging

        Returns:
            Competitive analysis with pressure scores and market classification
        """
        if not competitor_pois:
            logger.info(
                "No competitors detected - underserved market",
                tenant_id=tenant_id
            )
            return {
                "competitive_pressure_score": 0.0,
                "direct_competitors_count": 0,
                "nearby_competitors_count": 0,
                "market_competitors_count": 0,
                "competitive_zone": "low_competition",
                "market_type": "underserved",
                "competitive_advantage": "first_mover",
                "ml_feature_competitive_pressure": 0.0,
                "ml_feature_has_direct_competitor": 0,
                "ml_feature_competitor_density_500m": 0,
                "competitor_details": []
            }

        # Categorize competitors by distance
        direct_competitors = []   # <100m
        nearby_competitors = []   # 100-500m
        market_competitors = []   # 500-1000m
        competitor_details = []

        for poi in competitor_pois:
            distance_m = self._calculate_distance(
                bakery_location, (poi["lat"], poi["lon"])
            ) * 1000

            competitor_info = {
                "name": poi.get("name", "Unnamed"),
                "osm_id": poi.get("osm_id"),
                "distance_m": round(distance_m, 1),
                "lat": poi["lat"],
                "lon": poi["lon"]
            }

            if distance_m < COMPETITOR_ZONES["direct"]["max_distance_m"]:
                direct_competitors.append(poi)
                competitor_info["zone"] = "direct"
            elif distance_m < COMPETITOR_ZONES["nearby"]["max_distance_m"]:
                nearby_competitors.append(poi)
                competitor_info["zone"] = "nearby"
            elif distance_m < COMPETITOR_ZONES["market"]["max_distance_m"]:
                market_competitors.append(poi)
                competitor_info["zone"] = "market"

            competitor_details.append(competitor_info)

        # Calculate competitive pressure score
        direct_pressure = (
            len(direct_competitors) *
            COMPETITOR_ZONES["direct"]["pressure_multiplier"]
        )
        nearby_pressure = (
            len(nearby_competitors) *
            COMPETITOR_ZONES["nearby"]["pressure_multiplier"]
        )

        # Market saturation analysis
        min_for_district = COMPETITOR_ZONES["market"]["min_count_for_district"]
        if len(market_competitors) >= min_for_district:
            # Many bakeries = destination area (bakery district)
            market_pressure = COMPETITOR_ZONES["market"]["district_multiplier"]
            market_type = "bakery_district"
        elif len(market_competitors) > 2:
            market_pressure = COMPETITOR_ZONES["market"]["normal_multiplier"]
            market_type = "competitive_market"
        else:
            market_pressure = 0.0
            market_type = "normal_market"

        competitive_pressure_score = (
            direct_pressure + nearby_pressure + market_pressure
        )

        # Determine competitive zone classification
        if len(direct_competitors) > 0:
            competitive_zone = "high_competition"
            competitive_advantage = "differentiation_required"
        elif len(nearby_competitors) > 2:
            competitive_zone = "moderate_competition"
            competitive_advantage = "quality_focused"
        else:
            competitive_zone = "low_competition"
            competitive_advantage = "local_leader"

        # Sort competitors by distance
        competitor_details.sort(key=lambda x: x["distance_m"])

        logger.info(
            "Competitive analysis complete",
            tenant_id=tenant_id,
            competitive_zone=competitive_zone,
            market_type=market_type,
            total_competitors=len(competitor_pois),
            direct=len(direct_competitors),
            nearby=len(nearby_competitors),
            market=len(market_competitors),
            pressure_score=competitive_pressure_score
        )

        return {
            # Summary scores
            "competitive_pressure_score": round(competitive_pressure_score, 2),

            # Competitor counts by zone
            "direct_competitors_count": len(direct_competitors),
            "nearby_competitors_count": len(nearby_competitors),
            "market_competitors_count": len(market_competitors),
            "total_competitors_count": len(competitor_pois),

            # Market classification
            "competitive_zone": competitive_zone,
            "market_type": market_type,
            "competitive_advantage": competitive_advantage,

            # ML features (for model integration)
            "ml_feature_competitive_pressure": round(competitive_pressure_score, 2),
            "ml_feature_has_direct_competitor": 1 if len(direct_competitors) > 0 else 0,
            "ml_feature_competitor_density_500m": (
                len(direct_competitors) + len(nearby_competitors)
            ),

            # Detailed competitor information
            "competitor_details": competitor_details,

            # Nearest competitor
            "nearest_competitor": competitor_details[0] if competitor_details else None
        }

    def _calculate_distance(
        self,
        coord1: Tuple[float, float],
        coord2: Tuple[float, float]
    ) -> float:
        """
        Calculate Haversine distance in kilometers.

        Args:
            coord1: Tuple of (latitude, longitude)
            coord2: Tuple of (latitude, longitude)

        Returns:
            Distance in kilometers
        """
        lat1, lon1 = coord1
        lat2, lon2 = coord2

        R = 6371  # Earth radius in km

        dlat = radians(lat2 - lat1)
        dlon = radians(lon2 - lon1)

        a = (sin(dlat/2)**2 +
             cos(radians(lat1)) * cos(radians(lat2)) * sin(dlon/2)**2)
        c = 2 * atan2(sqrt(a), sqrt(1-a))

        return R * c

    def get_competitive_insights(
        self,
        analysis_result: Dict[str, Any]
    ) -> List[str]:
        """
        Generate human-readable competitive insights.

        Args:
            analysis_result: Result from analyze_competitive_landscape

        Returns:
            List of insight strings for business intelligence
        """
        insights = []

        zone = analysis_result["competitive_zone"]
        market = analysis_result["market_type"]
        pressure = analysis_result["competitive_pressure_score"]
        direct = analysis_result["direct_competitors_count"]
        nearby = analysis_result["nearby_competitors_count"]

        # Zone-specific insights
        if zone == "high_competition":
            insights.append(
                f"⚠️ High competition: {direct} direct competitor(s) within 100m. "
                "Focus on differentiation and quality."
            )
        elif zone == "moderate_competition":
            insights.append(
                f"Moderate competition: {nearby} nearby competitor(s) within 500m. "
                "Good opportunity for market share."
            )
        else:
            insights.append(
                "✅ Low competition: Local market leader opportunity."
            )

        # Market type insights
        if market == "bakery_district":
            insights.append(
                "📍 Bakery district: High foot traffic area with multiple bakeries. "
                "Customers actively seek bakery products here."
            )
        elif market == "competitive_market":
            insights.append(
                "Market has multiple bakeries. Quality and customer service critical."
            )
        elif market == "underserved":
            insights.append(
                "🎯 Underserved market: Potential for strong customer base growth."
            )

        # Pressure score insight
        if pressure < -1.5:
            insights.append(
                "Strong competitive pressure expected to impact demand. "
                "Marketing and differentiation essential."
            )
        elif pressure > 0:
            insights.append(
                "Positive market dynamics: Location benefits from bakery destination traffic."
            )

        return insights
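A minimal usage sketch for the analyzer above (not part of the commit): the coordinates and POI entries are invented, and it assumes COMPETITOR_ZONES in app.core.poi_config defines the direct/nearby/market zones referenced in the code.

# Hypothetical example: two competitor POIs near a bakery location.
analyzer = CompetitorAnalyzer()
result = analyzer.analyze_competitive_landscape(
    competitor_pois=[
        {"name": "Panaderia A", "osm_id": "1", "lat": 40.4170, "lon": -3.7035},
        {"name": "Panaderia B", "osm_id": "2", "lat": 40.4200, "lon": -3.7100},
    ],
    bakery_location=(40.4168, -3.7038),
    tenant_id="demo-tenant",
)
print(result["competitive_zone"], result["competitive_pressure_score"])
print(analyzer.get_competitive_insights(result))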
services/external/app/services/nominatim_service.py (vendored, new file, 282 lines added)
@@ -0,0 +1,282 @@
"""
Nominatim Geocoding Service

Provides address search and geocoding using OpenStreetMap Nominatim API.
For development: uses public API (rate-limited)
For production: should point to self-hosted Nominatim instance
"""

import httpx
from typing import List, Dict, Any, Optional
import structlog
from asyncio import sleep

logger = structlog.get_logger()


class NominatimService:
    """
    Nominatim geocoding and address search service.

    Uses OpenStreetMap Nominatim API for address autocomplete and geocoding.
    Respects rate limits and usage policy.
    """

    # For development: public API (rate-limited to 1 req/sec)
    # For production: should be overridden with self-hosted instance
    DEFAULT_BASE_URL = "https://nominatim.openstreetmap.org"

    def __init__(self, base_url: Optional[str] = None, user_agent: str = "BakeryIA-Forecasting/1.0"):
        """
        Initialize Nominatim service.

        Args:
            base_url: Nominatim server URL (defaults to public API)
            user_agent: User agent for API requests (required by Nominatim policy)
        """
        self.base_url = (base_url or self.DEFAULT_BASE_URL).rstrip("/")
        self.user_agent = user_agent
        self.headers = {
            "User-Agent": self.user_agent
        }

        # Rate limiting for public API (1 request per second)
        self.is_public_api = self.base_url == self.DEFAULT_BASE_URL
        self.min_request_interval = 1.0 if self.is_public_api else 0.0

        logger.info(
            "Nominatim service initialized",
            base_url=self.base_url,
            is_public_api=self.is_public_api,
            rate_limit=f"{self.min_request_interval}s" if self.is_public_api else "none"
        )

    async def search_address(
        self,
        query: str,
        country_code: str = "es",
        limit: int = 10
    ) -> List[Dict[str, Any]]:
        """
        Search for addresses matching query (autocomplete).

        Args:
            query: Address search query
            country_code: ISO country code to restrict search (default: Spain)
            limit: Maximum number of results

        Returns:
            List of address suggestions with display_name, lat, lon, osm_id, etc.
        """
        if not query or len(query.strip()) < 3:
            logger.warning("Search query too short", query=query)
            return []

        try:
            # Rate limiting for public API
            if self.is_public_api:
                await sleep(self.min_request_interval)

            async with httpx.AsyncClient(timeout=10.0) as client:
                response = await client.get(
                    f"{self.base_url}/search",
                    params={
                        "q": query,
                        "format": "json",
                        "addressdetails": 1,
                        "countrycodes": country_code,
                        "limit": limit,
                        "accept-language": "es"
                    },
                    headers=self.headers
                )
                response.raise_for_status()
                results = response.json()

            # Parse and enrich results
            addresses = []
            for result in results:
                addresses.append({
                    "display_name": result.get("display_name"),
                    "lat": float(result.get("lat")),
                    "lon": float(result.get("lon")),
                    "osm_type": result.get("osm_type"),
                    "osm_id": result.get("osm_id"),
                    "place_id": result.get("place_id"),
                    "type": result.get("type"),
                    "class": result.get("class"),
                    "address": result.get("address", {}),
                    "boundingbox": result.get("boundingbox", [])
                })

            logger.info(
                "Address search completed",
                query=query,
                result_count=len(addresses)
            )

            return addresses

        except httpx.HTTPError as e:
            logger.error(
                "Nominatim API request failed",
                query=query,
                error=str(e)
            )
            return []
        except Exception as e:
            logger.error(
                "Unexpected error in address search",
                query=query,
                error=str(e),
                exc_info=True
            )
            return []

    async def geocode_address(
        self,
        address: str,
        country_code: str = "es"
    ) -> Optional[Dict[str, Any]]:
        """
        Geocode an address to get coordinates.

        Args:
            address: Full address string
            country_code: ISO country code

        Returns:
            Dictionary with lat, lon, display_name, address components or None
        """
        results = await self.search_address(address, country_code, limit=1)

        if not results:
            logger.warning("No geocoding results found", address=address)
            return None

        result = results[0]

        logger.info(
            "Address geocoded successfully",
            address=address,
            lat=result["lat"],
            lon=result["lon"]
        )

        return result

    async def reverse_geocode(
        self,
        latitude: float,
        longitude: float
    ) -> Optional[Dict[str, Any]]:
        """
        Reverse geocode coordinates to get address.

        Args:
            latitude: Latitude coordinate
            longitude: Longitude coordinate

        Returns:
            Dictionary with address information or None
        """
        try:
            # Rate limiting for public API
            if self.is_public_api:
                await sleep(self.min_request_interval)

            async with httpx.AsyncClient(timeout=10.0) as client:
                response = await client.get(
                    f"{self.base_url}/reverse",
                    params={
                        "lat": latitude,
                        "lon": longitude,
                        "format": "json",
                        "addressdetails": 1,
                        "accept-language": "es"
                    },
                    headers=self.headers
                )
                response.raise_for_status()
                result = response.json()

            address_info = {
                "display_name": result.get("display_name"),
                "lat": float(result.get("lat")),
                "lon": float(result.get("lon")),
                "osm_type": result.get("osm_type"),
                "osm_id": result.get("osm_id"),
                "place_id": result.get("place_id"),
                "address": result.get("address", {}),
                "boundingbox": result.get("boundingbox", [])
            }

            logger.info(
                "Reverse geocoding completed",
                lat=latitude,
                lon=longitude,
                address=address_info["display_name"]
            )

            return address_info

        except httpx.HTTPError as e:
            logger.error(
                "Nominatim reverse geocoding failed",
                lat=latitude,
                lon=longitude,
                error=str(e)
            )
            return None
        except Exception as e:
            logger.error(
                "Unexpected error in reverse geocoding",
                lat=latitude,
                lon=longitude,
                error=str(e),
                exc_info=True
            )
            return None

    async def validate_coordinates(
        self,
        latitude: float,
        longitude: float
    ) -> bool:
        """
        Validate that coordinates point to a real location.

        Args:
            latitude: Latitude to validate
            longitude: Longitude to validate

        Returns:
            True if coordinates are valid, False otherwise
        """
        if not (-90 <= latitude <= 90 and -180 <= longitude <= 180):
            return False

        result = await self.reverse_geocode(latitude, longitude)
        return result is not None

    async def health_check(self) -> bool:
        """
        Check if Nominatim service is accessible.

        Returns:
            True if service is healthy, False otherwise
        """
        try:
            async with httpx.AsyncClient(timeout=5.0) as client:
                response = await client.get(
                    f"{self.base_url}/status",
                    params={"format": "json"},
                    headers=self.headers
                )
                return response.status_code == 200
        except Exception as e:
            logger.error(
                "Nominatim health check failed",
                error=str(e)
            )
            return False
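A minimal usage sketch for the geocoding service above (not part of the commit): the address is invented, and calls against the default public endpoint are subject to the Nominatim usage policy and its 1 request/second limit.

# Hypothetical example: geocode a single address.
import asyncio

async def demo():
    service = NominatimService()  # public API by default
    match = await service.geocode_address("Calle Mayor 1, Madrid")
    if match:
        print(match["display_name"], match["lat"], match["lon"])

asyncio.run(demo())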
services/external/app/services/poi_detection_service.py (vendored, new file, 466 lines added)
@@ -0,0 +1,466 @@
"""
POI Detection Service

Automated Point of Interest detection using Overpass API (OpenStreetMap).
Detects nearby POIs around bakery locations and generates ML features
for location-based demand forecasting.
"""

import overpy
from typing import List, Dict, Any, Tuple, Optional
from datetime import datetime, timezone, timedelta
import asyncio
import structlog
import httpx
from math import radians, sin, cos, sqrt, atan2
import random

from app.core.poi_config import (
    POI_CATEGORIES,
    OVERPASS_API_URL,
    OVERPASS_TIMEOUT_SECONDS,
    OVERPASS_MAX_RETRIES,
    OVERPASS_RETRY_DELAY_SECONDS,
    DISTANCE_BANDS
)

logger = structlog.get_logger()


class POIDetectionService:
    """
    Automated POI detection using Overpass API (OpenStreetMap).

    Detects points of interest near bakery locations and calculates
    ML features for demand forecasting with location-specific context.
    """

    def __init__(self, overpass_url: str = OVERPASS_API_URL):
        self.overpass_url = overpass_url
        self.api = overpy.Overpass(url=overpass_url)
        self.timeout = OVERPASS_TIMEOUT_SECONDS

    async def detect_pois_for_bakery(
        self,
        latitude: float,
        longitude: float,
        tenant_id: str
    ) -> Dict[str, Any]:
        """
        Detect all POIs around a bakery location.

        Args:
            latitude: Bakery latitude
            longitude: Bakery longitude
            tenant_id: Tenant identifier for logging

        Returns:
            Complete POI detection results with ML features
        """
        logger.info(
            "Starting POI detection",
            tenant_id=tenant_id,
            location=(latitude, longitude)
        )

        poi_results = {}
        detection_errors = []

        # Query each POI category with inter-query delays
        category_items = list(POI_CATEGORIES.items())
        for idx, (category_key, category) in enumerate(category_items):
            try:
                pois = await self._query_pois_with_retry(
                    latitude,
                    longitude,
                    category.osm_query,
                    category.search_radius_m,
                    category_key
                )

                # Calculate features for this category
                features = self._calculate_poi_features(
                    pois,
                    (latitude, longitude),
                    category
                )

                poi_results[category_key] = {
                    "pois": pois,
                    "features": features,
                    "count": len(pois)
                }

                logger.info(
                    f"Detected {category_key}",
                    count=len(pois),
                    proximity_score=features["proximity_score"]
                )

                # Add delay between categories to respect rate limits
                # (except after the last category)
                if idx < len(category_items) - 1:
                    inter_query_delay = 2.0 + random.uniform(0.5, 1.5)
                    await asyncio.sleep(inter_query_delay)

            except Exception as e:
                logger.error(
                    f"Failed to detect {category_key}",
                    error=str(e),
                    tenant_id=tenant_id
                )
                detection_errors.append({
                    "category": category_key,
                    "error": str(e)
                })
                poi_results[category_key] = {
                    "pois": [],
                    "features": self._get_empty_features(),
                    "count": 0,
                    "error": str(e)
                }

                # Add a longer delay after an error before continuing
                if idx < len(category_items) - 1:
                    error_recovery_delay = 3.0 + random.uniform(1.0, 2.0)
                    await asyncio.sleep(error_recovery_delay)

        # Generate combined ML features
        ml_features = self._generate_ml_features(poi_results)

        # Generate summary
        summary = self._generate_summary(poi_results)

        detection_status = "completed" if not detection_errors else (
            "partial" if len(detection_errors) < len(POI_CATEGORIES) else "failed"
        )

        return {
            "tenant_id": tenant_id,
            "location": {"latitude": latitude, "longitude": longitude},
            "detection_timestamp": datetime.now(timezone.utc).isoformat(),
            "detection_status": detection_status,
            "detection_errors": detection_errors if detection_errors else None,
            "poi_categories": poi_results,
            "ml_features": ml_features,
            "summary": summary
        }

    async def _query_pois_with_retry(
        self,
        latitude: float,
        longitude: float,
        osm_query: str,
        radius_m: int,
        category_key: str
    ) -> List[Dict[str, Any]]:
        """
        Query Overpass API with exponential backoff retry logic.

        Implements:
        - Exponential backoff with jitter
        - Extended delays for rate limiting errors
        - Proper error type detection
        """
        last_error = None
        base_delay = OVERPASS_RETRY_DELAY_SECONDS

        for attempt in range(OVERPASS_MAX_RETRIES):
            try:
                return await self._query_pois(
                    latitude, longitude, osm_query, radius_m
                )
            except Exception as e:
                last_error = e
                error_message = str(e).lower()

                # Determine if this is a rate limiting error
                is_rate_limit = any(phrase in error_message for phrase in [
                    'too many requests',
                    'rate limit',
                    'server load too high',
                    'quota exceeded',
                    'retry later',
                    '429',
                    '503',
                    '504'
                ])

                if attempt < OVERPASS_MAX_RETRIES - 1:
                    # Calculate exponential backoff with jitter
                    # For rate limiting: use longer delays (10-30 seconds)
                    # For other errors: use standard backoff (2-8 seconds)
                    if is_rate_limit:
                        delay = base_delay * (3 ** attempt) + random.uniform(1, 5)
                        delay = min(delay, 30)  # Cap at 30 seconds
                    else:
                        delay = base_delay * (2 ** attempt) + random.uniform(0.5, 1.5)
                        delay = min(delay, 10)  # Cap at 10 seconds

                    logger.warning(
                        f"POI query retry {attempt + 1}/{OVERPASS_MAX_RETRIES}",
                        category=category_key,
                        error=str(e),
                        is_rate_limit=is_rate_limit,
                        retry_delay=f"{delay:.1f}s"
                    )
                    await asyncio.sleep(delay)
                else:
                    logger.error(
                        "POI query failed after all retries",
                        category=category_key,
                        error=str(e),
                        is_rate_limit=is_rate_limit
                    )

        raise last_error

    async def _query_pois(
        self,
        latitude: float,
        longitude: float,
        osm_query: str,
        radius_m: int
    ) -> List[Dict[str, Any]]:
        """
        Query Overpass API for POIs in radius.

        Raises:
            Exception: With descriptive error message from Overpass API
        """

        # Build Overpass QL query
        query = f"""
        [out:json][timeout:{self.timeout}];
        (
          node{osm_query}(around:{radius_m},{latitude},{longitude});
          way{osm_query}(around:{radius_m},{latitude},{longitude});
        );
        out center;
        """

        # Execute query (use asyncio thread pool for blocking overpy)
        loop = asyncio.get_event_loop()
        try:
            result = await loop.run_in_executor(
                None,
                self.api.query,
                query
            )
        except overpy.exception.OverpassTooManyRequests as e:
            # Explicitly handle rate limiting
            raise Exception("Too many requests - Overpass API rate limit exceeded") from e
        except overpy.exception.OverpassGatewayTimeout as e:
            # Query took too long
            raise Exception("Gateway timeout - query too complex or server busy") from e
        except overpy.exception.OverpassBadRequest as e:
            # Query syntax error
            raise Exception(f"Bad request - invalid query syntax: {str(e)}") from e
        except Exception as e:
            # Check if it's an HTTP error with status code
            error_msg = str(e).lower()
            if '429' in error_msg or 'too many' in error_msg:
                raise Exception("Too many requests - rate limit exceeded") from e
            elif '503' in error_msg or 'load too high' in error_msg:
                raise Exception("Server load too high - Overpass API overloaded") from e
            elif '504' in error_msg or 'timeout' in error_msg:
                raise Exception("Gateway timeout - server busy") from e
            else:
                # Re-raise with original message
                raise

        # Parse results
        pois = []

        # Process nodes
        for node in result.nodes:
            pois.append({
                "osm_id": str(node.id),
                "type": "node",
                "lat": float(node.lat),
                "lon": float(node.lon),
                "tags": dict(node.tags),
                "name": node.tags.get("name", "Unnamed")
            })

        # Process ways (buildings, areas)
        for way in result.ways:
            # Get center point
            if hasattr(way, 'center_lat') and way.center_lat:
                lat, lon = float(way.center_lat), float(way.center_lon)
            else:
                # Calculate centroid from nodes
                if way.nodes:
                    lats = [float(node.lat) for node in way.nodes]
                    lons = [float(node.lon) for node in way.nodes]
                    lat = sum(lats) / len(lats)
                    lon = sum(lons) / len(lons)
                else:
                    continue

            pois.append({
                "osm_id": str(way.id),
                "type": "way",
                "lat": lat,
                "lon": lon,
                "tags": dict(way.tags),
                "name": way.tags.get("name", "Unnamed")
            })

        return pois

    def _calculate_poi_features(
        self,
        pois: List[Dict[str, Any]],
        bakery_location: Tuple[float, float],
        category
    ) -> Dict[str, float]:
        """Calculate ML features for POI category"""

        if not pois:
            return self._get_empty_features()

        # Calculate distances
        distances = []
        for poi in pois:
            dist_km = self._haversine_distance(
                bakery_location,
                (poi["lat"], poi["lon"])
            )
            distances.append(dist_km * 1000)  # Convert to meters

        # Feature Tier 1: Proximity Scores (PRIMARY)
        proximity_score = sum(1.0 / (1.0 + d/1000) for d in distances)
        weighted_proximity_score = proximity_score * category.weight

        # Feature Tier 2: Distance Band Counts
        count_0_100m = sum(1 for d in distances if d <= 100)
        count_100_300m = sum(1 for d in distances if 100 < d <= 300)
        count_300_500m = sum(1 for d in distances if 300 < d <= 500)
        count_500_1000m = sum(1 for d in distances if 500 < d <= 1000)

        # Feature Tier 3: Distance to Nearest
        distance_to_nearest_m = min(distances) if distances else 9999.0

        # Feature Tier 4: Binary Flags
        has_within_100m = any(d <= 100 for d in distances)
        has_within_300m = any(d <= 300 for d in distances)
        has_within_500m = any(d <= 500 for d in distances)

        return {
            # Tier 1: Proximity scores (PRIMARY for ML)
            "proximity_score": round(proximity_score, 4),
            "weighted_proximity_score": round(weighted_proximity_score, 4),

            # Tier 2: Distance bands
            "count_0_100m": count_0_100m,
            "count_100_300m": count_100_300m,
            "count_300_500m": count_300_500m,
            "count_500_1000m": count_500_1000m,
            "total_count": len(pois),

            # Tier 3: Distance to nearest
            "distance_to_nearest_m": round(distance_to_nearest_m, 1),

            # Tier 4: Binary flags
            "has_within_100m": has_within_100m,
            "has_within_300m": has_within_300m,
            "has_within_500m": has_within_500m
        }

    def _generate_ml_features(self, poi_results: Dict[str, Any]) -> Dict[str, float]:
        """
        Generate flat feature dictionary for ML model ingestion.

        These features will be added to Prophet/XGBoost as regressors.
        """
        ml_features = {}

        for category_key, data in poi_results.items():
            features = data.get("features", {})

            # Flatten with category prefix
            for feature_name, value in features.items():
                ml_feature_name = f"poi_{category_key}_{feature_name}"
                # Convert boolean to int for ML
                if isinstance(value, bool):
                    value = 1 if value else 0
                ml_features[ml_feature_name] = value

        return ml_features

    def _get_empty_features(self) -> Dict[str, float]:
        """Return zero features when no POIs found"""
        return {
            "proximity_score": 0.0,
            "weighted_proximity_score": 0.0,
            "count_0_100m": 0,
            "count_100_300m": 0,
            "count_300_500m": 0,
            "count_500_1000m": 0,
            "total_count": 0,
            "distance_to_nearest_m": 9999.0,
            "has_within_100m": False,
            "has_within_300m": False,
            "has_within_500m": False
        }

    def _haversine_distance(
        self,
        coord1: Tuple[float, float],
        coord2: Tuple[float, float]
    ) -> float:
        """
        Calculate distance between two coordinates in kilometers.

        Uses Haversine formula for great-circle distance.
        """
        lat1, lon1 = coord1
        lat2, lon2 = coord2

        R = 6371  # Earth radius in km

        dlat = radians(lat2 - lat1)
        dlon = radians(lon2 - lon1)

        a = (sin(dlat/2)**2 +
             cos(radians(lat1)) * cos(radians(lat2)) * sin(dlon/2)**2)
        c = 2 * atan2(sqrt(a), sqrt(1-a))

        return R * c

    def _generate_summary(self, poi_results: Dict[str, Any]) -> Dict[str, Any]:
        """Generate human-readable summary"""
        total_pois = sum(r["count"] for r in poi_results.values())
        categories_with_pois = [
            k for k, v in poi_results.items() if v["count"] > 0
        ]
        high_impact_categories = [
            k for k, v in poi_results.items()
            if v["features"]["proximity_score"] > 2.0
        ]

        return {
            "total_pois_detected": total_pois,
            "categories_with_pois": categories_with_pois,
            "high_impact_categories": high_impact_categories,
            "categories_count": len(categories_with_pois)
        }

    async def health_check(self) -> Dict[str, Any]:
        """Check if Overpass API is accessible"""
        try:
            async with httpx.AsyncClient(timeout=5) as client:
                response = await client.get(f"{self.overpass_url}/status")
                is_healthy = response.status_code == 200
                return {
                    "healthy": is_healthy,
                    "status_code": response.status_code,
                    "url": self.overpass_url
                }
        except Exception as e:
            return {
                "healthy": False,
                "error": str(e),
                "url": self.overpass_url
            }
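A short worked example of the Tier 1 proximity score computed in _calculate_poi_features above; the distances are illustrative only. Each POI contributes 1 / (1 + distance_km), so closer POIs weigh more.

distances_m = [50, 200, 900]                       # illustrative distances in meters
score = sum(1.0 / (1.0 + d / 1000) for d in distances_m)
print(round(score, 4))                             # 2.312, above the 2.0 "high impact" cut-off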
services/external/app/services/poi_feature_selector.py (vendored, new file, 184 lines added)
@@ -0,0 +1,184 @@
"""
POI Feature Selector

Determines which POI features are relevant for ML model inclusion.
Filters out low-signal features to prevent model noise and overfitting.
"""

from typing import Dict, List, Any
import structlog

from app.core.poi_config import RELEVANCE_THRESHOLDS

logger = structlog.get_logger()


class POIFeatureSelector:
    """
    Feature relevance engine for POI-based ML features.

    Applies research-based thresholds to filter out irrelevant POI features
    that would add noise to bakery-specific demand forecasting models.
    """

    def __init__(self, thresholds: Dict[str, Dict[str, float]] = None):
        """
        Initialize feature selector.

        Args:
            thresholds: Custom relevance thresholds (defaults to RELEVANCE_THRESHOLDS)
        """
        self.thresholds = thresholds or RELEVANCE_THRESHOLDS

    def select_relevant_features(
        self,
        poi_detection_results: Dict[str, Any],
        tenant_id: str = None
    ) -> Dict[str, Any]:
        """
        Filter POI features based on relevance thresholds.

        Only includes features for POI categories that pass relevance tests.
        This prevents adding noise to ML models for bakeries where certain
        POI categories are not significant.

        Args:
            poi_detection_results: Full POI detection results
            tenant_id: Optional tenant ID for logging

        Returns:
            Dictionary with relevant features and detailed relevance report
        """
        relevant_features = {}
        relevance_report = []
        relevant_categories = []

        for category_key, data in poi_detection_results.items():
            features = data.get("features", {})
            thresholds = self.thresholds.get(category_key, {})

            if not thresholds:
                logger.warning(
                    f"No thresholds defined for category {category_key}",
                    tenant_id=tenant_id
                )
                continue

            # Check relevance criteria
            is_relevant, rejection_reason = self._check_relevance(
                features, thresholds, category_key
            )

            if is_relevant:
                # Include features with category prefix
                for feature_name, value in features.items():
                    ml_feature_name = f"poi_{category_key}_{feature_name}"
                    # Convert boolean to int for ML
                    if isinstance(value, bool):
                        value = 1 if value else 0
                    relevant_features[ml_feature_name] = value

                relevant_categories.append(category_key)
                relevance_report.append({
                    "category": category_key,
                    "relevant": True,
                    "reason": "Passes all relevance thresholds",
                    "proximity_score": features.get("proximity_score", 0),
                    "count": features.get("total_count", 0),
                    "distance_to_nearest_m": features.get("distance_to_nearest_m", 9999)
                })
            else:
                relevance_report.append({
                    "category": category_key,
                    "relevant": False,
                    "reason": rejection_reason,
                    "proximity_score": features.get("proximity_score", 0),
                    "count": features.get("total_count", 0),
                    "distance_to_nearest_m": features.get("distance_to_nearest_m", 9999)
                })

        logger.info(
            "POI feature selection complete",
            tenant_id=tenant_id,
            total_categories=len(poi_detection_results),
            relevant_categories=len(relevant_categories),
            rejected_categories=len(poi_detection_results) - len(relevant_categories)
        )

        return {
            "features": relevant_features,
            "relevant_categories": relevant_categories,
            "relevance_report": relevance_report,
            "total_features": len(relevant_features),
            "total_relevant_categories": len(relevant_categories)
        }

    def _check_relevance(
        self,
        features: Dict[str, Any],
        thresholds: Dict[str, float],
        category_key: str
    ) -> tuple[bool, str]:
        """
        Check if POI category passes relevance thresholds.

        Returns:
            Tuple of (is_relevant, rejection_reason)
        """
        # Criterion 1: Proximity score
        min_proximity = thresholds.get("min_proximity_score", 0)
        actual_proximity = features.get("proximity_score", 0)
        if actual_proximity < min_proximity:
            return False, f"Proximity score too low ({actual_proximity:.2f} < {min_proximity})"

        # Criterion 2: Distance to nearest
        max_distance = thresholds.get("max_distance_to_nearest_m", 9999)
        actual_distance = features.get("distance_to_nearest_m", 9999)
        if actual_distance > max_distance:
            return False, f"Nearest POI too far ({actual_distance:.0f}m > {max_distance}m)"

        # Criterion 3: Count threshold
        min_count = thresholds.get("min_count", 0)
        actual_count = features.get("total_count", 0)
        if actual_count < min_count:
            return False, f"Count too low ({actual_count} < {min_count})"

        return True, "Passes all thresholds"

    def get_feature_importance_summary(
        self,
        poi_detection_results: Dict[str, Any]
    ) -> List[Dict[str, Any]]:
        """
        Generate summary of feature importance for all categories.

        Useful for understanding POI landscape around a bakery.
        """
        summary = []

        for category_key, data in poi_detection_results.items():
            features = data.get("features", {})
            thresholds = self.thresholds.get(category_key, {})

            is_relevant, reason = self._check_relevance(
                features, thresholds, category_key
            ) if thresholds else (False, "No thresholds defined")

            summary.append({
                "category": category_key,
                "is_relevant": is_relevant,
                "proximity_score": features.get("proximity_score", 0),
                "weighted_score": features.get("weighted_proximity_score", 0),
                "total_count": features.get("total_count", 0),
                "distance_to_nearest_m": features.get("distance_to_nearest_m", 9999),
                "has_within_100m": features.get("has_within_100m", False),
                "rejection_reason": None if is_relevant else reason
            })

        # Sort by relevance and proximity score
        summary.sort(
            key=lambda x: (x["is_relevant"], x["proximity_score"]),
            reverse=True
        )

        return summary
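A minimal usage sketch for the selector above (not part of the commit): the "schools" thresholds and the detection payload are invented; real values come from RELEVANCE_THRESHOLDS and the POI detection service.

selector = POIFeatureSelector(thresholds={
    "schools": {"min_proximity_score": 0.5, "max_distance_to_nearest_m": 500, "min_count": 1},
})
selection = selector.select_relevant_features(
    {"schools": {"features": {"proximity_score": 0.8, "total_count": 2,
                              "distance_to_nearest_m": 220.0, "has_within_300m": True}}},
    tenant_id="demo-tenant",
)
print(selection["relevant_categories"])   # ['schools']
print(selection["features"])              # poi_schools_* features, booleans converted to 0/1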
services/external/app/services/poi_refresh_service.py (vendored, new file, 468 lines added)
@@ -0,0 +1,468 @@
"""
POI Refresh Service

Manages periodic POI context refresh jobs.
Detects changes in POI landscape and updates tenant POI contexts.
"""

import asyncio
from datetime import datetime, timezone, timedelta
from typing import Optional, Dict, Any, List
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select, and_, or_
import structlog

from app.models.poi_refresh_job import POIRefreshJob
from app.models.poi_context import TenantPOIContext
from app.services.poi_detection_service import POIDetectionService
from app.core.database import database_manager

logger = structlog.get_logger()


class POIRefreshService:
    """
    POI Refresh Service

    Manages background jobs for periodic POI context refresh.
    Default refresh cycle: 180 days (6 months).
    """

    DEFAULT_REFRESH_INTERVAL_DAYS = 180
    DEFAULT_MAX_ATTEMPTS = 3
    STALE_THRESHOLD_DAYS = 180

    def __init__(
        self,
        poi_detection_service: Optional[POIDetectionService] = None,
        refresh_interval_days: int = DEFAULT_REFRESH_INTERVAL_DAYS
    ):
        """
        Initialize POI refresh service.

        Args:
            poi_detection_service: POI detection service instance
            refresh_interval_days: Days between POI refreshes (default: 180)
        """
        self.poi_detection_service = poi_detection_service or POIDetectionService()
        self.refresh_interval_days = refresh_interval_days

        logger.info(
            "POI Refresh Service initialized",
            refresh_interval_days=refresh_interval_days
        )

    async def schedule_refresh_job(
        self,
        tenant_id: str,
        latitude: float,
        longitude: float,
        scheduled_at: Optional[datetime] = None,
        session: Optional[AsyncSession] = None
    ) -> POIRefreshJob:
        """
        Schedule a POI refresh job for a tenant.

        Args:
            tenant_id: Tenant UUID
            latitude: Bakery latitude
            longitude: Bakery longitude
            scheduled_at: When to run the job (default: now + refresh_interval)
            session: Database session

        Returns:
            Created POIRefreshJob
        """
        if scheduled_at is None:
            scheduled_at = datetime.now(timezone.utc) + timedelta(
                days=self.refresh_interval_days
            )

        async def _create_job(db_session: AsyncSession):
            # Check if pending job already exists
            result = await db_session.execute(
                select(POIRefreshJob).where(
                    and_(
                        POIRefreshJob.tenant_id == tenant_id,
                        POIRefreshJob.status.in_(["pending", "running"])
                    )
                )
            )
            existing_job = result.scalar_one_or_none()

            if existing_job:
                logger.info(
                    "POI refresh job already scheduled",
                    tenant_id=tenant_id,
                    job_id=str(existing_job.id),
                    scheduled_at=existing_job.scheduled_at
                )
                return existing_job

            # Create new job
            job = POIRefreshJob(
                tenant_id=tenant_id,
                latitude=latitude,
                longitude=longitude,
                scheduled_at=scheduled_at,
                status="pending",
                max_attempts=self.DEFAULT_MAX_ATTEMPTS
            )

            db_session.add(job)
            await db_session.commit()
            await db_session.refresh(job)

            logger.info(
                "POI refresh job scheduled",
                tenant_id=tenant_id,
                job_id=str(job.id),
                scheduled_at=scheduled_at
            )

            return job

        if session:
            return await _create_job(session)
        else:
            async with database_manager.get_session() as db_session:
                return await _create_job(db_session)

    async def execute_refresh_job(
        self,
        job_id: str,
        session: Optional[AsyncSession] = None
    ) -> Dict[str, Any]:
        """
        Execute a POI refresh job.

        Args:
            job_id: Job UUID
            session: Database session

        Returns:
            Execution result with status and details
        """
        async def _execute(db_session: AsyncSession):
            # Load job
            result = await db_session.execute(
                select(POIRefreshJob).where(POIRefreshJob.id == job_id)
            )
            job = result.scalar_one_or_none()

            if not job:
                raise ValueError(f"Job not found: {job_id}")

            if job.status == "running":
                return {
                    "status": "already_running",
                    "job_id": str(job.id),
                    "message": "Job is already running"
                }

            if job.status == "completed":
                return {
                    "status": "already_completed",
                    "job_id": str(job.id),
                    "message": "Job already completed"
                }

            if not job.can_retry:
                return {
                    "status": "max_attempts_reached",
                    "job_id": str(job.id),
                    "message": f"Max attempts ({job.max_attempts}) reached"
                }

            # Update job status
            job.status = "running"
            job.started_at = datetime.now(timezone.utc)
            job.attempt_count += 1
            await db_session.commit()

            logger.info(
                "Executing POI refresh job",
                job_id=str(job.id),
                tenant_id=str(job.tenant_id),
                attempt=job.attempt_count
            )

            try:
                # Get existing POI context
                poi_result = await db_session.execute(
                    select(TenantPOIContext).where(
                        TenantPOIContext.tenant_id == job.tenant_id
                    )
                )
                existing_context = poi_result.scalar_one_or_none()

                # Perform POI detection (every run performs a fresh detection)
                detection_result = await self.poi_detection_service.detect_pois_for_bakery(
                    latitude=job.latitude,
                    longitude=job.longitude,
                    tenant_id=str(job.tenant_id)
                )
                detected_categories = detection_result.get("poi_categories", {})

                # Analyze changes against the per-category results
                changes = self._analyze_changes(
                    existing_context.poi_detection_results if existing_context else {},
                    detected_categories
                )

                # Update job with results
                job.status = "completed"
                job.completed_at = datetime.now(timezone.utc)
                job.pois_detected = sum(
                    data.get("count", 0)
                    for data in detected_categories.values()
                )
                job.changes_detected = changes["has_significant_changes"]
                job.change_summary = changes

                # Schedule next refresh
                job.next_scheduled_at = datetime.now(timezone.utc) + timedelta(
                    days=self.refresh_interval_days
                )

                await db_session.commit()

                logger.info(
                    "POI refresh job completed",
                    job_id=str(job.id),
                    tenant_id=str(job.tenant_id),
                    pois_detected=job.pois_detected,
                    changes_detected=job.changes_detected,
                    duration_seconds=job.duration_seconds
                )

                # Schedule next job
                await self.schedule_refresh_job(
                    tenant_id=str(job.tenant_id),
                    latitude=job.latitude,
                    longitude=job.longitude,
                    scheduled_at=job.next_scheduled_at,
                    session=db_session
                )

                return {
                    "status": "success",
                    "job_id": str(job.id),
                    "pois_detected": job.pois_detected,
                    "changes_detected": job.changes_detected,
                    "change_summary": changes,
                    "duration_seconds": job.duration_seconds,
                    "next_scheduled_at": job.next_scheduled_at.isoformat()
                }

            except Exception as e:
                # Job failed
                job.status = "failed"
                job.completed_at = datetime.now(timezone.utc)
                job.error_message = str(e)
                job.error_details = {
                    "error_type": type(e).__name__,
                    "error_message": str(e),
                    "attempt": job.attempt_count
                }

                # Schedule retry if attempts remaining
                if job.can_retry:
                    job.next_scheduled_at = datetime.now(timezone.utc) + timedelta(hours=1)
                    logger.warning(
                        "POI refresh job failed, will retry",
                        job_id=str(job.id),
                        tenant_id=str(job.tenant_id),
                        attempt=job.attempt_count,
                        max_attempts=job.max_attempts,
                        error=str(e)
                    )
                else:
                    logger.error(
                        "POI refresh job failed permanently",
                        job_id=str(job.id),
                        tenant_id=str(job.tenant_id),
                        attempt=job.attempt_count,
                        error=str(e),
                        exc_info=True
                    )

                await db_session.commit()

                return {
                    "status": "failed",
                    "job_id": str(job.id),
                    "error": str(e),
                    "attempt": job.attempt_count,
                    "can_retry": job.can_retry
                }

        if session:
            return await _execute(session)
        else:
            async with database_manager.get_session() as db_session:
                return await _execute(db_session)

    def _analyze_changes(
        self,
        old_results: Dict[str, Any],
        new_results: Dict[str, Any]
    ) -> Dict[str, Any]:
        """
        Analyze changes between old and new POI detection results.

        Args:
            old_results: Previous per-category POI detection results
            new_results: New per-category POI detection results

        Returns:
            Change analysis with significance flag
        """
        changes = {
            "has_significant_changes": False,
            "category_changes": {},
            "total_poi_change": 0,
            "new_categories": [],
            "removed_categories": []
        }

        old_categories = set(old_results.keys())
        new_categories = set(new_results.keys())

        # New categories
        changes["new_categories"] = list(new_categories - old_categories)

        # Removed categories
        changes["removed_categories"] = list(old_categories - new_categories)

        # Analyze changes per category
        for category in new_categories:
            old_count = old_results.get(category, {}).get("count", 0)
            new_count = new_results.get(category, {}).get("count", 0)
            change = new_count - old_count

            if abs(change) > 0:
                changes["category_changes"][category] = {
                    "old_count": old_count,
                    "new_count": new_count,
                    "change": change,
                    "change_percent": (change / old_count * 100) if old_count > 0 else 100
                }

                changes["total_poi_change"] += abs(change)

        # Determine if changes are significant
        # Significant if: 10+ POIs changed OR 20%+ change OR new/removed categories
        total_old_pois = sum(data.get("count", 0) for data in old_results.values())
        if total_old_pois > 0:
            change_percent = (changes["total_poi_change"] / total_old_pois) * 100
            changes["total_change_percent"] = change_percent

            changes["has_significant_changes"] = (
                changes["total_poi_change"] >= 10
                or change_percent >= 20
                or len(changes["new_categories"]) > 0
                or len(changes["removed_categories"]) > 0
            )
        else:
            changes["has_significant_changes"] = changes["total_poi_change"] > 0

        return changes

    async def get_pending_jobs(
        self,
        limit: int = 100,
        session: Optional[AsyncSession] = None
    ) -> List[POIRefreshJob]:
        """
        Get pending jobs that are due for execution.

        Args:
            limit: Maximum number of jobs to return
            session: Database session

        Returns:
            List of pending jobs
        """
        async def _get_jobs(db_session: AsyncSession):
            result = await db_session.execute(
                select(POIRefreshJob)
                .where(
                    and_(
                        POIRefreshJob.status == "pending",
                        POIRefreshJob.scheduled_at <= datetime.now(timezone.utc)
                    )
                )
                .order_by(POIRefreshJob.scheduled_at)
                .limit(limit)
            )
            return result.scalars().all()

        if session:
            return await _get_jobs(session)
        else:
            async with database_manager.get_session() as db_session:
                return await _get_jobs(db_session)

    async def process_pending_jobs(
        self,
        max_concurrent: int = 5,
        session: Optional[AsyncSession] = None
    ) -> Dict[str, Any]:
        """
        Process all pending jobs concurrently.

        Args:
            max_concurrent: Maximum concurrent job executions
            session: Database session

        Returns:
            Processing summary
        """
        pending_jobs = await self.get_pending_jobs(session=session)

        if not pending_jobs:
            logger.info("No pending POI refresh jobs")
            return {
                "total_jobs": 0,
                "successful": 0,
                "failed": 0,
                "results": []
            }

        logger.info(
            "Processing pending POI refresh jobs",
            count=len(pending_jobs),
            max_concurrent=max_concurrent
        )

        # Process jobs with concurrency limit
        semaphore = asyncio.Semaphore(max_concurrent)

        async def process_job(job: POIRefreshJob):
            async with semaphore:
                return await self.execute_refresh_job(str(job.id))

        results = await asyncio.gather(
            *[process_job(job) for job in pending_jobs],
            return_exceptions=True
        )

        # Summarize results
        successful = sum(1 for r in results if isinstance(r, dict) and r.get("status") == "success")
        failed = sum(1 for r in results if isinstance(r, dict) and r.get("status") == "failed")
        errors = sum(1 for r in results if isinstance(r, Exception))

        summary = {
            "total_jobs": len(pending_jobs),
            "successful": successful,
            "failed": failed + errors,
            "results": [r if not isinstance(r, Exception) else {"status": "error", "error": str(r)} for r in results]
        }

        logger.info(
            "POI refresh jobs processing completed",
            **summary
        )

        return summary
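An illustrative check of the change-significance rule implemented in _analyze_changes above; the category counts are invented and the private helper is called directly only for demonstration.

service = POIRefreshService()
old = {"schools": {"count": 3}, "offices": {"count": 10}}
new = {"schools": {"count": 3}, "offices": {"count": 13}, "gyms": {"count": 1}}
changes = service._analyze_changes(old, new)
print(changes["has_significant_changes"])                  # True: a new category appeared and >20% change
print(changes["category_changes"]["offices"]["change"])    # 3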
services/external/app/services/poi_scheduler.py (vendored, new file, 187 lines added)
@@ -0,0 +1,187 @@
"""
POI Refresh Scheduler

Background scheduler for periodic POI context refresh.
Runs every hour to check for and execute pending POI refresh jobs.
"""

import asyncio
from typing import Optional
from datetime import datetime, timezone
import structlog

from app.services.poi_refresh_service import POIRefreshService

logger = structlog.get_logger()


class POIRefreshScheduler:
    """
    POI Refresh Scheduler

    Background task that periodically checks for and executes
    pending POI refresh jobs.
    """

    def __init__(
        self,
        poi_refresh_service: Optional[POIRefreshService] = None,
        check_interval_seconds: int = 3600,  # 1 hour
        max_concurrent_jobs: int = 5
    ):
        """
        Initialize POI refresh scheduler.

        Args:
            poi_refresh_service: POI refresh service instance
            check_interval_seconds: Seconds between checks (default: 3600 = 1 hour)
            max_concurrent_jobs: Max concurrent job executions (default: 5)
        """
        self.poi_refresh_service = poi_refresh_service or POIRefreshService()
        self.check_interval_seconds = check_interval_seconds
        self.max_concurrent_jobs = max_concurrent_jobs

        self._task: Optional[asyncio.Task] = None
        self._running = False

        logger.info(
            "POI Refresh Scheduler initialized",
            check_interval_seconds=check_interval_seconds,
            max_concurrent_jobs=max_concurrent_jobs
        )

    async def start(self):
        """Start the scheduler background task"""
        if self._running:
            logger.warning("POI Refresh Scheduler already running")
            return

        self._running = True
        self._task = asyncio.create_task(self._run_scheduler())

        logger.info("POI Refresh Scheduler started")

    async def stop(self):
        """Stop the scheduler background task"""
        if not self._running:
            return

        self._running = False

        if self._task:
            self._task.cancel()
            try:
                await self._task
            except asyncio.CancelledError:
                pass

        logger.info("POI Refresh Scheduler stopped")

    async def _run_scheduler(self):
        """Main scheduler loop"""
        logger.info("POI Refresh Scheduler loop started")

        while self._running:
            try:
                await self._process_cycle()
            except Exception as e:
                logger.error(
                    "POI refresh scheduler cycle failed",
                    error=str(e),
                    exc_info=True
                )

            # Wait for next cycle
            try:
                await asyncio.sleep(self.check_interval_seconds)
            except asyncio.CancelledError:
                break

        logger.info("POI Refresh Scheduler loop ended")

    async def _process_cycle(self):
        """Process one scheduler cycle"""
        cycle_start = datetime.now(timezone.utc)

        logger.debug(
            "POI refresh scheduler cycle started",
            timestamp=cycle_start.isoformat()
        )

        # Process pending jobs
        result = await self.poi_refresh_service.process_pending_jobs(
            max_concurrent=self.max_concurrent_jobs
        )

        cycle_end = datetime.now(timezone.utc)
        cycle_duration = (cycle_end - cycle_start).total_seconds()

        if result["total_jobs"] > 0:
            logger.info(
                "POI refresh scheduler cycle completed",
                total_jobs=result["total_jobs"],
                successful=result["successful"],
                failed=result["failed"],
                cycle_duration_seconds=cycle_duration
            )
        else:
            logger.debug(
                "POI refresh scheduler cycle completed (no jobs)",
                cycle_duration_seconds=cycle_duration
            )

    async def trigger_immediate_check(self):
        """Trigger an immediate check for pending jobs (bypasses schedule)"""
        logger.info("POI refresh scheduler immediate check triggered")

        try:
            result = await self.poi_refresh_service.process_pending_jobs(
                max_concurrent=self.max_concurrent_jobs
            )

            logger.info(
                "POI refresh scheduler immediate check completed",
                total_jobs=result["total_jobs"],
                successful=result["successful"],
                failed=result["failed"]
            )

            return result
        except Exception as e:
            logger.error(
                "POI refresh scheduler immediate check failed",
                error=str(e),
                exc_info=True
            )
            raise

    @property
    def is_running(self) -> bool:
        """Check if scheduler is running"""
        return self._running


# Global scheduler instance
_scheduler_instance: Optional[POIRefreshScheduler] = None


def get_scheduler() -> POIRefreshScheduler:
    """Get global scheduler instance (singleton)"""
    global _scheduler_instance

    if _scheduler_instance is None:
        _scheduler_instance = POIRefreshScheduler()

    return _scheduler_instance


async def start_scheduler():
    """Start global POI refresh scheduler"""
    scheduler = get_scheduler()
    await scheduler.start()


async def stop_scheduler():
    """Stop global POI refresh scheduler"""
    scheduler = get_scheduler()
    await scheduler.stop()
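A hypothetical wiring sketch, assuming the external service is a FastAPI app (its entrypoint is not part of this commit): the module-level start_scheduler/stop_scheduler helpers can be hooked into the application lifespan.

from contextlib import asynccontextmanager
from fastapi import FastAPI

from app.services.poi_scheduler import start_scheduler, stop_scheduler

@asynccontextmanager
async def lifespan(app: FastAPI):
    # Start the hourly POI refresh loop with the app and stop it cleanly on shutdown.
    await start_scheduler()
    yield
    await stop_scheduler()

app = FastAPI(lifespan=lifespan)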