Add POI feature and improve the overall backend implementation
302
services/external/app/api/geocoding.py
vendored
Normal file
@@ -0,0 +1,302 @@
"""
Geocoding API Endpoints

Provides address search, autocomplete, and geocoding via Nominatim.
"""

from fastapi import APIRouter, Query, HTTPException
from typing import List, Optional
from pydantic import BaseModel, Field
import structlog

from app.services.nominatim_service import NominatimService

logger = structlog.get_logger()
router = APIRouter(prefix="/api/v1/geocoding", tags=["Geocoding"])

# Initialize Nominatim service
# In production, override with environment variable for self-hosted instance
nominatim_service = NominatimService()


# Response Models
class AddressResult(BaseModel):
    """Address search result"""
    display_name: str = Field(..., description="Full formatted address")
    lat: float = Field(..., description="Latitude")
    lon: float = Field(..., description="Longitude")
    osm_type: str = Field(..., description="OSM object type")
    osm_id: int = Field(..., description="OSM object ID")
    place_id: int = Field(..., description="Nominatim place ID")
    type: str = Field(..., description="Place type")
    class_: str = Field(..., alias="class", description="OSM class")
    address: dict = Field(..., description="Parsed address components")
    boundingbox: List[str] = Field(..., description="Bounding box coordinates")


class GeocodeResult(BaseModel):
    """Geocoding result"""
    display_name: str = Field(..., description="Full formatted address")
    lat: float = Field(..., description="Latitude")
    lon: float = Field(..., description="Longitude")
    address: dict = Field(..., description="Parsed address components")


class CoordinateValidation(BaseModel):
    """Coordinate validation result"""
    valid: bool = Field(..., description="Whether coordinates are valid")
    address: Optional[str] = Field(None, description="Address at coordinates if valid")


# Endpoints
@router.get(
    "/search",
    response_model=List[AddressResult],
    summary="Search for addresses",
    description="Search for addresses matching query (autocomplete). Minimum 3 characters required."
)
async def search_addresses(
    q: str = Query(..., min_length=3, description="Search query (minimum 3 characters)"),
    country_code: str = Query("es", description="ISO country code to restrict search"),
    limit: int = Query(10, ge=1, le=50, description="Maximum number of results")
):
    """
    Search for addresses matching the query.

    This endpoint provides autocomplete functionality for address input.
    Results are restricted to the specified country and sorted by relevance.

    Example:
        GET /api/v1/geocoding/search?q=Gran%20Via%20Madrid&limit=5
    """
    try:
        results = await nominatim_service.search_address(
            query=q,
            country_code=country_code,
            limit=limit
        )

        logger.info(
            "Address search request",
            query=q,
            country=country_code,
            result_count=len(results)
        )

        return results

    except Exception as e:
        logger.error(
            "Address search failed",
            query=q,
            error=str(e),
            exc_info=True
        )
        raise HTTPException(
            status_code=500,
            detail=f"Address search failed: {str(e)}"
        )


@router.get(
    "/geocode",
    response_model=GeocodeResult,
    summary="Geocode an address",
    description="Convert an address string to coordinates (lat/lon)"
)
async def geocode_address(
    address: str = Query(..., min_length=5, description="Full address to geocode"),
    country_code: str = Query("es", description="ISO country code")
):
    """
    Geocode an address to get coordinates.

    Returns the best matching location for the given address.

    Example:
        GET /api/v1/geocoding/geocode?address=Gran%20Via%2028,%20Madrid
    """
    try:
        result = await nominatim_service.geocode_address(
            address=address,
            country_code=country_code
        )

        if not result:
            raise HTTPException(
                status_code=404,
                detail=f"Address not found: {address}"
            )

        logger.info(
            "Geocoding request",
            address=address,
            lat=result["lat"],
            lon=result["lon"]
        )

        return result

    except HTTPException:
        raise
    except Exception as e:
        logger.error(
            "Geocoding failed",
            address=address,
            error=str(e),
            exc_info=True
        )
        raise HTTPException(
            status_code=500,
            detail=f"Geocoding failed: {str(e)}"
        )


@router.get(
    "/reverse",
    response_model=GeocodeResult,
    summary="Reverse geocode coordinates",
    description="Convert coordinates (lat/lon) to an address"
)
async def reverse_geocode(
    lat: float = Query(..., ge=-90, le=90, description="Latitude"),
    lon: float = Query(..., ge=-180, le=180, description="Longitude")
):
    """
    Reverse geocode coordinates to get address.

    Returns the address at the specified coordinates.

    Example:
        GET /api/v1/geocoding/reverse?lat=40.4168&lon=-3.7038
    """
    try:
        result = await nominatim_service.reverse_geocode(
            latitude=lat,
            longitude=lon
        )

        if not result:
            raise HTTPException(
                status_code=404,
                detail=f"No address found at coordinates: {lat}, {lon}"
            )

        logger.info(
            "Reverse geocoding request",
            lat=lat,
            lon=lon,
            address=result["display_name"]
        )

        return result

    except HTTPException:
        raise
    except Exception as e:
        logger.error(
            "Reverse geocoding failed",
            lat=lat,
            lon=lon,
            error=str(e),
            exc_info=True
        )
        raise HTTPException(
            status_code=500,
            detail=f"Reverse geocoding failed: {str(e)}"
        )


@router.get(
    "/validate",
    response_model=CoordinateValidation,
    summary="Validate coordinates",
    description="Check if coordinates point to a valid location"
)
async def validate_coordinates(
    lat: float = Query(..., ge=-90, le=90, description="Latitude"),
    lon: float = Query(..., ge=-180, le=180, description="Longitude")
):
    """
    Validate that coordinates point to a real location.

    Returns validation result with address if valid.

    Example:
        GET /api/v1/geocoding/validate?lat=40.4168&lon=-3.7038
    """
    try:
        is_valid = await nominatim_service.validate_coordinates(
            latitude=lat,
            longitude=lon
        )

        result = {"valid": is_valid, "address": None}

        if is_valid:
            geocode_result = await nominatim_service.reverse_geocode(lat, lon)
            if geocode_result:
                result["address"] = geocode_result["display_name"]

        logger.info(
            "Coordinate validation request",
            lat=lat,
            lon=lon,
            valid=is_valid
        )

        return result

    except Exception as e:
        logger.error(
            "Coordinate validation failed",
            lat=lat,
            lon=lon,
            error=str(e),
            exc_info=True
        )
        raise HTTPException(
            status_code=500,
            detail=f"Coordinate validation failed: {str(e)}"
        )


@router.get(
    "/health",
    summary="Check geocoding service health",
    description="Check if Nominatim service is accessible"
)
async def health_check():
    """
    Check if Nominatim service is accessible.

    Returns service health status.
    """
    try:
        is_healthy = await nominatim_service.health_check()

        if not is_healthy:
            raise HTTPException(
                status_code=503,
                detail="Nominatim service is unavailable"
            )

        return {
            "status": "healthy",
            "service": "nominatim",
            "base_url": nominatim_service.base_url,
            "is_public_api": nominatim_service.is_public_api
        }

    except HTTPException:
        raise
    except Exception as e:
        logger.error(
            "Health check failed",
            error=str(e),
            exc_info=True
        )
        raise HTTPException(
            status_code=503,
            detail=f"Health check failed: {str(e)}"
        )
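A minimal usage sketch for the endpoints above (the service base URL and port are assumptions about deployment; the paths, query parameters, and response fields come from the router in this file):

# Hedged client sketch for the geocoding router; base URL is an assumption.
import asyncio
import httpx

EXTERNAL_SERVICE_URL = "http://localhost:8000"  # assumed host/port of the external service

async def demo():
    async with httpx.AsyncClient(base_url=EXTERNAL_SERVICE_URL, timeout=10.0) as client:
        # Autocomplete-style address search (min 3 characters, default country "es")
        resp = await client.get(
            "/api/v1/geocoding/search",
            params={"q": "Gran Via Madrid", "limit": 5},
        )
        resp.raise_for_status()
        for hit in resp.json():
            print(hit["display_name"], hit["lat"], hit["lon"])

        # Reverse geocoding of known coordinates
        resp = await client.get(
            "/api/v1/geocoding/reverse",
            params={"lat": 40.4168, "lon": -3.7038},
        )
        resp.raise_for_status()
        print(resp.json()["display_name"])

if __name__ == "__main__":
    asyncio.run(demo())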
452
services/external/app/api/poi_context.py
vendored
Normal file
@@ -0,0 +1,452 @@
"""
POI Context API Endpoints

REST API for POI detection, retrieval, and management.
"""

from fastapi import APIRouter, Depends, HTTPException, Query
from sqlalchemy.ext.asyncio import AsyncSession
from typing import Optional
import structlog
import uuid

from app.core.database import get_db
from app.services.poi_detection_service import POIDetectionService
from app.services.poi_feature_selector import POIFeatureSelector
from app.services.competitor_analyzer import CompetitorAnalyzer
from app.services.poi_refresh_service import POIRefreshService
from app.repositories.poi_context_repository import POIContextRepository
from app.cache.poi_cache_service import POICacheService
from app.core.redis_client import get_redis_client

logger = structlog.get_logger()

router = APIRouter(prefix="/poi-context", tags=["POI Context"])


@router.post("/{tenant_id}/detect")
async def detect_pois_for_tenant(
    tenant_id: str,
    latitude: float = Query(..., description="Bakery latitude"),
    longitude: float = Query(..., description="Bakery longitude"),
    force_refresh: bool = Query(False, description="Force refresh, skip cache"),
    db: AsyncSession = Depends(get_db)
):
    """
    Detect POIs for a tenant's bakery location.

    Performs automated POI detection using Overpass API, calculates ML features,
    and stores results for demand forecasting.
    """
    try:
        tenant_uuid = uuid.UUID(tenant_id)
    except ValueError:
        raise HTTPException(status_code=400, detail="Invalid tenant_id format")

    logger.info(
        "POI detection requested",
        tenant_id=tenant_id,
        location=(latitude, longitude),
        force_refresh=force_refresh
    )

    try:
        # Initialize services
        poi_service = POIDetectionService()
        feature_selector = POIFeatureSelector()
        competitor_analyzer = CompetitorAnalyzer()
        poi_repo = POIContextRepository(db)
        redis_client = await get_redis_client()
        cache_service = POICacheService(redis_client)

        # Check cache first (unless force refresh)
        if not force_refresh:
            cached_result = await cache_service.get_cached_pois(latitude, longitude)
            if cached_result:
                logger.info("Using cached POI results", tenant_id=tenant_id)
                # Still save to database for this tenant
                poi_context = await poi_repo.create_or_update(tenant_uuid, cached_result)
                return {
                    "status": "success",
                    "source": "cache",
                    "poi_context": poi_context.to_dict()
                }

        # Detect POIs
        poi_results = await poi_service.detect_pois_for_bakery(
            latitude, longitude, tenant_id
        )

        # Select relevant features
        try:
            feature_selection = feature_selector.select_relevant_features(
                poi_results["poi_categories"],
                tenant_id
            )
        except Exception as e:
            logger.error(
                "Feature selection failed",
                tenant_id=tenant_id,
                error=str(e),
                exc_info=True
            )
            # Provide default feature selection to continue
            feature_selection = {
                "features": {},
                "relevant_categories": [],
                "relevance_report": [],
                "total_features": 0,
                "total_relevant_categories": 0
            }

        # Analyze competitors specifically
        try:
            competitors_data = poi_results["poi_categories"].get("competitors", {})
            competitor_pois = competitors_data.get("pois", [])
            competitor_analysis = competitor_analyzer.analyze_competitive_landscape(
                competitor_pois,
                (latitude, longitude),
                tenant_id
            )
        except Exception as e:
            logger.error(
                "Competitor analysis failed",
                tenant_id=tenant_id,
                error=str(e),
                exc_info=True
            )
            # Provide default competitor analysis to continue
            competitor_analysis = {
                "competitive_pressure_score": 0.0,
                "direct_competitors_count": 0,
                "nearby_competitors_count": 0,
                "market_competitors_count": 0,
                "total_competitors_count": 0,
                "competitive_zone": "low_competition",
                "market_type": "underserved",
                "competitive_advantage": "first_mover",
                "ml_feature_competitive_pressure": 0.0,
                "ml_feature_has_direct_competitor": 0,
                "ml_feature_competitor_density_500m": 0,
                "competitor_details": [],
                "nearest_competitor": None
            }

        # Generate competitive insights
        try:
            competitive_insights = competitor_analyzer.get_competitive_insights(
                competitor_analysis
            )
        except Exception as e:
            logger.warning(
                "Failed to generate competitive insights",
                tenant_id=tenant_id,
                error=str(e)
            )
            competitive_insights = []

        # Combine results
        enhanced_results = {
            **poi_results,
            "ml_features": feature_selection.get("features", {}),
            "relevant_categories": feature_selection.get("relevant_categories", []),
            "relevance_report": feature_selection.get("relevance_report", []),
            "competitor_analysis": competitor_analysis,
            "competitive_insights": competitive_insights
        }

        # Cache results
        try:
            await cache_service.cache_poi_results(latitude, longitude, enhanced_results)
        except Exception as e:
            logger.warning(
                "Failed to cache POI results",
                tenant_id=tenant_id,
                error=str(e)
            )

        # Save to database
        try:
            poi_context = await poi_repo.create_or_update(tenant_uuid, enhanced_results)
        except Exception as e:
            logger.error(
                "Failed to save POI context to database",
                tenant_id=tenant_id,
                error=str(e),
                exc_info=True
            )
            raise HTTPException(
                status_code=500,
                detail=f"Failed to save POI context: {str(e)}"
            )

        # Schedule automatic refresh job (180 days from now)
        try:
            poi_refresh_service = POIRefreshService()
            refresh_job = await poi_refresh_service.schedule_refresh_job(
                tenant_id=tenant_id,
                latitude=latitude,
                longitude=longitude,
                session=db
            )
            logger.info(
                "POI refresh job scheduled",
                tenant_id=tenant_id,
                job_id=str(refresh_job.id),
                scheduled_at=refresh_job.scheduled_at
            )
        except Exception as e:
            logger.warning(
                "Failed to schedule POI refresh job",
                tenant_id=tenant_id,
                error=str(e)
            )

        logger.info(
            "POI detection completed",
            tenant_id=tenant_id,
            total_pois=poi_context.total_pois_detected,
            relevant_categories=len(feature_selection.get("relevant_categories", []))
        )

        return {
            "status": "success",
            "source": "detection",
            "poi_context": poi_context.to_dict(),
            "feature_selection": feature_selection,
            "competitor_analysis": competitor_analysis,
            "competitive_insights": competitive_insights
        }

    except Exception as e:
        logger.error(
            "POI detection failed",
            tenant_id=tenant_id,
            error=str(e),
            exc_info=True
        )
        raise HTTPException(
            status_code=500,
            detail=f"POI detection failed: {str(e)}"
        )


@router.get("/{tenant_id}")
async def get_poi_context(
    tenant_id: str,
    db: AsyncSession = Depends(get_db)
):
    """
    Get POI context for a tenant.

    Returns stored POI detection results and ML features.
    """
    try:
        tenant_uuid = uuid.UUID(tenant_id)
    except ValueError:
        raise HTTPException(status_code=400, detail="Invalid tenant_id format")

    poi_repo = POIContextRepository(db)
    poi_context = await poi_repo.get_by_tenant_id(tenant_uuid)

    if not poi_context:
        raise HTTPException(
            status_code=404,
            detail=f"POI context not found for tenant {tenant_id}"
        )

    # Check if stale
    is_stale = poi_context.is_stale()

    return {
        "poi_context": poi_context.to_dict(),
        "is_stale": is_stale,
        "needs_refresh": is_stale
    }


@router.post("/{tenant_id}/refresh")
async def refresh_poi_context(
    tenant_id: str,
    db: AsyncSession = Depends(get_db)
):
    """
    Refresh POI context for a tenant.

    Re-detects POIs and updates stored data.
    """
    try:
        tenant_uuid = uuid.UUID(tenant_id)
    except ValueError:
        raise HTTPException(status_code=400, detail="Invalid tenant_id format")

    poi_repo = POIContextRepository(db)
    existing_context = await poi_repo.get_by_tenant_id(tenant_uuid)

    if not existing_context:
        raise HTTPException(
            status_code=404,
            detail=f"POI context not found for tenant {tenant_id}. Use detect endpoint first."
        )

    # Perform detection with force_refresh=True
    return await detect_pois_for_tenant(
        tenant_id=tenant_id,
        latitude=existing_context.latitude,
        longitude=existing_context.longitude,
        force_refresh=True,
        db=db
    )


@router.delete("/{tenant_id}")
async def delete_poi_context(
    tenant_id: str,
    db: AsyncSession = Depends(get_db)
):
    """
    Delete POI context for a tenant.
    """
    try:
        tenant_uuid = uuid.UUID(tenant_id)
    except ValueError:
        raise HTTPException(status_code=400, detail="Invalid tenant_id format")

    poi_repo = POIContextRepository(db)
    deleted = await poi_repo.delete_by_tenant_id(tenant_uuid)

    if not deleted:
        raise HTTPException(
            status_code=404,
            detail=f"POI context not found for tenant {tenant_id}"
        )

    return {
        "status": "success",
        "message": f"POI context deleted for tenant {tenant_id}"
    }


@router.get("/{tenant_id}/feature-importance")
async def get_feature_importance(
    tenant_id: str,
    db: AsyncSession = Depends(get_db)
):
    """
    Get feature importance summary for tenant's POI context.

    Shows which POI categories are relevant and their impact scores.
    """
    try:
        tenant_uuid = uuid.UUID(tenant_id)
    except ValueError:
        raise HTTPException(status_code=400, detail="Invalid tenant_id format")

    poi_repo = POIContextRepository(db)
    poi_context = await poi_repo.get_by_tenant_id(tenant_uuid)

    if not poi_context:
        raise HTTPException(
            status_code=404,
            detail=f"POI context not found for tenant {tenant_id}"
        )

    feature_selector = POIFeatureSelector()
    importance_summary = feature_selector.get_feature_importance_summary(
        poi_context.poi_detection_results
    )

    return {
        "tenant_id": tenant_id,
        "feature_importance": importance_summary,
        "total_categories": len(importance_summary),
        "relevant_categories": sum(1 for cat in importance_summary if cat["is_relevant"])
    }


@router.get("/{tenant_id}/competitor-analysis")
async def get_competitor_analysis(
    tenant_id: str,
    db: AsyncSession = Depends(get_db)
):
    """
    Get detailed competitor analysis for tenant location.
    """
    try:
        tenant_uuid = uuid.UUID(tenant_id)
    except ValueError:
        raise HTTPException(status_code=400, detail="Invalid tenant_id format")

    poi_repo = POIContextRepository(db)
    poi_context = await poi_repo.get_by_tenant_id(tenant_uuid)

    if not poi_context:
        raise HTTPException(
            status_code=404,
            detail=f"POI context not found for tenant {tenant_id}"
        )

    competitor_analyzer = CompetitorAnalyzer()
    competitors = poi_context.poi_detection_results.get("competitors", {}).get("pois", [])

    analysis = competitor_analyzer.analyze_competitive_landscape(
        competitors,
        (poi_context.latitude, poi_context.longitude),
        tenant_id
    )

    insights = competitor_analyzer.get_competitive_insights(analysis)

    return {
        "tenant_id": tenant_id,
        "location": {
            "latitude": poi_context.latitude,
            "longitude": poi_context.longitude
        },
        "competitor_analysis": analysis,
        "insights": insights
    }


@router.get("/health")
async def poi_health_check():
    """
    Check POI detection service health.

    Verifies Overpass API accessibility.
    """
    poi_service = POIDetectionService()
    health = await poi_service.health_check()

    if not health["healthy"]:
        raise HTTPException(
            status_code=503,
            detail=f"POI detection service unhealthy: {health.get('error', 'Unknown error')}"
        )

    return {
        "status": "healthy",
        "overpass_api": health
    }


@router.get("/cache/stats")
async def get_cache_stats():
    """
    Get POI cache statistics.
    """
    try:
        redis_client = await get_redis_client()
        cache_service = POICacheService(redis_client)
        stats = await cache_service.get_cache_stats()

        return {
            "status": "success",
            "cache_stats": stats
        }
    except Exception as e:
        logger.error("Failed to get cache stats", error=str(e))
        raise HTTPException(
            status_code=500,
            detail=f"Failed to get cache stats: {str(e)}"
        )
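A minimal client-side sketch of the detect-then-read flow exposed above (the base URL and tenant UUID are placeholders; the routes come from the router in this file, which is mounted under the "/poi-context" prefix rather than "/api/v1"):

# Hedged sketch: trigger POI detection for a tenant, then read the stored context.
import asyncio
import httpx

EXTERNAL_SERVICE_URL = "http://localhost:8000"  # assumed host/port
TENANT_ID = "00000000-0000-0000-0000-000000000000"  # placeholder tenant UUID

async def demo():
    async with httpx.AsyncClient(base_url=EXTERNAL_SERVICE_URL, timeout=60.0) as client:
        # First call runs detection (or reuses the coordinate cache) and persists the context
        resp = await client.post(
            f"/poi-context/{TENANT_ID}/detect",
            params={"latitude": 40.4168, "longitude": -3.7038},
        )
        resp.raise_for_status()
        print(resp.json()["source"])  # "detection" or "cache"

        # Subsequent reads return the stored context plus staleness flags
        resp = await client.get(f"/poi-context/{TENANT_ID}")
        resp.raise_for_status()
        body = resp.json()
        print(body["is_stale"], body["needs_refresh"])

if __name__ == "__main__":
    asyncio.run(demo())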
441
services/external/app/api/poi_refresh_jobs.py
vendored
Normal file
@@ -0,0 +1,441 @@
"""
POI Refresh Jobs API Endpoints

REST API for managing POI refresh background jobs.
"""

from fastapi import APIRouter, Depends, HTTPException, Query
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select, desc
from typing import List, Optional
from datetime import datetime, timezone
from pydantic import BaseModel, Field
import structlog
import uuid

from app.core.database import get_db
from app.services.poi_refresh_service import POIRefreshService
from app.services.poi_scheduler import get_scheduler
from app.models.poi_refresh_job import POIRefreshJob

logger = structlog.get_logger()

router = APIRouter(prefix="/poi-refresh-jobs", tags=["POI Refresh Jobs"])


# Response Models
class POIRefreshJobResponse(BaseModel):
    """POI refresh job response"""
    id: str
    tenant_id: str
    status: str
    scheduled_at: datetime
    started_at: Optional[datetime] = None
    completed_at: Optional[datetime] = None
    attempt_count: int
    max_attempts: int
    pois_detected: Optional[int] = None
    changes_detected: bool = False
    change_summary: Optional[dict] = None
    error_message: Optional[str] = None
    next_scheduled_at: Optional[datetime] = None
    duration_seconds: Optional[float] = None
    is_overdue: bool
    can_retry: bool

    class Config:
        from_attributes = True


class ScheduleJobRequest(BaseModel):
    """Schedule POI refresh job request"""
    tenant_id: str = Field(..., description="Tenant UUID")
    latitude: float = Field(..., ge=-90, le=90, description="Bakery latitude")
    longitude: float = Field(..., ge=-180, le=180, description="Bakery longitude")
    scheduled_at: Optional[datetime] = Field(None, description="When to run (default: 180 days from now)")


class JobExecutionResult(BaseModel):
    """Job execution result"""
    status: str
    job_id: str
    message: Optional[str] = None
    pois_detected: Optional[int] = None
    changes_detected: Optional[bool] = None
    change_summary: Optional[dict] = None
    duration_seconds: Optional[float] = None
    next_scheduled_at: Optional[str] = None
    error: Optional[str] = None
    attempt: Optional[int] = None
    can_retry: Optional[bool] = None


# Endpoints
@router.post(
    "/schedule",
    response_model=POIRefreshJobResponse,
    summary="Schedule POI refresh job",
    description="Schedule a background job to refresh POI context for a tenant"
)
async def schedule_refresh_job(
    request: ScheduleJobRequest,
    db: AsyncSession = Depends(get_db)
):
    """
    Schedule a POI refresh job for a tenant.

    Creates a background job that will detect POIs for the tenant's location
    at the scheduled time. Default schedule is 180 days from now.
    """
    try:
        tenant_uuid = uuid.UUID(request.tenant_id)
    except ValueError:
        raise HTTPException(status_code=400, detail="Invalid tenant_id format")

    try:
        poi_refresh_service = POIRefreshService()
        job = await poi_refresh_service.schedule_refresh_job(
            tenant_id=request.tenant_id,
            latitude=request.latitude,
            longitude=request.longitude,
            scheduled_at=request.scheduled_at,
            session=db
        )

        logger.info(
            "POI refresh job scheduled via API",
            tenant_id=request.tenant_id,
            job_id=str(job.id),
            scheduled_at=job.scheduled_at
        )

        return POIRefreshJobResponse(
            id=str(job.id),
            tenant_id=str(job.tenant_id),
            status=job.status,
            scheduled_at=job.scheduled_at,
            started_at=job.started_at,
            completed_at=job.completed_at,
            attempt_count=job.attempt_count,
            max_attempts=job.max_attempts,
            pois_detected=job.pois_detected,
            changes_detected=job.changes_detected,
            change_summary=job.change_summary,
            error_message=job.error_message,
            next_scheduled_at=job.next_scheduled_at,
            duration_seconds=job.duration_seconds,
            is_overdue=job.is_overdue,
            can_retry=job.can_retry
        )

    except Exception as e:
        logger.error(
            "Failed to schedule POI refresh job",
            tenant_id=request.tenant_id,
            error=str(e),
            exc_info=True
        )
        raise HTTPException(
            status_code=500,
            detail=f"Failed to schedule refresh job: {str(e)}"
        )


@router.get(
    "/{job_id}",
    response_model=POIRefreshJobResponse,
    summary="Get refresh job by ID",
    description="Retrieve details of a specific POI refresh job"
)
async def get_refresh_job(
    job_id: str,
    db: AsyncSession = Depends(get_db)
):
    """Get POI refresh job by ID"""
    try:
        job_uuid = uuid.UUID(job_id)
    except ValueError:
        raise HTTPException(status_code=400, detail="Invalid job_id format")

    result = await db.execute(
        select(POIRefreshJob).where(POIRefreshJob.id == job_uuid)
    )
    job = result.scalar_one_or_none()

    if not job:
        raise HTTPException(status_code=404, detail=f"Job not found: {job_id}")

    return POIRefreshJobResponse(
        id=str(job.id),
        tenant_id=str(job.tenant_id),
        status=job.status,
        scheduled_at=job.scheduled_at,
        started_at=job.started_at,
        completed_at=job.completed_at,
        attempt_count=job.attempt_count,
        max_attempts=job.max_attempts,
        pois_detected=job.pois_detected,
        changes_detected=job.changes_detected,
        change_summary=job.change_summary,
        error_message=job.error_message,
        next_scheduled_at=job.next_scheduled_at,
        duration_seconds=job.duration_seconds,
        is_overdue=job.is_overdue,
        can_retry=job.can_retry
    )


@router.get(
    "/tenant/{tenant_id}",
    response_model=List[POIRefreshJobResponse],
    summary="Get refresh jobs for tenant",
    description="Retrieve all POI refresh jobs for a specific tenant"
)
async def get_tenant_refresh_jobs(
    tenant_id: str,
    status: Optional[str] = Query(None, description="Filter by status"),
    limit: int = Query(50, ge=1, le=200, description="Maximum number of results"),
    db: AsyncSession = Depends(get_db)
):
    """Get all POI refresh jobs for a tenant"""
    try:
        tenant_uuid = uuid.UUID(tenant_id)
    except ValueError:
        raise HTTPException(status_code=400, detail="Invalid tenant_id format")

    query = select(POIRefreshJob).where(POIRefreshJob.tenant_id == tenant_uuid)

    if status:
        query = query.where(POIRefreshJob.status == status)

    query = query.order_by(desc(POIRefreshJob.scheduled_at)).limit(limit)

    result = await db.execute(query)
    jobs = result.scalars().all()

    return [
        POIRefreshJobResponse(
            id=str(job.id),
            tenant_id=str(job.tenant_id),
            status=job.status,
            scheduled_at=job.scheduled_at,
            started_at=job.started_at,
            completed_at=job.completed_at,
            attempt_count=job.attempt_count,
            max_attempts=job.max_attempts,
            pois_detected=job.pois_detected,
            changes_detected=job.changes_detected,
            change_summary=job.change_summary,
            error_message=job.error_message,
            next_scheduled_at=job.next_scheduled_at,
            duration_seconds=job.duration_seconds,
            is_overdue=job.is_overdue,
            can_retry=job.can_retry
        )
        for job in jobs
    ]


@router.post(
    "/{job_id}/execute",
    response_model=JobExecutionResult,
    summary="Execute refresh job",
    description="Manually trigger execution of a pending POI refresh job"
)
async def execute_refresh_job(
    job_id: str,
    db: AsyncSession = Depends(get_db)
):
    """Manually execute a POI refresh job"""
    try:
        job_uuid = uuid.UUID(job_id)
    except ValueError:
        raise HTTPException(status_code=400, detail="Invalid job_id format")

    try:
        poi_refresh_service = POIRefreshService()
        result = await poi_refresh_service.execute_refresh_job(
            job_id=job_id,
            session=db
        )

        logger.info(
            "POI refresh job executed via API",
            job_id=job_id,
            status=result["status"]
        )

        return JobExecutionResult(**result)

    except ValueError as e:
        raise HTTPException(status_code=404, detail=str(e))
    except Exception as e:
        logger.error(
            "Failed to execute POI refresh job",
            job_id=job_id,
            error=str(e),
            exc_info=True
        )
        raise HTTPException(
            status_code=500,
            detail=f"Failed to execute refresh job: {str(e)}"
        )


@router.post(
    "/process-pending",
    summary="Process all pending jobs",
    description="Manually trigger processing of all pending POI refresh jobs"
)
async def process_pending_jobs(
    max_concurrent: int = Query(5, ge=1, le=20, description="Max concurrent executions"),
    db: AsyncSession = Depends(get_db)
):
    """Process all pending POI refresh jobs"""
    try:
        poi_refresh_service = POIRefreshService()
        result = await poi_refresh_service.process_pending_jobs(
            max_concurrent=max_concurrent,
            session=db
        )

        logger.info(
            "Pending POI refresh jobs processed via API",
            total_jobs=result["total_jobs"],
            successful=result["successful"],
            failed=result["failed"]
        )

        return result

    except Exception as e:
        logger.error(
            "Failed to process pending POI refresh jobs",
            error=str(e),
            exc_info=True
        )
        raise HTTPException(
            status_code=500,
            detail=f"Failed to process pending jobs: {str(e)}"
        )


@router.get(
    "/pending",
    response_model=List[POIRefreshJobResponse],
    summary="Get pending jobs",
    description="Retrieve all pending POI refresh jobs that are due for execution"
)
async def get_pending_jobs(
    limit: int = Query(100, ge=1, le=500, description="Maximum number of results"),
    db: AsyncSession = Depends(get_db)
):
    """Get all pending POI refresh jobs"""
    try:
        poi_refresh_service = POIRefreshService()
        jobs = await poi_refresh_service.get_pending_jobs(
            limit=limit,
            session=db
        )

        return [
            POIRefreshJobResponse(
                id=str(job.id),
                tenant_id=str(job.tenant_id),
                status=job.status,
                scheduled_at=job.scheduled_at,
                started_at=job.started_at,
                completed_at=job.completed_at,
                attempt_count=job.attempt_count,
                max_attempts=job.max_attempts,
                pois_detected=job.pois_detected,
                changes_detected=job.changes_detected,
                change_summary=job.change_summary,
                error_message=job.error_message,
                next_scheduled_at=job.next_scheduled_at,
                duration_seconds=job.duration_seconds,
                is_overdue=job.is_overdue,
                can_retry=job.can_retry
            )
            for job in jobs
        ]

    except Exception as e:
        logger.error(
            "Failed to get pending POI refresh jobs",
            error=str(e),
            exc_info=True
        )
        raise HTTPException(
            status_code=500,
            detail=f"Failed to get pending jobs: {str(e)}"
        )


@router.post(
    "/trigger-scheduler",
    summary="Trigger scheduler immediately",
    description="Trigger an immediate check for pending jobs (bypasses schedule)"
)
async def trigger_scheduler():
    """Trigger POI refresh scheduler immediately"""
    try:
        scheduler = get_scheduler()

        if not scheduler.is_running:
            raise HTTPException(
                status_code=503,
                detail="POI refresh scheduler is not running"
            )

        result = await scheduler.trigger_immediate_check()

        logger.info(
            "POI refresh scheduler triggered via API",
            total_jobs=result["total_jobs"],
            successful=result["successful"],
            failed=result["failed"]
        )

        return result

    except HTTPException:
        raise
    except Exception as e:
        logger.error(
            "Failed to trigger POI refresh scheduler",
            error=str(e),
            exc_info=True
        )
        raise HTTPException(
            status_code=500,
            detail=f"Failed to trigger scheduler: {str(e)}"
        )


@router.get(
    "/scheduler/status",
    summary="Get scheduler status",
    description="Check if POI refresh scheduler is running"
)
async def get_scheduler_status():
    """Get POI refresh scheduler status"""
    try:
        scheduler = get_scheduler()

        return {
            "is_running": scheduler.is_running,
            "check_interval_seconds": scheduler.check_interval_seconds,
            "max_concurrent_jobs": scheduler.max_concurrent_jobs
        }

    except Exception as e:
        logger.error(
            "Failed to get scheduler status",
            error=str(e),
            exc_info=True
        )
        raise HTTPException(
            status_code=500,
            detail=f"Failed to get scheduler status: {str(e)}"
        )
208
services/external/app/cache/poi_cache_service.py
vendored
Normal file
@@ -0,0 +1,208 @@
"""
POI Cache Service

Caches POI detection results to avoid hammering Overpass API.
POI data doesn't change frequently, so aggressive caching is appropriate.
"""

from typing import Optional, Dict, Any
import json
import structlog
from datetime import timedelta

from app.core.poi_config import (
    POI_CACHE_TTL_DAYS,
    POI_COORDINATE_PRECISION
)

logger = structlog.get_logger()


class POICacheService:
    """
    Redis-based cache for POI detection results.

    Caches results by rounded coordinates to allow reuse for nearby locations.
    Reduces load on Overpass API and improves onboarding performance.
    """

    def __init__(self, redis_client):
        """
        Initialize cache service.

        Args:
            redis_client: Redis client instance
        """
        self.redis = redis_client
        self.cache_ttl_days = POI_CACHE_TTL_DAYS
        self.coordinate_precision = POI_COORDINATE_PRECISION

    def _generate_cache_key(self, latitude: float, longitude: float) -> str:
        """
        Generate cache key from coordinates.

        Rounds coordinates to specified precision (default 4 decimals ≈ 10m).
        This allows cache reuse for bakeries in very close proximity.

        Args:
            latitude: Bakery latitude
            longitude: Bakery longitude

        Returns:
            Redis cache key
        """
        lat_rounded = round(latitude, self.coordinate_precision)
        lon_rounded = round(longitude, self.coordinate_precision)
        return f"poi_cache:{lat_rounded}:{lon_rounded}"

    async def get_cached_pois(
        self,
        latitude: float,
        longitude: float
    ) -> Optional[Dict[str, Any]]:
        """
        Get cached POI results for location.

        Args:
            latitude: Bakery latitude
            longitude: Bakery longitude

        Returns:
            Cached POI detection results or None if not cached
        """
        cache_key = self._generate_cache_key(latitude, longitude)

        try:
            cached_data = await self.redis.get(cache_key)
            if cached_data:
                logger.info(
                    "POI cache hit",
                    cache_key=cache_key,
                    location=(latitude, longitude)
                )
                return json.loads(cached_data)
            else:
                logger.debug(
                    "POI cache miss",
                    cache_key=cache_key,
                    location=(latitude, longitude)
                )
                return None
        except Exception as e:
            logger.warning(
                "Failed to retrieve POI cache",
                error=str(e),
                cache_key=cache_key
            )
            return None

    async def cache_poi_results(
        self,
        latitude: float,
        longitude: float,
        poi_data: Dict[str, Any]
    ) -> bool:
        """
        Cache POI detection results.

        Args:
            latitude: Bakery latitude
            longitude: Bakery longitude
            poi_data: Complete POI detection results

        Returns:
            True if cached successfully, False otherwise
        """
        cache_key = self._generate_cache_key(latitude, longitude)
        ttl_seconds = self.cache_ttl_days * 24 * 60 * 60

        try:
            await self.redis.setex(
                cache_key,
                ttl_seconds,
                json.dumps(poi_data)
            )
            logger.info(
                "POI results cached",
                cache_key=cache_key,
                ttl_days=self.cache_ttl_days,
                location=(latitude, longitude)
            )
            return True
        except Exception as e:
            logger.error(
                "Failed to cache POI results",
                error=str(e),
                cache_key=cache_key
            )
            return False

    async def invalidate_cache(
        self,
        latitude: float,
        longitude: float
    ) -> bool:
        """
        Invalidate cached POI results for location.

        Useful for manual refresh or data corrections.

        Args:
            latitude: Bakery latitude
            longitude: Bakery longitude

        Returns:
            True if invalidated successfully
        """
        cache_key = self._generate_cache_key(latitude, longitude)

        try:
            deleted = await self.redis.delete(cache_key)
            if deleted:
                logger.info(
                    "POI cache invalidated",
                    cache_key=cache_key,
                    location=(latitude, longitude)
                )
            return bool(deleted)
        except Exception as e:
            logger.error(
                "Failed to invalidate POI cache",
                error=str(e),
                cache_key=cache_key
            )
            return False

    async def get_cache_stats(self) -> Dict[str, Any]:
        """
        Get cache statistics.

        Returns:
            Dictionary with cache stats (key count, memory usage, etc.)
        """
        try:
            # Count POI cache keys
            pattern = "poi_cache:*"
            cursor = 0
            key_count = 0

            while True:
                cursor, keys = await self.redis.scan(
                    cursor=cursor,
                    match=pattern,
                    count=100
                )
                key_count += len(keys)
                if cursor == 0:
                    break

            return {
                "total_cached_locations": key_count,
                "cache_ttl_days": self.cache_ttl_days,
                "coordinate_precision": self.coordinate_precision
            }
        except Exception as e:
            logger.error("Failed to get cache stats", error=str(e))
            return {
                "error": str(e)
            }
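The cache key quantizes coordinates to POI_COORDINATE_PRECISION decimals, so two bakeries a few metres apart share a single cached Overpass result. A standalone illustration of that rounding (it mirrors _generate_cache_key with the precision hard-coded to 4; the coordinates are example values only):

# Illustration of the coordinate-rounding cache key; precision 4 ≈ 10 m.
def poi_cache_key(latitude: float, longitude: float, precision: int = 4) -> str:
    return f"poi_cache:{round(latitude, precision)}:{round(longitude, precision)}"

# Two locations a few metres apart collapse onto the same key and share cached POIs:
assert poi_cache_key(40.41682, -3.70379) == poi_cache_key(40.41680, -3.70381)
# A location a few hundred metres away gets its own key and triggers a fresh detection:
assert poi_cache_key(40.4200, -3.7038) != poi_cache_key(40.4168, -3.7038)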
181
services/external/app/core/poi_config.py
vendored
Normal file
@@ -0,0 +1,181 @@
"""
POI Detection Configuration

Defines POI categories, search parameters, and relevance thresholds
for automated Point of Interest detection and feature engineering.
"""

from dataclasses import dataclass
from typing import Dict


@dataclass
class POICategory:
    """POI category definition with OSM query and ML parameters"""
    name: str
    osm_query: str
    search_radius_m: int
    weight: float  # Importance weight for ML model (positive or negative)
    description: str


# POI Category Definitions based on OpenStreetMap tags
# Research-based search radii and weights for bakery demand forecasting
POI_CATEGORIES: Dict[str, POICategory] = {
    "schools": POICategory(
        name="schools",
        osm_query='["amenity"~"school|kindergarten|university|college"]',
        search_radius_m=500,
        weight=1.5,  # High positive impact - morning drop-off rush
        description="Educational institutions causing morning/afternoon rush patterns"
    ),
    "offices": POICategory(
        name="offices",
        osm_query='["office"]',
        search_radius_m=800,
        weight=1.3,  # Positive impact - weekday lunch/breakfast demand
        description="Office buildings and business centers"
    ),
    "gyms_sports": POICategory(
        name="gyms_sports",
        osm_query='["leisure"~"fitness_centre|sports_centre|stadium"]',
        search_radius_m=600,
        weight=0.8,  # Moderate impact - morning/evening activity
        description="Fitness centers and sports facilities"
    ),
    "residential": POICategory(
        name="residential",
        osm_query='["building"~"residential|apartments|house"]',
        search_radius_m=400,
        weight=1.0,  # Base demand from residents
        description="Residential buildings and housing"
    ),
    "tourism": POICategory(
        name="tourism",
        osm_query='["tourism"~"attraction|museum|hotel|hostel|guest_house"]',
        search_radius_m=1000,
        weight=1.2,  # Positive impact - tourist foot traffic
        description="Tourist attractions, hotels, and points of interest"
    ),
    "competitors": POICategory(
        name="competitors",
        osm_query='["shop"~"bakery|pastry|confectionery"]',
        search_radius_m=1000,
        weight=-0.5,  # Negative impact - competition pressure
        description="Competing bakeries and pastry shops"
    ),
    "transport_hubs": POICategory(
        name="transport_hubs",
        osm_query='["public_transport"~"station|stop"]["railway"~"station|subway_entrance|tram_stop"]',
        search_radius_m=800,
        weight=1.4,  # High impact - commuter foot traffic
        description="Public transport stations and hubs"
    ),
    "coworking": POICategory(
        name="coworking",
        osm_query='["amenity"="coworking_space"]',
        search_radius_m=600,
        weight=1.1,  # Moderate-high impact - flexible workers
        description="Coworking spaces and shared offices"
    ),
    "retail": POICategory(
        name="retail",
        osm_query='["shop"]',
        search_radius_m=500,
        weight=0.9,  # Moderate impact - general foot traffic
        description="Retail shops and commercial areas"
    )
}


# Feature Relevance Thresholds
# Determines which POI features are significant enough to include in ML models
# Based on retail gravity model research and distance decay patterns
RELEVANCE_THRESHOLDS: Dict[str, Dict[str, float]] = {
    "schools": {
        "min_proximity_score": 0.5,  # At least moderate proximity required
        "max_distance_to_nearest_m": 500,  # Must be within 500m
        "min_count": 1  # At least 1 school
    },
    "offices": {
        "min_proximity_score": 0.3,
        "max_distance_to_nearest_m": 800,
        "min_count": 2  # Offices are common; need multiple for impact
    },
    "gyms_sports": {
        "min_proximity_score": 0.4,
        "max_distance_to_nearest_m": 600,
        "min_count": 1
    },
    "residential": {
        "min_proximity_score": 1.0,  # High threshold; residential is everywhere in cities
        "max_distance_to_nearest_m": 400,
        "min_count": 5  # Need significant residential density
    },
    "tourism": {
        "min_proximity_score": 0.2,  # Lower threshold; tourism is high-impact even at distance
        "max_distance_to_nearest_m": 1000,
        "min_count": 1
    },
    "competitors": {
        "min_proximity_score": 0.1,  # Any competition is relevant (even distant)
        "max_distance_to_nearest_m": 1000,
        "min_count": 1
    },
    "transport_hubs": {
        "min_proximity_score": 0.4,
        "max_distance_to_nearest_m": 800,
        "min_count": 1
    },
    "coworking": {
        "min_proximity_score": 0.3,
        "max_distance_to_nearest_m": 600,
        "min_count": 1
    },
    "retail": {
        "min_proximity_score": 0.8,  # Retail is common; higher bar for relevance
        "max_distance_to_nearest_m": 500,
        "min_count": 3
    }
}


# Overpass API Configuration
OVERPASS_API_URL = "https://overpass-api.de/api/interpreter"
OVERPASS_TIMEOUT_SECONDS = 30
OVERPASS_MAX_RETRIES = 4  # Increased from 3 to 4 for better resilience
OVERPASS_RETRY_DELAY_SECONDS = 2  # Base delay (will use exponential backoff)


# POI Cache Configuration
POI_CACHE_TTL_DAYS = 90  # Cache POI results for 90 days
POI_REFRESH_INTERVAL_DAYS = 180  # Refresh every 6 months
POI_COORDINATE_PRECISION = 4  # Decimal places for cache key (≈10m precision)


# Distance Bands for Feature Engineering (meters)
DISTANCE_BANDS = [
    (0, 100),     # Immediate proximity
    (100, 300),   # Primary catchment (walking distance)
    (300, 500),   # Secondary catchment
    (500, 1000)   # Tertiary catchment
]


# Competitive Pressure Zones
COMPETITOR_ZONES = {
    "direct": {
        "max_distance_m": 100,
        "pressure_multiplier": -1.0  # Strong negative impact
    },
    "nearby": {
        "max_distance_m": 500,
        "pressure_multiplier": -0.5  # Moderate negative impact
    },
    "market": {
        "max_distance_m": 1000,
        "min_count_for_district": 5,  # If 5+ bakeries = bakery district
        "district_multiplier": 0.3,   # Positive impact (destination area)
        "normal_multiplier": -0.2     # Slight negative (competitive market)
    }
}
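Each POICategory pairs an OSM tag filter with a search radius and an ML weight. A hedged sketch of how such a category could be turned into an Overpass QL query around a bakery location — the actual POIDetectionService is not part of this commit, so the query template and request shape below are assumptions for illustration:

# Illustration only: assembling an Overpass QL query from a POICategory definition.
import httpx

from app.core.poi_config import POI_CATEGORIES, OVERPASS_API_URL, OVERPASS_TIMEOUT_SECONDS

def build_overpass_query(category_name: str, lat: float, lon: float) -> str:
    category = POI_CATEGORIES[category_name]
    radius = category.search_radius_m
    # Nodes, ways and relations matching the tag filter within `radius` metres,
    # returned as JSON with centroid coordinates for ways/relations.
    return (
        "[out:json][timeout:25];"
        "("
        f"node{category.osm_query}(around:{radius},{lat},{lon});"
        f"way{category.osm_query}(around:{radius},{lat},{lon});"
        f"relation{category.osm_query}(around:{radius},{lat},{lon});"
        ");"
        "out center;"
    )

async def count_pois(category_name: str, lat: float, lon: float) -> int:
    """Count POIs of one category near a location (sketch, no retries/backoff)."""
    query = build_overpass_query(category_name, lat, lon)
    async with httpx.AsyncClient(timeout=OVERPASS_TIMEOUT_SECONDS) as client:
        resp = await client.post(OVERPASS_API_URL, data={"data": query})
        resp.raise_for_status()
        return len(resp.json().get("elements", []))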
16
services/external/app/core/redis_client.py
vendored
Normal file
@@ -0,0 +1,16 @@
"""
Redis Client for POI Service

Provides access to shared Redis client for POI caching.
"""

from shared.redis_utils import get_redis_client as get_shared_redis_client


async def get_redis_client():
    """
    Get Redis client for POI service.

    Uses shared Redis infrastructure from shared utilities.
    """
    return await get_shared_redis_client()
@@ -66,7 +66,7 @@ class MadridTrafficClient(BaseTrafficClient, BaseAPIClient):
            # Parse XML data
            traffic_points = self.processor.parse_traffic_xml(xml_content)
            if not traffic_points:
                self.logger.warning("No traffic points found in XML")
                self.logger.warning("No traffic points found in XML - API may be temporarily unavailable")
                return None

            # Find nearest traffic point

@@ -98,32 +98,53 @@ class MadridAdapter(CityDataAdapter):
    async def validate_connection(self) -> bool:
        """Validate connection to AEMET and Madrid OpenData

        Note: Validation is lenient - passes if traffic API works.
        Note: Validation is lenient - allows partial failures for temporary API issues.
        AEMET rate limits may cause weather validation to fail during initialization.
        Madrid traffic API outages should not block validation entirely.
        """
        try:
            test_traffic = await self.traffic_client.get_current_traffic(
                self.madrid_lat,
                self.madrid_lon
            )
            traffic_validation_passed = False
            weather_validation_passed = False

            # Traffic API must work (critical for operations)
            if test_traffic is None:
                logger.error("Traffic API validation failed - this is critical")
            # Try traffic API first
            try:
                test_traffic = await self.traffic_client.get_current_traffic(
                    self.madrid_lat,
                    self.madrid_lon
                )

                if test_traffic is not None and len(test_traffic) > 0:
                    traffic_validation_passed = True
                    logger.info("Traffic API validation successful")
                else:
                    logger.warning("Traffic API validation failed - temporary unavailability (proceeding anyway)")
            except Exception as traffic_error:
                logger.warning("Traffic API validation error (temporary unavailability) - proceeding anyway", error=str(traffic_error))

            # Try weather API
            try:
                test_weather = await self.aemet_client.get_current_weather(
                    self.madrid_lat,
                    self.madrid_lon
                )

                if test_weather is not None:
                    weather_validation_passed = True
                    logger.info("Weather API validation successful")
                else:
                    logger.warning("Weather API validation failed (likely rate limited) - proceeding anyway")
            except Exception as weather_error:
                logger.warning("Weather API validation error - proceeding anyway", error=str(weather_error))

            # At least one validation should pass for basic connectivity
            if not traffic_validation_passed and not weather_validation_passed:
                logger.error("Both traffic and weather API validations failed - no connectivity")
                return False

            # Try weather API, but don't fail validation if rate limited
            test_weather = await self.aemet_client.get_current_weather(
                self.madrid_lat,
                self.madrid_lon
            )

            if test_weather is None:
                logger.warning("Weather API validation failed (likely rate limited) - proceeding anyway")
            else:
                logger.info("Weather API validation successful")

            # Pass validation if traffic works (weather can be fetched later)
            # Return success if at least one API is accessible
            logger.info("Adapter connection validation passed",
                        traffic_valid=traffic_validation_passed,
                        weather_valid=weather_validation_passed)
            return True

        except Exception as e:
@@ -58,7 +58,17 @@ class DataIngestionManager:
            failures=failures
        )

        return successes == len(results)
        # Consider success if we have at least some cities initialized (majority success)
        # This allows the system to continue even if some external APIs are temporarily unavailable
        if successes > 0:
            logger.info(
                "Partial success achieved - continuing with available data",
                success_ratio=f"{successes}/{len(results)}"
            )
            return True
        else:
            logger.error("All city initializations failed - system cannot proceed")
            return False

    async def initialize_city(
        self,

@@ -123,23 +133,55 @@
            logger.error("Adapter validation failed", city=city.name)
            return False

        weather_data = await adapter.fetch_historical_weather(
            start_date, end_date
        )
        # Fetch data with error handling to allow partial success
        weather_data = []
        traffic_data = []

        # Fetch weather data
        try:
            weather_data = await adapter.fetch_historical_weather(
                start_date, end_date
            )
            logger.info("Weather data fetched successfully",
                        records=len(weather_data), city=city.name)
        except Exception as weather_error:
            logger.error("Failed to fetch weather data",
                         city=city.name, error=str(weather_error))
            # Don't return False here - continue with whatever data we can get

        traffic_data = await adapter.fetch_historical_traffic(
            start_date, end_date
        )
        # Fetch traffic data
        try:
            traffic_data = await adapter.fetch_historical_traffic(
                start_date, end_date
            )
            logger.info("Traffic data fetched successfully",
                        records=len(traffic_data), city=city.name)
        except Exception as traffic_error:
            logger.error("Failed to fetch traffic data",
                         city=city.name, error=str(traffic_error))
            # Don't return False here - continue with weather data only if available

        # Store available data (at least one type should be available for partial success)
        async with self.database_manager.get_session() as session:
            repo = CityDataRepository(session)

            weather_stored = await repo.bulk_store_weather(
                city_id, weather_data
            )
            traffic_stored = await repo.bulk_store_traffic(
                city_id, traffic_data
            )
            weather_stored = 0
            traffic_stored = 0

            if weather_data:
                weather_stored = await repo.bulk_store_weather(
                    city_id, weather_data
                )

            if traffic_data:
                traffic_stored = await repo.bulk_store_traffic(
                    city_id, traffic_data
                )

            # Only fail if both data types failed to fetch
            if not weather_data and not traffic_data:
                logger.error("Both weather and traffic data fetch failed", city=city.name)
                return False

        logger.info(
            "City initialization complete",
15 services/external/app/jobs/initialize_data.py vendored
@@ -29,18 +29,21 @@ async def main(months: int = 24):
logger.info("Proceeding to seed school calendars...")
calendar_success = await manager.seed_school_calendars()

# Both must succeed
overall_success = weather_traffic_success and calendar_success
# Calendar seeding is critical, but weather/traffic can have partial success
overall_success = calendar_success and weather_traffic_success

if overall_success:
logger.info("✅ Data initialization completed successfully (weather, traffic, calendars)")
sys.exit(0)
else:
if not weather_traffic_success:
logger.error("❌ Weather/traffic initialization failed")
if not calendar_success:
logger.error("❌ Calendar seeding failed")
sys.exit(1)
logger.error("❌ Calendar seeding failed - this is critical")
sys.exit(1)
elif not weather_traffic_success:
# Log as warning instead of error if some data was retrieved
logger.warning("⚠️ Weather/traffic initialization had partial failures, but system can continue")
logger.info("✅ Calendar seeding completed - system can operate with available data")
sys.exit(0)  # Allow partial success for weather/traffic

except Exception as e:
logger.error("❌ Fatal error during initialization", error=str(e))
29 services/external/app/main.py vendored
@@ -9,8 +9,10 @@ from app.core.config import settings
from app.core.database import database_manager
from app.services.messaging import setup_messaging, cleanup_messaging
from shared.service_base import StandardFastAPIService
from shared.redis_utils import initialize_redis, close_redis
# Include routers
from app.api import weather_data, traffic_data, city_operations, calendar_operations, audit
from app.api import weather_data, traffic_data, city_operations, calendar_operations, audit, poi_context, geocoding, poi_refresh_jobs
from app.services.poi_scheduler import start_scheduler, stop_scheduler


class ExternalService(StandardFastAPIService):
@@ -41,7 +43,8 @@ class ExternalService(StandardFastAPIService):
# Define expected database tables for health checks
external_expected_tables = [
'weather_data', 'weather_forecasts', 'traffic_data',
'traffic_measurement_points', 'traffic_background_jobs'
'traffic_measurement_points', 'traffic_background_jobs',
'tenant_poi_contexts', 'poi_refresh_jobs'
]

# Define custom API checks
@@ -146,12 +149,25 @@ class ExternalService(StandardFastAPIService):

async def on_startup(self, app: FastAPI):
"""Custom startup logic for external service"""
pass
# Initialize Redis connection
await initialize_redis(settings.REDIS_URL, db=0, max_connections=50)
self.logger.info("Redis initialized for external service")

# Start POI refresh scheduler
await start_scheduler()
self.logger.info("POI refresh scheduler started")

async def on_shutdown(self, app: FastAPI):
"""Custom shutdown logic for external service"""
# Stop POI refresh scheduler
await stop_scheduler()
self.logger.info("POI refresh scheduler stopped")

# Close Redis connection
await close_redis()
self.logger.info("Redis connection closed")

# Database cleanup is handled by the base class
pass

def get_service_features(self):
"""Return external-specific features"""
@@ -182,4 +198,7 @@ service.add_router(audit.router)
service.add_router(weather_data.router)
service.add_router(traffic_data.router)
service.add_router(city_operations.router) # New v2.0 city-based optimized endpoints
service.add_router(calendar_operations.router) # School calendars and hyperlocal data
service.add_router(calendar_operations.router) # School calendars and hyperlocal data
service.add_router(poi_context.router) # POI detection and location-based features
service.add_router(geocoding.router) # Address search and geocoding
service.add_router(poi_refresh_jobs.router) # POI refresh background jobs
123 services/external/app/models/poi_context.py vendored Normal file
@@ -0,0 +1,123 @@
"""
POI Context Model

Stores Point of Interest detection results and ML features for bakery locations.
Used for location-based demand forecasting with contextual features.
"""

from sqlalchemy import Column, String, DateTime, Float, Index, Integer
from sqlalchemy.dialects.postgresql import UUID, JSONB
from datetime import datetime, timezone, timedelta
import uuid

from app.core.database import Base


class TenantPOIContext(Base):
"""
POI (Point of Interest) context for bakery location.

Stores detected POIs around bakery and calculated ML features
for demand forecasting with location-specific context.
"""

__tablename__ = "tenant_poi_contexts"

id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
tenant_id = Column(UUID(as_uuid=True), nullable=False, unique=True, index=True)

# Location (denormalized for quick reference and spatial queries)
latitude = Column(Float, nullable=False)
longitude = Column(Float, nullable=False)

# POI Detection Results (full raw data)
# Structure: {
#   "schools": {
#     "pois": [{"osm_id": "...", "name": "...", "lat": ..., "lon": ...}],
#     "features": {"proximity_score": 3.45, "count_0_100m": 2, ...},
#     "count": 5
#   },
#   "offices": {...},
#   ...
# }
poi_detection_results = Column(JSONB, nullable=False, default=dict)

# ML Features (flat structure for easy model ingestion)
# Structure: {
#   "poi_schools_proximity_score": 3.45,
#   "poi_schools_weighted_proximity_score": 5.18,
#   "poi_schools_count_0_100m": 2,
#   "poi_offices_proximity_score": 1.23,
#   ...
# }
ml_features = Column(JSONB, nullable=False, default=dict)

# Summary Statistics
total_pois_detected = Column(Integer, default=0)
high_impact_categories = Column(JSONB, default=list)  # Categories with significant POI presence
relevant_categories = Column(JSONB, default=list)  # Categories that passed relevance thresholds

# Detection Metadata
detection_timestamp = Column(DateTime(timezone=True), nullable=False)
detection_source = Column(String(50), default="overpass_api")
detection_status = Column(String(20), default="completed")  # completed, failed, partial
detection_error = Column(String(500), nullable=True)  # Error message if detection failed

# Data Freshness Strategy
# POIs don't change frequently, refresh every 6 months
next_refresh_date = Column(DateTime(timezone=True), nullable=True)
refresh_interval_days = Column(Integer, default=180)  # 6 months default
last_refreshed_at = Column(DateTime(timezone=True), nullable=True)

# Timestamps
created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
updated_at = Column(
DateTime(timezone=True),
default=lambda: datetime.now(timezone.utc),
onupdate=lambda: datetime.now(timezone.utc)
)

__table_args__ = (
Index('idx_tenant_poi_location', 'latitude', 'longitude'),
Index('idx_tenant_poi_refresh', 'next_refresh_date'),
Index('idx_tenant_poi_status', 'detection_status'),
)

def to_dict(self):
"""Convert to dictionary for API responses"""
return {
"id": str(self.id),
"tenant_id": str(self.tenant_id),
"location": {
"latitude": self.latitude,
"longitude": self.longitude
},
"poi_detection_results": self.poi_detection_results,
"ml_features": self.ml_features,
"total_pois_detected": self.total_pois_detected,
"high_impact_categories": self.high_impact_categories,
"relevant_categories": self.relevant_categories,
"detection_timestamp": self.detection_timestamp.isoformat() if self.detection_timestamp else None,
"detection_source": self.detection_source,
"detection_status": self.detection_status,
"detection_error": self.detection_error,
"next_refresh_date": self.next_refresh_date.isoformat() if self.next_refresh_date else None,
"last_refreshed_at": self.last_refreshed_at.isoformat() if self.last_refreshed_at else None,
"created_at": self.created_at.isoformat() if self.created_at else None,
"updated_at": self.updated_at.isoformat() if self.updated_at else None
}

def is_stale(self) -> bool:
"""Check if POI data needs refresh"""
if not self.next_refresh_date:
return True
return datetime.now(timezone.utc) > self.next_refresh_date

def calculate_next_refresh(self) -> datetime:
"""Calculate next refresh date based on interval"""
return datetime.now(timezone.utc) + timedelta(days=self.refresh_interval_days)

def mark_refreshed(self):
"""Mark as refreshed and calculate next refresh date"""
self.last_refreshed_at = datetime.now(timezone.utc)
self.next_refresh_date = self.calculate_next_refresh()
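A short sketch of the 180-day refresh cycle this model encodes; only the TenantPOIContext helpers come from the commit, the run_detection callable is illustrative:

from datetime import datetime, timezone

def refresh_if_stale(poi_context, run_detection):
    # run_detection(lat, lon) is a caller-supplied function returning fresh results.
    if poi_context.is_stale():
        poi_context.poi_detection_results = run_detection(
            poi_context.latitude, poi_context.longitude
        )
        poi_context.detection_timestamp = datetime.now(timezone.utc)
        # Sets last_refreshed_at and pushes next_refresh_date forward by
        # refresh_interval_days (180 by default).
        poi_context.mark_refreshed()
    return poi_context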
154 services/external/app/models/poi_refresh_job.py vendored Normal file
@@ -0,0 +1,154 @@
|
||||
"""
|
||||
POI Refresh Job Model
|
||||
|
||||
Tracks background jobs for periodic POI context refresh.
|
||||
"""
|
||||
|
||||
from sqlalchemy import Column, String, DateTime, Integer, Boolean, Text, Float
|
||||
from sqlalchemy.dialects.postgresql import UUID, JSONB
|
||||
from datetime import datetime, timezone
|
||||
import uuid
|
||||
|
||||
from app.core.database import Base
|
||||
|
||||
|
||||
class POIRefreshJob(Base):
|
||||
"""
|
||||
POI Refresh Background Job Model
|
||||
|
||||
Tracks periodic POI context refresh jobs for all tenants.
|
||||
Jobs run on a configurable schedule (default: 180 days).
|
||||
"""
|
||||
|
||||
__tablename__ = "poi_refresh_jobs"
|
||||
|
||||
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
|
||||
tenant_id = Column(UUID(as_uuid=True), nullable=False, index=True)
|
||||
|
||||
# Job scheduling
|
||||
scheduled_at = Column(
|
||||
DateTime(timezone=True),
|
||||
nullable=False,
|
||||
index=True,
|
||||
comment="When this job was scheduled"
|
||||
)
|
||||
started_at = Column(
|
||||
DateTime(timezone=True),
|
||||
nullable=True,
|
||||
comment="When job execution started"
|
||||
)
|
||||
completed_at = Column(
|
||||
DateTime(timezone=True),
|
||||
nullable=True,
|
||||
comment="When job execution completed"
|
||||
)
|
||||
|
||||
# Job status
|
||||
status = Column(
|
||||
String(50),
|
||||
nullable=False,
|
||||
default="pending",
|
||||
index=True,
|
||||
comment="Job status: pending, running, completed, failed"
|
||||
)
|
||||
|
||||
# Job execution details
|
||||
attempt_count = Column(
|
||||
Integer,
|
||||
nullable=False,
|
||||
default=0,
|
||||
comment="Number of execution attempts"
|
||||
)
|
||||
max_attempts = Column(
|
||||
Integer,
|
||||
nullable=False,
|
||||
default=3,
|
||||
comment="Maximum number of retry attempts"
|
||||
)
|
||||
|
||||
# Location data (cached for job execution)
|
||||
latitude = Column(
|
||||
Float,
|
||||
nullable=False,
|
||||
comment="Bakery latitude for POI detection"
|
||||
)
|
||||
longitude = Column(
|
||||
Float,
|
||||
nullable=False,
|
||||
comment="Bakery longitude for POI detection"
|
||||
)
|
||||
|
||||
# Results
|
||||
pois_detected = Column(
|
||||
Integer,
|
||||
nullable=True,
|
||||
comment="Number of POIs detected in this refresh"
|
||||
)
|
||||
changes_detected = Column(
|
||||
Boolean,
|
||||
default=False,
|
||||
comment="Whether significant changes were detected"
|
||||
)
|
||||
change_summary = Column(
|
||||
JSONB,
|
||||
nullable=True,
|
||||
comment="Summary of changes detected"
|
||||
)
|
||||
|
||||
# Error handling
|
||||
error_message = Column(
|
||||
Text,
|
||||
nullable=True,
|
||||
comment="Error message if job failed"
|
||||
)
|
||||
error_details = Column(
|
||||
JSONB,
|
||||
nullable=True,
|
||||
comment="Detailed error information"
|
||||
)
|
||||
|
||||
# Next execution
|
||||
next_scheduled_at = Column(
|
||||
DateTime(timezone=True),
|
||||
nullable=True,
|
||||
index=True,
|
||||
comment="When next refresh should be scheduled"
|
||||
)
|
||||
|
||||
# Metadata
|
||||
created_at = Column(
|
||||
DateTime(timezone=True),
|
||||
nullable=False,
|
||||
default=lambda: datetime.now(timezone.utc)
|
||||
)
|
||||
updated_at = Column(
|
||||
DateTime(timezone=True),
|
||||
nullable=False,
|
||||
default=lambda: datetime.now(timezone.utc),
|
||||
onupdate=lambda: datetime.now(timezone.utc)
|
||||
)
|
||||
|
||||
def __repr__(self):
|
||||
return (
|
||||
f"<POIRefreshJob(id={self.id}, tenant_id={self.tenant_id}, "
|
||||
f"status={self.status}, scheduled_at={self.scheduled_at})>"
|
||||
)
|
||||
|
||||
@property
|
||||
def is_overdue(self) -> bool:
|
||||
"""Check if job is overdue for execution"""
|
||||
if self.status in ("completed", "running"):
|
||||
return False
|
||||
return datetime.now(timezone.utc) > self.scheduled_at
|
||||
|
||||
@property
|
||||
def can_retry(self) -> bool:
|
||||
"""Check if job can be retried"""
|
||||
return self.attempt_count < self.max_attempts
|
||||
|
||||
@property
|
||||
def duration_seconds(self) -> float | None:
|
||||
"""Calculate job duration in seconds"""
|
||||
if self.started_at and self.completed_at:
|
||||
return (self.completed_at - self.started_at).total_seconds()
|
||||
return None
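A sketch of how a worker loop might use the job helpers above; the scheduling loop itself is assumed, only the POIRefreshJob columns and properties are from the commit:

from datetime import datetime, timezone

def should_run(job) -> bool:
    # Pending or failed jobs past their scheduled time, with retries left.
    return job.is_overdue and job.can_retry

def mark_started(job) -> None:
    job.status = "running"
    job.started_at = datetime.now(timezone.utc)
    job.attempt_count += 1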
271 services/external/app/repositories/poi_context_repository.py vendored Normal file
@@ -0,0 +1,271 @@
|
||||
"""
|
||||
POI Context Repository
|
||||
|
||||
Data access layer for TenantPOIContext model.
|
||||
Handles CRUD operations for POI detection results and ML features.
|
||||
"""
|
||||
|
||||
from typing import Optional, List
|
||||
from datetime import datetime, timezone
|
||||
from sqlalchemy import select, update, delete
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
import structlog
|
||||
import uuid
|
||||
|
||||
from app.models.poi_context import TenantPOIContext
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
|
||||
class POIContextRepository:
|
||||
"""
|
||||
Repository for POI context data access.
|
||||
|
||||
Manages storage and retrieval of POI detection results
|
||||
and ML features for tenant locations.
|
||||
"""
|
||||
|
||||
def __init__(self, session: AsyncSession):
|
||||
"""
|
||||
Initialize repository.
|
||||
|
||||
Args:
|
||||
session: SQLAlchemy async session
|
||||
"""
|
||||
self.session = session
|
||||
|
||||
async def create(self, poi_context_data: dict) -> TenantPOIContext:
|
||||
"""
|
||||
Create new POI context record.
|
||||
|
||||
Args:
|
||||
poi_context_data: Dictionary with POI context data
|
||||
|
||||
Returns:
|
||||
Created TenantPOIContext instance
|
||||
"""
|
||||
poi_context = TenantPOIContext(
|
||||
tenant_id=poi_context_data["tenant_id"],
|
||||
latitude=poi_context_data["latitude"],
|
||||
longitude=poi_context_data["longitude"],
|
||||
poi_detection_results=poi_context_data.get("poi_detection_results", {}),
|
||||
ml_features=poi_context_data.get("ml_features", {}),
|
||||
total_pois_detected=poi_context_data.get("total_pois_detected", 0),
|
||||
high_impact_categories=poi_context_data.get("high_impact_categories", []),
|
||||
relevant_categories=poi_context_data.get("relevant_categories", []),
|
||||
detection_timestamp=poi_context_data.get(
|
||||
"detection_timestamp",
|
||||
datetime.now(timezone.utc)
|
||||
),
|
||||
detection_source=poi_context_data.get("detection_source", "overpass_api"),
|
||||
detection_status=poi_context_data.get("detection_status", "completed"),
|
||||
detection_error=poi_context_data.get("detection_error"),
|
||||
refresh_interval_days=poi_context_data.get("refresh_interval_days", 180)
|
||||
)
|
||||
|
||||
# Calculate next refresh date
|
||||
poi_context.next_refresh_date = poi_context.calculate_next_refresh()
|
||||
|
||||
self.session.add(poi_context)
|
||||
await self.session.commit()
|
||||
await self.session.refresh(poi_context)
|
||||
|
||||
logger.info(
|
||||
"POI context created",
|
||||
tenant_id=str(poi_context.tenant_id),
|
||||
total_pois=poi_context.total_pois_detected
|
||||
)
|
||||
|
||||
return poi_context
|
||||
|
||||
async def get_by_tenant_id(self, tenant_id: str | uuid.UUID) -> Optional[TenantPOIContext]:
|
||||
"""
|
||||
Get POI context by tenant ID.
|
||||
|
||||
Args:
|
||||
tenant_id: Tenant UUID
|
||||
|
||||
Returns:
|
||||
TenantPOIContext or None if not found
|
||||
"""
|
||||
if isinstance(tenant_id, str):
|
||||
tenant_id = uuid.UUID(tenant_id)
|
||||
|
||||
stmt = select(TenantPOIContext).where(
|
||||
TenantPOIContext.tenant_id == tenant_id
|
||||
)
|
||||
result = await self.session.execute(stmt)
|
||||
return result.scalar_one_or_none()
|
||||
|
||||
async def get_by_id(self, poi_context_id: str | uuid.UUID) -> Optional[TenantPOIContext]:
|
||||
"""
|
||||
Get POI context by ID.
|
||||
|
||||
Args:
|
||||
poi_context_id: POI context UUID
|
||||
|
||||
Returns:
|
||||
TenantPOIContext or None if not found
|
||||
"""
|
||||
if isinstance(poi_context_id, str):
|
||||
poi_context_id = uuid.UUID(poi_context_id)
|
||||
|
||||
stmt = select(TenantPOIContext).where(
|
||||
TenantPOIContext.id == poi_context_id
|
||||
)
|
||||
result = await self.session.execute(stmt)
|
||||
return result.scalar_one_or_none()
|
||||
|
||||
async def update(
|
||||
self,
|
||||
tenant_id: str | uuid.UUID,
|
||||
update_data: dict
|
||||
) -> Optional[TenantPOIContext]:
|
||||
"""
|
||||
Update POI context for tenant.
|
||||
|
||||
Args:
|
||||
tenant_id: Tenant UUID
|
||||
update_data: Dictionary with fields to update
|
||||
|
||||
Returns:
|
||||
Updated TenantPOIContext or None if not found
|
||||
"""
|
||||
if isinstance(tenant_id, str):
|
||||
tenant_id = uuid.UUID(tenant_id)
|
||||
|
||||
poi_context = await self.get_by_tenant_id(tenant_id)
|
||||
if not poi_context:
|
||||
return None
|
||||
|
||||
# Update fields
|
||||
for key, value in update_data.items():
|
||||
if hasattr(poi_context, key):
|
||||
setattr(poi_context, key, value)
|
||||
|
||||
# Update timestamp
|
||||
poi_context.updated_at = datetime.now(timezone.utc)
|
||||
|
||||
await self.session.commit()
|
||||
await self.session.refresh(poi_context)
|
||||
|
||||
logger.info(
|
||||
"POI context updated",
|
||||
tenant_id=str(tenant_id),
|
||||
updated_fields=list(update_data.keys())
|
||||
)
|
||||
|
||||
return poi_context
|
||||
|
||||
async def create_or_update(
|
||||
self,
|
||||
tenant_id: str | uuid.UUID,
|
||||
poi_detection_results: dict
|
||||
) -> TenantPOIContext:
|
||||
"""
|
||||
Create new POI context or update existing one.
|
||||
|
||||
Args:
|
||||
tenant_id: Tenant UUID
|
||||
poi_detection_results: Full POI detection results
|
||||
|
||||
Returns:
|
||||
Created or updated TenantPOIContext
|
||||
"""
|
||||
if isinstance(tenant_id, str):
|
||||
tenant_id = uuid.UUID(tenant_id)
|
||||
|
||||
existing = await self.get_by_tenant_id(tenant_id)
|
||||
|
||||
poi_context_data = {
|
||||
"tenant_id": tenant_id,
|
||||
"latitude": poi_detection_results["location"]["latitude"],
|
||||
"longitude": poi_detection_results["location"]["longitude"],
|
||||
"poi_detection_results": poi_detection_results.get("poi_categories", {}),
|
||||
"ml_features": poi_detection_results.get("ml_features", {}),
|
||||
"total_pois_detected": poi_detection_results.get("summary", {}).get("total_pois_detected", 0),
|
||||
"high_impact_categories": poi_detection_results.get("summary", {}).get("high_impact_categories", []),
|
||||
"relevant_categories": poi_detection_results.get("relevant_categories", []),
|
||||
"detection_timestamp": datetime.fromisoformat(
|
||||
poi_detection_results["detection_timestamp"].replace("Z", "+00:00")
|
||||
) if isinstance(poi_detection_results.get("detection_timestamp"), str)
|
||||
else datetime.now(timezone.utc),
|
||||
"detection_status": poi_detection_results.get("detection_status", "completed"),
|
||||
"detection_error": None if poi_detection_results.get("detection_status") == "completed"
|
||||
else str(poi_detection_results.get("detection_errors"))
|
||||
}
|
||||
|
||||
if existing:
|
||||
# Update existing
|
||||
update_data = {
|
||||
**poi_context_data,
|
||||
"last_refreshed_at": datetime.now(timezone.utc)
|
||||
}
|
||||
existing.mark_refreshed() # Update next_refresh_date
|
||||
return await self.update(tenant_id, update_data)
|
||||
else:
|
||||
# Create new
|
||||
return await self.create(poi_context_data)
|
||||
|
||||
async def delete_by_tenant_id(self, tenant_id: str | uuid.UUID) -> bool:
|
||||
"""
|
||||
Delete POI context for tenant.
|
||||
|
||||
Args:
|
||||
tenant_id: Tenant UUID
|
||||
|
||||
Returns:
|
||||
True if deleted, False if not found
|
||||
"""
|
||||
if isinstance(tenant_id, str):
|
||||
tenant_id = uuid.UUID(tenant_id)
|
||||
|
||||
stmt = delete(TenantPOIContext).where(
|
||||
TenantPOIContext.tenant_id == tenant_id
|
||||
)
|
||||
result = await self.session.execute(stmt)
|
||||
await self.session.commit()
|
||||
|
||||
deleted = result.rowcount > 0
|
||||
if deleted:
|
||||
logger.info("POI context deleted", tenant_id=str(tenant_id))
|
||||
|
||||
return deleted
|
||||
|
||||
async def get_stale_contexts(self, limit: int = 100) -> List[TenantPOIContext]:
|
||||
"""
|
||||
Get POI contexts that need refresh.
|
||||
|
||||
Args:
|
||||
limit: Maximum number of contexts to return
|
||||
|
||||
Returns:
|
||||
List of stale TenantPOIContext instances
|
||||
"""
|
||||
now = datetime.now(timezone.utc)
|
||||
stmt = (
|
||||
select(TenantPOIContext)
|
||||
.where(TenantPOIContext.next_refresh_date <= now)
|
||||
.limit(limit)
|
||||
)
|
||||
result = await self.session.execute(stmt)
|
||||
return list(result.scalars().all())
|
||||
|
||||
async def count_by_status(self) -> dict:
|
||||
"""
|
||||
Count POI contexts by detection status.
|
||||
|
||||
Returns:
|
||||
Dictionary with counts by status
|
||||
"""
|
||||
from sqlalchemy import func
|
||||
|
||||
stmt = select(
|
||||
TenantPOIContext.detection_status,
|
||||
func.count(TenantPOIContext.id)
|
||||
).group_by(TenantPOIContext.detection_status)
|
||||
|
||||
result = await self.session.execute(stmt)
|
||||
rows = result.all()
|
||||
|
||||
return {status: count for status, count in rows}
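A hypothetical call site for the repository; database_manager.get_session() follows the pattern used in the ingestion manager above, and the payload is expected to be the detection-result dictionary described in create_or_update:

async def store_detection(database_manager, tenant_id, detection_results):
    # detection_results should carry "location", "detection_timestamp",
    # "poi_categories", "ml_features" and "summary" keys, as the repository expects.
    async with database_manager.get_session() as session:
        repo = POIContextRepository(session)
        ctx = await repo.create_or_update(tenant_id, detection_results)
        return ctx.to_dict()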
269 services/external/app/services/competitor_analyzer.py vendored Normal file
@@ -0,0 +1,269 @@
|
||||
"""
|
||||
Competitor Analyzer
|
||||
|
||||
Specialized analysis for competitor bakeries with competitive pressure modeling.
|
||||
Treats competitor proximity differently than other POIs, considering market dynamics.
|
||||
"""
|
||||
|
||||
from typing import Dict, List, Any, Tuple
|
||||
import structlog
|
||||
from math import radians, sin, cos, sqrt, atan2
|
||||
|
||||
from app.core.poi_config import COMPETITOR_ZONES
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
|
||||
class CompetitorAnalyzer:
|
||||
"""
|
||||
Competitive landscape analyzer for bakery locations.
|
||||
|
||||
Models competitive pressure considering:
|
||||
- Direct competition (<100m): Strong negative impact
|
||||
- Nearby competition (100-500m): Moderate negative impact
|
||||
- Market saturation (500-1000m): Can be positive (bakery district)
|
||||
or negative (competitive market)
|
||||
"""
|
||||
|
||||
def analyze_competitive_landscape(
|
||||
self,
|
||||
competitor_pois: List[Dict[str, Any]],
|
||||
bakery_location: Tuple[float, float],
|
||||
tenant_id: str = None
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Analyze competitive pressure from nearby bakeries.
|
||||
|
||||
Args:
|
||||
competitor_pois: List of detected competitor POIs
|
||||
bakery_location: Tuple of (latitude, longitude)
|
||||
tenant_id: Optional tenant ID for logging
|
||||
|
||||
Returns:
|
||||
Competitive analysis with pressure scores and market classification
|
||||
"""
|
||||
if not competitor_pois:
|
||||
logger.info(
|
||||
"No competitors detected - underserved market",
|
||||
tenant_id=tenant_id
|
||||
)
|
||||
return {
|
||||
"competitive_pressure_score": 0.0,
|
||||
"direct_competitors_count": 0,
|
||||
"nearby_competitors_count": 0,
|
||||
"market_competitors_count": 0,
|
||||
"competitive_zone": "low_competition",
|
||||
"market_type": "underserved",
|
||||
"competitive_advantage": "first_mover",
|
||||
"ml_feature_competitive_pressure": 0.0,
|
||||
"ml_feature_has_direct_competitor": 0,
|
||||
"ml_feature_competitor_density_500m": 0,
|
||||
"competitor_details": []
|
||||
}
|
||||
|
||||
# Categorize competitors by distance
|
||||
direct_competitors = [] # <100m
|
||||
nearby_competitors = [] # 100-500m
|
||||
market_competitors = [] # 500-1000m
|
||||
competitor_details = []
|
||||
|
||||
for poi in competitor_pois:
|
||||
distance_m = self._calculate_distance(
|
||||
bakery_location, (poi["lat"], poi["lon"])
|
||||
) * 1000
|
||||
|
||||
competitor_info = {
|
||||
"name": poi.get("name", "Unnamed"),
|
||||
"osm_id": poi.get("osm_id"),
|
||||
"distance_m": round(distance_m, 1),
|
||||
"lat": poi["lat"],
|
||||
"lon": poi["lon"]
|
||||
}
|
||||
|
||||
if distance_m < COMPETITOR_ZONES["direct"]["max_distance_m"]:
|
||||
direct_competitors.append(poi)
|
||||
competitor_info["zone"] = "direct"
|
||||
elif distance_m < COMPETITOR_ZONES["nearby"]["max_distance_m"]:
|
||||
nearby_competitors.append(poi)
|
||||
competitor_info["zone"] = "nearby"
|
||||
elif distance_m < COMPETITOR_ZONES["market"]["max_distance_m"]:
|
||||
market_competitors.append(poi)
|
||||
competitor_info["zone"] = "market"
|
||||
|
||||
competitor_details.append(competitor_info)
|
||||
|
||||
# Calculate competitive pressure score
|
||||
direct_pressure = (
|
||||
len(direct_competitors) *
|
||||
COMPETITOR_ZONES["direct"]["pressure_multiplier"]
|
||||
)
|
||||
nearby_pressure = (
|
||||
len(nearby_competitors) *
|
||||
COMPETITOR_ZONES["nearby"]["pressure_multiplier"]
|
||||
)
|
||||
|
||||
# Market saturation analysis
|
||||
min_for_district = COMPETITOR_ZONES["market"]["min_count_for_district"]
|
||||
if len(market_competitors) >= min_for_district:
|
||||
# Many bakeries = destination area (bakery district)
|
||||
market_pressure = COMPETITOR_ZONES["market"]["district_multiplier"]
|
||||
market_type = "bakery_district"
|
||||
elif len(market_competitors) > 2:
|
||||
market_pressure = COMPETITOR_ZONES["market"]["normal_multiplier"]
|
||||
market_type = "competitive_market"
|
||||
else:
|
||||
market_pressure = 0.0
|
||||
market_type = "normal_market"
|
||||
|
||||
competitive_pressure_score = (
|
||||
direct_pressure + nearby_pressure + market_pressure
|
||||
)
|
||||
|
||||
# Determine competitive zone classification
|
||||
if len(direct_competitors) > 0:
|
||||
competitive_zone = "high_competition"
|
||||
competitive_advantage = "differentiation_required"
|
||||
elif len(nearby_competitors) > 2:
|
||||
competitive_zone = "moderate_competition"
|
||||
competitive_advantage = "quality_focused"
|
||||
else:
|
||||
competitive_zone = "low_competition"
|
||||
competitive_advantage = "local_leader"
|
||||
|
||||
# Sort competitors by distance
|
||||
competitor_details.sort(key=lambda x: x["distance_m"])
|
||||
|
||||
logger.info(
|
||||
"Competitive analysis complete",
|
||||
tenant_id=tenant_id,
|
||||
competitive_zone=competitive_zone,
|
||||
market_type=market_type,
|
||||
total_competitors=len(competitor_pois),
|
||||
direct=len(direct_competitors),
|
||||
nearby=len(nearby_competitors),
|
||||
market=len(market_competitors),
|
||||
pressure_score=competitive_pressure_score
|
||||
)
|
||||
|
||||
return {
|
||||
# Summary scores
|
||||
"competitive_pressure_score": round(competitive_pressure_score, 2),
|
||||
|
||||
# Competitor counts by zone
|
||||
"direct_competitors_count": len(direct_competitors),
|
||||
"nearby_competitors_count": len(nearby_competitors),
|
||||
"market_competitors_count": len(market_competitors),
|
||||
"total_competitors_count": len(competitor_pois),
|
||||
|
||||
# Market classification
|
||||
"competitive_zone": competitive_zone,
|
||||
"market_type": market_type,
|
||||
"competitive_advantage": competitive_advantage,
|
||||
|
||||
# ML features (for model integration)
|
||||
"ml_feature_competitive_pressure": round(competitive_pressure_score, 2),
|
||||
"ml_feature_has_direct_competitor": 1 if len(direct_competitors) > 0 else 0,
|
||||
"ml_feature_competitor_density_500m": (
|
||||
len(direct_competitors) + len(nearby_competitors)
|
||||
),
|
||||
|
||||
# Detailed competitor information
|
||||
"competitor_details": competitor_details,
|
||||
|
||||
# Nearest competitor
|
||||
"nearest_competitor": competitor_details[0] if competitor_details else None
|
||||
}
|
||||
|
||||
def _calculate_distance(
|
||||
self,
|
||||
coord1: Tuple[float, float],
|
||||
coord2: Tuple[float, float]
|
||||
) -> float:
|
||||
"""
|
||||
Calculate Haversine distance in kilometers.
|
||||
|
||||
Args:
|
||||
coord1: Tuple of (latitude, longitude)
|
||||
coord2: Tuple of (latitude, longitude)
|
||||
|
||||
Returns:
|
||||
Distance in kilometers
|
||||
"""
|
||||
lat1, lon1 = coord1
|
||||
lat2, lon2 = coord2
|
||||
|
||||
R = 6371 # Earth radius in km
|
||||
|
||||
dlat = radians(lat2 - lat1)
|
||||
dlon = radians(lon2 - lon1)
|
||||
|
||||
a = (sin(dlat/2)**2 +
|
||||
cos(radians(lat1)) * cos(radians(lat2)) * sin(dlon/2)**2)
|
||||
c = 2 * atan2(sqrt(a), sqrt(1-a))
|
||||
|
||||
return R * c
|
||||
|
||||
def get_competitive_insights(
|
||||
self,
|
||||
analysis_result: Dict[str, Any]
|
||||
) -> List[str]:
|
||||
"""
|
||||
Generate human-readable competitive insights.
|
||||
|
||||
Args:
|
||||
analysis_result: Result from analyze_competitive_landscape
|
||||
|
||||
Returns:
|
||||
List of insight strings for business intelligence
|
||||
"""
|
||||
insights = []
|
||||
|
||||
zone = analysis_result["competitive_zone"]
|
||||
market = analysis_result["market_type"]
|
||||
pressure = analysis_result["competitive_pressure_score"]
|
||||
direct = analysis_result["direct_competitors_count"]
|
||||
nearby = analysis_result["nearby_competitors_count"]
|
||||
|
||||
# Zone-specific insights
|
||||
if zone == "high_competition":
|
||||
insights.append(
|
||||
f"⚠️ High competition: {direct} direct competitor(s) within 100m. "
|
||||
"Focus on differentiation and quality."
|
||||
)
|
||||
elif zone == "moderate_competition":
|
||||
insights.append(
|
||||
f"Moderate competition: {nearby} nearby competitor(s) within 500m. "
|
||||
"Good opportunity for market share."
|
||||
)
|
||||
else:
|
||||
insights.append(
|
||||
"✅ Low competition: Local market leader opportunity."
|
||||
)
|
||||
|
||||
# Market type insights
|
||||
if market == "bakery_district":
|
||||
insights.append(
|
||||
"📍 Bakery district: High foot traffic area with multiple bakeries. "
|
||||
"Customers actively seek bakery products here."
|
||||
)
|
||||
elif market == "competitive_market":
|
||||
insights.append(
|
||||
"Market has multiple bakeries. Quality and customer service critical."
|
||||
)
|
||||
elif market == "underserved":
|
||||
insights.append(
|
||||
"🎯 Underserved market: Potential for strong customer base growth."
|
||||
)
|
||||
|
||||
# Pressure score insight
|
||||
if pressure < -1.5:
|
||||
insights.append(
|
||||
"Strong competitive pressure expected to impact demand. "
|
||||
"Marketing and differentiation essential."
|
||||
)
|
||||
elif pressure > 0:
|
||||
insights.append(
|
||||
"Positive market dynamics: Location benefits from bakery destination traffic."
|
||||
)
|
||||
|
||||
return insights
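A worked example of the zone-based pressure calculation, using illustrative multiplier values; the real numbers live in COMPETITOR_ZONES and are not shown in this commit:

# Assumed multipliers for this sketch only.
direct_mult, nearby_mult, district_mult = -1.0, -0.3, 0.5
direct, nearby, market = 1, 2, 4          # competitor counts per zone

pressure = direct * direct_mult + nearby * nearby_mult   # -1.6
if market >= 4:                           # assumed min_count_for_district
    pressure += district_mult             # bakery-district bonus -> -1.1
print(round(pressure, 2))                 # -1.1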
282 services/external/app/services/nominatim_service.py vendored Normal file
@@ -0,0 +1,282 @@
|
||||
"""
|
||||
Nominatim Geocoding Service
|
||||
|
||||
Provides address search and geocoding using OpenStreetMap Nominatim API.
|
||||
For development: uses public API (rate-limited)
|
||||
For production: should point to self-hosted Nominatim instance
|
||||
"""
|
||||
|
||||
import httpx
|
||||
from typing import List, Dict, Any, Optional
|
||||
import structlog
|
||||
from asyncio import sleep
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
|
||||
class NominatimService:
|
||||
"""
|
||||
Nominatim geocoding and address search service.
|
||||
|
||||
Uses OpenStreetMap Nominatim API for address autocomplete and geocoding.
|
||||
Respects rate limits and usage policy.
|
||||
"""
|
||||
|
||||
# For development: public API (rate-limited to 1 req/sec)
|
||||
# For production: should be overridden with self-hosted instance
|
||||
DEFAULT_BASE_URL = "https://nominatim.openstreetmap.org"
|
||||
|
||||
def __init__(self, base_url: Optional[str] = None, user_agent: str = "BakeryIA-Forecasting/1.0"):
|
||||
"""
|
||||
Initialize Nominatim service.
|
||||
|
||||
Args:
|
||||
base_url: Nominatim server URL (defaults to public API)
|
||||
user_agent: User agent for API requests (required by Nominatim policy)
|
||||
"""
|
||||
self.base_url = (base_url or self.DEFAULT_BASE_URL).rstrip("/")
|
||||
self.user_agent = user_agent
|
||||
self.headers = {
|
||||
"User-Agent": self.user_agent
|
||||
}
|
||||
|
||||
# Rate limiting for public API (1 request per second)
|
||||
self.is_public_api = self.base_url == self.DEFAULT_BASE_URL
|
||||
self.min_request_interval = 1.0 if self.is_public_api else 0.0
|
||||
|
||||
logger.info(
|
||||
"Nominatim service initialized",
|
||||
base_url=self.base_url,
|
||||
is_public_api=self.is_public_api,
|
||||
rate_limit=f"{self.min_request_interval}s" if self.is_public_api else "none"
|
||||
)
|
||||
|
||||
async def search_address(
|
||||
self,
|
||||
query: str,
|
||||
country_code: str = "es",
|
||||
limit: int = 10
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Search for addresses matching query (autocomplete).
|
||||
|
||||
Args:
|
||||
query: Address search query
|
||||
country_code: ISO country code to restrict search (default: Spain)
|
||||
limit: Maximum number of results
|
||||
|
||||
Returns:
|
||||
List of address suggestions with display_name, lat, lon, osm_id, etc.
|
||||
"""
|
||||
if not query or len(query.strip()) < 3:
|
||||
logger.warning("Search query too short", query=query)
|
||||
return []
|
||||
|
||||
try:
|
||||
# Rate limiting for public API
|
||||
if self.is_public_api:
|
||||
await sleep(self.min_request_interval)
|
||||
|
||||
async with httpx.AsyncClient(timeout=10.0) as client:
|
||||
response = await client.get(
|
||||
f"{self.base_url}/search",
|
||||
params={
|
||||
"q": query,
|
||||
"format": "json",
|
||||
"addressdetails": 1,
|
||||
"countrycodes": country_code,
|
||||
"limit": limit,
|
||||
"accept-language": "es"
|
||||
},
|
||||
headers=self.headers
|
||||
)
|
||||
response.raise_for_status()
|
||||
results = response.json()
|
||||
|
||||
# Parse and enrich results
|
||||
addresses = []
|
||||
for result in results:
|
||||
addresses.append({
|
||||
"display_name": result.get("display_name"),
|
||||
"lat": float(result.get("lat")),
|
||||
"lon": float(result.get("lon")),
|
||||
"osm_type": result.get("osm_type"),
|
||||
"osm_id": result.get("osm_id"),
|
||||
"place_id": result.get("place_id"),
|
||||
"type": result.get("type"),
|
||||
"class": result.get("class"),
|
||||
"address": result.get("address", {}),
|
||||
"boundingbox": result.get("boundingbox", [])
|
||||
})
|
||||
|
||||
logger.info(
|
||||
"Address search completed",
|
||||
query=query,
|
||||
result_count=len(addresses)
|
||||
)
|
||||
|
||||
return addresses
|
||||
|
||||
except httpx.HTTPError as e:
|
||||
logger.error(
|
||||
"Nominatim API request failed",
|
||||
query=query,
|
||||
error=str(e)
|
||||
)
|
||||
return []
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"Unexpected error in address search",
|
||||
query=query,
|
||||
error=str(e),
|
||||
exc_info=True
|
||||
)
|
||||
return []
|
||||
|
||||
async def geocode_address(
|
||||
self,
|
||||
address: str,
|
||||
country_code: str = "es"
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
"""
|
||||
Geocode an address to get coordinates.
|
||||
|
||||
Args:
|
||||
address: Full address string
|
||||
country_code: ISO country code
|
||||
|
||||
Returns:
|
||||
Dictionary with lat, lon, display_name, address components or None
|
||||
"""
|
||||
results = await self.search_address(address, country_code, limit=1)
|
||||
|
||||
if not results:
|
||||
logger.warning("No geocoding results found", address=address)
|
||||
return None
|
||||
|
||||
result = results[0]
|
||||
|
||||
logger.info(
|
||||
"Address geocoded successfully",
|
||||
address=address,
|
||||
lat=result["lat"],
|
||||
lon=result["lon"]
|
||||
)
|
||||
|
||||
return result
|
||||
|
||||
async def reverse_geocode(
|
||||
self,
|
||||
latitude: float,
|
||||
longitude: float
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
"""
|
||||
Reverse geocode coordinates to get address.
|
||||
|
||||
Args:
|
||||
latitude: Latitude coordinate
|
||||
longitude: Longitude coordinate
|
||||
|
||||
Returns:
|
||||
Dictionary with address information or None
|
||||
"""
|
||||
try:
|
||||
# Rate limiting for public API
|
||||
if self.is_public_api:
|
||||
await sleep(self.min_request_interval)
|
||||
|
||||
async with httpx.AsyncClient(timeout=10.0) as client:
|
||||
response = await client.get(
|
||||
f"{self.base_url}/reverse",
|
||||
params={
|
||||
"lat": latitude,
|
||||
"lon": longitude,
|
||||
"format": "json",
|
||||
"addressdetails": 1,
|
||||
"accept-language": "es"
|
||||
},
|
||||
headers=self.headers
|
||||
)
|
||||
response.raise_for_status()
|
||||
result = response.json()
|
||||
|
||||
address_info = {
|
||||
"display_name": result.get("display_name"),
|
||||
"lat": float(result.get("lat")),
|
||||
"lon": float(result.get("lon")),
|
||||
"osm_type": result.get("osm_type"),
|
||||
"osm_id": result.get("osm_id"),
|
||||
"place_id": result.get("place_id"),
|
||||
"address": result.get("address", {}),
|
||||
"boundingbox": result.get("boundingbox", [])
|
||||
}
|
||||
|
||||
logger.info(
|
||||
"Reverse geocoding completed",
|
||||
lat=latitude,
|
||||
lon=longitude,
|
||||
address=address_info["display_name"]
|
||||
)
|
||||
|
||||
return address_info
|
||||
|
||||
except httpx.HTTPError as e:
|
||||
logger.error(
|
||||
"Nominatim reverse geocoding failed",
|
||||
lat=latitude,
|
||||
lon=longitude,
|
||||
error=str(e)
|
||||
)
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"Unexpected error in reverse geocoding",
|
||||
lat=latitude,
|
||||
lon=longitude,
|
||||
error=str(e),
|
||||
exc_info=True
|
||||
)
|
||||
return None
|
||||
|
||||
async def validate_coordinates(
|
||||
self,
|
||||
latitude: float,
|
||||
longitude: float
|
||||
) -> bool:
|
||||
"""
|
||||
Validate that coordinates point to a real location.
|
||||
|
||||
Args:
|
||||
latitude: Latitude to validate
|
||||
longitude: Longitude to validate
|
||||
|
||||
Returns:
|
||||
True if coordinates are valid, False otherwise
|
||||
"""
|
||||
if not (-90 <= latitude <= 90 and -180 <= longitude <= 180):
|
||||
return False
|
||||
|
||||
result = await self.reverse_geocode(latitude, longitude)
|
||||
return result is not None
|
||||
|
||||
async def health_check(self) -> bool:
|
||||
"""
|
||||
Check if Nominatim service is accessible.
|
||||
|
||||
Returns:
|
||||
True if service is healthy, False otherwise
|
||||
"""
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=5.0) as client:
|
||||
response = await client.get(
|
||||
f"{self.base_url}/status",
|
||||
params={"format": "json"},
|
||||
headers=self.headers
|
||||
)
|
||||
return response.status_code == 200
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"Nominatim health check failed",
|
||||
error=str(e)
|
||||
)
|
||||
return False
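A quick usage sketch for the service above against the public endpoint (so the built-in 1 req/s delay applies); the demo wrapper is illustrative:

import asyncio

async def demo():
    service = NominatimService()
    matches = await service.search_address("Gran Via 1, Madrid", limit=3)
    if matches:
        first = matches[0]
        back = await service.reverse_geocode(first["lat"], first["lon"])
        print(first["display_name"])
        print(back["display_name"] if back else "no reverse match")

asyncio.run(demo())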
466 services/external/app/services/poi_detection_service.py vendored Normal file
@@ -0,0 +1,466 @@
|
||||
"""
|
||||
POI Detection Service
|
||||
|
||||
Automated Point of Interest detection using Overpass API (OpenStreetMap).
|
||||
Detects nearby POIs around bakery locations and generates ML features
|
||||
for location-based demand forecasting.
|
||||
"""
|
||||
|
||||
import overpy
|
||||
from typing import List, Dict, Any, Tuple, Optional
|
||||
from datetime import datetime, timezone, timedelta
|
||||
import asyncio
|
||||
import structlog
|
||||
import httpx
|
||||
from math import radians, sin, cos, sqrt, atan2
|
||||
import random
|
||||
|
||||
from app.core.poi_config import (
|
||||
POI_CATEGORIES,
|
||||
OVERPASS_API_URL,
|
||||
OVERPASS_TIMEOUT_SECONDS,
|
||||
OVERPASS_MAX_RETRIES,
|
||||
OVERPASS_RETRY_DELAY_SECONDS,
|
||||
DISTANCE_BANDS
|
||||
)
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
|
||||
class POIDetectionService:
|
||||
"""
|
||||
Automated POI detection using Overpass API (OpenStreetMap).
|
||||
|
||||
Detects points of interest near bakery locations and calculates
|
||||
ML features for demand forecasting with location-specific context.
|
||||
"""
|
||||
|
||||
def __init__(self, overpass_url: str = OVERPASS_API_URL):
|
||||
self.overpass_url = overpass_url
|
||||
self.api = overpy.Overpass(url=overpass_url)
|
||||
self.timeout = OVERPASS_TIMEOUT_SECONDS
|
||||
|
||||
async def detect_pois_for_bakery(
|
||||
self,
|
||||
latitude: float,
|
||||
longitude: float,
|
||||
tenant_id: str
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Detect all POIs around a bakery location.
|
||||
|
||||
Args:
|
||||
latitude: Bakery latitude
|
||||
longitude: Bakery longitude
|
||||
tenant_id: Tenant identifier for logging
|
||||
|
||||
Returns:
|
||||
Complete POI detection results with ML features
|
||||
"""
|
||||
logger.info(
|
||||
"Starting POI detection",
|
||||
tenant_id=tenant_id,
|
||||
location=(latitude, longitude)
|
||||
)
|
||||
|
||||
poi_results = {}
|
||||
detection_errors = []
|
||||
|
||||
# Query each POI category with inter-query delays
|
||||
category_items = list(POI_CATEGORIES.items())
|
||||
for idx, (category_key, category) in enumerate(category_items):
|
||||
try:
|
||||
pois = await self._query_pois_with_retry(
|
||||
latitude,
|
||||
longitude,
|
||||
category.osm_query,
|
||||
category.search_radius_m,
|
||||
category_key
|
||||
)
|
||||
|
||||
# Calculate features for this category
|
||||
features = self._calculate_poi_features(
|
||||
pois,
|
||||
(latitude, longitude),
|
||||
category
|
||||
)
|
||||
|
||||
poi_results[category_key] = {
|
||||
"pois": pois,
|
||||
"features": features,
|
||||
"count": len(pois)
|
||||
}
|
||||
|
||||
logger.info(
|
||||
f"Detected {category_key}",
|
||||
count=len(pois),
|
||||
proximity_score=features["proximity_score"]
|
||||
)
|
||||
|
||||
# Add delay between categories to respect rate limits
|
||||
# (except after the last category)
|
||||
if idx < len(category_items) - 1:
|
||||
inter_query_delay = 2.0 + random.uniform(0.5, 1.5)
|
||||
await asyncio.sleep(inter_query_delay)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"Failed to detect {category_key}",
|
||||
error=str(e),
|
||||
tenant_id=tenant_id
|
||||
)
|
||||
detection_errors.append({
|
||||
"category": category_key,
|
||||
"error": str(e)
|
||||
})
|
||||
poi_results[category_key] = {
|
||||
"pois": [],
|
||||
"features": self._get_empty_features(),
|
||||
"count": 0,
|
||||
"error": str(e)
|
||||
}
|
||||
|
||||
# Add a longer delay after an error before continuing
|
||||
if idx < len(category_items) - 1:
|
||||
error_recovery_delay = 3.0 + random.uniform(1.0, 2.0)
|
||||
await asyncio.sleep(error_recovery_delay)
|
||||
|
||||
# Generate combined ML features
|
||||
ml_features = self._generate_ml_features(poi_results)
|
||||
|
||||
# Generate summary
|
||||
summary = self._generate_summary(poi_results)
|
||||
|
||||
detection_status = "completed" if not detection_errors else (
|
||||
"partial" if len(detection_errors) < len(POI_CATEGORIES) else "failed"
|
||||
)
|
||||
|
||||
return {
|
||||
"tenant_id": tenant_id,
|
||||
"location": {"latitude": latitude, "longitude": longitude},
|
||||
"detection_timestamp": datetime.now(timezone.utc).isoformat(),
|
||||
"detection_status": detection_status,
|
||||
"detection_errors": detection_errors if detection_errors else None,
|
||||
"poi_categories": poi_results,
|
||||
"ml_features": ml_features,
|
||||
"summary": summary
|
||||
}
|
||||
|
||||
async def _query_pois_with_retry(
|
||||
self,
|
||||
latitude: float,
|
||||
longitude: float,
|
||||
osm_query: str,
|
||||
radius_m: int,
|
||||
category_key: str
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Query Overpass API with exponential backoff retry logic.
|
||||
|
||||
Implements:
|
||||
- Exponential backoff with jitter
|
||||
- Extended delays for rate limiting errors
|
||||
- Proper error type detection
|
||||
"""
|
||||
last_error = None
|
||||
base_delay = OVERPASS_RETRY_DELAY_SECONDS
|
||||
|
||||
for attempt in range(OVERPASS_MAX_RETRIES):
|
||||
try:
|
||||
return await self._query_pois(
|
||||
latitude, longitude, osm_query, radius_m
|
||||
)
|
||||
except Exception as e:
|
||||
last_error = e
|
||||
error_message = str(e).lower()
|
||||
|
||||
# Determine if this is a rate limiting error
|
||||
is_rate_limit = any(phrase in error_message for phrase in [
|
||||
'too many requests',
|
||||
'rate limit',
|
||||
'server load too high',
|
||||
'quota exceeded',
|
||||
'retry later',
|
||||
'429',
|
||||
'503',
|
||||
'504'
|
||||
])
|
||||
|
||||
if attempt < OVERPASS_MAX_RETRIES - 1:
|
||||
# Calculate exponential backoff with jitter
|
||||
# For rate limiting: use longer delays (10-30 seconds)
|
||||
# For other errors: use standard backoff (2-8 seconds)
|
||||
if is_rate_limit:
|
||||
delay = base_delay * (3 ** attempt) + random.uniform(1, 5)
|
||||
delay = min(delay, 30) # Cap at 30 seconds
|
||||
else:
|
||||
delay = base_delay * (2 ** attempt) + random.uniform(0.5, 1.5)
|
||||
delay = min(delay, 10) # Cap at 10 seconds
|
||||
|
||||
logger.warning(
|
||||
f"POI query retry {attempt + 1}/{OVERPASS_MAX_RETRIES}",
|
||||
category=category_key,
|
||||
error=str(e),
|
||||
is_rate_limit=is_rate_limit,
|
||||
retry_delay=f"{delay:.1f}s"
|
||||
)
|
||||
await asyncio.sleep(delay)
|
||||
else:
|
||||
logger.error(
|
||||
"POI query failed after all retries",
|
||||
category=category_key,
|
||||
error=str(e),
|
||||
is_rate_limit=is_rate_limit
|
||||
)
|
||||
|
||||
raise last_error
|
||||
|
||||
async def _query_pois(
|
||||
self,
|
||||
latitude: float,
|
||||
longitude: float,
|
||||
osm_query: str,
|
||||
radius_m: int
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Query Overpass API for POIs in radius.
|
||||
|
||||
Raises:
|
||||
Exception: With descriptive error message from Overpass API
|
||||
"""
|
||||
|
||||
# Build Overpass QL query
|
||||
query = f"""
|
||||
[out:json][timeout:{self.timeout}];
|
||||
(
|
||||
node{osm_query}(around:{radius_m},{latitude},{longitude});
|
||||
way{osm_query}(around:{radius_m},{latitude},{longitude});
|
||||
);
|
||||
out center;
|
||||
"""
|
||||
|
||||
# Execute query (use asyncio thread pool for blocking overpy)
|
||||
loop = asyncio.get_event_loop()
|
||||
try:
|
||||
result = await loop.run_in_executor(
|
||||
None,
|
||||
self.api.query,
|
||||
query
|
||||
)
|
||||
except overpy.exception.OverpassTooManyRequests as e:
|
||||
# Explicitly handle rate limiting
|
||||
raise Exception("Too many requests - Overpass API rate limit exceeded") from e
|
||||
except overpy.exception.OverpassGatewayTimeout as e:
|
||||
# Query took too long
|
||||
raise Exception("Gateway timeout - query too complex or server busy") from e
|
||||
except overpy.exception.OverpassBadRequest as e:
|
||||
# Query syntax error
|
||||
raise Exception(f"Bad request - invalid query syntax: {str(e)}") from e
|
||||
except Exception as e:
|
||||
# Check if it's an HTTP error with status code
|
||||
error_msg = str(e).lower()
|
||||
if '429' in error_msg or 'too many' in error_msg:
|
||||
raise Exception("Too many requests - rate limit exceeded") from e
|
||||
elif '503' in error_msg or 'load too high' in error_msg:
|
||||
raise Exception("Server load too high - Overpass API overloaded") from e
|
||||
elif '504' in error_msg or 'timeout' in error_msg:
|
||||
raise Exception("Gateway timeout - server busy") from e
|
||||
else:
|
||||
# Re-raise with original message
|
||||
raise
|
||||
|
||||
# Parse results
|
||||
pois = []
|
||||
|
||||
# Process nodes
|
||||
for node in result.nodes:
|
||||
pois.append({
|
||||
"osm_id": str(node.id),
|
||||
"type": "node",
|
||||
"lat": float(node.lat),
|
||||
"lon": float(node.lon),
|
||||
"tags": dict(node.tags),
|
||||
"name": node.tags.get("name", "Unnamed")
|
||||
})
|
||||
|
||||
# Process ways (buildings, areas)
|
||||
for way in result.ways:
|
||||
# Get center point
|
||||
if hasattr(way, 'center_lat') and way.center_lat:
|
||||
lat, lon = float(way.center_lat), float(way.center_lon)
|
||||
else:
|
||||
# Calculate centroid from nodes
|
||||
if way.nodes:
|
||||
lats = [float(node.lat) for node in way.nodes]
|
||||
lons = [float(node.lon) for node in way.nodes]
|
||||
lat = sum(lats) / len(lats)
|
||||
lon = sum(lons) / len(lons)
|
||||
else:
|
||||
continue
|
||||
|
||||
pois.append({
|
||||
"osm_id": str(way.id),
|
||||
"type": "way",
|
||||
"lat": lat,
|
||||
"lon": lon,
|
||||
"tags": dict(way.tags),
|
||||
"name": way.tags.get("name", "Unnamed")
|
||||
})
|
||||
|
||||
return pois
|
||||
|
||||
def _calculate_poi_features(
|
||||
self,
|
||||
pois: List[Dict[str, Any]],
|
||||
bakery_location: Tuple[float, float],
|
||||
category
|
||||
) -> Dict[str, float]:
|
||||
"""Calculate ML features for POI category"""
|
||||
|
||||
if not pois:
|
||||
return self._get_empty_features()
|
||||
|
||||
# Calculate distances
|
||||
distances = []
|
||||
for poi in pois:
|
||||
dist_km = self._haversine_distance(
|
||||
bakery_location,
|
||||
(poi["lat"], poi["lon"])
|
||||
)
|
||||
distances.append(dist_km * 1000) # Convert to meters
|
||||
|
||||
# Feature Tier 1: Proximity Scores (PRIMARY)
|
||||
proximity_score = sum(1.0 / (1.0 + d/1000) for d in distances)
|
||||
weighted_proximity_score = proximity_score * category.weight
|
||||
|
||||
# Feature Tier 2: Distance Band Counts
|
||||
count_0_100m = sum(1 for d in distances if d <= 100)
|
||||
count_100_300m = sum(1 for d in distances if 100 < d <= 300)
|
||||
count_300_500m = sum(1 for d in distances if 300 < d <= 500)
|
||||
count_500_1000m = sum(1 for d in distances if 500 < d <= 1000)
|
||||
|
||||
# Feature Tier 3: Distance to Nearest
|
||||
distance_to_nearest_m = min(distances) if distances else 9999.0
|
||||
|
||||
# Feature Tier 4: Binary Flags
|
||||
has_within_100m = any(d <= 100 for d in distances)
|
||||
has_within_300m = any(d <= 300 for d in distances)
|
||||
has_within_500m = any(d <= 500 for d in distances)
|
||||
|
||||
return {
|
||||
# Tier 1: Proximity scores (PRIMARY for ML)
|
||||
"proximity_score": round(proximity_score, 4),
|
||||
"weighted_proximity_score": round(weighted_proximity_score, 4),
|
||||
|
||||
# Tier 2: Distance bands
|
||||
"count_0_100m": count_0_100m,
|
||||
"count_100_300m": count_100_300m,
|
||||
"count_300_500m": count_300_500m,
|
||||
"count_500_1000m": count_500_1000m,
|
||||
"total_count": len(pois),
|
||||
|
||||
# Tier 3: Distance to nearest
|
||||
"distance_to_nearest_m": round(distance_to_nearest_m, 1),
|
||||
|
||||
# Tier 4: Binary flags
|
||||
"has_within_100m": has_within_100m,
|
||||
"has_within_300m": has_within_300m,
|
||||
"has_within_500m": has_within_500m
|
||||
}
|
||||
|
||||
def _generate_ml_features(self, poi_results: Dict[str, Any]) -> Dict[str, float]:
|
||||
"""
|
||||
Generate flat feature dictionary for ML model ingestion.
|
||||
|
||||
These features will be added to Prophet/XGBoost as regressors.
|
||||
"""
|
||||
ml_features = {}
|
||||
|
||||
for category_key, data in poi_results.items():
|
||||
features = data.get("features", {})
|
||||
|
||||
# Flatten with category prefix
|
||||
for feature_name, value in features.items():
|
||||
ml_feature_name = f"poi_{category_key}_{feature_name}"
|
||||
# Convert boolean to int for ML
|
||||
if isinstance(value, bool):
|
||||
value = 1 if value else 0
|
||||
ml_features[ml_feature_name] = value
|
||||
|
||||
return ml_features
|
||||
|
||||
def _get_empty_features(self) -> Dict[str, float]:
|
||||
"""Return zero features when no POIs found"""
|
||||
return {
|
||||
"proximity_score": 0.0,
|
||||
"weighted_proximity_score": 0.0,
|
||||
"count_0_100m": 0,
|
||||
"count_100_300m": 0,
|
||||
"count_300_500m": 0,
|
||||
"count_500_1000m": 0,
|
||||
"total_count": 0,
|
||||
"distance_to_nearest_m": 9999.0,
|
||||
"has_within_100m": False,
|
||||
"has_within_300m": False,
|
||||
"has_within_500m": False
|
||||
}
|
||||
|
||||
def _haversine_distance(
|
||||
self,
|
||||
coord1: Tuple[float, float],
|
||||
coord2: Tuple[float, float]
|
||||
) -> float:
|
||||
"""
|
||||
Calculate distance between two coordinates in kilometers.
|
||||
|
||||
Uses Haversine formula for great-circle distance.
|
||||
"""
|
||||
lat1, lon1 = coord1
|
||||
lat2, lon2 = coord2
|
||||
|
||||
R = 6371 # Earth radius in km
|
||||
|
||||
dlat = radians(lat2 - lat1)
|
||||
dlon = radians(lon2 - lon1)
|
||||
|
||||
a = (sin(dlat/2)**2 +
|
||||
cos(radians(lat1)) * cos(radians(lat2)) * sin(dlon/2)**2)
|
||||
c = 2 * atan2(sqrt(a), sqrt(1-a))
|
||||
|
||||
return R * c
|
||||
|
||||
def _generate_summary(self, poi_results: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Generate human-readable summary"""
|
||||
total_pois = sum(r["count"] for r in poi_results.values())
|
||||
categories_with_pois = [
|
||||
k for k, v in poi_results.items() if v["count"] > 0
|
||||
]
|
||||
high_impact_categories = [
|
||||
k for k, v in poi_results.items()
|
||||
if v["features"]["proximity_score"] > 2.0
|
||||
]
|
||||
|
||||
return {
|
||||
"total_pois_detected": total_pois,
|
||||
"categories_with_pois": categories_with_pois,
|
||||
"high_impact_categories": high_impact_categories,
|
||||
"categories_count": len(categories_with_pois)
|
||||
}
|
||||
|
||||
async def health_check(self) -> Dict[str, Any]:
|
||||
"""Check if Overpass API is accessible"""
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=5) as client:
|
||||
response = await client.get(f"{self.overpass_url}/status")
|
||||
is_healthy = response.status_code == 200
|
||||
return {
|
||||
"healthy": is_healthy,
|
||||
"status_code": response.status_code,
|
||||
"url": self.overpass_url
|
||||
}
|
||||
except Exception as e:
|
||||
return {
|
||||
"healthy": False,
|
||||
"error": str(e),
|
||||
"url": self.overpass_url
|
||||
}
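The proximity score above is sum(1 / (1 + distance_km)) over detected POIs, scaled by the category weight. A quick check with two POIs at 100 m and 300 m:

distances_m = [100, 300]
proximity = sum(1.0 / (1.0 + d / 1000) for d in distances_m)
print(round(proximity, 4))    # 0.9091 + 0.7692 -> 1.6783
weighted = proximity * 1.5    # category.weight comes from config; 1.5 is illustrative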
184 services/external/app/services/poi_feature_selector.py vendored Normal file
@@ -0,0 +1,184 @@
"""
POI Feature Selector

Determines which POI features are relevant for ML model inclusion.
Filters out low-signal features to prevent model noise and overfitting.
"""

from typing import Dict, List, Any
import structlog

from app.core.poi_config import RELEVANCE_THRESHOLDS

logger = structlog.get_logger()


class POIFeatureSelector:
    """
    Feature relevance engine for POI-based ML features.

    Applies research-based thresholds to filter out irrelevant POI features
    that would add noise to bakery-specific demand forecasting models.
    """

    def __init__(self, thresholds: Dict[str, Dict[str, float]] = None):
        """
        Initialize feature selector.

        Args:
            thresholds: Custom relevance thresholds (defaults to RELEVANCE_THRESHOLDS)
        """
        self.thresholds = thresholds or RELEVANCE_THRESHOLDS

    def select_relevant_features(
        self,
        poi_detection_results: Dict[str, Any],
        tenant_id: str = None
    ) -> Dict[str, Any]:
        """
        Filter POI features based on relevance thresholds.

        Only includes features for POI categories that pass relevance tests.
        This prevents adding noise to ML models for bakeries where certain
        POI categories are not significant.

        Args:
            poi_detection_results: Full POI detection results
            tenant_id: Optional tenant ID for logging

        Returns:
            Dictionary with relevant features and detailed relevance report
        """
        relevant_features = {}
        relevance_report = []
        relevant_categories = []

        for category_key, data in poi_detection_results.items():
            features = data.get("features", {})
            thresholds = self.thresholds.get(category_key, {})

            if not thresholds:
                logger.warning(
                    f"No thresholds defined for category {category_key}",
                    tenant_id=tenant_id
                )
                continue

            # Check relevance criteria
            is_relevant, rejection_reason = self._check_relevance(
                features, thresholds, category_key
            )

            if is_relevant:
                # Include features with category prefix
                for feature_name, value in features.items():
                    ml_feature_name = f"poi_{category_key}_{feature_name}"
                    # Convert boolean to int for ML
                    if isinstance(value, bool):
                        value = 1 if value else 0
                    relevant_features[ml_feature_name] = value

                relevant_categories.append(category_key)
                relevance_report.append({
                    "category": category_key,
                    "relevant": True,
                    "reason": "Passes all relevance thresholds",
                    "proximity_score": features.get("proximity_score", 0),
                    "count": features.get("total_count", 0),
                    "distance_to_nearest_m": features.get("distance_to_nearest_m", 9999)
                })
            else:
                relevance_report.append({
                    "category": category_key,
                    "relevant": False,
                    "reason": rejection_reason,
                    "proximity_score": features.get("proximity_score", 0),
                    "count": features.get("total_count", 0),
                    "distance_to_nearest_m": features.get("distance_to_nearest_m", 9999)
                })

        logger.info(
            "POI feature selection complete",
            tenant_id=tenant_id,
            total_categories=len(poi_detection_results),
            relevant_categories=len(relevant_categories),
            rejected_categories=len(poi_detection_results) - len(relevant_categories)
        )

        return {
            "features": relevant_features,
            "relevant_categories": relevant_categories,
            "relevance_report": relevance_report,
            "total_features": len(relevant_features),
            "total_relevant_categories": len(relevant_categories)
        }

    def _check_relevance(
        self,
        features: Dict[str, Any],
        thresholds: Dict[str, float],
        category_key: str
    ) -> tuple[bool, str]:
        """
        Check if POI category passes relevance thresholds.

        Returns:
            Tuple of (is_relevant, rejection_reason)
        """
        # Criterion 1: Proximity score
        min_proximity = thresholds.get("min_proximity_score", 0)
        actual_proximity = features.get("proximity_score", 0)
        if actual_proximity < min_proximity:
            return False, f"Proximity score too low ({actual_proximity:.2f} < {min_proximity})"

        # Criterion 2: Distance to nearest
        max_distance = thresholds.get("max_distance_to_nearest_m", 9999)
        actual_distance = features.get("distance_to_nearest_m", 9999)
        if actual_distance > max_distance:
            return False, f"Nearest POI too far ({actual_distance:.0f}m > {max_distance}m)"

        # Criterion 3: Count threshold
        min_count = thresholds.get("min_count", 0)
        actual_count = features.get("total_count", 0)
        if actual_count < min_count:
            return False, f"Count too low ({actual_count} < {min_count})"

        return True, "Passes all thresholds"

    def get_feature_importance_summary(
        self,
        poi_detection_results: Dict[str, Any]
    ) -> List[Dict[str, Any]]:
        """
        Generate summary of feature importance for all categories.

        Useful for understanding POI landscape around a bakery.
        """
        summary = []

        for category_key, data in poi_detection_results.items():
            features = data.get("features", {})
            thresholds = self.thresholds.get(category_key, {})

            is_relevant, reason = self._check_relevance(
                features, thresholds, category_key
            ) if thresholds else (False, "No thresholds defined")

            summary.append({
                "category": category_key,
                "is_relevant": is_relevant,
                "proximity_score": features.get("proximity_score", 0),
                "weighted_score": features.get("weighted_proximity_score", 0),
                "total_count": features.get("total_count", 0),
                "distance_to_nearest_m": features.get("distance_to_nearest_m", 9999),
                "has_within_100m": features.get("has_within_100m", False),
                "rejection_reason": None if is_relevant else reason
            })

        # Sort by relevance and proximity score
        summary.sort(
            key=lambda x: (x["is_relevant"], x["proximity_score"]),
            reverse=True
        )

        return summary
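As a usage sketch of the selector above: the input shape and threshold keys are the ones the code actually reads, but the "schools" category key and the numbers are purely illustrative; real keys and thresholds come from app.core.poi_config.RELEVANCE_THRESHOLDS.

from app.services.poi_feature_selector import POIFeatureSelector

detection_results = {
    "schools": {
        "count": 3,
        "features": {
            "proximity_score": 2.5,
            "total_count": 3,
            "distance_to_nearest_m": 120.0,
            "has_within_100m": False,
        },
    },
}
thresholds = {
    "schools": {
        "min_proximity_score": 1.0,
        "max_distance_to_nearest_m": 500,
        "min_count": 1,
    },
}

selector = POIFeatureSelector(thresholds=thresholds)
result = selector.select_relevant_features(detection_results, tenant_id="demo-tenant")
# result["features"] would contain prefixed keys such as "poi_schools_proximity_score",
# with booleans converted to 0/1 for ML consumption, and result["relevance_report"]
# explains why each category was kept or rejected.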
468
services/external/app/services/poi_refresh_service.py
vendored
Normal file
@@ -0,0 +1,468 @@
"""
POI Refresh Service

Manages periodic POI context refresh jobs.
Detects changes in POI landscape and updates tenant POI contexts.
"""

import asyncio
from datetime import datetime, timezone, timedelta
from typing import Optional, Dict, Any, List
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select, and_, or_
import structlog

from app.models.poi_refresh_job import POIRefreshJob
from app.models.poi_context import TenantPOIContext
from app.services.poi_detection_service import POIDetectionService
from app.core.database import database_manager

logger = structlog.get_logger()


class POIRefreshService:
    """
    POI Refresh Service

    Manages background jobs for periodic POI context refresh.
    Default refresh cycle: 180 days (6 months).
    """

    DEFAULT_REFRESH_INTERVAL_DAYS = 180
    DEFAULT_MAX_ATTEMPTS = 3
    STALE_THRESHOLD_DAYS = 180

    def __init__(
        self,
        poi_detection_service: Optional[POIDetectionService] = None,
        refresh_interval_days: int = DEFAULT_REFRESH_INTERVAL_DAYS
    ):
        """
        Initialize POI refresh service.

        Args:
            poi_detection_service: POI detection service instance
            refresh_interval_days: Days between POI refreshes (default: 180)
        """
        self.poi_detection_service = poi_detection_service or POIDetectionService()
        self.refresh_interval_days = refresh_interval_days

        logger.info(
            "POI Refresh Service initialized",
            refresh_interval_days=refresh_interval_days
        )

    async def schedule_refresh_job(
        self,
        tenant_id: str,
        latitude: float,
        longitude: float,
        scheduled_at: Optional[datetime] = None,
        session: Optional[AsyncSession] = None
    ) -> POIRefreshJob:
        """
        Schedule a POI refresh job for a tenant.

        Args:
            tenant_id: Tenant UUID
            latitude: Bakery latitude
            longitude: Bakery longitude
            scheduled_at: When to run the job (default: now + refresh_interval)
            session: Database session

        Returns:
            Created POIRefreshJob
        """
        if scheduled_at is None:
            scheduled_at = datetime.now(timezone.utc) + timedelta(
                days=self.refresh_interval_days
            )

        async def _create_job(db_session: AsyncSession):
            # Check if pending job already exists
            result = await db_session.execute(
                select(POIRefreshJob).where(
                    and_(
                        POIRefreshJob.tenant_id == tenant_id,
                        POIRefreshJob.status.in_(["pending", "running"])
                    )
                )
            )
            existing_job = result.scalar_one_or_none()

            if existing_job:
                logger.info(
                    "POI refresh job already scheduled",
                    tenant_id=tenant_id,
                    job_id=str(existing_job.id),
                    scheduled_at=existing_job.scheduled_at
                )
                return existing_job

            # Create new job
            job = POIRefreshJob(
                tenant_id=tenant_id,
                latitude=latitude,
                longitude=longitude,
                scheduled_at=scheduled_at,
                status="pending",
                max_attempts=self.DEFAULT_MAX_ATTEMPTS
            )

            db_session.add(job)
            await db_session.commit()
            await db_session.refresh(job)

            logger.info(
                "POI refresh job scheduled",
                tenant_id=tenant_id,
                job_id=str(job.id),
                scheduled_at=scheduled_at
            )

            return job

        if session:
            return await _create_job(session)
        else:
            async with database_manager.get_session() as db_session:
                return await _create_job(db_session)

    async def execute_refresh_job(
        self,
        job_id: str,
        session: Optional[AsyncSession] = None
    ) -> Dict[str, Any]:
        """
        Execute a POI refresh job.

        Args:
            job_id: Job UUID
            session: Database session

        Returns:
            Execution result with status and details
        """
        async def _execute(db_session: AsyncSession):
            # Load job
            result = await db_session.execute(
                select(POIRefreshJob).where(POIRefreshJob.id == job_id)
            )
            job = result.scalar_one_or_none()

            if not job:
                raise ValueError(f"Job not found: {job_id}")

            if job.status == "running":
                return {
                    "status": "already_running",
                    "job_id": str(job.id),
                    "message": "Job is already running"
                }

            if job.status == "completed":
                return {
                    "status": "already_completed",
                    "job_id": str(job.id),
                    "message": "Job already completed"
                }

            if not job.can_retry:
                return {
                    "status": "max_attempts_reached",
                    "job_id": str(job.id),
                    "message": f"Max attempts ({job.max_attempts}) reached"
                }

            # Update job status
            job.status = "running"
            job.started_at = datetime.now(timezone.utc)
            job.attempt_count += 1
            await db_session.commit()

            logger.info(
                "Executing POI refresh job",
                job_id=str(job.id),
                tenant_id=str(job.tenant_id),
                attempt=job.attempt_count
            )

            try:
                # Get existing POI context
                poi_result = await db_session.execute(
                    select(TenantPOIContext).where(
                        TenantPOIContext.tenant_id == job.tenant_id
                    )
                )
                existing_context = poi_result.scalar_one_or_none()

                # Perform POI detection
                detection_result = await self.poi_detection_service.detect_pois_for_bakery(
                    latitude=job.latitude,
                    longitude=job.longitude,
                    tenant_id=str(job.tenant_id),
                    force_refresh=True
                )

                # Analyze changes
                changes = self._analyze_changes(
                    existing_context.poi_detection_results if existing_context else {},
                    detection_result
                )

                # Update job with results
                job.status = "completed"
                job.completed_at = datetime.now(timezone.utc)
                job.pois_detected = sum(
                    data.get("count", 0)
                    for data in detection_result.values()
                )
                job.changes_detected = changes["has_significant_changes"]
                job.change_summary = changes

                # Schedule next refresh
                job.next_scheduled_at = datetime.now(timezone.utc) + timedelta(
                    days=self.refresh_interval_days
                )

                await db_session.commit()

                logger.info(
                    "POI refresh job completed",
                    job_id=str(job.id),
                    tenant_id=str(job.tenant_id),
                    pois_detected=job.pois_detected,
                    changes_detected=job.changes_detected,
                    duration_seconds=job.duration_seconds
                )

                # Schedule next job
                await self.schedule_refresh_job(
                    tenant_id=str(job.tenant_id),
                    latitude=job.latitude,
                    longitude=job.longitude,
                    scheduled_at=job.next_scheduled_at,
                    session=db_session
                )

                return {
                    "status": "success",
                    "job_id": str(job.id),
                    "pois_detected": job.pois_detected,
                    "changes_detected": job.changes_detected,
                    "change_summary": changes,
                    "duration_seconds": job.duration_seconds,
                    "next_scheduled_at": job.next_scheduled_at.isoformat()
                }

            except Exception as e:
                # Job failed
                job.status = "failed"
                job.completed_at = datetime.now(timezone.utc)
                job.error_message = str(e)
                job.error_details = {
                    "error_type": type(e).__name__,
                    "error_message": str(e),
                    "attempt": job.attempt_count
                }

                # Schedule retry if attempts remaining
                if job.can_retry:
                    job.next_scheduled_at = datetime.now(timezone.utc) + timedelta(hours=1)
                    logger.warning(
                        "POI refresh job failed, will retry",
                        job_id=str(job.id),
                        tenant_id=str(job.tenant_id),
                        attempt=job.attempt_count,
                        max_attempts=job.max_attempts,
                        error=str(e)
                    )
                else:
                    logger.error(
                        "POI refresh job failed permanently",
                        job_id=str(job.id),
                        tenant_id=str(job.tenant_id),
                        attempt=job.attempt_count,
                        error=str(e),
                        exc_info=True
                    )

                await db_session.commit()

                return {
                    "status": "failed",
                    "job_id": str(job.id),
                    "error": str(e),
                    "attempt": job.attempt_count,
                    "can_retry": job.can_retry
                }

        if session:
            return await _execute(session)
        else:
            async with database_manager.get_session() as db_session:
                return await _execute(db_session)

    def _analyze_changes(
        self,
        old_results: Dict[str, Any],
        new_results: Dict[str, Any]
    ) -> Dict[str, Any]:
        """
        Analyze changes between old and new POI detection results.

        Args:
            old_results: Previous POI detection results
            new_results: New POI detection results

        Returns:
            Change analysis with significance flag
        """
        changes = {
            "has_significant_changes": False,
            "category_changes": {},
            "total_poi_change": 0,
            "new_categories": [],
            "removed_categories": []
        }

        old_categories = set(old_results.keys())
        new_categories = set(new_results.keys())

        # New categories
        changes["new_categories"] = list(new_categories - old_categories)

        # Removed categories
        changes["removed_categories"] = list(old_categories - new_categories)

        # Analyze changes per category
        for category in new_categories:
            old_count = old_results.get(category, {}).get("count", 0)
            new_count = new_results.get(category, {}).get("count", 0)
            change = new_count - old_count

            if abs(change) > 0:
                changes["category_changes"][category] = {
                    "old_count": old_count,
                    "new_count": new_count,
                    "change": change,
                    "change_percent": (change / old_count * 100) if old_count > 0 else 100
                }

                changes["total_poi_change"] += abs(change)

        # Determine if changes are significant
        # Significant if: 10+ POIs changed OR 20%+ change OR new/removed categories
        total_old_pois = sum(data.get("count", 0) for data in old_results.values())
        if total_old_pois > 0:
            change_percent = (changes["total_poi_change"] / total_old_pois) * 100
            changes["total_change_percent"] = change_percent

            changes["has_significant_changes"] = (
                changes["total_poi_change"] >= 10
                or change_percent >= 20
                or len(changes["new_categories"]) > 0
                or len(changes["removed_categories"]) > 0
            )
        else:
            changes["has_significant_changes"] = changes["total_poi_change"] > 0

        return changes

    async def get_pending_jobs(
        self,
        limit: int = 100,
        session: Optional[AsyncSession] = None
    ) -> List[POIRefreshJob]:
        """
        Get pending jobs that are due for execution.

        Args:
            limit: Maximum number of jobs to return
            session: Database session

        Returns:
            List of pending jobs
        """
        async def _get_jobs(db_session: AsyncSession):
            result = await db_session.execute(
                select(POIRefreshJob)
                .where(
                    and_(
                        POIRefreshJob.status == "pending",
                        POIRefreshJob.scheduled_at <= datetime.now(timezone.utc)
                    )
                )
                .order_by(POIRefreshJob.scheduled_at)
                .limit(limit)
            )
            return result.scalars().all()

        if session:
            return await _get_jobs(session)
        else:
            async with database_manager.get_session() as db_session:
                return await _get_jobs(db_session)

    async def process_pending_jobs(
        self,
        max_concurrent: int = 5,
        session: Optional[AsyncSession] = None
    ) -> Dict[str, Any]:
        """
        Process all pending jobs concurrently.

        Args:
            max_concurrent: Maximum concurrent job executions
            session: Database session

        Returns:
            Processing summary
        """
        pending_jobs = await self.get_pending_jobs(session=session)

        if not pending_jobs:
            logger.info("No pending POI refresh jobs")
            return {
                "total_jobs": 0,
                "successful": 0,
                "failed": 0,
                "results": []
            }

        logger.info(
            "Processing pending POI refresh jobs",
            count=len(pending_jobs),
            max_concurrent=max_concurrent
        )

        # Process jobs with concurrency limit
        semaphore = asyncio.Semaphore(max_concurrent)

        async def process_job(job: POIRefreshJob):
            async with semaphore:
                return await self.execute_refresh_job(str(job.id))

        results = await asyncio.gather(
            *[process_job(job) for job in pending_jobs],
            return_exceptions=True
        )

        # Summarize results
        successful = sum(1 for r in results if isinstance(r, dict) and r.get("status") == "success")
        failed = sum(1 for r in results if isinstance(r, dict) and r.get("status") == "failed")
        errors = sum(1 for r in results if isinstance(r, Exception))

        summary = {
            "total_jobs": len(pending_jobs),
            "successful": successful,
            "failed": failed + errors,
            "results": [r if not isinstance(r, Exception) else {"status": "error", "error": str(r)} for r in results]
        }

        logger.info(
            "POI refresh jobs processing completed",
            **summary
        )

        return summary
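A minimal usage sketch for the refresh service above: the tenant UUID and coordinates are placeholders, and the call touches the database and the Overpass-backed detection service, so this is an illustration of the call flow rather than a test. When no session is passed, the service opens its own via database_manager, as in the code above.

import asyncio
from datetime import datetime, timezone

from app.services.poi_refresh_service import POIRefreshService

async def refresh_now(tenant_id: str, lat: float, lon: float):
    service = POIRefreshService(refresh_interval_days=180)
    # Jobs default to being due refresh_interval_days from now, so pass
    # scheduled_at explicitly to make this one due immediately.
    job = await service.schedule_refresh_job(
        tenant_id=tenant_id,
        latitude=lat,
        longitude=lon,
        scheduled_at=datetime.now(timezone.utc),
    )
    # execute_refresh_job re-runs detection, diffs it against the stored
    # context, and schedules the next 180-day refresh on success.
    return await service.execute_refresh_job(str(job.id))

# Example invocation (placeholder tenant UUID, Madrid city-centre coordinates):
# asyncio.run(refresh_now("00000000-0000-0000-0000-000000000000", 40.4168, -3.7038))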
187
services/external/app/services/poi_scheduler.py
vendored
Normal file
@@ -0,0 +1,187 @@
"""
POI Refresh Scheduler

Background scheduler for periodic POI context refresh.
Runs every hour to check for and execute pending POI refresh jobs.
"""

import asyncio
from typing import Optional
from datetime import datetime, timezone
import structlog

from app.services.poi_refresh_service import POIRefreshService

logger = structlog.get_logger()


class POIRefreshScheduler:
    """
    POI Refresh Scheduler

    Background task that periodically checks for and executes
    pending POI refresh jobs.
    """

    def __init__(
        self,
        poi_refresh_service: Optional[POIRefreshService] = None,
        check_interval_seconds: int = 3600,  # 1 hour
        max_concurrent_jobs: int = 5
    ):
        """
        Initialize POI refresh scheduler.

        Args:
            poi_refresh_service: POI refresh service instance
            check_interval_seconds: Seconds between checks (default: 3600 = 1 hour)
            max_concurrent_jobs: Max concurrent job executions (default: 5)
        """
        self.poi_refresh_service = poi_refresh_service or POIRefreshService()
        self.check_interval_seconds = check_interval_seconds
        self.max_concurrent_jobs = max_concurrent_jobs

        self._task: Optional[asyncio.Task] = None
        self._running = False

        logger.info(
            "POI Refresh Scheduler initialized",
            check_interval_seconds=check_interval_seconds,
            max_concurrent_jobs=max_concurrent_jobs
        )

    async def start(self):
        """Start the scheduler background task"""
        if self._running:
            logger.warning("POI Refresh Scheduler already running")
            return

        self._running = True
        self._task = asyncio.create_task(self._run_scheduler())

        logger.info("POI Refresh Scheduler started")

    async def stop(self):
        """Stop the scheduler background task"""
        if not self._running:
            return

        self._running = False

        if self._task:
            self._task.cancel()
            try:
                await self._task
            except asyncio.CancelledError:
                pass

        logger.info("POI Refresh Scheduler stopped")

    async def _run_scheduler(self):
        """Main scheduler loop"""
        logger.info("POI Refresh Scheduler loop started")

        while self._running:
            try:
                await self._process_cycle()
            except Exception as e:
                logger.error(
                    "POI refresh scheduler cycle failed",
                    error=str(e),
                    exc_info=True
                )

            # Wait for next cycle
            try:
                await asyncio.sleep(self.check_interval_seconds)
            except asyncio.CancelledError:
                break

        logger.info("POI Refresh Scheduler loop ended")

    async def _process_cycle(self):
        """Process one scheduler cycle"""
        cycle_start = datetime.now(timezone.utc)

        logger.debug(
            "POI refresh scheduler cycle started",
            timestamp=cycle_start.isoformat()
        )

        # Process pending jobs
        result = await self.poi_refresh_service.process_pending_jobs(
            max_concurrent=self.max_concurrent_jobs
        )

        cycle_end = datetime.now(timezone.utc)
        cycle_duration = (cycle_end - cycle_start).total_seconds()

        if result["total_jobs"] > 0:
            logger.info(
                "POI refresh scheduler cycle completed",
                total_jobs=result["total_jobs"],
                successful=result["successful"],
                failed=result["failed"],
                cycle_duration_seconds=cycle_duration
            )
        else:
            logger.debug(
                "POI refresh scheduler cycle completed (no jobs)",
                cycle_duration_seconds=cycle_duration
            )

    async def trigger_immediate_check(self):
        """Trigger an immediate check for pending jobs (bypasses schedule)"""
        logger.info("POI refresh scheduler immediate check triggered")

        try:
            result = await self.poi_refresh_service.process_pending_jobs(
                max_concurrent=self.max_concurrent_jobs
            )

            logger.info(
                "POI refresh scheduler immediate check completed",
                total_jobs=result["total_jobs"],
                successful=result["successful"],
                failed=result["failed"]
            )

            return result
        except Exception as e:
            logger.error(
                "POI refresh scheduler immediate check failed",
                error=str(e),
                exc_info=True
            )
            raise

    @property
    def is_running(self) -> bool:
        """Check if scheduler is running"""
        return self._running


# Global scheduler instance
_scheduler_instance: Optional[POIRefreshScheduler] = None


def get_scheduler() -> POIRefreshScheduler:
    """Get global scheduler instance (singleton)"""
    global _scheduler_instance

    if _scheduler_instance is None:
        _scheduler_instance = POIRefreshScheduler()

    return _scheduler_instance


async def start_scheduler():
    """Start global POI refresh scheduler"""
    scheduler = get_scheduler()
    await scheduler.start()


async def stop_scheduler():
    """Stop global POI refresh scheduler"""
    scheduler = get_scheduler()
    await scheduler.stop()
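How the module-level start/stop helpers above get wired into the service is not shown in this diff; the following is only a sketch assuming the external service is a FastAPI application using a lifespan context manager.

from contextlib import asynccontextmanager

from fastapi import FastAPI

from app.services.poi_scheduler import start_scheduler, stop_scheduler

@asynccontextmanager
async def lifespan(app: FastAPI):
    # Start the hourly POI refresh loop when the service boots...
    await start_scheduler()
    yield
    # ...and cancel it cleanly on shutdown.
    await stop_scheduler()

app = FastAPI(lifespan=lifespan)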