193 lines
6.5 KiB
Python
193 lines
6.5 KiB
Python
"""
|
|
POI Feature Integrator
|
|
|
|
Integrates POI features into ML training pipeline.
|
|
Fetches POI context from External service and merges features into training data.
|
|
"""
|
|
|
|
from typing import Dict, Any, Optional, List
|
|
import structlog
|
|
import pandas as pd
|
|
|
|
from shared.clients.external_client import ExternalServiceClient
|
|
|
|
# Module-level structlog logger used by POIFeatureIntegrator for all of its log events.
logger = structlog.get_logger()
|
|
|
|
|
|
class POIFeatureIntegrator:
    """
    POI feature integration for ML training.

    Fetches POI context from External service and adds features
    to training dataframes for location-based demand forecasting.
    """

    def __init__(self, external_client: Optional["ExternalServiceClient"] = None):
        """
        Initialize POI feature integrator.

        Args:
            external_client: External service client instance (optional).
                When omitted, a client is built from the application settings
                and identified as "training-service".
        """
        if external_client is None:
            # Imported only on this path, so callers that inject their own
            # client never need the application settings module.
            from app.core.config import settings

            external_client = ExternalServiceClient(settings, "training-service")

        self.external_client = external_client

    @staticmethod
    def _extract_ml_features(context: Optional[Dict[str, Any]]) -> Dict[str, Any]:
        """
        Pull the ``poi_context -> ml_features`` mapping out of a service response.

        A missing key and an explicit ``None`` value are treated the same way
        (as "no features"), so a response such as ``{"poi_context": None}``
        yields an empty dict instead of raising.

        Args:
            context: Response dictionary from the external client, or None

        Returns:
            The ML feature dictionary, or an empty dict if none is present
        """
        poi_context = (context or {}).get("poi_context") or {}
        return poi_context.get("ml_features") or {}

    async def fetch_poi_features(
        self,
        tenant_id: str,
        latitude: float,
        longitude: float,
        force_refresh: bool = False
    ) -> Optional[Dict[str, Any]]:
        """
        Fetch POI features for tenant location (optimized for training).

        First checks if POI context exists. If not, returns None without triggering detection.
        POI detection should be triggered during tenant registration, not during training.

        A refresh is only attempted when the existing context is flagged stale
        AND ``force_refresh`` is True; in every other case the existing
        features are returned as-is.

        Args:
            tenant_id: Tenant UUID
            latitude: Bakery latitude
            longitude: Bakery longitude
            force_refresh: Force re-detection (only use if POI context already exists)

        Returns:
            Dictionary with POI features, or None if no context exists or the
            lookup failed. Errors are logged and never propagated.
        """
        try:
            # Try to get existing POI context first
            existing_context = await self.external_client.get_poi_context(tenant_id)

            if not existing_context:
                logger.info(
                    "No existing POI context found - POI detection should be triggered during tenant registration",
                    tenant_id=tenant_id
                )
                return None

            ml_features = self._extract_ml_features(existing_context)
            is_stale = existing_context.get("is_stale", False)

            if not is_stale or not force_refresh:
                logger.info(
                    "Using existing POI context",
                    tenant_id=tenant_id,
                    is_stale=is_stale,
                    feature_count=len(ml_features)
                )
                return ml_features

            logger.info(
                "POI context is stale and force_refresh=True, refreshing",
                tenant_id=tenant_id
            )
            # Only refresh if explicitly requested and context exists
            detection_result = await self.external_client.detect_poi_for_tenant(
                tenant_id=tenant_id,
                latitude=latitude,
                longitude=longitude,
                force_refresh=True
            )

            if not detection_result:
                # Stale features are still better than none for training.
                logger.warning(
                    "POI refresh failed, returning existing features",
                    tenant_id=tenant_id
                )
                return ml_features

            ml_features = self._extract_ml_features(detection_result)
            logger.info(
                "POI refresh completed",
                tenant_id=tenant_id,
                feature_count=len(ml_features)
            )
            return ml_features

        except Exception as e:
            # Deliberate best-effort: the failure is logged and reported to
            # the caller as "no features" (None) instead of being raised.
            logger.warning(
                "Error fetching POI features - returning None",
                tenant_id=tenant_id,
                error=str(e)
            )
            return None

    def add_poi_features_to_dataframe(
        self,
        df: pd.DataFrame,
        poi_features: Optional[Dict[str, Any]]
    ) -> pd.DataFrame:
        """
        Add POI features to training dataframe.

        POI features are static (don't vary by date), so they're
        broadcast to all rows in the dataframe.

        Note that ``df`` is modified in place (columns are assigned directly
        on it) and the same object is returned. A feature whose name matches
        an existing column overwrites that column.

        Args:
            df: Training dataframe
            poi_features: Dictionary of POI ML features (may be None or empty)

        Returns:
            Dataframe with POI features added as columns; returned unchanged
            when there are no features to add
        """
        if not poi_features:
            logger.warning("No POI features to add")
            return df

        logger.info(
            "Adding POI features to dataframe",
            feature_count=len(poi_features),
            dataframe_rows=len(df)
        )

        # Add each POI feature as a column with constant value
        for feature_name, feature_value in poi_features.items():
            df[feature_name] = feature_value

        logger.info(
            "POI features added successfully",
            new_columns=list(poi_features)
        )

        return df

    def get_poi_feature_names(self, poi_features: Optional[Dict[str, Any]]) -> List[str]:
        """
        Get list of POI feature names for model registration.

        Args:
            poi_features: Dictionary of POI ML features (may be None or empty)

        Returns:
            List of feature names in dictionary order; empty list when there
            are no features
        """
        return list(poi_features) if poi_features else []

    async def check_poi_service_health(self) -> bool:
        """
        Check if POI service is accessible through the external client.

        Returns:
            True if service is healthy, False otherwise
        """
        try:
            # We can test the external service health by attempting to get POI context for a dummy tenant
            # This will go through the proper authentication and routing
            # NOTE(review): this only detects failures that surface as
            # exceptions - confirm get_poi_context raises on connectivity errors.
            await self.external_client.get_poi_context("test-tenant")
        except Exception as e:
            logger.error(
                "POI service health check failed",
                error=str(e)
            )
            return False

        # If we can successfully make a request (even if it returns None for missing tenant),
        # it means the service is accessible
        return True