Initial commit - production deployment
This commit is contained in:
192
services/training/app/ml/poi_feature_integrator.py
Normal file
192
services/training/app/ml/poi_feature_integrator.py
Normal file
@@ -0,0 +1,192 @@
|
||||
"""
|
||||
POI Feature Integrator
|
||||
|
||||
Integrates POI features into ML training pipeline.
|
||||
Fetches POI context from External service and merges features into training data.
|
||||
"""
|
||||
|
||||
from typing import Dict, Any, Optional, List
|
||||
import structlog
|
||||
import pandas as pd
|
||||
|
||||
from shared.clients.external_client import ExternalServiceClient
|
||||
|
||||
# Module-level structlog logger; POIFeatureIntegrator emits its structured
# (message + key=value) log events through it.
logger = structlog.get_logger()
|
||||
|
||||
|
||||
class POIFeatureIntegrator:
    """
    POI feature integration for ML training.

    Fetches POI context from the External service and adds the resulting
    features to training dataframes for location-based demand forecasting.
    """

    def __init__(self, external_client: Optional["ExternalServiceClient"] = None):
        """
        Initialize POI feature integrator.

        Args:
            external_client: External service client instance (optional).
                When omitted, an ``ExternalServiceClient`` is constructed from
                ``app.core.config.settings`` with "training-service" as its
                second argument.
        """
        if external_client is None:
            # Settings are only imported when no client is supplied.
            from app.core.config import settings

            external_client = ExternalServiceClient(settings, "training-service")

        self.external_client = external_client

    @staticmethod
    def _extract_ml_features(context: Optional[Dict[str, Any]]) -> Dict[str, Any]:
        """Return ``context["poi_context"]["ml_features"]``, or ``{}`` when any level is missing or None."""
        poi_context = (context or {}).get("poi_context") or {}
        return poi_context.get("ml_features") or {}

    async def fetch_poi_features(
        self,
        tenant_id: str,
        latitude: float,
        longitude: float,
        force_refresh: bool = False
    ) -> Optional[Dict[str, Any]]:
        """
        Fetch POI features for tenant location (optimized for training).

        First checks if POI context exists. If not, returns None without
        triggering detection. POI detection should be triggered during tenant
        registration, not during training.

        A refresh is requested only when BOTH conditions hold: the existing
        context reports ``is_stale`` and ``force_refresh`` is true. In every
        other case the features of the existing context are returned as-is.

        Args:
            tenant_id: Tenant UUID
            latitude: Bakery latitude (only used when a refresh is requested)
            longitude: Bakery longitude (only used when a refresh is requested)
            force_refresh: Re-run detection when the existing context is
                stale; has no effect if the context is missing or not stale

        Returns:
            Dictionary with POI features (possibly empty), or None if no
            context exists or any error occurs while talking to the client
        """
        try:
            # Try to get existing POI context first
            existing_context = await self.external_client.get_poi_context(tenant_id)

            if not existing_context:
                logger.info(
                    "No existing POI context found - POI detection should be triggered during tenant registration",
                    tenant_id=tenant_id
                )
                return None

            ml_features = self._extract_ml_features(existing_context)
            is_stale = existing_context.get("is_stale", False)

            if not (is_stale and force_refresh):
                logger.info(
                    "Using existing POI context",
                    tenant_id=tenant_id,
                    is_stale=is_stale,
                    feature_count=len(ml_features)
                )
                return ml_features

            logger.info(
                "POI context is stale and force_refresh=True, refreshing",
                tenant_id=tenant_id
            )
            # Only refresh if explicitly requested and context exists
            detection_result = await self.external_client.detect_poi_for_tenant(
                tenant_id=tenant_id,
                latitude=latitude,
                longitude=longitude,
                force_refresh=True
            )

            if not detection_result:
                # Best effort: keep training on the stale features rather
                # than dropping POI features entirely.
                logger.warning(
                    "POI refresh failed, returning existing features",
                    tenant_id=tenant_id
                )
                return ml_features

            ml_features = self._extract_ml_features(detection_result)
            logger.info(
                "POI refresh completed",
                tenant_id=tenant_id,
                feature_count=len(ml_features)
            )
            return ml_features

        except Exception as e:
            # Deliberately broad: a POI lookup failure is logged and reported
            # to the caller as "no features" instead of propagating.
            logger.warning(
                "Error fetching POI features - returning None",
                tenant_id=tenant_id,
                error=str(e)
            )
            return None

    def add_poi_features_to_dataframe(
        self,
        df: pd.DataFrame,
        poi_features: Dict[str, Any]
    ) -> pd.DataFrame:
        """
        Add POI features to training dataframe.

        POI features are static (don't vary by date), so each one is assigned
        as a whole column of the dataframe. The dataframe is modified in
        place and the same object is returned.

        Args:
            df: Training dataframe (mutated in place)
            poi_features: Dictionary of POI ML features; when empty or None
                the dataframe is returned untouched

        Returns:
            The same dataframe with POI features added as columns
        """
        if not poi_features:
            logger.warning("No POI features to add")
            return df

        logger.info(
            "Adding POI features to dataframe",
            feature_count=len(poi_features),
            dataframe_rows=len(df)
        )

        # Add each POI feature as a column with constant value
        # (assumes feature values are scalars so pandas broadcasts them to
        # every row - TODO confirm against the External service payload).
        for feature_name, feature_value in poi_features.items():
            df[feature_name] = feature_value

        logger.info(
            "POI features added successfully",
            new_columns=list(poi_features.keys())
        )

        return df

    def get_poi_feature_names(self, poi_features: Dict[str, Any]) -> List[str]:
        """
        Get list of POI feature names for model registration.

        Args:
            poi_features: Dictionary of POI ML features (may be empty or None)

        Returns:
            List of feature names in dictionary order; empty list when no
            features are given
        """
        return list(poi_features) if poi_features else []

    async def check_poi_service_health(self) -> bool:
        """
        Check if POI service is accessible through the external client.

        Issues ``get_poi_context("test-tenant")`` and ignores the payload:
        the service is treated as healthy whenever the call completes
        without raising.

        Returns:
            True if the request did not raise, False otherwise
        """
        try:
            # Only reachability matters here; the response (even None for
            # the dummy tenant) is intentionally discarded.
            await self.external_client.get_poi_context("test-tenant")
            return True
        except Exception as e:
            logger.error(
                "POI service health check failed",
                error=str(e)
            )
            return False
|
||||
Reference in New Issue
Block a user