# File: bakery-ia/services/training/app/ml/poi_feature_integrator.py
# (193 lines, 6.5 KiB, Python)

"""
POI Feature Integrator
Integrates POI features into ML training pipeline.
Fetches POI context from External service and merges features into training data.
"""
from typing import Dict, Any, Optional, List
import structlog
import pandas as pd
# Revision timestamp: 2025-11-14 20:27:39 +01:00
from shared.clients.external_client import ExternalServiceClient
logger = structlog.get_logger()
class POIFeatureIntegrator:
    """
    POI feature integration for ML training.

    Fetches POI context from External service and adds features
    to training dataframes for location-based demand forecasting.
    """

    def __init__(self, external_client: Optional["ExternalServiceClient"] = None):
        """
        Initialize POI feature integrator.

        Args:
            external_client: External service client instance (optional).
                When omitted, one is created as
                ``ExternalServiceClient(settings, "training-service")``.
        """
        if external_client is None:
            # The app settings are only imported when no client was injected.
            from app.core.config import settings

            external_client = ExternalServiceClient(settings, "training-service")
        self.external_client = external_client

    async def fetch_poi_features(
        self,
        tenant_id: str,
        latitude: float,
        longitude: float,
        force_refresh: bool = False
    ) -> Optional[Dict[str, Any]]:
        """
        Fetch POI features for tenant location (optimized for training).

        First checks if POI context exists. If not, returns None without
        triggering detection. POI detection should be triggered during tenant
        registration, not during training.

        A refresh is attempted only when the existing context is flagged as
        stale AND ``force_refresh`` is True; in every other case the features
        stored in the existing context are returned unchanged.

        Args:
            tenant_id: Tenant UUID
            latitude: Bakery latitude
            longitude: Bakery longitude
            force_refresh: Force re-detection (only use if POI context already exists)

        Returns:
            Dictionary with POI features, or None if no POI context exists or
            any error occurred while fetching it.
        """
        try:
            # Try to get existing POI context first
            existing_context = await self.external_client.get_poi_context(tenant_id)
            if not existing_context:
                logger.info(
                    "No existing POI context found - POI detection should be triggered during tenant registration",
                    tenant_id=tenant_id
                )
                return None

            poi_context = existing_context.get("poi_context", {})
            ml_features = poi_context.get("ml_features", {})
            is_stale = existing_context.get("is_stale", False)

            # Reuse what is stored unless the context is stale and the caller
            # explicitly asked for a refresh.
            if not (is_stale and force_refresh):
                logger.info(
                    "Using existing POI context",
                    tenant_id=tenant_id,
                    is_stale=is_stale,
                    feature_count=len(ml_features)
                )
                return ml_features

            logger.info(
                "POI context is stale and force_refresh=True, refreshing",
                tenant_id=tenant_id
            )
            # Only refresh if explicitly requested and context exists
            detection_result = await self.external_client.detect_poi_for_tenant(
                tenant_id=tenant_id,
                latitude=latitude,
                longitude=longitude,
                force_refresh=True
            )
            if not detection_result:
                # Fall back to the (stale) features that were already loaded.
                logger.warning(
                    "POI refresh failed, returning existing features",
                    tenant_id=tenant_id
                )
                return ml_features

            refreshed_context = detection_result.get("poi_context", {})
            refreshed_features = refreshed_context.get("ml_features", {})
            logger.info(
                "POI refresh completed",
                tenant_id=tenant_id,
                feature_count=len(refreshed_features)
            )
            return refreshed_features
        except Exception as e:
            # Best effort: any failure is logged and reported to the caller
            # as "no features" rather than raised.
            logger.warning(
                "Error fetching POI features - returning None",
                tenant_id=tenant_id,
                error=str(e)
            )
            return None

    def add_poi_features_to_dataframe(
        self,
        df: pd.DataFrame,
        poi_features: Dict[str, Any]
    ) -> pd.DataFrame:
        """
        Add POI features to training dataframe.

        POI features are static (don't vary by date), so they're
        broadcast to all rows in the dataframe.

        Note: the columns are assigned on ``df`` itself, so the dataframe
        passed in is modified and the same object is returned.

        Args:
            df: Training dataframe
            poi_features: Dictionary of POI ML features

        Returns:
            Dataframe with POI features added as columns (``df`` unchanged
            when ``poi_features`` is empty or None)
        """
        if not poi_features:
            logger.warning("No POI features to add")
            return df

        logger.info(
            "Adding POI features to dataframe",
            feature_count=len(poi_features),
            dataframe_rows=len(df)
        )

        # Add each POI feature as a column with constant value
        for feature_name, feature_value in poi_features.items():
            df[feature_name] = feature_value

        logger.info(
            "POI features added successfully",
            new_columns=list(poi_features.keys())
        )
        return df

    def get_poi_feature_names(self, poi_features: Dict[str, Any]) -> List[str]:
        """
        Get list of POI feature names for model registration.

        Args:
            poi_features: Dictionary of POI ML features

        Returns:
            List of feature names (empty when ``poi_features`` is empty or None)
        """
        return list(poi_features.keys()) if poi_features else []

    async def check_poi_service_health(self) -> bool:
        """
        Check if POI service is accessible through the external client.

        Returns:
            True if service is healthy, False otherwise
        """
        try:
            # We can test the external service health by attempting to get POI
            # context for a dummy tenant. The result itself is irrelevant:
            # completing the request without an exception (even if it returns
            # None for a missing tenant) means the service is accessible.
            await self.external_client.get_poi_context("test-tenant")
        except Exception as e:
            logger.error(
                "POI service health check failed",
                error=str(e)
            )
            return False
        return True