Initial commit - production deployment
This commit is contained in:
307
services/training/app/ml/calendar_features.py
Normal file
307
services/training/app/ml/calendar_features.py
Normal file
@@ -0,0 +1,307 @@
|
||||
"""
|
||||
Calendar-based Feature Engineering
|
||||
Hyperlocal school calendar and event features for demand forecasting
|
||||
"""
|
||||
|
||||
import pandas as pd
|
||||
import structlog
|
||||
from typing import Dict, List, Any, Optional
|
||||
from datetime import datetime, date, time, timedelta
|
||||
from shared.clients.external_client import ExternalServiceClient
|
||||
|
||||
# Module-level structlog logger used by the calendar feature code in this file.
logger = structlog.get_logger()
|
||||
|
||||
|
||||
class CalendarFeatureEngine:
    """
    Generates features based on school calendars and local events
    for hyperlocal demand forecasting enhancement.

    The calendar is fetched through the injected external client and cached
    per tenant on the instance. Four columns are produced by
    ``add_calendar_features``: ``is_school_holiday``, ``school_holiday_name``,
    ``school_hours_active`` and ``school_proximity_intensity``.
    """

    # Python weekday index (Monday=0) from which a day counts as weekend.
    _WEEKEND_START = 5
    # Fixed date used only to do timedelta arithmetic on time-of-day values;
    # the date part is discarded again, so its value does not matter.
    _ANCHOR_DATE = date(2000, 1, 1)
    _DATE_FORMAT = "%Y-%m-%d"
    _TIME_FORMAT = "%H:%M"

    def __init__(self, external_client: "ExternalServiceClient"):
        """Store the external client and start with an empty calendar cache."""
        self.external_client = external_client
        # Cache calendar data to avoid repeated API calls
        self.calendar_cache: Dict[str, Dict[str, Any]] = {}

    async def get_calendar_for_tenant(
        self,
        tenant_id: str,
        city_id: Optional[str] = "madrid"
    ) -> Optional[Dict[str, Any]]:
        """
        Get the assigned school calendar for a tenant.

        Args:
            tenant_id: Tenant whose location context is looked up.
            city_id: Accepted for interface compatibility; it is not used
                by this method.

        Returns:
            The calendar dict from the tenant's location context, or None
            when the tenant has no calendar or the lookup fails. Only
            successful lookups are cached, so misses are retried next call.
        """
        try:
            # Check cache first
            cache_key = f"tenant_{tenant_id}_calendar"
            if cache_key in self.calendar_cache:
                logger.debug("Using cached calendar", tenant_id=tenant_id)
                return self.calendar_cache[cache_key]

            # Get tenant location context
            context = await self.external_client.get_tenant_location_context(tenant_id)

            if not context or not context.get("calendar"):
                logger.info(
                    "No calendar assigned to tenant, using default if available",
                    tenant_id=tenant_id
                )
                return None

            calendar = context["calendar"]
            self.calendar_cache[cache_key] = calendar

            logger.info(
                "Retrieved calendar for tenant",
                tenant_id=tenant_id,
                calendar_name=calendar.get("calendar_name")
            )

            return calendar

        except Exception as e:
            # Best effort: feature generation falls back to defaults when the
            # calendar cannot be retrieved, so the failure is logged, not raised.
            logger.error(
                "Error retrieving calendar for tenant",
                tenant_id=tenant_id,
                error=str(e)
            )
            return None

    @classmethod
    def _parse_period(cls, period: Dict[str, Any]) -> tuple[date, date, Optional[str]]:
        """Convert one holiday period dict into ``(start, end, name)``."""
        return (
            datetime.strptime(period["start_date"], cls._DATE_FORMAT).date(),
            datetime.strptime(period["end_date"], cls._DATE_FORMAT).date(),
            period.get("name"),
        )

    @staticmethod
    def _match_holiday(
        check_date: date,
        parsed_periods: List[tuple[date, date, Optional[str]]]
    ) -> tuple[bool, Optional[str]]:
        """Return ``(True, name)`` for the first parsed period containing the date."""
        for start, end, name in parsed_periods:
            if start <= check_date <= end:
                return True, name
        return False, None

    def _is_date_in_holiday_period(
        self,
        check_date: date,
        holiday_periods: List[Dict[str, Any]]
    ) -> tuple[bool, Optional[str]]:
        """
        Check if a date falls within any holiday period.

        Both period bounds are inclusive. Periods are parsed one at a time,
        so parsing stops at the first match.

        Returns:
            (is_holiday, holiday_name)
        """
        for period in holiday_periods:
            start, end, name = self._parse_period(period)
            if start <= check_date <= end:
                return True, name

        return False, None

    @classmethod
    def _parse_session(
        cls,
        school_hours: Optional[Dict[str, Any]],
        start_key: str,
        end_key: str
    ) -> Optional[tuple[time, time]]:
        """Parse one ``HH:MM`` session window, or return None if it is not configured."""
        if not school_hours:
            return None
        start_raw = school_hours.get(start_key)
        end_raw = school_hours.get(end_key)
        if not start_raw or not end_raw:
            return None
        return (
            datetime.strptime(start_raw, cls._TIME_FORMAT).time(),
            datetime.strptime(end_raw, cls._TIME_FORMAT).time(),
        )

    @classmethod
    def _afternoon_session(
        cls,
        school_hours: Optional[Dict[str, Any]]
    ) -> Optional[tuple[time, time]]:
        """Return the afternoon window only when ``has_afternoon_session`` is set."""
        if not school_hours or not school_hours.get("has_afternoon_session", False):
            return None
        return cls._parse_session(school_hours, "afternoon_start", "afternoon_end")

    @classmethod
    def _shift_time(cls, base: time, minutes: int) -> time:
        """Return ``base`` moved by ``minutes`` (negative values move earlier)."""
        shifted = datetime.combine(cls._ANCHOR_DATE, base) + timedelta(minutes=minutes)
        return shifted.time()

    def _is_school_hours_active(
        self,
        check_datetime: datetime,
        school_hours: Dict[str, Any]
    ) -> bool:
        """
        Check if datetime falls during school operating hours.

        Args:
            check_datetime: DateTime to check
            school_hours: School hours configuration dict

        Returns:
            True if during school hours, False otherwise. Weekends and a
            missing or empty session configuration both yield False.
        """
        # Only check weekdays (Saturday=5, Sunday=6)
        if check_datetime.weekday() >= self._WEEKEND_START:
            return False

        check_time = check_datetime.time()

        # Morning session
        morning = self._parse_session(school_hours, "morning_start", "morning_end")
        if morning is not None and morning[0] <= check_time <= morning[1]:
            return True

        # Afternoon session (if applicable)
        afternoon = self._afternoon_session(school_hours)
        if afternoon is not None and afternoon[0] <= check_time <= afternoon[1]:
            return True

        return False

    def _calculate_school_proximity_intensity(
        self,
        check_datetime: datetime,
        school_hours: Dict[str, Any]
    ) -> float:
        """
        Calculate intensity of school-related foot traffic.

        Peaks around the start and end of the morning session; the rest of
        any configured session counts as moderate impact.

        Returns:
            1.0 inside a peak window, 0.3 during a session outside the
            peaks, 0.0 otherwise (including weekends and missing config).
        """
        # Only weekdays
        if check_datetime.weekday() >= self._WEEKEND_START:
            return 0.0

        check_time = check_datetime.time()

        morning = self._parse_session(school_hours, "morning_start", "morning_end")
        if morning is not None:
            morning_start, morning_end = morning
            peak_windows = (
                # Drop-off: 30 min before to 15 min after the session start
                (self._shift_time(morning_start, -30), self._shift_time(morning_start, 15)),
                # Pick-up: 15 min before to 30 min after the session end
                (self._shift_time(morning_end, -15), self._shift_time(morning_end, 30)),
            )
            # Peaks are checked first because they overlap the session itself.
            if any(low <= check_time <= high for low, high in peak_windows):
                return 1.0

            # During school hours (moderate impact)
            if morning_start <= check_time <= morning_end:
                return 0.3

        # Afternoon session if applicable
        afternoon = self._afternoon_session(school_hours)
        if afternoon is not None and afternoon[0] <= check_time <= afternoon[1]:
            return 0.3

        return 0.0

    @staticmethod
    def _apply_default_features(df: pd.DataFrame) -> pd.DataFrame:
        """Set all four calendar feature columns to their neutral values, in place."""
        df["is_school_holiday"] = 0
        df["school_holiday_name"] = None
        df["school_hours_active"] = 0
        df["school_proximity_intensity"] = 0.0
        return df

    async def add_calendar_features(
        self,
        df: pd.DataFrame,
        tenant_id: str,
        date_column: str = "date"
    ) -> pd.DataFrame:
        """
        Add calendar-based features to dataframe.

        Features added:
        - is_school_holiday: Binary (1/0)
        - school_holiday_name: String (name of holiday or None)
        - school_hours_active: Binary (1/0) - if during school operating hours
        - school_proximity_intensity: Float (0.0-1.0) - peak during drop-off/pick-up

        The dataframe is modified in place and also returned. If no calendar
        is available, or any error occurs, the four columns are filled with
        defaults (0 / None / 0 / 0.0). A calendar without school hours still
        gets its holiday features; only the two school-hours columns stay 0.

        Args:
            df: DataFrame with date/datetime column
            tenant_id: Tenant ID to get calendar assignment
            date_column: Name of date column

        Returns:
            DataFrame with added calendar features
        """
        try:
            logger.info(
                "Adding calendar-based features",
                tenant_id=tenant_id,
                rows=len(df)
            )

            # Get calendar for tenant
            calendar = await self.get_calendar_for_tenant(tenant_id)

            if not calendar:
                logger.warning(
                    "No calendar available, using fallback features",
                    tenant_id=tenant_id
                )
                return self._apply_default_features(df)

            holiday_periods = calendar.get("holiday_periods") or []
            school_hours = calendar.get("school_hours") or {}

            # Parse the holiday bounds once instead of once per row.
            parsed_periods = [self._parse_period(period) for period in holiday_periods]

            school_holidays = []
            holiday_names = []
            hours_active = []
            proximity_intensity = []

            for raw_value in df[date_column]:
                # pd.to_datetime always yields a value with a time component;
                # date-only input resolves to midnight.
                row_date = pd.to_datetime(raw_value)

                is_holiday, holiday_name = self._match_holiday(
                    row_date.date(),
                    parsed_periods
                )
                school_holidays.append(1 if is_holiday else 0)
                holiday_names.append(holiday_name)

                hours_active.append(
                    1 if self._is_school_hours_active(row_date, school_hours) else 0
                )
                proximity_intensity.append(
                    self._calculate_school_proximity_intensity(row_date, school_hours)
                )

            # Add features to dataframe
            df["is_school_holiday"] = school_holidays
            df["school_holiday_name"] = holiday_names
            df["school_hours_active"] = hours_active
            df["school_proximity_intensity"] = proximity_intensity

            logger.info(
                "Calendar features added successfully",
                tenant_id=tenant_id,
                holiday_periods_count=len(holiday_periods),
                holidays_found=sum(school_holidays)
            )

            return df

        except Exception as e:
            logger.error(
                "Error adding calendar features",
                tenant_id=tenant_id,
                error=str(e)
            )
            # Return df with default features on error
            return self._apply_default_features(df)
|
||||
Reference in New Issue
Block a user