# Listing metadata: 308 lines, 10 KiB, Python
"""
Calendar-based Feature Engineering
Hyperlocal school calendar and event features for demand forecasting
"""
|
|
|
|
import pandas as pd
|
|
import structlog
|
|
from typing import Dict, List, Any, Optional
|
|
from datetime import datetime, date, time, timedelta
|
|
from shared.clients.external_client import ExternalServiceClient
|
|
|
|
# Module-level structlog logger used for all structured log events emitted
# by CalendarFeatureEngine below.
logger = structlog.get_logger()
|
|
|
|
|
|
class CalendarFeatureEngine:
    """
    Generates features based on school calendars and local events
    for hyperlocal demand forecasting enhancement.

    Calendars are fetched through the injected external client and cached
    per tenant on the instance; cached entries are never evicted or
    refreshed by this class.
    """

    # strptime formats of the date / time strings found in calendar payloads.
    _DATE_FORMAT = "%Y-%m-%d"
    _TIME_FORMAT = "%H:%M"

    # Drop-off peak: 30 min before to 15 min after the morning start.
    _DROP_OFF_LEAD_MINUTES = 30
    _DROP_OFF_LAG_MINUTES = 15
    # Pick-up peak: 15 min before to 30 min after the morning end.
    _PICK_UP_LEAD_MINUTES = 15
    _PICK_UP_LAG_MINUTES = 30

    _PEAK_INTENSITY = 1.0
    _IN_SESSION_INTENSITY = 0.3

    # Arbitrary fixed day used only to do timedelta arithmetic on time-of-day
    # values; a constant keeps the computation independent of the wall clock.
    _ANCHOR_DATE = date(2000, 1, 1)

    def __init__(self, external_client: "ExternalServiceClient"):
        """
        Args:
            external_client: Client used to look up a tenant's location
                context (and the calendar assigned to it).
        """
        self.external_client = external_client
        # Cache calendar data to avoid repeated API calls.
        # Keys have the form "tenant_<tenant_id>_calendar".
        self.calendar_cache: Dict[str, Any] = {}

    async def get_calendar_for_tenant(
        self,
        tenant_id: str,
        city_id: Optional[str] = "madrid"
    ) -> Optional[Dict[str, Any]]:
        """
        Get the assigned school calendar for a tenant.

        Args:
            tenant_id: Tenant whose calendar assignment is looked up.
            city_id: Not used by this method; kept so existing callers that
                pass it keep working.

        Returns:
            The calendar dict from the tenant's location context, or None
            when the tenant has no calendar assigned or the lookup fails
            (failures are logged, never raised).
        """
        try:
            # Check cache first
            cache_key = f"tenant_{tenant_id}_calendar"
            if cache_key in self.calendar_cache:
                logger.debug("Using cached calendar", tenant_id=tenant_id)
                return self.calendar_cache[cache_key]

            # Get tenant location context
            context = await self.external_client.get_tenant_location_context(tenant_id)

            if not context or not context.get("calendar"):
                logger.info(
                    "No calendar assigned to tenant, using default if available",
                    tenant_id=tenant_id
                )
                # Negative results are deliberately not cached, so a later
                # assignment is picked up on the next call.
                return None

            calendar = context["calendar"]
            self.calendar_cache[cache_key] = calendar

            logger.info(
                "Retrieved calendar for tenant",
                tenant_id=tenant_id,
                calendar_name=calendar.get("calendar_name")
            )
            return calendar

        except Exception as e:
            # Best-effort lookup: feature generation falls back to defaults
            # when no calendar is returned, so errors are logged, not raised.
            logger.error(
                "Error retrieving calendar for tenant",
                tenant_id=tenant_id,
                error=str(e)
            )
            return None

    def _iter_holiday_ranges(self, holiday_periods: List[Dict[str, Any]]):
        """Lazily yield (start_date, end_date, period) for each holiday period."""
        for period in holiday_periods:
            start = datetime.strptime(period["start_date"], self._DATE_FORMAT).date()
            end = datetime.strptime(period["end_date"], self._DATE_FORMAT).date()
            yield start, end, period

    @staticmethod
    def _find_holiday(check_date: date, ranges) -> tuple[bool, Optional[str]]:
        """Return (True, name) for the first range containing check_date, else (False, None)."""
        for start, end, period in ranges:
            if start <= check_date <= end:
                return True, period["name"]
        return False, None

    def _is_date_in_holiday_period(
        self,
        check_date: date,
        holiday_periods: List[Dict[str, Any]]
    ) -> tuple[bool, Optional[str]]:
        """
        Check if a date falls within any holiday period (bounds inclusive).

        Args:
            check_date: Date to test.
            holiday_periods: Dicts with "start_date" / "end_date" strings in
                YYYY-MM-DD format and a "name".

        Returns:
            (is_holiday, holiday_name); the name of the first matching
            period, or (False, None) when no period matches.
        """
        # Periods are parsed lazily, so the scan stops at the first match.
        return self._find_holiday(check_date, self._iter_holiday_ranges(holiday_periods))

    def _shift_time(self, value: time, minutes: int) -> time:
        """Return the time-of-day `minutes` after (or before, if negative) `value`."""
        return (
            datetime.combine(self._ANCHOR_DATE, value) + timedelta(minutes=minutes)
        ).time()

    def _parse_school_hours(
        self,
        school_hours: Optional[Dict[str, Any]]
    ) -> Optional[Dict[str, Any]]:
        """
        Parse a school hours configuration once into time-of-day windows.

        Returns:
            None when the configuration is empty or has no morning session
            (no school-hours signal can be derived). Otherwise a dict with
            (start, end) tuples under "morning", "drop_off" and "pick_up",
            and under "afternoon" either a (start, end) tuple or None.

        Raises:
            ValueError: If a configured time string is not in HH:MM format.
        """
        if not school_hours:
            return None

        raw_start = school_hours.get("morning_start")
        raw_end = school_hours.get("morning_end")
        if not raw_start or not raw_end:
            return None

        morning_start = datetime.strptime(raw_start, self._TIME_FORMAT).time()
        morning_end = datetime.strptime(raw_end, self._TIME_FORMAT).time()

        # Afternoon session (if applicable and fully configured)
        afternoon = None
        if school_hours.get("has_afternoon_session", False):
            raw_afternoon_start = school_hours.get("afternoon_start")
            raw_afternoon_end = school_hours.get("afternoon_end")
            if raw_afternoon_start and raw_afternoon_end:
                afternoon = (
                    datetime.strptime(raw_afternoon_start, self._TIME_FORMAT).time(),
                    datetime.strptime(raw_afternoon_end, self._TIME_FORMAT).time(),
                )

        return {
            "morning": (morning_start, morning_end),
            "afternoon": afternoon,
            "drop_off": (
                self._shift_time(morning_start, -self._DROP_OFF_LEAD_MINUTES),
                self._shift_time(morning_start, self._DROP_OFF_LAG_MINUTES),
            ),
            "pick_up": (
                self._shift_time(morning_end, -self._PICK_UP_LEAD_MINUTES),
                self._shift_time(morning_end, self._PICK_UP_LAG_MINUTES),
            ),
        }

    @staticmethod
    def _in_window(check_time: time, window: Optional[tuple]) -> bool:
        """Return True when check_time lies inside the inclusive (start, end) window."""
        return window is not None and window[0] <= check_time <= window[1]

    def _within_school_sessions(
        self,
        check_time: time,
        hours: Optional[Dict[str, Any]]
    ) -> bool:
        """Return True when check_time is inside the morning or afternoon session."""
        if hours is None:
            return False
        return (
            self._in_window(check_time, hours["morning"])
            or self._in_window(check_time, hours["afternoon"])
        )

    def _intensity_for_time(
        self,
        check_time: time,
        hours: Optional[Dict[str, Any]]
    ) -> float:
        """Map a weekday time-of-day to a foot-traffic intensity."""
        if hours is None:
            return 0.0
        # Peak windows take precedence over the in-session baseline.
        if self._in_window(check_time, hours["drop_off"]):
            return self._PEAK_INTENSITY
        if self._in_window(check_time, hours["pick_up"]):
            return self._PEAK_INTENSITY
        if self._within_school_sessions(check_time, hours):
            return self._IN_SESSION_INTENSITY
        return 0.0

    def _is_school_hours_active(
        self,
        check_datetime: datetime,
        school_hours: Dict[str, Any]
    ) -> bool:
        """
        Check if datetime falls during school operating hours.

        Args:
            check_datetime: DateTime to check
            school_hours: School hours configuration dict with HH:MM strings
                under "morning_start" / "morning_end" and, when
                "has_afternoon_session" is truthy, "afternoon_start" /
                "afternoon_end".

        Returns:
            True if during school hours, False otherwise. Weekends and an
            empty or incomplete configuration always yield False.
        """
        # Only check weekdays
        if check_datetime.weekday() >= 5:  # Saturday=5, Sunday=6
            return False
        return self._within_school_sessions(
            check_datetime.time(), self._parse_school_hours(school_hours)
        )

    def _calculate_school_proximity_intensity(
        self,
        check_datetime: datetime,
        school_hours: Dict[str, Any]
    ) -> float:
        """
        Calculate intensity of school-related foot traffic.

        Peaks around the morning session's start (drop-off) and end
        (pick-up); the afternoon session only contributes the in-session
        baseline.

        Returns:
            1.0 inside the drop-off or pick-up window, 0.3 during a school
            session, 0.0 otherwise (including weekends and an empty or
            incomplete configuration).
        """
        # Only weekdays
        if check_datetime.weekday() >= 5:
            return 0.0
        return self._intensity_for_time(
            check_datetime.time(), self._parse_school_hours(school_hours)
        )

    def _compute_feature_columns(
        self,
        dates: Any,
        holiday_periods: List[Dict[str, Any]],
        school_hours: Optional[Dict[str, Any]]
    ) -> tuple[List[int], List[Optional[str]], List[int], List[float]]:
        """
        Compute the four calendar feature columns for an iterable of dates.

        Holiday periods and school hours are parsed once up front instead
        of once per row. Values that convert to a missing timestamp get the
        default feature values for that row only.

        Returns:
            (is_school_holiday, school_holiday_name, school_hours_active,
            school_proximity_intensity) as equally long lists.
        """
        ranges = list(self._iter_holiday_ranges(holiday_periods))
        hours = self._parse_school_hours(school_hours)

        school_holidays: List[int] = []
        holiday_names: List[Optional[str]] = []
        hours_active: List[int] = []
        proximity_intensity: List[float] = []

        for raw_value in dates:
            moment = pd.to_datetime(raw_value)

            if pd.isna(moment):
                # Missing date: nothing can be derived for this row.
                school_holidays.append(0)
                holiday_names.append(None)
                hours_active.append(0)
                proximity_intensity.append(0.0)
                continue

            is_holiday, holiday_name = self._find_holiday(moment.date(), ranges)
            school_holidays.append(1 if is_holiday else 0)
            holiday_names.append(holiday_name)

            # School-hours features only apply on weekdays (Mon=0 .. Fri=4).
            if moment.weekday() >= 5:
                hours_active.append(0)
                proximity_intensity.append(0.0)
                continue

            time_of_day = moment.time()
            hours_active.append(
                1 if self._within_school_sessions(time_of_day, hours) else 0
            )
            proximity_intensity.append(self._intensity_for_time(time_of_day, hours))

        return school_holidays, holiday_names, hours_active, proximity_intensity

    @staticmethod
    def _apply_default_features(df: pd.DataFrame) -> pd.DataFrame:
        """Fill the calendar feature columns with their neutral defaults."""
        df["is_school_holiday"] = 0
        df["school_holiday_name"] = None
        df["school_hours_active"] = 0
        df["school_proximity_intensity"] = 0.0
        return df

    async def add_calendar_features(
        self,
        df: pd.DataFrame,
        tenant_id: str,
        date_column: str = "date"
    ) -> pd.DataFrame:
        """
        Add calendar-based features to dataframe (modified in place).

        Features added:
        - is_school_holiday: Binary (1/0)
        - school_holiday_name: String (name of holiday or None)
        - school_hours_active: Binary (1/0) - if during school operating hours
        - school_proximity_intensity: Float (0.0-1.0) - peak during drop-off/pick-up

        Args:
            df: DataFrame with date/datetime column
            tenant_id: Tenant ID to get calendar assignment
            date_column: Name of date column

        Returns:
            The same DataFrame with the feature columns added. When no
            calendar is available, or any error occurs, the columns are
            filled with defaults (0 / None / 0 / 0.0) instead of raising.
            A calendar without usable school hours still gets holiday
            features; only the school-hours columns stay at 0.
        """
        try:
            logger.info(
                "Adding calendar-based features",
                tenant_id=tenant_id,
                rows=len(df)
            )

            # Get calendar for tenant
            calendar = await self.get_calendar_for_tenant(tenant_id)

            if not calendar:
                logger.warning(
                    "No calendar available, using fallback features",
                    tenant_id=tenant_id
                )
                return self._apply_default_features(df)

            # `or` also covers keys that are present but explicitly null.
            holiday_periods = calendar.get("holiday_periods") or []
            school_hours = calendar.get("school_hours") or {}

            (
                school_holidays,
                holiday_names,
                hours_active,
                proximity_intensity,
            ) = self._compute_feature_columns(
                df[date_column], holiday_periods, school_hours
            )

            # Add features to dataframe
            df["is_school_holiday"] = school_holidays
            df["school_holiday_name"] = holiday_names
            df["school_hours_active"] = hours_active
            df["school_proximity_intensity"] = proximity_intensity

            logger.info(
                "Calendar features added successfully",
                tenant_id=tenant_id,
                holiday_periods_count=len(holiday_periods),
                holidays_found=sum(school_holidays)
            )

            return df

        except Exception as e:
            logger.error(
                "Error adding calendar features",
                tenant_id=tenant_id,
                error=str(e)
            )
            # Return df with default features on error
            return self._apply_default_features(df)