236 lines
7.3 KiB
Python
236 lines
7.3 KiB
Python
|
|
"""
|
||
|
|
Calendar-based Feature Engineering for Forecasting Service
|
||
|
|
Generates calendar features for future date predictions
|
||
|
|
"""
|
||
|
|
|
||
|
|
import pandas as pd
|
||
|
|
import structlog
|
||
|
|
from typing import Dict, List, Any, Optional
|
||
|
|
from datetime import datetime, date, time, timedelta
|
||
|
|
from app.services.data_client import data_client
|
||
|
|
|
||
|
|
# Module-level structlog logger; the class below emits its log events through it
# with keyword context (e.g. tenant_id=...).
logger = structlog.get_logger()
|
||
|
|
|
||
|
|
|
||
|
|
class ForecastCalendarFeatures:
    """
    Generates calendar-based features for future predictions
    Optimized for forecasting service (future dates only)

    Three columns are added to a forecast dataframe:

    * ``is_school_holiday`` (0/1): the row's date falls inside one of the
      tenant calendar's ``holiday_periods``.
    * ``school_hours_active`` (0/1): the row's timestamp is on a weekday and
      inside the morning (or optional afternoon) session.
    * ``school_proximity_intensity`` (float): 1.0 in the drop-off / pick-up
      windows around the morning session, 0.3 during the rest of the morning
      session, otherwise 0.0.

    Calendars are fetched through ``data_client`` and cached per tenant for
    the lifetime of the instance (no expiry).
    """

    # Fixed date used only so that timedelta arithmetic can be done on
    # time-of-day values; the date part is discarded again via ``.time()``.
    _ANCHOR_DATE = date(2000, 1, 1)

    def __init__(self):
        self.calendar_cache = {}  # Cache calendar data per tenant

    async def get_calendar_for_tenant(
        self,
        tenant_id: str
    ) -> Optional[Dict[str, Any]]:
        """
        Get cached calendar for tenant

        Args:
            tenant_id: Tenant ID whose calendar is requested

        Returns:
            The cached calendar if present, otherwise whatever
            ``data_client.fetch_tenant_calendar`` returns. Only truthy
            results are cached, so a missing calendar is re-fetched on the
            next call.
        """
        if tenant_id in self.calendar_cache:
            return self.calendar_cache[tenant_id]

        calendar = await data_client.fetch_tenant_calendar(tenant_id)
        if calendar:
            self.calendar_cache[tenant_id] = calendar

        return calendar

    @staticmethod
    def _apply_zero_features(df: pd.DataFrame) -> pd.DataFrame:
        """Set all three calendar feature columns to their neutral value (in place)."""
        df["is_school_holiday"] = 0
        df["school_hours_active"] = 0
        df["school_proximity_intensity"] = 0.0
        return df

    @staticmethod
    def _parse_session(
        school_hours: Optional[Dict[str, Any]],
        start_key: str,
        end_key: str
    ) -> Optional[tuple[time, time]]:
        """
        Parse a ``(start, end)`` pair of "%H:%M" strings from ``school_hours``.

        Returns None when ``school_hours`` is empty/None or either key is
        missing or empty, so callers can treat "no configuration" as "no
        school signal" instead of failing. Malformed time strings still raise
        ValueError.
        """
        if not school_hours:
            return None

        raw_start = school_hours.get(start_key)
        raw_end = school_hours.get(end_key)
        if not raw_start or not raw_end:
            return None

        return (
            datetime.strptime(raw_start, "%H:%M").time(),
            datetime.strptime(raw_end, "%H:%M").time(),
        )

    @classmethod
    def _shift_time(cls, value: time, minutes: int) -> time:
        """Return ``value`` moved by ``minutes`` (may be negative), wrapping at midnight."""
        return (
            datetime.combine(cls._ANCHOR_DATE, value) + timedelta(minutes=minutes)
        ).time()

    def _is_date_in_holiday_period(
        self,
        check_date: date,
        holiday_periods: List[Dict[str, Any]]
    ) -> tuple[bool, Optional[str]]:
        """
        Check if date is within any holiday period

        Args:
            check_date: Date to test
            holiday_periods: Dicts with "start_date" / "end_date" as
                "%Y-%m-%d" strings and an optional "name"

        Returns:
            ``(True, name)`` for the first period containing the date (both
            bounds inclusive), otherwise ``(False, None)``.
        """
        for period in holiday_periods:
            start = datetime.strptime(period["start_date"], "%Y-%m-%d").date()
            end = datetime.strptime(period["end_date"], "%Y-%m-%d").date()

            if start <= check_date <= end:
                # .get: an unnamed period must still count as a holiday
                return True, period.get("name")

        return False, None

    def _is_school_hours_active(
        self,
        check_datetime: datetime,
        school_hours: Dict[str, Any]
    ) -> bool:
        """
        Check if datetime falls during school operating hours

        Args:
            check_datetime: Timestamp to test
            school_hours: Dict with "morning_start" / "morning_end" and,
                when "has_afternoon_session" is truthy, "afternoon_start" /
                "afternoon_end" (all "%H:%M" strings)

        Returns:
            True on a weekday inside the morning or afternoon session (bounds
            inclusive). False on weekends or when the relevant session is not
            configured.
        """
        # Only weekdays
        if check_datetime.weekday() >= 5:
            return False

        check_time = check_datetime.time()

        # Morning session
        morning = self._parse_session(school_hours, "morning_start", "morning_end")
        if morning is not None and morning[0] <= check_time <= morning[1]:
            return True

        # Afternoon session if exists
        if school_hours and school_hours.get("has_afternoon_session", False):
            afternoon = self._parse_session(
                school_hours, "afternoon_start", "afternoon_end"
            )
            if afternoon is not None and afternoon[0] <= check_time <= afternoon[1]:
                return True

        return False

    def _calculate_school_proximity_intensity(
        self,
        check_datetime: datetime,
        school_hours: Dict[str, Any]
    ) -> float:
        """
        Calculate school proximity impact intensity
        Returns 0.0-1.0 based on drop-off/pick-up times

        Args:
            check_datetime: Timestamp to score
            school_hours: Dict with "morning_start" / "morning_end" as
                "%H:%M" strings

        Returns:
            1.0 inside the drop-off or pick-up window, 0.3 during the rest of
            the morning session, 0.0 otherwise (including weekends and a
            missing morning configuration).
        """
        # Only weekdays
        if check_datetime.weekday() >= 5:
            return 0.0

        morning = self._parse_session(school_hours, "morning_start", "morning_end")
        if morning is None:
            return 0.0
        morning_start, morning_end = morning

        check_time = check_datetime.time()

        # NOTE(review): only the morning session is scored; the afternoon
        # session is ignored here although _is_school_hours_active honours it.
        # Left unchanged so feature values keep their existing meaning.
        # NOTE(review): a window that wraps past midnight never matches,
        # because the comparison below is on plain time-of-day values.

        # Morning drop-off peak (30 min before to 15 min after start)
        drop_off_start = self._shift_time(morning_start, -30)
        drop_off_end = self._shift_time(morning_start, 15)
        if drop_off_start <= check_time <= drop_off_end:
            return 1.0  # Peak

        # Morning pick-up peak (15 min before to 30 min after end)
        pickup_start = self._shift_time(morning_end, -15)
        pickup_end = self._shift_time(morning_end, 30)
        if pickup_start <= check_time <= pickup_end:
            return 1.0  # Peak

        # During school hours (moderate)
        if morning_start <= check_time <= morning_end:
            return 0.3

        return 0.0

    async def add_calendar_features(
        self,
        df: pd.DataFrame,
        tenant_id: str,
        date_column: str = "ds"
    ) -> pd.DataFrame:
        """
        Add calendar features to forecast dataframe

        The dataframe is modified in place and also returned. Any exception
        raised while computing features is logged and all three feature
        columns fall back to zero.

        Args:
            df: Forecast dataframe with future dates
            tenant_id: Tenant ID to fetch calendar
            date_column: Name of date column (default 'ds' for Prophet)

        Returns:
            DataFrame with calendar features added
        """
        try:
            logger.info(
                "Adding calendar features to forecast",
                tenant_id=tenant_id,
                rows=len(df)
            )

            # Get calendar
            calendar = await self.get_calendar_for_tenant(tenant_id)

            if not calendar:
                logger.info(
                    "No calendar available, using zero features",
                    tenant_id=tenant_id
                )
                return self._apply_zero_features(df)

            # `or` also covers keys that are present but set to None
            holiday_periods = calendar.get("holiday_periods") or []
            school_hours = calendar.get("school_hours") or {}

            # Initialize feature lists
            school_holidays = []
            hours_active = []
            proximity_intensity = []

            # Process each row; only the date column is needed
            for raw_value in df[date_column]:
                row_date = pd.to_datetime(raw_value)

                # Check holiday
                is_holiday, _ = self._is_date_in_holiday_period(
                    row_date.date(),
                    holiday_periods
                )
                school_holidays.append(1 if is_holiday else 0)

                # Check school hours and proximity. pd.to_datetime always
                # yields a value with a time component (midnight for
                # date-only input), so no "no time" fallback is needed.
                hours_active.append(
                    1 if self._is_school_hours_active(row_date, school_hours) else 0
                )
                proximity_intensity.append(
                    self._calculate_school_proximity_intensity(row_date, school_hours)
                )

            # Add features
            df["is_school_holiday"] = school_holidays
            df["school_hours_active"] = hours_active
            df["school_proximity_intensity"] = proximity_intensity

            logger.info(
                "Calendar features added to forecast",
                tenant_id=tenant_id,
                holidays_in_forecast=sum(school_holidays)
            )

            return df

        except Exception as e:
            # Best-effort by design: forecasting continues without calendar signal
            logger.error(
                "Error adding calendar features to forecast",
                tenant_id=tenant_id,
                error=str(e)
            )
            # Return with zero features on error
            return self._apply_zero_features(df)
|
||
|
|
|
||
|
|
|
||
|
|
# Global instance
|
||
|
|
# Created once at import time; every user of this name shares the same
# per-tenant calendar_cache held by the instance.
forecast_calendar_features = ForecastCalendarFeatures()
|