Files
bakery-ia/services/forecasting/app/ml/calendar_features.py

236 lines
7.3 KiB
Python

"""
Calendar-based Feature Engineering for Forecasting Service
Generates calendar features for future date predictions
"""
import pandas as pd
import structlog
from typing import Dict, List, Any, Optional
from datetime import datetime, date, time, timedelta
from app.services.data_client import data_client
# Module-level structlog logger used by ForecastCalendarFeatures for info/error events.
logger = structlog.get_logger()
class ForecastCalendarFeatures:
    """
    Generates calendar-based features for future predictions
    Optimized for forecasting service (future dates only)

    ``add_calendar_features`` adds three columns to a forecast dataframe:

    * ``is_school_holiday`` -- 1 when the row's date lies inside a holiday period
    * ``school_hours_active`` -- 1 when the row's datetime is inside a school session
    * ``school_proximity_intensity`` -- 0.0, 0.3 or 1.0 (see
      ``_calculate_school_proximity_intensity``)

    Tenant calendars are fetched through ``data_client`` and kept in
    ``calendar_cache`` for the lifetime of the instance (no eviction).
    """

    # Formats of the date / time-of-day strings found in the tenant calendar.
    _DATE_FORMAT = "%Y-%m-%d"
    _TIME_FORMAT = "%H:%M"
    # Arbitrary fixed day, used only so timedelta arithmetic can be applied to
    # a time-of-day value; the date part is discarded again afterwards.
    _ANCHOR_DATE = date(2000, 1, 1)

    def __init__(self):
        self.calendar_cache = {}  # Cache calendar data per tenant

    async def get_calendar_for_tenant(
        self,
        tenant_id: str
    ) -> Optional[Dict[str, Any]]:
        """
        Get cached calendar for tenant.

        Args:
            tenant_id: Tenant whose calendar is requested

        Returns:
            The cached calendar if present, otherwise whatever
            ``data_client.fetch_tenant_calendar`` returns. Only truthy
            results are cached, so an empty/None answer is re-fetched on
            the next call.
        """
        if tenant_id in self.calendar_cache:
            return self.calendar_cache[tenant_id]

        calendar = await data_client.fetch_tenant_calendar(tenant_id)
        if calendar:
            self.calendar_cache[tenant_id] = calendar
        return calendar

    @classmethod
    def _parse_holiday_periods(
        cls,
        holiday_periods: List[Dict[str, Any]]
    ) -> List[tuple[date, date, Optional[str]]]:
        """Parse every holiday period once into (start, end, name) tuples."""
        return [
            (
                datetime.strptime(period["start_date"], cls._DATE_FORMAT).date(),
                datetime.strptime(period["end_date"], cls._DATE_FORMAT).date(),
                period.get("name"),
            )
            for period in holiday_periods
        ]

    @classmethod
    def _parse_session(
        cls,
        school_hours: Optional[Dict[str, Any]],
        start_key: str,
        end_key: str
    ) -> Optional[tuple[time, time]]:
        """
        Parse one school session into (start, end) times.

        Returns None when the session is not configured (missing, None or
        empty values). Malformed time strings still raise ValueError.
        """
        if not school_hours:
            return None
        raw_start = school_hours.get(start_key)
        raw_end = school_hours.get(end_key)
        if not raw_start or not raw_end:
            return None
        return (
            datetime.strptime(raw_start, cls._TIME_FORMAT).time(),
            datetime.strptime(raw_end, cls._TIME_FORMAT).time(),
        )

    @classmethod
    def _shift_time(cls, base: time, minutes: int) -> time:
        """Return ``base`` moved by ``minutes`` (may be negative), as a time of day."""
        shifted = datetime.combine(cls._ANCHOR_DATE, base) + timedelta(minutes=minutes)
        return shifted.time()

    @staticmethod
    def _with_zero_features(df: pd.DataFrame) -> pd.DataFrame:
        """Set all three calendar feature columns to zero (in place) and return ``df``."""
        df["is_school_holiday"] = 0
        df["school_hours_active"] = 0
        df["school_proximity_intensity"] = 0.0
        return df

    def _is_date_in_holiday_period(
        self,
        check_date: date,
        holiday_periods: List[Dict[str, Any]]
    ) -> tuple[bool, Optional[str]]:
        """
        Check if date is within any holiday period.

        Args:
            check_date: Date to test
            holiday_periods: Dicts with "start_date" / "end_date" strings
                (YYYY-MM-DD, both inclusive) and an optional "name"

        Returns:
            (True, name) for the first matching period, else (False, None)
        """
        for period in holiday_periods:
            start = datetime.strptime(period["start_date"], self._DATE_FORMAT).date()
            end = datetime.strptime(period["end_date"], self._DATE_FORMAT).date()
            if start <= check_date <= end:
                # .get(): a period without a name must not break the check.
                return True, period.get("name")
        return False, None

    def _is_school_hours_active(
        self,
        check_datetime: datetime,
        school_hours: Dict[str, Any]
    ) -> bool:
        """
        Check if datetime falls during school operating hours.

        Weekends (weekday() >= 5) are never active. The morning session is
        always considered; the afternoon session only when
        ``has_afternoon_session`` is truthy. A session whose start/end is
        not configured is treated as absent instead of raising.
        """
        # Only weekdays
        if check_datetime.weekday() >= 5:
            return False

        check_time = check_datetime.time()

        sessions = [self._parse_session(school_hours, "morning_start", "morning_end")]
        if school_hours and school_hours.get("has_afternoon_session", False):
            sessions.append(
                self._parse_session(school_hours, "afternoon_start", "afternoon_end")
            )

        return any(
            session is not None and session[0] <= check_time <= session[1]
            for session in sessions
        )

    def _calculate_school_proximity_intensity(
        self,
        check_datetime: datetime,
        school_hours: Dict[str, Any]
    ) -> float:
        """
        Calculate school proximity impact intensity.

        Returns:
            1.0 inside the drop-off window (30 min before to 15 min after
            morning start) or the pick-up window (15 min before to 30 min
            after morning end); 0.3 during the rest of the morning session;
            0.0 otherwise, on weekends, or when morning hours are not
            configured.

        NOTE: only the morning session is taken into account here; the
        afternoon session does not influence the intensity.
        """
        # Only weekdays
        if check_datetime.weekday() >= 5:
            return 0.0

        morning = self._parse_session(school_hours, "morning_start", "morning_end")
        if morning is None:
            return 0.0
        morning_start, morning_end = morning
        check_time = check_datetime.time()

        peak_windows = (
            # Morning drop-off peak
            (self._shift_time(morning_start, -30), self._shift_time(morning_start, 15)),
            # Morning pick-up peak
            (self._shift_time(morning_end, -15), self._shift_time(morning_end, 30)),
        )
        if any(opens <= check_time <= closes for opens, closes in peak_windows):
            return 1.0  # Peak

        # During school hours (moderate)
        if morning_start <= check_time <= morning_end:
            return 0.3
        return 0.0

    async def add_calendar_features(
        self,
        df: pd.DataFrame,
        tenant_id: str,
        date_column: str = "ds"
    ) -> pd.DataFrame:
        """
        Add calendar features to forecast dataframe.

        The dataframe is modified in place and also returned. Any failure
        (calendar fetch, malformed calendar data, missing date column) is
        logged and results in all three features being set to zero.
        Missing school hours alone do NOT zero the holiday feature.

        Args:
            df: Forecast dataframe with future dates
            tenant_id: Tenant ID to fetch calendar
            date_column: Name of date column (default 'ds' for Prophet)

        Returns:
            DataFrame with calendar features added
        """
        try:
            logger.info(
                "Adding calendar features to forecast",
                tenant_id=tenant_id,
                rows=len(df)
            )

            # Get calendar
            calendar = await self.get_calendar_for_tenant(tenant_id)
            if not calendar:
                logger.info(
                    "No calendar available, using zero features",
                    tenant_id=tenant_id
                )
                return self._with_zero_features(df)

            # `or` also covers keys that are present but explicitly null.
            holiday_periods = calendar.get("holiday_periods") or []
            school_hours = calendar.get("school_hours") or {}

            # Parse the period boundaries once instead of once per row.
            holiday_ranges = self._parse_holiday_periods(holiday_periods)

            school_holidays = []
            hours_active = []
            proximity_intensity = []

            for raw_value in df[date_column]:
                row_date = pd.to_datetime(raw_value)
                row_day = row_date.date()

                in_holiday = any(
                    start <= row_day <= end for start, end, _ in holiday_ranges
                )
                school_holidays.append(1 if in_holiday else 0)
                hours_active.append(
                    1 if self._is_school_hours_active(row_date, school_hours) else 0
                )
                proximity_intensity.append(
                    self._calculate_school_proximity_intensity(row_date, school_hours)
                )

            # Add features
            df["is_school_holiday"] = school_holidays
            df["school_hours_active"] = hours_active
            df["school_proximity_intensity"] = proximity_intensity

            logger.info(
                "Calendar features added to forecast",
                tenant_id=tenant_id,
                holidays_in_forecast=sum(school_holidays)
            )
            return df

        except Exception as e:
            # Best-effort boundary: forecasting continues without calendar signal.
            logger.error(
                "Error adding calendar features to forecast",
                tenant_id=tenant_id,
                error=str(e)
            )
            # Return with zero features on error
            return self._with_zero_features(df)
# Global instance, created at import time. Its per-tenant calendar cache
# (calendar_cache) lives as long as this object does.
forecast_calendar_features = ForecastCalendarFeatures()