Initial commit - production deployment
This commit is contained in:
235
services/forecasting/app/ml/calendar_features.py
Normal file
235
services/forecasting/app/ml/calendar_features.py
Normal file
@@ -0,0 +1,235 @@
|
||||
"""
|
||||
Calendar-based Feature Engineering for Forecasting Service
|
||||
Generates calendar features for future date predictions
|
||||
"""
|
||||
|
||||
import pandas as pd
|
||||
import structlog
|
||||
from typing import Dict, List, Any, Optional
|
||||
from datetime import datetime, date, time, timedelta
|
||||
from app.services.data_client import data_client
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
|
||||
class ForecastCalendarFeatures:
|
||||
"""
|
||||
Generates calendar-based features for future predictions
|
||||
Optimized for forecasting service (future dates only)
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self.calendar_cache = {} # Cache calendar data per tenant
|
||||
|
||||
async def get_calendar_for_tenant(
|
||||
self,
|
||||
tenant_id: str
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
"""Get cached calendar for tenant"""
|
||||
if tenant_id in self.calendar_cache:
|
||||
return self.calendar_cache[tenant_id]
|
||||
|
||||
calendar = await data_client.fetch_tenant_calendar(tenant_id)
|
||||
if calendar:
|
||||
self.calendar_cache[tenant_id] = calendar
|
||||
|
||||
return calendar
|
||||
|
||||
def _is_date_in_holiday_period(
|
||||
self,
|
||||
check_date: date,
|
||||
holiday_periods: List[Dict[str, Any]]
|
||||
) -> tuple[bool, Optional[str]]:
|
||||
"""Check if date is within any holiday period"""
|
||||
for period in holiday_periods:
|
||||
start = datetime.strptime(period["start_date"], "%Y-%m-%d").date()
|
||||
end = datetime.strptime(period["end_date"], "%Y-%m-%d").date()
|
||||
|
||||
if start <= check_date <= end:
|
||||
return True, period["name"]
|
||||
|
||||
return False, None
|
||||
|
||||
def _is_school_hours_active(
|
||||
self,
|
||||
check_datetime: datetime,
|
||||
school_hours: Dict[str, Any]
|
||||
) -> bool:
|
||||
"""Check if datetime falls during school operating hours"""
|
||||
# Only weekdays
|
||||
if check_datetime.weekday() >= 5:
|
||||
return False
|
||||
|
||||
check_time = check_datetime.time()
|
||||
|
||||
# Morning session
|
||||
morning_start = datetime.strptime(
|
||||
school_hours["morning_start"], "%H:%M"
|
||||
).time()
|
||||
morning_end = datetime.strptime(
|
||||
school_hours["morning_end"], "%H:%M"
|
||||
).time()
|
||||
|
||||
if morning_start <= check_time <= morning_end:
|
||||
return True
|
||||
|
||||
# Afternoon session if exists
|
||||
if school_hours.get("has_afternoon_session", False):
|
||||
afternoon_start = datetime.strptime(
|
||||
school_hours["afternoon_start"], "%H:%M"
|
||||
).time()
|
||||
afternoon_end = datetime.strptime(
|
||||
school_hours["afternoon_end"], "%H:%M"
|
||||
).time()
|
||||
|
||||
if afternoon_start <= check_time <= afternoon_end:
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
def _calculate_school_proximity_intensity(
|
||||
self,
|
||||
check_datetime: datetime,
|
||||
school_hours: Dict[str, Any]
|
||||
) -> float:
|
||||
"""
|
||||
Calculate school proximity impact intensity
|
||||
Returns 0.0-1.0 based on drop-off/pick-up times
|
||||
"""
|
||||
# Only weekdays
|
||||
if check_datetime.weekday() >= 5:
|
||||
return 0.0
|
||||
|
||||
check_time = check_datetime.time()
|
||||
|
||||
morning_start = datetime.strptime(
|
||||
school_hours["morning_start"], "%H:%M"
|
||||
).time()
|
||||
morning_end = datetime.strptime(
|
||||
school_hours["morning_end"], "%H:%M"
|
||||
).time()
|
||||
|
||||
# Morning drop-off peak (30 min before to 15 min after start)
|
||||
drop_off_start = (
|
||||
datetime.combine(date.today(), morning_start) - timedelta(minutes=30)
|
||||
).time()
|
||||
drop_off_end = (
|
||||
datetime.combine(date.today(), morning_start) + timedelta(minutes=15)
|
||||
).time()
|
||||
|
||||
if drop_off_start <= check_time <= drop_off_end:
|
||||
return 1.0 # Peak
|
||||
|
||||
# Morning pick-up peak (15 min before to 30 min after end)
|
||||
pickup_start = (
|
||||
datetime.combine(date.today(), morning_end) - timedelta(minutes=15)
|
||||
).time()
|
||||
pickup_end = (
|
||||
datetime.combine(date.today(), morning_end) + timedelta(minutes=30)
|
||||
).time()
|
||||
|
||||
if pickup_start <= check_time <= pickup_end:
|
||||
return 1.0 # Peak
|
||||
|
||||
# During school hours (moderate)
|
||||
if morning_start <= check_time <= morning_end:
|
||||
return 0.3
|
||||
|
||||
return 0.0
|
||||
|
||||
async def add_calendar_features(
|
||||
self,
|
||||
df: pd.DataFrame,
|
||||
tenant_id: str,
|
||||
date_column: str = "ds"
|
||||
) -> pd.DataFrame:
|
||||
"""
|
||||
Add calendar features to forecast dataframe
|
||||
|
||||
Args:
|
||||
df: Forecast dataframe with future dates
|
||||
tenant_id: Tenant ID to fetch calendar
|
||||
date_column: Name of date column (default 'ds' for Prophet)
|
||||
|
||||
Returns:
|
||||
DataFrame with calendar features added
|
||||
"""
|
||||
try:
|
||||
logger.info(
|
||||
"Adding calendar features to forecast",
|
||||
tenant_id=tenant_id,
|
||||
rows=len(df)
|
||||
)
|
||||
|
||||
# Get calendar
|
||||
calendar = await self.get_calendar_for_tenant(tenant_id)
|
||||
|
||||
if not calendar:
|
||||
logger.info(
|
||||
"No calendar available, using zero features",
|
||||
tenant_id=tenant_id
|
||||
)
|
||||
df["is_school_holiday"] = 0
|
||||
df["school_hours_active"] = 0
|
||||
df["school_proximity_intensity"] = 0.0
|
||||
return df
|
||||
|
||||
holiday_periods = calendar.get("holiday_periods", [])
|
||||
school_hours = calendar.get("school_hours", {})
|
||||
|
||||
# Initialize feature lists
|
||||
school_holidays = []
|
||||
hours_active = []
|
||||
proximity_intensity = []
|
||||
|
||||
# Process each row
|
||||
for idx, row in df.iterrows():
|
||||
row_date = pd.to_datetime(row[date_column])
|
||||
|
||||
# Check holiday
|
||||
is_holiday, _ = self._is_date_in_holiday_period(
|
||||
row_date.date(),
|
||||
holiday_periods
|
||||
)
|
||||
school_holidays.append(1 if is_holiday else 0)
|
||||
|
||||
# Check school hours and proximity (if datetime has time component)
|
||||
if hasattr(row_date, 'hour'):
|
||||
hours_active.append(
|
||||
1 if self._is_school_hours_active(row_date, school_hours) else 0
|
||||
)
|
||||
proximity_intensity.append(
|
||||
self._calculate_school_proximity_intensity(row_date, school_hours)
|
||||
)
|
||||
else:
|
||||
hours_active.append(0)
|
||||
proximity_intensity.append(0.0)
|
||||
|
||||
# Add features
|
||||
df["is_school_holiday"] = school_holidays
|
||||
df["school_hours_active"] = hours_active
|
||||
df["school_proximity_intensity"] = proximity_intensity
|
||||
|
||||
logger.info(
|
||||
"Calendar features added to forecast",
|
||||
tenant_id=tenant_id,
|
||||
holidays_in_forecast=sum(school_holidays)
|
||||
)
|
||||
|
||||
return df
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"Error adding calendar features to forecast",
|
||||
tenant_id=tenant_id,
|
||||
error=str(e)
|
||||
)
|
||||
# Return with zero features on error
|
||||
df["is_school_holiday"] = 0
|
||||
df["school_hours_active"] = 0
|
||||
df["school_proximity_intensity"] = 0.0
|
||||
return df
|
||||
|
||||
|
||||
# Global instance
|
||||
forecast_calendar_features = ForecastCalendarFeatures()
|
||||
Reference in New Issue
Block a user