""" Calendar-based Feature Engineering for Forecasting Service Generates calendar features for future date predictions """ import pandas as pd import structlog from typing import Dict, List, Any, Optional from datetime import datetime, date, time, timedelta from app.services.data_client import data_client logger = structlog.get_logger() class ForecastCalendarFeatures: """ Generates calendar-based features for future predictions Optimized for forecasting service (future dates only) """ def __init__(self): self.calendar_cache = {} # Cache calendar data per tenant async def get_calendar_for_tenant( self, tenant_id: str ) -> Optional[Dict[str, Any]]: """Get cached calendar for tenant""" if tenant_id in self.calendar_cache: return self.calendar_cache[tenant_id] calendar = await data_client.fetch_tenant_calendar(tenant_id) if calendar: self.calendar_cache[tenant_id] = calendar return calendar def _is_date_in_holiday_period( self, check_date: date, holiday_periods: List[Dict[str, Any]] ) -> tuple[bool, Optional[str]]: """Check if date is within any holiday period""" for period in holiday_periods: start = datetime.strptime(period["start_date"], "%Y-%m-%d").date() end = datetime.strptime(period["end_date"], "%Y-%m-%d").date() if start <= check_date <= end: return True, period["name"] return False, None def _is_school_hours_active( self, check_datetime: datetime, school_hours: Dict[str, Any] ) -> bool: """Check if datetime falls during school operating hours""" # Only weekdays if check_datetime.weekday() >= 5: return False check_time = check_datetime.time() # Morning session morning_start = datetime.strptime( school_hours["morning_start"], "%H:%M" ).time() morning_end = datetime.strptime( school_hours["morning_end"], "%H:%M" ).time() if morning_start <= check_time <= morning_end: return True # Afternoon session if exists if school_hours.get("has_afternoon_session", False): afternoon_start = datetime.strptime( school_hours["afternoon_start"], "%H:%M" ).time() afternoon_end = datetime.strptime( school_hours["afternoon_end"], "%H:%M" ).time() if afternoon_start <= check_time <= afternoon_end: return True return False def _calculate_school_proximity_intensity( self, check_datetime: datetime, school_hours: Dict[str, Any] ) -> float: """ Calculate school proximity impact intensity Returns 0.0-1.0 based on drop-off/pick-up times """ # Only weekdays if check_datetime.weekday() >= 5: return 0.0 check_time = check_datetime.time() morning_start = datetime.strptime( school_hours["morning_start"], "%H:%M" ).time() morning_end = datetime.strptime( school_hours["morning_end"], "%H:%M" ).time() # Morning drop-off peak (30 min before to 15 min after start) drop_off_start = ( datetime.combine(date.today(), morning_start) - timedelta(minutes=30) ).time() drop_off_end = ( datetime.combine(date.today(), morning_start) + timedelta(minutes=15) ).time() if drop_off_start <= check_time <= drop_off_end: return 1.0 # Peak # Morning pick-up peak (15 min before to 30 min after end) pickup_start = ( datetime.combine(date.today(), morning_end) - timedelta(minutes=15) ).time() pickup_end = ( datetime.combine(date.today(), morning_end) + timedelta(minutes=30) ).time() if pickup_start <= check_time <= pickup_end: return 1.0 # Peak # During school hours (moderate) if morning_start <= check_time <= morning_end: return 0.3 return 0.0 async def add_calendar_features( self, df: pd.DataFrame, tenant_id: str, date_column: str = "ds" ) -> pd.DataFrame: """ Add calendar features to forecast dataframe Args: df: Forecast dataframe with future dates tenant_id: Tenant ID to fetch calendar date_column: Name of date column (default 'ds' for Prophet) Returns: DataFrame with calendar features added """ try: logger.info( "Adding calendar features to forecast", tenant_id=tenant_id, rows=len(df) ) # Get calendar calendar = await self.get_calendar_for_tenant(tenant_id) if not calendar: logger.info( "No calendar available, using zero features", tenant_id=tenant_id ) df["is_school_holiday"] = 0 df["school_hours_active"] = 0 df["school_proximity_intensity"] = 0.0 return df holiday_periods = calendar.get("holiday_periods", []) school_hours = calendar.get("school_hours", {}) # Initialize feature lists school_holidays = [] hours_active = [] proximity_intensity = [] # Process each row for idx, row in df.iterrows(): row_date = pd.to_datetime(row[date_column]) # Check holiday is_holiday, _ = self._is_date_in_holiday_period( row_date.date(), holiday_periods ) school_holidays.append(1 if is_holiday else 0) # Check school hours and proximity (if datetime has time component) if hasattr(row_date, 'hour'): hours_active.append( 1 if self._is_school_hours_active(row_date, school_hours) else 0 ) proximity_intensity.append( self._calculate_school_proximity_intensity(row_date, school_hours) ) else: hours_active.append(0) proximity_intensity.append(0.0) # Add features df["is_school_holiday"] = school_holidays df["school_hours_active"] = hours_active df["school_proximity_intensity"] = proximity_intensity logger.info( "Calendar features added to forecast", tenant_id=tenant_id, holidays_in_forecast=sum(school_holidays) ) return df except Exception as e: logger.error( "Error adding calendar features to forecast", tenant_id=tenant_id, error=str(e) ) # Return with zero features on error df["is_school_holiday"] = 0 df["school_hours_active"] = 0 df["school_proximity_intensity"] = 0.0 return df # Global instance forecast_calendar_features = ForecastCalendarFeatures()