""" Calendar-based Feature Engineering Hyperlocal school calendar and event features for demand forecasting """ import pandas as pd import structlog from typing import Dict, List, Any, Optional from datetime import datetime, date, time, timedelta from shared.clients.external_client import ExternalServiceClient logger = structlog.get_logger() class CalendarFeatureEngine: """ Generates features based on school calendars and local events for hyperlocal demand forecasting enhancement """ def __init__(self, external_client: ExternalServiceClient): self.external_client = external_client self.calendar_cache = {} # Cache calendar data to avoid repeated API calls async def get_calendar_for_tenant( self, tenant_id: str, city_id: Optional[str] = "madrid" ) -> Optional[Dict[str, Any]]: """ Get the assigned school calendar for a tenant If tenant has no assignment, returns None """ try: # Check cache first cache_key = f"tenant_{tenant_id}_calendar" if cache_key in self.calendar_cache: logger.debug("Using cached calendar", tenant_id=tenant_id) return self.calendar_cache[cache_key] # Get tenant location context context = await self.external_client.get_tenant_location_context(tenant_id) if not context or not context.get("calendar"): logger.info( "No calendar assigned to tenant, using default if available", tenant_id=tenant_id ) return None calendar = context["calendar"] self.calendar_cache[cache_key] = calendar logger.info( "Retrieved calendar for tenant", tenant_id=tenant_id, calendar_name=calendar.get("calendar_name") ) return calendar except Exception as e: logger.error( "Error retrieving calendar for tenant", tenant_id=tenant_id, error=str(e) ) return None def _is_date_in_holiday_period( self, check_date: date, holiday_periods: List[Dict[str, Any]] ) -> tuple[bool, Optional[str]]: """ Check if a date falls within any holiday period Returns: (is_holiday, holiday_name) """ for period in holiday_periods: start = datetime.strptime(period["start_date"], "%Y-%m-%d").date() end = datetime.strptime(period["end_date"], "%Y-%m-%d").date() if start <= check_date <= end: return True, period["name"] return False, None def _is_school_hours_active( self, check_datetime: datetime, school_hours: Dict[str, Any] ) -> bool: """ Check if datetime falls during school operating hours Args: check_datetime: DateTime to check school_hours: School hours configuration dict Returns: True if during school hours, False otherwise """ # Only check weekdays if check_datetime.weekday() >= 5: # Saturday=5, Sunday=6 return False check_time = check_datetime.time() # Morning session morning_start = datetime.strptime( school_hours["morning_start"], "%H:%M" ).time() morning_end = datetime.strptime( school_hours["morning_end"], "%H:%M" ).time() if morning_start <= check_time <= morning_end: return True # Afternoon session (if applicable) if school_hours.get("has_afternoon_session", False): afternoon_start = datetime.strptime( school_hours["afternoon_start"], "%H:%M" ).time() afternoon_end = datetime.strptime( school_hours["afternoon_end"], "%H:%M" ).time() if afternoon_start <= check_time <= afternoon_end: return True return False def _calculate_school_proximity_intensity( self, check_datetime: datetime, school_hours: Dict[str, Any] ) -> float: """ Calculate intensity of school-related foot traffic Peaks during drop-off and pick-up times Returns: Float between 0.0 (no impact) and 1.0 (peak impact) """ # Only weekdays if check_datetime.weekday() >= 5: return 0.0 check_time = check_datetime.time() # Define peak windows (30 minutes before and after school start/end) morning_start = datetime.strptime( school_hours["morning_start"], "%H:%M" ).time() morning_end = datetime.strptime( school_hours["morning_end"], "%H:%M" ).time() # Morning drop-off peak (30 min before to 15 min after start) drop_off_start = ( datetime.combine(date.today(), morning_start) - timedelta(minutes=30) ).time() drop_off_end = ( datetime.combine(date.today(), morning_start) + timedelta(minutes=15) ).time() if drop_off_start <= check_time <= drop_off_end: return 1.0 # Peak morning traffic # Morning pick-up peak (15 min before to 30 min after end) pickup_start = ( datetime.combine(date.today(), morning_end) - timedelta(minutes=15) ).time() pickup_end = ( datetime.combine(date.today(), morning_end) + timedelta(minutes=30) ).time() if pickup_start <= check_time <= pickup_end: return 1.0 # Peak afternoon traffic # During school hours (moderate impact) if morning_start <= check_time <= morning_end: return 0.3 # Afternoon session if applicable if school_hours.get("has_afternoon_session", False): afternoon_start = datetime.strptime( school_hours["afternoon_start"], "%H:%M" ).time() afternoon_end = datetime.strptime( school_hours["afternoon_end"], "%H:%M" ).time() if afternoon_start <= check_time <= afternoon_end: return 0.3 return 0.0 async def add_calendar_features( self, df: pd.DataFrame, tenant_id: str, date_column: str = "date" ) -> pd.DataFrame: """ Add calendar-based features to dataframe Features added: - is_school_holiday: Binary (1/0) - school_holiday_name: String (name of holiday or None) - school_hours_active: Binary (1/0) - if during school operating hours - school_proximity_intensity: Float (0.0-1.0) - peak during drop-off/pick-up Args: df: DataFrame with date/datetime column tenant_id: Tenant ID to get calendar assignment date_column: Name of date column Returns: DataFrame with added calendar features """ try: logger.info( "Adding calendar-based features", tenant_id=tenant_id, rows=len(df) ) # Get calendar for tenant calendar = await self.get_calendar_for_tenant(tenant_id) if not calendar: logger.warning( "No calendar available, using fallback features", tenant_id=tenant_id ) # Add default features (all zeros) df["is_school_holiday"] = 0 df["school_holiday_name"] = None df["school_hours_active"] = 0 df["school_proximity_intensity"] = 0.0 return df holiday_periods = calendar.get("holiday_periods", []) school_hours = calendar.get("school_hours", {}) # Initialize feature columns school_holidays = [] holiday_names = [] hours_active = [] proximity_intensity = [] # Process each row for idx, row in df.iterrows(): row_date = pd.to_datetime(row[date_column]) # Check if holiday is_holiday, holiday_name = self._is_date_in_holiday_period( row_date.date(), holiday_periods ) school_holidays.append(1 if is_holiday else 0) holiday_names.append(holiday_name) # Check if during school hours (requires time component) if hasattr(row_date, 'hour'): # Has time component hours_active.append( 1 if self._is_school_hours_active(row_date, school_hours) else 0 ) proximity_intensity.append( self._calculate_school_proximity_intensity(row_date, school_hours) ) else: # Date only, no time component hours_active.append(0) proximity_intensity.append(0.0) # Add features to dataframe df["is_school_holiday"] = school_holidays df["school_holiday_name"] = holiday_names df["school_hours_active"] = hours_active df["school_proximity_intensity"] = proximity_intensity logger.info( "Calendar features added successfully", tenant_id=tenant_id, holiday_periods_count=len(holiday_periods), holidays_found=sum(school_holidays) ) return df except Exception as e: logger.error( "Error adding calendar features", tenant_id=tenant_id, error=str(e) ) # Return df with default features on error df["is_school_holiday"] = 0 df["school_holiday_name"] = None df["school_hours_active"] = 0 df["school_proximity_intensity"] = 0.0 return df