Improve AI logic
This commit is contained in:
253
services/training/app/ml/event_feature_generator.py
Normal file
253
services/training/app/ml/event_feature_generator.py
Normal file
@@ -0,0 +1,253 @@
|
||||
"""
|
||||
Event Feature Generator
|
||||
Converts calendar events into features for demand forecasting
|
||||
"""
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from typing import List, Dict, Any, Optional
|
||||
from datetime import date, timedelta
|
||||
import structlog
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
|
||||
class EventFeatureGenerator:
    """
    Generate event-related features for demand forecasting.

    Features include:
    - Binary flags for event presence
    - Event impact multipliers
    - Event type indicators
    - Days until/since major events
    """

    # Event type impact weights (default multipliers)
    EVENT_IMPACT_WEIGHTS = {
        'promotion': 1.3,
        'festival': 1.8,
        'holiday': 0.7,  # Bakeries often close or have reduced demand
        'weather_event': 0.8,  # Bad weather reduces foot traffic
        'school_break': 1.2,
        'sport_event': 1.4,
        'market': 1.5,
        'concert': 1.3,
        'local_event': 1.2,
    }

    # Upper bound (and "no event known" placeholder) for the day-distance features.
    MAX_EVENT_GAP_DAYS = 365

    # Neutral value of every generated feature column, in output column order.
    FEATURE_DEFAULTS = {
        'has_event': 0,
        'event_impact': 1.0,  # Neutral impact
        'is_promotion': 0,
        'is_festival': 0,
        'is_local_event': 0,
        'days_to_next_event': MAX_EVENT_GAP_DAYS,
        'days_since_last_event': MAX_EVENT_GAP_DAYS,
    }

    def __init__(self):
        pass

    def generate_event_features(
        self,
        dates: pd.DatetimeIndex,
        events: List[Dict[str, Any]]
    ) -> pd.DataFrame:
        """
        Generate event features for given dates.

        Args:
            dates: Dates to generate features for
            events: List of event dictionaries with keys:
                - event_date: date
                - event_type: str
                - impact_multiplier: float (optional)
                - event_name: str

        Returns:
            DataFrame with a 'date' column followed by the columns listed in
            FEATURE_DEFAULTS. Rows without any matching information keep the
            default values.

        Notes:
            When several events fall on the same day, 'event_impact' is the
            largest per-event impact, where each event contributes its own
            custom 'impact_multiplier' if set and otherwise the default weight
            of its type (1.0 for unknown types).
        """
        from bisect import bisect_left, bisect_right

        df = pd.DataFrame({'date': dates})

        # Initialize feature columns
        for column, default in self.FEATURE_DEFAULTS.items():
            df[column] = default

        if not events:
            logger.debug("No events provided, returning default features")
            return df

        # Convert events to DataFrame for easier processing
        events_df = pd.DataFrame(events)
        events_df['event_date'] = pd.to_datetime(events_df['event_date'])
        events_df['_effective_impact'] = self._effective_impacts(events_df)

        # Aggregate once per distinct event day instead of re-filtering the
        # whole events frame for every feature row. groupby sorts its keys and
        # skips events whose date is missing, so the three lists are aligned
        # and event_days is ascending (a requirement for bisect below).
        event_days = []
        day_impacts = []
        day_types = []
        for day, day_events in events_df.groupby('event_date'):
            event_days.append(day)
            day_impacts.append(day_events['_effective_impact'].max())
            day_types.append(day_events['event_type'].tolist())

        known_days = len(event_days)

        for idx, raw_date in df['date'].items():
            current_date = pd.to_datetime(raw_date)
            if pd.isna(current_date):
                # A missing date cannot be ordered against events; keep defaults.
                continue

            # event_days[:first_same_or_later] are strictly before current_date,
            # event_days[first_later:] are strictly after it.
            first_same_or_later = bisect_left(event_days, current_date)
            first_later = bisect_right(event_days, current_date)

            # Check if there's an event on this date
            if first_later > first_same_or_later:
                types_today = day_types[first_same_or_later]
                df.at[idx, 'has_event'] = 1
                df.at[idx, 'event_impact'] = day_impacts[first_same_or_later]

                # Set event type flags
                if 'promotion' in types_today:
                    df.at[idx, 'is_promotion'] = 1
                if 'festival' in types_today:
                    df.at[idx, 'is_festival'] = 1
                if 'local_event' in types_today or 'market' in types_today:
                    df.at[idx, 'is_local_event'] = 1

            # Calculate days to/from nearest event (the current day excluded)
            if first_later < known_days:
                next_event_date = event_days[first_later]
                df.at[idx, 'days_to_next_event'] = (next_event_date - current_date).days

            if first_same_or_later > 0:
                last_event_date = event_days[first_same_or_later - 1]
                df.at[idx, 'days_since_last_event'] = (current_date - last_event_date).days

        # Cap days values at 365
        df['days_to_next_event'] = df['days_to_next_event'].clip(upper=self.MAX_EVENT_GAP_DAYS)
        df['days_since_last_event'] = df['days_since_last_event'].clip(upper=self.MAX_EVENT_GAP_DAYS)

        logger.debug("Generated event features",
                     total_days=len(df),
                     days_with_events=int(df['has_event'].sum()))

        return df

    def add_event_features_to_forecast_data(
        self,
        forecast_data: pd.DataFrame,
        event_features: pd.DataFrame
    ) -> pd.DataFrame:
        """
        Add event features to forecast input data.

        Neither input frame is modified; both are copied before their 'date'
        column is converted for the join.

        Args:
            forecast_data: Existing forecast data with 'date' column
            event_features: Event features from generate_event_features()

        Returns:
            Enhanced forecast data with event features. Forecast rows whose
            date has no entry in event_features receive the FEATURE_DEFAULTS
            values.
        """
        forecast_data = forecast_data.copy()
        forecast_data['date'] = pd.to_datetime(forecast_data['date'])

        # Work on a copy of just the needed columns so the caller's frame
        # keeps its original 'date' values and dtype.
        feature_columns = list(self.FEATURE_DEFAULTS)
        features = event_features[['date'] + feature_columns].copy()
        features['date'] = pd.to_datetime(features['date'])

        # Merge event features
        enhanced_data = forecast_data.merge(features, on='date', how='left')

        # Fill missing with defaults. A single DataFrame-level fillna is used
        # because `df[col].fillna(..., inplace=True)` operates on a temporary
        # object and does not update the frame under pandas Copy-on-Write.
        return enhanced_data.fillna(value=self.FEATURE_DEFAULTS)

    def get_event_summary(self, events: List[Dict[str, Any]]) -> Dict[str, Any]:
        """
        Get summary statistics about events.

        Args:
            events: List of event dictionaries

        Returns:
            Summary dict with keys:
                - total_events: number of events
                - events_by_type: count of events per 'event_type'
                - date_range: {'start', 'end'} ISO strings of the earliest and
                  latest 'event_date' (None when no usable date exists)
                - avg_impact: mean of the custom 'impact_multiplier' values,
                  or 1.0 when none are available
        """
        if not events:
            return {
                'total_events': 0,
                'events_by_type': {},
                'avg_impact': 1.0,
                'date_range': {'start': None, 'end': None},
            }

        events_df = pd.DataFrame(events)

        avg_impact = 1.0
        custom_impacts = self._custom_impacts(events_df)
        if custom_impacts is not None:
            mean_impact = custom_impacts.mean()
            # mean() is NaN when no event carries a custom multiplier.
            if pd.notna(mean_impact):
                avg_impact = float(mean_impact)

        return {
            'total_events': len(events),
            'events_by_type': events_df['event_type'].value_counts().to_dict(),
            'date_range': self._iso_date_range(events_df['event_date']),
            'avg_impact': avg_impact,
        }

    @staticmethod
    def _custom_impacts(events_df: pd.DataFrame) -> Optional[pd.Series]:
        """Return 'impact_multiplier' as floats (NaN where unset), or None if the column is absent."""
        if 'impact_multiplier' not in events_df.columns:
            return None
        raw = events_df['impact_multiplier']
        # Normalise None-like placeholders to NaN before the float conversion.
        return raw.where(raw.notna(), np.nan).astype(float)

    def _effective_impacts(self, events_df: pd.DataFrame) -> pd.Series:
        """Return one multiplier per event: its custom value if set, else its type's default weight."""
        type_defaults = (
            events_df['event_type']
            .map(self.EVENT_IMPACT_WEIGHTS)
            .astype(float)
            .fillna(1.0)  # unknown event types are neutral
        )
        custom_impacts = self._custom_impacts(events_df)
        if custom_impacts is None:
            return type_defaults
        return custom_impacts.fillna(type_defaults)

    @staticmethod
    def _iso_date_range(raw_dates: pd.Series) -> Dict[str, Optional[str]]:
        """Return ISO strings for the earliest and latest entry of raw_dates."""
        # Order by parsed timestamps so string dates are compared
        # chronologically rather than lexicographically.
        parsed = pd.to_datetime(raw_dates)
        valid = parsed.dropna()
        if valid.empty:
            return {'start': None, 'end': None}

        def render(label) -> str:
            original = raw_dates.loc[label]
            # Keep the caller's own representation for date/datetime values;
            # anything else (e.g. a string) is rendered from its parsed form.
            if hasattr(original, 'isoformat'):
                return original.isoformat()
            return parsed.loc[label].isoformat()

        return {
            'start': render(valid.idxmin()),
            'end': render(valid.idxmax()),
        }
|
||||
|
||||
|
||||
def create_event_calendar_features(
    dates: pd.DatetimeIndex,
    tenant_id: str,
    event_repository=None
) -> pd.DataFrame:
    """
    Convenience function to fetch events from database and generate features.

    This is a synchronous wrapper around the repository's coroutine
    `get_events_by_date_range`. Fetching is best-effort: any failure is logged
    and the features are generated as if there were no events.

    Args:
        dates: Dates to generate features for
        tenant_id: Tenant UUID (string form, or an already-built UUID)
        event_repository: EventRepository instance (optional)

    Returns:
        DataFrame with event features
    """
    import asyncio
    from uuid import UUID

    events = []

    if event_repository is None:
        logger.warning("No event repository provided, using empty events")
    elif len(dates) == 0:
        # min()/max() of an empty index are NaT, which has no calendar date.
        logger.debug("No dates provided, skipping event fetch")
    else:
        # Fetch events from database
        start_date = dates.min().date()
        end_date = dates.max().date()

        try:
            asyncio.get_running_loop()
        except RuntimeError:
            called_from_running_loop = False
        else:
            called_from_running_loop = True

        if called_from_running_loop:
            # run_until_complete() cannot be nested inside a running loop.
            # Detect this before creating the coroutine so it is not left
            # un-awaited.
            logger.error(
                "Failed to fetch events from database: "
                "cannot block on the repository from inside a running event loop"
            )
        else:
            try:
                tenant_uuid = tenant_id if isinstance(tenant_id, UUID) else UUID(str(tenant_id))

                # Reuse this thread's loop when it has one; worker threads
                # (and newer Python versions) have none by default, so create
                # and register one instead of failing.
                try:
                    loop = asyncio.get_event_loop()
                except RuntimeError:
                    loop = None
                if loop is None or loop.is_closed():
                    loop = asyncio.new_event_loop()
                    asyncio.set_event_loop(loop)

                events_objects = loop.run_until_complete(
                    event_repository.get_events_by_date_range(
                        tenant_id=tenant_uuid,
                        start_date=start_date,
                        end_date=end_date,
                        confirmed_only=False
                    )
                )

                # Convert to dict format
                events = [event.to_dict() for event in events_objects]

            except Exception as e:
                # Deliberate best-effort: forecasting proceeds without events.
                logger.error(f"Failed to fetch events from database: {e}")
                events = []

    # Generate features
    generator = EventFeatureGenerator()
    return generator.generate_event_features(dates, events)
|
||||
Reference in New Issue
Block a user