Improve AI logic
services/forecasting/app/ml/dynamic_rules_engine.py (new file, 758 lines)
@@ -0,0 +1,758 @@
"""
Dynamic Business Rules Engine

Learns optimal adjustment factors from historical data instead of using hardcoded
values. Replaces hardcoded weather multipliers, holiday adjustments, and event
impacts with learned, statistically validated values.
"""

import pandas as pd
from typing import Dict, List, Any, Optional, Tuple
import structlog
from datetime import datetime
from scipy import stats

logger = structlog.get_logger()


class DynamicRulesEngine:
    """
    Learns business rules from historical data instead of using hardcoded values.

    Hardcoded values this engine replaces:
    - Weather: rain = -15%, snow = -25%, extreme_heat = -10%
    - Holidays: +50% (all holidays treated the same)
    - Events: +30% (all events treated the same)
    - Weekend: manual assumptions

    Dynamic approach:
    - Learn actual weather impact per condition, per product
    - Learn holiday multipliers per holiday type
    - Learn event impact per event type
    - Learn day-of-week patterns per product
    - Generate insights when learned values differ from hardcoded assumptions
    """

    def __init__(self):
        # Learned rules, keyed by inventory_product_id
        self.weather_rules = {}
        self.holiday_rules = {}
        self.event_rules = {}
        self.dow_rules = {}
        self.month_rules = {}

    async def learn_all_rules(
        self,
        tenant_id: str,
        inventory_product_id: str,
        sales_data: pd.DataFrame,
        external_data: Optional[pd.DataFrame] = None,
        min_samples: int = 10
    ) -> Dict[str, Any]:
        """
        Learn all business rules from historical data.

        Args:
            tenant_id: Tenant identifier
            inventory_product_id: Product identifier
            sales_data: Historical sales data with 'date' and 'quantity' columns
            external_data: Optional weather/events/holidays data
            min_samples: Minimum samples required to learn a rule

        Returns:
            Dictionary of learned rules and insights
        """
        logger.info(
            "Learning dynamic business rules from historical data",
            tenant_id=tenant_id,
            inventory_product_id=inventory_product_id,
            data_points=len(sales_data)
        )

        results = {
            'tenant_id': tenant_id,
            'inventory_product_id': inventory_product_id,
            'learned_at': datetime.utcnow().isoformat(),
            'rules': {},
            'insights': []
        }

        # Work on a copy so the caller's DataFrame is not mutated,
        # and make sure a datetime 'date' column exists
        sales_data = sales_data.copy()
        if 'date' not in sales_data.columns:
            sales_data['date'] = sales_data['ds']
        sales_data['date'] = pd.to_datetime(sales_data['date'])

        # Learn weather impact rules
        if external_data is not None and 'weather_condition' in external_data.columns:
            weather_rules, weather_insights = await self._learn_weather_rules(
                sales_data, external_data, min_samples
            )
            results['rules']['weather'] = weather_rules
            results['insights'].extend(weather_insights)
            self.weather_rules[inventory_product_id] = weather_rules

        # Learn holiday rules
        if external_data is not None and 'is_holiday' in external_data.columns:
            holiday_rules, holiday_insights = await self._learn_holiday_rules(
                sales_data, external_data, min_samples
            )
            results['rules']['holidays'] = holiday_rules
            results['insights'].extend(holiday_insights)
            self.holiday_rules[inventory_product_id] = holiday_rules

        # Learn event rules
        if external_data is not None and 'event_type' in external_data.columns:
            event_rules, event_insights = await self._learn_event_rules(
                sales_data, external_data, min_samples
            )
            results['rules']['events'] = event_rules
            results['insights'].extend(event_insights)
            self.event_rules[inventory_product_id] = event_rules

        # Learn day-of-week patterns (always available)
        dow_rules, dow_insights = await self._learn_day_of_week_rules(
            sales_data, min_samples
        )
        results['rules']['day_of_week'] = dow_rules
        results['insights'].extend(dow_insights)
        self.dow_rules[inventory_product_id] = dow_rules

        # Learn monthly seasonality
        month_rules, month_insights = await self._learn_month_rules(
            sales_data, min_samples
        )
        results['rules']['months'] = month_rules
        results['insights'].extend(month_insights)
        self.month_rules[inventory_product_id] = month_rules

        logger.info(
            "Dynamic rules learning complete",
            total_insights=len(results['insights']),
            rules_learned=len(results['rules'])
        )

        return results
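
    # Usage sketch (illustrative, not part of the commit): learning rules for one
    # product from a daily sales DataFrame. Column names follow the docstring above;
    # the asyncio setup and identifiers are assumptions for demonstration only.
    #
    #     import asyncio
    #     engine = DynamicRulesEngine()
    #     results = asyncio.run(engine.learn_all_rules(
    #         tenant_id="tenant-1",
    #         inventory_product_id="sku-42",
    #         sales_data=sales_df,          # columns: 'date', 'quantity'
    #         external_data=external_df,    # optional weather/holiday/event columns
    #         min_samples=10,
    #     ))
    #     print(len(results['insights']), list(results['rules']))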

    async def _learn_weather_rules(
        self,
        sales_data: pd.DataFrame,
        external_data: pd.DataFrame,
        min_samples: int
    ) -> Tuple[Dict[str, Any], List[Dict[str, Any]]]:
        """
        Learn actual weather impact from historical data.

        Hardcoded assumptions:
        - rain: -15%
        - snow: -25%
        - extreme_heat: -10%

        Learn the actual impact for this product.
        """
        logger.info("Learning weather impact rules")

        # Merge sales with weather data
        merged = sales_data.merge(
            external_data[['date', 'weather_condition', 'temperature', 'precipitation']],
            on='date',
            how='left'
        )

        # Baseline: average sales on clear days (missing weather treated as clear)
        clear_days = merged[
            (merged['weather_condition'].isin(['clear', 'sunny', 'partly_cloudy'])) |
            (merged['weather_condition'].isna())
        ]
        baseline_avg = clear_days['quantity'].mean()

        weather_rules = {
            'baseline_avg': float(baseline_avg),
            'conditions': {}
        }

        insights = []

        # Hardcoded values for comparison
        hardcoded_impacts = {
            'rain': -0.15,
            'snow': -0.25,
            'extreme_heat': -0.10
        }

        # Learn impact for each weather condition
        for condition in ['rain', 'rainy', 'snow', 'snowy', 'extreme_heat', 'hot', 'storm', 'fog']:
            condition_days = merged[merged['weather_condition'].str.contains(condition, case=False, na=False)]

            if len(condition_days) >= min_samples:
                condition_avg = condition_days['quantity'].mean()
                learned_impact = (condition_avg - baseline_avg) / baseline_avg

                # Statistical significance test (Welch's t-test)
                t_stat, p_value = stats.ttest_ind(
                    condition_days['quantity'].values,
                    clear_days['quantity'].values,
                    equal_var=False
                )

                weather_rules['conditions'][condition] = {
                    'learned_multiplier': float(1 + learned_impact),
                    'learned_impact_pct': float(learned_impact * 100),
                    'sample_size': int(len(condition_days)),
                    'avg_quantity': float(condition_avg),
                    'p_value': float(p_value),
                    'significant': bool(p_value < 0.05)
                }

                # Compare with the hardcoded value if one exists
                if condition in hardcoded_impacts and p_value < 0.05:
                    hardcoded_impact = hardcoded_impacts[condition]
                    difference = abs(learned_impact - hardcoded_impact)

                    if difference > 0.05:  # more than 5 percentage points apart
                        insight = {
                            'type': 'optimization',
                            'priority': 'high' if difference > 0.15 else 'medium',
                            'category': 'forecasting',
                            'title': f'Weather Rule Mismatch: {condition.title()}',
                            'description': f'Learned {condition} impact is {learned_impact*100:.1f}% vs hardcoded {hardcoded_impact*100:.1f}%. Updating the rule could improve forecast accuracy by up to {difference*100:.1f} percentage points.',
                            'impact_type': 'forecast_improvement',
                            'impact_value': difference * 100,
                            'impact_unit': 'percentage_points',
                            'confidence': self._calculate_confidence(len(condition_days), p_value),
                            'metrics_json': {
                                'weather_condition': condition,
                                'learned_impact_pct': round(learned_impact * 100, 2),
                                'hardcoded_impact_pct': round(hardcoded_impact * 100, 2),
                                'difference_pct': round(difference * 100, 2),
                                'baseline_avg': round(baseline_avg, 2),
                                'condition_avg': round(condition_avg, 2),
                                'sample_size': len(condition_days),
                                'p_value': round(p_value, 4)
                            },
                            'actionable': True,
                            'recommendation_actions': [
                                {
                                    'label': 'Update Weather Rule',
                                    'action': 'update_weather_multiplier',
                                    'params': {
                                        'condition': condition,
                                        'new_multiplier': round(1 + learned_impact, 3)
                                    }
                                }
                            ],
                            'source_service': 'forecasting',
                            'source_model': 'dynamic_rules_engine'
                        }
                        insights.append(insight)

                        logger.info(
                            "Weather rule discrepancy detected",
                            condition=condition,
                            learned=f"{learned_impact*100:.1f}%",
                            hardcoded=f"{hardcoded_impact*100:.1f}%"
                        )

        return weather_rules, insights
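
    # Illustrative check of the significance test used above (standalone sketch
    # with synthetic numbers, not part of the commit): Welch's t-test compares
    # condition-day sales against clear-day sales without assuming equal variance.
    #
    #     import numpy as np
    #     from scipy import stats
    #     rng = np.random.default_rng(0)
    #     clear = rng.normal(100, 15, size=200)  # clear-day quantities
    #     rain = rng.normal(85, 15, size=40)     # rainy days, roughly -15%
    #     t_stat, p_value = stats.ttest_ind(rain, clear, equal_var=False)
    #     # p_value < 0.05 would mark the learned rain impact as significant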

    async def _learn_holiday_rules(
        self,
        sales_data: pd.DataFrame,
        external_data: pd.DataFrame,
        min_samples: int
    ) -> Tuple[Dict[str, Any], List[Dict[str, Any]]]:
        """
        Learn holiday impact by holiday type instead of a uniform +50%.

        Hardcoded: all holidays = +50%
        Learned: Christmas, Easter, and national holidays get separate multipliers
        """
        logger.info("Learning holiday impact rules")

        # Merge sales with holiday data
        merged = sales_data.merge(
            external_data[['date', 'is_holiday', 'holiday_name', 'holiday_type']],
            on='date',
            how='left'
        )

        # Baseline: non-holiday average ('== False' also excludes rows with missing holiday data)
        non_holidays = merged[merged['is_holiday'] == False]
        baseline_avg = non_holidays['quantity'].mean()

        holiday_rules = {
            'baseline_avg': float(baseline_avg),
            'hardcoded_multiplier': 1.5,  # current +50%
            'holiday_types': {}
        }

        insights = []

        # Learn impact per holiday type
        if 'holiday_type' in merged.columns:
            for holiday_type in merged[merged['is_holiday'] == True]['holiday_type'].unique():
                if pd.isna(holiday_type):
                    continue

                holiday_days = merged[merged['holiday_type'] == holiday_type]

                if len(holiday_days) >= min_samples:
                    holiday_avg = holiday_days['quantity'].mean()
                    learned_multiplier = holiday_avg / baseline_avg
                    learned_impact = (learned_multiplier - 1) * 100

                    # Statistical test (Welch's t-test)
                    t_stat, p_value = stats.ttest_ind(
                        holiday_days['quantity'].values,
                        non_holidays['quantity'].values,
                        equal_var=False
                    )

                    holiday_rules['holiday_types'][holiday_type] = {
                        'learned_multiplier': float(learned_multiplier),
                        'learned_impact_pct': float(learned_impact),
                        'sample_size': int(len(holiday_days)),
                        'avg_quantity': float(holiday_avg),
                        'p_value': float(p_value),
                        'significant': bool(p_value < 0.05)
                    }

                    # Compare with hardcoded +50%
                    hardcoded_multiplier = 1.5
                    difference = abs(learned_multiplier - hardcoded_multiplier)

                    if difference > 0.1 and p_value < 0.05:  # multiplier differs by more than 0.1
                        insight = {
                            'type': 'recommendation',
                            'priority': 'high' if difference > 0.3 else 'medium',
                            'category': 'forecasting',
                            'title': f'Holiday Rule Optimization: {holiday_type}',
                            'description': f'{holiday_type} shows a {learned_impact:.1f}% impact vs the hardcoded +50%. Using the learned multiplier of {learned_multiplier:.2f}x could improve forecast accuracy.',
                            'impact_type': 'forecast_improvement',
                            'impact_value': difference * 100,
                            'impact_unit': 'percentage_points',
                            'confidence': self._calculate_confidence(len(holiday_days), p_value),
                            'metrics_json': {
                                'holiday_type': holiday_type,
                                'learned_multiplier': round(learned_multiplier, 3),
                                'hardcoded_multiplier': 1.5,
                                'learned_impact_pct': round(learned_impact, 2),
                                'hardcoded_impact_pct': 50.0,
                                'baseline_avg': round(baseline_avg, 2),
                                'holiday_avg': round(holiday_avg, 2),
                                'sample_size': len(holiday_days),
                                'p_value': round(p_value, 4)
                            },
                            'actionable': True,
                            'recommendation_actions': [
                                {
                                    'label': 'Update Holiday Rule',
                                    'action': 'update_holiday_multiplier',
                                    'params': {
                                        'holiday_type': holiday_type,
                                        'new_multiplier': round(learned_multiplier, 3)
                                    }
                                }
                            ],
                            'source_service': 'forecasting',
                            'source_model': 'dynamic_rules_engine'
                        }
                        insights.append(insight)

                        logger.info(
                            "Holiday rule optimization identified",
                            holiday_type=holiday_type,
                            learned=f"{learned_multiplier:.2f}x",
                            hardcoded="1.5x"
                        )

        # Overall holiday impact
        all_holidays = merged[merged['is_holiday'] == True]
        if len(all_holidays) >= min_samples:
            overall_avg = all_holidays['quantity'].mean()
            overall_multiplier = overall_avg / baseline_avg

            holiday_rules['overall_learned_multiplier'] = float(overall_multiplier)
            holiday_rules['overall_learned_impact_pct'] = float((overall_multiplier - 1) * 100)

        return holiday_rules, insights
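
    # Sketch of how a learned holiday multiplier could replace the hardcoded +50%
    # at forecast time (hypothetical caller; `engine`, `base_forecast`, and the
    # keys are assumptions for illustration):
    #
    #     multiplier = engine.get_rule("sku-42", "holiday", "Christmas")
    #     if multiplier is None:
    #         multiplier = 1.5  # fall back to the hardcoded assumption
    #     adjusted_forecast = base_forecast * multiplier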

    async def _learn_event_rules(
        self,
        sales_data: pd.DataFrame,
        external_data: pd.DataFrame,
        min_samples: int
    ) -> Tuple[Dict[str, Any], List[Dict[str, Any]]]:
        """
        Learn event impact by event type instead of a uniform +30%.

        Hardcoded: all events = +30%
        Learned: sports events, concerts, and festivals get separate multipliers
        """
        logger.info("Learning event impact rules")

        # Merge sales with event data
        merged = sales_data.merge(
            external_data[['date', 'event_name', 'event_type', 'event_attendance']],
            on='date',
            how='left'
        )

        # Baseline: non-event days
        non_events = merged[merged['event_name'].isna()]
        baseline_avg = non_events['quantity'].mean()

        event_rules = {
            'baseline_avg': float(baseline_avg),
            'hardcoded_multiplier': 1.3,  # current +30%
            'event_types': {}
        }

        insights = []

        # Learn impact per event type
        if 'event_type' in merged.columns:
            for event_type in merged[merged['event_type'].notna()]['event_type'].unique():
                event_days = merged[merged['event_type'] == event_type]

                if len(event_days) >= min_samples:
                    event_avg = event_days['quantity'].mean()
                    learned_multiplier = event_avg / baseline_avg
                    learned_impact = (learned_multiplier - 1) * 100

                    # Statistical test (Welch's t-test)
                    t_stat, p_value = stats.ttest_ind(
                        event_days['quantity'].values,
                        non_events['quantity'].values,
                        equal_var=False
                    )

                    event_rules['event_types'][event_type] = {
                        'learned_multiplier': float(learned_multiplier),
                        'learned_impact_pct': float(learned_impact),
                        'sample_size': int(len(event_days)),
                        'avg_quantity': float(event_avg),
                        'p_value': float(p_value),
                        'significant': bool(p_value < 0.05)
                    }

                    # Compare with hardcoded +30%
                    hardcoded_multiplier = 1.3
                    difference = abs(learned_multiplier - hardcoded_multiplier)

                    if difference > 0.1 and p_value < 0.05:
                        insight = {
                            'type': 'recommendation',
                            'priority': 'medium',
                            'category': 'forecasting',
                            'title': f'Event Rule Optimization: {event_type}',
                            'description': f'{event_type} events show a {learned_impact:.1f}% impact vs the hardcoded +30%. Using the learned multiplier could improve event forecasts.',
                            'impact_type': 'forecast_improvement',
                            'impact_value': difference * 100,
                            'impact_unit': 'percentage_points',
                            'confidence': self._calculate_confidence(len(event_days), p_value),
                            'metrics_json': {
                                'event_type': event_type,
                                'learned_multiplier': round(learned_multiplier, 3),
                                'hardcoded_multiplier': 1.3,
                                'learned_impact_pct': round(learned_impact, 2),
                                'hardcoded_impact_pct': 30.0,
                                'baseline_avg': round(baseline_avg, 2),
                                'event_avg': round(event_avg, 2),
                                'sample_size': len(event_days),
                                'p_value': round(p_value, 4)
                            },
                            'actionable': True,
                            'recommendation_actions': [
                                {
                                    'label': 'Update Event Rule',
                                    'action': 'update_event_multiplier',
                                    'params': {
                                        'event_type': event_type,
                                        'new_multiplier': round(learned_multiplier, 3)
                                    }
                                }
                            ],
                            'source_service': 'forecasting',
                            'source_model': 'dynamic_rules_engine'
                        }
                        insights.append(insight)

        return event_rules, insights

    async def _learn_day_of_week_rules(
        self,
        sales_data: pd.DataFrame,
        min_samples: int
    ) -> Tuple[Dict[str, Any], List[Dict[str, Any]]]:
        """
        Learn day-of-week patterns per product, replacing general
        assumptions with product-specific multipliers.
        """
        logger.info("Learning day-of-week patterns")

        sales_data = sales_data.copy()
        sales_data['day_of_week'] = sales_data['date'].dt.dayofweek
        sales_data['day_name'] = sales_data['date'].dt.day_name()

        # Average per day of week
        dow_avg = sales_data.groupby('day_of_week')['quantity'].agg(['mean', 'std', 'count'])

        overall_avg = sales_data['quantity'].mean()

        dow_rules = {
            'overall_avg': float(overall_avg),
            'days': {}
        }

        insights = []

        day_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

        for dow in range(7):
            if dow not in dow_avg.index or dow_avg.loc[dow, 'count'] < min_samples:
                continue

            day_avg = dow_avg.loc[dow, 'mean']
            day_std = dow_avg.loc[dow, 'std']
            day_count = dow_avg.loc[dow, 'count']

            multiplier = day_avg / overall_avg
            impact_pct = (multiplier - 1) * 100

            # Coefficient of variation
            cv = (day_std / day_avg) if day_avg > 0 else 0

            dow_rules['days'][day_names[dow]] = {
                'day_of_week': int(dow),
                'learned_multiplier': float(multiplier),
                'impact_pct': float(impact_pct),
                'avg_quantity': float(day_avg),
                'std_quantity': float(day_std),
                'sample_size': int(day_count),
                'coefficient_of_variation': float(cv)
            }

            # Insight for significant deviations
            if abs(impact_pct) > 20:  # more than 20% from the weekly average
                insight = {
                    'type': 'insight',
                    'priority': 'medium' if abs(impact_pct) > 30 else 'low',
                    'category': 'forecasting',
                    'title': f'{day_names[dow]} Pattern: {abs(impact_pct):.0f}% {"Higher" if impact_pct > 0 else "Lower"}',
                    'description': f'{day_names[dow]} sales average {day_avg:.1f} units ({impact_pct:+.1f}% vs weekly average {overall_avg:.1f}). Consider this pattern in production planning.',
                    'impact_type': 'operational_insight',
                    'impact_value': abs(impact_pct),
                    'impact_unit': 'percentage',
                    'confidence': self._calculate_confidence(day_count, 0.01),  # no explicit test here; assume p ~ 0.01
                    'metrics_json': {
                        'day_of_week': day_names[dow],
                        'day_multiplier': round(multiplier, 3),
                        'impact_pct': round(impact_pct, 2),
                        'day_avg': round(day_avg, 2),
                        'overall_avg': round(overall_avg, 2),
                        'sample_size': int(day_count),
                        'std': round(day_std, 2)
                    },
                    'actionable': True,
                    'recommendation_actions': [
                        {
                            'label': 'Adjust Production Schedule',
                            'action': 'adjust_weekly_production',
                            'params': {
                                'day': day_names[dow],
                                'multiplier': round(multiplier, 3)
                            }
                        }
                    ],
                    'source_service': 'forecasting',
                    'source_model': 'dynamic_rules_engine'
                }
                insights.append(insight)

        return dow_rules, insights
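
    # Worked example of the day-of-week rule (numbers invented for illustration):
    # if Saturdays average 135 units against an overall average of 100, the
    # learned multiplier is 135 / 100 = 1.35 and the impact is +35%, which clears
    # the 20% insight threshold above at 'medium' priority.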

    async def _learn_month_rules(
        self,
        sales_data: pd.DataFrame,
        min_samples: int
    ) -> Tuple[Dict[str, Any], List[Dict[str, Any]]]:
        """
        Learn monthly seasonality patterns per product.
        """
        logger.info("Learning monthly seasonality patterns")

        sales_data = sales_data.copy()
        sales_data['month'] = sales_data['date'].dt.month
        sales_data['month_name'] = sales_data['date'].dt.month_name()

        # Average per month
        month_avg = sales_data.groupby('month')['quantity'].agg(['mean', 'std', 'count'])

        overall_avg = sales_data['quantity'].mean()

        month_rules = {
            'overall_avg': float(overall_avg),
            'months': {}
        }

        insights = []

        month_names = ['January', 'February', 'March', 'April', 'May', 'June',
                       'July', 'August', 'September', 'October', 'November', 'December']

        for month in range(1, 13):
            if month not in month_avg.index or month_avg.loc[month, 'count'] < min_samples:
                continue

            month_mean = month_avg.loc[month, 'mean']
            month_std = month_avg.loc[month, 'std']
            month_count = month_avg.loc[month, 'count']

            multiplier = month_mean / overall_avg
            impact_pct = (multiplier - 1) * 100

            month_rules['months'][month_names[month - 1]] = {
                'month': int(month),
                'learned_multiplier': float(multiplier),
                'impact_pct': float(impact_pct),
                'avg_quantity': float(month_mean),
                'std_quantity': float(month_std),
                'sample_size': int(month_count)
            }

            # Insight for significant seasonal patterns
            if abs(impact_pct) > 25:  # more than 25% seasonal variation
                insight = {
                    'type': 'insight',
                    'priority': 'medium',
                    'category': 'forecasting',
                    'title': f'Seasonal Pattern: {month_names[month - 1]} {abs(impact_pct):.0f}% {"Higher" if impact_pct > 0 else "Lower"}',
                    'description': f'{month_names[month - 1]} shows strong seasonality at {impact_pct:+.1f}% vs the annual average. Plan inventory accordingly.',
                    'impact_type': 'operational_insight',
                    'impact_value': abs(impact_pct),
                    'impact_unit': 'percentage',
                    'confidence': self._calculate_confidence(month_count, 0.01),
                    'metrics_json': {
                        'month': month_names[month - 1],
                        'multiplier': round(multiplier, 3),
                        'impact_pct': round(impact_pct, 2),
                        'month_avg': round(month_mean, 2),
                        'annual_avg': round(overall_avg, 2),
                        'sample_size': int(month_count)
                    },
                    'actionable': True,
                    'recommendation_actions': [
                        {
                            'label': 'Adjust Seasonal Planning',
                            'action': 'adjust_seasonal_forecast',
                            'params': {
                                'month': month_names[month - 1],
                                'multiplier': round(multiplier, 3)
                            }
                        }
                    ],
                    'source_service': 'forecasting',
                    'source_model': 'dynamic_rules_engine'
                }
                insights.append(insight)

        return month_rules, insights

    def _calculate_confidence(self, sample_size: int, p_value: float) -> int:
        """
        Calculate a confidence score (0-100) from sample size and statistical significance.

        Args:
            sample_size: Number of observations
            p_value: Statistical significance p-value

        Returns:
            Confidence score 0-100
        """
        # Sample size score (0-50 points)
        if sample_size >= 100:
            sample_score = 50
        elif sample_size >= 50:
            sample_score = 40
        elif sample_size >= 30:
            sample_score = 30
        elif sample_size >= 20:
            sample_score = 20
        else:
            sample_score = 10

        # Statistical significance score (0-50 points)
        if p_value < 0.001:
            sig_score = 50
        elif p_value < 0.01:
            sig_score = 45
        elif p_value < 0.05:
            sig_score = 35
        elif p_value < 0.1:
            sig_score = 20
        else:
            sig_score = 10

        return min(100, sample_score + sig_score)
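
    # Example scores from this rubric: 120 samples at p=0.0005 -> 50 + 50 = 100;
    # 25 samples at p=0.03 -> 20 + 35 = 55; 12 samples at p=0.2 -> 10 + 10 = 20.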

    def get_rule(
        self,
        inventory_product_id: str,
        rule_type: str,
        key: str
    ) -> Optional[float]:
        """
        Get the learned rule multiplier for a specific condition.

        Args:
            inventory_product_id: Product identifier
            rule_type: 'weather', 'holiday', 'event', 'day_of_week', or 'month'
            key: Specific condition key (e.g., 'rain', 'Christmas', 'Monday')

        Returns:
            Learned multiplier, or None if no rule was learned
        """
        if rule_type == 'weather':
            rules = self.weather_rules.get(inventory_product_id, {})
            return rules.get('conditions', {}).get(key, {}).get('learned_multiplier')

        elif rule_type == 'holiday':
            rules = self.holiday_rules.get(inventory_product_id, {})
            return rules.get('holiday_types', {}).get(key, {}).get('learned_multiplier')

        elif rule_type == 'event':
            rules = self.event_rules.get(inventory_product_id, {})
            return rules.get('event_types', {}).get(key, {}).get('learned_multiplier')

        elif rule_type == 'day_of_week':
            rules = self.dow_rules.get(inventory_product_id, {})
            return rules.get('days', {}).get(key, {}).get('learned_multiplier')

        elif rule_type == 'month':
            rules = self.month_rules.get(inventory_product_id, {})
            return rules.get('months', {}).get(key, {}).get('learned_multiplier')

        return None

    def export_rules_for_prophet(
        self,
        inventory_product_id: str
    ) -> Dict[str, Any]:
        """
        Export learned rules in a format suitable for Prophet model integration.

        Returns:
            Dictionary with multipliers for Prophet custom seasonality/regressors
        """
        return {
            'weather': self.weather_rules.get(inventory_product_id, {}),
            'holidays': self.holiday_rules.get(inventory_product_id, {}),
            'events': self.event_rules.get(inventory_product_id, {}),
            'day_of_week': self.dow_rules.get(inventory_product_id, {}),
            'months': self.month_rules.get(inventory_product_id, {})
        }
services/forecasting/app/ml/multi_horizon_forecaster.py (new file, 263 lines)
@@ -0,0 +1,263 @@
"""
Multi-Horizon Forecasting System

Generates forecasts for multiple time horizons (7, 14, 30, and 90 days).
"""

import pandas as pd
import numpy as np
from typing import Dict, List, Any, Optional
from datetime import datetime, timedelta, date
import structlog

logger = structlog.get_logger()


class MultiHorizonForecaster:
    """
    Multi-horizon forecasting with horizon-specific models.

    Horizons:
    - Short-term (1-7 days): high precision, detailed features
    - Medium-term (8-14 days): balanced approach
    - Long-term (15-30 days): focus on trends and seasonal patterns
    - Very long-term (31-90 days): strategic planning, major trends only
    """

    HORIZONS = {
        'short': (1, 7),
        'medium': (8, 14),
        'long': (15, 30),
        'very_long': (31, 90)
    }

    def __init__(self, base_forecaster=None):
        """
        Initialize the multi-horizon forecaster.

        Args:
            base_forecaster: Base forecaster (e.g., BakeryForecaster) to delegate to
        """
        self.base_forecaster = base_forecaster

    async def generate_multi_horizon_forecast(
        self,
        tenant_id: str,
        inventory_product_id: str,
        start_date: date,
        horizons: Optional[List[str]] = None,
        include_confidence_intervals: bool = True
    ) -> Dict[str, Any]:
        """
        Generate forecasts for multiple horizons.

        Args:
            tenant_id: Tenant identifier
            inventory_product_id: Product identifier
            start_date: Start date for forecasts
            horizons: Horizons to forecast ('short', 'medium', 'long', 'very_long')
            include_confidence_intervals: Include confidence intervals

        Returns:
            Dictionary with forecasts by horizon
        """
        if horizons is None:
            horizons = ['short', 'medium', 'long']

        logger.info(
            "Generating multi-horizon forecast",
            tenant_id=tenant_id,
            inventory_product_id=inventory_product_id,
            horizons=horizons
        )

        results = {
            'tenant_id': tenant_id,
            'inventory_product_id': inventory_product_id,
            'start_date': start_date.isoformat(),
            'generated_at': datetime.utcnow().isoformat(),  # UTC, consistent with the rules engine
            'horizons': {}
        }

        for horizon_name in horizons:
            if horizon_name not in self.HORIZONS:
                logger.warning(f"Unknown horizon: {horizon_name}, skipping")
                continue

            start_day, end_day = self.HORIZONS[horizon_name]

            # Generate forecast for this horizon
            horizon_forecast = await self._generate_horizon_forecast(
                tenant_id=tenant_id,
                inventory_product_id=inventory_product_id,
                start_date=start_date,
                days_ahead=end_day,
                horizon_name=horizon_name,
                include_confidence=include_confidence_intervals
            )

            results['horizons'][horizon_name] = horizon_forecast

        logger.info("Multi-horizon forecast complete",
                    horizons_generated=len(results['horizons']))

        return results
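
    # Usage sketch (illustrative; the forecaster wiring and identifiers are
    # assumptions for demonstration only):
    #
    #     import asyncio
    #     from datetime import date
    #     mhf = MultiHorizonForecaster(base_forecaster=my_forecaster)
    #     result = asyncio.run(mhf.generate_multi_horizon_forecast(
    #         tenant_id="tenant-1",
    #         inventory_product_id="sku-42",
    #         start_date=date.today(),
    #         horizons=["short", "medium"],
    #     ))
    #     for name, horizon in result['horizons'].items():
    #         print(name, horizon.get('aggregates'))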

    async def _generate_horizon_forecast(
        self,
        tenant_id: str,
        inventory_product_id: str,
        start_date: date,
        days_ahead: int,
        horizon_name: str,
        include_confidence: bool
    ) -> Dict[str, Any]:
        """
        Generate a forecast for a specific horizon.

        Args:
            tenant_id: Tenant identifier
            inventory_product_id: Product identifier
            start_date: Start date
            days_ahead: Number of days ahead
            horizon_name: Horizon name ('short', 'medium', etc.)
            include_confidence: Include confidence intervals

        Returns:
            Forecast data for the horizon
        """
        # Generate the date range
        dates = [start_date + timedelta(days=i) for i in range(days_ahead)]

        # Use the base forecaster if available
        if self.base_forecaster:
            forecasts = []

            for forecast_date in dates:
                try:
                    # This would call the actual forecasting service;
                    # for now, return a structured placeholder response
                    forecasts.append({
                        'date': forecast_date.isoformat(),
                        'predicted_demand': 0,  # placeholder
                        'confidence_lower': 0 if include_confidence else None,
                        'confidence_upper': 0 if include_confidence else None
                    })
                except Exception as e:
                    logger.error(f"Failed to generate forecast for {forecast_date}: {e}")

            return {
                'horizon_name': horizon_name,
                'days_ahead': days_ahead,
                'start_date': start_date.isoformat(),
                'end_date': dates[-1].isoformat(),
                'forecasts': forecasts,
                'aggregates': self._calculate_horizon_aggregates(forecasts)
            }
        else:
            logger.warning("No base forecaster available, returning placeholder")
            return {
                'horizon_name': horizon_name,
                'days_ahead': days_ahead,
                'forecasts': [],
                'aggregates': {}
            }

    def _calculate_horizon_aggregates(self, forecasts: List[Dict]) -> Dict[str, float]:
        """
        Calculate aggregate statistics for a horizon.

        Args:
            forecasts: List of daily forecasts

        Returns:
            Aggregate statistics
        """
        if not forecasts:
            return {}

        # Keep zero-demand days; only drop missing values
        demands = [f['predicted_demand'] for f in forecasts if f.get('predicted_demand') is not None]

        if not demands:
            return {}

        return {
            'total_demand': float(sum(demands)),
            'avg_daily_demand': float(np.mean(demands)),
            'max_daily_demand': float(max(demands)),
            'min_daily_demand': float(min(demands)),
            'demand_volatility': float(np.std(demands)) if len(demands) > 1 else 0.0
        }
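
    # Example (invented numbers): daily demands [10, 12, 8, 14] aggregate to
    # total 44, average 11.0, max 14, min 8, and volatility
    # np.std([10, 12, 8, 14]) ~= 2.24.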

    def get_horizon_recommendation(
        self,
        horizon_name: str,
        forecast_data: Dict[str, Any]
    ) -> Dict[str, Any]:
        """
        Generate recommendations based on a horizon forecast.

        Args:
            horizon_name: Horizon name
            forecast_data: Forecast data for the horizon

        Returns:
            Recommendations dictionary
        """
        aggregates = forecast_data.get('aggregates', {})
        total_demand = aggregates.get('total_demand', 0)
        volatility = aggregates.get('demand_volatility', 0)

        recommendations = {
            'horizon': horizon_name,
            'actions': []
        }

        if horizon_name == 'short':
            # Short-term: operational recommendations
            if total_demand > 0:
                recommendations['actions'].append(f"Prepare {total_demand:.0f} units for the next 7 days")
            if volatility > 10:
                recommendations['actions'].append("High volatility expected - increase safety stock")

        elif horizon_name == 'medium':
            # Medium-term: procurement planning
            recommendations['actions'].append(f"Order supplies for {total_demand:.0f} units (2-week demand)")
            if aggregates.get('max_daily_demand', 0) > aggregates.get('avg_daily_demand', 0) * 1.5:
                recommendations['actions'].append("Peak demand day detected - plan extra capacity")

        elif horizon_name == 'long':
            # Long-term: strategic planning (30-day horizon, roughly 4 weeks)
            avg_weekly_demand = total_demand / 4 if total_demand > 0 else 0
            recommendations['actions'].append(f"Monthly demand projection: {total_demand:.0f} units")
            recommendations['actions'].append(f"Average weekly demand: {avg_weekly_demand:.0f} units")

        elif horizon_name == 'very_long':
            # Very long-term: capacity planning
            recommendations['actions'].append(f"Quarterly demand projection: {total_demand:.0f} units")
            recommendations['actions'].append("Review capacity and staffing needs")

        return recommendations


def get_appropriate_horizons_for_use_case(use_case: str) -> List[str]:
    """
    Get appropriate forecast horizons for a use case.

    Args:
        use_case: Use case name (e.g., 'production_planning', 'procurement', 'strategic_planning')

    Returns:
        List of horizon names
    """
    use_case_horizons = {
        'production_planning': ['short'],
        'procurement': ['short', 'medium'],
        'inventory_optimization': ['short', 'medium'],
        'capacity_planning': ['medium', 'long'],
        'strategic_planning': ['long', 'very_long'],
        'financial_planning': ['long', 'very_long'],
        'all': ['short', 'medium', 'long', 'very_long']
    }

    return use_case_horizons.get(use_case, ['short', 'medium'])
services/forecasting/app/ml/pattern_detector.py (new file, 593 lines)
@@ -0,0 +1,593 @@
"""
Pattern Detection Engine for Sales Data

Automatically identifies patterns and generates insights.
"""

import pandas as pd
from typing import Dict, List, Any, Optional
from datetime import datetime
import structlog
from scipy import stats

logger = structlog.get_logger()


class SalesPatternDetector:
    """
    Detect sales patterns and generate actionable insights.

    Patterns detected:
    - Time-of-day patterns (hourly peaks)
    - Day-of-week patterns (weekend spikes)
    - Weekly seasonality patterns
    - Monthly patterns
    - Holiday impact patterns
    - Weather correlation patterns
    """

    def __init__(self, significance_threshold: float = 0.15):
        """
        Initialize the pattern detector.

        Args:
            significance_threshold: Minimum fractional difference to consider significant (default 0.15 = 15%)
        """
        self.significance_threshold = significance_threshold
        self.detected_patterns = []

    async def detect_all_patterns(
        self,
        tenant_id: str,
        inventory_product_id: str,
        sales_data: pd.DataFrame,
        min_confidence: int = 70
    ) -> List[Dict[str, Any]]:
        """
        Detect all patterns in sales data and generate insights.

        Args:
            tenant_id: Tenant identifier
            inventory_product_id: Product identifier
            sales_data: Sales data with columns: date, quantity (optional: hour, temperature, etc.)
            min_confidence: Minimum confidence score for insights

        Returns:
            List of insight dictionaries ready for the AI Insights Service
        """
        logger.info(
            "Starting pattern detection",
            tenant_id=tenant_id,
            product_id=inventory_product_id,
            data_points=len(sales_data)
        )

        insights = []

        # Work on a copy so helper methods don't mutate the caller's DataFrame,
        # and ensure the date column is datetime
        sales_data = sales_data.copy()
        if 'date' in sales_data.columns:
            sales_data['date'] = pd.to_datetime(sales_data['date'])

        # 1. Day-of-week patterns
        dow_insights = await self._detect_day_of_week_patterns(
            tenant_id, inventory_product_id, sales_data, min_confidence
        )
        insights.extend(dow_insights)

        # 2. Weekend vs weekday patterns
        weekend_insights = await self._detect_weekend_patterns(
            tenant_id, inventory_product_id, sales_data, min_confidence
        )
        insights.extend(weekend_insights)

        # 3. Month-end patterns
        month_end_insights = await self._detect_month_end_patterns(
            tenant_id, inventory_product_id, sales_data, min_confidence
        )
        insights.extend(month_end_insights)

        # 4. Hourly patterns (if hour data available)
        if 'hour' in sales_data.columns:
            hourly_insights = await self._detect_hourly_patterns(
                tenant_id, inventory_product_id, sales_data, min_confidence
            )
            insights.extend(hourly_insights)

        # 5. Weather correlation (if temperature data available)
        if 'temperature' in sales_data.columns:
            weather_insights = await self._detect_weather_correlations(
                tenant_id, inventory_product_id, sales_data, min_confidence
            )
            insights.extend(weather_insights)

        # 6. Trend detection
        trend_insights = await self._detect_trends(
            tenant_id, inventory_product_id, sales_data, min_confidence
        )
        insights.extend(trend_insights)

        logger.info(
            "Pattern detection complete",
            total_insights=len(insights),
            product_id=inventory_product_id
        )

        return insights
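
    # Usage sketch (illustrative; identifiers and the asyncio setup are
    # assumptions for demonstration only):
    #
    #     import asyncio
    #     detector = SalesPatternDetector(significance_threshold=0.15)
    #     insights = asyncio.run(detector.detect_all_patterns(
    #         tenant_id="tenant-1",
    #         inventory_product_id="sku-42",
    #         sales_data=sales_df,  # 'date', 'quantity' (+ optional 'hour', 'temperature')
    #         min_confidence=70,
    #     ))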

    async def _detect_day_of_week_patterns(
        self,
        tenant_id: str,
        inventory_product_id: str,
        sales_data: pd.DataFrame,
        min_confidence: int
    ) -> List[Dict[str, Any]]:
        """Detect day-of-week patterns (e.g., Friday sales spike)."""
        insights = []

        if 'date' not in sales_data.columns or 'quantity' not in sales_data.columns:
            return insights

        # Add day of week
        sales_data['day_of_week'] = sales_data['date'].dt.dayofweek
        sales_data['day_name'] = sales_data['date'].dt.day_name()

        # Average sales per day of week
        dow_avg = sales_data.groupby(['day_of_week', 'day_name'])['quantity'].agg(['mean', 'count']).reset_index()

        # Only consider days with sufficient data (at least 4 observations)
        dow_avg = dow_avg[dow_avg['count'] >= 4]

        if len(dow_avg) < 2:
            return insights

        overall_avg = sales_data['quantity'].mean()

        # Find days significantly different from average
        for _, row in dow_avg.iterrows():
            day_avg = row['mean']
            pct_diff = ((day_avg - overall_avg) / overall_avg) * 100

            if abs(pct_diff) > self.significance_threshold * 100:
                # Confidence based on sample size and consistency
                confidence = self._calculate_pattern_confidence(
                    sample_size=int(row['count']),
                    effect_size=abs(pct_diff) / 100,
                    variability=sales_data['quantity'].std()
                )

                if confidence >= min_confidence:
                    # Only days above the overall average are reported as insights
                    if pct_diff > 0:
                        insight = self._create_insight(
                            tenant_id=tenant_id,
                            inventory_product_id=inventory_product_id,
                            insight_type='pattern',
                            category='sales',
                            priority='medium' if pct_diff > 20 else 'low',
                            title=f'{row["day_name"]} Sales Pattern Detected',
                            description=f'Sales on {row["day_name"]} are {abs(pct_diff):.1f}% {"higher" if pct_diff > 0 else "lower"} than average ({day_avg:.1f} vs {overall_avg:.1f} units).',
                            confidence=confidence,
                            metrics={
                                'day_of_week': row['day_name'],
                                'avg_sales': float(day_avg),
                                'overall_avg': float(overall_avg),
                                'difference_pct': float(pct_diff),
                                'sample_size': int(row['count'])
                            },
                            actionable=True,
                            actions=[
                                {'label': 'Adjust Production', 'action': 'adjust_daily_production'},
                                {'label': 'Review Schedule', 'action': 'review_production_schedule'}
                            ]
                        )
                        insights.append(insight)

        return insights

    async def _detect_weekend_patterns(
        self,
        tenant_id: str,
        inventory_product_id: str,
        sales_data: pd.DataFrame,
        min_confidence: int
    ) -> List[Dict[str, Any]]:
        """Detect weekend vs weekday patterns."""
        insights = []

        if 'date' not in sales_data.columns or 'quantity' not in sales_data.columns:
            return insights

        # Classify weekend vs weekday
        sales_data['is_weekend'] = sales_data['date'].dt.dayofweek.isin([5, 6])

        # Calculate averages
        weekend_avg = sales_data[sales_data['is_weekend']]['quantity'].mean()
        weekday_avg = sales_data[~sales_data['is_weekend']]['quantity'].mean()

        weekend_count = sales_data[sales_data['is_weekend']]['quantity'].count()
        weekday_count = sales_data[~sales_data['is_weekend']]['quantity'].count()

        if weekend_count < 4 or weekday_count < 4:
            return insights

        pct_diff = ((weekend_avg - weekday_avg) / weekday_avg) * 100

        if abs(pct_diff) > self.significance_threshold * 100:
            confidence = self._calculate_pattern_confidence(
                sample_size=min(weekend_count, weekday_count),
                effect_size=abs(pct_diff) / 100,
                variability=sales_data['quantity'].std()
            )

            if confidence >= min_confidence:
                # Estimate monthly volume impact (~8 weekend days per month)
                impact_value = abs(weekend_avg - weekday_avg) * 8

                insight = self._create_insight(
                    tenant_id=tenant_id,
                    inventory_product_id=inventory_product_id,
                    insight_type='recommendation',
                    category='forecasting',
                    priority='high' if abs(pct_diff) > 25 else 'medium',
                    title=f'Weekend Demand Pattern: {abs(pct_diff):.0f}% {"Higher" if pct_diff > 0 else "Lower"}',
                    description=f'Weekend sales average {weekend_avg:.1f} units vs {weekday_avg:.1f} on weekdays ({abs(pct_diff):.0f}% {"increase" if pct_diff > 0 else "decrease"}). Recommend adjusting weekend production targets.',
                    confidence=confidence,
                    impact_type='revenue_increase' if pct_diff > 0 else 'cost_savings',
                    impact_value=float(impact_value),
                    impact_unit='units/month',
                    metrics={
                        'weekend_avg': float(weekend_avg),
                        'weekday_avg': float(weekday_avg),
                        'difference_pct': float(pct_diff),
                        'weekend_samples': int(weekend_count),
                        'weekday_samples': int(weekday_count)
                    },
                    actionable=True,
                    actions=[
                        {'label': 'Increase Weekend Production', 'action': 'adjust_weekend_production'},
                        {'label': 'Update Forecast Multiplier', 'action': 'update_forecast_rule'}
                    ]
                )
                insights.append(insight)

        return insights

    async def _detect_month_end_patterns(
        self,
        tenant_id: str,
        inventory_product_id: str,
        sales_data: pd.DataFrame,
        min_confidence: int
    ) -> List[Dict[str, Any]]:
        """Detect month-end and payday patterns."""
        insights = []

        if 'date' not in sales_data.columns or 'quantity' not in sales_data.columns:
            return insights

        # Identify payday periods (the 15th and the last 3 days of the month)
        sales_data['day_of_month'] = sales_data['date'].dt.day
        sales_data['is_payday'] = (
            (sales_data['day_of_month'] == 15) |
            (sales_data['day_of_month'] >= sales_data['date'].dt.days_in_month - 2)
        )

        payday_avg = sales_data[sales_data['is_payday']]['quantity'].mean()
        regular_avg = sales_data[~sales_data['is_payday']]['quantity'].mean()

        payday_count = sales_data[sales_data['is_payday']]['quantity'].count()

        if payday_count < 4:
            return insights

        pct_diff = ((payday_avg - regular_avg) / regular_avg) * 100

        if abs(pct_diff) > self.significance_threshold * 100:
            confidence = self._calculate_pattern_confidence(
                sample_size=payday_count,
                effect_size=abs(pct_diff) / 100,
                variability=sales_data['quantity'].std()
            )

            if confidence >= min_confidence and pct_diff > 0:
                insight = self._create_insight(
                    tenant_id=tenant_id,
                    inventory_product_id=inventory_product_id,
                    insight_type='pattern',
                    category='sales',
                    priority='medium',
                    title='Payday Shopping Pattern Detected',
                    description=f'Sales increase {pct_diff:.0f}% during payday periods (15th and month-end). Average {payday_avg:.1f} vs {regular_avg:.1f} units.',
                    confidence=confidence,
                    metrics={
                        'payday_avg': float(payday_avg),
                        'regular_avg': float(regular_avg),
                        'difference_pct': float(pct_diff)
                    },
                    actionable=True,
                    actions=[
                        {'label': 'Increase Payday Stock', 'action': 'adjust_payday_production'}
                    ]
                )
                insights.append(insight)

        return insights

    async def _detect_hourly_patterns(
        self,
        tenant_id: str,
        inventory_product_id: str,
        sales_data: pd.DataFrame,
        min_confidence: int
    ) -> List[Dict[str, Any]]:
        """Detect hourly sales patterns (if POS data is available)."""
        insights = []

        if 'hour' not in sales_data.columns or 'quantity' not in sales_data.columns:
            return insights

        hourly_avg = sales_data.groupby('hour')['quantity'].agg(['mean', 'count']).reset_index()
        hourly_avg = hourly_avg[hourly_avg['count'] >= 3]  # at least 3 observations

        if len(hourly_avg) < 3:
            return insights

        overall_avg = sales_data['quantity'].mean()

        # Find peak hours (top 3)
        top_hours = hourly_avg.nlargest(3, 'mean')

        for _, row in top_hours.iterrows():
            hour_avg = row['mean']
            pct_diff = ((hour_avg - overall_avg) / overall_avg) * 100

            if pct_diff > self.significance_threshold * 100:
                confidence = self._calculate_pattern_confidence(
                    sample_size=int(row['count']),
                    effect_size=pct_diff / 100,
                    variability=sales_data['quantity'].std()
                )

                if confidence >= min_confidence:
                    hour = int(row['hour'])
                    time_label = f"{hour:02d}:00-{(hour + 1):02d}:00"

                    insight = self._create_insight(
                        tenant_id=tenant_id,
                        inventory_product_id=inventory_product_id,
                        insight_type='pattern',
                        category='sales',
                        priority='low',
                        title=f'Peak Sales Hour: {time_label}',
                        description=f'Sales peak during {time_label} with {hour_avg:.1f} units ({pct_diff:.0f}% above average).',
                        confidence=confidence,
                        metrics={
                            'peak_hour': hour,
                            'avg_sales': float(hour_avg),
                            'overall_avg': float(overall_avg),
                            'difference_pct': float(pct_diff)
                        },
                        actionable=True,
                        actions=[
                            {'label': 'Ensure Fresh Stock', 'action': 'schedule_production'},
                            {'label': 'Increase Staffing', 'action': 'adjust_staffing'}
                        ]
                    )
                    insights.append(insight)

        return insights

    async def _detect_weather_correlations(
        self,
        tenant_id: str,
        inventory_product_id: str,
        sales_data: pd.DataFrame,
        min_confidence: int
    ) -> List[Dict[str, Any]]:
        """Detect weather-sales correlations."""
        insights = []

        if 'temperature' not in sales_data.columns or 'quantity' not in sales_data.columns:
            return insights

        # Remove NaN values
        clean_data = sales_data[['temperature', 'quantity']].dropna()

        if len(clean_data) < 30:  # need sufficient data
            return insights

        # Pearson correlation
        correlation, p_value = stats.pearsonr(clean_data['temperature'], clean_data['quantity'])

        if abs(correlation) > 0.3 and p_value < 0.05:  # at least moderate and significant
            confidence = self._calculate_correlation_confidence(correlation, p_value, len(clean_data))

            if confidence >= min_confidence:
                direction = 'increase' if correlation > 0 else 'decrease'

                insight = self._create_insight(
                    tenant_id=tenant_id,
                    inventory_product_id=inventory_product_id,
                    insight_type='insight',
                    category='forecasting',
                    priority='medium' if abs(correlation) > 0.5 else 'low',
                    title=f'Temperature Impact on Sales: {abs(correlation):.0%} Correlation',
                    description=f'Sales {direction} with temperature (correlation: {correlation:.2f}). Warmer weather is associated with {"higher" if correlation > 0 else "lower"} sales.',
                    confidence=confidence,
                    metrics={
                        'correlation': float(correlation),
                        'p_value': float(p_value),
                        'sample_size': len(clean_data),
                        'direction': direction
                    },
                    actionable=False
                )
                insights.append(insight)

        return insights
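
    # Illustrative correlation check (standalone sketch with synthetic data,
    # not part of the commit):
    #
    #     import numpy as np
    #     from scipy import stats
    #     rng = np.random.default_rng(1)
    #     temp = rng.uniform(0, 30, size=60)
    #     qty = 50 + 1.5 * temp + rng.normal(0, 10, size=60)  # warm days sell more
    #     r, p = stats.pearsonr(temp, qty)
    #     # r > 0.3 with p < 0.05 would trigger a temperature insight above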

    async def _detect_trends(
        self,
        tenant_id: str,
        inventory_product_id: str,
        sales_data: pd.DataFrame,
        min_confidence: int
    ) -> List[Dict[str, Any]]:
        """Detect overall trends (growing, declining, stable)."""
        insights = []

        if 'date' not in sales_data.columns or 'quantity' not in sales_data.columns or len(sales_data) < 60:
            return insights

        # Sort by date
        sales_data = sales_data.sort_values('date')

        # 30-day rolling average
        sales_data['rolling_30d'] = sales_data['quantity'].rolling(window=30, min_periods=15).mean()

        # Compare the first and last 30-day averages
        first_30_avg = sales_data['rolling_30d'].iloc[:30].mean()
        last_30_avg = sales_data['rolling_30d'].iloc[-30:].mean()

        if pd.isna(first_30_avg) or pd.isna(last_30_avg):
            return insights

        pct_change = ((last_30_avg - first_30_avg) / first_30_avg) * 100

        if abs(pct_change) > 10:  # treat >10% change as significant
            confidence = min(95, 70 + int(abs(pct_change)))  # larger change -> higher confidence

            trend_type = 'growing' if pct_change > 0 else 'declining'

            insight = self._create_insight(
                tenant_id=tenant_id,
                inventory_product_id=inventory_product_id,
                insight_type='prediction',
                category='forecasting',
                priority='high' if abs(pct_change) > 20 else 'medium',
                title=f'Sales Trend: {trend_type.title()} {abs(pct_change):.0f}%',
                description=f'Sales show a {trend_type} trend over the period. Current 30-day average: {last_30_avg:.1f} vs earlier: {first_30_avg:.1f} ({pct_change:+.0f}%).',
                confidence=confidence,
                metrics={
                    'current_avg': float(last_30_avg),
                    'previous_avg': float(first_30_avg),
                    'change_pct': float(pct_change),
                    'trend': trend_type
                },
                actionable=True,
                actions=[
                    {'label': 'Adjust Forecast Model', 'action': 'update_forecast'},
                    {'label': 'Review Capacity', 'action': 'review_production_capacity'}
                ]
            )
            insights.append(insight)

        return insights
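
    # Example from the confidence formula above: a 12% change gives
    # min(95, 70 + 12) = 82; a 40% change saturates at 95.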

    def _calculate_pattern_confidence(
        self,
        sample_size: int,
        effect_size: float,
        variability: float
    ) -> int:
        """
        Calculate a confidence score for a detected pattern.

        Args:
            sample_size: Number of observations
            effect_size: Size of the effect (e.g., 0.25 for a 25% difference)
            variability: Standard deviation of the data

        Returns:
            Confidence score (0-100)
        """
        # Base confidence from sample size
        if sample_size < 4:
            base = 50
        elif sample_size < 10:
            base = 65
        elif sample_size < 30:
            base = 75
        elif sample_size < 100:
            base = 85
        else:
            base = 90

        # Adjust for effect size
        effect_boost = min(15, effect_size * 30)

        # Penalize high variability (heuristic on the quantity scale)
        variability_penalty = min(10, variability / 10)

        confidence = base + effect_boost - variability_penalty

        return int(max(0, min(100, confidence)))
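
    # Worked example (invented inputs): sample_size=25, effect_size=0.25,
    # variability=12 -> base 75 + min(15, 7.5) - min(10, 1.2) = 81.3 -> int 81.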

    def _calculate_correlation_confidence(
        self,
        correlation: float,
        p_value: float,
        sample_size: int
    ) -> int:
        """Calculate confidence for correlation insights."""
        # Base confidence from correlation strength
        base = abs(correlation) * 100

        # Boost for significance
        if p_value < 0.001:
            significance_boost = 15
        elif p_value < 0.01:
            significance_boost = 10
        elif p_value < 0.05:
            significance_boost = 5
        else:
            significance_boost = 0

        # Boost for sample size
        if sample_size > 100:
            sample_boost = 10
        elif sample_size > 50:
            sample_boost = 5
        else:
            sample_boost = 0

        confidence = base + significance_boost + sample_boost

        return int(max(0, min(100, confidence)))

    def _create_insight(
        self,
        tenant_id: str,
        inventory_product_id: str,
        insight_type: str,
        category: str,
        priority: str,
        title: str,
        description: str,
        confidence: int,
        metrics: Dict[str, Any],
        actionable: bool,
        actions: Optional[List[Dict[str, str]]] = None,
        impact_type: Optional[str] = None,
        impact_value: Optional[float] = None,
        impact_unit: Optional[str] = None
    ) -> Dict[str, Any]:
        """Create an insight dictionary for the AI Insights Service."""
        return {
            'tenant_id': tenant_id,
            'type': insight_type,
            'priority': priority,
            'category': category,
            'title': title,
            'description': description,
            'impact_type': impact_type,
            'impact_value': impact_value,
            'impact_unit': impact_unit,
            'confidence': confidence,
            'metrics_json': metrics,
            'actionable': actionable,
            'recommendation_actions': actions or [],
            'source_service': 'forecasting',
            'source_data_id': f'pattern_detection_{inventory_product_id}_{datetime.utcnow().strftime("%Y%m%d")}'
        }

@@ -25,20 +25,52 @@ class BakeryPredictor:
     """
     Advanced predictor for bakery demand forecasting with dependency injection
     Handles Prophet models and business-specific logic
     """

-    def __init__(self, database_manager=None):
+    def __init__(self, database_manager=None, use_dynamic_rules=True):
         self.database_manager = database_manager or create_database_manager(settings.DATABASE_URL, "forecasting-service")
         self.model_cache = {}
-        self.business_rules = BakeryBusinessRules()
+        self.use_dynamic_rules = use_dynamic_rules
+
+        if use_dynamic_rules:
+            from app.ml.dynamic_rules_engine import DynamicRulesEngine
+            from shared.clients.ai_insights_client import AIInsightsClient
+            self.rules_engine = DynamicRulesEngine()
+            self.ai_insights_client = AIInsightsClient(
+                base_url=settings.AI_INSIGHTS_SERVICE_URL or "http://ai-insights-service:8000"
+            )
+        else:
+            self.business_rules = BakeryBusinessRules()

 class BakeryForecaster:
     """
     Enhanced forecaster that integrates with repository pattern
     Uses enhanced features from training service for predictions
     """

-    def __init__(self, database_manager=None):
+    def __init__(self, database_manager=None, use_enhanced_features=True):
         self.database_manager = database_manager or create_database_manager(settings.DATABASE_URL, "forecasting-service")
         self.predictor = BakeryPredictor(database_manager)
+        self.use_enhanced_features = use_enhanced_features
+
+        if use_enhanced_features:
+            # Import enhanced data processor from training service
+            import sys
+            import os
+            # Add training service to path
+            training_path = os.path.join(os.path.dirname(__file__), '../../../training')
+            if training_path not in sys.path:
+                sys.path.insert(0, training_path)
+
+            try:
+                from app.ml.data_processor import EnhancedBakeryDataProcessor
+                self.data_processor = EnhancedBakeryDataProcessor(database_manager)
+                logger.info("Enhanced features enabled for forecasting")
+            except ImportError as e:
+                logger.warning(f"Could not import EnhancedBakeryDataProcessor: {e}, falling back to basic features")
+                self.use_enhanced_features = False
+                self.data_processor = None
+        else:
+            self.data_processor = None

     async def generate_forecast_with_repository(self, tenant_id: str, inventory_product_id: str,
                                                 forecast_date: date, model_id: str = None) -> Dict[str, Any]:
@@ -110,45 +142,87 @@ class BakeryForecaster:
             logger.error("Error generating base prediction", error=str(e))
             raise

-    def _prepare_prophet_dataframe(self, features: Dict[str, Any]) -> pd.DataFrame:
-        """Convert features to Prophet-compatible DataFrame"""
+    async def _prepare_prophet_dataframe(self, features: Dict[str, Any],
+                                         historical_data: pd.DataFrame = None) -> pd.DataFrame:
+        """
+        Convert features to Prophet-compatible DataFrame.
+        Uses enhanced features when available (60+ features vs basic 10).
+        """

         try:
-            # Create base DataFrame
-            df = pd.DataFrame({
-                'ds': [pd.to_datetime(features['date'])]
-            })
-
-            # Add regressor features
-            feature_mapping = {
-                'temperature': 'temperature',
-                'precipitation': 'precipitation',
-                'humidity': 'humidity',
-                'wind_speed': 'wind_speed',
-                'traffic_volume': 'traffic_volume',
-                'pedestrian_count': 'pedestrian_count'
-            }
-
-            for feature_key, df_column in feature_mapping.items():
-                if feature_key in features and features[feature_key] is not None:
-                    df[df_column] = float(features[feature_key])
-                else:
-                    df[df_column] = 0.0
-
-            # Add categorical features
-            df['day_of_week'] = int(features.get('day_of_week', 0))
+            if self.use_enhanced_features and self.data_processor:
+                # Use enhanced data processor from training service
+                logger.info("Generating enhanced features for prediction")
+
+                # Create future date range
+                future_dates = pd.DatetimeIndex([pd.to_datetime(features['date'])])
+
+                # Prepare weather forecast DataFrame
+                weather_df = pd.DataFrame({
+                    'date': [pd.to_datetime(features['date'])],
+                    'temperature': [features.get('temperature', 15.0)],
+                    'precipitation': [features.get('precipitation', 0.0)],
+                    'humidity': [features.get('humidity', 60.0)],
+                    'wind_speed': [features.get('wind_speed', 5.0)],
+                    'pressure': [features.get('pressure', 1013.0)]
+                })
+
+                # Use data processor to create ALL enhanced features
+                df = await self.data_processor.prepare_prediction_features(
+                    future_dates=future_dates,
+                    weather_forecast=weather_df,
+                    traffic_forecast=None,  # Will add when traffic forecasting is implemented
+                    historical_data=historical_data  # For lagged features
+                )
+
+                logger.info(f"Generated {len(df.columns)} enhanced features for prediction")
+                return df
+
+            else:
+                # Fallback to basic features
+                logger.info("Using basic features for prediction")
+
+                # Create base DataFrame
+                df = pd.DataFrame({
+                    'ds': [pd.to_datetime(features['date'])]
+                })
+
+                # Add regressor features
+                feature_mapping = {
+                    'temperature': 'temperature',
+                    'precipitation': 'precipitation',
+                    'humidity': 'humidity',
+                    'wind_speed': 'wind_speed',
+                    'traffic_volume': 'traffic_volume',
+                    'pedestrian_count': 'pedestrian_count'
+                }
+
+                for feature_key, df_column in feature_mapping.items():
+                    if feature_key in features and features[feature_key] is not None:
+                        df[df_column] = float(features[feature_key])
+                    else:
+                        df[df_column] = 0.0
+
+                # Add categorical features
+                df['day_of_week'] = int(features.get('day_of_week', 0))
+                df['is_weekend'] = int(features.get('is_weekend', False))
+                df['is_holiday'] = int(features.get('is_holiday', False))
+
+                # Business type
+                business_type = features.get('business_type', 'individual')
+                df['is_central_workshop'] = int(business_type == 'central_workshop')
+
+                return df
+
+        except Exception as e:
+            logger.error(f"Error preparing Prophet dataframe: {e}, falling back to basic features")
+            # Fallback to basic implementation on error
+            df = pd.DataFrame({'ds': [pd.to_datetime(features['date'])]})
+            df['temperature'] = features.get('temperature', 15.0)
+            df['precipitation'] = features.get('precipitation', 0.0)
+            df['is_weekend'] = int(features.get('is_weekend', False))
+            df['is_holiday'] = int(features.get('is_holiday', False))
+
+            # Business type
+            business_type = features.get('business_type', 'individual')
+            df['is_central_workshop'] = int(business_type == 'central_workshop')
+
+            return df
-
-        except Exception as e:
-            logger.error("Error preparing Prophet dataframe", error=str(e))
-            raise
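# A sketch of what the basic (non-enhanced) branch builds for a single
# date (editorial example with hypothetical feature values; enhanced
# mode would add dozens more columns):
import pandas as pd

features = {
    'date': '2024-06-01', 'temperature': 22.5, 'precipitation': 0.0,
    'humidity': 55.0, 'wind_speed': 4.0, 'day_of_week': 5,
    'is_weekend': True, 'is_holiday': False, 'business_type': 'individual',
}
df = pd.DataFrame({'ds': [pd.to_datetime(features['date'])]})
for col in ('temperature', 'precipitation', 'humidity', 'wind_speed',
            'traffic_volume', 'pedestrian_count'):
    df[col] = float(features[col]) if features.get(col) is not None else 0.0
df['day_of_week'] = int(features.get('day_of_week', 0))
df['is_weekend'] = int(features.get('is_weekend', False))
df['is_holiday'] = int(features.get('is_holiday', False))
df['is_central_workshop'] = int(features.get('business_type') == 'central_workshop')
# Result: one row with 'ds' plus ten regressor columns, matching the fallback path above.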
     def _add_uncertainty_bands(self, prediction: Dict[str, float],
                                features: Dict[str, Any]) -> Dict[str, float]:
@@ -225,80 +299,256 @@ class BakeryForecaster:

     def _calculate_weekend_uncertainty(self, features: Dict[str, Any]) -> float:
         """Calculate weekend-based uncertainty"""

         if features.get('is_weekend', False):
             return 0.1  # 10% additional uncertainty on weekends
         return 0.0
+    async def _get_dynamic_rules(self, tenant_id: str, inventory_product_id: str, rule_type: str) -> Dict[str, float]:
+        """
+        Fetch learned dynamic rules from AI Insights Service.
+
+        Args:
+            tenant_id: Tenant UUID
+            inventory_product_id: Product UUID
+            rule_type: Type of rules (weather, temporal, holiday, etc.)
+
+        Returns:
+            Dictionary of learned rules with factors
+        """
+        try:
+            from uuid import UUID
+
+            # Fetch latest rules insight for this product
+            insights = await self.ai_insights_client.get_insights(
+                tenant_id=UUID(tenant_id),
+                filters={
+                    'category': 'forecasting',
+                    'actionable_only': False,
+                    'page_size': 100
+                }
+            )
+
+            if not insights or 'items' not in insights:
+                return {}
+
+            # Find the most recent rules insight for this product
+            for insight in insights['items']:
+                if insight.get('source_model') == 'dynamic_rules_engine':
+                    metrics = insight.get('metrics_json', {})
+                    if metrics.get('inventory_product_id') == inventory_product_id:
+                        rules_data = metrics.get('rules', {})
+                        return rules_data.get(rule_type, {})
+
+            return {}
+
+        except Exception as e:
+            logger.warning(f"Failed to fetch dynamic rules: {e}")
+            return {}
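# The lookup above assumes insights shaped roughly like this (a hypothetical
# example of what the dynamic rules engine posts; values invented):
example_item = {
    'source_model': 'dynamic_rules_engine',
    'metrics_json': {
        'inventory_product_id': 'prod-456',
        'rules': {
            'weather': {
                'rain_factor': 0.88,              # learned, vs. a hardcoded -15%
                'temperature_hot_factor': 0.93,
                'temperature_cold_factor': 1.07,
            },
            'temporal': {
                'weekend_factor': 1.18,
                'holiday_factor': 1.35,
            },
        },
    },
}
# _get_dynamic_rules(tenant, 'prod-456', 'weather') would return the 'weather' dict.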

 class BakeryBusinessRules:
     """
     Business rules for Spanish bakeries
     Applies domain-specific adjustments to predictions
+    Supports both dynamic learned rules and hardcoded fallbacks
     """

-    def apply_rules(self, prediction: Dict[str, float], features: Dict[str, Any],
-                    business_type: str) -> Dict[str, float]:
-        """Apply all business rules to prediction"""
+    def __init__(self, use_dynamic_rules=False, ai_insights_client=None):
+        self.use_dynamic_rules = use_dynamic_rules
+        self.ai_insights_client = ai_insights_client
+        self.rules_cache = {}
+
+    async def apply_rules(self, prediction: Dict[str, float], features: Dict[str, Any],
+                          business_type: str, tenant_id: str = None, inventory_product_id: str = None) -> Dict[str, float]:
+        """Apply all business rules to prediction (dynamic or hardcoded)"""

         adjusted_prediction = prediction.copy()
         # Apply weather rules
-        adjusted_prediction = self._apply_weather_rules(adjusted_prediction, features)
+        adjusted_prediction = await self._apply_weather_rules(
+            adjusted_prediction, features, tenant_id, inventory_product_id
+        )

         # Apply time-based rules
-        adjusted_prediction = self._apply_time_rules(adjusted_prediction, features)
+        adjusted_prediction = await self._apply_time_rules(
+            adjusted_prediction, features, tenant_id, inventory_product_id
+        )

         # Apply business type rules
         adjusted_prediction = self._apply_business_type_rules(adjusted_prediction, business_type)

         # Apply Spanish-specific rules
         adjusted_prediction = self._apply_spanish_rules(adjusted_prediction, features)

         return adjusted_prediction
-    def _apply_weather_rules(self, prediction: Dict[str, float],
-                             features: Dict[str, Any]) -> Dict[str, float]:
-        """Apply weather-based business rules"""
-
-        # Rain reduces foot traffic
-        precipitation = features.get('precipitation', 0)
-        if precipitation > 0:
-            rain_factor = settings.RAIN_IMPACT_FACTOR
-            prediction["yhat"] *= rain_factor
-            prediction["yhat_lower"] *= rain_factor
-            prediction["yhat_upper"] *= rain_factor
-
-        # Extreme temperatures affect different products differently
-        temperature = features.get('temperature')
-        if temperature is not None:
-            if temperature > settings.TEMPERATURE_THRESHOLD_HOT:
-                # Hot weather reduces bread sales, increases cold drinks
-                prediction["yhat"] *= 0.9
-            elif temperature < settings.TEMPERATURE_THRESHOLD_COLD:
-                # Cold weather increases hot beverage sales
-                prediction["yhat"] *= 1.1
+    async def _get_dynamic_rules(self, tenant_id: str, inventory_product_id: str, rule_type: str) -> Dict[str, float]:
+        """
+        Fetch learned dynamic rules from AI Insights Service.
+
+        Args:
+            tenant_id: Tenant UUID
+            inventory_product_id: Product UUID
+            rule_type: Type of rules (weather, temporal, holiday, etc.)
+
+        Returns:
+            Dictionary of learned rules with factors
+        """
+        # Check cache first
+        cache_key = f"{tenant_id}:{inventory_product_id}:{rule_type}"
+        if cache_key in self.rules_cache:
+            return self.rules_cache[cache_key]
+
+        try:
+            from uuid import UUID
+
+            if not self.ai_insights_client:
+                return {}
+
+            # Fetch latest rules insight for this product
+            insights = await self.ai_insights_client.get_insights(
+                tenant_id=UUID(tenant_id),
+                filters={
+                    'category': 'forecasting',
+                    'actionable_only': False,
+                    'page_size': 100
+                }
+            )
+
+            if not insights or 'items' not in insights:
+                return {}
+
+            # Find the most recent rules insight for this product
+            for insight in insights['items']:
+                if insight.get('source_model') == 'dynamic_rules_engine':
+                    metrics = insight.get('metrics_json', {})
+                    if metrics.get('inventory_product_id') == inventory_product_id:
+                        rules_data = metrics.get('rules', {})
+                        result = rules_data.get(rule_type, {})
+                        # Cache the result
+                        self.rules_cache[cache_key] = result
+                        return result
+
+            return {}
+
+        except Exception as e:
+            logger.warning(f"Failed to fetch dynamic rules: {e}")
+            return {}
+    async def _apply_weather_rules(self, prediction: Dict[str, float],
+                                   features: Dict[str, Any],
+                                   tenant_id: str = None,
+                                   inventory_product_id: str = None) -> Dict[str, float]:
+        """Apply weather-based business rules (dynamic or hardcoded fallback)"""
+
+        if self.use_dynamic_rules and tenant_id and inventory_product_id:
+            try:
+                # Fetch dynamic weather rules
+                rules = await self._get_dynamic_rules(tenant_id, inventory_product_id, 'weather')
+
+                # Apply learned rain impact
+                precipitation = features.get('precipitation', 0)
+                if precipitation > 0:
+                    rain_factor = rules.get('rain_factor', settings.RAIN_IMPACT_FACTOR)
+                    prediction["yhat"] *= rain_factor
+                    prediction["yhat_lower"] *= rain_factor
+                    prediction["yhat_upper"] *= rain_factor
+
+                # Apply learned temperature impact
+                temperature = features.get('temperature')
+                if temperature is not None:
+                    if temperature > settings.TEMPERATURE_THRESHOLD_HOT:
+                        hot_factor = rules.get('temperature_hot_factor', 0.9)
+                        prediction["yhat"] *= hot_factor
+                    elif temperature < settings.TEMPERATURE_THRESHOLD_COLD:
+                        cold_factor = rules.get('temperature_cold_factor', 1.1)
+                        prediction["yhat"] *= cold_factor
+
+            except Exception as e:
+                logger.warning(f"Failed to apply dynamic weather rules, using fallback: {e}")
+                # Fallback to hardcoded
+                precipitation = features.get('precipitation', 0)
+                if precipitation > 0:
+                    prediction["yhat"] *= settings.RAIN_IMPACT_FACTOR
+                    prediction["yhat_lower"] *= settings.RAIN_IMPACT_FACTOR
+                    prediction["yhat_upper"] *= settings.RAIN_IMPACT_FACTOR
+
+                temperature = features.get('temperature')
+                if temperature is not None:
+                    if temperature > settings.TEMPERATURE_THRESHOLD_HOT:
+                        prediction["yhat"] *= 0.9
+                    elif temperature < settings.TEMPERATURE_THRESHOLD_COLD:
+                        prediction["yhat"] *= 1.1
+        else:
+            # Use hardcoded rules
+            precipitation = features.get('precipitation', 0)
+            if precipitation > 0:
+                rain_factor = settings.RAIN_IMPACT_FACTOR
+                prediction["yhat"] *= rain_factor
+                prediction["yhat_lower"] *= rain_factor
+                prediction["yhat_upper"] *= rain_factor
+
+            temperature = features.get('temperature')
+            if temperature is not None:
+                if temperature > settings.TEMPERATURE_THRESHOLD_HOT:
+                    prediction["yhat"] *= 0.9
+                elif temperature < settings.TEMPERATURE_THRESHOLD_COLD:
+                    prediction["yhat"] *= 1.1
+
+        return prediction
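# A small worked example of the adjustment arithmetic above
# (editorial sketch; factor and prediction values are illustrative):
prediction = {"yhat": 120.0, "yhat_lower": 95.0, "yhat_upper": 150.0}
rain_factor = 0.88  # e.g. a learned rain impact, milder than the hardcoded default
for key in ("yhat", "yhat_lower", "yhat_upper"):
    prediction[key] *= rain_factor
# -> {'yhat': 105.6, 'yhat_lower': 83.6, 'yhat_upper': 132.0}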
-    def _apply_time_rules(self, prediction: Dict[str, float],
-                          features: Dict[str, Any]) -> Dict[str, float]:
-        """Apply time-based business rules"""
-
-        # Weekend adjustment
-        if features.get('is_weekend', False):
-            weekend_factor = settings.WEEKEND_ADJUSTMENT_FACTOR
-            prediction["yhat"] *= weekend_factor
-            prediction["yhat_lower"] *= weekend_factor
-            prediction["yhat_upper"] *= weekend_factor
-
-        # Holiday adjustment
-        if features.get('is_holiday', False):
-            holiday_factor = settings.HOLIDAY_ADJUSTMENT_FACTOR
-            prediction["yhat"] *= holiday_factor
-            prediction["yhat_lower"] *= holiday_factor
-            prediction["yhat_upper"] *= holiday_factor
+    async def _apply_time_rules(self, prediction: Dict[str, float],
+                                features: Dict[str, Any],
+                                tenant_id: str = None,
+                                inventory_product_id: str = None) -> Dict[str, float]:
+        """Apply time-based business rules (dynamic or hardcoded fallback)"""
+
+        if self.use_dynamic_rules and tenant_id and inventory_product_id:
+            try:
+                # Fetch dynamic temporal rules
+                rules = await self._get_dynamic_rules(tenant_id, inventory_product_id, 'temporal')
+
+                # Apply learned weekend adjustment
+                if features.get('is_weekend', False):
+                    weekend_factor = rules.get('weekend_factor', settings.WEEKEND_ADJUSTMENT_FACTOR)
+                    prediction["yhat"] *= weekend_factor
+                    prediction["yhat_lower"] *= weekend_factor
+                    prediction["yhat_upper"] *= weekend_factor
+
+                # Apply learned holiday adjustment
+                if features.get('is_holiday', False):
+                    holiday_factor = rules.get('holiday_factor', settings.HOLIDAY_ADJUSTMENT_FACTOR)
+                    prediction["yhat"] *= holiday_factor
+                    prediction["yhat_lower"] *= holiday_factor
+                    prediction["yhat_upper"] *= holiday_factor
+
+            except Exception as e:
+                logger.warning(f"Failed to apply dynamic time rules, using fallback: {e}")
+                # Fallback to hardcoded
+                if features.get('is_weekend', False):
+                    prediction["yhat"] *= settings.WEEKEND_ADJUSTMENT_FACTOR
+                    prediction["yhat_lower"] *= settings.WEEKEND_ADJUSTMENT_FACTOR
+                    prediction["yhat_upper"] *= settings.WEEKEND_ADJUSTMENT_FACTOR
+
+                if features.get('is_holiday', False):
+                    prediction["yhat"] *= settings.HOLIDAY_ADJUSTMENT_FACTOR
+                    prediction["yhat_lower"] *= settings.HOLIDAY_ADJUSTMENT_FACTOR
+                    prediction["yhat_upper"] *= settings.HOLIDAY_ADJUSTMENT_FACTOR
+        else:
+            # Use hardcoded rules
+            if features.get('is_weekend', False):
+                weekend_factor = settings.WEEKEND_ADJUSTMENT_FACTOR
+                prediction["yhat"] *= weekend_factor
+                prediction["yhat_lower"] *= weekend_factor
+                prediction["yhat_upper"] *= weekend_factor
+
+            if features.get('is_holiday', False):
+                holiday_factor = settings.HOLIDAY_ADJUSTMENT_FACTOR
+                prediction["yhat"] *= holiday_factor
+                prediction["yhat_lower"] *= holiday_factor
+                prediction["yhat_upper"] *= holiday_factor
+
+        return prediction

     def _apply_business_type_rules(self, prediction: Dict[str, float],
234
services/forecasting/app/ml/rules_orchestrator.py
Normal file
@@ -0,0 +1,234 @@
"""
Rules Orchestrator
Coordinates dynamic rules learning, insight posting, and integration with forecasting service
"""

import pandas as pd
from typing import Dict, List, Any, Optional
import structlog
from datetime import datetime
from uuid import UUID

from app.ml.dynamic_rules_engine import DynamicRulesEngine
from app.clients.ai_insights_client import AIInsightsClient

logger = structlog.get_logger()


class RulesOrchestrator:
    """
    Orchestrates dynamic rules learning and insight generation workflow.

    Workflow:
    1. Learn dynamic rules from historical data
    2. Generate insights comparing learned vs hardcoded rules
    3. Post insights to AI Insights Service
    4. Provide learned rules for forecasting integration
    5. Track rule updates and performance
    """

    def __init__(
        self,
        ai_insights_base_url: str = "http://ai-insights-service:8000"
    ):
        self.rules_engine = DynamicRulesEngine()
        self.ai_insights_client = AIInsightsClient(ai_insights_base_url)

    async def learn_and_post_rules(
        self,
        tenant_id: str,
        inventory_product_id: str,
        sales_data: pd.DataFrame,
        external_data: Optional[pd.DataFrame] = None,
        min_samples: int = 10
    ) -> Dict[str, Any]:
        """
        Complete workflow: learn rules and post insights.

        Args:
            tenant_id: Tenant identifier
            inventory_product_id: Product identifier
            sales_data: Historical sales data
            external_data: Optional weather/events/holidays data
            min_samples: Minimum samples for rule learning

        Returns:
            Workflow results with learned rules and posted insights
        """
        logger.info(
            "Starting dynamic rules learning workflow",
            tenant_id=tenant_id,
            inventory_product_id=inventory_product_id
        )

        # Step 1: Learn all rules from data
        rules_results = await self.rules_engine.learn_all_rules(
            tenant_id=tenant_id,
            inventory_product_id=inventory_product_id,
            sales_data=sales_data,
            external_data=external_data,
            min_samples=min_samples
        )

        logger.info(
            "Rules learning complete",
            insights_generated=len(rules_results['insights']),
            rules_learned=len(rules_results['rules'])
        )

        # Step 2: Enrich insights with tenant_id and product context
        enriched_insights = self._enrich_insights(
            rules_results['insights'],
            tenant_id,
            inventory_product_id
        )

        # Step 3: Post insights to AI Insights Service
        if enriched_insights:
            post_results = await self.ai_insights_client.create_insights_bulk(
                tenant_id=UUID(tenant_id),
                insights=enriched_insights
            )

            logger.info(
                "Insights posted to AI Insights Service",
                total=post_results['total'],
                successful=post_results['successful'],
                failed=post_results['failed']
            )
        else:
            post_results = {'total': 0, 'successful': 0, 'failed': 0}
            logger.info("No insights to post")

        # Step 4: Return comprehensive results
        return {
            'tenant_id': tenant_id,
            'inventory_product_id': inventory_product_id,
            'learned_at': rules_results['learned_at'],
            'rules': rules_results['rules'],
            'insights_generated': len(enriched_insights),
            'insights_posted': post_results['successful'],
            'insights_failed': post_results['failed'],
            'created_insights': post_results.get('created_insights', [])
        }
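# A minimal end-to-end sketch of driving this workflow (editorial example;
# identifiers and the flat sales history are invented, and it assumes the
# AI Insights service URL resolves in your environment):
import asyncio
import pandas as pd
from app.ml.rules_orchestrator import RulesOrchestrator

async def run_example():
    orchestrator = RulesOrchestrator()
    sales = pd.DataFrame({
        'date': pd.date_range('2024-01-01', periods=90, freq='D'),
        'quantity': 100,  # stand-in for real history
    })
    try:
        results = await orchestrator.learn_and_post_rules(
            tenant_id='00000000-0000-0000-0000-000000000001',             # hypothetical UUIDs
            inventory_product_id='00000000-0000-0000-0000-000000000002',
            sales_data=sales,
        )
        print(results['insights_posted'], 'insights posted')
    finally:
        await orchestrator.close()

asyncio.run(run_example())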
    def _enrich_insights(
        self,
        insights: List[Dict[str, Any]],
        tenant_id: str,
        inventory_product_id: str
    ) -> List[Dict[str, Any]]:
        """
        Enrich insights with required fields for AI Insights Service.

        Args:
            insights: Raw insights from rules engine
            tenant_id: Tenant identifier
            inventory_product_id: Product identifier

        Returns:
            Enriched insights ready for posting
        """
        enriched = []

        for insight in insights:
            # Add required tenant_id and product context
            enriched_insight = insight.copy()
            enriched_insight['tenant_id'] = tenant_id

            # Add product context to metrics
            if 'metrics_json' not in enriched_insight:
                enriched_insight['metrics_json'] = {}

            enriched_insight['metrics_json']['inventory_product_id'] = inventory_product_id

            # Add source metadata
            enriched_insight['source_service'] = 'forecasting'
            enriched_insight['source_model'] = 'dynamic_rules_engine'
            enriched_insight['detected_at'] = datetime.utcnow().isoformat()

            enriched.append(enriched_insight)

        return enriched

    async def get_learned_rules_for_forecasting(
        self,
        inventory_product_id: str
    ) -> Dict[str, Any]:
        """
        Get learned rules in a format ready for forecasting integration.

        Args:
            inventory_product_id: Product identifier

        Returns:
            Dictionary with learned multipliers for all rule types
        """
        return self.rules_engine.export_rules_for_prophet(inventory_product_id)

    def get_rule_multiplier(
        self,
        inventory_product_id: str,
        rule_type: str,
        key: str,
        default: float = 1.0
    ) -> float:
        """
        Get learned rule multiplier with fallback to default.

        Args:
            inventory_product_id: Product identifier
            rule_type: 'weather', 'holiday', 'event', 'day_of_week', 'month'
            key: Condition key
            default: Default multiplier if rule not learned

        Returns:
            Learned multiplier or default
        """
        learned = self.rules_engine.get_rule(inventory_product_id, rule_type, key)
        return learned if learned is not None else default
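# For example, a caller can prefer a learned multiplier and degrade
# gracefully to a configured default (editorial sketch; `orchestrator`
# and `prediction` continue the example above, values hypothetical):
saturday_factor = orchestrator.get_rule_multiplier(
    inventory_product_id='00000000-0000-0000-0000-000000000002',
    rule_type='day_of_week',
    key='saturday',
    default=1.15,  # fall back to the static weekend assumption
)
prediction['yhat'] *= saturday_factor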
    async def update_rules_periodically(
        self,
        tenant_id: str,
        inventory_product_id: str,
        sales_data: pd.DataFrame,
        external_data: Optional[pd.DataFrame] = None
    ) -> Dict[str, Any]:
        """
        Update learned rules with new data (for periodic refresh).

        Args:
            tenant_id: Tenant identifier
            inventory_product_id: Product identifier
            sales_data: Updated historical sales data
            external_data: Updated external data

        Returns:
            Update results
        """
        logger.info(
            "Updating learned rules with new data",
            tenant_id=tenant_id,
            inventory_product_id=inventory_product_id,
            new_data_points=len(sales_data)
        )

        # Re-learn rules with updated data
        results = await self.learn_and_post_rules(
            tenant_id=tenant_id,
            inventory_product_id=inventory_product_id,
            sales_data=sales_data,
            external_data=external_data
        )

        logger.info(
            "Rules update complete",
            insights_posted=results['insights_posted']
        )

        return results

    async def close(self):
        """Close HTTP client connections."""
        await self.ai_insights_client.close()

385
services/forecasting/app/ml/scenario_planner.py
Normal file
@@ -0,0 +1,385 @@
"""
Scenario Planning System
What-if analysis for demand forecasting
"""

import pandas as pd
import numpy as np
from typing import Dict, List, Any, Optional
from datetime import datetime, date, timedelta
import structlog
from enum import Enum

logger = structlog.get_logger()


class ScenarioType(str, Enum):
    """Types of scenarios"""
    BASELINE = "baseline"
    OPTIMISTIC = "optimistic"
    PESSIMISTIC = "pessimistic"
    CUSTOM = "custom"
    PROMOTION = "promotion"
    EVENT = "event"
    WEATHER = "weather"
    PRICE_CHANGE = "price_change"


class ScenarioPlanner:
    """
    Scenario planning for demand forecasting.

    Allows testing "what-if" scenarios:
    - What if we run a promotion?
    - What if there's a local festival?
    - What if weather is unusually bad?
    - What if we change prices?
    """

    def __init__(self, base_forecaster=None):
        """
        Initialize scenario planner.

        Args:
            base_forecaster: Base forecaster to use for baseline predictions
        """
        self.base_forecaster = base_forecaster

    async def create_scenario(
        self,
        tenant_id: str,
        inventory_product_id: str,
        scenario_name: str,
        scenario_type: ScenarioType,
        start_date: date,
        end_date: date,
        adjustments: Dict[str, Any]
    ) -> Dict[str, Any]:
        """
        Create a forecast scenario with adjustments.

        Args:
            tenant_id: Tenant identifier
            inventory_product_id: Product identifier
            scenario_name: Name for the scenario
            scenario_type: Type of scenario
            start_date: Scenario start date
            end_date: Scenario end date
            adjustments: Dictionary of adjustments to apply

        Returns:
            Scenario forecast results
        """
        logger.info(
            "Creating forecast scenario",
            tenant_id=tenant_id,
            inventory_product_id=inventory_product_id,
            scenario_name=scenario_name,
            scenario_type=scenario_type
        )

        # Generate baseline forecast first
        baseline_forecast = await self._generate_baseline_forecast(
            tenant_id=tenant_id,
            inventory_product_id=inventory_product_id,
            start_date=start_date,
            end_date=end_date
        )

        # Apply scenario adjustments
        scenario_forecast = self._apply_scenario_adjustments(
            baseline_forecast=baseline_forecast,
            adjustments=adjustments,
            scenario_type=scenario_type
        )

        # Calculate impact
        impact_analysis = self._calculate_scenario_impact(
            baseline_forecast=baseline_forecast,
            scenario_forecast=scenario_forecast
        )

        return {
            'scenario_id': f"scenario_{tenant_id}_{inventory_product_id}_{datetime.now().strftime('%Y%m%d%H%M%S')}",
            'scenario_name': scenario_name,
            'scenario_type': scenario_type,
            'tenant_id': tenant_id,
            'inventory_product_id': inventory_product_id,
            'date_range': {
                'start': start_date.isoformat(),
                'end': end_date.isoformat()
            },
            'baseline_forecast': baseline_forecast,
            'scenario_forecast': scenario_forecast,
            'impact_analysis': impact_analysis,
            'adjustments_applied': adjustments,
            'created_at': datetime.now().isoformat()
        }
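# A usage sketch (editorial example; identifiers invented). With the
# placeholder baseline of 100 units/day, a 1.5x promotion over 7 days
# yields a +50% impact, categorized as "major":
import asyncio
from datetime import date
from app.ml.scenario_planner import ScenarioPlanner, ScenarioType

async def run_scenario_example():
    planner = ScenarioPlanner()
    scenario = await planner.create_scenario(
        tenant_id='tenant-123',
        inventory_product_id='prod-456',
        scenario_name='Weekend Promotion',
        scenario_type=ScenarioType.PROMOTION,
        start_date=date(2024, 6, 1),
        end_date=date(2024, 6, 7),
        adjustments={'demand_multiplier': 1.5},
    )
    print(scenario['impact_analysis'])

asyncio.run(run_scenario_example())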
    async def compare_scenarios(
        self,
        scenarios: List[Dict[str, Any]]
    ) -> Dict[str, Any]:
        """
        Compare multiple scenarios side-by-side.

        Args:
            scenarios: List of scenario results from create_scenario()

        Returns:
            Comparison analysis
        """
        if len(scenarios) < 2:
            return {'error': 'Need at least 2 scenarios to compare'}

        comparison = {
            'scenarios_compared': len(scenarios),
            'scenario_names': [s['scenario_name'] for s in scenarios],
            'comparison_metrics': {}
        }

        # Extract total demand for each scenario
        for scenario in scenarios:
            scenario_name = scenario['scenario_name']
            scenario_forecast = scenario['scenario_forecast']

            total_demand = sum(f['predicted_demand'] for f in scenario_forecast)

            comparison['comparison_metrics'][scenario_name] = {
                'total_demand': total_demand,
                'avg_daily_demand': total_demand / len(scenario_forecast) if scenario_forecast else 0,
                'peak_demand': max(f['predicted_demand'] for f in scenario_forecast) if scenario_forecast else 0
            }

        # Determine best and worst scenarios
        total_demands = {
            name: metrics['total_demand']
            for name, metrics in comparison['comparison_metrics'].items()
        }

        comparison['best_scenario'] = max(total_demands, key=total_demands.get)
        comparison['worst_scenario'] = min(total_demands, key=total_demands.get)

        comparison['demand_range'] = {
            'min': min(total_demands.values()),
            'max': max(total_demands.values()),
            'spread': max(total_demands.values()) - min(total_demands.values())
        }

        return comparison
    async def _generate_baseline_forecast(
        self,
        tenant_id: str,
        inventory_product_id: str,
        start_date: date,
        end_date: date
    ) -> List[Dict[str, Any]]:
        """
        Generate baseline forecast without adjustments.

        Args:
            tenant_id: Tenant identifier
            inventory_product_id: Product identifier
            start_date: Start date
            end_date: End date

        Returns:
            List of daily forecasts
        """
        # Generate date range
        dates = []
        current_date = start_date
        while current_date <= end_date:
            dates.append(current_date)
            current_date += timedelta(days=1)

        # Placeholder forecast (in a real implementation, call the forecasting service)
        baseline = []
        for forecast_date in dates:
            baseline.append({
                'date': forecast_date.isoformat(),
                'predicted_demand': 100,  # Placeholder
                'confidence_lower': 80,
                'confidence_upper': 120
            })

        return baseline
    def _apply_scenario_adjustments(
        self,
        baseline_forecast: List[Dict[str, Any]],
        adjustments: Dict[str, Any],
        scenario_type: ScenarioType
    ) -> List[Dict[str, Any]]:
        """
        Apply adjustments to baseline forecast.

        Args:
            baseline_forecast: Baseline forecast data
            adjustments: Adjustments to apply
            scenario_type: Type of scenario

        Returns:
            Adjusted forecast
        """
        scenario_forecast = []

        for day_forecast in baseline_forecast:
            adjusted_forecast = day_forecast.copy()

            # Apply different adjustment types
            if 'demand_multiplier' in adjustments:
                # Multiply demand by factor
                multiplier = adjustments['demand_multiplier']
                adjusted_forecast['predicted_demand'] *= multiplier
                adjusted_forecast['confidence_lower'] *= multiplier
                adjusted_forecast['confidence_upper'] *= multiplier

            if 'demand_offset' in adjustments:
                # Add/subtract fixed amount
                offset = adjustments['demand_offset']
                adjusted_forecast['predicted_demand'] += offset
                adjusted_forecast['confidence_lower'] += offset
                adjusted_forecast['confidence_upper'] += offset

            if 'event_impact' in adjustments:
                # Apply event-specific impact
                event_multiplier = adjustments['event_impact']
                adjusted_forecast['predicted_demand'] *= event_multiplier

            if 'weather_impact' in adjustments:
                # Apply weather adjustments
                weather_factor = adjustments['weather_impact']
                adjusted_forecast['predicted_demand'] *= weather_factor

            if 'price_elasticity' in adjustments and 'price_change_percent' in adjustments:
                # Apply price elasticity
                elasticity = adjustments['price_elasticity']
                price_change = adjustments['price_change_percent']
                demand_change = -elasticity * price_change  # Negative correlation
                adjusted_forecast['predicted_demand'] *= (1 + demand_change)

            # Ensure non-negative demand
            adjusted_forecast['predicted_demand'] = max(0, adjusted_forecast['predicted_demand'])
            adjusted_forecast['confidence_lower'] = max(0, adjusted_forecast['confidence_lower'])

            scenario_forecast.append(adjusted_forecast)

        return scenario_forecast
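# The price-elasticity branch reduces to simple arithmetic; a worked
# example with illustrative numbers (not from the commit):
elasticity = 1.2           # elastic demand
price_change = 0.10        # +10% price
demand_change = -elasticity * price_change        # -0.12
baseline_demand = 100
adjusted = baseline_demand * (1 + demand_change)  # 88.0: a 10% price rise costs ~12% of demand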
    def _calculate_scenario_impact(
        self,
        baseline_forecast: List[Dict[str, Any]],
        scenario_forecast: List[Dict[str, Any]]
    ) -> Dict[str, Any]:
        """
        Calculate impact of scenario vs baseline.

        Args:
            baseline_forecast: Baseline forecast
            scenario_forecast: Scenario forecast

        Returns:
            Impact analysis
        """
        baseline_total = sum(f['predicted_demand'] for f in baseline_forecast)
        scenario_total = sum(f['predicted_demand'] for f in scenario_forecast)

        difference = scenario_total - baseline_total
        percent_change = (difference / baseline_total * 100) if baseline_total > 0 else 0

        return {
            'baseline_total_demand': baseline_total,
            'scenario_total_demand': scenario_total,
            'absolute_difference': difference,
            'percent_change': percent_change,
            'impact_category': self._categorize_impact(percent_change),
            'days_analyzed': len(baseline_forecast)
        }

    def _categorize_impact(self, percent_change: float) -> str:
        """Categorize impact magnitude"""
        if abs(percent_change) < 5:
            return "minimal"
        elif abs(percent_change) < 15:
            return "moderate"
        elif abs(percent_change) < 30:
            return "significant"
        else:
            return "major"
    def generate_predefined_scenarios(
        self,
        base_scenario: Dict[str, Any]
    ) -> List[Dict[str, Any]]:
        """
        Generate common predefined scenarios for comparison.

        Args:
            base_scenario: Base scenario parameters

        Returns:
            List of scenario configurations
        """
        scenarios = []

        # Baseline scenario
        scenarios.append({
            'scenario_name': 'Baseline',
            'scenario_type': ScenarioType.BASELINE,
            'adjustments': {}
        })

        # Optimistic scenario
        scenarios.append({
            'scenario_name': 'Optimistic',
            'scenario_type': ScenarioType.OPTIMISTIC,
            'adjustments': {
                'demand_multiplier': 1.2,  # 20% increase
                'description': '+20% demand increase'
            }
        })

        # Pessimistic scenario
        scenarios.append({
            'scenario_name': 'Pessimistic',
            'scenario_type': ScenarioType.PESSIMISTIC,
            'adjustments': {
                'demand_multiplier': 0.8,  # 20% decrease
                'description': '-20% demand decrease'
            }
        })

        # Promotion scenario
        scenarios.append({
            'scenario_name': 'Promotion Campaign',
            'scenario_type': ScenarioType.PROMOTION,
            'adjustments': {
                'demand_multiplier': 1.5,  # 50% increase
                'description': '50% promotion boost'
            }
        })

        # Bad weather scenario
        scenarios.append({
            'scenario_name': 'Bad Weather',
            'scenario_type': ScenarioType.WEATHER,
            'adjustments': {
                'weather_impact': 0.7,  # 30% decrease
                'description': 'Bad weather reduces foot traffic'
            }
        })

        # Price increase scenario
        scenarios.append({
            'scenario_name': 'Price Increase 10%',
            'scenario_type': ScenarioType.PRICE_CHANGE,
            'adjustments': {
                'price_elasticity': 1.2,  # Elastic demand
                'price_change_percent': 0.10,  # 10% price increase
                'description': '10% price increase with elastic demand'
            }
        })

        return scenarios