Initial commit - production deployment

2026-01-21 17:17:16 +01:00
commit c23d00dd92
2289 changed files with 638440 additions and 0 deletions

View File

@@ -0,0 +1,11 @@
"""
ML Components for Forecasting
Machine learning prediction and forecasting components
"""
from .predictor import BakeryPredictor, BakeryForecaster
__all__ = [
"BakeryPredictor",
"BakeryForecaster"
]

View File

@@ -0,0 +1,393 @@
"""
Business Rules Insights Orchestrator
Coordinates business rules optimization and insight posting
"""
import pandas as pd
from typing import Dict, List, Any, Optional
import structlog
from datetime import datetime
from uuid import UUID
import sys
import os
# Add shared clients to path
sys.path.append(os.path.join(os.path.dirname(__file__), '../../../..'))
from shared.clients.ai_insights_client import AIInsightsClient
from shared.messaging import UnifiedEventPublisher
from app.ml.dynamic_rules_engine import DynamicRulesEngine
logger = structlog.get_logger()
class BusinessRulesInsightsOrchestrator:
"""
Orchestrates business rules analysis and insight generation workflow.
Workflow:
1. Analyze dynamic business rule performance
2. Generate insights for rule optimization
3. Post insights to AI Insights Service
4. Publish recommendation events to RabbitMQ
5. Provide rule optimization for forecasting
6. Track rule effectiveness and improvements
"""
def __init__(
self,
ai_insights_base_url: str = "http://ai-insights-service:8000",
event_publisher: Optional[UnifiedEventPublisher] = None
):
self.rules_engine = DynamicRulesEngine()
self.ai_insights_client = AIInsightsClient(ai_insights_base_url)
self.event_publisher = event_publisher
async def analyze_and_post_business_rules_insights(
self,
tenant_id: str,
inventory_product_id: str,
sales_data: pd.DataFrame,
min_samples: int = 10
) -> Dict[str, Any]:
"""
Complete workflow: Analyze business rules and post insights.
Args:
tenant_id: Tenant identifier
inventory_product_id: Product identifier
sales_data: Historical sales data
min_samples: Minimum samples for rule analysis
Returns:
Workflow results with analysis and posted insights
"""
logger.info(
"Starting business rules analysis workflow",
tenant_id=tenant_id,
inventory_product_id=inventory_product_id,
samples=len(sales_data)
)
# Step 1: Learn and analyze rules
rules_results = await self.rules_engine.learn_all_rules(
tenant_id=tenant_id,
inventory_product_id=inventory_product_id,
sales_data=sales_data,
external_data=None,
min_samples=min_samples
)
logger.info(
"Business rules analysis complete",
insights_generated=len(rules_results.get('insights', [])),
rules_learned=len(rules_results.get('rules', {}))
)
# Step 2: Enrich insights with tenant_id and product context
enriched_insights = self._enrich_insights(
rules_results.get('insights', []),
tenant_id,
inventory_product_id
)
# Step 3: Post insights to AI Insights Service
if enriched_insights:
post_results = await self.ai_insights_client.create_insights_bulk(
tenant_id=UUID(tenant_id),
insights=enriched_insights
)
logger.info(
"Business rules insights posted to AI Insights Service",
inventory_product_id=inventory_product_id,
total=post_results['total'],
successful=post_results['successful'],
failed=post_results['failed']
)
else:
post_results = {'total': 0, 'successful': 0, 'failed': 0}
logger.info("No insights to post for product", inventory_product_id=inventory_product_id)
# Step 4: Publish insight events to RabbitMQ
created_insights = post_results.get('created_insights', [])
if created_insights:
product_context = {'inventory_product_id': inventory_product_id}
await self._publish_insight_events(
tenant_id=tenant_id,
insights=created_insights,
product_context=product_context
)
# Step 5: Return comprehensive results
return {
'tenant_id': tenant_id,
'inventory_product_id': inventory_product_id,
'learned_at': rules_results['learned_at'],
'rules': rules_results.get('rules', {}),
'insights_generated': len(enriched_insights),
'insights_posted': post_results['successful'],
'insights_failed': post_results['failed'],
'created_insights': post_results.get('created_insights', [])
}
def _enrich_insights(
self,
insights: List[Dict[str, Any]],
tenant_id: str,
inventory_product_id: str
) -> List[Dict[str, Any]]:
"""
Enrich insights with required fields for AI Insights Service.
Args:
insights: Raw insights from rules engine
tenant_id: Tenant identifier
inventory_product_id: Product identifier
Returns:
Enriched insights ready for posting
"""
enriched = []
for insight in insights:
# Add required tenant_id
enriched_insight = insight.copy()
enriched_insight['tenant_id'] = tenant_id
# Add product context to metrics
if 'metrics_json' not in enriched_insight:
enriched_insight['metrics_json'] = {}
enriched_insight['metrics_json']['inventory_product_id'] = inventory_product_id
# Add source metadata
enriched_insight['source_service'] = 'forecasting'
enriched_insight['source_model'] = 'dynamic_rules_engine'
enriched_insight['detected_at'] = datetime.utcnow().isoformat()
enriched.append(enriched_insight)
return enriched
async def analyze_all_business_rules(
self,
tenant_id: str,
products_data: Dict[str, pd.DataFrame],
min_samples: int = 10
) -> Dict[str, Any]:
"""
Analyze all products for business rules optimization and generate comparative insights.
Args:
tenant_id: Tenant identifier
products_data: Dict of {inventory_product_id: sales_data DataFrame}
min_samples: Minimum samples for rule analysis
Returns:
Comprehensive analysis with rule optimization insights
"""
logger.info(
"Analyzing business rules for all products",
tenant_id=tenant_id,
products=len(products_data)
)
all_results = []
total_insights_posted = 0
# Analyze each product
for inventory_product_id, sales_data in products_data.items():
try:
results = await self.analyze_and_post_business_rules_insights(
tenant_id=tenant_id,
inventory_product_id=inventory_product_id,
sales_data=sales_data,
min_samples=min_samples
)
all_results.append(results)
total_insights_posted += results['insights_posted']
except Exception as e:
logger.error(
"Error analyzing business rules for product",
inventory_product_id=inventory_product_id,
error=str(e)
)
# Generate summary insight
if total_insights_posted > 0:
summary_insight = self._generate_portfolio_summary_insight(
tenant_id, all_results
)
if summary_insight:
enriched_summary = self._enrich_insights(
[summary_insight], tenant_id, 'all_products'
)
post_results = await self.ai_insights_client.create_insights_bulk(
tenant_id=UUID(tenant_id),
insights=enriched_summary
)
total_insights_posted += post_results['successful']
logger.info(
"All business rules analysis complete",
tenant_id=tenant_id,
products_analyzed=len(all_results),
total_insights_posted=total_insights_posted
)
return {
'tenant_id': tenant_id,
'analyzed_at': datetime.utcnow().isoformat(),
'products_analyzed': len(all_results),
'product_results': all_results,
'total_insights_posted': total_insights_posted
}
def _generate_portfolio_summary_insight(
self,
tenant_id: str,
all_results: List[Dict[str, Any]]
) -> Optional[Dict[str, Any]]:
"""
Generate portfolio-level business rules summary insight.
Args:
tenant_id: Tenant identifier
all_results: All product analysis results
Returns:
Summary insight or None
"""
if not all_results:
return None
# Calculate summary statistics
total_products = len(all_results)
total_rules = sum(len(r.get('rules', {})) for r in all_results)
# Count products with significant rule improvements
significant_improvements = sum(
    1 for r in all_results
    if any('improvement' in str(v).lower() for v in r.get('rules', {}).values())
)
return {
'type': 'recommendation',
'priority': 'high' if significant_improvements > total_products * 0.3 else 'medium',
'category': 'forecasting',
'title': f'Business Rule Optimization: {total_products} Products Analyzed',
'description': f'Learned {total_rules} dynamic rules across {total_products} products. Identified {significant_improvements} products with significant rule improvements.',
'impact_type': 'operational_efficiency',
'impact_value': total_rules,
'impact_unit': 'rules',
'confidence': 80,
'metrics_json': {
'total_products': total_products,
'total_rules': total_rules,
'significant_improvements': significant_improvements,
'rules_per_product': round(total_rules / total_products, 2)
},
'actionable': True,
'recommendation_actions': [
{
'label': 'Review Learned Rules',
'action': 'review_business_rules',
'params': {'tenant_id': tenant_id}
},
{
'label': 'Implement Optimized Rules',
'action': 'implement_business_rules',
'params': {'tenant_id': tenant_id}
}
],
'source_service': 'forecasting',
'source_model': 'dynamic_rules_engine'
}
async def get_learned_rules(
self,
inventory_product_id: str
) -> Optional[Dict[str, Any]]:
"""
Get cached learned rules for a product.
Args:
inventory_product_id: Product identifier
Returns:
Learned rules or None if not analyzed
"""
return self.rules_engine.get_all_rules(inventory_product_id)
async def _publish_insight_events(
    self,
    tenant_id: str,
    insights: List[Dict[str, Any]],
    product_context: Optional[Dict[str, Any]] = None
):
"""
Publish insight events to RabbitMQ for alert processing.
Args:
tenant_id: Tenant identifier
insights: List of created insights
product_context: Additional context about the product
"""
if not self.event_publisher:
logger.warning("No event publisher available for business rules insights")
return
for insight in insights:
# Determine severity based on confidence and priority
confidence = insight.get('confidence', 0)
priority = insight.get('priority', 'medium')
# Map priority to severity, with confidence as tiebreaker
if priority == 'critical' or (priority == 'high' and confidence >= 70):
severity = 'high'
elif priority == 'high' or (priority == 'medium' and confidence >= 80):
severity = 'medium'
else:
severity = 'low'
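# Illustrative mappings: ('critical', any) -> 'high'; ('high', 65) -> 'medium';
# ('medium', 85) -> 'medium'; ('medium', 60) -> 'low'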
# Prepare the event data
event_data = {
'insight_id': insight.get('id'),
'type': insight.get('type'),
'title': insight.get('title'),
'description': insight.get('description'),
'category': insight.get('category'),
'priority': insight.get('priority'),
'confidence': confidence,
'recommendation': insight.get('recommendation_actions', []),
'impact_type': insight.get('impact_type'),
'impact_value': insight.get('impact_value'),
'inventory_product_id': product_context.get('inventory_product_id') if product_context else None,
'timestamp': insight.get('detected_at', datetime.utcnow().isoformat()),
'source_service': 'forecasting',
'source_model': 'dynamic_rules_engine'
}
try:
await self.event_publisher.publish_recommendation(
event_type='ai_business_rule',
tenant_id=tenant_id,
severity=severity,
data=event_data
)
logger.info(
"Published business rules insight event",
tenant_id=tenant_id,
insight_id=insight.get('id'),
severity=severity
)
except Exception as e:
logger.error(
"Failed to publish business rules insight event",
tenant_id=tenant_id,
insight_id=insight.get('id'),
error=str(e)
)
async def close(self):
"""Close HTTP client connections."""
await self.ai_insights_client.close()
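
A minimal usage sketch for the orchestrator above (illustrative only: the tenant UUID and product id are hypothetical, the AI Insights Service must be reachable at its default URL, and with no event publisher wired in the RabbitMQ step is skipped with a warning):

import asyncio
import pandas as pd

async def main() -> None:
    orchestrator = BusinessRulesInsightsOrchestrator()
    sales_data = pd.DataFrame({
        "date": pd.date_range("2025-01-01", periods=120, freq="D"),
        "quantity": [42] * 120,  # placeholder demand history
    })
    try:
        results = await orchestrator.analyze_and_post_business_rules_insights(
            tenant_id="00000000-0000-0000-0000-000000000000",  # hypothetical tenant UUID
            inventory_product_id="baguette-classic",  # hypothetical product id
            sales_data=sales_data,
        )
        print(results["insights_posted"], "insights posted")
    finally:
        await orchestrator.close()

asyncio.run(main())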

View File

@@ -0,0 +1,235 @@
"""
Calendar-based Feature Engineering for Forecasting Service
Generates calendar features for future date predictions
"""
import pandas as pd
import structlog
from typing import Dict, List, Any, Optional
from datetime import datetime, date, time, timedelta
from app.services.data_client import data_client
logger = structlog.get_logger()
class ForecastCalendarFeatures:
"""
Generates calendar-based features for future predictions
Optimized for forecasting service (future dates only)
"""
def __init__(self):
self.calendar_cache = {} # Cache calendar data per tenant
async def get_calendar_for_tenant(
self,
tenant_id: str
) -> Optional[Dict[str, Any]]:
"""Get cached calendar for tenant"""
if tenant_id in self.calendar_cache:
return self.calendar_cache[tenant_id]
calendar = await data_client.fetch_tenant_calendar(tenant_id)
if calendar:
self.calendar_cache[tenant_id] = calendar
return calendar
def _is_date_in_holiday_period(
self,
check_date: date,
holiday_periods: List[Dict[str, Any]]
) -> tuple[bool, Optional[str]]:
"""Check if date is within any holiday period"""
for period in holiday_periods:
start = datetime.strptime(period["start_date"], "%Y-%m-%d").date()
end = datetime.strptime(period["end_date"], "%Y-%m-%d").date()
if start <= check_date <= end:
return True, period["name"]
return False, None
def _is_school_hours_active(
self,
check_datetime: datetime,
school_hours: Dict[str, Any]
) -> bool:
"""Check if datetime falls during school operating hours"""
# Only weekdays
if check_datetime.weekday() >= 5:
return False
check_time = check_datetime.time()
# Morning session
morning_start = datetime.strptime(
school_hours["morning_start"], "%H:%M"
).time()
morning_end = datetime.strptime(
school_hours["morning_end"], "%H:%M"
).time()
if morning_start <= check_time <= morning_end:
return True
# Afternoon session, if configured
if school_hours.get("has_afternoon_session", False):
afternoon_start = datetime.strptime(
school_hours["afternoon_start"], "%H:%M"
).time()
afternoon_end = datetime.strptime(
school_hours["afternoon_end"], "%H:%M"
).time()
if afternoon_start <= check_time <= afternoon_end:
return True
return False
def _calculate_school_proximity_intensity(
self,
check_datetime: datetime,
school_hours: Dict[str, Any]
) -> float:
"""
Calculate school proximity impact intensity
Returns 0.0-1.0 based on drop-off/pick-up times
"""
# Only weekdays
if check_datetime.weekday() >= 5:
return 0.0
check_time = check_datetime.time()
morning_start = datetime.strptime(
school_hours["morning_start"], "%H:%M"
).time()
morning_end = datetime.strptime(
school_hours["morning_end"], "%H:%M"
).time()
# Morning drop-off peak (30 min before to 15 min after start)
drop_off_start = (
datetime.combine(date.today(), morning_start) - timedelta(minutes=30)
).time()
drop_off_end = (
datetime.combine(date.today(), morning_start) + timedelta(minutes=15)
).time()
if drop_off_start <= check_time <= drop_off_end:
return 1.0 # Peak
# Morning pick-up peak (15 min before to 30 min after end)
pickup_start = (
datetime.combine(date.today(), morning_end) - timedelta(minutes=15)
).time()
pickup_end = (
datetime.combine(date.today(), morning_end) + timedelta(minutes=30)
).time()
if pickup_start <= check_time <= pickup_end:
return 1.0 # Peak
# During school hours (moderate)
if morning_start <= check_time <= morning_end:
return 0.3
return 0.0
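# Example windows, assuming morning_start="08:30" and morning_end="16:00":
# 08:00-08:45 drop-off peak -> 1.0; 15:45-16:30 pick-up peak -> 1.0;
# remaining in-session times -> 0.3; weekends and all other times -> 0.0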
async def add_calendar_features(
self,
df: pd.DataFrame,
tenant_id: str,
date_column: str = "ds"
) -> pd.DataFrame:
"""
Add calendar features to forecast dataframe
Args:
df: Forecast dataframe with future dates
tenant_id: Tenant ID to fetch calendar
date_column: Name of date column (default 'ds' for Prophet)
Returns:
DataFrame with calendar features added
"""
try:
logger.info(
"Adding calendar features to forecast",
tenant_id=tenant_id,
rows=len(df)
)
# Get calendar
calendar = await self.get_calendar_for_tenant(tenant_id)
if not calendar:
logger.info(
"No calendar available, using zero features",
tenant_id=tenant_id
)
df["is_school_holiday"] = 0
df["school_hours_active"] = 0
df["school_proximity_intensity"] = 0.0
return df
holiday_periods = calendar.get("holiday_periods", [])
school_hours = calendar.get("school_hours", {})
# Initialize feature lists
school_holidays = []
hours_active = []
proximity_intensity = []
# Process each row
for idx, row in df.iterrows():
row_date = pd.to_datetime(row[date_column])
# Check holiday
is_holiday, _ = self._is_date_in_holiday_period(
row_date.date(),
holiday_periods
)
school_holidays.append(1 if is_holiday else 0)
# Check school hours and proximity (only when the timestamp carries a real time
# component; pd.Timestamp always has an 'hour' attribute, so hasattr() cannot distinguish)
if row_date.time() != time(0, 0):
hours_active.append(
1 if self._is_school_hours_active(row_date, school_hours) else 0
)
proximity_intensity.append(
self._calculate_school_proximity_intensity(row_date, school_hours)
)
else:
hours_active.append(0)
proximity_intensity.append(0.0)
# Add features
df["is_school_holiday"] = school_holidays
df["school_hours_active"] = hours_active
df["school_proximity_intensity"] = proximity_intensity
logger.info(
"Calendar features added to forecast",
tenant_id=tenant_id,
holidays_in_forecast=sum(school_holidays)
)
return df
except Exception as e:
logger.error(
"Error adding calendar features to forecast",
tenant_id=tenant_id,
error=str(e)
)
# Return with zero features on error
df["is_school_holiday"] = 0
df["school_hours_active"] = 0
df["school_proximity_intensity"] = 0.0
return df
# Global instance
forecast_calendar_features = ForecastCalendarFeatures()
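
A minimal sketch of attaching these features to a Prophet-style future frame (illustrative; 'tenant-123' is a hypothetical tenant id, and if the data service is unreachable the method falls back to zero-valued features):

import asyncio
import pandas as pd

async def main() -> None:
    future = pd.DataFrame({"ds": pd.date_range("2025-06-01", periods=14, freq="D")})
    future = await forecast_calendar_features.add_calendar_features(future, tenant_id="tenant-123")
    # The three added columns can then be registered with Prophet via add_regressor()
    print(future[["ds", "is_school_holiday", "school_hours_active", "school_proximity_intensity"]].head())

asyncio.run(main())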

View File

@@ -0,0 +1,403 @@
"""
Demand Insights Orchestrator
Coordinates demand forecasting analysis and insight posting
"""
import pandas as pd
from typing import Dict, List, Any, Optional
import structlog
from datetime import datetime
from uuid import UUID
import sys
import os
# Add shared clients to path
sys.path.append(os.path.join(os.path.dirname(__file__), '../../../..'))
from shared.clients.ai_insights_client import AIInsightsClient
from shared.messaging import UnifiedEventPublisher
from app.ml.predictor import BakeryForecaster
logger = structlog.get_logger()
class DemandInsightsOrchestrator:
"""
Orchestrates demand forecasting analysis and insight generation workflow.
Workflow:
1. Analyze historical demand patterns from sales data
2. Generate insights for demand optimization
3. Post insights to AI Insights Service
4. Publish recommendation events to RabbitMQ
5. Provide demand pattern analysis for forecasting
6. Track demand forecasting performance
"""
def __init__(
self,
ai_insights_base_url: str = "http://ai-insights-service:8000",
event_publisher: Optional[UnifiedEventPublisher] = None
):
self.forecaster = BakeryForecaster()
self.ai_insights_client = AIInsightsClient(ai_insights_base_url)
self.event_publisher = event_publisher
async def analyze_and_post_demand_insights(
self,
tenant_id: str,
inventory_product_id: str,
sales_data: pd.DataFrame,
forecast_horizon_days: int = 30,
min_history_days: int = 90
) -> Dict[str, Any]:
"""
Complete workflow: Analyze demand and post insights.
Args:
tenant_id: Tenant identifier
inventory_product_id: Product identifier
sales_data: Historical sales data
forecast_horizon_days: Days to forecast ahead
min_history_days: Minimum days of history required
Returns:
Workflow results with analysis and posted insights
"""
logger.info(
"Starting demand forecasting analysis workflow",
tenant_id=tenant_id,
inventory_product_id=inventory_product_id,
history_days=len(sales_data)
)
# Step 1: Analyze demand patterns
analysis_results = await self.forecaster.analyze_demand_patterns(
tenant_id=tenant_id,
inventory_product_id=inventory_product_id,
sales_data=sales_data,
forecast_horizon_days=forecast_horizon_days,
min_history_days=min_history_days
)
logger.info(
"Demand analysis complete",
inventory_product_id=inventory_product_id,
insights_generated=len(analysis_results.get('insights', []))
)
# Step 2: Enrich insights with tenant_id and product context
enriched_insights = self._enrich_insights(
analysis_results.get('insights', []),
tenant_id,
inventory_product_id
)
# Step 3: Post insights to AI Insights Service
if enriched_insights:
post_results = await self.ai_insights_client.create_insights_bulk(
tenant_id=UUID(tenant_id),
insights=enriched_insights
)
logger.info(
"Demand insights posted to AI Insights Service",
inventory_product_id=inventory_product_id,
total=post_results['total'],
successful=post_results['successful'],
failed=post_results['failed']
)
else:
post_results = {'total': 0, 'successful': 0, 'failed': 0}
logger.info("No insights to post for product", inventory_product_id=inventory_product_id)
# Step 4: Publish insight events to RabbitMQ
created_insights = post_results.get('created_insights', [])
if created_insights:
product_context = {'inventory_product_id': inventory_product_id}
await self._publish_insight_events(
tenant_id=tenant_id,
insights=created_insights,
product_context=product_context
)
# Step 5: Return comprehensive results
return {
'tenant_id': tenant_id,
'inventory_product_id': inventory_product_id,
'analyzed_at': analysis_results['analyzed_at'],
'history_days': analysis_results['history_days'],
'demand_patterns': analysis_results.get('patterns', {}),
'trend_analysis': analysis_results.get('trend_analysis', {}),
'seasonal_factors': analysis_results.get('seasonal_factors', {}),
'insights_generated': len(enriched_insights),
'insights_posted': post_results['successful'],
'insights_failed': post_results['failed'],
'created_insights': post_results.get('created_insights', [])
}
def _enrich_insights(
self,
insights: List[Dict[str, Any]],
tenant_id: str,
inventory_product_id: str
) -> List[Dict[str, Any]]:
"""
Enrich insights with required fields for AI Insights Service.
Args:
insights: Raw insights from forecaster
tenant_id: Tenant identifier
inventory_product_id: Product identifier
Returns:
Enriched insights ready for posting
"""
enriched = []
for insight in insights:
# Add required tenant_id
enriched_insight = insight.copy()
enriched_insight['tenant_id'] = tenant_id
# Add product context to metrics
if 'metrics_json' not in enriched_insight:
enriched_insight['metrics_json'] = {}
enriched_insight['metrics_json']['inventory_product_id'] = inventory_product_id
# Add source metadata
enriched_insight['source_service'] = 'forecasting'
enriched_insight['source_model'] = 'demand_analyzer'
enriched_insight['detected_at'] = datetime.utcnow().isoformat()
enriched.append(enriched_insight)
return enriched
async def analyze_all_products(
self,
tenant_id: str,
products_data: Dict[str, pd.DataFrame],
forecast_horizon_days: int = 30,
min_history_days: int = 90
) -> Dict[str, Any]:
"""
Analyze all products for a tenant and generate comparative insights.
Args:
tenant_id: Tenant identifier
products_data: Dict of {inventory_product_id: sales_data DataFrame}
forecast_horizon_days: Days to forecast
min_history_days: Minimum history required
Returns:
Comprehensive analysis with product comparison
"""
logger.info(
"Analyzing all products for tenant",
tenant_id=tenant_id,
products=len(products_data)
)
all_results = []
total_insights_posted = 0
# Analyze each product
for inventory_product_id, sales_data in products_data.items():
try:
results = await self.analyze_and_post_demand_insights(
tenant_id=tenant_id,
inventory_product_id=inventory_product_id,
sales_data=sales_data,
forecast_horizon_days=forecast_horizon_days,
min_history_days=min_history_days
)
all_results.append(results)
total_insights_posted += results['insights_posted']
except Exception as e:
logger.error(
"Error analyzing product",
inventory_product_id=inventory_product_id,
error=str(e)
)
# Generate summary insight
if total_insights_posted > 0:
summary_insight = self._generate_portfolio_summary_insight(
tenant_id, all_results
)
if summary_insight:
enriched_summary = self._enrich_insights(
[summary_insight], tenant_id, 'all_products'
)
post_results = await self.ai_insights_client.create_insights_bulk(
tenant_id=UUID(tenant_id),
insights=enriched_summary
)
total_insights_posted += post_results['successful']
logger.info(
"All products analysis complete",
tenant_id=tenant_id,
products_analyzed=len(all_results),
total_insights_posted=total_insights_posted
)
return {
'tenant_id': tenant_id,
'analyzed_at': datetime.utcnow().isoformat(),
'products_analyzed': len(all_results),
'product_results': all_results,
'total_insights_posted': total_insights_posted
}
def _generate_portfolio_summary_insight(
self,
tenant_id: str,
all_results: List[Dict[str, Any]]
) -> Optional[Dict[str, Any]]:
"""
Generate portfolio-level summary insight.
Args:
tenant_id: Tenant identifier
all_results: All product analysis results
Returns:
Summary insight or None
"""
if not all_results:
return None
# Calculate summary statistics
total_products = len(all_results)
high_demand_products = sum(1 for r in all_results if r.get('trend_analysis', {}).get('is_increasing', False))
seasonal_ratios = [
    r.get('seasonal_factors', {}).get('peak_ratio')
    for r in all_results
    if r.get('seasonal_factors', {}).get('peak_ratio')
]
# Average only over products that actually report a peak ratio
avg_seasonal_factor = sum(seasonal_ratios) / max(1, len(seasonal_ratios))
return {
'type': 'recommendation',
'priority': 'medium' if high_demand_products > total_products * 0.5 else 'low',
'category': 'forecasting',
'title': f'Demand Pattern Summary: {total_products} Products Analyzed',
'description': f'Detected {high_demand_products} products with increasing demand trends. Average seasonal peak ratio: {avg_seasonal_factor:.2f}x.',
'impact_type': 'demand_optimization',
'impact_value': high_demand_products,
'impact_unit': 'products',
'confidence': 75,
'metrics_json': {
'total_products': total_products,
'high_demand_products': high_demand_products,
'avg_seasonal_factor': round(avg_seasonal_factor, 2),
'trend_strength': 'strong' if high_demand_products > total_products * 0.7 else 'moderate'
},
'actionable': True,
'recommendation_actions': [
{
'label': 'Review Production Schedule',
'action': 'review_production_schedule',
'params': {'tenant_id': tenant_id}
},
{
'label': 'Adjust Inventory Levels',
'action': 'adjust_inventory_levels',
'params': {'tenant_id': tenant_id}
}
],
'source_service': 'forecasting',
'source_model': 'demand_analyzer'
}
async def get_demand_patterns(
self,
inventory_product_id: str
) -> Optional[Dict[str, Any]]:
"""
Get cached demand patterns for a product.
Args:
inventory_product_id: Product identifier
Returns:
Demand patterns or None if not analyzed
"""
return self.forecaster.get_cached_demand_patterns(inventory_product_id)
async def _publish_insight_events(
    self,
    tenant_id: str,
    insights: List[Dict[str, Any]],
    product_context: Optional[Dict[str, Any]] = None
):
"""
Publish insight events to RabbitMQ for alert processing.
Args:
tenant_id: Tenant identifier
insights: List of created insights
product_context: Additional context about the product
"""
if not self.event_publisher:
logger.warning("No event publisher available for demand insights")
return
for insight in insights:
# Determine severity based on confidence and priority
confidence = insight.get('confidence', 0)
priority = insight.get('priority', 'medium')
# Map priority to severity, with confidence as tiebreaker
if priority == 'critical' or (priority == 'high' and confidence >= 70):
severity = 'high'
elif priority == 'high' or (priority == 'medium' and confidence >= 80):
severity = 'medium'
else:
severity = 'low'
# Prepare the event data
event_data = {
'insight_id': insight.get('id'),
'type': insight.get('type'),
'title': insight.get('title'),
'description': insight.get('description'),
'category': insight.get('category'),
'priority': insight.get('priority'),
'confidence': confidence,
'recommendation': insight.get('recommendation_actions', []),
'impact_type': insight.get('impact_type'),
'impact_value': insight.get('impact_value'),
'inventory_product_id': product_context.get('inventory_product_id') if product_context else None,
'timestamp': insight.get('detected_at', datetime.utcnow().isoformat()),
'source_service': 'forecasting',
'source_model': 'demand_analyzer'
}
try:
await self.event_publisher.publish_recommendation(
event_type='ai_demand_forecast',
tenant_id=tenant_id,
severity=severity,
data=event_data
)
logger.info(
"Published demand insight event",
tenant_id=tenant_id,
insight_id=insight.get('id'),
severity=severity
)
except Exception as e:
logger.error(
"Failed to publish demand insight event",
tenant_id=tenant_id,
insight_id=insight.get('id'),
error=str(e)
)
async def close(self):
"""Close HTTP client connections."""
await self.ai_insights_client.close()
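
A batch-mode sketch using analyze_all_products (illustrative; the tenant UUID and product id are hypothetical, and the AI Insights Service must be reachable for posting to succeed):

import asyncio
import pandas as pd

async def main() -> None:
    orchestrator = DemandInsightsOrchestrator()
    products_data = {
        "croissant-butter": pd.DataFrame({  # hypothetical product id
            "date": pd.date_range("2025-01-01", periods=180, freq="D"),
            "quantity": [30] * 180,  # placeholder demand history
        }),
    }
    try:
        summary = await orchestrator.analyze_all_products(
            tenant_id="00000000-0000-0000-0000-000000000000",  # hypothetical tenant UUID
            products_data=products_data,
            forecast_horizon_days=30,
        )
        print(summary["products_analyzed"], "products analyzed;",
              summary["total_insights_posted"], "insights posted")
    finally:
        await orchestrator.close()

asyncio.run(main())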

View File

@@ -0,0 +1,758 @@
"""
Dynamic Business Rules Engine
Learns optimal adjustment factors from historical data instead of using hardcoded values
Replaces hardcoded weather multipliers, holiday adjustments, event impacts with learned values
"""
import pandas as pd
import numpy as np
from typing import Dict, List, Any, Optional, Tuple
import structlog
from datetime import datetime, timedelta
from scipy import stats
from sklearn.linear_model import Ridge
from collections import defaultdict
logger = structlog.get_logger()
class DynamicRulesEngine:
"""
Learns business rules from historical data instead of using hardcoded values.
Current hardcoded values to replace:
- Weather: rain = -15%, snow = -25%, extreme_heat = -10%
- Holidays: +50% (all holidays treated the same)
- Events: +30% (all events treated the same)
- Weekend: Manual assumptions
Dynamic approach:
- Learn actual weather impact per weather condition per product
- Learn holiday multipliers per holiday type
- Learn event impact by event type
- Learn day-of-week patterns per product
- Generate insights when learned values differ from hardcoded assumptions
"""
def __init__(self):
self.weather_rules = {}
self.holiday_rules = {}
self.event_rules = {}
self.dow_rules = {}
self.month_rules = {}
async def learn_all_rules(
self,
tenant_id: str,
inventory_product_id: str,
sales_data: pd.DataFrame,
external_data: Optional[pd.DataFrame] = None,
min_samples: int = 10
) -> Dict[str, Any]:
"""
Learn all business rules from historical data.
Args:
tenant_id: Tenant identifier
inventory_product_id: Product identifier
sales_data: Historical sales data with 'date', 'quantity' columns
external_data: Optional weather/events/holidays data
min_samples: Minimum samples required to learn a rule
Returns:
Dictionary of learned rules and insights
"""
logger.info(
"Learning dynamic business rules from historical data",
tenant_id=tenant_id,
inventory_product_id=inventory_product_id,
data_points=len(sales_data)
)
results = {
'tenant_id': tenant_id,
'inventory_product_id': inventory_product_id,
'learned_at': datetime.utcnow().isoformat(),
'rules': {},
'insights': []
}
# Ensure a datetime 'date' column exists (fall back to Prophet's 'ds'); copy first
# so the caller's frame is never mutated
sales_data = sales_data.copy()
if 'date' not in sales_data.columns:
    sales_data['date'] = sales_data['ds']
sales_data['date'] = pd.to_datetime(sales_data['date'])
# Learn weather impact rules
if external_data is not None and 'weather_condition' in external_data.columns:
weather_rules, weather_insights = await self._learn_weather_rules(
sales_data, external_data, min_samples
)
results['rules']['weather'] = weather_rules
results['insights'].extend(weather_insights)
self.weather_rules[inventory_product_id] = weather_rules
# Learn holiday rules
if external_data is not None and 'is_holiday' in external_data.columns:
holiday_rules, holiday_insights = await self._learn_holiday_rules(
sales_data, external_data, min_samples
)
results['rules']['holidays'] = holiday_rules
results['insights'].extend(holiday_insights)
self.holiday_rules[inventory_product_id] = holiday_rules
# Learn event rules
if external_data is not None and 'event_type' in external_data.columns:
event_rules, event_insights = await self._learn_event_rules(
sales_data, external_data, min_samples
)
results['rules']['events'] = event_rules
results['insights'].extend(event_insights)
self.event_rules[inventory_product_id] = event_rules
# Learn day-of-week patterns (always available)
dow_rules, dow_insights = await self._learn_day_of_week_rules(
sales_data, min_samples
)
results['rules']['day_of_week'] = dow_rules
results['insights'].extend(dow_insights)
self.dow_rules[inventory_product_id] = dow_rules
# Learn monthly seasonality
month_rules, month_insights = await self._learn_month_rules(
sales_data, min_samples
)
results['rules']['months'] = month_rules
results['insights'].extend(month_insights)
self.month_rules[inventory_product_id] = month_rules
logger.info(
"Dynamic rules learning complete",
total_insights=len(results['insights']),
rules_learned=len(results['rules'])
)
return results
async def _learn_weather_rules(
self,
sales_data: pd.DataFrame,
external_data: pd.DataFrame,
min_samples: int
) -> Tuple[Dict[str, Any], List[Dict[str, Any]]]:
"""
Learn actual weather impact from historical data.
Hardcoded assumptions:
- rain: -15%
- snow: -25%
- extreme_heat: -10%
Learn actual impact for this product.
"""
logger.info("Learning weather impact rules")
# Merge sales with weather data
merged = sales_data.merge(
external_data[['date', 'weather_condition', 'temperature', 'precipitation']],
on='date',
how='left'
)
# Baseline: average sales on clear days
clear_days = merged[
(merged['weather_condition'].isin(['clear', 'sunny', 'partly_cloudy'])) |
(merged['weather_condition'].isna())
]
baseline_avg = clear_days['quantity'].mean()
weather_rules = {
'baseline_avg': float(baseline_avg),
'conditions': {}
}
insights = []
# Hardcoded values for comparison
hardcoded_impacts = {
'rain': -0.15,
'snow': -0.25,
'extreme_heat': -0.10
}
# Learn impact for each weather condition (substring match, so 'rain' also captures 'rainy')
for condition in ['rain', 'rainy', 'snow', 'snowy', 'extreme_heat', 'hot', 'storm', 'fog']:
condition_days = merged[merged['weather_condition'].str.contains(condition, case=False, na=False)]
if len(condition_days) >= min_samples:
condition_avg = condition_days['quantity'].mean()
learned_impact = (condition_avg - baseline_avg) / baseline_avg
# Statistical significance test
t_stat, p_value = stats.ttest_ind(
condition_days['quantity'].values,
clear_days['quantity'].values,
equal_var=False
)
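# Welch's t-test (equal_var=False) is used here and in the holiday/event learners:
# it tolerates the unequal variances and sample sizes typical of these buckets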
weather_rules['conditions'][condition] = {
'learned_multiplier': float(1 + learned_impact),
'learned_impact_pct': float(learned_impact * 100),
'sample_size': int(len(condition_days)),
'avg_quantity': float(condition_avg),
'p_value': float(p_value),
'significant': bool(p_value < 0.05)
}
# Compare with hardcoded value if exists
if condition in hardcoded_impacts and p_value < 0.05:
hardcoded_impact = hardcoded_impacts[condition]
difference = abs(learned_impact - hardcoded_impact)
if difference > 0.05: # More than 5% difference
insight = {
'type': 'optimization',
'priority': 'high' if difference > 0.15 else 'medium',
'category': 'forecasting',
'title': f'Weather Rule Mismatch: {condition.title()}',
'description': f'Learned {condition} impact is {learned_impact*100:.1f}% vs hardcoded {hardcoded_impact*100:.1f}%. Updating rule could improve forecast accuracy by {difference*100:.1f}%.',
'impact_type': 'forecast_improvement',
'impact_value': difference * 100,
'impact_unit': 'percentage_points',
'confidence': self._calculate_confidence(len(condition_days), p_value),
'metrics_json': {
'weather_condition': condition,
'learned_impact_pct': round(learned_impact * 100, 2),
'hardcoded_impact_pct': round(hardcoded_impact * 100, 2),
'difference_pct': round(difference * 100, 2),
'baseline_avg': round(baseline_avg, 2),
'condition_avg': round(condition_avg, 2),
'sample_size': len(condition_days),
'p_value': round(p_value, 4)
},
'actionable': True,
'recommendation_actions': [
{
'label': 'Update Weather Rule',
'action': 'update_weather_multiplier',
'params': {
'condition': condition,
'new_multiplier': round(1 + learned_impact, 3)
}
}
],
'source_service': 'forecasting',
'source_model': 'dynamic_rules_engine'
}
insights.append(insight)
logger.info(
"Weather rule discrepancy detected",
condition=condition,
learned=f"{learned_impact*100:.1f}%",
hardcoded=f"{hardcoded_impact*100:.1f}%"
)
return weather_rules, insights
async def _learn_holiday_rules(
self,
sales_data: pd.DataFrame,
external_data: pd.DataFrame,
min_samples: int
) -> Tuple[Dict[str, Any], List[Dict[str, Any]]]:
"""
Learn holiday impact by holiday type instead of uniform +50%.
Hardcoded: All holidays = +50%
Learn: Christmas vs Easter vs National holidays have different impacts
"""
logger.info("Learning holiday impact rules")
# Merge sales with holiday data
merged = sales_data.merge(
external_data[['date', 'is_holiday', 'holiday_name', 'holiday_type']],
on='date',
how='left'
)
# Baseline: non-holiday average (dates missing from external_data count as non-holidays)
non_holidays = merged[merged['is_holiday'] != True]
baseline_avg = non_holidays['quantity'].mean()
holiday_rules = {
'baseline_avg': float(baseline_avg),
'hardcoded_multiplier': 1.5, # Current +50%
'holiday_types': {}
}
insights = []
# Learn impact per holiday type
if 'holiday_type' in merged.columns:
for holiday_type in merged[merged['is_holiday'] == True]['holiday_type'].unique():
if pd.isna(holiday_type):
continue
holiday_days = merged[merged['holiday_type'] == holiday_type]
if len(holiday_days) >= min_samples:
holiday_avg = holiday_days['quantity'].mean()
learned_multiplier = holiday_avg / baseline_avg
learned_impact = (learned_multiplier - 1) * 100
# Statistical test
t_stat, p_value = stats.ttest_ind(
holiday_days['quantity'].values,
non_holidays['quantity'].values,
equal_var=False
)
holiday_rules['holiday_types'][holiday_type] = {
'learned_multiplier': float(learned_multiplier),
'learned_impact_pct': float(learned_impact),
'sample_size': int(len(holiday_days)),
'avg_quantity': float(holiday_avg),
'p_value': float(p_value),
'significant': bool(p_value < 0.05)
}
# Compare with hardcoded +50%
hardcoded_multiplier = 1.5
difference = abs(learned_multiplier - hardcoded_multiplier)
if difference > 0.1 and p_value < 0.05: # More than 10% difference
insight = {
'type': 'recommendation',
'priority': 'high' if difference > 0.3 else 'medium',
'category': 'forecasting',
'title': f'Holiday Rule Optimization: {holiday_type}',
'description': f'{holiday_type} shows {learned_impact:.1f}% impact vs hardcoded +50%. Using learned multiplier {learned_multiplier:.2f}x could improve forecast accuracy.',
'impact_type': 'forecast_improvement',
'impact_value': difference * 100,
'impact_unit': 'percentage_points',
'confidence': self._calculate_confidence(len(holiday_days), p_value),
'metrics_json': {
'holiday_type': holiday_type,
'learned_multiplier': round(learned_multiplier, 3),
'hardcoded_multiplier': 1.5,
'learned_impact_pct': round(learned_impact, 2),
'hardcoded_impact_pct': 50.0,
'baseline_avg': round(baseline_avg, 2),
'holiday_avg': round(holiday_avg, 2),
'sample_size': len(holiday_days),
'p_value': round(p_value, 4)
},
'actionable': True,
'recommendation_actions': [
{
'label': 'Update Holiday Rule',
'action': 'update_holiday_multiplier',
'params': {
'holiday_type': holiday_type,
'new_multiplier': round(learned_multiplier, 3)
}
}
],
'source_service': 'forecasting',
'source_model': 'dynamic_rules_engine'
}
insights.append(insight)
logger.info(
"Holiday rule optimization identified",
holiday_type=holiday_type,
learned=f"{learned_multiplier:.2f}x",
hardcoded="1.5x"
)
# Overall holiday impact
all_holidays = merged[merged['is_holiday'] == True]
if len(all_holidays) >= min_samples:
overall_avg = all_holidays['quantity'].mean()
overall_multiplier = overall_avg / baseline_avg
holiday_rules['overall_learned_multiplier'] = float(overall_multiplier)
holiday_rules['overall_learned_impact_pct'] = float((overall_multiplier - 1) * 100)
return holiday_rules, insights
async def _learn_event_rules(
self,
sales_data: pd.DataFrame,
external_data: pd.DataFrame,
min_samples: int
) -> Tuple[Dict[str, Any], List[Dict[str, Any]]]:
"""
Learn event impact by event type instead of uniform +30%.
Hardcoded: All events = +30%
Learn: Sports events vs concerts vs festivals have different impacts
"""
logger.info("Learning event impact rules")
# Merge sales with event data
merged = sales_data.merge(
external_data[['date', 'event_name', 'event_type', 'event_attendance']],
on='date',
how='left'
)
# Baseline: non-event days
non_events = merged[merged['event_name'].isna()]
baseline_avg = non_events['quantity'].mean()
event_rules = {
'baseline_avg': float(baseline_avg),
'hardcoded_multiplier': 1.3, # Current +30%
'event_types': {}
}
insights = []
# Learn impact per event type
if 'event_type' in merged.columns:
for event_type in merged[merged['event_type'].notna()]['event_type'].unique():
if pd.isna(event_type):
continue
event_days = merged[merged['event_type'] == event_type]
if len(event_days) >= min_samples:
event_avg = event_days['quantity'].mean()
learned_multiplier = event_avg / baseline_avg
learned_impact = (learned_multiplier - 1) * 100
# Statistical test
t_stat, p_value = stats.ttest_ind(
event_days['quantity'].values,
non_events['quantity'].values,
equal_var=False
)
event_rules['event_types'][event_type] = {
'learned_multiplier': float(learned_multiplier),
'learned_impact_pct': float(learned_impact),
'sample_size': int(len(event_days)),
'avg_quantity': float(event_avg),
'p_value': float(p_value),
'significant': bool(p_value < 0.05)
}
# Compare with hardcoded +30%
hardcoded_multiplier = 1.3
difference = abs(learned_multiplier - hardcoded_multiplier)
if difference > 0.1 and p_value < 0.05:
insight = {
'type': 'recommendation',
'priority': 'medium',
'category': 'forecasting',
'title': f'Event Rule Optimization: {event_type}',
'description': f'{event_type} events show {learned_impact:.1f}% impact vs hardcoded +30%. Using learned multiplier could improve event forecasts.',
'impact_type': 'forecast_improvement',
'impact_value': difference * 100,
'impact_unit': 'percentage_points',
'confidence': self._calculate_confidence(len(event_days), p_value),
'metrics_json': {
'event_type': event_type,
'learned_multiplier': round(learned_multiplier, 3),
'hardcoded_multiplier': 1.3,
'learned_impact_pct': round(learned_impact, 2),
'hardcoded_impact_pct': 30.0,
'baseline_avg': round(baseline_avg, 2),
'event_avg': round(event_avg, 2),
'sample_size': len(event_days),
'p_value': round(p_value, 4)
},
'actionable': True,
'recommendation_actions': [
{
'label': 'Update Event Rule',
'action': 'update_event_multiplier',
'params': {
'event_type': event_type,
'new_multiplier': round(learned_multiplier, 3)
}
}
],
'source_service': 'forecasting',
'source_model': 'dynamic_rules_engine'
}
insights.append(insight)
return event_rules, insights
async def _learn_day_of_week_rules(
self,
sales_data: pd.DataFrame,
min_samples: int
) -> Tuple[Dict[str, Any], List[Dict[str, Any]]]:
"""
Learn day-of-week patterns per product.
Replace general assumptions with product-specific patterns.
"""
logger.info("Learning day-of-week patterns")
sales_data = sales_data.copy()
sales_data['day_of_week'] = sales_data['date'].dt.dayofweek
sales_data['day_name'] = sales_data['date'].dt.day_name()
# Calculate average per day of week
dow_avg = sales_data.groupby('day_of_week')['quantity'].agg(['mean', 'std', 'count'])
overall_avg = sales_data['quantity'].mean()
dow_rules = {
'overall_avg': float(overall_avg),
'days': {}
}
insights = []
day_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
for dow in range(7):
if dow not in dow_avg.index or dow_avg.loc[dow, 'count'] < min_samples:
continue
day_avg = dow_avg.loc[dow, 'mean']
day_std = dow_avg.loc[dow, 'std']
day_count = dow_avg.loc[dow, 'count']
multiplier = day_avg / overall_avg
impact_pct = (multiplier - 1) * 100
# Coefficient of variation
cv = (day_std / day_avg) if day_avg > 0 else 0
dow_rules['days'][day_names[dow]] = {
'day_of_week': int(dow),
'learned_multiplier': float(multiplier),
'impact_pct': float(impact_pct),
'avg_quantity': float(day_avg),
'std_quantity': float(day_std),
'sample_size': int(day_count),
'coefficient_of_variation': float(cv)
}
# Insight for significant deviations
if abs(impact_pct) > 20: # More than 20% difference
insight = {
'type': 'insight',
'priority': 'medium' if abs(impact_pct) > 30 else 'low',
'category': 'forecasting',
'title': f'{day_names[dow]} Pattern: {abs(impact_pct):.0f}% {"Higher" if impact_pct > 0 else "Lower"}',
'description': f'{day_names[dow]} sales average {day_avg:.1f} units ({impact_pct:+.1f}% vs weekly average {overall_avg:.1f}). Consider this pattern in production planning.',
'impact_type': 'operational_insight',
'impact_value': abs(impact_pct),
'impact_unit': 'percentage',
'confidence': self._calculate_confidence(day_count, 0.01),  # nominal p-value; no significance test is run for day-of-week patterns
'metrics_json': {
'day_of_week': day_names[dow],
'day_multiplier': round(multiplier, 3),
'impact_pct': round(impact_pct, 2),
'day_avg': round(day_avg, 2),
'overall_avg': round(overall_avg, 2),
'sample_size': int(day_count),
'std': round(day_std, 2)
},
'actionable': True,
'recommendation_actions': [
{
'label': 'Adjust Production Schedule',
'action': 'adjust_weekly_production',
'params': {
'day': day_names[dow],
'multiplier': round(multiplier, 3)
}
}
],
'source_service': 'forecasting',
'source_model': 'dynamic_rules_engine'
}
insights.append(insight)
return dow_rules, insights
async def _learn_month_rules(
self,
sales_data: pd.DataFrame,
min_samples: int
) -> Tuple[Dict[str, Any], List[Dict[str, Any]]]:
"""
Learn monthly seasonality patterns per product.
"""
logger.info("Learning monthly seasonality patterns")
sales_data = sales_data.copy()
sales_data['month'] = sales_data['date'].dt.month
sales_data['month_name'] = sales_data['date'].dt.month_name()
# Calculate average per month
month_avg = sales_data.groupby('month')['quantity'].agg(['mean', 'std', 'count'])
overall_avg = sales_data['quantity'].mean()
month_rules = {
'overall_avg': float(overall_avg),
'months': {}
}
insights = []
month_names = ['January', 'February', 'March', 'April', 'May', 'June',
'July', 'August', 'September', 'October', 'November', 'December']
for month in range(1, 13):
if month not in month_avg.index or month_avg.loc[month, 'count'] < min_samples:
continue
month_mean = month_avg.loc[month, 'mean']
month_std = month_avg.loc[month, 'std']
month_count = month_avg.loc[month, 'count']
multiplier = month_mean / overall_avg
impact_pct = (multiplier - 1) * 100
month_rules['months'][month_names[month - 1]] = {
'month': int(month),
'learned_multiplier': float(multiplier),
'impact_pct': float(impact_pct),
'avg_quantity': float(month_mean),
'std_quantity': float(month_std),
'sample_size': int(month_count)
}
# Insight for significant seasonal patterns
if abs(impact_pct) > 25: # More than 25% seasonal variation
insight = {
'type': 'insight',
'priority': 'medium',
'category': 'forecasting',
'title': f'Seasonal Pattern: {month_names[month - 1]} {abs(impact_pct):.0f}% {"Higher" if impact_pct > 0 else "Lower"}',
'description': f'{month_names[month - 1]} shows strong seasonality with {impact_pct:+.1f}% vs annual average. Plan inventory accordingly.',
'impact_type': 'operational_insight',
'impact_value': abs(impact_pct),
'impact_unit': 'percentage',
'confidence': self._calculate_confidence(month_count, 0.01),
'metrics_json': {
'month': month_names[month - 1],
'multiplier': round(multiplier, 3),
'impact_pct': round(impact_pct, 2),
'month_avg': round(month_mean, 2),
'annual_avg': round(overall_avg, 2),
'sample_size': int(month_count)
},
'actionable': True,
'recommendation_actions': [
{
'label': 'Adjust Seasonal Planning',
'action': 'adjust_seasonal_forecast',
'params': {
'month': month_names[month - 1],
'multiplier': round(multiplier, 3)
}
}
],
'source_service': 'forecasting',
'source_model': 'dynamic_rules_engine'
}
insights.append(insight)
return month_rules, insights
def _calculate_confidence(self, sample_size: int, p_value: float) -> int:
"""
Calculate confidence score (0-100) based on sample size and statistical significance.
Args:
sample_size: Number of observations
p_value: Statistical significance p-value
Returns:
Confidence score 0-100
"""
# Sample size score (0-50 points)
if sample_size >= 100:
sample_score = 50
elif sample_size >= 50:
sample_score = 40
elif sample_size >= 30:
sample_score = 30
elif sample_size >= 20:
sample_score = 20
else:
sample_score = 10
# Statistical significance score (0-50 points)
if p_value < 0.001:
sig_score = 50
elif p_value < 0.01:
sig_score = 45
elif p_value < 0.05:
sig_score = 35
elif p_value < 0.1:
sig_score = 20
else:
sig_score = 10
return min(100, sample_score + sig_score)
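# Worked example: sample_size=60, p_value=0.003
#   -> sample_score=40 (50 <= 60 < 100), sig_score=45 (0.001 <= p < 0.01)
#   -> confidence = min(100, 40 + 45) = 85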
def get_rule(
self,
inventory_product_id: str,
rule_type: str,
key: str
) -> Optional[float]:
"""
Get learned rule multiplier for a specific condition.
Args:
inventory_product_id: Product identifier
rule_type: 'weather', 'holiday', 'event', 'day_of_week', 'month'
key: Specific condition key (e.g., 'rain', 'Christmas', 'Monday')
Returns:
Learned multiplier or None if not learned
"""
if rule_type == 'weather':
rules = self.weather_rules.get(inventory_product_id, {})
return rules.get('conditions', {}).get(key, {}).get('learned_multiplier')
elif rule_type == 'holiday':
rules = self.holiday_rules.get(inventory_product_id, {})
return rules.get('holiday_types', {}).get(key, {}).get('learned_multiplier')
elif rule_type == 'event':
rules = self.event_rules.get(inventory_product_id, {})
return rules.get('event_types', {}).get(key, {}).get('learned_multiplier')
elif rule_type == 'day_of_week':
rules = self.dow_rules.get(inventory_product_id, {})
return rules.get('days', {}).get(key, {}).get('learned_multiplier')
elif rule_type == 'month':
rules = self.month_rules.get(inventory_product_id, {})
return rules.get('months', {}).get(key, {}).get('learned_multiplier')
return None
def export_rules_for_prophet(
self,
inventory_product_id: str
) -> Dict[str, Any]:
"""
Export learned rules in format suitable for Prophet model integration.
Returns:
Dictionary with multipliers for Prophet custom seasonality/regressors
"""
return {
'weather': self.weather_rules.get(inventory_product_id, {}),
'holidays': self.holiday_rules.get(inventory_product_id, {}),
'events': self.event_rules.get(inventory_product_id, {}),
'day_of_week': self.dow_rules.get(inventory_product_id, {}),
'months': self.month_rules.get(inventory_product_id, {})
}
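
A minimal sketch of learning rules from a synthetic weekly cycle (illustrative; the ids are hypothetical, and with external_data omitted only day-of-week and monthly rules are learned):

import asyncio
import pandas as pd

async def main() -> None:
    engine = DynamicRulesEngine()
    sales_data = pd.DataFrame({
        "date": pd.date_range("2024-01-01", periods=365, freq="D"),
        "quantity": (pd.Series(range(365)) % 7) * 5 + 20,  # synthetic weekly cycle
    })
    results = await engine.learn_all_rules(
        tenant_id="00000000-0000-0000-0000-000000000000",  # hypothetical tenant UUID
        inventory_product_id="sourdough-loaf",  # hypothetical product id
        sales_data=sales_data,
    )
    print(len(results["insights"]), "insights;",
          "Monday multiplier:", engine.get_rule("sourdough-loaf", "day_of_week", "Monday"))

asyncio.run(main())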

View File

@@ -0,0 +1,263 @@
"""
Multi-Horizon Forecasting System
Generates forecasts for multiple time horizons (7, 14, 30, 90 days)
"""
import pandas as pd
import numpy as np
from typing import Dict, List, Any, Optional, Tuple
from datetime import datetime, timedelta, date
import structlog
logger = structlog.get_logger()
class MultiHorizonForecaster:
"""
Multi-horizon forecasting with horizon-specific models.
Horizons:
- Short-term (1-7 days): High precision, detailed features
- Medium-term (8-14 days): Balanced approach
- Long-term (15-30 days): Focus on trends, seasonal patterns
- Very long-term (31-90 days): Strategic planning, major trends only
"""
HORIZONS = {
'short': (1, 7),
'medium': (8, 14),
'long': (15, 30),
'very_long': (31, 90)
}
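# (first_day, last_day) offsets in days ahead; note that the generator below always
# forecasts day 1 through last_day, so longer horizons subsume shorter ones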
def __init__(self, base_forecaster=None):
"""
Initialize multi-horizon forecaster.
Args:
base_forecaster: Base forecaster (e.g., BakeryForecaster) to use
"""
self.base_forecaster = base_forecaster
async def generate_multi_horizon_forecast(
self,
tenant_id: str,
inventory_product_id: str,
start_date: date,
horizons: Optional[List[str]] = None,
include_confidence_intervals: bool = True
) -> Dict[str, Any]:
"""
Generate forecasts for multiple horizons.
Args:
tenant_id: Tenant identifier
inventory_product_id: Product identifier
start_date: Start date for forecasts
horizons: List of horizons to forecast ('short', 'medium', 'long', 'very_long')
include_confidence_intervals: Include confidence intervals
Returns:
Dictionary with forecasts by horizon
"""
if horizons is None:
horizons = ['short', 'medium', 'long']
logger.info(
"Generating multi-horizon forecast",
tenant_id=tenant_id,
inventory_product_id=inventory_product_id,
horizons=horizons
)
results = {
'tenant_id': tenant_id,
'inventory_product_id': inventory_product_id,
'start_date': start_date.isoformat(),
'generated_at': datetime.utcnow().isoformat(),  # UTC, consistent with the other services
'horizons': {}
}
for horizon_name in horizons:
if horizon_name not in self.HORIZONS:
logger.warning(f"Unknown horizon: {horizon_name}, skipping")
continue
start_day, end_day = self.HORIZONS[horizon_name]
# Generate forecast for this horizon
horizon_forecast = await self._generate_horizon_forecast(
tenant_id=tenant_id,
inventory_product_id=inventory_product_id,
start_date=start_date,
days_ahead=end_day,
horizon_name=horizon_name,
include_confidence=include_confidence_intervals
)
results['horizons'][horizon_name] = horizon_forecast
logger.info("Multi-horizon forecast complete",
horizons_generated=len(results['horizons']))
return results
async def _generate_horizon_forecast(
self,
tenant_id: str,
inventory_product_id: str,
start_date: date,
days_ahead: int,
horizon_name: str,
include_confidence: bool
) -> Dict[str, Any]:
"""
Generate forecast for a specific horizon.
Args:
tenant_id: Tenant identifier
inventory_product_id: Product identifier
start_date: Start date
days_ahead: Number of days ahead
horizon_name: Horizon name ('short', 'medium', etc.)
include_confidence: Include confidence intervals
Returns:
Forecast data for the horizon
"""
# Generate date range
dates = [start_date + timedelta(days=i) for i in range(days_ahead)]
# Use base forecaster if available
if self.base_forecaster:
# Call base forecaster for predictions
forecasts = []
for forecast_date in dates:
try:
# This would call the actual forecasting service
# For now, we'll return a structured response
forecasts.append({
'date': forecast_date.isoformat(),
'predicted_demand': 0, # Placeholder
'confidence_lower': 0 if include_confidence else None,
'confidence_upper': 0 if include_confidence else None
})
except Exception as e:
logger.error(f"Failed to generate forecast for {forecast_date}: {e}")
return {
'horizon_name': horizon_name,
'days_ahead': days_ahead,
'start_date': start_date.isoformat(),
'end_date': dates[-1].isoformat(),
'forecasts': forecasts,
'aggregates': self._calculate_horizon_aggregates(forecasts)
}
else:
logger.warning("No base forecaster available, returning placeholder")
return {
'horizon_name': horizon_name,
'days_ahead': days_ahead,
'forecasts': [],
'aggregates': {}
}
def _calculate_horizon_aggregates(self, forecasts: List[Dict]) -> Dict[str, float]:
"""
Calculate aggregate statistics for a horizon.
Args:
forecasts: List of daily forecasts
Returns:
Aggregate statistics
"""
if not forecasts:
return {}
demands = [f['predicted_demand'] for f in forecasts if f.get('predicted_demand')]
if not demands:
return {}
return {
'total_demand': sum(demands),
'avg_daily_demand': np.mean(demands),
'max_daily_demand': max(demands),
'min_daily_demand': min(demands),
'demand_volatility': np.std(demands) if len(demands) > 1 else 0
}
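# Worked example: demands=[10, 12, 8] -> total=30, avg=10.0, max=12, min=8,
# volatility = np.std([10, 12, 8]) ~ 1.63 (population standard deviation)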
def get_horizon_recommendation(
self,
horizon_name: str,
forecast_data: Dict[str, Any]
) -> Dict[str, Any]:
"""
Generate recommendations based on horizon forecast.
Args:
horizon_name: Horizon name
forecast_data: Forecast data for the horizon
Returns:
Recommendations dictionary
"""
aggregates = forecast_data.get('aggregates', {})
total_demand = aggregates.get('total_demand', 0)
volatility = aggregates.get('demand_volatility', 0)
recommendations = {
'horizon': horizon_name,
'actions': []
}
if horizon_name == 'short':
# Short-term: Operational recommendations
if total_demand > 0:
recommendations['actions'].append(f"Prepare {total_demand:.0f} units for next 7 days")
if volatility > 10:
recommendations['actions'].append("High volatility expected - increase safety stock")
elif horizon_name == 'medium':
# Medium-term: Procurement planning
recommendations['actions'].append(f"Order supplies for {total_demand:.0f} units (2-week demand)")
if aggregates.get('max_daily_demand', 0) > aggregates.get('avg_daily_demand', 0) * 1.5:
recommendations['actions'].append("Peak demand day detected - plan extra capacity")
elif horizon_name == 'long':
# Long-term: Strategic planning
avg_weekly_demand = total_demand / 4 if total_demand > 0 else 0
recommendations['actions'].append(f"Monthly demand projection: {total_demand:.0f} units")
recommendations['actions'].append(f"Average weekly demand: {avg_weekly_demand:.0f} units")
elif horizon_name == 'very_long':
# Very long-term: Capacity planning
recommendations['actions'].append(f"Quarterly demand projection: {total_demand:.0f} units")
recommendations['actions'].append("Review capacity and staffing needs")
return recommendations
def get_appropriate_horizons_for_use_case(use_case: str) -> List[str]:
"""
Get appropriate forecast horizons for a use case.
Args:
use_case: Use case name (e.g., 'production_planning', 'procurement', 'strategic')
Returns:
List of horizon names
"""
use_case_horizons = {
'production_planning': ['short'],
'procurement': ['short', 'medium'],
'inventory_optimization': ['short', 'medium'],
'capacity_planning': ['medium', 'long'],
'strategic_planning': ['long', 'very_long'],
'financial_planning': ['long', 'very_long'],
'all': ['short', 'medium', 'long', 'very_long']
}
return use_case_horizons.get(use_case, ['short', 'medium'])
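
A minimal sketch tying the use-case helper to the forecaster (illustrative; with no base forecaster supplied, each horizon returns placeholder forecasts and empty aggregates):

import asyncio
from datetime import date

async def main() -> None:
    forecaster = MultiHorizonForecaster()  # no base forecaster: placeholders only
    horizons = get_appropriate_horizons_for_use_case("procurement")  # ['short', 'medium']
    result = await forecaster.generate_multi_horizon_forecast(
        tenant_id="00000000-0000-0000-0000-000000000000",  # hypothetical tenant UUID
        inventory_product_id="rye-bread",  # hypothetical product id
        start_date=date(2025, 7, 1),
        horizons=horizons,
    )
    for name, data in result["horizons"].items():
        print(name, forecaster.get_horizon_recommendation(name, data)["actions"])

asyncio.run(main())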

View File

@@ -0,0 +1,593 @@
"""
Pattern Detection Engine for Sales Data
Automatically identifies patterns and generates insights
"""
import pandas as pd
import numpy as np
from typing import Dict, List, Any, Optional, Tuple
from datetime import datetime, timedelta
import structlog
from scipy import stats
from collections import defaultdict
logger = structlog.get_logger()
class SalesPatternDetector:
"""
Detect sales patterns and generate actionable insights.
Patterns detected:
- Time-of-day patterns (hourly peaks)
- Day-of-week patterns (weekend spikes)
- Weekly seasonality patterns
- Monthly patterns
- Holiday impact patterns
- Weather correlation patterns
"""
def __init__(self, significance_threshold: float = 0.15):
"""
Initialize pattern detector.
Args:
significance_threshold: Minimum percentage difference to consider significant (default 15%)
"""
self.significance_threshold = significance_threshold
self.detected_patterns = []
async def detect_all_patterns(
self,
tenant_id: str,
inventory_product_id: str,
sales_data: pd.DataFrame,
min_confidence: int = 70
) -> List[Dict[str, Any]]:
"""
Detect all patterns in sales data and generate insights.
Args:
tenant_id: Tenant identifier
inventory_product_id: Product identifier
sales_data: Sales data with columns: date, quantity, (optional: hour, temperature, etc.)
min_confidence: Minimum confidence score for insights
Returns:
List of insight dictionaries ready for AI Insights Service
"""
logger.info(
"Starting pattern detection",
tenant_id=tenant_id,
product_id=inventory_product_id,
data_points=len(sales_data)
)
insights = []
# Ensure date column is datetime
if 'date' in sales_data.columns:
sales_data['date'] = pd.to_datetime(sales_data['date'])
# 1. Day-of-week patterns
dow_insights = await self._detect_day_of_week_patterns(
tenant_id, inventory_product_id, sales_data, min_confidence
)
insights.extend(dow_insights)
# 2. Weekend vs weekday patterns
weekend_insights = await self._detect_weekend_patterns(
tenant_id, inventory_product_id, sales_data, min_confidence
)
insights.extend(weekend_insights)
# 3. Month-end patterns
month_end_insights = await self._detect_month_end_patterns(
tenant_id, inventory_product_id, sales_data, min_confidence
)
insights.extend(month_end_insights)
# 4. Hourly patterns (if hour data available)
if 'hour' in sales_data.columns:
hourly_insights = await self._detect_hourly_patterns(
tenant_id, inventory_product_id, sales_data, min_confidence
)
insights.extend(hourly_insights)
# 5. Weather correlation (if temperature data available)
if 'temperature' in sales_data.columns:
weather_insights = await self._detect_weather_correlations(
tenant_id, inventory_product_id, sales_data, min_confidence
)
insights.extend(weather_insights)
# 6. Trend detection
trend_insights = await self._detect_trends(
tenant_id, inventory_product_id, sales_data, min_confidence
)
insights.extend(trend_insights)
logger.info(
"Pattern detection complete",
total_insights=len(insights),
product_id=inventory_product_id
)
return insights
async def _detect_day_of_week_patterns(
self,
tenant_id: str,
inventory_product_id: str,
sales_data: pd.DataFrame,
min_confidence: int
) -> List[Dict[str, Any]]:
"""Detect day-of-week patterns (e.g., Friday sales spike)."""
insights = []
if 'date' not in sales_data.columns or 'quantity' not in sales_data.columns:
return insights
# Add day of week
sales_data['day_of_week'] = sales_data['date'].dt.dayofweek
sales_data['day_name'] = sales_data['date'].dt.day_name()
# Calculate average sales per day of week
dow_avg = sales_data.groupby(['day_of_week', 'day_name'])['quantity'].agg(['mean', 'count']).reset_index()
# Only consider days with sufficient data (at least 4 observations)
dow_avg = dow_avg[dow_avg['count'] >= 4]
if len(dow_avg) < 2:
return insights
overall_avg = sales_data['quantity'].mean()
        # Find days significantly above or below average
        for _, row in dow_avg.iterrows():
            day_avg = row['mean']
            pct_diff = ((day_avg - overall_avg) / overall_avg) * 100
            if abs(pct_diff) > self.significance_threshold * 100:
                # Calculate confidence based on sample size and consistency
                confidence = self._calculate_pattern_confidence(
                    sample_size=int(row['count']),
                    effect_size=abs(pct_diff) / 100,
                    variability=sales_data['quantity'].std()
                )
                if confidence >= min_confidence:
                    insight = self._create_insight(
                        tenant_id=tenant_id,
                        inventory_product_id=inventory_product_id,
                        insight_type='pattern',
                        category='sales',
                        priority='medium' if abs(pct_diff) > 20 else 'low',
                        title=f'{row["day_name"]} Sales Pattern Detected',
                        description=f'Sales on {row["day_name"]} are {abs(pct_diff):.1f}% {"higher" if pct_diff > 0 else "lower"} than average ({day_avg:.1f} vs {overall_avg:.1f} units).',
                        confidence=confidence,
                        metrics={
                            'day_of_week': row['day_name'],
                            'avg_sales': float(day_avg),
                            'overall_avg': float(overall_avg),
                            'difference_pct': float(pct_diff),
                            'sample_size': int(row['count'])
                        },
                        actionable=True,
                        actions=[
                            {'label': 'Adjust Production', 'action': 'adjust_daily_production'},
                            {'label': 'Review Schedule', 'action': 'review_production_schedule'}
                        ]
                    )
                    insights.append(insight)
return insights
async def _detect_weekend_patterns(
self,
tenant_id: str,
inventory_product_id: str,
sales_data: pd.DataFrame,
min_confidence: int
) -> List[Dict[str, Any]]:
"""Detect weekend vs weekday patterns."""
insights = []
if 'date' not in sales_data.columns or 'quantity' not in sales_data.columns:
return insights
# Classify weekend vs weekday
sales_data['is_weekend'] = sales_data['date'].dt.dayofweek.isin([5, 6])
# Calculate averages
weekend_avg = sales_data[sales_data['is_weekend']]['quantity'].mean()
weekday_avg = sales_data[~sales_data['is_weekend']]['quantity'].mean()
weekend_count = sales_data[sales_data['is_weekend']]['quantity'].count()
weekday_count = sales_data[~sales_data['is_weekend']]['quantity'].count()
if weekend_count < 4 or weekday_count < 4:
return insights
pct_diff = ((weekend_avg - weekday_avg) / weekday_avg) * 100
if abs(pct_diff) > self.significance_threshold * 100:
confidence = self._calculate_pattern_confidence(
sample_size=min(weekend_count, weekday_count),
effect_size=abs(pct_diff) / 100,
variability=sales_data['quantity'].std()
)
if confidence >= min_confidence:
                # Estimate monthly volume impact (~8 weekend days per month)
                impact_value = abs(weekend_avg - weekday_avg) * 8
insight = self._create_insight(
tenant_id=tenant_id,
inventory_product_id=inventory_product_id,
insight_type='recommendation',
category='forecasting',
priority='high' if abs(pct_diff) > 25 else 'medium',
title=f'Weekend Demand Pattern: {abs(pct_diff):.0f}% {"Higher" if pct_diff > 0 else "Lower"}',
description=f'Weekend sales average {weekend_avg:.1f} units vs {weekday_avg:.1f} on weekdays ({abs(pct_diff):.0f}% {"increase" if pct_diff > 0 else "decrease"}). Recommend adjusting weekend production targets.',
confidence=confidence,
impact_type='revenue_increase' if pct_diff > 0 else 'cost_savings',
impact_value=float(impact_value),
impact_unit='units/month',
metrics={
'weekend_avg': float(weekend_avg),
'weekday_avg': float(weekday_avg),
'difference_pct': float(pct_diff),
'weekend_samples': int(weekend_count),
'weekday_samples': int(weekday_count)
},
actionable=True,
actions=[
{'label': 'Increase Weekend Production', 'action': 'adjust_weekend_production'},
{'label': 'Update Forecast Multiplier', 'action': 'update_forecast_rule'}
]
)
insights.append(insight)
return insights
async def _detect_month_end_patterns(
self,
tenant_id: str,
inventory_product_id: str,
sales_data: pd.DataFrame,
min_confidence: int
) -> List[Dict[str, Any]]:
"""Detect month-end and payday patterns."""
insights = []
if 'date' not in sales_data.columns or 'quantity' not in sales_data.columns:
return insights
# Identify payday periods (15th and last 3 days of month)
sales_data['day_of_month'] = sales_data['date'].dt.day
sales_data['is_payday'] = (
(sales_data['day_of_month'] == 15) |
(sales_data['date'].dt.is_month_end) |
(sales_data['day_of_month'] >= sales_data['date'].dt.days_in_month - 2)
)
payday_avg = sales_data[sales_data['is_payday']]['quantity'].mean()
regular_avg = sales_data[~sales_data['is_payday']]['quantity'].mean()
payday_count = sales_data[sales_data['is_payday']]['quantity'].count()
if payday_count < 4:
return insights
pct_diff = ((payday_avg - regular_avg) / regular_avg) * 100
if abs(pct_diff) > self.significance_threshold * 100:
confidence = self._calculate_pattern_confidence(
sample_size=payday_count,
effect_size=abs(pct_diff) / 100,
variability=sales_data['quantity'].std()
)
if confidence >= min_confidence and pct_diff > 0:
insight = self._create_insight(
tenant_id=tenant_id,
inventory_product_id=inventory_product_id,
insight_type='pattern',
category='sales',
priority='medium',
                    title='Payday Shopping Pattern Detected',
description=f'Sales increase {pct_diff:.0f}% during payday periods (15th and month-end). Average {payday_avg:.1f} vs {regular_avg:.1f} units.',
confidence=confidence,
metrics={
'payday_avg': float(payday_avg),
'regular_avg': float(regular_avg),
'difference_pct': float(pct_diff)
},
actionable=True,
actions=[
{'label': 'Increase Payday Stock', 'action': 'adjust_payday_production'}
]
)
insights.append(insight)
return insights
async def _detect_hourly_patterns(
self,
tenant_id: str,
inventory_product_id: str,
sales_data: pd.DataFrame,
min_confidence: int
) -> List[Dict[str, Any]]:
"""Detect hourly sales patterns (if POS data available)."""
insights = []
if 'hour' not in sales_data.columns or 'quantity' not in sales_data.columns:
return insights
hourly_avg = sales_data.groupby('hour')['quantity'].agg(['mean', 'count']).reset_index()
hourly_avg = hourly_avg[hourly_avg['count'] >= 3] # At least 3 observations
if len(hourly_avg) < 3:
return insights
overall_avg = sales_data['quantity'].mean()
# Find peak hours (top 3)
top_hours = hourly_avg.nlargest(3, 'mean')
for _, row in top_hours.iterrows():
hour_avg = row['mean']
pct_diff = ((hour_avg - overall_avg) / overall_avg) * 100
if pct_diff > self.significance_threshold * 100:
confidence = self._calculate_pattern_confidence(
sample_size=int(row['count']),
effect_size=pct_diff / 100,
variability=sales_data['quantity'].std()
)
if confidence >= min_confidence:
hour = int(row['hour'])
time_label = f"{hour:02d}:00-{(hour+1):02d}:00"
insight = self._create_insight(
tenant_id=tenant_id,
inventory_product_id=inventory_product_id,
insight_type='pattern',
category='sales',
priority='low',
title=f'Peak Sales Hour: {time_label}',
description=f'Sales peak during {time_label} with {hour_avg:.1f} units ({pct_diff:.0f}% above average).',
confidence=confidence,
metrics={
'peak_hour': hour,
'avg_sales': float(hour_avg),
'overall_avg': float(overall_avg),
'difference_pct': float(pct_diff)
},
actionable=True,
actions=[
{'label': 'Ensure Fresh Stock', 'action': 'schedule_production'},
{'label': 'Increase Staffing', 'action': 'adjust_staffing'}
]
)
insights.append(insight)
return insights
async def _detect_weather_correlations(
self,
tenant_id: str,
inventory_product_id: str,
sales_data: pd.DataFrame,
min_confidence: int
) -> List[Dict[str, Any]]:
"""Detect weather-sales correlations."""
insights = []
if 'temperature' not in sales_data.columns or 'quantity' not in sales_data.columns:
return insights
# Remove NaN values
clean_data = sales_data[['temperature', 'quantity']].dropna()
if len(clean_data) < 30: # Need sufficient data
return insights
# Calculate correlation
correlation, p_value = stats.pearsonr(clean_data['temperature'], clean_data['quantity'])
if abs(correlation) > 0.3 and p_value < 0.05: # Moderate correlation and significant
confidence = self._calculate_correlation_confidence(correlation, p_value, len(clean_data))
if confidence >= min_confidence:
direction = 'increase' if correlation > 0 else 'decrease'
insight = self._create_insight(
tenant_id=tenant_id,
inventory_product_id=inventory_product_id,
insight_type='insight',
category='forecasting',
priority='medium' if abs(correlation) > 0.5 else 'low',
title=f'Temperature Impact on Sales: {abs(correlation):.0%} Correlation',
description=f'Sales {direction} with temperature (correlation: {correlation:.2f}). {"Warmer" if correlation > 0 else "Colder"} weather associated with {"higher" if correlation > 0 else "lower"} sales.',
confidence=confidence,
metrics={
'correlation': float(correlation),
'p_value': float(p_value),
'sample_size': len(clean_data),
'direction': direction
},
actionable=False
)
insights.append(insight)
return insights
async def _detect_trends(
self,
tenant_id: str,
inventory_product_id: str,
sales_data: pd.DataFrame,
min_confidence: int
) -> List[Dict[str, Any]]:
"""Detect overall trends (growing, declining, stable)."""
insights = []
if 'date' not in sales_data.columns or 'quantity' not in sales_data.columns or len(sales_data) < 60:
return insights
# Sort by date
sales_data = sales_data.sort_values('date')
# Calculate 30-day rolling average
sales_data['rolling_30d'] = sales_data['quantity'].rolling(window=30, min_periods=15).mean()
# Compare first and last 30-day averages
first_30_avg = sales_data['rolling_30d'].iloc[:30].mean()
last_30_avg = sales_data['rolling_30d'].iloc[-30:].mean()
if pd.isna(first_30_avg) or pd.isna(last_30_avg):
return insights
pct_change = ((last_30_avg - first_30_avg) / first_30_avg) * 100
if abs(pct_change) > 10: # 10% change is significant
confidence = min(95, 70 + int(abs(pct_change))) # Higher change = higher confidence
trend_type = 'growing' if pct_change > 0 else 'declining'
insight = self._create_insight(
tenant_id=tenant_id,
inventory_product_id=inventory_product_id,
insight_type='prediction',
category='forecasting',
priority='high' if abs(pct_change) > 20 else 'medium',
title=f'Sales Trend: {trend_type.title()} {abs(pct_change):.0f}%',
description=f'Sales show a {trend_type} trend over the period. Current 30-day average: {last_30_avg:.1f} vs earlier: {first_30_avg:.1f} ({pct_change:+.0f}%).',
confidence=confidence,
metrics={
'current_avg': float(last_30_avg),
'previous_avg': float(first_30_avg),
'change_pct': float(pct_change),
'trend': trend_type
},
actionable=True,
actions=[
{'label': 'Adjust Forecast Model', 'action': 'update_forecast'},
{'label': 'Review Capacity', 'action': 'review_production_capacity'}
]
)
insights.append(insight)
return insights
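    # Worked example: an 18% rise in the 30-day rolling average yields
    # confidence min(95, 70 + 18) = 88 and a 'growing' trend insight.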
def _calculate_pattern_confidence(
self,
sample_size: int,
effect_size: float,
variability: float
) -> int:
"""
Calculate confidence score for detected pattern.
Args:
sample_size: Number of observations
effect_size: Size of the effect (e.g., 0.25 for 25% difference)
variability: Standard deviation of data
Returns:
Confidence score (0-100)
"""
# Base confidence from sample size
if sample_size < 4:
base = 50
elif sample_size < 10:
base = 65
elif sample_size < 30:
base = 75
elif sample_size < 100:
base = 85
else:
base = 90
# Adjust for effect size
effect_boost = min(15, effect_size * 30)
# Adjust for variability (penalize high variability)
variability_penalty = min(10, variability / 10)
confidence = base + effect_boost - variability_penalty
return int(max(0, min(100, confidence)))
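    # Worked example (following the tiers above): sample_size=20 -> base 75,
    # effect_size=0.25 -> +7.5 boost, variability=50 -> -5 penalty, so the
    # returned confidence is int(77.5) = 77.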
def _calculate_correlation_confidence(
self,
correlation: float,
p_value: float,
sample_size: int
) -> int:
"""Calculate confidence for correlation insights."""
# Base confidence from correlation strength
base = abs(correlation) * 100
# Boost for significance
if p_value < 0.001:
significance_boost = 15
elif p_value < 0.01:
significance_boost = 10
elif p_value < 0.05:
significance_boost = 5
else:
significance_boost = 0
# Boost for sample size
if sample_size > 100:
sample_boost = 10
elif sample_size > 50:
sample_boost = 5
else:
sample_boost = 0
confidence = base + significance_boost + sample_boost
return int(max(0, min(100, confidence)))
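    # Worked example: correlation=0.45, p_value=0.008, sample_size=120 gives
    # base 45 + significance boost 10 + sample boost 10 = confidence 65.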
def _create_insight(
self,
tenant_id: str,
inventory_product_id: str,
insight_type: str,
category: str,
priority: str,
title: str,
description: str,
confidence: int,
metrics: Dict[str, Any],
actionable: bool,
        actions: Optional[List[Dict[str, str]]] = None,
        impact_type: Optional[str] = None,
        impact_value: Optional[float] = None,
        impact_unit: Optional[str] = None
) -> Dict[str, Any]:
"""Create an insight dictionary for AI Insights Service."""
return {
'tenant_id': tenant_id,
'type': insight_type,
'priority': priority,
'category': category,
'title': title,
'description': description,
'impact_type': impact_type,
'impact_value': impact_value,
'impact_unit': impact_unit,
'confidence': confidence,
'metrics_json': metrics,
'actionable': actionable,
'recommendation_actions': actions or [],
'source_service': 'forecasting',
'source_data_id': f'pattern_detection_{inventory_product_id}_{datetime.utcnow().strftime("%Y%m%d")}'
}
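
# Illustrative usage sketch (a minimal example, assuming a pandas DataFrame
# `df` with 'date' and 'quantity' columns; the IDs are placeholders):
#
#     detector = SalesPatternDetector(significance_threshold=0.15)
#     insights = await detector.detect_all_patterns(
#         tenant_id='tenant-uuid',
#         inventory_product_id='product-uuid',
#         sales_data=df,
#         min_confidence=70,
#     )
#     # Each insight dict is ready for AIInsightsClient.create_insights_bulk().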

View File

@@ -0,0 +1,854 @@
# ================================================================
# services/forecasting/app/ml/predictor.py
# ================================================================
"""
Enhanced predictor module with advanced forecasting capabilities
"""
import structlog
from typing import Dict, List, Any, Optional, Tuple
import pandas as pd
import numpy as np
from datetime import datetime, date, timedelta
import pickle
import json
from app.core.config import settings
from shared.monitoring.metrics import MetricsCollector
from shared.database.base import create_database_manager
logger = structlog.get_logger()
metrics = MetricsCollector("forecasting-service")
class BakeryPredictor:
"""
Advanced predictor for bakery demand forecasting with dependency injection
Handles Prophet models and business-specific logic
"""
def __init__(self, database_manager=None, use_dynamic_rules=True):
self.database_manager = database_manager or create_database_manager(settings.DATABASE_URL, "forecasting-service")
self.model_cache = {}
self.use_dynamic_rules = use_dynamic_rules
if use_dynamic_rules:
try:
from app.ml.dynamic_rules_engine import DynamicRulesEngine
from shared.clients.ai_insights_client import AIInsightsClient
self.rules_engine = DynamicRulesEngine()
self.ai_insights_client = AIInsightsClient(
base_url=settings.AI_INSIGHTS_SERVICE_URL or "http://ai-insights-service:8000"
)
# Also provide business_rules for consistency
self.business_rules = BakeryBusinessRules(
use_dynamic_rules=True,
ai_insights_client=self.ai_insights_client
)
except ImportError as e:
logger.warning(f"Failed to import dynamic rules engine: {e}. Falling back to basic business rules.")
self.use_dynamic_rules = False
self.business_rules = BakeryBusinessRules()
else:
self.business_rules = BakeryBusinessRules()
class BakeryForecaster:
"""
Enhanced forecaster that integrates with repository pattern
Uses enhanced features from training service for predictions
"""
def __init__(self, database_manager=None, use_enhanced_features=True):
self.database_manager = database_manager or create_database_manager(settings.DATABASE_URL, "forecasting-service")
self.predictor = BakeryPredictor(database_manager)
self.use_enhanced_features = use_enhanced_features
        # Business rules are required by predict_demand below; reuse the
        # predictor's AI insights client when it was created.
        self.business_rules = BakeryBusinessRules(
            use_dynamic_rules=True,
            ai_insights_client=getattr(self.predictor, 'ai_insights_client', None)
        )
# Initialize POI feature service
from app.services.poi_feature_service import POIFeatureService
self.poi_feature_service = POIFeatureService()
# Initialize enhanced data processor from shared module
if use_enhanced_features:
try:
from shared.ml.data_processor import EnhancedBakeryDataProcessor
self.data_processor = EnhancedBakeryDataProcessor(region='MD')
logger.info("Enhanced features enabled using shared data processor")
except ImportError as e:
logger.warning(
f"Could not import EnhancedBakeryDataProcessor from shared module: {e}. "
"Falling back to basic features."
)
self.use_enhanced_features = False
self.data_processor = None
else:
self.data_processor = None
async def predict_demand(self, model, features: Dict[str, Any],
business_type: str = "individual") -> Dict[str, float]:
"""Generate demand prediction with business rules applied"""
try:
# Generate base prediction
base_prediction = await self._generate_base_prediction(model, features)
            # Apply business rules (async: may fetch dynamic rules)
            adjusted_prediction = await self.business_rules.apply_rules(
                base_prediction, features, business_type
            )
# Add uncertainty estimation
final_prediction = self._add_uncertainty_bands(adjusted_prediction, features)
return final_prediction
except Exception as e:
logger.error("Error in demand prediction", error=str(e))
raise
async def _generate_base_prediction(self, model, features: Dict[str, Any]) -> Dict[str, float]:
"""Generate base prediction from Prophet model"""
try:
            # Convert features to a Prophet DataFrame (async helper)
            df = await self._prepare_prophet_dataframe(features)
# Generate forecast
forecast = model.predict(df)
if len(forecast) > 0:
row = forecast.iloc[0]
return {
"yhat": float(row['yhat']),
"yhat_lower": float(row['yhat_lower']),
"yhat_upper": float(row['yhat_upper']),
"trend": float(row.get('trend', 0)),
"seasonal": float(row.get('seasonal', 0)),
"weekly": float(row.get('weekly', 0)),
"yearly": float(row.get('yearly', 0)),
"holidays": float(row.get('holidays', 0))
}
else:
raise ValueError("No prediction generated from model")
except Exception as e:
logger.error("Error generating base prediction", error=str(e))
raise
async def _prepare_prophet_dataframe(self, features: Dict[str, Any],
historical_data: pd.DataFrame = None) -> pd.DataFrame:
"""
Convert features to Prophet-compatible DataFrame.
Uses enhanced features when available (60+ features vs basic 10).
"""
try:
if self.use_enhanced_features and self.data_processor:
# Use enhanced data processor from training service
logger.info("Generating enhanced features for prediction")
# Create future date range
future_dates = pd.DatetimeIndex([pd.to_datetime(features['date'])])
# Prepare weather forecast DataFrame
weather_df = pd.DataFrame({
'date': [pd.to_datetime(features['date'])],
'temperature': [features.get('temperature', 15.0)],
'precipitation': [features.get('precipitation', 0.0)],
'humidity': [features.get('humidity', 60.0)],
'wind_speed': [features.get('wind_speed', 5.0)],
'pressure': [features.get('pressure', 1013.0)]
})
# Fetch POI features if tenant_id is available
poi_features = None
if 'tenant_id' in features:
poi_features = await self.poi_feature_service.get_poi_features(
features['tenant_id']
)
if poi_features:
logger.info(
f"Retrieved {len(poi_features)} POI features for prediction",
tenant_id=features['tenant_id']
)
# Use data processor to create ALL enhanced features
df = await self.data_processor.prepare_prediction_features(
future_dates=future_dates,
weather_forecast=weather_df,
traffic_forecast=None, # Will add when traffic forecasting is implemented
poi_features=poi_features, # POI features for location-based forecasting
historical_data=historical_data # For lagged features
)
logger.info(f"Generated {len(df.columns)} enhanced features for prediction")
return df
else:
# Fallback to basic features
logger.info("Using basic features for prediction")
# Create base DataFrame
df = pd.DataFrame({
'ds': [pd.to_datetime(features['date'])]
})
# Add regressor features
feature_mapping = {
'temperature': 'temperature',
'precipitation': 'precipitation',
'humidity': 'humidity',
'wind_speed': 'wind_speed',
'traffic_volume': 'traffic_volume',
'pedestrian_count': 'pedestrian_count'
}
for feature_key, df_column in feature_mapping.items():
if feature_key in features and features[feature_key] is not None:
df[df_column] = float(features[feature_key])
else:
df[df_column] = 0.0
# Add categorical features
df['day_of_week'] = int(features.get('day_of_week', 0))
df['is_weekend'] = int(features.get('is_weekend', False))
df['is_holiday'] = int(features.get('is_holiday', False))
# Business type
business_type = features.get('business_type', 'individual')
df['is_central_workshop'] = int(business_type == 'central_workshop')
return df
except Exception as e:
logger.error(f"Error preparing Prophet dataframe: {e}, falling back to basic features")
# Fallback to basic implementation on error
df = pd.DataFrame({'ds': [pd.to_datetime(features['date'])]})
df['temperature'] = features.get('temperature', 15.0)
df['precipitation'] = features.get('precipitation', 0.0)
df['is_weekend'] = int(features.get('is_weekend', False))
df['is_holiday'] = int(features.get('is_holiday', False))
return df
def _add_uncertainty_bands(self, prediction: Dict[str, float],
features: Dict[str, Any]) -> Dict[str, float]:
"""Add uncertainty estimation based on external factors"""
try:
base_demand = prediction["yhat"]
base_lower = prediction["yhat_lower"]
base_upper = prediction["yhat_upper"]
# Weather uncertainty
weather_uncertainty = self._calculate_weather_uncertainty(features)
# Holiday uncertainty
holiday_uncertainty = self._calculate_holiday_uncertainty(features)
# Weekend uncertainty
weekend_uncertainty = self._calculate_weekend_uncertainty(features)
# Total uncertainty factor
total_uncertainty = 1.0 + weather_uncertainty + holiday_uncertainty + weekend_uncertainty
# Adjust bounds
uncertainty_range = (base_upper - base_lower) * total_uncertainty
center_point = base_demand
adjusted_lower = center_point - (uncertainty_range / 2)
adjusted_upper = center_point + (uncertainty_range / 2)
return {
"demand": max(0, base_demand), # Never predict negative demand
"lower_bound": max(0, adjusted_lower),
"upper_bound": adjusted_upper,
"uncertainty_factor": total_uncertainty,
"trend": prediction.get("trend", 0),
"seasonal": prediction.get("seasonal", 0),
"holiday_effect": prediction.get("holidays", 0)
}
except Exception as e:
logger.error("Error adding uncertainty bands", error=str(e))
# Return basic prediction if uncertainty calculation fails
return {
"demand": max(0, prediction["yhat"]),
"lower_bound": max(0, prediction["yhat_lower"]),
"upper_bound": prediction["yhat_upper"],
"uncertainty_factor": 1.0
}
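    # Worked example: yhat=100 with raw bounds 80-120 and uncertainty factors
    # weather=0.1, holiday=0.2, weekend=0.0 give total_uncertainty=1.3, so the
    # band widens to 40 * 1.3 = 52 and the bounds become 74 and 126.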
def _calculate_weather_uncertainty(self, features: Dict[str, Any]) -> float:
"""Calculate weather-based uncertainty"""
uncertainty = 0.0
# Temperature extremes add uncertainty
temp = features.get('temperature')
if temp is not None:
if temp < settings.TEMPERATURE_THRESHOLD_COLD or temp > settings.TEMPERATURE_THRESHOLD_HOT:
uncertainty += 0.1
# Rain adds uncertainty
precipitation = features.get('precipitation')
if precipitation is not None and precipitation > 0:
            uncertainty += 0.05 * min(precipitation, 10)  # Cap contribution at 10mm (max +0.5)
return uncertainty
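    # Worked example (assuming settings thresholds of 5/30 degrees C):
    # temperature=35 adds 0.1 and precipitation=4mm adds 0.05 * 4 = 0.2,
    # so the returned uncertainty is 0.3.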
def _calculate_holiday_uncertainty(self, features: Dict[str, Any]) -> float:
"""Calculate holiday-based uncertainty"""
if features.get('is_holiday', False):
return 0.2 # 20% additional uncertainty on holidays
return 0.0
def _calculate_weekend_uncertainty(self, features: Dict[str, Any]) -> float:
"""Calculate weekend-based uncertainty"""
if features.get('is_weekend', False):
return 0.1 # 10% additional uncertainty on weekends
return 0.0
async def analyze_demand_patterns(
self,
tenant_id: str,
inventory_product_id: str,
sales_data: pd.DataFrame,
forecast_horizon_days: int = 30,
min_history_days: int = 90
) -> Dict[str, Any]:
"""
Analyze demand patterns by delegating to the sales service.
NOTE: Sales data analysis is the responsibility of the sales service.
This method calls the sales service API to get demand pattern analysis.
Args:
tenant_id: Tenant identifier
inventory_product_id: Product identifier
sales_data: Historical sales DataFrame (not used - kept for backward compatibility)
forecast_horizon_days: Days to forecast ahead (not used currently)
min_history_days: Minimum history required
Returns:
Analysis results with patterns, trends, and insights from sales service
"""
try:
from shared.clients.sales_client import SalesServiceClient
from datetime import date, timedelta
logger.info(
"Requesting demand pattern analysis from sales service",
tenant_id=tenant_id,
inventory_product_id=inventory_product_id
)
# Initialize sales client
sales_client = SalesServiceClient(config=settings, calling_service_name="forecasting")
# Calculate date range
end_date = date.today()
start_date = end_date - timedelta(days=min_history_days)
# Call sales service for pattern analysis
patterns = await sales_client.get_product_demand_patterns(
tenant_id=tenant_id,
product_id=inventory_product_id,
start_date=start_date,
end_date=end_date,
min_history_days=min_history_days
)
# Generate insights from patterns
insights = self._generate_insights_from_patterns(
patterns,
tenant_id,
inventory_product_id
)
# Add insights to the result
patterns['insights'] = insights
logger.info(
"Demand pattern analysis received from sales service",
tenant_id=tenant_id,
inventory_product_id=inventory_product_id,
insights_generated=len(insights)
)
return patterns
except Exception as e:
logger.error(
"Error getting demand patterns from sales service",
tenant_id=tenant_id,
inventory_product_id=inventory_product_id,
error=str(e),
exc_info=True
)
return {
'analyzed_at': datetime.utcnow().isoformat(),
'history_days': 0,
'insights': [],
'patterns': {},
'trend_analysis': {},
'seasonal_factors': {},
'statistics': {},
'error': str(e)
}
def _generate_insights_from_patterns(
self,
patterns: Dict[str, Any],
tenant_id: str,
inventory_product_id: str
) -> List[Dict[str, Any]]:
"""
Generate actionable insights from demand patterns provided by sales service.
Args:
patterns: Demand patterns from sales service
tenant_id: Tenant identifier
inventory_product_id: Product identifier
Returns:
List of insights for AI Insights Service
"""
insights = []
# Check if there was an error in pattern analysis
if 'error' in patterns:
return insights
trend = patterns.get('trend_analysis', {})
stats = patterns.get('statistics', {})
seasonal = patterns.get('seasonal_factors', {})
# Trend insight
if trend.get('is_increasing'):
insights.append({
'type': 'insight',
'priority': 'medium',
'category': 'forecasting',
'title': 'Increasing Demand Trend Detected',
'description': f"Product shows {trend.get('direction', 'increasing')} demand trend. Consider increasing inventory levels.",
'impact_type': 'demand_increase',
'impact_value': abs(trend.get('correlation', 0) * 100),
'impact_unit': 'percent',
'confidence': min(int(abs(trend.get('correlation', 0)) * 100), 95),
'metrics_json': trend,
'actionable': True,
'recommendation_actions': [
{
'label': 'Increase Safety Stock',
'action': 'increase_safety_stock',
'params': {'product_id': inventory_product_id, 'factor': 1.2}
}
]
})
elif trend.get('is_decreasing'):
insights.append({
'type': 'insight',
'priority': 'low',
'category': 'forecasting',
'title': 'Decreasing Demand Trend Detected',
'description': f"Product shows {trend.get('direction', 'decreasing')} demand trend. Consider reviewing inventory strategy.",
'impact_type': 'demand_decrease',
'impact_value': abs(trend.get('correlation', 0) * 100),
'impact_unit': 'percent',
'confidence': min(int(abs(trend.get('correlation', 0)) * 100), 95),
'metrics_json': trend,
'actionable': True,
'recommendation_actions': [
{
'label': 'Review Inventory Levels',
'action': 'review_inventory',
'params': {'product_id': inventory_product_id}
}
]
})
# Volatility insight
cv = stats.get('coefficient_of_variation', 0)
if cv > 0.5:
insights.append({
'type': 'alert',
'priority': 'medium',
'category': 'forecasting',
'title': 'High Demand Variability Detected',
'description': f'Product has high demand variability (CV: {cv:.2f}). Consider dynamic safety stock levels.',
'impact_type': 'demand_variability',
'impact_value': round(cv * 100, 1),
'impact_unit': 'percent',
'confidence': 85,
'metrics_json': stats,
'actionable': True,
'recommendation_actions': [
{
'label': 'Enable Dynamic Safety Stock',
'action': 'enable_dynamic_safety_stock',
'params': {'product_id': inventory_product_id}
}
]
})
# Seasonal pattern insight
peak_ratio = seasonal.get('peak_ratio', 1.0)
if peak_ratio > 1.5:
pattern_data = patterns.get('patterns', {})
peak_day = pattern_data.get('peak_day', 0)
low_day = pattern_data.get('low_day', 0)
insights.append({
'type': 'insight',
'priority': 'medium',
'category': 'forecasting',
'title': 'Strong Weekly Pattern Detected',
'description': f'Demand is {peak_ratio:.1f}x higher on day {peak_day} compared to day {low_day}. Adjust production schedule accordingly.',
'impact_type': 'seasonal_pattern',
'impact_value': round((peak_ratio - 1) * 100, 1),
'impact_unit': 'percent',
'confidence': 80,
'metrics_json': {**seasonal, **pattern_data},
'actionable': True,
'recommendation_actions': [
{
'label': 'Adjust Production Schedule',
'action': 'adjust_production',
'params': {'product_id': inventory_product_id, 'pattern': 'weekly'}
}
]
})
return insights
async def _get_dynamic_rules(self, tenant_id: str, inventory_product_id: str, rule_type: str) -> Dict[str, float]:
"""
Fetch learned dynamic rules from AI Insights Service.
Args:
tenant_id: Tenant UUID
inventory_product_id: Product UUID
rule_type: Type of rules (weather, temporal, holiday, etc.)
Returns:
Dictionary of learned rules with factors
"""
        try:
            from uuid import UUID
            # BakeryForecaster has no AI insights client of its own; fall back
            # to the predictor's client (present when dynamic rules are enabled).
            client = getattr(self.predictor, 'ai_insights_client', None)
            if client is None:
                return {}
            # Fetch the latest rules insight for this product
            insights = await client.get_insights(
                tenant_id=UUID(tenant_id),
                filters={
                    'category': 'forecasting',
                    'actionable_only': False,
                    'page_size': 100
                }
            )
if not insights or 'items' not in insights:
return {}
# Find the most recent rules insight for this product
for insight in insights['items']:
if insight.get('source_model') == 'dynamic_rules_engine':
metrics = insight.get('metrics_json', {})
if metrics.get('inventory_product_id') == inventory_product_id:
rules_data = metrics.get('rules', {})
return rules_data.get(rule_type, {})
return {}
except Exception as e:
logger.warning(f"Failed to fetch dynamic rules: {e}")
return {}
async def generate_forecast_with_repository(self, tenant_id: str, inventory_product_id: str,
forecast_date: date, model_id: str = None) -> Dict[str, Any]:
"""Generate forecast with repository integration"""
try:
            # Placeholder implementation: a production version would load the
            # model and historical data through repositories.
            # For now, prepare minimal features for prediction.
features = {
'date': forecast_date.isoformat(),
'day_of_week': forecast_date.weekday(),
'is_weekend': 1 if forecast_date.weekday() >= 5 else 0,
'is_holiday': 0, # Would come from calendar service in real implementation
# Add default weather values if needed
'temperature': 20.0,
'precipitation': 0.0,
}
            # A full implementation would:
            # 1. Load the appropriate model from the repository
            # 2. Use historical data and self.predict_demand to make the prediction
            # 3. Apply business rules
            # For now, return the response structure with placeholder values.
return {
"tenant_id": tenant_id,
"inventory_product_id": inventory_product_id,
"forecast_date": forecast_date.isoformat(),
"prediction": 10.0, # Placeholder value - in reality would be calculated
"confidence_interval": {"lower": 8.0, "upper": 12.0}, # Placeholder values
"status": "completed",
"repository_integration": True,
"forecast_method": "placeholder"
}
except Exception as e:
logger.error("Forecast generation failed", error=str(e))
raise
class BakeryBusinessRules:
"""
Business rules for Spanish bakeries
Applies domain-specific adjustments to predictions
Supports both dynamic learned rules and hardcoded fallbacks
"""
def __init__(self, use_dynamic_rules=False, ai_insights_client=None):
self.use_dynamic_rules = use_dynamic_rules
self.ai_insights_client = ai_insights_client
self.rules_cache = {}
async def apply_rules(self, prediction: Dict[str, float], features: Dict[str, Any],
business_type: str, tenant_id: str = None, inventory_product_id: str = None) -> Dict[str, float]:
"""Apply all business rules to prediction (dynamic or hardcoded)"""
adjusted_prediction = prediction.copy()
# Apply weather rules
adjusted_prediction = await self._apply_weather_rules(
adjusted_prediction, features, tenant_id, inventory_product_id
)
# Apply time-based rules
adjusted_prediction = await self._apply_time_rules(
adjusted_prediction, features, tenant_id, inventory_product_id
)
# Apply business type rules
adjusted_prediction = self._apply_business_type_rules(adjusted_prediction, business_type)
# Apply Spanish-specific rules
adjusted_prediction = self._apply_spanish_rules(adjusted_prediction, features)
return adjusted_prediction
async def _get_dynamic_rules(self, tenant_id: str, inventory_product_id: str, rule_type: str) -> Dict[str, float]:
"""
Fetch learned dynamic rules from AI Insights Service.
Args:
tenant_id: Tenant UUID
inventory_product_id: Product UUID
rule_type: Type of rules (weather, temporal, holiday, etc.)
Returns:
Dictionary of learned rules with factors
"""
# Check cache first
cache_key = f"{tenant_id}:{inventory_product_id}:{rule_type}"
if cache_key in self.rules_cache:
return self.rules_cache[cache_key]
try:
from uuid import UUID
if not self.ai_insights_client:
return {}
# Fetch latest rules insight for this product
insights = await self.ai_insights_client.get_insights(
tenant_id=UUID(tenant_id),
filters={
'category': 'forecasting',
'actionable_only': False,
'page_size': 100
}
)
if not insights or 'items' not in insights:
return {}
# Find the most recent rules insight for this product
for insight in insights['items']:
if insight.get('source_model') == 'dynamic_rules_engine':
metrics = insight.get('metrics_json', {})
if metrics.get('inventory_product_id') == inventory_product_id:
rules_data = metrics.get('rules', {})
result = rules_data.get(rule_type, {})
# Cache the result
self.rules_cache[cache_key] = result
return result
return {}
except Exception as e:
logger.warning(f"Failed to fetch dynamic rules: {e}")
return {}
async def _apply_weather_rules(self, prediction: Dict[str, float],
features: Dict[str, Any],
tenant_id: str = None,
inventory_product_id: str = None) -> Dict[str, float]:
"""Apply weather-based business rules (dynamic or hardcoded fallback)"""
if self.use_dynamic_rules and tenant_id and inventory_product_id:
try:
# Fetch dynamic weather rules
rules = await self._get_dynamic_rules(tenant_id, inventory_product_id, 'weather')
# Apply learned rain impact
precipitation = features.get('precipitation', 0)
if precipitation > 0:
rain_factor = rules.get('rain_factor', settings.RAIN_IMPACT_FACTOR)
prediction["yhat"] *= rain_factor
prediction["yhat_lower"] *= rain_factor
prediction["yhat_upper"] *= rain_factor
# Apply learned temperature impact
temperature = features.get('temperature')
if temperature is not None:
if temperature > settings.TEMPERATURE_THRESHOLD_HOT:
hot_factor = rules.get('temperature_hot_factor', 0.9)
prediction["yhat"] *= hot_factor
elif temperature < settings.TEMPERATURE_THRESHOLD_COLD:
cold_factor = rules.get('temperature_cold_factor', 1.1)
prediction["yhat"] *= cold_factor
except Exception as e:
logger.warning(f"Failed to apply dynamic weather rules, using fallback: {e}")
# Fallback to hardcoded
precipitation = features.get('precipitation', 0)
if precipitation > 0:
prediction["yhat"] *= settings.RAIN_IMPACT_FACTOR
prediction["yhat_lower"] *= settings.RAIN_IMPACT_FACTOR
prediction["yhat_upper"] *= settings.RAIN_IMPACT_FACTOR
temperature = features.get('temperature')
if temperature is not None:
if temperature > settings.TEMPERATURE_THRESHOLD_HOT:
prediction["yhat"] *= 0.9
elif temperature < settings.TEMPERATURE_THRESHOLD_COLD:
prediction["yhat"] *= 1.1
else:
# Use hardcoded rules
precipitation = features.get('precipitation', 0)
if precipitation > 0:
rain_factor = settings.RAIN_IMPACT_FACTOR
prediction["yhat"] *= rain_factor
prediction["yhat_lower"] *= rain_factor
prediction["yhat_upper"] *= rain_factor
temperature = features.get('temperature')
if temperature is not None:
if temperature > settings.TEMPERATURE_THRESHOLD_HOT:
prediction["yhat"] *= 0.9
elif temperature < settings.TEMPERATURE_THRESHOLD_COLD:
prediction["yhat"] *= 1.1
return prediction
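    # Worked example: precipitation=2mm with a learned rain_factor of, say,
    # 0.85 scales yhat, yhat_lower and yhat_upper by 0.85; a temperature above
    # the hot threshold then applies a further 0.9 to yhat.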
async def _apply_time_rules(self, prediction: Dict[str, float],
features: Dict[str, Any],
tenant_id: str = None,
inventory_product_id: str = None) -> Dict[str, float]:
"""Apply time-based business rules (dynamic or hardcoded fallback)"""
if self.use_dynamic_rules and tenant_id and inventory_product_id:
try:
# Fetch dynamic temporal rules
rules = await self._get_dynamic_rules(tenant_id, inventory_product_id, 'temporal')
# Apply learned weekend adjustment
if features.get('is_weekend', False):
weekend_factor = rules.get('weekend_factor', settings.WEEKEND_ADJUSTMENT_FACTOR)
prediction["yhat"] *= weekend_factor
prediction["yhat_lower"] *= weekend_factor
prediction["yhat_upper"] *= weekend_factor
# Apply learned holiday adjustment
if features.get('is_holiday', False):
holiday_factor = rules.get('holiday_factor', settings.HOLIDAY_ADJUSTMENT_FACTOR)
prediction["yhat"] *= holiday_factor
prediction["yhat_lower"] *= holiday_factor
prediction["yhat_upper"] *= holiday_factor
except Exception as e:
logger.warning(f"Failed to apply dynamic time rules, using fallback: {e}")
# Fallback to hardcoded
if features.get('is_weekend', False):
prediction["yhat"] *= settings.WEEKEND_ADJUSTMENT_FACTOR
prediction["yhat_lower"] *= settings.WEEKEND_ADJUSTMENT_FACTOR
prediction["yhat_upper"] *= settings.WEEKEND_ADJUSTMENT_FACTOR
if features.get('is_holiday', False):
prediction["yhat"] *= settings.HOLIDAY_ADJUSTMENT_FACTOR
prediction["yhat_lower"] *= settings.HOLIDAY_ADJUSTMENT_FACTOR
prediction["yhat_upper"] *= settings.HOLIDAY_ADJUSTMENT_FACTOR
else:
# Use hardcoded rules
if features.get('is_weekend', False):
weekend_factor = settings.WEEKEND_ADJUSTMENT_FACTOR
prediction["yhat"] *= weekend_factor
prediction["yhat_lower"] *= weekend_factor
prediction["yhat_upper"] *= weekend_factor
if features.get('is_holiday', False):
holiday_factor = settings.HOLIDAY_ADJUSTMENT_FACTOR
prediction["yhat"] *= holiday_factor
prediction["yhat_lower"] *= holiday_factor
prediction["yhat_upper"] *= holiday_factor
return prediction
def _apply_business_type_rules(self, prediction: Dict[str, float],
business_type: str) -> Dict[str, float]:
"""Apply business type specific rules"""
if business_type == "central_workshop":
# Central workshops have more stable demand
uncertainty_reduction = 0.8
center = prediction["yhat"]
lower = prediction["yhat_lower"]
upper = prediction["yhat_upper"]
# Reduce uncertainty band
new_range = (upper - lower) * uncertainty_reduction
prediction["yhat_lower"] = center - (new_range / 2)
prediction["yhat_upper"] = center + (new_range / 2)
return prediction
def _apply_spanish_rules(self, prediction: Dict[str, float],
features: Dict[str, Any]) -> Dict[str, float]:
"""Apply Spanish bakery specific rules"""
# Spanish siesta time considerations
date_str = features.get('date')
if date_str:
try:
current_date = pd.to_datetime(date_str)
day_of_week = current_date.weekday()
# Reduced activity during typical siesta hours (14:00-17:00)
# This affects afternoon sales planning
if day_of_week < 5: # Weekdays
prediction["yhat"] *= 0.95 # Slight reduction for siesta effect
except Exception as e:
logger.warning(f"Error processing date in spanish rules: {e}")
else:
logger.warning("Date not provided in features, skipping Spanish rules")
return prediction
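
# Illustrative usage sketch (a minimal example; the trained Prophet `model`
# and the feature values are placeholders supplied by the caller):
#
#     forecaster = BakeryForecaster()
#     result = await forecaster.predict_demand(
#         model,
#         features={'date': '2026-02-14', 'temperature': 8.0,
#                   'precipitation': 2.5, 'is_weekend': True,
#                   'is_holiday': False},
#         business_type='individual',
#     )
#     # result -> {'demand': ..., 'lower_bound': ..., 'upper_bound': ...,
#     #            'uncertainty_factor': ...}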

View File

@@ -0,0 +1,312 @@
"""
Rules Orchestrator
Coordinates dynamic rules learning, insight posting, and integration with forecasting service
"""
import pandas as pd
from typing import Dict, List, Any, Optional
import structlog
from datetime import datetime
from uuid import UUID
from app.ml.dynamic_rules_engine import DynamicRulesEngine
from app.clients.ai_insights_client import AIInsightsClient
from shared.messaging import UnifiedEventPublisher
logger = structlog.get_logger()
class RulesOrchestrator:
"""
Orchestrates dynamic rules learning and insight generation workflow.
Workflow:
1. Learn dynamic rules from historical data
2. Generate insights comparing learned vs hardcoded rules
3. Post insights to AI Insights Service
4. Provide learned rules for forecasting integration
5. Track rule updates and performance
"""
def __init__(
self,
ai_insights_base_url: str = "http://ai-insights-service:8000",
event_publisher: Optional[UnifiedEventPublisher] = None
):
self.rules_engine = DynamicRulesEngine()
self.ai_insights_client = AIInsightsClient(ai_insights_base_url)
self.event_publisher = event_publisher
async def learn_and_post_rules(
self,
tenant_id: str,
inventory_product_id: str,
sales_data: pd.DataFrame,
external_data: Optional[pd.DataFrame] = None,
min_samples: int = 10
) -> Dict[str, Any]:
"""
Complete workflow: Learn rules and post insights.
Args:
tenant_id: Tenant identifier
inventory_product_id: Product identifier
sales_data: Historical sales data
external_data: Optional weather/events/holidays data
min_samples: Minimum samples for rule learning
Returns:
Workflow results with learned rules and posted insights
"""
logger.info(
"Starting dynamic rules learning workflow",
tenant_id=tenant_id,
inventory_product_id=inventory_product_id
)
# Step 1: Learn all rules from data
rules_results = await self.rules_engine.learn_all_rules(
tenant_id=tenant_id,
inventory_product_id=inventory_product_id,
sales_data=sales_data,
external_data=external_data,
min_samples=min_samples
)
logger.info(
"Rules learning complete",
insights_generated=len(rules_results['insights']),
rules_learned=len(rules_results['rules'])
)
# Step 2: Enrich insights with tenant_id and product context
enriched_insights = self._enrich_insights(
rules_results['insights'],
tenant_id,
inventory_product_id
)
# Step 3: Post insights to AI Insights Service
if enriched_insights:
post_results = await self.ai_insights_client.create_insights_bulk(
tenant_id=UUID(tenant_id),
insights=enriched_insights
)
logger.info(
"Insights posted to AI Insights Service",
total=post_results['total'],
successful=post_results['successful'],
failed=post_results['failed']
)
else:
post_results = {'total': 0, 'successful': 0, 'failed': 0}
logger.info("No insights to post")
# Step 4: Publish insight events to RabbitMQ
created_insights = post_results.get('created_insights', [])
if created_insights:
product_context = {'inventory_product_id': inventory_product_id}
await self._publish_insight_events(
tenant_id=tenant_id,
insights=created_insights,
product_context=product_context
)
# Step 5: Return comprehensive results
return {
'tenant_id': tenant_id,
'inventory_product_id': inventory_product_id,
'learned_at': rules_results['learned_at'],
'rules': rules_results['rules'],
'insights_generated': len(enriched_insights),
'insights_posted': post_results['successful'],
'insights_failed': post_results['failed'],
'created_insights': post_results.get('created_insights', [])
}
def _enrich_insights(
self,
insights: List[Dict[str, Any]],
tenant_id: str,
inventory_product_id: str
) -> List[Dict[str, Any]]:
"""
Enrich insights with required fields for AI Insights Service.
Args:
insights: Raw insights from rules engine
tenant_id: Tenant identifier
inventory_product_id: Product identifier
Returns:
Enriched insights ready for posting
"""
enriched = []
for insight in insights:
# Add required tenant_id and product context
enriched_insight = insight.copy()
enriched_insight['tenant_id'] = tenant_id
# Add product context to metrics
if 'metrics_json' not in enriched_insight:
enriched_insight['metrics_json'] = {}
enriched_insight['metrics_json']['inventory_product_id'] = inventory_product_id
# Add source metadata
enriched_insight['source_service'] = 'forecasting'
enriched_insight['source_model'] = 'dynamic_rules_engine'
enriched_insight['detected_at'] = datetime.utcnow().isoformat()
enriched.append(enriched_insight)
return enriched
async def get_learned_rules_for_forecasting(
self,
inventory_product_id: str
) -> Dict[str, Any]:
"""
Get learned rules in format ready for forecasting integration.
Args:
inventory_product_id: Product identifier
Returns:
Dictionary with learned multipliers for all rule types
"""
return self.rules_engine.export_rules_for_prophet(inventory_product_id)
def get_rule_multiplier(
self,
inventory_product_id: str,
rule_type: str,
key: str,
default: float = 1.0
) -> float:
"""
Get learned rule multiplier with fallback to default.
Args:
inventory_product_id: Product identifier
rule_type: 'weather', 'holiday', 'event', 'day_of_week', 'month'
key: Condition key
default: Default multiplier if rule not learned
Returns:
Learned multiplier or default
"""
learned = self.rules_engine.get_rule(inventory_product_id, rule_type, key)
return learned if learned is not None else default
async def update_rules_periodically(
self,
tenant_id: str,
inventory_product_id: str,
sales_data: pd.DataFrame,
external_data: Optional[pd.DataFrame] = None
) -> Dict[str, Any]:
"""
Update learned rules with new data (for periodic refresh).
Args:
tenant_id: Tenant identifier
inventory_product_id: Product identifier
sales_data: Updated historical sales data
external_data: Updated external data
Returns:
Update results
"""
logger.info(
"Updating learned rules with new data",
tenant_id=tenant_id,
inventory_product_id=inventory_product_id,
new_data_points=len(sales_data)
)
# Re-learn rules with updated data
results = await self.learn_and_post_rules(
tenant_id=tenant_id,
inventory_product_id=inventory_product_id,
sales_data=sales_data,
external_data=external_data
)
logger.info(
"Rules update complete",
insights_posted=results['insights_posted']
)
return results
async def _publish_insight_events(self, tenant_id, insights, product_context=None):
"""
Publish insight events to RabbitMQ for alert processing.
Args:
tenant_id: Tenant identifier
insights: List of created insights
product_context: Additional context about the product
"""
if not self.event_publisher:
logger.warning("No event publisher available for business rules insights")
return
for insight in insights:
# Determine severity based on confidence and priority
confidence = insight.get('confidence', 0)
priority = insight.get('priority', 'medium')
# Map priority to severity, with confidence as tiebreaker
if priority == 'critical' or (priority == 'high' and confidence >= 70):
severity = 'high'
elif priority == 'high' or (priority == 'medium' and confidence >= 80):
severity = 'medium'
else:
severity = 'low'
# Prepare the event data
event_data = {
'insight_id': insight.get('id'),
'type': insight.get('type'),
'title': insight.get('title'),
'description': insight.get('description'),
'category': insight.get('category'),
'priority': insight.get('priority'),
'confidence': confidence,
'recommendation': insight.get('recommendation_actions', []),
'impact_type': insight.get('impact_type'),
'impact_value': insight.get('impact_value'),
'inventory_product_id': product_context.get('inventory_product_id') if product_context else None,
'timestamp': insight.get('detected_at', datetime.utcnow().isoformat()),
'source_service': 'forecasting',
'source_model': 'dynamic_rules_engine'
}
try:
await self.event_publisher.publish_recommendation(
event_type='ai_business_rule',
tenant_id=tenant_id,
severity=severity,
data=event_data
)
logger.info(
"Published business rules insight event",
tenant_id=tenant_id,
insight_id=insight.get('id'),
severity=severity
)
except Exception as e:
logger.error(
"Failed to publish business rules insight event",
tenant_id=tenant_id,
insight_id=insight.get('id'),
error=str(e)
)
async def close(self):
"""Close HTTP client connections."""
await self.ai_insights_client.close()
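
# Illustrative usage sketch (assumes `sales_df` holds historical sales and a
# UnifiedEventPublisher was wired up elsewhere; the 'rain' key is a
# hypothetical condition name):
#
#     orchestrator = RulesOrchestrator(event_publisher=publisher)
#     results = await orchestrator.learn_and_post_rules(
#         tenant_id='tenant-uuid',
#         inventory_product_id='product-uuid',
#         sales_data=sales_df,
#     )
#     rain = orchestrator.get_rule_multiplier(
#         'product-uuid', 'weather', 'rain', default=0.9)
#     await orchestrator.close()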

View File

@@ -0,0 +1,385 @@
"""
Scenario Planning System
What-if analysis for demand forecasting
"""
import pandas as pd
import numpy as np
from typing import Dict, List, Any, Optional
from datetime import datetime, date, timedelta
import structlog
from enum import Enum
logger = structlog.get_logger()
class ScenarioType(str, Enum):
"""Types of scenarios"""
BASELINE = "baseline"
OPTIMISTIC = "optimistic"
PESSIMISTIC = "pessimistic"
CUSTOM = "custom"
PROMOTION = "promotion"
EVENT = "event"
WEATHER = "weather"
PRICE_CHANGE = "price_change"
class ScenarioPlanner:
"""
Scenario planning for demand forecasting.
Allows testing "what-if" scenarios:
- What if we run a promotion?
- What if there's a local festival?
- What if weather is unusually bad?
- What if we change prices?
"""
def __init__(self, base_forecaster=None):
"""
Initialize scenario planner.
Args:
base_forecaster: Base forecaster to use for baseline predictions
"""
self.base_forecaster = base_forecaster
async def create_scenario(
self,
tenant_id: str,
inventory_product_id: str,
scenario_name: str,
scenario_type: ScenarioType,
start_date: date,
end_date: date,
adjustments: Dict[str, Any]
) -> Dict[str, Any]:
"""
Create a forecast scenario with adjustments.
Args:
tenant_id: Tenant identifier
inventory_product_id: Product identifier
scenario_name: Name for the scenario
scenario_type: Type of scenario
start_date: Scenario start date
end_date: Scenario end date
adjustments: Dictionary of adjustments to apply
Returns:
Scenario forecast results
"""
logger.info(
"Creating forecast scenario",
tenant_id=tenant_id,
inventory_product_id=inventory_product_id,
scenario_name=scenario_name,
scenario_type=scenario_type
)
# Generate baseline forecast first
baseline_forecast = await self._generate_baseline_forecast(
tenant_id=tenant_id,
inventory_product_id=inventory_product_id,
start_date=start_date,
end_date=end_date
)
# Apply scenario adjustments
scenario_forecast = self._apply_scenario_adjustments(
baseline_forecast=baseline_forecast,
adjustments=adjustments,
scenario_type=scenario_type
)
# Calculate impact
impact_analysis = self._calculate_scenario_impact(
baseline_forecast=baseline_forecast,
scenario_forecast=scenario_forecast
)
return {
'scenario_id': f"scenario_{tenant_id}_{inventory_product_id}_{datetime.now().strftime('%Y%m%d%H%M%S')}",
'scenario_name': scenario_name,
'scenario_type': scenario_type,
'tenant_id': tenant_id,
'inventory_product_id': inventory_product_id,
'date_range': {
'start': start_date.isoformat(),
'end': end_date.isoformat()
},
'baseline_forecast': baseline_forecast,
'scenario_forecast': scenario_forecast,
'impact_analysis': impact_analysis,
'adjustments_applied': adjustments,
'created_at': datetime.now().isoformat()
}
async def compare_scenarios(
self,
scenarios: List[Dict[str, Any]]
) -> Dict[str, Any]:
"""
Compare multiple scenarios side-by-side.
Args:
scenarios: List of scenario results from create_scenario()
Returns:
Comparison analysis
"""
if len(scenarios) < 2:
return {'error': 'Need at least 2 scenarios to compare'}
comparison = {
'scenarios_compared': len(scenarios),
'scenario_names': [s['scenario_name'] for s in scenarios],
'comparison_metrics': {}
}
# Extract total demand for each scenario
for scenario in scenarios:
scenario_name = scenario['scenario_name']
scenario_forecast = scenario['scenario_forecast']
total_demand = sum(f['predicted_demand'] for f in scenario_forecast)
comparison['comparison_metrics'][scenario_name] = {
'total_demand': total_demand,
'avg_daily_demand': total_demand / len(scenario_forecast) if scenario_forecast else 0,
'peak_demand': max(f['predicted_demand'] for f in scenario_forecast) if scenario_forecast else 0
}
# Determine best and worst scenarios
total_demands = {
name: metrics['total_demand']
for name, metrics in comparison['comparison_metrics'].items()
}
comparison['best_scenario'] = max(total_demands, key=total_demands.get)
comparison['worst_scenario'] = min(total_demands, key=total_demands.get)
comparison['demand_range'] = {
'min': min(total_demands.values()),
'max': max(total_demands.values()),
'spread': max(total_demands.values()) - min(total_demands.values())
}
return comparison
async def _generate_baseline_forecast(
self,
tenant_id: str,
inventory_product_id: str,
start_date: date,
end_date: date
) -> List[Dict[str, Any]]:
"""
Generate baseline forecast without adjustments.
Args:
tenant_id: Tenant identifier
inventory_product_id: Product identifier
start_date: Start date
end_date: End date
Returns:
List of daily forecasts
"""
# Generate date range
dates = []
current_date = start_date
while current_date <= end_date:
dates.append(current_date)
current_date += timedelta(days=1)
# Placeholder forecast (in real implementation, call forecasting service)
baseline = []
for forecast_date in dates:
baseline.append({
'date': forecast_date.isoformat(),
'predicted_demand': 100, # Placeholder
'confidence_lower': 80,
'confidence_upper': 120
})
return baseline
def _apply_scenario_adjustments(
self,
baseline_forecast: List[Dict[str, Any]],
adjustments: Dict[str, Any],
scenario_type: ScenarioType
) -> List[Dict[str, Any]]:
"""
Apply adjustments to baseline forecast.
Args:
baseline_forecast: Baseline forecast data
adjustments: Adjustments to apply
scenario_type: Type of scenario
Returns:
Adjusted forecast
"""
scenario_forecast = []
for day_forecast in baseline_forecast:
adjusted_forecast = day_forecast.copy()
# Apply different adjustment types
if 'demand_multiplier' in adjustments:
# Multiply demand by factor
multiplier = adjustments['demand_multiplier']
adjusted_forecast['predicted_demand'] *= multiplier
adjusted_forecast['confidence_lower'] *= multiplier
adjusted_forecast['confidence_upper'] *= multiplier
if 'demand_offset' in adjustments:
# Add/subtract fixed amount
offset = adjustments['demand_offset']
adjusted_forecast['predicted_demand'] += offset
adjusted_forecast['confidence_lower'] += offset
adjusted_forecast['confidence_upper'] += offset
if 'event_impact' in adjustments:
# Apply event-specific impact
event_multiplier = adjustments['event_impact']
adjusted_forecast['predicted_demand'] *= event_multiplier
if 'weather_impact' in adjustments:
# Apply weather adjustments
weather_factor = adjustments['weather_impact']
adjusted_forecast['predicted_demand'] *= weather_factor
if 'price_elasticity' in adjustments and 'price_change_percent' in adjustments:
# Apply price elasticity
elasticity = adjustments['price_elasticity']
price_change = adjustments['price_change_percent']
demand_change = -elasticity * price_change # Negative correlation
adjusted_forecast['predicted_demand'] *= (1 + demand_change)
# Ensure non-negative demand
adjusted_forecast['predicted_demand'] = max(0, adjusted_forecast['predicted_demand'])
adjusted_forecast['confidence_lower'] = max(0, adjusted_forecast['confidence_lower'])
scenario_forecast.append(adjusted_forecast)
return scenario_forecast
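    # Worked example: price_elasticity=1.2 with price_change_percent=0.10
    # gives demand_change = -0.12, so adjusted demand is 88% of baseline.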
def _calculate_scenario_impact(
self,
baseline_forecast: List[Dict[str, Any]],
scenario_forecast: List[Dict[str, Any]]
) -> Dict[str, Any]:
"""
Calculate impact of scenario vs baseline.
Args:
baseline_forecast: Baseline forecast
scenario_forecast: Scenario forecast
Returns:
Impact analysis
"""
baseline_total = sum(f['predicted_demand'] for f in baseline_forecast)
scenario_total = sum(f['predicted_demand'] for f in scenario_forecast)
difference = scenario_total - baseline_total
percent_change = (difference / baseline_total * 100) if baseline_total > 0 else 0
return {
'baseline_total_demand': baseline_total,
'scenario_total_demand': scenario_total,
'absolute_difference': difference,
'percent_change': percent_change,
'impact_category': self._categorize_impact(percent_change),
'days_analyzed': len(baseline_forecast)
}
def _categorize_impact(self, percent_change: float) -> str:
"""Categorize impact magnitude"""
if abs(percent_change) < 5:
return "minimal"
elif abs(percent_change) < 15:
return "moderate"
elif abs(percent_change) < 30:
return "significant"
else:
return "major"
def generate_predefined_scenarios(
self,
base_scenario: Dict[str, Any]
) -> List[Dict[str, Any]]:
"""
Generate common predefined scenarios for comparison.
Args:
base_scenario: Base scenario parameters
Returns:
List of scenario configurations
"""
scenarios = []
# Baseline scenario
scenarios.append({
'scenario_name': 'Baseline',
'scenario_type': ScenarioType.BASELINE,
'adjustments': {}
})
# Optimistic scenario
scenarios.append({
'scenario_name': 'Optimistic',
'scenario_type': ScenarioType.OPTIMISTIC,
'adjustments': {
'demand_multiplier': 1.2, # 20% increase
'description': '+20% demand increase'
}
})
# Pessimistic scenario
scenarios.append({
'scenario_name': 'Pessimistic',
'scenario_type': ScenarioType.PESSIMISTIC,
'adjustments': {
'demand_multiplier': 0.8, # 20% decrease
'description': '-20% demand decrease'
}
})
# Promotion scenario
scenarios.append({
'scenario_name': 'Promotion Campaign',
'scenario_type': ScenarioType.PROMOTION,
'adjustments': {
'demand_multiplier': 1.5, # 50% increase
'description': '50% promotion boost'
}
})
# Bad weather scenario
scenarios.append({
'scenario_name': 'Bad Weather',
'scenario_type': ScenarioType.WEATHER,
'adjustments': {
'weather_impact': 0.7, # 30% decrease
'description': 'Bad weather reduces foot traffic'
}
})
# Price increase scenario
scenarios.append({
'scenario_name': 'Price Increase 10%',
'scenario_type': ScenarioType.PRICE_CHANGE,
'adjustments': {
                'price_elasticity': 1.2,  # Elastic demand (|elasticity| > 1)
                'price_change_percent': 0.10,  # +10% price, expressed as a fraction
'description': '10% price increase with elastic demand'
}
})
return scenarios
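
# Illustrative usage sketch (dates and IDs are placeholders):
#
#     from datetime import date
#
#     planner = ScenarioPlanner()
#     baseline = await planner.create_scenario(
#         tenant_id='tenant-uuid', inventory_product_id='product-uuid',
#         scenario_name='Baseline', scenario_type=ScenarioType.BASELINE,
#         start_date=date(2026, 3, 1), end_date=date(2026, 3, 14),
#         adjustments={},
#     )
#     promo = await planner.create_scenario(
#         tenant_id='tenant-uuid', inventory_product_id='product-uuid',
#         scenario_name='Promotion Campaign', scenario_type=ScenarioType.PROMOTION,
#         start_date=date(2026, 3, 1), end_date=date(2026, 3, 14),
#         adjustments={'demand_multiplier': 1.5},
#     )
#     comparison = await planner.compare_scenarios([baseline, promo])
#     # comparison['best_scenario'] -> 'Promotion Campaign'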