Initial commit - production deployment
11
services/forecasting/app/ml/__init__.py
Normal file
@@ -0,0 +1,11 @@
"""
ML Components for Forecasting

Machine learning prediction and forecasting components
"""

from .predictor import BakeryPredictor, BakeryForecaster

__all__ = [
    "BakeryPredictor",
    "BakeryForecaster"
]
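For orientation, a minimal consumption sketch for the re-exports above (it assumes the service's app package root is on sys.path; the zero-argument construction matches how the orchestrators below build the forecaster):

from app.ml import BakeryForecaster

forecaster = BakeryForecaster()  # zero-arg construction, as used by DemandInsightsOrchestrator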
@@ -0,0 +1,393 @@
"""
Business Rules Insights Orchestrator

Coordinates business rules optimization and insight posting
"""

import pandas as pd
from typing import Dict, List, Any, Optional
import structlog
from datetime import datetime
from uuid import UUID
import sys
import os

# Add shared clients to path
sys.path.append(os.path.join(os.path.dirname(__file__), '../../../..'))
from shared.clients.ai_insights_client import AIInsightsClient
from shared.messaging import UnifiedEventPublisher

from app.ml.dynamic_rules_engine import DynamicRulesEngine

logger = structlog.get_logger()


class BusinessRulesInsightsOrchestrator:
    """
    Orchestrates business rules analysis and insight generation workflow.

    Workflow:
    1. Analyze dynamic business rule performance
    2. Generate insights for rule optimization
    3. Post insights to AI Insights Service
    4. Publish recommendation events to RabbitMQ
    5. Provide rule optimization for forecasting
    6. Track rule effectiveness and improvements
    """

    def __init__(
        self,
        ai_insights_base_url: str = "http://ai-insights-service:8000",
        event_publisher: Optional[UnifiedEventPublisher] = None
    ):
        self.rules_engine = DynamicRulesEngine()
        self.ai_insights_client = AIInsightsClient(ai_insights_base_url)
        self.event_publisher = event_publisher

    async def analyze_and_post_business_rules_insights(
        self,
        tenant_id: str,
        inventory_product_id: str,
        sales_data: pd.DataFrame,
        min_samples: int = 10
    ) -> Dict[str, Any]:
        """
        Complete workflow: Analyze business rules and post insights.

        Args:
            tenant_id: Tenant identifier
            inventory_product_id: Product identifier
            sales_data: Historical sales data
            min_samples: Minimum samples for rule analysis

        Returns:
            Workflow results with analysis and posted insights
        """
        logger.info(
            "Starting business rules analysis workflow",
            tenant_id=tenant_id,
            inventory_product_id=inventory_product_id,
            samples=len(sales_data)
        )

        # Step 1: Learn and analyze rules
        rules_results = await self.rules_engine.learn_all_rules(
            tenant_id=tenant_id,
            inventory_product_id=inventory_product_id,
            sales_data=sales_data,
            external_data=None,
            min_samples=min_samples
        )

        logger.info(
            "Business rules analysis complete",
            insights_generated=len(rules_results.get('insights', [])),
            rules_learned=len(rules_results.get('rules', {}))
        )

        # Step 2: Enrich insights with tenant_id and product context
        enriched_insights = self._enrich_insights(
            rules_results.get('insights', []),
            tenant_id,
            inventory_product_id
        )

        # Step 3: Post insights to AI Insights Service
        if enriched_insights:
            post_results = await self.ai_insights_client.create_insights_bulk(
                tenant_id=UUID(tenant_id),
                insights=enriched_insights
            )

            logger.info(
                "Business rules insights posted to AI Insights Service",
                inventory_product_id=inventory_product_id,
                total=post_results['total'],
                successful=post_results['successful'],
                failed=post_results['failed']
            )
        else:
            post_results = {'total': 0, 'successful': 0, 'failed': 0}
            logger.info("No insights to post for product", inventory_product_id=inventory_product_id)

        # Step 4: Publish insight events to RabbitMQ
        created_insights = post_results.get('created_insights', [])
        if created_insights:
            product_context = {'inventory_product_id': inventory_product_id}
            await self._publish_insight_events(
                tenant_id=tenant_id,
                insights=created_insights,
                product_context=product_context
            )

        # Step 5: Return comprehensive results
        return {
            'tenant_id': tenant_id,
            'inventory_product_id': inventory_product_id,
            'learned_at': rules_results['learned_at'],
            'rules': rules_results.get('rules', {}),
            'insights_generated': len(enriched_insights),
            'insights_posted': post_results['successful'],
            'insights_failed': post_results['failed'],
            'created_insights': post_results.get('created_insights', [])
        }

    def _enrich_insights(
        self,
        insights: List[Dict[str, Any]],
        tenant_id: str,
        inventory_product_id: str
    ) -> List[Dict[str, Any]]:
        """
        Enrich insights with required fields for AI Insights Service.

        Args:
            insights: Raw insights from rules engine
            tenant_id: Tenant identifier
            inventory_product_id: Product identifier

        Returns:
            Enriched insights ready for posting
        """
        enriched = []

        for insight in insights:
            # Add required tenant_id
            enriched_insight = insight.copy()
            enriched_insight['tenant_id'] = tenant_id

            # Add product context to metrics
            if 'metrics_json' not in enriched_insight:
                enriched_insight['metrics_json'] = {}

            enriched_insight['metrics_json']['inventory_product_id'] = inventory_product_id

            # Add source metadata
            enriched_insight['source_service'] = 'forecasting'
            enriched_insight['source_model'] = 'dynamic_rules_engine'
            enriched_insight['detected_at'] = datetime.utcnow().isoformat()

            enriched.append(enriched_insight)

        return enriched

    async def analyze_all_business_rules(
        self,
        tenant_id: str,
        products_data: Dict[str, pd.DataFrame],
        min_samples: int = 10
    ) -> Dict[str, Any]:
        """
        Analyze all products for business rules optimization and generate comparative insights.

        Args:
            tenant_id: Tenant identifier
            products_data: Dict of {inventory_product_id: sales_data DataFrame}
            min_samples: Minimum samples for rule analysis

        Returns:
            Comprehensive analysis with rule optimization insights
        """
        logger.info(
            "Analyzing business rules for all products",
            tenant_id=tenant_id,
            products=len(products_data)
        )

        all_results = []
        total_insights_posted = 0

        # Analyze each product
        for inventory_product_id, sales_data in products_data.items():
            try:
                results = await self.analyze_and_post_business_rules_insights(
                    tenant_id=tenant_id,
                    inventory_product_id=inventory_product_id,
                    sales_data=sales_data,
                    min_samples=min_samples
                )

                all_results.append(results)
                total_insights_posted += results['insights_posted']

            except Exception as e:
                logger.error(
                    "Error analyzing business rules for product",
                    inventory_product_id=inventory_product_id,
                    error=str(e)
                )

        # Generate summary insight
        if total_insights_posted > 0:
            summary_insight = self._generate_portfolio_summary_insight(
                tenant_id, all_results
            )

            if summary_insight:
                enriched_summary = self._enrich_insights(
                    [summary_insight], tenant_id, 'all_products'
                )

                post_results = await self.ai_insights_client.create_insights_bulk(
                    tenant_id=UUID(tenant_id),
                    insights=enriched_summary
                )

                total_insights_posted += post_results['successful']

        logger.info(
            "All business rules analysis complete",
            tenant_id=tenant_id,
            products_analyzed=len(all_results),
            total_insights_posted=total_insights_posted
        )

        return {
            'tenant_id': tenant_id,
            'analyzed_at': datetime.utcnow().isoformat(),
            'products_analyzed': len(all_results),
            'product_results': all_results,
            'total_insights_posted': total_insights_posted
        }

    def _generate_portfolio_summary_insight(
        self,
        tenant_id: str,
        all_results: List[Dict[str, Any]]
    ) -> Optional[Dict[str, Any]]:
        """
        Generate portfolio-level business rules summary insight.

        Args:
            tenant_id: Tenant identifier
            all_results: All product analysis results

        Returns:
            Summary insight or None
        """
        if not all_results:
            return None

        # Calculate summary statistics
        total_products = len(all_results)
        total_rules = sum(len(r.get('rules', {})) for r in all_results)

        # Count products with significant rule improvements
        significant_improvements = sum(
            1 for r in all_results
            if any('improvement' in str(v).lower() for v in r.get('rules', {}).values())
        )

        return {
            'type': 'recommendation',
            'priority': 'high' if significant_improvements > total_products * 0.3 else 'medium',
            'category': 'forecasting',
            'title': f'Business Rule Optimization: {total_products} Products Analyzed',
            'description': f'Learned {total_rules} dynamic rules across {total_products} products. Identified {significant_improvements} products with significant rule improvements.',
            'impact_type': 'operational_efficiency',
            'impact_value': total_rules,
            'impact_unit': 'rules',
            'confidence': 80,
            'metrics_json': {
                'total_products': total_products,
                'total_rules': total_rules,
                'significant_improvements': significant_improvements,
                'rules_per_product': round(total_rules / total_products, 2)
            },
            'actionable': True,
            'recommendation_actions': [
                {
                    'label': 'Review Learned Rules',
                    'action': 'review_business_rules',
                    'params': {'tenant_id': tenant_id}
                },
                {
                    'label': 'Implement Optimized Rules',
                    'action': 'implement_business_rules',
                    'params': {'tenant_id': tenant_id}
                }
            ],
            'source_service': 'forecasting',
            'source_model': 'dynamic_rules_engine'
        }

    async def get_learned_rules(
        self,
        inventory_product_id: str
    ) -> Optional[Dict[str, Any]]:
        """
        Get cached learned rules for a product.

        Args:
            inventory_product_id: Product identifier

        Returns:
            Learned rules or None if not analyzed
        """
        return self.rules_engine.get_all_rules(inventory_product_id)

    async def _publish_insight_events(self, tenant_id, insights, product_context=None):
        """
        Publish insight events to RabbitMQ for alert processing.

        Args:
            tenant_id: Tenant identifier
            insights: List of created insights
            product_context: Additional context about the product
        """
        if not self.event_publisher:
            logger.warning("No event publisher available for business rules insights")
            return

        for insight in insights:
            # Determine severity based on confidence and priority
            confidence = insight.get('confidence', 0)
            priority = insight.get('priority', 'medium')

            # Map priority to severity, with confidence as tiebreaker
            if priority == 'critical' or (priority == 'high' and confidence >= 70):
                severity = 'high'
            elif priority == 'high' or (priority == 'medium' and confidence >= 80):
                severity = 'medium'
            else:
                severity = 'low'

            # Prepare the event data
            event_data = {
                'insight_id': insight.get('id'),
                'type': insight.get('type'),
                'title': insight.get('title'),
                'description': insight.get('description'),
                'category': insight.get('category'),
                'priority': insight.get('priority'),
                'confidence': confidence,
                'recommendation': insight.get('recommendation_actions', []),
                'impact_type': insight.get('impact_type'),
                'impact_value': insight.get('impact_value'),
                'inventory_product_id': product_context.get('inventory_product_id') if product_context else None,
                'timestamp': insight.get('detected_at', datetime.utcnow().isoformat()),
                'source_service': 'forecasting',
                'source_model': 'dynamic_rules_engine'
            }

            try:
                await self.event_publisher.publish_recommendation(
                    event_type='ai_business_rule',
                    tenant_id=tenant_id,
                    severity=severity,
                    data=event_data
                )
                logger.info(
                    "Published business rules insight event",
                    tenant_id=tenant_id,
                    insight_id=insight.get('id'),
                    severity=severity
                )
            except Exception as e:
                logger.error(
                    "Failed to publish business rules insight event",
                    tenant_id=tenant_id,
                    insight_id=insight.get('id'),
                    error=str(e)
                )

    async def close(self):
        """Close HTTP client connections."""
        await self.ai_insights_client.close()
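A minimal driver sketch for the orchestrator above; everything concrete in it (the tenant UUID, product id, and synthetic sales frame) is fabricated for illustration, and posting requires a reachable AI Insights Service:

import asyncio
import pandas as pd


async def main():
    # event_publisher=None: step 4 logs a warning and skips RabbitMQ publishing
    orchestrator = BusinessRulesInsightsOrchestrator(event_publisher=None)
    sales = pd.DataFrame({
        "date": pd.date_range("2024-01-01", periods=120, freq="D"),
        "quantity": [20 + (i % 7) * 3 for i in range(120)],  # synthetic weekly pattern
    })
    try:
        results = await orchestrator.analyze_and_post_business_rules_insights(
            tenant_id="00000000-0000-0000-0000-000000000000",  # placeholder UUID
            inventory_product_id="croissant-classic",          # fabricated product id
            sales_data=sales,
            min_samples=10,
        )
        print(results["insights_generated"], "generated,", results["insights_posted"], "posted")
    finally:
        await orchestrator.close()


asyncio.run(main())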
235
services/forecasting/app/ml/calendar_features.py
Normal file
@@ -0,0 +1,235 @@
"""
Calendar-based Feature Engineering for Forecasting Service

Generates calendar features for future date predictions
"""

import pandas as pd
import structlog
from typing import Dict, List, Any, Optional
from datetime import datetime, date, time, timedelta
from app.services.data_client import data_client

logger = structlog.get_logger()


class ForecastCalendarFeatures:
    """
    Generates calendar-based features for future predictions.
    Optimized for the forecasting service (future dates only).
    """

    def __init__(self):
        self.calendar_cache = {}  # Cache calendar data per tenant

    async def get_calendar_for_tenant(
        self,
        tenant_id: str
    ) -> Optional[Dict[str, Any]]:
        """Get cached calendar for tenant"""
        if tenant_id in self.calendar_cache:
            return self.calendar_cache[tenant_id]

        calendar = await data_client.fetch_tenant_calendar(tenant_id)
        if calendar:
            self.calendar_cache[tenant_id] = calendar

        return calendar

    def _is_date_in_holiday_period(
        self,
        check_date: date,
        holiday_periods: List[Dict[str, Any]]
    ) -> tuple[bool, Optional[str]]:
        """Check if date is within any holiday period"""
        for period in holiday_periods:
            start = datetime.strptime(period["start_date"], "%Y-%m-%d").date()
            end = datetime.strptime(period["end_date"], "%Y-%m-%d").date()

            if start <= check_date <= end:
                return True, period["name"]

        return False, None

    def _is_school_hours_active(
        self,
        check_datetime: datetime,
        school_hours: Dict[str, Any]
    ) -> bool:
        """Check if datetime falls during school operating hours"""
        # Only weekdays
        if check_datetime.weekday() >= 5:
            return False

        check_time = check_datetime.time()

        # Morning session
        morning_start = datetime.strptime(
            school_hours["morning_start"], "%H:%M"
        ).time()
        morning_end = datetime.strptime(
            school_hours["morning_end"], "%H:%M"
        ).time()

        if morning_start <= check_time <= morning_end:
            return True

        # Afternoon session if exists
        if school_hours.get("has_afternoon_session", False):
            afternoon_start = datetime.strptime(
                school_hours["afternoon_start"], "%H:%M"
            ).time()
            afternoon_end = datetime.strptime(
                school_hours["afternoon_end"], "%H:%M"
            ).time()

            if afternoon_start <= check_time <= afternoon_end:
                return True

        return False

    def _calculate_school_proximity_intensity(
        self,
        check_datetime: datetime,
        school_hours: Dict[str, Any]
    ) -> float:
        """
        Calculate school proximity impact intensity.
        Returns 0.0-1.0 based on drop-off/pick-up times.
        """
        # Only weekdays
        if check_datetime.weekday() >= 5:
            return 0.0

        check_time = check_datetime.time()

        morning_start = datetime.strptime(
            school_hours["morning_start"], "%H:%M"
        ).time()
        morning_end = datetime.strptime(
            school_hours["morning_end"], "%H:%M"
        ).time()

        # Morning drop-off peak (30 min before to 15 min after start)
        drop_off_start = (
            datetime.combine(date.today(), morning_start) - timedelta(minutes=30)
        ).time()
        drop_off_end = (
            datetime.combine(date.today(), morning_start) + timedelta(minutes=15)
        ).time()

        if drop_off_start <= check_time <= drop_off_end:
            return 1.0  # Peak

        # Morning pick-up peak (15 min before to 30 min after end)
        pickup_start = (
            datetime.combine(date.today(), morning_end) - timedelta(minutes=15)
        ).time()
        pickup_end = (
            datetime.combine(date.today(), morning_end) + timedelta(minutes=30)
        ).time()

        if pickup_start <= check_time <= pickup_end:
            return 1.0  # Peak

        # During school hours (moderate)
        if morning_start <= check_time <= morning_end:
            return 0.3

        return 0.0

    async def add_calendar_features(
        self,
        df: pd.DataFrame,
        tenant_id: str,
        date_column: str = "ds"
    ) -> pd.DataFrame:
        """
        Add calendar features to forecast dataframe.

        Args:
            df: Forecast dataframe with future dates
            tenant_id: Tenant ID to fetch calendar
            date_column: Name of date column (default 'ds' for Prophet)

        Returns:
            DataFrame with calendar features added
        """
        try:
            logger.info(
                "Adding calendar features to forecast",
                tenant_id=tenant_id,
                rows=len(df)
            )

            # Get calendar
            calendar = await self.get_calendar_for_tenant(tenant_id)

            if not calendar:
                logger.info(
                    "No calendar available, using zero features",
                    tenant_id=tenant_id
                )
                df["is_school_holiday"] = 0
                df["school_hours_active"] = 0
                df["school_proximity_intensity"] = 0.0
                return df

            holiday_periods = calendar.get("holiday_periods", [])
            school_hours = calendar.get("school_hours", {})

            # Initialize feature lists
            school_holidays = []
            hours_active = []
            proximity_intensity = []

            # Process each row
            for idx, row in df.iterrows():
                row_date = pd.to_datetime(row[date_column])

                # Check holiday
                is_holiday, _ = self._is_date_in_holiday_period(
                    row_date.date(),
                    holiday_periods
                )
                school_holidays.append(1 if is_holiday else 0)

                # Check school hours and proximity (if datetime has time component)
                if hasattr(row_date, 'hour'):
                    hours_active.append(
                        1 if self._is_school_hours_active(row_date, school_hours) else 0
                    )
                    proximity_intensity.append(
                        self._calculate_school_proximity_intensity(row_date, school_hours)
                    )
                else:
                    hours_active.append(0)
                    proximity_intensity.append(0.0)

            # Add features
            df["is_school_holiday"] = school_holidays
            df["school_hours_active"] = hours_active
            df["school_proximity_intensity"] = proximity_intensity

            logger.info(
                "Calendar features added to forecast",
                tenant_id=tenant_id,
                holidays_in_forecast=sum(school_holidays)
            )

            return df

        except Exception as e:
            logger.error(
                "Error adding calendar features to forecast",
                tenant_id=tenant_id,
                error=str(e)
            )
            # Return with zero features on error
            df["is_school_holiday"] = 0
            df["school_hours_active"] = 0
            df["school_proximity_intensity"] = 0.0
            return df


# Global instance
forecast_calendar_features = ForecastCalendarFeatures()
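The two pure helpers above can be exercised without the data client; a small sketch with a fabricated calendar payload (its keys mirror what the methods read, not a documented schema):

from datetime import datetime

features = ForecastCalendarFeatures()

holiday_periods = [
    {"name": "Winter Break", "start_date": "2024-12-21", "end_date": "2025-01-05"},
]
school_hours = {"morning_start": "08:30", "morning_end": "15:00"}

# Inside the holiday window -> (True, 'Winter Break')
print(features._is_date_in_holiday_period(datetime(2024, 12, 24).date(), holiday_periods))

# Tuesday 08:15 falls in the 30-minute pre-start drop-off window -> 1.0
print(features._calculate_school_proximity_intensity(datetime(2024, 12, 17, 8, 15), school_hours))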
403
services/forecasting/app/ml/demand_insights_orchestrator.py
Normal file
@@ -0,0 +1,403 @@
"""
Demand Insights Orchestrator

Coordinates demand forecasting analysis and insight posting
"""

import pandas as pd
from typing import Dict, List, Any, Optional
import structlog
from datetime import datetime
from uuid import UUID
import sys
import os

# Add shared clients to path
sys.path.append(os.path.join(os.path.dirname(__file__), '../../../..'))
from shared.clients.ai_insights_client import AIInsightsClient
from shared.messaging import UnifiedEventPublisher

from app.ml.predictor import BakeryForecaster

logger = structlog.get_logger()


class DemandInsightsOrchestrator:
    """
    Orchestrates demand forecasting analysis and insight generation workflow.

    Workflow:
    1. Analyze historical demand patterns from sales data
    2. Generate insights for demand optimization
    3. Post insights to AI Insights Service
    4. Publish recommendation events to RabbitMQ
    5. Provide demand pattern analysis for forecasting
    6. Track demand forecasting performance
    """

    def __init__(
        self,
        ai_insights_base_url: str = "http://ai-insights-service:8000",
        event_publisher: Optional[UnifiedEventPublisher] = None
    ):
        self.forecaster = BakeryForecaster()
        self.ai_insights_client = AIInsightsClient(ai_insights_base_url)
        self.event_publisher = event_publisher

    async def analyze_and_post_demand_insights(
        self,
        tenant_id: str,
        inventory_product_id: str,
        sales_data: pd.DataFrame,
        forecast_horizon_days: int = 30,
        min_history_days: int = 90
    ) -> Dict[str, Any]:
        """
        Complete workflow: Analyze demand and post insights.

        Args:
            tenant_id: Tenant identifier
            inventory_product_id: Product identifier
            sales_data: Historical sales data
            forecast_horizon_days: Days to forecast ahead
            min_history_days: Minimum days of history required

        Returns:
            Workflow results with analysis and posted insights
        """
        logger.info(
            "Starting demand forecasting analysis workflow",
            tenant_id=tenant_id,
            inventory_product_id=inventory_product_id,
            history_days=len(sales_data)
        )

        # Step 1: Analyze demand patterns
        analysis_results = await self.forecaster.analyze_demand_patterns(
            tenant_id=tenant_id,
            inventory_product_id=inventory_product_id,
            sales_data=sales_data,
            forecast_horizon_days=forecast_horizon_days,
            min_history_days=min_history_days
        )

        logger.info(
            "Demand analysis complete",
            inventory_product_id=inventory_product_id,
            insights_generated=len(analysis_results.get('insights', []))
        )

        # Step 2: Enrich insights with tenant_id and product context
        enriched_insights = self._enrich_insights(
            analysis_results.get('insights', []),
            tenant_id,
            inventory_product_id
        )

        # Step 3: Post insights to AI Insights Service
        if enriched_insights:
            post_results = await self.ai_insights_client.create_insights_bulk(
                tenant_id=UUID(tenant_id),
                insights=enriched_insights
            )

            logger.info(
                "Demand insights posted to AI Insights Service",
                inventory_product_id=inventory_product_id,
                total=post_results['total'],
                successful=post_results['successful'],
                failed=post_results['failed']
            )
        else:
            post_results = {'total': 0, 'successful': 0, 'failed': 0}
            logger.info("No insights to post for product", inventory_product_id=inventory_product_id)

        # Step 4: Publish insight events to RabbitMQ
        created_insights = post_results.get('created_insights', [])
        if created_insights:
            product_context = {'inventory_product_id': inventory_product_id}
            await self._publish_insight_events(
                tenant_id=tenant_id,
                insights=created_insights,
                product_context=product_context
            )

        # Step 5: Return comprehensive results
        return {
            'tenant_id': tenant_id,
            'inventory_product_id': inventory_product_id,
            'analyzed_at': analysis_results['analyzed_at'],
            'history_days': analysis_results['history_days'],
            'demand_patterns': analysis_results.get('patterns', {}),
            'trend_analysis': analysis_results.get('trend_analysis', {}),
            'seasonal_factors': analysis_results.get('seasonal_factors', {}),
            'insights_generated': len(enriched_insights),
            'insights_posted': post_results['successful'],
            'insights_failed': post_results['failed'],
            'created_insights': post_results.get('created_insights', [])
        }

    def _enrich_insights(
        self,
        insights: List[Dict[str, Any]],
        tenant_id: str,
        inventory_product_id: str
    ) -> List[Dict[str, Any]]:
        """
        Enrich insights with required fields for AI Insights Service.

        Args:
            insights: Raw insights from forecaster
            tenant_id: Tenant identifier
            inventory_product_id: Product identifier

        Returns:
            Enriched insights ready for posting
        """
        enriched = []

        for insight in insights:
            # Add required tenant_id
            enriched_insight = insight.copy()
            enriched_insight['tenant_id'] = tenant_id

            # Add product context to metrics
            if 'metrics_json' not in enriched_insight:
                enriched_insight['metrics_json'] = {}

            enriched_insight['metrics_json']['inventory_product_id'] = inventory_product_id

            # Add source metadata
            enriched_insight['source_service'] = 'forecasting'
            enriched_insight['source_model'] = 'demand_analyzer'
            enriched_insight['detected_at'] = datetime.utcnow().isoformat()

            enriched.append(enriched_insight)

        return enriched

    async def analyze_all_products(
        self,
        tenant_id: str,
        products_data: Dict[str, pd.DataFrame],
        forecast_horizon_days: int = 30,
        min_history_days: int = 90
    ) -> Dict[str, Any]:
        """
        Analyze all products for a tenant and generate comparative insights.

        Args:
            tenant_id: Tenant identifier
            products_data: Dict of {inventory_product_id: sales_data DataFrame}
            forecast_horizon_days: Days to forecast
            min_history_days: Minimum history required

        Returns:
            Comprehensive analysis with product comparison
        """
        logger.info(
            "Analyzing all products for tenant",
            tenant_id=tenant_id,
            products=len(products_data)
        )

        all_results = []
        total_insights_posted = 0

        # Analyze each product
        for inventory_product_id, sales_data in products_data.items():
            try:
                results = await self.analyze_and_post_demand_insights(
                    tenant_id=tenant_id,
                    inventory_product_id=inventory_product_id,
                    sales_data=sales_data,
                    forecast_horizon_days=forecast_horizon_days,
                    min_history_days=min_history_days
                )

                all_results.append(results)
                total_insights_posted += results['insights_posted']

            except Exception as e:
                logger.error(
                    "Error analyzing product",
                    inventory_product_id=inventory_product_id,
                    error=str(e)
                )

        # Generate summary insight
        if total_insights_posted > 0:
            summary_insight = self._generate_portfolio_summary_insight(
                tenant_id, all_results
            )

            if summary_insight:
                enriched_summary = self._enrich_insights(
                    [summary_insight], tenant_id, 'all_products'
                )

                post_results = await self.ai_insights_client.create_insights_bulk(
                    tenant_id=UUID(tenant_id),
                    insights=enriched_summary
                )

                total_insights_posted += post_results['successful']

        logger.info(
            "All products analysis complete",
            tenant_id=tenant_id,
            products_analyzed=len(all_results),
            total_insights_posted=total_insights_posted
        )

        return {
            'tenant_id': tenant_id,
            'analyzed_at': datetime.utcnow().isoformat(),
            'products_analyzed': len(all_results),
            'product_results': all_results,
            'total_insights_posted': total_insights_posted
        }

    def _generate_portfolio_summary_insight(
        self,
        tenant_id: str,
        all_results: List[Dict[str, Any]]
    ) -> Optional[Dict[str, Any]]:
        """
        Generate portfolio-level summary insight.

        Args:
            tenant_id: Tenant identifier
            all_results: All product analysis results

        Returns:
            Summary insight or None
        """
        if not all_results:
            return None

        # Calculate summary statistics
        total_products = len(all_results)
        high_demand_products = sum(
            1 for r in all_results
            if r.get('trend_analysis', {}).get('is_increasing', False)
        )

        # Average seasonal peak ratio over the products that actually report one
        peak_ratios = [
            r.get('seasonal_factors', {}).get('peak_ratio', 1.0)
            for r in all_results
            if r.get('seasonal_factors', {}).get('peak_ratio')
        ]
        avg_seasonal_factor = sum(peak_ratios) / max(1, len(peak_ratios))

        return {
            'type': 'recommendation',
            'priority': 'medium' if high_demand_products > total_products * 0.5 else 'low',
            'category': 'forecasting',
            'title': f'Demand Pattern Summary: {total_products} Products Analyzed',
            'description': f'Detected {high_demand_products} products with increasing demand trends. Average seasonal peak ratio: {avg_seasonal_factor:.2f}x.',
            'impact_type': 'demand_optimization',
            'impact_value': high_demand_products,
            'impact_unit': 'products',
            'confidence': 75,
            'metrics_json': {
                'total_products': total_products,
                'high_demand_products': high_demand_products,
                'avg_seasonal_factor': round(avg_seasonal_factor, 2),
                'trend_strength': 'strong' if high_demand_products > total_products * 0.7 else 'moderate'
            },
            'actionable': True,
            'recommendation_actions': [
                {
                    'label': 'Review Production Schedule',
                    'action': 'review_production_schedule',
                    'params': {'tenant_id': tenant_id}
                },
                {
                    'label': 'Adjust Inventory Levels',
                    'action': 'adjust_inventory_levels',
                    'params': {'tenant_id': tenant_id}
                }
            ],
            'source_service': 'forecasting',
            'source_model': 'demand_analyzer'
        }

    async def get_demand_patterns(
        self,
        inventory_product_id: str
    ) -> Optional[Dict[str, Any]]:
        """
        Get cached demand patterns for a product.

        Args:
            inventory_product_id: Product identifier

        Returns:
            Demand patterns or None if not analyzed
        """
        return self.forecaster.get_cached_demand_patterns(inventory_product_id)

    async def _publish_insight_events(self, tenant_id, insights, product_context=None):
        """
        Publish insight events to RabbitMQ for alert processing.

        Args:
            tenant_id: Tenant identifier
            insights: List of created insights
            product_context: Additional context about the product
        """
        if not self.event_publisher:
            logger.warning("No event publisher available for demand insights")
            return

        for insight in insights:
            # Determine severity based on confidence and priority
            confidence = insight.get('confidence', 0)
            priority = insight.get('priority', 'medium')

            # Map priority to severity, with confidence as tiebreaker
            if priority == 'critical' or (priority == 'high' and confidence >= 70):
                severity = 'high'
            elif priority == 'high' or (priority == 'medium' and confidence >= 80):
                severity = 'medium'
            else:
                severity = 'low'

            # Prepare the event data
            event_data = {
                'insight_id': insight.get('id'),
                'type': insight.get('type'),
                'title': insight.get('title'),
                'description': insight.get('description'),
                'category': insight.get('category'),
                'priority': insight.get('priority'),
                'confidence': confidence,
                'recommendation': insight.get('recommendation_actions', []),
                'impact_type': insight.get('impact_type'),
                'impact_value': insight.get('impact_value'),
                'inventory_product_id': product_context.get('inventory_product_id') if product_context else None,
                'timestamp': insight.get('detected_at', datetime.utcnow().isoformat()),
                'source_service': 'forecasting',
                'source_model': 'demand_analyzer'
            }

            try:
                await self.event_publisher.publish_recommendation(
                    event_type='ai_demand_forecast',
                    tenant_id=tenant_id,
                    severity=severity,
                    data=event_data
                )
                logger.info(
                    "Published demand insight event",
                    tenant_id=tenant_id,
                    insight_id=insight.get('id'),
                    severity=severity
                )
            except Exception as e:
                logger.error(
                    "Failed to publish demand insight event",
                    tenant_id=tenant_id,
                    insight_id=insight.get('id'),
                    error=str(e)
                )

    async def close(self):
        """Close HTTP client connections."""
        await self.ai_insights_client.close()
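A batch-mode sketch for this orchestrator; again illustrative only, since it depends on BakeryForecaster.analyze_demand_patterns and a live AI Insights Service, and the tenant/product values are fabricated:

import asyncio
import pandas as pd


async def run_batch():
    orchestrator = DemandInsightsOrchestrator(event_publisher=None)
    products = {
        "baguette": pd.DataFrame({
            "date": pd.date_range("2024-01-01", periods=180, freq="D"),
            "quantity": [30] * 180,  # fabricated flat demand history
        }),
    }
    try:
        summary = await orchestrator.analyze_all_products(
            tenant_id="00000000-0000-0000-0000-000000000000",  # placeholder UUID
            products_data=products,
            forecast_horizon_days=30,
            min_history_days=90,
        )
        print(summary["products_analyzed"], "products,", summary["total_insights_posted"], "insights posted")
    finally:
        await orchestrator.close()


asyncio.run(run_batch())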
758
services/forecasting/app/ml/dynamic_rules_engine.py
Normal file
@@ -0,0 +1,758 @@
|
||||
"""
|
||||
Dynamic Business Rules Engine
|
||||
Learns optimal adjustment factors from historical data instead of using hardcoded values
|
||||
Replaces hardcoded weather multipliers, holiday adjustments, event impacts with learned values
|
||||
"""
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from typing import Dict, List, Any, Optional, Tuple
|
||||
import structlog
|
||||
from datetime import datetime, timedelta
|
||||
from scipy import stats
|
||||
from sklearn.linear_model import Ridge
|
||||
from collections import defaultdict
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
|
||||
class DynamicRulesEngine:
|
||||
"""
|
||||
Learns business rules from historical data instead of using hardcoded values.
|
||||
|
||||
Current hardcoded values to replace:
|
||||
- Weather: rain = -15%, snow = -25%, extreme_heat = -10%
|
||||
- Holidays: +50% (all holidays treated the same)
|
||||
- Events: +30% (all events treated the same)
|
||||
- Weekend: Manual assumptions
|
||||
|
||||
Dynamic approach:
|
||||
- Learn actual weather impact per weather condition per product
|
||||
- Learn holiday multipliers per holiday type
|
||||
- Learn event impact by event type
|
||||
- Learn day-of-week patterns per product
|
||||
- Generate insights when learned values differ from hardcoded assumptions
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self.weather_rules = {}
|
||||
self.holiday_rules = {}
|
||||
self.event_rules = {}
|
||||
self.dow_rules = {}
|
||||
self.month_rules = {}
|
||||
|
||||
async def learn_all_rules(
|
||||
self,
|
||||
tenant_id: str,
|
||||
inventory_product_id: str,
|
||||
sales_data: pd.DataFrame,
|
||||
external_data: Optional[pd.DataFrame] = None,
|
||||
min_samples: int = 10
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Learn all business rules from historical data.
|
||||
|
||||
Args:
|
||||
tenant_id: Tenant identifier
|
||||
inventory_product_id: Product identifier
|
||||
sales_data: Historical sales data with 'date', 'quantity' columns
|
||||
external_data: Optional weather/events/holidays data
|
||||
min_samples: Minimum samples required to learn a rule
|
||||
|
||||
Returns:
|
||||
Dictionary of learned rules and insights
|
||||
"""
|
||||
logger.info(
|
||||
"Learning dynamic business rules from historical data",
|
||||
tenant_id=tenant_id,
|
||||
inventory_product_id=inventory_product_id,
|
||||
data_points=len(sales_data)
|
||||
)
|
||||
|
||||
results = {
|
||||
'tenant_id': tenant_id,
|
||||
'inventory_product_id': inventory_product_id,
|
||||
'learned_at': datetime.utcnow().isoformat(),
|
||||
'rules': {},
|
||||
'insights': []
|
||||
}
|
||||
|
||||
# Ensure date column is datetime
|
||||
if 'date' not in sales_data.columns:
|
||||
sales_data = sales_data.copy()
|
||||
sales_data['date'] = sales_data['ds']
|
||||
|
||||
sales_data['date'] = pd.to_datetime(sales_data['date'])
|
||||
|
||||
# Learn weather impact rules
|
||||
if external_data is not None and 'weather_condition' in external_data.columns:
|
||||
weather_rules, weather_insights = await self._learn_weather_rules(
|
||||
sales_data, external_data, min_samples
|
||||
)
|
||||
results['rules']['weather'] = weather_rules
|
||||
results['insights'].extend(weather_insights)
|
||||
self.weather_rules[inventory_product_id] = weather_rules
|
||||
|
||||
# Learn holiday rules
|
||||
if external_data is not None and 'is_holiday' in external_data.columns:
|
||||
holiday_rules, holiday_insights = await self._learn_holiday_rules(
|
||||
sales_data, external_data, min_samples
|
||||
)
|
||||
results['rules']['holidays'] = holiday_rules
|
||||
results['insights'].extend(holiday_insights)
|
||||
self.holiday_rules[inventory_product_id] = holiday_rules
|
||||
|
||||
# Learn event rules
|
||||
if external_data is not None and 'event_type' in external_data.columns:
|
||||
event_rules, event_insights = await self._learn_event_rules(
|
||||
sales_data, external_data, min_samples
|
||||
)
|
||||
results['rules']['events'] = event_rules
|
||||
results['insights'].extend(event_insights)
|
||||
self.event_rules[inventory_product_id] = event_rules
|
||||
|
||||
# Learn day-of-week patterns (always available)
|
||||
dow_rules, dow_insights = await self._learn_day_of_week_rules(
|
||||
sales_data, min_samples
|
||||
)
|
||||
results['rules']['day_of_week'] = dow_rules
|
||||
results['insights'].extend(dow_insights)
|
||||
self.dow_rules[inventory_product_id] = dow_rules
|
||||
|
||||
# Learn monthly seasonality
|
||||
month_rules, month_insights = await self._learn_month_rules(
|
||||
sales_data, min_samples
|
||||
)
|
||||
results['rules']['months'] = month_rules
|
||||
results['insights'].extend(month_insights)
|
||||
self.month_rules[inventory_product_id] = month_rules
|
||||
|
||||
logger.info(
|
||||
"Dynamic rules learning complete",
|
||||
total_insights=len(results['insights']),
|
||||
rules_learned=len(results['rules'])
|
||||
)
|
||||
|
||||
return results
|
||||
|
||||
async def _learn_weather_rules(
|
||||
self,
|
||||
sales_data: pd.DataFrame,
|
||||
external_data: pd.DataFrame,
|
||||
min_samples: int
|
||||
) -> Tuple[Dict[str, Any], List[Dict[str, Any]]]:
|
||||
"""
|
||||
Learn actual weather impact from historical data.
|
||||
|
||||
Hardcoded assumptions:
|
||||
- rain: -15%
|
||||
- snow: -25%
|
||||
- extreme_heat: -10%
|
||||
|
||||
Learn actual impact for this product.
|
||||
"""
|
||||
logger.info("Learning weather impact rules")
|
||||
|
||||
# Merge sales with weather data
|
||||
merged = sales_data.merge(
|
||||
external_data[['date', 'weather_condition', 'temperature', 'precipitation']],
|
||||
on='date',
|
||||
how='left'
|
||||
)
|
||||
|
||||
# Baseline: average sales on clear days
|
||||
clear_days = merged[
|
||||
(merged['weather_condition'].isin(['clear', 'sunny', 'partly_cloudy'])) |
|
||||
(merged['weather_condition'].isna())
|
||||
]
|
||||
baseline_avg = clear_days['quantity'].mean()
|
||||
|
||||
weather_rules = {
|
||||
'baseline_avg': float(baseline_avg),
|
||||
'conditions': {}
|
||||
}
|
||||
|
||||
insights = []
|
||||
|
||||
# Hardcoded values for comparison
|
||||
hardcoded_impacts = {
|
||||
'rain': -0.15,
|
||||
'snow': -0.25,
|
||||
'extreme_heat': -0.10
|
||||
}
|
||||
|
||||
# Learn impact for each weather condition
|
||||
for condition in ['rain', 'rainy', 'snow', 'snowy', 'extreme_heat', 'hot', 'storm', 'fog']:
|
||||
condition_days = merged[merged['weather_condition'].str.contains(condition, case=False, na=False)]
|
||||
|
||||
if len(condition_days) >= min_samples:
|
||||
condition_avg = condition_days['quantity'].mean()
|
||||
learned_impact = (condition_avg - baseline_avg) / baseline_avg
|
||||
|
||||
# Statistical significance test
|
||||
t_stat, p_value = stats.ttest_ind(
|
||||
condition_days['quantity'].values,
|
||||
clear_days['quantity'].values,
|
||||
equal_var=False
|
||||
)
|
||||
|
||||
weather_rules['conditions'][condition] = {
|
||||
'learned_multiplier': float(1 + learned_impact),
|
||||
'learned_impact_pct': float(learned_impact * 100),
|
||||
'sample_size': int(len(condition_days)),
|
||||
'avg_quantity': float(condition_avg),
|
||||
'p_value': float(p_value),
|
||||
'significant': bool(p_value < 0.05)
|
||||
}
|
||||
|
||||
# Compare with hardcoded value if exists
|
||||
if condition in hardcoded_impacts and p_value < 0.05:
|
||||
hardcoded_impact = hardcoded_impacts[condition]
|
||||
difference = abs(learned_impact - hardcoded_impact)
|
||||
|
||||
if difference > 0.05: # More than 5% difference
|
||||
insight = {
|
||||
'type': 'optimization',
|
||||
'priority': 'high' if difference > 0.15 else 'medium',
|
||||
'category': 'forecasting',
|
||||
'title': f'Weather Rule Mismatch: {condition.title()}',
|
||||
'description': f'Learned {condition} impact is {learned_impact*100:.1f}% vs hardcoded {hardcoded_impact*100:.1f}%. Updating rule could improve forecast accuracy by {difference*100:.1f}%.',
|
||||
'impact_type': 'forecast_improvement',
|
||||
'impact_value': difference * 100,
|
||||
'impact_unit': 'percentage_points',
|
||||
'confidence': self._calculate_confidence(len(condition_days), p_value),
|
||||
'metrics_json': {
|
||||
'weather_condition': condition,
|
||||
'learned_impact_pct': round(learned_impact * 100, 2),
|
||||
'hardcoded_impact_pct': round(hardcoded_impact * 100, 2),
|
||||
'difference_pct': round(difference * 100, 2),
|
||||
'baseline_avg': round(baseline_avg, 2),
|
||||
'condition_avg': round(condition_avg, 2),
|
||||
'sample_size': len(condition_days),
|
||||
'p_value': round(p_value, 4)
|
||||
},
|
||||
'actionable': True,
|
||||
'recommendation_actions': [
|
||||
{
|
||||
'label': 'Update Weather Rule',
|
||||
'action': 'update_weather_multiplier',
|
||||
'params': {
|
||||
'condition': condition,
|
||||
'new_multiplier': round(1 + learned_impact, 3)
|
||||
}
|
||||
}
|
||||
],
|
||||
'source_service': 'forecasting',
|
||||
'source_model': 'dynamic_rules_engine'
|
||||
}
|
||||
insights.append(insight)
|
||||
|
||||
logger.info(
|
||||
"Weather rule discrepancy detected",
|
||||
condition=condition,
|
||||
learned=f"{learned_impact*100:.1f}%",
|
||||
hardcoded=f"{hardcoded_impact*100:.1f}%"
|
||||
)
|
||||
|
||||
return weather_rules, insights
|
||||
|
||||
async def _learn_holiday_rules(
|
||||
self,
|
||||
sales_data: pd.DataFrame,
|
||||
external_data: pd.DataFrame,
|
||||
min_samples: int
|
||||
) -> Tuple[Dict[str, Any], List[Dict[str, Any]]]:
|
||||
"""
|
||||
Learn holiday impact by holiday type instead of uniform +50%.
|
||||
|
||||
Hardcoded: All holidays = +50%
|
||||
Learn: Christmas vs Easter vs National holidays have different impacts
|
||||
"""
|
||||
logger.info("Learning holiday impact rules")
|
||||
|
||||
# Merge sales with holiday data
|
||||
merged = sales_data.merge(
|
||||
external_data[['date', 'is_holiday', 'holiday_name', 'holiday_type']],
|
||||
on='date',
|
||||
how='left'
|
||||
)
|
||||
|
||||
# Baseline: non-holiday average
|
||||
non_holidays = merged[merged['is_holiday'] == False]
|
||||
baseline_avg = non_holidays['quantity'].mean()
|
||||
|
||||
holiday_rules = {
|
||||
'baseline_avg': float(baseline_avg),
|
||||
'hardcoded_multiplier': 1.5, # Current +50%
|
||||
'holiday_types': {}
|
||||
}
|
||||
|
||||
insights = []
|
||||
|
||||
# Learn impact per holiday type
|
||||
if 'holiday_type' in merged.columns:
|
||||
for holiday_type in merged[merged['is_holiday'] == True]['holiday_type'].unique():
|
||||
if pd.isna(holiday_type):
|
||||
continue
|
||||
|
||||
holiday_days = merged[merged['holiday_type'] == holiday_type]
|
||||
|
||||
if len(holiday_days) >= min_samples:
|
||||
holiday_avg = holiday_days['quantity'].mean()
|
||||
learned_multiplier = holiday_avg / baseline_avg
|
||||
learned_impact = (learned_multiplier - 1) * 100
|
||||
|
||||
# Statistical test
|
||||
t_stat, p_value = stats.ttest_ind(
|
||||
holiday_days['quantity'].values,
|
||||
non_holidays['quantity'].values,
|
||||
equal_var=False
|
||||
)
|
||||
|
||||
holiday_rules['holiday_types'][holiday_type] = {
|
||||
'learned_multiplier': float(learned_multiplier),
|
||||
'learned_impact_pct': float(learned_impact),
|
||||
'sample_size': int(len(holiday_days)),
|
||||
'avg_quantity': float(holiday_avg),
|
||||
'p_value': float(p_value),
|
||||
'significant': bool(p_value < 0.05)
|
||||
}
|
||||
|
||||
# Compare with hardcoded +50%
|
||||
hardcoded_multiplier = 1.5
|
||||
difference = abs(learned_multiplier - hardcoded_multiplier)
|
||||
|
||||
if difference > 0.1 and p_value < 0.05: # More than 10% difference
|
||||
insight = {
|
||||
'type': 'recommendation',
|
||||
'priority': 'high' if difference > 0.3 else 'medium',
|
||||
'category': 'forecasting',
|
||||
'title': f'Holiday Rule Optimization: {holiday_type}',
|
||||
'description': f'{holiday_type} shows {learned_impact:.1f}% impact vs hardcoded +50%. Using learned multiplier {learned_multiplier:.2f}x could improve forecast accuracy.',
|
||||
'impact_type': 'forecast_improvement',
|
||||
'impact_value': difference * 100,
|
||||
'impact_unit': 'percentage_points',
|
||||
'confidence': self._calculate_confidence(len(holiday_days), p_value),
|
||||
'metrics_json': {
|
||||
'holiday_type': holiday_type,
|
||||
'learned_multiplier': round(learned_multiplier, 3),
|
||||
'hardcoded_multiplier': 1.5,
|
||||
'learned_impact_pct': round(learned_impact, 2),
|
||||
'hardcoded_impact_pct': 50.0,
|
||||
'baseline_avg': round(baseline_avg, 2),
|
||||
'holiday_avg': round(holiday_avg, 2),
|
||||
'sample_size': len(holiday_days),
|
||||
'p_value': round(p_value, 4)
|
||||
},
|
||||
'actionable': True,
|
||||
'recommendation_actions': [
|
||||
{
|
||||
'label': 'Update Holiday Rule',
|
||||
'action': 'update_holiday_multiplier',
|
||||
'params': {
|
||||
'holiday_type': holiday_type,
|
||||
'new_multiplier': round(learned_multiplier, 3)
|
||||
}
|
||||
}
|
||||
],
|
||||
'source_service': 'forecasting',
|
||||
'source_model': 'dynamic_rules_engine'
|
||||
}
|
||||
insights.append(insight)
|
||||
|
||||
logger.info(
|
||||
"Holiday rule optimization identified",
|
||||
holiday_type=holiday_type,
|
||||
learned=f"{learned_multiplier:.2f}x",
|
||||
hardcoded="1.5x"
|
||||
)
|
||||
|
||||
# Overall holiday impact
|
||||
all_holidays = merged[merged['is_holiday'] == True]
|
||||
if len(all_holidays) >= min_samples:
|
||||
overall_avg = all_holidays['quantity'].mean()
|
||||
overall_multiplier = overall_avg / baseline_avg
|
||||
|
||||
holiday_rules['overall_learned_multiplier'] = float(overall_multiplier)
|
||||
holiday_rules['overall_learned_impact_pct'] = float((overall_multiplier - 1) * 100)
|
||||
|
||||
return holiday_rules, insights
|
||||
|
||||
async def _learn_event_rules(
|
||||
self,
|
||||
sales_data: pd.DataFrame,
|
||||
external_data: pd.DataFrame,
|
||||
min_samples: int
|
||||
) -> Tuple[Dict[str, Any], List[Dict[str, Any]]]:
|
||||
"""
|
||||
Learn event impact by event type instead of uniform +30%.
|
||||
|
||||
Hardcoded: All events = +30%
|
||||
Learn: Sports events vs concerts vs festivals have different impacts
|
||||
"""
|
||||
logger.info("Learning event impact rules")
|
||||
|
||||
# Merge sales with event data
|
||||
merged = sales_data.merge(
|
||||
external_data[['date', 'event_name', 'event_type', 'event_attendance']],
|
||||
on='date',
|
||||
how='left'
|
||||
)
|
||||
|
||||
# Baseline: non-event days
|
||||
non_events = merged[merged['event_name'].isna()]
|
||||
baseline_avg = non_events['quantity'].mean()
|
||||
|
||||
event_rules = {
|
||||
'baseline_avg': float(baseline_avg),
|
||||
'hardcoded_multiplier': 1.3, # Current +30%
|
||||
'event_types': {}
|
||||
}
|
||||
|
||||
insights = []
|
||||
|
||||
# Learn impact per event type
|
||||
if 'event_type' in merged.columns:
|
||||
for event_type in merged[merged['event_type'].notna()]['event_type'].unique():
|
||||
if pd.isna(event_type):
|
||||
continue
|
||||
|
||||
event_days = merged[merged['event_type'] == event_type]
|
||||
|
||||
if len(event_days) >= min_samples:
|
||||
event_avg = event_days['quantity'].mean()
|
||||
learned_multiplier = event_avg / baseline_avg
|
||||
learned_impact = (learned_multiplier - 1) * 100
|
||||
|
||||
# Statistical test
|
||||
t_stat, p_value = stats.ttest_ind(
|
||||
event_days['quantity'].values,
|
||||
non_events['quantity'].values,
|
||||
equal_var=False
|
||||
)
|
||||
|
||||
event_rules['event_types'][event_type] = {
|
||||
'learned_multiplier': float(learned_multiplier),
|
||||
'learned_impact_pct': float(learned_impact),
|
||||
'sample_size': int(len(event_days)),
|
||||
'avg_quantity': float(event_avg),
|
||||
'p_value': float(p_value),
|
||||
'significant': bool(p_value < 0.05)
|
||||
}
|
||||
|
||||
# Compare with hardcoded +30%
|
||||
hardcoded_multiplier = 1.3
|
||||
difference = abs(learned_multiplier - hardcoded_multiplier)
|
||||
|
||||
if difference > 0.1 and p_value < 0.05:
|
||||
insight = {
|
||||
'type': 'recommendation',
|
||||
'priority': 'medium',
|
||||
'category': 'forecasting',
|
||||
'title': f'Event Rule Optimization: {event_type}',
|
||||
'description': f'{event_type} events show {learned_impact:.1f}% impact vs hardcoded +30%. Using learned multiplier could improve event forecasts.',
|
||||
'impact_type': 'forecast_improvement',
|
||||
'impact_value': difference * 100,
|
||||
'impact_unit': 'percentage_points',
|
||||
'confidence': self._calculate_confidence(len(event_days), p_value),
|
||||
'metrics_json': {
|
||||
'event_type': event_type,
|
||||
'learned_multiplier': round(learned_multiplier, 3),
|
||||
'hardcoded_multiplier': 1.3,
|
||||
'learned_impact_pct': round(learned_impact, 2),
|
||||
'hardcoded_impact_pct': 30.0,
|
||||
'baseline_avg': round(baseline_avg, 2),
|
||||
'event_avg': round(event_avg, 2),
|
||||
'sample_size': len(event_days),
|
||||
'p_value': round(p_value, 4)
|
||||
},
|
||||
'actionable': True,
|
||||
'recommendation_actions': [
|
||||
{
|
||||
'label': 'Update Event Rule',
|
||||
'action': 'update_event_multiplier',
|
||||
'params': {
|
||||
'event_type': event_type,
|
||||
'new_multiplier': round(learned_multiplier, 3)
|
||||
}
|
||||
}
|
||||
],
|
||||
'source_service': 'forecasting',
|
||||
'source_model': 'dynamic_rules_engine'
|
||||
}
|
||||
insights.append(insight)
|
||||
|
||||
return event_rules, insights
|
||||
|
||||
async def _learn_day_of_week_rules(
|
||||
self,
|
||||
sales_data: pd.DataFrame,
|
||||
min_samples: int
|
||||
) -> Tuple[Dict[str, Any], List[Dict[str, Any]]]:
|
||||
"""
|
||||
Learn day-of-week patterns per product.
|
||||
Replace general assumptions with product-specific patterns.
|
||||
"""
|
||||
logger.info("Learning day-of-week patterns")
|
||||
|
||||
sales_data = sales_data.copy()
|
||||
sales_data['day_of_week'] = sales_data['date'].dt.dayofweek
|
||||
sales_data['day_name'] = sales_data['date'].dt.day_name()
|
||||
|
||||
# Calculate average per day of week
|
||||
dow_avg = sales_data.groupby('day_of_week')['quantity'].agg(['mean', 'std', 'count'])
|
||||
|
||||
overall_avg = sales_data['quantity'].mean()
|
||||
|
||||
dow_rules = {
|
||||
'overall_avg': float(overall_avg),
|
||||
'days': {}
|
||||
}
|
||||
|
||||
insights = []
|
||||
|
||||
day_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
|
||||
|
||||
for dow in range(7):
|
||||
if dow not in dow_avg.index or dow_avg.loc[dow, 'count'] < min_samples:
|
||||
continue
|
||||
|
||||
day_avg = dow_avg.loc[dow, 'mean']
|
||||
day_std = dow_avg.loc[dow, 'std']
|
||||
day_count = dow_avg.loc[dow, 'count']
|
||||
|
||||
multiplier = day_avg / overall_avg
|
||||
impact_pct = (multiplier - 1) * 100
|
||||
|
||||
# Coefficient of variation
|
||||
cv = (day_std / day_avg) if day_avg > 0 else 0
|
||||
|
||||
dow_rules['days'][day_names[dow]] = {
|
||||
'day_of_week': int(dow),
|
||||
'learned_multiplier': float(multiplier),
|
||||
'impact_pct': float(impact_pct),
|
||||
'avg_quantity': float(day_avg),
|
||||
'std_quantity': float(day_std),
|
||||
'sample_size': int(day_count),
|
||||
'coefficient_of_variation': float(cv)
|
||||
}
|
||||
|
||||
# Insight for significant deviations
|
||||
if abs(impact_pct) > 20: # More than 20% difference
|
||||
insight = {
|
||||
'type': 'insight',
|
||||
'priority': 'medium' if abs(impact_pct) > 30 else 'low',
|
||||
'category': 'forecasting',
|
||||
'title': f'{day_names[dow]} Pattern: {abs(impact_pct):.0f}% {"Higher" if impact_pct > 0 else "Lower"}',
|
||||
'description': f'{day_names[dow]} sales average {day_avg:.1f} units ({impact_pct:+.1f}% vs weekly average {overall_avg:.1f}). Consider this pattern in production planning.',
|
||||
'impact_type': 'operational_insight',
|
||||
'impact_value': abs(impact_pct),
|
||||
'impact_unit': 'percentage',
|
||||
'confidence': self._calculate_confidence(day_count, 0.01), # Low p-value for large samples
|
||||
'metrics_json': {
|
||||
'day_of_week': day_names[dow],
|
||||
'day_multiplier': round(multiplier, 3),
|
||||
'impact_pct': round(impact_pct, 2),
|
||||
'day_avg': round(day_avg, 2),
|
||||
'overall_avg': round(overall_avg, 2),
|
||||
'sample_size': int(day_count),
|
||||
'std': round(day_std, 2)
|
||||
},
|
||||
'actionable': True,
|
||||
'recommendation_actions': [
|
||||
{
|
||||
'label': 'Adjust Production Schedule',
|
||||
'action': 'adjust_weekly_production',
|
||||
'params': {
|
||||
'day': day_names[dow],
|
||||
'multiplier': round(multiplier, 3)
|
||||
}
|
||||
}
|
||||
],
|
||||
'source_service': 'forecasting',
|
||||
'source_model': 'dynamic_rules_engine'
|
||||
}
|
||||
insights.append(insight)
|
||||
|
||||
return dow_rules, insights
|
||||
|
||||
    async def _learn_month_rules(
        self,
        sales_data: pd.DataFrame,
        min_samples: int
    ) -> Tuple[Dict[str, Any], List[Dict[str, Any]]]:
        """
        Learn monthly seasonality patterns per product.
        """
        logger.info("Learning monthly seasonality patterns")

        sales_data = sales_data.copy()
        sales_data['month'] = sales_data['date'].dt.month
        sales_data['month_name'] = sales_data['date'].dt.month_name()

        # Calculate average per month
        month_avg = sales_data.groupby('month')['quantity'].agg(['mean', 'std', 'count'])

        overall_avg = sales_data['quantity'].mean()

        month_rules = {
            'overall_avg': float(overall_avg),
            'months': {}
        }

        insights = []

        month_names = ['January', 'February', 'March', 'April', 'May', 'June',
                       'July', 'August', 'September', 'October', 'November', 'December']

        for month in range(1, 13):
            if month not in month_avg.index or month_avg.loc[month, 'count'] < min_samples:
                continue

            month_mean = month_avg.loc[month, 'mean']
            month_std = month_avg.loc[month, 'std']
            month_count = month_avg.loc[month, 'count']

            multiplier = month_mean / overall_avg
            impact_pct = (multiplier - 1) * 100

            month_rules['months'][month_names[month - 1]] = {
                'month': int(month),
                'learned_multiplier': float(multiplier),
                'impact_pct': float(impact_pct),
                'avg_quantity': float(month_mean),
                'std_quantity': float(month_std),
                'sample_size': int(month_count)
            }

            # Insight for significant seasonal patterns
            if abs(impact_pct) > 25:  # More than 25% seasonal variation
                insight = {
                    'type': 'insight',
                    'priority': 'medium',
                    'category': 'forecasting',
                    'title': f'Seasonal Pattern: {month_names[month - 1]} {abs(impact_pct):.0f}% {"Higher" if impact_pct > 0 else "Lower"}',
                    'description': f'{month_names[month - 1]} shows strong seasonality with {impact_pct:+.1f}% vs annual average. Plan inventory accordingly.',
                    'impact_type': 'operational_insight',
                    'impact_value': abs(impact_pct),
                    'impact_unit': 'percentage',
                    'confidence': self._calculate_confidence(month_count, 0.01),
                    'metrics_json': {
                        'month': month_names[month - 1],
                        'multiplier': round(multiplier, 3),
                        'impact_pct': round(impact_pct, 2),
                        'month_avg': round(month_mean, 2),
                        'annual_avg': round(overall_avg, 2),
                        'sample_size': int(month_count)
                    },
                    'actionable': True,
                    'recommendation_actions': [
                        {
                            'label': 'Adjust Seasonal Planning',
                            'action': 'adjust_seasonal_forecast',
                            'params': {
                                'month': month_names[month - 1],
                                'multiplier': round(multiplier, 3)
                            }
                        }
                    ],
                    'source_service': 'forecasting',
                    'source_model': 'dynamic_rules_engine'
                }
                insights.append(insight)

        return month_rules, insights

    def _calculate_confidence(self, sample_size: int, p_value: float) -> int:
        """
        Calculate confidence score (0-100) based on sample size and statistical significance.

        Args:
            sample_size: Number of observations
            p_value: Statistical significance p-value

        Returns:
            Confidence score 0-100
        """
        # Sample size score (0-50 points)
        if sample_size >= 100:
            sample_score = 50
        elif sample_size >= 50:
            sample_score = 40
        elif sample_size >= 30:
            sample_score = 30
        elif sample_size >= 20:
            sample_score = 20
        else:
            sample_score = 10

        # Statistical significance score (0-50 points)
        if p_value < 0.001:
            sig_score = 50
        elif p_value < 0.01:
            sig_score = 45
        elif p_value < 0.05:
            sig_score = 35
        elif p_value < 0.1:
            sig_score = 20
        else:
            sig_score = 10

        return min(100, sample_score + sig_score)

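    # Illustrative scoring (not part of the committed code): with 60 samples and
    # p = 0.005, sample_score is 40 (>= 50 samples) and sig_score is 45 (p < 0.01),
    # so _calculate_confidence(60, 0.005) returns 85.
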
    def get_rule(
        self,
        inventory_product_id: str,
        rule_type: str,
        key: str
    ) -> Optional[float]:
        """
        Get learned rule multiplier for a specific condition.

        Args:
            inventory_product_id: Product identifier
            rule_type: 'weather', 'holiday', 'event', 'day_of_week', 'month'
            key: Specific condition key (e.g., 'rain', 'Christmas', 'Monday')

        Returns:
            Learned multiplier or None if not learned
        """
        if rule_type == 'weather':
            rules = self.weather_rules.get(inventory_product_id, {})
            return rules.get('conditions', {}).get(key, {}).get('learned_multiplier')

        elif rule_type == 'holiday':
            rules = self.holiday_rules.get(inventory_product_id, {})
            return rules.get('holiday_types', {}).get(key, {}).get('learned_multiplier')

        elif rule_type == 'event':
            rules = self.event_rules.get(inventory_product_id, {})
            return rules.get('event_types', {}).get(key, {}).get('learned_multiplier')

        elif rule_type == 'day_of_week':
            rules = self.dow_rules.get(inventory_product_id, {})
            return rules.get('days', {}).get(key, {}).get('learned_multiplier')

        elif rule_type == 'month':
            rules = self.month_rules.get(inventory_product_id, {})
            return rules.get('months', {}).get(key, {}).get('learned_multiplier')

        return None

    def export_rules_for_prophet(
        self,
        inventory_product_id: str
    ) -> Dict[str, Any]:
        """
        Export learned rules in a format suitable for Prophet model integration.

        Returns:
            Dictionary with multipliers for Prophet custom seasonality/regressors
        """
        return {
            'weather': self.weather_rules.get(inventory_product_id, {}),
            'holidays': self.holiday_rules.get(inventory_product_id, {}),
            'events': self.event_rules.get(inventory_product_id, {}),
            'day_of_week': self.dow_rules.get(inventory_product_id, {}),
            'months': self.month_rules.get(inventory_product_id, {})
        }

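# Illustrative consumer of the exported rules (a sketch, not part of the committed
# file): scale a baseline daily forecast by the learned day-of-week multiplier,
# falling back to 1.0 when no rule was learned. "prod-123" is a made-up identifier.
#
#     engine = DynamicRulesEngine()
#     rules = engine.export_rules_for_prophet("prod-123")
#     day_rule = rules['day_of_week'].get('days', {}).get('Monday', {})
#     multiplier = day_rule.get('learned_multiplier', 1.0)
#     adjusted_forecast = baseline_forecast * multiplier
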
263
services/forecasting/app/ml/multi_horizon_forecaster.py
Normal file
@@ -0,0 +1,263 @@
"""
Multi-Horizon Forecasting System
Generates forecasts for multiple time horizons (7, 14, 30, 90 days)
"""

import pandas as pd
import numpy as np
from typing import Dict, List, Any, Optional, Tuple
from datetime import datetime, timedelta, date
import structlog

logger = structlog.get_logger()


class MultiHorizonForecaster:
    """
    Multi-horizon forecasting with horizon-specific models.

    Horizons:
    - Short-term (1-7 days): High precision, detailed features
    - Medium-term (8-14 days): Balanced approach
    - Long-term (15-30 days): Focus on trends, seasonal patterns
    - Very long-term (31-90 days): Strategic planning, major trends only
    """

    HORIZONS = {
        'short': (1, 7),
        'medium': (8, 14),
        'long': (15, 30),
        'very_long': (31, 90)
    }

    def __init__(self, base_forecaster=None):
        """
        Initialize multi-horizon forecaster.

        Args:
            base_forecaster: Base forecaster (e.g., BakeryForecaster) to use
        """
        self.base_forecaster = base_forecaster

    async def generate_multi_horizon_forecast(
        self,
        tenant_id: str,
        inventory_product_id: str,
        start_date: date,
        horizons: Optional[List[str]] = None,
        include_confidence_intervals: bool = True
    ) -> Dict[str, Any]:
        """
        Generate forecasts for multiple horizons.

        Args:
            tenant_id: Tenant identifier
            inventory_product_id: Product identifier
            start_date: Start date for forecasts
            horizons: List of horizons to forecast ('short', 'medium', 'long', 'very_long')
            include_confidence_intervals: Include confidence intervals

        Returns:
            Dictionary with forecasts by horizon
        """
        if horizons is None:
            horizons = ['short', 'medium', 'long']

        logger.info(
            "Generating multi-horizon forecast",
            tenant_id=tenant_id,
            inventory_product_id=inventory_product_id,
            horizons=horizons
        )

        results = {
            'tenant_id': tenant_id,
            'inventory_product_id': inventory_product_id,
            'start_date': start_date.isoformat(),
            'generated_at': datetime.now().isoformat(),
            'horizons': {}
        }

        for horizon_name in horizons:
            if horizon_name not in self.HORIZONS:
                logger.warning(f"Unknown horizon: {horizon_name}, skipping")
                continue

            start_day, end_day = self.HORIZONS[horizon_name]

            # Generate forecast for this horizon
            horizon_forecast = await self._generate_horizon_forecast(
                tenant_id=tenant_id,
                inventory_product_id=inventory_product_id,
                start_date=start_date,
                days_ahead=end_day,
                horizon_name=horizon_name,
                include_confidence=include_confidence_intervals
            )

            results['horizons'][horizon_name] = horizon_forecast

        logger.info("Multi-horizon forecast complete",
                    horizons_generated=len(results['horizons']))

        return results

    async def _generate_horizon_forecast(
        self,
        tenant_id: str,
        inventory_product_id: str,
        start_date: date,
        days_ahead: int,
        horizon_name: str,
        include_confidence: bool
    ) -> Dict[str, Any]:
        """
        Generate forecast for a specific horizon.

        Args:
            tenant_id: Tenant identifier
            inventory_product_id: Product identifier
            start_date: Start date
            days_ahead: Number of days ahead
            horizon_name: Horizon name ('short', 'medium', etc.)
            include_confidence: Include confidence intervals

        Returns:
            Forecast data for the horizon
        """
        # Generate date range
        dates = [start_date + timedelta(days=i) for i in range(days_ahead)]

        # Use base forecaster if available
        if self.base_forecaster:
            # Call base forecaster for predictions
            forecasts = []

            for forecast_date in dates:
                try:
                    # This would call the actual forecasting service.
                    # For now, return a structured placeholder response.
                    forecasts.append({
                        'date': forecast_date.isoformat(),
                        'predicted_demand': 0,  # Placeholder
                        'confidence_lower': 0 if include_confidence else None,
                        'confidence_upper': 0 if include_confidence else None
                    })
                except Exception as e:
                    logger.error(f"Failed to generate forecast for {forecast_date}: {e}")

            return {
                'horizon_name': horizon_name,
                'days_ahead': days_ahead,
                'start_date': start_date.isoformat(),
                'end_date': dates[-1].isoformat(),
                'forecasts': forecasts,
                'aggregates': self._calculate_horizon_aggregates(forecasts)
            }
        else:
            logger.warning("No base forecaster available, returning placeholder")
            return {
                'horizon_name': horizon_name,
                'days_ahead': days_ahead,
                'forecasts': [],
                'aggregates': {}
            }

    def _calculate_horizon_aggregates(self, forecasts: List[Dict]) -> Dict[str, float]:
        """
        Calculate aggregate statistics for a horizon.

        Args:
            forecasts: List of daily forecasts

        Returns:
            Aggregate statistics
        """
        if not forecasts:
            return {}

        # Keep explicit zero forecasts; only drop entries with no prediction at all
        demands = [f['predicted_demand'] for f in forecasts if f.get('predicted_demand') is not None]

        if not demands:
            return {}

        return {
            'total_demand': sum(demands),
            'avg_daily_demand': float(np.mean(demands)),
            'max_daily_demand': max(demands),
            'min_daily_demand': min(demands),
            'demand_volatility': float(np.std(demands)) if len(demands) > 1 else 0.0
        }

    def get_horizon_recommendation(
        self,
        horizon_name: str,
        forecast_data: Dict[str, Any]
    ) -> Dict[str, Any]:
        """
        Generate recommendations based on a horizon forecast.

        Args:
            horizon_name: Horizon name
            forecast_data: Forecast data for the horizon

        Returns:
            Recommendations dictionary
        """
        aggregates = forecast_data.get('aggregates', {})
        total_demand = aggregates.get('total_demand', 0)
        volatility = aggregates.get('demand_volatility', 0)

        recommendations = {
            'horizon': horizon_name,
            'actions': []
        }

        if horizon_name == 'short':
            # Short-term: operational recommendations
            if total_demand > 0:
                recommendations['actions'].append(f"Prepare {total_demand:.0f} units for next 7 days")
            if volatility > 10:
                recommendations['actions'].append("High volatility expected - increase safety stock")

        elif horizon_name == 'medium':
            # Medium-term: procurement planning
            recommendations['actions'].append(f"Order supplies for {total_demand:.0f} units (2-week demand)")
            if aggregates.get('max_daily_demand', 0) > aggregates.get('avg_daily_demand', 0) * 1.5:
                recommendations['actions'].append("Peak demand day detected - plan extra capacity")

        elif horizon_name == 'long':
            # Long-term: strategic planning
            avg_weekly_demand = total_demand / 4 if total_demand > 0 else 0
            recommendations['actions'].append(f"Monthly demand projection: {total_demand:.0f} units")
            recommendations['actions'].append(f"Average weekly demand: {avg_weekly_demand:.0f} units")

        elif horizon_name == 'very_long':
            # Very long-term: capacity planning
            recommendations['actions'].append(f"Quarterly demand projection: {total_demand:.0f} units")
            recommendations['actions'].append("Review capacity and staffing needs")

        return recommendations


def get_appropriate_horizons_for_use_case(use_case: str) -> List[str]:
    """
    Get appropriate forecast horizons for a use case.

    Args:
        use_case: Use case name (e.g., 'production_planning', 'procurement', 'strategic')

    Returns:
        List of horizon names
    """
    use_case_horizons = {
        'production_planning': ['short'],
        'procurement': ['short', 'medium'],
        'inventory_optimization': ['short', 'medium'],
        'capacity_planning': ['medium', 'long'],
        'strategic_planning': ['long', 'very_long'],
        'financial_planning': ['long', 'very_long'],
        'all': ['short', 'medium', 'long', 'very_long']
    }

    return use_case_horizons.get(use_case, ['short', 'medium'])
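
# Illustrative usage (a sketch; identifiers like "tenant-1" are made up, and with
# no base forecaster the daily predictions are the placeholder zeros above):
#
#     import asyncio
#     from datetime import date
#
#     forecaster = MultiHorizonForecaster(base_forecaster=None)
#     result = asyncio.run(forecaster.generate_multi_horizon_forecast(
#         tenant_id="tenant-1",
#         inventory_product_id="prod-1",
#         start_date=date.today(),
#         horizons=get_appropriate_horizons_for_use_case("procurement")
#     ))
#     # result['horizons'] then maps 'short' and 'medium' to forecast payloads.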
593
services/forecasting/app/ml/pattern_detector.py
Normal file
@@ -0,0 +1,593 @@
"""
Pattern Detection Engine for Sales Data
Automatically identifies patterns and generates insights
"""

import pandas as pd
import numpy as np
from typing import Dict, List, Any, Optional, Tuple
from datetime import datetime, timedelta
import structlog
from scipy import stats
from collections import defaultdict

logger = structlog.get_logger()


class SalesPatternDetector:
    """
    Detect sales patterns and generate actionable insights.

    Patterns detected:
    - Time-of-day patterns (hourly peaks)
    - Day-of-week patterns (weekend spikes)
    - Weekly seasonality patterns
    - Monthly patterns
    - Holiday impact patterns
    - Weather correlation patterns
    """

    def __init__(self, significance_threshold: float = 0.15):
        """
        Initialize pattern detector.

        Args:
            significance_threshold: Minimum fractional difference to consider significant (default 0.15, i.e. 15%)
        """
        self.significance_threshold = significance_threshold
        self.detected_patterns = []

    async def detect_all_patterns(
        self,
        tenant_id: str,
        inventory_product_id: str,
        sales_data: pd.DataFrame,
        min_confidence: int = 70
    ) -> List[Dict[str, Any]]:
        """
        Detect all patterns in sales data and generate insights.

        Args:
            tenant_id: Tenant identifier
            inventory_product_id: Product identifier
            sales_data: Sales data with columns: date, quantity, (optional: hour, temperature, etc.)
            min_confidence: Minimum confidence score for insights

        Returns:
            List of insight dictionaries ready for AI Insights Service
        """
        logger.info(
            "Starting pattern detection",
            tenant_id=tenant_id,
            product_id=inventory_product_id,
            data_points=len(sales_data)
        )

        insights = []

        # Ensure date column is datetime
        if 'date' in sales_data.columns:
            sales_data['date'] = pd.to_datetime(sales_data['date'])

        # 1. Day-of-week patterns
        dow_insights = await self._detect_day_of_week_patterns(
            tenant_id, inventory_product_id, sales_data, min_confidence
        )
        insights.extend(dow_insights)

        # 2. Weekend vs weekday patterns
        weekend_insights = await self._detect_weekend_patterns(
            tenant_id, inventory_product_id, sales_data, min_confidence
        )
        insights.extend(weekend_insights)

        # 3. Month-end patterns
        month_end_insights = await self._detect_month_end_patterns(
            tenant_id, inventory_product_id, sales_data, min_confidence
        )
        insights.extend(month_end_insights)

        # 4. Hourly patterns (if hour data available)
        if 'hour' in sales_data.columns:
            hourly_insights = await self._detect_hourly_patterns(
                tenant_id, inventory_product_id, sales_data, min_confidence
            )
            insights.extend(hourly_insights)

        # 5. Weather correlation (if temperature data available)
        if 'temperature' in sales_data.columns:
            weather_insights = await self._detect_weather_correlations(
                tenant_id, inventory_product_id, sales_data, min_confidence
            )
            insights.extend(weather_insights)

        # 6. Trend detection
        trend_insights = await self._detect_trends(
            tenant_id, inventory_product_id, sales_data, min_confidence
        )
        insights.extend(trend_insights)

        logger.info(
            "Pattern detection complete",
            total_insights=len(insights),
            product_id=inventory_product_id
        )

        return insights

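    # Illustrative usage (a sketch; the DataFrame below is made-up sample data):
    #
    #     import asyncio
    #     df = pd.DataFrame({
    #         'date': pd.date_range('2024-01-01', periods=120, freq='D'),
    #         'quantity': np.random.poisson(40, 120)
    #     })
    #     detector = SalesPatternDetector(significance_threshold=0.15)
    #     found = asyncio.run(detector.detect_all_patterns(
    #         tenant_id="tenant-1", inventory_product_id="prod-1",
    #         sales_data=df, min_confidence=70
    #     ))
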
    async def _detect_day_of_week_patterns(
        self,
        tenant_id: str,
        inventory_product_id: str,
        sales_data: pd.DataFrame,
        min_confidence: int
    ) -> List[Dict[str, Any]]:
        """Detect day-of-week patterns (e.g., Friday sales spike)."""
        insights = []

        if 'date' not in sales_data.columns or 'quantity' not in sales_data.columns:
            return insights

        # Add day of week
        sales_data['day_of_week'] = sales_data['date'].dt.dayofweek
        sales_data['day_name'] = sales_data['date'].dt.day_name()

        # Calculate average sales per day of week
        dow_avg = sales_data.groupby(['day_of_week', 'day_name'])['quantity'].agg(['mean', 'count']).reset_index()

        # Only consider days with sufficient data (at least 4 observations)
        dow_avg = dow_avg[dow_avg['count'] >= 4]

        if len(dow_avg) < 2:
            return insights

        overall_avg = sales_data['quantity'].mean()

        # Find days significantly above average
        for _, row in dow_avg.iterrows():
            day_avg = row['mean']
            pct_diff = ((day_avg - overall_avg) / overall_avg) * 100

            if abs(pct_diff) > self.significance_threshold * 100:
                # Calculate confidence based on sample size and consistency
                confidence = self._calculate_pattern_confidence(
                    sample_size=int(row['count']),
                    effect_size=abs(pct_diff) / 100,
                    variability=sales_data['quantity'].std()
                )

                if confidence >= min_confidence:
                    # Only above-average days generate an insight
                    if pct_diff > 0:
                        insight = self._create_insight(
                            tenant_id=tenant_id,
                            inventory_product_id=inventory_product_id,
                            insight_type='pattern',
                            category='sales',
                            priority='medium' if pct_diff > 20 else 'low',
                            title=f'{row["day_name"]} Sales Pattern Detected',
                            description=f'Sales on {row["day_name"]} are {abs(pct_diff):.1f}% higher than average ({day_avg:.1f} vs {overall_avg:.1f} units).',
                            confidence=confidence,
                            metrics={
                                'day_of_week': row['day_name'],
                                'avg_sales': float(day_avg),
                                'overall_avg': float(overall_avg),
                                'difference_pct': float(pct_diff),
                                'sample_size': int(row['count'])
                            },
                            actionable=True,
                            actions=[
                                {'label': 'Adjust Production', 'action': 'adjust_daily_production'},
                                {'label': 'Review Schedule', 'action': 'review_production_schedule'}
                            ]
                        )
                        insights.append(insight)

        return insights

    async def _detect_weekend_patterns(
        self,
        tenant_id: str,
        inventory_product_id: str,
        sales_data: pd.DataFrame,
        min_confidence: int
    ) -> List[Dict[str, Any]]:
        """Detect weekend vs weekday patterns."""
        insights = []

        if 'date' not in sales_data.columns or 'quantity' not in sales_data.columns:
            return insights

        # Classify weekend vs weekday
        sales_data['is_weekend'] = sales_data['date'].dt.dayofweek.isin([5, 6])

        # Calculate averages
        weekend_avg = sales_data[sales_data['is_weekend']]['quantity'].mean()
        weekday_avg = sales_data[~sales_data['is_weekend']]['quantity'].mean()

        weekend_count = sales_data[sales_data['is_weekend']]['quantity'].count()
        weekday_count = sales_data[~sales_data['is_weekend']]['quantity'].count()

        if weekend_count < 4 or weekday_count < 4:
            return insights

        pct_diff = ((weekend_avg - weekday_avg) / weekday_avg) * 100

        if abs(pct_diff) > self.significance_threshold * 100:
            confidence = self._calculate_pattern_confidence(
                sample_size=min(weekend_count, weekday_count),
                effect_size=abs(pct_diff) / 100,
                variability=sales_data['quantity'].std()
            )

            if confidence >= min_confidence:
                # Estimate monthly unit impact: ~2 weekend days x 4 weeks = 8 weekend days/month
                impact_value = abs(weekend_avg - weekday_avg) * 8

                insight = self._create_insight(
                    tenant_id=tenant_id,
                    inventory_product_id=inventory_product_id,
                    insight_type='recommendation',
                    category='forecasting',
                    priority='high' if abs(pct_diff) > 25 else 'medium',
                    title=f'Weekend Demand Pattern: {abs(pct_diff):.0f}% {"Higher" if pct_diff > 0 else "Lower"}',
                    description=f'Weekend sales average {weekend_avg:.1f} units vs {weekday_avg:.1f} on weekdays ({abs(pct_diff):.0f}% {"increase" if pct_diff > 0 else "decrease"}). Recommend adjusting weekend production targets.',
                    confidence=confidence,
                    impact_type='revenue_increase' if pct_diff > 0 else 'cost_savings',
                    impact_value=float(impact_value),
                    impact_unit='units/month',
                    metrics={
                        'weekend_avg': float(weekend_avg),
                        'weekday_avg': float(weekday_avg),
                        'difference_pct': float(pct_diff),
                        'weekend_samples': int(weekend_count),
                        'weekday_samples': int(weekday_count)
                    },
                    actionable=True,
                    actions=[
                        {'label': 'Increase Weekend Production', 'action': 'adjust_weekend_production'},
                        {'label': 'Update Forecast Multiplier', 'action': 'update_forecast_rule'}
                    ]
                )
                insights.append(insight)

        return insights

    async def _detect_month_end_patterns(
        self,
        tenant_id: str,
        inventory_product_id: str,
        sales_data: pd.DataFrame,
        min_confidence: int
    ) -> List[Dict[str, Any]]:
        """Detect month-end and payday patterns."""
        insights = []

        if 'date' not in sales_data.columns or 'quantity' not in sales_data.columns:
            return insights

        # Identify payday periods (the 15th and the last 3 days of the month)
        sales_data['day_of_month'] = sales_data['date'].dt.day
        sales_data['is_payday'] = (
            (sales_data['day_of_month'] == 15) |
            (sales_data['date'].dt.is_month_end) |
            (sales_data['day_of_month'] >= sales_data['date'].dt.days_in_month - 2)
        )

        payday_avg = sales_data[sales_data['is_payday']]['quantity'].mean()
        regular_avg = sales_data[~sales_data['is_payday']]['quantity'].mean()

        payday_count = sales_data[sales_data['is_payday']]['quantity'].count()

        if payday_count < 4:
            return insights

        pct_diff = ((payday_avg - regular_avg) / regular_avg) * 100

        if abs(pct_diff) > self.significance_threshold * 100:
            confidence = self._calculate_pattern_confidence(
                sample_size=payday_count,
                effect_size=abs(pct_diff) / 100,
                variability=sales_data['quantity'].std()
            )

            if confidence >= min_confidence and pct_diff > 0:
                insight = self._create_insight(
                    tenant_id=tenant_id,
                    inventory_product_id=inventory_product_id,
                    insight_type='pattern',
                    category='sales',
                    priority='medium',
                    title='Payday Shopping Pattern Detected',
                    description=f'Sales increase {pct_diff:.0f}% during payday periods (15th and month-end). Average {payday_avg:.1f} vs {regular_avg:.1f} units.',
                    confidence=confidence,
                    metrics={
                        'payday_avg': float(payday_avg),
                        'regular_avg': float(regular_avg),
                        'difference_pct': float(pct_diff)
                    },
                    actionable=True,
                    actions=[
                        {'label': 'Increase Payday Stock', 'action': 'adjust_payday_production'}
                    ]
                )
                insights.append(insight)

        return insights

    async def _detect_hourly_patterns(
        self,
        tenant_id: str,
        inventory_product_id: str,
        sales_data: pd.DataFrame,
        min_confidence: int
    ) -> List[Dict[str, Any]]:
        """Detect hourly sales patterns (if POS data available)."""
        insights = []

        if 'hour' not in sales_data.columns or 'quantity' not in sales_data.columns:
            return insights

        hourly_avg = sales_data.groupby('hour')['quantity'].agg(['mean', 'count']).reset_index()
        hourly_avg = hourly_avg[hourly_avg['count'] >= 3]  # At least 3 observations

        if len(hourly_avg) < 3:
            return insights

        overall_avg = sales_data['quantity'].mean()

        # Find peak hours (top 3)
        top_hours = hourly_avg.nlargest(3, 'mean')

        for _, row in top_hours.iterrows():
            hour_avg = row['mean']
            pct_diff = ((hour_avg - overall_avg) / overall_avg) * 100

            if pct_diff > self.significance_threshold * 100:
                confidence = self._calculate_pattern_confidence(
                    sample_size=int(row['count']),
                    effect_size=pct_diff / 100,
                    variability=sales_data['quantity'].std()
                )

                if confidence >= min_confidence:
                    hour = int(row['hour'])
                    time_label = f"{hour:02d}:00-{(hour + 1):02d}:00"

                    insight = self._create_insight(
                        tenant_id=tenant_id,
                        inventory_product_id=inventory_product_id,
                        insight_type='pattern',
                        category='sales',
                        priority='low',
                        title=f'Peak Sales Hour: {time_label}',
                        description=f'Sales peak during {time_label} with {hour_avg:.1f} units ({pct_diff:.0f}% above average).',
                        confidence=confidence,
                        metrics={
                            'peak_hour': hour,
                            'avg_sales': float(hour_avg),
                            'overall_avg': float(overall_avg),
                            'difference_pct': float(pct_diff)
                        },
                        actionable=True,
                        actions=[
                            {'label': 'Ensure Fresh Stock', 'action': 'schedule_production'},
                            {'label': 'Increase Staffing', 'action': 'adjust_staffing'}
                        ]
                    )
                    insights.append(insight)

        return insights

    async def _detect_weather_correlations(
        self,
        tenant_id: str,
        inventory_product_id: str,
        sales_data: pd.DataFrame,
        min_confidence: int
    ) -> List[Dict[str, Any]]:
        """Detect weather-sales correlations."""
        insights = []

        if 'temperature' not in sales_data.columns or 'quantity' not in sales_data.columns:
            return insights

        # Remove NaN values
        clean_data = sales_data[['temperature', 'quantity']].dropna()

        if len(clean_data) < 30:  # Need sufficient data
            return insights

        # Calculate correlation
        correlation, p_value = stats.pearsonr(clean_data['temperature'], clean_data['quantity'])

        if abs(correlation) > 0.3 and p_value < 0.05:  # Moderate and statistically significant
            confidence = self._calculate_correlation_confidence(correlation, p_value, len(clean_data))

            if confidence >= min_confidence:
                direction = 'increase' if correlation > 0 else 'decrease'

                insight = self._create_insight(
                    tenant_id=tenant_id,
                    inventory_product_id=inventory_product_id,
                    insight_type='insight',
                    category='forecasting',
                    priority='medium' if abs(correlation) > 0.5 else 'low',
                    title=f'Temperature Impact on Sales: {abs(correlation):.0%} Correlation',
                    description=f'Sales {direction} with temperature (correlation: {correlation:.2f}). Warmer weather is associated with {"higher" if correlation > 0 else "lower"} sales.',
                    confidence=confidence,
                    metrics={
                        'correlation': float(correlation),
                        'p_value': float(p_value),
                        'sample_size': len(clean_data),
                        'direction': direction
                    },
                    actionable=False
                )
                insights.append(insight)

        return insights

    async def _detect_trends(
        self,
        tenant_id: str,
        inventory_product_id: str,
        sales_data: pd.DataFrame,
        min_confidence: int
    ) -> List[Dict[str, Any]]:
        """Detect overall trends (growing, declining, stable)."""
        insights = []

        if 'date' not in sales_data.columns or 'quantity' not in sales_data.columns or len(sales_data) < 60:
            return insights

        # Sort by date
        sales_data = sales_data.sort_values('date')

        # Calculate 30-day rolling average
        sales_data['rolling_30d'] = sales_data['quantity'].rolling(window=30, min_periods=15).mean()

        # Compare first and last 30-day averages
        first_30_avg = sales_data['rolling_30d'].iloc[:30].mean()
        last_30_avg = sales_data['rolling_30d'].iloc[-30:].mean()

        if pd.isna(first_30_avg) or pd.isna(last_30_avg):
            return insights

        pct_change = ((last_30_avg - first_30_avg) / first_30_avg) * 100

        if abs(pct_change) > 10:  # Treat a change above 10% as significant
            confidence = min(95, 70 + int(abs(pct_change)))  # Larger change = higher confidence

            trend_type = 'growing' if pct_change > 0 else 'declining'

            insight = self._create_insight(
                tenant_id=tenant_id,
                inventory_product_id=inventory_product_id,
                insight_type='prediction',
                category='forecasting',
                priority='high' if abs(pct_change) > 20 else 'medium',
                title=f'Sales Trend: {trend_type.title()} {abs(pct_change):.0f}%',
                description=f'Sales show a {trend_type} trend over the period. Current 30-day average: {last_30_avg:.1f} vs earlier: {first_30_avg:.1f} ({pct_change:+.0f}%).',
                confidence=confidence,
                metrics={
                    'current_avg': float(last_30_avg),
                    'previous_avg': float(first_30_avg),
                    'change_pct': float(pct_change),
                    'trend': trend_type
                },
                actionable=True,
                actions=[
                    {'label': 'Adjust Forecast Model', 'action': 'update_forecast'},
                    {'label': 'Review Capacity', 'action': 'review_production_capacity'}
                ]
            )
            insights.append(insight)

        return insights

    def _calculate_pattern_confidence(
        self,
        sample_size: int,
        effect_size: float,
        variability: float
    ) -> int:
        """
        Calculate confidence score for a detected pattern.

        Args:
            sample_size: Number of observations
            effect_size: Size of the effect (e.g., 0.25 for a 25% difference)
            variability: Standard deviation of the data

        Returns:
            Confidence score (0-100)
        """
        # Base confidence from sample size
        if sample_size < 4:
            base = 50
        elif sample_size < 10:
            base = 65
        elif sample_size < 30:
            base = 75
        elif sample_size < 100:
            base = 85
        else:
            base = 90

        # Adjust for effect size
        effect_boost = min(15, effect_size * 30)

        # Adjust for variability (penalize high variability)
        variability_penalty = min(10, variability / 10)

        confidence = base + effect_boost - variability_penalty

        return int(max(0, min(100, confidence)))

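    # Illustrative scoring (not part of the committed code): 20 samples with a 25%
    # effect and a std of 12 gives base 75 (10 <= n < 30), effect boost 7.5
    # (min(15, 0.25 * 30)), and variability penalty 1.2, so the score is 81.
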
    def _calculate_correlation_confidence(
        self,
        correlation: float,
        p_value: float,
        sample_size: int
    ) -> int:
        """Calculate confidence for correlation insights."""
        # Base confidence from correlation strength
        base = abs(correlation) * 100

        # Boost for significance
        if p_value < 0.001:
            significance_boost = 15
        elif p_value < 0.01:
            significance_boost = 10
        elif p_value < 0.05:
            significance_boost = 5
        else:
            significance_boost = 0

        # Boost for sample size
        if sample_size > 100:
            sample_boost = 10
        elif sample_size > 50:
            sample_boost = 5
        else:
            sample_boost = 0

        confidence = base + significance_boost + sample_boost

        return int(max(0, min(100, confidence)))

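    # Illustrative scoring: a correlation of 0.45 with p = 0.02 over 80 samples
    # gives base 45, significance boost 5 (p < 0.05), sample boost 5 (n > 50),
    # for a total of 55.
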
    def _create_insight(
        self,
        tenant_id: str,
        inventory_product_id: str,
        insight_type: str,
        category: str,
        priority: str,
        title: str,
        description: str,
        confidence: int,
        metrics: Dict[str, Any],
        actionable: bool,
        actions: Optional[List[Dict[str, str]]] = None,
        impact_type: Optional[str] = None,
        impact_value: Optional[float] = None,
        impact_unit: Optional[str] = None
    ) -> Dict[str, Any]:
        """Create an insight dictionary for the AI Insights Service."""
        return {
            'tenant_id': tenant_id,
            'type': insight_type,
            'priority': priority,
            'category': category,
            'title': title,
            'description': description,
            'impact_type': impact_type,
            'impact_value': impact_value,
            'impact_unit': impact_unit,
            'confidence': confidence,
            'metrics_json': metrics,
            'actionable': actionable,
            'recommendation_actions': actions or [],
            'source_service': 'forecasting',
            'source_data_id': f'pattern_detection_{inventory_product_id}_{datetime.utcnow().strftime("%Y%m%d")}'
        }

854
services/forecasting/app/ml/predictor.py
Normal file
@@ -0,0 +1,854 @@
# ================================================================
# services/forecasting/app/ml/predictor.py
# ================================================================
"""
Enhanced predictor module with advanced forecasting capabilities
"""

import structlog
from typing import Dict, List, Any, Optional, Tuple
import pandas as pd
import numpy as np
from datetime import datetime, date, timedelta
import pickle
import json

from app.core.config import settings
from shared.monitoring.metrics import MetricsCollector
from shared.database.base import create_database_manager

logger = structlog.get_logger()
metrics = MetricsCollector("forecasting-service")


class BakeryPredictor:
    """
    Advanced predictor for bakery demand forecasting with dependency injection.
    Handles Prophet models and business-specific logic.
    """

    def __init__(self, database_manager=None, use_dynamic_rules=True):
        self.database_manager = database_manager or create_database_manager(settings.DATABASE_URL, "forecasting-service")
        self.model_cache = {}
        self.use_dynamic_rules = use_dynamic_rules

        if use_dynamic_rules:
            try:
                from app.ml.dynamic_rules_engine import DynamicRulesEngine
                from shared.clients.ai_insights_client import AIInsightsClient
                self.rules_engine = DynamicRulesEngine()
                self.ai_insights_client = AIInsightsClient(
                    base_url=settings.AI_INSIGHTS_SERVICE_URL or "http://ai-insights-service:8000"
                )
                # Also provide business_rules for consistency
                self.business_rules = BakeryBusinessRules(
                    use_dynamic_rules=True,
                    ai_insights_client=self.ai_insights_client
                )
            except ImportError as e:
                logger.warning(f"Failed to import dynamic rules engine: {e}. Falling back to basic business rules.")
                self.use_dynamic_rules = False
                self.business_rules = BakeryBusinessRules()
        else:
            self.business_rules = BakeryBusinessRules()

class BakeryForecaster:
    """
    Enhanced forecaster that integrates with the repository pattern.
    Uses enhanced features from the training service for predictions.
    """

    def __init__(self, database_manager=None, use_enhanced_features=True):
        self.database_manager = database_manager or create_database_manager(settings.DATABASE_URL, "forecasting-service")
        self.predictor = BakeryPredictor(database_manager)
        self.use_enhanced_features = use_enhanced_features

        # Initialize business rules (required by predict_demand; previously missing,
        # which caused an AttributeError)
        self.business_rules = BakeryBusinessRules(
            use_dynamic_rules=True,
            ai_insights_client=getattr(self.predictor, 'ai_insights_client', None)
        )

        # Initialize POI feature service
        from app.services.poi_feature_service import POIFeatureService
        self.poi_feature_service = POIFeatureService()

        # Initialize enhanced data processor from shared module
        if use_enhanced_features:
            try:
                from shared.ml.data_processor import EnhancedBakeryDataProcessor
                self.data_processor = EnhancedBakeryDataProcessor(region='MD')
                logger.info("Enhanced features enabled using shared data processor")
            except ImportError as e:
                logger.warning(
                    f"Could not import EnhancedBakeryDataProcessor from shared module: {e}. "
                    "Falling back to basic features."
                )
                self.use_enhanced_features = False
                self.data_processor = None
        else:
            self.data_processor = None


    async def predict_demand(self, model, features: Dict[str, Any],
                             business_type: str = "individual") -> Dict[str, float]:
        """Generate demand prediction with business rules applied"""

        try:
            # Generate base prediction
            base_prediction = await self._generate_base_prediction(model, features)

            # Apply business rules (async: may fetch learned rules)
            adjusted_prediction = await self.business_rules.apply_rules(
                base_prediction, features, business_type
            )

            # Add uncertainty estimation
            final_prediction = self._add_uncertainty_bands(adjusted_prediction, features)

            return final_prediction

        except Exception as e:
            logger.error("Error in demand prediction", error=str(e))
            raise

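    # Illustrative call (a sketch; `model` is assumed to be a fitted Prophet model
    # loaded elsewhere, and the feature values are made up):
    #
    #     forecaster = BakeryForecaster()
    #     result = await forecaster.predict_demand(
    #         model,
    #         features={'date': '2024-06-01', 'temperature': 22.0,
    #                   'precipitation': 0.0, 'is_weekend': True, 'is_holiday': False},
    #         business_type='individual'
    #     )
    #     # result contains 'demand', 'lower_bound', 'upper_bound', 'uncertainty_factor'
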
    async def _generate_base_prediction(self, model, features: Dict[str, Any]) -> Dict[str, float]:
        """Generate base prediction from Prophet model"""

        try:
            # Convert features to Prophet DataFrame (async: may fetch POI features)
            df = await self._prepare_prophet_dataframe(features)

            # Generate forecast
            forecast = model.predict(df)

            if len(forecast) > 0:
                row = forecast.iloc[0]
                return {
                    "yhat": float(row['yhat']),
                    "yhat_lower": float(row['yhat_lower']),
                    "yhat_upper": float(row['yhat_upper']),
                    "trend": float(row.get('trend', 0)),
                    "seasonal": float(row.get('seasonal', 0)),
                    "weekly": float(row.get('weekly', 0)),
                    "yearly": float(row.get('yearly', 0)),
                    "holidays": float(row.get('holidays', 0))
                }
            else:
                raise ValueError("No prediction generated from model")

        except Exception as e:
            logger.error("Error generating base prediction", error=str(e))
            raise

    async def _prepare_prophet_dataframe(self, features: Dict[str, Any],
                                         historical_data: pd.DataFrame = None) -> pd.DataFrame:
        """
        Convert features to a Prophet-compatible DataFrame.
        Uses enhanced features when available (60+ features vs the basic 10).
        """

        try:
            if self.use_enhanced_features and self.data_processor:
                # Use enhanced data processor from training service
                logger.info("Generating enhanced features for prediction")

                # Create future date range
                future_dates = pd.DatetimeIndex([pd.to_datetime(features['date'])])

                # Prepare weather forecast DataFrame
                weather_df = pd.DataFrame({
                    'date': [pd.to_datetime(features['date'])],
                    'temperature': [features.get('temperature', 15.0)],
                    'precipitation': [features.get('precipitation', 0.0)],
                    'humidity': [features.get('humidity', 60.0)],
                    'wind_speed': [features.get('wind_speed', 5.0)],
                    'pressure': [features.get('pressure', 1013.0)]
                })

                # Fetch POI features if tenant_id is available
                poi_features = None
                if 'tenant_id' in features:
                    poi_features = await self.poi_feature_service.get_poi_features(
                        features['tenant_id']
                    )
                    if poi_features:
                        logger.info(
                            f"Retrieved {len(poi_features)} POI features for prediction",
                            tenant_id=features['tenant_id']
                        )

                # Use data processor to create ALL enhanced features
                df = await self.data_processor.prepare_prediction_features(
                    future_dates=future_dates,
                    weather_forecast=weather_df,
                    traffic_forecast=None,  # Will add when traffic forecasting is implemented
                    poi_features=poi_features,  # POI features for location-based forecasting
                    historical_data=historical_data  # For lagged features
                )

                logger.info(f"Generated {len(df.columns)} enhanced features for prediction")
                return df

            else:
                # Fallback to basic features
                logger.info("Using basic features for prediction")

                # Create base DataFrame
                df = pd.DataFrame({
                    'ds': [pd.to_datetime(features['date'])]
                })

                # Add regressor features
                feature_mapping = {
                    'temperature': 'temperature',
                    'precipitation': 'precipitation',
                    'humidity': 'humidity',
                    'wind_speed': 'wind_speed',
                    'traffic_volume': 'traffic_volume',
                    'pedestrian_count': 'pedestrian_count'
                }

                for feature_key, df_column in feature_mapping.items():
                    if feature_key in features and features[feature_key] is not None:
                        df[df_column] = float(features[feature_key])
                    else:
                        df[df_column] = 0.0

                # Add categorical features
                df['day_of_week'] = int(features.get('day_of_week', 0))
                df['is_weekend'] = int(features.get('is_weekend', False))
                df['is_holiday'] = int(features.get('is_holiday', False))

                # Business type
                business_type = features.get('business_type', 'individual')
                df['is_central_workshop'] = int(business_type == 'central_workshop')

                return df

        except Exception as e:
            logger.error(f"Error preparing Prophet dataframe: {e}, falling back to basic features")
            # Fallback to basic implementation on error
            df = pd.DataFrame({'ds': [pd.to_datetime(features['date'])]})
            df['temperature'] = features.get('temperature', 15.0)
            df['precipitation'] = features.get('precipitation', 0.0)
            df['is_weekend'] = int(features.get('is_weekend', False))
            df['is_holiday'] = int(features.get('is_holiday', False))
            return df

    def _add_uncertainty_bands(self, prediction: Dict[str, float],
                               features: Dict[str, Any]) -> Dict[str, float]:
        """Add uncertainty estimation based on external factors"""

        try:
            base_demand = prediction["yhat"]
            base_lower = prediction["yhat_lower"]
            base_upper = prediction["yhat_upper"]

            # Weather uncertainty
            weather_uncertainty = self._calculate_weather_uncertainty(features)

            # Holiday uncertainty
            holiday_uncertainty = self._calculate_holiday_uncertainty(features)

            # Weekend uncertainty
            weekend_uncertainty = self._calculate_weekend_uncertainty(features)

            # Total uncertainty factor
            total_uncertainty = 1.0 + weather_uncertainty + holiday_uncertainty + weekend_uncertainty

            # Widen the bounds around the point forecast
            uncertainty_range = (base_upper - base_lower) * total_uncertainty
            center_point = base_demand

            adjusted_lower = center_point - (uncertainty_range / 2)
            adjusted_upper = center_point + (uncertainty_range / 2)

            return {
                "demand": max(0, base_demand),  # Never predict negative demand
                "lower_bound": max(0, adjusted_lower),
                "upper_bound": adjusted_upper,
                "uncertainty_factor": total_uncertainty,
                "trend": prediction.get("trend", 0),
                "seasonal": prediction.get("seasonal", 0),
                "holiday_effect": prediction.get("holidays", 0)
            }

        except Exception as e:
            logger.error("Error adding uncertainty bands", error=str(e))
            # Return basic prediction if uncertainty calculation fails
            return {
                "demand": max(0, prediction["yhat"]),
                "lower_bound": max(0, prediction["yhat_lower"]),
                "upper_bound": prediction["yhat_upper"],
                "uncertainty_factor": 1.0
            }

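    # Worked example (illustrative): yhat = 100 with bounds [90, 110] on a rainy
    # holiday weekend (4 mm of rain, mild temperature) gives
    # total_uncertainty = 1.0 + 0.2 (rain: 0.05 * 4) + 0.2 (holiday) + 0.1 (weekend) = 1.5,
    # so the 20-unit band widens to 30 and the bounds become [85, 115].
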
    def _calculate_weather_uncertainty(self, features: Dict[str, Any]) -> float:
        """Calculate weather-based uncertainty"""

        uncertainty = 0.0

        # Temperature extremes add uncertainty
        temp = features.get('temperature')
        if temp is not None:
            if temp < settings.TEMPERATURE_THRESHOLD_COLD or temp > settings.TEMPERATURE_THRESHOLD_HOT:
                uncertainty += 0.1

        # Rain adds uncertainty
        precipitation = features.get('precipitation')
        if precipitation is not None and precipitation > 0:
            uncertainty += 0.05 * min(precipitation, 10)  # Cap the contribution at 10 mm of rain

        return uncertainty

    def _calculate_holiday_uncertainty(self, features: Dict[str, Any]) -> float:
        """Calculate holiday-based uncertainty"""

        if features.get('is_holiday', False):
            return 0.2  # 20% additional uncertainty on holidays
        return 0.0

    def _calculate_weekend_uncertainty(self, features: Dict[str, Any]) -> float:
        """Calculate weekend-based uncertainty"""

        if features.get('is_weekend', False):
            return 0.1  # 10% additional uncertainty on weekends
        return 0.0

    async def analyze_demand_patterns(
        self,
        tenant_id: str,
        inventory_product_id: str,
        sales_data: pd.DataFrame,
        forecast_horizon_days: int = 30,
        min_history_days: int = 90
    ) -> Dict[str, Any]:
        """
        Analyze demand patterns by delegating to the sales service.

        NOTE: Sales data analysis is the responsibility of the sales service.
        This method calls the sales service API to get demand pattern analysis.

        Args:
            tenant_id: Tenant identifier
            inventory_product_id: Product identifier
            sales_data: Historical sales DataFrame (not used - kept for backward compatibility)
            forecast_horizon_days: Days to forecast ahead (not used currently)
            min_history_days: Minimum history required

        Returns:
            Analysis results with patterns, trends, and insights from sales service
        """
        try:
            from shared.clients.sales_client import SalesServiceClient

            logger.info(
                "Requesting demand pattern analysis from sales service",
                tenant_id=tenant_id,
                inventory_product_id=inventory_product_id
            )

            # Initialize sales client
            sales_client = SalesServiceClient(config=settings, calling_service_name="forecasting")

            # Calculate date range
            end_date = date.today()
            start_date = end_date - timedelta(days=min_history_days)

            # Call sales service for pattern analysis
            patterns = await sales_client.get_product_demand_patterns(
                tenant_id=tenant_id,
                product_id=inventory_product_id,
                start_date=start_date,
                end_date=end_date,
                min_history_days=min_history_days
            )

            # Generate insights from patterns
            insights = self._generate_insights_from_patterns(
                patterns,
                tenant_id,
                inventory_product_id
            )

            # Add insights to the result
            patterns['insights'] = insights

            logger.info(
                "Demand pattern analysis received from sales service",
                tenant_id=tenant_id,
                inventory_product_id=inventory_product_id,
                insights_generated=len(insights)
            )

            return patterns

        except Exception as e:
            logger.error(
                "Error getting demand patterns from sales service",
                tenant_id=tenant_id,
                inventory_product_id=inventory_product_id,
                error=str(e),
                exc_info=True
            )
            return {
                'analyzed_at': datetime.utcnow().isoformat(),
                'history_days': 0,
                'insights': [],
                'patterns': {},
                'trend_analysis': {},
                'seasonal_factors': {},
                'statistics': {},
                'error': str(e)
            }

    def _generate_insights_from_patterns(
        self,
        patterns: Dict[str, Any],
        tenant_id: str,
        inventory_product_id: str
    ) -> List[Dict[str, Any]]:
        """
        Generate actionable insights from demand patterns provided by the sales service.

        Args:
            patterns: Demand patterns from sales service
            tenant_id: Tenant identifier
            inventory_product_id: Product identifier

        Returns:
            List of insights for AI Insights Service
        """
        insights = []

        # Check if there was an error in pattern analysis
        if 'error' in patterns:
            return insights

        trend = patterns.get('trend_analysis', {})
        stats = patterns.get('statistics', {})
        seasonal = patterns.get('seasonal_factors', {})

        # Trend insight
        if trend.get('is_increasing'):
            insights.append({
                'type': 'insight',
                'priority': 'medium',
                'category': 'forecasting',
                'title': 'Increasing Demand Trend Detected',
                'description': f"Product shows {trend.get('direction', 'increasing')} demand trend. Consider increasing inventory levels.",
                'impact_type': 'demand_increase',
                'impact_value': abs(trend.get('correlation', 0) * 100),
                'impact_unit': 'percent',
                'confidence': min(int(abs(trend.get('correlation', 0)) * 100), 95),
                'metrics_json': trend,
                'actionable': True,
                'recommendation_actions': [
                    {
                        'label': 'Increase Safety Stock',
                        'action': 'increase_safety_stock',
                        'params': {'product_id': inventory_product_id, 'factor': 1.2}
                    }
                ]
            })
        elif trend.get('is_decreasing'):
            insights.append({
                'type': 'insight',
                'priority': 'low',
                'category': 'forecasting',
                'title': 'Decreasing Demand Trend Detected',
                'description': f"Product shows {trend.get('direction', 'decreasing')} demand trend. Consider reviewing inventory strategy.",
                'impact_type': 'demand_decrease',
                'impact_value': abs(trend.get('correlation', 0) * 100),
                'impact_unit': 'percent',
                'confidence': min(int(abs(trend.get('correlation', 0)) * 100), 95),
                'metrics_json': trend,
                'actionable': True,
                'recommendation_actions': [
                    {
                        'label': 'Review Inventory Levels',
                        'action': 'review_inventory',
                        'params': {'product_id': inventory_product_id}
                    }
                ]
            })

        # Volatility insight
        cv = stats.get('coefficient_of_variation', 0)
        if cv > 0.5:
            insights.append({
                'type': 'alert',
                'priority': 'medium',
                'category': 'forecasting',
                'title': 'High Demand Variability Detected',
                'description': f'Product has high demand variability (CV: {cv:.2f}). Consider dynamic safety stock levels.',
                'impact_type': 'demand_variability',
                'impact_value': round(cv * 100, 1),
                'impact_unit': 'percent',
                'confidence': 85,
                'metrics_json': stats,
                'actionable': True,
                'recommendation_actions': [
                    {
                        'label': 'Enable Dynamic Safety Stock',
                        'action': 'enable_dynamic_safety_stock',
                        'params': {'product_id': inventory_product_id}
                    }
                ]
            })

        # Seasonal pattern insight
        peak_ratio = seasonal.get('peak_ratio', 1.0)
        if peak_ratio > 1.5:
            pattern_data = patterns.get('patterns', {})
            peak_day = pattern_data.get('peak_day', 0)
            low_day = pattern_data.get('low_day', 0)
            insights.append({
                'type': 'insight',
                'priority': 'medium',
                'category': 'forecasting',
                'title': 'Strong Weekly Pattern Detected',
                'description': f'Demand is {peak_ratio:.1f}x higher on day {peak_day} compared to day {low_day}. Adjust production schedule accordingly.',
                'impact_type': 'seasonal_pattern',
                'impact_value': round((peak_ratio - 1) * 100, 1),
                'impact_unit': 'percent',
                'confidence': 80,
                'metrics_json': {**seasonal, **pattern_data},
                'actionable': True,
                'recommendation_actions': [
                    {
                        'label': 'Adjust Production Schedule',
                        'action': 'adjust_production',
                        'params': {'product_id': inventory_product_id, 'pattern': 'weekly'}
                    }
                ]
            })

        return insights

    async def _get_dynamic_rules(self, tenant_id: str, inventory_product_id: str, rule_type: str) -> Dict[str, float]:
        """
        Fetch learned dynamic rules from AI Insights Service.

        Args:
            tenant_id: Tenant UUID
            inventory_product_id: Product UUID
            rule_type: Type of rules (weather, temporal, holiday, etc.)

        Returns:
            Dictionary of learned rules with factors
        """
        try:
            from uuid import UUID

            # The AI insights client lives on the wrapped predictor (only when
            # dynamic rules are enabled); fall back to an empty rule set otherwise
            client = getattr(self.predictor, 'ai_insights_client', None)
            if client is None:
                return {}

            # Fetch latest rules insight for this product
            insights = await client.get_insights(
                tenant_id=UUID(tenant_id),
                filters={
                    'category': 'forecasting',
                    'actionable_only': False,
                    'page_size': 100
                }
            )

            if not insights or 'items' not in insights:
                return {}

            # Find the most recent rules insight for this product
            for insight in insights['items']:
                if insight.get('source_model') == 'dynamic_rules_engine':
                    metrics = insight.get('metrics_json', {})
                    if metrics.get('inventory_product_id') == inventory_product_id:
                        rules_data = metrics.get('rules', {})
                        return rules_data.get(rule_type, {})

            return {}

        except Exception as e:
            logger.warning(f"Failed to fetch dynamic rules: {e}")
            return {}

async def generate_forecast_with_repository(self, tenant_id: str, inventory_product_id: str,
|
||||
forecast_date: date, model_id: str = None) -> Dict[str, Any]:
|
||||
"""Generate forecast with repository integration"""
|
||||
try:
|
||||
# This would integrate with repositories for model loading and caching
|
||||
# For now, we'll implement basic forecasting logic using the forecaster's methods
|
||||
# This is a simplified approach - in production, this would use repositories
|
||||
|
||||
# For now, prepare minimal features for prediction
|
||||
features = {
|
||||
'date': forecast_date.isoformat(),
|
||||
'day_of_week': forecast_date.weekday(),
|
||||
'is_weekend': 1 if forecast_date.weekday() >= 5 else 0,
|
||||
'is_holiday': 0, # Would come from calendar service in real implementation
|
||||
# Add default weather values if needed
|
||||
'temperature': 20.0,
|
||||
'precipitation': 0.0,
|
||||
}
|
||||
|
||||
# This is a placeholder - in a full implementation, we would:
|
||||
# 1. Load the appropriate model from repository
|
||||
# 2. Use historical data to make prediction
|
||||
# 3. Apply business rules
|
||||
# For now, return the structure with basic info
|
||||
|
||||
# For more realistic implementation, we'd use self.predict_demand method
|
||||
# but that requires a model object which needs to be loaded
|
||||
|
||||
return {
|
||||
"tenant_id": tenant_id,
|
||||
"inventory_product_id": inventory_product_id,
|
||||
"forecast_date": forecast_date.isoformat(),
|
||||
"prediction": 10.0, # Placeholder value - in reality would be calculated
|
||||
"confidence_interval": {"lower": 8.0, "upper": 12.0}, # Placeholder values
|
||||
"status": "completed",
|
||||
"repository_integration": True,
|
||||
"forecast_method": "placeholder"
|
||||
}
|
||||
except Exception as e:
|
||||
logger.error("Forecast generation failed", error=str(e))
|
||||
raise
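
    # Example (hypothetical usage sketch; assumes a running event loop and a
    # concrete forecaster instance exposing this method):
    #
    #   forecast = await forecaster.generate_forecast_with_repository(
    #       tenant_id="tenant-123",
    #       inventory_product_id="prod-456",
    #       forecast_date=date(2024, 6, 1),
    #   )
    #   print(forecast["prediction"], forecast["confidence_interval"])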


class BakeryBusinessRules:
    """
    Business rules for Spanish bakeries.
    Applies domain-specific adjustments to predictions.
    Supports both dynamically learned rules and hardcoded fallbacks.
    """

    def __init__(self, use_dynamic_rules=False, ai_insights_client=None):
        self.use_dynamic_rules = use_dynamic_rules
        self.ai_insights_client = ai_insights_client
        self.rules_cache = {}

    async def apply_rules(self, prediction: Dict[str, float], features: Dict[str, Any],
                          business_type: str, tenant_id: str = None, inventory_product_id: str = None) -> Dict[str, float]:
        """Apply all business rules to a prediction (dynamic or hardcoded)."""

        adjusted_prediction = prediction.copy()

        # Apply weather rules
        adjusted_prediction = await self._apply_weather_rules(
            adjusted_prediction, features, tenant_id, inventory_product_id
        )

        # Apply time-based rules
        adjusted_prediction = await self._apply_time_rules(
            adjusted_prediction, features, tenant_id, inventory_product_id
        )

        # Apply business type rules
        adjusted_prediction = self._apply_business_type_rules(adjusted_prediction, business_type)

        # Apply Spanish-specific rules
        adjusted_prediction = self._apply_spanish_rules(adjusted_prediction, features)

        return adjusted_prediction
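
    # Example (hypothetical usage sketch; the prediction dict uses the
    # Prophet-style yhat/yhat_lower/yhat_upper keys consumed by the rule
    # methods below, and "retail_bakery" is an assumed business type):
    #
    #   rules = BakeryBusinessRules(use_dynamic_rules=False)
    #   adjusted = await rules.apply_rules(
    #       prediction={"yhat": 100.0, "yhat_lower": 80.0, "yhat_upper": 120.0},
    #       features={"precipitation": 2.0, "temperature": 18.0, "is_weekend": True,
    #                 "is_holiday": False, "date": "2024-06-01"},
    #       business_type="retail_bakery",
    #   )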

    async def _get_dynamic_rules(self, tenant_id: str, inventory_product_id: str, rule_type: str) -> Dict[str, float]:
        """
        Fetch learned dynamic rules from the AI Insights Service, with caching.

        Args:
            tenant_id: Tenant UUID
            inventory_product_id: Product UUID
            rule_type: Type of rules (weather, temporal, holiday, etc.)

        Returns:
            Dictionary of learned rules with factors
        """
        # Check cache first
        cache_key = f"{tenant_id}:{inventory_product_id}:{rule_type}"
        if cache_key in self.rules_cache:
            return self.rules_cache[cache_key]

        try:
            from uuid import UUID

            if not self.ai_insights_client:
                return {}

            # Fetch the latest forecasting insights for this tenant
            insights = await self.ai_insights_client.get_insights(
                tenant_id=UUID(tenant_id),
                filters={
                    'category': 'forecasting',
                    'actionable_only': False,
                    'page_size': 100
                }
            )

            if not insights or 'items' not in insights:
                return {}

            # Take the first matching rules insight for this product
            # (assumes the service returns items sorted newest-first)
            for insight in insights['items']:
                if insight.get('source_model') == 'dynamic_rules_engine':
                    metrics = insight.get('metrics_json', {})
                    if metrics.get('inventory_product_id') == inventory_product_id:
                        rules_data = metrics.get('rules', {})
                        result = rules_data.get(rule_type, {})
                        # Cache the result
                        self.rules_cache[cache_key] = result
                        return result

            return {}

        except Exception as e:
            logger.warning(f"Failed to fetch dynamic rules: {e}")
            return {}
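
    # Example (hypothetical shape of a learned rules payload, assuming the
    # dynamic_rules_engine stores per-condition multipliers in metrics_json):
    #
    #   rules = await self._get_dynamic_rules(tenant_id, product_id, 'weather')
    #   # rules -> {'rain_factor': 0.85, 'temperature_hot_factor': 0.92,
    #   #           'temperature_cold_factor': 1.08}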

    async def _apply_weather_rules(self, prediction: Dict[str, float],
                                   features: Dict[str, Any],
                                   tenant_id: str = None,
                                   inventory_product_id: str = None) -> Dict[str, float]:
        """Apply weather-based business rules (learned factors with hardcoded fallbacks)."""

        # Fetch learned factors once; an empty dict makes every lookup below
        # fall back to the hardcoded default. This also avoids re-applying
        # factors twice if the dynamic path fails partway through.
        rules: Dict[str, float] = {}
        if self.use_dynamic_rules and tenant_id and inventory_product_id:
            try:
                rules = await self._get_dynamic_rules(tenant_id, inventory_product_id, 'weather')
            except Exception as e:
                logger.warning(f"Failed to fetch dynamic weather rules, using hardcoded fallback: {e}")

        # Rain impact
        precipitation = features.get('precipitation', 0)
        if precipitation > 0:
            rain_factor = rules.get('rain_factor', settings.RAIN_IMPACT_FACTOR)
            prediction["yhat"] *= rain_factor
            prediction["yhat_lower"] *= rain_factor
            prediction["yhat_upper"] *= rain_factor

        # Temperature impact (point estimate only, as in the original rules)
        temperature = features.get('temperature')
        if temperature is not None:
            if temperature > settings.TEMPERATURE_THRESHOLD_HOT:
                prediction["yhat"] *= rules.get('temperature_hot_factor', 0.9)
            elif temperature < settings.TEMPERATURE_THRESHOLD_COLD:
                prediction["yhat"] *= rules.get('temperature_cold_factor', 1.1)

        return prediction
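
    # Worked example (hypothetical numbers): with RAIN_IMPACT_FACTOR = 0.85
    # and a rainy, cold day (precipitation > 0, temperature below the cold
    # threshold), a prediction of yhat = 100 becomes
    # 100 * 0.85 * 1.1 = 93.5 after the weather rules run.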

    async def _apply_time_rules(self, prediction: Dict[str, float],
                                features: Dict[str, Any],
                                tenant_id: str = None,
                                inventory_product_id: str = None) -> Dict[str, float]:
        """Apply time-based business rules (learned factors with hardcoded fallbacks)."""

        # Same pattern as the weather rules: fetch learned factors once and
        # fall back to the configured defaults on any miss or failure.
        rules: Dict[str, float] = {}
        if self.use_dynamic_rules and tenant_id and inventory_product_id:
            try:
                rules = await self._get_dynamic_rules(tenant_id, inventory_product_id, 'temporal')
            except Exception as e:
                logger.warning(f"Failed to fetch dynamic time rules, using hardcoded fallback: {e}")

        # Weekend adjustment
        if features.get('is_weekend', False):
            weekend_factor = rules.get('weekend_factor', settings.WEEKEND_ADJUSTMENT_FACTOR)
            prediction["yhat"] *= weekend_factor
            prediction["yhat_lower"] *= weekend_factor
            prediction["yhat_upper"] *= weekend_factor

        # Holiday adjustment
        if features.get('is_holiday', False):
            holiday_factor = rules.get('holiday_factor', settings.HOLIDAY_ADJUSTMENT_FACTOR)
            prediction["yhat"] *= holiday_factor
            prediction["yhat_lower"] *= holiday_factor
            prediction["yhat_upper"] *= holiday_factor

        return prediction

    def _apply_business_type_rules(self, prediction: Dict[str, float],
                                   business_type: str) -> Dict[str, float]:
        """Apply business-type-specific rules."""

        if business_type == "central_workshop":
            # Central workshops have more stable demand, so narrow the
            # uncertainty band to 80% of its width, recentered on yhat.
            uncertainty_reduction = 0.8
            center = prediction["yhat"]
            lower = prediction["yhat_lower"]
            upper = prediction["yhat_upper"]

            new_range = (upper - lower) * uncertainty_reduction
            prediction["yhat_lower"] = center - (new_range / 2)
            prediction["yhat_upper"] = center + (new_range / 2)

        return prediction
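
    # Worked example: with yhat = 100, yhat_lower = 70, yhat_upper = 130, a
    # central workshop keeps the point estimate while the 60-unit band shrinks
    # to 48 units, recentered on yhat: lower = 76, upper = 124.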

    def _apply_spanish_rules(self, prediction: Dict[str, float],
                             features: Dict[str, Any]) -> Dict[str, float]:
        """Apply Spanish bakery-specific rules."""

        # Spanish siesta considerations: reduced activity during typical
        # siesta hours (14:00-17:00) affects afternoon sales planning.
        date_str = features.get('date')
        if date_str:
            try:
                current_date = pd.to_datetime(date_str)
                day_of_week = current_date.weekday()

                if day_of_week < 5:  # Weekdays
                    prediction["yhat"] *= 0.95  # Slight reduction for the siesta effect
            except Exception as e:
                logger.warning(f"Error processing date in Spanish rules: {e}")
        else:
            logger.warning("Date not provided in features, skipping Spanish rules")

        return prediction
312
services/forecasting/app/ml/rules_orchestrator.py
Normal file
@@ -0,0 +1,312 @@
"""
Rules Orchestrator
Coordinates dynamic rules learning, insight posting, and integration with the forecasting service
"""

import pandas as pd
from typing import Dict, List, Any, Optional
import structlog
from datetime import datetime
from uuid import UUID

from app.ml.dynamic_rules_engine import DynamicRulesEngine
from app.clients.ai_insights_client import AIInsightsClient
from shared.messaging import UnifiedEventPublisher

logger = structlog.get_logger()


class RulesOrchestrator:
    """
    Orchestrates the dynamic rules learning and insight generation workflow.

    Workflow:
    1. Learn dynamic rules from historical data
    2. Generate insights comparing learned vs hardcoded rules
    3. Post insights to the AI Insights Service
    4. Provide learned rules for forecasting integration
    5. Track rule updates and performance
    """

    def __init__(
        self,
        ai_insights_base_url: str = "http://ai-insights-service:8000",
        event_publisher: Optional[UnifiedEventPublisher] = None
    ):
        self.rules_engine = DynamicRulesEngine()
        self.ai_insights_client = AIInsightsClient(ai_insights_base_url)
        self.event_publisher = event_publisher

    async def learn_and_post_rules(
        self,
        tenant_id: str,
        inventory_product_id: str,
        sales_data: pd.DataFrame,
        external_data: Optional[pd.DataFrame] = None,
        min_samples: int = 10
    ) -> Dict[str, Any]:
        """
        Complete workflow: learn rules and post insights.

        Args:
            tenant_id: Tenant identifier
            inventory_product_id: Product identifier
            sales_data: Historical sales data
            external_data: Optional weather/events/holidays data
            min_samples: Minimum samples for rule learning

        Returns:
            Workflow results with learned rules and posted insights
        """
        logger.info(
            "Starting dynamic rules learning workflow",
            tenant_id=tenant_id,
            inventory_product_id=inventory_product_id
        )

        # Step 1: Learn all rules from data
        rules_results = await self.rules_engine.learn_all_rules(
            tenant_id=tenant_id,
            inventory_product_id=inventory_product_id,
            sales_data=sales_data,
            external_data=external_data,
            min_samples=min_samples
        )

        logger.info(
            "Rules learning complete",
            insights_generated=len(rules_results['insights']),
            rules_learned=len(rules_results['rules'])
        )

        # Step 2: Enrich insights with tenant_id and product context
        enriched_insights = self._enrich_insights(
            rules_results['insights'],
            tenant_id,
            inventory_product_id
        )

        # Step 3: Post insights to the AI Insights Service
        if enriched_insights:
            post_results = await self.ai_insights_client.create_insights_bulk(
                tenant_id=UUID(tenant_id),
                insights=enriched_insights
            )

            logger.info(
                "Insights posted to AI Insights Service",
                total=post_results['total'],
                successful=post_results['successful'],
                failed=post_results['failed']
            )
        else:
            post_results = {'total': 0, 'successful': 0, 'failed': 0}
            logger.info("No insights to post")

        # Step 4: Publish insight events to RabbitMQ
        created_insights = post_results.get('created_insights', [])
        if created_insights:
            product_context = {'inventory_product_id': inventory_product_id}
            await self._publish_insight_events(
                tenant_id=tenant_id,
                insights=created_insights,
                product_context=product_context
            )

        # Step 5: Return comprehensive results
        return {
            'tenant_id': tenant_id,
            'inventory_product_id': inventory_product_id,
            'learned_at': rules_results['learned_at'],
            'rules': rules_results['rules'],
            'insights_generated': len(enriched_insights),
            'insights_posted': post_results['successful'],
            'insights_failed': post_results['failed'],
            'created_insights': post_results.get('created_insights', [])
        }
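
    # Example (hypothetical usage sketch; assumes a daily sales DataFrame with
    # the columns the DynamicRulesEngine expects):
    #
    #   orchestrator = RulesOrchestrator(event_publisher=publisher)
    #   results = await orchestrator.learn_and_post_rules(
    #       tenant_id="9f1b-example-tenant-uuid",
    #       inventory_product_id="3c7a-example-product-uuid",
    #       sales_data=sales_df,
    #       external_data=weather_df,
    #   )
    #   print(results['insights_posted'], results['insights_failed'])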

    def _enrich_insights(
        self,
        insights: List[Dict[str, Any]],
        tenant_id: str,
        inventory_product_id: str
    ) -> List[Dict[str, Any]]:
        """
        Enrich insights with the fields required by the AI Insights Service.

        Args:
            insights: Raw insights from the rules engine
            tenant_id: Tenant identifier
            inventory_product_id: Product identifier

        Returns:
            Enriched insights ready for posting
        """
        enriched = []

        for insight in insights:
            # Add the required tenant_id and product context
            enriched_insight = insight.copy()
            enriched_insight['tenant_id'] = tenant_id

            # Add product context to metrics
            if 'metrics_json' not in enriched_insight:
                enriched_insight['metrics_json'] = {}

            enriched_insight['metrics_json']['inventory_product_id'] = inventory_product_id

            # Add source metadata
            enriched_insight['source_service'] = 'forecasting'
            enriched_insight['source_model'] = 'dynamic_rules_engine'
            enriched_insight['detected_at'] = datetime.utcnow().isoformat()

            enriched.append(enriched_insight)

        return enriched

    async def get_learned_rules_for_forecasting(
        self,
        inventory_product_id: str
    ) -> Dict[str, Any]:
        """
        Get learned rules in a format ready for forecasting integration.

        Args:
            inventory_product_id: Product identifier

        Returns:
            Dictionary with learned multipliers for all rule types
        """
        return self.rules_engine.export_rules_for_prophet(inventory_product_id)

    def get_rule_multiplier(
        self,
        inventory_product_id: str,
        rule_type: str,
        key: str,
        default: float = 1.0
    ) -> float:
        """
        Get a learned rule multiplier, falling back to a default.

        Args:
            inventory_product_id: Product identifier
            rule_type: 'weather', 'holiday', 'event', 'day_of_week', 'month'
            key: Condition key
            default: Default multiplier if the rule has not been learned

        Returns:
            Learned multiplier or default
        """
        learned = self.rules_engine.get_rule(inventory_product_id, rule_type, key)
        return learned if learned is not None else default
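
    # Example (hypothetical key; the rule_type/key vocabulary comes from the
    # DynamicRulesEngine):
    #
    #   factor = orchestrator.get_rule_multiplier(
    #       inventory_product_id="3c7a-example-product-uuid",
    #       rule_type="day_of_week",
    #       key="saturday",
    #       default=1.0,
    #   )
    #   forecast *= factor  # stays 1.0 when no rule has been learned yet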

    async def update_rules_periodically(
        self,
        tenant_id: str,
        inventory_product_id: str,
        sales_data: pd.DataFrame,
        external_data: Optional[pd.DataFrame] = None
    ) -> Dict[str, Any]:
        """
        Update learned rules with new data (for periodic refresh).

        Args:
            tenant_id: Tenant identifier
            inventory_product_id: Product identifier
            sales_data: Updated historical sales data
            external_data: Updated external data

        Returns:
            Update results
        """
        logger.info(
            "Updating learned rules with new data",
            tenant_id=tenant_id,
            inventory_product_id=inventory_product_id,
            new_data_points=len(sales_data)
        )

        # Re-learn rules with the updated data
        results = await self.learn_and_post_rules(
            tenant_id=tenant_id,
            inventory_product_id=inventory_product_id,
            sales_data=sales_data,
            external_data=external_data
        )

        logger.info(
            "Rules update complete",
            insights_posted=results['insights_posted']
        )

        return results

    async def _publish_insight_events(
        self,
        tenant_id: str,
        insights: List[Dict[str, Any]],
        product_context: Optional[Dict[str, Any]] = None
    ):
        """
        Publish insight events to RabbitMQ for alert processing.

        Args:
            tenant_id: Tenant identifier
            insights: List of created insights
            product_context: Additional context about the product
        """
        if not self.event_publisher:
            logger.warning("No event publisher available for business rules insights")
            return

        for insight in insights:
            # Determine severity from priority, with confidence as a tiebreaker
            confidence = insight.get('confidence', 0)
            priority = insight.get('priority', 'medium')

            if priority == 'critical' or (priority == 'high' and confidence >= 70):
                severity = 'high'
            elif priority == 'high' or (priority == 'medium' and confidence >= 80):
                severity = 'medium'
            else:
                severity = 'low'

            # Prepare the event payload
            event_data = {
                'insight_id': insight.get('id'),
                'type': insight.get('type'),
                'title': insight.get('title'),
                'description': insight.get('description'),
                'category': insight.get('category'),
                'priority': insight.get('priority'),
                'confidence': confidence,
                'recommendation': insight.get('recommendation_actions', []),
                'impact_type': insight.get('impact_type'),
                'impact_value': insight.get('impact_value'),
                'inventory_product_id': product_context.get('inventory_product_id') if product_context else None,
                'timestamp': insight.get('detected_at', datetime.utcnow().isoformat()),
                'source_service': 'forecasting',
                'source_model': 'dynamic_rules_engine'
            }

            try:
                await self.event_publisher.publish_recommendation(
                    event_type='ai_business_rule',
                    tenant_id=tenant_id,
                    severity=severity,
                    data=event_data
                )
                logger.info(
                    "Published business rules insight event",
                    tenant_id=tenant_id,
                    insight_id=insight.get('id'),
                    severity=severity
                )
            except Exception as e:
                logger.error(
                    "Failed to publish business rules insight event",
                    tenant_id=tenant_id,
                    insight_id=insight.get('id'),
                    error=str(e)
                )

    async def close(self):
        """Close HTTP client connections."""
        await self.ai_insights_client.close()
385
services/forecasting/app/ml/scenario_planner.py
Normal file
@@ -0,0 +1,385 @@
"""
Scenario Planning System
What-if analysis for demand forecasting
"""

import pandas as pd
import numpy as np
from typing import Dict, List, Any, Optional
from datetime import datetime, date, timedelta
import structlog
from enum import Enum

logger = structlog.get_logger()


class ScenarioType(str, Enum):
    """Types of scenarios"""
    BASELINE = "baseline"
    OPTIMISTIC = "optimistic"
    PESSIMISTIC = "pessimistic"
    CUSTOM = "custom"
    PROMOTION = "promotion"
    EVENT = "event"
    WEATHER = "weather"
    PRICE_CHANGE = "price_change"


class ScenarioPlanner:
    """
    Scenario planning for demand forecasting.

    Allows testing "what-if" scenarios:
    - What if we run a promotion?
    - What if there's a local festival?
    - What if the weather is unusually bad?
    - What if we change prices?
    """

    def __init__(self, base_forecaster=None):
        """
        Initialize the scenario planner.

        Args:
            base_forecaster: Base forecaster to use for baseline predictions
        """
        self.base_forecaster = base_forecaster

    async def create_scenario(
        self,
        tenant_id: str,
        inventory_product_id: str,
        scenario_name: str,
        scenario_type: ScenarioType,
        start_date: date,
        end_date: date,
        adjustments: Dict[str, Any]
    ) -> Dict[str, Any]:
        """
        Create a forecast scenario with adjustments.

        Args:
            tenant_id: Tenant identifier
            inventory_product_id: Product identifier
            scenario_name: Name for the scenario
            scenario_type: Type of scenario
            start_date: Scenario start date
            end_date: Scenario end date
            adjustments: Dictionary of adjustments to apply

        Returns:
            Scenario forecast results
        """
        logger.info(
            "Creating forecast scenario",
            tenant_id=tenant_id,
            inventory_product_id=inventory_product_id,
            scenario_name=scenario_name,
            scenario_type=scenario_type
        )

        # Generate the baseline forecast first
        baseline_forecast = await self._generate_baseline_forecast(
            tenant_id=tenant_id,
            inventory_product_id=inventory_product_id,
            start_date=start_date,
            end_date=end_date
        )

        # Apply scenario adjustments
        scenario_forecast = self._apply_scenario_adjustments(
            baseline_forecast=baseline_forecast,
            adjustments=adjustments,
            scenario_type=scenario_type
        )

        # Calculate impact
        impact_analysis = self._calculate_scenario_impact(
            baseline_forecast=baseline_forecast,
            scenario_forecast=scenario_forecast
        )

        return {
            'scenario_id': f"scenario_{tenant_id}_{inventory_product_id}_{datetime.now().strftime('%Y%m%d%H%M%S')}",
            'scenario_name': scenario_name,
            'scenario_type': scenario_type,
            'tenant_id': tenant_id,
            'inventory_product_id': inventory_product_id,
            'date_range': {
                'start': start_date.isoformat(),
                'end': end_date.isoformat()
            },
            'baseline_forecast': baseline_forecast,
            'scenario_forecast': scenario_forecast,
            'impact_analysis': impact_analysis,
            'adjustments_applied': adjustments,
            'created_at': datetime.now().isoformat()
        }
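
    # Example (hypothetical usage sketch):
    #
    #   planner = ScenarioPlanner()
    #   scenario = await planner.create_scenario(
    #       tenant_id="tenant-123",
    #       inventory_product_id="prod-456",
    #       scenario_name="Summer Promotion",
    #       scenario_type=ScenarioType.PROMOTION,
    #       start_date=date(2024, 7, 1),
    #       end_date=date(2024, 7, 14),
    #       adjustments={'demand_multiplier': 1.5},
    #   )
    #   print(scenario['impact_analysis']['percent_change'])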

    async def compare_scenarios(
        self,
        scenarios: List[Dict[str, Any]]
    ) -> Dict[str, Any]:
        """
        Compare multiple scenarios side by side.

        Args:
            scenarios: List of scenario results from create_scenario()

        Returns:
            Comparison analysis
        """
        if len(scenarios) < 2:
            return {'error': 'Need at least 2 scenarios to compare'}

        comparison = {
            'scenarios_compared': len(scenarios),
            'scenario_names': [s['scenario_name'] for s in scenarios],
            'comparison_metrics': {}
        }

        # Extract total demand for each scenario
        for scenario in scenarios:
            scenario_name = scenario['scenario_name']
            scenario_forecast = scenario['scenario_forecast']

            total_demand = sum(f['predicted_demand'] for f in scenario_forecast)

            comparison['comparison_metrics'][scenario_name] = {
                'total_demand': total_demand,
                'avg_daily_demand': total_demand / len(scenario_forecast) if scenario_forecast else 0,
                'peak_demand': max(f['predicted_demand'] for f in scenario_forecast) if scenario_forecast else 0
            }

        # Determine best and worst scenarios ("best" means highest total demand)
        total_demands = {
            name: metrics['total_demand']
            for name, metrics in comparison['comparison_metrics'].items()
        }

        comparison['best_scenario'] = max(total_demands, key=total_demands.get)
        comparison['worst_scenario'] = min(total_demands, key=total_demands.get)

        comparison['demand_range'] = {
            'min': min(total_demands.values()),
            'max': max(total_demands.values()),
            'spread': max(total_demands.values()) - min(total_demands.values())
        }

        return comparison

    async def _generate_baseline_forecast(
        self,
        tenant_id: str,
        inventory_product_id: str,
        start_date: date,
        end_date: date
    ) -> List[Dict[str, Any]]:
        """
        Generate a baseline forecast without adjustments.

        Args:
            tenant_id: Tenant identifier
            inventory_product_id: Product identifier
            start_date: Start date
            end_date: End date

        Returns:
            List of daily forecasts
        """
        # Generate the date range
        dates = []
        current_date = start_date
        while current_date <= end_date:
            dates.append(current_date)
            current_date += timedelta(days=1)

        # Placeholder forecast (a real implementation would call the forecasting service)
        baseline = []
        for forecast_date in dates:
            baseline.append({
                'date': forecast_date.isoformat(),
                'predicted_demand': 100,  # Placeholder
                'confidence_lower': 80,
                'confidence_upper': 120
            })

        return baseline

    def _apply_scenario_adjustments(
        self,
        baseline_forecast: List[Dict[str, Any]],
        adjustments: Dict[str, Any],
        scenario_type: ScenarioType
    ) -> List[Dict[str, Any]]:
        """
        Apply adjustments to the baseline forecast.

        Args:
            baseline_forecast: Baseline forecast data
            adjustments: Adjustments to apply
            scenario_type: Type of scenario

        Returns:
            Adjusted forecast
        """
        scenario_forecast = []

        for day_forecast in baseline_forecast:
            adjusted_forecast = day_forecast.copy()

            # Multiply demand by a factor
            if 'demand_multiplier' in adjustments:
                multiplier = adjustments['demand_multiplier']
                adjusted_forecast['predicted_demand'] *= multiplier
                adjusted_forecast['confidence_lower'] *= multiplier
                adjusted_forecast['confidence_upper'] *= multiplier

            # Add or subtract a fixed amount
            if 'demand_offset' in adjustments:
                offset = adjustments['demand_offset']
                adjusted_forecast['predicted_demand'] += offset
                adjusted_forecast['confidence_lower'] += offset
                adjusted_forecast['confidence_upper'] += offset

            # Apply an event-specific impact
            if 'event_impact' in adjustments:
                adjusted_forecast['predicted_demand'] *= adjustments['event_impact']

            # Apply weather adjustments
            if 'weather_impact' in adjustments:
                adjusted_forecast['predicted_demand'] *= adjustments['weather_impact']

            # Apply price elasticity: demand moves against price
            if 'price_elasticity' in adjustments and 'price_change_percent' in adjustments:
                elasticity = adjustments['price_elasticity']
                price_change = adjustments['price_change_percent']
                demand_change = -elasticity * price_change  # Negative correlation
                adjusted_forecast['predicted_demand'] *= (1 + demand_change)

            # Ensure non-negative demand
            adjusted_forecast['predicted_demand'] = max(0, adjusted_forecast['predicted_demand'])
            adjusted_forecast['confidence_lower'] = max(0, adjusted_forecast['confidence_lower'])

            scenario_forecast.append(adjusted_forecast)

        return scenario_forecast
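
    # Worked example: with price_elasticity = 1.2 and price_change_percent = 0.10
    # (a 10% price increase), demand_change = -1.2 * 0.10 = -0.12, so a baseline
    # demand of 100 becomes 100 * (1 - 0.12) = 88 units.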

    def _calculate_scenario_impact(
        self,
        baseline_forecast: List[Dict[str, Any]],
        scenario_forecast: List[Dict[str, Any]]
    ) -> Dict[str, Any]:
        """
        Calculate the impact of a scenario versus the baseline.

        Args:
            baseline_forecast: Baseline forecast
            scenario_forecast: Scenario forecast

        Returns:
            Impact analysis
        """
        baseline_total = sum(f['predicted_demand'] for f in baseline_forecast)
        scenario_total = sum(f['predicted_demand'] for f in scenario_forecast)

        difference = scenario_total - baseline_total
        percent_change = (difference / baseline_total * 100) if baseline_total > 0 else 0

        return {
            'baseline_total_demand': baseline_total,
            'scenario_total_demand': scenario_total,
            'absolute_difference': difference,
            'percent_change': percent_change,
            'impact_category': self._categorize_impact(percent_change),
            'days_analyzed': len(baseline_forecast)
        }

    def _categorize_impact(self, percent_change: float) -> str:
        """Categorize the magnitude of an impact."""
        if abs(percent_change) < 5:
            return "minimal"
        elif abs(percent_change) < 15:
            return "moderate"
        elif abs(percent_change) < 30:
            return "significant"
        else:
            return "major"

    def generate_predefined_scenarios(
        self,
        base_scenario: Dict[str, Any]
    ) -> List[Dict[str, Any]]:
        """
        Generate common predefined scenarios for comparison.

        Args:
            base_scenario: Base scenario parameters (currently unused; the
                predefined configurations below are returned as-is)

        Returns:
            List of scenario configurations
        """
        scenarios = []

        # Baseline scenario
        scenarios.append({
            'scenario_name': 'Baseline',
            'scenario_type': ScenarioType.BASELINE,
            'adjustments': {}
        })

        # Optimistic scenario
        scenarios.append({
            'scenario_name': 'Optimistic',
            'scenario_type': ScenarioType.OPTIMISTIC,
            'adjustments': {
                'demand_multiplier': 1.2,  # 20% increase
                'description': '+20% demand increase'
            }
        })

        # Pessimistic scenario
        scenarios.append({
            'scenario_name': 'Pessimistic',
            'scenario_type': ScenarioType.PESSIMISTIC,
            'adjustments': {
                'demand_multiplier': 0.8,  # 20% decrease
                'description': '-20% demand decrease'
            }
        })

        # Promotion scenario
        scenarios.append({
            'scenario_name': 'Promotion Campaign',
            'scenario_type': ScenarioType.PROMOTION,
            'adjustments': {
                'demand_multiplier': 1.5,  # 50% increase
                'description': '50% promotion boost'
            }
        })

        # Bad weather scenario
        scenarios.append({
            'scenario_name': 'Bad Weather',
            'scenario_type': ScenarioType.WEATHER,
            'adjustments': {
                'weather_impact': 0.7,  # 30% decrease
                'description': 'Bad weather reduces foot traffic'
            }
        })

        # Price increase scenario
        scenarios.append({
            'scenario_name': 'Price Increase 10%',
            'scenario_type': ScenarioType.PRICE_CHANGE,
            'adjustments': {
                'price_elasticity': 1.2,  # Elastic demand
                'price_change_percent': 0.10,  # 10% price increase
                'description': '10% price increase with elastic demand'
            }
        })

        return scenarios
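
    # Example (hypothetical usage sketch combining the helpers above):
    #
    #   configs = planner.generate_predefined_scenarios(base_scenario={})
    #   results = [
    #       await planner.create_scenario(
    #           tenant_id="tenant-123",
    #           inventory_product_id="prod-456",
    #           scenario_name=c['scenario_name'],
    #           scenario_type=c['scenario_type'],
    #           start_date=date(2024, 7, 1),
    #           end_date=date(2024, 7, 7),
    #           adjustments=c['adjustments'],
    #       )
    #       for c in configs
    #   ]
    #   comparison = await planner.compare_scenarios(results)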