Improve AI logic

This commit is contained in:
Urtzi Alfaro
2025-11-05 13:34:56 +01:00
parent 5c87fbcf48
commit 394ad3aea4
218 changed files with 30627 additions and 7658 deletions

View File

@@ -98,6 +98,11 @@ class OrchestratorSettings(BaseServiceSettings):
# Boolean flags below are True only when the environment value, lower-cased,
# equals "true"; each one defaults to "true" when the variable is unset.
AUDIT_ORCHESTRATION_RUNS: bool = os.getenv("AUDIT_ORCHESTRATION_RUNS", "true").lower() == "true"
DETAILED_LOGGING: bool = os.getenv("DETAILED_LOGGING", "true").lower() == "true"
# AI Enhancement Settings
# Toggle for AI insights in orchestration (same "true"-string parsing as above).
ORCHESTRATION_USE_AI_INSIGHTS: bool = os.getenv("ORCHESTRATION_USE_AI_INSIGHTS", "true").lower() == "true"
# Base URL of the AI Insights service; falls back to the literal default when unset.
AI_INSIGHTS_SERVICE_URL: str = os.getenv("AI_INSIGHTS_SERVICE_URL", "http://ai-insights-service:8000")
# Minimum confidence for AI insights. int() raises ValueError when the
# environment value is not an integer string.
# NOTE(review): presumably a 0-100 scale matching insight "confidence" — confirm.
AI_INSIGHTS_MIN_CONFIDENCE: int = int(os.getenv("AI_INSIGHTS_MIN_CONFIDENCE", "70"))
# Global settings instance
settings = OrchestratorSettings()

View File

View File

@@ -0,0 +1,894 @@
"""
AI-Enhanced Orchestration Saga
Integrates ML insights into daily workflow orchestration
"""
import copy
from collections import Counter
from datetime import datetime, timedelta
from typing import Dict, List, Any, Optional, Tuple
from uuid import UUID

import numpy as np
import pandas as pd
import structlog

from shared.clients.ai_insights_client import AIInsightsClient
logger = structlog.get_logger()
class AIEnhancedOrchestrator:
    """
    Enhanced orchestration engine that integrates ML insights into daily workflow.

    Workflow:
    1. Pre-Orchestration: Gather all relevant insights for target date
    2. Intelligent Planning: Modify orchestration plan based on insights
    3. Execution: Apply insights with confidence-based decision making
    4. Feedback Tracking: Record outcomes for continuous learning

    Replaces hardcoded logic with learned intelligence from:
    - Demand Forecasting
    - Supplier Performance
    - Safety Stock Optimization
    - Price Forecasting
    - Production Yield Prediction
    - Dynamic Business Rules

    State:
        ``applied_insights`` holds the insights applied by the most recent
        ``orchestrate_with_ai`` run; ``record_orchestration_feedback`` reads it.
    """

    # --- Tunable decision constants (previously inline literals) -------------
    # Production is planned at forecast demand times this multiplier (10% buffer).
    DEMAND_BUFFER_MULTIPLIER = 1.10
    # Suppliers with a reliability score strictly below this are marked 'avoid'.
    LOW_RELIABILITY_THRESHOLD = 70
    # Predicted delays strictly above this many days trigger a lead-time change.
    MIN_DELAY_DAYS_FOR_ADJUSTMENT = 1
    # Absolute expected price change (in %) that must be exceeded to act.
    PRICE_CHANGE_TRIGGER_PCT = 5
    # Order-quantity multiplier used for 'buy_now' opportunities (buy 50% more).
    BULK_BUY_MULTIPLIER = 1.5
    # Days to postpone procurement for 'wait' opportunities.
    PROCUREMENT_DELAY_DAYS = 7
    # Predicted yields strictly below this percentage get waste compensation.
    LOW_YIELD_THRESHOLD = 90
    # Quantity assumed for a recipe that has no planned quantity yet.
    DEFAULT_RECIPE_QUANTITY = 100

    # Routes an insight's ``source_model`` to its bucket in the categorized dict.
    _SOURCE_MODEL_TO_CATEGORY = {
        'hybrid_forecaster': 'demand_forecasts',
        'supplier_performance_predictor': 'supplier_alerts',
        'safety_stock_optimizer': 'inventory_optimizations',
        'price_forecaster': 'price_opportunities',
        'yield_predictor': 'yield_predictions',
        'business_rules_engine': 'business_rules',
    }

    # Maps a category bucket to the ``type`` tag stored in ``applied_insights``.
    # An explicit table is required: naive de-pluralisation turns
    # 'price_opportunities' into 'price_opportunitie', which never matches.
    _CATEGORY_TO_APPLIED_TYPE = {
        'demand_forecasts': 'demand_forecast',
        'supplier_alerts': 'supplier_alert',
        'inventory_optimizations': 'inventory_optimization',
        'price_opportunities': 'price_opportunity',
        'yield_predictions': 'yield_prediction',
        'business_rules': 'business_rule',
    }

    def __init__(
        self,
        ai_insights_base_url: str = "http://ai-insights-service:8000",
        min_confidence_threshold: int = 70
    ):
        """
        Create the orchestrator and its AI Insights client.

        Args:
            ai_insights_base_url: Base URL handed to ``AIInsightsClient``.
            min_confidence_threshold: Insights whose ``confidence`` is below
                this value are ignored.
        """
        self.ai_insights_client = AIInsightsClient(ai_insights_base_url)
        self.min_confidence_threshold = min_confidence_threshold
        self.applied_insights = []  # Track applied insights for feedback

    # ------------------------------------------------------------------
    # Small pure helpers
    # ------------------------------------------------------------------
    def _meets_confidence(self, insight: Dict[str, Any]) -> bool:
        """Return True when the insight carries a confidence at or above the threshold.

        An insight without a ``confidence`` value is treated as not confident
        enough instead of raising.
        """
        confidence = insight.get('confidence')
        return confidence is not None and confidence >= self.min_confidence_threshold

    @staticmethod
    def _metrics_of(insight: Dict[str, Any]) -> Dict[str, Any]:
        """Return the insight's ``metrics_json`` dict, or ``{}`` when missing or null."""
        return insight.get('metrics_json') or {}

    @staticmethod
    def _percent_error(predicted: float, actual: float) -> float:
        """Return ``|actual - predicted|`` as a percentage of ``actual``.

        When ``actual`` is not positive the ratio is undefined; an exact match
        is reported as 0.0 and any miss as 100.0, so a wrong prediction against
        a zero outcome is never scored as perfect.
        """
        error = abs(actual - predicted)
        if actual > 0:
            return error / actual * 100
        return 0.0 if error == 0 else 100.0

    @classmethod
    def _outcome_metrics(
        cls,
        insight_type: Optional[str],
        metrics: Dict[str, Any],
        actual_outcomes: Dict[str, Any]
    ) -> Optional[Dict[str, Any]]:
        """Build the ``outcome_metrics`` feedback payload for one applied insight.

        Returns ``None`` when the insight type has no outcome comparison or the
        matching actual value was not supplied.
        """
        if insight_type == 'demand_forecast':
            predicted = metrics.get('predicted_demand')
            actual = (actual_outcomes.get('actual_demand') or {}).get(
                metrics.get('product_id')
            )
            if actual is None or predicted is None:
                return None
            error_pct = cls._percent_error(predicted, actual)
            return {
                'predicted_demand': predicted,
                'actual_demand': actual,
                'error': abs(actual - predicted),
                'error_pct': round(error_pct, 2),
                'accuracy': round(100 - error_pct, 2)
            }

        if insight_type == 'yield_prediction':
            predicted = metrics.get('predicted_yield')
            actual = (actual_outcomes.get('actual_yields') or {}).get(
                metrics.get('recipe_id')
            )
            if actual is None or predicted is None:
                return None
            error_pct = cls._percent_error(predicted, actual)
            return {
                'predicted_yield': predicted,
                'actual_yield': actual,
                'error': round(abs(actual - predicted), 2),
                'accuracy': round(100 - error_pct, 2)
            }

        return None

    @staticmethod
    def _empty_plan_sections() -> Dict[str, Any]:
        """Return a fresh copy of every plan section with empty default values."""
        return {
            'procurement': {
                'orders': [],
                'supplier_selections': {},
                'order_quantities': {}
            },
            'inventory': {
                'safety_stock_levels': {},
                'reorder_points': {},
                'transfers': []
            },
            'production': {
                'production_runs': [],
                'recipe_quantities': {},
                'worker_assignments': {}
            },
            'sales': {
                'forecasted_demand': {},
                'pricing_adjustments': {}
            },
            'modifications': [],
            'ai_enhancements': []
        }

    # ------------------------------------------------------------------
    # Entry point
    # ------------------------------------------------------------------
    async def orchestrate_with_ai(
        self,
        tenant_id: str,
        target_date: datetime,
        base_orchestration_plan: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """
        Run AI-enhanced orchestration for a target date.

        Args:
            tenant_id: Tenant identifier (must parse as a UUID)
            target_date: Date to orchestrate for
            base_orchestration_plan: Optional base plan to enhance (if None,
                creates new). The supplied plan is never mutated.

        Returns:
            Enhanced orchestration plan with applied insights and metadata
        """
        logger.info(
            "Starting AI-enhanced orchestration",
            tenant_id=tenant_id,
            target_date=target_date.isoformat()
        )

        # Each run starts with its own tracking list so a reused instance does
        # not leak insights from an earlier tenant/date into this summary.
        # Rebinding (not clearing) keeps lists returned by earlier runs intact.
        self.applied_insights = []

        # Step 1: Gather insights for target date
        insights = await self._gather_insights(tenant_id, target_date)
        logger.info(
            "Insights gathered",
            demand_forecasts=len(insights['demand_forecasts']),
            supplier_alerts=len(insights['supplier_alerts']),
            inventory_optimizations=len(insights['inventory_optimizations']),
            price_opportunities=len(insights['price_opportunities']),
            yield_predictions=len(insights['yield_predictions']),
            business_rules=len(insights['business_rules'])
        )

        # Step 2: Initialize or load base plan
        # (no copy here: _apply_insights_to_plan deep-copies its input)
        if base_orchestration_plan is None:
            orchestration_plan = self._create_base_plan(target_date)
        else:
            orchestration_plan = base_orchestration_plan

        # Step 3: Apply insights to plan
        enhanced_plan = await self._apply_insights_to_plan(
            orchestration_plan, insights, tenant_id
        )

        # Step 4: Generate execution summary
        execution_summary = self._generate_execution_summary(
            enhanced_plan, insights
        )
        logger.info(
            "AI-enhanced orchestration complete",
            tenant_id=tenant_id,
            insights_applied=execution_summary['total_insights_applied'],
            modifications=execution_summary['total_modifications']
        )

        return {
            'tenant_id': tenant_id,
            'target_date': target_date.isoformat(),
            'orchestrated_at': datetime.utcnow().isoformat(),
            'plan': enhanced_plan,
            'insights_used': insights,
            'execution_summary': execution_summary,
            'applied_insights': self.applied_insights
        }

    async def _gather_insights(
        self,
        tenant_id: str,
        target_date: datetime
    ) -> Dict[str, List[Dict[str, Any]]]:
        """
        Gather all relevant insights for target date from AI Insights Service.

        Returns insights categorized by type:
        - demand_forecasts
        - supplier_alerts
        - inventory_optimizations
        - price_opportunities
        - yield_predictions
        - business_rules
        - other (anything not matched above)
        """
        # Get orchestration-ready insights
        insights = await self.ai_insights_client.get_orchestration_ready_insights(
            tenant_id=UUID(tenant_id),
            target_date=target_date,
            min_confidence=self.min_confidence_threshold
        )

        categorized = {
            'demand_forecasts': [],
            'supplier_alerts': [],
            'inventory_optimizations': [],
            'price_opportunities': [],
            'yield_predictions': [],
            'business_rules': [],
            'other': []
        }

        # A null response from the client is treated as "no insights".
        for insight in insights or []:
            if insight.get('category', '') == 'demand':
                # Demand-category insights count as forecasts whatever model made them.
                bucket = 'demand_forecasts'
            else:
                bucket = self._SOURCE_MODEL_TO_CATEGORY.get(
                    insight.get('source_model', ''), 'other'
                )
            categorized[bucket].append(insight)

        return categorized

    def _create_base_plan(self, target_date: datetime) -> Dict[str, Any]:
        """Create base orchestration plan with default hardcoded values."""
        return {
            'target_date': target_date.isoformat(),
            **self._empty_plan_sections()
        }

    async def _apply_insights_to_plan(
        self,
        plan: Dict[str, Any],
        insights: Dict[str, List[Dict[str, Any]]],
        tenant_id: str
    ) -> Dict[str, Any]:
        """
        Apply categorized insights to orchestration plan.

        Each insight type modifies specific parts of the plan:
        - Demand forecasts → sales forecasts, production quantities
        - Supplier alerts → supplier selection, procurement timing
        - Inventory optimizations → safety stock levels, reorder points
        - Price opportunities → procurement timing, order quantities
        - Yield predictions → production quantities, worker assignments
        - Business rules → cross-cutting modifications

        The input ``plan`` is deep-copied, so neither it nor its nested
        containers are modified. Sections missing from the input are created
        empty so the per-category appliers can write to them.
        """
        enhanced_plan = copy.deepcopy(plan)

        # Fill in any section (and sub-key) the caller's plan did not provide.
        for section, default in self._empty_plan_sections().items():
            current = enhanced_plan.setdefault(section, default)
            if isinstance(default, dict) and isinstance(current, dict):
                for key, value in default.items():
                    current.setdefault(key, value)

        # Order matters: business rules run last so they can override the rest.
        appliers = (
            ('demand_forecasts', self._apply_demand_forecasts),
            ('supplier_alerts', self._apply_supplier_alerts),
            ('inventory_optimizations', self._apply_inventory_optimizations),
            ('price_opportunities', self._apply_price_opportunities),
            ('yield_predictions', self._apply_yield_predictions),
            ('business_rules', self._apply_business_rules),
        )
        for category, applier in appliers:
            category_insights = insights.get(category)
            if category_insights:
                enhanced_plan = await applier(
                    enhanced_plan, category_insights, tenant_id
                )

        return enhanced_plan

    async def _apply_demand_forecasts(
        self,
        plan: Dict[str, Any],
        forecasts: List[Dict[str, Any]],
        tenant_id: str
    ) -> Dict[str, Any]:
        """
        Apply demand forecasts to sales and production planning.

        Modifications:
        - Update sales forecasted_demand
        - Adjust production recipe_quantities
        - Record insight application

        Mutates and returns ``plan``.
        """
        for forecast in forecasts:
            if not self._meets_confidence(forecast):
                continue

            metrics = self._metrics_of(forecast)
            product_id = metrics.get('product_id')
            predicted_demand = metrics.get('predicted_demand')
            if not product_id or predicted_demand is None:
                continue

            # Update sales forecast
            plan['sales']['forecasted_demand'][product_id] = {
                'quantity': predicted_demand,
                'confidence': forecast['confidence'],
                'source': 'ai_forecast',
                'insight_id': forecast.get('id')
            }

            # Adjust production quantities (demand + buffer for uncertainty)
            buffer_pct = self.DEMAND_BUFFER_MULTIPLIER
            production_quantity = int(predicted_demand * buffer_pct)
            plan['production']['recipe_quantities'][product_id] = {
                'quantity': production_quantity,
                'demand_forecast': predicted_demand,
                'buffer_applied': buffer_pct,
                'source': 'ai_forecast',
                'insight_id': forecast.get('id')
            }

            # Record modification
            plan['modifications'].append({
                'type': 'demand_forecast_applied',
                'insight_id': forecast.get('id'),
                'product_id': product_id,
                'predicted_demand': predicted_demand,
                'production_quantity': production_quantity,
                'confidence': forecast['confidence']
            })

            # Track for feedback
            self.applied_insights.append({
                'insight_id': forecast.get('id'),
                'type': 'demand_forecast',
                'applied_at': datetime.utcnow().isoformat(),
                'tenant_id': tenant_id,
                'metrics': {
                    'product_id': product_id,
                    'predicted_demand': predicted_demand,
                    'production_quantity': production_quantity
                }
            })
            logger.info(
                "Applied demand forecast",
                product_id=product_id,
                predicted_demand=predicted_demand,
                production_quantity=production_quantity
            )

        return plan

    async def _apply_supplier_alerts(
        self,
        plan: Dict[str, Any],
        alerts: List[Dict[str, Any]],
        tenant_id: str
    ) -> Dict[str, Any]:
        """
        Apply supplier performance alerts to procurement decisions.

        Modifications:
        - Mark suppliers with low reliability as 'avoid'
        - Adjust lead times for predicted delays

        An 'avoid' decision is never downgraded: when a delay is also predicted
        for that supplier, the extra lead days are attached to the existing
        'avoid' entry instead of replacing it. Mutates and returns ``plan``.
        """
        selections = plan['procurement']['supplier_selections']

        for alert in alerts:
            if not self._meets_confidence(alert):
                continue

            metrics = self._metrics_of(alert)
            supplier_id = metrics.get('supplier_id')
            reliability_score = metrics.get('reliability_score')
            predicted_delay = metrics.get('predicted_delivery_delay_days')
            if not supplier_id:
                continue

            # Low reliability: recommend supplier switch.
            # Compare against None explicitly - a score of 0 is the worst
            # possible rating and must not be skipped as "falsy".
            if (
                reliability_score is not None
                and reliability_score < self.LOW_RELIABILITY_THRESHOLD
            ):
                selections[supplier_id] = {
                    'action': 'avoid',
                    'reason': f'Low reliability score: {reliability_score}',
                    'alternative_required': True,
                    'source': 'supplier_alert',
                    'insight_id': alert.get('id')
                }
                plan['modifications'].append({
                    'type': 'supplier_switch_recommended',
                    'insight_id': alert.get('id'),
                    'supplier_id': supplier_id,
                    'reliability_score': reliability_score,
                    'confidence': alert['confidence']
                })

            # Delay predicted: adjust lead time
            if predicted_delay and predicted_delay > self.MIN_DELAY_DAYS_FOR_ADJUSTMENT:
                extra_days = int(predicted_delay)
                existing = selections.get(supplier_id)
                if existing and existing.get('action') == 'avoid':
                    # Keep the stronger decision; just annotate the delay.
                    existing['additional_lead_days'] = extra_days
                else:
                    selections[supplier_id] = {
                        'action': 'adjust_lead_time',
                        'additional_lead_days': extra_days,
                        'reason': f'Predicted delay: {predicted_delay} days',
                        'source': 'supplier_alert',
                        'insight_id': alert.get('id')
                    }
                plan['modifications'].append({
                    'type': 'lead_time_adjusted',
                    'insight_id': alert.get('id'),
                    'supplier_id': supplier_id,
                    'additional_days': extra_days,
                    'confidence': alert['confidence']
                })

            # Track for feedback
            self.applied_insights.append({
                'insight_id': alert.get('id'),
                'type': 'supplier_alert',
                'applied_at': datetime.utcnow().isoformat(),
                'tenant_id': tenant_id,
                'metrics': {
                    'supplier_id': supplier_id,
                    'reliability_score': reliability_score,
                    'predicted_delay': predicted_delay
                }
            })
            logger.info(
                "Applied supplier alert",
                supplier_id=supplier_id,
                reliability_score=reliability_score,
                predicted_delay=predicted_delay
            )

        return plan

    async def _apply_inventory_optimizations(
        self,
        plan: Dict[str, Any],
        optimizations: List[Dict[str, Any]],
        tenant_id: str
    ) -> Dict[str, Any]:
        """
        Apply safety stock optimizations to inventory management.

        Modifications:
        - Update safety stock levels (from hardcoded 95% to learned optimal)
        - Adjust reorder points accordingly

        Mutates and returns ``plan``.
        """
        for optimization in optimizations:
            if not self._meets_confidence(optimization):
                continue

            metrics = self._metrics_of(optimization)
            product_id = metrics.get('inventory_product_id')
            optimal_safety_stock = metrics.get('optimal_safety_stock')
            optimal_service_level = metrics.get('optimal_service_level')
            if not product_id or optimal_safety_stock is None:
                continue

            # Update safety stock level
            plan['inventory']['safety_stock_levels'][product_id] = {
                'quantity': optimal_safety_stock,
                'service_level': optimal_service_level,
                'source': 'ai_optimization',
                'insight_id': optimization.get('id'),
                'replaced_hardcoded': True
            }

            # Adjust reorder point (lead time demand + safety stock).
            # Fall back to twice the safety stock when the insight carries no
            # lead-time demand - including when the key is present but null.
            lead_time_demand = metrics.get('lead_time_demand')
            if lead_time_demand is None:
                lead_time_demand = optimal_safety_stock * 2
            reorder_point = lead_time_demand + optimal_safety_stock
            plan['inventory']['reorder_points'][product_id] = {
                'quantity': reorder_point,
                'lead_time_demand': lead_time_demand,
                'safety_stock': optimal_safety_stock,
                'source': 'ai_optimization',
                'insight_id': optimization.get('id')
            }

            plan['modifications'].append({
                'type': 'safety_stock_optimized',
                'insight_id': optimization.get('id'),
                'product_id': product_id,
                'optimal_safety_stock': optimal_safety_stock,
                'optimal_service_level': optimal_service_level,
                'confidence': optimization['confidence']
            })

            # Track for feedback
            self.applied_insights.append({
                'insight_id': optimization.get('id'),
                'type': 'inventory_optimization',
                'applied_at': datetime.utcnow().isoformat(),
                'tenant_id': tenant_id,
                'metrics': {
                    'product_id': product_id,
                    'optimal_safety_stock': optimal_safety_stock,
                    'reorder_point': reorder_point
                }
            })
            logger.info(
                "Applied safety stock optimization",
                product_id=product_id,
                optimal_safety_stock=optimal_safety_stock,
                reorder_point=reorder_point
            )

        return plan

    async def _apply_price_opportunities(
        self,
        plan: Dict[str, Any],
        opportunities: List[Dict[str, Any]],
        tenant_id: str
    ) -> Dict[str, Any]:
        """
        Apply price forecasting opportunities to procurement timing.

        Modifications:
        - Increase order quantities when a price increase is predicted ('buy_now')
        - Delay orders when a price decrease is predicted ('wait')

        Every confident opportunity with an ingredient and a recommendation is
        tracked for feedback, even when the expected change is too small to
        alter the plan. Mutates and returns ``plan``.
        """
        order_quantities = plan['procurement']['order_quantities']

        for opportunity in opportunities:
            if not self._meets_confidence(opportunity):
                continue

            metrics = self._metrics_of(opportunity)
            ingredient_id = metrics.get('ingredient_id')
            recommendation = metrics.get('recommendation')
            expected_price_change = metrics.get('expected_price_change_pct')
            if not ingredient_id or not recommendation:
                continue

            # Buy now: price increasing
            if (
                recommendation == 'buy_now'
                and expected_price_change
                and expected_price_change > self.PRICE_CHANGE_TRIGGER_PCT
            ):
                order_quantities[ingredient_id] = {
                    'action': 'increase',
                    'multiplier': self.BULK_BUY_MULTIPLIER,
                    'reason': f'Price expected to increase {expected_price_change:.1f}%',
                    'source': 'price_forecast',
                    'insight_id': opportunity.get('id')
                }
                plan['modifications'].append({
                    'type': 'bulk_purchase_opportunity',
                    'insight_id': opportunity.get('id'),
                    'ingredient_id': ingredient_id,
                    'expected_price_change': expected_price_change,
                    'quantity_multiplier': self.BULK_BUY_MULTIPLIER,
                    'confidence': opportunity['confidence']
                })
            # Wait: price decreasing
            elif (
                recommendation == 'wait'
                and expected_price_change
                and expected_price_change < -self.PRICE_CHANGE_TRIGGER_PCT
            ):
                order_quantities[ingredient_id] = {
                    'action': 'delay',
                    'delay_days': self.PROCUREMENT_DELAY_DAYS,
                    'reason': f'Price expected to decrease {abs(expected_price_change):.1f}%',
                    'source': 'price_forecast',
                    'insight_id': opportunity.get('id')
                }
                plan['modifications'].append({
                    'type': 'procurement_delayed',
                    'insight_id': opportunity.get('id'),
                    'ingredient_id': ingredient_id,
                    'expected_price_change': expected_price_change,
                    'delay_days': self.PROCUREMENT_DELAY_DAYS,
                    'confidence': opportunity['confidence']
                })

            # Track for feedback
            self.applied_insights.append({
                'insight_id': opportunity.get('id'),
                'type': 'price_opportunity',
                'applied_at': datetime.utcnow().isoformat(),
                'tenant_id': tenant_id,
                'metrics': {
                    'ingredient_id': ingredient_id,
                    'recommendation': recommendation,
                    'expected_price_change': expected_price_change
                }
            })
            logger.info(
                "Applied price opportunity",
                ingredient_id=ingredient_id,
                recommendation=recommendation,
                expected_price_change=expected_price_change
            )

        return plan

    async def _apply_yield_predictions(
        self,
        plan: Dict[str, Any],
        predictions: List[Dict[str, Any]],
        tenant_id: str
    ) -> Dict[str, Any]:
        """
        Apply production yield predictions to production planning.

        Modifications:
        - Increase production quantities for low predicted yield

        Predictions with a non-positive yield are skipped with a warning: they
        cannot be compensated for (division by zero / negative quantities).
        Mutates and returns ``plan``.
        """
        recipe_quantities = plan['production']['recipe_quantities']

        for prediction in predictions:
            if not self._meets_confidence(prediction):
                continue

            metrics = self._metrics_of(prediction)
            recipe_id = metrics.get('recipe_id')
            predicted_yield = metrics.get('predicted_yield')
            expected_waste = metrics.get('expected_waste')
            if not recipe_id or predicted_yield is None:
                continue

            if predicted_yield <= 0:
                logger.warning(
                    "Skipping yield prediction with non-positive yield",
                    recipe_id=recipe_id,
                    predicted_yield=predicted_yield
                )
                continue

            # Low yield: increase production quantity to compensate
            if predicted_yield < self.LOW_YIELD_THRESHOLD:
                current_quantity = recipe_quantities.get(recipe_id, {}).get(
                    'quantity', self.DEFAULT_RECIPE_QUANTITY
                )
                # Scale up so that quantity * yield% still covers the target.
                adjusted_quantity = int(current_quantity * (100 / predicted_yield))
                recipe_quantities[recipe_id] = {
                    'quantity': adjusted_quantity,
                    'predicted_yield': predicted_yield,
                    'waste_compensation': adjusted_quantity - current_quantity,
                    'source': 'yield_prediction',
                    'insight_id': prediction.get('id')
                }
                plan['modifications'].append({
                    'type': 'yield_compensation_applied',
                    'insight_id': prediction.get('id'),
                    'recipe_id': recipe_id,
                    'predicted_yield': predicted_yield,
                    'original_quantity': current_quantity,
                    'adjusted_quantity': adjusted_quantity,
                    'confidence': prediction['confidence']
                })

            # Track for feedback
            self.applied_insights.append({
                'insight_id': prediction.get('id'),
                'type': 'yield_prediction',
                'applied_at': datetime.utcnow().isoformat(),
                'tenant_id': tenant_id,
                'metrics': {
                    'recipe_id': recipe_id,
                    'predicted_yield': predicted_yield,
                    'expected_waste': expected_waste
                }
            })
            logger.info(
                "Applied yield prediction",
                recipe_id=recipe_id,
                predicted_yield=predicted_yield
            )

        return plan

    async def _apply_business_rules(
        self,
        plan: Dict[str, Any],
        rules: List[Dict[str, Any]],
        tenant_id: str
    ) -> Dict[str, Any]:
        """
        Apply dynamic business rules to orchestration plan.

        Business rules can override other insights based on business logic.
        Recognised actions are ``force_supplier_switch`` and
        ``halt_production``; other actions are ignored, but the rule is still
        recorded as applied. Mutates and returns ``plan``.
        """
        for rule in rules:
            if not self._meets_confidence(rule):
                continue

            # Business rules are flexible and defined in JSONB
            # Parse recommendation_actions to understand what to apply
            # (a null value is treated as "no actions").
            actions = rule.get('recommendation_actions') or []
            for action in actions:
                action_type = action.get('action')
                params = action.get('params') or {}

                # Example: Force supplier switch
                if action_type == 'force_supplier_switch':
                    supplier_id = params.get('from_supplier_id')
                    alternate_id = params.get('to_supplier_id')
                    if supplier_id and alternate_id:
                        plan['procurement']['supplier_selections'][supplier_id] = {
                            'action': 'replace',
                            'alternate_supplier': alternate_id,
                            'reason': rule.get('description'),
                            'source': 'business_rule',
                            'insight_id': rule.get('id'),
                            'override': True
                        }
                # Example: Halt production
                elif action_type == 'halt_production':
                    recipe_id = params.get('recipe_id')
                    if recipe_id:
                        plan['production']['recipe_quantities'][recipe_id] = {
                            'quantity': 0,
                            'halted': True,
                            'reason': rule.get('description'),
                            'source': 'business_rule',
                            'insight_id': rule.get('id')
                        }

            plan['modifications'].append({
                'type': 'business_rule_applied',
                'insight_id': rule.get('id'),
                'rule_description': rule.get('description'),
                'confidence': rule['confidence']
            })

            # Track for feedback
            self.applied_insights.append({
                'insight_id': rule.get('id'),
                'type': 'business_rule',
                'applied_at': datetime.utcnow().isoformat(),
                'tenant_id': tenant_id,
                'metrics': {'actions': len(actions)}
            })
            logger.info(
                "Applied business rule",
                rule_description=rule.get('title')
            )

        return plan

    def _generate_execution_summary(
        self,
        plan: Dict[str, Any],
        insights: Dict[str, List[Dict[str, Any]]]
    ) -> Dict[str, Any]:
        """Generate summary of AI-enhanced orchestration execution.

        Returns totals for available/applied insights and plan modifications,
        the application rate in percent (0 when nothing was available), a
        per-category available/applied breakdown and a per-type modification
        count.
        """
        total_insights_available = sum(len(v) for v in insights.values())
        total_insights_applied = len(self.applied_insights)
        total_modifications = len(plan.get('modifications', []))

        # Count applied insights once, then look each category up via the
        # explicit category -> type table.
        applied_by_type = Counter(
            applied.get('type') for applied in self.applied_insights
        )
        insights_by_type = {
            category: {
                'available': len(category_insights),
                'applied': applied_by_type[
                    self._CATEGORY_TO_APPLIED_TYPE.get(category, category)
                ]
            }
            for category, category_insights in insights.items()
        }

        return {
            'total_insights_available': total_insights_available,
            'total_insights_applied': total_insights_applied,
            'total_modifications': total_modifications,
            'application_rate': round(
                (total_insights_applied / total_insights_available * 100)
                if total_insights_available > 0 else 0,
                2
            ),
            'insights_by_type': insights_by_type,
            'modifications_summary': self._summarize_modifications(plan)
        }

    def _summarize_modifications(self, plan: Dict[str, Any]) -> Dict[str, int]:
        """Summarize modifications by type (entries without a type count as 'unknown')."""
        return dict(Counter(
            mod.get('type', 'unknown') for mod in plan.get('modifications', [])
        ))

    async def record_orchestration_feedback(
        self,
        tenant_id: str,
        target_date: datetime,
        actual_outcomes: Dict[str, Any]
    ) -> Dict[str, Any]:
        """
        Record feedback for applied insights to enable continuous learning.

        Args:
            tenant_id: Tenant identifier
            target_date: Orchestration target date
            actual_outcomes: Actual results:
                - actual_demand: {product_id: actual_quantity}
                - actual_yields: {recipe_id: actual_yield_pct}
                - actual_costs: {ingredient_id: actual_price}
                - supplier_performance: {supplier_id: on_time_delivery}
              Only ``actual_demand`` and ``actual_yields`` are currently
              compared against predictions.

        Returns:
            Feedback recording results. A failure to record one insight is
            logged and reported in ``feedback_results``; it does not abort
            the remaining insights.
        """
        logger.info(
            "Recording orchestration feedback",
            tenant_id=tenant_id,
            target_date=target_date.isoformat(),
            applied_insights=len(self.applied_insights)
        )

        feedback_results = []
        for applied in self.applied_insights:
            insight_id = applied.get('insight_id')
            insight_type = applied.get('type')

            # Prepare feedback based on type
            feedback_data = {
                'applied': True,
                'applied_at': applied.get('applied_at'),
                'outcome_date': target_date.isoformat()
            }
            outcome_metrics = self._outcome_metrics(
                insight_type, applied.get('metrics') or {}, actual_outcomes
            )
            if outcome_metrics is not None:
                feedback_data['outcome_metrics'] = outcome_metrics

            # Record feedback via AI Insights Client
            try:
                await self.ai_insights_client.record_feedback(
                    tenant_id=UUID(tenant_id),
                    insight_id=UUID(insight_id) if insight_id else None,
                    feedback_data=feedback_data
                )
            except Exception as e:
                # Best-effort: keep going so one bad insight does not block the rest.
                logger.error(
                    "Error recording feedback",
                    insight_id=insight_id,
                    error=str(e)
                )
                feedback_results.append({
                    'insight_id': insight_id,
                    'insight_type': insight_type,
                    'status': 'failed',
                    'error': str(e)
                })
            else:
                feedback_results.append({
                    'insight_id': insight_id,
                    'insight_type': insight_type,
                    'status': 'recorded',
                    'feedback': feedback_data
                })

        successful = sum(1 for r in feedback_results if r['status'] == 'recorded')
        failed = sum(1 for r in feedback_results if r['status'] == 'failed')
        logger.info(
            "Feedback recording complete",
            total=len(feedback_results),
            successful=successful
        )

        return {
            'tenant_id': tenant_id,
            'target_date': target_date.isoformat(),
            'feedback_recorded_at': datetime.utcnow().isoformat(),
            'total_insights': len(self.applied_insights),
            'feedback_results': feedback_results,
            'successful': successful,
            'failed': failed
        }

    async def close(self):
        """Close HTTP client connections."""
        await self.ai_insights_client.close()

View File

@@ -2,6 +2,7 @@
Orchestration Saga Service
Implements saga pattern for orchestrator workflow with compensation logic.
Integrates AI-enhanced orchestration when enabled.
"""
import asyncio
@@ -18,6 +19,8 @@ from shared.clients.notification_client import NotificationServiceClient
from shared.clients.inventory_client import InventoryServiceClient
from shared.clients.suppliers_client import SuppliersServiceClient
from shared.clients.recipes_client import RecipesServiceClient
from shared.clients.ai_insights_client import AIInsightsClient
from shared.clients.training_client import TrainingServiceClient
logger = logging.getLogger(__name__)
@@ -27,7 +30,8 @@ class OrchestrationSaga:
Saga coordinator for orchestration workflow.
Workflow Steps:
0. Fetch shared data snapshot (inventory, suppliers, recipes) - NEW
0. Fetch shared data snapshot (inventory, suppliers, recipes)
0.5. Generate AI insights from ML orchestrators
1. Generate forecasts
2. Generate production schedule
3. Generate procurement plan
@@ -44,7 +48,12 @@ class OrchestrationSaga:
notification_client: NotificationServiceClient,
inventory_client: InventoryServiceClient,
suppliers_client: SuppliersServiceClient,
recipes_client: RecipesServiceClient
recipes_client: RecipesServiceClient,
ai_insights_client: Optional[AIInsightsClient] = None,
training_client: Optional[TrainingServiceClient] = None,
use_ai_enhancement: bool = False,
ai_insights_base_url: str = "http://ai-insights-service:8000",
ai_insights_min_confidence: int = 70
):
"""
Initialize orchestration saga.
@@ -54,9 +63,14 @@ class OrchestrationSaga:
production_client: Production service client
procurement_client: Procurement service client
notification_client: Notification service client
inventory_client: Inventory service client (NEW)
suppliers_client: Suppliers service client (NEW)
recipes_client: Recipes service client (NEW)
inventory_client: Inventory service client
suppliers_client: Suppliers service client
recipes_client: Recipes service client
ai_insights_client: AI Insights service client
training_client: Training service client
use_ai_enhancement: Enable AI-enhanced orchestration
ai_insights_base_url: Base URL for AI Insights Service
ai_insights_min_confidence: Minimum confidence threshold for applying insights
"""
self.forecast_client = forecast_client
self.production_client = production_client
@@ -65,6 +79,25 @@ class OrchestrationSaga:
self.inventory_client = inventory_client
self.suppliers_client = suppliers_client
self.recipes_client = recipes_client
self.ai_insights_client = ai_insights_client or AIInsightsClient(
base_url=ai_insights_base_url
)
self.training_client = training_client
self.use_ai_enhancement = use_ai_enhancement
# Initialize AI enhancer if enabled
self.ai_enhancer = None
if use_ai_enhancement:
try:
from app.ml.ai_enhanced_orchestrator import AIEnhancedOrchestrator
self.ai_enhancer = AIEnhancedOrchestrator(
ai_insights_base_url=ai_insights_base_url,
min_confidence_threshold=ai_insights_min_confidence
)
logger.info("AI-enhanced orchestration enabled")
except ImportError as e:
logger.warning(f"AI enhancement requested but could not be loaded: {e}")
self.use_ai_enhancement = False
async def execute_orchestration(
self,
@@ -108,6 +141,14 @@ class OrchestrationSaga:
action_args=(tenant_id, context)
)
# Step 0.5: Generate AI insights (NEW)
saga.add_step(
name="generate_ai_insights",
action=self._generate_ai_insights,
compensation=None, # No compensation needed for read-only insight generation
action_args=(tenant_id, context)
)
# Step 1: Generate forecasts
saga.add_step(
name="generate_forecasts",
@@ -140,6 +181,14 @@ class OrchestrationSaga:
action_args=(tenant_id, context)
)
# Step 5: Validate previous day's forecasts
saga.add_step(
name="validate_previous_forecasts",
action=self._validate_previous_forecasts,
compensation=None, # No compensation needed for validation
action_args=(tenant_id, context)
)
# Execute saga
success, final_result, error = await saga.execute()
@@ -233,24 +282,249 @@ class OrchestrationSaga:
'count': len(recipes_data) if recipes_data else 0
}
# NEW: Fetch upcoming events for next 7 days
try:
from datetime import timedelta
# Note: Implement when event calendar service is ready
# For now, initialize as empty
context['event_calendar'] = []
logger.info("Event calendar: not yet implemented, using empty list")
except Exception as e:
logger.warning(f"Could not fetch events: {e}")
context['event_calendar'] = []
# NEW: Placeholder for traffic predictions (Phase 5)
try:
# Note: Implement traffic forecasting in Phase 5
# For now, initialize as empty DataFrame
import pandas as pd
context['traffic_predictions'] = pd.DataFrame()
logger.info("Traffic predictions: not yet implemented, using empty DataFrame")
except Exception as e:
logger.warning(f"Could not fetch traffic predictions: {e}")
import pandas as pd
context['traffic_predictions'] = pd.DataFrame()
logger.info(
f"Shared data snapshot fetched successfully: "
f"{len(inventory_data)} ingredients, "
f"{len(suppliers_data)} suppliers, "
f"{len(recipes_data)} recipes"
f"{len(recipes_data)} recipes, "
f"{len(context.get('event_calendar', []))} events"
)
return {
'success': True,
'inventory_count': len(inventory_data) if inventory_data else 0,
'suppliers_count': len(suppliers_data) if suppliers_data else 0,
'recipes_count': len(recipes_data) if recipes_data else 0
'recipes_count': len(recipes_data) if recipes_data else 0,
'events_count': len(context.get('event_calendar', []))
}
except Exception as e:
logger.error(f"Failed to fetch shared data snapshot for tenant {tenant_id}: {e}")
raise
# ========================================================================
# Step 0.5: Generate AI Insights (NEW)
# ========================================================================
async def _generate_ai_insights(
    self,
    tenant_id: str,
    context: Dict[str, Any]
) -> Dict[str, Any]:
    """
    Trigger the ML insight generators of the downstream services concurrently.

    Five client calls are awaited together through ``asyncio.gather``:
    - Safety stock optimization (``self.inventory_client``)
    - Production yield prediction (``self.production_client``)
    - Supplier performance analysis (``self.procurement_client``)
    - Price forecasting (``self.procurement_client``)
    - Dynamic rules generation (``self.forecast_client``)

    A failure of one generator is recorded under ``errors`` and never
    prevents the remaining generators from being aggregated. This method
    does not raise for generator failures; the orchestration continues.

    Args:
        tenant_id: Tenant ID forwarded to every generator.
        context: Execution context. ``ai_insights_generated`` and
            ``ai_insights_posted`` are written into it.

    Returns:
        Dictionary with ``total_insights_generated``,
        ``total_insights_posted``, ``insights_by_source`` (source name ->
        posted count) and ``errors`` (list of error strings).
    """
    logger.info(f"Generating AI insights for tenant {tenant_id} via HTTP endpoints")

    insights_results = {
        'total_insights_generated': 0,
        'total_insights_posted': 0,
        'insights_by_source': {},
        'errors': []
    }

    async def run_insight_task(source, label, call, extra_keys):
        """Await one generator and normalise its outcome to ``(source, data)``."""
        try:
            # ``call`` is invoked inside the try so that even a missing or
            # misconfigured client only fails this one source.
            result = await call()
            if result and result.get('success'):
                data = {
                    'insights_posted': result.get('total_insights_posted', 0),
                    'insights_generated': result.get('total_insights_generated', 0)
                }
                for key in extra_keys:
                    data[key] = result.get(key, 0)
                return (source, data)
            message = result.get('message', 'Unknown error') if result else 'Service returned None'
            return (source, {'error': message, 'insights_posted': 0})
        except Exception as e:
            logger.error(f"{label} failed: {e}")
            return (source, {'error': str(e), 'insights_posted': 0})

    try:
        ml_tasks = [
            # Task 1: Safety Stock Optimization (inventory service)
            run_insight_task(
                'safety_stock',
                'Safety stock optimization',
                lambda: self.inventory_client.trigger_safety_stock_optimization(
                    tenant_id=tenant_id,
                    product_ids=None,  # Analyze all products
                    lookback_days=90,
                    min_history_days=30
                ),
                ('products_optimized',)
            ),
            # Task 2: Production Yield Analysis (production service)
            run_insight_task(
                'yield_analysis',
                'Yield prediction',
                lambda: self.production_client.trigger_yield_prediction(
                    tenant_id=tenant_id,
                    recipe_ids=None,  # Analyze all recipes
                    lookback_days=90,
                    min_history_runs=30
                ),
                ('recipes_analyzed',)
            ),
            # Task 3: Supplier Performance Analysis (procurement service)
            run_insight_task(
                'supplier_analysis',
                'Supplier analysis',
                lambda: self.procurement_client.trigger_supplier_analysis(
                    tenant_id=tenant_id,
                    supplier_ids=None,  # Analyze all suppliers
                    lookback_days=180,
                    min_orders=10
                ),
                ('suppliers_analyzed',)
            ),
            # Task 4: Price Forecasting (procurement service)
            run_insight_task(
                'price_forecast',
                'Price forecasting',
                lambda: self.procurement_client.trigger_price_forecasting(
                    tenant_id=tenant_id,
                    ingredient_ids=None,  # Forecast all ingredients
                    lookback_days=180,
                    forecast_horizon_days=30
                ),
                ('ingredients_forecasted', 'buy_now_recommendations')
            ),
            # Task 5: Dynamic Rules Learning (forecasting service)
            run_insight_task(
                'rules_learning',
                'Rules generation',
                lambda: self.forecast_client.trigger_rules_generation(
                    tenant_id=tenant_id,
                    product_ids=None,  # Analyze all products
                    lookback_days=90,
                    min_samples=10
                ),
                ('products_analyzed',)
            ),
        ]

        # Run all ML insight generation tasks in parallel
        logger.info(f"Triggering {len(ml_tasks)} ML insight endpoints in parallel")
        results = await asyncio.gather(*ml_tasks, return_exceptions=True)

        for result in results:
            # BaseException (not Exception): gather() hands back a cancelled
            # child as asyncio.CancelledError, which is not an Exception
            # subclass and would otherwise vanish without a trace.
            if isinstance(result, BaseException):
                logger.error(f"ML insight task failed with exception: {result}")
                insights_results['errors'].append(str(result) or type(result).__name__)
            elif isinstance(result, tuple) and len(result) == 2:
                source, data = result
                if 'error' in data:
                    insights_results['errors'].append(f"{source}: {data['error']}")
                else:
                    # Treat an explicit None count as zero so that one odd
                    # payload cannot abort aggregation for every source.
                    posted = data.get('insights_posted') or 0
                    generated = data.get('insights_generated')
                    if generated is None:
                        generated = posted
                    insights_results['total_insights_posted'] += posted
                    insights_results['total_insights_generated'] += generated
                    insights_results['insights_by_source'][source] = posted
                    logger.info(f"{source}: {posted} insights posted")

        # Store insights count in context
        context['ai_insights_generated'] = insights_results['total_insights_generated']
        context['ai_insights_posted'] = insights_results['total_insights_posted']

        logger.info(
            f"AI insights generation complete: "
            f"{insights_results['total_insights_posted']} insights posted from "
            f"{len(insights_results['insights_by_source'])} sources"
        )
        return insights_results

    except Exception as e:
        logger.error(f"Failed to generate AI insights for tenant {tenant_id}: {e}", exc_info=True)
        # Don't fail the orchestration if insights generation fails:
        # record the error and report zero insights to downstream steps.
        insights_results['errors'].append(str(e))
        context['ai_insights_generated'] = 0
        context['ai_insights_posted'] = 0
        return insights_results
# ========================================================================
# Step 1: Generate Forecasts
# ========================================================================
@@ -276,6 +550,10 @@ class OrchestrationSaga:
# Call forecast service
result = await self.forecast_client.generate_forecasts(tenant_id)
if not result:
logger.error(f"Forecast service returned None for tenant {tenant_id}")
raise Exception("Forecast service returned None")
# Store forecast ID in context
forecast_id = result.get('forecast_id') or result.get('id')
context['forecast_id'] = forecast_id
@@ -349,6 +627,10 @@ class OrchestrationSaga:
recipes_data=recipes_snapshot # NEW: Pass cached recipes
)
if not result:
logger.error(f"Production service returned None for tenant {tenant_id}")
raise Exception("Production service returned None")
# Store schedule ID in context
schedule_id = result.get('schedule_id') or result.get('id')
context['production_schedule_id'] = schedule_id
@@ -435,6 +717,10 @@ class OrchestrationSaga:
recipes_data=recipes_snapshot # NEW: Pass cached recipes
)
if not result:
logger.error(f"Procurement service returned None for tenant {tenant_id}")
raise Exception("Procurement service returned None")
# Store plan ID in context
plan_id = result.get('plan_id') or result.get('id')
context['procurement_plan_id'] = plan_id
@@ -523,12 +809,16 @@ class OrchestrationSaga:
notification_data=notification_data
)
notifications_sent = result.get('notifications_sent', 0)
context['notifications_sent'] = notifications_sent
if result:
notifications_sent = result.get('notifications_sent', 0)
context['notifications_sent'] = notifications_sent
logger.info(f"Notifications sent successfully: {notifications_sent}")
logger.info(f"Notifications sent successfully: {notifications_sent}")
return result
return result
else:
logger.warning(f"Notification service returned None for tenant {tenant_id}")
return {'notifications_sent': 0, 'error': 'Notification service returned None'}
except Exception as e:
# Log error but don't fail the saga for notification failures
@@ -536,6 +826,140 @@ class OrchestrationSaga:
# Return empty result instead of raising
return {'notifications_sent': 0, 'error': str(e)}
# ========================================================================
# Step 5: Validate Previous Day's Forecasts
# ========================================================================
async def _validate_previous_forecasts(
    self,
    tenant_id: str,
    context: Dict[str, Any]
) -> Dict[str, Any]:
    """
    Compare yesterday's forecasts with reality and react to poor accuracy.

    The forecasting client is asked to validate the previous calendar day.
    The returned accuracy figures (MAPE, RMSE, MAE) are stored in
    ``context['validation_metrics']`` and posted, best effort, to the AI
    Insights client. Every entry of ``poor_accuracy_products`` that carries
    a ``product_id`` gets a retraining request through the training client.

    Args:
        tenant_id: Tenant ID
        context: Execution context; receives ``validation_metrics`` and
            ``retraining_triggered``

    Returns:
        ``{'validated': True, 'metrics': ..., 'retraining_triggered': n}``
        on success, ``{'validated': False, 'reason': 'no_data'}`` when the
        service returns nothing, or ``{'validated': False, 'error': ...,
        'retraining_triggered': 0}`` when validation itself fails.
    """
    from datetime import date, datetime, timedelta
    from uuid import UUID

    logger.info(f"Validating previous day's forecasts for tenant {tenant_id}")

    try:
        yesterday = date.today() - timedelta(days=1)

        # Ask the forecasting service to score yesterday's predictions
        report = await self.forecast_client.validate_forecasts(
            tenant_id=tenant_id,
            date=yesterday
        )
        if not report:
            logger.warning(f"No validation results returned for tenant {tenant_id}")
            return {'validated': False, 'reason': 'no_data'}

        mape = report.get('overall_mape', 0)
        rmse = report.get('overall_rmse', 0)
        mae = report.get('overall_mae', 0)
        validated_count = report.get('products_validated', 0)
        weak_products = report.get('poor_accuracy_products', [])

        context['validation_metrics'] = {
            'mape': mape,
            'rmse': rmse,
            'mae': mae,
            'products_validated': validated_count,
            'validation_date': yesterday.isoformat()
        }

        logger.info(
            f"Validation complete for tenant {tenant_id}: "
            f"MAPE={mape:.2f}%, RMSE={rmse:.2f}, MAE={mae:.2f}, "
            f"Products={validated_count}"
        )

        # Publishing the metrics is best effort: a failure is only logged
        try:
            accuracy_payload = {
                'overall_mape': mape,
                'overall_rmse': rmse,
                'overall_mae': mae,
                'products_validated': validated_count,
                'poor_accuracy_products': weak_products
            }
            await self.ai_insights_client.post_accuracy_metrics(
                tenant_id=UUID(tenant_id),
                validation_date=datetime.combine(yesterday, datetime.min.time()),
                metrics=accuracy_payload
            )
            logger.info("Posted accuracy metrics to AI Insights Service")
        except Exception as post_error:
            logger.warning(f"Could not post accuracy metrics to AI Insights: {post_error}")

        if not weak_products:
            logger.info("All products have acceptable accuracy (MAPE <= 30%)")
            context['retraining_triggered'] = 0
        else:
            logger.warning(
                f"Found {len(weak_products)} products with MAPE > 30%, "
                f"triggering retraining"
            )

            triggered = 0
            for entry in weak_products:
                product_id = entry.get('product_id')
                product_mape = entry.get('mape', 0)
                if not product_id:
                    # Nothing to retrain without a product reference
                    continue

                try:
                    await self.training_client.trigger_retrain(
                        tenant_id=tenant_id,
                        inventory_product_id=product_id,
                        reason='accuracy_degradation',
                        metadata={
                            'previous_mape': product_mape,
                            'validation_date': yesterday.isoformat(),
                            'triggered_by': 'orchestration_validation'
                        }
                    )
                    triggered += 1
                    logger.info(
                        f"Triggered retraining for product {product_id} "
                        f"(MAPE={product_mape:.2f}%)"
                    )
                except Exception as retrain_error:
                    # One failed request must not stop the remaining products
                    logger.error(
                        f"Failed to trigger retraining for product {product_id}: {retrain_error}"
                    )

            context['retraining_triggered'] = triggered
            logger.info(f"Triggered retraining for {triggered} products")

        return {
            'validated': True,
            'metrics': context['validation_metrics'],
            'retraining_triggered': context.get('retraining_triggered', 0)
        }

    except Exception as e:
        # Don't fail the saga if validation fails
        logger.warning(f"Forecast validation failed for tenant {tenant_id}: {e}")
        return {
            'validated': False,
            'error': str(e),
            'retraining_triggered': 0
        }
# ========================================================================
# Utility Methods
# ========================================================================

View File

@@ -26,7 +26,11 @@ from shared.clients.forecast_client import ForecastServiceClient
from shared.clients.production_client import ProductionServiceClient
from shared.clients.procurement_client import ProcurementServiceClient
from shared.clients.notification_client import NotificationServiceClient
from shared.utils.tenant_settings_client import TenantSettingsClient
from shared.clients.tenant_client import TenantServiceClient
from shared.clients.inventory_client import InventoryServiceClient
from shared.clients.suppliers_client import SuppliersServiceClient
from shared.clients.recipes_client import RecipesServiceClient
from shared.clients.training_client import TrainingServiceClient
from shared.utils.circuit_breaker import CircuitBreaker, CircuitBreakerOpenError
from app.core.config import settings
from app.repositories.orchestration_run_repository import OrchestrationRunRepository
@@ -46,11 +50,16 @@ class OrchestratorSchedulerService(BaseAlertService):
super().__init__(config)
# Service clients
self.forecast_client = ForecastServiceClient(config)
self.production_client = ProductionServiceClient(config)
self.procurement_client = ProcurementServiceClient(config)
self.notification_client = NotificationServiceClient(config)
self.tenant_settings_client = TenantSettingsClient(tenant_service_url=config.TENANT_SERVICE_URL)
self.forecast_client = ForecastServiceClient(config, "orchestrator-service")
self.production_client = ProductionServiceClient(config, "orchestrator-service")
self.procurement_client = ProcurementServiceClient(config, "orchestrator-service")
self.notification_client = NotificationServiceClient(config, "orchestrator-service")
self.tenant_client = TenantServiceClient(config)
self.training_client = TrainingServiceClient(config, "orchestrator-service")
# Clients for centralized data fetching
self.inventory_client = InventoryServiceClient(config, "orchestrator-service")
self.suppliers_client = SuppliersServiceClient(config, "orchestrator-service")
self.recipes_client = RecipesServiceClient(config, "orchestrator-service")
# Circuit breakers for each service
self.forecast_breaker = CircuitBreaker(
@@ -183,11 +192,19 @@ class OrchestratorSchedulerService(BaseAlertService):
# Set timeout for entire tenant orchestration
async with asyncio.timeout(settings.TENANT_TIMEOUT_SECONDS):
# Execute orchestration using Saga pattern
# AI enhancement is enabled via ORCHESTRATION_USE_AI_INSIGHTS config
saga = OrchestrationSaga(
forecast_client=self.forecast_client,
production_client=self.production_client,
procurement_client=self.procurement_client,
notification_client=self.notification_client
notification_client=self.notification_client,
inventory_client=self.inventory_client,
suppliers_client=self.suppliers_client,
recipes_client=self.recipes_client,
training_client=self.training_client,
use_ai_enhancement=settings.ORCHESTRATION_USE_AI_INSIGHTS,
ai_insights_base_url=settings.AI_INSIGHTS_SERVICE_URL,
ai_insights_min_confidence=settings.AI_INSIGHTS_MIN_CONFIDENCE
)
result = await saga.execute_orchestration(
@@ -238,7 +255,7 @@ class OrchestratorSchedulerService(BaseAlertService):
# Call Tenant Service with circuit breaker
tenants_data = await self.tenant_breaker.call(
self.tenant_settings_client.get_active_tenants
self.tenant_client.get_active_tenants
)
if not tenants_data:

View File

@@ -1,392 +0,0 @@
"""
Orchestrator Scheduler Service - REFACTORED
Coordinates daily auto-generation workflow: Forecasting → Production → Procurement → Notifications
CHANGES FROM ORIGINAL:
- Removed all TODO/stub code
- Integrated OrchestrationSaga for error handling and compensation
- Added circuit breakers for all service calls
- Implemented real Forecasting Service integration
- Implemented real Production Service integration
- Implemented real Tenant Service integration
- Implemented real Notification Service integration
- NO backwards compatibility, NO feature flags - complete rewrite
"""
import asyncio
import uuid
from datetime import datetime, date, timezone
from decimal import Decimal
from typing import List, Dict, Any, Optional
import structlog
from apscheduler.triggers.cron import CronTrigger
from shared.alerts.base_service import BaseAlertService
from shared.clients.forecast_client import ForecastServiceClient
from shared.clients.production_client import ProductionServiceClient
from shared.clients.procurement_client import ProcurementServiceClient
from shared.clients.notification_client import NotificationServiceClient
from shared.clients.tenant_settings_client import TenantSettingsClient
from shared.clients.inventory_client import InventoryServiceClient
from shared.clients.suppliers_client import SuppliersServiceClient
from shared.clients.recipes_client import RecipesServiceClient
from shared.utils.circuit_breaker import CircuitBreaker, CircuitBreakerOpenError
from app.core.config import settings
from app.repositories.orchestration_run_repository import OrchestrationRunRepository
from app.models.orchestration_run import OrchestrationStatus
from app.services.orchestration_saga import OrchestrationSaga
logger = structlog.get_logger()
class OrchestratorSchedulerService(BaseAlertService):
    """
    Orchestrator Service extending BaseAlertService.

    Handles automated daily orchestration of forecasting, production, and
    procurement by running one ``OrchestrationSaga`` per active tenant and
    persisting the outcome of every run through
    ``OrchestrationRunRepository``.
    """

    def __init__(self, config):
        """Create the service clients and circuit breakers from ``config``."""
        super().__init__(config)

        # Service clients
        self.forecast_client = ForecastServiceClient(config)
        self.production_client = ProductionServiceClient(config)
        self.procurement_client = ProcurementServiceClient(config)
        self.notification_client = NotificationServiceClient(config)
        self.tenant_settings_client = TenantSettingsClient(config)

        # Clients for centralized data fetching
        self.inventory_client = InventoryServiceClient(config)
        self.suppliers_client = SuppliersServiceClient(config)
        self.recipes_client = RecipesServiceClient(config)

        # Circuit breakers for each service
        self.forecast_breaker = CircuitBreaker(
            failure_threshold=5,
            timeout_duration=60,
            success_threshold=2
        )
        self.production_breaker = CircuitBreaker(
            failure_threshold=5,
            timeout_duration=60,
            success_threshold=2
        )
        self.procurement_breaker = CircuitBreaker(
            failure_threshold=5,
            timeout_duration=60,
            success_threshold=2
        )
        self.tenant_breaker = CircuitBreaker(
            failure_threshold=3,
            timeout_duration=30,
            success_threshold=2
        )

    def setup_scheduled_checks(self):
        """
        Configure scheduled orchestration jobs.

        The five-field cron expression in ``settings.ORCHESTRATION_SCHEDULE``
        drives the job. An expression with any other number of fields is
        replaced by the default "30 5 * * *" (5:30 AM daily), and the
        replacement is logged so a misconfiguration is visible.
        """
        cron_parts = settings.ORCHESTRATION_SCHEDULE.split()
        if len(cron_parts) == 5:
            minute, hour, day, month, day_of_week = cron_parts
        else:
            logger.warning("Invalid ORCHESTRATION_SCHEDULE, falling back to default",
                           schedule=settings.ORCHESTRATION_SCHEDULE,
                           default="30 5 * * *")
            minute, hour, day, month, day_of_week = "30", "5", "*", "*", "*"

        # Schedule daily orchestration
        self.scheduler.add_job(
            func=self.run_daily_orchestration,
            trigger=CronTrigger(
                minute=minute,
                hour=hour,
                day=day,
                month=month,
                day_of_week=day_of_week
            ),
            id="daily_orchestration",
            name="Daily Orchestration (Forecasting → Production → Procurement)",
            misfire_grace_time=300,  # 5 minutes grace period
            max_instances=1  # Only one instance running at a time
        )

        logger.info("Orchestrator scheduler configured",
                    schedule=settings.ORCHESTRATION_SCHEDULE)

    async def run_daily_orchestration(self):
        """
        Main orchestration workflow - runs daily.

        Executes for all active tenants concurrently, bounded by
        ``settings.MAX_CONCURRENT_TENANTS``. Does nothing when this instance
        is not the leader or orchestration is disabled via config.
        """
        if not self.is_leader:
            logger.debug("Not leader, skipping orchestration")
            return

        if not settings.ORCHESTRATION_ENABLED:
            logger.info("Orchestration disabled via config")
            return

        logger.info("Starting daily orchestration workflow")

        try:
            # Get all active tenants
            active_tenants = await self._get_active_tenants()
            if not active_tenants:
                logger.warning("No active tenants found for orchestration")
                return

            logger.info("Processing tenants",
                        total_tenants=len(active_tenants))

            # Process tenants with concurrency limit
            semaphore = asyncio.Semaphore(settings.MAX_CONCURRENT_TENANTS)

            async def process_with_semaphore(tenant_id):
                async with semaphore:
                    return await self._orchestrate_tenant(tenant_id)

            # Process all tenants in parallel (but limited by semaphore)
            tasks = [process_with_semaphore(tenant_id) for tenant_id in active_tenants]
            results = await asyncio.gather(*tasks, return_exceptions=True)

            # A tenant is successful only when its task returned a truthy
            # value. BaseException is checked (not just Exception) because a
            # cancelled task comes back as asyncio.CancelledError, which is
            # truthy and would otherwise be counted as a success.
            successful = sum(
                1 for r in results
                if not isinstance(r, BaseException) and r
            )
            failed = len(results) - successful

            logger.info("Daily orchestration completed",
                        total_tenants=len(active_tenants),
                        successful=successful,
                        failed=failed)

        except Exception as e:
            logger.error("Error in daily orchestration",
                         error=str(e), exc_info=True)

    async def _orchestrate_tenant(self, tenant_id: uuid.UUID) -> bool:
        """
        Orchestrate workflow for a single tenant using Saga pattern.

        A run record is created first; the saga is then executed under
        ``settings.TENANT_TIMEOUT_SECONDS`` and the record is updated with
        the outcome.

        Returns:
            True if the saga reported success, False otherwise (saga
            failure, timeout, or any exception while executing it).
        """
        logger.info("Starting orchestration for tenant", tenant_id=str(tenant_id))

        # Create orchestration run record
        async with self.db_manager.get_session() as session:
            repo = OrchestrationRunRepository(session)
            run_number = await repo.generate_run_number()

            run = await repo.create_run({
                'run_number': run_number,
                'tenant_id': tenant_id,
                'status': OrchestrationStatus.running,
                'run_type': 'scheduled',
                'started_at': datetime.now(timezone.utc),
                'triggered_by': 'scheduler'
            })
            await session.commit()
            run_id = run.id

        try:
            # Set timeout for entire tenant orchestration
            async with asyncio.timeout(settings.TENANT_TIMEOUT_SECONDS):
                # Execute orchestration using Saga pattern
                saga = OrchestrationSaga(
                    forecast_client=self.forecast_client,
                    production_client=self.production_client,
                    procurement_client=self.procurement_client,
                    notification_client=self.notification_client,
                    inventory_client=self.inventory_client,
                    suppliers_client=self.suppliers_client,
                    recipes_client=self.recipes_client
                )

                result = await saga.execute_orchestration(
                    tenant_id=str(tenant_id),
                    orchestration_run_id=str(run_id)
                )

                if result['success']:
                    # Update orchestration run with saga results
                    await self._complete_orchestration_run_with_saga(
                        run_id,
                        result
                    )
                    logger.info("Tenant orchestration completed successfully",
                                tenant_id=str(tenant_id), run_id=str(run_id))
                    return True
                else:
                    # Saga failed (with compensation)
                    await self._mark_orchestration_failed(
                        run_id,
                        result.get('error', 'Saga execution failed')
                    )
                    return False

        except asyncio.TimeoutError:
            logger.error("Tenant orchestration timeout",
                         tenant_id=str(tenant_id),
                         timeout_seconds=settings.TENANT_TIMEOUT_SECONDS)
            await self._mark_orchestration_failed(run_id, "Timeout exceeded")
            return False

        except Exception as e:
            logger.error("Tenant orchestration failed",
                         tenant_id=str(tenant_id),
                         error=str(e), exc_info=True)
            await self._mark_orchestration_failed(run_id, str(e))
            return False

    async def _get_active_tenants(self) -> List[uuid.UUID]:
        """
        Get list of active tenants for orchestration.

        Tenants are fetched through the tenant circuit breaker. Entries
        without an ID are ignored, and an entry whose ID is not a valid UUID
        string is skipped with a warning, so one malformed record cannot
        block orchestration for every other tenant.

        Returns:
            Tenant IDs as ``uuid.UUID``; an empty list when the breaker is
            open or the lookup fails.
        """
        try:
            logger.info("Fetching active tenants from Tenant Service")

            # Call Tenant Service with circuit breaker
            tenants_data = await self.tenant_breaker.call(
                self.tenant_settings_client.get_active_tenants
            )

            if not tenants_data:
                logger.warning("Tenant Service returned no active tenants")
                return []

            # Extract tenant IDs
            tenant_ids = []
            for tenant in tenants_data:
                tenant_id = tenant.get('id') or tenant.get('tenant_id')
                if not tenant_id:
                    continue
                # Convert string to UUID if needed
                if isinstance(tenant_id, str):
                    try:
                        tenant_id = uuid.UUID(tenant_id)
                    except ValueError as e:
                        logger.warning("Skipping tenant with invalid ID",
                                       tenant_id=tenant_id, error=str(e))
                        continue
                tenant_ids.append(tenant_id)

            logger.info(f"Found {len(tenant_ids)} active tenants for orchestration")
            return tenant_ids

        except CircuitBreakerOpenError:
            logger.error("Circuit breaker open for Tenant Service, skipping orchestration")
            return []

        except Exception as e:
            logger.error("Error getting active tenants", error=str(e), exc_info=True)
            return []

    async def _complete_orchestration_run_with_saga(
        self,
        run_id: uuid.UUID,
        saga_result: Dict[str, Any]
    ):
        """
        Complete orchestration run with saga results.

        Nothing is written when no run with ``run_id`` exists.

        Args:
            run_id: Orchestration run ID
            saga_result: Result from saga execution
        """
        async with self.db_manager.get_session() as session:
            repo = OrchestrationRunRepository(session)
            run = await repo.get_run_by_id(run_id)

            if run:
                started_at = run.started_at
                completed_at = datetime.now(timezone.utc)
                duration = (completed_at - started_at).total_seconds()

                # Extract results from saga
                forecast_id = saga_result.get('forecast_id')
                production_schedule_id = saga_result.get('production_schedule_id')
                procurement_plan_id = saga_result.get('procurement_plan_id')
                notifications_sent = saga_result.get('notifications_sent', 0)

                # Get saga summary
                saga_summary = saga_result.get('saga_summary', {})
                total_steps = saga_summary.get('total_steps', 0)
                completed_steps = saga_summary.get('completed_steps', 0)

                await repo.update_run(run_id, {
                    'status': OrchestrationStatus.completed,
                    'completed_at': completed_at,
                    'duration_seconds': int(duration),
                    'forecast_id': forecast_id,
                    'forecasting_status': 'success',
                    'forecasting_completed_at': completed_at,
                    'forecasts_generated': 1,  # Placeholder
                    'production_schedule_id': production_schedule_id,
                    'production_status': 'success',
                    'production_completed_at': completed_at,
                    'production_batches_created': 0,  # Placeholder
                    'procurement_plan_id': procurement_plan_id,
                    'procurement_status': 'success',
                    'procurement_completed_at': completed_at,
                    'procurement_plans_created': 1,
                    'purchase_orders_created': 0,  # Placeholder
                    'notification_status': 'success',
                    'notification_completed_at': completed_at,
                    'notifications_sent': notifications_sent,
                    'saga_steps_total': total_steps,
                    'saga_steps_completed': completed_steps
                })
                await session.commit()

    async def _mark_orchestration_failed(self, run_id: uuid.UUID, error_message: str):
        """Mark orchestration run as failed; no-op when the run does not exist."""
        async with self.db_manager.get_session() as session:
            repo = OrchestrationRunRepository(session)
            run = await repo.get_run_by_id(run_id)

            if run:
                started_at = run.started_at
                completed_at = datetime.now(timezone.utc)
                duration = (completed_at - started_at).total_seconds()

                await repo.update_run(run_id, {
                    'status': OrchestrationStatus.failed,
                    'completed_at': completed_at,
                    'duration_seconds': int(duration),
                    'error_message': error_message
                })
                await session.commit()

    # Manual trigger for testing
    async def trigger_orchestration_for_tenant(
        self,
        tenant_id: uuid.UUID,
        test_scenario: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Manually trigger orchestration for a tenant (for testing).

        Args:
            tenant_id: Tenant ID to orchestrate
            test_scenario: Optional test scenario (full, production_only,
                procurement_only). It is only logged and echoed back in the
                result; the same full orchestration runs regardless.

        Returns:
            Dict with orchestration results
        """
        logger.info("Manual orchestration trigger",
                    tenant_id=str(tenant_id),
                    test_scenario=test_scenario)

        success = await self._orchestrate_tenant(tenant_id)

        return {
            'success': success,
            'tenant_id': str(tenant_id),
            'test_scenario': test_scenario,
            'message': 'Orchestration completed' if success else 'Orchestration failed'
        }

    def get_circuit_breaker_stats(self) -> Dict[str, Any]:
        """Get circuit breaker statistics for monitoring."""
        return {
            'forecast_service': self.forecast_breaker.get_stats(),
            'production_service': self.production_breaker.get_stats(),
            'procurement_service': self.procurement_breaker.get_stats(),
            'tenant_service': self.tenant_breaker.get_stats()
        }