Initial commit - production deployment
services/production/app/ml/yield_insights_orchestrator.py (new file)
@@ -0,0 +1,516 @@
"""
|
||||
Yield Insights Orchestrator
|
||||
Coordinates yield prediction and insight posting
|
||||
"""
|
||||
|
||||
import pandas as pd
|
||||
from typing import Dict, List, Any, Optional
|
||||
import structlog
|
||||
from datetime import datetime
|
||||
from uuid import UUID
|
||||
import sys
|
||||
import os
|
||||
|
||||
# Add shared clients to path
|
||||
sys.path.append(os.path.join(os.path.dirname(__file__), '../../../..'))
|
||||
from shared.clients.ai_insights_client import AIInsightsClient
|
||||
from shared.messaging import UnifiedEventPublisher
|
||||
|
||||
from app.ml.yield_predictor import YieldPredictor
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
|
||||
class YieldInsightsOrchestrator:
|
||||
"""
|
||||
Orchestrates yield prediction and insight generation workflow.
|
||||
|
||||
Workflow:
|
||||
1. Predict yield for upcoming production run or analyze historical performance
|
||||
2. Generate insights for yield optimization opportunities
|
||||
3. Post insights to AI Insights Service
|
||||
4. Publish recommendation events to RabbitMQ
|
||||
5. Provide yield predictions for production planning
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
ai_insights_base_url: str = "http://ai-insights-service:8000",
|
||||
event_publisher: Optional[UnifiedEventPublisher] = None
|
||||
):
|
||||
self.predictor = YieldPredictor()
|
||||
self.ai_insights_client = AIInsightsClient(ai_insights_base_url)
|
||||
self.event_publisher = event_publisher
|
||||
|
||||
async def predict_and_post_insights(
|
||||
self,
|
||||
tenant_id: str,
|
||||
recipe_id: str,
|
||||
production_history: pd.DataFrame,
|
||||
production_context: Dict[str, Any],
|
||||
min_history_runs: int = 30
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Complete workflow: Predict yield and post insights.
|
||||
|
||||
Args:
|
||||
tenant_id: Tenant identifier
|
||||
recipe_id: Recipe identifier
|
||||
production_history: Historical production runs
|
||||
production_context: Upcoming production context:
|
||||
- staff_assigned (list of staff IDs)
|
||||
- planned_start_time
|
||||
- batch_size
|
||||
- planned_quantity
|
||||
- unit_cost (optional)
|
||||
- equipment_id (optional)
|
||||
min_history_runs: Minimum production runs required
|
||||
|
||||
Returns:
|
||||
Workflow results with prediction and posted insights
|
||||
"""
|
||||
logger.info(
|
||||
"Starting yield prediction workflow",
|
||||
tenant_id=tenant_id,
|
||||
recipe_id=recipe_id,
|
||||
history_runs=len(production_history)
|
||||
)
|
||||
|
||||
# Step 1: Predict yield
|
||||
prediction_results = await self.predictor.predict_yield(
|
||||
tenant_id=tenant_id,
|
||||
recipe_id=recipe_id,
|
||||
production_history=production_history,
|
||||
production_context=production_context,
|
||||
min_history_runs=min_history_runs
|
||||
)
|
||||
|
||||
logger.info(
|
||||
"Yield prediction complete",
|
||||
recipe_id=recipe_id,
|
||||
predicted_yield=prediction_results.get('predicted_yield'),
|
||||
insights_generated=len(prediction_results.get('insights', []))
|
||||
)
|
||||
|
||||
# Step 2: Enrich insights with tenant_id and recipe context
|
||||
enriched_insights = self._enrich_insights(
|
||||
prediction_results.get('insights', []),
|
||||
tenant_id,
|
||||
recipe_id
|
||||
)
|
||||
|
||||
# Step 3: Post insights to AI Insights Service
|
||||
if enriched_insights:
|
||||
post_results = await self.ai_insights_client.create_insights_bulk(
|
||||
tenant_id=UUID(tenant_id),
|
||||
insights=enriched_insights
|
||||
)
|
||||
|
||||
logger.info(
|
||||
"Yield insights posted to AI Insights Service",
|
||||
recipe_id=recipe_id,
|
||||
total=post_results['total'],
|
||||
successful=post_results['successful'],
|
||||
failed=post_results['failed']
|
||||
)
|
||||
|
||||
# Step 4: Publish recommendation events to RabbitMQ
|
||||
created_insights = post_results.get('created_insights', [])
|
||||
if created_insights:
|
||||
recipe_context = production_context.copy() if production_context else {}
|
||||
recipe_context['recipe_id'] = recipe_id
|
||||
await self._publish_insight_events(
|
||||
tenant_id=tenant_id,
|
||||
insights=created_insights,
|
||||
recipe_context=recipe_context
|
||||
)
|
||||
else:
|
||||
post_results = {'total': 0, 'successful': 0, 'failed': 0}
|
||||
logger.info("No insights to post for recipe", recipe_id=recipe_id)
|
||||
|
||||
# Step 5: Return comprehensive results
|
||||
return {
|
||||
'tenant_id': tenant_id,
|
||||
'recipe_id': recipe_id,
|
||||
'predicted_at': prediction_results['predicted_at'],
|
||||
'history_runs': prediction_results['history_runs'],
|
||||
'baseline_yield': prediction_results.get('baseline_yield'),
|
||||
'predicted_yield': prediction_results.get('predicted_yield'),
|
||||
'prediction_range': prediction_results.get('prediction_range'),
|
||||
'expected_waste': prediction_results.get('expected_waste'),
|
||||
'confidence': prediction_results['confidence'],
|
||||
'factor_analysis': prediction_results.get('factor_analysis'),
|
||||
'patterns': prediction_results.get('patterns', []),
|
||||
'insights_generated': len(enriched_insights),
|
||||
'insights_posted': post_results['successful'],
|
||||
'insights_failed': post_results['failed'],
|
||||
'created_insights': post_results.get('created_insights', [])
|
||||
}
|
||||
|
||||
async def analyze_and_post_insights(
|
||||
self,
|
||||
tenant_id: str,
|
||||
recipe_id: str,
|
||||
production_history: pd.DataFrame,
|
||||
min_history_runs: int = 30
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Analyze historical yield performance and post insights (no prediction).
|
||||
|
||||
Args:
|
||||
tenant_id: Tenant identifier
|
||||
recipe_id: Recipe identifier
|
||||
production_history: Historical production runs
|
||||
min_history_runs: Minimum production runs required
|
||||
|
||||
Returns:
|
||||
Workflow results with analysis and posted insights
|
||||
"""
|
||||
logger.info(
|
||||
"Starting yield analysis workflow",
|
||||
tenant_id=tenant_id,
|
||||
recipe_id=recipe_id,
|
||||
history_runs=len(production_history)
|
||||
)
|
||||
|
||||
# Step 1: Analyze historical yield
|
||||
analysis_results = await self.predictor.analyze_recipe_yield_history(
|
||||
tenant_id=tenant_id,
|
||||
recipe_id=recipe_id,
|
||||
production_history=production_history,
|
||||
min_history_runs=min_history_runs
|
||||
)
|
||||
|
||||
logger.info(
|
||||
"Yield analysis complete",
|
||||
recipe_id=recipe_id,
|
||||
baseline_yield=analysis_results.get('baseline_stats', {}).get('mean_yield'),
|
||||
insights_generated=len(analysis_results.get('insights', []))
|
||||
)
|
||||
|
||||
# Step 2: Enrich insights
|
||||
enriched_insights = self._enrich_insights(
|
||||
analysis_results.get('insights', []),
|
||||
tenant_id,
|
||||
recipe_id
|
||||
)
|
||||
|
||||
# Step 3: Post insights
|
||||
if enriched_insights:
|
||||
post_results = await self.ai_insights_client.create_insights_bulk(
|
||||
tenant_id=UUID(tenant_id),
|
||||
insights=enriched_insights
|
||||
)
|
||||
|
||||
logger.info(
|
||||
"Yield analysis insights posted",
|
||||
recipe_id=recipe_id,
|
||||
total=post_results['total'],
|
||||
successful=post_results['successful']
|
||||
)
|
||||
|
||||
# Step 4: Publish recommendation events to RabbitMQ
|
||||
created_insights = post_results.get('created_insights', [])
|
||||
if created_insights:
|
||||
await self._publish_insight_events(
|
||||
tenant_id=tenant_id,
|
||||
insights=created_insights,
|
||||
recipe_context={'recipe_id': recipe_id}
|
||||
)
|
||||
else:
|
||||
post_results = {'total': 0, 'successful': 0, 'failed': 0}
|
||||
|
||||
return {
|
||||
'tenant_id': tenant_id,
|
||||
'recipe_id': recipe_id,
|
||||
'analyzed_at': analysis_results['analyzed_at'],
|
||||
'history_runs': analysis_results['history_runs'],
|
||||
'baseline_stats': analysis_results.get('baseline_stats'),
|
||||
'factor_analysis': analysis_results.get('factor_analysis'),
|
||||
'patterns': analysis_results.get('patterns', []),
|
||||
'insights_generated': len(enriched_insights),
|
||||
'insights_posted': post_results['successful'],
|
||||
'created_insights': post_results.get('created_insights', [])
|
||||
}
|
||||
|
||||
def _enrich_insights(
|
||||
self,
|
||||
insights: List[Dict[str, Any]],
|
||||
tenant_id: str,
|
||||
recipe_id: str
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Enrich insights with required fields for AI Insights Service.
|
||||
|
||||
Args:
|
||||
insights: Raw insights from predictor
|
||||
tenant_id: Tenant identifier
|
||||
recipe_id: Recipe identifier
|
||||
|
||||
Returns:
|
||||
Enriched insights ready for posting
|
||||
"""
|
||||
enriched = []
|
||||
|
||||
for insight in insights:
|
||||
# Add required tenant_id
|
||||
enriched_insight = insight.copy()
|
||||
enriched_insight['tenant_id'] = tenant_id
|
||||
|
||||
# Add recipe context to metrics
|
||||
if 'metrics_json' not in enriched_insight:
|
||||
enriched_insight['metrics_json'] = {}
|
||||
|
||||
enriched_insight['metrics_json']['recipe_id'] = recipe_id
|
||||
|
||||
# Add source metadata
|
||||
enriched_insight['source_service'] = 'production'
|
||||
enriched_insight['source_model'] = 'yield_predictor'
|
||||
enriched_insight['detected_at'] = datetime.utcnow().isoformat()
|
||||
|
||||
enriched.append(enriched_insight)
|
||||
|
||||
return enriched
|
||||
|
||||
async def _publish_insight_events(
|
||||
self,
|
||||
tenant_id: str,
|
||||
insights: List[Dict[str, Any]],
|
||||
recipe_context: Optional[Dict[str, Any]] = None
|
||||
) -> None:
|
||||
"""
|
||||
Publish recommendation events to RabbitMQ for each insight.
|
||||
|
||||
Args:
|
||||
tenant_id: Tenant identifier
|
||||
insights: List of created insights (with insight_id from AI Insights Service)
|
||||
recipe_context: Optional recipe context (name, id, etc.)
|
||||
"""
|
||||
if not self.event_publisher:
|
||||
logger.warning("Event publisher not configured, skipping event publication")
|
||||
return
|
||||
|
||||
for insight in insights:
|
||||
try:
|
||||
# Determine severity based on confidence and priority
|
||||
confidence = insight.get('confidence', 0)
|
||||
priority = insight.get('priority', 'medium')
|
||||
|
||||
if priority == 'urgent' or confidence >= 90:
|
||||
severity = 'urgent'
|
||||
elif priority == 'high' or confidence >= 70:
|
||||
severity = 'high'
|
||||
elif priority == 'medium' or confidence >= 50:
|
||||
severity = 'medium'
|
||||
else:
|
||||
severity = 'low'
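# Example: priority='medium' with confidence=92 still maps to 'urgent', because the
# confidence threshold is checked alongside the explicit priority at each step.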
|
||||
|
||||
# Build event metadata
|
||||
event_metadata = {
|
||||
'insight_id': insight.get('id'), # From AI Insights Service response
|
||||
'insight_type': insight.get('insight_type'),
|
||||
'recipe_id': insight.get('metrics_json', {}).get('recipe_id'),
|
||||
'recipe_name': recipe_context.get('recipe_name') if recipe_context else None,
|
||||
'predicted_yield': insight.get('metrics_json', {}).get('predicted_yield'),
|
||||
'confidence': confidence,
|
||||
'recommendation': insight.get('recommendation'),
|
||||
'impact_type': insight.get('impact_type'),
|
||||
'impact_value': insight.get('impact_value'),
|
||||
'source_service': 'production',
|
||||
'source_model': 'yield_predictor'
|
||||
}
|
||||
|
||||
# Remove None values
|
||||
event_metadata = {k: v for k, v in event_metadata.items() if v is not None}
|
||||
|
||||
# Publish recommendation event
|
||||
await self.event_publisher.publish_recommendation(
|
||||
event_type='ai_yield_prediction',
|
||||
tenant_id=tenant_id,
|
||||
severity=severity,
|
||||
data=event_metadata
|
||||
)
|
||||
|
||||
logger.info(
|
||||
"Published yield insight recommendation event",
|
||||
tenant_id=tenant_id,
|
||||
insight_id=insight.get('id'),
|
||||
insight_type=insight.get('insight_type'),
|
||||
severity=severity
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"Failed to publish insight event",
|
||||
tenant_id=tenant_id,
|
||||
insight_id=insight.get('id'),
|
||||
error=str(e),
|
||||
exc_info=True
|
||||
)
|
||||
# Don't raise - we don't want to fail the whole workflow if event publishing fails
|
||||
|
||||
async def analyze_all_recipes(
|
||||
self,
|
||||
tenant_id: str,
|
||||
recipes_data: Dict[str, pd.DataFrame],
|
||||
min_history_runs: int = 30
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Analyze yield performance for all recipes for a tenant.
|
||||
|
||||
Args:
|
||||
tenant_id: Tenant identifier
|
||||
recipes_data: Dict of {recipe_id: production_history_df}
|
||||
min_history_runs: Minimum production runs required
|
||||
|
||||
Returns:
|
||||
Comprehensive analysis results
|
||||
"""
|
||||
logger.info(
|
||||
"Analyzing yield for all recipes",
|
||||
tenant_id=tenant_id,
|
||||
recipes=len(recipes_data)
|
||||
)
|
||||
|
||||
all_results = []
|
||||
total_insights_posted = 0
|
||||
recipes_with_issues = []
|
||||
|
||||
# Analyze each recipe
|
||||
for recipe_id, production_history in recipes_data.items():
|
||||
try:
|
||||
results = await self.analyze_and_post_insights(
|
||||
tenant_id=tenant_id,
|
||||
recipe_id=recipe_id,
|
||||
production_history=production_history,
|
||||
min_history_runs=min_history_runs
|
||||
)
|
||||
|
||||
all_results.append(results)
|
||||
total_insights_posted += results['insights_posted']
|
||||
|
||||
# Check for low baseline yield
|
||||
baseline_stats = results.get('baseline_stats')
|
||||
if baseline_stats and baseline_stats.get('mean_yield', 100) < 90:
|
||||
recipes_with_issues.append({
|
||||
'recipe_id': recipe_id,
|
||||
'mean_yield': baseline_stats['mean_yield'],
|
||||
'std_yield': baseline_stats['std_yield']
|
||||
})
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"Error analyzing recipe",
|
||||
recipe_id=recipe_id,
|
||||
error=str(e)
|
||||
)
|
||||
|
||||
# Generate portfolio summary insight if there are yield issues
|
||||
if len(recipes_with_issues) > 0:
|
||||
summary_insight = self._generate_portfolio_summary_insight(
|
||||
tenant_id, recipes_with_issues, all_results
|
||||
)
|
||||
|
||||
if summary_insight:
|
||||
enriched_summary = self._enrich_insights(
|
||||
[summary_insight], tenant_id, 'all_recipes'
|
||||
)
|
||||
|
||||
post_results = await self.ai_insights_client.create_insights_bulk(
|
||||
tenant_id=UUID(tenant_id),
|
||||
insights=enriched_summary
|
||||
)
|
||||
|
||||
total_insights_posted += post_results['successful']
|
||||
|
||||
logger.info(
|
||||
"All recipes yield analysis complete",
|
||||
tenant_id=tenant_id,
|
||||
recipes_analyzed=len(all_results),
|
||||
total_insights_posted=total_insights_posted,
|
||||
recipes_with_issues=len(recipes_with_issues)
|
||||
)
|
||||
|
||||
return {
|
||||
'tenant_id': tenant_id,
|
||||
'analyzed_at': datetime.utcnow().isoformat(),
|
||||
'recipes_analyzed': len(all_results),
|
||||
'recipe_results': all_results,
|
||||
'total_insights_posted': total_insights_posted,
|
||||
'recipes_with_issues': recipes_with_issues
|
||||
}
|
||||
|
||||
def _generate_portfolio_summary_insight(
|
||||
self,
|
||||
tenant_id: str,
|
||||
recipes_with_issues: List[Dict[str, Any]],
|
||||
all_results: List[Dict[str, Any]]
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
"""
|
||||
Generate portfolio-level summary insight.
|
||||
|
||||
Args:
|
||||
tenant_id: Tenant identifier
|
||||
recipes_with_issues: Recipes with low yield
|
||||
all_results: All recipe analysis results
|
||||
|
||||
Returns:
|
||||
Summary insight or None
|
||||
"""
|
||||
if len(recipes_with_issues) == 0:
|
||||
return None
|
||||
|
||||
# Calculate average yield and potential improvement
|
||||
total_recipes = len(all_results)
|
||||
issues_count = len(recipes_with_issues)
|
||||
avg_low_yield = sum(r['mean_yield'] for r in recipes_with_issues) / issues_count
|
||||
|
||||
# Estimate waste reduction potential
|
||||
# Assuming each recipe produces 1000 units/month, €5/unit cost
|
||||
monthly_production = 1000 * issues_count
|
||||
current_waste_pct = 100 - avg_low_yield
|
||||
target_waste_pct = 5 # Target 95% yield
|
||||
|
||||
if current_waste_pct > target_waste_pct:
|
||||
waste_reduction_units = monthly_production * ((current_waste_pct - target_waste_pct) / 100)
|
||||
annual_savings = waste_reduction_units * 12 * 5 # €5 per unit
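# Worked example with the assumed 1000 units/month and €5/unit figures above:
# 4 low-yield recipes averaging 87% yield -> current waste 13%, monthly production 4000 units,
# waste reduction = 4000 * (13% - 5%) = 320 units/month, annual savings = 320 * 12 * €5 = €19,200.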
|
||||
|
||||
return {
|
||||
'type': 'opportunity',
|
||||
'priority': 'high' if issues_count > 3 else 'medium',
|
||||
'category': 'production',
|
||||
'title': f'Production Yield Optimization: {issues_count} Recipes Below 90%',
|
||||
'description': f'{issues_count} of {total_recipes} recipes have average yield below 90% (average {avg_low_yield:.1f}%). Improving to 95% target would reduce waste by {waste_reduction_units:.0f} units/month, saving €{annual_savings:.0f}/year.',
|
||||
'impact_type': 'cost_savings',
|
||||
'impact_value': annual_savings,
|
||||
'impact_unit': 'euros_per_year',
|
||||
'confidence': 75,
|
||||
'metrics_json': {
|
||||
'recipes_analyzed': total_recipes,
|
||||
'recipes_with_issues': issues_count,
|
||||
'avg_low_yield': round(avg_low_yield, 2),
|
||||
'potential_annual_savings': round(annual_savings, 2),
|
||||
'waste_reduction_units_monthly': round(waste_reduction_units, 2)
|
||||
},
|
||||
'actionable': True,
|
||||
'recommendation_actions': [
|
||||
{
|
||||
'label': 'Review Low-Yield Recipes',
|
||||
'action': 'review_yield_insights',
|
||||
'params': {'tenant_id': tenant_id}
|
||||
},
|
||||
{
|
||||
'label': 'Implement Yield Improvements',
|
||||
'action': 'apply_yield_recommendations',
|
||||
'params': {'tenant_id': tenant_id}
|
||||
}
|
||||
],
|
||||
'source_service': 'production',
|
||||
'source_model': 'yield_predictor'
|
||||
}
|
||||
|
||||
return None
|
||||
|
||||
async def close(self):
|
||||
"""Close HTTP client connections."""
|
||||
await self.ai_insights_client.close()
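

# --- Illustrative local sketch (not part of the service wiring) ---
# Shows the shape _enrich_insights produces before posting. The identifiers below are
# hypothetical, and this assumes AIInsightsClient does not connect eagerly on
# construction; no request is made to the AI Insights Service here.
if __name__ == "__main__":
    orchestrator = YieldInsightsOrchestrator()
    sample_insight = {
        'type': 'warning',
        'priority': 'medium',
        'title': 'Low Yield Predicted: 88.0%',
        'confidence': 75,
    }
    enriched = orchestrator._enrich_insights(
        [sample_insight], tenant_id='demo-tenant', recipe_id='recipe-demo'
    )
    # Each insight gains tenant_id, recipe context in metrics_json, and source metadata.
    print(enriched[0]['tenant_id'], enriched[0]['metrics_json'], enriched[0]['source_model'])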
services/production/app/ml/yield_predictor.py (new file)
@@ -0,0 +1,813 @@
"""
|
||||
Production Yield Predictor
|
||||
Predicts actual vs planned yield and identifies waste reduction opportunities
|
||||
"""
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from typing import Dict, List, Any, Optional, Tuple
|
||||
from datetime import datetime, timedelta
|
||||
import structlog
|
||||
from scipy import stats
|
||||
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
|
||||
from sklearn.linear_model import LinearRegression
|
||||
from sklearn.preprocessing import StandardScaler
|
||||
import warnings
|
||||
|
||||
warnings.filterwarnings('ignore')
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
|
||||
class YieldPredictor:
|
||||
"""
|
||||
Predicts production yield based on historical data and production factors.
|
||||
|
||||
Key Features:
|
||||
- Multi-factor yield prediction (recipe, worker, time-of-day, equipment, batch size)
|
||||
- Identifies low-yield patterns and root causes
|
||||
- Waste categorization (spoilage, measurement error, process inefficiency)
|
||||
- Actionable recommendations for yield improvement
|
||||
- Statistical validation of learned patterns
|
||||
|
||||
Methodology:
|
||||
1. Feature Engineering: Extract worker skill, time factors, batch size effects
|
||||
2. Statistical Analysis: Identify significant yield loss factors
|
||||
3. ML Prediction: Ensemble of Random Forest + Gradient Boosting
|
||||
4. Pattern Detection: Find recurring low-yield situations
|
||||
5. Insight Generation: Actionable recommendations with confidence scores
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self.model_cache = {} # Cache trained models per recipe
|
||||
self.baseline_yields = {} # Cache baseline yields per recipe
|
||||
|
||||
async def predict_yield(
|
||||
self,
|
||||
tenant_id: str,
|
||||
recipe_id: str,
|
||||
production_history: pd.DataFrame,
|
||||
production_context: Dict[str, Any],
|
||||
min_history_runs: int = 30
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Predict yield for upcoming production run and generate insights.
|
||||
|
||||
Args:
|
||||
tenant_id: Tenant identifier
|
||||
recipe_id: Recipe identifier
|
||||
production_history: Historical production runs with columns:
|
||||
- production_run_id
|
||||
- recipe_id
|
||||
- planned_quantity
|
||||
- actual_quantity
|
||||
- yield_percentage
|
||||
- staff_assigned (list of staff IDs)
|
||||
- started_at
|
||||
- completed_at
|
||||
- batch_size
|
||||
- equipment_id (optional)
|
||||
- notes (optional)
|
||||
production_context: Upcoming production context:
|
||||
- staff_assigned (list of staff IDs)
|
||||
- planned_start_time
|
||||
- batch_size
|
||||
- equipment_id (optional)
|
||||
min_history_runs: Minimum production runs required for learning
|
||||
|
||||
Returns:
|
||||
Prediction results with yield forecast, confidence, and insights
|
||||
"""
|
||||
logger.info(
|
||||
"Predicting production yield",
|
||||
tenant_id=tenant_id,
|
||||
recipe_id=recipe_id,
|
||||
history_runs=len(production_history)
|
||||
)
|
||||
|
||||
# Validate production history
|
||||
if len(production_history) < min_history_runs:
|
||||
return self._insufficient_data_response(
|
||||
recipe_id, production_context, len(production_history), min_history_runs
|
||||
)
|
||||
|
||||
# Step 1: Calculate baseline statistics
|
||||
baseline_stats = self._calculate_baseline_statistics(production_history)
|
||||
|
||||
# Step 2: Feature engineering
|
||||
feature_df = self._engineer_features(production_history)
|
||||
|
||||
# Step 3: Analyze yield factors
|
||||
factor_analysis = self._analyze_yield_factors(feature_df)
|
||||
|
||||
# Step 4: Train predictive model
|
||||
model_results = self._train_yield_model(feature_df)
|
||||
|
||||
# Step 5: Make prediction for upcoming run
|
||||
prediction = self._predict_upcoming_run(
|
||||
production_context, model_results, baseline_stats, feature_df
|
||||
)
|
||||
|
||||
# Step 6: Identify low-yield patterns
|
||||
patterns = self._identify_yield_patterns(feature_df, factor_analysis)
|
||||
|
||||
# Step 7: Generate insights
|
||||
insights = self._generate_yield_insights(
|
||||
tenant_id, recipe_id, baseline_stats, factor_analysis,
|
||||
patterns, prediction, production_context
|
||||
)
|
||||
|
||||
# Step 8: Calculate confidence
|
||||
confidence = self._calculate_prediction_confidence(
|
||||
production_history, model_results, factor_analysis
|
||||
)
|
||||
|
||||
return {
|
||||
'recipe_id': recipe_id,
|
||||
'predicted_at': datetime.utcnow().isoformat(),
|
||||
'history_runs': len(production_history),
|
||||
'baseline_yield': baseline_stats['mean_yield'],
|
||||
'baseline_std': baseline_stats['std_yield'],
|
||||
'predicted_yield': prediction['predicted_yield'],
|
||||
'prediction_range': prediction['prediction_range'],
|
||||
'expected_waste': prediction['expected_waste'],
|
||||
'confidence': confidence,
|
||||
'factor_analysis': factor_analysis,
|
||||
'patterns': patterns,
|
||||
'model_performance': model_results['performance'],
|
||||
'insights': insights
|
||||
}
|
||||
|
||||
def _insufficient_data_response(
|
||||
self, recipe_id: str, production_context: Dict[str, Any],
|
||||
current_runs: int, required_runs: int
|
||||
) -> Dict[str, Any]:
|
||||
"""Return response when insufficient historical data."""
|
||||
return {
|
||||
'recipe_id': recipe_id,
|
||||
'predicted_at': datetime.utcnow().isoformat(),
|
||||
'history_runs': current_runs,
|
||||
'status': 'insufficient_data',
|
||||
'required_runs': required_runs,
|
||||
'baseline_yield': None,
|
||||
'predicted_yield': None,
|
||||
'confidence': 0,
|
||||
'insights': [{
|
||||
'type': 'warning',
|
||||
'priority': 'low',
|
||||
'category': 'production',
|
||||
'title': 'Insufficient Production History for Yield Prediction',
|
||||
'description': f'Only {current_runs} production runs available. Need at least {required_runs} runs to build reliable yield predictions. Continue tracking production data to enable yield optimization.',
|
||||
'impact_type': 'data_quality',
|
||||
'confidence': 100,
|
||||
'actionable': True,
|
||||
'recommendation_actions': [{
|
||||
'label': 'Track Production Data',
|
||||
'action': 'continue_production_tracking',
|
||||
'params': {'recipe_id': recipe_id}
|
||||
}]
|
||||
}]
|
||||
}
|
||||
|
||||
def _calculate_baseline_statistics(
|
||||
self, production_history: pd.DataFrame
|
||||
) -> Dict[str, Any]:
|
||||
"""Calculate baseline yield statistics."""
|
||||
yields = production_history['yield_percentage'].values
|
||||
|
||||
return {
|
||||
'mean_yield': float(np.mean(yields)),
|
||||
'median_yield': float(np.median(yields)),
|
||||
'std_yield': float(np.std(yields)),
|
||||
'min_yield': float(np.min(yields)),
|
||||
'max_yield': float(np.max(yields)),
|
||||
'cv_yield': float(np.std(yields) / np.mean(yields)), # Coefficient of variation
|
||||
'percentile_25': float(np.percentile(yields, 25)),
|
||||
'percentile_75': float(np.percentile(yields, 75)),
|
||||
'runs_below_90': int(np.sum(yields < 90)),
|
||||
'runs_above_95': int(np.sum(yields > 95))
|
||||
}
|
||||
|
||||
def _engineer_features(self, production_history: pd.DataFrame) -> pd.DataFrame:
|
||||
"""Engineer features from production history."""
|
||||
df = production_history.copy()
|
||||
|
||||
# Time-based features
|
||||
df['started_at'] = pd.to_datetime(df['started_at'])
|
||||
df['hour_of_day'] = df['started_at'].dt.hour
|
||||
df['day_of_week'] = df['started_at'].dt.dayofweek
|
||||
df['is_weekend'] = df['day_of_week'].isin([5, 6]).astype(int)
|
||||
df['is_early_morning'] = (df['hour_of_day'] < 6).astype(int)
|
||||
df['is_late_night'] = (df['hour_of_day'] >= 22).astype(int)
|
||||
|
||||
# Duration features
|
||||
if 'completed_at' in df.columns:
|
||||
df['completed_at'] = pd.to_datetime(df['completed_at'])
|
||||
df['duration_hours'] = (df['completed_at'] - df['started_at']).dt.total_seconds() / 3600
|
||||
df['is_rushed'] = (df['duration_hours'] < df['duration_hours'].quantile(0.25)).astype(int)
|
||||
|
||||
# Batch size features
|
||||
df['batch_size_normalized'] = df['batch_size'] / df['batch_size'].mean()
|
||||
df['is_large_batch'] = (df['batch_size'] > df['batch_size'].quantile(0.75)).astype(int)
|
||||
df['is_small_batch'] = (df['batch_size'] < df['batch_size'].quantile(0.25)).astype(int)
|
||||
|
||||
# Worker experience features (proxy: number of previous runs)
|
||||
# Extract first worker from staff_assigned list
|
||||
df['worker_id'] = df['staff_assigned'].apply(lambda x: x[0] if isinstance(x, list) and len(x) > 0 else 'unknown')
|
||||
|
||||
df = df.sort_values('started_at')
|
||||
df['worker_run_count'] = df.groupby('worker_id').cumcount() + 1
|
||||
df['worker_experience_level'] = pd.cut(
|
||||
df['worker_run_count'],
|
||||
bins=[0, 5, 15, 100],
|
||||
labels=['novice', 'intermediate', 'expert']
|
||||
)
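# With these bins, 1-5 prior runs -> 'novice', 6-15 -> 'intermediate', 16-100 -> 'expert'.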
|
||||
|
||||
# Recent yield trend for worker
|
||||
df['worker_recent_avg_yield'] = df.groupby('worker_id')['yield_percentage'].transform(
|
||||
lambda x: x.rolling(window=5, min_periods=1).mean()
|
||||
)
|
||||
|
||||
return df
|
||||
|
||||
def _analyze_yield_factors(self, feature_df: pd.DataFrame) -> Dict[str, Any]:
|
||||
"""Analyze factors affecting yield using statistical tests."""
|
||||
factors = {}
|
||||
|
||||
# Worker impact
|
||||
# Extract worker_id from staff_assigned for analysis
|
||||
if 'worker_id' not in feature_df.columns:
|
||||
feature_df['worker_id'] = feature_df['staff_assigned'].apply(lambda x: x[0] if isinstance(x, list) and len(x) > 0 else 'unknown')
|
||||
|
||||
worker_yields = feature_df.groupby('worker_id')['yield_percentage'].agg(['mean', 'std', 'count'])
|
||||
worker_yields = worker_yields[worker_yields['count'] >= 3] # Min 3 runs per worker
|
||||
|
||||
if len(worker_yields) > 1:
|
||||
# ANOVA test: Does worker significantly affect yield?
|
||||
worker_groups = [
|
||||
feature_df[feature_df['worker_id'] == worker]['yield_percentage'].values
|
||||
for worker in worker_yields.index
|
||||
]
|
||||
f_stat, p_value = stats.f_oneway(*worker_groups)
|
||||
|
||||
factors['worker'] = {
|
||||
'significant': p_value < 0.05,
|
||||
'p_value': float(p_value),
|
||||
'f_statistic': float(f_stat),
|
||||
'best_worker': worker_yields['mean'].idxmax(),
|
||||
'best_worker_yield': float(worker_yields['mean'].max()),
|
||||
'worst_worker': worker_yields['mean'].idxmin(),
|
||||
'worst_worker_yield': float(worker_yields['mean'].min()),
|
||||
'yield_range': float(worker_yields['mean'].max() - worker_yields['mean'].min())
|
||||
}
|
||||
else:
|
||||
factors['worker'] = {'significant': False, 'reason': 'insufficient_workers'}
|
||||
|
||||
# Time of day impact
|
||||
time_groups = {
|
||||
'early_morning': feature_df[feature_df['hour_of_day'] < 6]['yield_percentage'].values,
|
||||
'morning': feature_df[(feature_df['hour_of_day'] >= 6) & (feature_df['hour_of_day'] < 12)]['yield_percentage'].values,
|
||||
'afternoon': feature_df[(feature_df['hour_of_day'] >= 12) & (feature_df['hour_of_day'] < 18)]['yield_percentage'].values,
|
||||
'evening': feature_df[feature_df['hour_of_day'] >= 18]['yield_percentage'].values
|
||||
}
|
||||
time_groups = {k: v for k, v in time_groups.items() if len(v) >= 3}
|
||||
|
||||
if len(time_groups) > 1:
|
||||
f_stat, p_value = stats.f_oneway(*time_groups.values())
|
||||
time_means = {k: np.mean(v) for k, v in time_groups.items()}
|
||||
|
||||
factors['time_of_day'] = {
|
||||
'significant': p_value < 0.05,
|
||||
'p_value': float(p_value),
|
||||
'best_time': max(time_means, key=time_means.get),
|
||||
'best_time_yield': float(max(time_means.values())),
|
||||
'worst_time': min(time_means, key=time_means.get),
|
||||
'worst_time_yield': float(min(time_means.values())),
|
||||
'yield_range': float(max(time_means.values()) - min(time_means.values()))
|
||||
}
|
||||
else:
|
||||
factors['time_of_day'] = {'significant': False, 'reason': 'insufficient_data'}
|
||||
|
||||
# Batch size impact (correlation)
|
||||
if len(feature_df) >= 10:
|
||||
correlation, p_value = stats.pearsonr(
|
||||
feature_df['batch_size'],
|
||||
feature_df['yield_percentage']
|
||||
)
|
||||
|
||||
factors['batch_size'] = {
|
||||
'significant': abs(correlation) > 0.3 and p_value < 0.05,
|
||||
'correlation': float(correlation),
|
||||
'p_value': float(p_value),
|
||||
'direction': 'positive' if correlation > 0 else 'negative',
|
||||
'interpretation': self._interpret_batch_size_effect(correlation)
|
||||
}
|
||||
else:
|
||||
factors['batch_size'] = {'significant': False, 'reason': 'insufficient_data'}
|
||||
|
||||
# Weekend vs weekday
|
||||
weekend_yields = feature_df[feature_df['is_weekend'] == 1]['yield_percentage'].values
|
||||
weekday_yields = feature_df[feature_df['is_weekend'] == 0]['yield_percentage'].values
|
||||
|
||||
if len(weekend_yields) >= 3 and len(weekday_yields) >= 3:
|
||||
t_stat, p_value = stats.ttest_ind(weekend_yields, weekday_yields)
|
||||
|
||||
factors['weekend_effect'] = {
|
||||
'significant': p_value < 0.05,
|
||||
'p_value': float(p_value),
|
||||
't_statistic': float(t_stat),
|
||||
'weekend_yield': float(np.mean(weekend_yields)),
|
||||
'weekday_yield': float(np.mean(weekday_yields)),
|
||||
'difference': float(np.mean(weekend_yields) - np.mean(weekday_yields))
|
||||
}
|
||||
else:
|
||||
factors['weekend_effect'] = {'significant': False, 'reason': 'insufficient_weekend_data'}
|
||||
|
||||
return factors
|
||||
|
||||
def _interpret_batch_size_effect(self, correlation: float) -> str:
|
||||
"""Interpret batch size correlation."""
|
||||
if abs(correlation) < 0.3:
|
||||
return "Batch size has minimal impact on yield"
|
||||
elif correlation > 0:
|
||||
return "Larger batches tend to have higher yield (economies of scale)"
|
||||
else:
|
||||
return "Larger batches tend to have lower yield (difficulty handling large volumes)"
|
||||
|
||||
def _train_yield_model(self, feature_df: pd.DataFrame) -> Dict[str, Any]:
|
||||
"""Train ML model to predict yield."""
|
||||
# Prepare features
|
||||
feature_columns = [
|
||||
'hour_of_day', 'day_of_week', 'is_weekend',
|
||||
'batch_size_normalized', 'is_large_batch', 'is_small_batch',
|
||||
'worker_run_count'
|
||||
]
|
||||
|
||||
if 'duration_hours' in feature_df.columns:
|
||||
feature_columns.append('duration_hours')
|
||||
|
||||
# Encode worker_id (extracted from staff_assigned)
|
||||
if 'worker_id' not in feature_df.columns:
|
||||
feature_df['worker_id'] = feature_df['staff_assigned'].apply(lambda x: x[0] if isinstance(x, list) and len(x) > 0 else 'unknown')
|
||||
|
||||
worker_encoding = {worker: idx for idx, worker in enumerate(feature_df['worker_id'].unique())}
|
||||
feature_df['worker_encoded'] = feature_df['worker_id'].map(worker_encoding)
|
||||
feature_columns.append('worker_encoded')
|
||||
|
||||
X = feature_df[feature_columns].fillna(0).values
|
||||
y = feature_df['yield_percentage'].values
|
||||
|
||||
# Split into train/test (temporal split)
|
||||
split_idx = int(len(X) * 0.8)
|
||||
X_train, X_test = X[:split_idx], X[split_idx:]
|
||||
y_train, y_test = y[:split_idx], y[split_idx:]
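# The history is already sorted by started_at, so the oldest 80% trains the model and the
# newest 20% evaluates it, keeping future runs out of the training data (no leakage).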
|
||||
|
||||
# Scale features
|
||||
scaler = StandardScaler()
|
||||
X_train_scaled = scaler.fit_transform(X_train)
|
||||
X_test_scaled = scaler.transform(X_test)
|
||||
|
||||
# Train ensemble of models
|
||||
models = {
|
||||
'random_forest': RandomForestRegressor(n_estimators=100, max_depth=5, random_state=42),
|
||||
'gradient_boosting': GradientBoostingRegressor(n_estimators=50, max_depth=3, random_state=42),
|
||||
'linear': LinearRegression()
|
||||
}
|
||||
|
||||
performances = {}
|
||||
predictions = {}
|
||||
|
||||
for name, model in models.items():
|
||||
model.fit(X_train_scaled, y_train)
|
||||
y_pred = model.predict(X_test_scaled)
|
||||
|
||||
mae = np.mean(np.abs(y_test - y_pred))
|
||||
rmse = np.sqrt(np.mean((y_test - y_pred) ** 2))
|
||||
r2 = 1 - (np.sum((y_test - y_pred) ** 2) / np.sum((y_test - np.mean(y_test)) ** 2))
|
||||
|
||||
performances[name] = {
|
||||
'mae': float(mae),
|
||||
'rmse': float(rmse),
|
||||
'r2': float(r2)
|
||||
}
|
||||
predictions[name] = y_pred
|
||||
|
||||
# Select best model based on MAE
|
||||
best_model_name = min(performances, key=lambda k: performances[k]['mae'])
|
||||
best_model = models[best_model_name]
|
||||
|
||||
# Feature importance (if available)
|
||||
feature_importance = {}
|
||||
if hasattr(best_model, 'feature_importances_'):
|
||||
importances = best_model.feature_importances_
|
||||
feature_importance = {
|
||||
feature_columns[i]: float(importances[i])
|
||||
for i in range(len(feature_columns))
|
||||
}
|
||||
feature_importance = dict(sorted(
|
||||
feature_importance.items(),
|
||||
key=lambda x: x[1],
|
||||
reverse=True
|
||||
))
|
||||
|
||||
return {
|
||||
'best_model': best_model,
|
||||
'best_model_name': best_model_name,
|
||||
'scaler': scaler,
|
||||
'feature_columns': feature_columns,
|
||||
'worker_encoding': worker_encoding,
|
||||
'performance': performances[best_model_name],
|
||||
'all_performances': performances,
|
||||
'feature_importance': feature_importance
|
||||
}
|
||||
|
||||
def _predict_upcoming_run(
|
||||
self,
|
||||
production_context: Dict[str, Any],
|
||||
model_results: Dict[str, Any],
|
||||
baseline_stats: Dict[str, Any],
|
||||
feature_df: pd.DataFrame
|
||||
) -> Dict[str, Any]:
|
||||
"""Predict yield for upcoming production run."""
|
||||
# Extract context
|
||||
staff_assigned = production_context.get('staff_assigned', [])
|
||||
worker_id = staff_assigned[0] if isinstance(staff_assigned, list) and len(staff_assigned) > 0 else 'unknown'
|
||||
planned_start = pd.to_datetime(production_context.get('planned_start_time'))
|
||||
batch_size = production_context.get('batch_size')
|
||||
|
||||
# Get worker experience
|
||||
if 'worker_id' not in feature_df.columns:
|
||||
feature_df['worker_id'] = feature_df['staff_assigned'].apply(lambda x: x[0] if isinstance(x, list) and len(x) > 0 else 'unknown')
|
||||
|
||||
worker_runs = feature_df[feature_df['worker_id'] == worker_id]
|
||||
worker_run_count = len(worker_runs) if len(worker_runs) > 0 else 1
|
||||
|
||||
# Build feature vector
|
||||
mean_batch_size = feature_df['batch_size'].mean()
|
||||
batch_size_normalized = batch_size / mean_batch_size
|
||||
is_large_batch = 1 if batch_size > feature_df['batch_size'].quantile(0.75) else 0
|
||||
is_small_batch = 1 if batch_size < feature_df['batch_size'].quantile(0.25) else 0
|
||||
|
||||
features = {
|
||||
'hour_of_day': planned_start.hour,
|
||||
'day_of_week': planned_start.dayofweek,
|
||||
'is_weekend': 1 if planned_start.dayofweek in [5, 6] else 0,
|
||||
'batch_size_normalized': batch_size_normalized,
|
||||
'is_large_batch': is_large_batch,
|
||||
'is_small_batch': is_small_batch,
|
||||
'worker_run_count': worker_run_count,
|
||||
'duration_hours': 0, # Not known yet
|
||||
'worker_encoded': model_results['worker_encoding'].get(worker_id, 0)
|
||||
}
|
||||
|
||||
# Create feature vector in correct order
|
||||
X = np.array([[features.get(col, 0) for col in model_results['feature_columns']]])
|
||||
X_scaled = model_results['scaler'].transform(X)
|
||||
|
||||
# Predict
|
||||
predicted_yield = float(model_results['best_model'].predict(X_scaled)[0])
|
||||
|
||||
# Prediction range (based on model RMSE)
|
||||
rmse = model_results['performance']['rmse']
|
||||
prediction_range = {
|
||||
'lower': max(0, predicted_yield - 1.96 * rmse),
|
||||
'upper': min(100, predicted_yield + 1.96 * rmse)
|
||||
}
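# The ±1.96 * RMSE band approximates a 95% interval under roughly normal residuals;
# e.g. an RMSE of 2.5 yield points gives a band of about ±4.9 points around the prediction.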
|
||||
|
||||
# Expected waste
|
||||
planned_quantity = production_context.get('planned_quantity', 100)
|
||||
expected_waste_pct = max(0, 100 - predicted_yield)
|
||||
expected_waste_units = planned_quantity * (expected_waste_pct / 100)
|
||||
|
||||
return {
|
||||
'predicted_yield': round(predicted_yield, 2),
|
||||
'prediction_range': prediction_range,
|
||||
'expected_waste_pct': round(expected_waste_pct, 2),
|
||||
'expected_waste_units': round(expected_waste_units, 2),
|
||||
'baseline_comparison': round(predicted_yield - baseline_stats['mean_yield'], 2),
|
||||
'features_used': features
|
||||
}
|
||||
|
||||
def _identify_yield_patterns(
|
||||
self, feature_df: pd.DataFrame, factor_analysis: Dict[str, Any]
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Identify recurring low-yield patterns."""
|
||||
patterns = []
|
||||
|
||||
# Pattern 1: Specific worker consistently low
|
||||
if factor_analysis.get('worker', {}).get('significant'):
|
||||
worst_worker = factor_analysis['worker']['worst_worker']
|
||||
worst_yield = factor_analysis['worker']['worst_worker_yield']
|
||||
best_yield = factor_analysis['worker']['best_worker_yield']
|
||||
|
||||
if worst_yield < 90 and (best_yield - worst_yield) > 5:
|
||||
patterns.append({
|
||||
'pattern': 'low_yield_worker',
|
||||
'description': f'Worker {worst_worker} consistently produces {worst_yield:.1f}% yield vs best worker {best_yield:.1f}%',
|
||||
'severity': 'high' if worst_yield < 85 else 'medium',
|
||||
'affected_runs': int(len(feature_df[feature_df['worker_id'] == worst_worker])),
|
||||
'yield_impact': round(best_yield - worst_yield, 2),
|
||||
'recommendation': 'Provide additional training or reassign to different recipes'
|
||||
})
|
||||
|
||||
# Pattern 2: Time-of-day effect
|
||||
if factor_analysis.get('time_of_day', {}).get('significant'):
|
||||
worst_time = factor_analysis['time_of_day']['worst_time']
|
||||
worst_yield = factor_analysis['time_of_day']['worst_time_yield']
|
||||
|
||||
if worst_yield < 90:
|
||||
patterns.append({
|
||||
'pattern': 'low_yield_time',
|
||||
'description': f'{worst_time} shifts produce {worst_yield:.1f}% yield',
|
||||
'severity': 'medium',
|
||||
'affected_runs': 'varies',
|
||||
'yield_impact': round(factor_analysis['time_of_day']['yield_range'], 2),
|
||||
'recommendation': f'Avoid scheduling this recipe during {worst_time}'
|
||||
})
|
||||
|
||||
# Pattern 3: Large batch issues
|
||||
if factor_analysis.get('batch_size', {}).get('significant'):
|
||||
if factor_analysis['batch_size']['direction'] == 'negative':
|
||||
patterns.append({
|
||||
'pattern': 'large_batch_yield_loss',
|
||||
'description': 'Larger batches have lower yield - equipment or process capacity issues',
|
||||
'severity': 'medium',
|
||||
'correlation': round(factor_analysis['batch_size']['correlation'], 3),
|
||||
'recommendation': 'Split large batches or upgrade equipment'
|
||||
})
|
||||
|
||||
# Pattern 4: Weekend effect
|
||||
if factor_analysis.get('weekend_effect', {}).get('significant'):
|
||||
weekend_yield = factor_analysis['weekend_effect']['weekend_yield']
|
||||
weekday_yield = factor_analysis['weekend_effect']['weekday_yield']
|
||||
|
||||
if abs(weekend_yield - weekday_yield) > 3:
|
||||
if weekend_yield < weekday_yield:
|
||||
patterns.append({
|
||||
'pattern': 'weekend_yield_drop',
|
||||
'description': f'Weekend production {weekend_yield:.1f}% vs weekday {weekday_yield:.1f}%',
|
||||
'severity': 'low',
|
||||
'yield_impact': round(weekday_yield - weekend_yield, 2),
|
||||
'recommendation': 'Review weekend staffing or processes'
|
||||
})
|
||||
|
||||
return patterns
|
||||
|
||||
def _generate_yield_insights(
|
||||
self,
|
||||
tenant_id: str,
|
||||
recipe_id: str,
|
||||
baseline_stats: Dict[str, Any],
|
||||
factor_analysis: Dict[str, Any],
|
||||
patterns: List[Dict[str, Any]],
|
||||
prediction: Dict[str, Any],
|
||||
production_context: Dict[str, Any]
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Generate actionable insights for yield improvement."""
|
||||
insights = []
|
||||
|
||||
# Insight 1: Low predicted yield warning
|
||||
if prediction['predicted_yield'] < 90:
|
||||
waste_value = prediction['expected_waste_units'] * production_context.get('unit_cost', 5)
|
||||
|
||||
insights.append({
|
||||
'type': 'warning',
|
||||
'priority': 'high' if prediction['predicted_yield'] < 85 else 'medium',
|
||||
'category': 'production',
|
||||
'title': f'Low Yield Predicted: {prediction["predicted_yield"]:.1f}%',
|
||||
'description': f'Upcoming production run predicted to yield {prediction["predicted_yield"]:.1f}%, below baseline {baseline_stats["mean_yield"]:.1f}%. Expected waste: {prediction["expected_waste_units"]:.1f} units (€{waste_value:.2f}).',
|
||||
'impact_type': 'waste',
|
||||
'impact_value': prediction['expected_waste_units'],
|
||||
'impact_unit': 'units',
|
||||
'confidence': 75,
|
||||
'metrics_json': {
|
||||
'recipe_id': recipe_id,
|
||||
'predicted_yield': prediction['predicted_yield'],
|
||||
'expected_waste': prediction['expected_waste_units'],
|
||||
'waste_value': round(waste_value, 2)
|
||||
},
|
||||
'actionable': True,
|
||||
'recommendation_actions': [{
|
||||
'label': 'Review Production Setup',
|
||||
'action': 'review_production_factors',
|
||||
'params': {
|
||||
'recipe_id': recipe_id,
|
||||
'worker_id': (production_context.get('staff_assigned') or ['unknown'])[0]  # first assigned worker, if any
|
||||
}
|
||||
}]
|
||||
})
|
||||
|
||||
# Insight 2: High-severity patterns
|
||||
for pattern in patterns:
|
||||
if pattern.get('severity') == 'high':
|
||||
if pattern['pattern'] == 'low_yield_worker':
|
||||
insights.append({
|
||||
'type': 'opportunity',
|
||||
'priority': 'high',
|
||||
'category': 'production',
|
||||
'title': f'Worker Training Opportunity: {pattern["yield_impact"]:.1f}% Yield Gap',
|
||||
'description': pattern['description'] + f'. Improving this worker to average performance would save significant waste.',
|
||||
'impact_type': 'yield_improvement',
|
||||
'impact_value': pattern['yield_impact'],
|
||||
'impact_unit': 'percentage_points',
|
||||
'confidence': 85,
|
||||
'metrics_json': {
|
||||
'recipe_id': recipe_id,
|
||||
'pattern': pattern['pattern'],
|
||||
'yield_impact': pattern['yield_impact']
|
||||
},
|
||||
'actionable': True,
|
||||
'recommendation_actions': [{
|
||||
'label': 'Schedule Training',
|
||||
'action': 'schedule_worker_training',
|
||||
'params': {'recipe_id': recipe_id}
|
||||
}]
|
||||
})
|
||||
|
||||
# Insight 3: Excellent yield
|
||||
if prediction['predicted_yield'] > 98:
|
||||
insights.append({
|
||||
'type': 'positive',
|
||||
'priority': 'low',
|
||||
'category': 'production',
|
||||
'title': f'Excellent Yield Expected: {prediction["predicted_yield"]:.1f}%',
|
||||
'description': f'Optimal production conditions detected. Expected yield {prediction["predicted_yield"]:.1f}% exceeds baseline {baseline_stats["mean_yield"]:.1f}%.',
|
||||
'impact_type': 'yield_improvement',
|
||||
'impact_value': prediction['baseline_comparison'],
|
||||
'impact_unit': 'percentage_points',
|
||||
'confidence': 70,
|
||||
'metrics_json': {
|
||||
'recipe_id': recipe_id,
|
||||
'predicted_yield': prediction['predicted_yield']
|
||||
},
|
||||
'actionable': False
|
||||
})
|
||||
|
||||
# Insight 4: Yield variability issue
|
||||
if baseline_stats['cv_yield'] > 0.05: # Coefficient of variation > 5%
|
||||
insights.append({
|
||||
'type': 'opportunity',
|
||||
'priority': 'medium',
|
||||
'category': 'production',
|
||||
'title': f'High Yield Variability: {baseline_stats["cv_yield"]*100:.1f}% CV',
|
||||
'description': f'Yield varies significantly across production runs (CV={baseline_stats["cv_yield"]*100:.1f}%, range {baseline_stats["min_yield"]:.1f}%-{baseline_stats["max_yield"]:.1f}%). Standardizing processes could reduce waste.',
|
||||
'impact_type': 'process_improvement',
|
||||
'confidence': 80,
|
||||
'metrics_json': {
|
||||
'recipe_id': recipe_id,
|
||||
'cv_yield': round(baseline_stats['cv_yield'], 3),
|
||||
'yield_range': round(baseline_stats['max_yield'] - baseline_stats['min_yield'], 2)
|
||||
},
|
||||
'actionable': True,
|
||||
'recommendation_actions': [{
|
||||
'label': 'Standardize Process',
|
||||
'action': 'review_production_sop',
|
||||
'params': {'recipe_id': recipe_id}
|
||||
}]
|
||||
})
|
||||
|
||||
return insights
|
||||
|
||||
def _calculate_prediction_confidence(
|
||||
self,
|
||||
production_history: pd.DataFrame,
|
||||
model_results: Dict[str, Any],
|
||||
factor_analysis: Dict[str, Any]
|
||||
) -> int:
|
||||
"""Calculate overall confidence score for predictions."""
|
||||
confidence_factors = []
|
||||
|
||||
# Factor 1: Sample size (0-30 points)
|
||||
n_runs = len(production_history)
|
||||
if n_runs >= 100:
|
||||
sample_score = 30
|
||||
elif n_runs >= 50:
|
||||
sample_score = 25
|
||||
elif n_runs >= 30:
|
||||
sample_score = 20
|
||||
else:
|
||||
sample_score = 10
|
||||
confidence_factors.append(('sample_size', sample_score))
|
||||
|
||||
# Factor 2: Model performance (0-30 points)
|
||||
r2 = model_results['performance']['r2']
|
||||
mae = model_results['performance']['mae']
|
||||
|
||||
if r2 > 0.7 and mae < 3:
|
||||
model_score = 30
|
||||
elif r2 > 0.5 and mae < 5:
|
||||
model_score = 25
|
||||
elif r2 > 0.3 and mae < 7:
|
||||
model_score = 20
|
||||
else:
|
||||
model_score = 10
|
||||
confidence_factors.append(('model_performance', model_score))
|
||||
|
||||
# Factor 3: Statistical significance of factors (0-25 points)
|
||||
significant_factors = sum(
|
||||
1 for factor in factor_analysis.values()
|
||||
if isinstance(factor, dict) and factor.get('significant')
|
||||
)
|
||||
|
||||
if significant_factors >= 3:
|
||||
stats_score = 25
|
||||
elif significant_factors >= 2:
|
||||
stats_score = 20
|
||||
elif significant_factors >= 1:
|
||||
stats_score = 15
|
||||
else:
|
||||
stats_score = 10
|
||||
confidence_factors.append(('significant_factors', stats_score))
|
||||
|
||||
# Factor 4: Data recency (0-15 points)
|
||||
most_recent = production_history['started_at'].max()
|
||||
days_old = (datetime.utcnow() - pd.to_datetime(most_recent)).days
|
||||
|
||||
if days_old <= 7:
|
||||
recency_score = 15
|
||||
elif days_old <= 30:
|
||||
recency_score = 12
|
||||
elif days_old <= 90:
|
||||
recency_score = 8
|
||||
else:
|
||||
recency_score = 5
|
||||
confidence_factors.append(('data_recency', recency_score))
|
||||
|
||||
total_confidence = sum(score for _, score in confidence_factors)
|
||||
|
||||
return min(100, max(0, total_confidence))
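# Worked example: 60 historical runs (25) + r2=0.6 with MAE=4 (25)
# + two statistically significant factors (20) + data 10 days old (12) = 82.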
|
||||
|
||||
async def analyze_recipe_yield_history(
|
||||
self,
|
||||
tenant_id: str,
|
||||
recipe_id: str,
|
||||
production_history: pd.DataFrame,
|
||||
min_history_runs: int = 30
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Analyze historical yield performance for a recipe (no prediction).
|
||||
|
||||
Args:
|
||||
tenant_id: Tenant identifier
|
||||
recipe_id: Recipe identifier
|
||||
production_history: Historical production runs
|
||||
min_history_runs: Minimum production runs required
|
||||
|
||||
Returns:
|
||||
Historical analysis with insights
|
||||
"""
|
||||
logger.info(
|
||||
"Analyzing recipe yield history",
|
||||
tenant_id=tenant_id,
|
||||
recipe_id=recipe_id,
|
||||
history_runs=len(production_history)
|
||||
)
|
||||
|
||||
if len(production_history) < min_history_runs:
|
||||
return self._insufficient_data_response(
|
||||
recipe_id, {}, len(production_history), min_history_runs
|
||||
)
|
||||
|
||||
# Calculate statistics
|
||||
baseline_stats = self._calculate_baseline_statistics(production_history)
|
||||
|
||||
# Feature engineering
|
||||
feature_df = self._engineer_features(production_history)
|
||||
|
||||
# Analyze factors
|
||||
factor_analysis = self._analyze_yield_factors(feature_df)
|
||||
|
||||
# Identify patterns
|
||||
patterns = self._identify_yield_patterns(feature_df, factor_analysis)
|
||||
|
||||
# Generate insights (without prediction)
|
||||
insights = []
|
||||
|
||||
# Add insights for patterns
|
||||
for pattern in patterns:
|
||||
if pattern.get('severity') in ['high', 'medium']:
|
||||
insights.append({
|
||||
'type': 'opportunity',
|
||||
'priority': pattern['severity'],
|
||||
'category': 'production',
|
||||
'title': f'Yield Pattern Detected: {pattern["pattern"]}',
|
||||
'description': pattern['description'],
|
||||
'impact_type': 'yield_improvement',
|
||||
'confidence': 80,
|
||||
'metrics_json': {
|
||||
'recipe_id': recipe_id,
|
||||
'pattern': pattern
|
||||
},
|
||||
'actionable': True,
|
||||
'recommendation': pattern['recommendation']
|
||||
})
|
||||
|
||||
return {
|
||||
'recipe_id': recipe_id,
|
||||
'analyzed_at': datetime.utcnow().isoformat(),
|
||||
'history_runs': len(production_history),
|
||||
'baseline_stats': baseline_stats,
|
||||
'factor_analysis': factor_analysis,
|
||||
'patterns': patterns,
|
||||
'insights': insights
|
||||
}
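

# --- Hedged local sketch (illustrative only) ---
# Builds a small synthetic production history and runs the historical-analysis path,
# which needs no external services. Column names follow the predict_yield docstring;
# the tenant/recipe identifiers and worker names below are hypothetical.
if __name__ == "__main__":
    import asyncio

    rng = np.random.default_rng(42)
    n_runs = 40
    history = pd.DataFrame({
        'production_run_id': range(n_runs),
        'recipe_id': ['recipe-demo'] * n_runs,
        'planned_quantity': [100] * n_runs,
        'actual_quantity': rng.uniform(85, 99, n_runs),
        'yield_percentage': rng.uniform(85, 99, n_runs),
        'staff_assigned': [['worker-a'] if i % 2 else ['worker-b'] for i in range(n_runs)],
        'started_at': pd.date_range('2024-01-01 08:00', periods=n_runs, freq='D'),
        'completed_at': pd.date_range('2024-01-01 12:00', periods=n_runs, freq='D'),
        'batch_size': rng.integers(50, 150, n_runs),
    })

    predictor = YieldPredictor()
    analysis = asyncio.run(predictor.analyze_recipe_yield_history(
        tenant_id='demo-tenant',
        recipe_id='recipe-demo',
        production_history=history,
        min_history_runs=30,
    ))
    print(analysis['baseline_stats']['mean_yield'], len(analysis['insights']))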