# bakery-ia/services/procurement/app/ml/supplier_performance_predictor.py
"""
Supplier Performance Predictor
Predicts supplier reliability, delivery delays, and quality issues
Generates insights for procurement risk management
"""
import pandas as pd
import numpy as np
from typing import Dict, List, Any, Optional, Tuple
import structlog
from datetime import datetime, timedelta
from collections import defaultdict
from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor
from sklearn.preprocessing import StandardScaler
import warnings
warnings.filterwarnings('ignore')
logger = structlog.get_logger()
class SupplierPerformancePredictor:
"""
Predicts supplier performance metrics for procurement risk management.
Capabilities:
1. Delivery delay probability prediction
2. Quality issue likelihood scoring
3. Supplier reliability scoring (0-100)
4. Alternative supplier recommendations
5. Procurement risk assessment
6. Insight generation for high-risk suppliers
"""
def __init__(self):
        # Model attributes reserved for future ML-based predictors (current logic is trend-based)
        self.delay_model = None
        self.quality_model = None
self.reliability_scores = {}
self.scaler = StandardScaler()
self.feature_columns = []
async def analyze_supplier_performance(
self,
tenant_id: str,
supplier_id: str,
order_history: pd.DataFrame,
min_orders: int = 10
) -> Dict[str, Any]:
"""
Analyze historical supplier performance and generate insights.
Args:
tenant_id: Tenant identifier
supplier_id: Supplier identifier
order_history: Historical orders with columns:
- order_date
- expected_delivery_date
- actual_delivery_date
- order_quantity
- received_quantity
- quality_issues (bool)
- quality_score (0-100)
- order_value
min_orders: Minimum orders required for analysis
Returns:
Dictionary with performance metrics and insights
"""
logger.info(
"Analyzing supplier performance",
tenant_id=tenant_id,
supplier_id=supplier_id,
orders=len(order_history)
)
if len(order_history) < min_orders:
logger.warning(
"Insufficient order history",
supplier_id=supplier_id,
orders=len(order_history),
required=min_orders
)
return self._insufficient_data_response(tenant_id, supplier_id)
# Calculate performance metrics
metrics = self._calculate_performance_metrics(order_history)
# Calculate reliability score
reliability_score = self._calculate_reliability_score(metrics)
# Predict future performance
predictions = self._predict_future_performance(order_history, metrics)
# Assess procurement risk
risk_assessment = self._assess_procurement_risk(
metrics, reliability_score, predictions
)
# Generate insights
insights = self._generate_supplier_insights(
tenant_id, supplier_id, metrics, reliability_score,
risk_assessment, predictions
)
# Store reliability score
self.reliability_scores[supplier_id] = reliability_score
logger.info(
"Supplier performance analysis complete",
supplier_id=supplier_id,
reliability_score=reliability_score,
insights_generated=len(insights)
)
return {
'tenant_id': tenant_id,
'supplier_id': supplier_id,
'analyzed_at': datetime.utcnow().isoformat(),
'orders_analyzed': len(order_history),
'metrics': metrics,
'reliability_score': reliability_score,
'predictions': predictions,
'risk_assessment': risk_assessment,
'insights': insights
}
def _calculate_performance_metrics(
self,
order_history: pd.DataFrame
) -> Dict[str, Any]:
"""
Calculate comprehensive supplier performance metrics.
Args:
order_history: Historical order data
Returns:
Dictionary of performance metrics
"""
        # Work on a copy so the caller's DataFrame is not mutated by the derived columns below
        order_history = order_history.copy()
        # Ensure datetime columns
        order_history['order_date'] = pd.to_datetime(order_history['order_date'])
        order_history['expected_delivery_date'] = pd.to_datetime(order_history['expected_delivery_date'])
        order_history['actual_delivery_date'] = pd.to_datetime(order_history['actual_delivery_date'])
# Calculate delivery delays
order_history['delivery_delay_days'] = (
order_history['actual_delivery_date'] - order_history['expected_delivery_date']
).dt.days
order_history['is_delayed'] = order_history['delivery_delay_days'] > 0
order_history['is_early'] = order_history['delivery_delay_days'] < 0
# Calculate quantity accuracy
order_history['quantity_accuracy'] = (
order_history['received_quantity'] / order_history['order_quantity']
)
order_history['is_short_delivery'] = order_history['quantity_accuracy'] < 1.0
order_history['is_over_delivery'] = order_history['quantity_accuracy'] > 1.0
metrics = {
# Delivery metrics
'total_orders': int(len(order_history)),
'on_time_orders': int((~order_history['is_delayed']).sum()),
'delayed_orders': int(order_history['is_delayed'].sum()),
'on_time_rate': float((~order_history['is_delayed']).mean() * 100),
'avg_delivery_delay_days': float(order_history[order_history['is_delayed']]['delivery_delay_days'].mean()) if order_history['is_delayed'].any() else 0.0,
'max_delivery_delay_days': int(order_history['delivery_delay_days'].max()),
'delivery_delay_std': float(order_history['delivery_delay_days'].std()),
# Quantity accuracy metrics
'avg_quantity_accuracy': float(order_history['quantity_accuracy'].mean() * 100),
'short_deliveries': int(order_history['is_short_delivery'].sum()),
'short_delivery_rate': float(order_history['is_short_delivery'].mean() * 100),
# Quality metrics
'quality_issues': int(order_history['quality_issues'].sum()) if 'quality_issues' in order_history.columns else 0,
'quality_issue_rate': float(order_history['quality_issues'].mean() * 100) if 'quality_issues' in order_history.columns else 0.0,
'avg_quality_score': float(order_history['quality_score'].mean()) if 'quality_score' in order_history.columns else 100.0,
            # Consistency metrics (clamped to 0-100; lower variance = higher consistency)
            'delivery_consistency': float(max(0.0, min(100.0, 100 - order_history['delivery_delay_days'].std() * 10))),
            'quantity_consistency': float(max(0.0, min(100.0, 100 - order_history['quantity_accuracy'].std() * 100))),
# Recent trend (last 30 days vs overall)
'recent_on_time_rate': self._calculate_recent_trend(order_history, 'is_delayed', days=30),
# Cost metrics
'total_order_value': float(order_history['order_value'].sum()) if 'order_value' in order_history.columns else 0.0,
'avg_order_value': float(order_history['order_value'].mean()) if 'order_value' in order_history.columns else 0.0
}
# Ensure all metrics are valid (no NaN)
for key, value in metrics.items():
if isinstance(value, float) and np.isnan(value):
metrics[key] = 0.0
return metrics
def _calculate_recent_trend(
self,
order_history: pd.DataFrame,
metric_column: str,
days: int = 30
) -> float:
"""Calculate recent trend for a metric."""
cutoff_date = datetime.utcnow() - timedelta(days=days)
recent_orders = order_history[order_history['order_date'] >= cutoff_date]
if len(recent_orders) < 3:
return 0.0 # Not enough recent data
if metric_column == 'is_delayed':
return float((~recent_orders['is_delayed']).mean() * 100)
else:
return float(recent_orders[metric_column].mean() * 100)
def _calculate_reliability_score(
self,
metrics: Dict[str, Any]
) -> int:
"""
Calculate overall supplier reliability score (0-100).
Factors:
- On-time delivery rate (40%)
- Quantity accuracy (20%)
- Quality score (25%)
- Consistency (15%)
"""
# On-time delivery score (40 points)
on_time_score = metrics['on_time_rate'] * 0.40
# Quantity accuracy score (20 points)
quantity_score = min(100, metrics['avg_quantity_accuracy']) * 0.20
# Quality score (25 points)
quality_score = metrics['avg_quality_score'] * 0.25
# Consistency score (15 points)
# Average of delivery and quantity consistency
consistency_score = (
(metrics['delivery_consistency'] + metrics['quantity_consistency']) / 2
) * 0.15
total_score = on_time_score + quantity_score + quality_score + consistency_score
# Penalties
# Severe penalty for high quality issue rate
if metrics['quality_issue_rate'] > 10:
total_score *= 0.8 # 20% penalty
# Penalty for high short delivery rate
if metrics['short_delivery_rate'] > 15:
total_score *= 0.9 # 10% penalty
return int(round(max(0, min(100, total_score))))
def _predict_future_performance(
self,
order_history: pd.DataFrame,
metrics: Dict[str, Any]
) -> Dict[str, Any]:
"""
Predict future supplier performance based on trends.
Args:
order_history: Historical order data
metrics: Calculated performance metrics
Returns:
Dictionary of predictions
"""
# Simple trend-based predictions
# For production, could use ML models trained on multi-supplier data
predictions = {
'next_order_delay_probability': 0.0,
'next_order_quality_issue_probability': 0.0,
'predicted_delivery_days': 0,
'confidence': 0
}
        # Delay probability based on the historical rate and the recent trend
        historical_delay_rate = metrics['delayed_orders'] / max(1, metrics['total_orders'])
        if metrics['recent_on_time_rate'] > 0:
            # Weight recent performance higher than the long-run average
            recent_delay_rate = 1 - (metrics['recent_on_time_rate'] / 100)
            predicted_delay_prob = (historical_delay_rate * 0.3) + (recent_delay_rate * 0.7)
        else:
            # A recent rate of 0.0 is the "too few recent orders" sentinel: fall back to history
            predicted_delay_prob = historical_delay_rate
        predictions['next_order_delay_probability'] = float(min(1.0, max(0.0, predicted_delay_prob)))
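        # Illustrative arithmetic (assumed figures): with a 20% historical delay rate and a
        # recent on-time rate of 70%, the blend gives 0.3 * 0.20 + 0.7 * 0.30 = 0.27,
        # i.e. roughly a 27% chance that the next order arrives late.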
# Quality issue probability
if metrics['quality_issues'] > 0:
quality_issue_prob = metrics['quality_issue_rate'] / 100
predictions['next_order_quality_issue_probability'] = float(quality_issue_prob)
# Predicted delivery days (expected delay)
if metrics['avg_delivery_delay_days'] > 0:
predictions['predicted_delivery_days'] = int(round(metrics['avg_delivery_delay_days']))
# Confidence based on data quantity and recency
if metrics['total_orders'] >= 50:
predictions['confidence'] = 90
elif metrics['total_orders'] >= 30:
predictions['confidence'] = 80
elif metrics['total_orders'] >= 20:
predictions['confidence'] = 70
else:
predictions['confidence'] = 60
return predictions
def _assess_procurement_risk(
self,
metrics: Dict[str, Any],
reliability_score: int,
predictions: Dict[str, Any]
) -> Dict[str, Any]:
"""
Assess overall procurement risk for this supplier.
Risk levels: low, medium, high, critical
"""
risk_factors = []
risk_score = 0 # 0-100, higher = more risky
# Low reliability
if reliability_score < 60:
risk_factors.append('Low reliability score')
risk_score += 30
elif reliability_score < 75:
risk_factors.append('Medium reliability score')
risk_score += 15
# High delay probability
if predictions['next_order_delay_probability'] > 0.5:
risk_factors.append('High delay probability')
risk_score += 25
elif predictions['next_order_delay_probability'] > 0.3:
risk_factors.append('Moderate delay probability')
risk_score += 15
# Quality issues
if metrics['quality_issue_rate'] > 15:
risk_factors.append('High quality issue rate')
risk_score += 25
elif metrics['quality_issue_rate'] > 5:
risk_factors.append('Moderate quality issue rate')
risk_score += 10
# Quantity accuracy issues
if metrics['short_delivery_rate'] > 20:
risk_factors.append('Frequent short deliveries')
risk_score += 15
elif metrics['short_delivery_rate'] > 10:
risk_factors.append('Occasional short deliveries')
risk_score += 8
# Low consistency
if metrics['delivery_consistency'] < 60:
risk_factors.append('Inconsistent delivery timing')
risk_score += 10
# Determine risk level
if risk_score >= 70:
risk_level = 'critical'
elif risk_score >= 50:
risk_level = 'high'
elif risk_score >= 30:
risk_level = 'medium'
else:
risk_level = 'low'
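        # Illustrative walk-through (assumed figures): reliability 70 (+15), delay probability
        # 0.45 (+15) and a quality issue rate of 8% (+10) sum to a risk_score of 40,
        # which falls in the 30-49 band and is classified as 'medium'.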
return {
'risk_level': risk_level,
'risk_score': min(100, risk_score),
'risk_factors': risk_factors,
'recommendation': self._get_risk_recommendation(risk_level, risk_factors)
}
def _get_risk_recommendation(
self,
risk_level: str,
risk_factors: List[str]
) -> str:
"""Generate risk mitigation recommendation."""
if risk_level == 'critical':
return 'URGENT: Consider switching to alternative supplier. Current supplier poses significant operational risk.'
elif risk_level == 'high':
return 'HIGH PRIORITY: Increase safety stock and have backup supplier ready. Monitor closely.'
elif risk_level == 'medium':
return 'MONITOR: Keep standard safety stock. Review performance quarterly.'
else:
return 'LOW RISK: Supplier performing well. Maintain current relationship.'
def _generate_supplier_insights(
self,
tenant_id: str,
supplier_id: str,
metrics: Dict[str, Any],
reliability_score: int,
risk_assessment: Dict[str, Any],
predictions: Dict[str, Any]
) -> List[Dict[str, Any]]:
"""
Generate actionable insights for procurement team.
Args:
tenant_id: Tenant ID
supplier_id: Supplier ID
metrics: Performance metrics
reliability_score: Overall reliability (0-100)
risk_assessment: Risk assessment results
predictions: Future performance predictions
Returns:
List of insight dictionaries
"""
insights = []
# Insight 1: Low reliability alert
if reliability_score < 60:
insights.append({
'type': 'alert',
'priority': 'critical' if reliability_score < 50 else 'high',
'category': 'procurement',
'title': f'Low Supplier Reliability: {reliability_score}/100',
'description': f'Supplier {supplier_id} has low reliability score of {reliability_score}. On-time rate: {metrics["on_time_rate"]:.1f}%, Quality: {metrics["avg_quality_score"]:.1f}. Consider alternative suppliers.',
'impact_type': 'operational_risk',
'impact_value': 100 - reliability_score,
'impact_unit': 'risk_points',
'confidence': 85,
'metrics_json': {
'supplier_id': supplier_id,
'reliability_score': reliability_score,
'on_time_rate': round(metrics['on_time_rate'], 2),
'quality_score': round(metrics['avg_quality_score'], 2),
'quality_issue_rate': round(metrics['quality_issue_rate'], 2),
'delayed_orders': metrics['delayed_orders'],
'total_orders': metrics['total_orders']
},
'actionable': True,
'recommendation_actions': [
{
'label': 'Find Alternative Supplier',
'action': 'search_alternative_suppliers',
'params': {'current_supplier_id': supplier_id}
},
{
'label': 'Increase Safety Stock',
'action': 'adjust_safety_stock',
'params': {'supplier_id': supplier_id, 'multiplier': 1.5}
}
],
'source_service': 'procurement',
'source_model': 'supplier_performance_predictor'
})
# Insight 2: High delay probability
if predictions['next_order_delay_probability'] > 0.4:
delay_prob_pct = predictions['next_order_delay_probability'] * 100
insights.append({
'type': 'prediction',
'priority': 'high' if delay_prob_pct > 60 else 'medium',
'category': 'procurement',
'title': f'High Delay Risk: {delay_prob_pct:.0f}% Probability',
'description': f'Supplier {supplier_id} has {delay_prob_pct:.0f}% probability of delaying next order. Expected delay: {predictions["predicted_delivery_days"]} days. Plan accordingly.',
'impact_type': 'operational_risk',
'impact_value': delay_prob_pct,
'impact_unit': 'probability_percent',
'confidence': predictions['confidence'],
'metrics_json': {
'supplier_id': supplier_id,
'delay_probability': round(delay_prob_pct, 2),
'predicted_delay_days': predictions['predicted_delivery_days'],
'historical_delay_rate': round(metrics['delayed_orders'] / max(1, metrics['total_orders']) * 100, 2),
'avg_delay_days': round(metrics['avg_delivery_delay_days'], 2)
},
'actionable': True,
'recommendation_actions': [
{
'label': 'Order Earlier',
'action': 'adjust_order_lead_time',
'params': {
'supplier_id': supplier_id,
'additional_days': predictions['predicted_delivery_days'] + 2
}
},
{
'label': 'Increase Safety Stock',
'action': 'adjust_safety_stock',
'params': {'supplier_id': supplier_id, 'multiplier': 1.3}
}
],
'source_service': 'procurement',
'source_model': 'supplier_performance_predictor'
})
# Insight 3: Quality issues
if metrics['quality_issue_rate'] > 10:
insights.append({
'type': 'alert',
'priority': 'high',
'category': 'procurement',
'title': f'Quality Issues: {metrics["quality_issue_rate"]:.1f}% of Orders',
'description': f'Supplier {supplier_id} has quality issues in {metrics["quality_issue_rate"]:.1f}% of orders ({metrics["quality_issues"]} of {metrics["total_orders"]}). This impacts product quality and customer satisfaction.',
'impact_type': 'quality_risk',
'impact_value': metrics['quality_issue_rate'],
'impact_unit': 'percentage',
'confidence': 90,
'metrics_json': {
'supplier_id': supplier_id,
'quality_issue_rate': round(metrics['quality_issue_rate'], 2),
'quality_issues': metrics['quality_issues'],
'avg_quality_score': round(metrics['avg_quality_score'], 2)
},
'actionable': True,
'recommendation_actions': [
{
'label': 'Review Supplier Quality',
'action': 'schedule_supplier_review',
'params': {'supplier_id': supplier_id, 'reason': 'quality_issues'}
},
{
'label': 'Increase Inspection',
'action': 'increase_quality_checks',
'params': {'supplier_id': supplier_id}
}
],
'source_service': 'procurement',
'source_model': 'supplier_performance_predictor'
})
# Insight 4: Excellent performance (positive insight)
if reliability_score >= 90:
insights.append({
'type': 'insight',
'priority': 'low',
'category': 'procurement',
'title': f'Excellent Supplier Performance: {reliability_score}/100',
'description': f'Supplier {supplier_id} demonstrates excellent performance with {reliability_score} reliability score. On-time: {metrics["on_time_rate"]:.1f}%, Quality: {metrics["avg_quality_score"]:.1f}. Consider expanding partnership.',
'impact_type': 'positive_performance',
'impact_value': reliability_score,
'impact_unit': 'score',
'confidence': 90,
'metrics_json': {
'supplier_id': supplier_id,
'reliability_score': reliability_score,
'on_time_rate': round(metrics['on_time_rate'], 2),
'quality_score': round(metrics['avg_quality_score'], 2)
},
'actionable': True,
'recommendation_actions': [
{
'label': 'Increase Order Volume',
'action': 'adjust_supplier_allocation',
'params': {'supplier_id': supplier_id, 'increase_pct': 20}
},
{
'label': 'Negotiate Better Terms',
'action': 'initiate_negotiation',
'params': {'supplier_id': supplier_id, 'reason': 'volume_increase'}
}
],
'source_service': 'procurement',
'source_model': 'supplier_performance_predictor'
})
# Insight 5: Performance decline
if metrics['recent_on_time_rate'] > 0 and metrics['recent_on_time_rate'] < metrics['on_time_rate'] - 15:
insights.append({
'type': 'alert',
'priority': 'medium',
'category': 'procurement',
'title': 'Supplier Performance Decline Detected',
'description': f'Supplier {supplier_id} recent performance ({metrics["recent_on_time_rate"]:.1f}% on-time) is significantly worse than historical average ({metrics["on_time_rate"]:.1f}%). Investigate potential issues.',
'impact_type': 'performance_decline',
'impact_value': metrics['on_time_rate'] - metrics['recent_on_time_rate'],
'impact_unit': 'percentage_points',
'confidence': 75,
'metrics_json': {
'supplier_id': supplier_id,
'recent_on_time_rate': round(metrics['recent_on_time_rate'], 2),
'historical_on_time_rate': round(metrics['on_time_rate'], 2),
'decline': round(metrics['on_time_rate'] - metrics['recent_on_time_rate'], 2)
},
'actionable': True,
'recommendation_actions': [
{
'label': 'Contact Supplier',
'action': 'schedule_supplier_meeting',
'params': {'supplier_id': supplier_id, 'reason': 'performance_decline'}
},
{
'label': 'Monitor Closely',
'action': 'increase_monitoring_frequency',
'params': {'supplier_id': supplier_id}
}
],
'source_service': 'procurement',
'source_model': 'supplier_performance_predictor'
})
logger.info(
"Generated supplier insights",
supplier_id=supplier_id,
insights=len(insights)
)
return insights
def _insufficient_data_response(
self,
tenant_id: str,
supplier_id: str
) -> Dict[str, Any]:
"""Return response when insufficient data available."""
return {
'tenant_id': tenant_id,
'supplier_id': supplier_id,
'analyzed_at': datetime.utcnow().isoformat(),
'orders_analyzed': 0,
'metrics': {},
'reliability_score': None,
'predictions': {},
'risk_assessment': {
'risk_level': 'unknown',
'risk_score': None,
'risk_factors': ['Insufficient historical data'],
'recommendation': 'Collect more order history before assessing supplier performance.'
},
'insights': []
}
def compare_suppliers(
self,
suppliers_analysis: List[Dict[str, Any]],
product_category: Optional[str] = None
) -> Dict[str, Any]:
"""
Compare multiple suppliers and provide recommendations.
Args:
suppliers_analysis: List of supplier analysis results
            product_category: Optional product category label (recorded in the report; not used for filtering)
Returns:
Comparison report with recommendations
"""
        if not suppliers_analysis:
            return {'error': 'No suppliers to compare'}
        # Exclude suppliers whose analysis had insufficient history (reliability_score is None)
        scored_suppliers = [
            s for s in suppliers_analysis if s.get('reliability_score') is not None
        ]
        if not scored_suppliers:
            return {'error': 'No suppliers with sufficient history to compare'}
        # Sort by reliability score, best first
        ranked_suppliers = sorted(
            scored_suppliers,
            key=lambda x: x['reliability_score'],
            reverse=True
        )
comparison = {
'analyzed_at': datetime.utcnow().isoformat(),
'suppliers_compared': len(ranked_suppliers),
'product_category': product_category,
'top_supplier': ranked_suppliers[0]['supplier_id'],
'top_supplier_score': ranked_suppliers[0]['reliability_score'],
'bottom_supplier': ranked_suppliers[-1]['supplier_id'],
'bottom_supplier_score': ranked_suppliers[-1]['reliability_score'],
'ranked_suppliers': [
{
'supplier_id': s['supplier_id'],
'reliability_score': s['reliability_score'],
'risk_level': s['risk_assessment']['risk_level']
}
for s in ranked_suppliers
],
'recommendations': []
}
# Generate comparison insights
if len(ranked_suppliers) >= 2:
score_gap = ranked_suppliers[0]['reliability_score'] - ranked_suppliers[-1]['reliability_score']
if score_gap > 30:
comparison['recommendations'].append({
'recommendation': f'Consider consolidating orders with top supplier {ranked_suppliers[0]["supplier_id"]} (score: {ranked_suppliers[0]["reliability_score"]})',
'reason': f'Significant performance gap ({score_gap} points) from lowest performer'
})
# Check for high-risk suppliers
high_risk = [s for s in ranked_suppliers if s['risk_assessment']['risk_level'] in ['high', 'critical']]
if high_risk:
comparison['recommendations'].append({
'recommendation': f'URGENT: Replace {len(high_risk)} high-risk supplier(s)',
'reason': 'Significant operational risk from unreliable suppliers',
'affected_suppliers': [s['supplier_id'] for s in high_risk]
})
return comparison
def get_supplier_reliability_score(self, supplier_id: str) -> Optional[int]:
"""Get cached reliability score for a supplier."""
return self.reliability_scores.get(supplier_id)
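

# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative only, not part of the service runtime).
# It builds a small synthetic order_history DataFrame with the columns that
# analyze_supplier_performance() documents and prints the resulting reliability
# score and risk level. The tenant and supplier identifiers are made up.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    import asyncio

    rng = np.random.default_rng(42)
    n_orders = 24
    order_dates = pd.date_range(end=datetime.utcnow(), periods=n_orders, freq="7D")
    expected = order_dates + pd.Timedelta(days=3)
    delays = rng.integers(-1, 4, size=n_orders)  # between 1 day early and 3 days late
    history = pd.DataFrame({
        'order_date': order_dates,
        'expected_delivery_date': expected,
        'actual_delivery_date': expected + pd.to_timedelta(delays, unit="D"),
        'order_quantity': rng.integers(80, 120, size=n_orders),
        'received_quantity': rng.integers(78, 120, size=n_orders),
        'quality_issues': rng.random(n_orders) < 0.05,
        'quality_score': rng.uniform(85, 100, size=n_orders),
        'order_value': rng.uniform(500, 2000, size=n_orders),
    })

    predictor = SupplierPerformancePredictor()
    result = asyncio.run(
        predictor.analyze_supplier_performance("demo-tenant", "demo-supplier", history)
    )
    print("Reliability score:", result['reliability_score'])
    print("Risk level:", result['risk_assessment']['risk_level'])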