"""
Supplier Performance Predictor

Predicts supplier reliability, delivery delays, and quality issues.
Generates insights for procurement risk management.
"""

import warnings
from collections import defaultdict
from datetime import datetime, timedelta
from typing import Dict, List, Any, Optional, Tuple

import numpy as np
import pandas as pd
import structlog
from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor
from sklearn.preprocessing import StandardScaler

warnings.filterwarnings('ignore')

logger = structlog.get_logger()


class SupplierPerformancePredictor:
    """
    Predicts supplier performance metrics for procurement risk management.

    Capabilities:
    1. Delivery delay probability prediction
    2. Quality issue likelihood scoring
    3. Supplier reliability scoring (0-100)
    4. Alternative supplier recommendations
    5. Procurement risk assessment
    6. Insight generation for high-risk suppliers
    """

    # Fewer recent orders than this and the "recent on-time rate" is reported
    # as 0.0 and must not be interpreted as a real rate.
    _MIN_RECENT_ORDERS = 3

    # Size (in days) of the window used for the "recent" metrics.
    _RECENT_WINDOW_DAYS = 30

    def __init__(self):
        self.delay_model = None
        self.quality_model = None
        # supplier_id -> last reliability score computed by
        # analyze_supplier_performance
        self.reliability_scores = {}
        self.scaler = StandardScaler()
        self.feature_columns = []

    async def analyze_supplier_performance(
        self,
        tenant_id: str,
        supplier_id: str,
        order_history: pd.DataFrame,
        min_orders: int = 10
    ) -> Dict[str, Any]:
        """
        Analyze historical supplier performance and generate insights.

        Args:
            tenant_id: Tenant identifier
            supplier_id: Supplier identifier
            order_history: Historical orders with columns:
                - order_date
                - expected_delivery_date
                - actual_delivery_date
                - order_quantity
                - received_quantity
                - quality_issues (bool)
                - quality_score (0-100)
                - order_value
            min_orders: Minimum orders required for analysis

        Returns:
            Dictionary with performance metrics and insights. When fewer than
            ``min_orders`` rows are supplied, the insufficient-data response
            is returned instead (``reliability_score`` is ``None`` there).
        """
        logger.info(
            "Analyzing supplier performance",
            tenant_id=tenant_id,
            supplier_id=supplier_id,
            orders=len(order_history)
        )

        if len(order_history) < min_orders:
            logger.warning(
                "Insufficient order history",
                supplier_id=supplier_id,
                orders=len(order_history),
                required=min_orders
            )
            return self._insufficient_data_response(tenant_id, supplier_id)

        # Calculate performance metrics
        metrics = self._calculate_performance_metrics(order_history)

        # Calculate reliability score
        reliability_score = self._calculate_reliability_score(metrics)

        # Predict future performance
        predictions = self._predict_future_performance(order_history, metrics)

        # Assess procurement risk
        risk_assessment = self._assess_procurement_risk(
            metrics, reliability_score, predictions
        )

        # Generate insights
        insights = self._generate_supplier_insights(
            tenant_id, supplier_id, metrics, reliability_score,
            risk_assessment, predictions
        )

        # Store reliability score
        self.reliability_scores[supplier_id] = reliability_score

        logger.info(
            "Supplier performance analysis complete",
            supplier_id=supplier_id,
            reliability_score=reliability_score,
            insights_generated=len(insights)
        )

        return {
            'tenant_id': tenant_id,
            'supplier_id': supplier_id,
            'analyzed_at': datetime.utcnow().isoformat(),
            'orders_analyzed': len(order_history),
            'metrics': metrics,
            'reliability_score': reliability_score,
            'predictions': predictions,
            'risk_assessment': risk_assessment,
            'insights': insights
        }

    def _calculate_performance_metrics(
        self,
        order_history: pd.DataFrame
    ) -> Dict[str, Any]:
        """
        Calculate comprehensive supplier performance metrics.

        The input frame is not modified; all helper columns are added to a
        private copy.

        Args:
            order_history: Historical order data

        Returns:
            Dictionary of performance metrics. Non-finite float values
            (NaN / inf) are replaced by 0.0.
        """
        # Work on a copy so helper columns never leak into the caller's frame.
        orders = order_history.copy()

        # Ensure datetime columns
        for column in ('order_date', 'expected_delivery_date', 'actual_delivery_date'):
            orders[column] = pd.to_datetime(orders[column])

        # Calculate delivery delays
        orders['delivery_delay_days'] = (
            orders['actual_delivery_date'] - orders['expected_delivery_date']
        ).dt.days
        orders['is_delayed'] = orders['delivery_delay_days'] > 0
        orders['is_early'] = orders['delivery_delay_days'] < 0

        # Calculate quantity accuracy. Orders with a zero ordered quantity are
        # masked to NaN so they do not produce inf / NaN ratios that poison
        # the aggregate statistics.
        ordered_quantity = orders['order_quantity'].where(orders['order_quantity'] != 0)
        orders['quantity_accuracy'] = orders['received_quantity'] / ordered_quantity
        orders['is_short_delivery'] = orders['quantity_accuracy'] < 1.0
        orders['is_over_delivery'] = orders['quantity_accuracy'] > 1.0

        delayed = orders['is_delayed']
        delay_days = orders['delivery_delay_days']
        has_quality_flags = 'quality_issues' in orders.columns
        has_quality_scores = 'quality_score' in orders.columns
        has_order_values = 'order_value' in orders.columns

        # max() is NaN when no order has a computable delay (e.g. no actual
        # delivery dates); int(NaN) would raise.
        max_delay = delay_days.max()

        # One reference time for both recent metrics so they always describe
        # the same set of orders.
        now = datetime.utcnow()
        recent_orders = self._select_recent_orders(
            orders, self._RECENT_WINDOW_DAYS, now
        )

        metrics = {
            # Delivery metrics
            'total_orders': int(len(orders)),
            'on_time_orders': int((~delayed).sum()),
            'delayed_orders': int(delayed.sum()),
            'on_time_rate': float((~delayed).mean() * 100),
            'avg_delivery_delay_days': float(delay_days[delayed].mean()) if delayed.any() else 0.0,
            'max_delivery_delay_days': int(max_delay) if pd.notna(max_delay) else 0,
            'delivery_delay_std': float(delay_days.std()),

            # Quantity accuracy metrics
            'avg_quantity_accuracy': float(orders['quantity_accuracy'].mean() * 100),
            'short_deliveries': int(orders['is_short_delivery'].sum()),
            'short_delivery_rate': float(orders['is_short_delivery'].mean() * 100),

            # Quality metrics
            'quality_issues': int(orders['quality_issues'].sum()) if has_quality_flags else 0,
            'quality_issue_rate': float(orders['quality_issues'].mean() * 100) if has_quality_flags else 0.0,
            'avg_quality_score': float(orders['quality_score'].mean()) if has_quality_scores else 100.0,

            # Consistency metrics
            'delivery_consistency': float(100 - delay_days.std() * 10),  # Lower variance = higher consistency
            'quantity_consistency': float(100 - (orders['quantity_accuracy'].std() * 100)),

            # Recent trend (last 30 days vs overall)
            'recent_on_time_rate': self._calculate_recent_trend(
                orders, 'is_delayed', days=self._RECENT_WINDOW_DAYS, reference_time=now
            ),
            # Lets consumers distinguish "0% on time recently" from
            # "no recent data" (both yield recent_on_time_rate == 0.0).
            'recent_order_count': int(len(recent_orders)),

            # Cost metrics
            'total_order_value': float(orders['order_value'].sum()) if has_order_values else 0.0,
            'avg_order_value': float(orders['order_value'].mean()) if has_order_values else 0.0
        }

        # Ensure all metrics are valid (no NaN / inf)
        for key, value in metrics.items():
            if isinstance(value, float) and not np.isfinite(value):
                metrics[key] = 0.0

        return metrics

    @staticmethod
    def _select_recent_orders(
        order_history: pd.DataFrame,
        days: int,
        reference_time: Optional[datetime] = None
    ) -> pd.DataFrame:
        """Return the orders whose order_date is within `days` of the reference time (default: now, UTC)."""
        now = reference_time if reference_time is not None else datetime.utcnow()
        cutoff_date = now - timedelta(days=days)
        return order_history[order_history['order_date'] >= cutoff_date]

    def _has_recent_data(self, metrics: Dict[str, Any]) -> bool:
        """Tell whether `recent_on_time_rate` in `metrics` is backed by enough recent orders."""
        recent_count = metrics.get('recent_order_count')
        if recent_count is None:
            # Metrics dict without the count: fall back to the historical
            # convention where 0.0 doubles as the "no recent data" marker.
            return metrics['recent_on_time_rate'] > 0
        return recent_count >= self._MIN_RECENT_ORDERS

    def _calculate_recent_trend(
        self,
        order_history: pd.DataFrame,
        metric_column: str,
        days: int = 30,
        reference_time: Optional[datetime] = None
    ) -> float:
        """
        Calculate recent trend for a metric.

        Args:
            order_history: Order data with an `order_date` datetime column
            metric_column: Column to average; `is_delayed` is inverted so the
                result is an on-time rate
            days: Size of the recent window in days
            reference_time: End of the window; defaults to the current UTC time

        Returns:
            Mean of the metric over the window, times 100, or 0.0 when the
            window holds fewer than the minimum number of orders.
        """
        recent_orders = self._select_recent_orders(order_history, days, reference_time)

        if len(recent_orders) < self._MIN_RECENT_ORDERS:
            return 0.0  # Not enough recent data

        if metric_column == 'is_delayed':
            return float((~recent_orders['is_delayed']).mean() * 100)
        return float(recent_orders[metric_column].mean() * 100)

    def _calculate_reliability_score(
        self,
        metrics: Dict[str, Any]
    ) -> int:
        """
        Calculate overall supplier reliability score (0-100).

        Factors:
        - On-time delivery rate (40%)
        - Quantity accuracy (20%)
        - Quality score (25%)
        - Consistency (15%)

        Penalties: x0.8 when quality_issue_rate > 10, x0.9 when
        short_delivery_rate > 15. The result is rounded and clamped to 0-100.
        """
        # On-time delivery score (40 points)
        on_time_score = metrics['on_time_rate'] * 0.40

        # Quantity accuracy score (20 points); over-delivery is capped at 100
        quantity_score = min(100, metrics['avg_quantity_accuracy']) * 0.20

        # Quality score (25 points)
        quality_score = metrics['avg_quality_score'] * 0.25

        # Consistency score (15 points)
        # Average of delivery and quantity consistency
        consistency_score = (
            (metrics['delivery_consistency'] + metrics['quantity_consistency']) / 2
        ) * 0.15

        total_score = on_time_score + quantity_score + quality_score + consistency_score

        # Penalties
        # Severe penalty for high quality issue rate
        if metrics['quality_issue_rate'] > 10:
            total_score *= 0.8  # 20% penalty

        # Penalty for high short delivery rate
        if metrics['short_delivery_rate'] > 15:
            total_score *= 0.9  # 10% penalty

        return int(round(max(0, min(100, total_score))))

    def _predict_future_performance(
        self,
        order_history: pd.DataFrame,
        metrics: Dict[str, Any]
    ) -> Dict[str, Any]:
        """
        Predict future supplier performance based on trends.

        Args:
            order_history: Historical order data (currently unused; the
                predictions are derived from `metrics` only)
            metrics: Calculated performance metrics

        Returns:
            Dictionary of predictions
        """
        # Simple trend-based predictions
        # For production, could use ML models trained on multi-supplier data
        predictions = {
            'next_order_delay_probability': 0.0,
            'next_order_quality_issue_probability': 0.0,
            'predicted_delivery_days': 0,
            'confidence': 0
        }

        # Delay probability based on historical rate and recent trend
        historical_delay_rate = metrics['delayed_orders'] / max(1, metrics['total_orders'])

        if self._has_recent_data(metrics):
            # Weight recent performance higher
            recent_delay_rate = 1 - metrics['recent_on_time_rate'] / 100
            delay_probability = (historical_delay_rate * 0.3) + (recent_delay_rate * 0.7)
        else:
            # No usable recent window: a missing recent rate must not be read
            # as "0% on time", so rely on the historical rate alone.
            delay_probability = historical_delay_rate

        predictions['next_order_delay_probability'] = float(min(1.0, max(0.0, delay_probability)))

        # Quality issue probability
        if metrics['quality_issues'] > 0:
            quality_issue_prob = metrics['quality_issue_rate'] / 100
            predictions['next_order_quality_issue_probability'] = float(quality_issue_prob)

        # Predicted delivery days (expected delay)
        if metrics['avg_delivery_delay_days'] > 0:
            predictions['predicted_delivery_days'] = int(round(metrics['avg_delivery_delay_days']))

        # Confidence based on data quantity
        total_orders = metrics['total_orders']
        if total_orders >= 50:
            predictions['confidence'] = 90
        elif total_orders >= 30:
            predictions['confidence'] = 80
        elif total_orders >= 20:
            predictions['confidence'] = 70
        else:
            predictions['confidence'] = 60

        return predictions

    def _assess_procurement_risk(
        self,
        metrics: Dict[str, Any],
        reliability_score: int,
        predictions: Dict[str, Any]
    ) -> Dict[str, Any]:
        """
        Assess overall procurement risk for this supplier.

        Risk levels: low, medium, high, critical

        Returns:
            Dictionary with `risk_level`, `risk_score` (capped at 100),
            `risk_factors` and a textual `recommendation`.
        """
        risk_factors = []
        risk_score = 0  # 0-100, higher = more risky

        # Low reliability
        if reliability_score < 60:
            risk_factors.append('Low reliability score')
            risk_score += 30
        elif reliability_score < 75:
            risk_factors.append('Medium reliability score')
            risk_score += 15

        # High delay probability
        if predictions['next_order_delay_probability'] > 0.5:
            risk_factors.append('High delay probability')
            risk_score += 25
        elif predictions['next_order_delay_probability'] > 0.3:
            risk_factors.append('Moderate delay probability')
            risk_score += 15

        # Quality issues
        if metrics['quality_issue_rate'] > 15:
            risk_factors.append('High quality issue rate')
            risk_score += 25
        elif metrics['quality_issue_rate'] > 5:
            risk_factors.append('Moderate quality issue rate')
            risk_score += 10

        # Quantity accuracy issues
        if metrics['short_delivery_rate'] > 20:
            risk_factors.append('Frequent short deliveries')
            risk_score += 15
        elif metrics['short_delivery_rate'] > 10:
            risk_factors.append('Occasional short deliveries')
            risk_score += 8

        # Low consistency
        if metrics['delivery_consistency'] < 60:
            risk_factors.append('Inconsistent delivery timing')
            risk_score += 10

        # Determine risk level
        if risk_score >= 70:
            risk_level = 'critical'
        elif risk_score >= 50:
            risk_level = 'high'
        elif risk_score >= 30:
            risk_level = 'medium'
        else:
            risk_level = 'low'

        return {
            'risk_level': risk_level,
            'risk_score': min(100, risk_score),
            'risk_factors': risk_factors,
            'recommendation': self._get_risk_recommendation(risk_level, risk_factors)
        }

    def _get_risk_recommendation(
        self,
        risk_level: str,
        risk_factors: List[str]
    ) -> str:
        """Generate risk mitigation recommendation for a risk level (`risk_factors` is currently unused)."""
        if risk_level == 'critical':
            return 'URGENT: Consider switching to alternative supplier. Current supplier poses significant operational risk.'
        elif risk_level == 'high':
            return 'HIGH PRIORITY: Increase safety stock and have backup supplier ready. Monitor closely.'
        elif risk_level == 'medium':
            return 'MONITOR: Keep standard safety stock. Review performance quarterly.'
        else:
            return 'LOW RISK: Supplier performing well. Maintain current relationship.'

    def _generate_supplier_insights(
        self,
        tenant_id: str,
        supplier_id: str,
        metrics: Dict[str, Any],
        reliability_score: int,
        risk_assessment: Dict[str, Any],
        predictions: Dict[str, Any]
    ) -> List[Dict[str, Any]]:
        """
        Generate actionable insights for procurement team.

        Args:
            tenant_id: Tenant ID
            supplier_id: Supplier ID
            metrics: Performance metrics
            reliability_score: Overall reliability (0-100)
            risk_assessment: Risk assessment results
            predictions: Future performance predictions

        Returns:
            List of insight dictionaries
        """
        insights = []

        # Insight 1: Low reliability alert
        if reliability_score < 60:
            insights.append({
                'type': 'alert',
                'priority': 'critical' if reliability_score < 50 else 'high',
                'category': 'procurement',
                'title': f'Low Supplier Reliability: {reliability_score}/100',
                'description': (
                    f'Supplier {supplier_id} has low reliability score of {reliability_score}. '
                    f'On-time rate: {metrics["on_time_rate"]:.1f}%, '
                    f'Quality: {metrics["avg_quality_score"]:.1f}. '
                    'Consider alternative suppliers.'
                ),
                'impact_type': 'operational_risk',
                'impact_value': 100 - reliability_score,
                'impact_unit': 'risk_points',
                'confidence': 85,
                'metrics_json': {
                    'supplier_id': supplier_id,
                    'reliability_score': reliability_score,
                    'on_time_rate': round(metrics['on_time_rate'], 2),
                    'quality_score': round(metrics['avg_quality_score'], 2),
                    'quality_issue_rate': round(metrics['quality_issue_rate'], 2),
                    'delayed_orders': metrics['delayed_orders'],
                    'total_orders': metrics['total_orders']
                },
                'actionable': True,
                'recommendation_actions': [
                    {
                        'label': 'Find Alternative Supplier',
                        'action': 'search_alternative_suppliers',
                        'params': {'current_supplier_id': supplier_id}
                    },
                    {
                        'label': 'Increase Safety Stock',
                        'action': 'adjust_safety_stock',
                        'params': {'supplier_id': supplier_id, 'multiplier': 1.5}
                    }
                ],
                'source_service': 'procurement',
                'source_model': 'supplier_performance_predictor'
            })

        # Insight 2: High delay probability
        if predictions['next_order_delay_probability'] > 0.4:
            delay_prob_pct = predictions['next_order_delay_probability'] * 100
            insights.append({
                'type': 'prediction',
                'priority': 'high' if delay_prob_pct > 60 else 'medium',
                'category': 'procurement',
                'title': f'High Delay Risk: {delay_prob_pct:.0f}% Probability',
                'description': (
                    f'Supplier {supplier_id} has {delay_prob_pct:.0f}% probability of delaying next order. '
                    f'Expected delay: {predictions["predicted_delivery_days"]} days. '
                    'Plan accordingly.'
                ),
                'impact_type': 'operational_risk',
                'impact_value': delay_prob_pct,
                'impact_unit': 'probability_percent',
                'confidence': predictions['confidence'],
                'metrics_json': {
                    'supplier_id': supplier_id,
                    'delay_probability': round(delay_prob_pct, 2),
                    'predicted_delay_days': predictions['predicted_delivery_days'],
                    'historical_delay_rate': round(
                        metrics['delayed_orders'] / max(1, metrics['total_orders']) * 100, 2
                    ),
                    'avg_delay_days': round(metrics['avg_delivery_delay_days'], 2)
                },
                'actionable': True,
                'recommendation_actions': [
                    {
                        'label': 'Order Earlier',
                        'action': 'adjust_order_lead_time',
                        'params': {
                            'supplier_id': supplier_id,
                            'additional_days': predictions['predicted_delivery_days'] + 2
                        }
                    },
                    {
                        'label': 'Increase Safety Stock',
                        'action': 'adjust_safety_stock',
                        'params': {'supplier_id': supplier_id, 'multiplier': 1.3}
                    }
                ],
                'source_service': 'procurement',
                'source_model': 'supplier_performance_predictor'
            })

        # Insight 3: Quality issues
        if metrics['quality_issue_rate'] > 10:
            insights.append({
                'type': 'alert',
                'priority': 'high',
                'category': 'procurement',
                'title': f'Quality Issues: {metrics["quality_issue_rate"]:.1f}% of Orders',
                'description': (
                    f'Supplier {supplier_id} has quality issues in {metrics["quality_issue_rate"]:.1f}% of orders '
                    f'({metrics["quality_issues"]} of {metrics["total_orders"]}). '
                    'This impacts product quality and customer satisfaction.'
                ),
                'impact_type': 'quality_risk',
                'impact_value': metrics['quality_issue_rate'],
                'impact_unit': 'percentage',
                'confidence': 90,
                'metrics_json': {
                    'supplier_id': supplier_id,
                    'quality_issue_rate': round(metrics['quality_issue_rate'], 2),
                    'quality_issues': metrics['quality_issues'],
                    'avg_quality_score': round(metrics['avg_quality_score'], 2)
                },
                'actionable': True,
                'recommendation_actions': [
                    {
                        'label': 'Review Supplier Quality',
                        'action': 'schedule_supplier_review',
                        'params': {'supplier_id': supplier_id, 'reason': 'quality_issues'}
                    },
                    {
                        'label': 'Increase Inspection',
                        'action': 'increase_quality_checks',
                        'params': {'supplier_id': supplier_id}
                    }
                ],
                'source_service': 'procurement',
                'source_model': 'supplier_performance_predictor'
            })

        # Insight 4: Excellent performance (positive insight)
        if reliability_score >= 90:
            insights.append({
                'type': 'insight',
                'priority': 'low',
                'category': 'procurement',
                'title': f'Excellent Supplier Performance: {reliability_score}/100',
                'description': (
                    f'Supplier {supplier_id} demonstrates excellent performance with {reliability_score} reliability score. '
                    f'On-time: {metrics["on_time_rate"]:.1f}%, '
                    f'Quality: {metrics["avg_quality_score"]:.1f}. '
                    'Consider expanding partnership.'
                ),
                'impact_type': 'positive_performance',
                'impact_value': reliability_score,
                'impact_unit': 'score',
                'confidence': 90,
                'metrics_json': {
                    'supplier_id': supplier_id,
                    'reliability_score': reliability_score,
                    'on_time_rate': round(metrics['on_time_rate'], 2),
                    'quality_score': round(metrics['avg_quality_score'], 2)
                },
                'actionable': True,
                'recommendation_actions': [
                    {
                        'label': 'Increase Order Volume',
                        'action': 'adjust_supplier_allocation',
                        'params': {'supplier_id': supplier_id, 'increase_pct': 20}
                    },
                    {
                        'label': 'Negotiate Better Terms',
                        'action': 'initiate_negotiation',
                        'params': {'supplier_id': supplier_id, 'reason': 'volume_increase'}
                    }
                ],
                'source_service': 'procurement',
                'source_model': 'supplier_performance_predictor'
            })

        # Insight 5: Performance decline
        # Gate on the presence of recent data rather than on the rate being
        # positive, so a genuine 0% recent on-time rate is still reported.
        if (
            self._has_recent_data(metrics)
            and metrics['recent_on_time_rate'] < metrics['on_time_rate'] - 15
        ):
            insights.append({
                'type': 'alert',
                'priority': 'medium',
                'category': 'procurement',
                'title': 'Supplier Performance Decline Detected',
                'description': (
                    f'Supplier {supplier_id} recent performance ({metrics["recent_on_time_rate"]:.1f}% on-time) '
                    f'is significantly worse than historical average ({metrics["on_time_rate"]:.1f}%). '
                    'Investigate potential issues.'
                ),
                'impact_type': 'performance_decline',
                'impact_value': metrics['on_time_rate'] - metrics['recent_on_time_rate'],
                'impact_unit': 'percentage_points',
                'confidence': 75,
                'metrics_json': {
                    'supplier_id': supplier_id,
                    'recent_on_time_rate': round(metrics['recent_on_time_rate'], 2),
                    'historical_on_time_rate': round(metrics['on_time_rate'], 2),
                    'decline': round(metrics['on_time_rate'] - metrics['recent_on_time_rate'], 2)
                },
                'actionable': True,
                'recommendation_actions': [
                    {
                        'label': 'Contact Supplier',
                        'action': 'schedule_supplier_meeting',
                        'params': {'supplier_id': supplier_id, 'reason': 'performance_decline'}
                    },
                    {
                        'label': 'Monitor Closely',
                        'action': 'increase_monitoring_frequency',
                        'params': {'supplier_id': supplier_id}
                    }
                ],
                'source_service': 'procurement',
                'source_model': 'supplier_performance_predictor'
            })

        logger.info(
            "Generated supplier insights",
            supplier_id=supplier_id,
            insights=len(insights)
        )

        return insights

    def _insufficient_data_response(
        self,
        tenant_id: str,
        supplier_id: str
    ) -> Dict[str, Any]:
        """Return response when insufficient data available."""
        return {
            'tenant_id': tenant_id,
            'supplier_id': supplier_id,
            'analyzed_at': datetime.utcnow().isoformat(),
            'orders_analyzed': 0,
            'metrics': {},
            'reliability_score': None,
            'predictions': {},
            'risk_assessment': {
                'risk_level': 'unknown',
                'risk_score': None,
                'risk_factors': ['Insufficient historical data'],
                'recommendation': 'Collect more order history before assessing supplier performance.'
            },
            'insights': []
        }

    def compare_suppliers(
        self,
        suppliers_analysis: List[Dict[str, Any]],
        product_category: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Compare multiple suppliers and provide recommendations.

        Suppliers whose `reliability_score` is `None` (the insufficient-data
        response) are ranked after all scored suppliers and are ignored when
        picking the top/bottom supplier and computing the score gap, unless
        no supplier has a score at all.

        Args:
            suppliers_analysis: List of supplier analysis results
            product_category: Optional product category filter

        Returns:
            Comparison report with recommendations, or
            `{'error': 'No suppliers to compare'}` for an empty input.
        """
        if not suppliers_analysis:
            return {'error': 'No suppliers to compare'}

        def ranking_key(analysis: Dict[str, Any]) -> Tuple[bool, Any]:
            # (has score, score): with reverse=True scored suppliers come
            # first, ordered by descending score; None never gets compared
            # with a number.
            score = analysis.get('reliability_score')
            return (score is not None, score if score is not None else 0)

        # Sort by reliability score
        ranked_suppliers = sorted(suppliers_analysis, key=ranking_key, reverse=True)
        scored_suppliers = [
            s for s in ranked_suppliers if s.get('reliability_score') is not None
        ]
        # Top/bottom are taken among scored suppliers when any exist.
        reference_suppliers = scored_suppliers or ranked_suppliers

        comparison = {
            'analyzed_at': datetime.utcnow().isoformat(),
            'suppliers_compared': len(ranked_suppliers),
            'product_category': product_category,
            'top_supplier': reference_suppliers[0]['supplier_id'],
            'top_supplier_score': reference_suppliers[0]['reliability_score'],
            'bottom_supplier': reference_suppliers[-1]['supplier_id'],
            'bottom_supplier_score': reference_suppliers[-1]['reliability_score'],
            'ranked_suppliers': [
                {
                    'supplier_id': s['supplier_id'],
                    'reliability_score': s['reliability_score'],
                    'risk_level': s['risk_assessment']['risk_level']
                }
                for s in ranked_suppliers
            ],
            'recommendations': []
        }

        # Generate comparison insights
        if len(scored_suppliers) >= 2:
            best, worst = scored_suppliers[0], scored_suppliers[-1]
            score_gap = best['reliability_score'] - worst['reliability_score']

            if score_gap > 30:
                comparison['recommendations'].append({
                    'recommendation': f'Consider consolidating orders with top supplier {best["supplier_id"]} (score: {best["reliability_score"]})',
                    'reason': f'Significant performance gap ({score_gap} points) from lowest performer'
                })

        # Check for high-risk suppliers
        high_risk = [
            s for s in ranked_suppliers
            if s['risk_assessment']['risk_level'] in ['high', 'critical']
        ]
        if high_risk:
            comparison['recommendations'].append({
                'recommendation': f'URGENT: Replace {len(high_risk)} high-risk supplier(s)',
                'reason': 'Significant operational risk from unreliable suppliers',
                'affected_suppliers': [s['supplier_id'] for s in high_risk]
            })

        return comparison

    def get_supplier_reliability_score(self, supplier_id: str) -> Optional[int]:
        """Get cached reliability score for a supplier (None when the supplier has not been analyzed)."""
        return self.reliability_scores.get(supplier_id)