""" Tests for Production Yield Predictor """ import pytest import pandas as pd import numpy as np from datetime import datetime, timedelta from services.production.app.ml.yield_predictor import YieldPredictor @pytest.fixture def yield_predictor(): """Create YieldPredictor instance.""" return YieldPredictor() @pytest.fixture def stable_yield_history(): """Generate production history with stable high yield.""" np.random.seed(42) base_date = datetime.utcnow() - timedelta(days=180) history = [] for i in range(50): run_date = base_date + timedelta(days=i * 3) history.append({ 'production_run_id': f'run_{i}', 'recipe_id': 'recipe_123', 'planned_quantity': 100, 'actual_quantity': np.random.normal(97, 1.5), # 97% avg, low variance 'yield_percentage': np.random.normal(97, 1.5), 'staff_assigned': [f'worker_{i % 3}'], # 3 workers 'started_at': run_date, 'completed_at': run_date + timedelta(hours=4), 'batch_size': np.random.randint(80, 120) }) df = pd.DataFrame(history) df['yield_percentage'] = df['yield_percentage'].clip(90, 100) return df @pytest.fixture def variable_yield_history(): """Generate production history with variable yield.""" np.random.seed(42) base_date = datetime.utcnow() - timedelta(days=180) history = [] workers = ['worker_expert', 'worker_intermediate', 'worker_novice'] worker_skills = {'worker_expert': 96, 'worker_intermediate': 90, 'worker_novice': 82} for i in range(60): run_date = base_date + timedelta(days=i * 3) worker = workers[i % 3] base_yield = worker_skills[worker] # Time of day effect hour = (6 + i * 2) % 24 time_penalty = 5 if hour < 6 or hour > 22 else 0 # Batch size effect batch_size = np.random.randint(50, 150) batch_penalty = 3 if batch_size > 120 else 0 final_yield = base_yield - time_penalty - batch_penalty + np.random.normal(0, 2) history.append({ 'production_run_id': f'run_{i}', 'recipe_id': 'recipe_456', 'planned_quantity': 100, 'actual_quantity': final_yield, 'yield_percentage': final_yield, 'worker_id': worker, 'started_at': run_date.replace(hour=hour), 'completed_at': run_date.replace(hour=hour) + timedelta(hours=4), 'batch_size': batch_size }) df = pd.DataFrame(history) df['yield_percentage'] = df['yield_percentage'].clip(70, 100) return df @pytest.fixture def low_yield_history(): """Generate production history with consistently low yield.""" np.random.seed(42) base_date = datetime.utcnow() - timedelta(days=120) history = [] for i in range(40): run_date = base_date + timedelta(days=i * 3) history.append({ 'production_run_id': f'run_{i}', 'recipe_id': 'recipe_789', 'planned_quantity': 100, 'actual_quantity': np.random.normal(82, 5), # 82% avg, high variance 'yield_percentage': np.random.normal(82, 5), 'worker_id': f'worker_{i % 2}', 'started_at': run_date, 'completed_at': run_date + timedelta(hours=4), 'batch_size': np.random.randint(80, 120) }) df = pd.DataFrame(history) df['yield_percentage'] = df['yield_percentage'].clip(60, 95) return df @pytest.fixture def production_context_optimal(): """Production context for optimal conditions.""" return { 'worker_id': 'worker_expert', 'planned_start_time': (datetime.utcnow() + timedelta(days=1)).replace(hour=10), 'batch_size': 100, 'planned_quantity': 100, 'unit_cost': 5.0 } @pytest.fixture def production_context_suboptimal(): """Production context for suboptimal conditions.""" return { 'worker_id': 'worker_novice', 'planned_start_time': (datetime.utcnow() + timedelta(days=1)).replace(hour=4), 'batch_size': 140, 'planned_quantity': 100, 'unit_cost': 5.0 } class TestYieldPredictorBasics: """Test basic functionality.""" @pytest.mark.asyncio async def test_insufficient_data(self, yield_predictor): """Test handling of insufficient production history.""" # Create minimal history (< 30 runs) history = pd.DataFrame([{ 'production_run_id': 'run_1', 'recipe_id': 'recipe_123', 'planned_quantity': 100, 'actual_quantity': 95, 'yield_percentage': 95, 'worker_id': 'worker_1', 'started_at': datetime.utcnow() - timedelta(days=1), 'completed_at': datetime.utcnow() - timedelta(hours=20), 'batch_size': 100 }]) context = { 'worker_id': 'worker_1', 'planned_start_time': datetime.utcnow() + timedelta(days=1), 'batch_size': 100, 'planned_quantity': 100 } result = await yield_predictor.predict_yield( tenant_id='tenant_123', recipe_id='recipe_123', production_history=history, production_context=context, min_history_runs=30 ) assert result['status'] == 'insufficient_data' assert result['history_runs'] == 1 assert result['required_runs'] == 30 assert len(result['insights']) == 1 assert result['insights'][0]['type'] == 'warning' @pytest.mark.asyncio async def test_baseline_statistics_stable_yield(self, yield_predictor, stable_yield_history): """Test baseline statistics calculation for stable yield.""" stats = yield_predictor._calculate_baseline_statistics(stable_yield_history) assert 95 < stats['mean_yield'] < 99 assert stats['std_yield'] < 3 # Low variance assert stats['cv_yield'] < 0.05 # Low coefficient of variation assert stats['min_yield'] >= 90 assert stats['max_yield'] <= 100 @pytest.mark.asyncio async def test_baseline_statistics_variable_yield(self, yield_predictor, variable_yield_history): """Test baseline statistics for variable yield.""" stats = yield_predictor._calculate_baseline_statistics(variable_yield_history) assert 85 < stats['mean_yield'] < 93 assert stats['std_yield'] > 3 # Higher variance assert stats['cv_yield'] > 0.03 assert stats['runs_below_90'] > 0 class TestFeatureEngineering: """Test feature engineering.""" @pytest.mark.asyncio async def test_time_features(self, yield_predictor, stable_yield_history): """Test time-based feature extraction.""" feature_df = yield_predictor._engineer_features(stable_yield_history) assert 'hour_of_day' in feature_df.columns assert 'day_of_week' in feature_df.columns assert 'is_weekend' in feature_df.columns assert 'is_early_morning' in feature_df.columns assert 'is_late_night' in feature_df.columns assert feature_df['hour_of_day'].min() >= 0 assert feature_df['hour_of_day'].max() <= 23 assert feature_df['day_of_week'].min() >= 0 assert feature_df['day_of_week'].max() <= 6 @pytest.mark.asyncio async def test_batch_size_features(self, yield_predictor, stable_yield_history): """Test batch size feature engineering.""" feature_df = yield_predictor._engineer_features(stable_yield_history) assert 'batch_size_normalized' in feature_df.columns assert 'is_large_batch' in feature_df.columns assert 'is_small_batch' in feature_df.columns # Normalized batch size should be around 1.0 on average assert 0.5 < feature_df['batch_size_normalized'].mean() < 1.5 @pytest.mark.asyncio async def test_worker_experience_features(self, yield_predictor, variable_yield_history): """Test worker experience feature engineering.""" feature_df = yield_predictor._engineer_features(variable_yield_history) assert 'worker_run_count' in feature_df.columns assert 'worker_experience_level' in feature_df.columns # Worker run count should increase for each worker for worker in feature_df['worker_id'].unique(): worker_runs = feature_df[feature_df['worker_id'] == worker]['worker_run_count'] assert worker_runs.is_monotonic_increasing class TestFactorAnalysis: """Test yield factor analysis.""" @pytest.mark.asyncio async def test_worker_impact_detection(self, yield_predictor, variable_yield_history): """Test detection of worker impact on yield.""" feature_df = yield_predictor._engineer_features(variable_yield_history) factor_analysis = yield_predictor._analyze_yield_factors(feature_df) assert 'worker' in factor_analysis # Should detect worker skill differences if factor_analysis['worker'].get('significant'): assert 'best_worker' in factor_analysis['worker'] assert 'worst_worker' in factor_analysis['worker'] assert factor_analysis['worker']['yield_range'] > 0 @pytest.mark.asyncio async def test_batch_size_correlation(self, yield_predictor, variable_yield_history): """Test batch size correlation analysis.""" feature_df = yield_predictor._engineer_features(variable_yield_history) factor_analysis = yield_predictor._analyze_yield_factors(feature_df) assert 'batch_size' in factor_analysis if factor_analysis['batch_size'].get('significant'): assert 'correlation' in factor_analysis['batch_size'] assert 'direction' in factor_analysis['batch_size'] assert factor_analysis['batch_size']['direction'] in ['positive', 'negative'] @pytest.mark.asyncio async def test_time_of_day_effect(self, yield_predictor, variable_yield_history): """Test time of day effect analysis.""" feature_df = yield_predictor._engineer_features(variable_yield_history) factor_analysis = yield_predictor._analyze_yield_factors(feature_df) assert 'time_of_day' in factor_analysis class TestYieldPrediction: """Test yield prediction.""" @pytest.mark.asyncio async def test_predict_stable_yield(self, yield_predictor, stable_yield_history, production_context_optimal): """Test prediction for stable yield recipe.""" result = await yield_predictor.predict_yield( tenant_id='tenant_123', recipe_id='recipe_123', production_history=stable_yield_history, production_context=production_context_optimal, min_history_runs=30 ) assert result['status'] != 'insufficient_data' assert result['predicted_yield'] is not None assert 90 < result['predicted_yield'] < 100 assert result['confidence'] > 0 assert 'prediction_range' in result assert result['prediction_range']['lower'] < result['predicted_yield'] assert result['prediction_range']['upper'] > result['predicted_yield'] @pytest.mark.asyncio async def test_predict_variable_yield_optimal_context( self, yield_predictor, variable_yield_history, production_context_optimal ): """Test prediction with optimal production context.""" result = await yield_predictor.predict_yield( tenant_id='tenant_123', recipe_id='recipe_456', production_history=variable_yield_history, production_context=production_context_optimal, min_history_runs=30 ) assert result['predicted_yield'] is not None # Optimal context should predict higher yield assert result['predicted_yield'] > result['baseline_yield'] - 5 @pytest.mark.asyncio async def test_predict_variable_yield_suboptimal_context( self, yield_predictor, variable_yield_history, production_context_suboptimal ): """Test prediction with suboptimal production context.""" result = await yield_predictor.predict_yield( tenant_id='tenant_123', recipe_id='recipe_456', production_history=variable_yield_history, production_context=production_context_suboptimal, min_history_runs=30 ) assert result['predicted_yield'] is not None # Suboptimal context (novice worker, early morning, large batch) # should predict lower yield @pytest.mark.asyncio async def test_expected_waste_calculation( self, yield_predictor, low_yield_history, production_context_optimal ): """Test expected waste calculation.""" result = await yield_predictor.predict_yield( tenant_id='tenant_123', recipe_id='recipe_789', production_history=low_yield_history, production_context=production_context_optimal, min_history_runs=30 ) assert 'expected_waste' in result assert result['expected_waste'] > 0 # For low yield (82%), waste should be significant expected_waste_pct = 100 - result['predicted_yield'] assert expected_waste_pct > 5 class TestPatternDetection: """Test yield pattern identification.""" @pytest.mark.asyncio async def test_low_yield_worker_pattern(self, yield_predictor, variable_yield_history): """Test detection of low-yield worker pattern.""" feature_df = yield_predictor._engineer_features(variable_yield_history) factor_analysis = yield_predictor._analyze_yield_factors(feature_df) patterns = yield_predictor._identify_yield_patterns(feature_df, factor_analysis) # Should detect novice worker pattern low_worker_patterns = [p for p in patterns if p['pattern'] == 'low_yield_worker'] if factor_analysis.get('worker', {}).get('significant'): assert len(low_worker_patterns) > 0 pattern = low_worker_patterns[0] assert pattern['severity'] in ['high', 'medium', 'low'] assert 'recommendation' in pattern @pytest.mark.asyncio async def test_time_of_day_pattern(self, yield_predictor, variable_yield_history): """Test detection of time-of-day pattern.""" feature_df = yield_predictor._engineer_features(variable_yield_history) factor_analysis = yield_predictor._analyze_yield_factors(feature_df) patterns = yield_predictor._identify_yield_patterns(feature_df, factor_analysis) # May detect early morning low yield pattern time_patterns = [p for p in patterns if p['pattern'] == 'low_yield_time'] # Patterns are conditional on statistical significance class TestInsightGeneration: """Test insight generation.""" @pytest.mark.asyncio async def test_low_yield_warning_insight( self, yield_predictor, low_yield_history, production_context_optimal ): """Test generation of low yield warning insight.""" result = await yield_predictor.predict_yield( tenant_id='tenant_123', recipe_id='recipe_789', production_history=low_yield_history, production_context=production_context_optimal, min_history_runs=30 ) # Should generate low yield warning warning_insights = [i for i in result['insights'] if i['type'] == 'warning'] assert len(warning_insights) > 0 warning = warning_insights[0] assert warning['priority'] in ['high', 'medium'] assert warning['category'] == 'production' assert 'impact_value' in warning assert warning['actionable'] is True @pytest.mark.asyncio async def test_excellent_yield_insight( self, yield_predictor, stable_yield_history, production_context_optimal ): """Test generation of excellent yield insight.""" result = await yield_predictor.predict_yield( tenant_id='tenant_123', recipe_id='recipe_123', production_history=stable_yield_history, production_context=production_context_optimal, min_history_runs=30 ) # May generate positive insight for excellent yield positive_insights = [i for i in result['insights'] if i['type'] == 'positive'] if result['predicted_yield'] > 98: assert len(positive_insights) > 0 @pytest.mark.asyncio async def test_yield_variability_insight( self, yield_predictor, variable_yield_history, production_context_optimal ): """Test generation of yield variability insight.""" result = await yield_predictor.predict_yield( tenant_id='tenant_123', recipe_id='recipe_456', production_history=variable_yield_history, production_context=production_context_optimal, min_history_runs=30 ) # Should detect high variability if result['baseline_std'] / result['baseline_yield'] > 0.05: variability_insights = [ i for i in result['insights'] if 'variability' in i['title'].lower() or 'variability' in i['description'].lower() ] assert len(variability_insights) > 0 class TestConfidenceScoring: """Test confidence score calculation.""" @pytest.mark.asyncio async def test_high_confidence_large_sample( self, yield_predictor, stable_yield_history, production_context_optimal ): """Test high confidence with large stable sample.""" result = await yield_predictor.predict_yield( tenant_id='tenant_123', recipe_id='recipe_123', production_history=stable_yield_history, production_context=production_context_optimal, min_history_runs=30 ) # Large sample + stable data should give high confidence assert result['confidence'] > 60 @pytest.mark.asyncio async def test_lower_confidence_small_sample(self, yield_predictor, production_context_optimal): """Test lower confidence with small sample.""" # Create small history (exactly 30 runs) small_history = pd.DataFrame([{ 'production_run_id': f'run_{i}', 'recipe_id': 'recipe_123', 'planned_quantity': 100, 'actual_quantity': 95 + np.random.normal(0, 2), 'yield_percentage': 95 + np.random.normal(0, 2), 'worker_id': 'worker_1', 'started_at': datetime.utcnow() - timedelta(days=90-i), 'completed_at': datetime.utcnow() - timedelta(days=90-i, hours=-4), 'batch_size': 100 } for i in range(30)]) result = await yield_predictor.predict_yield( tenant_id='tenant_123', recipe_id='recipe_123', production_history=small_history, production_context=production_context_optimal, min_history_runs=30 ) # Small sample should give moderate confidence assert result['confidence'] < 85 class TestHistoricalAnalysis: """Test historical analysis (no prediction).""" @pytest.mark.asyncio async def test_analyze_recipe_history(self, yield_predictor, variable_yield_history): """Test historical analysis without prediction.""" result = await yield_predictor.analyze_recipe_yield_history( tenant_id='tenant_123', recipe_id='recipe_456', production_history=variable_yield_history, min_history_runs=30 ) assert result['recipe_id'] == 'recipe_456' assert 'baseline_stats' in result assert 'factor_analysis' in result assert 'patterns' in result assert 'insights' in result @pytest.mark.asyncio async def test_analyze_insufficient_history(self, yield_predictor): """Test analysis with insufficient history.""" small_history = pd.DataFrame([{ 'production_run_id': 'run_1', 'recipe_id': 'recipe_123', 'planned_quantity': 100, 'actual_quantity': 95, 'yield_percentage': 95, 'worker_id': 'worker_1', 'started_at': datetime.utcnow() - timedelta(days=1), 'completed_at': datetime.utcnow() - timedelta(hours=20), 'batch_size': 100 }]) result = await yield_predictor.analyze_recipe_yield_history( tenant_id='tenant_123', recipe_id='recipe_123', production_history=small_history, min_history_runs=30 ) assert result['status'] == 'insufficient_data' class TestModelPerformance: """Test ML model performance.""" @pytest.mark.asyncio async def test_model_training(self, yield_predictor, variable_yield_history): """Test model training and performance metrics.""" feature_df = yield_predictor._engineer_features(variable_yield_history) model_results = yield_predictor._train_yield_model(feature_df) assert 'best_model' in model_results assert 'best_model_name' in model_results assert 'performance' in model_results assert 'feature_importance' in model_results performance = model_results['performance'] assert 'mae' in performance assert 'rmse' in performance assert 'r2' in performance # MAE should be reasonable (< 15 percentage points) assert performance['mae'] < 15 @pytest.mark.asyncio async def test_feature_importance(self, yield_predictor, variable_yield_history): """Test feature importance extraction.""" feature_df = yield_predictor._engineer_features(variable_yield_history) model_results = yield_predictor._train_yield_model(feature_df) feature_importance = model_results['feature_importance'] # Should have feature importances if len(feature_importance) > 0: # Worker encoding should be important (due to skill differences) assert 'worker_encoded' in feature_importance or len(feature_importance) > 0