579 lines
22 KiB
Python
579 lines
22 KiB
Python
"""
Tests for Production Yield Predictor
"""
|
|
|
|
import pytest
|
|
import pandas as pd
|
|
import numpy as np
|
|
from datetime import datetime, timedelta
|
|
from services.production.app.ml.yield_predictor import YieldPredictor
|
|
|
|
|
|
@pytest.fixture
def yield_predictor():
    """Provide a newly constructed YieldPredictor to the requesting test."""
    predictor = YieldPredictor()
    return predictor
|
|
|
|
|
|
@pytest.fixture
def stable_yield_history():
    """Build a seeded 50-run history with high, low-variance yield.

    Runs start 180 days before the current naive-UTC time, are spaced three
    days apart and rotate over three worker ids. ``yield_percentage`` is
    clipped to [90, 100] after the frame is built; ``actual_quantity`` is
    drawn separately from the same distribution and is not clipped.
    """
    np.random.seed(42)
    first_start = datetime.utcnow() - timedelta(days=180)

    def make_run(index):
        started = first_start + timedelta(days=index * 3)
        # The draw order (quantity, yield, batch size) fixes the seeded values.
        quantity = np.random.normal(97, 1.5)  # 97% avg, low variance
        yield_pct = np.random.normal(97, 1.5)
        batch = np.random.randint(80, 120)
        return {
            'production_run_id': f'run_{index}',
            'recipe_id': 'recipe_123',
            'planned_quantity': 100,
            'actual_quantity': quantity,
            'yield_percentage': yield_pct,
            'worker_id': f'worker_{index % 3}',  # 3 workers
            'started_at': started,
            'completed_at': started + timedelta(hours=4),
            'batch_size': batch,
        }

    frame = pd.DataFrame([make_run(index) for index in range(50)])
    frame['yield_percentage'] = frame['yield_percentage'].clip(90, 100)
    return frame
|
|
|
|
|
|
@pytest.fixture
def variable_yield_history():
    """Build a seeded 60-run history whose yield depends on several factors.

    Each run's yield starts from a per-worker base value, loses 5 points for
    a start hour before 6 or after 22, loses 3 points for a batch size above
    120, and gets Gaussian noise (sigma 2) added. ``yield_percentage`` is
    clipped to [70, 100] after the frame is built; ``actual_quantity`` keeps
    the unclipped value.
    """
    np.random.seed(42)
    first_start = datetime.utcnow() - timedelta(days=180)

    # (worker id, base yield) pairs, visited round-robin.
    roster = (
        ('worker_expert', 96),
        ('worker_intermediate', 90),
        ('worker_novice', 82),
    )

    rows = []
    for index in range(60):
        worker, skill_yield = roster[index % 3]

        # Time of day effect
        start_hour = (6 + index * 2) % 24
        in_day_shift = 6 <= start_hour <= 22
        night_penalty = 0 if in_day_shift else 5

        # Batch size effect (drawn before the noise term to keep the seeded
        # sequence unchanged)
        lot_size = np.random.randint(50, 150)
        oversize_penalty = 3 if lot_size > 120 else 0

        observed_yield = skill_yield - night_penalty - oversize_penalty + np.random.normal(0, 2)

        started = (first_start + timedelta(days=index * 3)).replace(hour=start_hour)
        rows.append({
            'production_run_id': f'run_{index}',
            'recipe_id': 'recipe_456',
            'planned_quantity': 100,
            'actual_quantity': observed_yield,
            'yield_percentage': observed_yield,
            'worker_id': worker,
            'started_at': started,
            'completed_at': started + timedelta(hours=4),
            'batch_size': lot_size,
        })

    frame = pd.DataFrame(rows)
    frame['yield_percentage'] = frame['yield_percentage'].clip(70, 100)
    return frame
|
|
|
|
|
|
@pytest.fixture
def low_yield_history():
    """Build a seeded 40-run history with low, high-variance yield.

    Runs start 120 days before the current naive-UTC time, are spaced three
    days apart and alternate between two worker ids. ``yield_percentage`` is
    clipped to [60, 95] after the frame is built; ``actual_quantity`` is
    drawn separately from the same distribution and is not clipped.
    """
    np.random.seed(42)
    first_start = datetime.utcnow() - timedelta(days=120)

    def make_run(index):
        started = first_start + timedelta(days=index * 3)
        # The draw order (quantity, yield, batch size) fixes the seeded values.
        quantity = np.random.normal(82, 5)  # 82% avg, high variance
        yield_pct = np.random.normal(82, 5)
        batch = np.random.randint(80, 120)
        return {
            'production_run_id': f'run_{index}',
            'recipe_id': 'recipe_789',
            'planned_quantity': 100,
            'actual_quantity': quantity,
            'yield_percentage': yield_pct,
            'worker_id': f'worker_{index % 2}',
            'started_at': started,
            'completed_at': started + timedelta(hours=4),
            'batch_size': batch,
        }

    frame = pd.DataFrame([make_run(index) for index in range(40)])
    frame['yield_percentage'] = frame['yield_percentage'].clip(60, 95)
    return frame
|
|
|
|
|
|
@pytest.fixture
def production_context_optimal():
    """Describe a planned run under favourable conditions.

    The context names ``worker_expert``, starts tomorrow with the hour set
    to 10, and uses a batch size of 100.
    """
    tomorrow = datetime.utcnow() + timedelta(days=1)
    context = dict(
        worker_id='worker_expert',
        planned_start_time=tomorrow.replace(hour=10),
        batch_size=100,
        planned_quantity=100,
        unit_cost=5.0,
    )
    return context
|
|
|
|
|
|
@pytest.fixture
def production_context_suboptimal():
    """Describe a planned run under unfavourable conditions.

    The context names ``worker_novice``, starts tomorrow with the hour set
    to 4, and uses a batch size of 140.
    """
    tomorrow = datetime.utcnow() + timedelta(days=1)
    context = dict(
        worker_id='worker_novice',
        planned_start_time=tomorrow.replace(hour=4),
        batch_size=140,
        planned_quantity=100,
        unit_cost=5.0,
    )
    return context
|
|
|
|
|
|
class TestYieldPredictorBasics:
    """Cover the insufficient-data path and the baseline statistics helper."""

    @pytest.mark.asyncio
    async def test_insufficient_data(self, yield_predictor):
        """A one-run history is reported as insufficient when 30 are required."""
        # A single run, well under the min_history_runs=30 passed below.
        lone_run = {
            'production_run_id': 'run_1',
            'recipe_id': 'recipe_123',
            'planned_quantity': 100,
            'actual_quantity': 95,
            'yield_percentage': 95,
            'worker_id': 'worker_1',
            'started_at': datetime.utcnow() - timedelta(days=1),
            'completed_at': datetime.utcnow() - timedelta(hours=20),
            'batch_size': 100,
        }
        short_history = pd.DataFrame([lone_run])

        run_context = {
            'worker_id': 'worker_1',
            'planned_start_time': datetime.utcnow() + timedelta(days=1),
            'batch_size': 100,
            'planned_quantity': 100,
        }

        outcome = await yield_predictor.predict_yield(
            tenant_id='tenant_123',
            recipe_id='recipe_123',
            production_history=short_history,
            production_context=run_context,
            min_history_runs=30,
        )

        assert outcome['status'] == 'insufficient_data'
        assert outcome['history_runs'] == 1
        assert outcome['required_runs'] == 30
        # Exactly one insight is expected, and it must be a warning.
        assert len(outcome['insights']) == 1
        assert outcome['insights'][0]['type'] == 'warning'

    @pytest.mark.asyncio
    async def test_baseline_statistics_stable_yield(self, yield_predictor, stable_yield_history):
        """Baseline statistics of the stable fixture show high mean, low spread."""
        baseline = yield_predictor._calculate_baseline_statistics(stable_yield_history)

        assert 95 < baseline['mean_yield'] < 99
        assert baseline['std_yield'] < 3  # Low variance
        assert baseline['cv_yield'] < 0.05  # Low coefficient of variation
        # The fixture clips yield to [90, 100], so the extremes stay inside.
        assert baseline['min_yield'] >= 90
        assert baseline['max_yield'] <= 100

    @pytest.mark.asyncio
    async def test_baseline_statistics_variable_yield(self, yield_predictor, variable_yield_history):
        """Baseline statistics of the variable fixture show a wider spread."""
        baseline = yield_predictor._calculate_baseline_statistics(variable_yield_history)

        assert 85 < baseline['mean_yield'] < 93
        assert baseline['std_yield'] > 3  # Higher variance
        assert baseline['cv_yield'] > 0.03
        assert baseline['runs_below_90'] > 0
|
|
|
|
|
|
class TestFeatureEngineering:
    """Check the columns produced by ``_engineer_features``."""

    @pytest.mark.asyncio
    async def test_time_features(self, yield_predictor, stable_yield_history):
        """Time-derived columns exist and hour/weekday stay in calendar range."""
        engineered = yield_predictor._engineer_features(stable_yield_history)

        expected_columns = (
            'hour_of_day',
            'day_of_week',
            'is_weekend',
            'is_early_morning',
            'is_late_night',
        )
        for column in expected_columns:
            assert column in engineered.columns

        hours = engineered['hour_of_day']
        assert hours.min() >= 0
        assert hours.max() <= 23

        weekdays = engineered['day_of_week']
        assert weekdays.min() >= 0
        assert weekdays.max() <= 6

    @pytest.mark.asyncio
    async def test_batch_size_features(self, yield_predictor, stable_yield_history):
        """Batch-size columns exist and the normalized size averages near 1."""
        engineered = yield_predictor._engineer_features(stable_yield_history)

        for column in ('batch_size_normalized', 'is_large_batch', 'is_small_batch'):
            assert column in engineered.columns

        # Normalized batch size should be around 1.0 on average
        mean_normalized = engineered['batch_size_normalized'].mean()
        assert 0.5 < mean_normalized < 1.5

    @pytest.mark.asyncio
    async def test_worker_experience_features(self, yield_predictor, variable_yield_history):
        """Worker experience columns exist and run counts never decrease."""
        engineered = yield_predictor._engineer_features(variable_yield_history)

        for column in ('worker_run_count', 'worker_experience_level'):
            assert column in engineered.columns

        # Worker run count should increase for each worker
        for worker_id in engineered['worker_id'].unique():
            belongs_to_worker = engineered['worker_id'] == worker_id
            run_counts = engineered.loc[belongs_to_worker, 'worker_run_count']
            assert run_counts.is_monotonic_increasing
|
|
|
|
|
|
class TestFactorAnalysis:
    """Check the per-factor results returned by ``_analyze_yield_factors``."""

    @staticmethod
    def _factors_for(predictor, history):
        """Engineer features from *history* and return its factor analysis."""
        engineered = predictor._engineer_features(history)
        return predictor._analyze_yield_factors(engineered)

    @pytest.mark.asyncio
    async def test_worker_impact_detection(self, yield_predictor, variable_yield_history):
        """The worker factor is reported; if significant, it names best/worst."""
        factors = self._factors_for(yield_predictor, variable_yield_history)

        assert 'worker' in factors
        worker_factor = factors['worker']
        # Should detect worker skill differences
        if worker_factor.get('significant'):
            assert 'best_worker' in worker_factor
            assert 'worst_worker' in worker_factor
            assert worker_factor['yield_range'] > 0

    @pytest.mark.asyncio
    async def test_batch_size_correlation(self, yield_predictor, variable_yield_history):
        """The batch-size factor is reported; if significant, it has a direction."""
        factors = self._factors_for(yield_predictor, variable_yield_history)

        assert 'batch_size' in factors
        batch_factor = factors['batch_size']
        if batch_factor.get('significant'):
            assert 'correlation' in batch_factor
            assert 'direction' in batch_factor
            assert batch_factor['direction'] in ['positive', 'negative']

    @pytest.mark.asyncio
    async def test_time_of_day_effect(self, yield_predictor, variable_yield_history):
        """The time-of-day factor is present in the analysis."""
        factors = self._factors_for(yield_predictor, variable_yield_history)

        assert 'time_of_day' in factors
|
|
|
|
|
|
class TestYieldPrediction:
    """Exercise ``predict_yield`` end to end against the seeded history fixtures."""

    @pytest.mark.asyncio
    async def test_predict_stable_yield(self, yield_predictor, stable_yield_history, production_context_optimal):
        """A stable, high-yield history produces a bounded prediction with a range."""
        result = await yield_predictor.predict_yield(
            tenant_id='tenant_123',
            recipe_id='recipe_123',
            production_history=stable_yield_history,
            production_context=production_context_optimal,
            min_history_runs=30,
        )

        assert result['status'] != 'insufficient_data'
        assert result['predicted_yield'] is not None
        assert 90 < result['predicted_yield'] < 100
        assert result['confidence'] > 0
        # The point prediction must sit strictly inside its own range.
        assert 'prediction_range' in result
        assert result['prediction_range']['lower'] < result['predicted_yield']
        assert result['prediction_range']['upper'] > result['predicted_yield']

    @pytest.mark.asyncio
    async def test_predict_variable_yield_optimal_context(
        self, yield_predictor, variable_yield_history, production_context_optimal
    ):
        """An optimal context must not predict far below the baseline yield."""
        result = await yield_predictor.predict_yield(
            tenant_id='tenant_123',
            recipe_id='recipe_456',
            production_history=variable_yield_history,
            production_context=production_context_optimal,
            min_history_runs=30,
        )

        assert result['predicted_yield'] is not None
        # Optimal context should predict higher yield (5-point tolerance)
        assert result['predicted_yield'] > result['baseline_yield'] - 5

    @pytest.mark.asyncio
    async def test_predict_variable_yield_suboptimal_context(
        self, yield_predictor, variable_yield_history, production_context_suboptimal
    ):
        """A suboptimal context must not predict far above the baseline yield."""
        result = await yield_predictor.predict_yield(
            tenant_id='tenant_123',
            recipe_id='recipe_456',
            production_history=variable_yield_history,
            production_context=production_context_suboptimal,
            min_history_runs=30,
        )

        assert result['predicted_yield'] is not None
        # Suboptimal context (novice worker, early morning, large batch)
        # should predict lower yield. Mirror the optimal-context check with
        # the same 5-point tolerance so the expectation is actually enforced.
        assert result['predicted_yield'] < result['baseline_yield'] + 5

    @pytest.mark.asyncio
    async def test_expected_waste_calculation(
        self, yield_predictor, low_yield_history, production_context_optimal
    ):
        """A low-yield history produces a positive expected waste."""
        result = await yield_predictor.predict_yield(
            tenant_id='tenant_123',
            recipe_id='recipe_789',
            production_history=low_yield_history,
            production_context=production_context_optimal,
            min_history_runs=30,
        )

        assert 'expected_waste' in result
        assert result['expected_waste'] > 0
        # For low yield (82%), waste should be significant
        expected_waste_pct = 100 - result['predicted_yield']
        assert expected_waste_pct > 5
|
|
|
|
|
|
class TestPatternDetection:
    """Check the patterns returned by ``_identify_yield_patterns``."""

    @pytest.mark.asyncio
    async def test_low_yield_worker_pattern(self, yield_predictor, variable_yield_history):
        """A significant worker factor must yield a low-yield-worker pattern."""
        feature_df = yield_predictor._engineer_features(variable_yield_history)
        factor_analysis = yield_predictor._analyze_yield_factors(feature_df)
        patterns = yield_predictor._identify_yield_patterns(feature_df, factor_analysis)

        # Should detect novice worker pattern
        low_worker_patterns = [p for p in patterns if p['pattern'] == 'low_yield_worker']
        if factor_analysis.get('worker', {}).get('significant'):
            assert len(low_worker_patterns) > 0
            pattern = low_worker_patterns[0]
            assert pattern['severity'] in ['high', 'medium', 'low']
            assert 'recommendation' in pattern

    @pytest.mark.asyncio
    async def test_time_of_day_pattern(self, yield_predictor, variable_yield_history):
        """Any detected time-of-day pattern carries a severity and a recommendation."""
        feature_df = yield_predictor._engineer_features(variable_yield_history)
        factor_analysis = yield_predictor._analyze_yield_factors(feature_df)
        patterns = yield_predictor._identify_yield_patterns(feature_df, factor_analysis)

        # May detect early morning low yield pattern
        time_patterns = [p for p in patterns if p['pattern'] == 'low_yield_time']
        # Patterns are conditional on statistical significance, so none may be
        # found; any that are found are checked for the same fields the
        # worker-pattern test checks.
        # NOTE(review): assumes time patterns share that schema — confirm.
        for pattern in time_patterns:
            assert pattern['severity'] in ['high', 'medium', 'low']
            assert 'recommendation' in pattern
|
|
|
|
|
|
class TestInsightGeneration:
    """Check the insights attached to ``predict_yield`` results."""

    @staticmethod
    async def _predict(predictor, recipe_id, history, context):
        """Call ``predict_yield`` with the tenant and minimum-run settings used here."""
        return await predictor.predict_yield(
            tenant_id='tenant_123',
            recipe_id=recipe_id,
            production_history=history,
            production_context=context,
            min_history_runs=30,
        )

    @pytest.mark.asyncio
    async def test_low_yield_warning_insight(
        self, yield_predictor, low_yield_history, production_context_optimal
    ):
        """A low-yield history produces at least one actionable warning."""
        outcome = await self._predict(
            yield_predictor, 'recipe_789', low_yield_history, production_context_optimal
        )

        # Should generate low yield warning
        warnings = [item for item in outcome['insights'] if item['type'] == 'warning']
        assert len(warnings) > 0

        first_warning = warnings[0]
        assert first_warning['priority'] in ['high', 'medium']
        assert first_warning['category'] == 'production'
        assert 'impact_value' in first_warning
        assert first_warning['actionable'] is True

    @pytest.mark.asyncio
    async def test_excellent_yield_insight(
        self, yield_predictor, stable_yield_history, production_context_optimal
    ):
        """A predicted yield above 98 must come with a positive insight."""
        outcome = await self._predict(
            yield_predictor, 'recipe_123', stable_yield_history, production_context_optimal
        )

        # May generate positive insight for excellent yield
        positives = [item for item in outcome['insights'] if item['type'] == 'positive']
        if outcome['predicted_yield'] > 98:
            assert len(positives) > 0

    @pytest.mark.asyncio
    async def test_yield_variability_insight(
        self, yield_predictor, variable_yield_history, production_context_optimal
    ):
        """A coefficient of variation above 0.05 must produce a variability insight."""
        outcome = await self._predict(
            yield_predictor, 'recipe_456', variable_yield_history, production_context_optimal
        )

        def mentions_variability(insight):
            # Title is inspected first; description only if the title misses.
            return any(
                'variability' in insight[field].lower()
                for field in ('title', 'description')
            )

        # Should detect high variability
        variation = outcome['baseline_std'] / outcome['baseline_yield']
        if variation > 0.05:
            matching = [item for item in outcome['insights'] if mentions_variability(item)]
            assert len(matching) > 0
|
|
|
|
|
|
class TestConfidenceScoring:
    """Check the confidence score returned by ``predict_yield``."""

    @pytest.mark.asyncio
    async def test_high_confidence_large_sample(
        self, yield_predictor, stable_yield_history, production_context_optimal
    ):
        """A 50-run stable history yields a confidence above 60."""
        result = await yield_predictor.predict_yield(
            tenant_id='tenant_123',
            recipe_id='recipe_123',
            production_history=stable_yield_history,
            production_context=production_context_optimal,
            min_history_runs=30,
        )

        # Large sample + stable data should give high confidence
        assert result['confidence'] > 60

    @pytest.mark.asyncio
    async def test_lower_confidence_small_sample(self, yield_predictor, production_context_optimal):
        """A history of exactly 30 runs yields a confidence below 85."""
        # Seed the global RNG, as the history fixtures in this module do, so
        # the generated data (and therefore the test outcome) is reproducible.
        np.random.seed(42)
        # Read the clock once so every row is offset from the same instant.
        now = datetime.utcnow()

        # Create small history (exactly 30 runs)
        rows = []
        for i in range(30):
            started = now - timedelta(days=90 - i)
            rows.append({
                'production_run_id': f'run_{i}',
                'recipe_id': 'recipe_123',
                'planned_quantity': 100,
                'actual_quantity': 95 + np.random.normal(0, 2),
                'yield_percentage': 95 + np.random.normal(0, 2),
                'worker_id': 'worker_1',
                'started_at': started,
                'completed_at': started + timedelta(hours=4),
                'batch_size': 100,
            })
        small_history = pd.DataFrame(rows)

        result = await yield_predictor.predict_yield(
            tenant_id='tenant_123',
            recipe_id='recipe_123',
            production_history=small_history,
            production_context=production_context_optimal,
            min_history_runs=30,
        )

        # Small sample should give moderate confidence
        assert result['confidence'] < 85
|
|
|
|
|
|
class TestHistoricalAnalysis:
    """Check ``analyze_recipe_yield_history``, which takes no production context."""

    @pytest.mark.asyncio
    async def test_analyze_recipe_history(self, yield_predictor, variable_yield_history):
        """A sufficient history returns the recipe id and all analysis sections."""
        analysis = await yield_predictor.analyze_recipe_yield_history(
            tenant_id='tenant_123',
            recipe_id='recipe_456',
            production_history=variable_yield_history,
            min_history_runs=30,
        )

        assert analysis['recipe_id'] == 'recipe_456'
        for section in ('baseline_stats', 'factor_analysis', 'patterns', 'insights'):
            assert section in analysis

    @pytest.mark.asyncio
    async def test_analyze_insufficient_history(self, yield_predictor):
        """A one-run history is reported as insufficient when 30 are required."""
        lone_run = {
            'production_run_id': 'run_1',
            'recipe_id': 'recipe_123',
            'planned_quantity': 100,
            'actual_quantity': 95,
            'yield_percentage': 95,
            'worker_id': 'worker_1',
            'started_at': datetime.utcnow() - timedelta(days=1),
            'completed_at': datetime.utcnow() - timedelta(hours=20),
            'batch_size': 100,
        }
        short_history = pd.DataFrame([lone_run])

        analysis = await yield_predictor.analyze_recipe_yield_history(
            tenant_id='tenant_123',
            recipe_id='recipe_123',
            production_history=short_history,
            min_history_runs=30,
        )

        assert analysis['status'] == 'insufficient_data'
|
|
|
|
|
|
class TestModelPerformance:
    """Check the output of ``_train_yield_model``."""

    @pytest.mark.asyncio
    async def test_model_training(self, yield_predictor, variable_yield_history):
        """Training returns a model, its name, metrics and feature importances."""
        feature_df = yield_predictor._engineer_features(variable_yield_history)
        model_results = yield_predictor._train_yield_model(feature_df)

        for key in ('best_model', 'best_model_name', 'performance', 'feature_importance'):
            assert key in model_results

        performance = model_results['performance']
        for metric in ('mae', 'rmse', 'r2'):
            assert metric in performance

        # MAE should be reasonable (< 15 percentage points)
        assert performance['mae'] < 15

    @pytest.mark.asyncio
    async def test_feature_importance(self, yield_predictor, variable_yield_history):
        """When importances are reported, the worker encoding is among them."""
        feature_df = yield_predictor._engineer_features(variable_yield_history)
        model_results = yield_predictor._train_yield_model(feature_df)

        feature_importance = model_results['feature_importance']

        # Importances may legitimately be empty, so only check when present.
        if len(feature_importance) > 0:
            # Worker encoding should be important (due to skill differences).
            # The previous form, `... or len(feature_importance) > 0`, was
            # always true inside this branch and could never fail.
            assert 'worker_encoded' in feature_importance
|