# bakery-ia/services/production/tests/test_yield_predictor.py

"""
Tests for Production Yield Predictor
"""

import pytest
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from services.production.app.ml.yield_predictor import YieldPredictor


@pytest.fixture
def yield_predictor():
    """Provide a newly constructed YieldPredictor to the requesting test."""
    predictor = YieldPredictor()
    return predictor


@pytest.fixture
def stable_yield_history():
    """Build a 50-run history whose yield is high and tightly clustered.

    Runs belong to ``recipe_123``, are spaced three days apart starting 180
    days before the current UTC time, and each lasts four hours. Staff are
    recorded in the ``staff_assigned`` list column and rotate over three
    worker ids. ``yield_percentage`` is sampled from N(97, 1.5) and clipped
    to [90, 100]. The global NumPy RNG is seeded with 42.

    Returns:
        pandas.DataFrame with one row per production run.
    """
    np.random.seed(42)
    first_run_at = datetime.utcnow() - timedelta(days=180)

    rows = []
    for run_no in range(50):
        started = first_run_at + timedelta(days=run_no * 3)

        # Sample in a fixed order (quantity, yield, batch size) so the seeded
        # random stream always produces the same rows.
        # NOTE(review): actual_quantity and yield_percentage are sampled
        # independently, so the two columns do not agree row by row.
        produced = np.random.normal(97, 1.5)  # 97% avg, low variance
        yield_pct = np.random.normal(97, 1.5)
        batch = np.random.randint(80, 120)

        rows.append({
            'production_run_id': f'run_{run_no}',
            'recipe_id': 'recipe_123',
            'planned_quantity': 100,
            'actual_quantity': produced,
            'yield_percentage': yield_pct,
            'staff_assigned': [f'worker_{run_no % 3}'],  # 3 workers
            'started_at': started,
            'completed_at': started + timedelta(hours=4),
            'batch_size': batch
        })

    frame = pd.DataFrame(rows)
    # Keep sampled outliers inside the "stable" band.
    frame['yield_percentage'] = frame['yield_percentage'].clip(lower=90, upper=100)
    return frame


@pytest.fixture
def variable_yield_history():
    """Build a 60-run history whose yield depends on worker, hour and batch size.

    Runs belong to ``recipe_456`` and are spaced three days apart starting
    180 days before the current UTC time. Three workers with base yields of
    96, 90 and 82 take turns. A run loses 5 points when it starts before
    06:00 or after 22:00, loses 3 points when its batch size exceeds 120,
    and receives N(0, 2) noise. ``yield_percentage`` is clipped to
    [70, 100]; ``actual_quantity`` keeps the unclipped value. The global
    NumPy RNG is seeded with 42.

    Returns:
        pandas.DataFrame with one row per production run.
    """
    np.random.seed(42)
    first_run_at = datetime.utcnow() - timedelta(days=180)

    # Base yield per worker; the list fixes the rotation order.
    skill_by_worker = {'worker_expert': 96, 'worker_intermediate': 90, 'worker_novice': 82}
    rotation = ['worker_expert', 'worker_intermediate', 'worker_novice']

    rows = []
    for run_no in range(60):
        assignee = rotation[run_no % 3]
        skill = skill_by_worker[assignee]

        # Time of day effect: the start hour moves forward two hours per run
        # and wraps around midnight.
        start_hour = (6 + run_no * 2) % 24
        if 6 <= start_hour <= 22:
            time_penalty = 0
        else:
            time_penalty = 5

        # Batch size effect (sampled before the noise term to keep the
        # seeded random stream in its original order).
        batch = np.random.randint(50, 150)
        batch_penalty = 3 if batch > 120 else 0

        noise = np.random.normal(0, 2)
        observed_yield = skill - time_penalty - batch_penalty + noise

        started = (first_run_at + timedelta(days=run_no * 3)).replace(hour=start_hour)

        rows.append({
            'production_run_id': f'run_{run_no}',
            'recipe_id': 'recipe_456',
            'planned_quantity': 100,
            'actual_quantity': observed_yield,
            'yield_percentage': observed_yield,
            'worker_id': assignee,
            'started_at': started,
            'completed_at': started + timedelta(hours=4),
            'batch_size': batch
        })

    frame = pd.DataFrame(rows)
    frame['yield_percentage'] = frame['yield_percentage'].clip(lower=70, upper=100)
    return frame


@pytest.fixture
def low_yield_history():
    """Build a 40-run history whose yield is consistently low and noisy.

    Runs belong to ``recipe_789``, are spaced three days apart starting 120
    days before the current UTC time, and alternate between two workers.
    ``yield_percentage`` is sampled from N(82, 5) and clipped to [60, 95].
    The global NumPy RNG is seeded with 42.

    Returns:
        pandas.DataFrame with one row per production run.
    """
    np.random.seed(42)
    first_run_at = datetime.utcnow() - timedelta(days=120)

    rows = []
    for run_no in range(40):
        started = first_run_at + timedelta(days=run_no * 3)

        # Sample in a fixed order (quantity, yield, batch size) so the seeded
        # random stream always produces the same rows.
        # NOTE(review): actual_quantity and yield_percentage are sampled
        # independently, so the two columns do not agree row by row.
        produced = np.random.normal(82, 5)  # 82% avg, high variance
        yield_pct = np.random.normal(82, 5)
        batch = np.random.randint(80, 120)

        rows.append({
            'production_run_id': f'run_{run_no}',
            'recipe_id': 'recipe_789',
            'planned_quantity': 100,
            'actual_quantity': produced,
            'yield_percentage': yield_pct,
            'worker_id': f'worker_{run_no % 2}',
            'started_at': started,
            'completed_at': started + timedelta(hours=4),
            'batch_size': batch
        })

    frame = pd.DataFrame(rows)
    frame['yield_percentage'] = frame['yield_percentage'].clip(lower=60, upper=95)
    return frame


@pytest.fixture
def production_context_optimal():
    """Describe a planned run under favourable conditions.

    The run is assigned to ``worker_expert``, starts tomorrow (UTC) with the
    hour set to 10, and uses a batch size of 100.
    """
    tomorrow = datetime.utcnow() + timedelta(days=1)
    return dict(
        worker_id='worker_expert',
        planned_start_time=tomorrow.replace(hour=10),
        batch_size=100,
        planned_quantity=100,
        unit_cost=5.0,
    )


@pytest.fixture
def production_context_suboptimal():
    """Describe a planned run under unfavourable conditions.

    The run is assigned to ``worker_novice``, starts tomorrow (UTC) with the
    hour set to 4, and uses a batch size of 140.
    """
    tomorrow = datetime.utcnow() + timedelta(days=1)
    return dict(
        worker_id='worker_novice',
        planned_start_time=tomorrow.replace(hour=4),
        batch_size=140,
        planned_quantity=100,
        unit_cost=5.0,
    )


class TestYieldPredictorBasics:
    """Cover the insufficient-data response and the baseline statistics helper."""

    @pytest.mark.asyncio
    async def test_insufficient_data(self, yield_predictor):
        """A one-run history is reported as insufficient when 30 runs are required."""
        # Minimal history (< 30 runs)
        single_run = {
            'production_run_id': 'run_1',
            'recipe_id': 'recipe_123',
            'planned_quantity': 100,
            'actual_quantity': 95,
            'yield_percentage': 95,
            'worker_id': 'worker_1',
            'started_at': datetime.utcnow() - timedelta(days=1),
            'completed_at': datetime.utcnow() - timedelta(hours=20),
            'batch_size': 100
        }
        history = pd.DataFrame([single_run])

        context = dict(
            worker_id='worker_1',
            planned_start_time=datetime.utcnow() + timedelta(days=1),
            batch_size=100,
            planned_quantity=100,
        )

        result = await yield_predictor.predict_yield(
            tenant_id='tenant_123',
            recipe_id='recipe_123',
            production_history=history,
            production_context=context,
            min_history_runs=30
        )

        assert result['status'] == 'insufficient_data'
        assert result['history_runs'] == 1
        assert result['required_runs'] == 30

        # Exactly one insight is expected, and it must be a warning.
        insights = result['insights']
        assert len(insights) == 1
        assert insights[0]['type'] == 'warning'

    @pytest.mark.asyncio
    async def test_baseline_statistics_stable_yield(self, yield_predictor, stable_yield_history):
        """Stable history produces a high mean, low spread and values within the clip band."""
        baseline = yield_predictor._calculate_baseline_statistics(stable_yield_history)

        mean_yield = baseline['mean_yield']
        assert 95 < mean_yield < 99
        assert baseline['std_yield'] < 3  # Low variance
        assert baseline['cv_yield'] < 0.05  # Low coefficient of variation
        # The fixture clips yield to [90, 100].
        assert baseline['min_yield'] >= 90
        assert baseline['max_yield'] <= 100

    @pytest.mark.asyncio
    async def test_baseline_statistics_variable_yield(self, yield_predictor, variable_yield_history):
        """Variable history produces a lower mean, wider spread and some runs under 90."""
        baseline = yield_predictor._calculate_baseline_statistics(variable_yield_history)

        mean_yield = baseline['mean_yield']
        assert 85 < mean_yield < 93
        assert baseline['std_yield'] > 3  # Higher variance
        assert baseline['cv_yield'] > 0.03
        assert baseline['runs_below_90'] > 0


class TestFeatureEngineering:
    """Check the columns that ``_engineer_features`` adds to a history frame."""

    @pytest.mark.asyncio
    async def test_time_features(self, yield_predictor, stable_yield_history):
        """Time-derived columns exist and stay within clock and weekday bounds."""
        engineered = yield_predictor._engineer_features(stable_yield_history)

        time_columns = (
            'hour_of_day',
            'day_of_week',
            'is_weekend',
            'is_early_morning',
            'is_late_night',
        )
        for column in time_columns:
            assert column in engineered.columns

        hours = engineered['hour_of_day']
        assert hours.min() >= 0
        assert hours.max() <= 23

        weekdays = engineered['day_of_week']
        assert weekdays.min() >= 0
        assert weekdays.max() <= 6

    @pytest.mark.asyncio
    async def test_batch_size_features(self, yield_predictor, stable_yield_history):
        """Batch-size columns exist and the normalized size averages near 1."""
        engineered = yield_predictor._engineer_features(stable_yield_history)

        for column in ('batch_size_normalized', 'is_large_batch', 'is_small_batch'):
            assert column in engineered.columns

        # Normalized batch size should be around 1.0 on average
        mean_normalized = engineered['batch_size_normalized'].mean()
        assert 0.5 < mean_normalized < 1.5

    @pytest.mark.asyncio
    async def test_worker_experience_features(self, yield_predictor, variable_yield_history):
        """Experience columns exist and each worker's run count never decreases."""
        engineered = yield_predictor._engineer_features(variable_yield_history)

        for column in ('worker_run_count', 'worker_experience_level'):
            assert column in engineered.columns

        # Worker run count should increase for each worker
        for worker_id in engineered['worker_id'].unique():
            is_this_worker = engineered['worker_id'] == worker_id
            run_counts = engineered.loc[is_this_worker, 'worker_run_count']
            assert run_counts.is_monotonic_increasing


class TestFactorAnalysis:
    """Check the per-factor entries returned by ``_analyze_yield_factors``."""

    @pytest.mark.asyncio
    async def test_worker_impact_detection(self, yield_predictor, variable_yield_history):
        """A worker entry exists; when flagged significant it names best and worst workers."""
        engineered = yield_predictor._engineer_features(variable_yield_history)
        factors = yield_predictor._analyze_yield_factors(engineered)

        assert 'worker' in factors
        worker_factor = factors['worker']

        # Detail keys are only checked when the analysis reports significance.
        if not worker_factor.get('significant'):
            return

        assert 'best_worker' in worker_factor
        assert 'worst_worker' in worker_factor
        assert worker_factor['yield_range'] > 0

    @pytest.mark.asyncio
    async def test_batch_size_correlation(self, yield_predictor, variable_yield_history):
        """A batch-size entry exists; when significant it carries a correlation and direction."""
        engineered = yield_predictor._engineer_features(variable_yield_history)
        factors = yield_predictor._analyze_yield_factors(engineered)

        assert 'batch_size' in factors
        batch_factor = factors['batch_size']

        if not batch_factor.get('significant'):
            return

        assert 'correlation' in batch_factor
        assert 'direction' in batch_factor
        assert batch_factor['direction'] in ['positive', 'negative']

    @pytest.mark.asyncio
    async def test_time_of_day_effect(self, yield_predictor, variable_yield_history):
        """The factor analysis includes a time-of-day entry."""
        engineered = yield_predictor._engineer_features(variable_yield_history)
        factors = yield_predictor._analyze_yield_factors(engineered)

        assert 'time_of_day' in factors


class TestYieldPrediction:
    """Exercise ``predict_yield`` against the stable, variable and low-yield histories."""

    @pytest.mark.asyncio
    async def test_predict_stable_yield(self, yield_predictor, stable_yield_history, production_context_optimal):
        """Stable history gives a prediction in (90, 100) enclosed by its range."""
        result = await yield_predictor.predict_yield(
            tenant_id='tenant_123',
            recipe_id='recipe_123',
            production_history=stable_yield_history,
            production_context=production_context_optimal,
            min_history_runs=30
        )

        assert result['status'] != 'insufficient_data'
        assert result['predicted_yield'] is not None
        assert 90 < result['predicted_yield'] < 100
        assert result['confidence'] > 0
        assert 'prediction_range' in result
        # The point prediction must lie strictly inside the reported range.
        assert result['prediction_range']['lower'] < result['predicted_yield']
        assert result['prediction_range']['upper'] > result['predicted_yield']

    @pytest.mark.asyncio
    async def test_predict_variable_yield_optimal_context(
        self, yield_predictor, variable_yield_history, production_context_optimal
    ):
        """Favourable context is not predicted more than 5 points below baseline."""
        result = await yield_predictor.predict_yield(
            tenant_id='tenant_123',
            recipe_id='recipe_456',
            production_history=variable_yield_history,
            production_context=production_context_optimal,
            min_history_runs=30
        )

        assert result['predicted_yield'] is not None
        # Optimal context should predict higher yield (5-point tolerance)
        assert result['predicted_yield'] > result['baseline_yield'] - 5

    @pytest.mark.asyncio
    async def test_predict_variable_yield_suboptimal_context(
        self, yield_predictor, variable_yield_history, production_context_suboptimal
    ):
        """Unfavourable context is not predicted more than 5 points above baseline."""
        result = await yield_predictor.predict_yield(
            tenant_id='tenant_123',
            recipe_id='recipe_456',
            production_history=variable_yield_history,
            production_context=production_context_suboptimal,
            min_history_runs=30
        )

        assert result['predicted_yield'] is not None
        # Suboptimal context (novice worker, early morning, large batch)
        # should predict lower yield. Previously nothing checked this; the
        # bound below mirrors the 5-point tolerance of the optimal-context
        # test so that only a clearly wrong direction fails.
        assert result['predicted_yield'] < result['baseline_yield'] + 5

    @pytest.mark.asyncio
    async def test_expected_waste_calculation(
        self, yield_predictor, low_yield_history, production_context_optimal
    ):
        """Low-yield history reports positive expected waste and over 5% implied loss."""
        result = await yield_predictor.predict_yield(
            tenant_id='tenant_123',
            recipe_id='recipe_789',
            production_history=low_yield_history,
            production_context=production_context_optimal,
            min_history_runs=30
        )

        assert 'expected_waste' in result
        assert result['expected_waste'] > 0
        # For low yield (82%), waste should be significant
        expected_waste_pct = 100 - result['predicted_yield']
        assert expected_waste_pct > 5


class TestPatternDetection:
    """Check the pattern entries returned by ``_identify_yield_patterns``."""

    @pytest.mark.asyncio
    async def test_low_yield_worker_pattern(self, yield_predictor, variable_yield_history):
        """A significant worker factor yields at least one ``low_yield_worker`` pattern."""
        feature_df = yield_predictor._engineer_features(variable_yield_history)
        factor_analysis = yield_predictor._analyze_yield_factors(feature_df)
        patterns = yield_predictor._identify_yield_patterns(feature_df, factor_analysis)

        # Should detect novice worker pattern
        low_worker_patterns = [p for p in patterns if p['pattern'] == 'low_yield_worker']
        if factor_analysis.get('worker', {}).get('significant'):
            assert len(low_worker_patterns) > 0
            pattern = low_worker_patterns[0]
            assert pattern['severity'] in ['high', 'medium', 'low']
            assert 'recommendation' in pattern

    @pytest.mark.asyncio
    async def test_time_of_day_pattern(self, yield_predictor, variable_yield_history):
        """Pattern identification runs and every entry carries a ``pattern`` name.

        Whether a ``low_yield_time`` pattern appears depends on statistical
        significance, so its presence is not required here.
        """
        feature_df = yield_predictor._engineer_features(variable_yield_history)
        factor_analysis = yield_predictor._analyze_yield_factors(feature_df)
        patterns = yield_predictor._identify_yield_patterns(feature_df, factor_analysis)

        # The previous version only built an unused filtered list, which
        # silently relied on each entry having a 'pattern' key. State that
        # requirement as an assertion so a failure is reported clearly.
        for entry in patterns:
            assert 'pattern' in entry

        # May detect early morning low yield pattern
        time_patterns = [p for p in patterns if p['pattern'] == 'low_yield_time']
        # Patterns are conditional on statistical significance, so the
        # filtered list may legitimately be empty.
        assert len(time_patterns) <= len(list(patterns))


class TestInsightGeneration:
    """Check the ``insights`` list attached to ``predict_yield`` results."""

    @pytest.mark.asyncio
    async def test_low_yield_warning_insight(
        self, yield_predictor, low_yield_history, production_context_optimal
    ):
        """Low-yield history produces an actionable production warning."""
        result = await yield_predictor.predict_yield(
            tenant_id='tenant_123',
            recipe_id='recipe_789',
            production_history=low_yield_history,
            production_context=production_context_optimal,
            min_history_runs=30
        )

        # Should generate low yield warning
        warnings_found = []
        for insight in result['insights']:
            if insight['type'] == 'warning':
                warnings_found.append(insight)
        assert len(warnings_found) > 0

        # Only the first warning is inspected in detail.
        first_warning = warnings_found[0]
        assert first_warning['priority'] in ['high', 'medium']
        assert first_warning['category'] == 'production'
        assert 'impact_value' in first_warning
        assert first_warning['actionable'] is True

    @pytest.mark.asyncio
    async def test_excellent_yield_insight(
        self, yield_predictor, stable_yield_history, production_context_optimal
    ):
        """A predicted yield above 98 must come with a positive insight."""
        result = await yield_predictor.predict_yield(
            tenant_id='tenant_123',
            recipe_id='recipe_123',
            production_history=stable_yield_history,
            production_context=production_context_optimal,
            min_history_runs=30
        )

        # May generate positive insight for excellent yield
        positives_found = []
        for insight in result['insights']:
            if insight['type'] == 'positive':
                positives_found.append(insight)

        if result['predicted_yield'] > 98:
            assert len(positives_found) > 0

    @pytest.mark.asyncio
    async def test_yield_variability_insight(
        self, yield_predictor, variable_yield_history, production_context_optimal
    ):
        """Relative spread above 5% must come with an insight mentioning variability."""
        result = await yield_predictor.predict_yield(
            tenant_id='tenant_123',
            recipe_id='recipe_456',
            production_history=variable_yield_history,
            production_context=production_context_optimal,
            min_history_runs=30
        )

        # Should detect high variability
        relative_spread = result['baseline_std'] / result['baseline_yield']
        if relative_spread > 0.05:
            keyword = 'variability'
            mentioning = []
            for insight in result['insights']:
                # Match the keyword case-insensitively in title or description.
                if keyword in insight['title'].lower() or keyword in insight['description'].lower():
                    mentioning.append(insight)
            assert len(mentioning) > 0


class TestConfidenceScoring:
    """Check the ``confidence`` value returned by ``predict_yield``."""

    @pytest.mark.asyncio
    async def test_high_confidence_large_sample(
        self, yield_predictor, stable_yield_history, production_context_optimal
    ):
        """The 50-run stable history is scored with confidence above 60."""
        result = await yield_predictor.predict_yield(
            tenant_id='tenant_123',
            recipe_id='recipe_123',
            production_history=stable_yield_history,
            production_context=production_context_optimal,
            min_history_runs=30
        )

        # Large sample + stable data should give high confidence
        assert result['confidence'] > 60

    @pytest.mark.asyncio
    async def test_lower_confidence_small_sample(self, yield_predictor, production_context_optimal):
        """A history of exactly 30 runs is scored with confidence below 85."""
        # Seed the global RNG as the history fixtures do. Without this the
        # sampled rows depend on whichever tests ran earlier, which makes
        # the outcome order-dependent.
        np.random.seed(42)
        now = datetime.utcnow()

        # Create small history (exactly 30 runs), one run per day, each
        # lasting four hours.
        rows = []
        for i in range(30):
            started_at = now - timedelta(days=90 - i)
            rows.append({
                'production_run_id': f'run_{i}',
                'recipe_id': 'recipe_123',
                'planned_quantity': 100,
                'actual_quantity': 95 + np.random.normal(0, 2),
                'yield_percentage': 95 + np.random.normal(0, 2),
                'worker_id': 'worker_1',
                'started_at': started_at,
                'completed_at': started_at + timedelta(hours=4),
                'batch_size': 100
            })
        small_history = pd.DataFrame(rows)

        result = await yield_predictor.predict_yield(
            tenant_id='tenant_123',
            recipe_id='recipe_123',
            production_history=small_history,
            production_context=production_context_optimal,
            min_history_runs=30
        )

        # Small sample should give moderate confidence
        assert result['confidence'] < 85


class TestHistoricalAnalysis:
    """Exercise ``analyze_recipe_yield_history``, which takes no production context."""

    @pytest.mark.asyncio
    async def test_analyze_recipe_history(self, yield_predictor, variable_yield_history):
        """The analysis echoes the recipe id and returns every report section."""
        report = await yield_predictor.analyze_recipe_yield_history(
            tenant_id='tenant_123',
            recipe_id='recipe_456',
            production_history=variable_yield_history,
            min_history_runs=30
        )

        assert report['recipe_id'] == 'recipe_456'
        for section in ('baseline_stats', 'factor_analysis', 'patterns', 'insights'):
            assert section in report

    @pytest.mark.asyncio
    async def test_analyze_insufficient_history(self, yield_predictor):
        """A one-run history is reported as insufficient when 30 runs are required."""
        single_run = {
            'production_run_id': 'run_1',
            'recipe_id': 'recipe_123',
            'planned_quantity': 100,
            'actual_quantity': 95,
            'yield_percentage': 95,
            'worker_id': 'worker_1',
            'started_at': datetime.utcnow() - timedelta(days=1),
            'completed_at': datetime.utcnow() - timedelta(hours=20),
            'batch_size': 100
        }
        small_history = pd.DataFrame([single_run])

        report = await yield_predictor.analyze_recipe_yield_history(
            tenant_id='tenant_123',
            recipe_id='recipe_123',
            production_history=small_history,
            min_history_runs=30
        )

        assert report['status'] == 'insufficient_data'


class TestModelPerformance:
    """Check the result structure returned by ``_train_yield_model``."""

    @pytest.mark.asyncio
    async def test_model_training(self, yield_predictor, variable_yield_history):
        """Training returns the expected sections and a mean absolute error below 15."""
        engineered = yield_predictor._engineer_features(variable_yield_history)
        trained = yield_predictor._train_yield_model(engineered)

        for section in ('best_model', 'best_model_name', 'performance', 'feature_importance'):
            assert section in trained

        metrics = trained['performance']
        for metric_name in ('mae', 'rmse', 'r2'):
            assert metric_name in metrics

        # MAE should be reasonable (< 15 percentage points)
        assert metrics['mae'] < 15

    @pytest.mark.asyncio
    async def test_feature_importance(self, yield_predictor, variable_yield_history):
        """Training exposes a ``feature_importance`` entry in its results."""
        engineered = yield_predictor._engineer_features(variable_yield_history)
        trained = yield_predictor._train_yield_model(engineered)

        importances = trained['feature_importance']

        # Should have feature importances
        if len(importances) == 0:
            return

        # Worker encoding should be important (due to skill differences)
        # NOTE(review): the second operand is always true at this point, so
        # this assertion cannot fail. Confirm the real feature name and
        # tighten it to a plain membership check.
        assert 'worker_encoded' in importances or len(importances) > 0
Improve AI logic 2025-11-05 13:34:56 +01:00			`"""`
			`Tests for Production Yield Predictor`
			`"""`

			`import pytest`
			`import pandas as pd`
			`import numpy as np`
			`from datetime import datetime, timedelta`
			`from services.production.app.ml.yield_predictor import YieldPredictor`


			`@pytest.fixture`
			`def yield_predictor():`
			`"""Create YieldPredictor instance."""`
			`return YieldPredictor()`


			`@pytest.fixture`
			`def stable_yield_history():`
			`"""Generate production history with stable high yield."""`
			`np.random.seed(42)`
			`base_date = datetime.utcnow() - timedelta(days=180)`

			`history = []`
			`for i in range(50):`
			`run_date = base_date + timedelta(days=i * 3)`

			`history.append({`
			`'production_run_id': f'run_{i}',`
			`'recipe_id': 'recipe_123',`
			`'planned_quantity': 100,`
			`'actual_quantity': np.random.normal(97, 1.5), # 97% avg, low variance`
			`'yield_percentage': np.random.normal(97, 1.5),`
Add AI insights feature 2025-12-15 21:14:22 +01:00			`'staff_assigned': [f'worker_{i % 3}'], # 3 workers`
Improve AI logic 2025-11-05 13:34:56 +01:00			`'started_at': run_date,`
			`'completed_at': run_date + timedelta(hours=4),`
			`'batch_size': np.random.randint(80, 120)`
			`})`

			`df = pd.DataFrame(history)`
			`df['yield_percentage'] = df['yield_percentage'].clip(90, 100)`
			`return df`


			`@pytest.fixture`
			`def variable_yield_history():`
			`"""Generate production history with variable yield."""`
			`np.random.seed(42)`
			`base_date = datetime.utcnow() - timedelta(days=180)`

			`history = []`
			`workers = ['worker_expert', 'worker_intermediate', 'worker_novice']`
			`worker_skills = {'worker_expert': 96, 'worker_intermediate': 90, 'worker_novice': 82}`

			`for i in range(60):`
			`run_date = base_date + timedelta(days=i * 3)`
			`worker = workers[i % 3]`
			`base_yield = worker_skills[worker]`

			`# Time of day effect`
			`hour = (6 + i * 2) % 24`
			`time_penalty = 5 if hour < 6 or hour > 22 else 0`

			`# Batch size effect`
			`batch_size = np.random.randint(50, 150)`
			`batch_penalty = 3 if batch_size > 120 else 0`

			`final_yield = base_yield - time_penalty - batch_penalty + np.random.normal(0, 2)`

			`history.append({`
			`'production_run_id': f'run_{i}',`
			`'recipe_id': 'recipe_456',`
			`'planned_quantity': 100,`
			`'actual_quantity': final_yield,`
			`'yield_percentage': final_yield,`
			`'worker_id': worker,`
			`'started_at': run_date.replace(hour=hour),`
			`'completed_at': run_date.replace(hour=hour) + timedelta(hours=4),`
			`'batch_size': batch_size`
			`})`

			`df = pd.DataFrame(history)`
			`df['yield_percentage'] = df['yield_percentage'].clip(70, 100)`
			`return df`


			`@pytest.fixture`
			`def low_yield_history():`
			`"""Generate production history with consistently low yield."""`
			`np.random.seed(42)`
			`base_date = datetime.utcnow() - timedelta(days=120)`

			`history = []`
			`for i in range(40):`
			`run_date = base_date + timedelta(days=i * 3)`

			`history.append({`
			`'production_run_id': f'run_{i}',`
			`'recipe_id': 'recipe_789',`
			`'planned_quantity': 100,`
			`'actual_quantity': np.random.normal(82, 5), # 82% avg, high variance`
			`'yield_percentage': np.random.normal(82, 5),`
			`'worker_id': f'worker_{i % 2}',`
			`'started_at': run_date,`
			`'completed_at': run_date + timedelta(hours=4),`
			`'batch_size': np.random.randint(80, 120)`
			`})`

			`df = pd.DataFrame(history)`
			`df['yield_percentage'] = df['yield_percentage'].clip(60, 95)`
			`return df`


			`@pytest.fixture`
			`def production_context_optimal():`
			`"""Production context for optimal conditions."""`
			`return {`
			`'worker_id': 'worker_expert',`
			`'planned_start_time': (datetime.utcnow() + timedelta(days=1)).replace(hour=10),`
			`'batch_size': 100,`
			`'planned_quantity': 100,`
			`'unit_cost': 5.0`
			`}`


			`@pytest.fixture`
			`def production_context_suboptimal():`
			`"""Production context for suboptimal conditions."""`
			`return {`
			`'worker_id': 'worker_novice',`
			`'planned_start_time': (datetime.utcnow() + timedelta(days=1)).replace(hour=4),`
			`'batch_size': 140,`
			`'planned_quantity': 100,`
			`'unit_cost': 5.0`
			`}`


			`class TestYieldPredictorBasics:`
			`"""Test basic functionality."""`

			`@pytest.mark.asyncio`
			`async def test_insufficient_data(self, yield_predictor):`
			`"""Test handling of insufficient production history."""`
			`# Create minimal history (< 30 runs)`
			`history = pd.DataFrame([{`
			`'production_run_id': 'run_1',`
			`'recipe_id': 'recipe_123',`
			`'planned_quantity': 100,`
			`'actual_quantity': 95,`
			`'yield_percentage': 95,`
			`'worker_id': 'worker_1',`
			`'started_at': datetime.utcnow() - timedelta(days=1),`
			`'completed_at': datetime.utcnow() - timedelta(hours=20),`
			`'batch_size': 100`
			`}])`

			`context = {`
			`'worker_id': 'worker_1',`
			`'planned_start_time': datetime.utcnow() + timedelta(days=1),`
			`'batch_size': 100,`
			`'planned_quantity': 100`
			`}`

			`result = await yield_predictor.predict_yield(`
			`tenant_id='tenant_123',`
			`recipe_id='recipe_123',`
			`production_history=history,`
			`production_context=context,`
			`min_history_runs=30`
			`)`

			`assert result['status'] == 'insufficient_data'`
			`assert result['history_runs'] == 1`
			`assert result['required_runs'] == 30`
			`assert len(result['insights']) == 1`
			`assert result['insights'][0]['type'] == 'warning'`

			`@pytest.mark.asyncio`
			`async def test_baseline_statistics_stable_yield(self, yield_predictor, stable_yield_history):`
			`"""Test baseline statistics calculation for stable yield."""`
			`stats = yield_predictor._calculate_baseline_statistics(stable_yield_history)`

			`assert 95 < stats['mean_yield'] < 99`
			`assert stats['std_yield'] < 3 # Low variance`
			`assert stats['cv_yield'] < 0.05 # Low coefficient of variation`
			`assert stats['min_yield'] >= 90`
			`assert stats['max_yield'] <= 100`

			`@pytest.mark.asyncio`
			`async def test_baseline_statistics_variable_yield(self, yield_predictor, variable_yield_history):`
			`"""Test baseline statistics for variable yield."""`
			`stats = yield_predictor._calculate_baseline_statistics(variable_yield_history)`

			`assert 85 < stats['mean_yield'] < 93`
			`assert stats['std_yield'] > 3 # Higher variance`
			`assert stats['cv_yield'] > 0.03`
			`assert stats['runs_below_90'] > 0`


			`class TestFeatureEngineering:`
			`"""Test feature engineering."""`

			`@pytest.mark.asyncio`
			`async def test_time_features(self, yield_predictor, stable_yield_history):`
			`"""Test time-based feature extraction."""`
			`feature_df = yield_predictor._engineer_features(stable_yield_history)`

			`assert 'hour_of_day' in feature_df.columns`
			`assert 'day_of_week' in feature_df.columns`
			`assert 'is_weekend' in feature_df.columns`
			`assert 'is_early_morning' in feature_df.columns`
			`assert 'is_late_night' in feature_df.columns`

			`assert feature_df['hour_of_day'].min() >= 0`
			`assert feature_df['hour_of_day'].max() <= 23`
			`assert feature_df['day_of_week'].min() >= 0`
			`assert feature_df['day_of_week'].max() <= 6`

			`@pytest.mark.asyncio`
			`async def test_batch_size_features(self, yield_predictor, stable_yield_history):`
			`"""Test batch size feature engineering."""`
			`feature_df = yield_predictor._engineer_features(stable_yield_history)`

			`assert 'batch_size_normalized' in feature_df.columns`
			`assert 'is_large_batch' in feature_df.columns`
			`assert 'is_small_batch' in feature_df.columns`

			`# Normalized batch size should be around 1.0 on average`
			`assert 0.5 < feature_df['batch_size_normalized'].mean() < 1.5`

			`@pytest.mark.asyncio`
			`async def test_worker_experience_features(self, yield_predictor, variable_yield_history):`
			`"""Test worker experience feature engineering."""`
			`feature_df = yield_predictor._engineer_features(variable_yield_history)`

			`assert 'worker_run_count' in feature_df.columns`
			`assert 'worker_experience_level' in feature_df.columns`

			`# Worker run count should increase for each worker`
			`for worker in feature_df['worker_id'].unique():`
			`worker_runs = feature_df[feature_df['worker_id'] == worker]['worker_run_count']`
			`assert worker_runs.is_monotonic_increasing`


			class TestFactorAnalysis:
			    """Checks on the per-factor breakdown returned by ``_analyze_yield_factors``."""

			    @staticmethod
			    def _factors_for(predictor, history):
			        """Engineer features from *history* and return the factor analysis."""
			        engineered = predictor._engineer_features(history)
			        return predictor._analyze_yield_factors(engineered)

			    @pytest.mark.asyncio
			    async def test_worker_impact_detection(self, yield_predictor, variable_yield_history):
			        """The analysis reports a 'worker' factor; details are checked when significant."""
			        factors = self._factors_for(yield_predictor, variable_yield_history)

			        assert 'worker' in factors
			        worker_factor = factors['worker']
			        # Best/worst worker details are only required when the factor is
			        # flagged as significant.
			        if worker_factor.get('significant'):
			            for key in ('best_worker', 'worst_worker'):
			                assert key in worker_factor
			            assert worker_factor['yield_range'] > 0

			    @pytest.mark.asyncio
			    async def test_batch_size_correlation(self, yield_predictor, variable_yield_history):
			        """The analysis reports a 'batch_size' factor with a signed direction when significant."""
			        factors = self._factors_for(yield_predictor, variable_yield_history)

			        assert 'batch_size' in factors
			        batch_factor = factors['batch_size']
			        if batch_factor.get('significant'):
			            for key in ('correlation', 'direction'):
			                assert key in batch_factor
			            assert batch_factor['direction'] in ['positive', 'negative']

			    @pytest.mark.asyncio
			    async def test_time_of_day_effect(self, yield_predictor, variable_yield_history):
			        """The analysis always contains a 'time_of_day' entry."""
			        factors = self._factors_for(yield_predictor, variable_yield_history)

			        assert 'time_of_day' in factors


			class TestYieldPrediction:
			    """End-to-end checks of ``predict_yield`` against the history fixtures."""

			    @staticmethod
			    async def _predict(predictor, recipe_id, history, context):
			        """Call ``predict_yield`` with the tenant and minimum-run settings shared by these tests."""
			        return await predictor.predict_yield(
			            tenant_id='tenant_123',
			            recipe_id=recipe_id,
			            production_history=history,
			            production_context=context,
			            min_history_runs=30
			        )

			    @pytest.mark.asyncio
			    async def test_predict_stable_yield(self, yield_predictor, stable_yield_history, production_context_optimal):
			        """A stable history yields a prediction in (90, 100) bracketed by its range."""
			        result = await self._predict(
			            yield_predictor, 'recipe_123', stable_yield_history, production_context_optimal
			        )

			        assert result['status'] != 'insufficient_data'
			        predicted = result['predicted_yield']
			        assert predicted is not None
			        assert 90 < predicted < 100
			        assert result['confidence'] > 0
			        assert 'prediction_range' in result
			        bounds = result['prediction_range']
			        # The point estimate must lie strictly inside the reported range.
			        assert bounds['lower'] < result['predicted_yield']
			        assert bounds['upper'] > result['predicted_yield']

			    @pytest.mark.asyncio
			    async def test_predict_variable_yield_optimal_context(
			        self, yield_predictor, variable_yield_history, production_context_optimal
			    ):
			        """With an optimal context the prediction is at most 5 points below baseline."""
			        result = await self._predict(
			            yield_predictor, 'recipe_456', variable_yield_history, production_context_optimal
			        )

			        assert result['predicted_yield'] is not None
			        # Tolerance of 5 percentage points below the baseline yield.
			        assert result['predicted_yield'] > result['baseline_yield'] - 5

			    @pytest.mark.asyncio
			    async def test_predict_variable_yield_suboptimal_context(
			        self, yield_predictor, variable_yield_history, production_context_suboptimal
			    ):
			        """With a suboptimal context a prediction is still produced."""
			        result = await self._predict(
			            yield_predictor, 'recipe_456', variable_yield_history, production_context_suboptimal
			        )

			        assert result['predicted_yield'] is not None
			        # NOTE(review): the intent is that a suboptimal context (novice worker,
			        # early morning, large batch) predicts a lower yield, but nothing here
			        # asserts that yet.

			    @pytest.mark.asyncio
			    async def test_expected_waste_calculation(
			        self, yield_predictor, low_yield_history, production_context_optimal
			    ):
			        """A low-yield history produces positive expected waste above 5 percent."""
			        result = await self._predict(
			            yield_predictor, 'recipe_789', low_yield_history, production_context_optimal
			        )

			        assert 'expected_waste' in result
			        assert result['expected_waste'] > 0
			        # Waste percentage is the complement of the predicted yield.
			        assert 100 - result['predicted_yield'] > 5


			class TestPatternDetection:
			    """Checks on the patterns returned by ``_identify_yield_patterns``."""

			    @staticmethod
			    def _analyze(predictor, history):
			        """Return ``(factor_analysis, patterns)`` computed from *history*."""
			        engineered = predictor._engineer_features(history)
			        factors = predictor._analyze_yield_factors(engineered)
			        return factors, predictor._identify_yield_patterns(engineered, factors)

			    @pytest.mark.asyncio
			    async def test_low_yield_worker_pattern(self, yield_predictor, variable_yield_history):
			        """A significant worker factor must come with a 'low_yield_worker' pattern."""
			        factors, patterns = self._analyze(yield_predictor, variable_yield_history)

			        matches = [entry for entry in patterns if entry['pattern'] == 'low_yield_worker']
			        # The pattern is only required when the worker factor is significant.
			        if factors.get('worker', {}).get('significant'):
			            assert len(matches) > 0
			            first = matches[0]
			            assert first['severity'] in ['high', 'medium', 'low']
			            assert 'recommendation' in first

			    @pytest.mark.asyncio
			    async def test_time_of_day_pattern(self, yield_predictor, variable_yield_history):
			        """Pattern identification runs and every pattern exposes a 'pattern' key."""
			        _, patterns = self._analyze(yield_predictor, variable_yield_history)

			        # The filter result is not asserted on (time-of-day patterns depend on
			        # statistical significance); building it still fails the test if any
			        # entry lacks the 'pattern' key.
			        time_patterns = [entry for entry in patterns if entry['pattern'] == 'low_yield_time']


			class TestInsightGeneration:
			    """Checks on the ``insights`` list returned by ``predict_yield``."""

			    @staticmethod
			    async def _predict(predictor, recipe_id, history, context):
			        """Call ``predict_yield`` with the tenant and minimum-run settings shared by these tests."""
			        return await predictor.predict_yield(
			            tenant_id='tenant_123',
			            recipe_id=recipe_id,
			            production_history=history,
			            production_context=context,
			            min_history_runs=30
			        )

			    @pytest.mark.asyncio
			    async def test_low_yield_warning_insight(
			        self, yield_predictor, low_yield_history, production_context_optimal
			    ):
			        """A low-yield history produces at least one actionable production warning."""
			        result = await self._predict(
			            yield_predictor, 'recipe_789', low_yield_history, production_context_optimal
			        )

			        warnings = [entry for entry in result['insights'] if entry['type'] == 'warning']
			        assert len(warnings) > 0

			        # Only the first warning is inspected in detail.
			        first = warnings[0]
			        assert first['priority'] in ['high', 'medium']
			        assert first['category'] == 'production'
			        assert 'impact_value' in first
			        assert first['actionable'] is True

			    @pytest.mark.asyncio
			    async def test_excellent_yield_insight(
			        self, yield_predictor, stable_yield_history, production_context_optimal
			    ):
			        """A predicted yield above 98 must be accompanied by a positive insight."""
			        result = await self._predict(
			            yield_predictor, 'recipe_123', stable_yield_history, production_context_optimal
			        )

			        positives = [entry for entry in result['insights'] if entry['type'] == 'positive']
			        # The positive insight is only required for an excellent prediction.
			        if result['predicted_yield'] > 98:
			            assert len(positives) > 0

			    @pytest.mark.asyncio
			    async def test_yield_variability_insight(
			        self, yield_predictor, variable_yield_history, production_context_optimal
			    ):
			        """High relative spread in the baseline must surface a variability insight."""
			        result = await self._predict(
			            yield_predictor, 'recipe_456', variable_yield_history, production_context_optimal
			        )

			        def mentions_variability(entry):
			            """Return True when the title or description mentions variability."""
			            return (
			                'variability' in entry['title'].lower()
			                or 'variability' in entry['description'].lower()
			            )

			        # Coefficient of variation above 5% counts as high variability here.
			        if result['baseline_std'] / result['baseline_yield'] > 0.05:
			            matches = [entry for entry in result['insights'] if mentions_variability(entry)]
			            assert len(matches) > 0


			class TestConfidenceScoring:
			    """Test confidence score calculation."""

			    @pytest.mark.asyncio
			    async def test_high_confidence_large_sample(
			        self, yield_predictor, stable_yield_history, production_context_optimal
			    ):
			        """Test high confidence with large stable sample."""
			        result = await yield_predictor.predict_yield(
			            tenant_id='tenant_123',
			            recipe_id='recipe_123',
			            production_history=stable_yield_history,
			            production_context=production_context_optimal,
			            min_history_runs=30
			        )

			        # Large sample + stable data should give high confidence
			        assert result['confidence'] > 60

			    @pytest.mark.asyncio
			    async def test_lower_confidence_small_sample(self, yield_predictor, production_context_optimal):
			        """Test lower confidence with small sample.

			        Builds a history of exactly 30 runs (the ``min_history_runs``
			        threshold passed below) and checks the confidence stays under 85.
			        """
			        # A locally seeded generator keeps the synthetic data identical on
			        # every run, regardless of test ordering or global RNG state.
			        rng = np.random.default_rng(42)
			        # One reference timestamp so every completed_at is exactly
			        # started_at + 4 hours.
			        now = datetime.utcnow()

			        rows = []
			        for i in range(30):
			            started_at = now - timedelta(days=90 - i)
			            rows.append({
			                'production_run_id': f'run_{i}',
			                'recipe_id': 'recipe_123',
			                'planned_quantity': 100,
			                'actual_quantity': 95 + rng.normal(0, 2),
			                'yield_percentage': 95 + rng.normal(0, 2),
			                'worker_id': 'worker_1',
			                'started_at': started_at,
			                'completed_at': started_at + timedelta(hours=4),
			                'batch_size': 100
			            })
			        small_history = pd.DataFrame(rows)

			        result = await yield_predictor.predict_yield(
			            tenant_id='tenant_123',
			            recipe_id='recipe_123',
			            production_history=small_history,
			            production_context=production_context_optimal,
			            min_history_runs=30
			        )

			        # Small sample should give moderate confidence
			        assert result['confidence'] < 85


			class TestHistoricalAnalysis:
			    """Checks on ``analyze_recipe_yield_history`` (analysis without a prediction)."""

			    @pytest.mark.asyncio
			    async def test_analyze_recipe_history(self, yield_predictor, variable_yield_history):
			        """A sufficient history returns the recipe id and all analysis sections."""
			        analysis = await yield_predictor.analyze_recipe_yield_history(
			            tenant_id='tenant_123',
			            recipe_id='recipe_456',
			            production_history=variable_yield_history,
			            min_history_runs=30
			        )

			        assert analysis['recipe_id'] == 'recipe_456'
			        for section in ('baseline_stats', 'factor_analysis', 'patterns', 'insights'):
			            assert section in analysis

			    @pytest.mark.asyncio
			    async def test_analyze_insufficient_history(self, yield_predictor):
			        """A single-run history is reported as 'insufficient_data'."""
			        # One run only, far below the 30 runs required by the call below.
			        single_run = {
			            'production_run_id': 'run_1',
			            'recipe_id': 'recipe_123',
			            'planned_quantity': 100,
			            'actual_quantity': 95,
			            'yield_percentage': 95,
			            'worker_id': 'worker_1',
			            'started_at': datetime.utcnow() - timedelta(days=1),
			            'completed_at': datetime.utcnow() - timedelta(hours=20),
			            'batch_size': 100
			        }
			        tiny_history = pd.DataFrame([single_run])

			        analysis = await yield_predictor.analyze_recipe_yield_history(
			            tenant_id='tenant_123',
			            recipe_id='recipe_123',
			            production_history=tiny_history,
			            min_history_runs=30
			        )

			        assert analysis['status'] == 'insufficient_data'


			class TestModelPerformance:
			    """Checks on the result of ``_train_yield_model``."""

			    @staticmethod
			    def _train(predictor, history):
			        """Engineer features from *history* and return the training results."""
			        return predictor._train_yield_model(predictor._engineer_features(history))

			    @pytest.mark.asyncio
			    async def test_model_training(self, yield_predictor, variable_yield_history):
			        """Training returns the model, its name, metrics and feature importances."""
			        trained = self._train(yield_predictor, variable_yield_history)

			        for key in ('best_model', 'best_model_name', 'performance', 'feature_importance'):
			            assert key in trained

			        metrics = trained['performance']
			        for metric in ('mae', 'rmse', 'r2'):
			            assert metric in metrics

			        # Mean absolute error must stay below 15 percentage points.
			        assert metrics['mae'] < 15

			    @pytest.mark.asyncio
			    async def test_feature_importance(self, yield_predictor, variable_yield_history):
			        """Training exposes a 'feature_importance' entry."""
			        trained = self._train(yield_predictor, variable_yield_history)

			        importances = trained['feature_importance']

			        if len(importances) > 0:
			            # NOTE(review): inside this guard the right-hand operand is always
			            # true, so this assertion cannot fail. It should probably check
			            # for 'worker_encoded' alone; confirm the actual feature names
			            # before tightening it.
			            assert 'worker_encoded' in importances or len(importances) > 0