"""
|
||
|
|
Tests for Feedback Loop & Learning System
|
||
|
|
"""
|
||
|
|
|
||
|
|
import pytest
|
||
|
|
import pandas as pd
|
||
|
|
import numpy as np
|
||
|
|
from datetime import datetime, timedelta
|
||
|
|
from services.ai_insights.app.ml.feedback_learning_system import FeedbackLearningSystem
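# The async test methods below are marked with @pytest.mark.asyncio, which is
# provided by the pytest-asyncio plugin; it must be installed for these tests to run.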
@pytest.fixture
def learning_system():
    """Create FeedbackLearningSystem instance."""
    return FeedbackLearningSystem(
        performance_threshold=0.85,
        degradation_threshold=0.10,
        min_feedback_samples=30
    )
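# How these settings surface in the tests below (the authoritative behaviour lives
# in FeedbackLearningSystem itself):
#   - performance_threshold=0.85: accuracy below ~85% is flagged as degraded outright
#     (see test_degradation_below_threshold).
#   - degradation_threshold=0.10: a relative accuracy drop of 10% or more versus a
#     supplied baseline also counts as degradation (see test_degradation_vs_baseline).
#   - min_feedback_samples=30: fewer samples short-circuit the analysis with
#     status == 'insufficient_feedback' (see test_insufficient_feedback).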
@pytest.fixture
def good_feedback_data():
    """Generate feedback data for well-performing model."""
    np.random.seed(42)
    dates = pd.date_range(start=datetime.utcnow() - timedelta(days=60), periods=50, freq='D')

    feedback = []
    for i, date in enumerate(dates):
        predicted = 100 + np.random.normal(0, 10)
        actual = predicted + np.random.normal(0, 5)  # Small error

        error = predicted - actual
        error_pct = abs(error / actual * 100) if actual != 0 else 0
        accuracy = max(0, 100 - error_pct)

        feedback.append({
            'insight_id': f'insight_{i}',
            'applied_at': date - timedelta(days=1),
            'outcome_date': date,
            'predicted_value': predicted,
            'actual_value': actual,
            'error': error,
            'error_pct': error_pct,
            'accuracy': accuracy,
            'confidence': 85
        })

    return pd.DataFrame(feedback)
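# With noise of std 5 on values near 100, the typical absolute error is roughly 4%,
# so per-row accuracy clusters around 96 -- well above the accuracy > 80 and mae < 10
# bounds asserted in TestPerformanceMetrics.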
@pytest.fixture
def degraded_feedback_data():
    """Generate feedback data for degrading model."""
    np.random.seed(42)
    dates = pd.date_range(start=datetime.utcnow() - timedelta(days=60), periods=50, freq='D')

    feedback = []
    for i, date in enumerate(dates):
        # Introduce increasing error over time
        error_multiplier = 1 + (i / 50) * 2  # Error scale roughly triples by the end

        predicted = 100 + np.random.normal(0, 10)
        actual = predicted + np.random.normal(0, 10 * error_multiplier)

        error = predicted - actual
        error_pct = abs(error / actual * 100) if actual != 0 else 0
        accuracy = max(0, 100 - error_pct)

        feedback.append({
            'insight_id': f'insight_{i}',
            'applied_at': date - timedelta(days=1),
            'outcome_date': date,
            'predicted_value': predicted,
            'actual_value': actual,
            'error': error,
            'error_pct': error_pct,
            'accuracy': accuracy,
            'confidence': 85
        })

    return pd.DataFrame(feedback)
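# The noise std grows from 10 to about 30 across the 50-day window, so later rows
# carry much larger errors and lower accuracy; the trend-analysis and degradation
# tests below rely on this drift.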
@pytest.fixture
def biased_feedback_data():
    """Generate feedback data with systematic bias."""
    np.random.seed(42)
    dates = pd.date_range(start=datetime.utcnow() - timedelta(days=60), periods=50, freq='D')

    feedback = []
    for i, date in enumerate(dates):
        predicted = 100 + np.random.normal(0, 10)
        # Systematic over-prediction by 15%
        actual = predicted * 0.85 + np.random.normal(0, 3)

        error = predicted - actual
        error_pct = abs(error / actual * 100) if actual != 0 else 0
        accuracy = max(0, 100 - error_pct)

        feedback.append({
            'insight_id': f'insight_{i}',
            'applied_at': date - timedelta(days=1),
            'outcome_date': date,
            'predicted_value': predicted,
            'actual_value': actual,
            'error': error,
            'error_pct': error_pct,
            'accuracy': accuracy,
            'confidence': 80
        })

    return pd.DataFrame(feedback)
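# Because actual is centred on 0.85 * predicted, the error (predicted - actual) is
# consistently positive, around +15 on values near 100; test_systematic_bias_detection
# expects _identify_error_patterns to surface this as a 'systematic_bias'
# over-prediction pattern.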
@pytest.fixture
def poorly_calibrated_feedback_data():
    """Generate feedback with poor confidence calibration."""
    np.random.seed(42)
    dates = pd.date_range(start=datetime.utcnow() - timedelta(days=60), periods=50, freq='D')

    feedback = []
    for i, date in enumerate(dates):
        predicted = 100 + np.random.normal(0, 10)

        # High confidence but low accuracy
        if i < 25:
            confidence = 90
            actual = predicted + np.random.normal(0, 20)  # Large error
        else:
            confidence = 60
            actual = predicted + np.random.normal(0, 5)  # Small error

        error = predicted - actual
        error_pct = abs(error / actual * 100) if actual != 0 else 0
        accuracy = max(0, 100 - error_pct)

        feedback.append({
            'insight_id': f'insight_{i}',
            'applied_at': date - timedelta(days=1),
            'outcome_date': date,
            'predicted_value': predicted,
            'actual_value': actual,
            'error': error,
            'error_pct': error_pct,
            'accuracy': accuracy,
            'confidence': confidence
        })

    return pd.DataFrame(feedback)
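# The first 25 rows pair high confidence (90) with noisy outcomes (std 20), while the
# last 25 pair lower confidence (60) with accurate outcomes (std 5), so stated
# confidence moves opposite to realised accuracy; the calibration tests expect
# calibrated == False for this data.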
class TestPerformanceMetrics:
    """Test performance metric calculation."""

    @pytest.mark.asyncio
    async def test_calculate_metrics_good_performance(self, learning_system, good_feedback_data):
        """Test metric calculation for good performance."""
        metrics = learning_system._calculate_performance_metrics(good_feedback_data)

        assert 'accuracy' in metrics
        assert 'mae' in metrics
        assert 'rmse' in metrics
        assert 'mape' in metrics
        assert 'bias' in metrics
        assert 'r_squared' in metrics

        # Good model should have high accuracy
        assert metrics['accuracy'] > 80
        assert metrics['mae'] < 10
        assert abs(metrics['bias']) < 5

    @pytest.mark.asyncio
    async def test_calculate_metrics_degraded_performance(self, learning_system, degraded_feedback_data):
        """Test metric calculation for degraded performance."""
        metrics = learning_system._calculate_performance_metrics(degraded_feedback_data)

        # Degraded model should have lower accuracy
        assert metrics['accuracy'] < 80
        assert metrics['mae'] > 5
class TestPerformanceTrend:
    """Test performance trend analysis."""

    @pytest.mark.asyncio
    async def test_stable_trend(self, learning_system, good_feedback_data):
        """Test detection of stable performance trend."""
        trend = learning_system._analyze_performance_trend(good_feedback_data)

        assert trend['trend'] in ['stable', 'improving']

    @pytest.mark.asyncio
    async def test_degrading_trend(self, learning_system, degraded_feedback_data):
        """Test detection of degrading performance trend."""
        trend = learning_system._analyze_performance_trend(degraded_feedback_data)

        # May detect degrading trend depending on data
        assert trend['trend'] in ['degrading', 'stable']
        if trend['significant']:
            assert 'slope' in trend

    @pytest.mark.asyncio
    async def test_insufficient_data_trend(self, learning_system):
        """Test trend analysis with insufficient data."""
        small_data = pd.DataFrame([{
            'insight_id': 'test',
            'outcome_date': datetime.utcnow(),
            'accuracy': 90
        }])

        trend = learning_system._analyze_performance_trend(small_data)
        assert trend['trend'] == 'insufficient_data'
class TestDegradationDetection:
    """Test performance degradation detection."""

    @pytest.mark.asyncio
    async def test_no_degradation_detected(self, learning_system, good_feedback_data):
        """Test no degradation for good performance."""
        current_metrics = learning_system._calculate_performance_metrics(good_feedback_data)
        trend = learning_system._analyze_performance_trend(good_feedback_data)

        degradation = learning_system._detect_performance_degradation(
            current_metrics,
            baseline_performance={'accuracy': 85},
            trend_analysis=trend
        )

        assert degradation['detected'] is False
        assert degradation['severity'] == 'none'

    @pytest.mark.asyncio
    async def test_degradation_below_threshold(self, learning_system):
        """Test degradation detection when below absolute threshold."""
        current_metrics = {'accuracy': 70}  # Below 85% threshold
        trend = {'trend': 'stable', 'significant': False}

        degradation = learning_system._detect_performance_degradation(
            current_metrics,
            baseline_performance=None,
            trend_analysis=trend
        )

        assert degradation['detected'] is True
        assert degradation['severity'] == 'high'
        assert len(degradation['reasons']) > 0

    @pytest.mark.asyncio
    async def test_degradation_vs_baseline(self, learning_system):
        """Test degradation detection vs baseline."""
        current_metrics = {'accuracy': 80}
        baseline = {'accuracy': 95}  # 15.8% drop
        trend = {'trend': 'stable', 'significant': False}

        degradation = learning_system._detect_performance_degradation(
            current_metrics,
            baseline_performance=baseline,
            trend_analysis=trend
        )

        assert degradation['detected'] is True
        assert 'dropped' in degradation['reasons'][0].lower()

    @pytest.mark.asyncio
    async def test_degradation_trending_down(self, learning_system, degraded_feedback_data):
        """Test degradation detection from trending down."""
        current_metrics = learning_system._calculate_performance_metrics(degraded_feedback_data)
        trend = learning_system._analyze_performance_trend(degraded_feedback_data)

        degradation = learning_system._detect_performance_degradation(
            current_metrics,
            baseline_performance={'accuracy': 90},
            trend_analysis=trend
        )

        # Should detect some form of degradation
        assert degradation['detected'] is True
class TestRetrainingRecommendation:
    """Test retraining recommendation generation."""

    @pytest.mark.asyncio
    async def test_urgent_retraining_recommendation(self, learning_system):
        """Test urgent retraining recommendation."""
        current_metrics = {'accuracy': 70}
        degradation = {
            'detected': True,
            'severity': 'high',
            'reasons': ['Accuracy below threshold'],
            'current_accuracy': 70,
            'baseline_accuracy': 90
        }
        trend = {'trend': 'degrading', 'significant': True}

        recommendation = learning_system._generate_retraining_recommendation(
            'test_model',
            current_metrics,
            degradation,
            trend
        )

        assert recommendation['recommended'] is True
        assert recommendation['priority'] == 'urgent'
        assert 'immediately' in recommendation['recommendation'].lower()

    @pytest.mark.asyncio
    async def test_no_retraining_needed(self, learning_system, good_feedback_data):
        """Test no retraining recommendation for good performance."""
        current_metrics = learning_system._calculate_performance_metrics(good_feedback_data)
        degradation = {'detected': False, 'severity': 'none'}
        trend = learning_system._analyze_performance_trend(good_feedback_data)

        recommendation = learning_system._generate_retraining_recommendation(
            'test_model',
            current_metrics,
            degradation,
            trend
        )

        assert recommendation['recommended'] is False
        assert recommendation['priority'] == 'none'
class TestErrorPatternDetection:
    """Test error pattern identification."""

    @pytest.mark.asyncio
    async def test_systematic_bias_detection(self, learning_system, biased_feedback_data):
        """Test detection of systematic bias."""
        patterns = learning_system._identify_error_patterns(biased_feedback_data)

        # Should detect over-prediction bias
        bias_patterns = [p for p in patterns if p['pattern'] == 'systematic_bias']
        assert len(bias_patterns) > 0

        bias = bias_patterns[0]
        assert 'over-prediction' in bias['description']
        assert bias['severity'] in ['high', 'medium']

    @pytest.mark.asyncio
    async def test_no_patterns_for_good_data(self, learning_system, good_feedback_data):
        """Test no significant patterns for good data."""
        patterns = learning_system._identify_error_patterns(good_feedback_data)

        # May have some minor patterns, but no high severity
        high_severity = [p for p in patterns if p.get('severity') == 'high']
        assert len(high_severity) == 0
class TestConfidenceCalibration:
    """Test confidence calibration analysis."""

    @pytest.mark.asyncio
    async def test_well_calibrated_confidence(self, learning_system, good_feedback_data):
        """Test well-calibrated confidence scores."""
        calibration = learning_system._calculate_confidence_calibration(good_feedback_data)

        # Good data with consistent confidence should be well calibrated
        if 'overall_calibration_error' in calibration:
            # Small calibration error indicates good calibration
            assert calibration['overall_calibration_error'] < 20

    @pytest.mark.asyncio
    async def test_poorly_calibrated_confidence(self, learning_system, poorly_calibrated_feedback_data):
        """Test poorly calibrated confidence scores."""
        calibration = learning_system._calculate_confidence_calibration(poorly_calibrated_feedback_data)

        # Should detect poor calibration
        assert calibration['calibrated'] is False
        if 'by_confidence_range' in calibration:
            assert len(calibration['by_confidence_range']) > 0

    @pytest.mark.asyncio
    async def test_no_confidence_data(self, learning_system):
        """Test calibration when no confidence scores available."""
        no_conf_data = pd.DataFrame([{
            'predicted_value': 100,
            'actual_value': 95,
            'accuracy': 95
        }])

        calibration = learning_system._calculate_confidence_calibration(no_conf_data)
        assert calibration['calibrated'] is False
        assert 'reason' in calibration
class TestCompletePerformanceAnalysis:
    """Test complete performance analysis workflow."""

    @pytest.mark.asyncio
    async def test_analyze_good_performance(self, learning_system, good_feedback_data):
        """Test complete analysis of good performance."""
        result = await learning_system.analyze_model_performance(
            model_name='test_model',
            feedback_data=good_feedback_data,
            baseline_performance={'accuracy': 85}
        )

        assert result['model_name'] == 'test_model'
        assert result['status'] != 'insufficient_feedback'
        assert 'current_performance' in result
        assert 'trend_analysis' in result
        assert 'degradation_detected' in result
        assert 'retraining_recommendation' in result

        # Good performance should not recommend retraining
        assert result['retraining_recommendation']['recommended'] is False

    @pytest.mark.asyncio
    async def test_analyze_degraded_performance(self, learning_system, degraded_feedback_data):
        """Test complete analysis of degraded performance."""
        result = await learning_system.analyze_model_performance(
            model_name='degraded_model',
            feedback_data=degraded_feedback_data,
            baseline_performance={'accuracy': 90}
        )

        assert result['degradation_detected']['detected'] is True
        assert result['retraining_recommendation']['recommended'] is True

    @pytest.mark.asyncio
    async def test_insufficient_feedback(self, learning_system):
        """Test analysis with insufficient feedback samples."""
        small_data = pd.DataFrame([{
            'insight_id': 'test',
            'outcome_date': datetime.utcnow(),
            'predicted_value': 100,
            'actual_value': 95,
            'error': 5,
            'error_pct': 5,
            'accuracy': 95,
            'confidence': 85
        }])

        result = await learning_system.analyze_model_performance(
            model_name='test_model',
            feedback_data=small_data
        )

        assert result['status'] == 'insufficient_feedback'
        assert result['feedback_samples'] == 1
        assert result['required_samples'] == 30
class TestLearningInsights:
    """Test learning insight generation."""

    @pytest.mark.asyncio
    async def test_generate_urgent_retraining_insight(self, learning_system):
        """Test generation of urgent retraining insight."""
        analyses = [{
            'model_name': 'urgent_model',
            'retraining_recommendation': {
                'priority': 'urgent',
                'recommended': True
            },
            'degradation_detected': {
                'detected': True
            }
        }]

        insights = await learning_system.generate_learning_insights(
            analyses,
            tenant_id='tenant_123'
        )

        # Should generate urgent warning
        urgent_insights = [i for i in insights if i['priority'] == 'urgent']
        assert len(urgent_insights) > 0

        insight = urgent_insights[0]
        assert insight['type'] == 'warning'
        assert 'urgent_model' in insight['description'].lower()

    @pytest.mark.asyncio
    async def test_generate_system_health_insight(self, learning_system):
        """Test generation of system health insight."""
        # 3 models, 1 degraded
        analyses = [
            {
                'model_name': 'model_1',
                'degradation_detected': {'detected': False},
                'retraining_recommendation': {'priority': 'none'}
            },
            {
                'model_name': 'model_2',
                'degradation_detected': {'detected': False},
                'retraining_recommendation': {'priority': 'none'}
            },
            {
                'model_name': 'model_3',
                'degradation_detected': {'detected': True},
                'retraining_recommendation': {'priority': 'high'}
            }
        ]

        insights = await learning_system.generate_learning_insights(
            analyses,
            tenant_id='tenant_123'
        )

        # Should generate system health insight (66% healthy < 80%)
        # Note: May or may not trigger depending on threshold
        # At minimum should not crash
        assert isinstance(insights, list)

    @pytest.mark.asyncio
    async def test_generate_calibration_insight(self, learning_system):
        """Test generation of calibration insight."""
        analyses = [{
            'model_name': 'model_1',
            'degradation_detected': {'detected': False},
            'retraining_recommendation': {'priority': 'none'},
            'confidence_calibration': {
                'calibrated': False,
                'overall_calibration_error': 15
            }
        }]

        insights = await learning_system.generate_learning_insights(
            analyses,
            tenant_id='tenant_123'
        )

        # Should generate calibration insight
        calibration_insights = [
            i for i in insights
            if 'calibration' in i['title'].lower()
        ]
        assert len(calibration_insights) > 0
class TestROICalculation:
    """Test ROI calculation."""

    @pytest.mark.asyncio
    async def test_calculate_roi_with_impact_values(self, learning_system):
        """Test ROI calculation with impact values."""
        feedback_data = pd.DataFrame([
            {
                'accuracy': 90,
                'impact_value': 1000
            },
            {
                'accuracy': 85,
                'impact_value': 1500
            },
            {
                'accuracy': 95,
                'impact_value': 800
            }
        ])

        roi = await learning_system.calculate_roi(
            feedback_data,
            insight_type='demand_forecast'
        )

        assert roi['insight_type'] == 'demand_forecast'
        assert roi['samples'] == 3
        assert roi['avg_accuracy'] == 90.0
        assert roi['total_impact_value'] == 3300
        assert roi['roi_validated'] is True

    @pytest.mark.asyncio
    async def test_calculate_roi_without_impact_values(self, learning_system, good_feedback_data):
        """Test ROI calculation without impact values."""
        roi = await learning_system.calculate_roi(
            good_feedback_data,
            insight_type='yield_prediction'
        )

        assert roi['insight_type'] == 'yield_prediction'
        assert roi['samples'] > 0
        assert 'avg_accuracy' in roi
        assert roi['roi_validated'] is False