"""
|
||
|
|
Tests for Feedback Loop & Learning System
|
||
|
|
"""
|
||
|
|
|
||
|
|
import pytest
|
||
|
|
import pandas as pd
|
||
|
|
import numpy as np
|
||
|
|
from datetime import datetime, timedelta
|
||
|
|
from services.ai_insights.app.ml.feedback_learning_system import FeedbackLearningSystem
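# The async test methods below are marked with @pytest.mark.asyncio, which is
# provided by the pytest-asyncio plugin; it must be installed for these tests to run.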
@pytest.fixture
def learning_system():
    """Create FeedbackLearningSystem instance."""
    return FeedbackLearningSystem(
        performance_threshold=0.85,
        degradation_threshold=0.10,
        min_feedback_samples=30
    )
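# How these settings surface in the tests below (the authoritative behaviour lives
# in FeedbackLearningSystem itself):
#   - performance_threshold=0.85: accuracy below ~85% is flagged as degraded outright
#     (see test_degradation_below_threshold).
#   - degradation_threshold=0.10: a relative accuracy drop of 10% or more versus a
#     supplied baseline also counts as degradation (see test_degradation_vs_baseline).
#   - min_feedback_samples=30: fewer samples short-circuit the analysis with
#     status == 'insufficient_feedback' (see test_insufficient_feedback).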
@pytest.fixture
def good_feedback_data():
    """Generate feedback data for well-performing model."""
    np.random.seed(42)
    dates = pd.date_range(start=datetime.utcnow() - timedelta(days=60), periods=50, freq='D')

    feedback = []
    for i, date in enumerate(dates):
        predicted = 100 + np.random.normal(0, 10)
        actual = predicted + np.random.normal(0, 5)  # Small error

        error = predicted - actual
        error_pct = abs(error / actual * 100) if actual != 0 else 0
        accuracy = max(0, 100 - error_pct)

        feedback.append({
            'insight_id': f'insight_{i}',
            'applied_at': date - timedelta(days=1),
            'outcome_date': date,
            'predicted_value': predicted,
            'actual_value': actual,
            'error': error,
            'error_pct': error_pct,
            'accuracy': accuracy,
            'confidence': 85
        })

    return pd.DataFrame(feedback)
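# With noise of std 5 on values near 100, the typical absolute error is roughly 4%,
# so per-row accuracy clusters around 96 -- well above the accuracy > 80 and mae < 10
# bounds asserted in TestPerformanceMetrics.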
@pytest.fixture
def degraded_feedback_data():
    """Generate feedback data for degrading model."""
    np.random.seed(42)
    dates = pd.date_range(start=datetime.utcnow() - timedelta(days=60), periods=50, freq='D')

    feedback = []
    for i, date in enumerate(dates):
        # Introduce increasing error over time
        error_multiplier = 1 + (i / 50) * 2  # Error scale roughly triples by the end

        predicted = 100 + np.random.normal(0, 10)
        actual = predicted + np.random.normal(0, 10 * error_multiplier)

        error = predicted - actual
        error_pct = abs(error / actual * 100) if actual != 0 else 0
        accuracy = max(0, 100 - error_pct)

        feedback.append({
            'insight_id': f'insight_{i}',
            'applied_at': date - timedelta(days=1),
            'outcome_date': date,
            'predicted_value': predicted,
            'actual_value': actual,
            'error': error,
            'error_pct': error_pct,
            'accuracy': accuracy,
            'confidence': 85
        })

    return pd.DataFrame(feedback)
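# The noise std grows from 10 to about 30 across the 50-day window, so later rows
# carry much larger errors and lower accuracy; the trend-analysis and degradation
# tests below rely on this drift.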
@pytest.fixture
def biased_feedback_data():
    """Generate feedback data with systematic bias."""
    np.random.seed(42)
    dates = pd.date_range(start=datetime.utcnow() - timedelta(days=60), periods=50, freq='D')

    feedback = []
    for i, date in enumerate(dates):
        predicted = 100 + np.random.normal(0, 10)
        # Systematic over-prediction by 15%
        actual = predicted * 0.85 + np.random.normal(0, 3)

        error = predicted - actual
        error_pct = abs(error / actual * 100) if actual != 0 else 0
        accuracy = max(0, 100 - error_pct)

        feedback.append({
            'insight_id': f'insight_{i}',
            'applied_at': date - timedelta(days=1),
            'outcome_date': date,
            'predicted_value': predicted,
            'actual_value': actual,
            'error': error,
            'error_pct': error_pct,
            'accuracy': accuracy,
            'confidence': 80
        })

    return pd.DataFrame(feedback)
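# Because actual is centred on 0.85 * predicted, the error (predicted - actual) is
# consistently positive, around +15 on values near 100; test_systematic_bias_detection
# expects _identify_error_patterns to surface this as a 'systematic_bias'
# over-prediction pattern.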
@pytest.fixture
def poorly_calibrated_feedback_data():
    """Generate feedback with poor confidence calibration."""
    np.random.seed(42)
    dates = pd.date_range(start=datetime.utcnow() - timedelta(days=60), periods=50, freq='D')

    feedback = []
    for i, date in enumerate(dates):
        predicted = 100 + np.random.normal(0, 10)

        # High confidence but low accuracy
        if i < 25:
            confidence = 90
            actual = predicted + np.random.normal(0, 20)  # Large error
        else:
            confidence = 60
            actual = predicted + np.random.normal(0, 5)  # Small error

        error = predicted - actual
        error_pct = abs(error / actual * 100) if actual != 0 else 0
        accuracy = max(0, 100 - error_pct)

        feedback.append({
            'insight_id': f'insight_{i}',
            'applied_at': date - timedelta(days=1),
            'outcome_date': date,
            'predicted_value': predicted,
            'actual_value': actual,
            'error': error,
            'error_pct': error_pct,
            'accuracy': accuracy,
            'confidence': confidence
        })

    return pd.DataFrame(feedback)
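# The first 25 rows pair high confidence (90) with noisy outcomes (std 20), while the
# last 25 pair lower confidence (60) with accurate outcomes (std 5), so stated
# confidence moves opposite to realised accuracy; the calibration tests expect
# calibrated == False for this data.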
class TestPerformanceMetrics:
    """Test performance metric calculation."""

    @pytest.mark.asyncio
    async def test_calculate_metrics_good_performance(self, learning_system, good_feedback_data):
        """Test metric calculation for good performance."""
        metrics = learning_system._calculate_performance_metrics(good_feedback_data)

        assert 'accuracy' in metrics
        assert 'mae' in metrics
        assert 'rmse' in metrics
        assert 'mape' in metrics
        assert 'bias' in metrics
        assert 'r_squared' in metrics

        # Good model should have high accuracy
        assert metrics['accuracy'] > 80
        assert metrics['mae'] < 10
        assert abs(metrics['bias']) < 5

    @pytest.mark.asyncio
    async def test_calculate_metrics_degraded_performance(self, learning_system, degraded_feedback_data):
        """Test metric calculation for degraded performance."""
        metrics = learning_system._calculate_performance_metrics(degraded_feedback_data)

        # Degraded model should have lower accuracy
        assert metrics['accuracy'] < 80
        assert metrics['mae'] > 5
class TestPerformanceTrend:
    """Test performance trend analysis."""

    @pytest.mark.asyncio
    async def test_stable_trend(self, learning_system, good_feedback_data):
        """Test detection of stable performance trend."""
        trend = learning_system._analyze_performance_trend(good_feedback_data)

        assert trend['trend'] in ['stable', 'improving']

    @pytest.mark.asyncio
    async def test_degrading_trend(self, learning_system, degraded_feedback_data):
        """Test detection of degrading performance trend."""
        trend = learning_system._analyze_performance_trend(degraded_feedback_data)

        # May detect degrading trend depending on data
        assert trend['trend'] in ['degrading', 'stable']
        if trend['significant']:
            assert 'slope' in trend

    @pytest.mark.asyncio
    async def test_insufficient_data_trend(self, learning_system):
        """Test trend analysis with insufficient data."""
        small_data = pd.DataFrame([{
            'insight_id': 'test',
            'outcome_date': datetime.utcnow(),
            'accuracy': 90
        }])

        trend = learning_system._analyze_performance_trend(small_data)
        assert trend['trend'] == 'insufficient_data'
class TestDegradationDetection:
    """Test performance degradation detection."""

    @pytest.mark.asyncio
    async def test_no_degradation_detected(self, learning_system, good_feedback_data):
        """Test no degradation for good performance."""
        current_metrics = learning_system._calculate_performance_metrics(good_feedback_data)
        trend = learning_system._analyze_performance_trend(good_feedback_data)

        degradation = learning_system._detect_performance_degradation(
            current_metrics,
            baseline_performance={'accuracy': 85},
            trend_analysis=trend
        )

        assert degradation['detected'] is False
        assert degradation['severity'] == 'none'

    @pytest.mark.asyncio
    async def test_degradation_below_threshold(self, learning_system):
        """Test degradation detection when below absolute threshold."""
        current_metrics = {'accuracy': 70}  # Below 85% threshold
        trend = {'trend': 'stable', 'significant': False}

        degradation = learning_system._detect_performance_degradation(
            current_metrics,
            baseline_performance=None,
            trend_analysis=trend
        )

        assert degradation['detected'] is True
        assert degradation['severity'] == 'high'
        assert len(degradation['reasons']) > 0

    @pytest.mark.asyncio
    async def test_degradation_vs_baseline(self, learning_system):
        """Test degradation detection vs baseline."""
        current_metrics = {'accuracy': 80}
        baseline = {'accuracy': 95}  # 15.8% drop
        trend = {'trend': 'stable', 'significant': False}

        degradation = learning_system._detect_performance_degradation(
            current_metrics,
            baseline_performance=baseline,
            trend_analysis=trend
        )

        assert degradation['detected'] is True
        assert 'dropped' in degradation['reasons'][0].lower()

    @pytest.mark.asyncio
    async def test_degradation_trending_down(self, learning_system, degraded_feedback_data):
        """Test degradation detection from trending down."""
        current_metrics = learning_system._calculate_performance_metrics(degraded_feedback_data)
        trend = learning_system._analyze_performance_trend(degraded_feedback_data)

        degradation = learning_system._detect_performance_degradation(
            current_metrics,
            baseline_performance={'accuracy': 90},
            trend_analysis=trend
        )

        # Should detect some form of degradation
        assert degradation['detected'] is True
class TestRetrainingRecommendation:
    """Test retraining recommendation generation."""

    @pytest.mark.asyncio
    async def test_urgent_retraining_recommendation(self, learning_system):
        """Test urgent retraining recommendation."""
        current_metrics = {'accuracy': 70}
        degradation = {
            'detected': True,
            'severity': 'high',
            'reasons': ['Accuracy below threshold'],
            'current_accuracy': 70,
            'baseline_accuracy': 90
        }
        trend = {'trend': 'degrading', 'significant': True}

        recommendation = learning_system._generate_retraining_recommendation(
            'test_model',
            current_metrics,
            degradation,
            trend
        )

        assert recommendation['recommended'] is True
        assert recommendation['priority'] == 'urgent'
        assert 'immediately' in recommendation['recommendation'].lower()

    @pytest.mark.asyncio
    async def test_no_retraining_needed(self, learning_system, good_feedback_data):
        """Test no retraining recommendation for good performance."""
        current_metrics = learning_system._calculate_performance_metrics(good_feedback_data)
        degradation = {'detected': False, 'severity': 'none'}
        trend = learning_system._analyze_performance_trend(good_feedback_data)

        recommendation = learning_system._generate_retraining_recommendation(
            'test_model',
            current_metrics,
            degradation,
            trend
        )

        assert recommendation['recommended'] is False
        assert recommendation['priority'] == 'none'
class TestErrorPatternDetection:
    """Test error pattern identification."""

    @pytest.mark.asyncio
    async def test_systematic_bias_detection(self, learning_system, biased_feedback_data):
        """Test detection of systematic bias."""
        patterns = learning_system._identify_error_patterns(biased_feedback_data)

        # Should detect over-prediction bias
        bias_patterns = [p for p in patterns if p['pattern'] == 'systematic_bias']
        assert len(bias_patterns) > 0

        bias = bias_patterns[0]
        assert 'over-prediction' in bias['description']
        assert bias['severity'] in ['high', 'medium']

    @pytest.mark.asyncio
    async def test_no_patterns_for_good_data(self, learning_system, good_feedback_data):
        """Test no significant patterns for good data."""
        patterns = learning_system._identify_error_patterns(good_feedback_data)

        # May have some minor patterns, but no high severity
        high_severity = [p for p in patterns if p.get('severity') == 'high']
        assert len(high_severity) == 0
class TestConfidenceCalibration:
    """Test confidence calibration analysis."""

    @pytest.mark.asyncio
    async def test_well_calibrated_confidence(self, learning_system, good_feedback_data):
        """Test well-calibrated confidence scores."""
        calibration = learning_system._calculate_confidence_calibration(good_feedback_data)

        # Good data with consistent confidence should be well calibrated
        if 'overall_calibration_error' in calibration:
            # Small calibration error indicates good calibration
            assert calibration['overall_calibration_error'] < 20

    @pytest.mark.asyncio
    async def test_poorly_calibrated_confidence(self, learning_system, poorly_calibrated_feedback_data):
        """Test poorly calibrated confidence scores."""
        calibration = learning_system._calculate_confidence_calibration(poorly_calibrated_feedback_data)

        # Should detect poor calibration
        assert calibration['calibrated'] is False
        if 'by_confidence_range' in calibration:
            assert len(calibration['by_confidence_range']) > 0

    @pytest.mark.asyncio
    async def test_no_confidence_data(self, learning_system):
        """Test calibration when no confidence scores available."""
        no_conf_data = pd.DataFrame([{
            'predicted_value': 100,
            'actual_value': 95,
            'accuracy': 95
        }])

        calibration = learning_system._calculate_confidence_calibration(no_conf_data)
        assert calibration['calibrated'] is False
        assert 'reason' in calibration
class TestCompletePerformanceAnalysis:
    """Test complete performance analysis workflow."""

    @pytest.mark.asyncio
    async def test_analyze_good_performance(self, learning_system, good_feedback_data):
        """Test complete analysis of good performance."""
        result = await learning_system.analyze_model_performance(
            model_name='test_model',
            feedback_data=good_feedback_data,
            baseline_performance={'accuracy': 85}
        )

        assert result['model_name'] == 'test_model'
        assert result['status'] != 'insufficient_feedback'
        assert 'current_performance' in result
        assert 'trend_analysis' in result
        assert 'degradation_detected' in result
        assert 'retraining_recommendation' in result

        # Good performance should not recommend retraining
        assert result['retraining_recommendation']['recommended'] is False

    @pytest.mark.asyncio
    async def test_analyze_degraded_performance(self, learning_system, degraded_feedback_data):
        """Test complete analysis of degraded performance."""
        result = await learning_system.analyze_model_performance(
            model_name='degraded_model',
            feedback_data=degraded_feedback_data,
            baseline_performance={'accuracy': 90}
        )

        assert result['degradation_detected']['detected'] is True
        assert result['retraining_recommendation']['recommended'] is True

    @pytest.mark.asyncio
    async def test_insufficient_feedback(self, learning_system):
        """Test analysis with insufficient feedback samples."""
        small_data = pd.DataFrame([{
            'insight_id': 'test',
            'outcome_date': datetime.utcnow(),
            'predicted_value': 100,
            'actual_value': 95,
            'error': 5,
            'error_pct': 5,
            'accuracy': 95,
            'confidence': 85
        }])

        result = await learning_system.analyze_model_performance(
            model_name='test_model',
            feedback_data=small_data
        )

        assert result['status'] == 'insufficient_feedback'
        assert result['feedback_samples'] == 1
        assert result['required_samples'] == 30
class TestLearningInsights:
    """Test learning insight generation."""

    @pytest.mark.asyncio
    async def test_generate_urgent_retraining_insight(self, learning_system):
        """Test generation of urgent retraining insight."""
        analyses = [{
            'model_name': 'urgent_model',
            'retraining_recommendation': {
                'priority': 'urgent',
                'recommended': True
            },
            'degradation_detected': {
                'detected': True
            }
        }]

        insights = await learning_system.generate_learning_insights(
            analyses,
            tenant_id='tenant_123'
        )

        # Should generate urgent warning
        urgent_insights = [i for i in insights if i['priority'] == 'urgent']
        assert len(urgent_insights) > 0

        insight = urgent_insights[0]
        assert insight['type'] == 'warning'
        assert 'urgent_model' in insight['description'].lower()

    @pytest.mark.asyncio
    async def test_generate_system_health_insight(self, learning_system):
        """Test generation of system health insight."""
        # 3 models, 1 degraded
        analyses = [
            {
                'model_name': 'model_1',
                'degradation_detected': {'detected': False},
                'retraining_recommendation': {'priority': 'none'}
            },
            {
                'model_name': 'model_2',
                'degradation_detected': {'detected': False},
                'retraining_recommendation': {'priority': 'none'}
            },
            {
                'model_name': 'model_3',
                'degradation_detected': {'detected': True},
                'retraining_recommendation': {'priority': 'high'}
            }
        ]

        insights = await learning_system.generate_learning_insights(
            analyses,
            tenant_id='tenant_123'
        )

        # Should generate system health insight (66% healthy < 80%)
        # Note: May or may not trigger depending on threshold
        # At minimum should not crash
        assert isinstance(insights, list)

    @pytest.mark.asyncio
    async def test_generate_calibration_insight(self, learning_system):
        """Test generation of calibration insight."""
        analyses = [{
            'model_name': 'model_1',
            'degradation_detected': {'detected': False},
            'retraining_recommendation': {'priority': 'none'},
            'confidence_calibration': {
                'calibrated': False,
                'overall_calibration_error': 15
            }
        }]

        insights = await learning_system.generate_learning_insights(
            analyses,
            tenant_id='tenant_123'
        )

        # Should generate calibration insight
        calibration_insights = [
            i for i in insights
            if 'calibration' in i['title'].lower()
        ]
        assert len(calibration_insights) > 0
class TestROICalculation:
    """Test ROI calculation."""

    @pytest.mark.asyncio
    async def test_calculate_roi_with_impact_values(self, learning_system):
        """Test ROI calculation with impact values."""
        feedback_data = pd.DataFrame([
            {
                'accuracy': 90,
                'impact_value': 1000
            },
            {
                'accuracy': 85,
                'impact_value': 1500
            },
            {
                'accuracy': 95,
                'impact_value': 800
            }
        ])

        roi = await learning_system.calculate_roi(
            feedback_data,
            insight_type='demand_forecast'
        )

        assert roi['insight_type'] == 'demand_forecast'
        assert roi['samples'] == 3
        assert roi['avg_accuracy'] == 90.0
        assert roi['total_impact_value'] == 3300
        assert roi['roi_validated'] is True

    @pytest.mark.asyncio
    async def test_calculate_roi_without_impact_values(self, learning_system, good_feedback_data):
        """Test ROI calculation without impact values."""
        roi = await learning_system.calculate_roi(
            good_feedback_data,
            insight_type='yield_prediction'
        )

        assert roi['insight_type'] == 'yield_prediction'
        assert roi['samples'] > 0
        assert 'avg_accuracy' in roi
        assert roi['roi_validated'] is False