Improve AI logic

Urtzi Alfaro
2025-11-05 13:34:56 +01:00
parent 5c87fbcf48
commit 394ad3aea4
218 changed files with 30627 additions and 7658 deletions


@@ -0,0 +1,229 @@
"""Confidence scoring calculator for AI Insights."""
from typing import Dict, Any, Optional
from datetime import datetime, timedelta
import math
class ConfidenceCalculator:
"""
Calculate unified confidence scores across different insight types.
Confidence is calculated based on multiple factors:
- Data quality (completeness, consistency)
- Model performance (historical accuracy)
- Sample size (statistical significance)
- Recency (how recent is the data)
- Historical accuracy (past insight performance)
"""
# Weights for different factors
WEIGHTS = {
'data_quality': 0.25,
'model_performance': 0.30,
'sample_size': 0.20,
'recency': 0.15,
'historical_accuracy': 0.10
}
def calculate_confidence(
self,
data_quality_score: Optional[float] = None,
model_performance_score: Optional[float] = None,
sample_size: Optional[int] = None,
data_date: Optional[datetime] = None,
historical_accuracy: Optional[float] = None,
insight_type: Optional[str] = None
) -> int:
"""
Calculate overall confidence score (0-100).
Args:
data_quality_score: 0-1 score for data quality
model_performance_score: 0-1 score from model metrics (e.g., 1-MAPE)
sample_size: Number of data points used
data_date: Date of most recent data
historical_accuracy: 0-1 score from past insight performance
insight_type: Type of insight for specific adjustments
Returns:
int: Confidence score 0-100
"""
scores = {}
# Data Quality Score (0-100)
if data_quality_score is not None:
scores['data_quality'] = min(100, data_quality_score * 100)
else:
scores['data_quality'] = 70 # Default
# Model Performance Score (0-100)
if model_performance_score is not None:
scores['model_performance'] = min(100, model_performance_score * 100)
else:
scores['model_performance'] = 75 # Default
# Sample Size Score (0-100)
if sample_size is not None:
scores['sample_size'] = self._score_sample_size(sample_size)
else:
scores['sample_size'] = 60 # Default
# Recency Score (0-100)
if data_date is not None:
scores['recency'] = self._score_recency(data_date)
else:
scores['recency'] = 80 # Default
# Historical Accuracy Score (0-100)
if historical_accuracy is not None:
scores['historical_accuracy'] = min(100, historical_accuracy * 100)
else:
scores['historical_accuracy'] = 65 # Default
# Calculate weighted average
confidence = sum(
scores[factor] * self.WEIGHTS[factor]
for factor in scores
)
# Apply insight-type specific adjustments
confidence = self._apply_type_adjustments(confidence, insight_type)
return int(round(confidence))
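
    # Worked example for calculate_confidence (illustrative values, not from this commit):
    # data_quality_score=0.9, model_performance_score=0.8, sample_size=120,
    # data two days old, historical_accuracy=0.7, insight_type='optimization'
    # gives factor scores 90, 80, 85, 90 and 70, a weighted average of
    # 0.25*90 + 0.30*80 + 0.20*85 + 0.15*90 + 0.10*70 = 84, and the +2
    # 'optimization' adjustment yields a final confidence of 86.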

    def _score_sample_size(self, sample_size: int) -> float:
        """
        Score based on sample size using a logarithmic scale.

        Args:
            sample_size: Number of data points

        Returns:
            float: Score 0-100
        """
        if sample_size <= 10:
            return 30.0
        elif sample_size <= 30:
            return 50.0
        elif sample_size <= 100:
            return 70.0
        elif sample_size <= 365:
            return 85.0
        else:
            # Logarithmic scaling for larger samples
            return min(100.0, 85 + (math.log10(sample_size) - math.log10(365)) * 10)
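
    # Illustrative check for _score_sample_size: sample_size=3650 gives
    # 85 + (log10(3650) - log10(365)) * 10 = 85 + 10 = 95.0.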

    def _score_recency(self, data_date: datetime) -> float:
        """
        Score based on data recency.

        Args:
            data_date: Date of the most recent data

        Returns:
            float: Score 0-100
        """
        days_old = (datetime.utcnow() - data_date).days

        if days_old == 0:
            return 100.0
        elif days_old <= 1:
            return 95.0
        elif days_old <= 3:
            return 90.0
        elif days_old <= 7:
            return 80.0
        elif days_old <= 14:
            return 70.0
        elif days_old <= 30:
            return 60.0
        elif days_old <= 60:
            return 45.0
        else:
            # Exponential decay for older data
            return max(20.0, 60 * math.exp(-days_old / 60))
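
    # Illustrative check for _score_recency: data that is 90 days old scores
    # max(20.0, 60 * exp(-90 / 60)) = max(20.0, ~13.4) = 20.0.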

    def _apply_type_adjustments(self, base_confidence: float, insight_type: Optional[str]) -> float:
        """
        Apply insight-type specific confidence adjustments.

        Args:
            base_confidence: Base confidence score
            insight_type: Type of insight

        Returns:
            float: Adjusted confidence
        """
        if not insight_type:
            return base_confidence

        adjustments = {
            'prediction': -5,      # Predictions inherently less certain
            'optimization': +2,    # Optimizations based on solid math
            'alert': +3,           # Alerts based on thresholds
            'recommendation': 0,   # No adjustment
            'insight': +2,         # Insights from data analysis
            'anomaly': -3          # Anomalies are uncertain
        }

        adjustment = adjustments.get(insight_type, 0)
        return max(0, min(100, base_confidence + adjustment))

    def calculate_forecast_confidence(
        self,
        model_mape: float,
        forecast_horizon_days: int,
        data_points: int,
        last_data_date: datetime
    ) -> int:
        """
        Specialized confidence calculation for forecasting insights.

        Args:
            model_mape: Model MAPE (Mean Absolute Percentage Error), in percent
            forecast_horizon_days: How many days ahead the forecast extends
            data_points: Number of historical data points
            last_data_date: Date of the last training data

        Returns:
            int: Confidence score 0-100
        """
        # Model performance: 1 - (MAPE / 100), floored at 0 (MAPE >= 0 keeps it <= 1)
        model_score = max(0, 1 - (model_mape / 100))

        # Horizon penalty: longer horizons mean less confidence (floored at 0.5)
        horizon_factor = max(0.5, 1 - (forecast_horizon_days / 30))

        return self.calculate_confidence(
            data_quality_score=0.9,  # Assume good quality
            model_performance_score=model_score * horizon_factor,
            sample_size=data_points,
            data_date=last_data_date,
            insight_type='prediction'
        )
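
    # Illustrative check for calculate_forecast_confidence: model_mape=12 and
    # forecast_horizon_days=7 give model_score = 0.88 and horizon_factor = 1 - 7/30 ~ 0.767,
    # so the combined model_performance_score passed on is about 0.675.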

    def calculate_optimization_confidence(
        self,
        calculation_accuracy: float,
        data_completeness: float,
        sample_size: int
    ) -> int:
        """
        Confidence for optimization recommendations.

        Args:
            calculation_accuracy: 0-1 score for optimization calculation reliability
            data_completeness: 0-1 score for data completeness
            sample_size: Number of data points

        Returns:
            int: Confidence score 0-100
        """
        return self.calculate_confidence(
            data_quality_score=data_completeness,
            model_performance_score=calculation_accuracy,
            sample_size=sample_size,
            data_date=datetime.utcnow(),
            insight_type='optimization'
        )
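
A minimal usage sketch of the new calculator. The import path is an assumption for illustration; the module's actual location in the repository may differ.

from datetime import datetime, timedelta

# Hypothetical import path, for illustration only.
from ai_insights.confidence import ConfidenceCalculator

calculator = ConfidenceCalculator()

# Forecast insight: 12% MAPE model, 7-day horizon, 400 training points, data from yesterday.
forecast_confidence = calculator.calculate_forecast_confidence(
    model_mape=12.0,
    forecast_horizon_days=7,
    data_points=400,
    last_data_date=datetime.utcnow() - timedelta(days=1),
)

# Optimization insight: reliable calculation over mostly complete data.
optimization_confidence = calculator.calculate_optimization_confidence(
    calculation_accuracy=0.95,
    data_completeness=0.85,
    sample_size=120,
)

print(forecast_confidence, optimization_confidence)  # Both are ints in the 0-100 range.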