Improve AI logic

Urtzi Alfaro
2025-11-05 13:34:56 +01:00
parent 5c87fbcf48
commit 394ad3aea4
218 changed files with 30627 additions and 7658 deletions

View File

@@ -0,0 +1,3 @@
"""AI Insights Service."""
__version__ = "1.0.0"

View File

@@ -0,0 +1 @@
"""API modules for AI Insights Service."""

View File

@@ -0,0 +1,323 @@
"""API endpoints for AI Insights."""
from fastapi import APIRouter, Depends, HTTPException, Query, status
from sqlalchemy.ext.asyncio import AsyncSession
from typing import Optional
from uuid import UUID
from datetime import datetime
import math
from app.core.database import get_db
from app.repositories.insight_repository import InsightRepository
from app.repositories.feedback_repository import FeedbackRepository
from app.schemas.insight import (
AIInsightCreate,
AIInsightUpdate,
AIInsightResponse,
AIInsightList,
InsightMetrics,
InsightFilters
)
from app.schemas.feedback import InsightFeedbackCreate, InsightFeedbackResponse
router = APIRouter()
@router.post("/tenants/{tenant_id}/insights", response_model=AIInsightResponse, status_code=status.HTTP_201_CREATED)
async def create_insight(
tenant_id: UUID,
insight_data: AIInsightCreate,
db: AsyncSession = Depends(get_db)
):
"""Create a new AI Insight."""
# Ensure tenant_id matches
if insight_data.tenant_id != tenant_id:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail="Tenant ID mismatch"
)
repo = InsightRepository(db)
insight = await repo.create(insight_data)
await db.commit()
return insight
@router.get("/tenants/{tenant_id}/insights", response_model=AIInsightList)
async def get_insights(
tenant_id: UUID,
category: Optional[str] = Query(None),
priority: Optional[str] = Query(None),
status: Optional[str] = Query(None),
actionable_only: bool = Query(False),
min_confidence: int = Query(0, ge=0, le=100),
source_service: Optional[str] = Query(None),
from_date: Optional[datetime] = Query(None),
to_date: Optional[datetime] = Query(None),
page: int = Query(1, ge=1),
page_size: int = Query(20, ge=1, le=100),
db: AsyncSession = Depends(get_db)
):
"""Get insights for a tenant with filters and pagination."""
filters = InsightFilters(
category=category,
priority=priority,
status=status,
actionable_only=actionable_only,
min_confidence=min_confidence,
source_service=source_service,
from_date=from_date,
to_date=to_date
)
repo = InsightRepository(db)
skip = (page - 1) * page_size
insights, total = await repo.get_by_tenant(tenant_id, filters, skip, page_size)
total_pages = math.ceil(total / page_size) if total > 0 else 0
return AIInsightList(
items=insights,
total=total,
page=page,
page_size=page_size,
total_pages=total_pages
)
@router.get("/tenants/{tenant_id}/insights/orchestration-ready")
async def get_orchestration_ready_insights(
tenant_id: UUID,
target_date: datetime = Query(...),
min_confidence: int = Query(70, ge=0, le=100),
db: AsyncSession = Depends(get_db)
):
"""Get actionable insights for orchestration workflow."""
repo = InsightRepository(db)
categorized_insights = await repo.get_orchestration_ready_insights(
tenant_id, target_date, min_confidence
)
return categorized_insights
@router.get("/tenants/{tenant_id}/insights/{insight_id}", response_model=AIInsightResponse)
async def get_insight(
tenant_id: UUID,
insight_id: UUID,
db: AsyncSession = Depends(get_db)
):
"""Get a single insight by ID."""
repo = InsightRepository(db)
insight = await repo.get_by_id(insight_id)
if not insight:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail="Insight not found"
)
if insight.tenant_id != tenant_id:
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN,
detail="Access denied"
)
return insight
@router.patch("/tenants/{tenant_id}/insights/{insight_id}", response_model=AIInsightResponse)
async def update_insight(
tenant_id: UUID,
insight_id: UUID,
update_data: AIInsightUpdate,
db: AsyncSession = Depends(get_db)
):
"""Update an insight (typically status changes)."""
repo = InsightRepository(db)
# Verify insight exists and belongs to tenant
insight = await repo.get_by_id(insight_id)
if not insight:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail="Insight not found"
)
if insight.tenant_id != tenant_id:
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN,
detail="Access denied"
)
updated_insight = await repo.update(insight_id, update_data)
await db.commit()
return updated_insight
@router.delete("/tenants/{tenant_id}/insights/{insight_id}", status_code=status.HTTP_204_NO_CONTENT)
async def dismiss_insight(
tenant_id: UUID,
insight_id: UUID,
db: AsyncSession = Depends(get_db)
):
"""Dismiss an insight (soft delete)."""
repo = InsightRepository(db)
# Verify insight exists and belongs to tenant
insight = await repo.get_by_id(insight_id)
if not insight:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail="Insight not found"
)
if insight.tenant_id != tenant_id:
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN,
detail="Access denied"
)
await repo.delete(insight_id)
await db.commit()
@router.get("/tenants/{tenant_id}/insights/metrics/summary", response_model=InsightMetrics)
async def get_insights_metrics(
tenant_id: UUID,
db: AsyncSession = Depends(get_db)
):
"""Get aggregate metrics for insights."""
repo = InsightRepository(db)
metrics = await repo.get_metrics(tenant_id)
return InsightMetrics(**metrics)
@router.post("/tenants/{tenant_id}/insights/{insight_id}/apply")
async def apply_insight(
tenant_id: UUID,
insight_id: UUID,
db: AsyncSession = Depends(get_db)
):
"""Apply an insight recommendation (trigger action)."""
repo = InsightRepository(db)
# Verify insight exists and belongs to tenant
insight = await repo.get_by_id(insight_id)
if not insight:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail="Insight not found"
)
if insight.tenant_id != tenant_id:
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN,
detail="Access denied"
)
if not insight.actionable:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail="This insight is not actionable"
)
# Update status to in_progress
update_data = AIInsightUpdate(status='in_progress', applied_at=datetime.utcnow())
await repo.update(insight_id, update_data)
await db.commit()
# TODO: Route to appropriate service based on recommendation_actions
# This will be implemented when service clients are added
return {
"message": "Insight application initiated",
"insight_id": str(insight_id),
"actions": insight.recommendation_actions
}
@router.post("/tenants/{tenant_id}/insights/{insight_id}/feedback", response_model=InsightFeedbackResponse)
async def record_feedback(
tenant_id: UUID,
insight_id: UUID,
feedback_data: InsightFeedbackCreate,
db: AsyncSession = Depends(get_db)
):
"""Record feedback for an applied insight."""
insight_repo = InsightRepository(db)
# Verify insight exists and belongs to tenant
insight = await insight_repo.get_by_id(insight_id)
if not insight:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail="Insight not found"
)
if insight.tenant_id != tenant_id:
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN,
detail="Access denied"
)
# Ensure feedback is for this insight
if feedback_data.insight_id != insight_id:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail="Insight ID mismatch"
)
feedback_repo = FeedbackRepository(db)
feedback = await feedback_repo.create(feedback_data)
# Update insight status based on feedback
new_status = 'applied' if feedback.success else 'dismissed'
update_data = AIInsightUpdate(status=new_status)
await insight_repo.update(insight_id, update_data)
await db.commit()
return feedback
@router.post("/tenants/{tenant_id}/insights/refresh")
async def refresh_insights(
tenant_id: UUID,
db: AsyncSession = Depends(get_db)
):
"""Trigger insight refresh (expire old, generate new)."""
repo = InsightRepository(db)
# Expire old insights
expired_count = await repo.expire_old_insights()
await db.commit()
return {
"message": "Insights refreshed",
"expired_count": expired_count
}
@router.get("/tenants/{tenant_id}/insights/export")
async def export_insights(
tenant_id: UUID,
format: str = Query("json", regex="^(json|csv)$"),
db: AsyncSession = Depends(get_db)
):
"""Export insights to JSON or CSV."""
repo = InsightRepository(db)
insights, _ = await repo.get_by_tenant(tenant_id, filters=None, skip=0, limit=1000)
if format == "json":
return {"insights": [AIInsightResponse.model_validate(i) for i in insights]}
# CSV export would be implemented here
raise HTTPException(
status_code=status.HTTP_501_NOT_IMPLEMENTED,
detail="CSV export not yet implemented"
)
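
As a usage reference, here is a minimal client-side sketch of the create-and-list flow served by this router. The host/port and any payload fields not visible in this diff are assumptions.

import httpx
from uuid import uuid4

# Hypothetical smoke run against a local instance; payload fields mirror how
# the endpoints above consume AIInsightCreate and are assumptions where the
# schema is not shown in this diff.
tenant_id = uuid4()
base = f"http://localhost:8000/api/v1/tenants/{tenant_id}/insights"
payload = {
    "tenant_id": str(tenant_id),
    "type": "recommendation",
    "priority": "high",
    "category": "procurement",
    "title": "Buy flour before expected price increase",
    "description": "Supplier price predicted to rise ~12% within 30 days.",
    "confidence": 82,
    "actionable": True,
}
created = httpx.post(base, json=payload)
created.raise_for_status()
listing = httpx.get(base, params={"min_confidence": 70, "page": 1, "page_size": 20})
print(listing.json()["total"], "insights")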

View File

@@ -0,0 +1,77 @@
"""Configuration settings for AI Insights Service."""
from shared.config.base import BaseServiceSettings
import os
class Settings(BaseServiceSettings):
"""Application settings."""
# Service Info
SERVICE_NAME: str = "ai-insights"
SERVICE_VERSION: str = "1.0.0"
API_V1_PREFIX: str = "/api/v1"
# Database configuration (secure approach - build from components)
@property
def DATABASE_URL(self) -> str:
"""Build database URL from secure components"""
# Try complete URL first (for backward compatibility)
complete_url = os.getenv("AI_INSIGHTS_DATABASE_URL")
if complete_url:
return complete_url
# Also check for generic DATABASE_URL (for migration compatibility)
generic_url = os.getenv("DATABASE_URL")
if generic_url:
return generic_url
# Build from components (secure approach)
user = os.getenv("AI_INSIGHTS_DB_USER", "ai_insights_user")
password = os.getenv("AI_INSIGHTS_DB_PASSWORD", "ai_insights_pass123")
host = os.getenv("AI_INSIGHTS_DB_HOST", "localhost")
port = os.getenv("AI_INSIGHTS_DB_PORT", "5432")
name = os.getenv("AI_INSIGHTS_DB_NAME", "ai_insights_db")
return f"postgresql+asyncpg://{user}:{password}@{host}:{port}/{name}"
DB_POOL_SIZE: int = 20
DB_MAX_OVERFLOW: int = 10
# Redis (inherited from BaseServiceSettings but can override)
REDIS_CACHE_TTL: int = 900 # 15 minutes
REDIS_DB: int = 3 # Dedicated Redis database for AI Insights
# Service URLs
FORECASTING_SERVICE_URL: str = "http://forecasting-service:8000"
PROCUREMENT_SERVICE_URL: str = "http://procurement-service:8000"
PRODUCTION_SERVICE_URL: str = "http://production-service:8000"
SALES_SERVICE_URL: str = "http://sales-service:8000"
INVENTORY_SERVICE_URL: str = "http://inventory-service:8000"
# Circuit Breaker Settings
CIRCUIT_BREAKER_FAILURE_THRESHOLD: int = 5
CIRCUIT_BREAKER_TIMEOUT: int = 60
# Insight Settings
MIN_CONFIDENCE_THRESHOLD: int = 60
DEFAULT_INSIGHT_TTL_DAYS: int = 7
MAX_INSIGHTS_PER_REQUEST: int = 100
# Feedback Settings
FEEDBACK_PROCESSING_ENABLED: bool = True
FEEDBACK_PROCESSING_SCHEDULE: str = "0 6 * * *" # Daily at 6 AM
# Logging
LOG_LEVEL: str = "INFO"
# CORS
ALLOWED_ORIGINS: list[str] = ["http://localhost:3000", "http://localhost:5173"]
class Config:
env_file = ".env"
case_sensitive = True
settings = Settings()
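
A quick sketch of the resolution order implemented by the DATABASE_URL property above, assuming no .env file overrides these variables:

import os

# Precedence: AI_INSIGHTS_DATABASE_URL, then DATABASE_URL, then components.
os.environ.pop("AI_INSIGHTS_DATABASE_URL", None)
os.environ.pop("DATABASE_URL", None)
os.environ["AI_INSIGHTS_DB_HOST"] = "db.internal"

from app.core.config import settings

print(settings.DATABASE_URL)
# postgresql+asyncpg://ai_insights_user:ai_insights_pass123@db.internal:5432/ai_insights_db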

View File

@@ -0,0 +1,58 @@
"""Database configuration and session management."""
from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine, async_sessionmaker
from sqlalchemy.orm import declarative_base
from typing import AsyncGenerator
from app.core.config import settings
# Create async engine
engine = create_async_engine(
settings.DATABASE_URL,
pool_size=settings.DB_POOL_SIZE,
max_overflow=settings.DB_MAX_OVERFLOW,
echo=False,
future=True,
)
# Create async session factory
AsyncSessionLocal = async_sessionmaker(
engine,
class_=AsyncSession,
expire_on_commit=False,
autocommit=False,
autoflush=False,
)
# Create declarative base
Base = declarative_base()
async def get_db() -> AsyncGenerator[AsyncSession, None]:
"""
Dependency for getting async database sessions.
Yields:
AsyncSession: Database session
"""
async with AsyncSessionLocal() as session:
try:
yield session
await session.commit()
except Exception:
await session.rollback()
raise
finally:
await session.close()
async def init_db():
"""Initialize database tables."""
async with engine.begin() as conn:
await conn.run_sync(Base.metadata.create_all)
async def close_db():
"""Close database connections."""
await engine.dispose()
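
A minimal standalone sketch of the session lifecycle above (e.g. for a one-off script), assuming the configured database is reachable:

import asyncio
from sqlalchemy import text

from app.core.database import AsyncSessionLocal, close_db, init_db

async def main():
    await init_db()  # create tables if they do not exist
    async with AsyncSessionLocal() as session:
        result = await session.execute(text("SELECT 1"))
        assert result.scalar() == 1
    await close_db()

asyncio.run(main())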

View File

@@ -0,0 +1,320 @@
"""Impact estimation for AI Insights."""
from typing import Dict, Any, Tuple
from decimal import Decimal
class ImpactEstimator:
"""
Estimate potential impact of recommendations.
Calculates expected business value in terms of:
- Cost savings (euros)
- Revenue increase (euros)
- Waste reduction (euros or percentage)
- Efficiency gains (hours or percentage)
- Quality improvements (units or percentage)
"""
def estimate_procurement_savings(
self,
current_price: Decimal,
predicted_price: Decimal,
order_quantity: Decimal,
timeframe_days: int = 30
) -> Tuple[Decimal, str, str]:
"""
Estimate savings from opportunistic buying.
Args:
current_price: Current unit price
predicted_price: Predicted future price
order_quantity: Quantity to order
timeframe_days: Time horizon for prediction
Returns:
tuple: (impact_value, impact_unit, impact_type)
"""
savings_per_unit = predicted_price - current_price
if savings_per_unit > 0:
total_savings = savings_per_unit * order_quantity
return (
round(total_savings, 2),
'euros',
'cost_savings'
)
return (Decimal('0.0'), 'euros', 'cost_savings')
def estimate_waste_reduction_savings(
self,
current_waste_rate: float,
optimized_waste_rate: float,
monthly_volume: Decimal,
avg_cost_per_unit: Decimal
) -> Tuple[Decimal, str, str]:
"""
Estimate savings from waste reduction.
Args:
current_waste_rate: Current waste rate (0-1)
optimized_waste_rate: Optimized waste rate (0-1)
monthly_volume: Monthly volume
avg_cost_per_unit: Average cost per unit
Returns:
tuple: (impact_value, impact_unit, impact_type)
"""
waste_reduction_rate = current_waste_rate - optimized_waste_rate
units_saved = monthly_volume * Decimal(str(waste_reduction_rate))
savings = units_saved * avg_cost_per_unit
return (
round(savings, 2),
'euros/month',
'waste_reduction'
)
def estimate_forecast_improvement_value(
self,
current_mape: float,
improved_mape: float,
avg_monthly_revenue: Decimal
) -> Tuple[Decimal, str, str]:
"""
Estimate value from forecast accuracy improvement.
Better forecasts reduce:
- Stockouts (lost sales)
- Overproduction (waste)
- Emergency orders (premium costs)
Args:
current_mape: Current forecast MAPE
improved_mape: Improved forecast MAPE
avg_monthly_revenue: Average monthly revenue
Returns:
tuple: (impact_value, impact_unit, impact_type)
"""
# Rule of thumb: 1% MAPE improvement = 0.5% revenue impact
mape_improvement = current_mape - improved_mape
revenue_impact_pct = mape_improvement * 0.5 / 100
revenue_increase = avg_monthly_revenue * Decimal(str(revenue_impact_pct))
return (
round(revenue_increase, 2),
'euros/month',
'revenue_increase'
)
def estimate_production_efficiency_gain(
self,
time_saved_minutes: int,
batches_per_month: int,
labor_cost_per_hour: Decimal = Decimal('15.0')
) -> Tuple[Decimal, str, str]:
"""
Estimate value from production efficiency improvements.
Args:
time_saved_minutes: Minutes saved per batch
batches_per_month: Number of batches per month
labor_cost_per_hour: Labor cost per hour
Returns:
tuple: (impact_value, impact_unit, impact_type)
"""
hours_saved_per_month = (time_saved_minutes * batches_per_month) / 60
cost_savings = Decimal(str(hours_saved_per_month)) * labor_cost_per_hour
return (
round(cost_savings, 2),
'euros/month',
'efficiency_gain'
)
def estimate_safety_stock_optimization(
self,
current_safety_stock: Decimal,
optimal_safety_stock: Decimal,
holding_cost_per_unit_per_day: Decimal,
stockout_cost_reduction: Decimal = Decimal('0.0')
) -> Tuple[Decimal, str, str]:
"""
Estimate impact of safety stock optimization.
Args:
current_safety_stock: Current safety stock level
optimal_safety_stock: Optimal safety stock level
holding_cost_per_unit_per_day: Daily holding cost
stockout_cost_reduction: Reduction in stockout costs
Returns:
tuple: (impact_value, impact_unit, impact_type)
"""
stock_reduction = current_safety_stock - optimal_safety_stock
if stock_reduction > 0:
# Savings from reduced holding costs
daily_savings = stock_reduction * holding_cost_per_unit_per_day
monthly_savings = daily_savings * 30
total_savings = monthly_savings + stockout_cost_reduction
return (
round(total_savings, 2),
'euros/month',
'cost_savings'
)
elif stock_reduction < 0:
# Cost increase but reduces stockouts
daily_cost = abs(stock_reduction) * holding_cost_per_unit_per_day
monthly_cost = daily_cost * 30
net_savings = stockout_cost_reduction - monthly_cost
if net_savings > 0:
return (
round(net_savings, 2),
'euros/month',
'cost_savings'
)
return (Decimal('0.0'), 'euros/month', 'cost_savings')
def estimate_supplier_switch_savings(
self,
current_supplier_price: Decimal,
alternative_supplier_price: Decimal,
monthly_order_quantity: Decimal,
quality_difference_score: float = 0.0 # -1 to 1
) -> Tuple[Decimal, str, str]:
"""
Estimate savings from switching suppliers.
Args:
current_supplier_price: Current supplier unit price
alternative_supplier_price: Alternative supplier unit price
monthly_order_quantity: Monthly order quantity
quality_difference_score: Quality difference (-1=worse, 0=same, 1=better)
Returns:
tuple: (impact_value, impact_unit, impact_type)
"""
price_savings = (current_supplier_price - alternative_supplier_price) * monthly_order_quantity
# Adjust for quality difference
# If quality is worse, reduce estimated savings
quality_adjustment = 1 + (quality_difference_score * 0.1) # ±10% max adjustment
adjusted_savings = price_savings * Decimal(str(quality_adjustment))
return (
round(adjusted_savings, 2),
'euros/month',
'cost_savings'
)
def estimate_yield_improvement_value(
self,
current_yield_rate: float,
predicted_yield_rate: float,
production_volume: Decimal,
product_price: Decimal
) -> Tuple[Decimal, str, str]:
"""
Estimate value from production yield improvements.
Args:
current_yield_rate: Current yield rate (0-1)
predicted_yield_rate: Predicted yield rate (0-1)
production_volume: Monthly production volume
product_price: Product selling price
Returns:
tuple: (impact_value, impact_unit, impact_type)
"""
yield_improvement = predicted_yield_rate - current_yield_rate
if yield_improvement > 0:
additional_units = production_volume * Decimal(str(yield_improvement))
revenue_increase = additional_units * product_price
return (
round(revenue_increase, 2),
'euros/month',
'revenue_increase'
)
return (Decimal('0.0'), 'euros/month', 'revenue_increase')
def estimate_demand_pattern_value(
self,
pattern_strength: float, # 0-1
potential_revenue_increase: Decimal,
implementation_cost: Decimal = Decimal('0.0')
) -> Tuple[Decimal, str, str]:
"""
Estimate value from acting on demand patterns.
Args:
pattern_strength: Strength of detected pattern (0-1)
potential_revenue_increase: Potential monthly revenue increase
implementation_cost: One-time implementation cost
Returns:
tuple: (impact_value, impact_unit, impact_type)
"""
# Discount by pattern strength (confidence)
expected_value = potential_revenue_increase * Decimal(str(pattern_strength))
# Amortize implementation cost over 6 months
monthly_cost = implementation_cost / 6
net_value = expected_value - monthly_cost
return (
round(max(Decimal('0.0'), net_value), 2),
'euros/month',
'revenue_increase'
)
def estimate_composite_impact(
self,
impacts: list[Dict[str, Any]]
) -> Tuple[Decimal, str, str]:
"""
Combine multiple impact estimations.
Args:
impacts: List of impact dicts with 'value', 'unit', 'type'
Returns:
tuple: (total_impact_value, impact_unit, impact_type)
"""
total_savings = Decimal('0.0')
total_revenue = Decimal('0.0')
for impact in impacts:
value = Decimal(str(impact['value']))
impact_type = impact['type']
if impact_type == 'cost_savings':
total_savings += value
elif impact_type == 'revenue_increase':
total_revenue += value
# Combine both types
total_impact = total_savings + total_revenue
if total_impact > 0:
# Determine primary type
primary_type = 'cost_savings' if total_savings > total_revenue else 'revenue_increase'
return (
round(total_impact, 2),
'euros/month',
primary_type
)
return (Decimal('0.0'), 'euros/month', 'cost_savings')
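
A worked example of the opportunistic-buying estimate above: ordering 500 units now at 1.20 EUR instead of a predicted 1.35 EUR saves 0.15 * 500 = 75.00 EUR.

from decimal import Decimal

# Assumes ImpactEstimator from the module above is in scope.
estimator = ImpactEstimator()
value, unit, impact_type = estimator.estimate_procurement_savings(
    current_price=Decimal("1.20"),
    predicted_price=Decimal("1.35"),
    order_quantity=Decimal("500"),
)
assert (value, unit, impact_type) == (Decimal("75.00"), "euros", "cost_savings")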

View File

@@ -0,0 +1,93 @@
"""Main FastAPI application for AI Insights Service."""
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from contextlib import asynccontextmanager
import structlog
from app.core.config import settings
from app.core.database import init_db, close_db
from app.api import insights
# Configure structured logging
structlog.configure(
processors=[
structlog.processors.TimeStamper(fmt="iso"),
structlog.processors.JSONRenderer()
]
)
logger = structlog.get_logger()
@asynccontextmanager
async def lifespan(app: FastAPI):
"""Lifespan event handler for startup and shutdown."""
# Startup
logger.info("Starting AI Insights Service", service=settings.SERVICE_NAME, version=settings.SERVICE_VERSION)
await init_db()
logger.info("Database initialized")
yield
# Shutdown
logger.info("Shutting down AI Insights Service")
await close_db()
logger.info("Database connections closed")
# Create FastAPI app
app = FastAPI(
title="AI Insights Service",
description="Intelligent insights and recommendations for bakery operations",
version=settings.SERVICE_VERSION,
lifespan=lifespan
)
# CORS middleware
app.add_middleware(
CORSMiddleware,
allow_origins=settings.ALLOWED_ORIGINS,
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
# Include routers
app.include_router(
insights.router,
prefix=settings.API_V1_PREFIX,
tags=["insights"]
)
@app.get("/")
async def root():
"""Root endpoint."""
return {
"service": settings.SERVICE_NAME,
"version": settings.SERVICE_VERSION,
"status": "running"
}
@app.get("/health")
async def health_check():
"""Health check endpoint."""
return {
"status": "healthy",
"service": settings.SERVICE_NAME,
"version": settings.SERVICE_VERSION
}
if __name__ == "__main__":
import uvicorn
uvicorn.run(
"app.main:app",
host="0.0.0.0",
port=8000,
reload=True,
log_level=settings.LOG_LEVEL.lower()
)
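
A minimal smoke test for the app above. Note that the lifespan handler (and therefore init_db) only runs when TestClient is used as a context manager, so /health can be probed without a reachable database:

from fastapi.testclient import TestClient

from app.main import app

client = TestClient(app)  # no `with` block, so lifespan/init_db is not triggered
response = client.get("/health")
assert response.status_code == 200
assert response.json()["status"] == "healthy"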

View File

@@ -0,0 +1,672 @@
"""
Feedback Loop & Learning System
Enables continuous improvement through outcome tracking and model retraining
"""
import pandas as pd
import numpy as np
from typing import Dict, List, Any, Optional
from datetime import datetime, timedelta
import structlog
from scipy import stats
logger = structlog.get_logger()
class FeedbackLearningSystem:
"""
Manages feedback collection, model performance tracking, and retraining triggers.
Key Responsibilities:
1. Aggregate feedback from applied insights
2. Calculate model performance metrics (accuracy, precision, recall)
3. Detect performance degradation
4. Trigger automatic retraining when needed
5. Calibrate confidence scores based on actual accuracy
6. Generate learning insights for model improvement
Workflow:
- Feedback continuously recorded via AIInsightsClient
- Periodic performance analysis (daily/weekly)
- Automatic alerts when performance degrades
- Retraining recommendations with priority
"""
def __init__(
self,
performance_threshold: float = 0.85, # Minimum acceptable accuracy
degradation_threshold: float = 0.10, # 10% drop triggers alert
min_feedback_samples: int = 30, # Minimum samples for analysis
retraining_window_days: int = 90 # Consider last 90 days
):
self.performance_threshold = performance_threshold
self.degradation_threshold = degradation_threshold
self.min_feedback_samples = min_feedback_samples
self.retraining_window_days = retraining_window_days
async def analyze_model_performance(
self,
model_name: str,
feedback_data: pd.DataFrame,
baseline_performance: Optional[Dict[str, float]] = None
) -> Dict[str, Any]:
"""
Analyze model performance based on feedback data.
Args:
model_name: Name of the model (e.g., 'hybrid_forecaster', 'yield_predictor')
feedback_data: DataFrame with columns:
- insight_id
- applied_at
- outcome_date
- predicted_value
- actual_value
- error
- error_pct
- accuracy
baseline_performance: Optional baseline metrics for comparison
Returns:
Performance analysis with metrics, trends, and recommendations
"""
logger.info(
"Analyzing model performance",
model_name=model_name,
feedback_samples=len(feedback_data)
)
if len(feedback_data) < self.min_feedback_samples:
return self._insufficient_feedback_response(
model_name, len(feedback_data), self.min_feedback_samples
)
# Step 1: Calculate current performance metrics
current_metrics = self._calculate_performance_metrics(feedback_data)
# Step 2: Analyze performance trend over time
trend_analysis = self._analyze_performance_trend(feedback_data)
# Step 3: Detect performance degradation
degradation_detected = self._detect_performance_degradation(
current_metrics, baseline_performance, trend_analysis
)
# Step 4: Generate retraining recommendation
retraining_recommendation = self._generate_retraining_recommendation(
model_name, current_metrics, degradation_detected, trend_analysis
)
# Step 5: Identify error patterns
error_patterns = self._identify_error_patterns(feedback_data)
# Step 6: Calculate confidence calibration
confidence_calibration = self._calculate_confidence_calibration(feedback_data)
logger.info(
"Model performance analysis complete",
model_name=model_name,
current_accuracy=current_metrics['accuracy'],
degradation_detected=degradation_detected['detected'],
retraining_recommended=retraining_recommendation['recommended']
)
return {
'model_name': model_name,
'analyzed_at': datetime.utcnow().isoformat(),
'feedback_samples': len(feedback_data),
'date_range': {
'start': feedback_data['outcome_date'].min().isoformat(),
'end': feedback_data['outcome_date'].max().isoformat()
},
'current_performance': current_metrics,
'baseline_performance': baseline_performance,
'trend_analysis': trend_analysis,
'degradation_detected': degradation_detected,
'retraining_recommendation': retraining_recommendation,
'error_patterns': error_patterns,
'confidence_calibration': confidence_calibration
}
def _insufficient_feedback_response(
self, model_name: str, current_samples: int, required_samples: int
) -> Dict[str, Any]:
"""Return response when insufficient feedback data."""
return {
'model_name': model_name,
'analyzed_at': datetime.utcnow().isoformat(),
'status': 'insufficient_feedback',
'feedback_samples': current_samples,
'required_samples': required_samples,
'current_performance': None,
'recommendation': f'Need {required_samples - current_samples} more feedback samples for reliable analysis'
}
def _calculate_performance_metrics(
self, feedback_data: pd.DataFrame
) -> Dict[str, float]:
"""
Calculate comprehensive performance metrics.
Metrics:
- Accuracy: % of predictions within acceptable error
- MAE: Mean Absolute Error
- RMSE: Root Mean Squared Error
- MAPE: Mean Absolute Percentage Error
- Bias: Systematic over/under prediction
- R²: Correlation between predicted and actual
"""
predicted = feedback_data['predicted_value'].values
actual = feedback_data['actual_value'].values
# Filter out invalid values
valid_mask = ~(np.isnan(predicted) | np.isnan(actual))
predicted = predicted[valid_mask]
actual = actual[valid_mask]
if len(predicted) == 0:
return {
'accuracy': 0,
'mae': 0,
'rmse': 0,
'mape': 0,
'bias': 0,
'r_squared': 0
}
# Calculate errors
errors = predicted - actual
abs_errors = np.abs(errors)
        # Percentage errors, defined only where actual != 0
        valid_pct_mask = actual != 0
        pct_errors = np.full(errors.shape, np.nan)
        pct_errors[valid_pct_mask] = np.abs(errors[valid_pct_mask] / actual[valid_pct_mask]) * 100
        # MAE and RMSE
        mae = float(np.mean(abs_errors))
        rmse = float(np.sqrt(np.mean(errors ** 2)))
        # MAPE (excluding cases where actual = 0)
        mape = float(np.nanmean(pct_errors)) if np.any(valid_pct_mask) else 0.0
        # Accuracy (% of valid predictions within 10% error)
        if np.any(valid_pct_mask):
            within_10pct = float(np.sum(pct_errors[valid_pct_mask] <= 10) / valid_pct_mask.sum() * 100)
        else:
            within_10pct = 0.0
# Bias (mean error - positive = over-prediction)
bias = float(np.mean(errors))
# R² (correlation)
if len(predicted) > 1 and np.std(actual) > 0:
correlation = np.corrcoef(predicted, actual)[0, 1]
r_squared = correlation ** 2
else:
r_squared = 0
return {
'accuracy': round(within_10pct, 2), # % within 10% error
'mae': round(mae, 2),
'rmse': round(rmse, 2),
'mape': round(mape, 2),
'bias': round(bias, 2),
'r_squared': round(r_squared, 3),
'sample_size': len(predicted)
}
def _analyze_performance_trend(
self, feedback_data: pd.DataFrame
) -> Dict[str, Any]:
"""
Analyze performance trend over time.
Returns trend direction (improving/stable/degrading) and slope.
"""
# Sort by date
df = feedback_data.sort_values('outcome_date').copy()
# Calculate rolling accuracy (7-day window)
df['rolling_accuracy'] = df['accuracy'].rolling(window=7, min_periods=3).mean()
# Linear trend
if len(df) >= 10:
# Use day index as x
df['day_index'] = (df['outcome_date'] - df['outcome_date'].min()).dt.days
# Fit linear regression
valid_mask = ~np.isnan(df['rolling_accuracy'])
if valid_mask.sum() >= 10:
x = df.loc[valid_mask, 'day_index'].values
y = df.loc[valid_mask, 'rolling_accuracy'].values
slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)
# Determine trend
if p_value < 0.05:
if slope > 0.1:
trend = 'improving'
elif slope < -0.1:
trend = 'degrading'
else:
trend = 'stable'
else:
trend = 'stable'
return {
'trend': trend,
'slope': round(float(slope), 4),
'p_value': round(float(p_value), 4),
'significant': p_value < 0.05,
'recent_performance': round(float(df['rolling_accuracy'].iloc[-1]), 2),
'initial_performance': round(float(df['rolling_accuracy'].dropna().iloc[0]), 2)
}
# Not enough data for trend
return {
'trend': 'insufficient_data',
'slope': 0,
'p_value': 1.0,
'significant': False
}
def _detect_performance_degradation(
self,
current_metrics: Dict[str, float],
baseline_performance: Optional[Dict[str, float]],
trend_analysis: Dict[str, Any]
) -> Dict[str, Any]:
"""
Detect if model performance has degraded.
Degradation triggers:
1. Current accuracy below threshold (85%)
2. Significant drop from baseline (>10%)
3. Degrading trend detected
"""
degradation_reasons = []
severity = 'none'
# Check absolute performance
if current_metrics['accuracy'] < self.performance_threshold * 100:
degradation_reasons.append(
f"Accuracy {current_metrics['accuracy']:.1f}% below threshold {self.performance_threshold*100}%"
)
severity = 'high'
# Check vs baseline
if baseline_performance and 'accuracy' in baseline_performance:
baseline_acc = baseline_performance['accuracy']
current_acc = current_metrics['accuracy']
drop_pct = (baseline_acc - current_acc) / baseline_acc
if drop_pct > self.degradation_threshold:
degradation_reasons.append(
f"Accuracy dropped {drop_pct*100:.1f}% from baseline {baseline_acc:.1f}%"
)
                severity = 'high'
# Check trend
if trend_analysis.get('trend') == 'degrading' and trend_analysis.get('significant'):
degradation_reasons.append(
f"Degrading trend detected (slope: {trend_analysis['slope']:.4f})"
)
severity = 'medium' if severity == 'none' else severity
detected = len(degradation_reasons) > 0
return {
'detected': detected,
'severity': severity,
'reasons': degradation_reasons,
'current_accuracy': current_metrics['accuracy'],
'baseline_accuracy': baseline_performance.get('accuracy') if baseline_performance else None
}
def _generate_retraining_recommendation(
self,
model_name: str,
current_metrics: Dict[str, float],
degradation_detected: Dict[str, Any],
trend_analysis: Dict[str, Any]
) -> Dict[str, Any]:
"""
Generate retraining recommendation based on performance analysis.
Priority Levels:
- urgent: Severe degradation, retrain immediately
- high: Performance below threshold, retrain soon
- medium: Trending down, schedule retraining
- low: Stable, routine retraining
- none: No retraining needed
"""
if degradation_detected['detected']:
severity = degradation_detected['severity']
if severity == 'high':
priority = 'urgent'
recommendation = f"Retrain {model_name} immediately - severe performance degradation"
elif severity == 'medium':
priority = 'high'
recommendation = f"Schedule {model_name} retraining within 7 days"
else:
priority = 'medium'
recommendation = f"Schedule routine {model_name} retraining"
return {
'recommended': True,
'priority': priority,
'recommendation': recommendation,
'reasons': degradation_detected['reasons'],
'estimated_improvement': self._estimate_retraining_benefit(
current_metrics, degradation_detected
)
}
# Check if routine retraining is due (e.g., every 90 days)
# This would require tracking last_retrained_at
else:
return {
'recommended': False,
'priority': 'none',
'recommendation': f"{model_name} performance is acceptable, no immediate retraining needed",
'next_review_date': (datetime.utcnow() + timedelta(days=30)).isoformat()
}
def _estimate_retraining_benefit(
self,
current_metrics: Dict[str, float],
degradation_detected: Dict[str, Any]
) -> Dict[str, Any]:
"""Estimate expected improvement from retraining."""
baseline_acc = degradation_detected.get('baseline_accuracy')
current_acc = current_metrics['accuracy']
if baseline_acc:
# Expect to recover 70-80% of lost performance
expected_improvement = (baseline_acc - current_acc) * 0.75
expected_new_acc = current_acc + expected_improvement
return {
'expected_accuracy_improvement': round(expected_improvement, 2),
'expected_new_accuracy': round(expected_new_acc, 2),
'confidence': 'medium'
}
return {
'expected_accuracy_improvement': 'unknown',
'confidence': 'low'
}
def _identify_error_patterns(
self, feedback_data: pd.DataFrame
) -> List[Dict[str, Any]]:
"""
Identify systematic error patterns.
Patterns:
- Consistent over/under prediction
- Higher errors for specific ranges
- Day-of-week effects
- Seasonal effects
"""
patterns = []
# Pattern 1: Systematic bias
mean_error = feedback_data['error'].mean()
if abs(mean_error) > feedback_data['error'].std() * 0.5:
direction = 'over-prediction' if mean_error > 0 else 'under-prediction'
patterns.append({
'pattern': 'systematic_bias',
'description': f'Consistent {direction} by {abs(mean_error):.1f} units',
'severity': 'high' if abs(mean_error) > 10 else 'medium',
'recommendation': 'Recalibrate model bias term'
})
# Pattern 2: High error for large values
if 'predicted_value' in feedback_data.columns:
            # Split into quartiles; qcut raises ValueError when the bin
            # edges are not unique, so fall back gracefully in that case
            try:
                feedback_data['value_quartile'] = pd.qcut(
                    feedback_data['predicted_value'],
                    q=4,
                    labels=['Q1', 'Q2', 'Q3', 'Q4']
                )
                quartile_errors = feedback_data.groupby('value_quartile', observed=True)['error_pct'].mean()
            except ValueError:
                quartile_errors = pd.Series(dtype=float)
if len(quartile_errors) == 4 and quartile_errors['Q4'] > quartile_errors['Q1'] * 1.5:
patterns.append({
'pattern': 'high_value_error',
'description': f'Higher errors for large predictions (Q4: {quartile_errors["Q4"]:.1f}% vs Q1: {quartile_errors["Q1"]:.1f}%)',
'severity': 'medium',
'recommendation': 'Add log transformation or separate model for high values'
})
# Pattern 3: Day-of-week effect
if 'outcome_date' in feedback_data.columns:
feedback_data['day_of_week'] = pd.to_datetime(feedback_data['outcome_date']).dt.dayofweek
dow_errors = feedback_data.groupby('day_of_week')['error_pct'].mean()
if len(dow_errors) >= 5 and dow_errors.max() > dow_errors.min() * 1.5:
worst_day = dow_errors.idxmax()
day_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
patterns.append({
'pattern': 'day_of_week_effect',
'description': f'Higher errors on {day_names[worst_day]} ({dow_errors[worst_day]:.1f}%)',
'severity': 'low',
'recommendation': 'Add day-of-week features to model'
})
return patterns
def _calculate_confidence_calibration(
self, feedback_data: pd.DataFrame
) -> Dict[str, Any]:
"""
Calculate how well confidence scores match actual accuracy.
Well-calibrated model: 80% confidence → 80% accuracy
"""
if 'confidence' not in feedback_data.columns:
return {'calibrated': False, 'reason': 'No confidence scores available'}
# Bin by confidence ranges
feedback_data['confidence_bin'] = pd.cut(
feedback_data['confidence'],
bins=[0, 60, 70, 80, 90, 100],
labels=['<60', '60-70', '70-80', '80-90', '90+']
)
calibration_results = []
for conf_bin in feedback_data['confidence_bin'].unique():
if pd.isna(conf_bin):
continue
bin_data = feedback_data[feedback_data['confidence_bin'] == conf_bin]
if len(bin_data) >= 5:
avg_confidence = bin_data['confidence'].mean()
avg_accuracy = bin_data['accuracy'].mean()
calibration_error = abs(avg_confidence - avg_accuracy)
calibration_results.append({
'confidence_range': str(conf_bin),
'avg_confidence': round(avg_confidence, 1),
'avg_accuracy': round(avg_accuracy, 1),
'calibration_error': round(calibration_error, 1),
'sample_size': len(bin_data),
'well_calibrated': calibration_error < 10
})
# Overall calibration
if calibration_results:
overall_calibration_error = np.mean([r['calibration_error'] for r in calibration_results])
well_calibrated = overall_calibration_error < 10
return {
'calibrated': well_calibrated,
'overall_calibration_error': round(overall_calibration_error, 2),
'by_confidence_range': calibration_results,
'recommendation': 'Confidence scores are well-calibrated' if well_calibrated
else 'Recalibrate confidence scoring algorithm'
}
return {'calibrated': False, 'reason': 'Insufficient data for calibration analysis'}
async def generate_learning_insights(
self,
performance_analyses: List[Dict[str, Any]],
tenant_id: str
) -> List[Dict[str, Any]]:
"""
Generate high-level insights about learning system performance.
Args:
performance_analyses: List of model performance analyses
tenant_id: Tenant identifier
Returns:
Learning insights for system improvement
"""
insights = []
# Insight 1: Models needing urgent retraining
urgent_models = [
a for a in performance_analyses
if a.get('retraining_recommendation', {}).get('priority') == 'urgent'
]
if urgent_models:
model_names = ', '.join([a['model_name'] for a in urgent_models])
insights.append({
'type': 'warning',
'priority': 'urgent',
'category': 'system',
'title': f'Urgent Model Retraining Required: {len(urgent_models)} Models',
'description': f'Models requiring immediate retraining: {model_names}. Performance has degraded significantly.',
'impact_type': 'system_health',
'confidence': 95,
'metrics_json': {
'tenant_id': tenant_id,
'urgent_models': [a['model_name'] for a in urgent_models],
'affected_count': len(urgent_models)
},
'actionable': True,
'recommendation_actions': [{
'label': 'Retrain Models',
'action': 'trigger_model_retraining',
'params': {'models': [a['model_name'] for a in urgent_models]}
}],
'source_service': 'ai_insights',
'source_model': 'feedback_learning_system'
})
# Insight 2: Overall system health
total_models = len(performance_analyses)
healthy_models = [
a for a in performance_analyses
if not a.get('degradation_detected', {}).get('detected', False)
]
health_pct = (len(healthy_models) / total_models * 100) if total_models > 0 else 0
if health_pct < 80:
insights.append({
'type': 'warning',
'priority': 'high',
'category': 'system',
'title': f'Learning System Health: {health_pct:.0f}%',
'description': f'{len(healthy_models)} of {total_models} models are performing well. System-wide performance review recommended.',
'impact_type': 'system_health',
'confidence': 90,
'metrics_json': {
'tenant_id': tenant_id,
'total_models': total_models,
'healthy_models': len(healthy_models),
'health_percentage': round(health_pct, 1)
},
'actionable': True,
'recommendation_actions': [{
'label': 'Review System Health',
'action': 'review_learning_system',
'params': {'tenant_id': tenant_id}
}],
'source_service': 'ai_insights',
'source_model': 'feedback_learning_system'
})
# Insight 3: Confidence calibration issues
poorly_calibrated = [
a for a in performance_analyses
if not a.get('confidence_calibration', {}).get('calibrated', True)
]
if poorly_calibrated:
insights.append({
'type': 'opportunity',
'priority': 'medium',
'category': 'system',
'title': f'Confidence Calibration Needed: {len(poorly_calibrated)} Models',
'description': 'Confidence scores do not match actual accuracy. Recalibration recommended.',
'impact_type': 'system_improvement',
'confidence': 85,
'metrics_json': {
'tenant_id': tenant_id,
'models_needing_calibration': [a['model_name'] for a in poorly_calibrated]
},
'actionable': True,
'recommendation_actions': [{
'label': 'Recalibrate Confidence Scores',
'action': 'recalibrate_confidence',
'params': {'models': [a['model_name'] for a in poorly_calibrated]}
}],
'source_service': 'ai_insights',
'source_model': 'feedback_learning_system'
})
return insights
async def calculate_roi(
self,
feedback_data: pd.DataFrame,
insight_type: str
) -> Dict[str, Any]:
"""
Calculate ROI for applied insights.
Args:
feedback_data: Feedback data with business impact metrics
insight_type: Type of insight (e.g., 'demand_forecast', 'safety_stock')
Returns:
ROI calculation with cost savings and accuracy metrics
"""
if len(feedback_data) == 0:
return {'status': 'insufficient_data', 'samples': 0}
# Calculate accuracy
avg_accuracy = feedback_data['accuracy'].mean()
# Estimate cost savings (would be more sophisticated in production)
# For now, use impact_value from insights if available
if 'impact_value' in feedback_data.columns:
total_impact = feedback_data['impact_value'].sum()
avg_impact = feedback_data['impact_value'].mean()
return {
'insight_type': insight_type,
'samples': len(feedback_data),
'avg_accuracy': round(avg_accuracy, 2),
'total_impact_value': round(total_impact, 2),
'avg_impact_per_insight': round(avg_impact, 2),
'roi_validated': True
}
return {
'insight_type': insight_type,
'samples': len(feedback_data),
'avg_accuracy': round(avg_accuracy, 2),
'roi_validated': False,
'note': 'Impact values not tracked in feedback'
}
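
A hypothetical smoke test of the analysis pipeline above on synthetic feedback. The column names follow the contract documented in analyze_model_performance; all numbers are made up.

import asyncio

import numpy as np
import pandas as pd

# Assumes FeedbackLearningSystem from the module above is in scope.
rng = np.random.default_rng(42)
n = 60
actual = rng.uniform(80, 120, n)
predicted = actual * rng.normal(1.0, 0.05, n)
error = predicted - actual
feedback = pd.DataFrame({
    "insight_id": [f"ins-{i}" for i in range(n)],
    "applied_at": pd.date_range("2025-08-01", periods=n, freq="D"),
    "outcome_date": pd.date_range("2025-08-02", periods=n, freq="D"),
    "predicted_value": predicted,
    "actual_value": actual,
    "error": error,
    "error_pct": np.abs(error / actual) * 100,
    "accuracy": 100 - np.abs(error / actual) * 100,
})

system = FeedbackLearningSystem()
report = asyncio.run(system.analyze_model_performance("hybrid_forecaster", feedback))
print(report["current_performance"])
print(report["retraining_recommendation"]["priority"])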

View File

@@ -0,0 +1,11 @@
"""Database models for AI Insights Service."""
from app.models.ai_insight import AIInsight
from app.models.insight_feedback import InsightFeedback
from app.models.insight_correlation import InsightCorrelation
__all__ = [
"AIInsight",
"InsightFeedback",
"InsightCorrelation",
]

View File

@@ -0,0 +1,129 @@
"""AI Insight database model."""
from sqlalchemy import Column, String, Integer, Boolean, DECIMAL, TIMESTAMP, Text, Index, CheckConstraint
from sqlalchemy.dialects.postgresql import UUID, JSONB
from sqlalchemy.sql import func
import uuid
from app.core.database import Base
class AIInsight(Base):
"""AI Insight model for storing intelligent recommendations and predictions."""
__tablename__ = "ai_insights"
# Primary Key
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
# Tenant Information
tenant_id = Column(UUID(as_uuid=True), nullable=False, index=True)
# Classification
type = Column(
String(50),
nullable=False,
index=True,
comment="optimization, alert, prediction, recommendation, insight, anomaly"
)
priority = Column(
String(20),
nullable=False,
index=True,
comment="low, medium, high, critical"
)
category = Column(
String(50),
nullable=False,
index=True,
comment="forecasting, inventory, production, procurement, customer, cost, quality, efficiency, demand, maintenance, energy, scheduling"
)
# Content
title = Column(String(255), nullable=False)
description = Column(Text, nullable=False)
# Impact Information
impact_type = Column(
String(50),
comment="cost_savings, revenue_increase, waste_reduction, efficiency_gain, quality_improvement, risk_mitigation"
)
impact_value = Column(DECIMAL(10, 2), comment="Numeric impact value")
impact_unit = Column(
String(20),
comment="euros, percentage, hours, units, euros/month, euros/year"
)
# Confidence and Metrics
confidence = Column(
Integer,
CheckConstraint('confidence >= 0 AND confidence <= 100'),
nullable=False,
index=True,
comment="Confidence score 0-100"
)
metrics_json = Column(
JSONB,
comment="Dynamic metrics specific to insight type"
)
# Actionability
actionable = Column(
Boolean,
default=True,
nullable=False,
index=True,
comment="Whether this insight can be acted upon"
)
recommendation_actions = Column(
JSONB,
comment="List of possible actions: [{label, action, endpoint}]"
)
# Status
status = Column(
String(20),
default='new',
nullable=False,
index=True,
comment="new, acknowledged, in_progress, applied, dismissed, expired"
)
# Source Information
source_service = Column(
String(50),
comment="Service that generated this insight"
)
source_data_id = Column(
String(100),
comment="Reference to source data (e.g., forecast_id, model_id)"
)
# Timestamps
created_at = Column(
TIMESTAMP(timezone=True),
server_default=func.now(),
nullable=False,
index=True
)
updated_at = Column(
TIMESTAMP(timezone=True),
server_default=func.now(),
onupdate=func.now(),
nullable=False
)
applied_at = Column(TIMESTAMP(timezone=True), comment="When insight was applied")
expired_at = Column(
TIMESTAMP(timezone=True),
comment="When insight expires (auto-calculated based on TTL)"
)
# Composite Indexes
__table_args__ = (
Index('idx_tenant_status_category', 'tenant_id', 'status', 'category'),
Index('idx_tenant_created_confidence', 'tenant_id', 'created_at', 'confidence'),
Index('idx_actionable_status', 'actionable', 'status'),
)
def __repr__(self):
return f"<AIInsight(id={self.id}, type={self.type}, title={self.title[:30]}, confidence={self.confidence})>"

View File

@@ -0,0 +1,69 @@
"""Insight Correlation database model for cross-service intelligence."""
from sqlalchemy import Column, String, Integer, DECIMAL, TIMESTAMP, ForeignKey, Index
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.sql import func
import uuid
from app.core.database import Base
class InsightCorrelation(Base):
"""Track correlations between insights from different services."""
__tablename__ = "insight_correlations"
# Primary Key
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
# Foreign Keys to AIInsights
parent_insight_id = Column(
UUID(as_uuid=True),
ForeignKey('ai_insights.id', ondelete='CASCADE'),
nullable=False,
index=True,
comment="Primary insight that leads to correlation"
)
child_insight_id = Column(
UUID(as_uuid=True),
ForeignKey('ai_insights.id', ondelete='CASCADE'),
nullable=False,
index=True,
comment="Related insight"
)
# Correlation Information
correlation_type = Column(
String(50),
nullable=False,
comment="forecast_inventory, production_procurement, weather_customer, demand_supplier, etc."
)
correlation_strength = Column(
DECIMAL(3, 2),
nullable=False,
comment="0.00 to 1.00 indicating strength of correlation"
)
# Combined Metrics
combined_confidence = Column(
Integer,
comment="Weighted combined confidence of both insights"
)
# Timestamp
created_at = Column(
TIMESTAMP(timezone=True),
server_default=func.now(),
nullable=False,
index=True
)
# Composite Indexes
__table_args__ = (
Index('idx_parent_child', 'parent_insight_id', 'child_insight_id'),
Index('idx_correlation_type', 'correlation_type'),
)
def __repr__(self):
return f"<InsightCorrelation(id={self.id}, type={self.correlation_type}, strength={self.correlation_strength})>"

View File

@@ -0,0 +1,87 @@
"""Insight Feedback database model for closed-loop learning."""
from sqlalchemy import Column, String, Boolean, DECIMAL, TIMESTAMP, Text, ForeignKey, Index
from sqlalchemy.dialects.postgresql import UUID, JSONB
from sqlalchemy.sql import func
import uuid
from app.core.database import Base
class InsightFeedback(Base):
"""Feedback tracking for AI Insights to enable learning."""
__tablename__ = "insight_feedback"
# Primary Key
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
# Foreign Key to AIInsight
insight_id = Column(
UUID(as_uuid=True),
ForeignKey('ai_insights.id', ondelete='CASCADE'),
nullable=False,
index=True
)
# Action Information
action_taken = Column(
String(100),
comment="Specific action that was taken from recommendation_actions"
)
# Result Data
result_data = Column(
JSONB,
comment="Detailed result data from applying the insight"
)
# Success Tracking
success = Column(
Boolean,
nullable=False,
index=True,
comment="Whether the insight application was successful"
)
error_message = Column(
Text,
comment="Error message if success = false"
)
# Impact Comparison
expected_impact_value = Column(
DECIMAL(10, 2),
comment="Expected impact value from original insight"
)
actual_impact_value = Column(
DECIMAL(10, 2),
comment="Measured actual impact after application"
)
variance_percentage = Column(
DECIMAL(5, 2),
comment="(actual - expected) / expected * 100"
)
# User Information
applied_by = Column(
String(100),
comment="User or system that applied the insight"
)
# Timestamp
created_at = Column(
TIMESTAMP(timezone=True),
server_default=func.now(),
nullable=False,
index=True
)
# Composite Indexes
__table_args__ = (
Index('idx_insight_success', 'insight_id', 'success'),
Index('idx_created_success', 'created_at', 'success'),
)
def __repr__(self):
return f"<InsightFeedback(id={self.id}, insight_id={self.insight_id}, success={self.success})>"

View File

@@ -0,0 +1,9 @@
"""Repositories for AI Insights Service."""
from app.repositories.insight_repository import InsightRepository
from app.repositories.feedback_repository import FeedbackRepository
__all__ = [
"InsightRepository",
"FeedbackRepository",
]

View File

@@ -0,0 +1,81 @@
"""Repository for Insight Feedback database operations."""
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select, desc
from typing import Optional, List
from uuid import UUID
from decimal import Decimal
from app.models.insight_feedback import InsightFeedback
from app.schemas.feedback import InsightFeedbackCreate
class FeedbackRepository:
"""Repository for Insight Feedback operations."""
def __init__(self, session: AsyncSession):
self.session = session
async def create(self, feedback_data: InsightFeedbackCreate) -> InsightFeedback:
"""Create feedback for an insight."""
# Calculate variance if both values provided
variance = None
if (feedback_data.expected_impact_value is not None and
feedback_data.actual_impact_value is not None and
feedback_data.expected_impact_value != 0):
variance = (
(feedback_data.actual_impact_value - feedback_data.expected_impact_value) /
feedback_data.expected_impact_value * 100
)
feedback = InsightFeedback(
**feedback_data.model_dump(exclude={'variance_percentage'}),
variance_percentage=variance
)
self.session.add(feedback)
await self.session.flush()
await self.session.refresh(feedback)
return feedback
async def get_by_id(self, feedback_id: UUID) -> Optional[InsightFeedback]:
"""Get feedback by ID."""
query = select(InsightFeedback).where(InsightFeedback.id == feedback_id)
result = await self.session.execute(query)
return result.scalar_one_or_none()
async def get_by_insight(self, insight_id: UUID) -> List[InsightFeedback]:
"""Get all feedback for an insight."""
query = select(InsightFeedback).where(
InsightFeedback.insight_id == insight_id
).order_by(desc(InsightFeedback.created_at))
result = await self.session.execute(query)
return list(result.scalars().all())
async def get_success_rate(self, insight_type: Optional[str] = None) -> float:
"""Calculate success rate for insights."""
query = select(InsightFeedback)
result = await self.session.execute(query)
feedbacks = result.scalars().all()
if not feedbacks:
return 0.0
successful = sum(1 for f in feedbacks if f.success)
return (successful / len(feedbacks)) * 100
async def get_average_impact_variance(self) -> Decimal:
"""Calculate average variance between expected and actual impact."""
query = select(InsightFeedback).where(
InsightFeedback.variance_percentage.isnot(None)
)
result = await self.session.execute(query)
feedbacks = result.scalars().all()
if not feedbacks:
return Decimal('0.0')
avg_variance = sum(f.variance_percentage for f in feedbacks) / len(feedbacks)
return Decimal(str(round(float(avg_variance), 2)))
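
A worked example of the variance that FeedbackRepository.create() derives when both impact values are present:

from decimal import Decimal

expected = Decimal("200.00")
actual = Decimal("150.00")
variance = (actual - expected) / expected * 100
print(variance)  # -25.00: the insight over-estimated its impact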

View File

@@ -0,0 +1,254 @@
"""Repository for AI Insight database operations."""
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select, func, and_, or_, desc
from typing import Optional, List, Dict, Any
from uuid import UUID
from datetime import datetime, timedelta
from app.models.ai_insight import AIInsight
from app.schemas.insight import AIInsightCreate, AIInsightUpdate, InsightFilters
class InsightRepository:
"""Repository for AI Insight operations."""
def __init__(self, session: AsyncSession):
self.session = session
async def create(self, insight_data: AIInsightCreate) -> AIInsight:
"""Create a new AI Insight."""
# Calculate expiration date (default 7 days from now)
from app.core.config import settings
expired_at = datetime.utcnow() + timedelta(days=settings.DEFAULT_INSIGHT_TTL_DAYS)
insight = AIInsight(
**insight_data.model_dump(),
expired_at=expired_at
)
self.session.add(insight)
await self.session.flush()
await self.session.refresh(insight)
return insight
async def get_by_id(self, insight_id: UUID) -> Optional[AIInsight]:
"""Get insight by ID."""
query = select(AIInsight).where(AIInsight.id == insight_id)
result = await self.session.execute(query)
return result.scalar_one_or_none()
async def get_by_tenant(
self,
tenant_id: UUID,
filters: Optional[InsightFilters] = None,
skip: int = 0,
limit: int = 100
) -> tuple[List[AIInsight], int]:
"""Get insights for a tenant with filters and pagination."""
# Build base query
query = select(AIInsight).where(AIInsight.tenant_id == tenant_id)
# Apply filters
if filters:
if filters.category and filters.category != 'all':
query = query.where(AIInsight.category == filters.category)
if filters.priority and filters.priority != 'all':
query = query.where(AIInsight.priority == filters.priority)
if filters.status and filters.status != 'all':
query = query.where(AIInsight.status == filters.status)
if filters.actionable_only:
query = query.where(AIInsight.actionable == True)
if filters.min_confidence > 0:
query = query.where(AIInsight.confidence >= filters.min_confidence)
if filters.source_service:
query = query.where(AIInsight.source_service == filters.source_service)
if filters.from_date:
query = query.where(AIInsight.created_at >= filters.from_date)
if filters.to_date:
query = query.where(AIInsight.created_at <= filters.to_date)
# Get total count
count_query = select(func.count()).select_from(query.subquery())
total_result = await self.session.execute(count_query)
total = total_result.scalar() or 0
# Apply ordering, pagination
query = query.order_by(desc(AIInsight.confidence), desc(AIInsight.created_at))
query = query.offset(skip).limit(limit)
# Execute query
result = await self.session.execute(query)
insights = result.scalars().all()
return list(insights), total
async def get_orchestration_ready_insights(
self,
tenant_id: UUID,
target_date: datetime,
min_confidence: int = 70
) -> Dict[str, List[AIInsight]]:
"""Get actionable insights for orchestration."""
query = select(AIInsight).where(
and_(
AIInsight.tenant_id == tenant_id,
AIInsight.actionable == True,
AIInsight.confidence >= min_confidence,
AIInsight.status.in_(['new', 'acknowledged']),
or_(
AIInsight.expired_at.is_(None),
AIInsight.expired_at > datetime.utcnow()
)
)
).order_by(desc(AIInsight.confidence))
result = await self.session.execute(query)
insights = result.scalars().all()
# Categorize insights
categorized = {
'forecast_adjustments': [],
'procurement_recommendations': [],
'production_optimizations': [],
'supplier_alerts': [],
'price_opportunities': []
}
for insight in insights:
if insight.category == 'forecasting':
categorized['forecast_adjustments'].append(insight)
elif insight.category == 'procurement':
if 'supplier' in insight.title.lower():
categorized['supplier_alerts'].append(insight)
elif 'price' in insight.title.lower():
categorized['price_opportunities'].append(insight)
else:
categorized['procurement_recommendations'].append(insight)
elif insight.category == 'production':
categorized['production_optimizations'].append(insight)
return categorized
async def update(self, insight_id: UUID, update_data: AIInsightUpdate) -> Optional[AIInsight]:
"""Update an insight."""
insight = await self.get_by_id(insight_id)
if not insight:
return None
for field, value in update_data.model_dump(exclude_unset=True).items():
setattr(insight, field, value)
await self.session.flush()
await self.session.refresh(insight)
return insight
async def delete(self, insight_id: UUID) -> bool:
"""Delete (dismiss) an insight."""
insight = await self.get_by_id(insight_id)
if not insight:
return False
insight.status = 'dismissed'
await self.session.flush()
return True
async def get_metrics(self, tenant_id: UUID) -> Dict[str, Any]:
"""Get aggregate metrics for insights."""
query = select(AIInsight).where(
and_(
AIInsight.tenant_id == tenant_id,
AIInsight.status != 'dismissed',
or_(
AIInsight.expired_at.is_(None),
AIInsight.expired_at > datetime.utcnow()
)
)
)
result = await self.session.execute(query)
insights = result.scalars().all()
if not insights:
return {
'total_insights': 0,
'actionable_insights': 0,
'average_confidence': 0,
'high_priority_count': 0,
'medium_priority_count': 0,
'low_priority_count': 0,
'critical_priority_count': 0,
'by_category': {},
'by_status': {},
'total_potential_impact': 0
}
# Calculate metrics
total = len(insights)
actionable = sum(1 for i in insights if i.actionable)
avg_confidence = sum(i.confidence for i in insights) / total if total > 0 else 0
# Priority counts
priority_counts = {
'high': sum(1 for i in insights if i.priority == 'high'),
'medium': sum(1 for i in insights if i.priority == 'medium'),
'low': sum(1 for i in insights if i.priority == 'low'),
'critical': sum(1 for i in insights if i.priority == 'critical')
}
# By category
by_category = {}
for insight in insights:
by_category[insight.category] = by_category.get(insight.category, 0) + 1
# By status
by_status = {}
for insight in insights:
by_status[insight.status] = by_status.get(insight.status, 0) + 1
        # Total potential impact: only cost_savings and revenue_increase are summed
total_impact = sum(
float(i.impact_value) for i in insights
if i.impact_value and i.impact_type in ['cost_savings', 'revenue_increase']
)
return {
'total_insights': total,
'actionable_insights': actionable,
'average_confidence': round(avg_confidence, 1),
'high_priority_count': priority_counts['high'],
'medium_priority_count': priority_counts['medium'],
'low_priority_count': priority_counts['low'],
'critical_priority_count': priority_counts['critical'],
'by_category': by_category,
'by_status': by_status,
'total_potential_impact': round(total_impact, 2)
}
    async def expire_old_insights(self) -> int:
        """Transition insights past their expiry timestamp to 'expired'; returns the count updated."""
query = select(AIInsight).where(
and_(
AIInsight.expired_at.isnot(None),
AIInsight.expired_at <= datetime.utcnow(),
AIInsight.status.notin_(['applied', 'dismissed', 'expired'])
)
)
result = await self.session.execute(query)
insights = result.scalars().all()
count = 0
for insight in insights:
insight.status = 'expired'
count += 1
await self.session.flush()
return count
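Taken together, these methods support a straightforward consumption pattern. Below is a minimal sketch of an orchestration job draining the high-confidence buckets; `async_session_factory` and the body of the apply step are assumptions for illustration, not part of this commit:

```python
# Hypothetical consumer of InsightRepository; async_session_factory is an
# assumed AsyncSession factory and is not defined in this commit.
from datetime import datetime
from uuid import UUID

from app.repositories.insight_repository import InsightRepository


async def run_orchestration_cycle(tenant_id: UUID, async_session_factory) -> None:
    async with async_session_factory() as session:
        repo = InsightRepository(session)
        buckets = await repo.get_orchestration_ready_insights(
            tenant_id=tenant_id,
            target_date=datetime.utcnow(),
            min_confidence=70,
        )
        for insight in buckets['forecast_adjustments']:
            # Apply the adjustment, then record the outcome so the
            # feedback loop can refine future confidence scores.
            ...
        await session.commit()
```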

View File

@@ -0,0 +1,27 @@
"""Pydantic schemas for AI Insights Service."""
from app.schemas.insight import (
AIInsightBase,
AIInsightCreate,
AIInsightUpdate,
AIInsightResponse,
AIInsightList,
InsightMetrics,
InsightFilters
)
from app.schemas.feedback import (
InsightFeedbackCreate,
InsightFeedbackResponse
)
__all__ = [
"AIInsightBase",
"AIInsightCreate",
"AIInsightUpdate",
"AIInsightResponse",
"AIInsightList",
"InsightMetrics",
"InsightFilters",
"InsightFeedbackCreate",
"InsightFeedbackResponse",
]

View File

@@ -0,0 +1,37 @@
"""Pydantic schemas for Insight Feedback."""
from pydantic import BaseModel, Field, ConfigDict
from typing import Optional, Dict, Any
from datetime import datetime
from uuid import UUID
from decimal import Decimal
class InsightFeedbackBase(BaseModel):
"""Base schema for Insight Feedback."""
action_taken: str
result_data: Optional[Dict[str, Any]] = Field(default_factory=dict)
success: bool
error_message: Optional[str] = None
expected_impact_value: Optional[Decimal] = None
actual_impact_value: Optional[Decimal] = None
variance_percentage: Optional[Decimal] = None
class InsightFeedbackCreate(InsightFeedbackBase):
"""Schema for creating feedback."""
insight_id: UUID
applied_by: Optional[str] = "system"
class InsightFeedbackResponse(InsightFeedbackBase):
"""Schema for feedback response."""
id: UUID
insight_id: UUID
applied_by: str
created_at: datetime
model_config = ConfigDict(from_attributes=True)
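The schema stores expected and actual impact side by side but leaves the variance computation to the caller. One plausible convention (positive variance meaning the action overdelivered), shown as a sketch:

```python
# One plausible way to populate variance_percentage before submitting feedback;
# the sign convention (positive = overdelivered) is an assumption.
from decimal import Decimal
from uuid import uuid4

from app.schemas.feedback import InsightFeedbackCreate

expected = Decimal("1200.00")
actual = Decimal("1350.00")

feedback = InsightFeedbackCreate(
    insight_id=uuid4(),
    action_taken="purchase_order_adjusted",
    success=True,
    expected_impact_value=expected,
    actual_impact_value=actual,
    # (actual - expected) / expected, expressed as a percentage
    variance_percentage=(actual - expected) / expected * 100,
)
print(feedback.variance_percentage)  # 12.5
```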

View File

@@ -0,0 +1,93 @@
"""Pydantic schemas for AI Insights."""
from pydantic import BaseModel, Field, ConfigDict
from typing import Optional, Dict, Any, List
from datetime import datetime
from uuid import UUID
from decimal import Decimal
class AIInsightBase(BaseModel):
"""Base schema for AI Insight."""
type: str = Field(..., description="optimization, alert, prediction, recommendation, insight, anomaly")
priority: str = Field(..., description="low, medium, high, critical")
category: str = Field(..., description="forecasting, inventory, production, procurement, customer, etc.")
title: str = Field(..., max_length=255)
description: str
impact_type: Optional[str] = Field(None, description="cost_savings, revenue_increase, waste_reduction, etc.")
impact_value: Optional[Decimal] = None
impact_unit: Optional[str] = Field(None, description="euros, percentage, hours, units, etc.")
confidence: int = Field(..., ge=0, le=100, description="Confidence score 0-100")
metrics_json: Optional[Dict[str, Any]] = Field(default_factory=dict)
actionable: bool = True
recommendation_actions: Optional[List[Dict[str, str]]] = Field(default_factory=list)
source_service: Optional[str] = None
source_data_id: Optional[str] = None
class AIInsightCreate(AIInsightBase):
"""Schema for creating a new AI Insight."""
tenant_id: UUID
class AIInsightUpdate(BaseModel):
"""Schema for updating an AI Insight."""
status: Optional[str] = Field(None, description="new, acknowledged, in_progress, applied, dismissed, expired")
applied_at: Optional[datetime] = None
model_config = ConfigDict(from_attributes=True)
class AIInsightResponse(AIInsightBase):
"""Schema for AI Insight response."""
id: UUID
tenant_id: UUID
status: str
created_at: datetime
updated_at: datetime
applied_at: Optional[datetime] = None
expired_at: Optional[datetime] = None
model_config = ConfigDict(from_attributes=True)
class AIInsightList(BaseModel):
"""Paginated list of AI Insights."""
items: List[AIInsightResponse]
total: int
page: int
page_size: int
total_pages: int
class InsightMetrics(BaseModel):
"""Aggregate metrics for insights."""
total_insights: int
actionable_insights: int
average_confidence: float
high_priority_count: int
medium_priority_count: int
low_priority_count: int
critical_priority_count: int
by_category: Dict[str, int]
by_status: Dict[str, int]
total_potential_impact: Optional[Decimal] = None
class InsightFilters(BaseModel):
"""Filters for querying insights."""
category: Optional[str] = None
priority: Optional[str] = None
status: Optional[str] = None
actionable_only: bool = False
min_confidence: int = 0
source_service: Optional[str] = None
from_date: Optional[datetime] = None
to_date: Optional[datetime] = None
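For reference, a minimal `AIInsightCreate` payload that satisfies the required fields; every value below is illustrative:

```python
# Illustrative payload for AIInsightCreate; all field values are made up.
from decimal import Decimal
from uuid import uuid4

from app.schemas.insight import AIInsightCreate

insight = AIInsightCreate(
    tenant_id=uuid4(),
    type="recommendation",
    priority="high",
    category="procurement",
    title="Supplier lead time increased for flour",
    description="Average lead time rose from 2 to 5 days over the last month.",
    impact_type="cost_savings",
    impact_value=Decimal("840.00"),
    impact_unit="euros",
    confidence=82,
    recommendation_actions=[{"action": "reorder_earlier", "label": "Move order date forward"}],
    source_service="procurement",
)
```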

View File

@@ -0,0 +1,229 @@
"""Confidence scoring calculator for AI Insights."""
from typing import Dict, Any, Optional
from datetime import datetime, timedelta
import math
class ConfidenceCalculator:
"""
Calculate unified confidence scores across different insight types.
Confidence is calculated based on multiple factors:
- Data quality (completeness, consistency)
- Model performance (historical accuracy)
- Sample size (statistical significance)
- Recency (how recent is the data)
- Historical accuracy (past insight performance)
"""
# Weights for different factors
WEIGHTS = {
'data_quality': 0.25,
'model_performance': 0.30,
'sample_size': 0.20,
'recency': 0.15,
'historical_accuracy': 0.10
}
def calculate_confidence(
self,
data_quality_score: Optional[float] = None,
model_performance_score: Optional[float] = None,
sample_size: Optional[int] = None,
data_date: Optional[datetime] = None,
historical_accuracy: Optional[float] = None,
insight_type: Optional[str] = None
) -> int:
"""
Calculate overall confidence score (0-100).
Args:
data_quality_score: 0-1 score for data quality
model_performance_score: 0-1 score from model metrics (e.g., 1-MAPE)
sample_size: Number of data points used
data_date: Date of most recent data
historical_accuracy: 0-1 score from past insight performance
insight_type: Type of insight for specific adjustments
Returns:
int: Confidence score 0-100
"""
scores = {}
# Data Quality Score (0-100)
if data_quality_score is not None:
scores['data_quality'] = min(100, data_quality_score * 100)
else:
scores['data_quality'] = 70 # Default
# Model Performance Score (0-100)
if model_performance_score is not None:
scores['model_performance'] = min(100, model_performance_score * 100)
else:
scores['model_performance'] = 75 # Default
# Sample Size Score (0-100)
if sample_size is not None:
scores['sample_size'] = self._score_sample_size(sample_size)
else:
scores['sample_size'] = 60 # Default
# Recency Score (0-100)
if data_date is not None:
scores['recency'] = self._score_recency(data_date)
else:
scores['recency'] = 80 # Default
# Historical Accuracy Score (0-100)
if historical_accuracy is not None:
scores['historical_accuracy'] = min(100, historical_accuracy * 100)
else:
scores['historical_accuracy'] = 65 # Default
# Calculate weighted average
confidence = sum(
scores[factor] * self.WEIGHTS[factor]
for factor in scores
)
# Apply insight-type specific adjustments
confidence = self._apply_type_adjustments(confidence, insight_type)
return int(round(confidence))
    def _score_sample_size(self, sample_size: int) -> float:
        """
        Score based on sample size using banded thresholds, with logarithmic
        scaling once the sample exceeds roughly a year of daily data.
        Args:
            sample_size: Number of data points
        Returns:
            float: Score 0-100
        """
if sample_size <= 10:
return 30.0
elif sample_size <= 30:
return 50.0
elif sample_size <= 100:
return 70.0
elif sample_size <= 365:
return 85.0
else:
# Logarithmic scaling for larger samples
return min(100.0, 85 + (math.log10(sample_size) - math.log10(365)) * 10)
def _score_recency(self, data_date: datetime) -> float:
"""
Score based on data recency.
Args:
data_date: Date of most recent data
Returns:
float: Score 0-100
"""
        # Assumes naive UTC datetimes, consistent with datetime.utcnow() usage elsewhere
        days_old = (datetime.utcnow() - data_date).days
if days_old == 0:
return 100.0
elif days_old <= 1:
return 95.0
elif days_old <= 3:
return 90.0
elif days_old <= 7:
return 80.0
elif days_old <= 14:
return 70.0
elif days_old <= 30:
return 60.0
elif days_old <= 60:
return 45.0
else:
# Exponential decay for older data
return max(20.0, 60 * math.exp(-days_old / 60))
def _apply_type_adjustments(self, base_confidence: float, insight_type: Optional[str]) -> float:
"""
Apply insight-type specific confidence adjustments.
Args:
base_confidence: Base confidence score
insight_type: Type of insight
Returns:
float: Adjusted confidence
"""
if not insight_type:
return base_confidence
adjustments = {
'prediction': -5, # Predictions inherently less certain
'optimization': +2, # Optimizations based on solid math
'alert': +3, # Alerts based on thresholds
'recommendation': 0, # No adjustment
'insight': +2, # Insights from data analysis
'anomaly': -3 # Anomalies are uncertain
}
adjustment = adjustments.get(insight_type, 0)
return max(0, min(100, base_confidence + adjustment))
def calculate_forecast_confidence(
self,
model_mape: float,
forecast_horizon_days: int,
data_points: int,
last_data_date: datetime
) -> int:
"""
Specialized confidence calculation for forecasting insights.
Args:
model_mape: Model MAPE (Mean Absolute Percentage Error)
forecast_horizon_days: How many days ahead
data_points: Number of historical data points
last_data_date: Date of last training data
Returns:
int: Confidence score 0-100
"""
        # Model performance: 1 - (MAPE/100), floored at 0 (a MAPE of 100%+ earns no credit)
        model_score = max(0, 1 - (model_mape / 100))
        # Horizon penalty: confidence decays linearly with horizon, floored at 0.5
        horizon_factor = max(0.5, 1 - (forecast_horizon_days / 30))
return self.calculate_confidence(
data_quality_score=0.9, # Assume good quality
model_performance_score=model_score * horizon_factor,
sample_size=data_points,
data_date=last_data_date,
insight_type='prediction'
)
def calculate_optimization_confidence(
self,
calculation_accuracy: float,
data_completeness: float,
sample_size: int
) -> int:
"""
Confidence for optimization recommendations.
Args:
calculation_accuracy: 0-1 score for optimization calculation reliability
data_completeness: 0-1 score for data completeness
sample_size: Number of data points
Returns:
int: Confidence score 0-100
"""
return self.calculate_confidence(
data_quality_score=data_completeness,
model_performance_score=calculation_accuracy,
sample_size=sample_size,
data_date=datetime.utcnow(),
insight_type='optimization'
)
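The defaults make the weighted average easy to verify by hand: 70·0.25 + 75·0.30 + 60·0.20 + 80·0.15 + 65·0.10 = 70.5, which Python's `round()` takes to 70 under banker's rounding, and the 'prediction' adjustment of −5 lands at 66. A quick sanity check; the import path for `ConfidenceCalculator` is an assumption:

```python
# Sanity check of the default weighting; the module path is assumed.
from app.services.confidence_calculator import ConfidenceCalculator

calc = ConfidenceCalculator()

# All factors defaulted: 70*0.25 + 75*0.30 + 60*0.20 + 80*0.15 + 65*0.10 = 70.5
assert calc.calculate_confidence() == 70  # round(70.5) -> 70 (ties to even)

# 'prediction' insights take a -5 adjustment: 70.5 - 5 = 65.5 -> 66
assert calc.calculate_confidence(insight_type='prediction') == 66
```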