Improve AI logic
3
services/ai_insights/app/__init__.py
Normal file
@@ -0,0 +1,3 @@
"""AI Insights Service."""

__version__ = "1.0.0"
1
services/ai_insights/app/api/__init__.py
Normal file
@@ -0,0 +1 @@
"""API modules for AI Insights Service."""
323
services/ai_insights/app/api/insights.py
Normal file
@@ -0,0 +1,323 @@
"""API endpoints for AI Insights."""

from fastapi import APIRouter, Depends, HTTPException, Query, status
from sqlalchemy.ext.asyncio import AsyncSession
from typing import Optional
from uuid import UUID
from datetime import datetime
import math

from app.core.database import get_db
from app.repositories.insight_repository import InsightRepository
from app.repositories.feedback_repository import FeedbackRepository
from app.schemas.insight import (
    AIInsightCreate,
    AIInsightUpdate,
    AIInsightResponse,
    AIInsightList,
    InsightMetrics,
    InsightFilters
)
from app.schemas.feedback import InsightFeedbackCreate, InsightFeedbackResponse

router = APIRouter()


@router.post("/tenants/{tenant_id}/insights", response_model=AIInsightResponse, status_code=status.HTTP_201_CREATED)
async def create_insight(
    tenant_id: UUID,
    insight_data: AIInsightCreate,
    db: AsyncSession = Depends(get_db)
):
    """Create a new AI Insight."""
    # Ensure tenant_id matches
    if insight_data.tenant_id != tenant_id:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Tenant ID mismatch"
        )

    repo = InsightRepository(db)
    insight = await repo.create(insight_data)
    await db.commit()

    return insight


@router.get("/tenants/{tenant_id}/insights", response_model=AIInsightList)
async def get_insights(
    tenant_id: UUID,
    category: Optional[str] = Query(None),
    priority: Optional[str] = Query(None),
    status: Optional[str] = Query(None),
    actionable_only: bool = Query(False),
    min_confidence: int = Query(0, ge=0, le=100),
    source_service: Optional[str] = Query(None),
    from_date: Optional[datetime] = Query(None),
    to_date: Optional[datetime] = Query(None),
    page: int = Query(1, ge=1),
    page_size: int = Query(20, ge=1, le=100),
    db: AsyncSession = Depends(get_db)
):
    """Get insights for a tenant with filters and pagination."""
    filters = InsightFilters(
        category=category,
        priority=priority,
        status=status,
        actionable_only=actionable_only,
        min_confidence=min_confidence,
        source_service=source_service,
        from_date=from_date,
        to_date=to_date
    )

    repo = InsightRepository(db)
    skip = (page - 1) * page_size

    insights, total = await repo.get_by_tenant(tenant_id, filters, skip, page_size)

    total_pages = math.ceil(total / page_size) if total > 0 else 0

    return AIInsightList(
        items=insights,
        total=total,
        page=page,
        page_size=page_size,
        total_pages=total_pages
    )


@router.get("/tenants/{tenant_id}/insights/orchestration-ready")
async def get_orchestration_ready_insights(
    tenant_id: UUID,
    target_date: datetime = Query(...),
    min_confidence: int = Query(70, ge=0, le=100),
    db: AsyncSession = Depends(get_db)
):
    """Get actionable insights for orchestration workflow."""
    repo = InsightRepository(db)
    categorized_insights = await repo.get_orchestration_ready_insights(
        tenant_id, target_date, min_confidence
    )

    return categorized_insights


# Registered before the /{insight_id} routes so the literal "export" segment
# is not captured as an insight_id (which would fail UUID validation with a 422).
@router.get("/tenants/{tenant_id}/insights/export")
async def export_insights(
    tenant_id: UUID,
    format: str = Query("json", regex="^(json|csv)$"),
    db: AsyncSession = Depends(get_db)
):
    """Export insights to JSON or CSV."""
    repo = InsightRepository(db)
    insights, _ = await repo.get_by_tenant(tenant_id, filters=None, skip=0, limit=1000)

    if format == "json":
        return {"insights": [AIInsightResponse.model_validate(i) for i in insights]}

    # CSV export would be implemented here
    raise HTTPException(
        status_code=status.HTTP_501_NOT_IMPLEMENTED,
        detail="CSV export not yet implemented"
    )


@router.get("/tenants/{tenant_id}/insights/{insight_id}", response_model=AIInsightResponse)
async def get_insight(
    tenant_id: UUID,
    insight_id: UUID,
    db: AsyncSession = Depends(get_db)
):
    """Get a single insight by ID."""
    repo = InsightRepository(db)
    insight = await repo.get_by_id(insight_id)

    if not insight:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Insight not found"
        )

    if insight.tenant_id != tenant_id:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Access denied"
        )

    return insight


@router.patch("/tenants/{tenant_id}/insights/{insight_id}", response_model=AIInsightResponse)
async def update_insight(
    tenant_id: UUID,
    insight_id: UUID,
    update_data: AIInsightUpdate,
    db: AsyncSession = Depends(get_db)
):
    """Update an insight (typically status changes)."""
    repo = InsightRepository(db)

    # Verify insight exists and belongs to tenant
    insight = await repo.get_by_id(insight_id)
    if not insight:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Insight not found"
        )

    if insight.tenant_id != tenant_id:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Access denied"
        )

    updated_insight = await repo.update(insight_id, update_data)
    await db.commit()

    return updated_insight


@router.delete("/tenants/{tenant_id}/insights/{insight_id}", status_code=status.HTTP_204_NO_CONTENT)
async def dismiss_insight(
    tenant_id: UUID,
    insight_id: UUID,
    db: AsyncSession = Depends(get_db)
):
    """Dismiss an insight (soft delete)."""
    repo = InsightRepository(db)

    # Verify insight exists and belongs to tenant
    insight = await repo.get_by_id(insight_id)
    if not insight:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Insight not found"
        )

    if insight.tenant_id != tenant_id:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Access denied"
        )

    await repo.delete(insight_id)
    await db.commit()


@router.get("/tenants/{tenant_id}/insights/metrics/summary", response_model=InsightMetrics)
async def get_insights_metrics(
    tenant_id: UUID,
    db: AsyncSession = Depends(get_db)
):
    """Get aggregate metrics for insights."""
    repo = InsightRepository(db)
    metrics = await repo.get_metrics(tenant_id)

    return InsightMetrics(**metrics)


@router.post("/tenants/{tenant_id}/insights/{insight_id}/apply")
async def apply_insight(
    tenant_id: UUID,
    insight_id: UUID,
    db: AsyncSession = Depends(get_db)
):
    """Apply an insight recommendation (trigger action)."""
    repo = InsightRepository(db)

    # Verify insight exists and belongs to tenant
    insight = await repo.get_by_id(insight_id)
    if not insight:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Insight not found"
        )

    if insight.tenant_id != tenant_id:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Access denied"
        )

    if not insight.actionable:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="This insight is not actionable"
        )

    # Update status to in_progress
    update_data = AIInsightUpdate(status='in_progress', applied_at=datetime.utcnow())
    await repo.update(insight_id, update_data)
    await db.commit()

    # TODO: Route to appropriate service based on recommendation_actions
    # This will be implemented when service clients are added

    return {
        "message": "Insight application initiated",
        "insight_id": str(insight_id),
        "actions": insight.recommendation_actions
    }


@router.post("/tenants/{tenant_id}/insights/{insight_id}/feedback", response_model=InsightFeedbackResponse)
async def record_feedback(
    tenant_id: UUID,
    insight_id: UUID,
    feedback_data: InsightFeedbackCreate,
    db: AsyncSession = Depends(get_db)
):
    """Record feedback for an applied insight."""
    insight_repo = InsightRepository(db)

    # Verify insight exists and belongs to tenant
    insight = await insight_repo.get_by_id(insight_id)
    if not insight:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Insight not found"
        )

    if insight.tenant_id != tenant_id:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Access denied"
        )

    # Ensure feedback is for this insight
    if feedback_data.insight_id != insight_id:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Insight ID mismatch"
        )

    feedback_repo = FeedbackRepository(db)
    feedback = await feedback_repo.create(feedback_data)

    # Update insight status based on feedback
    new_status = 'applied' if feedback.success else 'dismissed'
    update_data = AIInsightUpdate(status=new_status)
    await insight_repo.update(insight_id, update_data)

    await db.commit()

    return feedback


@router.post("/tenants/{tenant_id}/insights/refresh")
async def refresh_insights(
    tenant_id: UUID,
    db: AsyncSession = Depends(get_db)
):
    """Trigger insight refresh (expire old, generate new)."""
    repo = InsightRepository(db)

    # Expire old insights
    expired_count = await repo.expire_old_insights()
    await db.commit()

    return {
        "message": "Insights refreshed",
        "expired_count": expired_count
    }
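A minimal client sketch for the list endpoint above (the tenant UUID is a placeholder; assumes the service is reachable on localhost:8000 with the API_V1_PREFIX from config.py):

# Client sketch: list actionable, high-confidence insights for a tenant.
import httpx

BASE = "http://localhost:8000/api/v1"
tenant_id = "00000000-0000-0000-0000-000000000001"  # placeholder tenant

resp = httpx.get(
    f"{BASE}/tenants/{tenant_id}/insights",
    params={"actionable_only": True, "min_confidence": 70, "page": 1, "page_size": 20},
)
resp.raise_for_status()
for item in resp.json()["items"]:
    print(item["title"], item["confidence"])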
77
services/ai_insights/app/core/config.py
Normal file
@@ -0,0 +1,77 @@
"""Configuration settings for AI Insights Service."""

from shared.config.base import BaseServiceSettings
import os
from typing import Optional


class Settings(BaseServiceSettings):
    """Application settings."""

    # Service Info
    SERVICE_NAME: str = "ai-insights"
    SERVICE_VERSION: str = "1.0.0"
    API_V1_PREFIX: str = "/api/v1"

    # Database configuration (secure approach - build from components)
    @property
    def DATABASE_URL(self) -> str:
        """Build database URL from secure components."""
        # Try complete URL first (for backward compatibility)
        complete_url = os.getenv("AI_INSIGHTS_DATABASE_URL")
        if complete_url:
            return complete_url

        # Also check for generic DATABASE_URL (for migration compatibility)
        generic_url = os.getenv("DATABASE_URL")
        if generic_url:
            return generic_url

        # Build from components (secure approach)
        user = os.getenv("AI_INSIGHTS_DB_USER", "ai_insights_user")
        password = os.getenv("AI_INSIGHTS_DB_PASSWORD", "ai_insights_pass123")
        host = os.getenv("AI_INSIGHTS_DB_HOST", "localhost")
        port = os.getenv("AI_INSIGHTS_DB_PORT", "5432")
        name = os.getenv("AI_INSIGHTS_DB_NAME", "ai_insights_db")

        return f"postgresql+asyncpg://{user}:{password}@{host}:{port}/{name}"

    DB_POOL_SIZE: int = 20
    DB_MAX_OVERFLOW: int = 10

    # Redis (inherited from BaseServiceSettings but can override)
    REDIS_CACHE_TTL: int = 900  # 15 minutes
    REDIS_DB: int = 3  # Dedicated Redis database for AI Insights

    # Service URLs
    FORECASTING_SERVICE_URL: str = "http://forecasting-service:8000"
    PROCUREMENT_SERVICE_URL: str = "http://procurement-service:8000"
    PRODUCTION_SERVICE_URL: str = "http://production-service:8000"
    SALES_SERVICE_URL: str = "http://sales-service:8000"
    INVENTORY_SERVICE_URL: str = "http://inventory-service:8000"

    # Circuit Breaker Settings
    CIRCUIT_BREAKER_FAILURE_THRESHOLD: int = 5
    CIRCUIT_BREAKER_TIMEOUT: int = 60

    # Insight Settings
    MIN_CONFIDENCE_THRESHOLD: int = 60
    DEFAULT_INSIGHT_TTL_DAYS: int = 7
    MAX_INSIGHTS_PER_REQUEST: int = 100

    # Feedback Settings
    FEEDBACK_PROCESSING_ENABLED: bool = True
    FEEDBACK_PROCESSING_SCHEDULE: str = "0 6 * * *"  # Daily at 6 AM

    # Logging
    LOG_LEVEL: str = "INFO"

    # CORS
    ALLOWED_ORIGINS: list[str] = ["http://localhost:3000", "http://localhost:5173"]

    class Config:
        env_file = ".env"
        case_sensitive = True


settings = Settings()
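A quick sketch of how DATABASE_URL resolves, for reference (placeholder values; assumes the environment variables are set before the settings module is imported):

# Sketch: DATABASE_URL resolution order. AI_INSIGHTS_DATABASE_URL wins,
# then DATABASE_URL, then the component variables with their defaults.
import os

os.environ["AI_INSIGHTS_DB_PASSWORD"] = "example-only"  # placeholder, never a real secret

from app.core.config import settings

print(settings.DATABASE_URL)
# postgresql+asyncpg://ai_insights_user:example-only@localhost:5432/ai_insights_db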
58
services/ai_insights/app/core/database.py
Normal file
@@ -0,0 +1,58 @@
"""Database configuration and session management."""

from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine, async_sessionmaker
from sqlalchemy.orm import declarative_base
from typing import AsyncGenerator

from app.core.config import settings

# Create async engine
engine = create_async_engine(
    settings.DATABASE_URL,
    pool_size=settings.DB_POOL_SIZE,
    max_overflow=settings.DB_MAX_OVERFLOW,
    echo=False,
    future=True,
)

# Create async session factory
# (no autocommit flag: it was removed in SQLAlchemy 2.0, where
# async_sessionmaker lives; sessions are transactional by default)
AsyncSessionLocal = async_sessionmaker(
    engine,
    class_=AsyncSession,
    expire_on_commit=False,
    autoflush=False,
)

# Create declarative base
Base = declarative_base()


async def get_db() -> AsyncGenerator[AsyncSession, None]:
    """
    Dependency for getting async database sessions.

    Yields:
        AsyncSession: Database session
    """
    async with AsyncSessionLocal() as session:
        try:
            yield session
            await session.commit()
        except Exception:
            await session.rollback()
            raise
        finally:
            await session.close()


async def init_db():
    """Initialize database tables."""
    async with engine.begin() as conn:
        await conn.run_sync(Base.metadata.create_all)


async def close_db():
    """Close database connections."""
    await engine.dispose()
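Outside FastAPI's dependency injection, the same session factory can be used directly; a sketch (assumes the database configured above is reachable):

# Sketch: manual session usage in a script or background job.
import asyncio

from app.core.database import AsyncSessionLocal, init_db


async def main():
    await init_db()  # create tables from Base.metadata if missing
    async with AsyncSessionLocal() as session:
        # ... run queries through `session` here ...
        await session.commit()


asyncio.run(main())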
320
services/ai_insights/app/impact/impact_estimator.py
Normal file
@@ -0,0 +1,320 @@
"""Impact estimation for AI Insights."""

from typing import Dict, Any, Optional, Tuple
from decimal import Decimal
from datetime import datetime, timedelta


class ImpactEstimator:
    """
    Estimate potential impact of recommendations.

    Calculates expected business value in terms of:
    - Cost savings (euros)
    - Revenue increase (euros)
    - Waste reduction (euros or percentage)
    - Efficiency gains (hours or percentage)
    - Quality improvements (units or percentage)
    """

    def estimate_procurement_savings(
        self,
        current_price: Decimal,
        predicted_price: Decimal,
        order_quantity: Decimal,
        timeframe_days: int = 30
    ) -> Tuple[Decimal, str, str]:
        """
        Estimate savings from opportunistic buying.

        Args:
            current_price: Current unit price
            predicted_price: Predicted future price
            order_quantity: Quantity to order
            timeframe_days: Time horizon for prediction

        Returns:
            tuple: (impact_value, impact_unit, impact_type)
        """
        savings_per_unit = predicted_price - current_price

        if savings_per_unit > 0:
            total_savings = savings_per_unit * order_quantity
            return (
                round(total_savings, 2),
                'euros',
                'cost_savings'
            )
        return (Decimal('0.0'), 'euros', 'cost_savings')

    def estimate_waste_reduction_savings(
        self,
        current_waste_rate: float,
        optimized_waste_rate: float,
        monthly_volume: Decimal,
        avg_cost_per_unit: Decimal
    ) -> Tuple[Decimal, str, str]:
        """
        Estimate savings from waste reduction.

        Args:
            current_waste_rate: Current waste rate (0-1)
            optimized_waste_rate: Optimized waste rate (0-1)
            monthly_volume: Monthly volume
            avg_cost_per_unit: Average cost per unit

        Returns:
            tuple: (impact_value, impact_unit, impact_type)
        """
        waste_reduction_rate = current_waste_rate - optimized_waste_rate
        units_saved = monthly_volume * Decimal(str(waste_reduction_rate))
        savings = units_saved * avg_cost_per_unit

        return (
            round(savings, 2),
            'euros/month',
            'waste_reduction'
        )

    def estimate_forecast_improvement_value(
        self,
        current_mape: float,
        improved_mape: float,
        avg_monthly_revenue: Decimal
    ) -> Tuple[Decimal, str, str]:
        """
        Estimate value from forecast accuracy improvement.

        Better forecasts reduce:
        - Stockouts (lost sales)
        - Overproduction (waste)
        - Emergency orders (premium costs)

        Args:
            current_mape: Current forecast MAPE
            improved_mape: Improved forecast MAPE
            avg_monthly_revenue: Average monthly revenue

        Returns:
            tuple: (impact_value, impact_unit, impact_type)
        """
        # Rule of thumb: 1% MAPE improvement = 0.5% revenue impact
        mape_improvement = current_mape - improved_mape
        revenue_impact_pct = mape_improvement * 0.5 / 100

        revenue_increase = avg_monthly_revenue * Decimal(str(revenue_impact_pct))

        return (
            round(revenue_increase, 2),
            'euros/month',
            'revenue_increase'
        )

    def estimate_production_efficiency_gain(
        self,
        time_saved_minutes: int,
        batches_per_month: int,
        labor_cost_per_hour: Decimal = Decimal('15.0')
    ) -> Tuple[Decimal, str, str]:
        """
        Estimate value from production efficiency improvements.

        Args:
            time_saved_minutes: Minutes saved per batch
            batches_per_month: Number of batches per month
            labor_cost_per_hour: Labor cost per hour

        Returns:
            tuple: (impact_value, impact_unit, impact_type)
        """
        hours_saved_per_month = (time_saved_minutes * batches_per_month) / 60
        cost_savings = Decimal(str(hours_saved_per_month)) * labor_cost_per_hour

        return (
            round(cost_savings, 2),
            'euros/month',
            'efficiency_gain'
        )

    def estimate_safety_stock_optimization(
        self,
        current_safety_stock: Decimal,
        optimal_safety_stock: Decimal,
        holding_cost_per_unit_per_day: Decimal,
        stockout_cost_reduction: Decimal = Decimal('0.0')
    ) -> Tuple[Decimal, str, str]:
        """
        Estimate impact of safety stock optimization.

        Args:
            current_safety_stock: Current safety stock level
            optimal_safety_stock: Optimal safety stock level
            holding_cost_per_unit_per_day: Daily holding cost
            stockout_cost_reduction: Reduction in stockout costs

        Returns:
            tuple: (impact_value, impact_unit, impact_type)
        """
        stock_reduction = current_safety_stock - optimal_safety_stock

        if stock_reduction > 0:
            # Savings from reduced holding costs
            daily_savings = stock_reduction * holding_cost_per_unit_per_day
            monthly_savings = daily_savings * 30
            total_savings = monthly_savings + stockout_cost_reduction

            return (
                round(total_savings, 2),
                'euros/month',
                'cost_savings'
            )
        elif stock_reduction < 0:
            # Cost increase but reduces stockouts
            daily_cost = abs(stock_reduction) * holding_cost_per_unit_per_day
            monthly_cost = daily_cost * 30
            net_savings = stockout_cost_reduction - monthly_cost

            if net_savings > 0:
                return (
                    round(net_savings, 2),
                    'euros/month',
                    'cost_savings'
                )

        return (Decimal('0.0'), 'euros/month', 'cost_savings')

    def estimate_supplier_switch_savings(
        self,
        current_supplier_price: Decimal,
        alternative_supplier_price: Decimal,
        monthly_order_quantity: Decimal,
        quality_difference_score: float = 0.0  # -1 to 1
    ) -> Tuple[Decimal, str, str]:
        """
        Estimate savings from switching suppliers.

        Args:
            current_supplier_price: Current supplier unit price
            alternative_supplier_price: Alternative supplier unit price
            monthly_order_quantity: Monthly order quantity
            quality_difference_score: Quality difference (-1=worse, 0=same, 1=better)

        Returns:
            tuple: (impact_value, impact_unit, impact_type)
        """
        price_savings = (current_supplier_price - alternative_supplier_price) * monthly_order_quantity

        # Adjust for quality difference
        # If quality is worse, reduce estimated savings
        quality_adjustment = 1 + (quality_difference_score * 0.1)  # ±10% max adjustment
        adjusted_savings = price_savings * Decimal(str(quality_adjustment))

        return (
            round(adjusted_savings, 2),
            'euros/month',
            'cost_savings'
        )

    def estimate_yield_improvement_value(
        self,
        current_yield_rate: float,
        predicted_yield_rate: float,
        production_volume: Decimal,
        product_price: Decimal
    ) -> Tuple[Decimal, str, str]:
        """
        Estimate value from production yield improvements.

        Args:
            current_yield_rate: Current yield rate (0-1)
            predicted_yield_rate: Predicted yield rate (0-1)
            production_volume: Monthly production volume
            product_price: Product selling price

        Returns:
            tuple: (impact_value, impact_unit, impact_type)
        """
        yield_improvement = predicted_yield_rate - current_yield_rate

        if yield_improvement > 0:
            additional_units = production_volume * Decimal(str(yield_improvement))
            revenue_increase = additional_units * product_price

            return (
                round(revenue_increase, 2),
                'euros/month',
                'revenue_increase'
            )

        return (Decimal('0.0'), 'euros/month', 'revenue_increase')

    def estimate_demand_pattern_value(
        self,
        pattern_strength: float,  # 0-1
        potential_revenue_increase: Decimal,
        implementation_cost: Decimal = Decimal('0.0')
    ) -> Tuple[Decimal, str, str]:
        """
        Estimate value from acting on demand patterns.

        Args:
            pattern_strength: Strength of detected pattern (0-1)
            potential_revenue_increase: Potential monthly revenue increase
            implementation_cost: One-time implementation cost

        Returns:
            tuple: (impact_value, impact_unit, impact_type)
        """
        # Discount by pattern strength (confidence)
        expected_value = potential_revenue_increase * Decimal(str(pattern_strength))

        # Amortize implementation cost over 6 months
        monthly_cost = implementation_cost / 6

        net_value = expected_value - monthly_cost

        return (
            round(max(Decimal('0.0'), net_value), 2),
            'euros/month',
            'revenue_increase'
        )

    def estimate_composite_impact(
        self,
        impacts: list[Dict[str, Any]]
    ) -> Tuple[Decimal, str, str]:
        """
        Combine multiple impact estimations.

        Args:
            impacts: List of impact dicts with 'value', 'unit', 'type'

        Returns:
            tuple: (total_impact_value, impact_unit, impact_type)
        """
        total_savings = Decimal('0.0')
        total_revenue = Decimal('0.0')

        for impact in impacts:
            value = Decimal(str(impact['value']))
            impact_type = impact['type']

            if impact_type == 'cost_savings':
                total_savings += value
            elif impact_type == 'revenue_increase':
                total_revenue += value

        # Combine both types
        total_impact = total_savings + total_revenue

        if total_impact > 0:
            # Determine primary type
            primary_type = 'cost_savings' if total_savings > total_revenue else 'revenue_increase'

            return (
                round(total_impact, 2),
                'euros/month',
                primary_type
            )

        return (Decimal('0.0'), 'euros/month', 'cost_savings')
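A worked example of the opportunistic-buying estimate above, with illustrative numbers: a price expected to rise from 2.00 to 2.30 euros on a 500-unit order yields (2.30 - 2.00) * 500 = 150.00 euros of avoided cost:

# Worked example (illustrative numbers only).
from decimal import Decimal

from app.impact.impact_estimator import ImpactEstimator

estimator = ImpactEstimator()
value, unit, impact_type = estimator.estimate_procurement_savings(
    current_price=Decimal("2.00"),
    predicted_price=Decimal("2.30"),  # price expected to rise
    order_quantity=Decimal("500"),
)
print(value, unit, impact_type)  # 150.00 euros cost_savings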
93
services/ai_insights/app/main.py
Normal file
@@ -0,0 +1,93 @@
"""Main FastAPI application for AI Insights Service."""

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from contextlib import asynccontextmanager
import structlog

from app.core.config import settings
from app.core.database import init_db, close_db
from app.api import insights

# Configure structured logging
structlog.configure(
    processors=[
        structlog.processors.TimeStamper(fmt="iso"),
        structlog.processors.JSONRenderer()
    ]
)

logger = structlog.get_logger()


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Lifespan event handler for startup and shutdown."""
    # Startup
    logger.info("Starting AI Insights Service", service=settings.SERVICE_NAME, version=settings.SERVICE_VERSION)
    await init_db()
    logger.info("Database initialized")

    yield

    # Shutdown
    logger.info("Shutting down AI Insights Service")
    await close_db()
    logger.info("Database connections closed")


# Create FastAPI app
app = FastAPI(
    title="AI Insights Service",
    description="Intelligent insights and recommendations for bakery operations",
    version=settings.SERVICE_VERSION,
    lifespan=lifespan
)

# CORS middleware
app.add_middleware(
    CORSMiddleware,
    allow_origins=settings.ALLOWED_ORIGINS,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Include routers
app.include_router(
    insights.router,
    prefix=settings.API_V1_PREFIX,
    tags=["insights"]
)


@app.get("/")
async def root():
    """Root endpoint."""
    return {
        "service": settings.SERVICE_NAME,
        "version": settings.SERVICE_VERSION,
        "status": "running"
    }


@app.get("/health")
async def health_check():
    """Health check endpoint."""
    return {
        "status": "healthy",
        "service": settings.SERVICE_NAME,
        "version": settings.SERVICE_VERSION
    }


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(
        "app.main:app",
        host="0.0.0.0",
        port=8000,
        reload=True,
        log_level=settings.LOG_LEVEL.lower()
    )
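Once the app is running (for example via the uvicorn block above), a quick smoke test of the endpoints might look like:

# Sketch: smoke-test the health endpoint.
import httpx

print(httpx.get("http://localhost:8000/health").json())
# {'status': 'healthy', 'service': 'ai-insights', 'version': '1.0.0'}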
672
services/ai_insights/app/ml/feedback_learning_system.py
Normal file
@@ -0,0 +1,672 @@
"""
Feedback Loop & Learning System
Enables continuous improvement through outcome tracking and model retraining
"""

import pandas as pd
import numpy as np
from typing import Dict, List, Any, Optional, Tuple
from datetime import datetime, timedelta
from uuid import UUID
import structlog
from scipy import stats
from collections import defaultdict

logger = structlog.get_logger()


class FeedbackLearningSystem:
    """
    Manages feedback collection, model performance tracking, and retraining triggers.

    Key Responsibilities:
    1. Aggregate feedback from applied insights
    2. Calculate model performance metrics (accuracy, precision, recall)
    3. Detect performance degradation
    4. Trigger automatic retraining when needed
    5. Calibrate confidence scores based on actual accuracy
    6. Generate learning insights for model improvement

    Workflow:
    - Feedback continuously recorded via AIInsightsClient
    - Periodic performance analysis (daily/weekly)
    - Automatic alerts when performance degrades
    - Retraining recommendations with priority
    """

    def __init__(
        self,
        performance_threshold: float = 0.85,  # Minimum acceptable accuracy
        degradation_threshold: float = 0.10,  # 10% drop triggers alert
        min_feedback_samples: int = 30,  # Minimum samples for analysis
        retraining_window_days: int = 90  # Consider last 90 days
    ):
        self.performance_threshold = performance_threshold
        self.degradation_threshold = degradation_threshold
        self.min_feedback_samples = min_feedback_samples
        self.retraining_window_days = retraining_window_days

    async def analyze_model_performance(
        self,
        model_name: str,
        feedback_data: pd.DataFrame,
        baseline_performance: Optional[Dict[str, float]] = None
    ) -> Dict[str, Any]:
        """
        Analyze model performance based on feedback data.

        Args:
            model_name: Name of the model (e.g., 'hybrid_forecaster', 'yield_predictor')
            feedback_data: DataFrame with columns:
                - insight_id
                - applied_at
                - outcome_date
                - predicted_value
                - actual_value
                - error
                - error_pct
                - accuracy
            baseline_performance: Optional baseline metrics for comparison

        Returns:
            Performance analysis with metrics, trends, and recommendations
        """
        logger.info(
            "Analyzing model performance",
            model_name=model_name,
            feedback_samples=len(feedback_data)
        )

        if len(feedback_data) < self.min_feedback_samples:
            return self._insufficient_feedback_response(
                model_name, len(feedback_data), self.min_feedback_samples
            )

        # Step 1: Calculate current performance metrics
        current_metrics = self._calculate_performance_metrics(feedback_data)

        # Step 2: Analyze performance trend over time
        trend_analysis = self._analyze_performance_trend(feedback_data)

        # Step 3: Detect performance degradation
        degradation_detected = self._detect_performance_degradation(
            current_metrics, baseline_performance, trend_analysis
        )

        # Step 4: Generate retraining recommendation
        retraining_recommendation = self._generate_retraining_recommendation(
            model_name, current_metrics, degradation_detected, trend_analysis
        )

        # Step 5: Identify error patterns
        error_patterns = self._identify_error_patterns(feedback_data)

        # Step 6: Calculate confidence calibration
        confidence_calibration = self._calculate_confidence_calibration(feedback_data)

        logger.info(
            "Model performance analysis complete",
            model_name=model_name,
            current_accuracy=current_metrics['accuracy'],
            degradation_detected=degradation_detected['detected'],
            retraining_recommended=retraining_recommendation['recommended']
        )

        return {
            'model_name': model_name,
            'analyzed_at': datetime.utcnow().isoformat(),
            'feedback_samples': len(feedback_data),
            'date_range': {
                'start': feedback_data['outcome_date'].min().isoformat(),
                'end': feedback_data['outcome_date'].max().isoformat()
            },
            'current_performance': current_metrics,
            'baseline_performance': baseline_performance,
            'trend_analysis': trend_analysis,
            'degradation_detected': degradation_detected,
            'retraining_recommendation': retraining_recommendation,
            'error_patterns': error_patterns,
            'confidence_calibration': confidence_calibration
        }

    def _insufficient_feedback_response(
        self, model_name: str, current_samples: int, required_samples: int
    ) -> Dict[str, Any]:
        """Return response when insufficient feedback data."""
        return {
            'model_name': model_name,
            'analyzed_at': datetime.utcnow().isoformat(),
            'status': 'insufficient_feedback',
            'feedback_samples': current_samples,
            'required_samples': required_samples,
            'current_performance': None,
            'recommendation': f'Need {required_samples - current_samples} more feedback samples for reliable analysis'
        }

    def _calculate_performance_metrics(
        self, feedback_data: pd.DataFrame
    ) -> Dict[str, float]:
        """
        Calculate comprehensive performance metrics.

        Metrics:
        - Accuracy: % of predictions within acceptable error
        - MAE: Mean Absolute Error
        - RMSE: Root Mean Squared Error
        - MAPE: Mean Absolute Percentage Error
        - Bias: Systematic over/under prediction
        - R²: Correlation between predicted and actual
        """
        predicted = feedback_data['predicted_value'].values
        actual = feedback_data['actual_value'].values

        # Filter out invalid values
        valid_mask = ~(np.isnan(predicted) | np.isnan(actual))
        predicted = predicted[valid_mask]
        actual = actual[valid_mask]

        if len(predicted) == 0:
            return {
                'accuracy': 0,
                'mae': 0,
                'rmse': 0,
                'mape': 0,
                'bias': 0,
                'r_squared': 0
            }

        # Calculate errors
        errors = predicted - actual
        abs_errors = np.abs(errors)

        # Percentage errors only where actual != 0; a single zero actual must
        # not zero out every percentage error (which would inflate accuracy)
        valid_pct_mask = actual != 0
        pct_errors = np.full(errors.shape, np.nan)
        pct_errors[valid_pct_mask] = np.abs(errors[valid_pct_mask] / actual[valid_pct_mask]) * 100

        # MAE and RMSE
        mae = float(np.mean(abs_errors))
        rmse = float(np.sqrt(np.mean(errors ** 2)))

        # MAPE (excluding cases where actual = 0)
        mape = float(np.mean(pct_errors[valid_pct_mask])) if np.any(valid_pct_mask) else 0

        # Accuracy (% of valid predictions within 10% error)
        n_valid = int(np.sum(valid_pct_mask))
        within_10pct = float(np.sum(pct_errors[valid_pct_mask] <= 10)) / n_valid * 100 if n_valid else 0

        # Bias (mean error - positive = over-prediction)
        bias = float(np.mean(errors))

        # R² (correlation)
        if len(predicted) > 1 and np.std(actual) > 0:
            correlation = np.corrcoef(predicted, actual)[0, 1]
            r_squared = correlation ** 2
        else:
            r_squared = 0

        return {
            'accuracy': round(within_10pct, 2),  # % within 10% error
            'mae': round(mae, 2),
            'rmse': round(rmse, 2),
            'mape': round(mape, 2),
            'bias': round(bias, 2),
            'r_squared': round(r_squared, 3),
            'sample_size': len(predicted)
        }

    def _analyze_performance_trend(
        self, feedback_data: pd.DataFrame
    ) -> Dict[str, Any]:
        """
        Analyze performance trend over time.

        Returns trend direction (improving/stable/degrading) and slope.
        """
        # Sort by date
        df = feedback_data.sort_values('outcome_date').copy()

        # Calculate rolling accuracy (7-day window)
        df['rolling_accuracy'] = df['accuracy'].rolling(window=7, min_periods=3).mean()

        # Linear trend
        if len(df) >= 10:
            # Use day index as x
            df['day_index'] = (df['outcome_date'] - df['outcome_date'].min()).dt.days

            # Fit linear regression
            valid_mask = ~np.isnan(df['rolling_accuracy'])
            if valid_mask.sum() >= 10:
                x = df.loc[valid_mask, 'day_index'].values
                y = df.loc[valid_mask, 'rolling_accuracy'].values

                slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)

                # Determine trend
                if p_value < 0.05:
                    if slope > 0.1:
                        trend = 'improving'
                    elif slope < -0.1:
                        trend = 'degrading'
                    else:
                        trend = 'stable'
                else:
                    trend = 'stable'

                return {
                    'trend': trend,
                    'slope': round(float(slope), 4),
                    'p_value': round(float(p_value), 4),
                    'significant': p_value < 0.05,
                    'recent_performance': round(float(df['rolling_accuracy'].iloc[-1]), 2),
                    'initial_performance': round(float(df['rolling_accuracy'].dropna().iloc[0]), 2)
                }

        # Not enough data for trend
        return {
            'trend': 'insufficient_data',
            'slope': 0,
            'p_value': 1.0,
            'significant': False
        }

    def _detect_performance_degradation(
        self,
        current_metrics: Dict[str, float],
        baseline_performance: Optional[Dict[str, float]],
        trend_analysis: Dict[str, Any]
    ) -> Dict[str, Any]:
        """
        Detect if model performance has degraded.

        Degradation triggers:
        1. Current accuracy below threshold (85%)
        2. Significant drop from baseline (>10%)
        3. Degrading trend detected
        """
        degradation_reasons = []
        severity = 'none'

        # Check absolute performance
        if current_metrics['accuracy'] < self.performance_threshold * 100:
            degradation_reasons.append(
                f"Accuracy {current_metrics['accuracy']:.1f}% below threshold {self.performance_threshold*100}%"
            )
            severity = 'high'

        # Check vs baseline
        if baseline_performance and 'accuracy' in baseline_performance:
            baseline_acc = baseline_performance['accuracy']
            current_acc = current_metrics['accuracy']
            drop_pct = (baseline_acc - current_acc) / baseline_acc

            if drop_pct > self.degradation_threshold:
                degradation_reasons.append(
                    f"Accuracy dropped {drop_pct*100:.1f}% from baseline {baseline_acc:.1f}%"
                )
                severity = 'high'

        # Check trend
        if trend_analysis.get('trend') == 'degrading' and trend_analysis.get('significant'):
            degradation_reasons.append(
                f"Degrading trend detected (slope: {trend_analysis['slope']:.4f})"
            )
            severity = 'medium' if severity == 'none' else severity

        detected = len(degradation_reasons) > 0

        return {
            'detected': detected,
            'severity': severity,
            'reasons': degradation_reasons,
            'current_accuracy': current_metrics['accuracy'],
            'baseline_accuracy': baseline_performance.get('accuracy') if baseline_performance else None
        }

    def _generate_retraining_recommendation(
        self,
        model_name: str,
        current_metrics: Dict[str, float],
        degradation_detected: Dict[str, Any],
        trend_analysis: Dict[str, Any]
    ) -> Dict[str, Any]:
        """
        Generate retraining recommendation based on performance analysis.

        Priority Levels:
        - urgent: Severe degradation, retrain immediately
        - high: Performance below threshold, retrain soon
        - medium: Trending down, schedule retraining
        - low: Stable, routine retraining
        - none: No retraining needed
        """
        if degradation_detected['detected']:
            severity = degradation_detected['severity']

            if severity == 'high':
                priority = 'urgent'
                recommendation = f"Retrain {model_name} immediately - severe performance degradation"
            elif severity == 'medium':
                priority = 'high'
                recommendation = f"Schedule {model_name} retraining within 7 days"
            else:
                priority = 'medium'
                recommendation = f"Schedule routine {model_name} retraining"

            return {
                'recommended': True,
                'priority': priority,
                'recommendation': recommendation,
                'reasons': degradation_detected['reasons'],
                'estimated_improvement': self._estimate_retraining_benefit(
                    current_metrics, degradation_detected
                )
            }
        else:
            # Check if routine retraining is due (e.g., every 90 days)
            # This would require tracking last_retrained_at
            return {
                'recommended': False,
                'priority': 'none',
                'recommendation': f"{model_name} performance is acceptable, no immediate retraining needed",
                'next_review_date': (datetime.utcnow() + timedelta(days=30)).isoformat()
            }

    def _estimate_retraining_benefit(
        self,
        current_metrics: Dict[str, float],
        degradation_detected: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Estimate expected improvement from retraining."""
        baseline_acc = degradation_detected.get('baseline_accuracy')
        current_acc = current_metrics['accuracy']

        if baseline_acc:
            # Expect to recover 70-80% of lost performance
            expected_improvement = (baseline_acc - current_acc) * 0.75
            expected_new_acc = current_acc + expected_improvement

            return {
                'expected_accuracy_improvement': round(expected_improvement, 2),
                'expected_new_accuracy': round(expected_new_acc, 2),
                'confidence': 'medium'
            }

        return {
            'expected_accuracy_improvement': 'unknown',
            'confidence': 'low'
        }

    def _identify_error_patterns(
        self, feedback_data: pd.DataFrame
    ) -> List[Dict[str, Any]]:
        """
        Identify systematic error patterns.

        Patterns:
        - Consistent over/under prediction
        - Higher errors for specific ranges
        - Day-of-week effects
        - Seasonal effects
        """
        patterns = []

        # Pattern 1: Systematic bias
        mean_error = feedback_data['error'].mean()
        if abs(mean_error) > feedback_data['error'].std() * 0.5:
            direction = 'over-prediction' if mean_error > 0 else 'under-prediction'
            patterns.append({
                'pattern': 'systematic_bias',
                'description': f'Consistent {direction} by {abs(mean_error):.1f} units',
                'severity': 'high' if abs(mean_error) > 10 else 'medium',
                'recommendation': 'Recalibrate model bias term'
            })

        # Pattern 2: High error for large values
        if 'predicted_value' in feedback_data.columns:
            # Split into quartiles
            feedback_data['value_quartile'] = pd.qcut(
                feedback_data['predicted_value'],
                q=4,
                labels=['Q1', 'Q2', 'Q3', 'Q4'],
                duplicates='drop'
            )

            quartile_errors = feedback_data.groupby('value_quartile')['error_pct'].mean()

            if len(quartile_errors) == 4 and quartile_errors['Q4'] > quartile_errors['Q1'] * 1.5:
                patterns.append({
                    'pattern': 'high_value_error',
                    'description': f'Higher errors for large predictions (Q4: {quartile_errors["Q4"]:.1f}% vs Q1: {quartile_errors["Q1"]:.1f}%)',
                    'severity': 'medium',
                    'recommendation': 'Add log transformation or separate model for high values'
                })

        # Pattern 3: Day-of-week effect
        if 'outcome_date' in feedback_data.columns:
            feedback_data['day_of_week'] = pd.to_datetime(feedback_data['outcome_date']).dt.dayofweek

            dow_errors = feedback_data.groupby('day_of_week')['error_pct'].mean()

            if len(dow_errors) >= 5 and dow_errors.max() > dow_errors.min() * 1.5:
                worst_day = dow_errors.idxmax()
                day_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

                patterns.append({
                    'pattern': 'day_of_week_effect',
                    'description': f'Higher errors on {day_names[worst_day]} ({dow_errors[worst_day]:.1f}%)',
                    'severity': 'low',
                    'recommendation': 'Add day-of-week features to model'
                })

        return patterns

    def _calculate_confidence_calibration(
        self, feedback_data: pd.DataFrame
    ) -> Dict[str, Any]:
        """
        Calculate how well confidence scores match actual accuracy.

        Well-calibrated model: 80% confidence → 80% accuracy
        """
        if 'confidence' not in feedback_data.columns:
            return {'calibrated': False, 'reason': 'No confidence scores available'}

        # Bin by confidence ranges
        feedback_data['confidence_bin'] = pd.cut(
            feedback_data['confidence'],
            bins=[0, 60, 70, 80, 90, 100],
            labels=['<60', '60-70', '70-80', '80-90', '90+']
        )

        calibration_results = []

        for conf_bin in feedback_data['confidence_bin'].unique():
            if pd.isna(conf_bin):
                continue

            bin_data = feedback_data[feedback_data['confidence_bin'] == conf_bin]

            if len(bin_data) >= 5:
                avg_confidence = bin_data['confidence'].mean()
                avg_accuracy = bin_data['accuracy'].mean()
                calibration_error = abs(avg_confidence - avg_accuracy)

                calibration_results.append({
                    'confidence_range': str(conf_bin),
                    'avg_confidence': round(avg_confidence, 1),
                    'avg_accuracy': round(avg_accuracy, 1),
                    'calibration_error': round(calibration_error, 1),
                    'sample_size': len(bin_data),
                    'well_calibrated': calibration_error < 10
                })

        # Overall calibration
        if calibration_results:
            overall_calibration_error = np.mean([r['calibration_error'] for r in calibration_results])
            well_calibrated = overall_calibration_error < 10

            return {
                'calibrated': well_calibrated,
                'overall_calibration_error': round(overall_calibration_error, 2),
                'by_confidence_range': calibration_results,
                'recommendation': 'Confidence scores are well-calibrated' if well_calibrated
                                  else 'Recalibrate confidence scoring algorithm'
            }

        return {'calibrated': False, 'reason': 'Insufficient data for calibration analysis'}

    async def generate_learning_insights(
        self,
        performance_analyses: List[Dict[str, Any]],
        tenant_id: str
    ) -> List[Dict[str, Any]]:
        """
        Generate high-level insights about learning system performance.

        Args:
            performance_analyses: List of model performance analyses
            tenant_id: Tenant identifier

        Returns:
            Learning insights for system improvement
        """
        insights = []

        # Insight 1: Models needing urgent retraining
        urgent_models = [
            a for a in performance_analyses
            if a.get('retraining_recommendation', {}).get('priority') == 'urgent'
        ]

        if urgent_models:
            model_names = ', '.join([a['model_name'] for a in urgent_models])

            insights.append({
                'type': 'warning',
                'priority': 'urgent',
                'category': 'system',
                'title': f'Urgent Model Retraining Required: {len(urgent_models)} Models',
                'description': f'Models requiring immediate retraining: {model_names}. Performance has degraded significantly.',
                'impact_type': 'system_health',
                'confidence': 95,
                'metrics_json': {
                    'tenant_id': tenant_id,
                    'urgent_models': [a['model_name'] for a in urgent_models],
                    'affected_count': len(urgent_models)
                },
                'actionable': True,
                'recommendation_actions': [{
                    'label': 'Retrain Models',
                    'action': 'trigger_model_retraining',
                    'params': {'models': [a['model_name'] for a in urgent_models]}
                }],
                'source_service': 'ai_insights',
                'source_model': 'feedback_learning_system'
            })

        # Insight 2: Overall system health
        total_models = len(performance_analyses)
        healthy_models = [
            a for a in performance_analyses
            if not a.get('degradation_detected', {}).get('detected', False)
        ]

        health_pct = (len(healthy_models) / total_models * 100) if total_models > 0 else 0

        if health_pct < 80:
            insights.append({
                'type': 'warning',
                'priority': 'high',
                'category': 'system',
                'title': f'Learning System Health: {health_pct:.0f}%',
                'description': f'{len(healthy_models)} of {total_models} models are performing well. System-wide performance review recommended.',
                'impact_type': 'system_health',
                'confidence': 90,
                'metrics_json': {
                    'tenant_id': tenant_id,
                    'total_models': total_models,
                    'healthy_models': len(healthy_models),
                    'health_percentage': round(health_pct, 1)
                },
                'actionable': True,
                'recommendation_actions': [{
                    'label': 'Review System Health',
                    'action': 'review_learning_system',
                    'params': {'tenant_id': tenant_id}
                }],
                'source_service': 'ai_insights',
                'source_model': 'feedback_learning_system'
            })

        # Insight 3: Confidence calibration issues
        poorly_calibrated = [
            a for a in performance_analyses
            if not a.get('confidence_calibration', {}).get('calibrated', True)
        ]

        if poorly_calibrated:
            insights.append({
                'type': 'opportunity',
                'priority': 'medium',
                'category': 'system',
                'title': f'Confidence Calibration Needed: {len(poorly_calibrated)} Models',
                'description': 'Confidence scores do not match actual accuracy. Recalibration recommended.',
                'impact_type': 'system_improvement',
                'confidence': 85,
                'metrics_json': {
                    'tenant_id': tenant_id,
                    'models_needing_calibration': [a['model_name'] for a in poorly_calibrated]
                },
                'actionable': True,
                'recommendation_actions': [{
                    'label': 'Recalibrate Confidence Scores',
                    'action': 'recalibrate_confidence',
                    'params': {'models': [a['model_name'] for a in poorly_calibrated]}
                }],
                'source_service': 'ai_insights',
                'source_model': 'feedback_learning_system'
            })

        return insights

    async def calculate_roi(
        self,
        feedback_data: pd.DataFrame,
        insight_type: str
    ) -> Dict[str, Any]:
        """
        Calculate ROI for applied insights.

        Args:
            feedback_data: Feedback data with business impact metrics
            insight_type: Type of insight (e.g., 'demand_forecast', 'safety_stock')

        Returns:
            ROI calculation with cost savings and accuracy metrics
        """
        if len(feedback_data) == 0:
            return {'status': 'insufficient_data', 'samples': 0}

        # Calculate accuracy
        avg_accuracy = feedback_data['accuracy'].mean()

        # Estimate cost savings (would be more sophisticated in production)
        # For now, use impact_value from insights if available
        if 'impact_value' in feedback_data.columns:
            total_impact = feedback_data['impact_value'].sum()
            avg_impact = feedback_data['impact_value'].mean()

            return {
                'insight_type': insight_type,
                'samples': len(feedback_data),
                'avg_accuracy': round(avg_accuracy, 2),
                'total_impact_value': round(total_impact, 2),
                'avg_impact_per_insight': round(avg_impact, 2),
                'roi_validated': True
            }

        return {
            'insight_type': insight_type,
            'samples': len(feedback_data),
            'avg_accuracy': round(avg_accuracy, 2),
            'roi_validated': False,
            'note': 'Impact values not tracked in feedback'
        }
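A toy run of the analyzer above on synthetic feedback (column names follow the docstring; the noise level and model name are arbitrary):

# Sketch: analyze synthetic feedback for one model.
import asyncio

import numpy as np
import pandas as pd

from app.ml.feedback_learning_system import FeedbackLearningSystem

rng = np.random.default_rng(0)
n = 60
actual = rng.uniform(50, 150, n)
predicted = actual * rng.normal(1.0, 0.05, n)  # ~5% multiplicative noise
dates = pd.date_range("2024-01-01", periods=n, freq="D")

df = pd.DataFrame({
    "insight_id": range(n),
    "applied_at": dates,
    "outcome_date": dates,
    "predicted_value": predicted,
    "actual_value": actual,
    "error": predicted - actual,
    "error_pct": np.abs(predicted - actual) / actual * 100,
    "accuracy": 100 - np.abs(predicted - actual) / actual * 100,
})

system = FeedbackLearningSystem(min_feedback_samples=30)
report = asyncio.run(system.analyze_model_performance("hybrid_forecaster", df))
print(report["current_performance"])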
11
services/ai_insights/app/models/__init__.py
Normal file
@@ -0,0 +1,11 @@
"""Database models for AI Insights Service."""

from app.models.ai_insight import AIInsight
from app.models.insight_feedback import InsightFeedback
from app.models.insight_correlation import InsightCorrelation

__all__ = [
    "AIInsight",
    "InsightFeedback",
    "InsightCorrelation",
]
129
services/ai_insights/app/models/ai_insight.py
Normal file
@@ -0,0 +1,129 @@
"""AI Insight database model."""

from sqlalchemy import Column, String, Integer, Boolean, DECIMAL, TIMESTAMP, Text, Index, CheckConstraint
from sqlalchemy.dialects.postgresql import UUID, JSONB
from sqlalchemy.sql import func
import uuid

from app.core.database import Base


class AIInsight(Base):
    """AI Insight model for storing intelligent recommendations and predictions."""

    __tablename__ = "ai_insights"

    # Primary Key
    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)

    # Tenant Information
    tenant_id = Column(UUID(as_uuid=True), nullable=False, index=True)

    # Classification
    type = Column(
        String(50),
        nullable=False,
        index=True,
        comment="optimization, alert, prediction, recommendation, insight, anomaly"
    )
    priority = Column(
        String(20),
        nullable=False,
        index=True,
        comment="low, medium, high, critical"
    )
    category = Column(
        String(50),
        nullable=False,
        index=True,
        comment="forecasting, inventory, production, procurement, customer, cost, quality, efficiency, demand, maintenance, energy, scheduling"
    )

    # Content
    title = Column(String(255), nullable=False)
    description = Column(Text, nullable=False)

    # Impact Information
    impact_type = Column(
        String(50),
        comment="cost_savings, revenue_increase, waste_reduction, efficiency_gain, quality_improvement, risk_mitigation"
    )
    impact_value = Column(DECIMAL(10, 2), comment="Numeric impact value")
    impact_unit = Column(
        String(20),
        comment="euros, percentage, hours, units, euros/month, euros/year"
    )

    # Confidence and Metrics
    confidence = Column(
        Integer,
        CheckConstraint('confidence >= 0 AND confidence <= 100'),
        nullable=False,
        index=True,
        comment="Confidence score 0-100"
    )
    metrics_json = Column(
        JSONB,
        comment="Dynamic metrics specific to insight type"
    )

    # Actionability
    actionable = Column(
        Boolean,
        default=True,
        nullable=False,
        index=True,
        comment="Whether this insight can be acted upon"
    )
    recommendation_actions = Column(
        JSONB,
        comment="List of possible actions: [{label, action, endpoint}]"
    )

    # Status
    status = Column(
        String(20),
        default='new',
        nullable=False,
        index=True,
        comment="new, acknowledged, in_progress, applied, dismissed, expired"
    )

    # Source Information
    source_service = Column(
        String(50),
        comment="Service that generated this insight"
    )
    source_data_id = Column(
        String(100),
        comment="Reference to source data (e.g., forecast_id, model_id)"
    )

    # Timestamps
    created_at = Column(
        TIMESTAMP(timezone=True),
        server_default=func.now(),
        nullable=False,
        index=True
    )
    updated_at = Column(
        TIMESTAMP(timezone=True),
        server_default=func.now(),
        onupdate=func.now(),
        nullable=False
    )
    applied_at = Column(TIMESTAMP(timezone=True), comment="When insight was applied")
    expired_at = Column(
        TIMESTAMP(timezone=True),
        comment="When insight expires (auto-calculated based on TTL)"
    )

    # Composite Indexes
    __table_args__ = (
        Index('idx_tenant_status_category', 'tenant_id', 'status', 'category'),
        Index('idx_tenant_created_confidence', 'tenant_id', 'created_at', 'confidence'),
        Index('idx_actionable_status', 'actionable', 'status'),
    )

    def __repr__(self):
        return f"<AIInsight(id={self.id}, type={self.type}, title={self.title[:30]}, confidence={self.confidence})>"
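As a quick orientation, here is a hedged sketch of creating a row of this model directly; the async_session factory name and all field values are illustrative assumptions, not part of this commit:

import uuid

from app.core.database import async_session  # assumed session factory name
from app.models.ai_insight import AIInsight

async def create_example_insight():
    # Illustrative row only; values are placeholders
    async with async_session() as session:
        session.add(AIInsight(
            tenant_id=uuid.uuid4(),
            type="recommendation",
            priority="high",
            category="procurement",
            title="Consolidate flour orders",
            description="Combining weekly flour orders could reduce delivery costs.",
            confidence=82,
            recommendation_actions=[
                {"label": "Create PO", "action": "create_po", "endpoint": "/api/v1/purchase-orders"}
            ],
        ))
        await session.commit()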
69
services/ai_insights/app/models/insight_correlation.py
Normal file
@@ -0,0 +1,69 @@
"""Insight Correlation database model for cross-service intelligence."""

from sqlalchemy import Column, String, Integer, DECIMAL, TIMESTAMP, ForeignKey, Index
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.sql import func
from sqlalchemy.orm import relationship
import uuid

from app.core.database import Base


class InsightCorrelation(Base):
    """Track correlations between insights from different services."""

    __tablename__ = "insight_correlations"

    # Primary Key
    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)

    # Foreign Keys to AIInsights
    parent_insight_id = Column(
        UUID(as_uuid=True),
        ForeignKey('ai_insights.id', ondelete='CASCADE'),
        nullable=False,
        index=True,
        comment="Primary insight that leads to correlation"
    )
    child_insight_id = Column(
        UUID(as_uuid=True),
        ForeignKey('ai_insights.id', ondelete='CASCADE'),
        nullable=False,
        index=True,
        comment="Related insight"
    )

    # Correlation Information
    correlation_type = Column(
        String(50),
        nullable=False,
        comment="forecast_inventory, production_procurement, weather_customer, demand_supplier, etc."
    )
    correlation_strength = Column(
        DECIMAL(3, 2),
        nullable=False,
        comment="0.00 to 1.00 indicating strength of correlation"
    )

    # Combined Metrics
    combined_confidence = Column(
        Integer,
        comment="Weighted combined confidence of both insights"
    )

    # Timestamp
    created_at = Column(
        TIMESTAMP(timezone=True),
        server_default=func.now(),
        nullable=False,
        index=True
    )

    # Composite Indexes
    __table_args__ = (
        Index('idx_parent_child', 'parent_insight_id', 'child_insight_id'),
        Index('idx_correlation_type', 'correlation_type'),
    )

    def __repr__(self):
        return f"<InsightCorrelation(id={self.id}, type={self.correlation_type}, strength={self.correlation_strength})>"
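The commit does not show how combined_confidence is derived; one plausible blend, weighting the child insight by correlation_strength, might look like the sketch below (purely an illustrative assumption, not the service's actual logic):

def combine_confidence(parent_confidence: int, child_confidence: int, strength: float) -> int:
    # Illustrative assumption: blend two 0-100 confidences, weighting the
    # child by the 0.00-1.00 correlation strength
    combined = (parent_confidence + child_confidence * strength) / (1 + strength)
    return int(round(combined))

# e.g. combine_confidence(80, 60, 0.5) -> round((80 + 30) / 1.5) = 73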
87
services/ai_insights/app/models/insight_feedback.py
Normal file
@@ -0,0 +1,87 @@
"""Insight Feedback database model for closed-loop learning."""

from sqlalchemy import Column, String, Boolean, DECIMAL, TIMESTAMP, Text, ForeignKey, Index
from sqlalchemy.dialects.postgresql import UUID, JSONB
from sqlalchemy.sql import func
from sqlalchemy.orm import relationship
import uuid

from app.core.database import Base


class InsightFeedback(Base):
    """Feedback tracking for AI Insights to enable learning."""

    __tablename__ = "insight_feedback"

    # Primary Key
    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)

    # Foreign Key to AIInsight
    insight_id = Column(
        UUID(as_uuid=True),
        ForeignKey('ai_insights.id', ondelete='CASCADE'),
        nullable=False,
        index=True
    )

    # Action Information
    action_taken = Column(
        String(100),
        comment="Specific action that was taken from recommendation_actions"
    )

    # Result Data
    result_data = Column(
        JSONB,
        comment="Detailed result data from applying the insight"
    )

    # Success Tracking
    success = Column(
        Boolean,
        nullable=False,
        index=True,
        comment="Whether the insight application was successful"
    )
    error_message = Column(
        Text,
        comment="Error message if success = false"
    )

    # Impact Comparison
    expected_impact_value = Column(
        DECIMAL(10, 2),
        comment="Expected impact value from original insight"
    )
    actual_impact_value = Column(
        DECIMAL(10, 2),
        comment="Measured actual impact after application"
    )
    variance_percentage = Column(
        DECIMAL(5, 2),
        comment="(actual - expected) / expected * 100"
    )

    # User Information
    applied_by = Column(
        String(100),
        comment="User or system that applied the insight"
    )

    # Timestamp
    created_at = Column(
        TIMESTAMP(timezone=True),
        server_default=func.now(),
        nullable=False,
        index=True
    )

    # Composite Indexes
    __table_args__ = (
        Index('idx_insight_success', 'insight_id', 'success'),
        Index('idx_created_success', 'created_at', 'success'),
    )

    def __repr__(self):
        return f"<InsightFeedback(id={self.id}, insight_id={self.insight_id}, success={self.success})>"
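The variance_percentage column follows the formula in its comment; a quick worked example with Decimal values:

from decimal import Decimal

expected = Decimal("500.00")  # expected_impact_value
actual = Decimal("430.00")    # actual_impact_value
variance = (actual - expected) / expected * 100
print(variance)  # -14.00 -> the insight overestimated its impact by 14%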
9
services/ai_insights/app/repositories/__init__.py
Normal file
@@ -0,0 +1,9 @@
"""Repositories for AI Insights Service."""

from app.repositories.insight_repository import InsightRepository
from app.repositories.feedback_repository import FeedbackRepository

__all__ = [
    "InsightRepository",
    "FeedbackRepository",
]
81
services/ai_insights/app/repositories/feedback_repository.py
Normal file
@@ -0,0 +1,81 @@
"""Repository for Insight Feedback database operations."""

from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select, and_, desc
from typing import Optional, List
from uuid import UUID
from decimal import Decimal

from app.models.ai_insight import AIInsight
from app.models.insight_feedback import InsightFeedback
from app.schemas.feedback import InsightFeedbackCreate


class FeedbackRepository:
    """Repository for Insight Feedback operations."""

    def __init__(self, session: AsyncSession):
        self.session = session

    async def create(self, feedback_data: InsightFeedbackCreate) -> InsightFeedback:
        """Create feedback for an insight."""
        # Calculate variance when both values are provided; otherwise fall
        # back to any variance the caller supplied explicitly
        variance = feedback_data.variance_percentage
        if (feedback_data.expected_impact_value is not None and
                feedback_data.actual_impact_value is not None and
                feedback_data.expected_impact_value != 0):
            variance = (
                (feedback_data.actual_impact_value - feedback_data.expected_impact_value) /
                feedback_data.expected_impact_value * 100
            )

        feedback = InsightFeedback(
            **feedback_data.model_dump(exclude={'variance_percentage'}),
            variance_percentage=variance
        )
        self.session.add(feedback)
        await self.session.flush()
        await self.session.refresh(feedback)
        return feedback

    async def get_by_id(self, feedback_id: UUID) -> Optional[InsightFeedback]:
        """Get feedback by ID."""
        query = select(InsightFeedback).where(InsightFeedback.id == feedback_id)
        result = await self.session.execute(query)
        return result.scalar_one_or_none()

    async def get_by_insight(self, insight_id: UUID) -> List[InsightFeedback]:
        """Get all feedback for an insight."""
        query = select(InsightFeedback).where(
            InsightFeedback.insight_id == insight_id
        ).order_by(desc(InsightFeedback.created_at))

        result = await self.session.execute(query)
        return list(result.scalars().all())

    async def get_success_rate(self, insight_type: Optional[str] = None) -> float:
        """Calculate success rate for insights, optionally filtered by insight type."""
        query = select(InsightFeedback)
        if insight_type:
            # Join to the parent insight so feedback can be filtered by type
            query = query.join(
                AIInsight, InsightFeedback.insight_id == AIInsight.id
            ).where(AIInsight.type == insight_type)

        result = await self.session.execute(query)
        feedbacks = result.scalars().all()

        if not feedbacks:
            return 0.0

        successful = sum(1 for f in feedbacks if f.success)
        return (successful / len(feedbacks)) * 100

    async def get_average_impact_variance(self) -> Decimal:
        """Calculate average variance between expected and actual impact."""
        query = select(InsightFeedback).where(
            InsightFeedback.variance_percentage.isnot(None)
        )

        result = await self.session.execute(query)
        feedbacks = result.scalars().all()

        if not feedbacks:
            return Decimal('0.0')

        avg_variance = sum(f.variance_percentage for f in feedbacks) / len(feedbacks)
        return Decimal(str(round(float(avg_variance), 2)))
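A hedged usage sketch for this repository inside an async session (the session wiring is assumed, and the insight id would be a real row's id in practice):

from uuid import uuid4

from app.repositories.feedback_repository import FeedbackRepository
from app.schemas.feedback import InsightFeedbackCreate

async def record_feedback(session):  # session: AsyncSession from get_db
    repo = FeedbackRepository(session)
    feedback = await repo.create(InsightFeedbackCreate(
        insight_id=uuid4(),  # placeholder; normally an existing insight's id
        action_taken="create_po",
        success=True,
        expected_impact_value=500,
        actual_impact_value=430,
    ))
    await session.commit()
    print(feedback.variance_percentage)  # -14.00, computed by the repository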
254
services/ai_insights/app/repositories/insight_repository.py
Normal file
@@ -0,0 +1,254 @@
"""Repository for AI Insight database operations."""

from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select, func, and_, or_, desc
from sqlalchemy.orm import selectinload
from typing import Optional, List, Dict, Any
from uuid import UUID
from datetime import datetime, timedelta

from app.models.ai_insight import AIInsight
from app.schemas.insight import AIInsightCreate, AIInsightUpdate, InsightFilters


class InsightRepository:
    """Repository for AI Insight operations."""

    def __init__(self, session: AsyncSession):
        self.session = session

    async def create(self, insight_data: AIInsightCreate) -> AIInsight:
        """Create a new AI Insight."""
        # Calculate expiration date from the configured TTL (defaults to 7 days)
        from app.core.config import settings
        expired_at = datetime.utcnow() + timedelta(days=settings.DEFAULT_INSIGHT_TTL_DAYS)

        insight = AIInsight(
            **insight_data.model_dump(),
            expired_at=expired_at
        )
        self.session.add(insight)
        await self.session.flush()
        await self.session.refresh(insight)
        return insight

    async def get_by_id(self, insight_id: UUID) -> Optional[AIInsight]:
        """Get insight by ID."""
        query = select(AIInsight).where(AIInsight.id == insight_id)
        result = await self.session.execute(query)
        return result.scalar_one_or_none()

    async def get_by_tenant(
        self,
        tenant_id: UUID,
        filters: Optional[InsightFilters] = None,
        skip: int = 0,
        limit: int = 100
    ) -> tuple[List[AIInsight], int]:
        """Get insights for a tenant with filters and pagination."""
        # Build base query
        query = select(AIInsight).where(AIInsight.tenant_id == tenant_id)

        # Apply filters
        if filters:
            if filters.category and filters.category != 'all':
                query = query.where(AIInsight.category == filters.category)

            if filters.priority and filters.priority != 'all':
                query = query.where(AIInsight.priority == filters.priority)

            if filters.status and filters.status != 'all':
                query = query.where(AIInsight.status == filters.status)

            if filters.actionable_only:
                query = query.where(AIInsight.actionable == True)

            if filters.min_confidence > 0:
                query = query.where(AIInsight.confidence >= filters.min_confidence)

            if filters.source_service:
                query = query.where(AIInsight.source_service == filters.source_service)

            if filters.from_date:
                query = query.where(AIInsight.created_at >= filters.from_date)

            if filters.to_date:
                query = query.where(AIInsight.created_at <= filters.to_date)

        # Get total count
        count_query = select(func.count()).select_from(query.subquery())
        total_result = await self.session.execute(count_query)
        total = total_result.scalar() or 0

        # Apply ordering and pagination
        query = query.order_by(desc(AIInsight.confidence), desc(AIInsight.created_at))
        query = query.offset(skip).limit(limit)

        # Execute query
        result = await self.session.execute(query)
        insights = result.scalars().all()

        return list(insights), total

    async def get_orchestration_ready_insights(
        self,
        tenant_id: UUID,
        target_date: datetime,
        min_confidence: int = 70
    ) -> Dict[str, List[AIInsight]]:
        """Get actionable insights for orchestration.

        NOTE: target_date is accepted for API compatibility but not yet used
        for filtering; only confidence, status, and expiry are applied.
        """
        query = select(AIInsight).where(
            and_(
                AIInsight.tenant_id == tenant_id,
                AIInsight.actionable == True,
                AIInsight.confidence >= min_confidence,
                AIInsight.status.in_(['new', 'acknowledged']),
                or_(
                    AIInsight.expired_at.is_(None),
                    AIInsight.expired_at > datetime.utcnow()
                )
            )
        ).order_by(desc(AIInsight.confidence))

        result = await self.session.execute(query)
        insights = result.scalars().all()

        # Categorize insights
        categorized = {
            'forecast_adjustments': [],
            'procurement_recommendations': [],
            'production_optimizations': [],
            'supplier_alerts': [],
            'price_opportunities': []
        }

        for insight in insights:
            if insight.category == 'forecasting':
                categorized['forecast_adjustments'].append(insight)
            elif insight.category == 'procurement':
                if 'supplier' in insight.title.lower():
                    categorized['supplier_alerts'].append(insight)
                elif 'price' in insight.title.lower():
                    categorized['price_opportunities'].append(insight)
                else:
                    categorized['procurement_recommendations'].append(insight)
            elif insight.category == 'production':
                categorized['production_optimizations'].append(insight)

        return categorized

    async def update(self, insight_id: UUID, update_data: AIInsightUpdate) -> Optional[AIInsight]:
        """Update an insight."""
        insight = await self.get_by_id(insight_id)
        if not insight:
            return None

        for field, value in update_data.model_dump(exclude_unset=True).items():
            setattr(insight, field, value)

        await self.session.flush()
        await self.session.refresh(insight)
        return insight

    async def delete(self, insight_id: UUID) -> bool:
        """Soft-delete an insight by marking it as dismissed."""
        insight = await self.get_by_id(insight_id)
        if not insight:
            return False

        insight.status = 'dismissed'
        await self.session.flush()
        return True

    async def get_metrics(self, tenant_id: UUID) -> Dict[str, Any]:
        """Get aggregate metrics for insights."""
        query = select(AIInsight).where(
            and_(
                AIInsight.tenant_id == tenant_id,
                AIInsight.status != 'dismissed',
                or_(
                    AIInsight.expired_at.is_(None),
                    AIInsight.expired_at > datetime.utcnow()
                )
            )
        )

        result = await self.session.execute(query)
        insights = result.scalars().all()

        if not insights:
            return {
                'total_insights': 0,
                'actionable_insights': 0,
                'average_confidence': 0,
                'high_priority_count': 0,
                'medium_priority_count': 0,
                'low_priority_count': 0,
                'critical_priority_count': 0,
                'by_category': {},
                'by_status': {},
                'total_potential_impact': 0
            }

        # Calculate metrics
        total = len(insights)
        actionable = sum(1 for i in insights if i.actionable)
        avg_confidence = sum(i.confidence for i in insights) / total

        # Priority counts
        priority_counts = {
            'high': sum(1 for i in insights if i.priority == 'high'),
            'medium': sum(1 for i in insights if i.priority == 'medium'),
            'low': sum(1 for i in insights if i.priority == 'low'),
            'critical': sum(1 for i in insights if i.priority == 'critical')
        }

        # By category
        by_category = {}
        for insight in insights:
            by_category[insight.category] = by_category.get(insight.category, 0) + 1

        # By status
        by_status = {}
        for insight in insights:
            by_status[insight.status] = by_status.get(insight.status, 0) + 1

        # Total potential impact
        total_impact = sum(
            float(i.impact_value) for i in insights
            if i.impact_value and i.impact_type in ['cost_savings', 'revenue_increase']
        )

        return {
            'total_insights': total,
            'actionable_insights': actionable,
            'average_confidence': round(avg_confidence, 1),
            'high_priority_count': priority_counts['high'],
            'medium_priority_count': priority_counts['medium'],
            'low_priority_count': priority_counts['low'],
            'critical_priority_count': priority_counts['critical'],
            'by_category': by_category,
            'by_status': by_status,
            'total_potential_impact': round(total_impact, 2)
        }

    async def expire_old_insights(self) -> int:
        """Mark past-due insights as expired and return how many were updated."""
        query = select(AIInsight).where(
            and_(
                AIInsight.expired_at.isnot(None),
                AIInsight.expired_at <= datetime.utcnow(),
                AIInsight.status.notin_(['applied', 'dismissed', 'expired'])
            )
        )

        result = await self.session.execute(query)
        insights = result.scalars().all()

        count = 0
        for insight in insights:
            insight.status = 'expired'
            count += 1

        await self.session.flush()
        return count
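A hedged sketch of how the API layer drives this repository, mirroring the pagination math from the insights endpoint (tenant id and session wiring are placeholders):

from uuid import UUID

from app.repositories.insight_repository import InsightRepository
from app.schemas.insight import InsightFilters

async def top_procurement_insights(session, tenant_id: UUID):
    repo = InsightRepository(session)
    filters = InsightFilters(category="procurement", min_confidence=70, actionable_only=True)
    page, page_size = 1, 20
    insights, total = await repo.get_by_tenant(
        tenant_id, filters, skip=(page - 1) * page_size, limit=page_size
    )
    # Results arrive ordered by confidence, then recency
    return insights, total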
27
services/ai_insights/app/schemas/__init__.py
Normal file
@@ -0,0 +1,27 @@
"""Pydantic schemas for AI Insights Service."""

from app.schemas.insight import (
    AIInsightBase,
    AIInsightCreate,
    AIInsightUpdate,
    AIInsightResponse,
    AIInsightList,
    InsightMetrics,
    InsightFilters
)
from app.schemas.feedback import (
    InsightFeedbackCreate,
    InsightFeedbackResponse
)

__all__ = [
    "AIInsightBase",
    "AIInsightCreate",
    "AIInsightUpdate",
    "AIInsightResponse",
    "AIInsightList",
    "InsightMetrics",
    "InsightFilters",
    "InsightFeedbackCreate",
    "InsightFeedbackResponse",
]
37
services/ai_insights/app/schemas/feedback.py
Normal file
@@ -0,0 +1,37 @@
"""Pydantic schemas for Insight Feedback."""

from pydantic import BaseModel, Field, ConfigDict
from typing import Optional, Dict, Any
from datetime import datetime
from uuid import UUID
from decimal import Decimal


class InsightFeedbackBase(BaseModel):
    """Base schema for Insight Feedback."""

    action_taken: str
    result_data: Optional[Dict[str, Any]] = Field(default_factory=dict)
    success: bool
    error_message: Optional[str] = None
    expected_impact_value: Optional[Decimal] = None
    actual_impact_value: Optional[Decimal] = None
    variance_percentage: Optional[Decimal] = None


class InsightFeedbackCreate(InsightFeedbackBase):
    """Schema for creating feedback."""

    insight_id: UUID
    applied_by: Optional[str] = "system"


class InsightFeedbackResponse(InsightFeedbackBase):
    """Schema for feedback response."""

    id: UUID
    insight_id: UUID
    applied_by: str
    created_at: datetime

    model_config = ConfigDict(from_attributes=True)
93
services/ai_insights/app/schemas/insight.py
Normal file
@@ -0,0 +1,93 @@
"""Pydantic schemas for AI Insights."""

from pydantic import BaseModel, Field, ConfigDict
from typing import Optional, Dict, Any, List
from datetime import datetime
from uuid import UUID
from decimal import Decimal


class AIInsightBase(BaseModel):
    """Base schema for AI Insight."""

    type: str = Field(..., description="optimization, alert, prediction, recommendation, insight, anomaly")
    priority: str = Field(..., description="low, medium, high, critical")
    category: str = Field(..., description="forecasting, inventory, production, procurement, customer, etc.")
    title: str = Field(..., max_length=255)
    description: str
    impact_type: Optional[str] = Field(None, description="cost_savings, revenue_increase, waste_reduction, etc.")
    impact_value: Optional[Decimal] = None
    impact_unit: Optional[str] = Field(None, description="euros, percentage, hours, units, etc.")
    confidence: int = Field(..., ge=0, le=100, description="Confidence score 0-100")
    metrics_json: Optional[Dict[str, Any]] = Field(default_factory=dict)
    actionable: bool = True
    recommendation_actions: Optional[List[Dict[str, str]]] = Field(default_factory=list)
    source_service: Optional[str] = None
    source_data_id: Optional[str] = None


class AIInsightCreate(AIInsightBase):
    """Schema for creating a new AI Insight."""

    tenant_id: UUID


class AIInsightUpdate(BaseModel):
    """Schema for updating an AI Insight."""

    status: Optional[str] = Field(None, description="new, acknowledged, in_progress, applied, dismissed, expired")
    applied_at: Optional[datetime] = None

    model_config = ConfigDict(from_attributes=True)


class AIInsightResponse(AIInsightBase):
    """Schema for AI Insight response."""

    id: UUID
    tenant_id: UUID
    status: str
    created_at: datetime
    updated_at: datetime
    applied_at: Optional[datetime] = None
    expired_at: Optional[datetime] = None

    model_config = ConfigDict(from_attributes=True)


class AIInsightList(BaseModel):
    """Paginated list of AI Insights."""

    items: List[AIInsightResponse]
    total: int
    page: int
    page_size: int
    total_pages: int


class InsightMetrics(BaseModel):
    """Aggregate metrics for insights."""

    total_insights: int
    actionable_insights: int
    average_confidence: float
    high_priority_count: int
    medium_priority_count: int
    low_priority_count: int
    critical_priority_count: int
    by_category: Dict[str, int]
    by_status: Dict[str, int]
    total_potential_impact: Optional[Decimal] = None


class InsightFilters(BaseModel):
    """Filters for querying insights."""

    category: Optional[str] = None
    priority: Optional[str] = None
    status: Optional[str] = None
    actionable_only: bool = False
    min_confidence: int = 0
    source_service: Optional[str] = None
    from_date: Optional[datetime] = None
    to_date: Optional[datetime] = None
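For orientation, a hedged sketch of a JSON body a client might POST to the create endpoint, shaped to validate against AIInsightCreate (ids and values are placeholders):

payload = {
    "tenant_id": "00000000-0000-0000-0000-000000000000",  # placeholder UUID
    "type": "alert",
    "priority": "critical",
    "category": "inventory",
    "title": "Butter stock below safety level",
    "description": "Projected demand exceeds on-hand butter stock within 3 days.",
    "impact_type": "risk_mitigation",
    "impact_value": "1200.00",
    "impact_unit": "euros",
    "confidence": 91,
    "recommendation_actions": [
        {"label": "Reorder", "action": "reorder", "endpoint": "/api/v1/procurement/orders"}
    ],
    "source_service": "inventory",
}
# AIInsightCreate.model_validate(payload) validates this shape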
229
services/ai_insights/app/scoring/confidence_calculator.py
Normal file
@@ -0,0 +1,229 @@
"""Confidence scoring calculator for AI Insights."""

from typing import Dict, Any, Optional
from datetime import datetime, timedelta
import math


class ConfidenceCalculator:
    """
    Calculate unified confidence scores across different insight types.

    Confidence is calculated based on multiple factors:
    - Data quality (completeness, consistency)
    - Model performance (historical accuracy)
    - Sample size (statistical significance)
    - Recency (how recent the data is)
    - Historical accuracy (past insight performance)
    """

    # Weights for different factors
    WEIGHTS = {
        'data_quality': 0.25,
        'model_performance': 0.30,
        'sample_size': 0.20,
        'recency': 0.15,
        'historical_accuracy': 0.10
    }

    def calculate_confidence(
        self,
        data_quality_score: Optional[float] = None,
        model_performance_score: Optional[float] = None,
        sample_size: Optional[int] = None,
        data_date: Optional[datetime] = None,
        historical_accuracy: Optional[float] = None,
        insight_type: Optional[str] = None
    ) -> int:
        """
        Calculate overall confidence score (0-100).

        Args:
            data_quality_score: 0-1 score for data quality
            model_performance_score: 0-1 score from model metrics (e.g., 1 - MAPE)
            sample_size: Number of data points used
            data_date: Date of most recent data
            historical_accuracy: 0-1 score from past insight performance
            insight_type: Type of insight for specific adjustments

        Returns:
            int: Confidence score 0-100
        """
        scores = {}

        # Data Quality Score (0-100)
        if data_quality_score is not None:
            scores['data_quality'] = min(100, data_quality_score * 100)
        else:
            scores['data_quality'] = 70  # Default

        # Model Performance Score (0-100)
        if model_performance_score is not None:
            scores['model_performance'] = min(100, model_performance_score * 100)
        else:
            scores['model_performance'] = 75  # Default

        # Sample Size Score (0-100)
        if sample_size is not None:
            scores['sample_size'] = self._score_sample_size(sample_size)
        else:
            scores['sample_size'] = 60  # Default

        # Recency Score (0-100)
        if data_date is not None:
            scores['recency'] = self._score_recency(data_date)
        else:
            scores['recency'] = 80  # Default

        # Historical Accuracy Score (0-100)
        if historical_accuracy is not None:
            scores['historical_accuracy'] = min(100, historical_accuracy * 100)
        else:
            scores['historical_accuracy'] = 65  # Default

        # Calculate weighted average
        confidence = sum(
            scores[factor] * self.WEIGHTS[factor]
            for factor in scores
        )

        # Apply insight-type specific adjustments
        confidence = self._apply_type_adjustments(confidence, insight_type)

        return int(round(confidence))

    def _score_sample_size(self, sample_size: int) -> float:
        """
        Score based on sample size using a stepped, then logarithmic scale.

        Args:
            sample_size: Number of data points

        Returns:
            float: Score 0-100
        """
        if sample_size <= 10:
            return 30.0
        elif sample_size <= 30:
            return 50.0
        elif sample_size <= 100:
            return 70.0
        elif sample_size <= 365:
            return 85.0
        else:
            # Logarithmic scaling for larger samples
            return min(100.0, 85 + (math.log10(sample_size) - math.log10(365)) * 10)

    def _score_recency(self, data_date: datetime) -> float:
        """
        Score based on data recency.

        Args:
            data_date: Date of most recent data

        Returns:
            float: Score 0-100
        """
        days_old = (datetime.utcnow() - data_date).days

        if days_old == 0:
            return 100.0
        elif days_old <= 1:
            return 95.0
        elif days_old <= 3:
            return 90.0
        elif days_old <= 7:
            return 80.0
        elif days_old <= 14:
            return 70.0
        elif days_old <= 30:
            return 60.0
        elif days_old <= 60:
            return 45.0
        else:
            # Exponential decay for older data
            return max(20.0, 60 * math.exp(-days_old / 60))

    def _apply_type_adjustments(self, base_confidence: float, insight_type: Optional[str]) -> float:
        """
        Apply insight-type specific confidence adjustments.

        Args:
            base_confidence: Base confidence score
            insight_type: Type of insight

        Returns:
            float: Adjusted confidence
        """
        if not insight_type:
            return base_confidence

        adjustments = {
            'prediction': -5,      # Predictions are inherently less certain
            'optimization': +2,    # Optimizations rest on deterministic math
            'alert': +3,           # Alerts are based on hard thresholds
            'recommendation': 0,   # No adjustment
            'insight': +2,         # Insights come from direct data analysis
            'anomaly': -3          # Anomalies carry more uncertainty
        }

        adjustment = adjustments.get(insight_type, 0)
        return max(0, min(100, base_confidence + adjustment))

    def calculate_forecast_confidence(
        self,
        model_mape: float,
        forecast_horizon_days: int,
        data_points: int,
        last_data_date: datetime
    ) -> int:
        """
        Specialized confidence calculation for forecasting insights.

        Args:
            model_mape: Model MAPE (Mean Absolute Percentage Error)
            forecast_horizon_days: How many days ahead the forecast extends
            data_points: Number of historical data points
            last_data_date: Date of last training data

        Returns:
            int: Confidence score 0-100
        """
        # Model performance: 1 - (MAPE / 100), floored at 0
        model_score = max(0, 1 - (model_mape / 100))

        # Horizon penalty: longer horizons mean less confidence
        horizon_factor = max(0.5, 1 - (forecast_horizon_days / 30))

        return self.calculate_confidence(
            data_quality_score=0.9,  # Assume good quality
            model_performance_score=model_score * horizon_factor,
            sample_size=data_points,
            data_date=last_data_date,
            insight_type='prediction'
        )

    def calculate_optimization_confidence(
        self,
        calculation_accuracy: float,
        data_completeness: float,
        sample_size: int
    ) -> int:
        """
        Confidence for optimization recommendations.

        Args:
            calculation_accuracy: 0-1 score for optimization calculation reliability
            data_completeness: 0-1 score for data completeness
            sample_size: Number of data points

        Returns:
            int: Confidence score 0-100
        """
        return self.calculate_confidence(
            data_quality_score=data_completeness,
            model_performance_score=calculation_accuracy,
            sample_size=sample_size,
            data_date=datetime.utcnow(),
            insight_type='optimization'
        )
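A short worked example of the weighted blend described in the class docstring; the inputs are illustrative:

from datetime import datetime, timedelta

calc = ConfidenceCalculator()

# 0.95 quality -> 95, 0.85 model -> 85, 200 points -> 85,
# 2-day-old data -> 90, 0.7 historical accuracy -> 70
# weighted: 95*0.25 + 85*0.30 + 85*0.20 + 90*0.15 + 70*0.10 = 86.75
# 'prediction' adjustment of -5 -> 81.75, rounded to 82
score = calc.calculate_confidence(
    data_quality_score=0.95,
    model_performance_score=0.85,
    sample_size=200,
    data_date=datetime.utcnow() - timedelta(days=2),
    historical_accuracy=0.7,
    insight_type='prediction'
)
print(score)  # 82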