Improve AI logic

Urtzi Alfaro
2025-11-05 13:34:56 +01:00
parent 5c87fbcf48
commit 394ad3aea4
218 changed files with 30627 additions and 7658 deletions

View File

@@ -0,0 +1,41 @@
# AI Insights Service Environment Variables
# Service Info
SERVICE_NAME=ai-insights
SERVICE_VERSION=1.0.0
API_V1_PREFIX=/api/v1/ai-insights
# Database
DATABASE_URL=postgresql+asyncpg://postgres:postgres@localhost:5432/bakery_ai_insights
DB_POOL_SIZE=20
DB_MAX_OVERFLOW=10
# Redis
REDIS_URL=redis://localhost:6379/5
REDIS_CACHE_TTL=900
# Service URLs
FORECASTING_SERVICE_URL=http://forecasting-service:8000
PROCUREMENT_SERVICE_URL=http://procurement-service:8000
PRODUCTION_SERVICE_URL=http://production-service:8000
SALES_SERVICE_URL=http://sales-service:8000
INVENTORY_SERVICE_URL=http://inventory-service:8000
# Circuit Breaker Settings
CIRCUIT_BREAKER_FAILURE_THRESHOLD=5
CIRCUIT_BREAKER_TIMEOUT=60
# Insight Settings
MIN_CONFIDENCE_THRESHOLD=60
DEFAULT_INSIGHT_TTL_DAYS=7
MAX_INSIGHTS_PER_REQUEST=100
# Feedback Settings
FEEDBACK_PROCESSING_ENABLED=true
FEEDBACK_PROCESSING_SCHEDULE="0 6 * * *"
# Logging
LOG_LEVEL=INFO
# CORS
ALLOWED_ORIGINS=["http://localhost:3000","http://localhost:5173"]

View File

@@ -0,0 +1,49 @@
# AI Insights Dockerfile
# Shared stage: collects the shared libraries copied into the service image below
FROM python:3.11-slim AS shared
WORKDIR /shared
COPY shared/ /shared/
# Main service stage
FROM python:3.11-slim
WORKDIR /app
# Install system dependencies
RUN apt-get update && apt-get install -y \
gcc \
curl \
postgresql-client \
&& rm -rf /var/lib/apt/lists/*
# Copy requirements
COPY shared/requirements-tracing.txt /tmp/
COPY services/ai_insights/requirements.txt .
# Install Python dependencies
RUN pip install --no-cache-dir -r /tmp/requirements-tracing.txt
RUN pip install --no-cache-dir -r requirements.txt
# Copy shared libraries from the shared stage
COPY --from=shared /shared /app/shared
# Copy application code
COPY services/ai_insights/ .
# Copy scripts for migrations
COPY scripts/ /app/scripts/
# Add shared libraries to Python path
ENV PYTHONPATH="/app:/app/shared:${PYTHONPATH:-}"
# Expose port
EXPOSE 8000
# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
CMD curl -f http://localhost:8000/health || exit 1
# Run the application
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]

View File

@@ -0,0 +1,232 @@
# AI Insights Service - Quick Start Guide
Get the AI Insights Service running in 5 minutes.
## Prerequisites
- Python 3.11+
- PostgreSQL 14+ (running)
- Redis 6+ (running)
## Step 1: Setup Environment
```bash
cd services/ai_insights
# Create virtual environment
python3 -m venv venv
source venv/bin/activate # Windows: venv\Scripts\activate
# Install dependencies
pip install -r requirements.txt
```
## Step 2: Configure Database
```bash
# Copy environment template
cp .env.example .env
# Edit .env file
nano .env
```
**Minimum required configuration**:
```env
DATABASE_URL=postgresql+asyncpg://postgres:postgres@localhost:5432/bakery_ai_insights
REDIS_URL=redis://localhost:6379/5
```
## Step 3: Create Database
```bash
# Connect to PostgreSQL
psql -U postgres
# Create database
CREATE DATABASE bakery_ai_insights;
\q
```
## Step 4: Run Migrations
```bash
# Run Alembic migrations
alembic upgrade head
```
You should see:
```
INFO [alembic.runtime.migration] Running upgrade -> 001, Initial schema for AI Insights Service
```
## Step 5: Start the Service
```bash
uvicorn app.main:app --reload
```
You should see:
```
INFO: Uvicorn running on http://127.0.0.1:8000
INFO: Application startup complete.
```
## Step 6: Verify Installation
Open browser to http://localhost:8000/docs
You should see the Swagger UI with all API endpoints.
### Test Health Endpoint
```bash
curl http://localhost:8000/health
```
Expected response:
```json
{
"status": "healthy",
"service": "ai-insights",
"version": "1.0.0"
}
```
## Step 7: Create Your First Insight
```bash
curl -X POST "http://localhost:8000/api/v1/ai-insights/tenants/550e8400-e29b-41d4-a716-446655440000/insights" \
-H "Content-Type: application/json" \
-d '{
"tenant_id": "550e8400-e29b-41d4-a716-446655440000",
"type": "recommendation",
"priority": "high",
"category": "forecasting",
"title": "Test Insight - Weekend Demand Pattern",
"description": "Weekend sales 20% higher than weekdays",
"impact_type": "revenue_increase",
"impact_value": 150.00,
"impact_unit": "euros/week",
"confidence": 85,
"metrics_json": {
"weekday_avg": 45.2,
"weekend_avg": 54.2,
"increase_pct": 20.0
},
"actionable": true,
"recommendation_actions": [
{"label": "Increase Production", "action": "adjust_production"}
],
"source_service": "forecasting"
}'
```
## Step 8: Query Your Insights
```bash
curl "http://localhost:8000/api/v1/ai-insights/tenants/550e8400-e29b-41d4-a716-446655440000/insights?page=1&page_size=10"
```
## Common Issues
### Issue: "ModuleNotFoundError: No module named 'app'"
**Solution**: Make sure you're running from the `services/ai_insights/` directory and that the virtual environment is activated.
### Issue: "Connection refused" on database
**Solution**: Verify PostgreSQL is running:
```bash
# Check if PostgreSQL is running
pg_isready
# Start PostgreSQL (macOS with Homebrew)
brew services start postgresql
# Start PostgreSQL (Linux)
sudo systemctl start postgresql
```
### Issue: "Redis connection error"
**Solution**: Verify Redis is running:
```bash
# Check if Redis is running
redis-cli ping
# Should return: PONG
# Start Redis (macOS with Homebrew)
brew services start redis
# Start Redis (Linux)
sudo systemctl start redis
```
### Issue: "Alembic command not found"
**Solution**: Activate the virtual environment:
```bash
source venv/bin/activate
```
## Next Steps
1. **Explore API**: Visit http://localhost:8000/docs
2. **Read Documentation**: See `README.md` for detailed documentation
3. **Implementation Guide**: See `AI_INSIGHTS_IMPLEMENTATION_SUMMARY.md`
4. **Integration**: Start integrating with other services
## Useful Commands
```bash
# Check service status
curl http://localhost:8000/health
# Get aggregate metrics
curl "http://localhost:8000/api/v1/ai-insights/tenants/{tenant_id}/insights/metrics/summary"
# Filter high-confidence insights
curl "http://localhost:8000/api/v1/ai-insights/tenants/{tenant_id}/insights?actionable_only=true&min_confidence=80"
# Stop the service
# Press Ctrl+C in the terminal running uvicorn
# Deactivate virtual environment
deactivate
```
## Docker Quick Start (Alternative)
If you prefer Docker:
```bash
# Build image
docker build -t ai-insights .
# Run container
docker run -d \
--name ai-insights \
-p 8000:8000 \
-e DATABASE_URL=postgresql+asyncpg://postgres:postgres@host.docker.internal:5432/bakery_ai_insights \
-e REDIS_URL=redis://host.docker.internal:6379/5 \
ai-insights
# Check logs
docker logs ai-insights
# Stop container
docker stop ai-insights
docker rm ai-insights
```
## Support
- **Documentation**: See `README.md`
- **API Docs**: http://localhost:8000/docs
- **Issues**: Create a GitHub issue or contact the team
---
**You're ready!** The AI Insights Service is now running and ready to accept insights from other services.

View File

@@ -0,0 +1,316 @@
# AI Insights Service
Intelligent insights and recommendations service for bakery operations optimization.
## Overview
The AI Insights Service is a microservice that aggregates, scores, and manages intelligent recommendations across the bakery-ia platform. It provides:
- **Unified Insight Management**: Centralized storage and retrieval of AI-generated insights
- **Confidence Scoring**: Standardized confidence calculation across different insight types
- **Impact Estimation**: Business value quantification for recommendations
- **Feedback Loop**: Closed-loop learning from applied insights
- **Cross-Service Intelligence**: Correlation detection between insights from different services
## Features
### Core Capabilities
1. **Insight Aggregation**
- Collect insights from Forecasting, Procurement, Production, and Sales services
- Categorize and prioritize recommendations
- Filter by confidence, category, priority, and actionability
2. **Confidence Calculation** (see the scoring sketch after this list)
- Multi-factor scoring: data quality, model performance, sample size, recency, historical accuracy
- Insight-type specific adjustments
- Specialized calculations for forecasting and optimization insights
3. **Impact Estimation**
- Cost savings quantification
- Revenue increase projections
- Waste reduction calculations
- Efficiency gain measurements
- Quality improvement tracking
4. **Feedback & Learning**
- Track application outcomes
- Compare expected vs. actual impact
- Calculate success rates
- Enable model improvement
5. **Orchestration Integration**
- Pre-orchestration insight gathering
- Actionable insight filtering
- Categorized recommendations for workflow phases
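The exact weighting lives inside the service's confidence calculator; the sketch below only illustrates the multi-factor idea, and the factor names and weights are assumptions rather than the shipped implementation:
```python
# Illustrative only: factor names and weights are assumptions, not the
# service's actual confidence calculator.
def combine_confidence_factors(
    data_quality: float,        # 0-1, completeness/cleanliness of input data
    model_performance: float,   # 0-1, recent accuracy of the source model
    sample_size_score: float,   # 0-1, saturating with number of observations
    recency_score: float,       # 0-1, decays with age of the underlying data
    historical_accuracy: float, # 0-1, how well similar insights held up before
) -> int:
    """Blend factors into a 0-100 confidence score (hypothetical weights)."""
    weights = {
        "data_quality": 0.25,
        "model_performance": 0.25,
        "sample_size": 0.20,
        "recency": 0.15,
        "historical_accuracy": 0.15,
    }
    score = (
        weights["data_quality"] * data_quality
        + weights["model_performance"] * model_performance
        + weights["sample_size"] * sample_size_score
        + weights["recency"] * recency_score
        + weights["historical_accuracy"] * historical_accuracy
    )
    return round(score * 100)
```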
## Architecture
### Database Models
- **AIInsight**: Core insights table with classification, confidence, impact metrics
- **InsightFeedback**: Feedback tracking for closed-loop learning
- **InsightCorrelation**: Cross-service insight relationships
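For orientation, a trimmed sketch of the core `AIInsight` columns is shown below. The column names follow the API payloads, but the types and defaults are assumptions; consult `app/models/ai_insight.py` for the real definition.
```python
# Trimmed sketch; the real model has more columns, constraints, and indexes.
from sqlalchemy import Column, String, Integer, Boolean, DECIMAL, TIMESTAMP, Text
from sqlalchemy.dialects.postgresql import UUID, JSONB
from sqlalchemy.sql import func
from sqlalchemy.orm import declarative_base
import uuid

Base = declarative_base()

class AIInsightSketch(Base):
    __tablename__ = "ai_insights_sketch"  # illustrative name, not the real table

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    tenant_id = Column(UUID(as_uuid=True), nullable=False, index=True)
    type = Column(String(50), nullable=False)       # recommendation, alert, ...
    priority = Column(String(20), nullable=False)   # critical/high/medium/low
    category = Column(String(50), nullable=False)   # forecasting, procurement, ...
    title = Column(String(255), nullable=False)
    description = Column(Text)
    confidence = Column(Integer, nullable=False)    # 0-100
    impact_type = Column(String(50))
    impact_value = Column(DECIMAL(12, 2))
    impact_unit = Column(String(50))
    metrics_json = Column(JSONB)
    actionable = Column(Boolean, default=False)
    recommendation_actions = Column(JSONB)
    source_service = Column(String(50))
    status = Column(String(20), default="new")
    created_at = Column(TIMESTAMP(timezone=True), server_default=func.now())
```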
### API Endpoints
```
POST /api/v1/ai-insights/tenants/{tenant_id}/insights
GET /api/v1/ai-insights/tenants/{tenant_id}/insights
GET /api/v1/ai-insights/tenants/{tenant_id}/insights/{insight_id}
PATCH /api/v1/ai-insights/tenants/{tenant_id}/insights/{insight_id}
DELETE /api/v1/ai-insights/tenants/{tenant_id}/insights/{insight_id}
GET /api/v1/ai-insights/tenants/{tenant_id}/insights/orchestration-ready
GET /api/v1/ai-insights/tenants/{tenant_id}/insights/metrics/summary
POST /api/v1/ai-insights/tenants/{tenant_id}/insights/{insight_id}/apply
POST /api/v1/ai-insights/tenants/{tenant_id}/insights/{insight_id}/feedback
POST /api/v1/ai-insights/tenants/{tenant_id}/insights/refresh
GET /api/v1/ai-insights/tenants/{tenant_id}/insights/export
```
## Installation
### Prerequisites
- Python 3.11+
- PostgreSQL 14+
- Redis 6+
### Setup
1. **Clone and navigate**:
```bash
cd services/ai_insights
```
2. **Create virtual environment**:
```bash
python -m venv venv
source venv/bin/activate # On Windows: venv\Scripts\activate
```
3. **Install dependencies**:
```bash
pip install -r requirements.txt
```
4. **Configure environment**:
```bash
cp .env.example .env
# Edit .env with your configuration
```
5. **Run migrations**:
```bash
alembic upgrade head
```
6. **Start the service**:
```bash
uvicorn app.main:app --reload
```
The service will be available at `http://localhost:8000`.
## Configuration
### Environment Variables
| Variable | Description | Default |
|----------|-------------|---------|
| `DATABASE_URL` | PostgreSQL connection string | Required |
| `REDIS_URL` | Redis connection string | Required |
| `FORECASTING_SERVICE_URL` | Forecasting service URL | `http://forecasting-service:8000` |
| `PROCUREMENT_SERVICE_URL` | Procurement service URL | `http://procurement-service:8000` |
| `PRODUCTION_SERVICE_URL` | Production service URL | `http://production-service:8000` |
| `MIN_CONFIDENCE_THRESHOLD` | Minimum confidence for insights | `60` |
| `DEFAULT_INSIGHT_TTL_DAYS` | Days before insights expire | `7` |
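The last two settings drive insight filtering and expiry. A minimal sketch of the intended semantics follows; the helper name and exact behaviour are assumptions, not service code:
```python
# Sketch of the intended semantics of MIN_CONFIDENCE_THRESHOLD and
# DEFAULT_INSIGHT_TTL_DAYS; the helper below is illustrative only.
from datetime import datetime, timedelta, timezone

MIN_CONFIDENCE_THRESHOLD = 60   # insights below this score are not surfaced
DEFAULT_INSIGHT_TTL_DAYS = 7    # insights expire after this many days

def should_store(confidence: int, created_at: datetime) -> tuple[bool, datetime]:
    """Return (keep, expires_at) for a candidate insight."""
    keep = confidence >= MIN_CONFIDENCE_THRESHOLD
    expires_at = created_at + timedelta(days=DEFAULT_INSIGHT_TTL_DAYS)
    return keep, expires_at

keep, expires_at = should_store(85, datetime.now(timezone.utc))
```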
## Usage Examples
### Creating an Insight
```python
import httpx
insight_data = {
"tenant_id": "550e8400-e29b-41d4-a716-446655440000",
"type": "recommendation",
"priority": "high",
"category": "procurement",
"title": "Flour Price Increase Expected",
"description": "Price predicted to rise 8% in next week. Consider ordering now.",
"impact_type": "cost_savings",
"impact_value": 120.50,
"impact_unit": "euros",
"confidence": 85,
"metrics_json": {
"current_price": 1.20,
"predicted_price": 1.30,
"order_quantity": 1000
},
"actionable": True,
"recommendation_actions": [
{"label": "Order Now", "action": "create_purchase_order"},
{"label": "Review", "action": "review_forecast"}
],
"source_service": "procurement",
"source_data_id": "price_forecast_123"
}
response = httpx.post(
"http://localhost:8000/api/v1/ai-insights/tenants/550e8400-e29b-41d4-a716-446655440000/insights",
json=insight_data
)
print(response.json())
```
### Querying Insights
```python
# Get high-confidence actionable insights
response = httpx.get(
"http://localhost:8000/api/v1/ai-insights/tenants/550e8400-e29b-41d4-a716-446655440000/insights",
params={
"actionable_only": True,
"min_confidence": 80,
"priority": "high",
"page": 1,
"page_size": 20
}
)
insights = response.json()
```
### Recording Feedback
```python
feedback_data = {
"insight_id": "insight-uuid",
"action_taken": "create_purchase_order",
"success": True,
"expected_impact_value": 120.50,
"actual_impact_value": 115.30,
"result_data": {
"order_id": "PO-12345",
"actual_savings": 115.30
},
"applied_by": "user@example.com"
}
response = httpx.post(
f"http://localhost:8000/api/v1/ai-insights/tenants/{tenant_id}/insights/{insight_id}/feedback",
json=feedback_data
)
```
## Development
### Running Tests
```bash
pytest
```
### Code Quality
```bash
# Format code
black app/
# Lint
flake8 app/
# Type checking
mypy app/
```
### Creating a Migration
```bash
alembic revision --autogenerate -m "Description of changes"
alembic upgrade head
```
## Insight Types
- **optimization**: Process improvements with measurable gains
- **alert**: Warnings requiring attention
- **prediction**: Future forecasts with confidence intervals
- **recommendation**: Suggested actions with estimated impact
- **insight**: General data-driven observations
- **anomaly**: Unusual patterns detected in data
## Priority Levels
- **critical**: Immediate action required (e.g., stockout risk)
- **high**: Action recommended soon (e.g., price opportunity)
- **medium**: Consider acting (e.g., efficiency improvement)
- **low**: Informational (e.g., pattern observation)
## Categories
- **forecasting**: Demand predictions and patterns
- **inventory**: Stock management and optimization
- **production**: Manufacturing efficiency and scheduling
- **procurement**: Purchasing and supplier management
- **customer**: Customer behavior and satisfaction
- **cost**: Cost optimization opportunities
- **quality**: Quality improvements
- **efficiency**: Process efficiency gains
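These taxonomies translate directly into enumerations. A condensed sketch with the values from the lists above is shown below; the service itself may enforce them differently, for example via database CHECK constraints:
```python
# Condensed sketch of the taxonomies above.
from enum import Enum

class InsightType(str, Enum):
    OPTIMIZATION = "optimization"
    ALERT = "alert"
    PREDICTION = "prediction"
    RECOMMENDATION = "recommendation"
    INSIGHT = "insight"
    ANOMALY = "anomaly"

class InsightPriority(str, Enum):
    CRITICAL = "critical"
    HIGH = "high"
    MEDIUM = "medium"
    LOW = "low"

class InsightCategory(str, Enum):
    FORECASTING = "forecasting"
    INVENTORY = "inventory"
    PRODUCTION = "production"
    PROCUREMENT = "procurement"
    CUSTOMER = "customer"
    COST = "cost"
    QUALITY = "quality"
    EFFICIENCY = "efficiency"
```
Keeping the string values identical to the API payloads means such enums can be used directly when constructing requests.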
## Integration with Other Services
### Forecasting Service
- Receives forecast accuracy insights
- Pattern detection alerts
- Demand anomaly notifications
### Procurement Service
- Price forecast recommendations
- Supplier performance alerts
- Safety stock optimization
### Production Service
- Yield prediction insights
- Schedule optimization recommendations
- Equipment maintenance alerts
### Orchestrator Service
- Pre-orchestration insight gathering
- Actionable recommendation filtering
- Feedback recording for applied insights
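Producer services push insights and feedback over the REST API. A thin async wrapper in the spirit of the `AIInsightsClient` referenced elsewhere in the codebase might look like the sketch below; the class name, method names, and default base URL are assumptions:
```python
# Hypothetical client sketch; signatures may differ from the real AIInsightsClient.
import httpx

class AIInsightsClientSketch:
    def __init__(self, base_url: str = "http://ai-insights-service:8000"):
        self._base = f"{base_url}/api/v1/ai-insights"

    async def create_insight(self, tenant_id: str, insight: dict) -> dict:
        """POST an insight produced by a source service."""
        async with httpx.AsyncClient() as client:
            resp = await client.post(
                f"{self._base}/tenants/{tenant_id}/insights", json=insight
            )
            resp.raise_for_status()
            return resp.json()

    async def record_feedback(self, tenant_id: str, insight_id: str, feedback: dict) -> dict:
        """POST outcome feedback for an applied insight."""
        async with httpx.AsyncClient() as client:
            resp = await client.post(
                f"{self._base}/tenants/{tenant_id}/insights/{insight_id}/feedback",
                json=feedback,
            )
            resp.raise_for_status()
            return resp.json()
```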
## API Documentation
Once the service is running, interactive API documentation is available at:
- Swagger UI: `http://localhost:8000/docs`
- ReDoc: `http://localhost:8000/redoc`
## Monitoring
### Health Check
```bash
curl http://localhost:8000/health
```
### Metrics Endpoint
```bash
curl http://localhost:8000/api/v1/ai-insights/tenants/{tenant_id}/insights/metrics/summary
```
## License
Copyright © 2025 Bakery IA. All rights reserved.
## Support
For issues and questions, please contact the development team or create an issue in the project repository.

View File

@@ -0,0 +1,112 @@
# A generic, single database configuration.
[alembic]
# path to migration scripts
script_location = migrations
# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s
file_template = %%(year)d%%(month).2d%%(day).2d_%%(hour).2d%%(minute).2d_%%(rev)s_%%(slug)s
# sys.path path, will be prepended to sys.path if present.
prepend_sys_path = .
# timezone to use when rendering the date within the migration file
# as well as the filename.
# If specified, requires the python-dateutil library that can be
# installed by adding `alembic[tz]` to the pip requirements
# string value is passed to dateutil.tz.gettz()
# leave blank for localtime
# timezone =
# max length of characters to apply to the
# "slug" field
# truncate_slug_length = 40
# set to 'true' to run the environment during
# the 'revision' command, regardless of autogenerate
# revision_environment = false
# set to 'true' to allow .pyc and .pyo files without
# a source .py file to be detected as revisions in the
# versions/ directory
# sourceless = false
# version location specification; This defaults
# to migrations/versions. When using multiple version
# directories, initial revisions must be specified with --version-path.
# The path separator used here should be the separator specified by "version_path_separator" below.
# version_locations = %(here)s/bar:%(here)s/bat:migrations/versions
# version path separator; As mentioned above, this is the character used to split
# version_locations. The default within new alembic.ini files is "os", which uses os.pathsep.
# If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas.
# Valid values for version_path_separator are:
#
# version_path_separator = :
# version_path_separator = ;
# version_path_separator = space
version_path_separator = os # Use os.pathsep. Default configuration used for new projects.
# set to 'true' to search source files recursively
# in each "version_locations" directory
# new in Alembic version 1.10
# recursive_version_locations = false
# the output encoding used when revision files
# are written from script.py.mako
# output_encoding = utf-8
sqlalchemy.url = driver://user:pass@localhost/dbname
[post_write_hooks]
# post_write_hooks defines scripts or Python functions that are run
# on newly generated revision scripts. See the documentation for further
# detail and examples
# format using "black" - use the console_scripts runner, against the "black" entrypoint
# hooks = black
# black.type = console_scripts
# black.entrypoint = black
# black.options = -l 79 REVISION_SCRIPT_FILENAME
# lint with attempts to fix using "ruff" - use the exec runner, execute a binary
# hooks = ruff
# ruff.type = exec
# ruff.executable = %(here)s/.venv/bin/ruff
# ruff.options = --fix REVISION_SCRIPT_FILENAME
# Logging configuration
[loggers]
keys = root,sqlalchemy,alembic
[handlers]
keys = console
[formatters]
keys = generic
[logger_root]
level = WARN
handlers = console
qualname =
[logger_sqlalchemy]
level = WARN
handlers =
qualname = sqlalchemy.engine
[logger_alembic]
level = INFO
handlers =
qualname = alembic
[handler_console]
class = StreamHandler
args = (sys.stderr,)
level = NOTSET
formatter = generic
[formatter_generic]
format = %(levelname)-5.5s [%(name)s] %(message)s
datefmt = %H:%M:%S

View File

@@ -0,0 +1,3 @@
"""AI Insights Service."""
__version__ = "1.0.0"

View File

@@ -0,0 +1 @@
"""API modules for AI Insights Service."""

View File

@@ -0,0 +1,323 @@
"""API endpoints for AI Insights."""
from fastapi import APIRouter, Depends, HTTPException, Query, status
from sqlalchemy.ext.asyncio import AsyncSession
from typing import Optional
from uuid import UUID
from datetime import datetime
import math
from app.core.database import get_db
from app.repositories.insight_repository import InsightRepository
from app.repositories.feedback_repository import FeedbackRepository
from app.schemas.insight import (
AIInsightCreate,
AIInsightUpdate,
AIInsightResponse,
AIInsightList,
InsightMetrics,
InsightFilters
)
from app.schemas.feedback import InsightFeedbackCreate, InsightFeedbackResponse
router = APIRouter()
@router.post("/tenants/{tenant_id}/insights", response_model=AIInsightResponse, status_code=status.HTTP_201_CREATED)
async def create_insight(
tenant_id: UUID,
insight_data: AIInsightCreate,
db: AsyncSession = Depends(get_db)
):
"""Create a new AI Insight."""
# Ensure tenant_id matches
if insight_data.tenant_id != tenant_id:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail="Tenant ID mismatch"
)
repo = InsightRepository(db)
insight = await repo.create(insight_data)
await db.commit()
return insight
@router.get("/tenants/{tenant_id}/insights", response_model=AIInsightList)
async def get_insights(
tenant_id: UUID,
category: Optional[str] = Query(None),
priority: Optional[str] = Query(None),
status: Optional[str] = Query(None),
actionable_only: bool = Query(False),
min_confidence: int = Query(0, ge=0, le=100),
source_service: Optional[str] = Query(None),
from_date: Optional[datetime] = Query(None),
to_date: Optional[datetime] = Query(None),
page: int = Query(1, ge=1),
page_size: int = Query(20, ge=1, le=100),
db: AsyncSession = Depends(get_db)
):
"""Get insights for a tenant with filters and pagination."""
filters = InsightFilters(
category=category,
priority=priority,
status=status,
actionable_only=actionable_only,
min_confidence=min_confidence,
source_service=source_service,
from_date=from_date,
to_date=to_date
)
repo = InsightRepository(db)
skip = (page - 1) * page_size
insights, total = await repo.get_by_tenant(tenant_id, filters, skip, page_size)
total_pages = math.ceil(total / page_size) if total > 0 else 0
return AIInsightList(
items=insights,
total=total,
page=page,
page_size=page_size,
total_pages=total_pages
)
@router.get("/tenants/{tenant_id}/insights/orchestration-ready")
async def get_orchestration_ready_insights(
tenant_id: UUID,
target_date: datetime = Query(...),
min_confidence: int = Query(70, ge=0, le=100),
db: AsyncSession = Depends(get_db)
):
"""Get actionable insights for orchestration workflow."""
repo = InsightRepository(db)
categorized_insights = await repo.get_orchestration_ready_insights(
tenant_id, target_date, min_confidence
)
return categorized_insights
@router.get("/tenants/{tenant_id}/insights/{insight_id}", response_model=AIInsightResponse)
async def get_insight(
tenant_id: UUID,
insight_id: UUID,
db: AsyncSession = Depends(get_db)
):
"""Get a single insight by ID."""
repo = InsightRepository(db)
insight = await repo.get_by_id(insight_id)
if not insight:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail="Insight not found"
)
if insight.tenant_id != tenant_id:
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN,
detail="Access denied"
)
return insight
@router.patch("/tenants/{tenant_id}/insights/{insight_id}", response_model=AIInsightResponse)
async def update_insight(
tenant_id: UUID,
insight_id: UUID,
update_data: AIInsightUpdate,
db: AsyncSession = Depends(get_db)
):
"""Update an insight (typically status changes)."""
repo = InsightRepository(db)
# Verify insight exists and belongs to tenant
insight = await repo.get_by_id(insight_id)
if not insight:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail="Insight not found"
)
if insight.tenant_id != tenant_id:
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN,
detail="Access denied"
)
updated_insight = await repo.update(insight_id, update_data)
await db.commit()
return updated_insight
@router.delete("/tenants/{tenant_id}/insights/{insight_id}", status_code=status.HTTP_204_NO_CONTENT)
async def dismiss_insight(
tenant_id: UUID,
insight_id: UUID,
db: AsyncSession = Depends(get_db)
):
"""Dismiss an insight (soft delete)."""
repo = InsightRepository(db)
# Verify insight exists and belongs to tenant
insight = await repo.get_by_id(insight_id)
if not insight:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail="Insight not found"
)
if insight.tenant_id != tenant_id:
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN,
detail="Access denied"
)
await repo.delete(insight_id)
await db.commit()
@router.get("/tenants/{tenant_id}/insights/metrics/summary", response_model=InsightMetrics)
async def get_insights_metrics(
tenant_id: UUID,
db: AsyncSession = Depends(get_db)
):
"""Get aggregate metrics for insights."""
repo = InsightRepository(db)
metrics = await repo.get_metrics(tenant_id)
return InsightMetrics(**metrics)
@router.post("/tenants/{tenant_id}/insights/{insight_id}/apply")
async def apply_insight(
tenant_id: UUID,
insight_id: UUID,
db: AsyncSession = Depends(get_db)
):
"""Apply an insight recommendation (trigger action)."""
repo = InsightRepository(db)
# Verify insight exists and belongs to tenant
insight = await repo.get_by_id(insight_id)
if not insight:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail="Insight not found"
)
if insight.tenant_id != tenant_id:
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN,
detail="Access denied"
)
if not insight.actionable:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail="This insight is not actionable"
)
# Update status to in_progress
update_data = AIInsightUpdate(status='in_progress', applied_at=datetime.utcnow())
await repo.update(insight_id, update_data)
await db.commit()
# TODO: Route to appropriate service based on recommendation_actions
# This will be implemented when service clients are added
return {
"message": "Insight application initiated",
"insight_id": str(insight_id),
"actions": insight.recommendation_actions
}
@router.post("/tenants/{tenant_id}/insights/{insight_id}/feedback", response_model=InsightFeedbackResponse)
async def record_feedback(
tenant_id: UUID,
insight_id: UUID,
feedback_data: InsightFeedbackCreate,
db: AsyncSession = Depends(get_db)
):
"""Record feedback for an applied insight."""
insight_repo = InsightRepository(db)
# Verify insight exists and belongs to tenant
insight = await insight_repo.get_by_id(insight_id)
if not insight:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail="Insight not found"
)
if insight.tenant_id != tenant_id:
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN,
detail="Access denied"
)
# Ensure feedback is for this insight
if feedback_data.insight_id != insight_id:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail="Insight ID mismatch"
)
feedback_repo = FeedbackRepository(db)
feedback = await feedback_repo.create(feedback_data)
# Update insight status based on feedback
new_status = 'applied' if feedback.success else 'dismissed'
update_data = AIInsightUpdate(status=new_status)
await insight_repo.update(insight_id, update_data)
await db.commit()
return feedback
@router.post("/tenants/{tenant_id}/insights/refresh")
async def refresh_insights(
tenant_id: UUID,
db: AsyncSession = Depends(get_db)
):
"""Trigger insight refresh (expire old, generate new)."""
repo = InsightRepository(db)
# Expire old insights
expired_count = await repo.expire_old_insights()
await db.commit()
return {
"message": "Insights refreshed",
"expired_count": expired_count
}
@router.get("/tenants/{tenant_id}/insights/export")
async def export_insights(
tenant_id: UUID,
format: str = Query("json", regex="^(json|csv)$"),
db: AsyncSession = Depends(get_db)
):
"""Export insights to JSON or CSV."""
repo = InsightRepository(db)
insights, _ = await repo.get_by_tenant(tenant_id, filters=None, skip=0, limit=1000)
if format == "json":
return {"insights": [AIInsightResponse.model_validate(i) for i in insights]}
# CSV export would be implemented here
raise HTTPException(
status_code=status.HTTP_501_NOT_IMPLEMENTED,
detail="CSV export not yet implemented"
)

View File

@@ -0,0 +1,77 @@
"""Configuration settings for AI Insights Service."""
from shared.config.base import BaseServiceSettings
import os
from typing import Optional
class Settings(BaseServiceSettings):
"""Application settings."""
# Service Info
SERVICE_NAME: str = "ai-insights"
SERVICE_VERSION: str = "1.0.0"
API_V1_PREFIX: str = "/api/v1"
# Database configuration (secure approach - build from components)
@property
def DATABASE_URL(self) -> str:
"""Build database URL from secure components"""
# Try complete URL first (for backward compatibility)
complete_url = os.getenv("AI_INSIGHTS_DATABASE_URL")
if complete_url:
return complete_url
# Also check for generic DATABASE_URL (for migration compatibility)
generic_url = os.getenv("DATABASE_URL")
if generic_url:
return generic_url
# Build from components (secure approach)
user = os.getenv("AI_INSIGHTS_DB_USER", "ai_insights_user")
password = os.getenv("AI_INSIGHTS_DB_PASSWORD", "ai_insights_pass123")
host = os.getenv("AI_INSIGHTS_DB_HOST", "localhost")
port = os.getenv("AI_INSIGHTS_DB_PORT", "5432")
name = os.getenv("AI_INSIGHTS_DB_NAME", "ai_insights_db")
return f"postgresql+asyncpg://{user}:{password}@{host}:{port}/{name}"
DB_POOL_SIZE: int = 20
DB_MAX_OVERFLOW: int = 10
# Redis (inherited from BaseServiceSettings but can override)
REDIS_CACHE_TTL: int = 900 # 15 minutes
REDIS_DB: int = 3 # Dedicated Redis database for AI Insights
# Service URLs
FORECASTING_SERVICE_URL: str = "http://forecasting-service:8000"
PROCUREMENT_SERVICE_URL: str = "http://procurement-service:8000"
PRODUCTION_SERVICE_URL: str = "http://production-service:8000"
SALES_SERVICE_URL: str = "http://sales-service:8000"
INVENTORY_SERVICE_URL: str = "http://inventory-service:8000"
# Circuit Breaker Settings
CIRCUIT_BREAKER_FAILURE_THRESHOLD: int = 5
CIRCUIT_BREAKER_TIMEOUT: int = 60
# Insight Settings
MIN_CONFIDENCE_THRESHOLD: int = 60
DEFAULT_INSIGHT_TTL_DAYS: int = 7
MAX_INSIGHTS_PER_REQUEST: int = 100
# Feedback Settings
FEEDBACK_PROCESSING_ENABLED: bool = True
FEEDBACK_PROCESSING_SCHEDULE: str = "0 6 * * *" # Daily at 6 AM
# Logging
LOG_LEVEL: str = "INFO"
# CORS
ALLOWED_ORIGINS: list[str] = ["http://localhost:3000", "http://localhost:5173"]
class Config:
env_file = ".env"
case_sensitive = True
settings = Settings()

View File

@@ -0,0 +1,58 @@
"""Database configuration and session management."""
from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine, async_sessionmaker
from sqlalchemy.orm import declarative_base
from sqlalchemy.pool import NullPool
from typing import AsyncGenerator
from app.core.config import settings
# Create async engine
engine = create_async_engine(
settings.DATABASE_URL,
pool_size=settings.DB_POOL_SIZE,
max_overflow=settings.DB_MAX_OVERFLOW,
echo=False,
future=True,
)
# Create async session factory
AsyncSessionLocal = async_sessionmaker(
engine,
class_=AsyncSession,
expire_on_commit=False,
autocommit=False,
autoflush=False,
)
# Create declarative base
Base = declarative_base()
async def get_db() -> AsyncGenerator[AsyncSession, None]:
"""
Dependency for getting async database sessions.
Yields:
AsyncSession: Database session
"""
async with AsyncSessionLocal() as session:
try:
yield session
await session.commit()
except Exception:
await session.rollback()
raise
finally:
await session.close()
async def init_db():
"""Initialize database tables."""
async with engine.begin() as conn:
await conn.run_sync(Base.metadata.create_all)
async def close_db():
"""Close database connections."""
await engine.dispose()

View File

@@ -0,0 +1,320 @@
"""Impact estimation for AI Insights."""
from typing import Dict, Any, Optional, Tuple
from decimal import Decimal
from datetime import datetime, timedelta
class ImpactEstimator:
"""
Estimate potential impact of recommendations.
Calculates expected business value in terms of:
- Cost savings (euros)
- Revenue increase (euros)
- Waste reduction (euros or percentage)
- Efficiency gains (hours or percentage)
- Quality improvements (units or percentage)
"""
def estimate_procurement_savings(
self,
current_price: Decimal,
predicted_price: Decimal,
order_quantity: Decimal,
timeframe_days: int = 30
) -> Tuple[Decimal, str, str]:
"""
Estimate savings from opportunistic buying.
Args:
current_price: Current unit price
predicted_price: Predicted future price
order_quantity: Quantity to order
timeframe_days: Time horizon for prediction
Returns:
tuple: (impact_value, impact_unit, impact_type)
"""
savings_per_unit = predicted_price - current_price
if savings_per_unit > 0:
total_savings = savings_per_unit * order_quantity
return (
round(total_savings, 2),
'euros',
'cost_savings'
)
return (Decimal('0.0'), 'euros', 'cost_savings')
def estimate_waste_reduction_savings(
self,
current_waste_rate: float,
optimized_waste_rate: float,
monthly_volume: Decimal,
avg_cost_per_unit: Decimal
) -> Tuple[Decimal, str, str]:
"""
Estimate savings from waste reduction.
Args:
current_waste_rate: Current waste rate (0-1)
optimized_waste_rate: Optimized waste rate (0-1)
monthly_volume: Monthly volume
avg_cost_per_unit: Average cost per unit
Returns:
tuple: (impact_value, impact_unit, impact_type)
"""
waste_reduction_rate = current_waste_rate - optimized_waste_rate
units_saved = monthly_volume * Decimal(str(waste_reduction_rate))
savings = units_saved * avg_cost_per_unit
return (
round(savings, 2),
'euros/month',
'waste_reduction'
)
def estimate_forecast_improvement_value(
self,
current_mape: float,
improved_mape: float,
avg_monthly_revenue: Decimal
) -> Tuple[Decimal, str, str]:
"""
Estimate value from forecast accuracy improvement.
Better forecasts reduce:
- Stockouts (lost sales)
- Overproduction (waste)
- Emergency orders (premium costs)
Args:
current_mape: Current forecast MAPE
improved_mape: Improved forecast MAPE
avg_monthly_revenue: Average monthly revenue
Returns:
tuple: (impact_value, impact_unit, impact_type)
"""
# Rule of thumb: 1% MAPE improvement = 0.5% revenue impact
mape_improvement = current_mape - improved_mape
revenue_impact_pct = mape_improvement * 0.5 / 100
revenue_increase = avg_monthly_revenue * Decimal(str(revenue_impact_pct))
return (
round(revenue_increase, 2),
'euros/month',
'revenue_increase'
)
def estimate_production_efficiency_gain(
self,
time_saved_minutes: int,
batches_per_month: int,
labor_cost_per_hour: Decimal = Decimal('15.0')
) -> Tuple[Decimal, str, str]:
"""
Estimate value from production efficiency improvements.
Args:
time_saved_minutes: Minutes saved per batch
batches_per_month: Number of batches per month
labor_cost_per_hour: Labor cost per hour
Returns:
tuple: (impact_value, impact_unit, impact_type)
"""
hours_saved_per_month = (time_saved_minutes * batches_per_month) / 60
cost_savings = Decimal(str(hours_saved_per_month)) * labor_cost_per_hour
return (
round(cost_savings, 2),
'euros/month',
'efficiency_gain'
)
def estimate_safety_stock_optimization(
self,
current_safety_stock: Decimal,
optimal_safety_stock: Decimal,
holding_cost_per_unit_per_day: Decimal,
stockout_cost_reduction: Decimal = Decimal('0.0')
) -> Tuple[Decimal, str, str]:
"""
Estimate impact of safety stock optimization.
Args:
current_safety_stock: Current safety stock level
optimal_safety_stock: Optimal safety stock level
holding_cost_per_unit_per_day: Daily holding cost
stockout_cost_reduction: Reduction in stockout costs
Returns:
tuple: (impact_value, impact_unit, impact_type)
"""
stock_reduction = current_safety_stock - optimal_safety_stock
if stock_reduction > 0:
# Savings from reduced holding costs
daily_savings = stock_reduction * holding_cost_per_unit_per_day
monthly_savings = daily_savings * 30
total_savings = monthly_savings + stockout_cost_reduction
return (
round(total_savings, 2),
'euros/month',
'cost_savings'
)
elif stock_reduction < 0:
# Cost increase but reduces stockouts
daily_cost = abs(stock_reduction) * holding_cost_per_unit_per_day
monthly_cost = daily_cost * 30
net_savings = stockout_cost_reduction - monthly_cost
if net_savings > 0:
return (
round(net_savings, 2),
'euros/month',
'cost_savings'
)
return (Decimal('0.0'), 'euros/month', 'cost_savings')
def estimate_supplier_switch_savings(
self,
current_supplier_price: Decimal,
alternative_supplier_price: Decimal,
monthly_order_quantity: Decimal,
quality_difference_score: float = 0.0 # -1 to 1
) -> Tuple[Decimal, str, str]:
"""
Estimate savings from switching suppliers.
Args:
current_supplier_price: Current supplier unit price
alternative_supplier_price: Alternative supplier unit price
monthly_order_quantity: Monthly order quantity
quality_difference_score: Quality difference (-1=worse, 0=same, 1=better)
Returns:
tuple: (impact_value, impact_unit, impact_type)
"""
price_savings = (current_supplier_price - alternative_supplier_price) * monthly_order_quantity
# Adjust for quality difference
# If quality is worse, reduce estimated savings
quality_adjustment = 1 + (quality_difference_score * 0.1) # ±10% max adjustment
adjusted_savings = price_savings * Decimal(str(quality_adjustment))
return (
round(adjusted_savings, 2),
'euros/month',
'cost_savings'
)
def estimate_yield_improvement_value(
self,
current_yield_rate: float,
predicted_yield_rate: float,
production_volume: Decimal,
product_price: Decimal
) -> Tuple[Decimal, str, str]:
"""
Estimate value from production yield improvements.
Args:
current_yield_rate: Current yield rate (0-1)
predicted_yield_rate: Predicted yield rate (0-1)
production_volume: Monthly production volume
product_price: Product selling price
Returns:
tuple: (impact_value, impact_unit, impact_type)
"""
yield_improvement = predicted_yield_rate - current_yield_rate
if yield_improvement > 0:
additional_units = production_volume * Decimal(str(yield_improvement))
revenue_increase = additional_units * product_price
return (
round(revenue_increase, 2),
'euros/month',
'revenue_increase'
)
return (Decimal('0.0'), 'euros/month', 'revenue_increase')
def estimate_demand_pattern_value(
self,
pattern_strength: float, # 0-1
potential_revenue_increase: Decimal,
implementation_cost: Decimal = Decimal('0.0')
) -> Tuple[Decimal, str, str]:
"""
Estimate value from acting on demand patterns.
Args:
pattern_strength: Strength of detected pattern (0-1)
potential_revenue_increase: Potential monthly revenue increase
implementation_cost: One-time implementation cost
Returns:
tuple: (impact_value, impact_unit, impact_type)
"""
# Discount by pattern strength (confidence)
expected_value = potential_revenue_increase * Decimal(str(pattern_strength))
# Amortize implementation cost over 6 months
monthly_cost = implementation_cost / 6
net_value = expected_value - monthly_cost
return (
round(max(Decimal('0.0'), net_value), 2),
'euros/month',
'revenue_increase'
)
def estimate_composite_impact(
self,
impacts: list[Dict[str, Any]]
) -> Tuple[Decimal, str, str]:
"""
Combine multiple impact estimations.
Args:
impacts: List of impact dicts with 'value', 'unit', 'type'
Returns:
tuple: (total_impact_value, impact_unit, impact_type)
"""
total_savings = Decimal('0.0')
total_revenue = Decimal('0.0')
for impact in impacts:
value = Decimal(str(impact['value']))
impact_type = impact['type']
if impact_type == 'cost_savings':
total_savings += value
elif impact_type == 'revenue_increase':
total_revenue += value
# Combine both types
total_impact = total_savings + total_revenue
if total_impact > 0:
# Determine primary type
primary_type = 'cost_savings' if total_savings > total_revenue else 'revenue_increase'
return (
round(total_impact, 2),
'euros/month',
primary_type
)
return (Decimal('0.0'), 'euros/month', 'cost_savings')

View File

@@ -0,0 +1,93 @@
"""Main FastAPI application for AI Insights Service."""
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from contextlib import asynccontextmanager
import structlog
from app.core.config import settings
from app.core.database import init_db, close_db
from app.api import insights
# Configure structured logging
structlog.configure(
processors=[
structlog.processors.TimeStamper(fmt="iso"),
structlog.processors.JSONRenderer()
]
)
logger = structlog.get_logger()
@asynccontextmanager
async def lifespan(app: FastAPI):
"""Lifespan event handler for startup and shutdown."""
# Startup
logger.info("Starting AI Insights Service", service=settings.SERVICE_NAME, version=settings.SERVICE_VERSION)
await init_db()
logger.info("Database initialized")
yield
# Shutdown
logger.info("Shutting down AI Insights Service")
await close_db()
logger.info("Database connections closed")
# Create FastAPI app
app = FastAPI(
title="AI Insights Service",
description="Intelligent insights and recommendations for bakery operations",
version=settings.SERVICE_VERSION,
lifespan=lifespan
)
# CORS middleware
app.add_middleware(
CORSMiddleware,
allow_origins=settings.ALLOWED_ORIGINS,
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
# Include routers
app.include_router(
insights.router,
prefix=settings.API_V1_PREFIX,
tags=["insights"]
)
@app.get("/")
async def root():
"""Root endpoint."""
return {
"service": settings.SERVICE_NAME,
"version": settings.SERVICE_VERSION,
"status": "running"
}
@app.get("/health")
async def health_check():
"""Health check endpoint."""
return {
"status": "healthy",
"service": settings.SERVICE_NAME,
"version": settings.SERVICE_VERSION
}
if __name__ == "__main__":
import uvicorn
uvicorn.run(
"app.main:app",
host="0.0.0.0",
port=8000,
reload=True,
log_level=settings.LOG_LEVEL.lower()
)

View File

@@ -0,0 +1,672 @@
"""
Feedback Loop & Learning System
Enables continuous improvement through outcome tracking and model retraining
"""
import pandas as pd
import numpy as np
from typing import Dict, List, Any, Optional, Tuple
from datetime import datetime, timedelta
from uuid import UUID
import structlog
from scipy import stats
from collections import defaultdict
logger = structlog.get_logger()
class FeedbackLearningSystem:
"""
Manages feedback collection, model performance tracking, and retraining triggers.
Key Responsibilities:
1. Aggregate feedback from applied insights
2. Calculate model performance metrics (accuracy, precision, recall)
3. Detect performance degradation
4. Trigger automatic retraining when needed
5. Calibrate confidence scores based on actual accuracy
6. Generate learning insights for model improvement
Workflow:
- Feedback continuously recorded via AIInsightsClient
- Periodic performance analysis (daily/weekly)
- Automatic alerts when performance degrades
- Retraining recommendations with priority
"""
def __init__(
self,
performance_threshold: float = 0.85, # Minimum acceptable accuracy
degradation_threshold: float = 0.10, # 10% drop triggers alert
min_feedback_samples: int = 30, # Minimum samples for analysis
retraining_window_days: int = 90 # Consider last 90 days
):
self.performance_threshold = performance_threshold
self.degradation_threshold = degradation_threshold
self.min_feedback_samples = min_feedback_samples
self.retraining_window_days = retraining_window_days
async def analyze_model_performance(
self,
model_name: str,
feedback_data: pd.DataFrame,
baseline_performance: Optional[Dict[str, float]] = None
) -> Dict[str, Any]:
"""
Analyze model performance based on feedback data.
Args:
model_name: Name of the model (e.g., 'hybrid_forecaster', 'yield_predictor')
feedback_data: DataFrame with columns:
- insight_id
- applied_at
- outcome_date
- predicted_value
- actual_value
- error
- error_pct
- accuracy
baseline_performance: Optional baseline metrics for comparison
Returns:
Performance analysis with metrics, trends, and recommendations
"""
logger.info(
"Analyzing model performance",
model_name=model_name,
feedback_samples=len(feedback_data)
)
if len(feedback_data) < self.min_feedback_samples:
return self._insufficient_feedback_response(
model_name, len(feedback_data), self.min_feedback_samples
)
# Step 1: Calculate current performance metrics
current_metrics = self._calculate_performance_metrics(feedback_data)
# Step 2: Analyze performance trend over time
trend_analysis = self._analyze_performance_trend(feedback_data)
# Step 3: Detect performance degradation
degradation_detected = self._detect_performance_degradation(
current_metrics, baseline_performance, trend_analysis
)
# Step 4: Generate retraining recommendation
retraining_recommendation = self._generate_retraining_recommendation(
model_name, current_metrics, degradation_detected, trend_analysis
)
# Step 5: Identify error patterns
error_patterns = self._identify_error_patterns(feedback_data)
# Step 6: Calculate confidence calibration
confidence_calibration = self._calculate_confidence_calibration(feedback_data)
logger.info(
"Model performance analysis complete",
model_name=model_name,
current_accuracy=current_metrics['accuracy'],
degradation_detected=degradation_detected['detected'],
retraining_recommended=retraining_recommendation['recommended']
)
return {
'model_name': model_name,
'analyzed_at': datetime.utcnow().isoformat(),
'feedback_samples': len(feedback_data),
'date_range': {
'start': feedback_data['outcome_date'].min().isoformat(),
'end': feedback_data['outcome_date'].max().isoformat()
},
'current_performance': current_metrics,
'baseline_performance': baseline_performance,
'trend_analysis': trend_analysis,
'degradation_detected': degradation_detected,
'retraining_recommendation': retraining_recommendation,
'error_patterns': error_patterns,
'confidence_calibration': confidence_calibration
}
def _insufficient_feedback_response(
self, model_name: str, current_samples: int, required_samples: int
) -> Dict[str, Any]:
"""Return response when insufficient feedback data."""
return {
'model_name': model_name,
'analyzed_at': datetime.utcnow().isoformat(),
'status': 'insufficient_feedback',
'feedback_samples': current_samples,
'required_samples': required_samples,
'current_performance': None,
'recommendation': f'Need {required_samples - current_samples} more feedback samples for reliable analysis'
}
def _calculate_performance_metrics(
self, feedback_data: pd.DataFrame
) -> Dict[str, float]:
"""
Calculate comprehensive performance metrics.
Metrics:
- Accuracy: % of predictions within acceptable error
- MAE: Mean Absolute Error
- RMSE: Root Mean Squared Error
- MAPE: Mean Absolute Percentage Error
- Bias: Systematic over/under prediction
- R²: Correlation between predicted and actual
"""
predicted = feedback_data['predicted_value'].values
actual = feedback_data['actual_value'].values
# Filter out invalid values
valid_mask = ~(np.isnan(predicted) | np.isnan(actual))
predicted = predicted[valid_mask]
actual = actual[valid_mask]
if len(predicted) == 0:
return {
'accuracy': 0,
'mae': 0,
'rmse': 0,
'mape': 0,
'bias': 0,
'r_squared': 0
}
# Calculate errors
errors = predicted - actual
abs_errors = np.abs(errors)
# Percentage errors only where actual != 0 (avoids division by zero)
valid_pct_mask = actual != 0
pct_errors = np.full_like(errors, np.nan, dtype=float)
pct_errors[valid_pct_mask] = np.abs(errors[valid_pct_mask] / actual[valid_pct_mask]) * 100
# MAE and RMSE
mae = float(np.mean(abs_errors))
rmse = float(np.sqrt(np.mean(errors ** 2)))
# MAPE (excluding cases where actual = 0)
mape = float(np.mean(pct_errors[valid_pct_mask])) if np.any(valid_pct_mask) else 0
# Accuracy (% of comparable predictions within 10% error)
comparable = pct_errors[valid_pct_mask]
within_10pct = float(np.sum(comparable <= 10) / len(comparable) * 100) if len(comparable) > 0 else 0.0
# Bias (mean error - positive = over-prediction)
bias = float(np.mean(errors))
# R² (correlation)
if len(predicted) > 1 and np.std(actual) > 0:
correlation = np.corrcoef(predicted, actual)[0, 1]
r_squared = correlation ** 2
else:
r_squared = 0
return {
'accuracy': round(within_10pct, 2), # % within 10% error
'mae': round(mae, 2),
'rmse': round(rmse, 2),
'mape': round(mape, 2),
'bias': round(bias, 2),
'r_squared': round(r_squared, 3),
'sample_size': len(predicted)
}
def _analyze_performance_trend(
self, feedback_data: pd.DataFrame
) -> Dict[str, Any]:
"""
Analyze performance trend over time.
Returns trend direction (improving/stable/degrading) and slope.
"""
# Sort by date
df = feedback_data.sort_values('outcome_date').copy()
# Calculate rolling accuracy (7-day window)
df['rolling_accuracy'] = df['accuracy'].rolling(window=7, min_periods=3).mean()
# Linear trend
if len(df) >= 10:
# Use day index as x
df['day_index'] = (df['outcome_date'] - df['outcome_date'].min()).dt.days
# Fit linear regression
valid_mask = ~np.isnan(df['rolling_accuracy'])
if valid_mask.sum() >= 10:
x = df.loc[valid_mask, 'day_index'].values
y = df.loc[valid_mask, 'rolling_accuracy'].values
slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)
# Determine trend
if p_value < 0.05:
if slope > 0.1:
trend = 'improving'
elif slope < -0.1:
trend = 'degrading'
else:
trend = 'stable'
else:
trend = 'stable'
return {
'trend': trend,
'slope': round(float(slope), 4),
'p_value': round(float(p_value), 4),
'significant': p_value < 0.05,
'recent_performance': round(float(df['rolling_accuracy'].iloc[-1]), 2),
'initial_performance': round(float(df['rolling_accuracy'].dropna().iloc[0]), 2)
}
# Not enough data for trend
return {
'trend': 'insufficient_data',
'slope': 0,
'p_value': 1.0,
'significant': False
}
def _detect_performance_degradation(
self,
current_metrics: Dict[str, float],
baseline_performance: Optional[Dict[str, float]],
trend_analysis: Dict[str, Any]
) -> Dict[str, Any]:
"""
Detect if model performance has degraded.
Degradation triggers:
1. Current accuracy below threshold (85%)
2. Significant drop from baseline (>10%)
3. Degrading trend detected
"""
degradation_reasons = []
severity = 'none'
# Check absolute performance
if current_metrics['accuracy'] < self.performance_threshold * 100:
degradation_reasons.append(
f"Accuracy {current_metrics['accuracy']:.1f}% below threshold {self.performance_threshold*100}%"
)
severity = 'high'
# Check vs baseline
if baseline_performance and 'accuracy' in baseline_performance:
baseline_acc = baseline_performance['accuracy']
current_acc = current_metrics['accuracy']
drop_pct = (baseline_acc - current_acc) / baseline_acc
if drop_pct > self.degradation_threshold:
degradation_reasons.append(
f"Accuracy dropped {drop_pct*100:.1f}% from baseline {baseline_acc:.1f}%"
)
severity = 'high'
# Check trend
if trend_analysis.get('trend') == 'degrading' and trend_analysis.get('significant'):
degradation_reasons.append(
f"Degrading trend detected (slope: {trend_analysis['slope']:.4f})"
)
severity = 'medium' if severity == 'none' else severity
detected = len(degradation_reasons) > 0
return {
'detected': detected,
'severity': severity,
'reasons': degradation_reasons,
'current_accuracy': current_metrics['accuracy'],
'baseline_accuracy': baseline_performance.get('accuracy') if baseline_performance else None
}
def _generate_retraining_recommendation(
self,
model_name: str,
current_metrics: Dict[str, float],
degradation_detected: Dict[str, Any],
trend_analysis: Dict[str, Any]
) -> Dict[str, Any]:
"""
Generate retraining recommendation based on performance analysis.
Priority Levels:
- urgent: Severe degradation, retrain immediately
- high: Performance below threshold, retrain soon
- medium: Trending down, schedule retraining
- low: Stable, routine retraining
- none: No retraining needed
"""
if degradation_detected['detected']:
severity = degradation_detected['severity']
if severity == 'high':
priority = 'urgent'
recommendation = f"Retrain {model_name} immediately - severe performance degradation"
elif severity == 'medium':
priority = 'high'
recommendation = f"Schedule {model_name} retraining within 7 days"
else:
priority = 'medium'
recommendation = f"Schedule routine {model_name} retraining"
return {
'recommended': True,
'priority': priority,
'recommendation': recommendation,
'reasons': degradation_detected['reasons'],
'estimated_improvement': self._estimate_retraining_benefit(
current_metrics, degradation_detected
)
}
# Check if routine retraining is due (e.g., every 90 days)
# This would require tracking last_retrained_at
else:
return {
'recommended': False,
'priority': 'none',
'recommendation': f"{model_name} performance is acceptable, no immediate retraining needed",
'next_review_date': (datetime.utcnow() + timedelta(days=30)).isoformat()
}
def _estimate_retraining_benefit(
self,
current_metrics: Dict[str, float],
degradation_detected: Dict[str, Any]
) -> Dict[str, Any]:
"""Estimate expected improvement from retraining."""
baseline_acc = degradation_detected.get('baseline_accuracy')
current_acc = current_metrics['accuracy']
if baseline_acc:
# Expect to recover 70-80% of lost performance
expected_improvement = (baseline_acc - current_acc) * 0.75
expected_new_acc = current_acc + expected_improvement
return {
'expected_accuracy_improvement': round(expected_improvement, 2),
'expected_new_accuracy': round(expected_new_acc, 2),
'confidence': 'medium'
}
return {
'expected_accuracy_improvement': 'unknown',
'confidence': 'low'
}
def _identify_error_patterns(
self, feedback_data: pd.DataFrame
) -> List[Dict[str, Any]]:
"""
Identify systematic error patterns.
Patterns:
- Consistent over/under prediction
- Higher errors for specific ranges
- Day-of-week effects
- Seasonal effects
"""
patterns = []
# Pattern 1: Systematic bias
mean_error = feedback_data['error'].mean()
if abs(mean_error) > feedback_data['error'].std() * 0.5:
direction = 'over-prediction' if mean_error > 0 else 'under-prediction'
patterns.append({
'pattern': 'systematic_bias',
'description': f'Consistent {direction} by {abs(mean_error):.1f} units',
'severity': 'high' if abs(mean_error) > 10 else 'medium',
'recommendation': 'Recalibrate model bias term'
})
# Pattern 2: High error for large values
if 'predicted_value' in feedback_data.columns:
# Split into quartiles
feedback_data['value_quartile'] = pd.qcut(
feedback_data['predicted_value'],
q=4,
labels=['Q1', 'Q2', 'Q3', 'Q4'],
duplicates='drop'
)
quartile_errors = feedback_data.groupby('value_quartile')['error_pct'].mean()
if len(quartile_errors) == 4 and quartile_errors['Q4'] > quartile_errors['Q1'] * 1.5:
patterns.append({
'pattern': 'high_value_error',
'description': f'Higher errors for large predictions (Q4: {quartile_errors["Q4"]:.1f}% vs Q1: {quartile_errors["Q1"]:.1f}%)',
'severity': 'medium',
'recommendation': 'Add log transformation or separate model for high values'
})
# Pattern 3: Day-of-week effect
if 'outcome_date' in feedback_data.columns:
feedback_data['day_of_week'] = pd.to_datetime(feedback_data['outcome_date']).dt.dayofweek
dow_errors = feedback_data.groupby('day_of_week')['error_pct'].mean()
if len(dow_errors) >= 5 and dow_errors.max() > dow_errors.min() * 1.5:
worst_day = dow_errors.idxmax()
day_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
patterns.append({
'pattern': 'day_of_week_effect',
'description': f'Higher errors on {day_names[worst_day]} ({dow_errors[worst_day]:.1f}%)',
'severity': 'low',
'recommendation': 'Add day-of-week features to model'
})
return patterns
def _calculate_confidence_calibration(
self, feedback_data: pd.DataFrame
) -> Dict[str, Any]:
"""
Calculate how well confidence scores match actual accuracy.
Well-calibrated model: 80% confidence → 80% accuracy
"""
if 'confidence' not in feedback_data.columns:
return {'calibrated': False, 'reason': 'No confidence scores available'}
# Bin by confidence ranges
feedback_data['confidence_bin'] = pd.cut(
feedback_data['confidence'],
bins=[0, 60, 70, 80, 90, 100],
labels=['<60', '60-70', '70-80', '80-90', '90+']
)
calibration_results = []
for conf_bin in feedback_data['confidence_bin'].unique():
if pd.isna(conf_bin):
continue
bin_data = feedback_data[feedback_data['confidence_bin'] == conf_bin]
if len(bin_data) >= 5:
avg_confidence = bin_data['confidence'].mean()
avg_accuracy = bin_data['accuracy'].mean()
calibration_error = abs(avg_confidence - avg_accuracy)
calibration_results.append({
'confidence_range': str(conf_bin),
'avg_confidence': round(avg_confidence, 1),
'avg_accuracy': round(avg_accuracy, 1),
'calibration_error': round(calibration_error, 1),
'sample_size': len(bin_data),
'well_calibrated': calibration_error < 10
})
# Overall calibration
if calibration_results:
overall_calibration_error = np.mean([r['calibration_error'] for r in calibration_results])
well_calibrated = overall_calibration_error < 10
return {
'calibrated': well_calibrated,
'overall_calibration_error': round(overall_calibration_error, 2),
'by_confidence_range': calibration_results,
'recommendation': 'Confidence scores are well-calibrated' if well_calibrated
else 'Recalibrate confidence scoring algorithm'
}
return {'calibrated': False, 'reason': 'Insufficient data for calibration analysis'}
async def generate_learning_insights(
self,
performance_analyses: List[Dict[str, Any]],
tenant_id: str
) -> List[Dict[str, Any]]:
"""
Generate high-level insights about learning system performance.
Args:
performance_analyses: List of model performance analyses
tenant_id: Tenant identifier
Returns:
Learning insights for system improvement
"""
insights = []
# Insight 1: Models needing urgent retraining
urgent_models = [
a for a in performance_analyses
if a.get('retraining_recommendation', {}).get('priority') == 'urgent'
]
if urgent_models:
model_names = ', '.join([a['model_name'] for a in urgent_models])
insights.append({
'type': 'warning',
'priority': 'urgent',
'category': 'system',
'title': f'Urgent Model Retraining Required: {len(urgent_models)} Models',
'description': f'Models requiring immediate retraining: {model_names}. Performance has degraded significantly.',
'impact_type': 'system_health',
'confidence': 95,
'metrics_json': {
'tenant_id': tenant_id,
'urgent_models': [a['model_name'] for a in urgent_models],
'affected_count': len(urgent_models)
},
'actionable': True,
'recommendation_actions': [{
'label': 'Retrain Models',
'action': 'trigger_model_retraining',
'params': {'models': [a['model_name'] for a in urgent_models]}
}],
'source_service': 'ai_insights',
'source_model': 'feedback_learning_system'
})
# Insight 2: Overall system health
total_models = len(performance_analyses)
healthy_models = [
a for a in performance_analyses
if not a.get('degradation_detected', {}).get('detected', False)
]
health_pct = (len(healthy_models) / total_models * 100) if total_models > 0 else 0
if health_pct < 80:
insights.append({
'type': 'warning',
'priority': 'high',
'category': 'system',
'title': f'Learning System Health: {health_pct:.0f}%',
'description': f'{len(healthy_models)} of {total_models} models are performing well. System-wide performance review recommended.',
'impact_type': 'system_health',
'confidence': 90,
'metrics_json': {
'tenant_id': tenant_id,
'total_models': total_models,
'healthy_models': len(healthy_models),
'health_percentage': round(health_pct, 1)
},
'actionable': True,
'recommendation_actions': [{
'label': 'Review System Health',
'action': 'review_learning_system',
'params': {'tenant_id': tenant_id}
}],
'source_service': 'ai_insights',
'source_model': 'feedback_learning_system'
})
# Insight 3: Confidence calibration issues
poorly_calibrated = [
a for a in performance_analyses
if not a.get('confidence_calibration', {}).get('calibrated', True)
]
if poorly_calibrated:
insights.append({
'type': 'opportunity',
'priority': 'medium',
'category': 'system',
'title': f'Confidence Calibration Needed: {len(poorly_calibrated)} Models',
'description': 'Confidence scores do not match actual accuracy. Recalibration recommended.',
'impact_type': 'system_improvement',
'confidence': 85,
'metrics_json': {
'tenant_id': tenant_id,
'models_needing_calibration': [a['model_name'] for a in poorly_calibrated]
},
'actionable': True,
'recommendation_actions': [{
'label': 'Recalibrate Confidence Scores',
'action': 'recalibrate_confidence',
'params': {'models': [a['model_name'] for a in poorly_calibrated]}
}],
'source_service': 'ai_insights',
'source_model': 'feedback_learning_system'
})
return insights
async def calculate_roi(
self,
feedback_data: pd.DataFrame,
insight_type: str
) -> Dict[str, Any]:
"""
Calculate ROI for applied insights.
Args:
feedback_data: Feedback data with business impact metrics
insight_type: Type of insight (e.g., 'demand_forecast', 'safety_stock')
Returns:
ROI calculation with cost savings and accuracy metrics
"""
if len(feedback_data) == 0:
return {'status': 'insufficient_data', 'samples': 0}
# Calculate accuracy
avg_accuracy = feedback_data['accuracy'].mean()
# Estimate cost savings (would be more sophisticated in production)
# For now, use impact_value from insights if available
if 'impact_value' in feedback_data.columns:
total_impact = feedback_data['impact_value'].sum()
avg_impact = feedback_data['impact_value'].mean()
return {
'insight_type': insight_type,
'samples': len(feedback_data),
'avg_accuracy': round(avg_accuracy, 2),
'total_impact_value': round(total_impact, 2),
'avg_impact_per_insight': round(avg_impact, 2),
'roi_validated': True
}
return {
'insight_type': insight_type,
'samples': len(feedback_data),
'avg_accuracy': round(avg_accuracy, 2),
'roi_validated': False,
'note': 'Impact values not tracked in feedback'
}
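
A minimal usage sketch for the ROI helper above, assuming the module is importable inside the service as `app.ml.feedback_learning_system` (the test suite imports it via `services.ai_insights.app.ml.feedback_learning_system`); the feedback rows are made up:

```python
import asyncio

import pandas as pd

from app.ml.feedback_learning_system import FeedbackLearningSystem  # assumed service-local path

async def demo_roi() -> None:
    system = FeedbackLearningSystem(
        performance_threshold=0.85,
        degradation_threshold=0.10,
        min_feedback_samples=30,
    )
    # Two illustrative feedback rows with tracked impact values
    feedback = pd.DataFrame([
        {"accuracy": 92.0, "impact_value": 1200.0},
        {"accuracy": 88.0, "impact_value": 900.0},
    ])
    roi = await system.calculate_roi(feedback, insight_type="demand_forecast")
    print(roi)  # avg_accuracy 90.0, total_impact_value 2100.0, roi_validated True

asyncio.run(demo_roi())
```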

View File

@@ -0,0 +1,11 @@
"""Database models for AI Insights Service."""
from app.models.ai_insight import AIInsight
from app.models.insight_feedback import InsightFeedback
from app.models.insight_correlation import InsightCorrelation
__all__ = [
"AIInsight",
"InsightFeedback",
"InsightCorrelation",
]

View File

@@ -0,0 +1,129 @@
"""AI Insight database model."""
from sqlalchemy import Column, String, Integer, Boolean, DECIMAL, TIMESTAMP, Text, Index, CheckConstraint
from sqlalchemy.dialects.postgresql import UUID, JSONB
from sqlalchemy.sql import func
import uuid
from app.core.database import Base
class AIInsight(Base):
"""AI Insight model for storing intelligent recommendations and predictions."""
__tablename__ = "ai_insights"
# Primary Key
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
# Tenant Information
tenant_id = Column(UUID(as_uuid=True), nullable=False, index=True)
# Classification
type = Column(
String(50),
nullable=False,
index=True,
comment="optimization, alert, prediction, recommendation, insight, anomaly"
)
priority = Column(
String(20),
nullable=False,
index=True,
comment="low, medium, high, critical"
)
category = Column(
String(50),
nullable=False,
index=True,
comment="forecasting, inventory, production, procurement, customer, cost, quality, efficiency, demand, maintenance, energy, scheduling"
)
# Content
title = Column(String(255), nullable=False)
description = Column(Text, nullable=False)
# Impact Information
impact_type = Column(
String(50),
comment="cost_savings, revenue_increase, waste_reduction, efficiency_gain, quality_improvement, risk_mitigation"
)
impact_value = Column(DECIMAL(10, 2), comment="Numeric impact value")
impact_unit = Column(
String(20),
comment="euros, percentage, hours, units, euros/month, euros/year"
)
# Confidence and Metrics
confidence = Column(
Integer,
CheckConstraint('confidence >= 0 AND confidence <= 100'),
nullable=False,
index=True,
comment="Confidence score 0-100"
)
metrics_json = Column(
JSONB,
comment="Dynamic metrics specific to insight type"
)
# Actionability
actionable = Column(
Boolean,
default=True,
nullable=False,
index=True,
comment="Whether this insight can be acted upon"
)
recommendation_actions = Column(
JSONB,
comment="List of possible actions: [{label, action, endpoint}]"
)
# Status
status = Column(
String(20),
default='new',
nullable=False,
index=True,
comment="new, acknowledged, in_progress, applied, dismissed, expired"
)
# Source Information
source_service = Column(
String(50),
comment="Service that generated this insight"
)
source_data_id = Column(
String(100),
comment="Reference to source data (e.g., forecast_id, model_id)"
)
# Timestamps
created_at = Column(
TIMESTAMP(timezone=True),
server_default=func.now(),
nullable=False,
index=True
)
updated_at = Column(
TIMESTAMP(timezone=True),
server_default=func.now(),
onupdate=func.now(),
nullable=False
)
applied_at = Column(TIMESTAMP(timezone=True), comment="When insight was applied")
expired_at = Column(
TIMESTAMP(timezone=True),
comment="When insight expires (auto-calculated based on TTL)"
)
# Composite Indexes
__table_args__ = (
Index('idx_tenant_status_category', 'tenant_id', 'status', 'category'),
Index('idx_tenant_created_confidence', 'tenant_id', 'created_at', 'confidence'),
Index('idx_actionable_status', 'actionable', 'status'),
)
def __repr__(self):
return f"<AIInsight(id={self.id}, type={self.type}, title={self.title[:30]}, confidence={self.confidence})>"

View File

@@ -0,0 +1,69 @@
"""Insight Correlation database model for cross-service intelligence."""
from sqlalchemy import Column, String, Integer, DECIMAL, TIMESTAMP, ForeignKey, Index
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.sql import func
from sqlalchemy.orm import relationship
import uuid
from app.core.database import Base
class InsightCorrelation(Base):
"""Track correlations between insights from different services."""
__tablename__ = "insight_correlations"
# Primary Key
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
# Foreign Keys to AIInsights
parent_insight_id = Column(
UUID(as_uuid=True),
ForeignKey('ai_insights.id', ondelete='CASCADE'),
nullable=False,
index=True,
comment="Primary insight that leads to correlation"
)
child_insight_id = Column(
UUID(as_uuid=True),
ForeignKey('ai_insights.id', ondelete='CASCADE'),
nullable=False,
index=True,
comment="Related insight"
)
# Correlation Information
correlation_type = Column(
String(50),
nullable=False,
comment="forecast_inventory, production_procurement, weather_customer, demand_supplier, etc."
)
correlation_strength = Column(
DECIMAL(3, 2),
nullable=False,
comment="0.00 to 1.00 indicating strength of correlation"
)
# Combined Metrics
combined_confidence = Column(
Integer,
comment="Weighted combined confidence of both insights"
)
# Timestamp
created_at = Column(
TIMESTAMP(timezone=True),
server_default=func.now(),
nullable=False,
index=True
)
# Composite Indexes
__table_args__ = (
Index('idx_parent_child', 'parent_insight_id', 'child_insight_id'),
Index('idx_correlation_type', 'correlation_type'),
)
def __repr__(self):
return f"<InsightCorrelation(id={self.id}, type={self.correlation_type}, strength={self.correlation_strength})>"

View File

@@ -0,0 +1,87 @@
"""Insight Feedback database model for closed-loop learning."""
from sqlalchemy import Column, String, Boolean, DECIMAL, TIMESTAMP, Text, ForeignKey, Index
from sqlalchemy.dialects.postgresql import UUID, JSONB
from sqlalchemy.sql import func
from sqlalchemy.orm import relationship
import uuid
from app.core.database import Base
class InsightFeedback(Base):
"""Feedback tracking for AI Insights to enable learning."""
__tablename__ = "insight_feedback"
# Primary Key
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
# Foreign Key to AIInsight
insight_id = Column(
UUID(as_uuid=True),
ForeignKey('ai_insights.id', ondelete='CASCADE'),
nullable=False,
index=True
)
# Action Information
action_taken = Column(
String(100),
comment="Specific action that was taken from recommendation_actions"
)
# Result Data
result_data = Column(
JSONB,
comment="Detailed result data from applying the insight"
)
# Success Tracking
success = Column(
Boolean,
nullable=False,
index=True,
comment="Whether the insight application was successful"
)
error_message = Column(
Text,
comment="Error message if success = false"
)
# Impact Comparison
expected_impact_value = Column(
DECIMAL(10, 2),
comment="Expected impact value from original insight"
)
actual_impact_value = Column(
DECIMAL(10, 2),
comment="Measured actual impact after application"
)
variance_percentage = Column(
DECIMAL(5, 2),
comment="(actual - expected) / expected * 100"
)
# User Information
applied_by = Column(
String(100),
comment="User or system that applied the insight"
)
# Timestamp
created_at = Column(
TIMESTAMP(timezone=True),
server_default=func.now(),
nullable=False,
index=True
)
# Composite Indexes
__table_args__ = (
Index('idx_insight_success', 'insight_id', 'success'),
Index('idx_created_success', 'created_at', 'success'),
)
def __repr__(self):
return f"<InsightFeedback(id={self.id}, insight_id={self.insight_id}, success={self.success})>"

View File

@@ -0,0 +1,9 @@
"""Repositories for AI Insights Service."""
from app.repositories.insight_repository import InsightRepository
from app.repositories.feedback_repository import FeedbackRepository
__all__ = [
"InsightRepository",
"FeedbackRepository",
]

View File

@@ -0,0 +1,81 @@
"""Repository for Insight Feedback database operations."""
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select, and_, desc
from typing import Optional, List
from uuid import UUID
from decimal import Decimal
from app.models.ai_insight import AIInsight
from app.models.insight_feedback import InsightFeedback
from app.schemas.feedback import InsightFeedbackCreate
class FeedbackRepository:
"""Repository for Insight Feedback operations."""
def __init__(self, session: AsyncSession):
self.session = session
async def create(self, feedback_data: InsightFeedbackCreate) -> InsightFeedback:
"""Create feedback for an insight."""
# Calculate variance if both values provided
variance = None
if (feedback_data.expected_impact_value is not None and
feedback_data.actual_impact_value is not None and
feedback_data.expected_impact_value != 0):
variance = (
(feedback_data.actual_impact_value - feedback_data.expected_impact_value) /
feedback_data.expected_impact_value * 100
)
feedback = InsightFeedback(
**feedback_data.model_dump(exclude={'variance_percentage'}),
variance_percentage=variance
)
self.session.add(feedback)
await self.session.flush()
await self.session.refresh(feedback)
return feedback
async def get_by_id(self, feedback_id: UUID) -> Optional[InsightFeedback]:
"""Get feedback by ID."""
query = select(InsightFeedback).where(InsightFeedback.id == feedback_id)
result = await self.session.execute(query)
return result.scalar_one_or_none()
async def get_by_insight(self, insight_id: UUID) -> List[InsightFeedback]:
"""Get all feedback for an insight."""
query = select(InsightFeedback).where(
InsightFeedback.insight_id == insight_id
).order_by(desc(InsightFeedback.created_at))
result = await self.session.execute(query)
return list(result.scalars().all())
    async def get_success_rate(self, insight_type: Optional[str] = None) -> float:
        """Calculate success rate for insights, optionally filtered by insight type."""
        query = select(InsightFeedback)
        if insight_type:
            # Join the parent insight so feedback can be filtered by its type
            query = query.join(AIInsight, InsightFeedback.insight_id == AIInsight.id).where(
                AIInsight.type == insight_type
            )
        result = await self.session.execute(query)
        feedbacks = result.scalars().all()
if not feedbacks:
return 0.0
successful = sum(1 for f in feedbacks if f.success)
return (successful / len(feedbacks)) * 100
async def get_average_impact_variance(self) -> Decimal:
"""Calculate average variance between expected and actual impact."""
query = select(InsightFeedback).where(
InsightFeedback.variance_percentage.isnot(None)
)
result = await self.session.execute(query)
feedbacks = result.scalars().all()
if not feedbacks:
return Decimal('0.0')
avg_variance = sum(f.variance_percentage for f in feedbacks) / len(feedbacks)
return Decimal(str(round(float(avg_variance), 2)))
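
A sketch of recording feedback through this repository, with the variance formula worked through; the values are illustrative and `insight_id` is assumed to reference an existing `ai_insights` row:

```python
from decimal import Decimal
from uuid import UUID

from sqlalchemy.ext.asyncio import AsyncSession

from app.repositories.feedback_repository import FeedbackRepository
from app.schemas.feedback import InsightFeedbackCreate

async def record_feedback(session: AsyncSession, insight_id: UUID) -> None:
    repo = FeedbackRepository(session)
    feedback = await repo.create(InsightFeedbackCreate(
        insight_id=insight_id,  # must reference an existing ai_insights.id
        action_taken="adjust_safety_stock",
        success=True,
        expected_impact_value=Decimal("1000.00"),
        actual_impact_value=Decimal("850.00"),
    ))
    # variance_percentage = (850 - 1000) / 1000 * 100 = -15.00
    print(feedback.variance_percentage)
```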

View File

@@ -0,0 +1,254 @@
"""Repository for AI Insight database operations."""
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select, func, and_, or_, desc
from sqlalchemy.orm import selectinload
from typing import Optional, List, Dict, Any
from uuid import UUID
from datetime import datetime, timedelta
from app.models.ai_insight import AIInsight
from app.schemas.insight import AIInsightCreate, AIInsightUpdate, InsightFilters
class InsightRepository:
"""Repository for AI Insight operations."""
def __init__(self, session: AsyncSession):
self.session = session
async def create(self, insight_data: AIInsightCreate) -> AIInsight:
"""Create a new AI Insight."""
# Calculate expiration date (default 7 days from now)
from app.core.config import settings
expired_at = datetime.utcnow() + timedelta(days=settings.DEFAULT_INSIGHT_TTL_DAYS)
insight = AIInsight(
**insight_data.model_dump(),
expired_at=expired_at
)
self.session.add(insight)
await self.session.flush()
await self.session.refresh(insight)
return insight
async def get_by_id(self, insight_id: UUID) -> Optional[AIInsight]:
"""Get insight by ID."""
query = select(AIInsight).where(AIInsight.id == insight_id)
result = await self.session.execute(query)
return result.scalar_one_or_none()
async def get_by_tenant(
self,
tenant_id: UUID,
filters: Optional[InsightFilters] = None,
skip: int = 0,
limit: int = 100
) -> tuple[List[AIInsight], int]:
"""Get insights for a tenant with filters and pagination."""
# Build base query
query = select(AIInsight).where(AIInsight.tenant_id == tenant_id)
# Apply filters
if filters:
if filters.category and filters.category != 'all':
query = query.where(AIInsight.category == filters.category)
if filters.priority and filters.priority != 'all':
query = query.where(AIInsight.priority == filters.priority)
if filters.status and filters.status != 'all':
query = query.where(AIInsight.status == filters.status)
if filters.actionable_only:
query = query.where(AIInsight.actionable == True)
if filters.min_confidence > 0:
query = query.where(AIInsight.confidence >= filters.min_confidence)
if filters.source_service:
query = query.where(AIInsight.source_service == filters.source_service)
if filters.from_date:
query = query.where(AIInsight.created_at >= filters.from_date)
if filters.to_date:
query = query.where(AIInsight.created_at <= filters.to_date)
# Get total count
count_query = select(func.count()).select_from(query.subquery())
total_result = await self.session.execute(count_query)
total = total_result.scalar() or 0
# Apply ordering, pagination
query = query.order_by(desc(AIInsight.confidence), desc(AIInsight.created_at))
query = query.offset(skip).limit(limit)
# Execute query
result = await self.session.execute(query)
insights = result.scalars().all()
return list(insights), total
async def get_orchestration_ready_insights(
self,
tenant_id: UUID,
target_date: datetime,
min_confidence: int = 70
) -> Dict[str, List[AIInsight]]:
"""Get actionable insights for orchestration."""
query = select(AIInsight).where(
and_(
AIInsight.tenant_id == tenant_id,
AIInsight.actionable == True,
AIInsight.confidence >= min_confidence,
AIInsight.status.in_(['new', 'acknowledged']),
or_(
AIInsight.expired_at.is_(None),
AIInsight.expired_at > datetime.utcnow()
)
)
).order_by(desc(AIInsight.confidence))
result = await self.session.execute(query)
insights = result.scalars().all()
# Categorize insights
categorized = {
'forecast_adjustments': [],
'procurement_recommendations': [],
'production_optimizations': [],
'supplier_alerts': [],
'price_opportunities': []
}
for insight in insights:
if insight.category == 'forecasting':
categorized['forecast_adjustments'].append(insight)
elif insight.category == 'procurement':
if 'supplier' in insight.title.lower():
categorized['supplier_alerts'].append(insight)
elif 'price' in insight.title.lower():
categorized['price_opportunities'].append(insight)
else:
categorized['procurement_recommendations'].append(insight)
elif insight.category == 'production':
categorized['production_optimizations'].append(insight)
return categorized
async def update(self, insight_id: UUID, update_data: AIInsightUpdate) -> Optional[AIInsight]:
"""Update an insight."""
insight = await self.get_by_id(insight_id)
if not insight:
return None
for field, value in update_data.model_dump(exclude_unset=True).items():
setattr(insight, field, value)
await self.session.flush()
await self.session.refresh(insight)
return insight
async def delete(self, insight_id: UUID) -> bool:
"""Delete (dismiss) an insight."""
insight = await self.get_by_id(insight_id)
if not insight:
return False
insight.status = 'dismissed'
await self.session.flush()
return True
async def get_metrics(self, tenant_id: UUID) -> Dict[str, Any]:
"""Get aggregate metrics for insights."""
query = select(AIInsight).where(
and_(
AIInsight.tenant_id == tenant_id,
AIInsight.status != 'dismissed',
or_(
AIInsight.expired_at.is_(None),
AIInsight.expired_at > datetime.utcnow()
)
)
)
result = await self.session.execute(query)
insights = result.scalars().all()
if not insights:
return {
'total_insights': 0,
'actionable_insights': 0,
'average_confidence': 0,
'high_priority_count': 0,
'medium_priority_count': 0,
'low_priority_count': 0,
'critical_priority_count': 0,
'by_category': {},
'by_status': {},
'total_potential_impact': 0
}
# Calculate metrics
total = len(insights)
actionable = sum(1 for i in insights if i.actionable)
avg_confidence = sum(i.confidence for i in insights) / total if total > 0 else 0
# Priority counts
priority_counts = {
'high': sum(1 for i in insights if i.priority == 'high'),
'medium': sum(1 for i in insights if i.priority == 'medium'),
'low': sum(1 for i in insights if i.priority == 'low'),
'critical': sum(1 for i in insights if i.priority == 'critical')
}
# By category
by_category = {}
for insight in insights:
by_category[insight.category] = by_category.get(insight.category, 0) + 1
# By status
by_status = {}
for insight in insights:
by_status[insight.status] = by_status.get(insight.status, 0) + 1
# Total potential impact
total_impact = sum(
float(i.impact_value) for i in insights
if i.impact_value and i.impact_type in ['cost_savings', 'revenue_increase']
)
return {
'total_insights': total,
'actionable_insights': actionable,
'average_confidence': round(avg_confidence, 1),
'high_priority_count': priority_counts['high'],
'medium_priority_count': priority_counts['medium'],
'low_priority_count': priority_counts['low'],
'critical_priority_count': priority_counts['critical'],
'by_category': by_category,
'by_status': by_status,
'total_potential_impact': round(total_impact, 2)
}
async def expire_old_insights(self) -> int:
"""Mark expired insights as expired."""
query = select(AIInsight).where(
and_(
AIInsight.expired_at.isnot(None),
AIInsight.expired_at <= datetime.utcnow(),
AIInsight.status.notin_(['applied', 'dismissed', 'expired'])
)
)
result = await self.session.execute(query)
insights = result.scalars().all()
count = 0
for insight in insights:
insight.status = 'expired'
count += 1
await self.session.flush()
return count
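
A hedged usage sketch of the tenant query with filters; the tenant id and filter values are assumptions:

```python
from uuid import UUID

from sqlalchemy.ext.asyncio import AsyncSession

from app.repositories.insight_repository import InsightRepository
from app.schemas.insight import InsightFilters

async def list_actionable_forecasting_insights(session: AsyncSession, tenant_id: UUID) -> None:
    repo = InsightRepository(session)
    filters = InsightFilters(
        category="forecasting",
        actionable_only=True,
        min_confidence=70,
    )
    insights, total = await repo.get_by_tenant(tenant_id, filters=filters, skip=0, limit=20)
    print(f"{total} matching insights; returning {len(insights)}, ordered by confidence")
```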

View File

@@ -0,0 +1,27 @@
"""Pydantic schemas for AI Insights Service."""
from app.schemas.insight import (
AIInsightBase,
AIInsightCreate,
AIInsightUpdate,
AIInsightResponse,
AIInsightList,
InsightMetrics,
InsightFilters
)
from app.schemas.feedback import (
InsightFeedbackCreate,
InsightFeedbackResponse
)
__all__ = [
"AIInsightBase",
"AIInsightCreate",
"AIInsightUpdate",
"AIInsightResponse",
"AIInsightList",
"InsightMetrics",
"InsightFilters",
"InsightFeedbackCreate",
"InsightFeedbackResponse",
]

View File

@@ -0,0 +1,37 @@
"""Pydantic schemas for Insight Feedback."""
from pydantic import BaseModel, Field, ConfigDict
from typing import Optional, Dict, Any
from datetime import datetime
from uuid import UUID
from decimal import Decimal
class InsightFeedbackBase(BaseModel):
"""Base schema for Insight Feedback."""
action_taken: str
result_data: Optional[Dict[str, Any]] = Field(default_factory=dict)
success: bool
error_message: Optional[str] = None
expected_impact_value: Optional[Decimal] = None
actual_impact_value: Optional[Decimal] = None
variance_percentage: Optional[Decimal] = None
class InsightFeedbackCreate(InsightFeedbackBase):
"""Schema for creating feedback."""
insight_id: UUID
applied_by: Optional[str] = "system"
class InsightFeedbackResponse(InsightFeedbackBase):
"""Schema for feedback response."""
id: UUID
insight_id: UUID
applied_by: str
created_at: datetime
model_config = ConfigDict(from_attributes=True)

View File

@@ -0,0 +1,93 @@
"""Pydantic schemas for AI Insights."""
from pydantic import BaseModel, Field, ConfigDict
from typing import Optional, Dict, Any, List
from datetime import datetime
from uuid import UUID
from decimal import Decimal
class AIInsightBase(BaseModel):
"""Base schema for AI Insight."""
type: str = Field(..., description="optimization, alert, prediction, recommendation, insight, anomaly")
priority: str = Field(..., description="low, medium, high, critical")
category: str = Field(..., description="forecasting, inventory, production, procurement, customer, etc.")
title: str = Field(..., max_length=255)
description: str
impact_type: Optional[str] = Field(None, description="cost_savings, revenue_increase, waste_reduction, etc.")
impact_value: Optional[Decimal] = None
impact_unit: Optional[str] = Field(None, description="euros, percentage, hours, units, etc.")
confidence: int = Field(..., ge=0, le=100, description="Confidence score 0-100")
metrics_json: Optional[Dict[str, Any]] = Field(default_factory=dict)
actionable: bool = True
recommendation_actions: Optional[List[Dict[str, str]]] = Field(default_factory=list)
source_service: Optional[str] = None
source_data_id: Optional[str] = None
class AIInsightCreate(AIInsightBase):
"""Schema for creating a new AI Insight."""
tenant_id: UUID
class AIInsightUpdate(BaseModel):
"""Schema for updating an AI Insight."""
status: Optional[str] = Field(None, description="new, acknowledged, in_progress, applied, dismissed, expired")
applied_at: Optional[datetime] = None
model_config = ConfigDict(from_attributes=True)
class AIInsightResponse(AIInsightBase):
"""Schema for AI Insight response."""
id: UUID
tenant_id: UUID
status: str
created_at: datetime
updated_at: datetime
applied_at: Optional[datetime] = None
expired_at: Optional[datetime] = None
model_config = ConfigDict(from_attributes=True)
class AIInsightList(BaseModel):
"""Paginated list of AI Insights."""
items: List[AIInsightResponse]
total: int
page: int
page_size: int
total_pages: int
class InsightMetrics(BaseModel):
"""Aggregate metrics for insights."""
total_insights: int
actionable_insights: int
average_confidence: float
high_priority_count: int
medium_priority_count: int
low_priority_count: int
critical_priority_count: int
by_category: Dict[str, int]
by_status: Dict[str, int]
total_potential_impact: Optional[Decimal] = None
class InsightFilters(BaseModel):
"""Filters for querying insights."""
category: Optional[str] = None
priority: Optional[str] = None
status: Optional[str] = None
actionable_only: bool = False
min_confidence: int = 0
source_service: Optional[str] = None
from_date: Optional[datetime] = None
to_date: Optional[datetime] = None
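
A quick validation sketch for the create schema; the payload values are invented for illustration:

```python
from uuid import uuid4

from app.schemas.insight import AIInsightCreate

payload = AIInsightCreate(
    tenant_id=uuid4(),
    type="alert",
    priority="critical",
    category="procurement",
    title="Supplier lead time increased",
    description="Average lead time rose from 2 to 5 days over the last month.",
    confidence=88,
)
print(payload.model_dump())  # a confidence outside 0-100 would raise a ValidationError
```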

View File

@@ -0,0 +1,229 @@
"""Confidence scoring calculator for AI Insights."""
from typing import Dict, Any, Optional
from datetime import datetime, timedelta
import math
class ConfidenceCalculator:
"""
Calculate unified confidence scores across different insight types.
Confidence is calculated based on multiple factors:
- Data quality (completeness, consistency)
- Model performance (historical accuracy)
- Sample size (statistical significance)
- Recency (how recent is the data)
- Historical accuracy (past insight performance)
"""
# Weights for different factors
WEIGHTS = {
'data_quality': 0.25,
'model_performance': 0.30,
'sample_size': 0.20,
'recency': 0.15,
'historical_accuracy': 0.10
}
def calculate_confidence(
self,
data_quality_score: Optional[float] = None,
model_performance_score: Optional[float] = None,
sample_size: Optional[int] = None,
data_date: Optional[datetime] = None,
historical_accuracy: Optional[float] = None,
insight_type: Optional[str] = None
) -> int:
"""
Calculate overall confidence score (0-100).
Args:
data_quality_score: 0-1 score for data quality
model_performance_score: 0-1 score from model metrics (e.g., 1-MAPE)
sample_size: Number of data points used
data_date: Date of most recent data
historical_accuracy: 0-1 score from past insight performance
insight_type: Type of insight for specific adjustments
Returns:
int: Confidence score 0-100
"""
scores = {}
# Data Quality Score (0-100)
if data_quality_score is not None:
scores['data_quality'] = min(100, data_quality_score * 100)
else:
scores['data_quality'] = 70 # Default
# Model Performance Score (0-100)
if model_performance_score is not None:
scores['model_performance'] = min(100, model_performance_score * 100)
else:
scores['model_performance'] = 75 # Default
# Sample Size Score (0-100)
if sample_size is not None:
scores['sample_size'] = self._score_sample_size(sample_size)
else:
scores['sample_size'] = 60 # Default
# Recency Score (0-100)
if data_date is not None:
scores['recency'] = self._score_recency(data_date)
else:
scores['recency'] = 80 # Default
# Historical Accuracy Score (0-100)
if historical_accuracy is not None:
scores['historical_accuracy'] = min(100, historical_accuracy * 100)
else:
scores['historical_accuracy'] = 65 # Default
# Calculate weighted average
confidence = sum(
scores[factor] * self.WEIGHTS[factor]
for factor in scores
)
# Apply insight-type specific adjustments
confidence = self._apply_type_adjustments(confidence, insight_type)
return int(round(confidence))
def _score_sample_size(self, sample_size: int) -> float:
"""
        Score based on sample size, using a stepped scale with a logarithmic tail for large samples.
Args:
sample_size: Number of data points
Returns:
float: Score 0-100
"""
if sample_size <= 10:
return 30.0
elif sample_size <= 30:
return 50.0
elif sample_size <= 100:
return 70.0
elif sample_size <= 365:
return 85.0
else:
# Logarithmic scaling for larger samples
return min(100.0, 85 + (math.log10(sample_size) - math.log10(365)) * 10)
def _score_recency(self, data_date: datetime) -> float:
"""
Score based on data recency.
Args:
data_date: Date of most recent data
Returns:
float: Score 0-100
"""
days_old = (datetime.utcnow() - data_date).days
if days_old == 0:
return 100.0
elif days_old <= 1:
return 95.0
elif days_old <= 3:
return 90.0
elif days_old <= 7:
return 80.0
elif days_old <= 14:
return 70.0
elif days_old <= 30:
return 60.0
elif days_old <= 60:
return 45.0
else:
# Exponential decay for older data
return max(20.0, 60 * math.exp(-days_old / 60))
def _apply_type_adjustments(self, base_confidence: float, insight_type: Optional[str]) -> float:
"""
Apply insight-type specific confidence adjustments.
Args:
base_confidence: Base confidence score
insight_type: Type of insight
Returns:
float: Adjusted confidence
"""
if not insight_type:
return base_confidence
adjustments = {
'prediction': -5, # Predictions inherently less certain
'optimization': +2, # Optimizations based on solid math
'alert': +3, # Alerts based on thresholds
'recommendation': 0, # No adjustment
'insight': +2, # Insights from data analysis
'anomaly': -3 # Anomalies are uncertain
}
adjustment = adjustments.get(insight_type, 0)
return max(0, min(100, base_confidence + adjustment))
def calculate_forecast_confidence(
self,
model_mape: float,
forecast_horizon_days: int,
data_points: int,
last_data_date: datetime
) -> int:
"""
Specialized confidence calculation for forecasting insights.
Args:
model_mape: Model MAPE (Mean Absolute Percentage Error)
forecast_horizon_days: How many days ahead
data_points: Number of historical data points
last_data_date: Date of last training data
Returns:
int: Confidence score 0-100
"""
        # Model performance: 1 - (MAPE/100), floored at 0
model_score = max(0, 1 - (model_mape / 100))
# Horizon penalty: Longer horizons = less confidence
horizon_factor = max(0.5, 1 - (forecast_horizon_days / 30))
return self.calculate_confidence(
data_quality_score=0.9, # Assume good quality
model_performance_score=model_score * horizon_factor,
sample_size=data_points,
data_date=last_data_date,
insight_type='prediction'
)
def calculate_optimization_confidence(
self,
calculation_accuracy: float,
data_completeness: float,
sample_size: int
) -> int:
"""
Confidence for optimization recommendations.
Args:
calculation_accuracy: 0-1 score for optimization calculation reliability
data_completeness: 0-1 score for data completeness
sample_size: Number of data points
Returns:
int: Confidence score 0-100
"""
return self.calculate_confidence(
data_quality_score=data_completeness,
model_performance_score=calculation_accuracy,
sample_size=sample_size,
data_date=datetime.utcnow(),
insight_type='optimization'
)
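
A hand-computed walk-through of the weighted average used in `calculate_confidence`, with assumed per-factor scores; the type adjustment from `_apply_type_adjustments` is then applied on top:

```python
weights = {"data_quality": 0.25, "model_performance": 0.30,
           "sample_size": 0.20, "recency": 0.15, "historical_accuracy": 0.10}
scores = {"data_quality": 90, "model_performance": 80,
          "sample_size": 70, "recency": 95, "historical_accuracy": 65}

base = sum(scores[f] * weights[f] for f in scores)
print(base)             # 22.5 + 24.0 + 14.0 + 14.25 + 6.5 = 81.25
print(round(base - 5))  # a 'prediction' insight gets a -5 adjustment -> 76
```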

View File

@@ -0,0 +1,67 @@
"""Alembic environment configuration."""
from logging.config import fileConfig
from sqlalchemy import engine_from_config, pool
from alembic import context
import os
import sys
# Add parent directory to path for imports
sys.path.insert(0, os.path.realpath(os.path.join(os.path.dirname(__file__), '..')))
from app.core.config import settings
from app.core.database import Base
from app.models import * # Import all models
# this is the Alembic Config object
config = context.config
# Interpret the config file for Python logging
if config.config_file_name is not None:
fileConfig(config.config_file_name)
# Set sqlalchemy.url from settings
# Replace asyncpg with psycopg2 for synchronous Alembic migrations
db_url = settings.DATABASE_URL.replace('postgresql+asyncpg://', 'postgresql://')
config.set_main_option('sqlalchemy.url', db_url)
# Add your model's MetaData object here for 'autogenerate' support
target_metadata = Base.metadata
def run_migrations_offline() -> None:
"""Run migrations in 'offline' mode."""
url = config.get_main_option("sqlalchemy.url")
context.configure(
url=url,
target_metadata=target_metadata,
literal_binds=True,
dialect_opts={"paramstyle": "named"},
)
with context.begin_transaction():
context.run_migrations()
def run_migrations_online() -> None:
"""Run migrations in 'online' mode."""
connectable = engine_from_config(
config.get_section(config.config_ini_section, {}),
prefix="sqlalchemy.",
poolclass=pool.NullPool,
)
with connectable.connect() as connection:
context.configure(
connection=connection,
target_metadata=target_metadata
)
with context.begin_transaction():
context.run_migrations()
if context.is_offline_mode():
run_migrations_offline()
else:
run_migrations_online()

View File

@@ -0,0 +1,26 @@
"""${message}
Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
${imports if imports else ""}
# revision identifiers, used by Alembic.
revision: str = ${repr(up_revision)}
down_revision: Union[str, None] = ${repr(down_revision)}
branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
def upgrade() -> None:
${upgrades if upgrades else "pass"}
def downgrade() -> None:
${downgrades if downgrades else "pass"}

View File

@@ -0,0 +1,111 @@
"""Initial schema for AI Insights Service
Revision ID: 001
Revises:
Create Date: 2025-11-02 14:30:00.000000
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects.postgresql import UUID, JSONB
# revision identifiers, used by Alembic.
revision: str = '001'
down_revision: Union[str, None] = None
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
# Create ai_insights table
op.create_table(
'ai_insights',
sa.Column('id', UUID(as_uuid=True), primary_key=True),
sa.Column('tenant_id', UUID(as_uuid=True), nullable=False),
sa.Column('type', sa.String(50), nullable=False),
sa.Column('priority', sa.String(20), nullable=False),
sa.Column('category', sa.String(50), nullable=False),
sa.Column('title', sa.String(255), nullable=False),
sa.Column('description', sa.Text, nullable=False),
sa.Column('impact_type', sa.String(50)),
sa.Column('impact_value', sa.DECIMAL(10, 2)),
sa.Column('impact_unit', sa.String(20)),
sa.Column('confidence', sa.Integer, nullable=False),
sa.Column('metrics_json', JSONB),
sa.Column('actionable', sa.Boolean, nullable=False, server_default='true'),
sa.Column('recommendation_actions', JSONB),
sa.Column('status', sa.String(20), nullable=False, server_default='new'),
sa.Column('source_service', sa.String(50)),
sa.Column('source_data_id', sa.String(100)),
sa.Column('created_at', sa.TIMESTAMP(timezone=True), server_default=sa.func.now(), nullable=False),
sa.Column('updated_at', sa.TIMESTAMP(timezone=True), server_default=sa.func.now(), onupdate=sa.func.now(), nullable=False),
sa.Column('applied_at', sa.TIMESTAMP(timezone=True)),
sa.Column('expired_at', sa.TIMESTAMP(timezone=True)),
sa.CheckConstraint('confidence >= 0 AND confidence <= 100', name='check_confidence_range')
)
# Create indexes for ai_insights
op.create_index('idx_tenant_id', 'ai_insights', ['tenant_id'])
op.create_index('idx_type', 'ai_insights', ['type'])
op.create_index('idx_priority', 'ai_insights', ['priority'])
op.create_index('idx_category', 'ai_insights', ['category'])
op.create_index('idx_confidence', 'ai_insights', ['confidence'])
op.create_index('idx_status', 'ai_insights', ['status'])
op.create_index('idx_actionable', 'ai_insights', ['actionable'])
op.create_index('idx_created_at', 'ai_insights', ['created_at'])
op.create_index('idx_tenant_status_category', 'ai_insights', ['tenant_id', 'status', 'category'])
op.create_index('idx_tenant_created_confidence', 'ai_insights', ['tenant_id', 'created_at', 'confidence'])
op.create_index('idx_actionable_status', 'ai_insights', ['actionable', 'status'])
# Create insight_feedback table
op.create_table(
'insight_feedback',
sa.Column('id', UUID(as_uuid=True), primary_key=True),
sa.Column('insight_id', UUID(as_uuid=True), nullable=False),
sa.Column('action_taken', sa.String(100)),
sa.Column('result_data', JSONB),
sa.Column('success', sa.Boolean, nullable=False),
sa.Column('error_message', sa.Text),
sa.Column('expected_impact_value', sa.DECIMAL(10, 2)),
sa.Column('actual_impact_value', sa.DECIMAL(10, 2)),
sa.Column('variance_percentage', sa.DECIMAL(5, 2)),
sa.Column('applied_by', sa.String(100)),
sa.Column('created_at', sa.TIMESTAMP(timezone=True), server_default=sa.func.now(), nullable=False),
sa.ForeignKeyConstraint(['insight_id'], ['ai_insights.id'], ondelete='CASCADE')
)
# Create indexes for insight_feedback
op.create_index('idx_feedback_insight_id', 'insight_feedback', ['insight_id'])
op.create_index('idx_feedback_success', 'insight_feedback', ['success'])
op.create_index('idx_feedback_created_at', 'insight_feedback', ['created_at'])
op.create_index('idx_insight_success', 'insight_feedback', ['insight_id', 'success'])
op.create_index('idx_created_success', 'insight_feedback', ['created_at', 'success'])
# Create insight_correlations table
op.create_table(
'insight_correlations',
sa.Column('id', UUID(as_uuid=True), primary_key=True),
sa.Column('parent_insight_id', UUID(as_uuid=True), nullable=False),
sa.Column('child_insight_id', UUID(as_uuid=True), nullable=False),
sa.Column('correlation_type', sa.String(50), nullable=False),
sa.Column('correlation_strength', sa.DECIMAL(3, 2), nullable=False),
sa.Column('combined_confidence', sa.Integer),
sa.Column('created_at', sa.TIMESTAMP(timezone=True), server_default=sa.func.now(), nullable=False),
sa.ForeignKeyConstraint(['parent_insight_id'], ['ai_insights.id'], ondelete='CASCADE'),
sa.ForeignKeyConstraint(['child_insight_id'], ['ai_insights.id'], ondelete='CASCADE')
)
# Create indexes for insight_correlations
op.create_index('idx_corr_parent', 'insight_correlations', ['parent_insight_id'])
op.create_index('idx_corr_child', 'insight_correlations', ['child_insight_id'])
op.create_index('idx_corr_type', 'insight_correlations', ['correlation_type'])
op.create_index('idx_corr_created_at', 'insight_correlations', ['created_at'])
op.create_index('idx_parent_child', 'insight_correlations', ['parent_insight_id', 'child_insight_id'])
def downgrade() -> None:
op.drop_table('insight_correlations')
op.drop_table('insight_feedback')
op.drop_table('ai_insights')

View File

@@ -0,0 +1,46 @@
# FastAPI and ASGI
fastapi==0.104.1
uvicorn[standard]==0.24.0
python-multipart==0.0.6
# Database
sqlalchemy==2.0.23
alembic==1.12.1
psycopg2-binary==2.9.9
asyncpg==0.29.0
# Pydantic
pydantic==2.5.0
pydantic-settings==2.1.0
# HTTP Client
httpx==0.25.1
aiohttp==3.9.1
# Redis
redis==5.0.1
hiredis==2.2.3
# Utilities
python-dotenv==1.0.0
python-dateutil==2.8.2
pytz==2023.3
# Logging
structlog==23.2.0
# Machine Learning (for confidence scoring and impact estimation)
numpy==1.26.2
pandas==2.1.3
scikit-learn==1.3.2
# Testing
pytest==7.4.3
pytest-asyncio==0.21.1
pytest-cov==4.1.0
httpx==0.25.1
# Code Quality
black==23.11.0
flake8==6.1.0
mypy==1.7.1

View File

@@ -0,0 +1,579 @@
"""
Tests for Feedback Loop & Learning System
"""
import pytest
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from services.ai_insights.app.ml.feedback_learning_system import FeedbackLearningSystem
@pytest.fixture
def learning_system():
"""Create FeedbackLearningSystem instance."""
return FeedbackLearningSystem(
performance_threshold=0.85,
degradation_threshold=0.10,
min_feedback_samples=30
)
@pytest.fixture
def good_feedback_data():
"""Generate feedback data for well-performing model."""
np.random.seed(42)
dates = pd.date_range(start=datetime.utcnow() - timedelta(days=60), periods=50, freq='D')
feedback = []
for i, date in enumerate(dates):
predicted = 100 + np.random.normal(0, 10)
actual = predicted + np.random.normal(0, 5) # Small error
error = predicted - actual
error_pct = abs(error / actual * 100) if actual != 0 else 0
accuracy = max(0, 100 - error_pct)
feedback.append({
'insight_id': f'insight_{i}',
'applied_at': date - timedelta(days=1),
'outcome_date': date,
'predicted_value': predicted,
'actual_value': actual,
'error': error,
'error_pct': error_pct,
'accuracy': accuracy,
'confidence': 85
})
return pd.DataFrame(feedback)
@pytest.fixture
def degraded_feedback_data():
"""Generate feedback data for degrading model."""
np.random.seed(42)
dates = pd.date_range(start=datetime.utcnow() - timedelta(days=60), periods=50, freq='D')
feedback = []
for i, date in enumerate(dates):
        # Introduce increasing error over time
        error_multiplier = 1 + (i / 50) * 2  # noise scale grows from ~1x to ~3x by the end
predicted = 100 + np.random.normal(0, 10)
actual = predicted + np.random.normal(0, 10 * error_multiplier)
error = predicted - actual
error_pct = abs(error / actual * 100) if actual != 0 else 0
accuracy = max(0, 100 - error_pct)
feedback.append({
'insight_id': f'insight_{i}',
'applied_at': date - timedelta(days=1),
'outcome_date': date,
'predicted_value': predicted,
'actual_value': actual,
'error': error,
'error_pct': error_pct,
'accuracy': accuracy,
'confidence': 85
})
return pd.DataFrame(feedback)
@pytest.fixture
def biased_feedback_data():
"""Generate feedback data with systematic bias."""
np.random.seed(42)
dates = pd.date_range(start=datetime.utcnow() - timedelta(days=60), periods=50, freq='D')
feedback = []
for i, date in enumerate(dates):
predicted = 100 + np.random.normal(0, 10)
# Systematic over-prediction by 15%
actual = predicted * 0.85 + np.random.normal(0, 3)
error = predicted - actual
error_pct = abs(error / actual * 100) if actual != 0 else 0
accuracy = max(0, 100 - error_pct)
feedback.append({
'insight_id': f'insight_{i}',
'applied_at': date - timedelta(days=1),
'outcome_date': date,
'predicted_value': predicted,
'actual_value': actual,
'error': error,
'error_pct': error_pct,
'accuracy': accuracy,
'confidence': 80
})
return pd.DataFrame(feedback)
@pytest.fixture
def poorly_calibrated_feedback_data():
"""Generate feedback with poor confidence calibration."""
np.random.seed(42)
dates = pd.date_range(start=datetime.utcnow() - timedelta(days=60), periods=50, freq='D')
feedback = []
for i, date in enumerate(dates):
predicted = 100 + np.random.normal(0, 10)
# High confidence but low accuracy
if i < 25:
confidence = 90
actual = predicted + np.random.normal(0, 20) # Large error
else:
confidence = 60
actual = predicted + np.random.normal(0, 5) # Small error
error = predicted - actual
error_pct = abs(error / actual * 100) if actual != 0 else 0
accuracy = max(0, 100 - error_pct)
feedback.append({
'insight_id': f'insight_{i}',
'applied_at': date - timedelta(days=1),
'outcome_date': date,
'predicted_value': predicted,
'actual_value': actual,
'error': error,
'error_pct': error_pct,
'accuracy': accuracy,
'confidence': confidence
})
return pd.DataFrame(feedback)
class TestPerformanceMetrics:
"""Test performance metric calculation."""
@pytest.mark.asyncio
async def test_calculate_metrics_good_performance(self, learning_system, good_feedback_data):
"""Test metric calculation for good performance."""
metrics = learning_system._calculate_performance_metrics(good_feedback_data)
assert 'accuracy' in metrics
assert 'mae' in metrics
assert 'rmse' in metrics
assert 'mape' in metrics
assert 'bias' in metrics
assert 'r_squared' in metrics
# Good model should have high accuracy
assert metrics['accuracy'] > 80
assert metrics['mae'] < 10
assert abs(metrics['bias']) < 5
@pytest.mark.asyncio
async def test_calculate_metrics_degraded_performance(self, learning_system, degraded_feedback_data):
"""Test metric calculation for degraded performance."""
metrics = learning_system._calculate_performance_metrics(degraded_feedback_data)
# Degraded model should have lower accuracy
assert metrics['accuracy'] < 80
assert metrics['mae'] > 5
class TestPerformanceTrend:
"""Test performance trend analysis."""
@pytest.mark.asyncio
async def test_stable_trend(self, learning_system, good_feedback_data):
"""Test detection of stable performance trend."""
trend = learning_system._analyze_performance_trend(good_feedback_data)
assert trend['trend'] in ['stable', 'improving']
@pytest.mark.asyncio
async def test_degrading_trend(self, learning_system, degraded_feedback_data):
"""Test detection of degrading performance trend."""
trend = learning_system._analyze_performance_trend(degraded_feedback_data)
# May detect degrading trend depending on data
assert trend['trend'] in ['degrading', 'stable']
if trend['significant']:
assert 'slope' in trend
@pytest.mark.asyncio
async def test_insufficient_data_trend(self, learning_system):
"""Test trend analysis with insufficient data."""
small_data = pd.DataFrame([{
'insight_id': 'test',
'outcome_date': datetime.utcnow(),
'accuracy': 90
}])
trend = learning_system._analyze_performance_trend(small_data)
assert trend['trend'] == 'insufficient_data'
class TestDegradationDetection:
"""Test performance degradation detection."""
@pytest.mark.asyncio
async def test_no_degradation_detected(self, learning_system, good_feedback_data):
"""Test no degradation for good performance."""
current_metrics = learning_system._calculate_performance_metrics(good_feedback_data)
trend = learning_system._analyze_performance_trend(good_feedback_data)
degradation = learning_system._detect_performance_degradation(
current_metrics,
baseline_performance={'accuracy': 85},
trend_analysis=trend
)
assert degradation['detected'] is False
assert degradation['severity'] == 'none'
@pytest.mark.asyncio
async def test_degradation_below_threshold(self, learning_system):
"""Test degradation detection when below absolute threshold."""
current_metrics = {'accuracy': 70} # Below 85% threshold
trend = {'trend': 'stable', 'significant': False}
degradation = learning_system._detect_performance_degradation(
current_metrics,
baseline_performance=None,
trend_analysis=trend
)
assert degradation['detected'] is True
assert degradation['severity'] == 'high'
assert len(degradation['reasons']) > 0
@pytest.mark.asyncio
async def test_degradation_vs_baseline(self, learning_system):
"""Test degradation detection vs baseline."""
current_metrics = {'accuracy': 80}
baseline = {'accuracy': 95} # 15.8% drop
trend = {'trend': 'stable', 'significant': False}
degradation = learning_system._detect_performance_degradation(
current_metrics,
baseline_performance=baseline,
trend_analysis=trend
)
assert degradation['detected'] is True
assert 'dropped' in degradation['reasons'][0].lower()
@pytest.mark.asyncio
async def test_degradation_trending_down(self, learning_system, degraded_feedback_data):
"""Test degradation detection from trending down."""
current_metrics = learning_system._calculate_performance_metrics(degraded_feedback_data)
trend = learning_system._analyze_performance_trend(degraded_feedback_data)
degradation = learning_system._detect_performance_degradation(
current_metrics,
baseline_performance={'accuracy': 90},
trend_analysis=trend
)
# Should detect some form of degradation
assert degradation['detected'] is True
class TestRetrainingRecommendation:
"""Test retraining recommendation generation."""
@pytest.mark.asyncio
async def test_urgent_retraining_recommendation(self, learning_system):
"""Test urgent retraining recommendation."""
current_metrics = {'accuracy': 70}
degradation = {
'detected': True,
'severity': 'high',
'reasons': ['Accuracy below threshold'],
'current_accuracy': 70,
'baseline_accuracy': 90
}
trend = {'trend': 'degrading', 'significant': True}
recommendation = learning_system._generate_retraining_recommendation(
'test_model',
current_metrics,
degradation,
trend
)
assert recommendation['recommended'] is True
assert recommendation['priority'] == 'urgent'
assert 'immediately' in recommendation['recommendation'].lower()
@pytest.mark.asyncio
async def test_no_retraining_needed(self, learning_system, good_feedback_data):
"""Test no retraining recommendation for good performance."""
current_metrics = learning_system._calculate_performance_metrics(good_feedback_data)
degradation = {'detected': False, 'severity': 'none'}
trend = learning_system._analyze_performance_trend(good_feedback_data)
recommendation = learning_system._generate_retraining_recommendation(
'test_model',
current_metrics,
degradation,
trend
)
assert recommendation['recommended'] is False
assert recommendation['priority'] == 'none'
class TestErrorPatternDetection:
"""Test error pattern identification."""
@pytest.mark.asyncio
async def test_systematic_bias_detection(self, learning_system, biased_feedback_data):
"""Test detection of systematic bias."""
patterns = learning_system._identify_error_patterns(biased_feedback_data)
# Should detect over-prediction bias
bias_patterns = [p for p in patterns if p['pattern'] == 'systematic_bias']
assert len(bias_patterns) > 0
bias = bias_patterns[0]
assert 'over-prediction' in bias['description']
assert bias['severity'] in ['high', 'medium']
@pytest.mark.asyncio
async def test_no_patterns_for_good_data(self, learning_system, good_feedback_data):
"""Test no significant patterns for good data."""
patterns = learning_system._identify_error_patterns(good_feedback_data)
# May have some minor patterns, but no high severity
high_severity = [p for p in patterns if p.get('severity') == 'high']
assert len(high_severity) == 0
class TestConfidenceCalibration:
"""Test confidence calibration analysis."""
@pytest.mark.asyncio
async def test_well_calibrated_confidence(self, learning_system, good_feedback_data):
"""Test well-calibrated confidence scores."""
calibration = learning_system._calculate_confidence_calibration(good_feedback_data)
# Good data with consistent confidence should be well calibrated
if 'overall_calibration_error' in calibration:
# Small calibration error indicates good calibration
assert calibration['overall_calibration_error'] < 20
@pytest.mark.asyncio
async def test_poorly_calibrated_confidence(self, learning_system, poorly_calibrated_feedback_data):
"""Test poorly calibrated confidence scores."""
calibration = learning_system._calculate_confidence_calibration(poorly_calibrated_feedback_data)
# Should detect poor calibration
assert calibration['calibrated'] is False
if 'by_confidence_range' in calibration:
assert len(calibration['by_confidence_range']) > 0
@pytest.mark.asyncio
async def test_no_confidence_data(self, learning_system):
"""Test calibration when no confidence scores available."""
no_conf_data = pd.DataFrame([{
'predicted_value': 100,
'actual_value': 95,
'accuracy': 95
}])
calibration = learning_system._calculate_confidence_calibration(no_conf_data)
assert calibration['calibrated'] is False
assert 'reason' in calibration
class TestCompletePerformanceAnalysis:
"""Test complete performance analysis workflow."""
@pytest.mark.asyncio
async def test_analyze_good_performance(self, learning_system, good_feedback_data):
"""Test complete analysis of good performance."""
result = await learning_system.analyze_model_performance(
model_name='test_model',
feedback_data=good_feedback_data,
baseline_performance={'accuracy': 85}
)
assert result['model_name'] == 'test_model'
assert result['status'] != 'insufficient_feedback'
assert 'current_performance' in result
assert 'trend_analysis' in result
assert 'degradation_detected' in result
assert 'retraining_recommendation' in result
# Good performance should not recommend retraining
assert result['retraining_recommendation']['recommended'] is False
@pytest.mark.asyncio
async def test_analyze_degraded_performance(self, learning_system, degraded_feedback_data):
"""Test complete analysis of degraded performance."""
result = await learning_system.analyze_model_performance(
model_name='degraded_model',
feedback_data=degraded_feedback_data,
baseline_performance={'accuracy': 90}
)
assert result['degradation_detected']['detected'] is True
assert result['retraining_recommendation']['recommended'] is True
@pytest.mark.asyncio
async def test_insufficient_feedback(self, learning_system):
"""Test analysis with insufficient feedback samples."""
small_data = pd.DataFrame([{
'insight_id': 'test',
'outcome_date': datetime.utcnow(),
'predicted_value': 100,
'actual_value': 95,
'error': 5,
'error_pct': 5,
'accuracy': 95,
'confidence': 85
}])
result = await learning_system.analyze_model_performance(
model_name='test_model',
feedback_data=small_data
)
assert result['status'] == 'insufficient_feedback'
assert result['feedback_samples'] == 1
assert result['required_samples'] == 30
class TestLearningInsights:
"""Test learning insight generation."""
@pytest.mark.asyncio
async def test_generate_urgent_retraining_insight(self, learning_system):
"""Test generation of urgent retraining insight."""
analyses = [{
'model_name': 'urgent_model',
'retraining_recommendation': {
'priority': 'urgent',
'recommended': True
},
'degradation_detected': {
'detected': True
}
}]
insights = await learning_system.generate_learning_insights(
analyses,
tenant_id='tenant_123'
)
# Should generate urgent warning
urgent_insights = [i for i in insights if i['priority'] == 'urgent']
assert len(urgent_insights) > 0
insight = urgent_insights[0]
assert insight['type'] == 'warning'
assert 'urgent_model' in insight['description'].lower()
@pytest.mark.asyncio
async def test_generate_system_health_insight(self, learning_system):
"""Test generation of system health insight."""
# 3 models, 1 degraded
analyses = [
{
'model_name': 'model_1',
'degradation_detected': {'detected': False},
'retraining_recommendation': {'priority': 'none'}
},
{
'model_name': 'model_2',
'degradation_detected': {'detected': False},
'retraining_recommendation': {'priority': 'none'}
},
{
'model_name': 'model_3',
'degradation_detected': {'detected': True},
'retraining_recommendation': {'priority': 'high'}
}
]
insights = await learning_system.generate_learning_insights(
analyses,
tenant_id='tenant_123'
)
        # 2 of 3 models are healthy (66.7%), which is below the 80% threshold,
        # so a system health warning should be generated
        assert isinstance(insights, list)
        health_insights = [i for i in insights if 'health' in i['title'].lower()]
        assert len(health_insights) > 0
        assert health_insights[0]['priority'] == 'high'
@pytest.mark.asyncio
async def test_generate_calibration_insight(self, learning_system):
"""Test generation of calibration insight."""
analyses = [{
'model_name': 'model_1',
'degradation_detected': {'detected': False},
'retraining_recommendation': {'priority': 'none'},
'confidence_calibration': {
'calibrated': False,
'overall_calibration_error': 15
}
}]
insights = await learning_system.generate_learning_insights(
analyses,
tenant_id='tenant_123'
)
# Should generate calibration insight
calibration_insights = [
i for i in insights
if 'calibration' in i['title'].lower()
]
assert len(calibration_insights) > 0
class TestROICalculation:
"""Test ROI calculation."""
@pytest.mark.asyncio
async def test_calculate_roi_with_impact_values(self, learning_system):
"""Test ROI calculation with impact values."""
feedback_data = pd.DataFrame([
{
'accuracy': 90,
'impact_value': 1000
},
{
'accuracy': 85,
'impact_value': 1500
},
{
'accuracy': 95,
'impact_value': 800
}
])
roi = await learning_system.calculate_roi(
feedback_data,
insight_type='demand_forecast'
)
assert roi['insight_type'] == 'demand_forecast'
assert roi['samples'] == 3
assert roi['avg_accuracy'] == 90.0
assert roi['total_impact_value'] == 3300
assert roi['roi_validated'] is True
@pytest.mark.asyncio
async def test_calculate_roi_without_impact_values(self, learning_system, good_feedback_data):
"""Test ROI calculation without impact values."""
roi = await learning_system.calculate_roi(
good_feedback_data,
insight_type='yield_prediction'
)
assert roi['insight_type'] == 'yield_prediction'
assert roi['samples'] > 0
assert 'avg_accuracy' in roi
assert roi['roi_validated'] is False