Improve AI logic
This commit is contained in:
41
services/ai_insights/.env.example
Normal file
41
services/ai_insights/.env.example
Normal file
@@ -0,0 +1,41 @@
|
||||
# AI Insights Service Environment Variables
|
||||
|
||||
# Service Info
|
||||
SERVICE_NAME=ai-insights
|
||||
SERVICE_VERSION=1.0.0
|
||||
API_V1_PREFIX=/api/v1/ai-insights
|
||||
|
||||
# Database
|
||||
DATABASE_URL=postgresql+asyncpg://postgres:postgres@localhost:5432/bakery_ai_insights
|
||||
DB_POOL_SIZE=20
|
||||
DB_MAX_OVERFLOW=10
|
||||
|
||||
# Redis
|
||||
REDIS_URL=redis://localhost:6379/5
|
||||
REDIS_CACHE_TTL=900
|
||||
|
||||
# Service URLs
|
||||
FORECASTING_SERVICE_URL=http://forecasting-service:8000
|
||||
PROCUREMENT_SERVICE_URL=http://procurement-service:8000
|
||||
PRODUCTION_SERVICE_URL=http://production-service:8000
|
||||
SALES_SERVICE_URL=http://sales-service:8000
|
||||
INVENTORY_SERVICE_URL=http://inventory-service:8000
|
||||
|
||||
# Circuit Breaker Settings
|
||||
CIRCUIT_BREAKER_FAILURE_THRESHOLD=5
|
||||
CIRCUIT_BREAKER_TIMEOUT=60
|
||||
|
||||
# Insight Settings
|
||||
MIN_CONFIDENCE_THRESHOLD=60
|
||||
DEFAULT_INSIGHT_TTL_DAYS=7
|
||||
MAX_INSIGHTS_PER_REQUEST=100
|
||||
|
||||
# Feedback Settings
|
||||
FEEDBACK_PROCESSING_ENABLED=true
|
||||
FEEDBACK_PROCESSING_SCHEDULE="0 6 * * *"
|
||||
|
||||
# Logging
|
||||
LOG_LEVEL=INFO
|
||||
|
||||
# CORS
|
||||
ALLOWED_ORIGINS=["http://localhost:3000","http://localhost:5173"]
|
||||
49
services/ai_insights/Dockerfile
Normal file
49
services/ai_insights/Dockerfile
Normal file
@@ -0,0 +1,49 @@
|
||||
# AI Insights Dockerfile
|
||||
# Add this stage at the top of each service Dockerfile
|
||||
FROM python:3.11-slim AS shared
|
||||
WORKDIR /shared
|
||||
COPY shared/ /shared/
|
||||
|
||||
# Then your main service stage
|
||||
FROM python:3.11-slim
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Install system dependencies
|
||||
RUN apt-get update && apt-get install -y \
|
||||
gcc \
|
||||
curl \
|
||||
postgresql-client \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Copy requirements
|
||||
COPY shared/requirements-tracing.txt /tmp/
|
||||
|
||||
COPY services/ai_insights/requirements.txt .
|
||||
|
||||
# Install Python dependencies
|
||||
RUN pip install --no-cache-dir -r /tmp/requirements-tracing.txt
|
||||
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
# Copy shared libraries from the shared stage
|
||||
COPY --from=shared /shared /app/shared
|
||||
|
||||
# Copy application code
|
||||
COPY services/ai_insights/ .
|
||||
|
||||
# Copy scripts for migrations
|
||||
COPY scripts/ /app/scripts/
|
||||
|
||||
# Add shared libraries to Python path
|
||||
ENV PYTHONPATH="/app:/app/shared:${PYTHONPATH:-}"
|
||||
|
||||
# Expose port
|
||||
EXPOSE 8000
|
||||
|
||||
# Health check
|
||||
HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
|
||||
CMD curl -f http://localhost:8000/health || exit 1
|
||||
|
||||
# Run the application
|
||||
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
|
||||
232
services/ai_insights/QUICK_START.md
Normal file
232
services/ai_insights/QUICK_START.md
Normal file
@@ -0,0 +1,232 @@
|
||||
# AI Insights Service - Quick Start Guide
|
||||
|
||||
Get the AI Insights Service running in 5 minutes.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
- Python 3.11+
|
||||
- PostgreSQL 14+ (running)
|
||||
- Redis 6+ (running)
|
||||
|
||||
## Step 1: Setup Environment
|
||||
|
||||
```bash
|
||||
cd services/ai_insights
|
||||
|
||||
# Create virtual environment
|
||||
python3 -m venv venv
|
||||
source venv/bin/activate # Windows: venv\Scripts\activate
|
||||
|
||||
# Install dependencies
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
## Step 2: Configure Database
|
||||
|
||||
```bash
|
||||
# Copy environment template
|
||||
cp .env.example .env
|
||||
|
||||
# Edit .env file
|
||||
nano .env
|
||||
```
|
||||
|
||||
**Minimum required configuration**:
|
||||
```env
|
||||
DATABASE_URL=postgresql+asyncpg://postgres:postgres@localhost:5432/bakery_ai_insights
|
||||
REDIS_URL=redis://localhost:6379/5
|
||||
```
|
||||
|
||||
## Step 3: Create Database
|
||||
|
||||
```bash
|
||||
# Connect to PostgreSQL
|
||||
psql -U postgres
|
||||
|
||||
# Create database
|
||||
CREATE DATABASE bakery_ai_insights;
|
||||
\q
|
||||
```
|
||||
|
||||
## Step 4: Run Migrations
|
||||
|
||||
```bash
|
||||
# Run Alembic migrations
|
||||
alembic upgrade head
|
||||
```
|
||||
|
||||
You should see:
|
||||
```
|
||||
INFO [alembic.runtime.migration] Running upgrade -> 001, Initial schema for AI Insights Service
|
||||
```
|
||||
|
||||
## Step 5: Start the Service
|
||||
|
||||
```bash
|
||||
uvicorn app.main:app --reload
|
||||
```
|
||||
|
||||
You should see:
|
||||
```
|
||||
INFO: Uvicorn running on http://127.0.0.1:8000
|
||||
INFO: Application startup complete.
|
||||
```
|
||||
|
||||
## Step 6: Verify Installation
|
||||
|
||||
Open browser to http://localhost:8000/docs
|
||||
|
||||
You should see the Swagger UI with all API endpoints.
|
||||
|
||||
### Test Health Endpoint
|
||||
|
||||
```bash
|
||||
curl http://localhost:8000/health
|
||||
```
|
||||
|
||||
Expected response:
|
||||
```json
|
||||
{
|
||||
"status": "healthy",
|
||||
"service": "ai-insights",
|
||||
"version": "1.0.0"
|
||||
}
|
||||
```
|
||||
|
||||
## Step 7: Create Your First Insight
|
||||
|
||||
```bash
|
||||
curl -X POST "http://localhost:8000/api/v1/ai-insights/tenants/550e8400-e29b-41d4-a716-446655440000/insights" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"tenant_id": "550e8400-e29b-41d4-a716-446655440000",
|
||||
"type": "recommendation",
|
||||
"priority": "high",
|
||||
"category": "forecasting",
|
||||
"title": "Test Insight - Weekend Demand Pattern",
|
||||
"description": "Weekend sales 20% higher than weekdays",
|
||||
"impact_type": "revenue_increase",
|
||||
"impact_value": 150.00,
|
||||
"impact_unit": "euros/week",
|
||||
"confidence": 85,
|
||||
"metrics_json": {
|
||||
"weekday_avg": 45.2,
|
||||
"weekend_avg": 54.2,
|
||||
"increase_pct": 20.0
|
||||
},
|
||||
"actionable": true,
|
||||
"recommendation_actions": [
|
||||
{"label": "Increase Production", "action": "adjust_production"}
|
||||
],
|
||||
"source_service": "forecasting"
|
||||
}'
|
||||
```
|
||||
|
||||
## Step 8: Query Your Insights
|
||||
|
||||
```bash
|
||||
curl "http://localhost:8000/api/v1/ai-insights/tenants/550e8400-e29b-41d4-a716-446655440000/insights?page=1&page_size=10"
|
||||
```
|
||||
|
||||
## Common Issues
|
||||
|
||||
### Issue: "ModuleNotFoundError: No module named 'app'"
|
||||
|
||||
**Solution**: Make sure you're running from the `services/ai_insights/` directory and virtual environment is activated.
|
||||
|
||||
### Issue: "Connection refused" on database
|
||||
|
||||
**Solution**: Verify PostgreSQL is running:
|
||||
```bash
|
||||
# Check if PostgreSQL is running
|
||||
pg_isready
|
||||
|
||||
# Start PostgreSQL (macOS with Homebrew)
|
||||
brew services start postgresql
|
||||
|
||||
# Start PostgreSQL (Linux)
|
||||
sudo systemctl start postgresql
|
||||
```
|
||||
|
||||
### Issue: "Redis connection error"
|
||||
|
||||
**Solution**: Verify Redis is running:
|
||||
```bash
|
||||
# Check if Redis is running
|
||||
redis-cli ping
|
||||
|
||||
# Should return: PONG
|
||||
|
||||
# Start Redis (macOS with Homebrew)
|
||||
brew services start redis
|
||||
|
||||
# Start Redis (Linux)
|
||||
sudo systemctl start redis
|
||||
```
|
||||
|
||||
### Issue: "Alembic command not found"
|
||||
|
||||
**Solution**: Virtual environment not activated:
|
||||
```bash
|
||||
source venv/bin/activate
|
||||
```
|
||||
|
||||
## Next Steps
|
||||
|
||||
1. **Explore API**: Visit http://localhost:8000/docs
|
||||
2. **Read Documentation**: See `README.md` for detailed documentation
|
||||
3. **Implementation Guide**: See `AI_INSIGHTS_IMPLEMENTATION_SUMMARY.md`
|
||||
4. **Integration**: Start integrating with other services
|
||||
|
||||
## Useful Commands
|
||||
|
||||
```bash
|
||||
# Check service status
|
||||
curl http://localhost:8000/health
|
||||
|
||||
# Get aggregate metrics
|
||||
curl "http://localhost:8000/api/v1/ai-insights/tenants/{tenant_id}/insights/metrics/summary"
|
||||
|
||||
# Filter high-confidence insights
|
||||
curl "http://localhost:8000/api/v1/ai-insights/tenants/{tenant_id}/insights?actionable_only=true&min_confidence=80"
|
||||
|
||||
# Stop the service
|
||||
# Press Ctrl+C in the terminal running uvicorn
|
||||
|
||||
# Deactivate virtual environment
|
||||
deactivate
|
||||
```
|
||||
|
||||
## Docker Quick Start (Alternative)
|
||||
|
||||
If you prefer Docker:
|
||||
|
||||
```bash
|
||||
# Build image
|
||||
docker build -t ai-insights .
|
||||
|
||||
# Run container
|
||||
docker run -d \
|
||||
--name ai-insights \
|
||||
-p 8000:8000 \
|
||||
-e DATABASE_URL=postgresql+asyncpg://postgres:postgres@host.docker.internal:5432/bakery_ai_insights \
|
||||
-e REDIS_URL=redis://host.docker.internal:6379/5 \
|
||||
ai-insights
|
||||
|
||||
# Check logs
|
||||
docker logs ai-insights
|
||||
|
||||
# Stop container
|
||||
docker stop ai-insights
|
||||
docker rm ai-insights
|
||||
```
|
||||
|
||||
## Support
|
||||
|
||||
- **Documentation**: See `README.md`
|
||||
- **API Docs**: http://localhost:8000/docs
|
||||
- **Issues**: Create GitHub issue or contact team
|
||||
|
||||
---
|
||||
|
||||
**You're ready!** The AI Insights Service is now running and ready to accept insights from other services.
|
||||
316
services/ai_insights/README.md
Normal file
316
services/ai_insights/README.md
Normal file
@@ -0,0 +1,316 @@
|
||||
# AI Insights Service
|
||||
|
||||
Intelligent insights and recommendations service for bakery operations optimization.
|
||||
|
||||
## Overview
|
||||
|
||||
The AI Insights Service is a microservice that aggregates, scores, and manages intelligent recommendations across the bakery-ia platform. It provides:
|
||||
|
||||
- **Unified Insight Management**: Centralized storage and retrieval of AI-generated insights
|
||||
- **Confidence Scoring**: Standardized confidence calculation across different insight types
|
||||
- **Impact Estimation**: Business value quantification for recommendations
|
||||
- **Feedback Loop**: Closed-loop learning from applied insights
|
||||
- **Cross-Service Intelligence**: Correlation detection between insights from different services
|
||||
|
||||
## Features
|
||||
|
||||
### Core Capabilities
|
||||
|
||||
1. **Insight Aggregation**
|
||||
- Collect insights from Forecasting, Procurement, Production, and Sales services
|
||||
- Categorize and prioritize recommendations
|
||||
- Filter by confidence, category, priority, and actionability
|
||||
|
||||
2. **Confidence Calculation**
|
||||
- Multi-factor scoring: data quality, model performance, sample size, recency, historical accuracy
|
||||
- Insight-type specific adjustments
|
||||
- Specialized calculations for forecasting and optimization insights
|
||||
|
||||
3. **Impact Estimation**
|
||||
- Cost savings quantification
|
||||
- Revenue increase projections
|
||||
- Waste reduction calculations
|
||||
- Efficiency gain measurements
|
||||
- Quality improvement tracking
|
||||
|
||||
4. **Feedback & Learning**
|
||||
- Track application outcomes
|
||||
- Compare expected vs. actual impact
|
||||
- Calculate success rates
|
||||
- Enable model improvement
|
||||
|
||||
5. **Orchestration Integration**
|
||||
- Pre-orchestration insight gathering
|
||||
- Actionable insight filtering
|
||||
- Categorized recommendations for workflow phases
|
||||
|
||||
## Architecture
|
||||
|
||||
### Database Models
|
||||
|
||||
- **AIInsight**: Core insights table with classification, confidence, impact metrics
|
||||
- **InsightFeedback**: Feedback tracking for closed-loop learning
|
||||
- **InsightCorrelation**: Cross-service insight relationships
|
||||
|
||||
### API Endpoints
|
||||
|
||||
```
|
||||
POST /api/v1/ai-insights/tenants/{tenant_id}/insights
|
||||
GET /api/v1/ai-insights/tenants/{tenant_id}/insights
|
||||
GET /api/v1/ai-insights/tenants/{tenant_id}/insights/{insight_id}
|
||||
PATCH /api/v1/ai-insights/tenants/{tenant_id}/insights/{insight_id}
|
||||
DELETE /api/v1/ai-insights/tenants/{tenant_id}/insights/{insight_id}
|
||||
|
||||
GET /api/v1/ai-insights/tenants/{tenant_id}/insights/orchestration-ready
|
||||
GET /api/v1/ai-insights/tenants/{tenant_id}/insights/metrics/summary
|
||||
POST /api/v1/ai-insights/tenants/{tenant_id}/insights/{insight_id}/apply
|
||||
POST /api/v1/ai-insights/tenants/{tenant_id}/insights/{insight_id}/feedback
|
||||
POST /api/v1/ai-insights/tenants/{tenant_id}/insights/refresh
|
||||
GET /api/v1/ai-insights/tenants/{tenant_id}/insights/export
|
||||
```
|
||||
|
||||
## Installation
|
||||
|
||||
### Prerequisites
|
||||
|
||||
- Python 3.11+
|
||||
- PostgreSQL 14+
|
||||
- Redis 6+
|
||||
|
||||
### Setup
|
||||
|
||||
1. **Clone and navigate**:
|
||||
```bash
|
||||
cd services/ai_insights
|
||||
```
|
||||
|
||||
2. **Create virtual environment**:
|
||||
```bash
|
||||
python -m venv venv
|
||||
source venv/bin/activate # On Windows: venv\Scripts\activate
|
||||
```
|
||||
|
||||
3. **Install dependencies**:
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
4. **Configure environment**:
|
||||
```bash
|
||||
cp .env.example .env
|
||||
# Edit .env with your configuration
|
||||
```
|
||||
|
||||
5. **Run migrations**:
|
||||
```bash
|
||||
alembic upgrade head
|
||||
```
|
||||
|
||||
6. **Start the service**:
|
||||
```bash
|
||||
uvicorn app.main:app --reload
|
||||
```
|
||||
|
||||
The service will be available at `http://localhost:8000`.
|
||||
|
||||
## Configuration
|
||||
|
||||
### Environment Variables
|
||||
|
||||
| Variable | Description | Default |
|
||||
|----------|-------------|---------|
|
||||
| `DATABASE_URL` | PostgreSQL connection string | Required |
|
||||
| `REDIS_URL` | Redis connection string | Required |
|
||||
| `FORECASTING_SERVICE_URL` | Forecasting service URL | `http://forecasting-service:8000` |
|
||||
| `PROCUREMENT_SERVICE_URL` | Procurement service URL | `http://procurement-service:8000` |
|
||||
| `PRODUCTION_SERVICE_URL` | Production service URL | `http://production-service:8000` |
|
||||
| `MIN_CONFIDENCE_THRESHOLD` | Minimum confidence for insights | `60` |
|
||||
| `DEFAULT_INSIGHT_TTL_DAYS` | Days before insights expire | `7` |
|
||||
|
||||
## Usage Examples
|
||||
|
||||
### Creating an Insight
|
||||
|
||||
```python
|
||||
import httpx
|
||||
|
||||
insight_data = {
|
||||
"tenant_id": "550e8400-e29b-41d4-a716-446655440000",
|
||||
"type": "recommendation",
|
||||
"priority": "high",
|
||||
"category": "procurement",
|
||||
"title": "Flour Price Increase Expected",
|
||||
"description": "Price predicted to rise 8% in next week. Consider ordering now.",
|
||||
"impact_type": "cost_savings",
|
||||
"impact_value": 120.50,
|
||||
"impact_unit": "euros",
|
||||
"confidence": 85,
|
||||
"metrics_json": {
|
||||
"current_price": 1.20,
|
||||
"predicted_price": 1.30,
|
||||
"order_quantity": 1000
|
||||
},
|
||||
"actionable": True,
|
||||
"recommendation_actions": [
|
||||
{"label": "Order Now", "action": "create_purchase_order"},
|
||||
{"label": "Review", "action": "review_forecast"}
|
||||
],
|
||||
"source_service": "procurement",
|
||||
"source_data_id": "price_forecast_123"
|
||||
}
|
||||
|
||||
response = httpx.post(
|
||||
"http://localhost:8000/api/v1/ai-insights/tenants/550e8400-e29b-41d4-a716-446655440000/insights",
|
||||
json=insight_data
|
||||
)
|
||||
print(response.json())
|
||||
```
|
||||
|
||||
### Querying Insights
|
||||
|
||||
```python
|
||||
# Get high-confidence actionable insights
|
||||
response = httpx.get(
|
||||
"http://localhost:8000/api/v1/ai-insights/tenants/550e8400-e29b-41d4-a716-446655440000/insights",
|
||||
params={
|
||||
"actionable_only": True,
|
||||
"min_confidence": 80,
|
||||
"priority": "high",
|
||||
"page": 1,
|
||||
"page_size": 20
|
||||
}
|
||||
)
|
||||
insights = response.json()
|
||||
```
|
||||
|
||||
### Recording Feedback
|
||||
|
||||
```python
|
||||
feedback_data = {
|
||||
"insight_id": "insight-uuid",
|
||||
"action_taken": "create_purchase_order",
|
||||
"success": True,
|
||||
"expected_impact_value": 120.50,
|
||||
"actual_impact_value": 115.30,
|
||||
"result_data": {
|
||||
"order_id": "PO-12345",
|
||||
"actual_savings": 115.30
|
||||
},
|
||||
"applied_by": "user@example.com"
|
||||
}
|
||||
|
||||
response = httpx.post(
|
||||
f"http://localhost:8000/api/v1/ai-insights/tenants/{tenant_id}/insights/{insight_id}/feedback",
|
||||
json=feedback_data
|
||||
)
|
||||
```
|
||||
|
||||
## Development
|
||||
|
||||
### Running Tests
|
||||
|
||||
```bash
|
||||
pytest
|
||||
```
|
||||
|
||||
### Code Quality
|
||||
|
||||
```bash
|
||||
# Format code
|
||||
black app/
|
||||
|
||||
# Lint
|
||||
flake8 app/
|
||||
|
||||
# Type checking
|
||||
mypy app/
|
||||
```
|
||||
|
||||
### Creating a Migration
|
||||
|
||||
```bash
|
||||
alembic revision --autogenerate -m "Description of changes"
|
||||
alembic upgrade head
|
||||
```
|
||||
|
||||
## Insight Types
|
||||
|
||||
- **optimization**: Process improvements with measurable gains
|
||||
- **alert**: Warnings requiring attention
|
||||
- **prediction**: Future forecasts with confidence intervals
|
||||
- **recommendation**: Suggested actions with estimated impact
|
||||
- **insight**: General data-driven observations
|
||||
- **anomaly**: Unusual patterns detected in data
|
||||
|
||||
## Priority Levels
|
||||
|
||||
- **critical**: Immediate action required (e.g., stockout risk)
|
||||
- **high**: Action recommended soon (e.g., price opportunity)
|
||||
- **medium**: Consider acting (e.g., efficiency improvement)
|
||||
- **low**: Informational (e.g., pattern observation)
|
||||
|
||||
## Categories
|
||||
|
||||
- **forecasting**: Demand predictions and patterns
|
||||
- **inventory**: Stock management and optimization
|
||||
- **production**: Manufacturing efficiency and scheduling
|
||||
- **procurement**: Purchasing and supplier management
|
||||
- **customer**: Customer behavior and satisfaction
|
||||
- **cost**: Cost optimization opportunities
|
||||
- **quality**: Quality improvements
|
||||
- **efficiency**: Process efficiency gains
|
||||
|
||||
## Integration with Other Services
|
||||
|
||||
### Forecasting Service
|
||||
|
||||
- Receives forecast accuracy insights
|
||||
- Pattern detection alerts
|
||||
- Demand anomaly notifications
|
||||
|
||||
### Procurement Service
|
||||
|
||||
- Price forecast recommendations
|
||||
- Supplier performance alerts
|
||||
- Safety stock optimization
|
||||
|
||||
### Production Service
|
||||
|
||||
- Yield prediction insights
|
||||
- Schedule optimization recommendations
|
||||
- Equipment maintenance alerts
|
||||
|
||||
### Orchestrator Service
|
||||
|
||||
- Pre-orchestration insight gathering
|
||||
- Actionable recommendation filtering
|
||||
- Feedback recording for applied insights
|
||||
|
||||
## API Documentation
|
||||
|
||||
Once the service is running, interactive API documentation is available at:
|
||||
|
||||
- Swagger UI: `http://localhost:8000/docs`
|
||||
- ReDoc: `http://localhost:8000/redoc`
|
||||
|
||||
## Monitoring
|
||||
|
||||
### Health Check
|
||||
|
||||
```bash
|
||||
curl http://localhost:8000/health
|
||||
```
|
||||
|
||||
### Metrics Endpoint
|
||||
|
||||
```bash
|
||||
curl http://localhost:8000/api/v1/ai-insights/tenants/{tenant_id}/insights/metrics/summary
|
||||
```
|
||||
|
||||
## License
|
||||
|
||||
Copyright © 2025 Bakery IA. All rights reserved.
|
||||
|
||||
## Support
|
||||
|
||||
For issues and questions, please contact the development team or create an issue in the project repository.
|
||||
112
services/ai_insights/alembic.ini
Normal file
112
services/ai_insights/alembic.ini
Normal file
@@ -0,0 +1,112 @@
|
||||
# A generic, single database configuration.
|
||||
|
||||
[alembic]
|
||||
# path to migration scripts
|
||||
script_location = migrations
|
||||
|
||||
# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s
|
||||
file_template = %%(year)d%%(month).2d%%(day).2d_%%(hour).2d%%(minute).2d_%%(rev)s_%%(slug)s
|
||||
|
||||
# sys.path path, will be prepended to sys.path if present.
|
||||
prepend_sys_path = .
|
||||
|
||||
# timezone to use when rendering the date within the migration file
|
||||
# as well as the filename.
|
||||
# If specified, requires the python-dateutil library that can be
|
||||
# installed by adding `alembic[tz]` to the pip requirements
|
||||
# string value is passed to dateutil.tz.gettz()
|
||||
# leave blank for localtime
|
||||
# timezone =
|
||||
|
||||
# max length of characters to apply to the
|
||||
# "slug" field
|
||||
# truncate_slug_length = 40
|
||||
|
||||
# set to 'true' to run the environment during
|
||||
# the 'revision' command, regardless of autogenerate
|
||||
# revision_environment = false
|
||||
|
||||
# set to 'true' to allow .pyc and .pyo files without
|
||||
# a source .py file to be detected as revisions in the
|
||||
# versions/ directory
|
||||
# sourceless = false
|
||||
|
||||
# version location specification; This defaults
|
||||
# to migrations/versions. When using multiple version
|
||||
# directories, initial revisions must be specified with --version-path.
|
||||
# The path separator used here should be the separator specified by "version_path_separator" below.
|
||||
# version_locations = %(here)s/bar:%(here)s/bat:migrations/versions
|
||||
|
||||
# version path separator; As mentioned above, this is the character used to split
|
||||
# version_locations. The default within new alembic.ini files is "os", which uses os.pathsep.
|
||||
# If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas.
|
||||
# Valid values for version_path_separator are:
|
||||
#
|
||||
# version_path_separator = :
|
||||
# version_path_separator = ;
|
||||
# version_path_separator = space
|
||||
version_path_separator = os # Use os.pathsep. Default configuration used for new projects.
|
||||
|
||||
# set to 'true' to search source files recursively
|
||||
# in each "version_locations" directory
|
||||
# new in Alembic version 1.10
|
||||
# recursive_version_locations = false
|
||||
|
||||
# the output encoding used when revision files
|
||||
# are written from script.py.mako
|
||||
# output_encoding = utf-8
|
||||
|
||||
sqlalchemy.url = driver://user:pass@localhost/dbname
|
||||
|
||||
|
||||
[post_write_hooks]
|
||||
# post_write_hooks defines scripts or Python functions that are run
|
||||
# on newly generated revision scripts. See the documentation for further
|
||||
# detail and examples
|
||||
|
||||
# format using "black" - use the console_scripts runner, against the "black" entrypoint
|
||||
# hooks = black
|
||||
# black.type = console_scripts
|
||||
# black.entrypoint = black
|
||||
# black.options = -l 79 REVISION_SCRIPT_FILENAME
|
||||
|
||||
# lint with attempts to fix using "ruff" - use the exec runner, execute a binary
|
||||
# hooks = ruff
|
||||
# ruff.type = exec
|
||||
# ruff.executable = %(here)s/.venv/bin/ruff
|
||||
# ruff.options = --fix REVISION_SCRIPT_FILENAME
|
||||
|
||||
# Logging configuration
|
||||
[loggers]
|
||||
keys = root,sqlalchemy,alembic
|
||||
|
||||
[handlers]
|
||||
keys = console
|
||||
|
||||
[formatters]
|
||||
keys = generic
|
||||
|
||||
[logger_root]
|
||||
level = WARN
|
||||
handlers = console
|
||||
qualname =
|
||||
|
||||
[logger_sqlalchemy]
|
||||
level = WARN
|
||||
handlers =
|
||||
qualname = sqlalchemy.engine
|
||||
|
||||
[logger_alembic]
|
||||
level = INFO
|
||||
handlers =
|
||||
qualname = alembic
|
||||
|
||||
[handler_console]
|
||||
class = StreamHandler
|
||||
args = (sys.stderr,)
|
||||
level = NOTSET
|
||||
formatter = generic
|
||||
|
||||
[formatter_generic]
|
||||
format = %(levelname)-5.5s [%(name)s] %(message)s
|
||||
datefmt = %H:%M:%S
|
||||
3
services/ai_insights/app/__init__.py
Normal file
3
services/ai_insights/app/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
||||
"""AI Insights Service."""
|
||||
|
||||
__version__ = "1.0.0"
|
||||
1
services/ai_insights/app/api/__init__.py
Normal file
1
services/ai_insights/app/api/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""API modules for AI Insights Service."""
|
||||
323
services/ai_insights/app/api/insights.py
Normal file
323
services/ai_insights/app/api/insights.py
Normal file
@@ -0,0 +1,323 @@
|
||||
"""API endpoints for AI Insights."""
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query, status
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from typing import Optional
|
||||
from uuid import UUID
|
||||
from datetime import datetime
|
||||
import math
|
||||
|
||||
from app.core.database import get_db
|
||||
from app.repositories.insight_repository import InsightRepository
|
||||
from app.repositories.feedback_repository import FeedbackRepository
|
||||
from app.schemas.insight import (
|
||||
AIInsightCreate,
|
||||
AIInsightUpdate,
|
||||
AIInsightResponse,
|
||||
AIInsightList,
|
||||
InsightMetrics,
|
||||
InsightFilters
|
||||
)
|
||||
from app.schemas.feedback import InsightFeedbackCreate, InsightFeedbackResponse
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.post("/tenants/{tenant_id}/insights", response_model=AIInsightResponse, status_code=status.HTTP_201_CREATED)
|
||||
async def create_insight(
|
||||
tenant_id: UUID,
|
||||
insight_data: AIInsightCreate,
|
||||
db: AsyncSession = Depends(get_db)
|
||||
):
|
||||
"""Create a new AI Insight."""
|
||||
# Ensure tenant_id matches
|
||||
if insight_data.tenant_id != tenant_id:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
detail="Tenant ID mismatch"
|
||||
)
|
||||
|
||||
repo = InsightRepository(db)
|
||||
insight = await repo.create(insight_data)
|
||||
await db.commit()
|
||||
|
||||
return insight
|
||||
|
||||
|
||||
@router.get("/tenants/{tenant_id}/insights", response_model=AIInsightList)
|
||||
async def get_insights(
|
||||
tenant_id: UUID,
|
||||
category: Optional[str] = Query(None),
|
||||
priority: Optional[str] = Query(None),
|
||||
status: Optional[str] = Query(None),
|
||||
actionable_only: bool = Query(False),
|
||||
min_confidence: int = Query(0, ge=0, le=100),
|
||||
source_service: Optional[str] = Query(None),
|
||||
from_date: Optional[datetime] = Query(None),
|
||||
to_date: Optional[datetime] = Query(None),
|
||||
page: int = Query(1, ge=1),
|
||||
page_size: int = Query(20, ge=1, le=100),
|
||||
db: AsyncSession = Depends(get_db)
|
||||
):
|
||||
"""Get insights for a tenant with filters and pagination."""
|
||||
filters = InsightFilters(
|
||||
category=category,
|
||||
priority=priority,
|
||||
status=status,
|
||||
actionable_only=actionable_only,
|
||||
min_confidence=min_confidence,
|
||||
source_service=source_service,
|
||||
from_date=from_date,
|
||||
to_date=to_date
|
||||
)
|
||||
|
||||
repo = InsightRepository(db)
|
||||
skip = (page - 1) * page_size
|
||||
|
||||
insights, total = await repo.get_by_tenant(tenant_id, filters, skip, page_size)
|
||||
|
||||
total_pages = math.ceil(total / page_size) if total > 0 else 0
|
||||
|
||||
return AIInsightList(
|
||||
items=insights,
|
||||
total=total,
|
||||
page=page,
|
||||
page_size=page_size,
|
||||
total_pages=total_pages
|
||||
)
|
||||
|
||||
|
||||
@router.get("/tenants/{tenant_id}/insights/orchestration-ready")
|
||||
async def get_orchestration_ready_insights(
|
||||
tenant_id: UUID,
|
||||
target_date: datetime = Query(...),
|
||||
min_confidence: int = Query(70, ge=0, le=100),
|
||||
db: AsyncSession = Depends(get_db)
|
||||
):
|
||||
"""Get actionable insights for orchestration workflow."""
|
||||
repo = InsightRepository(db)
|
||||
categorized_insights = await repo.get_orchestration_ready_insights(
|
||||
tenant_id, target_date, min_confidence
|
||||
)
|
||||
|
||||
return categorized_insights
|
||||
|
||||
|
||||
@router.get("/tenants/{tenant_id}/insights/{insight_id}", response_model=AIInsightResponse)
|
||||
async def get_insight(
|
||||
tenant_id: UUID,
|
||||
insight_id: UUID,
|
||||
db: AsyncSession = Depends(get_db)
|
||||
):
|
||||
"""Get a single insight by ID."""
|
||||
repo = InsightRepository(db)
|
||||
insight = await repo.get_by_id(insight_id)
|
||||
|
||||
if not insight:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail="Insight not found"
|
||||
)
|
||||
|
||||
if insight.tenant_id != tenant_id:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail="Access denied"
|
||||
)
|
||||
|
||||
return insight
|
||||
|
||||
|
||||
@router.patch("/tenants/{tenant_id}/insights/{insight_id}", response_model=AIInsightResponse)
|
||||
async def update_insight(
|
||||
tenant_id: UUID,
|
||||
insight_id: UUID,
|
||||
update_data: AIInsightUpdate,
|
||||
db: AsyncSession = Depends(get_db)
|
||||
):
|
||||
"""Update an insight (typically status changes)."""
|
||||
repo = InsightRepository(db)
|
||||
|
||||
# Verify insight exists and belongs to tenant
|
||||
insight = await repo.get_by_id(insight_id)
|
||||
if not insight:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail="Insight not found"
|
||||
)
|
||||
|
||||
if insight.tenant_id != tenant_id:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail="Access denied"
|
||||
)
|
||||
|
||||
updated_insight = await repo.update(insight_id, update_data)
|
||||
await db.commit()
|
||||
|
||||
return updated_insight
|
||||
|
||||
|
||||
@router.delete("/tenants/{tenant_id}/insights/{insight_id}", status_code=status.HTTP_204_NO_CONTENT)
|
||||
async def dismiss_insight(
|
||||
tenant_id: UUID,
|
||||
insight_id: UUID,
|
||||
db: AsyncSession = Depends(get_db)
|
||||
):
|
||||
"""Dismiss an insight (soft delete)."""
|
||||
repo = InsightRepository(db)
|
||||
|
||||
# Verify insight exists and belongs to tenant
|
||||
insight = await repo.get_by_id(insight_id)
|
||||
if not insight:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail="Insight not found"
|
||||
)
|
||||
|
||||
if insight.tenant_id != tenant_id:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail="Access denied"
|
||||
)
|
||||
|
||||
await repo.delete(insight_id)
|
||||
await db.commit()
|
||||
|
||||
|
||||
@router.get("/tenants/{tenant_id}/insights/metrics/summary", response_model=InsightMetrics)
|
||||
async def get_insights_metrics(
|
||||
tenant_id: UUID,
|
||||
db: AsyncSession = Depends(get_db)
|
||||
):
|
||||
"""Get aggregate metrics for insights."""
|
||||
repo = InsightRepository(db)
|
||||
metrics = await repo.get_metrics(tenant_id)
|
||||
|
||||
return InsightMetrics(**metrics)
|
||||
|
||||
|
||||
@router.post("/tenants/{tenant_id}/insights/{insight_id}/apply")
|
||||
async def apply_insight(
|
||||
tenant_id: UUID,
|
||||
insight_id: UUID,
|
||||
db: AsyncSession = Depends(get_db)
|
||||
):
|
||||
"""Apply an insight recommendation (trigger action)."""
|
||||
repo = InsightRepository(db)
|
||||
|
||||
# Verify insight exists and belongs to tenant
|
||||
insight = await repo.get_by_id(insight_id)
|
||||
if not insight:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail="Insight not found"
|
||||
)
|
||||
|
||||
if insight.tenant_id != tenant_id:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail="Access denied"
|
||||
)
|
||||
|
||||
if not insight.actionable:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
detail="This insight is not actionable"
|
||||
)
|
||||
|
||||
# Update status to in_progress
|
||||
update_data = AIInsightUpdate(status='in_progress', applied_at=datetime.utcnow())
|
||||
await repo.update(insight_id, update_data)
|
||||
await db.commit()
|
||||
|
||||
# TODO: Route to appropriate service based on recommendation_actions
|
||||
# This will be implemented when service clients are added
|
||||
|
||||
return {
|
||||
"message": "Insight application initiated",
|
||||
"insight_id": str(insight_id),
|
||||
"actions": insight.recommendation_actions
|
||||
}
|
||||
|
||||
|
||||
@router.post("/tenants/{tenant_id}/insights/{insight_id}/feedback", response_model=InsightFeedbackResponse)
|
||||
async def record_feedback(
|
||||
tenant_id: UUID,
|
||||
insight_id: UUID,
|
||||
feedback_data: InsightFeedbackCreate,
|
||||
db: AsyncSession = Depends(get_db)
|
||||
):
|
||||
"""Record feedback for an applied insight."""
|
||||
insight_repo = InsightRepository(db)
|
||||
|
||||
# Verify insight exists and belongs to tenant
|
||||
insight = await insight_repo.get_by_id(insight_id)
|
||||
if not insight:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail="Insight not found"
|
||||
)
|
||||
|
||||
if insight.tenant_id != tenant_id:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail="Access denied"
|
||||
)
|
||||
|
||||
# Ensure feedback is for this insight
|
||||
if feedback_data.insight_id != insight_id:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
detail="Insight ID mismatch"
|
||||
)
|
||||
|
||||
feedback_repo = FeedbackRepository(db)
|
||||
feedback = await feedback_repo.create(feedback_data)
|
||||
|
||||
# Update insight status based on feedback
|
||||
new_status = 'applied' if feedback.success else 'dismissed'
|
||||
update_data = AIInsightUpdate(status=new_status)
|
||||
await insight_repo.update(insight_id, update_data)
|
||||
|
||||
await db.commit()
|
||||
|
||||
return feedback
|
||||
|
||||
|
||||
@router.post("/tenants/{tenant_id}/insights/refresh")
|
||||
async def refresh_insights(
|
||||
tenant_id: UUID,
|
||||
db: AsyncSession = Depends(get_db)
|
||||
):
|
||||
"""Trigger insight refresh (expire old, generate new)."""
|
||||
repo = InsightRepository(db)
|
||||
|
||||
# Expire old insights
|
||||
expired_count = await repo.expire_old_insights()
|
||||
await db.commit()
|
||||
|
||||
return {
|
||||
"message": "Insights refreshed",
|
||||
"expired_count": expired_count
|
||||
}
|
||||
|
||||
|
||||
@router.get("/tenants/{tenant_id}/insights/export")
|
||||
async def export_insights(
|
||||
tenant_id: UUID,
|
||||
format: str = Query("json", regex="^(json|csv)$"),
|
||||
db: AsyncSession = Depends(get_db)
|
||||
):
|
||||
"""Export insights to JSON or CSV."""
|
||||
repo = InsightRepository(db)
|
||||
insights, _ = await repo.get_by_tenant(tenant_id, filters=None, skip=0, limit=1000)
|
||||
|
||||
if format == "json":
|
||||
return {"insights": [AIInsightResponse.model_validate(i) for i in insights]}
|
||||
|
||||
# CSV export would be implemented here
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_501_NOT_IMPLEMENTED,
|
||||
detail="CSV export not yet implemented"
|
||||
)
|
||||
77
services/ai_insights/app/core/config.py
Normal file
77
services/ai_insights/app/core/config.py
Normal file
@@ -0,0 +1,77 @@
|
||||
"""Configuration settings for AI Insights Service."""
|
||||
|
||||
from shared.config.base import BaseServiceSettings
|
||||
import os
|
||||
from typing import Optional
|
||||
|
||||
|
||||
class Settings(BaseServiceSettings):
|
||||
"""Application settings."""
|
||||
|
||||
# Service Info
|
||||
SERVICE_NAME: str = "ai-insights"
|
||||
SERVICE_VERSION: str = "1.0.0"
|
||||
API_V1_PREFIX: str = "/api/v1"
|
||||
|
||||
# Database configuration (secure approach - build from components)
|
||||
@property
|
||||
def DATABASE_URL(self) -> str:
|
||||
"""Build database URL from secure components"""
|
||||
# Try complete URL first (for backward compatibility)
|
||||
complete_url = os.getenv("AI_INSIGHTS_DATABASE_URL")
|
||||
if complete_url:
|
||||
return complete_url
|
||||
|
||||
# Also check for generic DATABASE_URL (for migration compatibility)
|
||||
generic_url = os.getenv("DATABASE_URL")
|
||||
if generic_url:
|
||||
return generic_url
|
||||
|
||||
# Build from components (secure approach)
|
||||
user = os.getenv("AI_INSIGHTS_DB_USER", "ai_insights_user")
|
||||
password = os.getenv("AI_INSIGHTS_DB_PASSWORD", "ai_insights_pass123")
|
||||
host = os.getenv("AI_INSIGHTS_DB_HOST", "localhost")
|
||||
port = os.getenv("AI_INSIGHTS_DB_PORT", "5432")
|
||||
name = os.getenv("AI_INSIGHTS_DB_NAME", "ai_insights_db")
|
||||
|
||||
return f"postgresql+asyncpg://{user}:{password}@{host}:{port}/{name}"
|
||||
|
||||
DB_POOL_SIZE: int = 20
|
||||
DB_MAX_OVERFLOW: int = 10
|
||||
|
||||
# Redis (inherited from BaseServiceSettings but can override)
|
||||
REDIS_CACHE_TTL: int = 900 # 15 minutes
|
||||
REDIS_DB: int = 3 # Dedicated Redis database for AI Insights
|
||||
|
||||
# Service URLs
|
||||
FORECASTING_SERVICE_URL: str = "http://forecasting-service:8000"
|
||||
PROCUREMENT_SERVICE_URL: str = "http://procurement-service:8000"
|
||||
PRODUCTION_SERVICE_URL: str = "http://production-service:8000"
|
||||
SALES_SERVICE_URL: str = "http://sales-service:8000"
|
||||
INVENTORY_SERVICE_URL: str = "http://inventory-service:8000"
|
||||
|
||||
# Circuit Breaker Settings
|
||||
CIRCUIT_BREAKER_FAILURE_THRESHOLD: int = 5
|
||||
CIRCUIT_BREAKER_TIMEOUT: int = 60
|
||||
|
||||
# Insight Settings
|
||||
MIN_CONFIDENCE_THRESHOLD: int = 60
|
||||
DEFAULT_INSIGHT_TTL_DAYS: int = 7
|
||||
MAX_INSIGHTS_PER_REQUEST: int = 100
|
||||
|
||||
# Feedback Settings
|
||||
FEEDBACK_PROCESSING_ENABLED: bool = True
|
||||
FEEDBACK_PROCESSING_SCHEDULE: str = "0 6 * * *" # Daily at 6 AM
|
||||
|
||||
# Logging
|
||||
LOG_LEVEL: str = "INFO"
|
||||
|
||||
# CORS
|
||||
ALLOWED_ORIGINS: list[str] = ["http://localhost:3000", "http://localhost:5173"]
|
||||
|
||||
class Config:
|
||||
env_file = ".env"
|
||||
case_sensitive = True
|
||||
|
||||
|
||||
settings = Settings()
|
||||
58
services/ai_insights/app/core/database.py
Normal file
58
services/ai_insights/app/core/database.py
Normal file
@@ -0,0 +1,58 @@
|
||||
"""Database configuration and session management."""
|
||||
|
||||
from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine, async_sessionmaker
|
||||
from sqlalchemy.orm import declarative_base
|
||||
from sqlalchemy.pool import NullPool
|
||||
from typing import AsyncGenerator
|
||||
|
||||
from app.core.config import settings
|
||||
|
||||
# Create async engine
|
||||
engine = create_async_engine(
|
||||
settings.DATABASE_URL,
|
||||
pool_size=settings.DB_POOL_SIZE,
|
||||
max_overflow=settings.DB_MAX_OVERFLOW,
|
||||
echo=False,
|
||||
future=True,
|
||||
)
|
||||
|
||||
# Create async session factory
|
||||
AsyncSessionLocal = async_sessionmaker(
|
||||
engine,
|
||||
class_=AsyncSession,
|
||||
expire_on_commit=False,
|
||||
autocommit=False,
|
||||
autoflush=False,
|
||||
)
|
||||
|
||||
# Create declarative base
|
||||
Base = declarative_base()
|
||||
|
||||
|
||||
async def get_db() -> AsyncGenerator[AsyncSession, None]:
|
||||
"""
|
||||
Dependency for getting async database sessions.
|
||||
|
||||
Yields:
|
||||
AsyncSession: Database session
|
||||
"""
|
||||
async with AsyncSessionLocal() as session:
|
||||
try:
|
||||
yield session
|
||||
await session.commit()
|
||||
except Exception:
|
||||
await session.rollback()
|
||||
raise
|
||||
finally:
|
||||
await session.close()
|
||||
|
||||
|
||||
async def init_db():
|
||||
"""Initialize database tables."""
|
||||
async with engine.begin() as conn:
|
||||
await conn.run_sync(Base.metadata.create_all)
|
||||
|
||||
|
||||
async def close_db():
|
||||
"""Close database connections."""
|
||||
await engine.dispose()
|
||||
320
services/ai_insights/app/impact/impact_estimator.py
Normal file
320
services/ai_insights/app/impact/impact_estimator.py
Normal file
@@ -0,0 +1,320 @@
|
||||
"""Impact estimation for AI Insights."""
|
||||
|
||||
from typing import Dict, Any, Optional, Tuple
|
||||
from decimal import Decimal
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
|
||||
class ImpactEstimator:
|
||||
"""
|
||||
Estimate potential impact of recommendations.
|
||||
|
||||
Calculates expected business value in terms of:
|
||||
- Cost savings (euros)
|
||||
- Revenue increase (euros)
|
||||
- Waste reduction (euros or percentage)
|
||||
- Efficiency gains (hours or percentage)
|
||||
- Quality improvements (units or percentage)
|
||||
"""
|
||||
|
||||
def estimate_procurement_savings(
|
||||
self,
|
||||
current_price: Decimal,
|
||||
predicted_price: Decimal,
|
||||
order_quantity: Decimal,
|
||||
timeframe_days: int = 30
|
||||
) -> Tuple[Decimal, str, str]:
|
||||
"""
|
||||
Estimate savings from opportunistic buying.
|
||||
|
||||
Args:
|
||||
current_price: Current unit price
|
||||
predicted_price: Predicted future price
|
||||
order_quantity: Quantity to order
|
||||
timeframe_days: Time horizon for prediction
|
||||
|
||||
Returns:
|
||||
tuple: (impact_value, impact_unit, impact_type)
|
||||
"""
|
||||
savings_per_unit = predicted_price - current_price
|
||||
|
||||
if savings_per_unit > 0:
|
||||
total_savings = savings_per_unit * order_quantity
|
||||
return (
|
||||
round(total_savings, 2),
|
||||
'euros',
|
||||
'cost_savings'
|
||||
)
|
||||
return (Decimal('0.0'), 'euros', 'cost_savings')
|
||||
|
||||
def estimate_waste_reduction_savings(
|
||||
self,
|
||||
current_waste_rate: float,
|
||||
optimized_waste_rate: float,
|
||||
monthly_volume: Decimal,
|
||||
avg_cost_per_unit: Decimal
|
||||
) -> Tuple[Decimal, str, str]:
|
||||
"""
|
||||
Estimate savings from waste reduction.
|
||||
|
||||
Args:
|
||||
current_waste_rate: Current waste rate (0-1)
|
||||
optimized_waste_rate: Optimized waste rate (0-1)
|
||||
monthly_volume: Monthly volume
|
||||
avg_cost_per_unit: Average cost per unit
|
||||
|
||||
Returns:
|
||||
tuple: (impact_value, impact_unit, impact_type)
|
||||
"""
|
||||
waste_reduction_rate = current_waste_rate - optimized_waste_rate
|
||||
units_saved = monthly_volume * Decimal(str(waste_reduction_rate))
|
||||
savings = units_saved * avg_cost_per_unit
|
||||
|
||||
return (
|
||||
round(savings, 2),
|
||||
'euros/month',
|
||||
'waste_reduction'
|
||||
)
|
||||
|
||||
def estimate_forecast_improvement_value(
|
||||
self,
|
||||
current_mape: float,
|
||||
improved_mape: float,
|
||||
avg_monthly_revenue: Decimal
|
||||
) -> Tuple[Decimal, str, str]:
|
||||
"""
|
||||
Estimate value from forecast accuracy improvement.
|
||||
|
||||
Better forecasts reduce:
|
||||
- Stockouts (lost sales)
|
||||
- Overproduction (waste)
|
||||
- Emergency orders (premium costs)
|
||||
|
||||
Args:
|
||||
current_mape: Current forecast MAPE
|
||||
improved_mape: Improved forecast MAPE
|
||||
avg_monthly_revenue: Average monthly revenue
|
||||
|
||||
Returns:
|
||||
tuple: (impact_value, impact_unit, impact_type)
|
||||
"""
|
||||
# Rule of thumb: 1% MAPE improvement = 0.5% revenue impact
|
||||
mape_improvement = current_mape - improved_mape
|
||||
revenue_impact_pct = mape_improvement * 0.5 / 100
|
||||
|
||||
revenue_increase = avg_monthly_revenue * Decimal(str(revenue_impact_pct))
|
||||
|
||||
return (
|
||||
round(revenue_increase, 2),
|
||||
'euros/month',
|
||||
'revenue_increase'
|
||||
)
|
||||
|
||||
def estimate_production_efficiency_gain(
|
||||
self,
|
||||
time_saved_minutes: int,
|
||||
batches_per_month: int,
|
||||
labor_cost_per_hour: Decimal = Decimal('15.0')
|
||||
) -> Tuple[Decimal, str, str]:
|
||||
"""
|
||||
Estimate value from production efficiency improvements.
|
||||
|
||||
Args:
|
||||
time_saved_minutes: Minutes saved per batch
|
||||
batches_per_month: Number of batches per month
|
||||
labor_cost_per_hour: Labor cost per hour
|
||||
|
||||
Returns:
|
||||
tuple: (impact_value, impact_unit, impact_type)
|
||||
"""
|
||||
hours_saved_per_month = (time_saved_minutes * batches_per_month) / 60
|
||||
cost_savings = Decimal(str(hours_saved_per_month)) * labor_cost_per_hour
|
||||
|
||||
return (
|
||||
round(cost_savings, 2),
|
||||
'euros/month',
|
||||
'efficiency_gain'
|
||||
)
|
||||
|
||||
def estimate_safety_stock_optimization(
|
||||
self,
|
||||
current_safety_stock: Decimal,
|
||||
optimal_safety_stock: Decimal,
|
||||
holding_cost_per_unit_per_day: Decimal,
|
||||
stockout_cost_reduction: Decimal = Decimal('0.0')
|
||||
) -> Tuple[Decimal, str, str]:
|
||||
"""
|
||||
Estimate impact of safety stock optimization.
|
||||
|
||||
Args:
|
||||
current_safety_stock: Current safety stock level
|
||||
optimal_safety_stock: Optimal safety stock level
|
||||
holding_cost_per_unit_per_day: Daily holding cost
|
||||
stockout_cost_reduction: Reduction in stockout costs
|
||||
|
||||
Returns:
|
||||
tuple: (impact_value, impact_unit, impact_type)
|
||||
"""
|
||||
stock_reduction = current_safety_stock - optimal_safety_stock
|
||||
|
||||
if stock_reduction > 0:
|
||||
# Savings from reduced holding costs
|
||||
daily_savings = stock_reduction * holding_cost_per_unit_per_day
|
||||
monthly_savings = daily_savings * 30
|
||||
total_savings = monthly_savings + stockout_cost_reduction
|
||||
|
||||
return (
|
||||
round(total_savings, 2),
|
||||
'euros/month',
|
||||
'cost_savings'
|
||||
)
|
||||
elif stock_reduction < 0:
|
||||
# Cost increase but reduces stockouts
|
||||
daily_cost = abs(stock_reduction) * holding_cost_per_unit_per_day
|
||||
monthly_cost = daily_cost * 30
|
||||
net_savings = stockout_cost_reduction - monthly_cost
|
||||
|
||||
if net_savings > 0:
|
||||
return (
|
||||
round(net_savings, 2),
|
||||
'euros/month',
|
||||
'cost_savings'
|
||||
)
|
||||
|
||||
return (Decimal('0.0'), 'euros/month', 'cost_savings')
|
||||
|
||||
def estimate_supplier_switch_savings(
|
||||
self,
|
||||
current_supplier_price: Decimal,
|
||||
alternative_supplier_price: Decimal,
|
||||
monthly_order_quantity: Decimal,
|
||||
quality_difference_score: float = 0.0 # -1 to 1
|
||||
) -> Tuple[Decimal, str, str]:
|
||||
"""
|
||||
Estimate savings from switching suppliers.
|
||||
|
||||
Args:
|
||||
current_supplier_price: Current supplier unit price
|
||||
alternative_supplier_price: Alternative supplier unit price
|
||||
monthly_order_quantity: Monthly order quantity
|
||||
quality_difference_score: Quality difference (-1=worse, 0=same, 1=better)
|
||||
|
||||
Returns:
|
||||
tuple: (impact_value, impact_unit, impact_type)
|
||||
"""
|
||||
price_savings = (current_supplier_price - alternative_supplier_price) * monthly_order_quantity
|
||||
|
||||
# Adjust for quality difference
|
||||
# If quality is worse, reduce estimated savings
|
||||
quality_adjustment = 1 + (quality_difference_score * 0.1) # ±10% max adjustment
|
||||
adjusted_savings = price_savings * Decimal(str(quality_adjustment))
|
||||
|
||||
return (
|
||||
round(adjusted_savings, 2),
|
||||
'euros/month',
|
||||
'cost_savings'
|
||||
)
|
||||
|
||||
def estimate_yield_improvement_value(
|
||||
self,
|
||||
current_yield_rate: float,
|
||||
predicted_yield_rate: float,
|
||||
production_volume: Decimal,
|
||||
product_price: Decimal
|
||||
) -> Tuple[Decimal, str, str]:
|
||||
"""
|
||||
Estimate value from production yield improvements.
|
||||
|
||||
Args:
|
||||
current_yield_rate: Current yield rate (0-1)
|
||||
predicted_yield_rate: Predicted yield rate (0-1)
|
||||
production_volume: Monthly production volume
|
||||
product_price: Product selling price
|
||||
|
||||
Returns:
|
||||
tuple: (impact_value, impact_unit, impact_type)
|
||||
"""
|
||||
yield_improvement = predicted_yield_rate - current_yield_rate
|
||||
|
||||
if yield_improvement > 0:
|
||||
additional_units = production_volume * Decimal(str(yield_improvement))
|
||||
revenue_increase = additional_units * product_price
|
||||
|
||||
return (
|
||||
round(revenue_increase, 2),
|
||||
'euros/month',
|
||||
'revenue_increase'
|
||||
)
|
||||
|
||||
return (Decimal('0.0'), 'euros/month', 'revenue_increase')
|
||||
|
||||
def estimate_demand_pattern_value(
|
||||
self,
|
||||
pattern_strength: float, # 0-1
|
||||
potential_revenue_increase: Decimal,
|
||||
implementation_cost: Decimal = Decimal('0.0')
|
||||
) -> Tuple[Decimal, str, str]:
|
||||
"""
|
||||
Estimate value from acting on demand patterns.
|
||||
|
||||
Args:
|
||||
pattern_strength: Strength of detected pattern (0-1)
|
||||
potential_revenue_increase: Potential monthly revenue increase
|
||||
implementation_cost: One-time implementation cost
|
||||
|
||||
Returns:
|
||||
tuple: (impact_value, impact_unit, impact_type)
|
||||
"""
|
||||
# Discount by pattern strength (confidence)
|
||||
expected_value = potential_revenue_increase * Decimal(str(pattern_strength))
|
||||
|
||||
# Amortize implementation cost over 6 months
|
||||
monthly_cost = implementation_cost / 6
|
||||
|
||||
net_value = expected_value - monthly_cost
|
||||
|
||||
return (
|
||||
round(max(Decimal('0.0'), net_value), 2),
|
||||
'euros/month',
|
||||
'revenue_increase'
|
||||
)
|
||||
|
||||
def estimate_composite_impact(
|
||||
self,
|
||||
impacts: list[Dict[str, Any]]
|
||||
) -> Tuple[Decimal, str, str]:
|
||||
"""
|
||||
Combine multiple impact estimations.
|
||||
|
||||
Args:
|
||||
impacts: List of impact dicts with 'value', 'unit', 'type'
|
||||
|
||||
Returns:
|
||||
tuple: (total_impact_value, impact_unit, impact_type)
|
||||
"""
|
||||
total_savings = Decimal('0.0')
|
||||
total_revenue = Decimal('0.0')
|
||||
|
||||
for impact in impacts:
|
||||
value = Decimal(str(impact['value']))
|
||||
impact_type = impact['type']
|
||||
|
||||
if impact_type == 'cost_savings':
|
||||
total_savings += value
|
||||
elif impact_type == 'revenue_increase':
|
||||
total_revenue += value
|
||||
|
||||
# Combine both types
|
||||
total_impact = total_savings + total_revenue
|
||||
|
||||
if total_impact > 0:
|
||||
# Determine primary type
|
||||
primary_type = 'cost_savings' if total_savings > total_revenue else 'revenue_increase'
|
||||
|
||||
return (
|
||||
round(total_impact, 2),
|
||||
'euros/month',
|
||||
primary_type
|
||||
)
|
||||
|
||||
return (Decimal('0.0'), 'euros/month', 'cost_savings')
|
||||
93
services/ai_insights/app/main.py
Normal file
93
services/ai_insights/app/main.py
Normal file
@@ -0,0 +1,93 @@
|
||||
"""Main FastAPI application for AI Insights Service."""
|
||||
|
||||
from fastapi import FastAPI
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from contextlib import asynccontextmanager
|
||||
import structlog
|
||||
|
||||
from app.core.config import settings
|
||||
from app.core.database import init_db, close_db
|
||||
from app.api import insights
|
||||
|
||||
# Configure structured logging
|
||||
structlog.configure(
|
||||
processors=[
|
||||
structlog.processors.TimeStamper(fmt="iso"),
|
||||
structlog.processors.JSONRenderer()
|
||||
]
|
||||
)
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
|
||||
@asynccontextmanager
|
||||
async def lifespan(app: FastAPI):
|
||||
"""Lifespan event handler for startup and shutdown."""
|
||||
# Startup
|
||||
logger.info("Starting AI Insights Service", service=settings.SERVICE_NAME, version=settings.SERVICE_VERSION)
|
||||
await init_db()
|
||||
logger.info("Database initialized")
|
||||
|
||||
yield
|
||||
|
||||
# Shutdown
|
||||
logger.info("Shutting down AI Insights Service")
|
||||
await close_db()
|
||||
logger.info("Database connections closed")
|
||||
|
||||
|
||||
# Create FastAPI app
|
||||
app = FastAPI(
|
||||
title="AI Insights Service",
|
||||
description="Intelligent insights and recommendations for bakery operations",
|
||||
version=settings.SERVICE_VERSION,
|
||||
lifespan=lifespan
|
||||
)
|
||||
|
||||
# CORS middleware
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=settings.ALLOWED_ORIGINS,
|
||||
allow_credentials=True,
|
||||
allow_methods=["*"],
|
||||
allow_headers=["*"],
|
||||
)
|
||||
|
||||
# Include routers
|
||||
app.include_router(
|
||||
insights.router,
|
||||
prefix=settings.API_V1_PREFIX,
|
||||
tags=["insights"]
|
||||
)
|
||||
|
||||
|
||||
@app.get("/")
|
||||
async def root():
|
||||
"""Root endpoint."""
|
||||
return {
|
||||
"service": settings.SERVICE_NAME,
|
||||
"version": settings.SERVICE_VERSION,
|
||||
"status": "running"
|
||||
}
|
||||
|
||||
|
||||
@app.get("/health")
|
||||
async def health_check():
|
||||
"""Health check endpoint."""
|
||||
return {
|
||||
"status": "healthy",
|
||||
"service": settings.SERVICE_NAME,
|
||||
"version": settings.SERVICE_VERSION
|
||||
}
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import uvicorn
|
||||
|
||||
uvicorn.run(
|
||||
"app.main:app",
|
||||
host="0.0.0.0",
|
||||
port=8000,
|
||||
reload=True,
|
||||
log_level=settings.LOG_LEVEL.lower()
|
||||
)
|
||||
672
services/ai_insights/app/ml/feedback_learning_system.py
Normal file
672
services/ai_insights/app/ml/feedback_learning_system.py
Normal file
@@ -0,0 +1,672 @@
|
||||
"""
|
||||
Feedback Loop & Learning System
|
||||
Enables continuous improvement through outcome tracking and model retraining
|
||||
"""
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from typing import Dict, List, Any, Optional, Tuple
|
||||
from datetime import datetime, timedelta
|
||||
from uuid import UUID
|
||||
import structlog
|
||||
from scipy import stats
|
||||
from collections import defaultdict
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
|
||||
class FeedbackLearningSystem:
|
||||
"""
|
||||
Manages feedback collection, model performance tracking, and retraining triggers.
|
||||
|
||||
Key Responsibilities:
|
||||
1. Aggregate feedback from applied insights
|
||||
2. Calculate model performance metrics (accuracy, precision, recall)
|
||||
3. Detect performance degradation
|
||||
4. Trigger automatic retraining when needed
|
||||
5. Calibrate confidence scores based on actual accuracy
|
||||
6. Generate learning insights for model improvement
|
||||
|
||||
Workflow:
|
||||
- Feedback continuously recorded via AIInsightsClient
|
||||
- Periodic performance analysis (daily/weekly)
|
||||
- Automatic alerts when performance degrades
|
||||
- Retraining recommendations with priority
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
performance_threshold: float = 0.85, # Minimum acceptable accuracy
|
||||
degradation_threshold: float = 0.10, # 10% drop triggers alert
|
||||
min_feedback_samples: int = 30, # Minimum samples for analysis
|
||||
retraining_window_days: int = 90 # Consider last 90 days
|
||||
):
|
||||
self.performance_threshold = performance_threshold
|
||||
self.degradation_threshold = degradation_threshold
|
||||
self.min_feedback_samples = min_feedback_samples
|
||||
self.retraining_window_days = retraining_window_days
|
||||
|
||||
async def analyze_model_performance(
|
||||
self,
|
||||
model_name: str,
|
||||
feedback_data: pd.DataFrame,
|
||||
baseline_performance: Optional[Dict[str, float]] = None
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Analyze model performance based on feedback data.
|
||||
|
||||
Args:
|
||||
model_name: Name of the model (e.g., 'hybrid_forecaster', 'yield_predictor')
|
||||
feedback_data: DataFrame with columns:
|
||||
- insight_id
|
||||
- applied_at
|
||||
- outcome_date
|
||||
- predicted_value
|
||||
- actual_value
|
||||
- error
|
||||
- error_pct
|
||||
- accuracy
|
||||
baseline_performance: Optional baseline metrics for comparison
|
||||
|
||||
Returns:
|
||||
Performance analysis with metrics, trends, and recommendations
|
||||
"""
|
||||
logger.info(
|
||||
"Analyzing model performance",
|
||||
model_name=model_name,
|
||||
feedback_samples=len(feedback_data)
|
||||
)
|
||||
|
||||
if len(feedback_data) < self.min_feedback_samples:
|
||||
return self._insufficient_feedback_response(
|
||||
model_name, len(feedback_data), self.min_feedback_samples
|
||||
)
|
||||
|
||||
# Step 1: Calculate current performance metrics
|
||||
current_metrics = self._calculate_performance_metrics(feedback_data)
|
||||
|
||||
# Step 2: Analyze performance trend over time
|
||||
trend_analysis = self._analyze_performance_trend(feedback_data)
|
||||
|
||||
# Step 3: Detect performance degradation
|
||||
degradation_detected = self._detect_performance_degradation(
|
||||
current_metrics, baseline_performance, trend_analysis
|
||||
)
|
||||
|
||||
# Step 4: Generate retraining recommendation
|
||||
retraining_recommendation = self._generate_retraining_recommendation(
|
||||
model_name, current_metrics, degradation_detected, trend_analysis
|
||||
)
|
||||
|
||||
# Step 5: Identify error patterns
|
||||
error_patterns = self._identify_error_patterns(feedback_data)
|
||||
|
||||
# Step 6: Calculate confidence calibration
|
||||
confidence_calibration = self._calculate_confidence_calibration(feedback_data)
|
||||
|
||||
logger.info(
|
||||
"Model performance analysis complete",
|
||||
model_name=model_name,
|
||||
current_accuracy=current_metrics['accuracy'],
|
||||
degradation_detected=degradation_detected['detected'],
|
||||
retraining_recommended=retraining_recommendation['recommended']
|
||||
)
|
||||
|
||||
return {
|
||||
'model_name': model_name,
|
||||
'analyzed_at': datetime.utcnow().isoformat(),
|
||||
'feedback_samples': len(feedback_data),
|
||||
'date_range': {
|
||||
'start': feedback_data['outcome_date'].min().isoformat(),
|
||||
'end': feedback_data['outcome_date'].max().isoformat()
|
||||
},
|
||||
'current_performance': current_metrics,
|
||||
'baseline_performance': baseline_performance,
|
||||
'trend_analysis': trend_analysis,
|
||||
'degradation_detected': degradation_detected,
|
||||
'retraining_recommendation': retraining_recommendation,
|
||||
'error_patterns': error_patterns,
|
||||
'confidence_calibration': confidence_calibration
|
||||
}
|
||||
|
||||
def _insufficient_feedback_response(
|
||||
self, model_name: str, current_samples: int, required_samples: int
|
||||
) -> Dict[str, Any]:
|
||||
"""Return response when insufficient feedback data."""
|
||||
return {
|
||||
'model_name': model_name,
|
||||
'analyzed_at': datetime.utcnow().isoformat(),
|
||||
'status': 'insufficient_feedback',
|
||||
'feedback_samples': current_samples,
|
||||
'required_samples': required_samples,
|
||||
'current_performance': None,
|
||||
'recommendation': f'Need {required_samples - current_samples} more feedback samples for reliable analysis'
|
||||
}
|
||||
|
||||
def _calculate_performance_metrics(
|
||||
self, feedback_data: pd.DataFrame
|
||||
) -> Dict[str, float]:
|
||||
"""
|
||||
Calculate comprehensive performance metrics.
|
||||
|
||||
Metrics:
|
||||
- Accuracy: % of predictions within acceptable error
|
||||
- MAE: Mean Absolute Error
|
||||
- RMSE: Root Mean Squared Error
|
||||
- MAPE: Mean Absolute Percentage Error
|
||||
- Bias: Systematic over/under prediction
|
||||
- R²: Correlation between predicted and actual
|
||||
"""
|
||||
predicted = feedback_data['predicted_value'].values
|
||||
actual = feedback_data['actual_value'].values
|
||||
|
||||
# Filter out invalid values
|
||||
valid_mask = ~(np.isnan(predicted) | np.isnan(actual))
|
||||
predicted = predicted[valid_mask]
|
||||
actual = actual[valid_mask]
|
||||
|
||||
if len(predicted) == 0:
|
||||
return {
|
||||
'accuracy': 0,
|
||||
'mae': 0,
|
||||
'rmse': 0,
|
||||
'mape': 0,
|
||||
'bias': 0,
|
||||
'r_squared': 0
|
||||
}
|
||||
|
||||
# Calculate errors
|
||||
errors = predicted - actual
|
||||
abs_errors = np.abs(errors)
|
||||
pct_errors = np.abs(errors / actual) * 100 if np.all(actual != 0) else np.zeros_like(errors)
|
||||
|
||||
# MAE and RMSE
|
||||
mae = float(np.mean(abs_errors))
|
||||
rmse = float(np.sqrt(np.mean(errors ** 2)))
|
||||
|
||||
# MAPE (excluding cases where actual = 0)
|
||||
valid_pct_mask = actual != 0
|
||||
mape = float(np.mean(pct_errors[valid_pct_mask])) if np.any(valid_pct_mask) else 0
|
||||
|
||||
# Accuracy (% within 10% error)
|
||||
within_10pct = np.sum(pct_errors <= 10) / len(pct_errors) * 100
|
||||
|
||||
# Bias (mean error - positive = over-prediction)
|
||||
bias = float(np.mean(errors))
|
||||
|
||||
# R² (correlation)
|
||||
if len(predicted) > 1 and np.std(actual) > 0:
|
||||
correlation = np.corrcoef(predicted, actual)[0, 1]
|
||||
r_squared = correlation ** 2
|
||||
else:
|
||||
r_squared = 0
|
||||
|
||||
return {
|
||||
'accuracy': round(within_10pct, 2), # % within 10% error
|
||||
'mae': round(mae, 2),
|
||||
'rmse': round(rmse, 2),
|
||||
'mape': round(mape, 2),
|
||||
'bias': round(bias, 2),
|
||||
'r_squared': round(r_squared, 3),
|
||||
'sample_size': len(predicted)
|
||||
}
|
||||
|
||||
def _analyze_performance_trend(
|
||||
self, feedback_data: pd.DataFrame
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Analyze performance trend over time.
|
||||
|
||||
Returns trend direction (improving/stable/degrading) and slope.
|
||||
"""
|
||||
# Sort by date
|
||||
df = feedback_data.sort_values('outcome_date').copy()
|
||||
|
||||
# Calculate rolling accuracy (7-day window)
|
||||
df['rolling_accuracy'] = df['accuracy'].rolling(window=7, min_periods=3).mean()
|
||||
|
||||
# Linear trend
|
||||
if len(df) >= 10:
|
||||
# Use day index as x
|
||||
df['day_index'] = (df['outcome_date'] - df['outcome_date'].min()).dt.days
|
||||
|
||||
# Fit linear regression
|
||||
valid_mask = ~np.isnan(df['rolling_accuracy'])
|
||||
if valid_mask.sum() >= 10:
|
||||
x = df.loc[valid_mask, 'day_index'].values
|
||||
y = df.loc[valid_mask, 'rolling_accuracy'].values
|
||||
|
||||
slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)
|
||||
|
||||
# Determine trend
|
||||
if p_value < 0.05:
|
||||
if slope > 0.1:
|
||||
trend = 'improving'
|
||||
elif slope < -0.1:
|
||||
trend = 'degrading'
|
||||
else:
|
||||
trend = 'stable'
|
||||
else:
|
||||
trend = 'stable'
|
||||
|
||||
return {
|
||||
'trend': trend,
|
||||
'slope': round(float(slope), 4),
|
||||
'p_value': round(float(p_value), 4),
|
||||
'significant': p_value < 0.05,
|
||||
'recent_performance': round(float(df['rolling_accuracy'].iloc[-1]), 2),
|
||||
'initial_performance': round(float(df['rolling_accuracy'].dropna().iloc[0]), 2)
|
||||
}
|
||||
|
||||
# Not enough data for trend
|
||||
return {
|
||||
'trend': 'insufficient_data',
|
||||
'slope': 0,
|
||||
'p_value': 1.0,
|
||||
'significant': False
|
||||
}
|
||||
|
||||
def _detect_performance_degradation(
|
||||
self,
|
||||
current_metrics: Dict[str, float],
|
||||
baseline_performance: Optional[Dict[str, float]],
|
||||
trend_analysis: Dict[str, Any]
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Detect if model performance has degraded.
|
||||
|
||||
Degradation triggers:
|
||||
1. Current accuracy below threshold (85%)
|
||||
2. Significant drop from baseline (>10%)
|
||||
3. Degrading trend detected
|
||||
"""
|
||||
degradation_reasons = []
|
||||
severity = 'none'
|
||||
|
||||
# Check absolute performance
|
||||
if current_metrics['accuracy'] < self.performance_threshold * 100:
|
||||
degradation_reasons.append(
|
||||
f"Accuracy {current_metrics['accuracy']:.1f}% below threshold {self.performance_threshold*100}%"
|
||||
)
|
||||
severity = 'high'
|
||||
|
||||
# Check vs baseline
|
||||
if baseline_performance and 'accuracy' in baseline_performance:
|
||||
baseline_acc = baseline_performance['accuracy']
|
||||
current_acc = current_metrics['accuracy']
|
||||
drop_pct = (baseline_acc - current_acc) / baseline_acc
|
||||
|
||||
if drop_pct > self.degradation_threshold:
|
||||
degradation_reasons.append(
|
||||
f"Accuracy dropped {drop_pct*100:.1f}% from baseline {baseline_acc:.1f}%"
|
||||
)
|
||||
severity = 'high' if severity != 'high' else severity
|
||||
|
||||
# Check trend
|
||||
if trend_analysis.get('trend') == 'degrading' and trend_analysis.get('significant'):
|
||||
degradation_reasons.append(
|
||||
f"Degrading trend detected (slope: {trend_analysis['slope']:.4f})"
|
||||
)
|
||||
severity = 'medium' if severity == 'none' else severity
|
||||
|
||||
detected = len(degradation_reasons) > 0
|
||||
|
||||
return {
|
||||
'detected': detected,
|
||||
'severity': severity,
|
||||
'reasons': degradation_reasons,
|
||||
'current_accuracy': current_metrics['accuracy'],
|
||||
'baseline_accuracy': baseline_performance.get('accuracy') if baseline_performance else None
|
||||
}
|
||||
|
||||
def _generate_retraining_recommendation(
|
||||
self,
|
||||
model_name: str,
|
||||
current_metrics: Dict[str, float],
|
||||
degradation_detected: Dict[str, Any],
|
||||
trend_analysis: Dict[str, Any]
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Generate retraining recommendation based on performance analysis.
|
||||
|
||||
Priority Levels:
|
||||
- urgent: Severe degradation, retrain immediately
|
||||
- high: Performance below threshold, retrain soon
|
||||
- medium: Trending down, schedule retraining
|
||||
- low: Stable, routine retraining
|
||||
- none: No retraining needed
|
||||
"""
|
||||
if degradation_detected['detected']:
|
||||
severity = degradation_detected['severity']
|
||||
|
||||
if severity == 'high':
|
||||
priority = 'urgent'
|
||||
recommendation = f"Retrain {model_name} immediately - severe performance degradation"
|
||||
elif severity == 'medium':
|
||||
priority = 'high'
|
||||
recommendation = f"Schedule {model_name} retraining within 7 days"
|
||||
else:
|
||||
priority = 'medium'
|
||||
recommendation = f"Schedule routine {model_name} retraining"
|
||||
|
||||
return {
|
||||
'recommended': True,
|
||||
'priority': priority,
|
||||
'recommendation': recommendation,
|
||||
'reasons': degradation_detected['reasons'],
|
||||
'estimated_improvement': self._estimate_retraining_benefit(
|
||||
current_metrics, degradation_detected
|
||||
)
|
||||
}
|
||||
|
||||
# Check if routine retraining is due (e.g., every 90 days)
|
||||
# This would require tracking last_retrained_at
|
||||
else:
|
||||
return {
|
||||
'recommended': False,
|
||||
'priority': 'none',
|
||||
'recommendation': f"{model_name} performance is acceptable, no immediate retraining needed",
|
||||
'next_review_date': (datetime.utcnow() + timedelta(days=30)).isoformat()
|
||||
}
|
||||
|
||||
def _estimate_retraining_benefit(
|
||||
self,
|
||||
current_metrics: Dict[str, float],
|
||||
degradation_detected: Dict[str, Any]
|
||||
) -> Dict[str, Any]:
|
||||
"""Estimate expected improvement from retraining."""
|
||||
baseline_acc = degradation_detected.get('baseline_accuracy')
|
||||
current_acc = current_metrics['accuracy']
|
||||
|
||||
if baseline_acc:
|
||||
# Expect to recover 70-80% of lost performance
|
||||
expected_improvement = (baseline_acc - current_acc) * 0.75
|
||||
expected_new_acc = current_acc + expected_improvement
|
||||
|
||||
return {
|
||||
'expected_accuracy_improvement': round(expected_improvement, 2),
|
||||
'expected_new_accuracy': round(expected_new_acc, 2),
|
||||
'confidence': 'medium'
|
||||
}
|
||||
|
||||
return {
|
||||
'expected_accuracy_improvement': 'unknown',
|
||||
'confidence': 'low'
|
||||
}
|
||||
|
||||
def _identify_error_patterns(
|
||||
self, feedback_data: pd.DataFrame
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Identify systematic error patterns.
|
||||
|
||||
Patterns:
|
||||
- Consistent over/under prediction
|
||||
- Higher errors for specific ranges
|
||||
- Day-of-week effects
|
||||
- Seasonal effects
|
||||
"""
|
||||
patterns = []
|
||||
|
||||
# Pattern 1: Systematic bias
|
||||
mean_error = feedback_data['error'].mean()
|
||||
if abs(mean_error) > feedback_data['error'].std() * 0.5:
|
||||
direction = 'over-prediction' if mean_error > 0 else 'under-prediction'
|
||||
patterns.append({
|
||||
'pattern': 'systematic_bias',
|
||||
'description': f'Consistent {direction} by {abs(mean_error):.1f} units',
|
||||
'severity': 'high' if abs(mean_error) > 10 else 'medium',
|
||||
'recommendation': 'Recalibrate model bias term'
|
||||
})
|
||||
|
||||
# Pattern 2: High error for large values
|
||||
if 'predicted_value' in feedback_data.columns:
|
||||
# Split into quartiles
|
||||
feedback_data['value_quartile'] = pd.qcut(
|
||||
feedback_data['predicted_value'],
|
||||
q=4,
|
||||
labels=['Q1', 'Q2', 'Q3', 'Q4'],
|
||||
duplicates='drop'
|
||||
)
|
||||
|
||||
quartile_errors = feedback_data.groupby('value_quartile')['error_pct'].mean()
|
||||
|
||||
if len(quartile_errors) == 4 and quartile_errors['Q4'] > quartile_errors['Q1'] * 1.5:
|
||||
patterns.append({
|
||||
'pattern': 'high_value_error',
|
||||
'description': f'Higher errors for large predictions (Q4: {quartile_errors["Q4"]:.1f}% vs Q1: {quartile_errors["Q1"]:.1f}%)',
|
||||
'severity': 'medium',
|
||||
'recommendation': 'Add log transformation or separate model for high values'
|
||||
})
|
||||
|
||||
# Pattern 3: Day-of-week effect
|
||||
if 'outcome_date' in feedback_data.columns:
|
||||
feedback_data['day_of_week'] = pd.to_datetime(feedback_data['outcome_date']).dt.dayofweek
|
||||
|
||||
dow_errors = feedback_data.groupby('day_of_week')['error_pct'].mean()
|
||||
|
||||
if len(dow_errors) >= 5 and dow_errors.max() > dow_errors.min() * 1.5:
|
||||
worst_day = dow_errors.idxmax()
|
||||
day_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
|
||||
|
||||
patterns.append({
|
||||
'pattern': 'day_of_week_effect',
|
||||
'description': f'Higher errors on {day_names[worst_day]} ({dow_errors[worst_day]:.1f}%)',
|
||||
'severity': 'low',
|
||||
'recommendation': 'Add day-of-week features to model'
|
||||
})
|
||||
|
||||
return patterns
|
||||
|
||||
def _calculate_confidence_calibration(
|
||||
self, feedback_data: pd.DataFrame
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Calculate how well confidence scores match actual accuracy.
|
||||
|
||||
Well-calibrated model: 80% confidence → 80% accuracy
|
||||
"""
|
||||
if 'confidence' not in feedback_data.columns:
|
||||
return {'calibrated': False, 'reason': 'No confidence scores available'}
|
||||
|
||||
# Bin by confidence ranges
|
||||
feedback_data['confidence_bin'] = pd.cut(
|
||||
feedback_data['confidence'],
|
||||
bins=[0, 60, 70, 80, 90, 100],
|
||||
labels=['<60', '60-70', '70-80', '80-90', '90+']
|
||||
)
|
||||
|
||||
calibration_results = []
|
||||
|
||||
for conf_bin in feedback_data['confidence_bin'].unique():
|
||||
if pd.isna(conf_bin):
|
||||
continue
|
||||
|
||||
bin_data = feedback_data[feedback_data['confidence_bin'] == conf_bin]
|
||||
|
||||
if len(bin_data) >= 5:
|
||||
avg_confidence = bin_data['confidence'].mean()
|
||||
avg_accuracy = bin_data['accuracy'].mean()
|
||||
calibration_error = abs(avg_confidence - avg_accuracy)
|
||||
|
||||
calibration_results.append({
|
||||
'confidence_range': str(conf_bin),
|
||||
'avg_confidence': round(avg_confidence, 1),
|
||||
'avg_accuracy': round(avg_accuracy, 1),
|
||||
'calibration_error': round(calibration_error, 1),
|
||||
'sample_size': len(bin_data),
|
||||
'well_calibrated': calibration_error < 10
|
||||
})
|
||||
|
||||
# Overall calibration
|
||||
if calibration_results:
|
||||
overall_calibration_error = np.mean([r['calibration_error'] for r in calibration_results])
|
||||
well_calibrated = overall_calibration_error < 10
|
||||
|
||||
return {
|
||||
'calibrated': well_calibrated,
|
||||
'overall_calibration_error': round(overall_calibration_error, 2),
|
||||
'by_confidence_range': calibration_results,
|
||||
'recommendation': 'Confidence scores are well-calibrated' if well_calibrated
|
||||
else 'Recalibrate confidence scoring algorithm'
|
||||
}
|
||||
|
||||
return {'calibrated': False, 'reason': 'Insufficient data for calibration analysis'}
|
||||
|
||||
async def generate_learning_insights(
|
||||
self,
|
||||
performance_analyses: List[Dict[str, Any]],
|
||||
tenant_id: str
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Generate high-level insights about learning system performance.
|
||||
|
||||
Args:
|
||||
performance_analyses: List of model performance analyses
|
||||
tenant_id: Tenant identifier
|
||||
|
||||
Returns:
|
||||
Learning insights for system improvement
|
||||
"""
|
||||
insights = []
|
||||
|
||||
# Insight 1: Models needing urgent retraining
|
||||
urgent_models = [
|
||||
a for a in performance_analyses
|
||||
if a.get('retraining_recommendation', {}).get('priority') == 'urgent'
|
||||
]
|
||||
|
||||
if urgent_models:
|
||||
model_names = ', '.join([a['model_name'] for a in urgent_models])
|
||||
|
||||
insights.append({
|
||||
'type': 'warning',
|
||||
'priority': 'urgent',
|
||||
'category': 'system',
|
||||
'title': f'Urgent Model Retraining Required: {len(urgent_models)} Models',
|
||||
'description': f'Models requiring immediate retraining: {model_names}. Performance has degraded significantly.',
|
||||
'impact_type': 'system_health',
|
||||
'confidence': 95,
|
||||
'metrics_json': {
|
||||
'tenant_id': tenant_id,
|
||||
'urgent_models': [a['model_name'] for a in urgent_models],
|
||||
'affected_count': len(urgent_models)
|
||||
},
|
||||
'actionable': True,
|
||||
'recommendation_actions': [{
|
||||
'label': 'Retrain Models',
|
||||
'action': 'trigger_model_retraining',
|
||||
'params': {'models': [a['model_name'] for a in urgent_models]}
|
||||
}],
|
||||
'source_service': 'ai_insights',
|
||||
'source_model': 'feedback_learning_system'
|
||||
})
|
||||
|
||||
# Insight 2: Overall system health
|
||||
total_models = len(performance_analyses)
|
||||
healthy_models = [
|
||||
a for a in performance_analyses
|
||||
if not a.get('degradation_detected', {}).get('detected', False)
|
||||
]
|
||||
|
||||
health_pct = (len(healthy_models) / total_models * 100) if total_models > 0 else 0
|
||||
|
||||
if health_pct < 80:
|
||||
insights.append({
|
||||
'type': 'warning',
|
||||
'priority': 'high',
|
||||
'category': 'system',
|
||||
'title': f'Learning System Health: {health_pct:.0f}%',
|
||||
'description': f'{len(healthy_models)} of {total_models} models are performing well. System-wide performance review recommended.',
|
||||
'impact_type': 'system_health',
|
||||
'confidence': 90,
|
||||
'metrics_json': {
|
||||
'tenant_id': tenant_id,
|
||||
'total_models': total_models,
|
||||
'healthy_models': len(healthy_models),
|
||||
'health_percentage': round(health_pct, 1)
|
||||
},
|
||||
'actionable': True,
|
||||
'recommendation_actions': [{
|
||||
'label': 'Review System Health',
|
||||
'action': 'review_learning_system',
|
||||
'params': {'tenant_id': tenant_id}
|
||||
}],
|
||||
'source_service': 'ai_insights',
|
||||
'source_model': 'feedback_learning_system'
|
||||
})
|
||||
|
||||
# Insight 3: Confidence calibration issues
|
||||
poorly_calibrated = [
|
||||
a for a in performance_analyses
|
||||
if not a.get('confidence_calibration', {}).get('calibrated', True)
|
||||
]
|
||||
|
||||
if poorly_calibrated:
|
||||
insights.append({
|
||||
'type': 'opportunity',
|
||||
'priority': 'medium',
|
||||
'category': 'system',
|
||||
'title': f'Confidence Calibration Needed: {len(poorly_calibrated)} Models',
|
||||
'description': 'Confidence scores do not match actual accuracy. Recalibration recommended.',
|
||||
'impact_type': 'system_improvement',
|
||||
'confidence': 85,
|
||||
'metrics_json': {
|
||||
'tenant_id': tenant_id,
|
||||
'models_needing_calibration': [a['model_name'] for a in poorly_calibrated]
|
||||
},
|
||||
'actionable': True,
|
||||
'recommendation_actions': [{
|
||||
'label': 'Recalibrate Confidence Scores',
|
||||
'action': 'recalibrate_confidence',
|
||||
'params': {'models': [a['model_name'] for a in poorly_calibrated]}
|
||||
}],
|
||||
'source_service': 'ai_insights',
|
||||
'source_model': 'feedback_learning_system'
|
||||
})
|
||||
|
||||
return insights
|
||||
|
||||
async def calculate_roi(
|
||||
self,
|
||||
feedback_data: pd.DataFrame,
|
||||
insight_type: str
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Calculate ROI for applied insights.
|
||||
|
||||
Args:
|
||||
feedback_data: Feedback data with business impact metrics
|
||||
insight_type: Type of insight (e.g., 'demand_forecast', 'safety_stock')
|
||||
|
||||
Returns:
|
||||
ROI calculation with cost savings and accuracy metrics
|
||||
"""
|
||||
if len(feedback_data) == 0:
|
||||
return {'status': 'insufficient_data', 'samples': 0}
|
||||
|
||||
# Calculate accuracy
|
||||
avg_accuracy = feedback_data['accuracy'].mean()
|
||||
|
||||
# Estimate cost savings (would be more sophisticated in production)
|
||||
# For now, use impact_value from insights if available
|
||||
if 'impact_value' in feedback_data.columns:
|
||||
total_impact = feedback_data['impact_value'].sum()
|
||||
avg_impact = feedback_data['impact_value'].mean()
|
||||
|
||||
return {
|
||||
'insight_type': insight_type,
|
||||
'samples': len(feedback_data),
|
||||
'avg_accuracy': round(avg_accuracy, 2),
|
||||
'total_impact_value': round(total_impact, 2),
|
||||
'avg_impact_per_insight': round(avg_impact, 2),
|
||||
'roi_validated': True
|
||||
}
|
||||
|
||||
return {
|
||||
'insight_type': insight_type,
|
||||
'samples': len(feedback_data),
|
||||
'avg_accuracy': round(avg_accuracy, 2),
|
||||
'roi_validated': False,
|
||||
'note': 'Impact values not tracked in feedback'
|
||||
}
|
||||
11
services/ai_insights/app/models/__init__.py
Normal file
11
services/ai_insights/app/models/__init__.py
Normal file
@@ -0,0 +1,11 @@
|
||||
"""Database models for AI Insights Service."""
|
||||
|
||||
from app.models.ai_insight import AIInsight
|
||||
from app.models.insight_feedback import InsightFeedback
|
||||
from app.models.insight_correlation import InsightCorrelation
|
||||
|
||||
__all__ = [
|
||||
"AIInsight",
|
||||
"InsightFeedback",
|
||||
"InsightCorrelation",
|
||||
]
|
||||
129
services/ai_insights/app/models/ai_insight.py
Normal file
129
services/ai_insights/app/models/ai_insight.py
Normal file
@@ -0,0 +1,129 @@
|
||||
"""AI Insight database model."""
|
||||
|
||||
from sqlalchemy import Column, String, Integer, Boolean, DECIMAL, TIMESTAMP, Text, Index, CheckConstraint
|
||||
from sqlalchemy.dialects.postgresql import UUID, JSONB
|
||||
from sqlalchemy.sql import func
|
||||
import uuid
|
||||
|
||||
from app.core.database import Base
|
||||
|
||||
|
||||
class AIInsight(Base):
|
||||
"""AI Insight model for storing intelligent recommendations and predictions."""
|
||||
|
||||
__tablename__ = "ai_insights"
|
||||
|
||||
# Primary Key
|
||||
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
|
||||
|
||||
# Tenant Information
|
||||
tenant_id = Column(UUID(as_uuid=True), nullable=False, index=True)
|
||||
|
||||
# Classification
|
||||
type = Column(
|
||||
String(50),
|
||||
nullable=False,
|
||||
index=True,
|
||||
comment="optimization, alert, prediction, recommendation, insight, anomaly"
|
||||
)
|
||||
priority = Column(
|
||||
String(20),
|
||||
nullable=False,
|
||||
index=True,
|
||||
comment="low, medium, high, critical"
|
||||
)
|
||||
category = Column(
|
||||
String(50),
|
||||
nullable=False,
|
||||
index=True,
|
||||
comment="forecasting, inventory, production, procurement, customer, cost, quality, efficiency, demand, maintenance, energy, scheduling"
|
||||
)
|
||||
|
||||
# Content
|
||||
title = Column(String(255), nullable=False)
|
||||
description = Column(Text, nullable=False)
|
||||
|
||||
# Impact Information
|
||||
impact_type = Column(
|
||||
String(50),
|
||||
comment="cost_savings, revenue_increase, waste_reduction, efficiency_gain, quality_improvement, risk_mitigation"
|
||||
)
|
||||
impact_value = Column(DECIMAL(10, 2), comment="Numeric impact value")
|
||||
impact_unit = Column(
|
||||
String(20),
|
||||
comment="euros, percentage, hours, units, euros/month, euros/year"
|
||||
)
|
||||
|
||||
# Confidence and Metrics
|
||||
confidence = Column(
|
||||
Integer,
|
||||
CheckConstraint('confidence >= 0 AND confidence <= 100'),
|
||||
nullable=False,
|
||||
index=True,
|
||||
comment="Confidence score 0-100"
|
||||
)
|
||||
metrics_json = Column(
|
||||
JSONB,
|
||||
comment="Dynamic metrics specific to insight type"
|
||||
)
|
||||
|
||||
# Actionability
|
||||
actionable = Column(
|
||||
Boolean,
|
||||
default=True,
|
||||
nullable=False,
|
||||
index=True,
|
||||
comment="Whether this insight can be acted upon"
|
||||
)
|
||||
recommendation_actions = Column(
|
||||
JSONB,
|
||||
comment="List of possible actions: [{label, action, endpoint}]"
|
||||
)
|
||||
|
||||
# Status
|
||||
status = Column(
|
||||
String(20),
|
||||
default='new',
|
||||
nullable=False,
|
||||
index=True,
|
||||
comment="new, acknowledged, in_progress, applied, dismissed, expired"
|
||||
)
|
||||
|
||||
# Source Information
|
||||
source_service = Column(
|
||||
String(50),
|
||||
comment="Service that generated this insight"
|
||||
)
|
||||
source_data_id = Column(
|
||||
String(100),
|
||||
comment="Reference to source data (e.g., forecast_id, model_id)"
|
||||
)
|
||||
|
||||
# Timestamps
|
||||
created_at = Column(
|
||||
TIMESTAMP(timezone=True),
|
||||
server_default=func.now(),
|
||||
nullable=False,
|
||||
index=True
|
||||
)
|
||||
updated_at = Column(
|
||||
TIMESTAMP(timezone=True),
|
||||
server_default=func.now(),
|
||||
onupdate=func.now(),
|
||||
nullable=False
|
||||
)
|
||||
applied_at = Column(TIMESTAMP(timezone=True), comment="When insight was applied")
|
||||
expired_at = Column(
|
||||
TIMESTAMP(timezone=True),
|
||||
comment="When insight expires (auto-calculated based on TTL)"
|
||||
)
|
||||
|
||||
# Composite Indexes
|
||||
__table_args__ = (
|
||||
Index('idx_tenant_status_category', 'tenant_id', 'status', 'category'),
|
||||
Index('idx_tenant_created_confidence', 'tenant_id', 'created_at', 'confidence'),
|
||||
Index('idx_actionable_status', 'actionable', 'status'),
|
||||
)
|
||||
|
||||
def __repr__(self):
|
||||
return f"<AIInsight(id={self.id}, type={self.type}, title={self.title[:30]}, confidence={self.confidence})>"
|
||||
69
services/ai_insights/app/models/insight_correlation.py
Normal file
69
services/ai_insights/app/models/insight_correlation.py
Normal file
@@ -0,0 +1,69 @@
|
||||
"""Insight Correlation database model for cross-service intelligence."""
|
||||
|
||||
from sqlalchemy import Column, String, Integer, DECIMAL, TIMESTAMP, ForeignKey, Index
|
||||
from sqlalchemy.dialects.postgresql import UUID
|
||||
from sqlalchemy.sql import func
|
||||
from sqlalchemy.orm import relationship
|
||||
import uuid
|
||||
|
||||
from app.core.database import Base
|
||||
|
||||
|
||||
class InsightCorrelation(Base):
|
||||
"""Track correlations between insights from different services."""
|
||||
|
||||
__tablename__ = "insight_correlations"
|
||||
|
||||
# Primary Key
|
||||
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
|
||||
|
||||
# Foreign Keys to AIInsights
|
||||
parent_insight_id = Column(
|
||||
UUID(as_uuid=True),
|
||||
ForeignKey('ai_insights.id', ondelete='CASCADE'),
|
||||
nullable=False,
|
||||
index=True,
|
||||
comment="Primary insight that leads to correlation"
|
||||
)
|
||||
child_insight_id = Column(
|
||||
UUID(as_uuid=True),
|
||||
ForeignKey('ai_insights.id', ondelete='CASCADE'),
|
||||
nullable=False,
|
||||
index=True,
|
||||
comment="Related insight"
|
||||
)
|
||||
|
||||
# Correlation Information
|
||||
correlation_type = Column(
|
||||
String(50),
|
||||
nullable=False,
|
||||
comment="forecast_inventory, production_procurement, weather_customer, demand_supplier, etc."
|
||||
)
|
||||
correlation_strength = Column(
|
||||
DECIMAL(3, 2),
|
||||
nullable=False,
|
||||
comment="0.00 to 1.00 indicating strength of correlation"
|
||||
)
|
||||
|
||||
# Combined Metrics
|
||||
combined_confidence = Column(
|
||||
Integer,
|
||||
comment="Weighted combined confidence of both insights"
|
||||
)
|
||||
|
||||
# Timestamp
|
||||
created_at = Column(
|
||||
TIMESTAMP(timezone=True),
|
||||
server_default=func.now(),
|
||||
nullable=False,
|
||||
index=True
|
||||
)
|
||||
|
||||
# Composite Indexes
|
||||
__table_args__ = (
|
||||
Index('idx_parent_child', 'parent_insight_id', 'child_insight_id'),
|
||||
Index('idx_correlation_type', 'correlation_type'),
|
||||
)
|
||||
|
||||
def __repr__(self):
|
||||
return f"<InsightCorrelation(id={self.id}, type={self.correlation_type}, strength={self.correlation_strength})>"
|
||||
87
services/ai_insights/app/models/insight_feedback.py
Normal file
87
services/ai_insights/app/models/insight_feedback.py
Normal file
@@ -0,0 +1,87 @@
|
||||
"""Insight Feedback database model for closed-loop learning."""
|
||||
|
||||
from sqlalchemy import Column, String, Boolean, DECIMAL, TIMESTAMP, Text, ForeignKey, Index
|
||||
from sqlalchemy.dialects.postgresql import UUID, JSONB
|
||||
from sqlalchemy.sql import func
|
||||
from sqlalchemy.orm import relationship
|
||||
import uuid
|
||||
|
||||
from app.core.database import Base
|
||||
|
||||
|
||||
class InsightFeedback(Base):
|
||||
"""Feedback tracking for AI Insights to enable learning."""
|
||||
|
||||
__tablename__ = "insight_feedback"
|
||||
|
||||
# Primary Key
|
||||
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
|
||||
|
||||
# Foreign Key to AIInsight
|
||||
insight_id = Column(
|
||||
UUID(as_uuid=True),
|
||||
ForeignKey('ai_insights.id', ondelete='CASCADE'),
|
||||
nullable=False,
|
||||
index=True
|
||||
)
|
||||
|
||||
# Action Information
|
||||
action_taken = Column(
|
||||
String(100),
|
||||
comment="Specific action that was taken from recommendation_actions"
|
||||
)
|
||||
|
||||
# Result Data
|
||||
result_data = Column(
|
||||
JSONB,
|
||||
comment="Detailed result data from applying the insight"
|
||||
)
|
||||
|
||||
# Success Tracking
|
||||
success = Column(
|
||||
Boolean,
|
||||
nullable=False,
|
||||
index=True,
|
||||
comment="Whether the insight application was successful"
|
||||
)
|
||||
error_message = Column(
|
||||
Text,
|
||||
comment="Error message if success = false"
|
||||
)
|
||||
|
||||
# Impact Comparison
|
||||
expected_impact_value = Column(
|
||||
DECIMAL(10, 2),
|
||||
comment="Expected impact value from original insight"
|
||||
)
|
||||
actual_impact_value = Column(
|
||||
DECIMAL(10, 2),
|
||||
comment="Measured actual impact after application"
|
||||
)
|
||||
variance_percentage = Column(
|
||||
DECIMAL(5, 2),
|
||||
comment="(actual - expected) / expected * 100"
|
||||
)
|
||||
|
||||
# User Information
|
||||
applied_by = Column(
|
||||
String(100),
|
||||
comment="User or system that applied the insight"
|
||||
)
|
||||
|
||||
# Timestamp
|
||||
created_at = Column(
|
||||
TIMESTAMP(timezone=True),
|
||||
server_default=func.now(),
|
||||
nullable=False,
|
||||
index=True
|
||||
)
|
||||
|
||||
# Composite Indexes
|
||||
__table_args__ = (
|
||||
Index('idx_insight_success', 'insight_id', 'success'),
|
||||
Index('idx_created_success', 'created_at', 'success'),
|
||||
)
|
||||
|
||||
def __repr__(self):
|
||||
return f"<InsightFeedback(id={self.id}, insight_id={self.insight_id}, success={self.success})>"
|
||||
9
services/ai_insights/app/repositories/__init__.py
Normal file
9
services/ai_insights/app/repositories/__init__.py
Normal file
@@ -0,0 +1,9 @@
|
||||
"""Repositories for AI Insights Service."""
|
||||
|
||||
from app.repositories.insight_repository import InsightRepository
|
||||
from app.repositories.feedback_repository import FeedbackRepository
|
||||
|
||||
__all__ = [
|
||||
"InsightRepository",
|
||||
"FeedbackRepository",
|
||||
]
|
||||
81
services/ai_insights/app/repositories/feedback_repository.py
Normal file
81
services/ai_insights/app/repositories/feedback_repository.py
Normal file
@@ -0,0 +1,81 @@
|
||||
"""Repository for Insight Feedback database operations."""
|
||||
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy import select, and_, desc
|
||||
from typing import Optional, List
|
||||
from uuid import UUID
|
||||
from decimal import Decimal
|
||||
|
||||
from app.models.insight_feedback import InsightFeedback
|
||||
from app.schemas.feedback import InsightFeedbackCreate
|
||||
|
||||
|
||||
class FeedbackRepository:
|
||||
"""Repository for Insight Feedback operations."""
|
||||
|
||||
def __init__(self, session: AsyncSession):
|
||||
self.session = session
|
||||
|
||||
async def create(self, feedback_data: InsightFeedbackCreate) -> InsightFeedback:
|
||||
"""Create feedback for an insight."""
|
||||
# Calculate variance if both values provided
|
||||
variance = None
|
||||
if (feedback_data.expected_impact_value is not None and
|
||||
feedback_data.actual_impact_value is not None and
|
||||
feedback_data.expected_impact_value != 0):
|
||||
variance = (
|
||||
(feedback_data.actual_impact_value - feedback_data.expected_impact_value) /
|
||||
feedback_data.expected_impact_value * 100
|
||||
)
|
||||
|
||||
feedback = InsightFeedback(
|
||||
**feedback_data.model_dump(exclude={'variance_percentage'}),
|
||||
variance_percentage=variance
|
||||
)
|
||||
self.session.add(feedback)
|
||||
await self.session.flush()
|
||||
await self.session.refresh(feedback)
|
||||
return feedback
|
||||
|
||||
async def get_by_id(self, feedback_id: UUID) -> Optional[InsightFeedback]:
|
||||
"""Get feedback by ID."""
|
||||
query = select(InsightFeedback).where(InsightFeedback.id == feedback_id)
|
||||
result = await self.session.execute(query)
|
||||
return result.scalar_one_or_none()
|
||||
|
||||
async def get_by_insight(self, insight_id: UUID) -> List[InsightFeedback]:
|
||||
"""Get all feedback for an insight."""
|
||||
query = select(InsightFeedback).where(
|
||||
InsightFeedback.insight_id == insight_id
|
||||
).order_by(desc(InsightFeedback.created_at))
|
||||
|
||||
result = await self.session.execute(query)
|
||||
return list(result.scalars().all())
|
||||
|
||||
async def get_success_rate(self, insight_type: Optional[str] = None) -> float:
|
||||
"""Calculate success rate for insights."""
|
||||
query = select(InsightFeedback)
|
||||
|
||||
result = await self.session.execute(query)
|
||||
feedbacks = result.scalars().all()
|
||||
|
||||
if not feedbacks:
|
||||
return 0.0
|
||||
|
||||
successful = sum(1 for f in feedbacks if f.success)
|
||||
return (successful / len(feedbacks)) * 100
|
||||
|
||||
async def get_average_impact_variance(self) -> Decimal:
|
||||
"""Calculate average variance between expected and actual impact."""
|
||||
query = select(InsightFeedback).where(
|
||||
InsightFeedback.variance_percentage.isnot(None)
|
||||
)
|
||||
|
||||
result = await self.session.execute(query)
|
||||
feedbacks = result.scalars().all()
|
||||
|
||||
if not feedbacks:
|
||||
return Decimal('0.0')
|
||||
|
||||
avg_variance = sum(f.variance_percentage for f in feedbacks) / len(feedbacks)
|
||||
return Decimal(str(round(float(avg_variance), 2)))
|
||||
254
services/ai_insights/app/repositories/insight_repository.py
Normal file
254
services/ai_insights/app/repositories/insight_repository.py
Normal file
@@ -0,0 +1,254 @@
|
||||
"""Repository for AI Insight database operations."""
|
||||
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy import select, func, and_, or_, desc
|
||||
from sqlalchemy.orm import selectinload
|
||||
from typing import Optional, List, Dict, Any
|
||||
from uuid import UUID
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
from app.models.ai_insight import AIInsight
|
||||
from app.schemas.insight import AIInsightCreate, AIInsightUpdate, InsightFilters
|
||||
|
||||
|
||||
class InsightRepository:
|
||||
"""Repository for AI Insight operations."""
|
||||
|
||||
def __init__(self, session: AsyncSession):
|
||||
self.session = session
|
||||
|
||||
async def create(self, insight_data: AIInsightCreate) -> AIInsight:
|
||||
"""Create a new AI Insight."""
|
||||
# Calculate expiration date (default 7 days from now)
|
||||
from app.core.config import settings
|
||||
expired_at = datetime.utcnow() + timedelta(days=settings.DEFAULT_INSIGHT_TTL_DAYS)
|
||||
|
||||
insight = AIInsight(
|
||||
**insight_data.model_dump(),
|
||||
expired_at=expired_at
|
||||
)
|
||||
self.session.add(insight)
|
||||
await self.session.flush()
|
||||
await self.session.refresh(insight)
|
||||
return insight
|
||||
|
||||
async def get_by_id(self, insight_id: UUID) -> Optional[AIInsight]:
|
||||
"""Get insight by ID."""
|
||||
query = select(AIInsight).where(AIInsight.id == insight_id)
|
||||
result = await self.session.execute(query)
|
||||
return result.scalar_one_or_none()
|
||||
|
||||
async def get_by_tenant(
|
||||
self,
|
||||
tenant_id: UUID,
|
||||
filters: Optional[InsightFilters] = None,
|
||||
skip: int = 0,
|
||||
limit: int = 100
|
||||
) -> tuple[List[AIInsight], int]:
|
||||
"""Get insights for a tenant with filters and pagination."""
|
||||
# Build base query
|
||||
query = select(AIInsight).where(AIInsight.tenant_id == tenant_id)
|
||||
|
||||
# Apply filters
|
||||
if filters:
|
||||
if filters.category and filters.category != 'all':
|
||||
query = query.where(AIInsight.category == filters.category)
|
||||
|
||||
if filters.priority and filters.priority != 'all':
|
||||
query = query.where(AIInsight.priority == filters.priority)
|
||||
|
||||
if filters.status and filters.status != 'all':
|
||||
query = query.where(AIInsight.status == filters.status)
|
||||
|
||||
if filters.actionable_only:
|
||||
query = query.where(AIInsight.actionable == True)
|
||||
|
||||
if filters.min_confidence > 0:
|
||||
query = query.where(AIInsight.confidence >= filters.min_confidence)
|
||||
|
||||
if filters.source_service:
|
||||
query = query.where(AIInsight.source_service == filters.source_service)
|
||||
|
||||
if filters.from_date:
|
||||
query = query.where(AIInsight.created_at >= filters.from_date)
|
||||
|
||||
if filters.to_date:
|
||||
query = query.where(AIInsight.created_at <= filters.to_date)
|
||||
|
||||
# Get total count
|
||||
count_query = select(func.count()).select_from(query.subquery())
|
||||
total_result = await self.session.execute(count_query)
|
||||
total = total_result.scalar() or 0
|
||||
|
||||
# Apply ordering, pagination
|
||||
query = query.order_by(desc(AIInsight.confidence), desc(AIInsight.created_at))
|
||||
query = query.offset(skip).limit(limit)
|
||||
|
||||
# Execute query
|
||||
result = await self.session.execute(query)
|
||||
insights = result.scalars().all()
|
||||
|
||||
return list(insights), total
|
||||
|
||||
async def get_orchestration_ready_insights(
|
||||
self,
|
||||
tenant_id: UUID,
|
||||
target_date: datetime,
|
||||
min_confidence: int = 70
|
||||
) -> Dict[str, List[AIInsight]]:
|
||||
"""Get actionable insights for orchestration."""
|
||||
query = select(AIInsight).where(
|
||||
and_(
|
||||
AIInsight.tenant_id == tenant_id,
|
||||
AIInsight.actionable == True,
|
||||
AIInsight.confidence >= min_confidence,
|
||||
AIInsight.status.in_(['new', 'acknowledged']),
|
||||
or_(
|
||||
AIInsight.expired_at.is_(None),
|
||||
AIInsight.expired_at > datetime.utcnow()
|
||||
)
|
||||
)
|
||||
).order_by(desc(AIInsight.confidence))
|
||||
|
||||
result = await self.session.execute(query)
|
||||
insights = result.scalars().all()
|
||||
|
||||
# Categorize insights
|
||||
categorized = {
|
||||
'forecast_adjustments': [],
|
||||
'procurement_recommendations': [],
|
||||
'production_optimizations': [],
|
||||
'supplier_alerts': [],
|
||||
'price_opportunities': []
|
||||
}
|
||||
|
||||
for insight in insights:
|
||||
if insight.category == 'forecasting':
|
||||
categorized['forecast_adjustments'].append(insight)
|
||||
elif insight.category == 'procurement':
|
||||
if 'supplier' in insight.title.lower():
|
||||
categorized['supplier_alerts'].append(insight)
|
||||
elif 'price' in insight.title.lower():
|
||||
categorized['price_opportunities'].append(insight)
|
||||
else:
|
||||
categorized['procurement_recommendations'].append(insight)
|
||||
elif insight.category == 'production':
|
||||
categorized['production_optimizations'].append(insight)
|
||||
|
||||
return categorized
|
||||
|
||||
async def update(self, insight_id: UUID, update_data: AIInsightUpdate) -> Optional[AIInsight]:
|
||||
"""Update an insight."""
|
||||
insight = await self.get_by_id(insight_id)
|
||||
if not insight:
|
||||
return None
|
||||
|
||||
for field, value in update_data.model_dump(exclude_unset=True).items():
|
||||
setattr(insight, field, value)
|
||||
|
||||
await self.session.flush()
|
||||
await self.session.refresh(insight)
|
||||
return insight
|
||||
|
||||
async def delete(self, insight_id: UUID) -> bool:
|
||||
"""Delete (dismiss) an insight."""
|
||||
insight = await self.get_by_id(insight_id)
|
||||
if not insight:
|
||||
return False
|
||||
|
||||
insight.status = 'dismissed'
|
||||
await self.session.flush()
|
||||
return True
|
||||
|
||||
async def get_metrics(self, tenant_id: UUID) -> Dict[str, Any]:
|
||||
"""Get aggregate metrics for insights."""
|
||||
query = select(AIInsight).where(
|
||||
and_(
|
||||
AIInsight.tenant_id == tenant_id,
|
||||
AIInsight.status != 'dismissed',
|
||||
or_(
|
||||
AIInsight.expired_at.is_(None),
|
||||
AIInsight.expired_at > datetime.utcnow()
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
result = await self.session.execute(query)
|
||||
insights = result.scalars().all()
|
||||
|
||||
if not insights:
|
||||
return {
|
||||
'total_insights': 0,
|
||||
'actionable_insights': 0,
|
||||
'average_confidence': 0,
|
||||
'high_priority_count': 0,
|
||||
'medium_priority_count': 0,
|
||||
'low_priority_count': 0,
|
||||
'critical_priority_count': 0,
|
||||
'by_category': {},
|
||||
'by_status': {},
|
||||
'total_potential_impact': 0
|
||||
}
|
||||
|
||||
# Calculate metrics
|
||||
total = len(insights)
|
||||
actionable = sum(1 for i in insights if i.actionable)
|
||||
avg_confidence = sum(i.confidence for i in insights) / total if total > 0 else 0
|
||||
|
||||
# Priority counts
|
||||
priority_counts = {
|
||||
'high': sum(1 for i in insights if i.priority == 'high'),
|
||||
'medium': sum(1 for i in insights if i.priority == 'medium'),
|
||||
'low': sum(1 for i in insights if i.priority == 'low'),
|
||||
'critical': sum(1 for i in insights if i.priority == 'critical')
|
||||
}
|
||||
|
||||
# By category
|
||||
by_category = {}
|
||||
for insight in insights:
|
||||
by_category[insight.category] = by_category.get(insight.category, 0) + 1
|
||||
|
||||
# By status
|
||||
by_status = {}
|
||||
for insight in insights:
|
||||
by_status[insight.status] = by_status.get(insight.status, 0) + 1
|
||||
|
||||
# Total potential impact
|
||||
total_impact = sum(
|
||||
float(i.impact_value) for i in insights
|
||||
if i.impact_value and i.impact_type in ['cost_savings', 'revenue_increase']
|
||||
)
|
||||
|
||||
return {
|
||||
'total_insights': total,
|
||||
'actionable_insights': actionable,
|
||||
'average_confidence': round(avg_confidence, 1),
|
||||
'high_priority_count': priority_counts['high'],
|
||||
'medium_priority_count': priority_counts['medium'],
|
||||
'low_priority_count': priority_counts['low'],
|
||||
'critical_priority_count': priority_counts['critical'],
|
||||
'by_category': by_category,
|
||||
'by_status': by_status,
|
||||
'total_potential_impact': round(total_impact, 2)
|
||||
}
|
||||
|
||||
async def expire_old_insights(self) -> int:
|
||||
"""Mark expired insights as expired."""
|
||||
query = select(AIInsight).where(
|
||||
and_(
|
||||
AIInsight.expired_at.isnot(None),
|
||||
AIInsight.expired_at <= datetime.utcnow(),
|
||||
AIInsight.status.notin_(['applied', 'dismissed', 'expired'])
|
||||
)
|
||||
)
|
||||
|
||||
result = await self.session.execute(query)
|
||||
insights = result.scalars().all()
|
||||
|
||||
count = 0
|
||||
for insight in insights:
|
||||
insight.status = 'expired'
|
||||
count += 1
|
||||
|
||||
await self.session.flush()
|
||||
return count
|
||||
27
services/ai_insights/app/schemas/__init__.py
Normal file
27
services/ai_insights/app/schemas/__init__.py
Normal file
@@ -0,0 +1,27 @@
|
||||
"""Pydantic schemas for AI Insights Service."""
|
||||
|
||||
from app.schemas.insight import (
|
||||
AIInsightBase,
|
||||
AIInsightCreate,
|
||||
AIInsightUpdate,
|
||||
AIInsightResponse,
|
||||
AIInsightList,
|
||||
InsightMetrics,
|
||||
InsightFilters
|
||||
)
|
||||
from app.schemas.feedback import (
|
||||
InsightFeedbackCreate,
|
||||
InsightFeedbackResponse
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"AIInsightBase",
|
||||
"AIInsightCreate",
|
||||
"AIInsightUpdate",
|
||||
"AIInsightResponse",
|
||||
"AIInsightList",
|
||||
"InsightMetrics",
|
||||
"InsightFilters",
|
||||
"InsightFeedbackCreate",
|
||||
"InsightFeedbackResponse",
|
||||
]
|
||||
37
services/ai_insights/app/schemas/feedback.py
Normal file
37
services/ai_insights/app/schemas/feedback.py
Normal file
@@ -0,0 +1,37 @@
|
||||
"""Pydantic schemas for Insight Feedback."""
|
||||
|
||||
from pydantic import BaseModel, Field, ConfigDict
|
||||
from typing import Optional, Dict, Any
|
||||
from datetime import datetime
|
||||
from uuid import UUID
|
||||
from decimal import Decimal
|
||||
|
||||
|
||||
class InsightFeedbackBase(BaseModel):
|
||||
"""Base schema for Insight Feedback."""
|
||||
|
||||
action_taken: str
|
||||
result_data: Optional[Dict[str, Any]] = Field(default_factory=dict)
|
||||
success: bool
|
||||
error_message: Optional[str] = None
|
||||
expected_impact_value: Optional[Decimal] = None
|
||||
actual_impact_value: Optional[Decimal] = None
|
||||
variance_percentage: Optional[Decimal] = None
|
||||
|
||||
|
||||
class InsightFeedbackCreate(InsightFeedbackBase):
|
||||
"""Schema for creating feedback."""
|
||||
|
||||
insight_id: UUID
|
||||
applied_by: Optional[str] = "system"
|
||||
|
||||
|
||||
class InsightFeedbackResponse(InsightFeedbackBase):
|
||||
"""Schema for feedback response."""
|
||||
|
||||
id: UUID
|
||||
insight_id: UUID
|
||||
applied_by: str
|
||||
created_at: datetime
|
||||
|
||||
model_config = ConfigDict(from_attributes=True)
|
||||
93
services/ai_insights/app/schemas/insight.py
Normal file
93
services/ai_insights/app/schemas/insight.py
Normal file
@@ -0,0 +1,93 @@
|
||||
"""Pydantic schemas for AI Insights."""
|
||||
|
||||
from pydantic import BaseModel, Field, ConfigDict
|
||||
from typing import Optional, Dict, Any, List
|
||||
from datetime import datetime
|
||||
from uuid import UUID
|
||||
from decimal import Decimal
|
||||
|
||||
|
||||
class AIInsightBase(BaseModel):
|
||||
"""Base schema for AI Insight."""
|
||||
|
||||
type: str = Field(..., description="optimization, alert, prediction, recommendation, insight, anomaly")
|
||||
priority: str = Field(..., description="low, medium, high, critical")
|
||||
category: str = Field(..., description="forecasting, inventory, production, procurement, customer, etc.")
|
||||
title: str = Field(..., max_length=255)
|
||||
description: str
|
||||
impact_type: Optional[str] = Field(None, description="cost_savings, revenue_increase, waste_reduction, etc.")
|
||||
impact_value: Optional[Decimal] = None
|
||||
impact_unit: Optional[str] = Field(None, description="euros, percentage, hours, units, etc.")
|
||||
confidence: int = Field(..., ge=0, le=100, description="Confidence score 0-100")
|
||||
metrics_json: Optional[Dict[str, Any]] = Field(default_factory=dict)
|
||||
actionable: bool = True
|
||||
recommendation_actions: Optional[List[Dict[str, str]]] = Field(default_factory=list)
|
||||
source_service: Optional[str] = None
|
||||
source_data_id: Optional[str] = None
|
||||
|
||||
|
||||
class AIInsightCreate(AIInsightBase):
|
||||
"""Schema for creating a new AI Insight."""
|
||||
|
||||
tenant_id: UUID
|
||||
|
||||
|
||||
class AIInsightUpdate(BaseModel):
|
||||
"""Schema for updating an AI Insight."""
|
||||
|
||||
status: Optional[str] = Field(None, description="new, acknowledged, in_progress, applied, dismissed, expired")
|
||||
applied_at: Optional[datetime] = None
|
||||
|
||||
model_config = ConfigDict(from_attributes=True)
|
||||
|
||||
|
||||
class AIInsightResponse(AIInsightBase):
|
||||
"""Schema for AI Insight response."""
|
||||
|
||||
id: UUID
|
||||
tenant_id: UUID
|
||||
status: str
|
||||
created_at: datetime
|
||||
updated_at: datetime
|
||||
applied_at: Optional[datetime] = None
|
||||
expired_at: Optional[datetime] = None
|
||||
|
||||
model_config = ConfigDict(from_attributes=True)
|
||||
|
||||
|
||||
class AIInsightList(BaseModel):
|
||||
"""Paginated list of AI Insights."""
|
||||
|
||||
items: List[AIInsightResponse]
|
||||
total: int
|
||||
page: int
|
||||
page_size: int
|
||||
total_pages: int
|
||||
|
||||
|
||||
class InsightMetrics(BaseModel):
|
||||
"""Aggregate metrics for insights."""
|
||||
|
||||
total_insights: int
|
||||
actionable_insights: int
|
||||
average_confidence: float
|
||||
high_priority_count: int
|
||||
medium_priority_count: int
|
||||
low_priority_count: int
|
||||
critical_priority_count: int
|
||||
by_category: Dict[str, int]
|
||||
by_status: Dict[str, int]
|
||||
total_potential_impact: Optional[Decimal] = None
|
||||
|
||||
|
||||
class InsightFilters(BaseModel):
|
||||
"""Filters for querying insights."""
|
||||
|
||||
category: Optional[str] = None
|
||||
priority: Optional[str] = None
|
||||
status: Optional[str] = None
|
||||
actionable_only: bool = False
|
||||
min_confidence: int = 0
|
||||
source_service: Optional[str] = None
|
||||
from_date: Optional[datetime] = None
|
||||
to_date: Optional[datetime] = None
|
||||
229
services/ai_insights/app/scoring/confidence_calculator.py
Normal file
229
services/ai_insights/app/scoring/confidence_calculator.py
Normal file
@@ -0,0 +1,229 @@
|
||||
"""Confidence scoring calculator for AI Insights."""
|
||||
|
||||
from typing import Dict, Any, Optional
|
||||
from datetime import datetime, timedelta
|
||||
import math
|
||||
|
||||
|
||||
class ConfidenceCalculator:
|
||||
"""
|
||||
Calculate unified confidence scores across different insight types.
|
||||
|
||||
Confidence is calculated based on multiple factors:
|
||||
- Data quality (completeness, consistency)
|
||||
- Model performance (historical accuracy)
|
||||
- Sample size (statistical significance)
|
||||
- Recency (how recent is the data)
|
||||
- Historical accuracy (past insight performance)
|
||||
"""
|
||||
|
||||
# Weights for different factors
|
||||
WEIGHTS = {
|
||||
'data_quality': 0.25,
|
||||
'model_performance': 0.30,
|
||||
'sample_size': 0.20,
|
||||
'recency': 0.15,
|
||||
'historical_accuracy': 0.10
|
||||
}
|
||||
|
||||
def calculate_confidence(
|
||||
self,
|
||||
data_quality_score: Optional[float] = None,
|
||||
model_performance_score: Optional[float] = None,
|
||||
sample_size: Optional[int] = None,
|
||||
data_date: Optional[datetime] = None,
|
||||
historical_accuracy: Optional[float] = None,
|
||||
insight_type: Optional[str] = None
|
||||
) -> int:
|
||||
"""
|
||||
Calculate overall confidence score (0-100).
|
||||
|
||||
Args:
|
||||
data_quality_score: 0-1 score for data quality
|
||||
model_performance_score: 0-1 score from model metrics (e.g., 1-MAPE)
|
||||
sample_size: Number of data points used
|
||||
data_date: Date of most recent data
|
||||
historical_accuracy: 0-1 score from past insight performance
|
||||
insight_type: Type of insight for specific adjustments
|
||||
|
||||
Returns:
|
||||
int: Confidence score 0-100
|
||||
"""
|
||||
scores = {}
|
||||
|
||||
# Data Quality Score (0-100)
|
||||
if data_quality_score is not None:
|
||||
scores['data_quality'] = min(100, data_quality_score * 100)
|
||||
else:
|
||||
scores['data_quality'] = 70 # Default
|
||||
|
||||
# Model Performance Score (0-100)
|
||||
if model_performance_score is not None:
|
||||
scores['model_performance'] = min(100, model_performance_score * 100)
|
||||
else:
|
||||
scores['model_performance'] = 75 # Default
|
||||
|
||||
# Sample Size Score (0-100)
|
||||
if sample_size is not None:
|
||||
scores['sample_size'] = self._score_sample_size(sample_size)
|
||||
else:
|
||||
scores['sample_size'] = 60 # Default
|
||||
|
||||
# Recency Score (0-100)
|
||||
if data_date is not None:
|
||||
scores['recency'] = self._score_recency(data_date)
|
||||
else:
|
||||
scores['recency'] = 80 # Default
|
||||
|
||||
# Historical Accuracy Score (0-100)
|
||||
if historical_accuracy is not None:
|
||||
scores['historical_accuracy'] = min(100, historical_accuracy * 100)
|
||||
else:
|
||||
scores['historical_accuracy'] = 65 # Default
|
||||
|
||||
# Calculate weighted average
|
||||
confidence = sum(
|
||||
scores[factor] * self.WEIGHTS[factor]
|
||||
for factor in scores
|
||||
)
|
||||
|
||||
# Apply insight-type specific adjustments
|
||||
confidence = self._apply_type_adjustments(confidence, insight_type)
|
||||
|
||||
return int(round(confidence))
|
||||
|
||||
def _score_sample_size(self, sample_size: int) -> float:
|
||||
"""
|
||||
Score based on sample size using logarithmic scale.
|
||||
|
||||
Args:
|
||||
sample_size: Number of data points
|
||||
|
||||
Returns:
|
||||
float: Score 0-100
|
||||
"""
|
||||
if sample_size <= 10:
|
||||
return 30.0
|
||||
elif sample_size <= 30:
|
||||
return 50.0
|
||||
elif sample_size <= 100:
|
||||
return 70.0
|
||||
elif sample_size <= 365:
|
||||
return 85.0
|
||||
else:
|
||||
# Logarithmic scaling for larger samples
|
||||
return min(100.0, 85 + (math.log10(sample_size) - math.log10(365)) * 10)
|
||||
|
||||
def _score_recency(self, data_date: datetime) -> float:
|
||||
"""
|
||||
Score based on data recency.
|
||||
|
||||
Args:
|
||||
data_date: Date of most recent data
|
||||
|
||||
Returns:
|
||||
float: Score 0-100
|
||||
"""
|
||||
days_old = (datetime.utcnow() - data_date).days
|
||||
|
||||
if days_old == 0:
|
||||
return 100.0
|
||||
elif days_old <= 1:
|
||||
return 95.0
|
||||
elif days_old <= 3:
|
||||
return 90.0
|
||||
elif days_old <= 7:
|
||||
return 80.0
|
||||
elif days_old <= 14:
|
||||
return 70.0
|
||||
elif days_old <= 30:
|
||||
return 60.0
|
||||
elif days_old <= 60:
|
||||
return 45.0
|
||||
else:
|
||||
# Exponential decay for older data
|
||||
return max(20.0, 60 * math.exp(-days_old / 60))
|
||||
|
||||
def _apply_type_adjustments(self, base_confidence: float, insight_type: Optional[str]) -> float:
|
||||
"""
|
||||
Apply insight-type specific confidence adjustments.
|
||||
|
||||
Args:
|
||||
base_confidence: Base confidence score
|
||||
insight_type: Type of insight
|
||||
|
||||
Returns:
|
||||
float: Adjusted confidence
|
||||
"""
|
||||
if not insight_type:
|
||||
return base_confidence
|
||||
|
||||
adjustments = {
|
||||
'prediction': -5, # Predictions inherently less certain
|
||||
'optimization': +2, # Optimizations based on solid math
|
||||
'alert': +3, # Alerts based on thresholds
|
||||
'recommendation': 0, # No adjustment
|
||||
'insight': +2, # Insights from data analysis
|
||||
'anomaly': -3 # Anomalies are uncertain
|
||||
}
|
||||
|
||||
adjustment = adjustments.get(insight_type, 0)
|
||||
return max(0, min(100, base_confidence + adjustment))
|
||||
|
||||
def calculate_forecast_confidence(
|
||||
self,
|
||||
model_mape: float,
|
||||
forecast_horizon_days: int,
|
||||
data_points: int,
|
||||
last_data_date: datetime
|
||||
) -> int:
|
||||
"""
|
||||
Specialized confidence calculation for forecasting insights.
|
||||
|
||||
Args:
|
||||
model_mape: Model MAPE (Mean Absolute Percentage Error)
|
||||
forecast_horizon_days: How many days ahead
|
||||
data_points: Number of historical data points
|
||||
last_data_date: Date of last training data
|
||||
|
||||
Returns:
|
||||
int: Confidence score 0-100
|
||||
"""
|
||||
# Model performance: 1 - (MAPE/100) capped at 1
|
||||
model_score = max(0, 1 - (model_mape / 100))
|
||||
|
||||
# Horizon penalty: Longer horizons = less confidence
|
||||
horizon_factor = max(0.5, 1 - (forecast_horizon_days / 30))
|
||||
|
||||
return self.calculate_confidence(
|
||||
data_quality_score=0.9, # Assume good quality
|
||||
model_performance_score=model_score * horizon_factor,
|
||||
sample_size=data_points,
|
||||
data_date=last_data_date,
|
||||
insight_type='prediction'
|
||||
)
|
||||
|
||||
def calculate_optimization_confidence(
|
||||
self,
|
||||
calculation_accuracy: float,
|
||||
data_completeness: float,
|
||||
sample_size: int
|
||||
) -> int:
|
||||
"""
|
||||
Confidence for optimization recommendations.
|
||||
|
||||
Args:
|
||||
calculation_accuracy: 0-1 score for optimization calculation reliability
|
||||
data_completeness: 0-1 score for data completeness
|
||||
sample_size: Number of data points
|
||||
|
||||
Returns:
|
||||
int: Confidence score 0-100
|
||||
"""
|
||||
return self.calculate_confidence(
|
||||
data_quality_score=data_completeness,
|
||||
model_performance_score=calculation_accuracy,
|
||||
sample_size=sample_size,
|
||||
data_date=datetime.utcnow(),
|
||||
insight_type='optimization'
|
||||
)
|
||||
67
services/ai_insights/migrations/env.py
Normal file
67
services/ai_insights/migrations/env.py
Normal file
@@ -0,0 +1,67 @@
"""Alembic environment configuration."""

from logging.config import fileConfig
from sqlalchemy import engine_from_config, pool
from alembic import context
import os
import sys

# Add parent directory to path for imports
sys.path.insert(0, os.path.realpath(os.path.join(os.path.dirname(__file__), '..')))

from app.core.config import settings
from app.core.database import Base
from app.models import *  # Import all models

# this is the Alembic Config object
config = context.config

# Interpret the config file for Python logging
if config.config_file_name is not None:
    fileConfig(config.config_file_name)

# Set sqlalchemy.url from settings.
# Replace asyncpg with psycopg2 for synchronous Alembic migrations.
db_url = settings.DATABASE_URL.replace('postgresql+asyncpg://', 'postgresql://')
config.set_main_option('sqlalchemy.url', db_url)

# Add your model's MetaData object here for 'autogenerate' support
target_metadata = Base.metadata


def run_migrations_offline() -> None:
    """Run migrations in 'offline' mode."""
    url = config.get_main_option("sqlalchemy.url")
    context.configure(
        url=url,
        target_metadata=target_metadata,
        literal_binds=True,
        dialect_opts={"paramstyle": "named"},
    )

    with context.begin_transaction():
        context.run_migrations()


def run_migrations_online() -> None:
    """Run migrations in 'online' mode."""
    connectable = engine_from_config(
        config.get_section(config.config_ini_section, {}),
        prefix="sqlalchemy.",
        poolclass=pool.NullPool,
    )

    with connectable.connect() as connection:
        context.configure(
            connection=connection,
            target_metadata=target_metadata
        )

        with context.begin_transaction():
            context.run_migrations()


if context.is_offline_mode():
    run_migrations_offline()
else:
    run_migrations_online()
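Because this env.py rewrites the async DSN to a synchronous one, migrations can be driven by the usual `alembic upgrade head` CLI or programmatically. A minimal sketch of the programmatic route, assuming an `alembic.ini` at the service root (its location is not shown in this commit):

```python
# Programmatic equivalent of `alembic upgrade head`; the alembic.ini path is
# an assumption about the service layout, not confirmed by this diff.
from alembic import command
from alembic.config import Config

cfg = Config("services/ai_insights/alembic.ini")  # assumed location
command.upgrade(cfg, "head")
```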
26
services/ai_insights/migrations/script.py.mako
Normal file
@@ -0,0 +1,26 @@
"""${message}

Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}

"""
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa
${imports if imports else ""}

# revision identifiers, used by Alembic.
revision: str = ${repr(up_revision)}
down_revision: Union[str, None] = ${repr(down_revision)}
branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}


def upgrade() -> None:
    ${upgrades if upgrades else "pass"}


def downgrade() -> None:
    ${downgrades if downgrades else "pass"}
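For reference, a revision generated from this template renders to roughly the following module; every identifier below is an illustrative placeholder (Alembic generates the revision hash itself):

```python
"""add sample table

Revision ID: a1b2c3d4e5f6
Revises: 001
Create Date: 2025-11-02 15:00:00.000000

"""
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic (values here are illustrative).
revision: str = 'a1b2c3d4e5f6'
down_revision: Union[str, None] = '001'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    pass


def downgrade() -> None:
    pass
```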
@@ -0,0 +1,111 @@
"""Initial schema for AI Insights Service

Revision ID: 001
Revises:
Create Date: 2025-11-02 14:30:00.000000

"""
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects.postgresql import UUID, JSONB

# revision identifiers, used by Alembic.
revision: str = '001'
down_revision: Union[str, None] = None
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    # Create ai_insights table
    op.create_table(
        'ai_insights',
        sa.Column('id', UUID(as_uuid=True), primary_key=True),
        sa.Column('tenant_id', UUID(as_uuid=True), nullable=False),
        sa.Column('type', sa.String(50), nullable=False),
        sa.Column('priority', sa.String(20), nullable=False),
        sa.Column('category', sa.String(50), nullable=False),
        sa.Column('title', sa.String(255), nullable=False),
        sa.Column('description', sa.Text, nullable=False),
        sa.Column('impact_type', sa.String(50)),
        sa.Column('impact_value', sa.DECIMAL(10, 2)),
        sa.Column('impact_unit', sa.String(20)),
        sa.Column('confidence', sa.Integer, nullable=False),
        sa.Column('metrics_json', JSONB),
        sa.Column('actionable', sa.Boolean, nullable=False, server_default='true'),
        sa.Column('recommendation_actions', JSONB),
        sa.Column('status', sa.String(20), nullable=False, server_default='new'),
        sa.Column('source_service', sa.String(50)),
        sa.Column('source_data_id', sa.String(100)),
        sa.Column('created_at', sa.TIMESTAMP(timezone=True), server_default=sa.func.now(), nullable=False),
        sa.Column('updated_at', sa.TIMESTAMP(timezone=True), server_default=sa.func.now(), onupdate=sa.func.now(), nullable=False),
        sa.Column('applied_at', sa.TIMESTAMP(timezone=True)),
        sa.Column('expired_at', sa.TIMESTAMP(timezone=True)),
        sa.CheckConstraint('confidence >= 0 AND confidence <= 100', name='check_confidence_range')
    )

    # Create indexes for ai_insights
    op.create_index('idx_tenant_id', 'ai_insights', ['tenant_id'])
    op.create_index('idx_type', 'ai_insights', ['type'])
    op.create_index('idx_priority', 'ai_insights', ['priority'])
    op.create_index('idx_category', 'ai_insights', ['category'])
    op.create_index('idx_confidence', 'ai_insights', ['confidence'])
    op.create_index('idx_status', 'ai_insights', ['status'])
    op.create_index('idx_actionable', 'ai_insights', ['actionable'])
    op.create_index('idx_created_at', 'ai_insights', ['created_at'])
    op.create_index('idx_tenant_status_category', 'ai_insights', ['tenant_id', 'status', 'category'])
    op.create_index('idx_tenant_created_confidence', 'ai_insights', ['tenant_id', 'created_at', 'confidence'])
    op.create_index('idx_actionable_status', 'ai_insights', ['actionable', 'status'])

    # Create insight_feedback table
    op.create_table(
        'insight_feedback',
        sa.Column('id', UUID(as_uuid=True), primary_key=True),
        sa.Column('insight_id', UUID(as_uuid=True), nullable=False),
        sa.Column('action_taken', sa.String(100)),
        sa.Column('result_data', JSONB),
        sa.Column('success', sa.Boolean, nullable=False),
        sa.Column('error_message', sa.Text),
        sa.Column('expected_impact_value', sa.DECIMAL(10, 2)),
        sa.Column('actual_impact_value', sa.DECIMAL(10, 2)),
        sa.Column('variance_percentage', sa.DECIMAL(5, 2)),
        sa.Column('applied_by', sa.String(100)),
        sa.Column('created_at', sa.TIMESTAMP(timezone=True), server_default=sa.func.now(), nullable=False),
        sa.ForeignKeyConstraint(['insight_id'], ['ai_insights.id'], ondelete='CASCADE')
    )

    # Create indexes for insight_feedback
    op.create_index('idx_feedback_insight_id', 'insight_feedback', ['insight_id'])
    op.create_index('idx_feedback_success', 'insight_feedback', ['success'])
    op.create_index('idx_feedback_created_at', 'insight_feedback', ['created_at'])
    op.create_index('idx_insight_success', 'insight_feedback', ['insight_id', 'success'])
    op.create_index('idx_created_success', 'insight_feedback', ['created_at', 'success'])

    # Create insight_correlations table
    op.create_table(
        'insight_correlations',
        sa.Column('id', UUID(as_uuid=True), primary_key=True),
        sa.Column('parent_insight_id', UUID(as_uuid=True), nullable=False),
        sa.Column('child_insight_id', UUID(as_uuid=True), nullable=False),
        sa.Column('correlation_type', sa.String(50), nullable=False),
        sa.Column('correlation_strength', sa.DECIMAL(3, 2), nullable=False),
        sa.Column('combined_confidence', sa.Integer),
        sa.Column('created_at', sa.TIMESTAMP(timezone=True), server_default=sa.func.now(), nullable=False),
        sa.ForeignKeyConstraint(['parent_insight_id'], ['ai_insights.id'], ondelete='CASCADE'),
        sa.ForeignKeyConstraint(['child_insight_id'], ['ai_insights.id'], ondelete='CASCADE')
    )

    # Create indexes for insight_correlations
    op.create_index('idx_corr_parent', 'insight_correlations', ['parent_insight_id'])
    op.create_index('idx_corr_child', 'insight_correlations', ['child_insight_id'])
    op.create_index('idx_corr_type', 'insight_correlations', ['correlation_type'])
    op.create_index('idx_corr_created_at', 'insight_correlations', ['created_at'])
    op.create_index('idx_parent_child', 'insight_correlations', ['parent_insight_id', 'child_insight_id'])


def downgrade() -> None:
    op.drop_table('insight_correlations')
    op.drop_table('insight_feedback')
    op.drop_table('ai_insights')
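The composite indexes target tenant-scoped dashboard reads. A hypothetical query they would serve, using only the table and column names defined in the migration; the engine, DSN, and function name are assumptions for illustration:

```python
# Hypothetical read path for idx_tenant_status_category /
# idx_tenant_created_confidence; everything except the table and column
# names is an assumption, not service code.
from sqlalchemy import MetaData, Table, create_engine, select

engine = create_engine("postgresql://user:pass@localhost:5432/ai_insights")  # assumed DSN

def fetch_new_insights(tenant_id, limit=20):
    # Reflect the table created by the migration above
    ai_insights = Table('ai_insights', MetaData(), autoload_with=engine)
    stmt = (
        select(ai_insights)
        .where(ai_insights.c.tenant_id == tenant_id)
        .where(ai_insights.c.status == 'new')
        .order_by(ai_insights.c.created_at.desc(), ai_insights.c.confidence.desc())
        .limit(limit)
    )
    with engine.connect() as conn:
        return conn.execute(stmt).fetchall()
```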
46
services/ai_insights/requirements.txt
Normal file
@@ -0,0 +1,46 @@
# FastAPI and ASGI
fastapi==0.104.1
uvicorn[standard]==0.24.0
python-multipart==0.0.6

# Database
sqlalchemy==2.0.23
alembic==1.12.1
psycopg2-binary==2.9.9
asyncpg==0.29.0

# Pydantic
pydantic==2.5.0
pydantic-settings==2.1.0

# HTTP Client
httpx==0.25.1
aiohttp==3.9.1

# Redis
redis==5.0.1
hiredis==2.2.3

# Utilities
python-dotenv==1.0.0
python-dateutil==2.8.2
pytz==2023.3

# Logging
structlog==23.2.0

# Machine Learning (for confidence scoring and impact estimation)
numpy==1.26.2
pandas==2.1.3
scikit-learn==1.3.2

# Testing
pytest==7.4.3
pytest-asyncio==0.21.1
pytest-cov==4.1.0
httpx==0.25.1

# Code Quality
black==23.11.0
flake8==6.1.0
mypy==1.7.1
579
services/ai_insights/tests/test_feedback_learning_system.py
Normal file
@@ -0,0 +1,579 @@
"""
Tests for Feedback Loop & Learning System
"""

import pytest
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from services.ai_insights.app.ml.feedback_learning_system import FeedbackLearningSystem


@pytest.fixture
def learning_system():
    """Create FeedbackLearningSystem instance."""
    return FeedbackLearningSystem(
        performance_threshold=0.85,
        degradation_threshold=0.10,
        min_feedback_samples=30
    )


@pytest.fixture
def good_feedback_data():
    """Generate feedback data for a well-performing model."""
    np.random.seed(42)
    dates = pd.date_range(start=datetime.utcnow() - timedelta(days=60), periods=50, freq='D')

    feedback = []
    for i, date in enumerate(dates):
        predicted = 100 + np.random.normal(0, 10)
        actual = predicted + np.random.normal(0, 5)  # Small error

        error = predicted - actual
        error_pct = abs(error / actual * 100) if actual != 0 else 0
        accuracy = max(0, 100 - error_pct)

        feedback.append({
            'insight_id': f'insight_{i}',
            'applied_at': date - timedelta(days=1),
            'outcome_date': date,
            'predicted_value': predicted,
            'actual_value': actual,
            'error': error,
            'error_pct': error_pct,
            'accuracy': accuracy,
            'confidence': 85
        })

    return pd.DataFrame(feedback)


@pytest.fixture
def degraded_feedback_data():
    """Generate feedback data for a degrading model."""
    np.random.seed(42)
    dates = pd.date_range(start=datetime.utcnow() - timedelta(days=60), periods=50, freq='D')

    feedback = []
    for i, date in enumerate(dates):
        # Introduce increasing error over time: the error std roughly triples by the end
        error_multiplier = 1 + (i / 50) * 2

        predicted = 100 + np.random.normal(0, 10)
        actual = predicted + np.random.normal(0, 10 * error_multiplier)

        error = predicted - actual
        error_pct = abs(error / actual * 100) if actual != 0 else 0
        accuracy = max(0, 100 - error_pct)

        feedback.append({
            'insight_id': f'insight_{i}',
            'applied_at': date - timedelta(days=1),
            'outcome_date': date,
            'predicted_value': predicted,
            'actual_value': actual,
            'error': error,
            'error_pct': error_pct,
            'accuracy': accuracy,
            'confidence': 85
        })

    return pd.DataFrame(feedback)


@pytest.fixture
def biased_feedback_data():
    """Generate feedback data with systematic bias."""
    np.random.seed(42)
    dates = pd.date_range(start=datetime.utcnow() - timedelta(days=60), periods=50, freq='D')

    feedback = []
    for i, date in enumerate(dates):
        predicted = 100 + np.random.normal(0, 10)
        # Systematic over-prediction: actuals land ~15% below predictions
        actual = predicted * 0.85 + np.random.normal(0, 3)

        error = predicted - actual
        error_pct = abs(error / actual * 100) if actual != 0 else 0
        accuracy = max(0, 100 - error_pct)

        feedback.append({
            'insight_id': f'insight_{i}',
            'applied_at': date - timedelta(days=1),
            'outcome_date': date,
            'predicted_value': predicted,
            'actual_value': actual,
            'error': error,
            'error_pct': error_pct,
            'accuracy': accuracy,
            'confidence': 80
        })

    return pd.DataFrame(feedback)


@pytest.fixture
def poorly_calibrated_feedback_data():
    """Generate feedback with poor confidence calibration."""
    np.random.seed(42)
    dates = pd.date_range(start=datetime.utcnow() - timedelta(days=60), periods=50, freq='D')

    feedback = []
    for i, date in enumerate(dates):
        predicted = 100 + np.random.normal(0, 10)

        # First half: high confidence but large errors; second half: the reverse
        if i < 25:
            confidence = 90
            actual = predicted + np.random.normal(0, 20)  # Large error
        else:
            confidence = 60
            actual = predicted + np.random.normal(0, 5)  # Small error

        error = predicted - actual
        error_pct = abs(error / actual * 100) if actual != 0 else 0
        accuracy = max(0, 100 - error_pct)

        feedback.append({
            'insight_id': f'insight_{i}',
            'applied_at': date - timedelta(days=1),
            'outcome_date': date,
            'predicted_value': predicted,
            'actual_value': actual,
            'error': error,
            'error_pct': error_pct,
            'accuracy': accuracy,
            'confidence': confidence
        })

    return pd.DataFrame(feedback)

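Each fixture above derives its per-row metrics with the same three lines; isolated, the arithmetic looks like this for a single observation:

```python
# One row of the fixtures' metric derivation, worked by hand.
predicted, actual = 100.0, 95.0

error = predicted - actual                                   # 5.0 (signed; > 0 means over-prediction)
error_pct = abs(error / actual * 100) if actual != 0 else 0  # 5/95*100 ≈ 5.26
accuracy = max(0, 100 - error_pct)                           # ≈ 94.74

print(round(error_pct, 2), round(accuracy, 2))  # 5.26 94.74
```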
class TestPerformanceMetrics:
    """Test performance metric calculation."""

    @pytest.mark.asyncio
    async def test_calculate_metrics_good_performance(self, learning_system, good_feedback_data):
        """Test metric calculation for good performance."""
        metrics = learning_system._calculate_performance_metrics(good_feedback_data)

        assert 'accuracy' in metrics
        assert 'mae' in metrics
        assert 'rmse' in metrics
        assert 'mape' in metrics
        assert 'bias' in metrics
        assert 'r_squared' in metrics

        # A good model should have high accuracy and low, unbiased error
        assert metrics['accuracy'] > 80
        assert metrics['mae'] < 10
        assert abs(metrics['bias']) < 5

    @pytest.mark.asyncio
    async def test_calculate_metrics_degraded_performance(self, learning_system, degraded_feedback_data):
        """Test metric calculation for degraded performance."""
        metrics = learning_system._calculate_performance_metrics(degraded_feedback_data)

        # A degraded model should have lower accuracy and larger errors
        assert metrics['accuracy'] < 80
        assert metrics['mae'] > 5


class TestPerformanceTrend:
    """Test performance trend analysis."""

    @pytest.mark.asyncio
    async def test_stable_trend(self, learning_system, good_feedback_data):
        """Test detection of a stable performance trend."""
        trend = learning_system._analyze_performance_trend(good_feedback_data)

        assert trend['trend'] in ['stable', 'improving']

    @pytest.mark.asyncio
    async def test_degrading_trend(self, learning_system, degraded_feedback_data):
        """Test detection of a degrading performance trend."""
        trend = learning_system._analyze_performance_trend(degraded_feedback_data)

        # May detect a degrading trend depending on the data
        assert trend['trend'] in ['degrading', 'stable']
        if trend['significant']:
            assert 'slope' in trend

    @pytest.mark.asyncio
    async def test_insufficient_data_trend(self, learning_system):
        """Test trend analysis with insufficient data."""
        small_data = pd.DataFrame([{
            'insight_id': 'test',
            'outcome_date': datetime.utcnow(),
            'accuracy': 90
        }])

        trend = learning_system._analyze_performance_trend(small_data)
        assert trend['trend'] == 'insufficient_data'


class TestDegradationDetection:
    """Test performance degradation detection."""

    @pytest.mark.asyncio
    async def test_no_degradation_detected(self, learning_system, good_feedback_data):
        """Test that no degradation is reported for good performance."""
        current_metrics = learning_system._calculate_performance_metrics(good_feedback_data)
        trend = learning_system._analyze_performance_trend(good_feedback_data)

        degradation = learning_system._detect_performance_degradation(
            current_metrics,
            baseline_performance={'accuracy': 85},
            trend_analysis=trend
        )

        assert degradation['detected'] is False
        assert degradation['severity'] == 'none'

    @pytest.mark.asyncio
    async def test_degradation_below_threshold(self, learning_system):
        """Test degradation detection when accuracy is below the absolute threshold."""
        current_metrics = {'accuracy': 70}  # Below the 85% threshold
        trend = {'trend': 'stable', 'significant': False}

        degradation = learning_system._detect_performance_degradation(
            current_metrics,
            baseline_performance=None,
            trend_analysis=trend
        )

        assert degradation['detected'] is True
        assert degradation['severity'] == 'high'
        assert len(degradation['reasons']) > 0

    @pytest.mark.asyncio
    async def test_degradation_vs_baseline(self, learning_system):
        """Test degradation detection against a baseline."""
        current_metrics = {'accuracy': 80}
        baseline = {'accuracy': 95}  # A 15.8% relative drop
        trend = {'trend': 'stable', 'significant': False}

        degradation = learning_system._detect_performance_degradation(
            current_metrics,
            baseline_performance=baseline,
            trend_analysis=trend
        )

        assert degradation['detected'] is True
        assert 'dropped' in degradation['reasons'][0].lower()

    @pytest.mark.asyncio
    async def test_degradation_trending_down(self, learning_system, degraded_feedback_data):
        """Test degradation detection from a downward trend."""
        current_metrics = learning_system._calculate_performance_metrics(degraded_feedback_data)
        trend = learning_system._analyze_performance_trend(degraded_feedback_data)

        degradation = learning_system._detect_performance_degradation(
            current_metrics,
            baseline_performance={'accuracy': 90},
            trend_analysis=trend
        )

        # Should detect some form of degradation
        assert degradation['detected'] is True


class TestRetrainingRecommendation:
    """Test retraining recommendation generation."""

    @pytest.mark.asyncio
    async def test_urgent_retraining_recommendation(self, learning_system):
        """Test an urgent retraining recommendation."""
        current_metrics = {'accuracy': 70}
        degradation = {
            'detected': True,
            'severity': 'high',
            'reasons': ['Accuracy below threshold'],
            'current_accuracy': 70,
            'baseline_accuracy': 90
        }
        trend = {'trend': 'degrading', 'significant': True}

        recommendation = learning_system._generate_retraining_recommendation(
            'test_model',
            current_metrics,
            degradation,
            trend
        )

        assert recommendation['recommended'] is True
        assert recommendation['priority'] == 'urgent'
        assert 'immediately' in recommendation['recommendation'].lower()

    @pytest.mark.asyncio
    async def test_no_retraining_needed(self, learning_system, good_feedback_data):
        """Test that no retraining is recommended for good performance."""
        current_metrics = learning_system._calculate_performance_metrics(good_feedback_data)
        degradation = {'detected': False, 'severity': 'none'}
        trend = learning_system._analyze_performance_trend(good_feedback_data)

        recommendation = learning_system._generate_retraining_recommendation(
            'test_model',
            current_metrics,
            degradation,
            trend
        )

        assert recommendation['recommended'] is False
        assert recommendation['priority'] == 'none'


class TestErrorPatternDetection:
    """Test error pattern identification."""

    @pytest.mark.asyncio
    async def test_systematic_bias_detection(self, learning_system, biased_feedback_data):
        """Test detection of systematic bias."""
        patterns = learning_system._identify_error_patterns(biased_feedback_data)

        # Should detect the over-prediction bias
        bias_patterns = [p for p in patterns if p['pattern'] == 'systematic_bias']
        assert len(bias_patterns) > 0

        bias = bias_patterns[0]
        assert 'over-prediction' in bias['description']
        assert bias['severity'] in ['high', 'medium']

    @pytest.mark.asyncio
    async def test_no_patterns_for_good_data(self, learning_system, good_feedback_data):
        """Test that no significant patterns are found in good data."""
        patterns = learning_system._identify_error_patterns(good_feedback_data)

        # Minor patterns are acceptable, but none should be high severity
        high_severity = [p for p in patterns if p.get('severity') == 'high']
        assert len(high_severity) == 0


class TestConfidenceCalibration:
    """Test confidence calibration analysis."""

    @pytest.mark.asyncio
    async def test_well_calibrated_confidence(self, learning_system, good_feedback_data):
        """Test well-calibrated confidence scores."""
        calibration = learning_system._calculate_confidence_calibration(good_feedback_data)

        # Good data with consistent confidence should be well calibrated;
        # a small calibration error indicates good calibration
        if 'overall_calibration_error' in calibration:
            assert calibration['overall_calibration_error'] < 20

    @pytest.mark.asyncio
    async def test_poorly_calibrated_confidence(self, learning_system, poorly_calibrated_feedback_data):
        """Test poorly calibrated confidence scores."""
        calibration = learning_system._calculate_confidence_calibration(poorly_calibrated_feedback_data)

        # Should detect poor calibration
        assert calibration['calibrated'] is False
        if 'by_confidence_range' in calibration:
            assert len(calibration['by_confidence_range']) > 0

    @pytest.mark.asyncio
    async def test_no_confidence_data(self, learning_system):
        """Test calibration when no confidence scores are available."""
        no_conf_data = pd.DataFrame([{
            'predicted_value': 100,
            'actual_value': 95,
            'accuracy': 95
        }])

        calibration = learning_system._calculate_confidence_calibration(no_conf_data)
        assert calibration['calibrated'] is False
        assert 'reason' in calibration


class TestCompletePerformanceAnalysis:
    """Test the complete performance analysis workflow."""

    @pytest.mark.asyncio
    async def test_analyze_good_performance(self, learning_system, good_feedback_data):
        """Test the complete analysis of good performance."""
        result = await learning_system.analyze_model_performance(
            model_name='test_model',
            feedback_data=good_feedback_data,
            baseline_performance={'accuracy': 85}
        )

        assert result['model_name'] == 'test_model'
        assert result['status'] != 'insufficient_feedback'
        assert 'current_performance' in result
        assert 'trend_analysis' in result
        assert 'degradation_detected' in result
        assert 'retraining_recommendation' in result

        # Good performance should not trigger a retraining recommendation
        assert result['retraining_recommendation']['recommended'] is False

    @pytest.mark.asyncio
    async def test_analyze_degraded_performance(self, learning_system, degraded_feedback_data):
        """Test the complete analysis of degraded performance."""
        result = await learning_system.analyze_model_performance(
            model_name='degraded_model',
            feedback_data=degraded_feedback_data,
            baseline_performance={'accuracy': 90}
        )

        assert result['degradation_detected']['detected'] is True
        assert result['retraining_recommendation']['recommended'] is True

    @pytest.mark.asyncio
    async def test_insufficient_feedback(self, learning_system):
        """Test analysis with insufficient feedback samples."""
        small_data = pd.DataFrame([{
            'insight_id': 'test',
            'outcome_date': datetime.utcnow(),
            'predicted_value': 100,
            'actual_value': 95,
            'error': 5,
            'error_pct': 5,
            'accuracy': 95,
            'confidence': 85
        }])

        result = await learning_system.analyze_model_performance(
            model_name='test_model',
            feedback_data=small_data
        )

        assert result['status'] == 'insufficient_feedback'
        assert result['feedback_samples'] == 1
        assert result['required_samples'] == 30


class TestLearningInsights:
    """Test learning insight generation."""

    @pytest.mark.asyncio
    async def test_generate_urgent_retraining_insight(self, learning_system):
        """Test generation of an urgent retraining insight."""
        analyses = [{
            'model_name': 'urgent_model',
            'retraining_recommendation': {
                'priority': 'urgent',
                'recommended': True
            },
            'degradation_detected': {
                'detected': True
            }
        }]

        insights = await learning_system.generate_learning_insights(
            analyses,
            tenant_id='tenant_123'
        )

        # Should generate an urgent warning
        urgent_insights = [i for i in insights if i['priority'] == 'urgent']
        assert len(urgent_insights) > 0

        insight = urgent_insights[0]
        assert insight['type'] == 'warning'
        assert 'urgent_model' in insight['description'].lower()

    @pytest.mark.asyncio
    async def test_generate_system_health_insight(self, learning_system):
        """Test generation of a system health insight."""
        # 3 models, 1 degraded
        analyses = [
            {
                'model_name': 'model_1',
                'degradation_detected': {'detected': False},
                'retraining_recommendation': {'priority': 'none'}
            },
            {
                'model_name': 'model_2',
                'degradation_detected': {'detected': False},
                'retraining_recommendation': {'priority': 'none'}
            },
            {
                'model_name': 'model_3',
                'degradation_detected': {'detected': True},
                'retraining_recommendation': {'priority': 'high'}
            }
        ]

        insights = await learning_system.generate_learning_insights(
            analyses,
            tenant_id='tenant_123'
        )

        # Should generate a system health insight (2 of 3 models healthy, ~67% < 80%).
        # Whether it triggers depends on the threshold; at minimum it must not crash.
        assert isinstance(insights, list)

    @pytest.mark.asyncio
    async def test_generate_calibration_insight(self, learning_system):
        """Test generation of a calibration insight."""
        analyses = [{
            'model_name': 'model_1',
            'degradation_detected': {'detected': False},
            'retraining_recommendation': {'priority': 'none'},
            'confidence_calibration': {
                'calibrated': False,
                'overall_calibration_error': 15
            }
        }]

        insights = await learning_system.generate_learning_insights(
            analyses,
            tenant_id='tenant_123'
        )

        # Should generate a calibration insight
        calibration_insights = [
            i for i in insights
            if 'calibration' in i['title'].lower()
        ]
        assert len(calibration_insights) > 0


class TestROICalculation:
    """Test ROI calculation."""

    @pytest.mark.asyncio
    async def test_calculate_roi_with_impact_values(self, learning_system):
        """Test ROI calculation with impact values."""
        feedback_data = pd.DataFrame([
            {'accuracy': 90, 'impact_value': 1000},
            {'accuracy': 85, 'impact_value': 1500},
            {'accuracy': 95, 'impact_value': 800}
        ])

        roi = await learning_system.calculate_roi(
            feedback_data,
            insight_type='demand_forecast'
        )

        assert roi['insight_type'] == 'demand_forecast'
        assert roi['samples'] == 3
        assert roi['avg_accuracy'] == 90.0
        assert roi['total_impact_value'] == 3300
        assert roi['roi_validated'] is True

    @pytest.mark.asyncio
    async def test_calculate_roi_without_impact_values(self, learning_system, good_feedback_data):
        """Test ROI calculation without impact values."""
        roi = await learning_system.calculate_roi(
            good_feedback_data,
            insight_type='yield_prediction'
        )

        assert roi['insight_type'] == 'yield_prediction'
        assert roi['samples'] > 0
        assert 'avg_accuracy' in roi
        assert roi['roi_validated'] is False