Add new alert architecture
136 README.md
@@ -1,136 +0,0 @@
# Bakery Forecasting Platform - Microservices

## Overview
AI-powered demand forecasting platform for bakeries in Madrid, Spain, built on a microservices architecture.

## Architecture
- **API Gateway**: Central entry point for all client requests
- **Auth Service**: User authentication and authorization
- **Training Service**: ML model training for demand forecasting
- **Forecasting Service**: Generate predictions using trained models
- **Data Service**: External data integration (weather, traffic, events)
- **Tenant Service**: Multi-tenant management
- **Notification Service**: Email and WhatsApp notifications

## Quick Start

### Prerequisites
- Docker and Docker Compose
- Python 3.11+
- Node.js 18+

### Setup
```bash
# Run the setup script
./scripts/setup.sh

# Start services
docker-compose up -d

# Check service health
curl http://localhost:8000/health
```

### Services
- **Gateway**: http://localhost:8000
- **API Docs**: http://localhost:8000/docs
- **Grafana**: http://localhost:3002
- **Prometheus**: http://localhost:9090
- **RabbitMQ Management**: http://localhost:15672

### Development

#### Running Tests
```bash
./scripts/test.sh
```

#### Building Services
```bash
docker-compose build
```

#### Viewing Logs
```bash
# All services
docker-compose logs -f

# Specific service
docker-compose logs -f auth-service
```

#### Service URLs (Development)
- Gateway: http://localhost:8000
- Auth Service: http://localhost:8001
- Training Service: http://localhost:8002
- Forecasting Service: http://localhost:8003
- Data Service: http://localhost:8004
- Tenant Service: http://localhost:8005
- Notification Service: http://localhost:8006

## Environment Variables

Copy `.env.example` to `.env` and update the following:

```bash
# External API Keys
AEMET_API_KEY=your-aemet-api-key
MADRID_OPENDATA_API_KEY=your-madrid-opendata-key

# Email Configuration
SMTP_USER=your-email@gmail.com
SMTP_PASSWORD=your-email-password

# WhatsApp API
WHATSAPP_API_KEY=your-whatsapp-api-key

# JWT Secret (change in production!)
JWT_SECRET_KEY=your-super-secret-jwt-key-change-in-production
```

## Troubleshooting

### Services won't start
```bash
# Check if ports are available
docker-compose ps
netstat -tulpn | grep :8000

# Restart services
docker-compose down
docker-compose up -d
```

### Database connection issues
```bash
# Check database containers
docker-compose logs auth-db
docker-compose logs training-db

# Reset databases
docker-compose down -v
docker-compose up -d
```

### Service communication issues
```bash
# Check service health
curl http://localhost:8000/health
curl http://localhost:8001/health
curl http://localhost:8002/health

# Check RabbitMQ
open http://localhost:15672
# User: bakery, Password: forecast123
```

## Next Steps
1. **Configure External APIs**: Add your AEMET and Madrid Open Data API keys
2. **Test Authentication**: Register a user and test login
3. **Upload Sales Data**: Import historical sales data
4. **Train Models**: Start your first training job
5. **Generate Forecasts**: Create demand predictions

## License
MIT License
@@ -730,6 +730,43 @@ services:
      timeout: 10s
      retries: 3

  alert-processor:
    build:
      context: .
      dockerfile: ./services/alert_processor/Dockerfile
      args:
        - ENVIRONMENT=${ENVIRONMENT}
        - BUILD_DATE=${BUILD_DATE}
    image: bakery/alert-processor:${IMAGE_TAG}
    restart: unless-stopped
    env_file: .env
    depends_on:
      redis:
        condition: service_healthy
      rabbitmq:
        condition: service_healthy
      notification-service:
        condition: service_healthy
    networks:
      - bakery-network
    volumes:
      - log_storage:/app/logs
      - ./services/alert_processor:/app
      - ./shared:/app/shared
    deploy:
      replicas: 2
      resources:
        limits:
          memory: 512M
        reservations:
          memory: 256M
    # No health check needed - this is a background worker service
    # healthcheck:
    #   test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"]
    #   interval: 30s
    #   timeout: 10s
    #   retries: 3

  inventory-service:
    build:
      context: .

@@ -760,7 +797,7 @@ services:
      - ./services/inventory:/app
      - ./shared:/app/shared
    healthcheck:
-     test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
+     test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health/')"]
      interval: 30s
      timeout: 10s
      retries: 3

@@ -797,7 +834,7 @@ services:
      - ./services/recipes:/app
      - ./shared:/app/shared
    healthcheck:
-     test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
+     test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"]
      interval: 30s
      timeout: 10s
      retries: 3

@@ -835,7 +872,7 @@ services:
      - ./services/suppliers:/app
      - ./shared:/app/shared
    healthcheck:
-     test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
+     test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"]
      interval: 30s
      timeout: 10s
      retries: 3

@@ -911,7 +948,7 @@ services:
      - ./services/orders:/app
      - ./shared:/app/shared
    healthcheck:
-     test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
+     test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"]
      interval: 30s
      timeout: 10s
      retries: 3

@@ -950,7 +987,7 @@ services:
      - ./services/production:/app
      - ./shared:/app/shared
    healthcheck:
-     test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
+     test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"]
      interval: 30s
      timeout: 10s
      retries: 3
304 frontend/src/components/alerts/AlertCard.tsx Normal file
@@ -0,0 +1,304 @@
// frontend/src/components/alerts/AlertCard.tsx
/**
 * Individual alert/recommendation card component
 * Displays alert details with appropriate styling and actions
 */

import React, { useState } from 'react';
import { AlertItem, ItemSeverity, ItemType } from '../../types/alerts';
import { formatDistanceToNow } from 'date-fns';
import { es } from 'date-fns/locale';

interface AlertCardProps {
  item: AlertItem;
  onAcknowledge: (itemId: string) => void;
  onResolve: (itemId: string) => void;
  compact?: boolean;
  showActions?: boolean;
}

const getSeverityConfig = (severity: ItemSeverity, itemType: ItemType) => {
  if (itemType === 'recommendation') {
    switch (severity) {
      case 'high':
        return {
          color: 'bg-blue-50 border-blue-200 text-blue-900',
          icon: '💡',
          badge: 'bg-blue-100 text-blue-800'
        };
      case 'medium':
        return {
          color: 'bg-blue-50 border-blue-100 text-blue-800',
          icon: '💡',
          badge: 'bg-blue-50 text-blue-600'
        };
      case 'low':
        return {
          color: 'bg-gray-50 border-gray-200 text-gray-700',
          icon: '💡',
          badge: 'bg-gray-100 text-gray-600'
        };
      default:
        return {
          color: 'bg-blue-50 border-blue-200 text-blue-900',
          icon: '💡',
          badge: 'bg-blue-100 text-blue-800'
        };
    }
  } else {
    switch (severity) {
      case 'urgent':
        return {
          color: 'bg-red-50 border-red-300 text-red-900',
          icon: '🚨',
          badge: 'bg-red-100 text-red-800',
          pulse: true
        };
      case 'high':
        return {
          color: 'bg-orange-50 border-orange-200 text-orange-900',
          icon: '⚠️',
          badge: 'bg-orange-100 text-orange-800'
        };
      case 'medium':
        return {
          color: 'bg-yellow-50 border-yellow-200 text-yellow-900',
          icon: '🔔',
          badge: 'bg-yellow-100 text-yellow-800'
        };
      case 'low':
        return {
          color: 'bg-green-50 border-green-200 text-green-900',
          icon: 'ℹ️',
          badge: 'bg-green-100 text-green-800'
        };
      default:
        return {
          color: 'bg-gray-50 border-gray-200 text-gray-700',
          icon: '📋',
          badge: 'bg-gray-100 text-gray-600'
        };
    }
  }
};

const getStatusConfig = (status: string) => {
  switch (status) {
    case 'acknowledged':
      return {
        color: 'bg-blue-100 text-blue-800',
        label: 'Reconocido'
      };
    case 'resolved':
      return {
        color: 'bg-green-100 text-green-800',
        label: 'Resuelto'
      };
    default:
      return {
        color: 'bg-gray-100 text-gray-800',
        label: 'Activo'
      };
  }
};

export const AlertCard: React.FC<AlertCardProps> = ({
  item,
  onAcknowledge,
  onResolve,
  compact = false,
  showActions = true
}) => {
  const [isExpanded, setIsExpanded] = useState(false);
  const [actionLoading, setActionLoading] = useState<string | null>(null);

  const severityConfig = getSeverityConfig(item.severity, item.item_type);
  const statusConfig = getStatusConfig(item.status);

  // Handlers may be async, so accept a void or Promise-returning callback
  const handleAction = async (action: () => void | Promise<void>, actionType: string) => {
    setActionLoading(actionType);
    try {
      await action();
    } finally {
      setActionLoading(null);
    }
  };

  const timeAgo = formatDistanceToNow(new Date(item.timestamp), {
    addSuffix: true,
    locale: es
  });

  return (
    <div className={`
      rounded-lg border-2 transition-all duration-200 hover:shadow-md
      ${severityConfig.color}
      ${'pulse' in severityConfig && severityConfig.pulse ? 'animate-pulse' : ''}
      ${item.status !== 'active' ? 'opacity-75' : ''}
    `}>
      {/* Header */}
      <div className="p-4">
        <div className="flex items-start justify-between">
          <div className="flex items-start space-x-3 flex-1 min-w-0">
            {/* Icon and Type Badge */}
            <div className="flex-shrink-0">
              <span className="text-2xl">{severityConfig.icon}</span>
            </div>

            <div className="flex-1 min-w-0">
              {/* Title and Badges */}
              <div className="flex items-start justify-between mb-2">
                <div className="flex-1 min-w-0">
                  <h3 className="text-lg font-semibold truncate">
                    {item.title}
                  </h3>
                  <div className="flex items-center space-x-2 mt-1">
                    <span className={`
                      inline-flex items-center px-2 py-0.5 rounded-full text-xs font-medium
                      ${severityConfig.badge}
                    `}>
                      {item.item_type === 'alert' ? 'Alerta' : 'Recomendación'} - {item.severity}
                    </span>
                    <span className={`
                      inline-flex items-center px-2 py-0.5 rounded-full text-xs font-medium
                      ${statusConfig.color}
                    `}>
                      {statusConfig.label}
                    </span>
                    <span className="text-xs text-gray-500">
                      {item.service}
                    </span>
                  </div>
                </div>

                {/* Expand Button */}
                {!compact && (
                  <button
                    onClick={() => setIsExpanded(!isExpanded)}
                    className="ml-2 text-gray-400 hover:text-gray-600 transition-colors"
                  >
                    <svg
                      className={`w-5 h-5 transform transition-transform ${
                        isExpanded ? 'rotate-180' : ''
                      }`}
                      fill="none"
                      stroke="currentColor"
                      viewBox="0 0 24 24"
                    >
                      <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M19 9l-7 7-7-7" />
                    </svg>
                  </button>
                )}
              </div>

              {/* Message */}
              <p className={`text-sm ${compact ? 'line-clamp-2' : ''}`}>
                {item.message}
              </p>

              {/* Timestamp */}
              <p className="text-xs text-gray-500 mt-2">
                {timeAgo} • {new Date(item.timestamp).toLocaleString('es-ES')}
              </p>
            </div>
          </div>
        </div>

        {/* Quick Actions */}
        {showActions && item.status === 'active' && (
          <div className="flex items-center space-x-2 mt-3">
            <button
              onClick={() => handleAction(() => onAcknowledge(item.id), 'acknowledge')}
              disabled={actionLoading === 'acknowledge'}
              className="inline-flex items-center px-3 py-1 border border-transparent text-sm font-medium rounded-md text-blue-700 bg-blue-100 hover:bg-blue-200 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-blue-500 disabled:opacity-50"
            >
              {actionLoading === 'acknowledge' ? (
                <svg className="animate-spin -ml-1 mr-2 h-4 w-4 text-blue-700" fill="none" viewBox="0 0 24 24">
                  <circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4"></circle>
                  <path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z"></path>
                </svg>
              ) : (
                <svg className="w-4 h-4 mr-1" fill="none" stroke="currentColor" viewBox="0 0 24 24">
                  <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M5 13l4 4L19 7" />
                </svg>
              )}
              Reconocer
            </button>

            <button
              onClick={() => handleAction(() => onResolve(item.id), 'resolve')}
              disabled={actionLoading === 'resolve'}
              className="inline-flex items-center px-3 py-1 border border-transparent text-sm font-medium rounded-md text-green-700 bg-green-100 hover:bg-green-200 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-green-500 disabled:opacity-50"
            >
              {actionLoading === 'resolve' ? (
                <svg className="animate-spin -ml-1 mr-2 h-4 w-4 text-green-700" fill="none" viewBox="0 0 24 24">
                  <circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4"></circle>
                  <path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z"></path>
                </svg>
              ) : (
                <svg className="w-4 h-4 mr-1" fill="none" stroke="currentColor" viewBox="0 0 24 24">
                  <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M9 12l2 2 4-4m6 2a9 9 0 11-18 0 9 9 0 0118 0z" />
                </svg>
              )}
              Resolver
            </button>
          </div>
        )}
      </div>

      {/* Expanded Details */}
      {isExpanded && (
        <div className="border-t border-gray-200 px-4 py-3 bg-gray-50 bg-opacity-50">
          {/* Actions */}
          {item.actions.length > 0 && (
            <div className="mb-3">
              <h4 className="text-sm font-medium text-gray-700 mb-2">Acciones sugeridas:</h4>
              <ul className="list-disc list-inside space-y-1">
                {item.actions.map((action, index) => (
                  <li key={index} className="text-sm text-gray-600">
                    {action}
                  </li>
                ))}
              </ul>
            </div>
          )}

          {/* Metadata */}
          {Object.keys(item.metadata).length > 0 && (
            <div className="mb-3">
              <h4 className="text-sm font-medium text-gray-700 mb-2">Detalles técnicos:</h4>
              <div className="grid grid-cols-1 md:grid-cols-2 gap-2">
                {Object.entries(item.metadata).map(([key, value]) => (
                  <div key={key} className="text-sm">
                    <span className="font-medium text-gray-600">{key}:</span>{' '}
                    <span className="text-gray-800">
                      {typeof value === 'object' ? JSON.stringify(value) : String(value)}
                    </span>
                  </div>
                ))}
              </div>
            </div>
          )}

          {/* Acknowledgment/Resolution Info */}
          {(item.acknowledged_at || item.resolved_at) && (
            <div className="text-xs text-gray-500 space-y-1">
              {item.acknowledged_at && (
                <p>
                  Reconocido: {new Date(item.acknowledged_at).toLocaleString('es-ES')}
                  {item.acknowledged_by && ` por ${item.acknowledged_by}`}
                </p>
              )}
              {item.resolved_at && (
                <p>
                  Resuelto: {new Date(item.resolved_at).toLocaleString('es-ES')}
                  {item.resolved_by && ` por ${item.resolved_by}`}
                </p>
              )}
            </div>
          )}
        </div>
      )}
    </div>
  );
};
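For reference, a minimal usage sketch (not part of the commit): rendering `AlertCard` with a hand-built item. The field names follow the `AlertItem` shape in `frontend/src/types/alerts.ts` below; the concrete values (`low_stock`, `inventory-service`, the tenant id) are illustrative assumptions.

```tsx
// Hypothetical demo component for AlertCard; sample values are assumptions.
import React from 'react';
import { AlertCard } from './AlertCard';
import { AlertItem } from '../../types/alerts';

const sampleItem: AlertItem = {
  id: 'demo-1',
  tenant_id: 'tenant-madrid-01',   // assumed tenant id
  item_type: 'alert',
  type: 'low_stock',               // assumed concrete alert type
  severity: 'high',
  status: 'active',
  service: 'inventory-service',
  title: 'Stock bajo: harina de trigo',
  message: 'Quedan 12 kg; el consumo previsto para mañana es de 30 kg.',
  actions: ['Crear pedido al proveedor', 'Ajustar plan de producción'],
  metadata: { product_id: 'flour-001', remaining_kg: 12 },
  created_at: new Date().toISOString(),
  timestamp: new Date().toISOString(),
};

export const AlertCardDemo: React.FC = () => (
  <AlertCard
    item={sampleItem}
    onAcknowledge={(id) => console.log('acknowledge', id)}
    onResolve={(id) => console.log('resolve', id)}
  />
);
```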
347 frontend/src/components/alerts/AlertDashboard.tsx Normal file
@@ -0,0 +1,347 @@
// frontend/src/components/alerts/AlertDashboard.tsx
/**
 * Main dashboard component for alerts and recommendations
 * Provides filtering, bulk actions, and real-time updates
 */

import React, { useState, useEffect, useMemo } from 'react';
import { AlertItem, ItemFilters, ItemType, ItemSeverity, ItemStatus } from '../../types/alerts';
import { useAlertStream } from '../../hooks/useAlertStream';
import { AlertCard } from './AlertCard';
import { AlertFilters } from './AlertFilters';
import { AlertStats } from './AlertStats';
import { ConnectionStatus } from './ConnectionStatus';
import { useTenantId } from '../../hooks/useTenantId';

interface AlertDashboardProps {
  className?: string;
  maxItems?: number;
  autoRequestNotifications?: boolean;
}

export const AlertDashboard: React.FC<AlertDashboardProps> = ({
  className = '',
  maxItems = 50,
  autoRequestNotifications = true
}) => {
  const tenantId = useTenantId();
  const {
    items,
    connectionState,
    urgentCount,
    highCount,
    recCount,
    acknowledgeItem,
    resolveItem,
    notificationPermission,
    requestNotificationPermission
  } = useAlertStream({ tenantId });

  const [filters, setFilters] = useState<ItemFilters>({
    item_type: 'all',
    severity: 'all',
    status: 'all',
    service: 'all',
    search: ''
  });

  const [selectedItems, setSelectedItems] = useState<string[]>([]);
  const [bulkActionsOpen, setBulkActionsOpen] = useState(false);
  const [viewMode, setViewMode] = useState<'list' | 'compact'>('list');

  // Request notification permission on mount if needed
  useEffect(() => {
    if (autoRequestNotifications && notificationPermission === 'default') {
      // Delay request to avoid immediate popup
      const timer = setTimeout(() => {
        requestNotificationPermission();
      }, 2000);
      return () => clearTimeout(timer);
    }
  }, [autoRequestNotifications, notificationPermission, requestNotificationPermission]);

  // Filter items based on current filters
  const filteredItems = useMemo(() => {
    let filtered = items;

    // Filter by type
    if (filters.item_type !== 'all') {
      filtered = filtered.filter(item => item.item_type === filters.item_type);
    }

    // Filter by severity
    if (filters.severity !== 'all') {
      filtered = filtered.filter(item => item.severity === filters.severity);
    }

    // Filter by status
    if (filters.status !== 'all') {
      filtered = filtered.filter(item => item.status === filters.status);
    }

    // Filter by service
    if (filters.service !== 'all') {
      filtered = filtered.filter(item => item.service === filters.service);
    }

    // Filter by search text
    if (filters.search.trim()) {
      const searchLower = filters.search.toLowerCase();
      filtered = filtered.filter(item =>
        item.title.toLowerCase().includes(searchLower) ||
        item.message.toLowerCase().includes(searchLower) ||
        item.type.toLowerCase().includes(searchLower)
      );
    }

    return filtered.slice(0, maxItems);
  }, [items, filters, maxItems]);

  // Get unique services for filter dropdown
  const availableServices = useMemo(() => {
    const services = [...new Set(items.map(item => item.service))].sort();
    return services;
  }, [items]);

  // Handle bulk actions
  const handleBulkAcknowledge = async () => {
    await Promise.all(selectedItems.map(id => acknowledgeItem(id)));
    setSelectedItems([]);
    setBulkActionsOpen(false);
  };

  const handleBulkResolve = async () => {
    await Promise.all(selectedItems.map(id => resolveItem(id)));
    setSelectedItems([]);
    setBulkActionsOpen(false);
  };

  const handleSelectAll = () => {
    const selectableItems = filteredItems
      .filter(item => item.status === 'active')
      .map(item => item.id);
    setSelectedItems(selectableItems);
  };

  const handleClearSelection = () => {
    setSelectedItems([]);
    setBulkActionsOpen(false);
  };

  const toggleItemSelection = (itemId: string) => {
    setSelectedItems(prev =>
      prev.includes(itemId)
        ? prev.filter(id => id !== itemId)
        : [...prev, itemId]
    );
  };

  const activeItems = filteredItems.filter(item => item.status === 'active');
  const hasSelection = selectedItems.length > 0;

  return (
    <div className={`max-w-7xl mx-auto ${className}`}>
      {/* Header */}
      <div className="bg-white shadow-sm border-b border-gray-200 px-6 py-4">
        <div className="flex items-center justify-between">
          <div>
            <h1 className="text-2xl font-bold text-gray-900">
              Sistema de Alertas y Recomendaciones
            </h1>
            <p className="text-sm text-gray-600 mt-1">
              Monitoreo en tiempo real de operaciones de panadería
            </p>
          </div>

          {/* Connection Status */}
          <ConnectionStatus connectionState={connectionState} />
        </div>
      </div>

      {/* Stats */}
      <AlertStats
        urgentCount={urgentCount}
        highCount={highCount}
        recCount={recCount}
        totalItems={items.length}
        activeItems={activeItems.length}
      />

      {/* Notification Permission Banner */}
      {notificationPermission === 'denied' && (
        <div className="bg-yellow-50 border border-yellow-200 rounded-md p-4 mx-6 mt-4">
          <div className="flex">
            <div className="flex-shrink-0">
              <svg className="h-5 w-5 text-yellow-400" fill="currentColor" viewBox="0 0 20 20">
                <path fillRule="evenodd" d="M8.257 3.099c.765-1.36 2.722-1.36 3.486 0l5.58 9.92c.75 1.334-.213 2.98-1.742 2.98H4.42c-1.53 0-2.493-1.646-1.743-2.98l5.58-9.92zM11 13a1 1 0 11-2 0 1 1 0 012 0zm-1-8a1 1 0 00-1 1v3a1 1 0 002 0V6a1 1 0 00-1-1z" clipRule="evenodd" />
              </svg>
            </div>
            <div className="ml-3">
              <h3 className="text-sm font-medium text-yellow-800">
                Notificaciones bloqueadas
              </h3>
              <p className="text-sm text-yellow-700 mt-1">
                Las notificaciones del navegador están deshabilitadas. No recibirás alertas urgentes en tiempo real.
              </p>
            </div>
          </div>
        </div>
      )}

      {/* Filters and View Controls */}
      <div className="bg-white border-b border-gray-200 px-6 py-4">
        <div className="flex flex-col lg:flex-row lg:items-center lg:justify-between space-y-4 lg:space-y-0">
          <AlertFilters
            filters={filters}
            onFiltersChange={setFilters}
            availableServices={availableServices}
          />

          <div className="flex items-center space-x-4">
            {/* View Mode Toggle */}
            <div className="flex rounded-md shadow-sm">
              <button
                onClick={() => setViewMode('list')}
                className={`px-4 py-2 text-sm font-medium rounded-l-md border ${
                  viewMode === 'list'
                    ? 'bg-blue-50 border-blue-200 text-blue-700'
                    : 'bg-white border-gray-300 text-gray-700 hover:bg-gray-50'
                }`}
              >
                Lista
              </button>
              <button
                onClick={() => setViewMode('compact')}
                className={`px-4 py-2 text-sm font-medium rounded-r-md border-l-0 border ${
                  viewMode === 'compact'
                    ? 'bg-blue-50 border-blue-200 text-blue-700'
                    : 'bg-white border-gray-300 text-gray-700 hover:bg-gray-50'
                }`}
              >
                Compacto
              </button>
            </div>

            {/* Bulk Actions */}
            {activeItems.length > 0 && (
              <div className="flex items-center space-x-2">
                <button
                  onClick={() => setBulkActionsOpen(!bulkActionsOpen)}
                  className="inline-flex items-center px-4 py-2 border border-gray-300 rounded-md shadow-sm text-sm font-medium text-gray-700 bg-white hover:bg-gray-50"
                >
                  Acciones masivas
                  <svg className="ml-2 h-4 w-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
                    <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M19 9l-7 7-7-7" />
                  </svg>
                </button>
              </div>
            )}
          </div>
        </div>

        {/* Bulk Actions Panel */}
        {bulkActionsOpen && activeItems.length > 0 && (
          <div className="mt-4 p-4 bg-gray-50 rounded-lg border border-gray-200">
            <div className="flex items-center justify-between">
              <div className="flex items-center space-x-4">
                <span className="text-sm text-gray-600">
                  {selectedItems.length} elementos seleccionados
                </span>
                <button
                  onClick={handleSelectAll}
                  className="text-sm text-blue-600 hover:text-blue-800"
                >
                  Seleccionar todos los activos
                </button>
                <button
                  onClick={handleClearSelection}
                  className="text-sm text-gray-600 hover:text-gray-800"
                >
                  Limpiar selección
                </button>
              </div>

              {hasSelection && (
                <div className="flex items-center space-x-2">
                  <button
                    onClick={handleBulkAcknowledge}
                    className="inline-flex items-center px-3 py-1 border border-transparent text-sm font-medium rounded-md text-blue-700 bg-blue-100 hover:bg-blue-200"
                  >
                    Reconocer seleccionados
                  </button>
                  <button
                    onClick={handleBulkResolve}
                    className="inline-flex items-center px-3 py-1 border border-transparent text-sm font-medium rounded-md text-green-700 bg-green-100 hover:bg-green-200"
                  >
                    Resolver seleccionados
                  </button>
                </div>
              )}
            </div>
          </div>
        )}
      </div>

      {/* Items List */}
      <div className="px-6 py-4">
        {filteredItems.length === 0 ? (
          <div className="text-center py-12">
            {items.length === 0 ? (
              <div>
                <svg className="mx-auto h-12 w-12 text-gray-400" fill="none" stroke="currentColor" viewBox="0 0 24 24">
                  <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M9 12l2 2 4-4m6 2a9 9 0 11-18 0 9 9 0 0118 0z" />
                </svg>
                <h3 className="mt-2 text-sm font-medium text-gray-900">
                  Sistema operativo
                </h3>
                <p className="mt-1 text-sm text-gray-500">
                  No hay alertas activas en este momento. Todas las operaciones funcionan correctamente.
                </p>
              </div>
            ) : (
              <div>
                <svg className="mx-auto h-12 w-12 text-gray-400" fill="none" stroke="currentColor" viewBox="0 0 24 24">
                  <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M21 21l-6-6m2-5a7 7 0 11-14 0 7 7 0 0114 0z" />
                </svg>
                <h3 className="mt-2 text-sm font-medium text-gray-900">
                  No se encontraron elementos
                </h3>
                <p className="mt-1 text-sm text-gray-500">
                  Intenta ajustar los filtros para ver más elementos.
                </p>
              </div>
            )}
          </div>
        ) : (
          <div className={`space-y-4 ${viewMode === 'compact' ? 'space-y-2' : ''}`}>
            {filteredItems.map((item) => (
              <div key={item.id} className="relative">
                {/* Selection Checkbox */}
                {bulkActionsOpen && item.status === 'active' && (
                  <div className="absolute left-2 top-4 z-10">
                    <input
                      type="checkbox"
                      checked={selectedItems.includes(item.id)}
                      onChange={() => toggleItemSelection(item.id)}
                      className="h-4 w-4 text-blue-600 focus:ring-blue-500 border-gray-300 rounded"
                    />
                  </div>
                )}

                <div className={bulkActionsOpen && item.status === 'active' ? 'ml-8' : ''}>
                  <AlertCard
                    item={item}
                    onAcknowledge={acknowledgeItem}
                    onResolve={resolveItem}
                    compact={viewMode === 'compact'}
                    showActions={!bulkActionsOpen}
                  />
                </div>
              </div>
            ))}
          </div>
        )}
      </div>
    </div>
  );
};
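Since the dashboard resolves its tenant id (via `useTenantId`) and auth token (inside `useAlertStream`) internally, embedding it is a one-liner. A hypothetical mounting sketch, with an assumed import path:

```tsx
// Hypothetical page wrapper (not part of the commit); the import path is assumed.
import React from 'react';
import { AlertDashboard } from '../components/alerts/AlertDashboard';

export const AlertsPage: React.FC = () => (
  <AlertDashboard maxItems={25} autoRequestNotifications={false} />
);
```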
148 frontend/src/components/alerts/AlertFilters.tsx Normal file
@@ -0,0 +1,148 @@
// frontend/src/components/alerts/AlertFilters.tsx
/**
 * Filter controls for the alert dashboard
 */

import React from 'react';
import { ItemFilters, ItemType, ItemSeverity, ItemStatus } from '../../types/alerts';

interface AlertFiltersProps {
  filters: ItemFilters;
  onFiltersChange: (filters: ItemFilters) => void;
  availableServices: string[];
}

export const AlertFilters: React.FC<AlertFiltersProps> = ({
  filters,
  onFiltersChange,
  availableServices
}) => {
  const updateFilter = (key: keyof ItemFilters, value: string) => {
    onFiltersChange({
      ...filters,
      [key]: value
    });
  };

  return (
    <div className="flex flex-col sm:flex-row sm:items-center space-y-2 sm:space-y-0 sm:space-x-4">
      {/* Search */}
      <div className="flex-1 min-w-0">
        <label htmlFor="search" className="sr-only">
          Buscar
        </label>
        <div className="relative">
          <div className="absolute inset-y-0 left-0 pl-3 flex items-center pointer-events-none">
            <svg className="h-5 w-5 text-gray-400" fill="none" stroke="currentColor" viewBox="0 0 24 24">
              <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M21 21l-6-6m2-5a7 7 0 11-14 0 7 7 0 0114 0z" />
            </svg>
          </div>
          <input
            id="search"
            type="text"
            placeholder="Buscar alertas y recomendaciones..."
            value={filters.search}
            onChange={(e) => updateFilter('search', e.target.value)}
            className="block w-full pl-10 pr-3 py-2 border border-gray-300 rounded-md leading-5 bg-white placeholder-gray-500 focus:outline-none focus:placeholder-gray-400 focus:ring-1 focus:ring-blue-500 focus:border-blue-500 sm:text-sm"
          />
        </div>
      </div>

      {/* Type Filter */}
      <div>
        <label htmlFor="type-filter" className="sr-only">
          Tipo
        </label>
        <select
          id="type-filter"
          value={filters.item_type}
          onChange={(e) => updateFilter('item_type', e.target.value)}
          className="block w-full pl-3 pr-10 py-2 text-base border border-gray-300 focus:outline-none focus:ring-blue-500 focus:border-blue-500 sm:text-sm rounded-md"
        >
          <option value="all">Todos los tipos</option>
          <option value="alert">Alertas</option>
          <option value="recommendation">Recomendaciones</option>
        </select>
      </div>

      {/* Severity Filter */}
      <div>
        <label htmlFor="severity-filter" className="sr-only">
          Severidad
        </label>
        <select
          id="severity-filter"
          value={filters.severity}
          onChange={(e) => updateFilter('severity', e.target.value)}
          className="block w-full pl-3 pr-10 py-2 text-base border border-gray-300 focus:outline-none focus:ring-blue-500 focus:border-blue-500 sm:text-sm rounded-md"
        >
          <option value="all">Todas las severidades</option>
          <option value="urgent">Urgente</option>
          <option value="high">Alta</option>
          <option value="medium">Media</option>
          <option value="low">Baja</option>
        </select>
      </div>

      {/* Status Filter */}
      <div>
        <label htmlFor="status-filter" className="sr-only">
          Estado
        </label>
        <select
          id="status-filter"
          value={filters.status}
          onChange={(e) => updateFilter('status', e.target.value)}
          className="block w-full pl-3 pr-10 py-2 text-base border border-gray-300 focus:outline-none focus:ring-blue-500 focus:border-blue-500 sm:text-sm rounded-md"
        >
          <option value="all">Todos los estados</option>
          <option value="active">Activos</option>
          <option value="acknowledged">Reconocidos</option>
          <option value="resolved">Resueltos</option>
        </select>
      </div>

      {/* Service Filter */}
      {availableServices.length > 0 && (
        <div>
          <label htmlFor="service-filter" className="sr-only">
            Servicio
          </label>
          <select
            id="service-filter"
            value={filters.service}
            onChange={(e) => updateFilter('service', e.target.value)}
            className="block w-full pl-3 pr-10 py-2 text-base border border-gray-300 focus:outline-none focus:ring-blue-500 focus:border-blue-500 sm:text-sm rounded-md"
          >
            <option value="all">Todos los servicios</option>
            {availableServices.map((service) => (
              <option key={service} value={service}>
                {service}
              </option>
            ))}
          </select>
        </div>
      )}

      {/* Clear Filters */}
      {(filters.search || filters.item_type !== 'all' || filters.severity !== 'all' ||
        filters.status !== 'all' || filters.service !== 'all') && (
        <button
          onClick={() => onFiltersChange({
            item_type: 'all',
            severity: 'all',
            status: 'all',
            service: 'all',
            search: ''
          })}
          className="inline-flex items-center px-3 py-2 border border-gray-300 shadow-sm text-sm leading-4 font-medium rounded-md text-gray-700 bg-white hover:bg-gray-50 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-blue-500"
        >
          <svg className="h-4 w-4 mr-2" fill="none" stroke="currentColor" viewBox="0 0 24 24">
            <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M6 18L18 6M6 6l12 12" />
          </svg>
          Limpiar
        </button>
      )}
    </div>
  );
};
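`AlertFilters` is a fully controlled component: the parent owns the `ItemFilters` state and passes it back down, as `AlertDashboard` does above. A hypothetical standalone usage sketch (the sample service names are assumptions):

```tsx
// Hypothetical controlled usage of AlertFilters (not part of the commit).
import React, { useState } from 'react';
import { AlertFilters } from './AlertFilters';
import { ItemFilters } from '../../types/alerts';

const defaultFilters: ItemFilters = {
  item_type: 'all', severity: 'all', status: 'all', service: 'all', search: ''
};

export const FiltersDemo: React.FC = () => {
  const [filters, setFilters] = useState<ItemFilters>(defaultFilters);
  return (
    <AlertFilters
      filters={filters}
      onFiltersChange={setFilters}
      availableServices={['inventory-service', 'orders-service']} // sample values
    />
  );
};
```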
102 frontend/src/components/alerts/AlertStats.tsx Normal file
@@ -0,0 +1,102 @@
// frontend/src/components/alerts/AlertStats.tsx
/**
 * Statistics display for alerts and recommendations
 */

import React from 'react';

interface AlertStatsProps {
  urgentCount: number;
  highCount: number;
  recCount: number;
  totalItems: number;
  activeItems: number;
}

export const AlertStats: React.FC<AlertStatsProps> = ({
  urgentCount,
  highCount,
  recCount,
  totalItems,
  activeItems
}) => {
  const stats = [
    {
      name: 'Alertas Urgentes',
      value: urgentCount,
      icon: '🚨',
      color: urgentCount > 0 ? 'text-red-600' : 'text-gray-600',
      bgColor: urgentCount > 0 ? 'bg-red-50' : 'bg-gray-50',
      borderColor: urgentCount > 0 ? 'border-red-200' : 'border-gray-200'
    },
    {
      name: 'Alertas Altas',
      value: highCount,
      icon: '⚠️',
      color: highCount > 0 ? 'text-orange-600' : 'text-gray-600',
      bgColor: highCount > 0 ? 'bg-orange-50' : 'bg-gray-50',
      borderColor: highCount > 0 ? 'border-orange-200' : 'border-gray-200'
    },
    {
      name: 'Recomendaciones',
      value: recCount,
      icon: '💡',
      color: recCount > 0 ? 'text-blue-600' : 'text-gray-600',
      bgColor: recCount > 0 ? 'bg-blue-50' : 'bg-gray-50',
      borderColor: recCount > 0 ? 'border-blue-200' : 'border-gray-200'
    },
    {
      name: 'Total Activos',
      value: activeItems,
      icon: '📊',
      color: 'text-gray-600',
      bgColor: 'bg-gray-50',
      borderColor: 'border-gray-200'
    }
  ];

  return (
    <div className="bg-white border-b border-gray-200">
      <div className="px-6 py-4">
        <dl className="grid grid-cols-1 gap-4 sm:grid-cols-2 lg:grid-cols-4">
          {stats.map((stat) => (
            <div
              key={stat.name}
              className={`relative overflow-hidden rounded-lg border ${stat.borderColor} ${stat.bgColor} p-4 transition-all duration-200 hover:shadow-md`}
            >
              <dt className="flex items-center text-sm font-medium text-gray-600">
                <span className="text-lg mr-2">{stat.icon}</span>
                {stat.name}
              </dt>
              <dd className={`mt-1 text-2xl font-semibold ${stat.color}`}>
                {stat.value}
              </dd>

              {/* Pulse animation for urgent alerts */}
              {stat.name === 'Alertas Urgentes' && urgentCount > 0 && (
                <div className="absolute inset-0 rounded-lg border-2 border-red-400 animate-pulse opacity-50"></div>
              )}
            </div>
          ))}
        </dl>

        {/* Summary text */}
        <div className="mt-4 text-sm text-gray-600">
          {totalItems === 0 ? (
            <p className="flex items-center">
              <span className="text-green-500 mr-2">✅</span>
              Todos los sistemas funcionan correctamente
            </p>
          ) : (
            <p>
              Mostrando {totalItems} elemento{totalItems !== 1 ? 's' : ''} en total
              {activeItems > 0 && (
                <>, {activeItems} activo{activeItems !== 1 ? 's' : ''}</>
              )}
            </p>
          )}
        </div>
      </div>
    </div>
  );
};
70 frontend/src/components/alerts/ConnectionStatus.tsx Normal file
@@ -0,0 +1,70 @@
// frontend/src/components/alerts/ConnectionStatus.tsx
/**
 * Displays the current SSE connection status with appropriate styling
 */

import React from 'react';
import { SSEConnectionState } from '../../types/alerts';

interface ConnectionStatusProps {
  connectionState: SSEConnectionState;
}

export const ConnectionStatus: React.FC<ConnectionStatusProps> = ({
  connectionState
}) => {
  const getStatusConfig = (state: SSEConnectionState) => {
    switch (state.status) {
      case 'connected':
        return {
          color: 'bg-green-100 text-green-800 border-green-200',
          icon: '🟢',
          label: 'Conectado',
          description: 'Actualizaciones en tiempo real'
        };
      case 'connecting':
        return {
          color: 'bg-yellow-100 text-yellow-800 border-yellow-200',
          icon: '🟡',
          label: 'Conectando...',
          description: 'Estableciendo conexión'
        };
      case 'error':
        return {
          color: 'bg-red-100 text-red-800 border-red-200',
          icon: '🔴',
          label: 'Error de conexión',
          description: state.reconnectAttempts > 0 ? `Reintento ${state.reconnectAttempts}` : 'Fallo en la conexión'
        };
      case 'disconnected':
      default:
        return {
          color: 'bg-gray-100 text-gray-800 border-gray-200',
          icon: '⚪',
          label: 'Desconectado',
          description: 'Sin actualizaciones en tiempo real'
        };
    }
  };

  const config = getStatusConfig(connectionState);

  return (
    <div className={`inline-flex items-center px-3 py-2 rounded-md border text-sm font-medium ${config.color}`}>
      <span className="mr-2">{config.icon}</span>
      <div className="flex flex-col">
        <span className="font-medium">{config.label}</span>
        <span className="text-xs opacity-75">{config.description}</span>
      </div>

      {connectionState.status === 'connecting' && (
        <div className="ml-2">
          <svg className="animate-spin h-4 w-4" fill="none" viewBox="0 0 24 24">
            <circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4"></circle>
            <path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z"></path>
          </svg>
        </div>
      )}
    </div>
  );
};
359 frontend/src/hooks/useAlertStream.ts Normal file
@@ -0,0 +1,359 @@
// frontend/src/hooks/useAlertStream.ts
/**
 * React hook for managing SSE connection to alert and recommendation stream
 * Handles connection management, reconnection, and real-time updates
 */

import { useEffect, useState, useCallback, useRef } from 'react';
import { AlertItem, ItemSeverity, ItemType, SSEConnectionState, NotificationPermission } from '../types/alerts';
import { useAuth } from './useAuth';

interface UseAlertStreamProps {
  tenantId: string;
  autoConnect?: boolean;
  maxReconnectAttempts?: number;
}

interface UseAlertStreamReturn {
  items: AlertItem[];
  connectionState: SSEConnectionState;
  urgentCount: number;
  highCount: number;
  recCount: number;
  acknowledgeItem: (itemId: string) => Promise<void>;
  resolveItem: (itemId: string) => Promise<void>;
  connect: () => void;
  disconnect: () => void;
  clearItems: () => void;
  notificationPermission: NotificationPermission;
  requestNotificationPermission: () => Promise<NotificationPermission>;
}

export const useAlertStream = ({
  tenantId,
  autoConnect = true,
  maxReconnectAttempts = 10
}: UseAlertStreamProps): UseAlertStreamReturn => {
  const [items, setItems] = useState<AlertItem[]>([]);
  const [connectionState, setConnectionState] = useState<SSEConnectionState>({
    status: 'disconnected',
    reconnectAttempts: 0
  });
  const [notificationPermission, setNotificationPermission] = useState<NotificationPermission>('default');

  const eventSourceRef = useRef<EventSource | null>(null);
  const reconnectTimeoutRef = useRef<NodeJS.Timeout>();
  const isManuallyDisconnected = useRef(false);
  const { token } = useAuth();

  // Initialize notification permission state
  useEffect(() => {
    if ('Notification' in window) {
      setNotificationPermission(Notification.permission);
    }
  }, []);

  const requestNotificationPermission = useCallback(async (): Promise<NotificationPermission> => {
    if (!('Notification' in window)) {
      return 'denied';
    }

    const permission = await Notification.requestPermission();
    setNotificationPermission(permission);
    return permission;
  }, []);

  const showBrowserNotification = useCallback((item: AlertItem) => {
    if (notificationPermission !== 'granted') return;

    // Only show notifications for urgent/high alerts, not recommendations
    if (item.item_type === 'recommendation') return;
    if (!['urgent', 'high'].includes(item.severity)) return;

    const notification = new Notification(item.title, {
      body: item.message,
      icon: '/favicon.ico',
      badge: '/badge-icon.png',
      tag: item.id,
      renotify: true,
      requireInteraction: item.severity === 'urgent',
      data: {
        itemId: item.id,
        itemType: item.item_type,
        severity: item.severity
      }
    });

    // Auto-close non-urgent notifications after 5 seconds
    if (item.severity !== 'urgent') {
      setTimeout(() => notification.close(), 5000);
    }

    notification.onclick = () => {
      window.focus();
      notification.close();
      // Could navigate to specific alert details
    };
  }, [notificationPermission]);

  const playAlertSound = useCallback((severity: ItemSeverity) => {
    // Only play sounds for urgent alerts
    if (severity !== 'urgent') return;

    try {
      const audio = new Audio('/sounds/alert-urgent.mp3');
      audio.volume = 0.5;
      audio.play().catch(() => {
        // Silently fail if audio can't play (user interaction required)
      });
    } catch (error) {
      console.warn('Could not play alert sound:', error);
    }
  }, []);

  const addAndSortItems = useCallback((newItem: AlertItem) => {
    setItems(prev => {
      // Prevent duplicates
      if (prev.some(i => i.id === newItem.id)) return prev;

      const updated = [newItem, ...prev];

      // Sort by severity weight, then by timestamp
      const severityWeight = { urgent: 4, high: 3, medium: 2, low: 1 };

      return updated.sort((a, b) => {
        const weightDiff = severityWeight[b.severity] - severityWeight[a.severity];
        if (weightDiff !== 0) return weightDiff;
        return new Date(b.timestamp).getTime() - new Date(a.timestamp).getTime();
      }).slice(0, 100); // Keep only latest 100 items
    });
  }, []);

  const connect = useCallback(() => {
    if (!token || !tenantId) {
      console.warn('Cannot connect to alert stream: missing token or tenantId');
      return;
    }

    // Clean up existing connection
    if (eventSourceRef.current) {
      eventSourceRef.current.close();
    }

    isManuallyDisconnected.current = false;
    setConnectionState(prev => ({ ...prev, status: 'connecting' }));

    // Create SSE connection
    const url = `${process.env.REACT_APP_NOTIFICATION_SERVICE_URL || 'http://localhost:8002'}/api/v1/sse/alerts/stream/${tenantId}`;

    const eventSource = new EventSource(url, {
      withCredentials: true
    });

    // Add auth header (if supported by browser)
    if ('headers' in eventSource) {
      (eventSource as any).headers = {
        'Authorization': `Bearer ${token}`
      };
    }

    eventSource.onopen = () => {
      setConnectionState(prev => ({
        ...prev,
        status: 'connected',
        lastConnected: new Date(),
        reconnectAttempts: 0
      }));
      console.log('Alert stream connected');
    };

    eventSource.addEventListener('connected', (event) => {
      console.log('Alert stream handshake completed:', event.data);
    });

    eventSource.addEventListener('initial_items', (event) => {
      try {
        const initialItems = JSON.parse(event.data);
        setItems(initialItems);
        console.log(`Loaded ${initialItems.length} initial items`);
      } catch (error) {
        console.error('Error parsing initial items:', error);
      }
    });

    eventSource.addEventListener('alert', (event) => {
      try {
        const newItem = JSON.parse(event.data);
        addAndSortItems(newItem);

        // Show browser notification for urgent/high alerts
        showBrowserNotification(newItem);

        // Play sound for urgent alerts
        if (newItem.severity === 'urgent') {
          playAlertSound(newItem.severity);
        }

        console.log('New alert received:', newItem.type, newItem.severity);
      } catch (error) {
        console.error('Error processing alert event:', error);
      }
    });

    eventSource.addEventListener('recommendation', (event) => {
      try {
        const newItem = JSON.parse(event.data);
        addAndSortItems(newItem);
        console.log('New recommendation received:', newItem.type);
      } catch (error) {
        console.error('Error processing recommendation event:', error);
      }
    });

    eventSource.addEventListener('ping', (event) => {
      // Handle keepalive pings
      console.debug('SSE keepalive ping received');
    });

    eventSource.onerror = (error) => {
      console.error('SSE error:', error);
      setConnectionState(prev => ({
        ...prev,
        status: 'error'
      }));

      eventSource.close();

      // Attempt reconnection with exponential backoff
      if (!isManuallyDisconnected.current &&
          connectionState.reconnectAttempts < maxReconnectAttempts) {

        const backoffTime = Math.min(1000 * Math.pow(2, connectionState.reconnectAttempts), 30000);

        setConnectionState(prev => ({
          ...prev,
          reconnectAttempts: prev.reconnectAttempts + 1
        }));

        console.log(`Reconnecting in ${backoffTime}ms (attempt ${connectionState.reconnectAttempts + 1})`);

        reconnectTimeoutRef.current = setTimeout(() => {
          connect();
        }, backoffTime);
      }
    };

    eventSourceRef.current = eventSource;
  }, [token, tenantId, connectionState.reconnectAttempts, maxReconnectAttempts, addAndSortItems, showBrowserNotification, playAlertSound]);

  const disconnect = useCallback(() => {
    isManuallyDisconnected.current = true;

    if (eventSourceRef.current) {
      eventSourceRef.current.close();
      eventSourceRef.current = null;
    }

    if (reconnectTimeoutRef.current) {
      clearTimeout(reconnectTimeoutRef.current);
    }

    setConnectionState({
      status: 'disconnected',
      reconnectAttempts: 0
    });
  }, []);

  const acknowledgeItem = useCallback(async (itemId: string) => {
    try {
      const response = await fetch(
        `${process.env.REACT_APP_NOTIFICATION_SERVICE_URL || 'http://localhost:8002'}/api/v1/sse/items/${itemId}/acknowledge`,
        {
          method: 'POST',
          headers: {
            'Authorization': `Bearer ${token}`,
            'Content-Type': 'application/json'
          }
        }
      );

      if (response.ok) {
        setItems(prev => prev.map(item =>
          item.id === itemId
            ? { ...item, status: 'acknowledged' as const, acknowledged_at: new Date().toISOString() }
            : item
        ));
      }
    } catch (error) {
      console.error('Failed to acknowledge item:', error);
    }
  }, [token]);

  const resolveItem = useCallback(async (itemId: string) => {
    try {
      const response = await fetch(
        `${process.env.REACT_APP_NOTIFICATION_SERVICE_URL || 'http://localhost:8002'}/api/v1/sse/items/${itemId}/resolve`,
        {
          method: 'POST',
          headers: {
            'Authorization': `Bearer ${token}`,
            'Content-Type': 'application/json'
          }
        }
      );

      if (response.ok) {
        setItems(prev => prev.map(item =>
          item.id === itemId
            ? { ...item, status: 'resolved' as const, resolved_at: new Date().toISOString() }
            : item
        ));
      }
    } catch (error) {
      console.error('Failed to resolve item:', error);
    }
  }, [token]);

  const clearItems = useCallback(() => {
    setItems([]);
  }, []);

  // Auto-connect on mount if enabled
  useEffect(() => {
    if (autoConnect && token && tenantId) {
      connect();
    }

    return () => {
      disconnect();
    };
  }, [autoConnect, token, tenantId]); // Don't include connect/disconnect to avoid loops

  // Calculate counts
  const urgentCount = items.filter(i =>
    i.severity === 'urgent' && i.status === 'active' && i.item_type === 'alert'
  ).length;

  const highCount = items.filter(i =>
    i.severity === 'high' && i.status === 'active' && i.item_type === 'alert'
  ).length;

  const recCount = items.filter(i =>
    i.item_type === 'recommendation' && i.status === 'active'
  ).length;

  return {
    items,
    connectionState,
    urgentCount,
    highCount,
    recCount,
    acknowledgeItem,
    resolveItem,
    connect,
    disconnect,
    clearItems,
    notificationPermission,
    requestNotificationPermission
  };
};
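One caveat worth noting: the browser-native EventSource API does not accept custom request headers, so the `'headers' in eventSource` branch above is effectively a no-op in standard browsers, and authentication in practice rides on the `withCredentials` cookies. A hedged alternative sketch, assuming the notification service were to accept the token as a query parameter (which this commit does not show):

```ts
// Sketch only: assumes the server would validate a ?token=... query parameter.
// Native EventSource cannot set an Authorization header.
declare const token: string;    // from useAuth()
declare const tenantId: string; // from useTenantId()

const base = process.env.REACT_APP_NOTIFICATION_SERVICE_URL || 'http://localhost:8002';
const streamUrl = new URL(`${base}/api/v1/sse/alerts/stream/${tenantId}`);
streamUrl.searchParams.set('token', token); // hypothetical server-side support

const es = new EventSource(streamUrl.toString(), { withCredentials: true });
```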
126 frontend/src/types/alerts.ts Normal file
@@ -0,0 +1,126 @@
// frontend/src/types/alerts.ts
/**
 * TypeScript types for the unified alert and recommendation system
 */

export type ItemType = 'alert' | 'recommendation';

export type ItemSeverity = 'urgent' | 'high' | 'medium' | 'low';

export type ItemStatus = 'active' | 'acknowledged' | 'resolved';

export interface AlertItem {
  id: string;
  tenant_id: string;
  item_type: ItemType;
  type: string; // Specific alert/recommendation type
  severity: ItemSeverity;
  status: ItemStatus;
  service: string;
  title: string;
  message: string;
  actions: string[];
  metadata: Record<string, any>;
  created_at: string;
  acknowledged_at?: string;
  acknowledged_by?: string;
  resolved_at?: string;
  resolved_by?: string;
  timestamp: string;
}

export interface SSEEvent {
  event: string;
  data: string;
  id?: string;
}

export interface ItemFilters {
  item_type: ItemType | 'all';
  severity: ItemSeverity | 'all';
  status: ItemStatus | 'all';
  service: string | 'all';
  search: string;
}

export interface ItemCounts {
  total: number;
  alerts: {
    urgent: number;
    high: number;
    medium: number;
    low: number;
  };
  recommendations: {
    high: number;
    medium: number;
    low: number;
  };
  by_status: {
    active: number;
    acknowledged: number;
    resolved: number;
  };
}

export interface NotificationSettings {
  browser_notifications: boolean;
  sound_enabled: boolean;
  auto_acknowledge_timeout: number; // minutes
  show_recommendations: boolean;
  urgent_only: boolean;
}

export interface SSEConnectionState {
  status: 'connecting' | 'connected' | 'disconnected' | 'error';
  lastConnected?: Date;
  reconnectAttempts: number;
  latency?: number;
}

// Notification permission states
export type NotificationPermission = 'default' | 'granted' | 'denied';

// UI state
export interface AlertUIState {
  filters: ItemFilters;
  selectedItems: string[];
  sortBy: 'created_at' | 'severity' | 'type';
  sortOrder: 'asc' | 'desc';
  viewMode: 'list' | 'grid' | 'compact';
  sidebarOpen: boolean;
  bulkActionsOpen: boolean;
}

// Action types for alert responses
export interface AlertAction {
  id: string;
  label: string;
  type: 'acknowledge' | 'resolve' | 'custom';
  icon?: string;
  variant?: 'primary' | 'secondary' | 'danger';
  requires_confirmation?: boolean;
}

// Metrics for dashboard
export interface AlertMetrics {
  response_time_avg: number; // seconds
  false_positive_rate: number;
  recommendation_adoption_rate: number;
  items_last_24h: number;
  top_alert_types: Array<{
    type: string;
    count: number;
  }>;
  service_health: Record<string, boolean>;
}

// Template for creating new alerts (development/testing)
export interface AlertTemplate {
  type: string;
  severity: ItemSeverity;
  title: string;
  message: string;
  actions: string[];
  metadata?: Record<string, any>;
}
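For reference, the alert processor introduced later in this commit consumes JSON message bodies shaped like `AlertItem`. A minimal publisher sketch in Python, assuming aio-pika; the exchange name `alerts` and the `{service}.{item_type}.{severity}` routing-key convention are assumptions (the processor reads both from `RABBITMQ_CONFIG` and binds with the wildcard `*.*.*`):

```python
# Hedged sketch: publish an item shaped like the AlertItem interface above.
# Exchange name and routing-key convention are assumptions, not part of the diff.
import asyncio
import json
import uuid
from datetime import datetime, timezone

from aio_pika import ExchangeType, Message, connect_robust


async def publish_example():
    connection = await connect_robust("amqp://bakery:forecast123@rabbitmq:5672/")
    channel = await connection.channel()
    exchange = await channel.declare_exchange("alerts", ExchangeType.TOPIC, durable=True)

    item = {
        "id": str(uuid.uuid4()),
        "tenant_id": str(uuid.uuid4()),
        "item_type": "alert",
        "type": "critical_stock_shortage",
        "severity": "urgent",
        "status": "active",
        "service": "inventory",
        "title": "Flour stock below minimum",
        "message": "Current stock 4.5 kg is below the 10 kg minimum.",
        "actions": ["acknowledge", "resolve"],
        "metadata": {"ingredient": "flour"},
        "timestamp": datetime.now(timezone.utc).isoformat(),
    }
    await exchange.publish(
        Message(json.dumps(item).encode()),
        routing_key="inventory.alert.urgent",  # assumed "{service}.{item_type}.{severity}" convention
    )
    await connection.close()


asyncio.run(publish_example())
```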
@@ -0,0 +1,644 @@
{
  "annotations": {
    "list": [
      {
        "builtIn": 1,
        "datasource": "-- Grafana --",
        "enable": true,
        "hide": true,
        "iconColor": "rgba(0, 211, 255, 1)",
        "name": "Annotations & Alerts",
        "type": "dashboard"
      }
    ]
  },
  "description": "Comprehensive monitoring dashboard for the Bakery Alert and Recommendation System",
  "editable": true,
  "fiscalYearStartMonth": 0,
  "graphTooltip": 0,
  "id": null,
  "links": [],
  "liveNow": false,
  "panels": [
    {
      "datasource": "prometheus",
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "palette-classic" },
          "custom": {
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 10,
            "gradientMode": "none",
            "hideFrom": { "legend": false, "tooltip": false, "vis": false },
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": { "type": "linear" },
            "showPoints": "never",
            "spanNulls": false,
            "stacking": { "group": "A", "mode": "none" },
            "thresholdsStyle": { "mode": "off" }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              { "color": "green", "value": null },
              { "color": "red", "value": 80 }
            ]
          },
          "unit": "short"
        },
        "overrides": []
      },
      "gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 },
      "id": 1,
      "options": {
        "legend": { "calcs": [], "displayMode": "list", "placement": "bottom" },
        "tooltip": { "mode": "single" }
      },
      "targets": [
        {
          "expr": "rate(alert_items_published_total[5m])",
          "interval": "",
          "legendFormat": "{{item_type}} - {{severity}}",
          "refId": "A"
        }
      ],
      "title": "Alert/Recommendation Publishing Rate",
      "type": "timeseries"
    },
    {
      "datasource": "prometheus",
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "thresholds" },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              { "color": "green", "value": null },
              { "color": "red", "value": 80 }
            ]
          }
        },
        "overrides": []
      },
      "gridPos": { "h": 8, "w": 12, "x": 12, "y": 0 },
      "id": 2,
      "options": {
        "orientation": "auto",
        "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
        "showThresholdLabels": false,
        "showThresholdMarkers": true,
        "text": {}
      },
      "pluginVersion": "8.0.0",
      "targets": [
        {
          "expr": "sum(alert_sse_active_connections)",
          "interval": "",
          "legendFormat": "Active SSE Connections",
          "refId": "A"
        }
      ],
      "title": "Active SSE Connections",
      "type": "gauge"
    },
    {
      "datasource": "prometheus",
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "palette-classic" },
          "custom": { "hideFrom": { "legend": false, "tooltip": false, "vis": false } },
          "mappings": []
        },
        "overrides": []
      },
      "gridPos": { "h": 8, "w": 8, "x": 0, "y": 8 },
      "id": 3,
      "options": {
        "legend": { "displayMode": "list", "placement": "right" },
        "pieType": "pie",
        "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
        "tooltip": { "mode": "single" }
      },
      "targets": [
        {
          "expr": "sum by (item_type) (alert_items_published_total)",
          "interval": "",
          "legendFormat": "{{item_type}}",
          "refId": "A"
        }
      ],
      "title": "Items by Type",
      "type": "piechart"
    },
    {
      "datasource": "prometheus",
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "palette-classic" },
          "custom": { "hideFrom": { "legend": false, "tooltip": false, "vis": false } },
          "mappings": []
        },
        "overrides": []
      },
      "gridPos": { "h": 8, "w": 8, "x": 8, "y": 8 },
      "id": 4,
      "options": {
        "legend": { "displayMode": "list", "placement": "right" },
        "pieType": "pie",
        "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
        "tooltip": { "mode": "single" }
      },
      "targets": [
        {
          "expr": "sum by (severity) (alert_items_published_total)",
          "interval": "",
          "legendFormat": "{{severity}}",
          "refId": "A"
        }
      ],
      "title": "Items by Severity",
      "type": "piechart"
    },
    {
      "datasource": "prometheus",
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "palette-classic" },
          "custom": {
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 10,
            "gradientMode": "none",
            "hideFrom": { "legend": false, "tooltip": false, "vis": false },
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": { "type": "linear" },
            "showPoints": "never",
            "spanNulls": false,
            "stacking": { "group": "A", "mode": "none" },
            "thresholdsStyle": { "mode": "off" }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              { "color": "green", "value": null },
              { "color": "red", "value": 80 }
            ]
          },
          "unit": "short"
        },
        "overrides": []
      },
      "gridPos": { "h": 8, "w": 8, "x": 16, "y": 8 },
      "id": 5,
      "options": {
        "legend": { "calcs": [], "displayMode": "list", "placement": "bottom" },
        "tooltip": { "mode": "single" }
      },
      "targets": [
        {
          "expr": "rate(alert_notifications_sent_total[5m])",
          "interval": "",
          "legendFormat": "{{channel}}",
          "refId": "A"
        }
      ],
      "title": "Notification Delivery Rate by Channel",
      "type": "timeseries"
    },
    {
      "datasource": "prometheus",
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "palette-classic" },
          "custom": {
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 10,
            "gradientMode": "none",
            "hideFrom": { "legend": false, "tooltip": false, "vis": false },
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": { "type": "linear" },
            "showPoints": "never",
            "spanNulls": false,
            "stacking": { "group": "A", "mode": "none" },
            "thresholdsStyle": { "mode": "off" }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              { "color": "green", "value": null },
              { "color": "red", "value": 80 }
            ]
          },
          "unit": "s"
        },
        "overrides": []
      },
      "gridPos": { "h": 8, "w": 12, "x": 0, "y": 16 },
      "id": 6,
      "options": {
        "legend": { "calcs": [], "displayMode": "list", "placement": "bottom" },
        "tooltip": { "mode": "single" }
      },
      "targets": [
        {
          "expr": "histogram_quantile(0.95, rate(alert_processing_duration_seconds_bucket[5m]))",
          "interval": "",
          "legendFormat": "95th percentile",
          "refId": "A"
        },
        {
          "expr": "histogram_quantile(0.50, rate(alert_processing_duration_seconds_bucket[5m]))",
          "interval": "",
          "legendFormat": "50th percentile (median)",
          "refId": "B"
        }
      ],
      "title": "Processing Duration",
      "type": "timeseries"
    },
    {
      "datasource": "prometheus",
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "palette-classic" },
          "custom": {
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 10,
            "gradientMode": "none",
            "hideFrom": { "legend": false, "tooltip": false, "vis": false },
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": { "type": "linear" },
            "showPoints": "never",
            "spanNulls": false,
            "stacking": { "group": "A", "mode": "none" },
            "thresholdsStyle": { "mode": "off" }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              { "color": "green", "value": null },
              { "color": "red", "value": 80 }
            ]
          },
          "unit": "short"
        },
        "overrides": []
      },
      "gridPos": { "h": 8, "w": 12, "x": 12, "y": 16 },
      "id": 7,
      "options": {
        "legend": { "calcs": [], "displayMode": "list", "placement": "bottom" },
        "tooltip": { "mode": "single" }
      },
      "targets": [
        {
          "expr": "rate(alert_processing_errors_total[5m])",
          "interval": "",
          "legendFormat": "{{error_type}}",
          "refId": "A"
        },
        {
          "expr": "rate(alert_delivery_failures_total[5m])",
          "interval": "",
          "legendFormat": "Delivery: {{channel}}",
          "refId": "B"
        }
      ],
      "title": "Error Rates",
      "type": "timeseries"
    },
    {
      "datasource": "prometheus",
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "thresholds" },
          "custom": { "align": "auto", "displayMode": "auto" },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              { "color": "green", "value": null },
              { "color": "red", "value": 80 }
            ]
          }
        },
        "overrides": [
          {
            "matcher": { "id": "byName", "options": "Health" },
            "properties": [
              { "id": "custom.displayMode", "value": "color-background" },
              {
                "id": "mappings",
                "value": [
                  {
                    "options": {
                      "0": { "color": "red", "index": 0, "text": "Unhealthy" },
                      "1": { "color": "green", "index": 1, "text": "Healthy" }
                    },
                    "type": "value"
                  }
                ]
              }
            ]
          }
        ]
      },
      "gridPos": { "h": 8, "w": 24, "x": 0, "y": 24 },
      "id": 8,
      "options": { "showHeader": true },
      "pluginVersion": "8.0.0",
      "targets": [
        {
          "expr": "alert_system_component_health",
          "format": "table",
          "interval": "",
          "legendFormat": "",
          "refId": "A"
        }
      ],
      "title": "System Component Health",
      "transformations": [
        {
          "id": "organize",
          "options": {
            "excludeByName": { "__name__": true, "instance": true, "job": true },
            "indexByName": {},
            "renameByName": { "Value": "Health", "component": "Component", "service": "Service" }
          }
        }
      ],
      "type": "table"
    }
  ],
  "schemaVersion": 27,
  "style": "dark",
  "tags": ["bakery", "alerts", "recommendations", "monitoring"],
  "templating": { "list": [] },
  "time": { "from": "now-1h", "to": "now" },
  "timepicker": {},
  "timezone": "Europe/Madrid",
  "title": "Bakery Alert & Recommendation System",
  "uid": "bakery-alert-system",
  "version": 1
}
@@ -0,0 +1,243 @@
# infrastructure/monitoring/prometheus/rules/alert-system-rules.yml
# Prometheus alerting rules for the Bakery Alert and Recommendation System

groups:
  - name: alert_system_health
    rules:
      # System component health alerts
      - alert: AlertSystemComponentDown
        expr: alert_system_component_health == 0
        for: 2m
        labels:
          severity: critical
          service: "{{ $labels.service }}"
          component: "{{ $labels.component }}"
        annotations:
          summary: "Alert system component {{ $labels.component }} is unhealthy"
          description: "Component {{ $labels.component }} in service {{ $labels.service }} has been unhealthy for more than 2 minutes."
          runbook_url: "https://docs.bakery.local/runbooks/alert-system#component-health"

      # Connection health alerts
      - alert: RabbitMQConnectionDown
        expr: alert_rabbitmq_connection_status == 0
        for: 1m
        labels:
          severity: critical
          service: "{{ $labels.service }}"
        annotations:
          summary: "RabbitMQ connection down for {{ $labels.service }}"
          description: "Service {{ $labels.service }} has lost connection to RabbitMQ for more than 1 minute."
          runbook_url: "https://docs.bakery.local/runbooks/alert-system#rabbitmq-connection"

      - alert: RedisConnectionDown
        expr: alert_redis_connection_status == 0
        for: 1m
        labels:
          severity: critical
          service: "{{ $labels.service }}"
        annotations:
          summary: "Redis connection down for {{ $labels.service }}"
          description: "Service {{ $labels.service }} has lost connection to Redis for more than 1 minute."
          runbook_url: "https://docs.bakery.local/runbooks/alert-system#redis-connection"

      # Leader election issues
      - alert: NoSchedulerLeader
        expr: sum(alert_scheduler_leader_status) == 0
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "No scheduler leader elected"
          description: "No service has been elected as scheduler leader for more than 5 minutes. Scheduled checks may not be running."
          runbook_url: "https://docs.bakery.local/runbooks/alert-system#leader-election"

  - name: alert_system_performance
    rules:
      # High error rates
      - alert: HighAlertProcessingErrorRate
        expr: rate(alert_processing_errors_total[5m]) > 0.1
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "High alert processing error rate"
          description: "Alert processing error rate is {{ $value | humanizePercentage }} over the last 5 minutes."
          runbook_url: "https://docs.bakery.local/runbooks/alert-system#processing-errors"

      - alert: HighNotificationDeliveryFailureRate
        expr: rate(alert_delivery_failures_total[5m]) / rate(alert_notifications_sent_total[5m]) > 0.05
        for: 3m
        labels:
          severity: warning
          channel: "{{ $labels.channel }}"
        annotations:
          summary: "High notification delivery failure rate for {{ $labels.channel }}"
          description: "Notification delivery failure rate for {{ $labels.channel }} is {{ $value | humanizePercentage }} over the last 5 minutes."
          runbook_url: "https://docs.bakery.local/runbooks/alert-system#delivery-failures"

      # Processing latency
      - alert: HighAlertProcessingLatency
        expr: histogram_quantile(0.95, rate(alert_processing_duration_seconds_bucket[5m])) > 5
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High alert processing latency"
          description: "95th percentile alert processing latency is {{ $value }}s, exceeding 5s threshold."
          runbook_url: "https://docs.bakery.local/runbooks/alert-system#processing-latency"

      # SSE connection issues
      - alert: TooManySSEConnections
        expr: sum(alert_sse_active_connections) > 1000
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "Too many active SSE connections"
          description: "Number of active SSE connections ({{ $value }}) exceeds 1000. This may impact performance."
          runbook_url: "https://docs.bakery.local/runbooks/alert-system#sse-connections"

      - alert: SSEConnectionErrors
        expr: rate(alert_sse_connection_errors_total[5m]) > 0.5
        for: 3m
        labels:
          severity: warning
        annotations:
          summary: "High SSE connection error rate"
          description: "SSE connection error rate is {{ $value }} errors/second over the last 5 minutes."
          runbook_url: "https://docs.bakery.local/runbooks/alert-system#sse-errors"

  - name: alert_system_business
    rules:
      # Alert volume anomalies
      - alert: UnusuallyHighAlertVolume
        expr: rate(alert_items_published_total{item_type="alert"}[10m]) > 2
        for: 5m
        labels:
          severity: warning
          service: "{{ $labels.service }}"
        annotations:
          summary: "Unusually high alert volume from {{ $labels.service }}"
          description: "Service {{ $labels.service }} is generating alerts at {{ $value }} alerts/second, which is above normal levels."
          runbook_url: "https://docs.bakery.local/runbooks/alert-system#high-volume"

      - alert: NoAlertsGenerated
        expr: rate(alert_items_published_total[30m]) == 0
        for: 15m
        labels:
          severity: warning
        annotations:
          summary: "No alerts generated recently"
          description: "No alerts have been generated in the last 30 minutes. This may indicate a problem with detection systems."
          runbook_url: "https://docs.bakery.local/runbooks/alert-system#no-alerts"

      # Response time issues
      - alert: SlowAlertResponseTime
        expr: histogram_quantile(0.95, rate(alert_item_response_time_seconds_bucket[1h])) > 3600
        for: 10m
        labels:
          severity: warning
        annotations:
          summary: "Slow alert response times"
          description: "95th percentile alert response time is {{ $value | humanizeDuration }}, exceeding 1 hour."
          runbook_url: "https://docs.bakery.local/runbooks/alert-system#response-times"

      # Critical alerts not acknowledged
      - alert: CriticalAlertsUnacknowledged
        expr: sum(alert_active_items_current{item_type="alert",severity="urgent"}) > 5
        for: 10m
        labels:
          severity: critical
        annotations:
          summary: "Multiple critical alerts unacknowledged"
          description: "{{ $value }} critical alerts remain unacknowledged for more than 10 minutes."
          runbook_url: "https://docs.bakery.local/runbooks/alert-system#critical-unacked"

  - name: alert_system_capacity
    rules:
      # Queue size monitoring
      - alert: LargeSSEMessageQueues
        expr: alert_sse_message_queue_size > 100
        for: 5m
        labels:
          severity: warning
          tenant_id: "{{ $labels.tenant_id }}"
        annotations:
          summary: "Large SSE message queue for tenant {{ $labels.tenant_id }}"
          description: "SSE message queue for tenant {{ $labels.tenant_id }} has {{ $value }} messages, indicating potential client issues."
          runbook_url: "https://docs.bakery.local/runbooks/alert-system#sse-queues"

      # Database storage issues
      - alert: SlowDatabaseStorage
        expr: histogram_quantile(0.95, rate(alert_database_storage_duration_seconds_bucket[5m])) > 1
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Slow database storage for alerts"
          description: "95th percentile database storage time is {{ $value }}s, exceeding 1s threshold."
          runbook_url: "https://docs.bakery.local/runbooks/alert-system#database-storage"

  - name: alert_system_effectiveness
    rules:
      # False positive rate monitoring
      - alert: HighFalsePositiveRate
        expr: alert_false_positive_rate > 0.2
        for: 30m
        labels:
          severity: warning
          service: "{{ $labels.service }}"
          alert_type: "{{ $labels.alert_type }}"
        annotations:
          summary: "High false positive rate for {{ $labels.alert_type }}"
          description: "False positive rate for {{ $labels.alert_type }} in {{ $labels.service }} is {{ $value | humanizePercentage }}."
          runbook_url: "https://docs.bakery.local/runbooks/alert-system#false-positives"

      # Low recommendation adoption
      - alert: LowRecommendationAdoption
        expr: rate(alert_recommendations_implemented_total[24h]) / rate(alert_items_published_total{item_type="recommendation"}[24h]) < 0.1
        for: 1h
        labels:
          severity: info
          service: "{{ $labels.service }}"
        annotations:
          summary: "Low recommendation adoption rate"
          description: "Recommendation adoption rate for {{ $labels.service }} is {{ $value | humanizePercentage }} over the last 24 hours."
          runbook_url: "https://docs.bakery.local/runbooks/alert-system#recommendation-adoption"

  # Additional alerting rules for specific scenarios
  - name: alert_system_critical_scenarios
    rules:
      # Complete system failure
      - alert: AlertSystemDown
        expr: up{job=~"alert-processor|notification-service"} == 0
        for: 1m
        labels:
          severity: critical
          service: "{{ $labels.job }}"
        annotations:
          summary: "Alert system service {{ $labels.job }} is down"
          description: "Critical alert system service {{ $labels.job }} has been down for more than 1 minute."
          runbook_url: "https://docs.bakery.local/runbooks/alert-system#service-down"

      # Data loss prevention
      - alert: AlertDataNotPersisted
        expr: rate(alert_items_processed_total[5m]) > 0 and rate(alert_database_storage_duration_seconds_count[5m]) == 0
        for: 2m
        labels:
          severity: critical
        annotations:
          summary: "Alert data not being persisted to database"
          description: "Alerts are being processed but not stored in database, potential data loss."
          runbook_url: "https://docs.bakery.local/runbooks/alert-system#data-persistence"

      # Notification blackhole
      - alert: NotificationsNotDelivered
        expr: rate(alert_items_processed_total[5m]) > 0 and rate(alert_notifications_sent_total[5m]) == 0
        for: 3m
        labels:
          severity: critical
        annotations:
          summary: "Notifications not being delivered"
          description: "Alerts are being processed but no notifications are being sent."
          runbook_url: "https://docs.bakery.local/runbooks/alert-system#notification-delivery"
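These rules assume each service actually exports the referenced metrics. A minimal sketch of exporting the component-health gauge with prometheus-client (the metric and label names follow the rules above; the port and label values are illustrative assumptions):

```python
# Hedged sketch: export the gauge that AlertSystemComponentDown scrapes.
# Metric/label names match the rules file; everything else is an assumption.
from prometheus_client import Gauge, start_http_server

component_health = Gauge(
    "alert_system_component_health",
    "1 if the component is healthy, 0 otherwise",
    ["service", "component"],
)

start_http_server(8000)  # expose /metrics for Prometheus to scrape
component_health.labels(service="alert-processor", component="rabbitmq_consumer").set(1)
```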
197 migrations/001_create_alert_tables.sql Normal file
@@ -0,0 +1,197 @@
-- migrations/001_create_alert_tables.sql
-- Database schema for unified alerts and recommendations system

-- Main alerts table (stores both alerts and recommendations)
CREATE TABLE IF NOT EXISTS alerts (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id UUID NOT NULL,
    item_type VARCHAR(20) NOT NULL DEFAULT 'alert' CHECK (item_type IN ('alert', 'recommendation')),
    alert_type VARCHAR(50) NOT NULL, -- Specific type like 'critical_stock_shortage', 'inventory_optimization'
    severity VARCHAR(20) NOT NULL CHECK (severity IN ('urgent', 'high', 'medium', 'low')),
    status VARCHAR(20) NOT NULL DEFAULT 'active' CHECK (status IN ('active', 'acknowledged', 'resolved')),
    service VARCHAR(50) NOT NULL,

    title VARCHAR(255) NOT NULL,
    message TEXT NOT NULL,
    actions JSONB DEFAULT '[]',
    metadata JSONB DEFAULT '{}',

    created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
    acknowledged_at TIMESTAMP WITH TIME ZONE,
    acknowledged_by UUID,
    resolved_at TIMESTAMP WITH TIME ZONE,
    resolved_by UUID,

    -- Add severity weight for sorting
    severity_weight INT GENERATED ALWAYS AS (
        CASE severity
            WHEN 'urgent' THEN 4
            WHEN 'high' THEN 3
            WHEN 'medium' THEN 2
            WHEN 'low' THEN 1
        END
    ) STORED
);

-- Indexes for performance
CREATE INDEX IF NOT EXISTS idx_alerts_tenant_status ON alerts(tenant_id, status);
CREATE INDEX IF NOT EXISTS idx_alerts_created_at ON alerts(created_at DESC);
CREATE INDEX IF NOT EXISTS idx_alerts_severity ON alerts(severity_weight DESC);
CREATE INDEX IF NOT EXISTS idx_alerts_tenant_active ON alerts(tenant_id, status) WHERE status = 'active';
CREATE INDEX IF NOT EXISTS idx_alerts_item_type ON alerts(item_type);
CREATE INDEX IF NOT EXISTS idx_alerts_service ON alerts(service);

-- Composite index for common queries
CREATE INDEX IF NOT EXISTS idx_alerts_tenant_type_status ON alerts(tenant_id, item_type, status);

-- Alert history for audit trail (applies to both alerts and recommendations)
CREATE TABLE IF NOT EXISTS alert_history (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    alert_id UUID REFERENCES alerts(id) ON DELETE CASCADE,
    tenant_id UUID NOT NULL,
    action VARCHAR(50) NOT NULL,
    performed_by UUID,
    performed_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
    details JSONB DEFAULT '{}'
);

CREATE INDEX IF NOT EXISTS idx_alert_history_alert ON alert_history(alert_id);
CREATE INDEX IF NOT EXISTS idx_alert_history_tenant ON alert_history(tenant_id);

-- Database triggers for immediate alerts (recommendations typically not triggered this way)
-- Stock critical trigger
CREATE OR REPLACE FUNCTION notify_stock_critical()
RETURNS TRIGGER AS $$
BEGIN
    -- Only trigger for alerts, not recommendations
    IF NEW.current_stock < NEW.minimum_stock AND
       OLD.current_stock >= OLD.minimum_stock THEN
        PERFORM pg_notify(
            'stock_alerts',
            json_build_object(
                'tenant_id', NEW.tenant_id,
                'ingredient_id', NEW.id,
                'name', NEW.name,
                'current_stock', NEW.current_stock,
                'minimum_stock', NEW.minimum_stock,
                'alert_type', 'critical_stock_shortage'
            )::text
        );
    END IF;
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

-- Temperature breach trigger
CREATE OR REPLACE FUNCTION notify_temperature_breach()
RETURNS TRIGGER AS $$
BEGIN
    IF NEW.temperature > NEW.max_threshold AND
       NEW.breach_duration_minutes > 30 THEN
        PERFORM pg_notify(
            'temperature_alerts',
            json_build_object(
                'tenant_id', NEW.tenant_id,
                'sensor_id', NEW.sensor_id,
                'location', NEW.location,
                'temperature', NEW.temperature,
                'duration', NEW.breach_duration_minutes,
                'alert_type', 'temperature_breach'
            )::text
        );
    END IF;
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

-- Production delay trigger
CREATE OR REPLACE FUNCTION notify_production_delay()
RETURNS TRIGGER AS $$
BEGIN
    IF NEW.status = 'delayed' AND OLD.status != 'delayed' THEN
        PERFORM pg_notify(
            'production_alerts',
            json_build_object(
                'tenant_id', NEW.tenant_id,
                'batch_id', NEW.id,
                'product_name', NEW.product_name,
                'planned_completion', NEW.planned_completion_time,
                'delay_minutes', EXTRACT(EPOCH FROM (NOW() - NEW.planned_completion_time))/60,
                'alert_type', 'production_delay'
            )::text
        );
    END IF;
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

-- Create placeholder tables for triggers (these would exist in respective services)
-- This is just for reference - actual tables should be in service-specific migrations

-- Inventory items table structure (for reference)
CREATE TABLE IF NOT EXISTS inventory_items (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id UUID NOT NULL,
    name VARCHAR(255) NOT NULL,
    current_stock DECIMAL(10,2) DEFAULT 0,
    minimum_stock DECIMAL(10,2) DEFAULT 0,
    maximum_stock DECIMAL(10,2),
    unit VARCHAR(50) DEFAULT 'kg',
    updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
);

-- Temperature readings table structure (for reference)
CREATE TABLE IF NOT EXISTS temperature_readings (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id UUID NOT NULL,
    sensor_id VARCHAR(100) NOT NULL,
    location VARCHAR(255) NOT NULL,
    temperature DECIMAL(5,2) NOT NULL,
    max_threshold DECIMAL(5,2) DEFAULT 25.0,
    breach_duration_minutes INT DEFAULT 0,
    recorded_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
);

-- Production batches table structure (for reference)
CREATE TABLE IF NOT EXISTS production_batches (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    tenant_id UUID NOT NULL,
    product_name VARCHAR(255) NOT NULL,
    status VARCHAR(50) DEFAULT 'planned',
    planned_completion_time TIMESTAMP WITH TIME ZONE,
    actual_completion_time TIMESTAMP WITH TIME ZONE,
    created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
    updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
);

-- Apply triggers (only if tables exist)
DO $$
BEGIN
    -- Stock critical trigger
    IF EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = 'inventory_items') THEN
        DROP TRIGGER IF EXISTS stock_critical_trigger ON inventory_items;
        CREATE TRIGGER stock_critical_trigger
            AFTER UPDATE ON inventory_items
            FOR EACH ROW
            EXECUTE FUNCTION notify_stock_critical();
    END IF;

    -- Temperature breach trigger
    IF EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = 'temperature_readings') THEN
        DROP TRIGGER IF EXISTS temperature_breach_trigger ON temperature_readings;
        CREATE TRIGGER temperature_breach_trigger
            AFTER INSERT OR UPDATE ON temperature_readings
            FOR EACH ROW
            EXECUTE FUNCTION notify_temperature_breach();
    END IF;

    -- Production delay trigger
    IF EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = 'production_batches') THEN
        DROP TRIGGER IF EXISTS production_delay_trigger ON production_batches;
        CREATE TRIGGER production_delay_trigger
            AFTER UPDATE ON production_batches
            FOR EACH ROW
            EXECUTE FUNCTION notify_production_delay();
    END IF;
END
$$;
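The triggers above only emit `pg_notify` events; some process has to listen on those channels and feed payloads into the alert pipeline. A minimal listener sketch with asyncpg (the DSN and the hand-off step are assumptions; the channel name matches the stock trigger):

```python
# Hedged sketch: consume pg_notify payloads emitted by the triggers above.
# The DSN is an assumption; 'stock_alerts' matches the stock trigger's channel.
import asyncio
import json

import asyncpg


async def main():
    conn = await asyncpg.connect(
        "postgresql://notification_user:notification_pass123@notification-db:5432/notification_db"
    )

    def on_stock_alert(connection, pid, channel, payload):
        event = json.loads(payload)
        # Hand off to the real pipeline here (e.g. publish to RabbitMQ).
        print(f"{channel}: {event['alert_type']} for tenant {event['tenant_id']}")

    await conn.add_listener("stock_alerts", on_stock_alert)
    await asyncio.sleep(3600)  # keep the connection open while listening


asyncio.run(main())
```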
26 services/alert_processor/Dockerfile Normal file
@@ -0,0 +1,26 @@
FROM python:3.11-slim

WORKDIR /app

# Install system dependencies
RUN apt-get update && apt-get install -y \
    gcc \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements and install dependencies
COPY services/alert_processor/requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy shared libraries
COPY shared/ /app/shared/

# Copy application code
COPY services/alert_processor/app/ /app/app/

# Create non-root user
RUN useradd -m -u 1000 appuser && chown -R appuser:appuser /app
USER appuser

EXPOSE 8000

CMD ["python", "-m", "app.main"]
1 services/alert_processor/app/__init__.py Normal file
@@ -0,0 +1 @@
# Alert Processor Service
49 services/alert_processor/app/config.py Normal file
@@ -0,0 +1,49 @@
# services/alert_processor/app/config.py
"""
Alert Processor Service Configuration
"""

import os
from typing import List
from shared.config.base import BaseServiceSettings


class AlertProcessorConfig(BaseServiceSettings):
    """Configuration for Alert Processor Service"""
    SERVICE_NAME: str = "alert-processor"
    APP_NAME: str = "Alert Processor Service"
    DESCRIPTION: str = "Central alert and recommendation processor"

    # Use the notification database for alert storage
    # This makes sense since alerts and notifications are closely related
    DATABASE_URL: str = os.getenv(
        "NOTIFICATION_DATABASE_URL",
        "postgresql+asyncpg://notification_user:notification_pass123@notification-db:5432/notification_db"
    )

    # Use dedicated Redis DB for alert processing
    REDIS_DB: int = int(os.getenv("ALERT_PROCESSOR_REDIS_DB", "6"))

    # Alert processing configuration
    BATCH_SIZE: int = int(os.getenv("ALERT_BATCH_SIZE", "10"))
    PROCESSING_TIMEOUT: int = int(os.getenv("ALERT_PROCESSING_TIMEOUT", "30"))

    # Deduplication settings
    ALERT_DEDUPLICATION_WINDOW_MINUTES: int = int(os.getenv("ALERT_DEDUPLICATION_WINDOW_MINUTES", "15"))
    RECOMMENDATION_DEDUPLICATION_WINDOW_MINUTES: int = int(os.getenv("RECOMMENDATION_DEDUPLICATION_WINDOW_MINUTES", "60"))

    # Alert severity channel mappings (hardcoded for now to avoid config parsing issues)
    @property
    def urgent_channels(self) -> List[str]:
        return ["whatsapp", "email", "push", "dashboard"]

    @property
    def high_channels(self) -> List[str]:
        return ["whatsapp", "email", "dashboard"]

    @property
    def medium_channels(self) -> List[str]:
        return ["email", "dashboard"]

    @property
    def low_channels(self) -> List[str]:
        return ["dashboard"]
360 services/alert_processor/app/main.py Normal file
@@ -0,0 +1,360 @@
# services/alert_processor/app/main.py
"""
Alert Processor Service - Central hub for processing alerts and recommendations
Consumes from RabbitMQ, stores in database, and routes to notification service
"""

import asyncio
import json
import signal
from datetime import datetime
from typing import Dict, Any
import structlog
import redis.asyncio as aioredis
from aio_pika import connect_robust, IncomingMessage, ExchangeType

from app.config import AlertProcessorConfig
from shared.database.base import create_database_manager
from shared.clients.base_service_client import BaseServiceClient
from shared.config.rabbitmq_config import RABBITMQ_CONFIG

# Setup logging
structlog.configure(
    processors=[
        structlog.stdlib.filter_by_level,
        structlog.stdlib.add_logger_name,
        structlog.stdlib.add_log_level,
        structlog.stdlib.PositionalArgumentsFormatter(),
        structlog.processors.TimeStamper(fmt="iso"),  # "iso" is the special value; "ISO" would be taken as a literal strftime format
        structlog.processors.StackInfoRenderer(),
        structlog.processors.format_exc_info,
        structlog.processors.JSONRenderer()
    ],
    context_class=dict,
    logger_factory=structlog.stdlib.LoggerFactory(),
    wrapper_class=structlog.stdlib.BoundLogger,
    cache_logger_on_first_use=True,
)

logger = structlog.get_logger()


class NotificationServiceClient(BaseServiceClient):
    """Client for notification service"""

    def __init__(self, config: AlertProcessorConfig):
        super().__init__("notification-service", config)
        self.config = config

    def get_service_base_path(self) -> str:
        """Return the base path for notification service APIs"""
        return "/api/v1"

    async def send_notification(self, tenant_id: str, notification: Dict[str, Any], channels: list) -> Dict[str, Any]:
        """Send notification via notification service"""
        try:
            response = await self.post(
                "/api/v1/notifications/send",
                json={
                    "tenant_id": tenant_id,
                    "notification": notification,
                    "channels": channels
                }
            )
            return response
        except Exception as e:
            logger.error("Failed to send notification", error=str(e), tenant_id=tenant_id)
            return {"status": "failed", "error": str(e)}


class AlertProcessorService:
    """
    Central service for processing and routing alerts and recommendations
    Integrates with notification service for multi-channel delivery
    """

    def __init__(self, config: AlertProcessorConfig):
        self.config = config
        self.db_manager = create_database_manager(config.DATABASE_URL, "alert-processor")
        self.notification_client = NotificationServiceClient(config)
        self.redis = None
        self.connection = None
        self.channel = None
        self.running = False

        # Metrics
        self.items_processed = 0
        self.items_stored = 0
        self.notifications_sent = 0
        self.errors_count = 0

    async def start(self):
        """Start the alert processor service"""
        try:
            logger.info("Starting Alert Processor Service")

            # Connect to Redis for SSE publishing
            self.redis = aioredis.from_url(self.config.REDIS_URL)
            logger.info("Connected to Redis")

            # Connect to RabbitMQ
            await self._setup_rabbitmq()

            # Start consuming messages
            await self._start_consuming()

            self.running = True
            logger.info("Alert Processor Service started successfully")

        except Exception as e:
            logger.error("Failed to start Alert Processor Service", error=str(e))
            raise

    async def _setup_rabbitmq(self):
        """Setup RabbitMQ connection and configuration"""
        self.connection = await connect_robust(
            self.config.RABBITMQ_URL,
            heartbeat=30,
            connection_attempts=5
        )
        self.channel = await self.connection.channel()
        await self.channel.set_qos(prefetch_count=10)  # Process up to 10 messages at a time

        # Setup exchange and queue based on config
        exchange_config = RABBITMQ_CONFIG["exchanges"]["alerts"]
        self.exchange = await self.channel.declare_exchange(
            exchange_config["name"],
            getattr(ExchangeType, exchange_config["type"].upper()),
            durable=exchange_config["durable"]
        )

        queue_config = RABBITMQ_CONFIG["queues"]["alert_processing"]
        self.queue = await self.channel.declare_queue(
            queue_config["name"],
            durable=queue_config["durable"],
            arguments=queue_config["arguments"]
        )

        # Bind to all alert and recommendation routing keys
        await self.queue.bind(self.exchange, routing_key="*.*.*")

        logger.info("RabbitMQ setup completed")

    async def _start_consuming(self):
        """Start consuming messages from RabbitMQ"""
        await self.queue.consume(self.process_item)
        logger.info("Started consuming alert messages")

    async def process_item(self, message: IncomingMessage):
        """Process incoming alert or recommendation"""
        async with message.process():
            try:
                # Parse message
                item = json.loads(message.body.decode())

                logger.info("Processing item",
                            item_type=item.get('item_type'),
                            alert_type=item.get('type'),
                            severity=item.get('severity'),
                            tenant_id=item.get('tenant_id'))

                # Store in database
                stored_item = await self.store_item(item)
                self.items_stored += 1

                # Determine delivery channels based on severity and type
                channels = self.get_channels_by_severity_and_type(
                    item['severity'],
                    item['item_type']
                )

                # Send via notification service if channels are specified
                if channels:
                    notification_result = await self.notification_client.send_notification(
                        tenant_id=item['tenant_id'],
                        notification={
                            'type': item['item_type'],  # 'alert' or 'recommendation'
                            'id': item['id'],
                            'title': item['title'],
                            'message': item['message'],
                            'severity': item['severity'],
                            'metadata': item.get('metadata', {}),
                            'actions': item.get('actions', []),
                            'email': item.get('email'),
                            'phone': item.get('phone'),
                            'user_id': item.get('user_id')
                        },
                        channels=channels
                    )

                    if notification_result.get('status') == 'success':
                        self.notifications_sent += 1

                # Stream to SSE for real-time dashboard (always)
                await self.stream_to_sse(item['tenant_id'], stored_item)

                self.items_processed += 1

                logger.info("Item processed successfully",
                            item_id=item['id'],
                            channels=len(channels))

            except Exception as e:
                self.errors_count += 1
                logger.error("Item processing failed", error=str(e))
                raise

    async def store_item(self, item: dict) -> dict:
        """Store alert or recommendation in database"""
        from sqlalchemy import text

        query = text("""
            INSERT INTO alerts (
                id, tenant_id, item_type, alert_type, severity, status,
                service, title, message, actions, metadata,
                created_at
            ) VALUES (:id, :tenant_id, :item_type, :alert_type, :severity, :status,
                      :service, :title, :message, :actions, :metadata, :created_at)
            RETURNING *
        """)

        async with self.db_manager.get_session() as session:
            result = await session.execute(
                query,
                {
                    'id': item['id'],
                    'tenant_id': item['tenant_id'],
                    'item_type': item['item_type'],  # 'alert' or 'recommendation'
                    'alert_type': item['type'],
                    'severity': item['severity'],
                    'status': 'active',
                    'service': item['service'],
                    'title': item['title'],
                    'message': item['message'],
                    'actions': json.dumps(item.get('actions', [])),
                    'metadata': json.dumps(item.get('metadata', {})),
                    # asyncpg expects a datetime for a timestamptz column, not the raw ISO string
                    'created_at': datetime.fromisoformat(item['timestamp'])
                }
            )

            row = result.fetchone()
            await session.commit()

            logger.debug("Item stored in database", item_id=item['id'])
            return dict(row._mapping)

    async def stream_to_sse(self, tenant_id: str, item: dict):
        """Publish item to Redis for SSE streaming"""
        channel = f"alerts:{tenant_id}"

        # Prepare message for SSE
        sse_message = {
            'id': item['id'],
            'item_type': item['item_type'],
            'type': item['alert_type'],
            'severity': item['severity'],
            'title': item['title'],
            'message': item['message'],
            'actions': json.loads(item['actions']) if isinstance(item['actions'], str) else item['actions'],
            'metadata': json.loads(item['metadata']) if isinstance(item['metadata'], str) else item['metadata'],
            'timestamp': item['created_at'].isoformat() if hasattr(item['created_at'], 'isoformat') else item['created_at'],
            'status': item['status']
        }

        # Publish to Redis channel for SSE
        await self.redis.publish(channel, json.dumps(sse_message))

        logger.debug("Item published to SSE", tenant_id=tenant_id, item_id=item['id'])

    def get_channels_by_severity_and_type(self, severity: str, item_type: str) -> list:
        """Determine notification channels based on severity, type, and time of day"""
        current_hour = datetime.now().hour

        channels = ['dashboard']  # Always include dashboard (SSE)

        if item_type == 'alert':
            if severity == 'urgent':
                # Urgent alerts: all channels immediately
                channels.extend(['whatsapp', 'email', 'push'])
            elif severity == 'high':
                # High alerts: WhatsApp and email during extended hours
                if 6 <= current_hour <= 22:
                    channels.extend(['whatsapp', 'email'])
                else:
                    channels.append('email')  # Email only during night
            elif severity == 'medium':
                # Medium alerts: email during business hours
                if 7 <= current_hour <= 20:
                    channels.append('email')
            # Low severity: dashboard only

        elif item_type == 'recommendation':
            # Recommendations: less urgent, limit channels and respect business hours
            if severity in ['medium', 'high']:
                if 8 <= current_hour <= 19:  # Business hours for recommendations
                    channels.append('email')
            # Low/urgent (rare for recommendations): dashboard only

        return channels

    async def stop(self):
        """Stop the alert processor service"""
        self.running = False
        logger.info("Stopping Alert Processor Service")

        try:
            # Close RabbitMQ connection
            if self.connection and not self.connection.is_closed:
                await self.connection.close()

            # Close Redis connection
            if self.redis:
                await self.redis.close()

            logger.info("Alert Processor Service stopped")

        except Exception as e:
            logger.error("Error stopping service", error=str(e))

    def get_metrics(self) -> Dict[str, Any]:
        """Get service metrics"""
        return {
            "items_processed": self.items_processed,
            "items_stored": self.items_stored,
            "notifications_sent": self.notifications_sent,
            "errors_count": self.errors_count,
            "running": self.running
        }


async def main():
    """Main entry point"""
    config = AlertProcessorConfig()
    service = AlertProcessorService(config)

    # Graceful shutdown: stop() flips `running`, which lets the
    # keep-alive loop below exit on its own
    async def shutdown():
        logger.info("Received shutdown signal")
        await service.stop()

    # Register signal handlers on the running loop (plain signal.signal
    # handlers cannot safely schedule asyncio tasks)
    loop = asyncio.get_running_loop()
    for sig in (signal.SIGTERM, signal.SIGINT):
        loop.add_signal_handler(sig, lambda: asyncio.create_task(shutdown()))

    try:
        # Start the service
        await service.start()

        # Keep running until stop() is called
        while service.running:
            await asyncio.sleep(1)

    except KeyboardInterrupt:
        logger.info("Received keyboard interrupt")
    except Exception as e:
        logger.error("Service failed", error=str(e))
    finally:
        await service.stop()


if __name__ == "__main__":
    asyncio.run(main())
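`stream_to_sse` only publishes to the `alerts:{tenant_id}` Redis channel; the HTTP side that relays those messages to browsers is not part of this commit. A minimal FastAPI sketch of such an endpoint (the route path, its placement in the gateway, and the Redis URL are assumptions; the channel name matches what `AlertProcessorService.stream_to_sse` publishes to):

```python
# Hedged sketch: relay Redis pub/sub messages as Server-Sent Events.
# Route and Redis URL are assumptions; auth/tenancy checks are omitted.
import redis.asyncio as aioredis
from fastapi import FastAPI
from fastapi.responses import StreamingResponse

app = FastAPI()
redis = aioredis.from_url("redis://redis:6379/6")  # DB 6 matches ALERT_PROCESSOR_REDIS_DB's default


@app.get("/api/v1/alerts/{tenant_id}/stream")
async def stream_alerts(tenant_id: str):
    async def event_source():
        pubsub = redis.pubsub()
        await pubsub.subscribe(f"alerts:{tenant_id}")
        try:
            async for message in pubsub.listen():
                if message["type"] == "message":
                    data = message["data"].decode()
                    yield f"event: alert\ndata: {data}\n\n"
        finally:
            await pubsub.unsubscribe(f"alerts:{tenant_id}")

    return StreamingResponse(event_source(), media_type="text/event-stream")
```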
12 services/alert_processor/requirements.txt Normal file
@@ -0,0 +1,12 @@
fastapi==0.104.1
uvicorn[standard]==0.24.0
aio-pika==9.3.1
redis==5.0.1
asyncpg==0.29.0
sqlalchemy==2.0.23
structlog==23.2.0
prometheus-client==0.19.0
pydantic-settings==2.1.0
pydantic==2.5.2
httpx==0.25.2
python-jose[cryptography]==3.3.0
@@ -1,129 +0,0 @@
# ================================================================
# services/auth/README.md
# ================================================================
# Authentication Service

Microservice for user authentication and authorization in the bakery forecasting platform.

## Features

- User registration and login
- JWT access and refresh tokens
- Password security validation
- Rate limiting and login attempt tracking
- Multi-tenant user management
- Session management
- Event publishing for user actions

## Quick Start

### Development

```bash
# Start dependencies
docker-compose up -d auth-db redis rabbitmq

# Install dependencies
pip install -r requirements.txt

# Run migrations
alembic upgrade head

# Start service
uvicorn app.main:app --reload --host 0.0.0.0 --port 8001
```

### With Docker

```bash
# Start everything
docker-compose up -d

# View logs
docker-compose logs -f auth-service

# Run tests
docker-compose exec auth-service pytest
```

## API Endpoints

### Authentication
- `POST /api/v1/auth/register` - Register new user
- `POST /api/v1/auth/login` - User login
- `POST /api/v1/auth/refresh` - Refresh access token
- `POST /api/v1/auth/verify` - Verify token
- `POST /api/v1/auth/logout` - Logout user

### User Management
- `GET /api/v1/users/me` - Get current user
- `PUT /api/v1/users/me` - Update current user
- `POST /api/v1/users/change-password` - Change password

### Health
- `GET /health` - Health check
- `GET /metrics` - Prometheus metrics

## Configuration

Set these environment variables:

```bash
DATABASE_URL=postgresql+asyncpg://auth_user:auth_pass123@auth-db:5432/auth_db
REDIS_URL=redis://redis:6379/0
RABBITMQ_URL=amqp://bakery:forecast123@rabbitmq:5672/
JWT_SECRET_KEY=your-super-secret-jwt-key-change-in-production
JWT_ACCESS_TOKEN_EXPIRE_MINUTES=30
JWT_REFRESH_TOKEN_EXPIRE_DAYS=7
MAX_LOGIN_ATTEMPTS=5
LOCKOUT_DURATION_MINUTES=30
```

## Testing

```bash
# Run all tests
pytest

# Run with coverage
pytest --cov=app

# Run specific test file
pytest tests/test_auth.py -v
```

## Database Migrations

```bash
# Create migration
alembic revision --autogenerate -m "description"

# Apply migrations
alembic upgrade head

# Rollback
alembic downgrade -1
```

## Monitoring

- Health endpoint: `/health`
- Metrics endpoint: `/metrics` (Prometheus format)
- Logs: Structured JSON logging
- Tracing: Request ID tracking

## Security Features

- Bcrypt password hashing
- JWT tokens with expiration
- Rate limiting on login attempts
- Account lockout protection
- IP and user agent tracking
- Token revocation support

## Events Published

- `user.registered` - When user registers
- `user.login` - When user logs in
- `user.logout` - When user logs out
- `user.password_changed` - When password changes
@@ -1,169 +0,0 @@
# ================================================================
# Documentation: services/forecasting/README.md
# ================================================================

# Forecasting Service

AI-powered demand prediction service for bakery operations in Madrid, Spain.

## Overview

The Forecasting Service is a specialized microservice responsible for generating accurate demand predictions for bakery products. It integrates trained ML models with real-time weather and traffic data to provide actionable forecasts for business planning.

## Features

### Core Functionality
- **Single Product Forecasting**: Generate predictions for individual products
- **Batch Forecasting**: Process multiple products and time periods
- **Real-time Predictions**: On-demand forecasting with external data
- **Business Rules**: Spanish bakery-specific adjustments
- **Alert System**: Automated notifications for demand anomalies

### Integration Points
- **Training Service**: Loads trained Prophet models
- **Data Service**: Retrieves weather and traffic data
- **Notification Service**: Sends alerts and reports
- **Gateway Service**: Authentication and request routing

## API Endpoints

### Forecasts
- `POST /api/v1/forecasts/single` - Generate single forecast
- `POST /api/v1/forecasts/batch` - Generate batch forecasts
- `GET /api/v1/forecasts/list` - List historical forecasts
- `GET /api/v1/forecasts/alerts` - Get forecast alerts
- `PUT /api/v1/forecasts/alerts/{id}/acknowledge` - Acknowledge alert

### Predictions
- `POST /api/v1/predictions/realtime` - Real-time prediction
- `GET /api/v1/predictions/quick/{product}` - Quick multi-day forecast

## Business Logic

### Spanish Bakery Rules
- **Siesta Impact**: Reduced afternoon activity consideration
- **Weather Adjustments**: Rain reduces traffic, extreme temperatures affect product mix
- **Holiday Handling**: Spanish holiday calendar integration
- **Weekend Patterns**: Different demand patterns for weekends

### Business Types
- **Individual Bakery**: Single location with direct sales
- **Central Workshop**: Production facility supplying multiple locations

## Configuration

### Environment Variables
```bash
# Database
DATABASE_URL=postgresql+asyncpg://user:pass@host:port/db

# External Services
TRAINING_SERVICE_URL=http://training-service:8000
DATA_SERVICE_URL=http://data-service:8000

# Business Rules
WEEKEND_ADJUSTMENT_FACTOR=0.8
HOLIDAY_ADJUSTMENT_FACTOR=0.5
RAIN_IMPACT_FACTOR=0.7
```

### Performance Settings
```bash
MAX_FORECAST_DAYS=30
PREDICTION_CACHE_TTL_HOURS=6
FORECAST_BATCH_SIZE=100
```

## Development

### Setup
```bash
cd services/forecasting
pip install -r requirements.txt
```

### Testing
```bash
pytest tests/ -v --cov=app
```

### Running Locally
```bash
uvicorn app.main:app --reload --port 8000
```

## Deployment

### Docker
```bash
docker build -t forecasting-service .
docker run -p 8000:8000 forecasting-service
```

### Kubernetes
```bash
kubectl apply -f infrastructure/kubernetes/base/forecasting-service.yaml
```

## Monitoring

### Metrics
- `forecasts_generated_total` - Total forecasts generated
- `predictions_served_total` - Total predictions served
- `forecast_processing_time_seconds` - Processing time histogram
- `active_models_count` - Number of active models

### Health Checks
- `/health` - Service health status
- `/metrics` - Prometheus metrics endpoint

## Performance

### Benchmarks
- **Single Forecast**: < 2 seconds average
- **Batch Forecasting**: 100 products in < 30 seconds
- **Concurrent Load**: 95%+ success rate at 20 concurrent requests

### Optimization
- Model caching for faster predictions
- Feature preparation optimization
- Database query optimization
- Asynchronous external API calls

## Troubleshooting

### Common Issues

1. **No Model Found Error**
   - Ensure training service has models for tenant/product
   - Check model training logs in training service

2. **High Prediction Latency**
   - Monitor model cache hit rate
   - Check external service response times
   - Review database query performance

3. **Inaccurate Predictions**
   - Verify external data quality (weather/traffic)
   - Check model performance metrics
   - Review business rule configurations

### Logging
```bash
# View service logs
docker logs forecasting-service

# Debug level logging
LOG_LEVEL=DEBUG uvicorn app.main:app
```

## Contributing

1. Follow the existing code structure and patterns
2. Add tests for new functionality
3. Update documentation for API changes
4. Ensure performance benchmarks are maintained

## License

This service is part of the Bakery Forecasting Platform - MIT License
|
||||
@@ -14,6 +14,7 @@ import structlog
|
||||
from app.core.config import settings
|
||||
from app.core.database import init_db, close_db
|
||||
from app.api import ingredients, stock, classification
|
||||
from app.services.inventory_alert_service import InventoryAlertService
|
||||
from shared.monitoring.health import router as health_router
|
||||
from shared.monitoring.metrics import setup_metrics_early
|
||||
# Auth decorators are used in endpoints, no global setup needed
|
||||
@@ -32,6 +33,14 @@ async def lifespan(app: FastAPI):
|
||||
await init_db()
|
||||
logger.info("Database initialized successfully")
|
||||
|
||||
# Initialize alert service
|
||||
alert_service = InventoryAlertService(settings)
|
||||
await alert_service.start()
|
||||
logger.info("Inventory alert service started")
|
||||
|
||||
# Store alert service in app state
|
||||
app.state.alert_service = alert_service
|
||||
|
||||
# Setup metrics is already done early - no need to do it here
|
||||
logger.info("Metrics setup completed")
|
||||
|
||||
@@ -44,6 +53,11 @@ async def lifespan(app: FastAPI):
|
||||
# Shutdown
|
||||
logger.info("Shutting down Inventory Service")
|
||||
try:
|
||||
# Stop alert service
|
||||
if hasattr(app.state, 'alert_service'):
|
||||
await app.state.alert_service.stop()
|
||||
logger.info("Alert service stopped")
|
||||
|
||||
await close_db()
|
||||
logger.info("Database connections closed")
|
||||
except Exception as e:
|
||||
|
||||

services/inventory/app/services/inventory_alert_service.py (new file, 710 lines)
@@ -0,0 +1,710 @@
# services/inventory/app/services/inventory_alert_service.py
"""
Inventory-specific alert and recommendation detection service
Implements hybrid detection patterns for critical stock issues and optimization opportunities
"""

import asyncio
import json
from typing import List, Dict, Any, Optional
from uuid import UUID
from datetime import datetime, timedelta
import structlog
from apscheduler.triggers.cron import CronTrigger
from sqlalchemy import text

from shared.alerts.base_service import BaseAlertService, AlertServiceMixin
from shared.alerts.templates import format_item_message

logger = structlog.get_logger()

class InventoryAlertService(BaseAlertService, AlertServiceMixin):
    """Inventory service alert and recommendation detection"""

    def setup_scheduled_checks(self):
        """Inventory-specific scheduled checks for alerts and recommendations"""

        # Critical stock checks - every 5 minutes (alerts)
        self.scheduler.add_job(
            self.check_stock_levels,
            CronTrigger(minute='*/5'),
            id='stock_levels',
            misfire_grace_time=30,
            max_instances=1
        )

        # Expiry checks - every 2 minutes (food safety critical, alerts)
        self.scheduler.add_job(
            self.check_expiring_products,
            CronTrigger(minute='*/2'),
            id='expiry_check',
            misfire_grace_time=30,
            max_instances=1
        )

        # Temperature checks - every 2 minutes (alerts)
        self.scheduler.add_job(
            self.check_temperature_breaches,
            CronTrigger(minute='*/2'),
            id='temperature_check',
            misfire_grace_time=30,
            max_instances=1
        )

        # Inventory optimization - every 30 minutes (recommendations)
        self.scheduler.add_job(
            self.generate_inventory_recommendations,
            CronTrigger(minute='*/30'),
            id='inventory_recs',
            misfire_grace_time=120,
            max_instances=1
        )

        # Waste reduction analysis - every hour (recommendations)
        self.scheduler.add_job(
            self.generate_waste_reduction_recommendations,
            CronTrigger(minute='0'),
            id='waste_reduction_recs',
            misfire_grace_time=300,
            max_instances=1
        )

        logger.info("Inventory alert schedules configured",
                    service=self.config.SERVICE_NAME)

    async def check_stock_levels(self):
        """Batch check all stock levels for critical shortages (alerts)"""
        try:
            self._checks_performed += 1

            # Bound parameters use SQLAlchemy's :name style to match
            # session.execute(text(query), {...}) below.
            query = """
                WITH stock_analysis AS (
                    SELECT
                        i.*,
                        COALESCE(p.scheduled_quantity, 0) as tomorrow_needed,
                        COALESCE(s.avg_daily_usage, 0) as avg_daily_usage,
                        COALESCE(s.lead_time_days, 7) as lead_time_days,
                        CASE
                            WHEN i.current_stock < i.minimum_stock THEN 'critical'
                            WHEN i.current_stock < i.minimum_stock * 1.2 THEN 'low'
                            WHEN i.current_stock > i.maximum_stock THEN 'overstock'
                            ELSE 'normal'
                        END as status,
                        GREATEST(0, i.minimum_stock - i.current_stock) as shortage_amount
                    FROM inventory_items i
                    LEFT JOIN production_schedule p ON p.ingredient_id = i.id
                        AND p.date = CURRENT_DATE + INTERVAL '1 day'
                    LEFT JOIN supplier_items s ON s.ingredient_id = i.id
                    WHERE i.tenant_id = :tenant_id AND i.active = true
                )
                SELECT * FROM stock_analysis WHERE status != 'normal'
                ORDER BY
                    CASE status
                        WHEN 'critical' THEN 1
                        WHEN 'low' THEN 2
                        WHEN 'overstock' THEN 3
                    END,
                    shortage_amount DESC
            """

            tenants = await self.get_active_tenants()

            for tenant_id in tenants:
                try:
                    async with self.db_manager.get_session() as session:
                        result = await session.execute(text(query), {"tenant_id": tenant_id})
                        # mappings() yields dict-like rows, matching issue['key'] access below
                        issues = result.mappings().all()

                    for issue in issues:
                        await self._process_stock_issue(tenant_id, issue)

                except Exception as e:
                    logger.error("Error checking stock for tenant",
                                 tenant_id=str(tenant_id),
                                 error=str(e))

            logger.debug("Stock level check completed",
                         tenants_checked=len(tenants))

        except Exception as e:
            logger.error("Stock level check failed", error=str(e))
            self._errors_count += 1

    async def _process_stock_issue(self, tenant_id: UUID, issue: Dict[str, Any]):
        """Process individual stock issue"""
        try:
            if issue['status'] == 'critical':
                # Critical stock shortage - immediate alert
                template_data = self.format_spanish_message(
                    'critical_stock_shortage',
                    ingredient_name=issue["name"],
                    current_stock=issue["current_stock"],
                    required_stock=issue["tomorrow_needed"] or issue["minimum_stock"],
                    shortage_amount=issue["shortage_amount"]
                )

                await self.publish_item(tenant_id, {
                    'type': 'critical_stock_shortage',
                    'severity': 'urgent',
                    'title': template_data['title'],
                    'message': template_data['message'],
                    'actions': template_data['actions'],
                    'metadata': {
                        'ingredient_id': str(issue['id']),
                        'current_stock': float(issue['current_stock']),
                        'minimum_stock': float(issue['minimum_stock']),
                        'shortage_amount': float(issue['shortage_amount']),
                        'tomorrow_needed': float(issue['tomorrow_needed'] or 0),
                        'lead_time_days': issue['lead_time_days']
                    }
                }, item_type='alert')

            elif issue['status'] == 'low':
                # Low stock - high priority alert
                template_data = self.format_spanish_message(
                    'critical_stock_shortage',
                    ingredient_name=issue["name"],
                    current_stock=issue["current_stock"],
                    required_stock=issue["minimum_stock"]
                )

                severity = self.get_business_hours_severity('high')

                await self.publish_item(tenant_id, {
                    'type': 'low_stock_warning',
                    'severity': severity,
                    'title': f'⚠️ Stock Bajo: {issue["name"]}',
                    'message': f'Stock actual {issue["current_stock"]}kg, mínimo {issue["minimum_stock"]}kg. Considerar pedido pronto.',
                    'actions': ['Revisar consumo', 'Programar pedido', 'Contactar proveedor'],
                    'metadata': {
                        'ingredient_id': str(issue['id']),
                        'current_stock': float(issue['current_stock']),
                        'minimum_stock': float(issue['minimum_stock'])
                    }
                }, item_type='alert')

            elif issue['status'] == 'overstock':
                # Overstock - medium priority alert
                severity = self.get_business_hours_severity('medium')

                await self.publish_item(tenant_id, {
                    'type': 'overstock_warning',
                    'severity': severity,
                    'title': f'📦 Exceso de Stock: {issue["name"]}',
                    'message': f'Stock actual {issue["current_stock"]}kg excede máximo {issue["maximum_stock"]}kg. Revisar para evitar caducidad.',
                    'actions': ['Revisar caducidades', 'Aumentar producción', 'Ofertas especiales', 'Ajustar pedidos'],
                    'metadata': {
                        'ingredient_id': str(issue['id']),
                        'current_stock': float(issue['current_stock']),
                        'maximum_stock': float(issue['maximum_stock'])
                    }
                }, item_type='alert')

        except Exception as e:
            logger.error("Error processing stock issue",
                         ingredient_id=str(issue.get('id')),
                         error=str(e))

    async def check_expiring_products(self):
        """Check for products approaching expiry (alerts)"""
        try:
            self._checks_performed += 1

            query = """
                SELECT
                    i.id, i.name, i.current_stock, i.tenant_id,
                    b.id as batch_id, b.expiry_date, b.quantity,
                    (b.expiry_date - CURRENT_DATE) as days_to_expiry  -- integer days (expiry_date assumed DATE)
                FROM inventory_items i
                JOIN inventory_batches b ON b.ingredient_id = i.id
                WHERE b.expiry_date <= CURRENT_DATE + INTERVAL '7 days'
                    AND b.quantity > 0
                    AND b.status = 'active'
                ORDER BY b.expiry_date ASC
            """

            async with self.db_manager.get_session() as session:
                result = await session.execute(text(query))
                expiring_items = result.mappings().all()

            # Group by tenant
            by_tenant = {}
            for item in expiring_items:
                tenant_id = item['tenant_id']
                if tenant_id not in by_tenant:
                    by_tenant[tenant_id] = []
                by_tenant[tenant_id].append(item)

            for tenant_id, items in by_tenant.items():
                await self._process_expiring_items(tenant_id, items)

        except Exception as e:
            logger.error("Expiry check failed", error=str(e))
            self._errors_count += 1

    async def _process_expiring_items(self, tenant_id: UUID, items: List[Dict[str, Any]]):
        """Process expiring items for a tenant"""
        try:
            # Group by urgency
            expired = [i for i in items if i['days_to_expiry'] <= 0]
            urgent = [i for i in items if 0 < i['days_to_expiry'] <= 2]
            warning = [i for i in items if 2 < i['days_to_expiry'] <= 7]  # currently unused tier

            # Process expired products (urgent alerts)
            if expired:
                product_count = len(expired)
                product_names = [i['name'] for i in expired[:3]]  # First 3 names
                if len(expired) > 3:
                    product_names.append(f"y {len(expired) - 3} más")

                template_data = self.format_spanish_message(
                    'expired_products',
                    product_count=product_count,
                    product_names=", ".join(product_names)
                )

                await self.publish_item(tenant_id, {
                    'type': 'expired_products',
                    'severity': 'urgent',
                    'title': template_data['title'],
                    'message': template_data['message'],
                    'actions': template_data['actions'],
                    'metadata': {
                        'expired_items': [
                            {
                                'id': str(item['id']),
                                'name': item['name'],
                                'batch_id': str(item['batch_id']),
                                'quantity': float(item['quantity']),
                                'days_expired': abs(item['days_to_expiry'])
                            } for item in expired
                        ]
                    }
                }, item_type='alert')

            # Process urgent expiry (high alerts)
            if urgent:
                for item in urgent:
                    await self.publish_item(tenant_id, {
                        'type': 'urgent_expiry',
                        'severity': 'high',
                        'title': f'⏰ Caducidad Urgente: {item["name"]}',
                        'message': f'{item["name"]} caduca en {item["days_to_expiry"]} día(s). Usar prioritariamente.',
                        'actions': ['Usar inmediatamente', 'Promoción especial', 'Revisar recetas', 'Documentar'],
                        'metadata': {
                            'ingredient_id': str(item['id']),
                            'batch_id': str(item['batch_id']),
                            'days_to_expiry': item['days_to_expiry'],
                            'quantity': float(item['quantity'])
                        }
                    }, item_type='alert')

        except Exception as e:
            logger.error("Error processing expiring items",
                         tenant_id=str(tenant_id),
                         error=str(e))

    async def check_temperature_breaches(self):
        """Check for temperature breaches (alerts)"""
        try:
            self._checks_performed += 1

            # Duration is computed via epoch seconds / 60 (EXTRACT(minutes ...) would
            # only return the minutes component of the interval, not total minutes).
            query = """
                SELECT
                    t.id, t.sensor_id, t.location, t.temperature,
                    t.max_threshold, t.tenant_id,
                    EXTRACT(epoch FROM (NOW() - t.first_breach_time)) / 60 as breach_duration_minutes
                FROM temperature_readings t
                WHERE t.temperature > t.max_threshold
                    AND NOW() - t.first_breach_time >= INTERVAL '30 minutes'  -- Only after 30 minutes
                    AND t.last_alert_sent < NOW() - INTERVAL '15 minutes'     -- Avoid spam
                ORDER BY t.temperature DESC, breach_duration_minutes DESC
            """

            async with self.db_manager.get_session() as session:
                result = await session.execute(text(query))
                breaches = result.mappings().all()

            for breach in breaches:
                await self._process_temperature_breach(breach)

        except Exception as e:
            logger.error("Temperature check failed", error=str(e))
            self._errors_count += 1

    async def _process_temperature_breach(self, breach: Dict[str, Any]):
        """Process temperature breach"""
        try:
            # Determine severity based on duration and temperature
            duration_minutes = breach['breach_duration_minutes']
            temp_excess = breach['temperature'] - breach['max_threshold']

            if duration_minutes > 120 or temp_excess > 10:
                severity = 'urgent'
            elif duration_minutes > 60 or temp_excess > 5:
                severity = 'high'
            else:
                severity = 'medium'

            template_data = self.format_spanish_message(
                'temperature_breach',
                location=breach['location'],
                temperature=breach['temperature'],
                duration=duration_minutes
            )

            await self.publish_item(breach['tenant_id'], {
                'type': 'temperature_breach',
                'severity': severity,
                'title': template_data['title'],
                'message': template_data['message'],
                'actions': template_data['actions'],
                'metadata': {
                    'sensor_id': breach['sensor_id'],
                    'location': breach['location'],
                    'temperature': float(breach['temperature']),
                    'max_threshold': float(breach['max_threshold']),
                    'duration_minutes': duration_minutes,
                    'temperature_excess': temp_excess
                }
            }, item_type='alert')

            # Update last alert sent time to avoid spam
            # (db_manager.execute takes asyncpg-style positional parameters)
            await self.db_manager.execute(
                "UPDATE temperature_readings SET last_alert_sent = NOW() WHERE id = $1",
                breach['id']
            )

        except Exception as e:
            logger.error("Error processing temperature breach",
                         sensor_id=breach.get('sensor_id'),
                         error=str(e))

    async def generate_inventory_recommendations(self):
        """Generate optimization recommendations based on usage patterns"""
        try:
            self._checks_performed += 1

            # Analyze stock levels vs usage patterns
            query = """
                WITH usage_analysis AS (
                    SELECT
                        i.id, i.name, i.tenant_id, i.minimum_stock, i.maximum_stock,
                        i.current_stock,
                        AVG(sm.quantity) FILTER (WHERE sm.movement_type = 'out'
                            AND sm.created_at > CURRENT_DATE - INTERVAL '30 days') as avg_daily_usage,
                        COUNT(sm.id) FILTER (WHERE sm.movement_type = 'out'
                            AND sm.created_at > CURRENT_DATE - INTERVAL '30 days') as usage_days,
                        MAX(sm.created_at) FILTER (WHERE sm.movement_type = 'out') as last_used
                    FROM inventory_items i
                    LEFT JOIN stock_movements sm ON sm.ingredient_id = i.id
                    WHERE i.active = true AND i.tenant_id = :tenant_id
                    GROUP BY i.id
                    HAVING COUNT(sm.id) FILTER (WHERE sm.movement_type = 'out'
                        AND sm.created_at > CURRENT_DATE - INTERVAL '30 days') >= 5
                ),
                recommendations AS (
                    SELECT *,
                        CASE
                            WHEN avg_daily_usage * 7 > maximum_stock THEN 'increase_max'
                            WHEN avg_daily_usage * 3 < minimum_stock THEN 'decrease_min'
                            WHEN current_stock / NULLIF(avg_daily_usage, 0) > 14 THEN 'reduce_stock'
                            WHEN avg_daily_usage > 0 AND minimum_stock / avg_daily_usage < 3 THEN 'increase_min'
                            ELSE null
                        END as recommendation_type
                    FROM usage_analysis
                    WHERE avg_daily_usage > 0
                )
                SELECT * FROM recommendations WHERE recommendation_type IS NOT NULL
                ORDER BY avg_daily_usage DESC
            """

            tenants = await self.get_active_tenants()

            for tenant_id in tenants:
                try:
                    async with self.db_manager.get_session() as session:
                        result = await session.execute(text(query), {"tenant_id": tenant_id})
                        recommendations = result.mappings().all()

                    for rec in recommendations:
                        await self._generate_stock_recommendation(tenant_id, rec)

                except Exception as e:
                    logger.error("Error generating recommendations for tenant",
                                 tenant_id=str(tenant_id),
                                 error=str(e))

        except Exception as e:
            logger.error("Inventory recommendations failed", error=str(e))
            self._errors_count += 1

    async def _generate_stock_recommendation(self, tenant_id: UUID, rec: Dict[str, Any]):
        """Generate specific stock recommendation"""
        try:
            if not self.should_send_recommendation(tenant_id, rec['recommendation_type']):
                return

            rec_type = rec['recommendation_type']

            if rec_type == 'increase_max':
                suggested_max = rec['avg_daily_usage'] * 10  # 10 days supply
                template_data = self.format_spanish_message(
                    'inventory_optimization',
                    ingredient_name=rec['name'],
                    period=30,
                    suggested_increase=suggested_max - rec['maximum_stock']
                )

                await self.publish_item(tenant_id, {
                    'type': 'inventory_optimization',
                    'severity': 'medium',
                    'title': template_data['title'],
                    'message': template_data['message'],
                    'actions': template_data['actions'],
                    'metadata': {
                        'ingredient_id': str(rec['id']),
                        'current_max': float(rec['maximum_stock']),
                        'suggested_max': float(suggested_max),
                        'avg_daily_usage': float(rec['avg_daily_usage']),
                        'recommendation_type': rec_type
                    }
                }, item_type='recommendation')

            elif rec_type == 'decrease_min':
                suggested_min = rec['avg_daily_usage'] * 3  # 3 days safety stock

                await self.publish_item(tenant_id, {
                    'type': 'inventory_optimization',
                    'severity': 'low',
                    'title': f'📉 Optimización de Stock Mínimo: {rec["name"]}',
                    'message': f'Uso promedio sugiere reducir stock mínimo de {rec["minimum_stock"]}kg a {suggested_min:.1f}kg.',
                    'actions': ['Revisar niveles mínimos', 'Analizar tendencias', 'Ajustar configuración'],
                    'metadata': {
                        'ingredient_id': str(rec['id']),
                        'current_min': float(rec['minimum_stock']),
                        'suggested_min': float(suggested_min),
                        'avg_daily_usage': float(rec['avg_daily_usage']),
                        'recommendation_type': rec_type
                    }
                }, item_type='recommendation')

        except Exception as e:
            logger.error("Error generating stock recommendation",
                         ingredient_id=str(rec.get('id')),
                         error=str(e))

    async def generate_waste_reduction_recommendations(self):
        """Generate waste reduction recommendations"""
        try:
            # Analyze waste patterns
            query = """
                SELECT
                    i.id, i.name, i.tenant_id,
                    SUM(w.quantity) as total_waste_30d,
                    COUNT(w.id) as waste_incidents,
                    AVG(w.quantity) as avg_waste_per_incident,
                    w.waste_reason
                FROM inventory_items i
                JOIN waste_logs w ON w.ingredient_id = i.id
                WHERE w.created_at > CURRENT_DATE - INTERVAL '30 days'
                    AND i.tenant_id = :tenant_id
                GROUP BY i.id, w.waste_reason
                HAVING SUM(w.quantity) > 5 -- More than 5kg wasted
                ORDER BY total_waste_30d DESC
            """

            tenants = await self.get_active_tenants()

            for tenant_id in tenants:
                try:
                    async with self.db_manager.get_session() as session:
                        result = await session.execute(text(query), {"tenant_id": tenant_id})
                        waste_data = result.mappings().all()

                    for waste in waste_data:
                        await self._generate_waste_recommendation(tenant_id, waste)

                except Exception as e:
                    logger.error("Error generating waste recommendations",
                                 tenant_id=str(tenant_id),
                                 error=str(e))

        except Exception as e:
            logger.error("Waste reduction recommendations failed", error=str(e))
            self._errors_count += 1

    async def _generate_waste_recommendation(self, tenant_id: UUID, waste: Dict[str, Any]):
        """Generate waste reduction recommendation"""
        try:
            # Simplified placeholder ratio: waste as a share of (waste + an assumed 100kg usage baseline)
            waste_percentage = (waste['total_waste_30d'] / (waste['total_waste_30d'] + 100)) * 100

            template_data = self.format_spanish_message(
                'waste_reduction',
                product=waste['name'],
                waste_reduction_percent=waste_percentage
            )

            await self.publish_item(tenant_id, {
                'type': 'waste_reduction',
                'severity': 'low',
                'title': template_data['title'],
                'message': template_data['message'],
                'actions': template_data['actions'],
                'metadata': {
                    'ingredient_id': str(waste['id']),
                    'total_waste_30d': float(waste['total_waste_30d']),
                    'waste_incidents': waste['waste_incidents'],
                    'waste_reason': waste['waste_reason'],
                    'estimated_reduction_percent': waste_percentage
                }
            }, item_type='recommendation')

        except Exception as e:
            logger.error("Error generating waste recommendation",
                         ingredient_id=str(waste.get('id')),
                         error=str(e))

    async def register_db_listeners(self, conn):
        """Register inventory-specific database listeners"""
        try:
            await conn.add_listener('stock_alerts', self.handle_stock_db_alert)
            await conn.add_listener('temperature_alerts', self.handle_temperature_db_alert)

            logger.info("Database listeners registered",
                        service=self.config.SERVICE_NAME)
        except Exception as e:
            logger.error("Failed to register database listeners",
                         service=self.config.SERVICE_NAME,
                         error=str(e))

    async def handle_stock_db_alert(self, connection, pid, channel, payload):
        """Handle stock alert from database trigger"""
        try:
            data = json.loads(payload)
            tenant_id = UUID(data['tenant_id'])

            template_data = self.format_spanish_message(
                'critical_stock_shortage',
                ingredient_name=data['name'],
                current_stock=data['current_stock'],
                required_stock=data['minimum_stock']
            )

            await self.publish_item(tenant_id, {
                'type': 'critical_stock_shortage',
                'severity': 'urgent',
                'title': template_data['title'],
                'message': template_data['message'],
                'actions': template_data['actions'],
                'metadata': {
                    'ingredient_id': data['ingredient_id'],
                    'current_stock': data['current_stock'],
                    'minimum_stock': data['minimum_stock'],
                    'trigger_source': 'database'
                }
            }, item_type='alert')

        except Exception as e:
            logger.error("Error handling stock DB alert", error=str(e))

    async def handle_temperature_db_alert(self, connection, pid, channel, payload):
        """Handle temperature alert from database trigger"""
        try:
            data = json.loads(payload)
            tenant_id = UUID(data['tenant_id'])

            template_data = self.format_spanish_message(
                'temperature_breach',
                location=data['location'],
                temperature=data['temperature'],
                duration=data['duration']
            )

            await self.publish_item(tenant_id, {
                'type': 'temperature_breach',
                'severity': 'high',
                'title': template_data['title'],
                'message': template_data['message'],
                'actions': template_data['actions'],
                'metadata': {
                    'sensor_id': data['sensor_id'],
                    'location': data['location'],
                    'temperature': data['temperature'],
                    'duration': data['duration'],
                    'trigger_source': 'database'
                }
            }, item_type='alert')

        except Exception as e:
            logger.error("Error handling temperature DB alert", error=str(e))

    async def start_event_listener(self):
        """Listen for inventory-affecting events"""
        try:
            # Subscribe to order events that might affect inventory
            await self.rabbitmq_client.consume_events(
                "bakery_events",
                f"inventory.orders.{self.config.SERVICE_NAME}",
                "orders.placed",
                self.handle_order_placed
            )

            logger.info("Event listeners started",
                        service=self.config.SERVICE_NAME)
        except Exception as e:
            logger.error("Failed to start event listeners",
                         service=self.config.SERVICE_NAME,
                         error=str(e))

    async def handle_order_placed(self, message):
        """Check if order critically affects stock"""
        try:
            order = json.loads(message.body)
            tenant_id = UUID(order['tenant_id'])

            for item in order.get('items', []):
                # Check stock impact
                stock_info = await self.get_stock_after_order(item['ingredient_id'], item['quantity'])

                if stock_info and stock_info['remaining'] < stock_info['minimum_stock']:
                    await self.publish_item(tenant_id, {
                        'type': 'stock_depleted_by_order',
                        'severity': 'high',
                        'title': f'⚠️ Pedido Agota Stock: {stock_info["name"]}',
                        'message': f'Pedido #{order["id"]} dejará stock en {stock_info["remaining"]}kg (mínimo {stock_info["minimum_stock"]}kg)',
                        'actions': ['Revisar pedido', 'Contactar proveedor', 'Ajustar producción', 'Usar stock reserva'],
                        'metadata': {
                            'order_id': order['id'],
                            'ingredient_id': item['ingredient_id'],
                            'order_quantity': item['quantity'],
                            'remaining_stock': stock_info['remaining'],
                            'minimum_stock': stock_info['minimum_stock']
                        }
                    }, item_type='alert')

        except Exception as e:
            logger.error("Error handling order placed event", error=str(e))

    async def get_stock_after_order(self, ingredient_id: str, order_quantity: float) -> Optional[Dict[str, Any]]:
        """Get stock information after a hypothetical order"""
        try:
            # db_manager.fetchrow takes asyncpg-style positional parameters ($1, $2)
            query = """
                SELECT id, name, current_stock, minimum_stock,
                       (current_stock - $2) as remaining
                FROM inventory_items
                WHERE id = $1
            """

            result = await self.db_manager.fetchrow(query, ingredient_id, order_quantity)
            return dict(result) if result else None

        except Exception as e:
            logger.error("Error getting stock after order",
                         ingredient_id=ingredient_id,
                         error=str(e))
            return None

@@ -30,8 +30,12 @@ passlib[bcrypt]==1.7.4
structlog==23.2.0
prometheus-client==0.19.0

# Message queues and Redis
aio-pika==9.3.1
redis>=4.0.0

# Scheduling
APScheduler==3.10.4

# Additional for inventory management
python-barcode==0.15.1

@@ -1,321 +0,0 @@
## 🎯 **Complete Notification Service Implementation**

### **📁 File Structure Created**

```
services/notification/
├── app/
│   ├── main.py                       ✅ Complete FastAPI application
│   ├── core/
│   │   ├── config.py                 ✅ Configuration settings
│   │   └── database.py               ✅ Database initialization
│   ├── models/
│   │   ├── notifications.py          ✅ Core notification models
│   │   └── templates.py              ✅ Template-specific models
│   ├── schemas/
│   │   └── notifications.py          ✅ Pydantic schemas
│   ├── services/
│   │   ├── notification_service.py   ✅ Main business logic
│   │   ├── email_service.py          ✅ Email delivery
│   │   ├── whatsapp_service.py       ✅ WhatsApp delivery
│   │   └── messaging.py              ✅ RabbitMQ integration
│   └── api/
│       └── notifications.py          ✅ Complete API routes
├── requirements.txt                  ✅ Python dependencies
├── Dockerfile                        ✅ Container configuration
└── .env.example                      ✅ Environment variables
```

### **🔧 Key Features Implemented**

#### **1. Complete Business Logic**

- ✅ **NotificationService**: Core orchestration of all notification operations
- ✅ **Multi-channel support**: Email, WhatsApp, Push (extensible)
- ✅ **Template processing**: Jinja2-based template rendering
- ✅ **Bulk notifications**: Batch processing with rate limiting
- ✅ **User preferences**: Granular notification controls
- ✅ **Scheduling**: Delayed notification delivery

#### **2. Email Service Integration**

- ✅ **SMTP support**: Configurable email providers (Gmail, SendGrid, etc.)
- ✅ **HTML + Text emails**: Rich email templates with fallbacks
- ✅ **Bulk email processing**: Rate-limited batch sending
- ✅ **Template system**: Pre-built Spanish templates for bakeries
- ✅ **Health checks**: SMTP connection monitoring
- ✅ **Attachment support**: File attachment capabilities

#### **3. WhatsApp Service Integration**

- ✅ **Twilio integration**: WhatsApp Business API support
- ✅ **Spanish phone formatting**: Automatic +34 country code handling (sketched below)
- ✅ **Template messages**: WhatsApp Business template support
- ✅ **Bulk WhatsApp**: Rate-limited batch messaging
- ✅ **Delivery status**: Webhook handling for delivery confirmations
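
The +34 handling could be as simple as the sketch below; the service's actual normalizer is not shown here, so this is illustrative only.

```python
# Sketch of Spanish phone normalization for WhatsApp delivery.
def format_spanish_phone(raw: str) -> str:
    """Normalize a phone number to E.164 with the +34 country code."""
    digits = "".join(ch for ch in raw if ch.isdigit())
    if digits.startswith("0034"):
        digits = digits[4:]
    elif digits.startswith("34") and len(digits) == 11:
        digits = digits[2:]
    return f"+34{digits}"

assert format_spanish_phone("612 345 678") == "+34612345678"
assert format_spanish_phone("+34 612-345-678") == "+34612345678"
```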

#### **4. Database Models & Schemas**

- ✅ **Complete data model**: Notifications, templates, preferences, logs
- ✅ **Multi-tenant support**: Tenant-scoped notifications
- ✅ **Audit trail**: Detailed delivery attempt logging
- ✅ **Template management**: System and custom templates
- ✅ **User preferences**: Granular notification controls

#### **5. API Integration with Gateway**

- ✅ **Gateway authentication**: Uses shared auth decorators
- ✅ **Tenant isolation**: Automatic tenant scoping
- ✅ **Role-based access**: Admin/manager/user permissions
- ✅ **Complete CRUD**: Full notification management API
- ✅ **Webhook endpoints**: External delivery status handling

#### **6. RabbitMQ Event Integration**

- ✅ **Event consumers**: Listens for user registration, forecasts, training
- ✅ **Event publishers**: Publishes notification status events
- ✅ **Auto-notifications**: Triggers welcome emails, alerts, reports
- ✅ **Error handling**: Robust message processing with retry logic

#### **7. Spanish Bakery Templates**

- ✅ **Welcome email**: Professional onboarding email
- ✅ **Forecast alerts**: Demand variation notifications
- ✅ **Weekly reports**: Performance summary emails
- ✅ **Responsive HTML**: Mobile-optimized email designs
- ✅ **Spanish localization**: All content in Spanish

### **🚀 Integration with Your Architecture**

#### **Seamless Gateway Integration**

```python
# Gateway already routes to notification service
app.include_router(notification.router, prefix="/api/v1/notifications", tags=["notifications"])

# Authentication handled by gateway middleware
# Tenant isolation automatic
# User context passed via headers
```

#### **Shared Library Usage**

```python
# Uses your existing shared components
from shared.auth.decorators import get_current_user_dep, get_current_tenant_id_dep
from shared.messaging.rabbitmq import RabbitMQClient
from shared.monitoring.metrics import MetricsCollector
from shared.database.base import DatabaseManager
```

#### **Event-Driven Architecture**

```python
# Automatic notifications triggered by:
# - User registration → Welcome email
# - Forecast alerts → Alert emails + WhatsApp
# - Training completion → Status notifications
# - Data imports → Import confirmations
```
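
The consumer side of this flow, sketched with `aio-pika` (which is in the requirements): the `bakery_events` exchange matches the one used elsewhere in this commit, while the queue name, routing key, and handler body are assumptions.

```python
# Illustrative aio-pika consumer: user-registration events -> welcome email.
import asyncio
import json
import aio_pika

async def consume_user_registered(amqp_url: str) -> None:
    connection = await aio_pika.connect_robust(amqp_url)
    channel = await connection.channel()
    exchange = await channel.declare_exchange(
        "bakery_events", aio_pika.ExchangeType.TOPIC, durable=True)
    # Hypothetical queue/routing-key names for this sketch
    queue = await channel.declare_queue("notification.users", durable=True)
    await queue.bind(exchange, routing_key="users.registered")

    async with queue.iterator() as messages:
        async for message in messages:
            async with message.process():  # ack on successful handling
                event = json.loads(message.body)
                # Hypothetical trigger: send the Spanish welcome template
                print("would send welcome_email to", event.get("email"))
```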

### **📊 Production Features**

#### **Health Monitoring**

- ✅ **Database health checks**: Connection monitoring
- ✅ **SMTP health checks**: Email service validation
- ✅ **WhatsApp health checks**: API connectivity tests
- ✅ **Prometheus metrics**: Delivery rates, response times
- ✅ **Structured logging**: Comprehensive error tracking

#### **Rate Limiting & Scaling**

- ✅ **Email rate limits**: 1000/hour, configurable (pacing sketched below)
- ✅ **WhatsApp rate limits**: 100/hour (Twilio limits)
- ✅ **Batch processing**: Configurable batch sizes
- ✅ **Retry logic**: Automatic retry with exponential backoff
- ✅ **Queue management**: Background task processing
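
The hourly email ceiling could be enforced with a simple pacing loop like the sketch below; the real implementation presumably sits in the bulk-send path and may also track counters in Redis.

```python
# Pacing sketch: cap sends at `per_hour` by spacing batches evenly.
import asyncio
from typing import Awaitable, Callable, Iterable

async def send_rate_limited(
    recipients: Iterable[str],
    send_one: Callable[[str], Awaitable[None]],
    per_hour: int = 1000,
    batch_size: int = 50,
) -> None:
    delay = 3600.0 / per_hour * batch_size  # seconds between batches
    batch: list[str] = []
    for recipient in recipients:
        batch.append(recipient)
        if len(batch) == batch_size:
            await asyncio.gather(*(send_one(r) for r in batch))
            batch.clear()
            await asyncio.sleep(delay)
    if batch:
        await asyncio.gather(*(send_one(r) for r in batch))
```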

#### **Security & Compliance**

- ✅ **User consent**: Preference-based opt-in/out
- ✅ **Tenant isolation**: Multi-tenant data separation
- ✅ **GDPR compliance**: User data control
- ✅ **Rate limiting**: DoS protection
- ✅ **Input validation**: Pydantic schema validation

### **🎯 Business-Specific Features**

#### **Bakery Use Cases**

```python
# Forecast alerts when demand varies >20%
# Daily production recommendations
# Weekly performance reports
# Stock shortage notifications
# Weather impact alerts
# Holiday/event notifications
```

#### **Spanish Localization**

- ✅ **Spanish templates**: Native Spanish content
- ✅ **Madrid timezone**: Europe/Madrid default
- ✅ **Spanish phone format**: +34 prefix handling
- ✅ **Local business hours**: Quiet hours support
- ✅ **Cultural context**: Bakery-specific terminology

### **🔄 How to Deploy**

#### **1. Add to Docker Compose**

```yaml
# Already integrated in your docker-compose.yml
notification-service:
  build: ./services/notification
  ports:
    - "8006:8000"
  environment:
    - DATABASE_URL=postgresql+asyncpg://notification_user:notification_pass123@notification-db:5432/notification_db
  depends_on:
    - notification-db
    - redis
    - rabbitmq
```

#### **2. Environment Setup**

```bash
# Copy environment template
cp services/notification/.env.example services/notification/.env

# Configure email provider
SMTP_USER=your-email@gmail.com
SMTP_PASSWORD=your-app-password

# Configure WhatsApp (optional)
WHATSAPP_API_KEY=your-twilio-sid:your-twilio-token
```

#### **3. Start Service**

```bash
# The service starts automatically with
docker-compose up -d

# Check health
curl http://localhost:8006/health

# View API docs
open http://localhost:8006/docs
```

### **📈 API Usage Examples**

#### **Send Welcome Email**

```http
POST /api/v1/notifications/send
{
  "type": "email",
  "recipient_email": "usuario@panaderia.com",
  "template_id": "welcome_email",
  "template_data": {
    "user_name": "Juan Carlos",
    "dashboard_url": "https://app.bakeryforecast.es/dashboard"
  }
}
```

#### **Send Forecast Alert**

```http
POST /api/v1/notifications/send
{
  "type": "email",
  "template_id": "forecast_alert_email",
  "template_data": {
    "bakery_name": "Panadería San Miguel",
    "product_name": "Pan integral",
    "forecast_date": "2025-01-25",
    "predicted_demand": 120,
    "variation_percentage": 35,
    "alert_message": "Aumento significativo esperado. Se recomienda incrementar producción."
  },
  "broadcast": true,
  "priority": "high"
}
```

#### **Update User Preferences**

```http
PATCH /api/v1/notifications/preferences
{
  "email_alerts": true,
  "whatsapp_enabled": false,
  "quiet_hours_start": "22:00",
  "quiet_hours_end": "08:00",
  "language": "es"
}
```

### **🎉 Key Benefits**

#### **✅ Production Ready**

- Complete error handling and logging
- Health checks and monitoring
- Rate limiting and security
- Multi-tenant architecture
- Scalable event-driven design

#### **✅ Business Focused**

- Spanish bakery templates
- Madrid timezone/localization
- Forecast-specific notifications
- Professional email designs
- WhatsApp support for urgent alerts

#### **✅ Developer Friendly**

- Comprehensive API documentation
- Type-safe Pydantic schemas
- Async/await throughout
- Structured logging
- Easy testing and debugging

#### **✅ Seamless Integration**

- Uses your shared libraries
- Integrates with gateway auth
- Follows your architectural patterns
- Maintains tenant isolation
- Publishes events to RabbitMQ

### **🚀 Next Steps**

#### **Immediate (Week 2)**

1. **Deploy the service**: Add to your docker-compose and start
2. **Configure SMTP**: Set up email provider credentials
3. **Test integration**: Send test notifications via API
4. **Event integration**: Verify RabbitMQ event handling

#### **Production Optimization**

1. **Email provider**: Consider SendGrid/Mailgun for production
2. **WhatsApp setup**: Configure Twilio Business API
3. **Template customization**: Add tenant-specific templates
4. **Analytics dashboard**: Add notification analytics to frontend

### **💡 Advanced Features Ready for Extension**

- ✅ **Push notifications**: Framework ready for mobile push
- ✅ **SMS support**: Easy to add SMS providers
- ✅ **A/B testing**: Template variant testing
- ✅ **Scheduled campaigns**: Marketing email campaigns
- ✅ **Analytics integration**: Detailed delivery analytics

**This notification service is now a complete, production-ready microservice that fully integrates with your bakery forecasting platform! It handles all notification needs from welcome emails to urgent forecast alerts, with proper Spanish localization and bakery-specific templates.** 🎯

services/notification/app/api/sse_routes.py (new file, 189 lines)
@@ -0,0 +1,189 @@
# services/notification/app/api/sse_routes.py
"""
SSE routes for real-time alert and recommendation streaming
"""

import asyncio
import json
from datetime import datetime
from typing import Optional
from fastapi import APIRouter, Request, Depends, HTTPException, BackgroundTasks
from sse_starlette.sse import EventSourceResponse
import structlog

from shared.auth.decorators import get_current_user

router = APIRouter(prefix="/sse", tags=["sse"])
logger = structlog.get_logger()

@router.get("/alerts/stream/{tenant_id}")
async def stream_alerts(
    tenant_id: str,
    request: Request,
    background_tasks: BackgroundTasks,
    current_user = Depends(get_current_user)
):
    """
    SSE endpoint for real-time alert and recommendation streaming
    Supports both alerts and recommendations through a unified stream
    """

    # Verify user has access to this tenant
    if not hasattr(current_user, 'has_access_to_tenant') or not current_user.has_access_to_tenant(tenant_id):
        raise HTTPException(403, "Access denied to this tenant")

    # Get SSE service from app state
    sse_service = getattr(request.app.state, 'sse_service', None)
    if not sse_service:
        raise HTTPException(500, "SSE service not available")

    async def event_generator():
        """Generate SSE events for the client"""
        client_queue = asyncio.Queue(maxsize=100)  # Limit queue size

        try:
            # Register client
            await sse_service.add_client(tenant_id, client_queue)

            logger.info("SSE client connected",
                        tenant_id=tenant_id,
                        user_id=getattr(current_user, 'id', 'unknown'))

            # Stream events
            while True:
                # Check if client disconnected
                if await request.is_disconnected():
                    logger.info("SSE client disconnected", tenant_id=tenant_id)
                    break

                try:
                    # Wait for events with timeout for keepalive
                    event = await asyncio.wait_for(
                        client_queue.get(),
                        timeout=30.0
                    )

                    yield event

                except asyncio.TimeoutError:
                    # Send keepalive ping
                    yield {
                        "event": "ping",
                        "data": json.dumps({
                            "timestamp": datetime.utcnow().isoformat(),
                            "status": "keepalive"
                        }),
                        "id": f"ping_{int(datetime.now().timestamp())}"
                    }

                except Exception as e:
                    logger.error("Error in SSE event generator",
                                 tenant_id=tenant_id,
                                 error=str(e))
                    break

        except Exception as e:
            logger.error("SSE connection error",
                         tenant_id=tenant_id,
                         error=str(e))
        finally:
            # Clean up on disconnect
            try:
                await sse_service.remove_client(tenant_id, client_queue)
                logger.info("SSE client cleanup completed", tenant_id=tenant_id)
            except Exception as e:
                logger.error("Error cleaning up SSE client",
                             tenant_id=tenant_id,
                             error=str(e))

    return EventSourceResponse(
        event_generator(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "X-Accel-Buffering": "no",  # Disable nginx buffering
        }
    )

@router.post("/items/{item_id}/acknowledge")
async def acknowledge_item(
    item_id: str,
    current_user = Depends(get_current_user)
):
    """Acknowledge an alert or recommendation"""
    try:
        # This would update the database
        # For now, just return success

        logger.info("Item acknowledged",
                    item_id=item_id,
                    user_id=getattr(current_user, 'id', 'unknown'))

        return {
            "status": "success",
            "item_id": item_id,
            "acknowledged_by": getattr(current_user, 'id', 'unknown'),
            "acknowledged_at": datetime.utcnow().isoformat()
        }

    except Exception as e:
        logger.error("Failed to acknowledge item", item_id=item_id, error=str(e))
        raise HTTPException(500, "Failed to acknowledge item")

@router.post("/items/{item_id}/resolve")
async def resolve_item(
    item_id: str,
    current_user = Depends(get_current_user)
):
    """Resolve an alert or recommendation"""
    try:
        # This would update the database
        # For now, just return success

        logger.info("Item resolved",
                    item_id=item_id,
                    user_id=getattr(current_user, 'id', 'unknown'))

        return {
            "status": "success",
            "item_id": item_id,
            "resolved_by": getattr(current_user, 'id', 'unknown'),
            "resolved_at": datetime.utcnow().isoformat()
        }

    except Exception as e:
        logger.error("Failed to resolve item", item_id=item_id, error=str(e))
        raise HTTPException(500, "Failed to resolve item")

@router.get("/status/{tenant_id}")
async def get_sse_status(
    tenant_id: str,
    request: Request,  # needed below to reach app.state (was missing in the original)
    current_user = Depends(get_current_user)
):
    """Get SSE connection status for a tenant"""

    # Verify user has access to this tenant
    if not hasattr(current_user, 'has_access_to_tenant') or not current_user.has_access_to_tenant(tenant_id):
        raise HTTPException(403, "Access denied to this tenant")

    try:
        # Get SSE service from app state
        sse_service = getattr(request.app.state, 'sse_service', None)
        if not sse_service:
            return {"status": "unavailable", "message": "SSE service not initialized"}

        metrics = sse_service.get_metrics()
        tenant_connections = len(sse_service.active_connections.get(tenant_id, set()))

        return {
            "status": "available",
            "tenant_id": tenant_id,
            "connections": tenant_connections,
            "total_connections": metrics["total_connections"],
            "active_tenants": metrics["active_tenants"]
        }

    except Exception as e:
        logger.error("Failed to get SSE status", tenant_id=tenant_id, error=str(e))
        raise HTTPException(500, "Failed to get SSE status")

@@ -1,9 +1,9 @@
# ================================================================
# services/notification/app/main.py - ENHANCED WITH SSE SUPPORT
# ================================================================
"""
Notification Service Main Application
Handles email, WhatsApp notifications and SSE for real-time alerts/recommendations
"""

import structlog
@@ -15,7 +15,12 @@ from fastapi.responses import JSONResponse
from app.core.config import settings
from app.core.database import init_db
from app.api.notifications import router as notification_router
from app.api.sse_routes import router as sse_router
from app.services.messaging import setup_messaging, cleanup_messaging
from app.services.sse_service import SSEService
from app.services.notification_orchestrator import NotificationOrchestrator
from app.services.email_service import EmailService
from app.services.whatsapp_service import WhatsAppService
from shared.monitoring import setup_logging, HealthChecker
from shared.monitoring.metrics import setup_metrics_early

@@ -30,8 +35,8 @@ health_checker = None
# Create FastAPI app FIRST
app = FastAPI(
    title="Bakery Notification Service",
    description="Email, WhatsApp and SSE notification service for bakery alerts and recommendations",
    version="2.0.0",
    docs_url="/docs",
    redoc_url="/redoc"
)
@@ -56,12 +61,36 @@ async def lifespan(app: FastAPI):
        await setup_messaging()
        logger.info("Messaging initialized")

        # Initialize services
        email_service = EmailService()
        whatsapp_service = WhatsAppService()

        # Initialize SSE service
        sse_service = SSEService(settings.REDIS_URL)
        await sse_service.initialize()
        logger.info("SSE service initialized")

        # Create orchestrator
        orchestrator = NotificationOrchestrator(
            email_service=email_service,
            whatsapp_service=whatsapp_service,
            sse_service=sse_service
        )

        # Store services in app state
        app.state.orchestrator = orchestrator
        app.state.sse_service = sse_service
        app.state.email_service = email_service
        app.state.whatsapp_service = whatsapp_service

        # Register custom metrics (metrics_collector already exists)
        metrics_collector.register_counter("notifications_sent_total", "Total notifications sent", labels=["type", "status", "channel"])
        metrics_collector.register_counter("emails_sent_total", "Total emails sent", labels=["status"])
        metrics_collector.register_counter("whatsapp_sent_total", "Total WhatsApp messages sent", labels=["status"])
        metrics_collector.register_counter("sse_events_sent_total", "Total SSE events sent", labels=["tenant", "event_type"])
        metrics_collector.register_histogram("notification_processing_duration_seconds", "Time spent processing notifications")
        metrics_collector.register_gauge("notification_queue_size", "Current notification queue size")
        metrics_collector.register_gauge("sse_active_connections", "Number of active SSE connections")

        # Setup health checker
        health_checker = HealthChecker("notification-service")
@@ -93,14 +122,22 @@ async def lifespan(app: FastAPI):
        # Add WhatsApp service health check
        async def check_whatsapp_service():
            try:
                from app.services.whatsapp_service import WhatsAppService
                whatsapp_service = WhatsAppService()
                return await whatsapp_service.health_check()
            except Exception as e:
                return f"WhatsApp service error: {e}"

        health_checker.add_check("whatsapp_service", check_whatsapp_service, timeout=10.0, critical=False)

        # Add SSE service health check
        async def check_sse_service():
            try:
                metrics = sse_service.get_metrics()
                return "healthy" if metrics["redis_connected"] else "Redis connection failed"
            except Exception as e:
                return f"SSE service error: {e}"

        health_checker.add_check("sse_service", check_sse_service, timeout=5.0, critical=True)

        # Add messaging health check
        def check_messaging():
            try:
@@ -115,7 +152,7 @@ async def lifespan(app: FastAPI):
        # Store health checker in app state
        app.state.health_checker = health_checker

        logger.info("Notification Service with SSE support started successfully")

    except Exception as e:
        logger.error(f"Failed to start Notification Service: {e}")
@@ -126,10 +163,15 @@ async def lifespan(app: FastAPI):
    # Shutdown
    logger.info("Shutting down Notification Service...")
    try:
        # Shutdown SSE service
        if hasattr(app.state, 'sse_service'):
            await app.state.sse_service.shutdown()
            logger.info("SSE service shutdown completed")

        await cleanup_messaging()
        logger.info("Messaging cleanup completed")
    except Exception as e:
        logger.error(f"Error during shutdown: {e}")

# Set lifespan AFTER metrics setup
app.router.lifespan_context = lifespan
@@ -145,18 +187,30 @@ app.add_middleware(

# Include routers
app.include_router(notification_router, prefix="/api/v1", tags=["notifications"])
app.include_router(sse_router, prefix="/api/v1", tags=["sse"])

# Health check endpoint
@app.get("/health")
async def health_check():
    """Comprehensive health check endpoint including SSE"""
    if health_checker:
        health_result = await health_checker.check_health()

        # Add SSE metrics to health check
        if hasattr(app.state, 'sse_service'):
            try:
                sse_metrics = app.state.sse_service.get_metrics()
                health_result['sse_metrics'] = sse_metrics
            except Exception as e:
                health_result['sse_error'] = str(e)

        return health_result
    else:
        return {
            "service": "notification-service",
            "status": "healthy",
            "version": "2.0.0",
            "features": ["email", "whatsapp", "sse", "alerts", "recommendations"]
        }

# Metrics endpoint

@@ -276,14 +276,26 @@ class EmailService:

        # Test SMTP connection
        if self.smtp_ssl:
            # Use implicit TLS/SSL connection (typically port 465)
            server = aiosmtplib.SMTP(hostname=self.smtp_host, port=self.smtp_port, use_tls=True)
            await server.connect()
            # No starttls() needed when using implicit TLS
        else:
            # Use plain connection, optionally upgraded with STARTTLS
            server = aiosmtplib.SMTP(hostname=self.smtp_host, port=self.smtp_port)
            await server.connect()

            if self.smtp_tls:
                # Try STARTTLS, but handle the case where the connection is already secure
                try:
                    await server.starttls()
                except Exception as starttls_error:
                    # If STARTTLS fails because the connection is already using TLS, that's okay
                    if "already using TLS" in str(starttls_error) or "already secure" in str(starttls_error):
                        logger.debug("SMTP connection already secure, skipping STARTTLS")
                    else:
                        # Re-raise other STARTTLS errors
                        raise starttls_error

        await server.login(self.smtp_user, self.smtp_password)
        await server.quit()

services/notification/app/services/notification_orchestrator.py (new file, 279 lines)
@@ -0,0 +1,279 @@
# services/notification/app/services/notification_orchestrator.py
"""
Notification orchestrator for managing delivery across all channels
Includes SSE integration for real-time dashboard updates
"""

from typing import List, Dict, Any
from datetime import datetime
import structlog

from .email_service import EmailService
from .whatsapp_service import WhatsAppService
from .sse_service import SSEService

logger = structlog.get_logger()

class NotificationOrchestrator:
    """
    Orchestrates delivery across all notification channels
    Now includes SSE for real-time dashboard updates, with support for recommendations
    """

    def __init__(
        self,
        email_service: EmailService,
        whatsapp_service: WhatsAppService,
        sse_service: SSEService,
        push_service=None  # Optional push service
    ):
        self.email_service = email_service
        self.whatsapp_service = whatsapp_service
        self.sse_service = sse_service
        self.push_service = push_service

    async def send_notification(
        self,
        tenant_id: str,
        notification: Dict[str, Any],
        channels: List[str]
    ) -> Dict[str, Any]:
        """
        Send notification through specified channels
        Channels can include: email, whatsapp, push, dashboard (SSE)
        """
        results = {}

        # Always send to dashboard for visibility (SSE)
        if 'dashboard' in channels or notification.get('type') in ['alert', 'recommendation']:
            try:
                await self.sse_service.send_item_notification(
                    tenant_id,
                    notification
                )
                results['dashboard'] = {'status': 'sent', 'timestamp': datetime.utcnow().isoformat()}
                logger.info("Item sent to dashboard via SSE",
                            tenant_id=tenant_id,
                            item_type=notification.get('type'),
                            item_id=notification.get('id'))
            except Exception as e:
                logger.error("Failed to send to dashboard",
                             tenant_id=tenant_id,
                             error=str(e))
                results['dashboard'] = {'status': 'failed', 'error': str(e)}

        # Send to email channel
        if 'email' in channels:
            try:
                email_result = await self.email_service.send_notification_email(
                    to_email=notification.get('email'),
                    subject=notification.get('title'),
                    template_data={
                        'title': notification.get('title'),
                        'message': notification.get('message'),
                        'severity': notification.get('severity'),
                        'item_type': notification.get('type'),
                        'actions': notification.get('actions', []),
                        'metadata': notification.get('metadata', {}),
                        'timestamp': datetime.utcnow().isoformat()
                    },
                    notification_type=notification.get('type', 'alert')
                )
                results['email'] = email_result
            except Exception as e:
                logger.error("Failed to send email",
                             tenant_id=tenant_id,
                             error=str(e))
                results['email'] = {'status': 'failed', 'error': str(e)}

        # Send to WhatsApp channel
        if 'whatsapp' in channels:
            try:
                whatsapp_result = await self.whatsapp_service.send_notification_message(
                    to_phone=notification.get('phone'),
                    message=self._format_whatsapp_message(notification),
                    notification_type=notification.get('type', 'alert')
                )
                results['whatsapp'] = whatsapp_result
            except Exception as e:
                logger.error("Failed to send WhatsApp",
                             tenant_id=tenant_id,
                             error=str(e))
                results['whatsapp'] = {'status': 'failed', 'error': str(e)}

        # Send to push notification channel
        if 'push' in channels and self.push_service:
            try:
                push_result = await self.push_service.send_notification(
                    user_id=notification.get('user_id'),
                    title=notification.get('title'),
                    body=notification.get('message'),
                    data={
                        'item_type': notification.get('type'),
                        'severity': notification.get('severity'),
                        'item_id': notification.get('id'),
                        'metadata': notification.get('metadata', {})
                    }
                )
                results['push'] = push_result
            except Exception as e:
                logger.error("Failed to send push notification",
                             tenant_id=tenant_id,
                             error=str(e))
                results['push'] = {'status': 'failed', 'error': str(e)}

        # Log summary
        successful_channels = [ch for ch, result in results.items() if result.get('status') == 'sent']
        failed_channels = [ch for ch, result in results.items() if result.get('status') == 'failed']
|
||||
|
||||
logger.info("Notification delivery completed",
|
||||
tenant_id=tenant_id,
|
||||
item_type=notification.get('type'),
|
||||
item_id=notification.get('id'),
|
||||
successful_channels=successful_channels,
|
||||
failed_channels=failed_channels,
|
||||
total_channels=len(channels))
|
||||
|
||||
return {
|
||||
'status': 'completed',
|
||||
'successful_channels': successful_channels,
|
||||
'failed_channels': failed_channels,
|
||||
'results': results,
|
||||
'timestamp': datetime.utcnow().isoformat()
|
||||
}
|
||||
|
||||
def _format_whatsapp_message(self, notification: Dict[str, Any]) -> str:
|
||||
"""Format message for WhatsApp with emojis and structure"""
|
||||
item_type = notification.get('type', 'alert')
|
||||
severity = notification.get('severity', 'medium')
|
||||
|
||||
# Get appropriate emoji
|
||||
type_emoji = '🚨' if item_type == 'alert' else '💡'
|
||||
severity_emoji = {
|
||||
'urgent': '🔴',
|
||||
'high': '🟡',
|
||||
'medium': '🔵',
|
||||
'low': '🟢'
|
||||
}.get(severity, '🔵')
|
||||
|
||||
message = f"{type_emoji} {severity_emoji} *{notification.get('title', 'Notificación')}*\n\n"
|
||||
message += f"{notification.get('message', '')}\n"
|
||||
|
||||
# Add actions if available
|
||||
actions = notification.get('actions', [])
|
||||
if actions and len(actions) > 0:
|
||||
message += "\n*Acciones sugeridas:*\n"
|
||||
for i, action in enumerate(actions[:3], 1): # Limit to 3 actions for WhatsApp
|
||||
message += f"{i}. {action}\n"
|
||||
|
||||
# Add timestamp
|
||||
message += f"\n_Enviado: {datetime.now().strftime('%H:%M, %d/%m/%Y')}_"
|
||||
|
||||
return message
|
||||
|
||||
def get_channels_by_severity(self, severity: str, item_type: str, hour: int = None) -> List[str]:
|
||||
"""
|
||||
Determine notification channels based on severity and item_type
|
||||
Now includes 'dashboard' as a channel
|
||||
"""
|
||||
if hour is None:
|
||||
hour = datetime.now().hour
|
||||
|
||||
# Dashboard always gets all items
|
||||
channels = ['dashboard']
|
||||
|
||||
if item_type == 'alert':
|
||||
if severity == 'urgent':
|
||||
# Urgent alerts: All channels immediately
|
||||
channels.extend(['email', 'whatsapp', 'push'])
|
||||
|
||||
elif severity == 'high':
|
||||
# High alerts: Email and WhatsApp during extended hours
|
||||
if 6 <= hour <= 22:
|
||||
channels.extend(['email', 'whatsapp'])
|
||||
else:
|
||||
channels.append('email') # Email only during night
|
||||
|
||||
elif severity == 'medium':
|
||||
# Medium alerts: Email during business hours
|
||||
if 7 <= hour <= 20:
|
||||
channels.append('email')
|
||||
|
||||
elif item_type == 'recommendation':
|
||||
# Recommendations: Generally less urgent, respect business hours
|
||||
if severity in ['medium', 'high']:
|
||||
if 8 <= hour <= 19: # Stricter business hours for recommendations
|
||||
channels.append('email')
|
||||
# Low/urgent: Dashboard only (urgent rare for recommendations)
|
||||
|
||||
return channels
|
||||
|
||||
async def health_check(self) -> Dict[str, Any]:
|
||||
"""Check health of all notification channels"""
|
||||
health_status = {
|
||||
'status': 'healthy',
|
||||
'channels': {},
|
||||
'timestamp': datetime.utcnow().isoformat()
|
||||
}
|
||||
|
||||
# Check email service
|
||||
try:
|
||||
email_health = await self.email_service.health_check()
|
||||
health_status['channels']['email'] = email_health
|
||||
except Exception as e:
|
||||
health_status['channels']['email'] = {'status': 'unhealthy', 'error': str(e)}
|
||||
|
||||
# Check WhatsApp service
|
||||
try:
|
||||
whatsapp_health = await self.whatsapp_service.health_check()
|
||||
health_status['channels']['whatsapp'] = whatsapp_health
|
||||
except Exception as e:
|
||||
health_status['channels']['whatsapp'] = {'status': 'unhealthy', 'error': str(e)}
|
||||
|
||||
# Check SSE service
|
||||
try:
|
||||
sse_metrics = self.sse_service.get_metrics()
|
||||
sse_status = 'healthy' if sse_metrics['redis_connected'] else 'unhealthy'
|
||||
health_status['channels']['sse'] = {
|
||||
'status': sse_status,
|
||||
'metrics': sse_metrics
|
||||
}
|
||||
except Exception as e:
|
||||
health_status['channels']['sse'] = {'status': 'unhealthy', 'error': str(e)}
|
||||
|
||||
# Check push service if available
|
||||
if self.push_service:
|
||||
try:
|
||||
push_health = await self.push_service.health_check()
|
||||
health_status['channels']['push'] = push_health
|
||||
except Exception as e:
|
||||
health_status['channels']['push'] = {'status': 'unhealthy', 'error': str(e)}
|
||||
|
||||
# Determine overall status
|
||||
unhealthy_channels = [
|
||||
ch for ch, status in health_status['channels'].items()
|
||||
if status.get('status') != 'healthy'
|
||||
]
|
||||
|
||||
if unhealthy_channels:
|
||||
health_status['status'] = 'degraded' if len(unhealthy_channels) < len(health_status['channels']) else 'unhealthy'
|
||||
health_status['unhealthy_channels'] = unhealthy_channels
|
||||
|
||||
return health_status
|
||||
|
||||
def get_metrics(self) -> Dict[str, Any]:
|
||||
"""Get aggregated metrics from all services"""
|
||||
metrics = {
|
||||
'timestamp': datetime.utcnow().isoformat(),
|
||||
'channels': {}
|
||||
}
|
||||
|
||||
# Get SSE metrics
|
||||
try:
|
||||
metrics['channels']['sse'] = self.sse_service.get_metrics()
|
||||
except Exception as e:
|
||||
logger.error("Failed to get SSE metrics", error=str(e))
|
||||
|
||||
# Additional metrics could be added here for other services
|
||||
|
||||
return metrics
|
||||
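A minimal sketch of how a caller might combine `get_channels_by_severity` with `send_notification`. The constructor arguments and payload keys come from the class above; the concrete values, tenant id, and the surrounding function are illustrative:

```python
async def notify_capacity_alert(orchestrator: NotificationOrchestrator) -> None:
    # Payload keys mirror what send_notification reads; the values are made up
    notification = {
        'id': 'alert-123',
        'type': 'alert',
        'severity': 'high',
        'title': 'Capacidad excedida',
        'message': 'La producción planificada supera la capacidad diaria.',
        'email': 'owner@bakery.example',
        'phone': '+34600000000',
    }
    # Route by severity and type ('dashboard' is always included), then deliver
    channels = orchestrator.get_channels_by_severity('high', 'alert')
    result = await orchestrator.send_notification('tenant-1', notification, channels)
    print(result['successful_channels'], result['failed_channels'])
```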
services/notification/app/services/sse_service.py (new file, 256 lines)
@@ -0,0 +1,256 @@
# services/notification/app/services/sse_service.py
"""
Server-Sent Events service for real-time notifications
Integrated within the notification service for alerts and recommendations
"""

import asyncio
import json
from typing import Dict, Set, Any
from datetime import datetime

import structlog
from redis.asyncio import Redis

logger = structlog.get_logger()


class SSEService:
    """
    Server-Sent Events service for real-time notifications
    Handles both alerts and recommendations through unified SSE streams
    """

    def __init__(self, redis_url: str):
        self.redis_url = redis_url
        self.redis = None
        self.active_connections: Dict[str, Set[asyncio.Queue]] = {}
        self.pubsub_tasks: Dict[str, asyncio.Task] = {}

    async def initialize(self):
        """Initialize Redis connection"""
        try:
            self.redis = Redis.from_url(self.redis_url)
            logger.info("SSE Service initialized with Redis connection")
        except Exception as e:
            logger.error("Failed to initialize SSE service", error=str(e))
            raise

    async def shutdown(self):
        """Clean shutdown"""
        try:
            # Cancel all pubsub tasks
            for task in self.pubsub_tasks.values():
                if not task.done():
                    task.cancel()
                    try:
                        await task
                    except asyncio.CancelledError:
                        pass

            # Close all client connections
            for tenant_id, connections in self.active_connections.items():
                for queue in connections.copy():
                    try:
                        await queue.put({"event": "shutdown", "data": json.dumps({"status": "server_shutdown"})})
                    except Exception:
                        pass

            # Close Redis connection
            if self.redis:
                await self.redis.close()

            logger.info("SSE Service shutdown completed")

        except Exception as e:
            logger.error("Error during SSE shutdown", error=str(e))

    async def add_client(self, tenant_id: str, client_queue: asyncio.Queue):
        """Add a new SSE client connection"""
        try:
            if tenant_id not in self.active_connections:
                self.active_connections[tenant_id] = set()
                # Start pubsub listener for this tenant if not exists
                if tenant_id not in self.pubsub_tasks:
                    task = asyncio.create_task(self._listen_to_tenant_channel(tenant_id))
                    self.pubsub_tasks[tenant_id] = task

            self.active_connections[tenant_id].add(client_queue)

            client_count = len(self.active_connections[tenant_id])
            logger.info("SSE client added",
                        tenant_id=tenant_id,
                        total_clients=client_count)

            # Send connection confirmation
            await client_queue.put({
                "event": "connected",
                "data": json.dumps({
                    "status": "connected",
                    "tenant_id": tenant_id,
                    "timestamp": datetime.utcnow().isoformat(),
                    "client_count": client_count
                })
            })

            # Send any active items (alerts and recommendations)
            active_items = await self.get_active_items(tenant_id)
            if active_items:
                await client_queue.put({
                    "event": "initial_items",
                    "data": json.dumps(active_items)
                })

        except Exception as e:
            logger.error("Error adding SSE client", tenant_id=tenant_id, error=str(e))

    async def remove_client(self, tenant_id: str, client_queue: asyncio.Queue):
        """Remove SSE client connection"""
        try:
            if tenant_id in self.active_connections:
                self.active_connections[tenant_id].discard(client_queue)

                # If no more clients for this tenant, stop the pubsub listener
                if not self.active_connections[tenant_id]:
                    del self.active_connections[tenant_id]
                    if tenant_id in self.pubsub_tasks:
                        task = self.pubsub_tasks[tenant_id]
                        if not task.done():
                            task.cancel()
                        del self.pubsub_tasks[tenant_id]

                logger.info("SSE client removed", tenant_id=tenant_id)

        except Exception as e:
            logger.error("Error removing SSE client", tenant_id=tenant_id, error=str(e))

    async def _listen_to_tenant_channel(self, tenant_id: str):
        """Listen to Redis channel for tenant-specific items"""
        # Bind these before the try block so the finally clause can always reference them
        pubsub = None
        pubsub_redis = None
        channel = f"alerts:{tenant_id}"
        try:
            # Create a separate Redis connection for pubsub
            pubsub_redis = Redis.from_url(self.redis_url)
            pubsub = pubsub_redis.pubsub()
            await pubsub.subscribe(channel)

            logger.info("Started listening to tenant channel",
                        tenant_id=tenant_id,
                        channel=channel)

            async for message in pubsub.listen():
                if message["type"] == "message":
                    # Broadcast to all connected clients for this tenant
                    await self.broadcast_to_tenant(tenant_id, message["data"])

        except asyncio.CancelledError:
            logger.info("Stopped listening to tenant channel", tenant_id=tenant_id)
        except Exception as e:
            logger.error("Error in pubsub listener", tenant_id=tenant_id, error=str(e))
        finally:
            try:
                if pubsub:
                    await pubsub.unsubscribe(channel)
                if pubsub_redis:
                    await pubsub_redis.close()
            except Exception:
                pass

    async def broadcast_to_tenant(self, tenant_id: str, message: str):
        """Broadcast message to all connected clients of a tenant"""
        if tenant_id not in self.active_connections:
            return

        try:
            item_data = json.loads(message)
            event = {
                "event": item_data.get('item_type', 'item'),  # 'alert' or 'recommendation'
                "data": json.dumps(item_data),
                "id": item_data.get("id")
            }

            # Send to all connected clients
            disconnected = []
            for client_queue in self.active_connections[tenant_id]:
                try:
                    # Use put_nowait to avoid blocking
                    client_queue.put_nowait(event)
                except asyncio.QueueFull:
                    logger.warning("Client queue full, dropping message", tenant_id=tenant_id)
                    disconnected.append(client_queue)
                except Exception as e:
                    logger.warning("Failed to send to client", tenant_id=tenant_id, error=str(e))
                    disconnected.append(client_queue)

            # Clean up disconnected clients
            for queue in disconnected:
                await self.remove_client(tenant_id, queue)

            if disconnected:
                logger.info("Cleaned up disconnected clients",
                            tenant_id=tenant_id,
                            count=len(disconnected))

        except Exception as e:
            logger.error("Error broadcasting to tenant", tenant_id=tenant_id, error=str(e))

    async def send_item_notification(self, tenant_id: str, item: Dict[str, Any]):
        """
        Send alert or recommendation via SSE (called by notification orchestrator)
        """
        try:
            # Publish to Redis for SSE streaming
            channel = f"alerts:{tenant_id}"

            item_message = {
                'id': item.get('id'),
                'item_type': item.get('type'),  # 'alert' or 'recommendation'
                'type': item.get('alert_type', item.get('type')),
                'severity': item.get('severity'),
                'title': item.get('title'),
                'message': item.get('message'),
                'actions': item.get('actions', []),
                'metadata': item.get('metadata', {}),
                'timestamp': item.get('timestamp', datetime.utcnow().isoformat()),
                'status': 'active'
            }

            await self.redis.publish(channel, json.dumps(item_message))

            logger.info("Item published to SSE",
                        tenant_id=tenant_id,
                        item_type=item.get('type'),
                        item_id=item.get('id'))

        except Exception as e:
            logger.error("Error sending item notification via SSE",
                         tenant_id=tenant_id,
                         error=str(e))

    async def get_active_items(self, tenant_id: str) -> list:
        """Fetch active alerts and recommendations from the database"""
        try:
            # This would integrate with the actual database.
            # For now, return an empty list as a placeholder;
            # a real implementation would query the alerts table, e.g.:
            #
            # query = """
            #     SELECT id, item_type, alert_type, severity, title, message,
            #            actions, metadata, created_at, status
            #     FROM alerts
            #     WHERE tenant_id = $1
            #       AND status = 'active'
            #     ORDER BY severity_weight DESC, created_at DESC
            #     LIMIT 50
            # """

            return []  # Placeholder

        except Exception as e:
            logger.error("Error fetching active items", tenant_id=tenant_id, error=str(e))
            return []

    def get_metrics(self) -> Dict[str, Any]:
        """Get SSE service metrics"""
        return {
            "active_tenants": len(self.active_connections),
            "total_connections": sum(len(connections) for connections in self.active_connections.values()),
            "active_listeners": len(self.pubsub_tasks),
            # redis-py's asyncio client exposes no 'closed' attribute, so just
            # report whether a connection object exists
            "redis_connected": self.redis is not None
        }
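For context, a sketch of how the `sse_router` registered in `main.py` might expose this service using `sse-starlette` (pinned in the updated requirements below); the route path and queue size are assumptions:

```python
import asyncio

from fastapi import APIRouter, Request
from sse_starlette.sse import EventSourceResponse

sse_router = APIRouter()


@sse_router.get("/tenants/{tenant_id}/notifications/stream")
async def stream_notifications(tenant_id: str, request: Request):
    sse_service = request.app.state.sse_service
    queue: asyncio.Queue = asyncio.Queue(maxsize=100)
    await sse_service.add_client(tenant_id, queue)

    async def event_generator():
        try:
            while True:
                # Each queued dict already carries the SSE fields (event/data/id)
                yield await queue.get()
        finally:
            await sse_service.remove_client(tenant_id, queue)

    return EventSourceResponse(event_generator())
```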
@@ -30,6 +30,17 @@ class WhatsAppService:
        self.from_number = settings.WHATSAPP_FROM_NUMBER
        self.enabled = settings.ENABLE_WHATSAPP_NOTIFICATIONS

    def _parse_api_credentials(self):
        """Parse the API key into username and password for Twilio basic auth"""
        if not self.api_key or ":" not in self.api_key:
            raise ValueError("WhatsApp API key must be in format 'username:password'")

        api_parts = self.api_key.split(":", 1)
        if len(api_parts) != 2:
            raise ValueError("Invalid WhatsApp API key format")

        return api_parts[0], api_parts[1]

    async def send_message(
        self,
        to_phone: str,

@@ -181,10 +192,22 @@ class WhatsAppService:
                return False

            # Test API connectivity with a simple request
            # Parse API key (expected format: username:password for Twilio basic auth)
            if ":" not in self.api_key:
                logger.error("WhatsApp API key must be in format 'username:password'")
                return False

            api_parts = self.api_key.split(":", 1)  # Split on the first ':' only
            if len(api_parts) != 2:
                logger.error("Invalid WhatsApp API key format")
                return False

            username, password = api_parts

            async with httpx.AsyncClient(timeout=10.0) as client:
                response = await client.get(
                    f"{self.base_url}/v1/Account",  # Twilio account info endpoint
                    auth=(username, password)
                )

            if response.status_code == 200:

@@ -206,6 +229,13 @@ class WhatsAppService:
    async def _send_text_message(self, to_phone: str, message: str) -> bool:
        """Send regular text message via Twilio"""
        try:
            # Parse API credentials
            try:
                username, password = self._parse_api_credentials()
            except ValueError as e:
                logger.error(f"WhatsApp API key configuration error: {e}")
                return False

            # Prepare request data
            data = {
                "From": f"whatsapp:{self.from_number}",

@@ -216,9 +246,9 @@ class WhatsAppService:
            # Send via Twilio API
            async with httpx.AsyncClient(timeout=30.0) as client:
                response = await client.post(
                    f"{self.base_url}/2010-04-01/Accounts/{username}/Messages.json",
                    data=data,
                    auth=(username, password)
                )

            if response.status_code == 201:

@@ -245,6 +275,13 @@ class WhatsAppService:
    ) -> bool:
        """Send WhatsApp template message via Twilio"""
        try:
            # Parse API credentials
            try:
                username, password = self._parse_api_credentials()
            except ValueError as e:
                logger.error(f"WhatsApp API key configuration error: {e}")
                return False

            # Prepare template data
            content_variables = {str(i+1): param for i, param in enumerate(parameters)}

@@ -258,9 +295,9 @@ class WhatsAppService:
            # Send via Twilio API
            async with httpx.AsyncClient(timeout=30.0) as client:
                response = await client.post(
                    f"{self.base_url}/2010-04-01/Accounts/{username}/Messages.json",
                    data=data,
                    auth=(username, password)
                )

            if response.status_code == 201:

@@ -315,10 +352,17 @@ class WhatsAppService:
    async def _get_message_status(self, message_sid: str) -> Optional[str]:
        """Get message delivery status from Twilio"""
        try:
            # Parse API credentials
            try:
                username, password = self._parse_api_credentials()
            except ValueError as e:
                logger.error(f"WhatsApp API key configuration error: {e}")
                return None

            async with httpx.AsyncClient(timeout=10.0) as client:
                response = await client.get(
                    f"{self.base_url}/2010-04-01/Accounts/{username}/Messages/{message_sid}.json",
                    auth=(username, password)
                )

            if response.status_code == 200:
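To make the expected credential format concrete: `_parse_api_credentials` treats everything before the first `:` as the basic-auth username and the rest as the password. For Twilio this is conventionally the Account SID plus auth token, though the values below are invented:

```python
# WHATSAPP_API_KEY is stored as 'username:password'
api_key = "ACxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx:your_auth_token"
username, password = api_key.split(":", 1)  # split on the first ':' only
# username -> "ACxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
# password -> "your_auth_token" (may itself safely contain ':')
```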
@@ -3,6 +3,7 @@ fastapi==0.104.1
uvicorn[standard]==0.24.0
pydantic==2.5.0
pydantic-settings==2.1.0
sse-starlette==1.6.5

# Database
sqlalchemy==2.0.23

@@ -22,8 +23,9 @@ aiofiles==23.2.1
aiosmtplib==3.0.1
email-validator==2.1.0

# Messaging & Redis
aio-pika==9.3.1
redis==5.0.1

# Template Engine
jinja2==3.1.2
@@ -1,248 +0,0 @@
# Orders Service

Customer orders and procurement planning service for the bakery management system.

## Overview

The Orders Service handles all order-related operations including:

- **Customer Management**: Complete customer lifecycle and relationship management
- **Order Processing**: End-to-end order management from creation to fulfillment
- **Procurement Planning**: Automated procurement requirement calculation and planning
- **Business Intelligence**: Order pattern analysis and business model detection
- **Dashboard Analytics**: Comprehensive reporting and metrics for order operations

## Features

### Core Capabilities
- Customer registration and management with detailed profiles
- Order creation, tracking, and status management
- Automated demand requirements calculation for production planning
- Procurement planning with supplier coordination
- Business model detection (individual bakery vs central bakery)
- Comprehensive dashboard with real-time metrics
- Integration with production, inventory, suppliers, and sales services

### API Endpoints

#### Dashboard & Analytics
- `GET /api/v1/tenants/{tenant_id}/orders/dashboard-summary` - Comprehensive dashboard data
- `GET /api/v1/tenants/{tenant_id}/orders/demand-requirements` - Demand analysis for production
- `GET /api/v1/tenants/{tenant_id}/orders/business-model` - Business model detection

#### Order Management
- `POST /api/v1/tenants/{tenant_id}/orders` - Create new customer order (example below)
- `GET /api/v1/tenants/{tenant_id}/orders` - List orders with filtering and pagination
- `GET /api/v1/tenants/{tenant_id}/orders/{order_id}` - Get order details with items
- `PUT /api/v1/tenants/{tenant_id}/orders/{order_id}/status` - Update order status

#### Customer Management
- `POST /api/v1/tenants/{tenant_id}/customers` - Create new customer
- `GET /api/v1/tenants/{tenant_id}/customers` - List customers with filtering
- `GET /api/v1/tenants/{tenant_id}/customers/{customer_id}` - Get customer details

#### Health & Status
- `GET /api/v1/tenants/{tenant_id}/orders/status` - Service status information
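As an illustration of the order creation endpoint above, a hedged httpx sketch; the request body fields are assumptions based on the models described below, not a documented schema:

```python
import httpx

payload = {
    "customer_id": "c-001",  # field names are assumed, not a documented schema
    "delivery_date": "2025-06-01",
    "items": [{"product_id": "p-croissant", "quantity": 24}],
}

response = httpx.post(
    "http://localhost:8000/api/v1/tenants/t-123/orders",
    json=payload,
    headers={"Authorization": "Bearer <jwt>"},  # JWT validation per the auth section
)
print(response.status_code, response.json())
```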
## Service Integration

### Shared Clients Used
- **InventoryServiceClient**: Stock levels, product availability validation
- **ProductionServiceClient**: Production notifications, capacity planning
- **SalesServiceClient**: Historical sales data for demand forecasting
- **NotificationServiceClient**: Customer notifications and alerts

### Authentication
Uses shared authentication patterns with tenant isolation:
- JWT token validation
- Tenant access verification
- User permission checks

## Configuration

Key configuration options in `app/core/config.py`:

### Order Processing
- `ORDER_PROCESSING_ENABLED`: Enable automatic order processing (default: true)
- `AUTO_APPROVE_ORDERS`: Automatically approve orders (default: false)
- `MAX_ORDER_ITEMS`: Maximum items per order (default: 50)

### Procurement Planning
- `PROCUREMENT_PLANNING_ENABLED`: Enable procurement planning (default: true)
- `PROCUREMENT_LEAD_TIME_DAYS`: Standard procurement lead time (default: 3)
- `DEMAND_FORECAST_DAYS`: Days for demand forecasting (default: 14)
- `SAFETY_STOCK_PERCENTAGE`: Safety stock buffer (default: 20%)

### Business Model Detection
- `ENABLE_BUSINESS_MODEL_DETECTION`: Enable automatic detection (default: true)
- `CENTRAL_BAKERY_ORDER_THRESHOLD`: Order threshold for central bakery (default: 20)
- `INDIVIDUAL_BAKERY_ORDER_THRESHOLD`: Order threshold for individual bakery (default: 5)

### Customer Management
- `CUSTOMER_VALIDATION_ENABLED`: Enable customer validation (default: true)
- `MAX_CUSTOMERS_PER_TENANT`: Maximum customers per tenant (default: 10000)
- `CUSTOMER_CREDIT_CHECK_ENABLED`: Enable credit checking (default: false)

### Order Validation
- `MIN_ORDER_VALUE`: Minimum order value (default: 0.0)
- `MAX_ORDER_VALUE`: Maximum order value (default: 100000.0)
- `VALIDATE_PRODUCT_AVAILABILITY`: Check product availability (default: true)

### Alert Thresholds
- `HIGH_VALUE_ORDER_THRESHOLD`: High-value order alert (default: 5000.0)
- `LARGE_QUANTITY_ORDER_THRESHOLD`: Large quantity alert (default: 100)
- `RUSH_ORDER_HOURS_THRESHOLD`: Rush order time threshold (default: 24)
- `PROCUREMENT_SHORTAGE_THRESHOLD`: Procurement shortage alert (default: 90%)

### Payment and Pricing
- `PAYMENT_VALIDATION_ENABLED`: Enable payment validation (default: true)
- `DYNAMIC_PRICING_ENABLED`: Enable dynamic pricing (default: false)
- `DISCOUNT_ENABLED`: Enable discounts (default: true)
- `MAX_DISCOUNT_PERCENTAGE`: Maximum discount allowed (default: 50%)

### Delivery and Fulfillment
- `DELIVERY_TRACKING_ENABLED`: Enable delivery tracking (default: true)
- `DEFAULT_DELIVERY_WINDOW_HOURS`: Default delivery window (default: 48)
- `PICKUP_ENABLED`: Enable pickup orders (default: true)
- `DELIVERY_ENABLED`: Enable delivery orders (default: true)

## Database Models

### Customer
- Complete customer profile with contact information
- Business type classification (individual, business, central_bakery)
- Payment terms and credit management
- Order history and metrics tracking
- Delivery preferences and special requirements

### CustomerOrder
- Comprehensive order tracking from creation to delivery
- Status management with full audit trail
- Financial calculations including discounts and taxes
- Delivery scheduling and fulfillment tracking
- Business model detection and categorization
- Customer communication preferences

### OrderItem
- Detailed line item tracking with product specifications
- Customization and special instruction support
- Production requirement integration
- Cost tracking and margin analysis
- Quality control integration

### OrderStatusHistory
- Complete audit trail of order status changes
- Event tracking with detailed context
- User attribution and change reasons
- Customer notification tracking

### ProcurementPlan
- Master procurement planning with business model context
- Supplier diversification and risk assessment
- Performance tracking and cost analysis
- Integration with demand forecasting

### ProcurementRequirement
- Detailed procurement requirements per product/ingredient
- Current inventory level integration
- Supplier preference and lead time management
- Quality specifications and special requirements

### OrderAlert
- Comprehensive alert system for order issues
- Multiple severity levels with appropriate routing
- Business impact assessment
- Resolution tracking and performance metrics

## Business Logic

### Order Processing Flow
1. **Order Creation**: Validate customer, calculate totals, create order record
2. **Item Processing**: Create order items with specifications and requirements
3. **Status Tracking**: Maintain complete audit trail of status changes
4. **Customer Metrics**: Update customer statistics and relationship data
5. **Business Model Detection**: Analyze patterns to determine bakery type
6. **Alert Generation**: Check for high-value, rush, or large orders
7. **Service Integration**: Notify production and inventory services (a condensed sketch follows this list)
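A condensed sketch of the flow above; the `services` facade and every method name on it are hypothetical, used only to show the ordering of the seven steps:

```python
async def process_order(order_request: dict, services):
    customer = await services.customers.validate(order_request["customer_id"])  # 1. Order Creation
    order = await services.orders.create(customer, order_request["items"])      # 2. Item Processing
    await services.orders.record_status(order.id, "created")                    # 3. Status Tracking
    await services.customers.update_metrics(customer.id, order)                 # 4. Customer Metrics
    await services.analytics.detect_business_model(order.tenant_id)             # 5. Business Model Detection
    await services.alerts.check_thresholds(order)                               # 6. Alert Generation
    await services.production.notify(order)                                     # 7. Service Integration
    return order
```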
### Procurement Planning
1. **Demand Analysis**: Aggregate orders by delivery date and products
2. **Inventory Integration**: Check current stock levels and reservations
3. **Requirement Calculation**: Calculate net procurement needs with safety buffer (see the sketch after this list)
4. **Supplier Coordination**: Match requirements with preferred suppliers
5. **Lead Time Planning**: Account for supplier lead times and delivery windows
6. **Risk Assessment**: Evaluate supply risks and backup options
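The exact formula is not shown in this commit; a plausible reading of steps 2-3 together with the `SAFETY_STOCK_PERCENTAGE` setting (20% by default) would be:

```python
def net_requirement(forecast_demand: float, on_hand: float, reserved: float,
                    safety_stock_pct: float = 20.0) -> float:
    """Net quantity to procure after applying the safety buffer."""
    buffered_demand = forecast_demand * (1 + safety_stock_pct / 100)
    available = on_hand - reserved
    return max(0.0, buffered_demand - available)

# e.g. 100 units forecast, 50 on hand, 10 reserved -> 120 - 40 = 80 to procure
print(net_requirement(100, 50, 10))  # 80.0
```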
### Business Model Detection
- **Individual Bakery**: Low order volume, direct customer sales, standard products
- **Central Bakery**: High volume, wholesale operations, bulk orders
- **Detection Factors**: Order frequency, quantity, customer types, sales channels (a heuristic sketch follows this list)
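The thresholds from the configuration section suggest a simple volume heuristic; real detection also weighs customer types and sales channels, so treat this as a sketch:

```python
def detect_business_model(avg_daily_orders: float) -> str:
    if avg_daily_orders >= 20:   # CENTRAL_BAKERY_ORDER_THRESHOLD default
        return "central_bakery"
    if avg_daily_orders <= 5:    # INDIVIDUAL_BAKERY_ORDER_THRESHOLD default
        return "individual_bakery"
    return "undetermined"        # in-between volumes need the other signals
```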
## Alert System

### Alert Types
- **High Value Orders**: Orders exceeding configured thresholds
- **Rush Orders**: Orders with tight delivery requirements
- **Large Quantity Orders**: Orders with unusually high item counts
- **Payment Issues**: Payment validation failures or credit problems
- **Procurement Shortages**: Insufficient inventory for order fulfillment
- **Customer Issues**: New customers, credit limit exceedances, special requirements

### Severity Levels
- **Critical**: WhatsApp + Email + Dashboard + SMS
- **High**: WhatsApp + Email + Dashboard
- **Medium**: Email + Dashboard
- **Low**: Dashboard only

## Development

### Setup
```bash
# Install dependencies
pip install -r requirements.txt

# Set up database
# Configure ORDERS_DATABASE_URL environment variable

# Run migrations
alembic upgrade head

# Start service
uvicorn app.main:app --reload
```

### Testing
```bash
# Run tests
pytest

# Run with coverage
pytest --cov=app
```

### Docker
```bash
# Build image
docker build -t orders-service .

# Run container
docker run -p 8000:8000 orders-service
```

## Deployment

The service is designed for containerized deployment with:
- Health checks at `/health`
- Structured logging
- Metrics collection
- Database migrations
- Service discovery integration

## Architecture

Follows Domain-Driven Microservices Architecture:
- Clean separation of concerns
- Repository pattern for data access
- Service layer for business logic
- API layer for external interface
- Shared infrastructure for cross-cutting concerns
@@ -5,7 +5,7 @@
Orders Service Database Configuration
"""

from sqlalchemy import create_engine, text
from sqlalchemy.ext.asyncio import create_async_engine, async_sessionmaker, AsyncSession
from sqlalchemy.orm import sessionmaker, DeclarativeBase
import structlog

@@ -72,7 +72,7 @@ async def get_db_health() -> bool:
    """Check database health"""
    try:
        async with async_engine.begin() as conn:
            # SQLAlchemy 2.x requires textual SQL to be wrapped in text()
            await conn.execute(text("SELECT 1"))
        return True
    except Exception as e:
        logger.error("Database health check failed", error=str(e))
@@ -1,138 +0,0 @@
# POS Integration Service

This service handles integration with external Point of Sale (POS) systems for the Bakery IA platform.

## Supported POS Systems

- **Square POS** - Popular payment and POS solution with strong API support
- **Toast POS** - Restaurant-focused POS system with comprehensive features
- **Lightspeed Restaurant** - Full-featured restaurant management system

## Features

- **Real-time webhook handling** from POS systems
- **Bidirectional data synchronization** with sales service
- **Secure credential management** with encryption
- **Multi-tenant support** with tenant-specific configurations
- **Comprehensive transaction logging** and audit trails
- **Automatic duplicate detection** and handling
- **Rate limiting and retry mechanisms** for reliability

## Architecture

The POS service follows the established microservices architecture:

```
POS Service
├── API Layer (FastAPI)
├── Business Logic (Services)
├── Data Access (Repositories)
├── External Integrations (POS Providers)
├── Webhook Handlers
└── Background Sync Jobs
```

## API Endpoints

### Configuration Management
- `GET /api/v1/tenants/{tenant_id}/pos/configurations` - List POS configurations
- `POST /api/v1/tenants/{tenant_id}/pos/configurations` - Create new configuration
- `PUT /api/v1/tenants/{tenant_id}/pos/configurations/{config_id}` - Update configuration
- `DELETE /api/v1/tenants/{tenant_id}/pos/configurations/{config_id}` - Delete configuration

### Webhook Handling
- `POST /api/v1/webhooks/{pos_system}` - Receive webhooks from POS systems
- `GET /api/v1/webhooks/{pos_system}/status` - Get webhook status

### Data Synchronization
- `POST /api/v1/tenants/{tenant_id}/pos/configurations/{config_id}/sync` - Trigger sync
- `GET /api/v1/tenants/{tenant_id}/pos/configurations/{config_id}/sync/status` - Get sync status
- `GET /api/v1/tenants/{tenant_id}/pos/transactions` - Get POS transactions

## Database Schema

### Core Tables
- `pos_configurations` - POS system configurations per tenant
- `pos_transactions` - Transaction data from POS systems
- `pos_transaction_items` - Individual items within transactions
- `pos_webhook_logs` - Webhook event logs
- `pos_sync_logs` - Synchronization operation logs

## Environment Variables

See `app/core/config.py` for all configuration options. Key variables include:

```bash
# Database
POS_DATABASE_URL=postgresql+asyncpg://pos_user:pos_pass123@pos-db:5432/pos_db

# POS Provider Credentials
SQUARE_APPLICATION_ID=your_square_app_id
SQUARE_ACCESS_TOKEN=your_square_token
TOAST_CLIENT_ID=your_toast_client_id
LIGHTSPEED_CLIENT_ID=your_lightspeed_client_id

# Webhook Configuration
WEBHOOK_BASE_URL=https://your-domain.com
WEBHOOK_SECRET=your_webhook_secret
```

## Development

### Running the Service

```bash
# Using Docker Compose (recommended)
docker-compose up pos-service

# Local development
cd services/pos
pip install -r requirements.txt
uvicorn app.main:app --reload --port 8000
```

### Database Migrations

```bash
# Create migration
alembic revision --autogenerate -m "Description"

# Apply migrations
alembic upgrade head
```

### Testing

```bash
# Run tests
pytest tests/

# Run with coverage
pytest --cov=app tests/
```

## Security Considerations

- POS credentials are encrypted before storage
- Webhook signatures are verified for authenticity (see the sketch after this list)
- All API endpoints require tenant-based authentication
- Rate limiting prevents abuse
- Sensitive data is logged with appropriate redaction
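The signature check can be sketched generically. HMAC-SHA256 over the raw request body is a common scheme among POS providers, but header names and algorithms vary per provider, so treat the details as assumptions:

```python
import hashlib
import hmac


def verify_webhook_signature(raw_body: bytes, received_signature: str, secret: str) -> bool:
    """Return True when the HMAC-SHA256 of the body matches the provider's signature."""
    expected = hmac.new(secret.encode(), raw_body, hashlib.sha256).hexdigest()
    # compare_digest avoids leaking timing information
    return hmac.compare_digest(expected, received_signature)
```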
## Monitoring

The service includes comprehensive monitoring:

- Health check endpoints
- Prometheus metrics
- Structured logging
- Performance tracking
- Error rate monitoring

## Integration Flow

1. **Configuration**: Set up POS system credentials via API
2. **Webhook Registration**: Register webhook URLs with POS providers
3. **Real-time Events**: Receive and process webhook events
4. **Data Sync**: Periodic synchronization of transaction data
5. **Sales Integration**: Forward processed data to sales service
@@ -1,187 +0,0 @@
# Production Service

Production planning and batch management service for the bakery management system.

## Overview

The Production Service handles all production-related operations including:

- **Production Planning**: Calculate daily requirements using demand forecasts and inventory levels
- **Batch Management**: Track production batches from start to finish
- **Capacity Management**: Equipment, staff, and time scheduling
- **Quality Control**: Yield tracking, waste management, efficiency metrics
- **Alert System**: Comprehensive monitoring and notifications

## Features

### Core Capabilities
- Daily production requirements calculation
- Production batch lifecycle management
- Real-time capacity planning and utilization
- Quality control tracking and metrics
- Comprehensive alert system with multiple severity levels
- Integration with inventory, orders, recipes, and sales services

### API Endpoints

#### Dashboard & Planning
- `GET /api/v1/tenants/{tenant_id}/production/dashboard-summary` - Production dashboard data
- `GET /api/v1/tenants/{tenant_id}/production/daily-requirements` - Daily production planning
- `GET /api/v1/tenants/{tenant_id}/production/requirements` - Requirements for procurement

#### Batch Management
- `POST /api/v1/tenants/{tenant_id}/production/batches` - Create production batch
- `GET /api/v1/tenants/{tenant_id}/production/batches/active` - Get active batches
- `GET /api/v1/tenants/{tenant_id}/production/batches/{batch_id}` - Get batch details
- `PUT /api/v1/tenants/{tenant_id}/production/batches/{batch_id}/status` - Update batch status

#### Scheduling & Capacity
- `GET /api/v1/tenants/{tenant_id}/production/schedule` - Production schedule
- `GET /api/v1/tenants/{tenant_id}/production/capacity/status` - Capacity status

#### Alerts & Monitoring
- `GET /api/v1/tenants/{tenant_id}/production/alerts` - Production alerts
- `POST /api/v1/tenants/{tenant_id}/production/alerts/{alert_id}/acknowledge` - Acknowledge alerts

#### Analytics
- `GET /api/v1/tenants/{tenant_id}/production/metrics/yield` - Yield metrics

## Service Integration

### Shared Clients Used
- **InventoryServiceClient**: Stock levels, ingredient availability
- **OrdersServiceClient**: Demand requirements, customer orders
- **RecipesServiceClient**: Recipe requirements, ingredient calculations
- **SalesServiceClient**: Historical sales data
- **NotificationServiceClient**: Alert notifications

### Authentication
Uses shared authentication patterns with tenant isolation:
- JWT token validation
- Tenant access verification
- User permission checks

## Configuration

Key configuration options in `app/core/config.py`:

### Production Planning
- `PLANNING_HORIZON_DAYS`: Days ahead for planning (default: 7)
- `PRODUCTION_BUFFER_PERCENTAGE`: Safety buffer for production (default: 10%)
- `MINIMUM_BATCH_SIZE`: Minimum batch size (default: 1.0)
- `MAXIMUM_BATCH_SIZE`: Maximum batch size (default: 100.0)

### Capacity Management
- `DEFAULT_WORKING_HOURS_PER_DAY`: Standard working hours (default: 12)
- `MAX_OVERTIME_HOURS`: Maximum overtime allowed (default: 4)
- `CAPACITY_UTILIZATION_TARGET`: Target utilization (default: 85%)

### Quality Control
- `MINIMUM_YIELD_PERCENTAGE`: Minimum acceptable yield (default: 85%, illustrated below)
- `QUALITY_SCORE_THRESHOLD`: Minimum quality score (default: 8.0)
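To make the yield thresholds concrete, a hedged sketch of how a batch's yield might be evaluated; the actual computation lives in the service and is not shown in this commit:

```python
def yield_percentage(actual_quantity: float, planned_quantity: float) -> float:
    return (actual_quantity / planned_quantity) * 100 if planned_quantity else 0.0

pct = yield_percentage(actual_quantity=82, planned_quantity=100)  # 82.0
if pct < 80:    # LOW_YIELD_ALERT_THRESHOLD default (see Alert Thresholds below)
    print("raise low-yield alert")
elif pct < 85:  # MINIMUM_YIELD_PERCENTAGE default
    print("below acceptable yield, review batch")
```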
### Alert Thresholds
- `CAPACITY_EXCEEDED_THRESHOLD`: Capacity alert threshold (default: 100%)
- `PRODUCTION_DELAY_THRESHOLD_MINUTES`: Delay alert threshold (default: 60)
- `LOW_YIELD_ALERT_THRESHOLD`: Low yield alert (default: 80%)

## Database Models

### ProductionBatch
- Complete batch tracking from planning to completion
- Status management (pending, in_progress, completed, etc.)
- Cost tracking and yield calculations
- Quality metrics integration

### ProductionSchedule
- Daily production scheduling
- Capacity planning and tracking
- Staff and equipment assignments
- Performance metrics

### ProductionCapacity
- Resource availability tracking
- Equipment and staff capacity
- Maintenance scheduling
- Utilization monitoring

### QualityCheck
- Quality control measurements
- Pass/fail tracking
- Defect recording
- Corrective action management

### ProductionAlert
- Comprehensive alert system
- Multiple severity levels
- Action recommendations
- Resolution tracking

## Alert System

### Alert Types
- **Capacity Exceeded**: When production requirements exceed available capacity
- **Production Delay**: When batches are delayed beyond thresholds
- **Cost Spike**: When production costs exceed normal ranges
- **Low Yield**: When yield percentages fall below targets
- **Quality Issues**: When quality scores consistently decline
- **Equipment Maintenance**: When equipment needs maintenance

### Severity Levels
- **Critical**: WhatsApp + Email + Dashboard + SMS
- **High**: WhatsApp + Email + Dashboard
- **Medium**: Email + Dashboard
- **Low**: Dashboard only

## Development

### Setup
```bash
# Install dependencies
pip install -r requirements.txt

# Set up database
# Configure DATABASE_URL environment variable

# Run migrations
alembic upgrade head

# Start service
uvicorn app.main:app --reload
```

### Testing
```bash
# Run tests
pytest

# Run with coverage
pytest --cov=app
```

### Docker
```bash
# Build image
docker build -t production-service .

# Run container
docker run -p 8000:8000 production-service
```

## Deployment

The service is designed for containerized deployment with:
- Health checks at `/health`
- Structured logging
- Metrics collection
- Database migrations
- Service discovery integration

## Architecture

Follows Domain-Driven Microservices Architecture:
- Clean separation of concerns
- Repository pattern for data access
- Service layer for business logic
- API layer for external interface
- Shared infrastructure for cross-cutting concerns
@@ -14,6 +14,7 @@ import structlog
from app.core.config import settings
from app.core.database import init_database, get_db_health
from app.api.production import router as production_router
from app.services.production_alert_service import ProductionAlertService

# Configure logging
logger = structlog.get_logger()

@@ -25,6 +26,16 @@ async def lifespan(app: FastAPI):
    # Startup
    try:
        await init_database()
        logger.info("Database initialized")

        # Initialize alert service
        alert_service = ProductionAlertService(settings)
        await alert_service.start()
        logger.info("Production alert service started")

        # Store alert service in app state
        app.state.alert_service = alert_service

        logger.info("Production service started successfully")
    except Exception as e:
        logger.error("Failed to initialize production service", error=str(e))

@@ -34,6 +45,13 @@ async def lifespan(app: FastAPI):

    # Shutdown
    logger.info("Production service shutting down")
    try:
        # Stop alert service
        if hasattr(app.state, 'alert_service'):
            await app.state.alert_service.stop()
            logger.info("Alert service stopped")
    except Exception as e:
        logger.error("Error during shutdown", error=str(e))


# Create FastAPI application
services/production/app/services/production_alert_service.py (new file, 795 lines)
@@ -0,0 +1,795 @@
# services/production/app/services/production_alert_service.py
"""
Production-specific alert and recommendation detection service
Monitors production capacity, delays, quality issues, and optimization opportunities
"""

import json
from typing import List, Dict, Any, Optional
from uuid import UUID
from datetime import datetime, timedelta

import structlog
from apscheduler.triggers.cron import CronTrigger

from shared.alerts.base_service import BaseAlertService, AlertServiceMixin
from shared.alerts.templates import format_item_message

logger = structlog.get_logger()


class ProductionAlertService(BaseAlertService, AlertServiceMixin):
    """Production service alert and recommendation detection"""

    def setup_scheduled_checks(self):
        """Production-specific scheduled checks for alerts and recommendations"""

        # Production capacity checks - every 10 minutes during business hours (alerts)
        self.scheduler.add_job(
            self.check_production_capacity,
            CronTrigger(minute='*/10', hour='6-20'),
            id='capacity_check',
            misfire_grace_time=60,
            max_instances=1
        )

        # Production delays - every 5 minutes during production hours (alerts)
        self.scheduler.add_job(
            self.check_production_delays,
            CronTrigger(minute='*/5', hour='4-22'),
            id='delay_check',
            misfire_grace_time=30,
            max_instances=1
        )

        # Quality issues check - every 15 minutes (alerts)
        self.scheduler.add_job(
            self.check_quality_issues,
            CronTrigger(minute='*/15'),
            id='quality_check',
            misfire_grace_time=60,
            max_instances=1
        )

        # Equipment monitoring - every 3 minutes (alerts)
        self.scheduler.add_job(
            self.check_equipment_status,
            CronTrigger(minute='*/3'),
            id='equipment_check',
            misfire_grace_time=30,
            max_instances=1
        )

        # Efficiency recommendations - every 30 minutes (recommendations)
        self.scheduler.add_job(
            self.generate_efficiency_recommendations,
            CronTrigger(minute='*/30'),
            id='efficiency_recs',
            misfire_grace_time=120,
            max_instances=1
        )

        # Energy optimization - every hour (recommendations)
        self.scheduler.add_job(
            self.generate_energy_recommendations,
            CronTrigger(minute='0'),
            id='energy_recs',
            misfire_grace_time=300,
            max_instances=1
        )

        logger.info("Production alert schedules configured",
                    service=self.config.SERVICE_NAME)
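        # Note on the CronTrigger settings above:
        #   minute='*/10', hour='6-20' fires every 10 minutes from 06:00 to 20:59;
        #   minute='0' fires once per hour, on the hour.
        # misfire_grace_time is how many seconds late a job may fire before it is
        # skipped, and max_instances=1 prevents overlapping runs of the same check.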
|
||||
async def check_production_capacity(self):
|
||||
"""Check if production plan exceeds capacity (alerts)"""
|
||||
try:
|
||||
self._checks_performed += 1
|
||||
|
||||
query = """
|
||||
WITH capacity_analysis AS (
|
||||
SELECT
|
||||
p.tenant_id,
|
||||
p.planned_date,
|
||||
SUM(p.planned_quantity) as total_planned,
|
||||
MAX(pc.daily_capacity) as max_daily_capacity,
|
||||
COUNT(DISTINCT p.equipment_id) as equipment_count,
|
||||
AVG(pc.efficiency_percent) as avg_efficiency,
|
||||
CASE
|
||||
WHEN SUM(p.planned_quantity) > MAX(pc.daily_capacity) * 1.2 THEN 'severe_overload'
|
||||
WHEN SUM(p.planned_quantity) > MAX(pc.daily_capacity) THEN 'overload'
|
||||
WHEN SUM(p.planned_quantity) > MAX(pc.daily_capacity) * 0.9 THEN 'near_capacity'
|
||||
ELSE 'normal'
|
||||
END as capacity_status,
|
||||
(SUM(p.planned_quantity) / MAX(pc.daily_capacity)) * 100 as capacity_percentage
|
||||
FROM production_schedule p
|
||||
JOIN production_capacity pc ON pc.equipment_id = p.equipment_id
|
||||
WHERE p.planned_date >= CURRENT_DATE
|
||||
AND p.planned_date <= CURRENT_DATE + INTERVAL '3 days'
|
||||
AND p.status IN ('planned', 'in_progress')
|
||||
AND p.tenant_id = $1
|
||||
GROUP BY p.tenant_id, p.planned_date
|
||||
)
|
||||
SELECT * FROM capacity_analysis
|
||||
WHERE capacity_status != 'normal'
|
||||
ORDER BY capacity_percentage DESC
|
||||
"""
|
||||
|
||||
# Check production capacity without tenant dependencies
|
||||
try:
|
||||
from sqlalchemy import text
|
||||
# Simplified query using only existing production tables
|
||||
simplified_query = text("""
|
||||
SELECT
|
||||
pb.tenant_id,
|
||||
DATE(pb.planned_start_time) as planned_date,
|
||||
COUNT(*) as batch_count,
|
||||
SUM(pb.planned_quantity) as total_planned,
|
||||
'capacity_check' as capacity_status
|
||||
FROM production_batches pb
|
||||
WHERE pb.planned_start_time >= CURRENT_DATE
|
||||
AND pb.planned_start_time <= CURRENT_DATE + INTERVAL '3 days'
|
||||
AND pb.status IN ('planned', 'pending', 'in_progress')
|
||||
GROUP BY pb.tenant_id, DATE(pb.planned_start_time)
|
||||
HAVING COUNT(*) > 10 -- Alert if more than 10 batches per day
|
||||
ORDER BY total_planned DESC
|
||||
""")
|
||||
|
||||
async with self.db_manager.get_session() as session:
|
||||
result = await session.execute(simplified_query)
|
||||
capacity_issues = result.fetchall()
|
||||
|
||||
for issue in capacity_issues:
|
||||
await self._process_capacity_issue(issue.tenant_id, issue)
|
||||
|
||||
except Exception as e:
|
||||
logger.debug("Simplified capacity check failed", error=str(e))
|
||||
|
||||
except Exception as e:
|
||||
# Skip capacity checks if tables don't exist (graceful degradation)
|
||||
if "does not exist" in str(e):
|
||||
logger.debug("Capacity check skipped - missing tables", error=str(e))
|
||||
else:
|
||||
logger.error("Capacity check failed", error=str(e))
|
||||
self._errors_count += 1
|
||||
|
||||
async def _process_capacity_issue(self, tenant_id: UUID, issue: Dict[str, Any]):
|
||||
"""Process capacity overload issue"""
|
||||
try:
|
||||
status = issue['capacity_status']
|
||||
percentage = issue['capacity_percentage']
|
||||
|
||||
if status == 'severe_overload':
|
||||
template_data = self.format_spanish_message(
|
||||
'order_overload',
|
||||
percentage=int(percentage - 100)
|
||||
)
|
||||
|
||||
await self.publish_item(tenant_id, {
|
||||
'type': 'severe_capacity_overload',
|
||||
'severity': 'urgent',
|
||||
'title': template_data['title'],
|
||||
'message': template_data['message'],
|
||||
'actions': template_data['actions'],
|
||||
'metadata': {
|
||||
'planned_date': issue['planned_date'].isoformat(),
|
||||
'capacity_percentage': float(percentage),
|
||||
'overload_percentage': float(percentage - 100),
|
||||
'equipment_count': issue['equipment_count']
|
||||
}
|
||||
}, item_type='alert')
|
||||
|
||||
elif status == 'overload':
|
||||
severity = self.get_business_hours_severity('high')
|
||||
|
||||
await self.publish_item(tenant_id, {
|
||||
'type': 'capacity_overload',
|
||||
'severity': severity,
|
||||
'title': f'⚠️ Capacidad Excedida: {percentage:.0f}%',
|
||||
'message': f'Producción planificada para {issue["planned_date"]} excede capacidad en {percentage-100:.0f}%.',
|
||||
'actions': ['Redistribuir cargas', 'Ampliar turnos', 'Subcontratar', 'Posponer pedidos'],
|
||||
'metadata': {
|
||||
'planned_date': issue['planned_date'].isoformat(),
|
||||
'capacity_percentage': float(percentage),
|
||||
'equipment_count': issue['equipment_count']
|
||||
}
|
||||
}, item_type='alert')
|
||||
|
||||
elif status == 'near_capacity':
|
||||
severity = self.get_business_hours_severity('medium')
|
||||
|
||||
await self.publish_item(tenant_id, {
|
||||
'type': 'near_capacity',
|
||||
'severity': severity,
|
||||
'title': f'📊 Cerca de Capacidad Máxima: {percentage:.0f}%',
|
||||
'message': f'Producción del {issue["planned_date"]} está al {percentage:.0f}% de capacidad. Monitorear de cerca.',
|
||||
'actions': ['Revisar planificación', 'Preparar contingencias', 'Optimizar eficiencia'],
|
||||
'metadata': {
|
||||
'planned_date': issue['planned_date'].isoformat(),
|
||||
'capacity_percentage': float(percentage)
|
||||
}
|
||||
}, item_type='alert')
|
||||
|
||||
except Exception as e:
|
||||
logger.error("Error processing capacity issue", error=str(e))
|
||||
|
||||
    async def check_production_delays(self):
        """Check for production delays (alerts)"""
        try:
            self._checks_performed += 1

            # Simplified query without customer_orders dependency
            query = """
                SELECT
                    pb.id, pb.tenant_id, pb.product_name, pb.batch_number,
                    pb.planned_end_time as planned_completion_time, pb.actual_start_time,
                    pb.actual_end_time as estimated_completion_time, pb.status,
                    -- Total delay in minutes; EXTRACT(EPOCH ...) gives seconds, while
                    -- EXTRACT(minutes ...) would only return the minutes component of the interval
                    EXTRACT(EPOCH FROM (NOW() - pb.planned_end_time)) / 60 as delay_minutes,
                    COALESCE(pb.priority::text, 'medium') as priority_level,
                    1 as affected_orders -- Default to 1 since we can't count orders
                FROM production_batches pb
                WHERE pb.status IN ('in_progress', 'delayed')
                AND (
                    (pb.planned_end_time < NOW() AND pb.status = 'in_progress')
                    OR pb.status = 'delayed'
                )
                AND pb.planned_end_time > NOW() - INTERVAL '24 hours'
                ORDER BY
                    CASE COALESCE(pb.priority::text, 'medium')
                        WHEN 'urgent' THEN 1 WHEN 'high' THEN 2 ELSE 3
                    END,
                    delay_minutes DESC
            """

            from sqlalchemy import text
            async with self.db_manager.get_session() as session:
                result = await session.execute(text(query))
                delays = result.mappings().fetchall()

            for delay in delays:
                await self._process_production_delay(delay)

        except Exception as e:
            # Skip delay checks if tables don't exist (graceful degradation)
            if "does not exist" in str(e):
                logger.debug("Production delay check skipped - missing tables", error=str(e))
            else:
                logger.error("Production delay check failed", error=str(e))
                self._errors_count += 1

    async def _process_production_delay(self, delay: Dict[str, Any]):
        """Process production delay"""
        try:
            delay_minutes = delay['delay_minutes']
            priority = delay['priority_level']
            affected_orders = delay['affected_orders']

            # Determine severity based on delay time and priority
            if delay_minutes > 120 or priority == 'urgent':
                severity = 'urgent'
            elif delay_minutes > 60 or priority == 'high':
                severity = 'high'
            elif delay_minutes > 30:
                severity = 'medium'
            else:
                severity = 'low'

            template_data = self.format_spanish_message(
                'production_delay',
                batch_name=f"{delay['product_name']} #{delay['batch_number']}",
                delay_minutes=int(delay_minutes)
            )

            await self.publish_item(delay['tenant_id'], {
                'type': 'production_delay',
                'severity': severity,
                'title': template_data['title'],
                'message': template_data['message'],
                'actions': template_data['actions'],
                'metadata': {
                    'batch_id': str(delay['id']),
                    'product_name': delay['product_name'],
                    'batch_number': delay['batch_number'],
                    'delay_minutes': delay_minutes,
                    'priority_level': priority,
                    'affected_orders': affected_orders,
                    'planned_completion': delay['planned_completion_time'].isoformat()
                }
            }, item_type='alert')

        except Exception as e:
            logger.error("Error processing production delay",
                         batch_id=str(delay.get('id')),
                         error=str(e))

    async def check_quality_issues(self):
        """Check for quality control issues (alerts)"""
        try:
            self._checks_performed += 1

            # Fixed query using actual quality_checks table structure
            query = """
                SELECT
                    qc.id, qc.tenant_id, qc.batch_id, qc.check_type as test_type,
                    qc.quality_score as result_value,
                    qc.target_weight as min_acceptable,
                    (qc.target_weight * (1 + qc.tolerance_percentage/100)) as max_acceptable,
                    CASE
                        WHEN qc.pass_fail = false AND qc.defect_count > 5 THEN 'critical'
                        WHEN qc.pass_fail = false THEN 'major'
                        ELSE 'minor'
                    END as qc_severity,
                    qc.created_at,
                    pb.product_name, pb.batch_number,
                    COUNT(*) OVER (PARTITION BY qc.batch_id) as total_failures
                FROM quality_checks qc
                JOIN production_batches pb ON pb.id = qc.batch_id
                WHERE qc.pass_fail = false -- Use pass_fail instead of status
                AND qc.created_at > NOW() - INTERVAL '4 hours'
                AND qc.corrective_action_needed = true -- Use this instead of acknowledged
                ORDER BY
                    CASE
                        WHEN qc.pass_fail = false AND qc.defect_count > 5 THEN 1
                        WHEN qc.pass_fail = false THEN 2
                        ELSE 3
                    END,
                    qc.created_at DESC
            """

            from sqlalchemy import text
            async with self.db_manager.get_session() as session:
                result = await session.execute(text(query))
                quality_issues = result.mappings().fetchall()

            for issue in quality_issues:
                await self._process_quality_issue(issue)

        except Exception as e:
            # Skip quality checks if tables or columns don't exist (graceful degradation)
            if "does not exist" in str(e).lower():
                logger.debug("Quality check skipped - missing tables or columns", error=str(e))
            else:
                logger.error("Quality check failed", error=str(e))
                self._errors_count += 1

    async def _process_quality_issue(self, issue: Dict[str, Any]):
        """Process quality control failure"""
        try:
            qc_severity = issue['qc_severity']
            total_failures = issue['total_failures']

            # Map QC severity to alert severity
            if qc_severity == 'critical' or total_failures > 2:
                severity = 'urgent'
            elif qc_severity == 'major':
                severity = 'high'
            else:
                severity = 'medium'

            await self.publish_item(issue['tenant_id'], {
                'type': 'quality_control_failure',
                'severity': severity,
                'title': f'❌ Fallo Control Calidad: {issue["product_name"]}',
                'message': f'Lote {issue["batch_number"]} falló en {issue["test_type"]}. Valor: {issue["result_value"]} (rango: {issue["min_acceptable"]}-{issue["max_acceptable"]})',
                'actions': ['Revisar lote', 'Repetir prueba', 'Ajustar proceso', 'Documentar causa'],
                'metadata': {
                    'quality_check_id': str(issue['id']),
                    'batch_id': str(issue['batch_id']),
                    'test_type': issue['test_type'],
                    'result_value': float(issue['result_value']),
                    'min_acceptable': float(issue['min_acceptable']),
                    'max_acceptable': float(issue['max_acceptable']),
                    'qc_severity': qc_severity,
                    'total_failures': total_failures
                }
            }, item_type='alert')

            # Clear the corrective-action flag to avoid duplicate alerts
            # (quality_checks has no 'acknowledged' column; the detection
            # query above filters on corrective_action_needed)
            await self.db_manager.execute(
                "UPDATE quality_checks SET corrective_action_needed = false WHERE id = $1",
                issue['id']
            )

        except Exception as e:
            logger.error("Error processing quality issue",
                         quality_check_id=str(issue.get('id')),
                         error=str(e))

    async def check_equipment_status(self):
        """Check equipment status and failures (alerts)"""
        # Equipment tables don't exist in production database - skip this check
        logger.debug("Equipment check skipped - equipment tables not available in production database")
        return

    async def _process_equipment_issue(self, equipment: Dict[str, Any]):
        """Process equipment issue"""
        try:
            status = equipment['status']
            efficiency = equipment.get('efficiency_percent', 100)
            days_to_maintenance = equipment.get('days_to_maintenance', 30)

            if status == 'error':
                template_data = self.format_spanish_message(
                    'equipment_failure',
                    equipment_name=equipment['name']
                )

                await self.publish_item(equipment['tenant_id'], {
                    'type': 'equipment_failure',
                    'severity': 'urgent',
                    'title': template_data['title'],
                    'message': template_data['message'],
                    'actions': template_data['actions'],
                    'metadata': {
                        'equipment_id': str(equipment['id']),
                        'equipment_name': equipment['name'],
                        'equipment_type': equipment['type'],
                        'error_count': equipment.get('error_count', 0),
                        'last_reading': equipment.get('last_reading').isoformat() if equipment.get('last_reading') else None
                    }
                }, item_type='alert')

            elif status == 'maintenance_required' or days_to_maintenance <= 1:
                severity = 'high' if days_to_maintenance <= 1 else 'medium'

                await self.publish_item(equipment['tenant_id'], {
                    'type': 'maintenance_required',
                    'severity': severity,
                    'title': f'🔧 Mantenimiento Requerido: {equipment["name"]}',
                    'message': f'Equipo {equipment["name"]} requiere mantenimiento en {days_to_maintenance} días.',
                    'actions': ['Programar mantenimiento', 'Revisar historial', 'Preparar repuestos', 'Planificar parada'],
                    'metadata': {
                        'equipment_id': str(equipment['id']),
                        'days_to_maintenance': days_to_maintenance,
                        'last_maintenance': equipment.get('last_maintenance').isoformat() if equipment.get('last_maintenance') else None
                    }
                }, item_type='alert')

            elif efficiency < 80:
                severity = 'medium' if efficiency < 70 else 'low'

                await self.publish_item(equipment['tenant_id'], {
                    'type': 'low_equipment_efficiency',
                    'severity': severity,
                    'title': f'📉 Baja Eficiencia: {equipment["name"]}',
                    'message': f'Eficiencia del {equipment["name"]} bajó a {efficiency:.1f}%. Revisar funcionamiento.',
                    'actions': ['Revisar configuración', 'Limpiar equipo', 'Calibrar sensores', 'Revisar mantenimiento'],
                    'metadata': {
                        'equipment_id': str(equipment['id']),
                        'efficiency_percent': float(efficiency),
                        'temperature': equipment.get('temperature'),
                        'vibration_level': equipment.get('vibration_level')
                    }
                }, item_type='alert')

        except Exception as e:
            logger.error("Error processing equipment issue",
                         equipment_id=str(equipment.get('id')),
                         error=str(e))

    async def generate_efficiency_recommendations(self):
        """Generate production efficiency recommendations"""
        try:
            self._checks_performed += 1

            # Analyze production patterns for efficiency opportunities
            query = """
                WITH efficiency_analysis AS (
                    SELECT
                        pb.tenant_id, pb.product_name,
                        -- EPOCH gives total seconds; divide by 60 for whole minutes
                        AVG(EXTRACT(EPOCH FROM (pb.actual_completion_time - pb.actual_start_time)) / 60) as avg_production_time,
                        AVG(pb.planned_duration_minutes) as avg_planned_duration,
                        COUNT(*) as batch_count,
                        AVG(pb.yield_percentage) as avg_yield,
                        EXTRACT(hour FROM pb.actual_start_time) as start_hour
                    FROM production_batches pb
                    WHERE pb.status = 'completed'
                    AND pb.actual_completion_time > CURRENT_DATE - INTERVAL '30 days'
                    AND pb.tenant_id = :tenant_id
                    GROUP BY pb.tenant_id, pb.product_name, EXTRACT(hour FROM pb.actual_start_time)
                    HAVING COUNT(*) >= 3
                ),
                recommendations AS (
                    SELECT *,
                        CASE
                            WHEN avg_production_time > avg_planned_duration * 1.2 THEN 'reduce_production_time'
                            WHEN avg_yield < 85 THEN 'improve_yield'
                            WHEN start_hour BETWEEN 14 AND 16 AND avg_production_time > avg_planned_duration * 1.1 THEN 'avoid_afternoon_production'
                            ELSE null
                        END as recommendation_type,
                        (avg_production_time - avg_planned_duration) / avg_planned_duration * 100 as efficiency_loss_percent
                    FROM efficiency_analysis
                )
                SELECT * FROM recommendations
                WHERE recommendation_type IS NOT NULL
                AND efficiency_loss_percent > 10
                ORDER BY efficiency_loss_percent DESC
            """

            tenants = await self.get_active_tenants()

            for tenant_id in tenants:
                try:
                    from sqlalchemy import text
                    async with self.db_manager.get_session() as session:
                        # text() expects the :tenant_id bind style, not asyncpg's $1
                        result = await session.execute(text(query), {"tenant_id": tenant_id})
                        recommendations = result.mappings().fetchall()

                    for rec in recommendations:
                        await self._generate_efficiency_recommendation(tenant_id, rec)

                except Exception as e:
                    logger.error("Error generating efficiency recommendations",
                                 tenant_id=str(tenant_id),
                                 error=str(e))

        except Exception as e:
            logger.error("Efficiency recommendations failed", error=str(e))
            self._errors_count += 1

    async def _generate_efficiency_recommendation(self, tenant_id: UUID, rec: Dict[str, Any]):
        """Generate specific efficiency recommendation"""
        try:
            if not self.should_send_recommendation(tenant_id, rec['recommendation_type']):
                return

            rec_type = rec['recommendation_type']
            efficiency_loss = rec['efficiency_loss_percent']

            if rec_type == 'reduce_production_time':
                template_data = self.format_spanish_message(
                    'production_efficiency',
                    # start_hour comes back from EXTRACT as a numeric; :02d needs an int
                    suggested_time=f"{int(rec['start_hour']):02d}:00",
                    savings_percent=efficiency_loss
                )

                await self.publish_item(tenant_id, {
                    'type': 'production_efficiency',
                    'severity': 'medium',
                    'title': template_data['title'],
                    'message': template_data['message'],
                    'actions': template_data['actions'],
                    'metadata': {
                        'product_name': rec['product_name'],
                        'avg_production_time': float(rec['avg_production_time']),
                        'avg_planned_duration': float(rec['avg_planned_duration']),
                        'efficiency_loss_percent': float(efficiency_loss),
                        'batch_count': rec['batch_count'],
                        'recommendation_type': rec_type
                    }
                }, item_type='recommendation')

            elif rec_type == 'improve_yield':
                await self.publish_item(tenant_id, {
                    'type': 'yield_improvement',
                    'severity': 'medium',
                    'title': f'📈 Mejorar Rendimiento: {rec["product_name"]}',
                    'message': f'Rendimiento promedio del {rec["product_name"]} es {rec["avg_yield"]:.1f}%. Oportunidad de mejora.',
                    'actions': ['Revisar receta', 'Optimizar proceso', 'Entrenar personal', 'Verificar ingredientes'],
                    'metadata': {
                        'product_name': rec['product_name'],
                        'avg_yield': float(rec['avg_yield']),
                        'batch_count': rec['batch_count'],
                        'recommendation_type': rec_type
                    }
                }, item_type='recommendation')

            elif rec_type == 'avoid_afternoon_production':
                await self.publish_item(tenant_id, {
                    'type': 'schedule_optimization',
                    'severity': 'low',
                    'title': f'⏰ Optimizar Horario: {rec["product_name"]}',
                    'message': f'Producción de {rec["product_name"]} en horario {rec["start_hour"]}:00 muestra menor eficiencia.',
                    'actions': ['Cambiar horario', 'Analizar causas', 'Revisar personal', 'Optimizar ambiente'],
                    'metadata': {
                        'product_name': rec['product_name'],
                        'start_hour': rec['start_hour'],
                        'efficiency_loss_percent': float(efficiency_loss),
                        'recommendation_type': rec_type
                    }
                }, item_type='recommendation')

        except Exception as e:
            logger.error("Error generating efficiency recommendation",
                         product_name=rec.get('product_name'),
                         error=str(e))

    async def generate_energy_recommendations(self):
        """Generate energy optimization recommendations"""
        try:
            # Analyze energy consumption patterns
            query = """
                SELECT
                    e.tenant_id, e.name as equipment_name, e.type,
                    AVG(ec.energy_consumption_kwh) as avg_energy,
                    EXTRACT(hour FROM ec.recorded_at) as hour_of_day,
                    COUNT(*) as readings_count
                FROM equipment e
                JOIN energy_consumption ec ON ec.equipment_id = e.id
                WHERE ec.recorded_at > CURRENT_DATE - INTERVAL '30 days'
                AND e.tenant_id = :tenant_id
                GROUP BY e.tenant_id, e.id, EXTRACT(hour FROM ec.recorded_at)
                HAVING COUNT(*) >= 10
                ORDER BY avg_energy DESC
            """

            tenants = await self.get_active_tenants()

            for tenant_id in tenants:
                try:
                    from sqlalchemy import text
                    async with self.db_manager.get_session() as session:
                        result = await session.execute(text(query), {"tenant_id": tenant_id})
                        energy_data = result.mappings().fetchall()

                    # Analyze for peak hours and optimization opportunities
                    await self._analyze_energy_patterns(tenant_id, energy_data)

                except Exception as e:
                    logger.error("Error generating energy recommendations",
                                 tenant_id=str(tenant_id),
                                 error=str(e))

        except Exception as e:
            logger.error("Energy recommendations failed", error=str(e))
            self._errors_count += 1

    async def _analyze_energy_patterns(self, tenant_id: UUID, energy_data: List[Dict[str, Any]]):
        """Analyze energy consumption patterns for optimization"""
        try:
            if not energy_data:
                return

            # Group by equipment and find peak hours
            equipment_data = {}
            for record in energy_data:
                equipment = record['equipment_name']
                if equipment not in equipment_data:
                    equipment_data[equipment] = []
                equipment_data[equipment].append(record)

            for equipment, records in equipment_data.items():
                # Find peak consumption hours
                peak_hour_record = max(records, key=lambda x: x['avg_energy'])
                off_peak_records = [r for r in records if r['hour_of_day'] < 7 or r['hour_of_day'] > 22]

                if off_peak_records and peak_hour_record['avg_energy'] > 0:
                    min_off_peak = min(off_peak_records, key=lambda x: x['avg_energy'])
                    # Relative saving vs. the peak hour: e.g. peak 10 kWh and
                    # cheapest off-peak hour 6 kWh -> (10 - 6) / 10 = 40%
                    potential_savings = ((peak_hour_record['avg_energy'] - min_off_peak['avg_energy']) /
                                         peak_hour_record['avg_energy']) * 100

                    if potential_savings > 15:  # More than 15% potential savings
                        template_data = self.format_spanish_message(
                            'energy_optimization',
                            # hour_of_day is numeric from EXTRACT; :02d needs an int
                            start_time=f"{int(min_off_peak['hour_of_day']):02d}:00",
                            end_time=f"{int(min_off_peak['hour_of_day']) + 2:02d}:00",
                            savings_euros=potential_savings * 0.15  # Rough estimate
                        )

                        await self.publish_item(tenant_id, {
                            'type': 'energy_optimization',
                            'severity': 'low',
                            'title': template_data['title'],
                            'message': template_data['message'],
                            'actions': template_data['actions'],
                            'metadata': {
                                'equipment_name': equipment,
                                'peak_hour': peak_hour_record['hour_of_day'],
                                'optimal_hour': min_off_peak['hour_of_day'],
                                'potential_savings_percent': float(potential_savings),
                                'peak_consumption': float(peak_hour_record['avg_energy']),
                                'optimal_consumption': float(min_off_peak['avg_energy'])
                            }
                        }, item_type='recommendation')

        except Exception as e:
            logger.error("Error analyzing energy patterns", error=str(e))

    async def register_db_listeners(self, conn):
        """Register production-specific database listeners"""
        try:
            await conn.add_listener('production_alerts', self.handle_production_db_alert)

            logger.info("Database listeners registered",
                        service=self.config.SERVICE_NAME)
        except Exception as e:
            logger.error("Failed to register database listeners",
                         service=self.config.SERVICE_NAME,
                         error=str(e))

    async def handle_production_db_alert(self, connection, pid, channel, payload):
        """Handle production alert from database trigger"""
        try:
            data = json.loads(payload)
            tenant_id = UUID(data['tenant_id'])

            template_data = self.format_spanish_message(
                'production_delay',
                batch_name=f"{data['product_name']} #{data.get('batch_number', 'N/A')}",
                delay_minutes=data['delay_minutes']
            )

            await self.publish_item(tenant_id, {
                'type': 'production_delay',
                'severity': 'high',
                'title': template_data['title'],
                'message': template_data['message'],
                'actions': template_data['actions'],
                'metadata': {
                    'batch_id': data['batch_id'],
                    'delay_minutes': data['delay_minutes'],
                    'trigger_source': 'database'
                }
            }, item_type='alert')

        except Exception as e:
            logger.error("Error handling production DB alert", error=str(e))

    async def start_event_listener(self):
        """Listen for production-affecting events"""
        try:
            # Subscribe to inventory events that might affect production
            await self.rabbitmq_client.consume_events(
                "bakery_events",
                f"production.inventory.{self.config.SERVICE_NAME}",
                "inventory.critical_shortage",
                self.handle_inventory_shortage
            )

            logger.info("Event listeners started",
                        service=self.config.SERVICE_NAME)
        except Exception as e:
            logger.error("Failed to start event listeners",
                         service=self.config.SERVICE_NAME,
                         error=str(e))

    async def handle_inventory_shortage(self, message):
        """Handle critical inventory shortage affecting production"""
        try:
            shortage = json.loads(message.body)
            tenant_id = UUID(shortage['tenant_id'])

            # Check if this ingredient affects any current production
            affected_batches = await self.get_affected_production_batches(
                shortage['ingredient_id']
            )

            if affected_batches:
                await self.publish_item(tenant_id, {
                    'type': 'production_ingredient_shortage',
                    'severity': 'high',
                    'title': '🚨 Falta Ingrediente para Producción',
                    'message': f'Escasez de {shortage["ingredient_name"]} afecta {len(affected_batches)} lotes en producción.',
                    'actions': ['Buscar ingrediente alternativo', 'Pausar producción', 'Contactar proveedor urgente', 'Reorganizar plan'],
                    'metadata': {
                        'ingredient_id': shortage['ingredient_id'],
                        'ingredient_name': shortage['ingredient_name'],
                        'affected_batches': [str(b) for b in affected_batches],
                        'shortage_amount': shortage.get('shortage_amount', 0)
                    }
                }, item_type='alert')

        except Exception as e:
            logger.error("Error handling inventory shortage event", error=str(e))

    async def get_affected_production_batches(self, ingredient_id: str) -> List[str]:
        """Get production batches affected by ingredient shortage"""
        try:
            query = """
                SELECT DISTINCT pb.id
                FROM production_batches pb
                JOIN recipe_ingredients ri ON ri.recipe_id = pb.recipe_id
                WHERE ri.ingredient_id = :ingredient_id
                AND pb.status IN ('planned', 'in_progress')
                AND pb.planned_completion_time > NOW()
            """

            from sqlalchemy import text
            async with self.db_manager.get_session() as session:
                # text() expects the :ingredient_id bind style, not asyncpg's $1
                result_rows = await session.execute(text(query), {"ingredient_id": ingredient_id})
                result = result_rows.fetchall()
                return [str(row.id) for row in result]

        except Exception as e:
            logger.error("Error getting affected production batches",
                         ingredient_id=ingredient_id,
                         error=str(e))
            return []
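
# Illustration (not part of the original commit): handle_production_db_alert
# above consumes whatever a database trigger passes to pg_notify on the
# 'production_alerts' channel. A hypothetical helper for exercising that path
# end-to-end; the payload fields mirror the handler and are assumptions, not
# a schema shipped in this commit.
async def emit_test_production_alert(database_url: str):
    import asyncpg
    import uuid as _uuid

    conn = await asyncpg.connect(database_url)
    payload = json.dumps({
        'tenant_id': str(_uuid.uuid4()),  # hypothetical tenant
        'batch_id': str(_uuid.uuid4()),
        'product_name': 'Baguette',
        'batch_number': 'B-042',
        'delay_minutes': 45,
    })
    # pg_notify fans the payload out to every connection LISTENing on the channel
    await conn.execute("SELECT pg_notify('production_alerts', $1)", payload)
    await conn.close()
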
@@ -15,6 +15,14 @@ httpx==0.25.2

# Logging and monitoring
structlog==23.2.0
prometheus-client==0.19.0

# Message queues and Redis
aio-pika==9.3.1
redis>=4.0.0

# Scheduling
APScheduler==3.10.4

# Date and time utilities
python-dateutil==2.8.2

@@ -1,220 +0,0 @@
## 🎯 **Migration Summary: Prophet Models to Training Service**

I've created a complete migration of the Prophet ML training functionality from the monolithic backend to a dedicated training microservice. Here's a summary of what has been implemented:

### **✅ What Was Migrated**

1. **Prophet Manager** (`prophet_manager.py`):
   - Enhanced model training with bakery-specific configurations
   - Spanish holidays integration
   - Advanced model persistence and metadata storage
   - Training metrics calculation

2. **ML Trainer** (`trainer.py`):
   - Complete training orchestration for multiple products
   - Single product training capability
   - Model performance evaluation
   - Async-first design replacing Celery complexity

3. **Data Processor** (`data_processor.py`):
   - Advanced feature engineering for bakery forecasting
   - Weather and traffic data integration
   - Spanish holiday and school calendar detection
   - Temporal feature extraction

4. **API Layer** (`training.py`):
   - RESTful endpoints for training job management
   - Real-time progress tracking
   - Job cancellation and status monitoring
   - Data validation before training

5. **Database Models** (`training.py`):
   - `ModelTrainingLog`: Job execution tracking
   - `TrainedModel`: Model registry and versioning
   - `ModelPerformanceMetric`: Performance monitoring
   - `TrainingJobQueue`: Job scheduling system

6. **Service Layer** (`training_service.py`):
   - Business logic orchestration
   - External service integration (data service)
   - Job lifecycle management
   - Error handling and recovery

7. **Messaging Integration** (`messaging.py`):
   - Event-driven architecture with RabbitMQ
   - Inter-service communication
   - Real-time notifications
   - Event publishing for other services

### **🔧 Key Improvements Over Old System**

#### **1. Eliminated Celery Complexity**
- **Before**: Complex Celery worker setup with sync/async mixing
- **After**: Pure async implementation with FastAPI background tasks

#### **2. Better Error Handling**
- **Before**: Celery task failures were hard to debug
- **After**: Detailed error tracking and recovery mechanisms

#### **3. Real-Time Progress Tracking**
- **Before**: Limited visibility into training progress
- **After**: Real-time updates with detailed step-by-step progress

#### **4. Service Isolation**
- **Before**: Training tightly coupled with main application
- **After**: Independent service that can scale separately

#### **5. Enhanced Model Management**
- **Before**: Basic model storage in filesystem
- **After**: Complete model lifecycle with versioning and metadata

### **🚀 New Capabilities**

#### **1. Advanced Training Features**
```python
# Support for different training modes
await trainer.train_tenant_models(...)            # All products
await trainer.train_single_product(...)           # Single product
await trainer.evaluate_model_performance(...)     # Performance evaluation
```

#### **2. Real-Time Job Management**
```
# Job lifecycle management
POST /training/jobs                  # Start training
GET  /training/jobs/{id}/status      # Get progress
POST /training/jobs/{id}/cancel      # Cancel job
GET  /training/jobs/{id}/logs        # View detailed logs
```

#### **3. Data Validation**
```
# Pre-training validation
POST /training/validate              # Check data quality before training
```

#### **4. Event-Driven Architecture**
```python
# Automatic event publishing
await publish_job_started(job_id, tenant_id, config)
await publish_job_completed(job_id, tenant_id, results)
await publish_model_trained(model_id, tenant_id, product_name, metrics)
```

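For orientation, here is a client-side sketch of that job lifecycle. The request body and response fields are hypothetical; the real schemas live in the training service's API layer:

```python
import asyncio
import httpx

BASE = "http://localhost:8000/api/training"  # via the API gateway

async def run_training_job(token: str) -> dict:
    headers = {"Authorization": f"Bearer {token}"}
    async with httpx.AsyncClient(headers=headers) as client:
        job = (await client.post(f"{BASE}/jobs", json={"scope": "all_products"})).json()
        # Poll the real-time status endpoint until the job settles
        while True:
            status = (await client.get(f"{BASE}/jobs/{job['id']}/status")).json()
            if status["state"] not in ("queued", "running"):
                return status
            await asyncio.sleep(2)
```
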
### **📊 Performance Improvements**

#### **1. Faster Training Startup**
- **Before**: 30-60 seconds Celery worker initialization
- **After**: <5 seconds direct async execution

#### **2. Better Resource Utilization**
- **Before**: Fixed Celery worker pools
- **After**: Dynamic scaling based on demand

#### **3. Improved Memory Management**
- **Before**: Memory leaks in long-running Celery workers
- **After**: Clean memory usage with proper cleanup

### **🔒 Enhanced Security & Monitoring**

#### **1. Authentication Integration**
```python
# Secure endpoints with tenant isolation
@router.post("/jobs")
async def start_training_job(
    request: TrainingJobRequest,
    tenant_id: str = Depends(get_current_tenant_id)  # Automatic tenant isolation
):
```

#### **2. Comprehensive Monitoring**
```python
# Built-in metrics collection
metrics.increment_counter("training_jobs_started")
metrics.increment_counter("training_jobs_completed")
metrics.increment_counter("training_jobs_failed")
```

#### **3. Detailed Logging**
```python
# Structured logging with context
logger.info(f"Training job {job_id} completed successfully",
            extra={"tenant_id": tenant_id, "models_trained": count})
```

### **🔄 Integration with Existing Architecture**

#### **1. Seamless API Integration**
The new training service integrates with the existing gateway:

```yaml
# API Gateway routes to training service
/api/training/* → http://training-service:8000/
```

#### **2. Event-Driven Communication**
```
# Other services can listen to training events
"training.job.completed" → forecasting-service (update models)
"training.job.completed" → notification-service (send alerts)
"training.model.updated" → tenant-service (update quotas)
```

#### **3. Database Independence**
- Training service has its own PostgreSQL database
- Clean separation from other service data
- Easy to scale and backup independently

### **📦 Deployment Ready**

#### **1. Docker Configuration**
- Optimized Dockerfile with proper security
- Non-root user execution
- Health checks included

#### **2. Requirements Management**
- Pinned dependency versions
- Separated development/production requirements
- Prophet and ML libraries properly configured

#### **3. Environment Configuration**
```bash
# Flexible configuration management
MODEL_STORAGE_PATH=/app/models
MAX_TRAINING_TIME_MINUTES=30
MIN_TRAINING_DATA_DAYS=30
PROPHET_SEASONALITY_MODE=additive
```

### **🎯 Migration Benefits Summary**

| Aspect | Before (Celery) | After (Microservice) |
|--------|-----------------|----------------------|
| **Startup Time** | 30-60 seconds | <5 seconds |
| **Error Handling** | Basic | Comprehensive |
| **Progress Tracking** | Limited | Real-time |
| **Scalability** | Fixed workers | Dynamic scaling |
| **Debugging** | Difficult | Easy with logs |
| **Testing** | Complex | Simple unit tests |
| **Deployment** | Monolithic | Independent |
| **Monitoring** | Basic | Full observability |

### **🔧 Ready for Production**

This training service is **production-ready** and provides:

1. **Robust Error Handling**: Graceful failure recovery
2. **Horizontal Scaling**: Can run multiple instances
3. **Performance Monitoring**: Built-in metrics and health checks
4. **Security**: Proper authentication and tenant isolation
5. **Maintainability**: Clean code structure and comprehensive tests

### **🚀 Next Steps**

The training service is ready to be integrated into the microservices architecture. It replaces the old Celery-based training system and improves reliability, performance, and maintainability.

The implementation follows microservices best practices and integrates with the broader platform architecture of the Madrid bakery forecasting system.

1
shared/alerts/__init__.py
Normal file
1
shared/alerts/__init__.py
Normal file
@@ -0,0 +1 @@
# shared/alerts/__init__.py
353
shared/alerts/base_service.py
Normal file
353
shared/alerts/base_service.py
Normal file
@@ -0,0 +1,353 @@
# shared/alerts/base_service.py
"""
Base alert service pattern for all microservices
Supports both alerts and recommendations through unified detection patterns
"""

import asyncio
import json
import uuid
from typing import List, Dict, Any, Optional
from uuid import UUID
from datetime import datetime, timedelta
import structlog
from redis.asyncio import Redis
from apscheduler.schedulers.asyncio import AsyncIOScheduler
from apscheduler.triggers.cron import CronTrigger

from shared.messaging.rabbitmq import RabbitMQClient
from shared.database.base import DatabaseManager
from shared.config.rabbitmq_config import get_routing_key

logger = structlog.get_logger()


class BaseAlertService:
    """
    Base class for service-specific alert and recommendation detection
    Implements hybrid detection patterns: scheduled jobs, event-driven, and database triggers
    """

    def __init__(self, config):
        self.config = config
        self.db_manager = DatabaseManager(config.DATABASE_URL)
        self.rabbitmq_client = RabbitMQClient(config.RABBITMQ_URL, config.SERVICE_NAME)
        self.redis = None
        self.scheduler = AsyncIOScheduler()
        self.is_leader = False
        self.exchange = "alerts.exchange"

        # Metrics
        self._items_published = 0
        self._checks_performed = 0
        self._errors_count = 0

    async def start(self):
        """Initialize all detection mechanisms"""
        try:
            # Connect to Redis for leader election and deduplication
            # (Redis.from_url in redis.asyncio is synchronous and returns a client)
            self.redis = Redis.from_url(self.config.REDIS_URL)
            logger.info("Connected to Redis", service=self.config.SERVICE_NAME)

            # Connect to RabbitMQ
            await self.rabbitmq_client.connect()
            logger.info("Connected to RabbitMQ", service=self.config.SERVICE_NAME)

            # Start leader election for scheduled jobs
            asyncio.create_task(self.maintain_leadership())

            # Setup scheduled checks (runs only on leader)
            self.setup_scheduled_checks()

            # Start database listener (runs on all instances)
            await self.start_database_listener()

            # Start event listener (runs on all instances)
            await self.start_event_listener()

            logger.info("Alert service started", service=self.config.SERVICE_NAME)

        except Exception as e:
            logger.error("Failed to start alert service", service=self.config.SERVICE_NAME, error=str(e))
            raise

    async def stop(self):
        """Clean shutdown"""
        try:
            # Stop scheduler
            if self.scheduler.running:
                self.scheduler.shutdown()

            # Close connections
            if self.redis:
                await self.redis.aclose()  # Use aclose() for modern Redis client

            await self.rabbitmq_client.disconnect()

            logger.info("Alert service stopped", service=self.config.SERVICE_NAME)

        except Exception as e:
            logger.error("Error stopping alert service", service=self.config.SERVICE_NAME, error=str(e))

    # PATTERN 1: Scheduled Background Jobs
    def setup_scheduled_checks(self):
        """Configure scheduled alert checks - Override in service"""
        raise NotImplementedError("Subclasses must implement setup_scheduled_checks")

    async def maintain_leadership(self):
        """Leader election for scheduled jobs"""
        lock_key = f"scheduler_lock:{self.config.SERVICE_NAME}"
        lock_ttl = 60

        while True:
            try:
                instance_id = getattr(self.config, 'INSTANCE_ID', 'default')
                was_leader = self.is_leader

                # Try to acquire new leadership if not currently leader
                if not self.is_leader:
                    result = await self.redis.set(
                        lock_key,
                        instance_id,
                        ex=lock_ttl,
                        nx=True
                    )
                    self.is_leader = result is not None
                else:
                    # Already leader - try to extend the lock
                    current_value = await self.redis.get(lock_key)
                    if current_value and current_value.decode() == instance_id:
                        # Still our lock, extend it
                        await self.redis.expire(lock_key, lock_ttl)
                        self.is_leader = True
                    else:
                        # Lock expired or taken by someone else
                        self.is_leader = False

                # Handle leadership changes
                if self.is_leader and not was_leader:
                    self.scheduler.start()
                    logger.info("Acquired scheduler leadership", service=self.config.SERVICE_NAME)
                elif not self.is_leader and was_leader:
                    self.scheduler.shutdown()
                    logger.info("Lost scheduler leadership", service=self.config.SERVICE_NAME)

                await asyncio.sleep(lock_ttl // 2)

            except Exception as e:
                logger.error("Leadership error", service=self.config.SERVICE_NAME, error=str(e))
                self.is_leader = False
                await asyncio.sleep(5)

    # PATTERN 2: Event-Driven Detection
    async def start_event_listener(self):
        """Listen for business events - Override in service"""
        pass

    # PATTERN 3: Database Triggers
    async def start_database_listener(self):
        """Listen for database notifications"""
        try:
            import asyncpg
            # Convert SQLAlchemy URL format to plain PostgreSQL for asyncpg
            database_url = self.config.DATABASE_URL
            if database_url.startswith('postgresql+asyncpg://'):
                database_url = database_url.replace('postgresql+asyncpg://', 'postgresql://')

            conn = await asyncpg.connect(database_url)

            # Register listeners based on service
            await self.register_db_listeners(conn)

            logger.info("Database listeners registered", service=self.config.SERVICE_NAME)

        except Exception as e:
            logger.error("Failed to setup database listeners", service=self.config.SERVICE_NAME, error=str(e))

    async def register_db_listeners(self, conn):
        """Register database listeners - Override in service"""
        pass

    # Publishing (updated for item type)
    async def publish_item(self, tenant_id: UUID, item: Dict[str, Any], item_type: str = 'alert'):
        """Publish alert or recommendation to RabbitMQ with deduplication"""
        try:
            # Check for duplicate
            item_key = f"{tenant_id}:{item_type}:{item['type']}:{item.get('metadata', {}).get('id', '')}"
            if await self.is_duplicate_item(item_key):
                logger.debug("Duplicate item skipped",
                             service=self.config.SERVICE_NAME,
                             item_type=item_type,
                             alert_type=item['type'])
                return False

            # Add metadata
            item['id'] = str(uuid.uuid4())
            item['tenant_id'] = str(tenant_id)
            item['service'] = self.config.SERVICE_NAME
            item['timestamp'] = datetime.utcnow().isoformat()
            item['item_type'] = item_type  # 'alert' or 'recommendation'

            # Determine routing key based on severity and type
            routing_key = get_routing_key(item_type, item['severity'], self.config.SERVICE_NAME)

            # Publish to RabbitMQ
            success = await self.rabbitmq_client.publish_event(
                exchange_name=self.exchange,
                routing_key=routing_key,
                event_data=item
            )

            if success:
                self._items_published += 1
                logger.info("Item published successfully",
                            service=self.config.SERVICE_NAME,
                            item_type=item_type,
                            alert_type=item['type'],
                            severity=item['severity'],
                            routing_key=routing_key)
            else:
                self._errors_count += 1
                logger.error("Failed to publish item",
                             service=self.config.SERVICE_NAME,
                             item_type=item_type,
                             alert_type=item['type'])

            return success

        except Exception as e:
            self._errors_count += 1
            logger.error("Error publishing item",
                         service=self.config.SERVICE_NAME,
                         error=str(e),
                         item_type=item_type)
            return False

    async def is_duplicate_item(self, item_key: str, window_minutes: int = 15) -> bool:
        """Prevent duplicate items within time window"""
        key = f"item_sent:{item_key}"
        try:
            result = await self.redis.set(
                key, "1",
                ex=window_minutes * 60,
                nx=True
            )
            return result is None  # None means duplicate
        except Exception as e:
            logger.error("Error checking duplicate", error=str(e))
            return False  # Allow publishing if check fails

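    # Illustration (not part of the original commit): with SET NX EX above, the
    # first publish inside the window claims the key and later retries see it.
    # Hypothetical calls:
    #   await svc.is_duplicate_item("t1:alert:production_delay:")  # False - key claimed
    #   await svc.is_duplicate_item("t1:alert:production_delay:")  # True for 15 minutes
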
    # Helper methods
    async def get_active_tenants(self) -> List[UUID]:
        """Get list of active tenant IDs"""
        try:
            from sqlalchemy import text
            query = text("SELECT DISTINCT tenant_id FROM tenants WHERE status = 'active'")
            async with self.db_manager.get_session() as session:
                result = await session.execute(query)
                return [row.tenant_id for row in result.fetchall()]
        except Exception as e:
            # If tenants table doesn't exist, skip tenant-based processing
            if "does not exist" in str(e):
                logger.debug("Tenants table not found, skipping tenant-based alert processing")
                return []
            else:
                logger.error("Error fetching active tenants", error=str(e))
                return []

    async def get_tenant_config(self, tenant_id: UUID) -> Dict[str, Any]:
        """Get tenant-specific configuration"""
        try:
            from sqlalchemy import text
            query = text("SELECT config FROM tenants WHERE tenant_id = :tenant_id")
            async with self.db_manager.get_session() as session:
                result = await session.execute(query, {"tenant_id": tenant_id})
                row = result.fetchone()
                return json.loads(row.config) if row and row.config else {}
        except Exception as e:
            logger.error("Error fetching tenant config", tenant_id=str(tenant_id), error=str(e))
            return {}

    # Health and metrics
    def get_metrics(self) -> Dict[str, Any]:
        """Get service metrics"""
        return {
            "items_published": self._items_published,
            "checks_performed": self._checks_performed,
            "errors_count": self._errors_count,
            "is_leader": self.is_leader,
            "scheduler_running": self.scheduler.running,
            # The modern redis.asyncio client has no 'closed' flag;
            # report whether a client has been created instead
            "redis_connected": self.redis is not None,
            "rabbitmq_connected": self.rabbitmq_client.connected if self.rabbitmq_client else False
        }

    async def health_check(self) -> Dict[str, Any]:
        """Comprehensive health check"""
        try:
            # Check Redis with a live ping
            redis_healthy = False
            if self.redis:
                await self.redis.ping()
                redis_healthy = True

            # Check RabbitMQ
            rabbitmq_healthy = self.rabbitmq_client.connected if self.rabbitmq_client else False

            # Check database
            db_healthy = False
            try:
                from sqlalchemy import text
                async with self.db_manager.get_session() as session:
                    await session.execute(text("SELECT 1"))
                db_healthy = True
            except Exception:
                pass

            status = "healthy" if all([redis_healthy, rabbitmq_healthy, db_healthy]) else "unhealthy"

            return {
                "status": status,
                "service": self.config.SERVICE_NAME,
                "components": {
                    "redis": "healthy" if redis_healthy else "unhealthy",
                    "rabbitmq": "healthy" if rabbitmq_healthy else "unhealthy",
                    "database": "healthy" if db_healthy else "unhealthy",
                    "scheduler": "running" if self.scheduler.running else "stopped"
                },
                "metrics": self.get_metrics()
            }

        except Exception as e:
            return {
                "status": "error",
                "service": self.config.SERVICE_NAME,
                "error": str(e)
            }


class AlertServiceMixin:
    """Mixin providing common alert helper methods"""

    def format_spanish_message(self, template_key: str, **kwargs) -> Dict[str, Any]:
        """Format Spanish alert message"""
        from shared.alerts.templates import format_item_message
        return format_item_message(template_key, 'es', **kwargs)

    def get_business_hours_severity(self, base_severity: str) -> str:
        """Adjust severity based on business hours"""
        current_hour = datetime.now().hour

        # Reduce non-critical severity outside business hours (7-20)
        if not (7 <= current_hour <= 20):
            if base_severity == 'medium':
                return 'low'
            elif base_severity == 'high' and (current_hour < 6 or current_hour > 22):
                return 'medium'

        return base_severity

    def should_send_recommendation(self, tenant_id: UUID, rec_type: str) -> bool:
        """Check if recommendation should be sent based on tenant preferences"""
        # Implement tenant-specific recommendation frequency limits
        # This is a simplified version
        return True
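

# Illustration (not part of the original commit): a minimal concrete service
# built on the pattern above. The class and check names are hypothetical;
# real services register their own scheduled checks and event listeners.
class ProductionAlertService(BaseAlertService, AlertServiceMixin):
    def setup_scheduled_checks(self):
        # Runs only on the instance holding scheduler leadership
        self.scheduler.add_job(
            self.check_production_delays,
            CronTrigger(minute="*/15"),  # every 15 minutes
            id="production_delays"
        )

    async def check_production_delays(self):
        self._checks_performed += 1
        # ...query the service database, then publish through the base class:
        # await self.publish_item(tenant_id, {...}, item_type='alert')
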
218
shared/alerts/templates.py
Normal file
218
shared/alerts/templates.py
Normal file
@@ -0,0 +1,218 @@
# shared/alerts/templates.py
"""
Alert and recommendation templates in Spanish for the bakery platform
"""

from typing import Dict, Any

ITEM_TEMPLATES = {
    # ALERTS - Critical Issues Requiring Immediate Action
    'critical_stock_shortage': {
        'es': {
            'title': '🚨 Stock Crítico: {ingredient_name}',
            'message': 'Solo {current_stock}kg disponibles, necesarios {required_stock}kg para producción de mañana. Acción inmediata requerida.',
            'actions': ['Realizar pedido de emergencia', 'Contactar proveedor', 'Ajustar plan de producción']
        },
        'en': {
            'title': '🚨 Critical Stock: {ingredient_name}',
            'message': 'Only {current_stock}kg available, {required_stock}kg needed for tomorrow\'s production. Immediate action required.',
            'actions': ['Place emergency order', 'Contact supplier', 'Adjust production plan']
        }
    },
    'temperature_breach': {
        'es': {
            'title': '🌡️ ALERTA TEMPERATURA',
            'message': '{location}: {temperature}°C durante {duration} minutos. Revisar productos inmediatamente para evitar deterioro.',
            'actions': ['Verificar productos', 'Llamar técnico refrigeración', 'Documentar incidencia', 'Mover productos']
        },
        'en': {
            'title': '🌡️ TEMPERATURE ALERT',
            'message': '{location}: {temperature}°C for {duration} minutes. Check products immediately to prevent spoilage.',
            'actions': ['Check products', 'Call refrigeration technician', 'Document incident', 'Move products']
        }
    },
    'production_delay': {
        'es': {
            'title': '⏰ Retraso en Producción',
            'message': 'Lote {batch_name} con {delay_minutes} minutos de retraso. Impacto en entregas del día.',
            'actions': ['Acelerar producción', 'Notificar clientes', 'Reorganizar horarios', 'Buscar capacidad adicional']
        }
    },
    'expired_products': {
        'es': {
            'title': '📅 Productos Caducados',
            'message': '{product_count} productos han caducado hoy. Retirar inmediatamente por seguridad alimentaria.',
            'actions': ['Retirar productos', 'Revisar inventario', 'Ajustar pedidos', 'Documentar pérdidas']
        }
    },
    'equipment_failure': {
        'es': {
            'title': '⚙️ Fallo de Equipo',
            'message': '{equipment_name} no está funcionando correctamente. Producción afectada.',
            'actions': ['Parar producción', 'Llamar mantenimiento', 'Usar equipo alternativo', 'Documentar fallo']
        }
    },
    'order_overload': {
        'es': {
            'title': '📋 Sobrecarga de Pedidos',
            'message': 'Capacidad excedida en {percentage}%. Riesgo de no cumplir entregas.',
            'actions': ['Priorizar pedidos', 'Aumentar turnos', 'Rechazar nuevos pedidos', 'Buscar ayuda externa']
        }
    },
    'supplier_delay': {
        'es': {
            'title': '🚚 Retraso de Proveedor',
            'message': 'Entrega de {supplier_name} retrasada {hours} horas. Impacto en producción de {products}.',
            'actions': ['Contactar proveedor', 'Buscar alternativas', 'Ajustar producción', 'Usar stock reserva']
        }
    },

    # RECOMMENDATIONS - Proactive Suggestions for Optimization
    'inventory_optimization': {
        'es': {
            'title': '📈 Optimización de Stock: {ingredient_name}',
            'message': 'Basado en tendencias de {period} días, sugerimos aumentar stock mínimo en {suggested_increase}kg para reducir costos.',
            'actions': ['Revisar niveles mínimos', 'Analizar proveedores', 'Actualizar configuración', 'Programar pedido mayor']
        },
        'en': {
            'title': '📈 Stock Optimization: {ingredient_name}',
            'message': 'Based on {period} day trends, suggest increasing minimum stock by {suggested_increase}kg to reduce costs.',
            'actions': ['Review minimum levels', 'Analyze suppliers', 'Update configuration', 'Schedule larger order']
        }
    },
    'production_efficiency': {
        'es': {
            'title': '⚙️ Mejora de Eficiencia',
            'message': 'Cambiar horarios de horneado a {suggested_time} puede reducir costos energéticos en {savings_percent}%.',
            'actions': ['Revisar horarios', 'Consultar personal', 'Probar nuevo horario', 'Medir resultados']
        }
    },
    'sales_opportunity': {
        'es': {
            'title': '💰 Oportunidad de Venta',
            'message': '{product_name} tiene alta demanda los {days}. Incrementar producción puede aumentar ventas {increase_percent}%.',
            'actions': ['Aumentar producción', 'Promocionar producto', 'Revisar precios', 'Planificar ingredientes']
        }
    },
    'seasonal_adjustment': {
        'es': {
            'title': '🍂 Ajuste Estacional',
            'message': 'Época de {season}: ajustar producción de {products} según patrones históricos.',
            'actions': ['Revisar recetas estacionales', 'Ajustar inventario', 'Planificar promociones', 'Entrenar personal']
        }
    },
    'cost_reduction': {
        'es': {
            'title': '💡 Reducción de Costos',
            'message': 'Cambiar a proveedor {supplier_name} para {ingredient} puede ahorrar {savings_euros}€/mes.',
            'actions': ['Evaluar calidad', 'Negociar precios', 'Probar muestras', 'Cambiar proveedor gradualmente']
        }
    },
    'waste_reduction': {
        'es': {
            'title': '♻️ Reducción de Desperdicio',
            'message': 'Ajustar tamaños de lote de {product} puede reducir desperdicio en {waste_reduction_percent}%.',
            'actions': ['Analizar ventas', 'Ajustar recetas', 'Cambiar lotes', 'Monitorear resultados']
        }
    },
    'quality_improvement': {
        'es': {
            'title': '⭐ Mejora de Calidad',
            'message': 'Temperatura de horneado de {product} puede optimizarse para mejor textura y sabor.',
            'actions': ['Probar temperaturas', 'Documentar cambios', 'Entrenar panaderos', 'Obtener feedback']
        }
    },
    'customer_satisfaction': {
        'es': {
            'title': '😊 Satisfacción del Cliente',
            'message': 'Clientes solicitan más {product} los {days}. Considerar aumentar disponibilidad.',
            'actions': ['Revisar comentarios', 'Aumentar producción', 'Crear promociones', 'Mejorar exhibición']
        }
    },
    'energy_optimization': {
        'es': {
            'title': '⚡ Optimización Energética',
            'message': 'Consolidar horneado entre {start_time} y {end_time} puede reducir costos energéticos {savings_euros}€/día.',
            'actions': ['Revisar horarios energía', 'Reorganizar producción', 'Optimizar hornos', 'Medir consumo']
        }
    },
    'staff_optimization': {
        'es': {
            'title': '👥 Optimización de Personal',
            'message': 'Picos de trabajo los {days} a las {hours}. Considerar ajustar turnos para mejor eficiencia.',
            'actions': ['Analizar cargas trabajo', 'Reorganizar turnos', 'Entrenar polivalencia', 'Contratar temporal']
        }
    }
}

def format_item_message(template_key: str, language: str, **kwargs) -> Dict[str, Any]:
    """Format item message using template with validation"""
    template = ITEM_TEMPLATES.get(template_key, {}).get(language, {})

    if not template:
        # Fallback for missing templates
        return {
            'title': f'Notificación: {template_key}',
            'message': f'Información: {", ".join([f"{k}: {v}" for k, v in kwargs.items()])}',
            'actions': ['Revisar', 'Documentar']
        }

    try:
        # Format with provided kwargs, handling missing values gracefully
        formatted_title = template['title'].format(**kwargs)
        formatted_message = template['message'].format(**kwargs)

        return {
            'title': formatted_title,
            'message': formatted_message,
            'actions': template.get('actions', [])
        }
    except KeyError as e:
        # Handle missing format parameters
        return {
            'title': template.get('title', f'Notificación: {template_key}'),
            'message': f"Error en plantilla - parámetro faltante: {e}. Datos: {kwargs}",
            'actions': template.get('actions', ['Revisar configuración'])
        }

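# Illustration (not part of the original commit): rendering the
# 'production_delay' template defined above with hypothetical values.
#
#   msg = format_item_message('production_delay', 'es',
#                             batch_name='Baguette #B-042', delay_minutes=45)
#   msg['title']   -> '⏰ Retraso en Producción'
#   msg['message'] -> 'Lote Baguette #B-042 con 45 minutos de retraso. ...'
#   msg['actions'] -> ['Acelerar producción', 'Notificar clientes', ...]
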
def get_severity_emoji(severity: str) -> str:
    """Get emoji for severity level"""
    emoji_map = {
        'urgent': '🚨',
        'high': '⚠️',
        'medium': '💡',
        'low': 'ℹ️'
    }
    return emoji_map.get(severity, '📋')

def get_item_type_emoji(item_type: str) -> str:
    """Get emoji for item type"""
    emoji_map = {
        'alert': '🚨',
        'recommendation': '💡'
    }
    return emoji_map.get(item_type, '📋')

def format_business_time(hour: int) -> str:
    """Format hour in Spanish business context"""
    if hour == 0:
        return "medianoche"
    elif hour < 12:
        return f"{hour}:00 AM"
    elif hour == 12:
        return "12:00 PM (mediodía)"
    else:
        return f"{hour-12}:00 PM"

def get_spanish_day_name(day_number: int) -> str:
    """Get Spanish day name (0=Monday)"""
    days = ["lunes", "martes", "miércoles", "jueves", "viernes", "sábado", "domingo"]
    return days[day_number] if 0 <= day_number <= 6 else "día desconocido"

def format_currency(amount: float) -> str:
    """Format currency in Spanish Euro format"""
    return f"{amount:.2f}€"

def format_percentage(value: float) -> str:
    """Format percentage in Spanish format"""
    return f"{value:.1f}%"
82
shared/config/rabbitmq_config.py
Normal file
82
shared/config/rabbitmq_config.py
Normal file
@@ -0,0 +1,82 @@
# shared/config/rabbitmq_config.py
"""
RabbitMQ configuration for the alert and recommendation system
Supports both alerts and recommendations through a unified topic exchange
"""

RABBITMQ_CONFIG = {
    "exchanges": {
        "alerts": {
            "name": "alerts.exchange",
            "type": "topic",
            "durable": True,
            "auto_delete": False
        },
        "dead_letter": {
            "name": "dlx.exchange",
            "type": "direct",
            "durable": True,
            "auto_delete": False
        }
    },
    "queues": {
        "alert_processing": {
            "name": "alert.processing.queue",
            "durable": True,
            "arguments": {
                "x-message-ttl": 3600000,  # 1 hour TTL
                "x-max-length": 10000,  # Max 10k messages
                "x-overflow": "reject-publish",
                "x-dead-letter-exchange": "dlx.exchange",
                "x-dead-letter-routing-key": "failed.items"
            }
        },
        "dead_letter": {
            "name": "alert.dead_letter.queue",
            "durable": True,
            "arguments": {
                "x-message-ttl": 86400000  # 24 hours for dead letters
            }
        }
    },
    "bindings": [
        {
            "queue": "alert.processing.queue",
            "exchange": "alerts.exchange",
            "routing_key": "*.*.*"  # alert/recommendation.severity.service
        },
        {
            "queue": "alert.dead_letter.queue",
            "exchange": "dlx.exchange",
            "routing_key": "failed.items"
        }
    ],
    "routing_patterns": {
        # alert/recommendation.severity.service_name
        "alert": "alert.{severity}.{service}",
        "recommendation": "recommendation.{severity}.{service}",
        "all_alerts": "alert.*.*",
        "all_recommendations": "recommendation.*.*",
        "urgent_items": "*.urgent.*",
        "high_items": "*.high.*"
    }
}

def get_routing_key(item_type: str, severity: str, service: str) -> str:
|
||||
"""Generate routing key for item publishing"""
|
||||
return f"{item_type}.{severity}.{service}"
|
||||
|
||||
def get_binding_patterns(item_types: list = None, severities: list = None, services: list = None) -> list:
|
||||
"""Generate binding patterns for selective consumption"""
|
||||
patterns = []
|
||||
|
||||
item_types = item_types or ["alert", "recommendation"]
|
||||
severities = severities or ["urgent", "high", "medium", "low"]
|
||||
services = services or ["*"]
|
||||
|
||||
for item_type in item_types:
|
||||
for severity in severities:
|
||||
for service in services:
|
||||
patterns.append(f"{item_type}.{severity}.{service}")
|
||||
|
||||
return patterns
|
||||
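To show how this configuration might be consumed, here is a minimal sketch that declares the topology from `RABBITMQ_CONFIG`. It assumes the synchronous `pika` client and a broker on localhost; the actual services may well use an async client instead:

```python
# Sketch: declare the exchanges, queues and bindings described in RABBITMQ_CONFIG.
# Assumes the synchronous `pika` client and a broker on localhost.
import pika

from shared.config.rabbitmq_config import RABBITMQ_CONFIG, get_routing_key


def declare_topology(channel) -> None:
    for exchange in RABBITMQ_CONFIG["exchanges"].values():
        channel.exchange_declare(
            exchange=exchange["name"],
            exchange_type=exchange["type"],
            durable=exchange["durable"],
            auto_delete=exchange["auto_delete"],
        )
    for queue in RABBITMQ_CONFIG["queues"].values():
        channel.queue_declare(
            queue=queue["name"],
            durable=queue["durable"],
            arguments=queue.get("arguments"),
        )
    for binding in RABBITMQ_CONFIG["bindings"]:
        channel.queue_bind(
            queue=binding["queue"],
            exchange=binding["exchange"],
            routing_key=binding["routing_key"],
        )


connection = pika.BlockingConnection(pika.ConnectionParameters(host="localhost"))
declare_topology(connection.channel())

# Routing keys follow item_type.severity.service:
# get_routing_key("alert", "urgent", "forecasting") -> "alert.urgent.forecasting"
```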
@@ -112,7 +112,7 @@ class DatabaseUtils:
                "checked_in": pool.checkedin(),
                "checked_out": pool.checkedout(),
                "overflow": pool.overflow(),
                "invalid": pool.invalid(),
                "status": pool.status()
            }
        else:
            return {"status": "no_pool"}
420
shared/monitoring/alert_metrics.py
Normal file
@@ -0,0 +1,420 @@
# shared/monitoring/alert_metrics.py
"""
Metrics and monitoring for the alert and recommendation system
Provides comprehensive metrics for tracking system performance and effectiveness
"""

import asyncio
import time
from functools import wraps
from typing import Dict, Any

import structlog
from prometheus_client import Counter, Histogram, Gauge

logger = structlog.get_logger()
# =================================================================
# DETECTION METRICS
# =================================================================

# Alert and recommendation generation
items_published = Counter(
    'alert_items_published_total',
    'Total number of alerts and recommendations published',
    ['service', 'item_type', 'severity', 'type']
)

item_checks_performed = Counter(
    'alert_checks_performed_total',
    'Total number of alert checks performed',
    ['service', 'check_type', 'pattern']
)

item_check_duration = Histogram(
    'alert_check_duration_seconds',
    'Time taken to perform alert checks',
    ['service', 'check_type'],
    buckets=[0.1, 0.5, 1, 2, 5, 10, 30, 60]
)

alert_detection_errors = Counter(
    'alert_detection_errors_total',
    'Total number of errors during alert detection',
    ['service', 'error_type', 'check_type']
)

# Deduplication metrics
duplicate_items_prevented = Counter(
    'duplicate_items_prevented_total',
    'Number of duplicate alerts/recommendations prevented',
    ['service', 'item_type', 'type']
)
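As a hedged sketch of how these counters and histograms might be used at a detection site (the service, check, and function names below are illustrative, not from the repo):

```python
from shared.monitoring.alert_metrics import (
    item_check_duration, item_checks_performed, items_published,
)


def run_low_stock_check() -> None:
    """Illustrative detection pass; the real checks live in each service."""
    with item_check_duration.labels(service="forecasting", check_type="stock").time():
        item_checks_performed.labels(
            service="forecasting", check_type="stock", pattern="low_stock"
        ).inc()
        # ... detection logic would publish only when a threshold is crossed ...
        items_published.labels(
            service="forecasting", item_type="alert",
            severity="high", type="low_stock",
        ).inc()
```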
# =================================================================
# PROCESSING METRICS
# =================================================================

# Alert processor metrics
items_processed = Counter(
    'alert_items_processed_total',
    'Total number of items processed by alert processor',
    ['item_type', 'severity', 'type', 'status']
)

item_processing_duration = Histogram(
    'alert_processing_duration_seconds',
    'Time taken to process alerts/recommendations',
    ['item_type', 'severity'],
    buckets=[0.01, 0.05, 0.1, 0.5, 1, 2, 5]
)

database_storage_duration = Histogram(
    'alert_database_storage_duration_seconds',
    'Time taken to store items in database',
    buckets=[0.01, 0.05, 0.1, 0.5, 1]
)

processing_errors = Counter(
    'alert_processing_errors_total',
    'Total number of processing errors',
    ['error_type', 'item_type']
)
# =================================================================
# DELIVERY METRICS
# =================================================================

# Notification delivery
notifications_sent = Counter(
    'alert_notifications_sent_total',
    'Total notifications sent through all channels',
    ['channel', 'item_type', 'severity', 'status']
)

notification_delivery_duration = Histogram(
    'alert_notification_delivery_duration_seconds',
    'Time from item generation to delivery',
    ['item_type', 'severity', 'channel'],
    buckets=[0.1, 0.5, 1, 5, 10, 30, 60]
)

delivery_failures = Counter(
    'alert_delivery_failures_total',
    'Failed notification deliveries',
    ['channel', 'item_type', 'error_type']
)

# Channel-specific metrics
email_notifications = Counter(
    'alert_email_notifications_total',
    'Email notifications sent',
    ['status', 'item_type']
)

whatsapp_notifications = Counter(
    'alert_whatsapp_notifications_total',
    'WhatsApp notifications sent',
    ['status', 'item_type']
)

sse_events_sent = Counter(
    'alert_sse_events_sent_total',
    'SSE events sent to dashboard',
    ['tenant', 'event_type', 'item_type']
)
# =================================================================
# SSE METRICS
# =================================================================

# SSE connection metrics
sse_active_connections = Gauge(
    'alert_sse_active_connections',
    'Number of active SSE connections',
    ['tenant_id']
)

sse_connection_duration = Histogram(
    'alert_sse_connection_duration_seconds',
    'Duration of SSE connections',
    buckets=[10, 30, 60, 300, 600, 1800, 3600]
)

sse_message_queue_size = Gauge(
    'alert_sse_message_queue_size',
    'Current size of SSE message queues',
    ['tenant_id']
)

sse_connection_errors = Counter(
    'alert_sse_connection_errors_total',
    'SSE connection errors',
    ['error_type', 'tenant_id']
)
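A hedged sketch of tracking an SSE connection lifecycle with these metrics; the handler shape and `send` callable are assumptions, since the real handler lives in the gateway:

```python
import time

from shared.monitoring.alert_metrics import (
    sse_active_connections, sse_connection_duration, sse_connection_errors,
)


async def stream_events(tenant_id: str, send) -> None:
    """Illustrative SSE handler shape; not the repo's actual handler."""
    started = time.time()
    sse_active_connections.labels(tenant_id=tenant_id).inc()
    try:
        await send("event: ping\ndata: {}\n\n")
    except Exception as exc:
        sse_connection_errors.labels(
            error_type=type(exc).__name__, tenant_id=tenant_id
        ).inc()
        raise
    finally:
        sse_active_connections.labels(tenant_id=tenant_id).dec()
        sse_connection_duration.observe(time.time() - started)
```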
# =================================================================
# SYSTEM HEALTH METRICS
# =================================================================

# Active items gauge
active_items_gauge = Gauge(
    'alert_active_items_current',
    'Current number of active alerts and recommendations',
    ['tenant_id', 'item_type', 'severity']
)

# System component health
system_component_health = Gauge(
    'alert_system_component_health',
    'Health status of alert system components (1=healthy, 0=unhealthy)',
    ['component', 'service']
)

# Leader election status
scheduler_leader_status = Gauge(
    'alert_scheduler_leader_status',
    'Leader election status for schedulers (1=leader, 0=follower)',
    ['service']
)

# Message queue health
rabbitmq_connection_status = Gauge(
    'alert_rabbitmq_connection_status',
    'RabbitMQ connection status (1=connected, 0=disconnected)',
    ['service']
)

redis_connection_status = Gauge(
    'alert_redis_connection_status',
    'Redis connection status (1=connected, 0=disconnected)',
    ['service']
)
# =================================================================
# BUSINESS METRICS
# =================================================================

# Alert response metrics
items_acknowledged = Counter(
    'alert_items_acknowledged_total',
    'Number of items acknowledged by users',
    ['item_type', 'severity', 'service']
)

items_resolved = Counter(
    'alert_items_resolved_total',
    'Number of items resolved by users',
    ['item_type', 'severity', 'service']
)

item_response_time = Histogram(
    'alert_item_response_time_seconds',
    'Time from item creation to acknowledgment',
    ['item_type', 'severity'],
    buckets=[60, 300, 600, 1800, 3600, 7200, 14400]
)

# Recommendation adoption
recommendations_implemented = Counter(
    'alert_recommendations_implemented_total',
    'Number of recommendations marked as implemented',
    ['type', 'service']
)

# Effectiveness metrics
false_positive_rate = Gauge(
    'alert_false_positive_rate',
    'Rate of false positive alerts',
    ['service', 'alert_type']
)
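A small sketch of recording response time at acknowledgment; the hook name and timestamp arguments are assumptions about where the calling code would live:

```python
from shared.monitoring.alert_metrics import item_response_time, items_acknowledged


def on_item_acknowledged(item_type: str, severity: str, service: str,
                         created_at: float, acked_at: float) -> None:
    """Illustrative acknowledgment hook; names are assumptions."""
    items_acknowledged.labels(
        item_type=item_type, severity=severity, service=service
    ).inc()
    item_response_time.labels(item_type=item_type, severity=severity).observe(
        acked_at - created_at
    )
```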
# =================================================================
# PERFORMANCE DECORATORS
# =================================================================

def track_duration(metric: Histogram, **labels):
    """Decorator to track function execution time"""
    def decorator(func):
        @wraps(func)
        async def async_wrapper(*args, **kwargs):
            start_time = time.time()
            try:
                return await func(*args, **kwargs)
            finally:
                # Record duration on both success and error paths
                metric.labels(**labels).observe(time.time() - start_time)

        @wraps(func)
        def sync_wrapper(*args, **kwargs):
            start_time = time.time()
            try:
                return func(*args, **kwargs)
            finally:
                metric.labels(**labels).observe(time.time() - start_time)

        # iscoroutinefunction is more robust than inspecting co_flags directly
        return async_wrapper if asyncio.iscoroutinefunction(func) else sync_wrapper
    return decorator


def track_errors(error_counter: Counter, **labels):
    """Decorator to track errors in functions"""
    def decorator(func):
        @wraps(func)
        async def async_wrapper(*args, **kwargs):
            try:
                return await func(*args, **kwargs)
            except Exception as e:
                error_counter.labels(error_type=type(e).__name__, **labels).inc()
                raise

        @wraps(func)
        def sync_wrapper(*args, **kwargs):
            try:
                return func(*args, **kwargs)
            except Exception as e:
                error_counter.labels(error_type=type(e).__name__, **labels).inc()
                raise

        return async_wrapper if asyncio.iscoroutinefunction(func) else sync_wrapper
    return decorator
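A usage sketch for the two decorators; `fetch_weather_data` is illustrative, and the label values must match each metric's declared label names (`service` and `check_type` here):

```python
from shared.monitoring.alert_metrics import (
    alert_detection_errors, item_check_duration, track_duration, track_errors,
)


@track_duration(item_check_duration, service="data-service", check_type="weather")
@track_errors(alert_detection_errors, service="data-service", check_type="weather")
async def fetch_weather_data() -> dict:
    """Illustrative check; the real detection routines live in each service."""
    return {}
```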
# =================================================================
# UTILITY FUNCTIONS
# =================================================================

def record_item_published(service: str, item_type: str, severity: str, alert_type: str):
    """Record that an item was published"""
    items_published.labels(
        service=service,
        item_type=item_type,
        severity=severity,
        type=alert_type
    ).inc()


def record_item_processed(item_type: str, severity: str, alert_type: str, status: str):
    """Record that an item was processed"""
    items_processed.labels(
        item_type=item_type,
        severity=severity,
        type=alert_type,
        status=status
    ).inc()


def record_notification_sent(channel: str, item_type: str, severity: str, status: str):
    """Record notification delivery"""
    notifications_sent.labels(
        channel=channel,
        item_type=item_type,
        severity=severity,
        status=status
    ).inc()


def update_active_items(tenant_id: str, item_type: str, severity: str, count: int):
    """Update active items gauge"""
    active_items_gauge.labels(
        tenant_id=tenant_id,
        item_type=item_type,
        severity=severity
    ).set(count)


def update_component_health(component: str, service: str, is_healthy: bool):
    """Update component health status"""
    system_component_health.labels(
        component=component,
        service=service
    ).set(1 if is_healthy else 0)


def update_connection_status(connection_type: str, service: str, is_connected: bool):
    """Update connection status"""
    if connection_type == 'rabbitmq':
        rabbitmq_connection_status.labels(service=service).set(1 if is_connected else 0)
    elif connection_type == 'redis':
        redis_connection_status.labels(service=service).set(1 if is_connected else 0)
# =================================================================
# METRICS AGGREGATOR
# =================================================================

def _metric_total(metric, **label_filter) -> float:
    """Sum a metric's samples via the public collect() API, optionally
    filtered by label values (avoids reaching into prometheus_client
    internals, which differ for labelled metrics)."""
    return sum(
        sample.value
        for family in metric.collect()
        for sample in family.samples
        if not sample.name.endswith('_created')
        and all(sample.labels.get(k) == v for k, v in label_filter.items())
    )


class AlertMetricsCollector:
    """Centralized metrics collector for alert system"""

    def __init__(self, service_name: str):
        self.service_name = service_name

    def record_check_performed(self, check_type: str, pattern: str):
        """Record that a check was performed"""
        item_checks_performed.labels(
            service=self.service_name,
            check_type=check_type,
            pattern=pattern
        ).inc()

    def record_detection_error(self, error_type: str, check_type: str):
        """Record detection error"""
        alert_detection_errors.labels(
            service=self.service_name,
            error_type=error_type,
            check_type=check_type
        ).inc()

    def record_duplicate_prevented(self, item_type: str, alert_type: str):
        """Record prevented duplicate"""
        duplicate_items_prevented.labels(
            service=self.service_name,
            item_type=item_type,
            type=alert_type
        ).inc()

    def update_leader_status(self, is_leader: bool):
        """Update leader election status"""
        scheduler_leader_status.labels(service=self.service_name).set(1 if is_leader else 0)

    def get_service_metrics(self) -> Dict[str, Any]:
        """Get all metrics for this service"""
        return {
            'service': self.service_name,
            'items_published': _metric_total(items_published, service=self.service_name),
            'checks_performed': _metric_total(item_checks_performed, service=self.service_name),
            'detection_errors': _metric_total(alert_detection_errors, service=self.service_name),
            'duplicates_prevented': _metric_total(duplicate_items_prevented, service=self.service_name)
        }
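A brief usage sketch for the collector; the service name and label values are illustrative:

```python
from shared.monitoring.alert_metrics import AlertMetricsCollector

collector = AlertMetricsCollector("forecasting-service")
collector.record_check_performed(check_type="stock", pattern="low_stock")
collector.record_duplicate_prevented(item_type="alert", alert_type="low_stock")
collector.update_leader_status(is_leader=True)
```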
# =================================================================
# DASHBOARD METRICS
# =================================================================

def get_system_overview_metrics() -> Dict[str, Any]:
    """Get overview metrics for monitoring dashboard"""
    try:
        return {
            'total_items_published': _metric_total(items_published),
            'total_checks_performed': _metric_total(item_checks_performed),
            'total_notifications_sent': _metric_total(notifications_sent),
            'active_sse_connections': _metric_total(sse_active_connections),
            'processing_errors': _metric_total(processing_errors),
            'delivery_failures': _metric_total(delivery_failures),
            'timestamp': time.time()
        }
    except Exception as e:
        logger.error("Error collecting overview metrics", error=str(e))
        return {'error': str(e), 'timestamp': time.time()}


def get_tenant_metrics(tenant_id: str) -> Dict[str, Any]:
    """Get metrics for a specific tenant"""
    try:
        return {
            'tenant_id': tenant_id,
            'active_connections': _metric_total(sse_active_connections, tenant_id=tenant_id),
            # sse_events_sent labels its tenant dimension 'tenant'
            'events_sent': _metric_total(sse_events_sent, tenant=tenant_id),
            'timestamp': time.time()
        }
    except Exception as e:
        logger.error("Error collecting tenant metrics", tenant_id=tenant_id, error=str(e))
        return {'tenant_id': tenant_id, 'error': str(e), 'timestamp': time.time()}
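For completeness, a hedged note on exposure: these metrics only reach the Prometheus instance if each service serves them over HTTP. A minimal sketch using prometheus_client's built-in server follows; the port is an assumption, and the real services may instead expose `/metrics` through their web framework:

```python
from prometheus_client import start_http_server

# Port 9102 is an assumption; Prometheus would scrape this endpoint.
start_http_server(9102)
```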
@@ -1,8 +0,0 @@
# ================================================================
# shared/notifications/__init__.py
# ================================================================
"""
Shared Notifications Module - Alert integration using existing notification service
"""

__all__ = []