Add new alert architecture

Urtzi Alfaro
2025-08-23 10:19:58 +02:00
parent 1a9839240e
commit 4b4268d640
45 changed files with 6518 additions and 1590 deletions

README.md
View File

@@ -1,136 +0,0 @@
# Bakery Forecasting Platform - Microservices
## Overview
AI-powered demand forecasting platform for bakeries in Madrid, Spain, built on a microservices architecture.
## Architecture
- **API Gateway**: Central entry point for all client requests
- **Auth Service**: User authentication and authorization
- **Training Service**: ML model training for demand forecasting
- **Forecasting Service**: Generate predictions using trained models
- **Data Service**: External data integration (weather, traffic, events)
- **Tenant Service**: Multi-tenant management
- **Notification Service**: Email and WhatsApp notifications
## Quick Start
### Prerequisites
- Docker and Docker Compose
- Python 3.11+
- Node.js 18+
### Setup
```bash
# Run the setup script
./scripts/setup.sh
# Start services
docker-compose up -d
# Check service health
curl http://localhost:8000/health
```
### Services
- **Gateway**: http://localhost:8000
- **API Docs**: http://localhost:8000/docs
- **Grafana**: http://localhost:3002
- **Prometheus**: http://localhost:9090
- **RabbitMQ Management**: http://localhost:15672
### Development
#### Running Tests
```bash
./scripts/test.sh
```
#### Building Services
```bash
docker-compose build
```
#### Viewing Logs
```bash
# All services
docker-compose logs -f
# Specific service
docker-compose logs -f auth-service
```
#### Service URLs (Development)
- Gateway: http://localhost:8000
- Auth Service: http://localhost:8001
- Training Service: http://localhost:8002
- Forecasting Service: http://localhost:8003
- Data Service: http://localhost:8004
- Tenant Service: http://localhost:8005
- Notification Service: http://localhost:8006
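To verify everything at once, a small script can walk this list (a minimal sketch; it assumes every service exposes the same `/health` endpoint shown for the gateway, and uses the built-in `fetch` from Node.js 18+):
```typescript
// healthcheck.ts — probe each service's /health endpoint.
// Ports are taken from the development URL list above.
const services: Record<string, string> = {
  gateway: 'http://localhost:8000',
  auth: 'http://localhost:8001',
  training: 'http://localhost:8002',
  forecasting: 'http://localhost:8003',
  data: 'http://localhost:8004',
  tenant: 'http://localhost:8005',
  notification: 'http://localhost:8006',
};

async function checkAll(): Promise<void> {
  for (const [name, baseUrl] of Object.entries(services)) {
    try {
      const res = await fetch(`${baseUrl}/health`);
      console.log(`${name}: ${res.ok ? 'healthy' : `unhealthy (HTTP ${res.status})`}`);
    } catch {
      console.log(`${name}: unreachable`);
    }
  }
}

checkAll().catch((err) => console.error(err));
```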
## Environment Variables
Copy `.env.example` to `.env` and update the following:
```bash
# External API Keys
AEMET_API_KEY=your-aemet-api-key
MADRID_OPENDATA_API_KEY=your-madrid-opendata-key
# Email Configuration
SMTP_USER=your-email@gmail.com
SMTP_PASSWORD=your-email-password
# WhatsApp API
WHATSAPP_API_KEY=your-whatsapp-api-key
# JWT Secret (change in production!)
JWT_SECRET_KEY=your-super-secret-jwt-key-change-in-production
```
## Troubleshooting
### Services won't start
```bash
# Check if ports are available
docker-compose ps
netstat -tulpn | grep :8000
# Restart services
docker-compose down
docker-compose up -d
```
### Database connection issues
```bash
# Check database containers
docker-compose logs auth-db
docker-compose logs training-db
# Reset databases
docker-compose down -v
docker-compose up -d
```
### Service communication issues
```bash
# Check service health
curl http://localhost:8000/health
curl http://localhost:8001/health
curl http://localhost:8002/health
# Check RabbitMQ
open http://localhost:15672
# User: bakery, Password: forecast123
```
## Next Steps
1. **Configure External APIs**: Add your AEMET and Madrid Open Data API keys
2. **Test Authentication**: Register a user and test login
3. **Upload Sales Data**: Import historical sales data
4. **Train Models**: Start your first training job
5. **Generate Forecasts**: Create demand predictions
## License
MIT License

docker-compose.yml
View File

@@ -730,6 +730,43 @@ services:
timeout: 10s
retries: 3
alert-processor:
build:
context: .
dockerfile: ./services/alert_processor/Dockerfile
args:
- ENVIRONMENT=${ENVIRONMENT}
- BUILD_DATE=${BUILD_DATE}
image: bakery/alert-processor:${IMAGE_TAG}
restart: unless-stopped
env_file: .env
depends_on:
redis:
condition: service_healthy
rabbitmq:
condition: service_healthy
notification-service:
condition: service_healthy
networks:
- bakery-network
volumes:
- log_storage:/app/logs
- ./services/alert_processor:/app
- ./shared:/app/shared
deploy:
replicas: 2
resources:
limits:
memory: 512M
reservations:
memory: 256M
# No health check needed - this is a background worker service
# healthcheck:
# test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"]
# interval: 30s
# timeout: 10s
# retries: 3
inventory-service:
build:
context: .
@@ -760,7 +797,7 @@ services:
- ./services/inventory:/app
- ./shared:/app/shared
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health/')"]
interval: 30s
timeout: 10s
retries: 3
@@ -797,7 +834,7 @@ services:
- ./services/recipes:/app
- ./shared:/app/shared
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"]
interval: 30s
timeout: 10s
retries: 3
@@ -835,7 +872,7 @@ services:
- ./services/suppliers:/app
- ./shared:/app/shared
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"]
interval: 30s
timeout: 10s
retries: 3
@@ -911,7 +948,7 @@ services:
- ./services/orders:/app
- ./shared:/app/shared
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"]
interval: 30s
timeout: 10s
retries: 3
@@ -950,7 +987,7 @@ services:
- ./services/production:/app
- ./shared:/app/shared
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"]
interval: 30s
timeout: 10s
retries: 3

View File

@@ -0,0 +1,304 @@
// frontend/src/components/alerts/AlertCard.tsx
/**
* Individual alert/recommendation card component
* Displays alert details with appropriate styling and actions
*/
import React, { useState } from 'react';
import { AlertItem, ItemSeverity, ItemType } from '../../types/alerts';
import { formatDistanceToNow } from 'date-fns';
import { es } from 'date-fns/locale';
interface AlertCardProps {
item: AlertItem;
onAcknowledge: (itemId: string) => void;
onResolve: (itemId: string) => void;
compact?: boolean;
showActions?: boolean;
}
const getSeverityConfig = (severity: ItemSeverity, itemType: ItemType) => {
if (itemType === 'recommendation') {
switch (severity) {
case 'high':
return {
color: 'bg-blue-50 border-blue-200 text-blue-900',
icon: '💡',
badge: 'bg-blue-100 text-blue-800'
};
case 'medium':
return {
color: 'bg-blue-50 border-blue-100 text-blue-800',
icon: '💡',
badge: 'bg-blue-50 text-blue-600'
};
case 'low':
return {
color: 'bg-gray-50 border-gray-200 text-gray-700',
icon: '💡',
badge: 'bg-gray-100 text-gray-600'
};
default:
return {
color: 'bg-blue-50 border-blue-200 text-blue-900',
icon: '💡',
badge: 'bg-blue-100 text-blue-800'
};
}
} else {
switch (severity) {
case 'urgent':
return {
color: 'bg-red-50 border-red-300 text-red-900',
icon: '🚨',
badge: 'bg-red-100 text-red-800',
pulse: true
};
case 'high':
return {
color: 'bg-orange-50 border-orange-200 text-orange-900',
icon: '⚠️',
badge: 'bg-orange-100 text-orange-800'
};
case 'medium':
return {
color: 'bg-yellow-50 border-yellow-200 text-yellow-900',
icon: '🔔',
badge: 'bg-yellow-100 text-yellow-800'
};
case 'low':
return {
color: 'bg-green-50 border-green-200 text-green-900',
icon: '✅',
badge: 'bg-green-100 text-green-800'
};
default:
return {
color: 'bg-gray-50 border-gray-200 text-gray-700',
icon: '📋',
badge: 'bg-gray-100 text-gray-600'
};
}
}
};
const getStatusConfig = (status: string) => {
switch (status) {
case 'acknowledged':
return {
color: 'bg-blue-100 text-blue-800',
label: 'Reconocido'
};
case 'resolved':
return {
color: 'bg-green-100 text-green-800',
label: 'Resuelto'
};
default:
return {
color: 'bg-gray-100 text-gray-800',
label: 'Activo'
};
}
};
export const AlertCard: React.FC<AlertCardProps> = ({
item,
onAcknowledge,
onResolve,
compact = false,
showActions = true
}) => {
const [isExpanded, setIsExpanded] = useState(false);
const [actionLoading, setActionLoading] = useState<string | null>(null);
const severityConfig = getSeverityConfig(item.severity, item.item_type);
const statusConfig = getStatusConfig(item.status);
const handleAction = async (action: () => void, actionType: string) => {
setActionLoading(actionType);
try {
await action();
} finally {
setActionLoading(null);
}
};
const timeAgo = formatDistanceToNow(new Date(item.timestamp), {
addSuffix: true,
locale: es
});
return (
<div className={`
rounded-lg border-2 transition-all duration-200 hover:shadow-md
${severityConfig.color}
${severityConfig.pulse ? 'animate-pulse' : ''}
${item.status !== 'active' ? 'opacity-75' : ''}
`}>
{/* Header */}
<div className="p-4">
<div className="flex items-start justify-between">
<div className="flex items-start space-x-3 flex-1 min-w-0">
{/* Icon and Type Badge */}
<div className="flex-shrink-0">
<span className="text-2xl">{severityConfig.icon}</span>
</div>
<div className="flex-1 min-w-0">
{/* Title and Badges */}
<div className="flex items-start justify-between mb-2">
<div className="flex-1 min-w-0">
<h3 className="text-lg font-semibold truncate">
{item.title}
</h3>
<div className="flex items-center space-x-2 mt-1">
<span className={`
inline-flex items-center px-2 py-0.5 rounded-full text-xs font-medium
${severityConfig.badge}
`}>
{item.item_type === 'alert' ? 'Alerta' : 'Recomendación'} - {item.severity}
</span>
<span className={`
inline-flex items-center px-2 py-0.5 rounded-full text-xs font-medium
${statusConfig.color}
`}>
{statusConfig.label}
</span>
<span className="text-xs text-gray-500">
{item.service}
</span>
</div>
</div>
{/* Expand Button */}
{!compact && (
<button
onClick={() => setIsExpanded(!isExpanded)}
className="ml-2 text-gray-400 hover:text-gray-600 transition-colors"
>
<svg
className={`w-5 h-5 transform transition-transform ${
isExpanded ? 'rotate-180' : ''
}`}
fill="none"
stroke="currentColor"
viewBox="0 0 24 24"
>
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M19 9l-7 7-7-7" />
</svg>
</button>
)}
</div>
{/* Message */}
<p className={`text-sm ${compact ? 'line-clamp-2' : ''}`}>
{item.message}
</p>
{/* Timestamp */}
<p className="text-xs text-gray-500 mt-2">
{timeAgo} · {new Date(item.timestamp).toLocaleString('es-ES')}
</p>
</div>
</div>
</div>
{/* Quick Actions */}
{showActions && item.status === 'active' && (
<div className="flex items-center space-x-2 mt-3">
<button
onClick={() => handleAction(() => onAcknowledge(item.id), 'acknowledge')}
disabled={actionLoading === 'acknowledge'}
className="inline-flex items-center px-3 py-1 border border-transparent text-sm font-medium rounded-md text-blue-700 bg-blue-100 hover:bg-blue-200 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-blue-500 disabled:opacity-50"
>
{actionLoading === 'acknowledge' ? (
<svg className="animate-spin -ml-1 mr-2 h-4 w-4 text-blue-700" fill="none" viewBox="0 0 24 24">
<circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4"></circle>
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z"></path>
</svg>
) : (
<svg className="w-4 h-4 mr-1" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M5 13l4 4L19 7" />
</svg>
)}
Reconocer
</button>
<button
onClick={() => handleAction(() => onResolve(item.id), 'resolve')}
disabled={actionLoading === 'resolve'}
className="inline-flex items-center px-3 py-1 border border-transparent text-sm font-medium rounded-md text-green-700 bg-green-100 hover:bg-green-200 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-green-500 disabled:opacity-50"
>
{actionLoading === 'resolve' ? (
<svg className="animate-spin -ml-1 mr-2 h-4 w-4 text-green-700" fill="none" viewBox="0 0 24 24">
<circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4"></circle>
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z"></path>
</svg>
) : (
<svg className="w-4 h-4 mr-1" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M9 12l2 2 4-4m6 2a9 9 0 11-18 0 9 9 0 0118 0z" />
</svg>
)}
Resolver
</button>
</div>
)}
</div>
{/* Expanded Details */}
{isExpanded && (
<div className="border-t border-gray-200 px-4 py-3 bg-gray-50 bg-opacity-50">
{/* Actions */}
{item.actions.length > 0 && (
<div className="mb-3">
<h4 className="text-sm font-medium text-gray-700 mb-2">Acciones sugeridas:</h4>
<ul className="list-disc list-inside space-y-1">
{item.actions.map((action, index) => (
<li key={index} className="text-sm text-gray-600">
{action}
</li>
))}
</ul>
</div>
)}
{/* Metadata */}
{Object.keys(item.metadata).length > 0 && (
<div className="mb-3">
<h4 className="text-sm font-medium text-gray-700 mb-2">Detalles técnicos:</h4>
<div className="grid grid-cols-1 md:grid-cols-2 gap-2">
{Object.entries(item.metadata).map(([key, value]) => (
<div key={key} className="text-sm">
<span className="font-medium text-gray-600">{key}:</span>{' '}
<span className="text-gray-800">
{typeof value === 'object' ? JSON.stringify(value) : String(value)}
</span>
</div>
))}
</div>
</div>
)}
{/* Acknowledgment/Resolution Info */}
{(item.acknowledged_at || item.resolved_at) && (
<div className="text-xs text-gray-500 space-y-1">
{item.acknowledged_at && (
<p>
Reconocido: {new Date(item.acknowledged_at).toLocaleString('es-ES')}
{item.acknowledged_by && ` por ${item.acknowledged_by}`}
</p>
)}
{item.resolved_at && (
<p>
Resuelto: {new Date(item.resolved_at).toLocaleString('es-ES')}
{item.resolved_by && ` por ${item.resolved_by}`}
</p>
)}
</div>
)}
</div>
)}
</div>
);
};
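Since the card is purely presentational, it can be exercised in isolation with sample data. A hypothetical sketch (the item values and logging handlers are illustrative, not part of this commit):
```tsx
import React from 'react';
import { AlertCard } from './AlertCard';
import { AlertItem } from '../../types/alerts';

// Sample data covering the required AlertItem fields.
const sampleAlert: AlertItem = {
  id: 'demo-1',
  tenant_id: 'tenant-demo',
  item_type: 'alert',
  type: 'low_stock',
  severity: 'urgent',
  status: 'active',
  service: 'inventory-service',
  title: 'Stock crítico: harina de trigo',
  message: 'Quedan 2 kg; la producción de mañana necesita 25 kg.',
  actions: ['Crear pedido urgente al proveedor'],
  metadata: { ingredient_id: 'flour-001', current_stock_kg: 2 },
  created_at: new Date().toISOString(),
  timestamp: new Date().toISOString(),
};

// Renders one card with logging handlers, e.g. for a Storybook story.
export const AlertCardDemo: React.FC = () => (
  <AlertCard
    item={sampleAlert}
    onAcknowledge={(id) => console.log('acknowledge', id)}
    onResolve={(id) => console.log('resolve', id)}
  />
);
```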

View File

@@ -0,0 +1,347 @@
// frontend/src/components/alerts/AlertDashboard.tsx
/**
* Main dashboard component for alerts and recommendations
* Provides filtering, bulk actions, and real-time updates
*/
import React, { useState, useEffect, useMemo } from 'react';
import { AlertItem, ItemFilters, ItemType, ItemSeverity, ItemStatus } from '../../types/alerts';
import { useAlertStream } from '../../hooks/useAlertStream';
import { AlertCard } from './AlertCard';
import { AlertFilters } from './AlertFilters';
import { AlertStats } from './AlertStats';
import { ConnectionStatus } from './ConnectionStatus';
import { useTenantId } from '../../hooks/useTenantId';
interface AlertDashboardProps {
className?: string;
maxItems?: number;
autoRequestNotifications?: boolean;
}
export const AlertDashboard: React.FC<AlertDashboardProps> = ({
className = '',
maxItems = 50,
autoRequestNotifications = true
}) => {
const tenantId = useTenantId();
const {
items,
connectionState,
urgentCount,
highCount,
recCount,
acknowledgeItem,
resolveItem,
notificationPermission,
requestNotificationPermission
} = useAlertStream({ tenantId });
const [filters, setFilters] = useState<ItemFilters>({
item_type: 'all',
severity: 'all',
status: 'all',
service: 'all',
search: ''
});
const [selectedItems, setSelectedItems] = useState<string[]>([]);
const [bulkActionsOpen, setBulkActionsOpen] = useState(false);
const [viewMode, setViewMode] = useState<'list' | 'compact'>('list');
// Request notification permission on mount if needed
useEffect(() => {
if (autoRequestNotifications && notificationPermission === 'default') {
// Delay request to avoid immediate popup
const timer = setTimeout(() => {
requestNotificationPermission();
}, 2000);
return () => clearTimeout(timer);
}
}, [autoRequestNotifications, notificationPermission, requestNotificationPermission]);
// Filter items based on current filters
const filteredItems = useMemo(() => {
let filtered = items;
// Filter by type
if (filters.item_type !== 'all') {
filtered = filtered.filter(item => item.item_type === filters.item_type);
}
// Filter by severity
if (filters.severity !== 'all') {
filtered = filtered.filter(item => item.severity === filters.severity);
}
// Filter by status
if (filters.status !== 'all') {
filtered = filtered.filter(item => item.status === filters.status);
}
// Filter by service
if (filters.service !== 'all') {
filtered = filtered.filter(item => item.service === filters.service);
}
// Filter by search text
if (filters.search.trim()) {
const searchLower = filters.search.toLowerCase();
filtered = filtered.filter(item =>
item.title.toLowerCase().includes(searchLower) ||
item.message.toLowerCase().includes(searchLower) ||
item.type.toLowerCase().includes(searchLower)
);
}
return filtered.slice(0, maxItems);
}, [items, filters, maxItems]);
// Get unique services for filter dropdown
const availableServices = useMemo(() => {
const services = [...new Set(items.map(item => item.service))].sort();
return services;
}, [items]);
// Handle bulk actions
const handleBulkAcknowledge = async () => {
await Promise.all(selectedItems.map(id => acknowledgeItem(id)));
setSelectedItems([]);
setBulkActionsOpen(false);
};
const handleBulkResolve = async () => {
await Promise.all(selectedItems.map(id => resolveItem(id)));
setSelectedItems([]);
setBulkActionsOpen(false);
};
const handleSelectAll = () => {
const selectableItems = filteredItems
.filter(item => item.status === 'active')
.map(item => item.id);
setSelectedItems(selectableItems);
};
const handleClearSelection = () => {
setSelectedItems([]);
setBulkActionsOpen(false);
};
const toggleItemSelection = (itemId: string) => {
setSelectedItems(prev =>
prev.includes(itemId)
? prev.filter(id => id !== itemId)
: [...prev, itemId]
);
};
const activeItems = filteredItems.filter(item => item.status === 'active');
const hasSelection = selectedItems.length > 0;
return (
<div className={`max-w-7xl mx-auto ${className}`}>
{/* Header */}
<div className="bg-white shadow-sm border-b border-gray-200 px-6 py-4">
<div className="flex items-center justify-between">
<div>
<h1 className="text-2xl font-bold text-gray-900">
Sistema de Alertas y Recomendaciones
</h1>
<p className="text-sm text-gray-600 mt-1">
Monitoreo en tiempo real de operaciones de panadería
</p>
</div>
{/* Connection Status */}
<ConnectionStatus connectionState={connectionState} />
</div>
</div>
{/* Stats */}
<AlertStats
urgentCount={urgentCount}
highCount={highCount}
recCount={recCount}
totalItems={items.length}
activeItems={activeItems.length}
/>
{/* Notification Permission Banner */}
{notificationPermission === 'denied' && (
<div className="bg-yellow-50 border border-yellow-200 rounded-md p-4 mx-6 mt-4">
<div className="flex">
<div className="flex-shrink-0">
<svg className="h-5 w-5 text-yellow-400" fill="currentColor" viewBox="0 0 20 20">
<path fillRule="evenodd" d="M8.257 3.099c.765-1.36 2.722-1.36 3.486 0l5.58 9.92c.75 1.334-.213 2.98-1.742 2.98H4.42c-1.53 0-2.493-1.646-1.743-2.98l5.58-9.92zM11 13a1 1 0 11-2 0 1 1 0 012 0zm-1-8a1 1 0 00-1 1v3a1 1 0 002 0V6a1 1 0 00-1-1z" clipRule="evenodd" />
</svg>
</div>
<div className="ml-3">
<h3 className="text-sm font-medium text-yellow-800">
Notificaciones bloqueadas
</h3>
<p className="text-sm text-yellow-700 mt-1">
Las notificaciones del navegador están deshabilitadas. No recibirás alertas urgentes en tiempo real.
</p>
</div>
</div>
</div>
)}
{/* Filters and View Controls */}
<div className="bg-white border-b border-gray-200 px-6 py-4">
<div className="flex flex-col lg:flex-row lg:items-center lg:justify-between space-y-4 lg:space-y-0">
<AlertFilters
filters={filters}
onFiltersChange={setFilters}
availableServices={availableServices}
/>
<div className="flex items-center space-x-4">
{/* View Mode Toggle */}
<div className="flex rounded-md shadow-sm">
<button
onClick={() => setViewMode('list')}
className={`px-4 py-2 text-sm font-medium rounded-l-md border ${
viewMode === 'list'
? 'bg-blue-50 border-blue-200 text-blue-700'
: 'bg-white border-gray-300 text-gray-700 hover:bg-gray-50'
}`}
>
Lista
</button>
<button
onClick={() => setViewMode('compact')}
className={`px-4 py-2 text-sm font-medium rounded-r-md border-l-0 border ${
viewMode === 'compact'
? 'bg-blue-50 border-blue-200 text-blue-700'
: 'bg-white border-gray-300 text-gray-700 hover:bg-gray-50'
}`}
>
Compacto
</button>
</div>
{/* Bulk Actions */}
{activeItems.length > 0 && (
<div className="flex items-center space-x-2">
<button
onClick={() => setBulkActionsOpen(!bulkActionsOpen)}
className="inline-flex items-center px-4 py-2 border border-gray-300 rounded-md shadow-sm text-sm font-medium text-gray-700 bg-white hover:bg-gray-50"
>
Acciones masivas
<svg className="ml-2 h-4 w-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M19 9l-7 7-7-7" />
</svg>
</button>
</div>
)}
</div>
</div>
{/* Bulk Actions Panel */}
{bulkActionsOpen && activeItems.length > 0 && (
<div className="mt-4 p-4 bg-gray-50 rounded-lg border border-gray-200">
<div className="flex items-center justify-between">
<div className="flex items-center space-x-4">
<span className="text-sm text-gray-600">
{selectedItems.length} elementos seleccionados
</span>
<button
onClick={handleSelectAll}
className="text-sm text-blue-600 hover:text-blue-800"
>
Seleccionar todos los activos
</button>
<button
onClick={handleClearSelection}
className="text-sm text-gray-600 hover:text-gray-800"
>
Limpiar selección
</button>
</div>
{hasSelection && (
<div className="flex items-center space-x-2">
<button
onClick={handleBulkAcknowledge}
className="inline-flex items-center px-3 py-1 border border-transparent text-sm font-medium rounded-md text-blue-700 bg-blue-100 hover:bg-blue-200"
>
Reconocer seleccionados
</button>
<button
onClick={handleBulkResolve}
className="inline-flex items-center px-3 py-1 border border-transparent text-sm font-medium rounded-md text-green-700 bg-green-100 hover:bg-green-200"
>
Resolver seleccionados
</button>
</div>
)}
</div>
</div>
)}
</div>
{/* Items List */}
<div className="px-6 py-4">
{filteredItems.length === 0 ? (
<div className="text-center py-12">
{items.length === 0 ? (
<div>
<svg className="mx-auto h-12 w-12 text-gray-400" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M9 12l2 2 4-4m6 2a9 9 0 11-18 0 9 9 0 0118 0z" />
</svg>
<h3 className="mt-2 text-sm font-medium text-gray-900">
Sistema operativo
</h3>
<p className="mt-1 text-sm text-gray-500">
No hay alertas activas en este momento. Todas las operaciones funcionan correctamente.
</p>
</div>
) : (
<div>
<svg className="mx-auto h-12 w-12 text-gray-400" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M21 21l-6-6m2-5a7 7 0 11-14 0 7 7 0 0114 0z" />
</svg>
<h3 className="mt-2 text-sm font-medium text-gray-900">
No se encontraron elementos
</h3>
<p className="mt-1 text-sm text-gray-500">
Intenta ajustar los filtros para ver más elementos.
</p>
</div>
)}
</div>
) : (
<div className={`space-y-4 ${viewMode === 'compact' ? 'space-y-2' : ''}`}>
{filteredItems.map((item) => (
<div key={item.id} className="relative">
{/* Selection Checkbox */}
{bulkActionsOpen && item.status === 'active' && (
<div className="absolute left-2 top-4 z-10">
<input
type="checkbox"
checked={selectedItems.includes(item.id)}
onChange={() => toggleItemSelection(item.id)}
className="h-4 w-4 text-blue-600 focus:ring-blue-500 border-gray-300 rounded"
/>
</div>
)}
<div className={bulkActionsOpen && item.status === 'active' ? 'ml-8' : ''}>
<AlertCard
item={item}
onAcknowledge={acknowledgeItem}
onResolve={resolveItem}
compact={viewMode === 'compact'}
showActions={!bulkActionsOpen}
/>
</div>
</div>
))}
</div>
)}
</div>
</div>
);
};
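Because the dashboard resolves its tenant through useTenantId and manages its own SSE lifecycle, embedding it only requires mounting the component. A hypothetical page wrapper (the page name and prop values are assumptions):
```tsx
import React from 'react';
import { AlertDashboard } from '../components/alerts/AlertDashboard';

export const AlertsPage: React.FC = () => (
  // Raise the item cap above the default 50 and defer the notification
  // permission prompt to a dedicated settings screen.
  <AlertDashboard className="py-6" maxItems={100} autoRequestNotifications={false} />
);
```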

View File

@@ -0,0 +1,148 @@
// frontend/src/components/alerts/AlertFilters.tsx
/**
* Filter controls for the alert dashboard
*/
import React from 'react';
import { ItemFilters, ItemType, ItemSeverity, ItemStatus } from '../../types/alerts';
interface AlertFiltersProps {
filters: ItemFilters;
onFiltersChange: (filters: ItemFilters) => void;
availableServices: string[];
}
export const AlertFilters: React.FC<AlertFiltersProps> = ({
filters,
onFiltersChange,
availableServices
}) => {
const updateFilter = (key: keyof ItemFilters, value: string) => {
onFiltersChange({
...filters,
[key]: value
});
};
return (
<div className="flex flex-col sm:flex-row sm:items-center space-y-2 sm:space-y-0 sm:space-x-4">
{/* Search */}
<div className="flex-1 min-w-0">
<label htmlFor="search" className="sr-only">
Buscar
</label>
<div className="relative">
<div className="absolute inset-y-0 left-0 pl-3 flex items-center pointer-events-none">
<svg className="h-5 w-5 text-gray-400" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M21 21l-6-6m2-5a7 7 0 11-14 0 7 7 0 0114 0z" />
</svg>
</div>
<input
id="search"
type="text"
placeholder="Buscar alertas y recomendaciones..."
value={filters.search}
onChange={(e) => updateFilter('search', e.target.value)}
className="block w-full pl-10 pr-3 py-2 border border-gray-300 rounded-md leading-5 bg-white placeholder-gray-500 focus:outline-none focus:placeholder-gray-400 focus:ring-1 focus:ring-blue-500 focus:border-blue-500 sm:text-sm"
/>
</div>
</div>
{/* Type Filter */}
<div>
<label htmlFor="type-filter" className="sr-only">
Tipo
</label>
<select
id="type-filter"
value={filters.item_type}
onChange={(e) => updateFilter('item_type', e.target.value)}
className="block w-full pl-3 pr-10 py-2 text-base border border-gray-300 focus:outline-none focus:ring-blue-500 focus:border-blue-500 sm:text-sm rounded-md"
>
<option value="all">Todos los tipos</option>
<option value="alert">Alertas</option>
<option value="recommendation">Recomendaciones</option>
</select>
</div>
{/* Severity Filter */}
<div>
<label htmlFor="severity-filter" className="sr-only">
Severidad
</label>
<select
id="severity-filter"
value={filters.severity}
onChange={(e) => updateFilter('severity', e.target.value)}
className="block w-full pl-3 pr-10 py-2 text-base border border-gray-300 focus:outline-none focus:ring-blue-500 focus:border-blue-500 sm:text-sm rounded-md"
>
<option value="all">Todas las severidades</option>
<option value="urgent">Urgente</option>
<option value="high">Alta</option>
<option value="medium">Media</option>
<option value="low">Baja</option>
</select>
</div>
{/* Status Filter */}
<div>
<label htmlFor="status-filter" className="sr-only">
Estado
</label>
<select
id="status-filter"
value={filters.status}
onChange={(e) => updateFilter('status', e.target.value)}
className="block w-full pl-3 pr-10 py-2 text-base border border-gray-300 focus:outline-none focus:ring-blue-500 focus:border-blue-500 sm:text-sm rounded-md"
>
<option value="all">Todos los estados</option>
<option value="active">Activos</option>
<option value="acknowledged">Reconocidos</option>
<option value="resolved">Resueltos</option>
</select>
</div>
{/* Service Filter */}
{availableServices.length > 0 && (
<div>
<label htmlFor="service-filter" className="sr-only">
Servicio
</label>
<select
id="service-filter"
value={filters.service}
onChange={(e) => updateFilter('service', e.target.value)}
className="block w-full pl-3 pr-10 py-2 text-base border border-gray-300 focus:outline-none focus:ring-blue-500 focus:border-blue-500 sm:text-sm rounded-md"
>
<option value="all">Todos los servicios</option>
{availableServices.map((service) => (
<option key={service} value={service}>
{service}
</option>
))}
</select>
</div>
)}
{/* Clear Filters */}
{(filters.search || filters.item_type !== 'all' || filters.severity !== 'all' ||
filters.status !== 'all' || filters.service !== 'all') && (
<button
onClick={() => onFiltersChange({
item_type: 'all',
severity: 'all',
status: 'all',
service: 'all',
search: ''
})}
className="inline-flex items-center px-3 py-2 border border-gray-300 shadow-sm text-sm leading-4 font-medium rounded-md text-gray-700 bg-white hover:bg-gray-50 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-blue-500"
>
<svg className="h-4 w-4 mr-2" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M6 18L18 6M6 6l12 12" />
</svg>
Limpiar
</button>
)}
</div>
);
};

View File

@@ -0,0 +1,102 @@
// frontend/src/components/alerts/AlertStats.tsx
/**
* Statistics display for alerts and recommendations
*/
import React from 'react';
interface AlertStatsProps {
urgentCount: number;
highCount: number;
recCount: number;
totalItems: number;
activeItems: number;
}
export const AlertStats: React.FC<AlertStatsProps> = ({
urgentCount,
highCount,
recCount,
totalItems,
activeItems
}) => {
const stats = [
{
name: 'Alertas Urgentes',
value: urgentCount,
icon: '🚨',
color: urgentCount > 0 ? 'text-red-600' : 'text-gray-600',
bgColor: urgentCount > 0 ? 'bg-red-50' : 'bg-gray-50',
borderColor: urgentCount > 0 ? 'border-red-200' : 'border-gray-200'
},
{
name: 'Alertas Altas',
value: highCount,
icon: '⚠️',
color: highCount > 0 ? 'text-orange-600' : 'text-gray-600',
bgColor: highCount > 0 ? 'bg-orange-50' : 'bg-gray-50',
borderColor: highCount > 0 ? 'border-orange-200' : 'border-gray-200'
},
{
name: 'Recomendaciones',
value: recCount,
icon: '💡',
color: recCount > 0 ? 'text-blue-600' : 'text-gray-600',
bgColor: recCount > 0 ? 'bg-blue-50' : 'bg-gray-50',
borderColor: recCount > 0 ? 'border-blue-200' : 'border-gray-200'
},
{
name: 'Total Activos',
value: activeItems,
icon: '📊',
color: 'text-gray-600',
bgColor: 'bg-gray-50',
borderColor: 'border-gray-200'
}
];
return (
<div className="bg-white border-b border-gray-200">
<div className="px-6 py-4">
<dl className="grid grid-cols-1 gap-4 sm:grid-cols-2 lg:grid-cols-4">
{stats.map((stat) => (
<div
key={stat.name}
className={`relative overflow-hidden rounded-lg border ${stat.borderColor} ${stat.bgColor} p-4 transition-all duration-200 hover:shadow-md`}
>
<dt className="flex items-center text-sm font-medium text-gray-600">
<span className="text-lg mr-2">{stat.icon}</span>
{stat.name}
</dt>
<dd className={`mt-1 text-2xl font-semibold ${stat.color}`}>
{stat.value}
</dd>
{/* Pulse animation for urgent alerts */}
{stat.name === 'Alertas Urgentes' && urgentCount > 0 && (
<div className="absolute inset-0 rounded-lg border-2 border-red-400 animate-pulse opacity-50"></div>
)}
</div>
))}
</dl>
{/* Summary text */}
<div className="mt-4 text-sm text-gray-600">
{totalItems === 0 ? (
<p className="flex items-center">
<span className="text-green-500 mr-2"></span>
Todos los sistemas funcionan correctamente
</p>
) : (
<p>
Mostrando {totalItems} elemento{totalItems !== 1 ? 's' : ''} en total
{activeItems > 0 && (
<>, {activeItems} activo{activeItems !== 1 ? 's' : ''}</>
)}
</p>
)}
</div>
</div>
</div>
);
};

View File

@@ -0,0 +1,70 @@
// frontend/src/components/alerts/ConnectionStatus.tsx
/**
* Displays the current SSE connection status with appropriate styling
*/
import React from 'react';
import { SSEConnectionState } from '../../types/alerts';
interface ConnectionStatusProps {
connectionState: SSEConnectionState;
}
export const ConnectionStatus: React.FC<ConnectionStatusProps> = ({
connectionState
}) => {
const getStatusConfig = (state: SSEConnectionState) => {
switch (state.status) {
case 'connected':
return {
color: 'bg-green-100 text-green-800 border-green-200',
icon: '🟢',
label: 'Conectado',
description: 'Actualizaciones en tiempo real'
};
case 'connecting':
return {
color: 'bg-yellow-100 text-yellow-800 border-yellow-200',
icon: '🟡',
label: 'Conectando...',
description: 'Estableciendo conexión'
};
case 'error':
return {
color: 'bg-red-100 text-red-800 border-red-200',
icon: '🔴',
label: 'Error de conexión',
description: state.reconnectAttempts > 0 ? `Reintento ${state.reconnectAttempts}` : 'Fallo en la conexión'
};
case 'disconnected':
default:
return {
color: 'bg-gray-100 text-gray-800 border-gray-200',
icon: '⚪',
label: 'Desconectado',
description: 'Sin actualizaciones en tiempo real'
};
}
};
const config = getStatusConfig(connectionState);
return (
<div className={`inline-flex items-center px-3 py-2 rounded-md border text-sm font-medium ${config.color}`}>
<span className="mr-2">{config.icon}</span>
<div className="flex flex-col">
<span className="font-medium">{config.label}</span>
<span className="text-xs opacity-75">{config.description}</span>
</div>
{connectionState.status === 'connecting' && (
<div className="ml-2">
<svg className="animate-spin h-4 w-4" fill="none" viewBox="0 0 24 24">
<circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4"></circle>
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z"></path>
</svg>
</div>
)}
</div>
);
};

View File

@@ -0,0 +1,359 @@
// frontend/src/hooks/useAlertStream.ts
/**
* React hook for managing SSE connection to alert and recommendation stream
* Handles connection management, reconnection, and real-time updates
*/
import { useEffect, useState, useCallback, useRef } from 'react';
import { AlertItem, ItemSeverity, ItemType, SSEConnectionState, NotificationPermission } from '../types/alerts';
import { useAuth } from './useAuth';
interface UseAlertStreamProps {
tenantId: string;
autoConnect?: boolean;
maxReconnectAttempts?: number;
}
interface UseAlertStreamReturn {
items: AlertItem[];
connectionState: SSEConnectionState;
urgentCount: number;
highCount: number;
recCount: number;
acknowledgeItem: (itemId: string) => Promise<void>;
resolveItem: (itemId: string) => Promise<void>;
connect: () => void;
disconnect: () => void;
clearItems: () => void;
notificationPermission: NotificationPermission;
requestNotificationPermission: () => Promise<NotificationPermission>;
}
export const useAlertStream = ({
tenantId,
autoConnect = true,
maxReconnectAttempts = 10
}: UseAlertStreamProps): UseAlertStreamReturn => {
const [items, setItems] = useState<AlertItem[]>([]);
const [connectionState, setConnectionState] = useState<SSEConnectionState>({
status: 'disconnected',
reconnectAttempts: 0
});
const [notificationPermission, setNotificationPermission] = useState<NotificationPermission>('default');
const eventSourceRef = useRef<EventSource | null>(null);
const reconnectTimeoutRef = useRef<NodeJS.Timeout>();
const isManuallyDisconnected = useRef(false);
const { token } = useAuth();
// Initialize notification permission state
useEffect(() => {
if ('Notification' in window) {
setNotificationPermission(Notification.permission);
}
}, []);
const requestNotificationPermission = useCallback(async (): Promise<NotificationPermission> => {
if (!('Notification' in window)) {
return 'denied';
}
const permission = await Notification.requestPermission();
setNotificationPermission(permission);
return permission;
}, []);
const showBrowserNotification = useCallback((item: AlertItem) => {
if (notificationPermission !== 'granted') return;
// Only show notifications for urgent/high alerts, not recommendations
if (item.item_type === 'recommendation') return;
if (!['urgent', 'high'].includes(item.severity)) return;
const notification = new Notification(item.title, {
body: item.message,
icon: '/favicon.ico',
badge: '/badge-icon.png',
tag: item.id,
renotify: true,
requireInteraction: item.severity === 'urgent',
data: {
itemId: item.id,
itemType: item.item_type,
severity: item.severity
}
});
// Auto-close non-urgent notifications after 5 seconds
if (item.severity !== 'urgent') {
setTimeout(() => notification.close(), 5000);
}
notification.onclick = () => {
window.focus();
notification.close();
// Could navigate to specific alert details
};
}, [notificationPermission]);
const playAlertSound = useCallback((severity: ItemSeverity) => {
// Only play sounds for urgent alerts
if (severity !== 'urgent') return;
try {
const audio = new Audio('/sounds/alert-urgent.mp3');
audio.volume = 0.5;
audio.play().catch(() => {
// Silently fail if audio can't play (user interaction required)
});
} catch (error) {
console.warn('Could not play alert sound:', error);
}
}, []);
const addAndSortItems = useCallback((newItem: AlertItem) => {
setItems(prev => {
// Prevent duplicates
if (prev.some(i => i.id === newItem.id)) return prev;
const updated = [newItem, ...prev];
// Sort by severity weight, then by timestamp
const severityWeight = { urgent: 4, high: 3, medium: 2, low: 1 };
return updated.sort((a, b) => {
const weightDiff = severityWeight[b.severity] - severityWeight[a.severity];
if (weightDiff !== 0) return weightDiff;
return new Date(b.timestamp).getTime() - new Date(a.timestamp).getTime();
}).slice(0, 100); // Keep only latest 100 items
});
}, []);
const connect = useCallback(() => {
if (!token || !tenantId) {
console.warn('Cannot connect to alert stream: missing token or tenantId');
return;
}
// Clean up existing connection
if (eventSourceRef.current) {
eventSourceRef.current.close();
}
isManuallyDisconnected.current = false;
setConnectionState(prev => ({ ...prev, status: 'connecting' }));
// Create SSE connection
const url = `${process.env.REACT_APP_NOTIFICATION_SERVICE_URL || 'http://localhost:8002'}/api/v1/sse/alerts/stream/${tenantId}`;
const eventSource = new EventSource(url, {
withCredentials: true
});
// Note: the EventSource API does not support custom request headers,
// so the bearer token cannot be attached here; authentication must rely
// on cookies sent via withCredentials above (or a token in the URL).
eventSource.onopen = () => {
setConnectionState(prev => ({
...prev,
status: 'connected',
lastConnected: new Date(),
reconnectAttempts: 0
}));
console.log('Alert stream connected');
};
eventSource.addEventListener('connected', (event) => {
console.log('Alert stream handshake completed:', event.data);
});
eventSource.addEventListener('initial_items', (event) => {
try {
const initialItems = JSON.parse(event.data);
setItems(initialItems);
console.log(`Loaded ${initialItems.length} initial items`);
} catch (error) {
console.error('Error parsing initial items:', error);
}
});
eventSource.addEventListener('alert', (event) => {
try {
const newItem = JSON.parse(event.data);
addAndSortItems(newItem);
// Show browser notification for urgent/high alerts
showBrowserNotification(newItem);
// Play sound for urgent alerts
if (newItem.severity === 'urgent') {
playAlertSound(newItem.severity);
}
console.log('New alert received:', newItem.type, newItem.severity);
} catch (error) {
console.error('Error processing alert event:', error);
}
});
eventSource.addEventListener('recommendation', (event) => {
try {
const newItem = JSON.parse(event.data);
addAndSortItems(newItem);
console.log('New recommendation received:', newItem.type);
} catch (error) {
console.error('Error processing recommendation event:', error);
}
});
eventSource.addEventListener('ping', (event) => {
// Handle keepalive pings
console.debug('SSE keepalive ping received');
});
eventSource.onerror = (error) => {
console.error('SSE error:', error);
setConnectionState(prev => ({
...prev,
status: 'error'
}));
eventSource.close();
// Attempt reconnection with exponential backoff
if (!isManuallyDisconnected.current &&
connectionState.reconnectAttempts < maxReconnectAttempts) {
const backoffTime = Math.min(1000 * Math.pow(2, connectionState.reconnectAttempts), 30000);
setConnectionState(prev => ({
...prev,
reconnectAttempts: prev.reconnectAttempts + 1
}));
console.log(`Reconnecting in ${backoffTime}ms (attempt ${connectionState.reconnectAttempts + 1})`);
reconnectTimeoutRef.current = setTimeout(() => {
connect();
}, backoffTime);
}
};
eventSourceRef.current = eventSource;
}, [token, tenantId, connectionState.reconnectAttempts, maxReconnectAttempts, addAndSortItems, showBrowserNotification, playAlertSound]);
const disconnect = useCallback(() => {
isManuallyDisconnected.current = true;
if (eventSourceRef.current) {
eventSourceRef.current.close();
eventSourceRef.current = null;
}
if (reconnectTimeoutRef.current) {
clearTimeout(reconnectTimeoutRef.current);
}
setConnectionState({
status: 'disconnected',
reconnectAttempts: 0
});
}, []);
const acknowledgeItem = useCallback(async (itemId: string) => {
try {
const response = await fetch(
`${process.env.REACT_APP_NOTIFICATION_SERVICE_URL || 'http://localhost:8002'}/api/v1/sse/items/${itemId}/acknowledge`,
{
method: 'POST',
headers: {
'Authorization': `Bearer ${token}`,
'Content-Type': 'application/json'
}
}
);
if (response.ok) {
setItems(prev => prev.map(item =>
item.id === itemId
? { ...item, status: 'acknowledged' as const, acknowledged_at: new Date().toISOString() }
: item
));
}
} catch (error) {
console.error('Failed to acknowledge item:', error);
}
}, [token]);
const resolveItem = useCallback(async (itemId: string) => {
try {
const response = await fetch(
`${process.env.REACT_APP_NOTIFICATION_SERVICE_URL || 'http://localhost:8002'}/api/v1/sse/items/${itemId}/resolve`,
{
method: 'POST',
headers: {
'Authorization': `Bearer ${token}`,
'Content-Type': 'application/json'
}
}
);
if (response.ok) {
setItems(prev => prev.map(item =>
item.id === itemId
? { ...item, status: 'resolved' as const, resolved_at: new Date().toISOString() }
: item
));
}
} catch (error) {
console.error('Failed to resolve item:', error);
}
}, [token]);
const clearItems = useCallback(() => {
setItems([]);
}, []);
// Auto-connect on mount if enabled
useEffect(() => {
if (autoConnect && token && tenantId) {
connect();
}
return () => {
disconnect();
};
}, [autoConnect, token, tenantId]); // Don't include connect/disconnect to avoid loops
// Calculate counts
const urgentCount = items.filter(i =>
i.severity === 'urgent' && i.status === 'active' && i.item_type === 'alert'
).length;
const highCount = items.filter(i =>
i.severity === 'high' && i.status === 'active' && i.item_type === 'alert'
).length;
const recCount = items.filter(i =>
i.item_type === 'recommendation' && i.status === 'active'
).length;
return {
items,
connectionState,
urgentCount,
highCount,
recCount,
acknowledgeItem,
resolveItem,
connect,
disconnect,
clearItems,
notificationPermission,
requestNotificationPermission
};
};
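The hook is not tied to the dashboard; any component can consume the stream. A hypothetical header badge driven by the hook's counters (component name and styling are assumptions):
```tsx
import React from 'react';
import { useAlertStream } from './useAlertStream';
import { useTenantId } from './useTenantId';

export const AlertBadge: React.FC = () => {
  const tenantId = useTenantId();
  const { urgentCount, highCount, connectionState } = useAlertStream({ tenantId });
  const total = urgentCount + highCount;
  // Hide the badge while disconnected or when nothing needs attention.
  if (connectionState.status !== 'connected' || total === 0) return null;
  return (
    <span className={urgentCount > 0 ? 'text-red-600 font-bold' : 'text-orange-600'}>
      {total} alerta{total !== 1 ? 's' : ''}
    </span>
  );
};
```
Note that each useAlertStream call opens its own EventSource, so a component rendered alongside the dashboard should share a single hook instance (for example via React context) rather than connecting twice.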

View File

@@ -0,0 +1,126 @@
// frontend/src/types/alerts.ts
/**
* TypeScript types for the unified alert and recommendation system
*/
export type ItemType = 'alert' | 'recommendation';
export type ItemSeverity = 'urgent' | 'high' | 'medium' | 'low';
export type ItemStatus = 'active' | 'acknowledged' | 'resolved';
export interface AlertItem {
id: string;
tenant_id: string;
item_type: ItemType;
type: string; // Specific alert/recommendation type
severity: ItemSeverity;
status: ItemStatus;
service: string;
title: string;
message: string;
actions: string[];
metadata: Record<string, any>;
created_at: string;
acknowledged_at?: string;
acknowledged_by?: string;
resolved_at?: string;
resolved_by?: string;
timestamp: string;
}
export interface SSEEvent {
event: string;
data: string;
id?: string;
}
export interface ItemFilters {
item_type: ItemType | 'all';
severity: ItemSeverity | 'all';
status: ItemStatus | 'all';
service: string | 'all';
search: string;
}
export interface ItemCounts {
total: number;
alerts: {
urgent: number;
high: number;
medium: number;
low: number;
};
recommendations: {
high: number;
medium: number;
low: number;
};
by_status: {
active: number;
acknowledged: number;
resolved: number;
};
}
export interface NotificationSettings {
browser_notifications: boolean;
sound_enabled: boolean;
auto_acknowledge_timeout: number; // minutes
show_recommendations: boolean;
urgent_only: boolean;
}
export interface SSEConnectionState {
status: 'connecting' | 'connected' | 'disconnected' | 'error';
lastConnected?: Date;
reconnectAttempts: number;
latency?: number;
}
// Notification permission states
export type NotificationPermission = 'default' | 'granted' | 'denied';
// UI state
export interface AlertUIState {
filters: ItemFilters;
selectedItems: string[];
sortBy: 'created_at' | 'severity' | 'type';
sortOrder: 'asc' | 'desc';
viewMode: 'list' | 'grid' | 'compact';
sidebarOpen: boolean;
bulkActionsOpen: boolean;
}
// Action types for alert responses
export interface AlertAction {
id: string;
label: string;
type: 'acknowledge' | 'resolve' | 'custom';
icon?: string;
variant?: 'primary' | 'secondary' | 'danger';
requires_confirmation?: boolean;
}
// Metrics for dashboard
export interface AlertMetrics {
response_time_avg: number; // seconds
false_positive_rate: number;
recommendation_adoption_rate: number;
items_last_24h: number;
top_alert_types: Array<{
type: string;
count: number;
}>;
service_health: Record<string, boolean>;
}
// Template for creating new alerts (development/testing)
export interface AlertTemplate {
type: string;
severity: ItemSeverity;
title: string;
message: string;
actions: string[];
metadata?: Record<string, any>;
}
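The ItemFilters shape above maps one-to-one onto the predicate logic in AlertDashboard. A hypothetical helper (not part of this commit) that packages that logic for reuse or unit testing:
```typescript
import { AlertItem, ItemFilters } from './alerts';

// True when an item passes every active filter, mirroring the
// dashboard's useMemo filtering step.
export function matchesFilters(item: AlertItem, filters: ItemFilters): boolean {
  if (filters.item_type !== 'all' && item.item_type !== filters.item_type) return false;
  if (filters.severity !== 'all' && item.severity !== filters.severity) return false;
  if (filters.status !== 'all' && item.status !== filters.status) return false;
  if (filters.service !== 'all' && item.service !== filters.service) return false;
  const q = filters.search.trim().toLowerCase();
  if (q) {
    return (
      item.title.toLowerCase().includes(q) ||
      item.message.toLowerCase().includes(q) ||
      item.type.toLowerCase().includes(q)
    );
  }
  return true;
}
```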

View File

@@ -0,0 +1,644 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": "-- Grafana --",
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"description": "Comprehensive monitoring dashboard for the Bakery Alert and Recommendation System",
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": null,
"links": [],
"liveNow": false,
"panels": [
{
"datasource": "prometheus",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"vis": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 0
},
"id": 1,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "single"
}
},
"targets": [
{
"expr": "rate(alert_items_published_total[5m])",
"interval": "",
"legendFormat": "{{item_type}} - {{severity}}",
"refId": "A"
}
],
"title": "Alert/Recommendation Publishing Rate",
"type": "timeseries"
},
{
"datasource": "prometheus",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 0
},
"id": 2,
"options": {
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showThresholdLabels": false,
"showThresholdMarkers": true,
"text": {}
},
"pluginVersion": "8.0.0",
"targets": [
{
"expr": "sum(alert_sse_active_connections)",
"interval": "",
"legendFormat": "Active SSE Connections",
"refId": "A"
}
],
"title": "Active SSE Connections",
"type": "gauge"
},
{
"datasource": "prometheus",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"hideFrom": {
"legend": false,
"tooltip": false,
"vis": false
}
},
"mappings": []
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 8,
"x": 0,
"y": 8
},
"id": 3,
"options": {
"legend": {
"displayMode": "list",
"placement": "right"
},
"pieType": "pie",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"tooltip": {
"mode": "single"
}
},
"targets": [
{
"expr": "sum by (item_type) (alert_items_published_total)",
"interval": "",
"legendFormat": "{{item_type}}",
"refId": "A"
}
],
"title": "Items by Type",
"type": "piechart"
},
{
"datasource": "prometheus",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"hideFrom": {
"legend": false,
"tooltip": false,
"vis": false
}
},
"mappings": []
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 8,
"x": 8,
"y": 8
},
"id": 4,
"options": {
"legend": {
"displayMode": "list",
"placement": "right"
},
"pieType": "pie",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"tooltip": {
"mode": "single"
}
},
"targets": [
{
"expr": "sum by (severity) (alert_items_published_total)",
"interval": "",
"legendFormat": "{{severity}}",
"refId": "A"
}
],
"title": "Items by Severity",
"type": "piechart"
},
{
"datasource": "prometheus",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"vis": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 8,
"x": 16,
"y": 8
},
"id": 5,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "single"
}
},
"targets": [
{
"expr": "rate(alert_notifications_sent_total[5m])",
"interval": "",
"legendFormat": "{{channel}}",
"refId": "A"
}
],
"title": "Notification Delivery Rate by Channel",
"type": "timeseries"
},
{
"datasource": "prometheus",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"vis": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "s"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 16
},
"id": 6,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "single"
}
},
"targets": [
{
"expr": "histogram_quantile(0.95, rate(alert_processing_duration_seconds_bucket[5m]))",
"interval": "",
"legendFormat": "95th percentile",
"refId": "A"
},
{
"expr": "histogram_quantile(0.50, rate(alert_processing_duration_seconds_bucket[5m]))",
"interval": "",
"legendFormat": "50th percentile (median)",
"refId": "B"
}
],
"title": "Processing Duration",
"type": "timeseries"
},
{
"datasource": "prometheus",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"vis": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 16
},
"id": 7,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "single"
}
},
"targets": [
{
"expr": "rate(alert_processing_errors_total[5m])",
"interval": "",
"legendFormat": "{{error_type}}",
"refId": "A"
},
{
"expr": "rate(alert_delivery_failures_total[5m])",
"interval": "",
"legendFormat": "Delivery: {{channel}}",
"refId": "B"
}
],
"title": "Error Rates",
"type": "timeseries"
},
{
"datasource": "prometheus",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {
"align": "auto",
"displayMode": "auto"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "Health"
},
"properties": [
{
"id": "custom.displayMode",
"value": "color-background"
},
{
"id": "mappings",
"value": [
{
"options": {
"0": {
"color": "red",
"index": 0,
"text": "Unhealthy"
},
"1": {
"color": "green",
"index": 1,
"text": "Healthy"
}
},
"type": "value"
}
]
}
]
}
]
},
"gridPos": {
"h": 8,
"w": 24,
"x": 0,
"y": 24
},
"id": 8,
"options": {
"showHeader": true
},
"pluginVersion": "8.0.0",
"targets": [
{
"expr": "alert_system_component_health",
"format": "table",
"interval": "",
"legendFormat": "",
"refId": "A"
}
],
"title": "System Component Health",
"transformations": [
{
"id": "organize",
"options": {
"excludeByName": {
"__name__": true,
"instance": true,
"job": true
},
"indexByName": {},
"renameByName": {
"Value": "Health",
"component": "Component",
"service": "Service"
}
}
}
],
"type": "table"
}
],
"schemaVersion": 27,
"style": "dark",
"tags": [
"bakery",
"alerts",
"recommendations",
"monitoring"
],
"templating": {
"list": []
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {},
"timezone": "Europe/Madrid",
"title": "Bakery Alert & Recommendation System",
"uid": "bakery-alert-system",
"version": 1
}

View File

@@ -0,0 +1,243 @@
# infrastructure/monitoring/prometheus/rules/alert-system-rules.yml
# Prometheus alerting rules for the Bakery Alert and Recommendation System
groups:
- name: alert_system_health
rules:
# System component health alerts
- alert: AlertSystemComponentDown
expr: alert_system_component_health == 0
for: 2m
labels:
severity: critical
service: "{{ $labels.service }}"
component: "{{ $labels.component }}"
annotations:
summary: "Alert system component {{ $labels.component }} is unhealthy"
description: "Component {{ $labels.component }} in service {{ $labels.service }} has been unhealthy for more than 2 minutes."
runbook_url: "https://docs.bakery.local/runbooks/alert-system#component-health"
# Connection health alerts
- alert: RabbitMQConnectionDown
expr: alert_rabbitmq_connection_status == 0
for: 1m
labels:
severity: critical
service: "{{ $labels.service }}"
annotations:
summary: "RabbitMQ connection down for {{ $labels.service }}"
description: "Service {{ $labels.service }} has lost connection to RabbitMQ for more than 1 minute."
runbook_url: "https://docs.bakery.local/runbooks/alert-system#rabbitmq-connection"
- alert: RedisConnectionDown
expr: alert_redis_connection_status == 0
for: 1m
labels:
severity: critical
service: "{{ $labels.service }}"
annotations:
summary: "Redis connection down for {{ $labels.service }}"
description: "Service {{ $labels.service }} has lost connection to Redis for more than 1 minute."
runbook_url: "https://docs.bakery.local/runbooks/alert-system#redis-connection"
# Leader election issues
- alert: NoSchedulerLeader
expr: sum(alert_scheduler_leader_status) == 0
for: 5m
labels:
severity: warning
annotations:
summary: "No scheduler leader elected"
description: "No service has been elected as scheduler leader for more than 5 minutes. Scheduled checks may not be running."
runbook_url: "https://docs.bakery.local/runbooks/alert-system#leader-election"
- name: alert_system_performance
rules:
# High error rates
- alert: HighAlertProcessingErrorRate
expr: rate(alert_processing_errors_total[5m]) > 0.1
for: 2m
labels:
severity: warning
annotations:
summary: "High alert processing error rate"
description: "Alert processing error rate is {{ $value | humanizePercentage }} over the last 5 minutes."
runbook_url: "https://docs.bakery.local/runbooks/alert-system#processing-errors"
- alert: HighNotificationDeliveryFailureRate
expr: rate(alert_delivery_failures_total[5m]) / rate(alert_notifications_sent_total[5m]) > 0.05
for: 3m
labels:
severity: warning
channel: "{{ $labels.channel }}"
annotations:
summary: "High notification delivery failure rate for {{ $labels.channel }}"
description: "Notification delivery failure rate for {{ $labels.channel }} is {{ $value | humanizePercentage }} over the last 5 minutes."
runbook_url: "https://docs.bakery.local/runbooks/alert-system#delivery-failures"
# Processing latency
- alert: HighAlertProcessingLatency
expr: histogram_quantile(0.95, rate(alert_processing_duration_seconds_bucket[5m])) > 5
for: 5m
labels:
severity: warning
annotations:
summary: "High alert processing latency"
description: "95th percentile alert processing latency is {{ $value }}s, exceeding 5s threshold."
runbook_url: "https://docs.bakery.local/runbooks/alert-system#processing-latency"
# SSE connection issues
- alert: TooManySSEConnections
expr: sum(alert_sse_active_connections) > 1000
for: 2m
labels:
severity: warning
annotations:
summary: "Too many active SSE connections"
description: "Number of active SSE connections ({{ $value }}) exceeds 1000. This may impact performance."
runbook_url: "https://docs.bakery.local/runbooks/alert-system#sse-connections"
- alert: SSEConnectionErrors
expr: rate(alert_sse_connection_errors_total[5m]) > 0.5
for: 3m
labels:
severity: warning
annotations:
summary: "High SSE connection error rate"
description: "SSE connection error rate is {{ $value }} errors/second over the last 5 minutes."
runbook_url: "https://docs.bakery.local/runbooks/alert-system#sse-errors"
- name: alert_system_business
rules:
# Alert volume anomalies
- alert: UnusuallyHighAlertVolume
expr: rate(alert_items_published_total{item_type="alert"}[10m]) > 2
for: 5m
labels:
severity: warning
service: "{{ $labels.service }}"
annotations:
summary: "Unusually high alert volume from {{ $labels.service }}"
description: "Service {{ $labels.service }} is generating alerts at {{ $value }} alerts/second, which is above normal levels."
runbook_url: "https://docs.bakery.local/runbooks/alert-system#high-volume"
- alert: NoAlertsGenerated
expr: rate(alert_items_published_total[30m]) == 0
for: 15m
labels:
severity: warning
annotations:
summary: "No alerts generated recently"
description: "No alerts have been generated in the last 30 minutes. This may indicate a problem with detection systems."
runbook_url: "https://docs.bakery.local/runbooks/alert-system#no-alerts"
# Response time issues
- alert: SlowAlertResponseTime
expr: histogram_quantile(0.95, rate(alert_item_response_time_seconds_bucket[1h])) > 3600
for: 10m
labels:
severity: warning
annotations:
summary: "Slow alert response times"
description: "95th percentile alert response time is {{ $value | humanizeDuration }}, exceeding 1 hour."
runbook_url: "https://docs.bakery.local/runbooks/alert-system#response-times"
# Critical alerts not acknowledged
- alert: CriticalAlertsUnacknowledged
expr: sum(alert_active_items_current{item_type="alert",severity="urgent"}) > 5
for: 10m
labels:
severity: critical
annotations:
summary: "Multiple critical alerts unacknowledged"
description: "{{ $value }} critical alerts remain unacknowledged for more than 10 minutes."
runbook_url: "https://docs.bakery.local/runbooks/alert-system#critical-unacked"
- name: alert_system_capacity
rules:
# Queue size monitoring
- alert: LargeSSEMessageQueues
expr: alert_sse_message_queue_size > 100
for: 5m
labels:
severity: warning
tenant_id: "{{ $labels.tenant_id }}"
annotations:
summary: "Large SSE message queue for tenant {{ $labels.tenant_id }}"
description: "SSE message queue for tenant {{ $labels.tenant_id }} has {{ $value }} messages, indicating potential client issues."
runbook_url: "https://docs.bakery.local/runbooks/alert-system#sse-queues"
# Database storage issues
- alert: SlowDatabaseStorage
expr: histogram_quantile(0.95, rate(alert_database_storage_duration_seconds_bucket[5m])) > 1
for: 5m
labels:
severity: warning
annotations:
summary: "Slow database storage for alerts"
description: "95th percentile database storage time is {{ $value }}s, exceeding 1s threshold."
runbook_url: "https://docs.bakery.local/runbooks/alert-system#database-storage"
- name: alert_system_effectiveness
rules:
# False positive rate monitoring
- alert: HighFalsePositiveRate
expr: alert_false_positive_rate > 0.2
for: 30m
labels:
severity: warning
service: "{{ $labels.service }}"
alert_type: "{{ $labels.alert_type }}"
annotations:
summary: "High false positive rate for {{ $labels.alert_type }}"
description: "False positive rate for {{ $labels.alert_type }} in {{ $labels.service }} is {{ $value | humanizePercentage }}."
runbook_url: "https://docs.bakery.local/runbooks/alert-system#false-positives"
# Low recommendation adoption
- alert: LowRecommendationAdoption
expr: rate(alert_recommendations_implemented_total[24h]) / ignoring(item_type) rate(alert_items_published_total{item_type="recommendation"}[24h]) < 0.1
for: 1h
labels:
severity: info
service: "{{ $labels.service }}"
annotations:
summary: "Low recommendation adoption rate"
description: "Recommendation adoption rate for {{ $labels.service }} is {{ $value | humanizePercentage }} over the last 24 hours."
runbook_url: "https://docs.bakery.local/runbooks/alert-system#recommendation-adoption"
# Additional alerting rules for specific scenarios
- name: alert_system_critical_scenarios
rules:
# Complete system failure
- alert: AlertSystemDown
expr: up{job=~"alert-processor|notification-service"} == 0
for: 1m
labels:
severity: critical
service: "{{ $labels.job }}"
annotations:
summary: "Alert system service {{ $labels.job }} is down"
description: "Critical alert system service {{ $labels.job }} has been down for more than 1 minute."
runbook_url: "https://docs.bakery.local/runbooks/alert-system#service-down"
# Data loss prevention
- alert: AlertDataNotPersisted
expr: sum(rate(alert_items_processed_total[5m])) > 0 and sum(rate(alert_database_storage_duration_seconds_count[5m])) == 0
for: 2m
labels:
severity: critical
annotations:
summary: "Alert data not being persisted to database"
description: "Alerts are being processed but not stored in database, potential data loss."
runbook_url: "https://docs.bakery.local/runbooks/alert-system#data-persistence"
# Notification blackhole
- alert: NotificationsNotDelivered
expr: sum(rate(alert_items_processed_total[5m])) > 0 and sum(rate(alert_notifications_sent_total[5m])) == 0
for: 3m
labels:
severity: critical
annotations:
summary: "Notifications not being delivered"
description: "Alerts are being processed but no notifications are being sent."
runbook_url: "https://docs.bakery.local/runbooks/alert-system#notification-delivery"
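These rules only fire if the services actually export the referenced series. As a minimal sketch of how three of them could be exposed with prometheus-client (already pinned in the alert processor's requirements) — the module layout, port, and `process()` stub are assumptions, not project code:

```python
# Sketch only: metric names mirror the rule expressions above; the
# service layout and scrape port are assumptions.
import time
from prometheus_client import Counter, Gauge, Histogram, start_http_server

# prometheus-client appends "_total" to counter samples, so this matches
# rate(alert_processing_errors_total[5m]) in the rules.
PROCESSING_ERRORS = Counter("alert_processing_errors", "Alert processing errors")
PROCESSING_DURATION = Histogram(
    "alert_processing_duration_seconds", "Alert processing latency in seconds")
SSE_CONNECTIONS = Gauge("alert_sse_active_connections", "Active SSE connections")

def process(item: dict) -> None:
    with PROCESSING_DURATION.time():  # feeds the _bucket/_sum/_count series
        try:
            pass  # actual alert processing would happen here
        except Exception:
            PROCESSING_ERRORS.inc()
            raise

if __name__ == "__main__":
    start_http_server(9100)  # scrape endpoint for Prometheus
    while True:
        process({})
        time.sleep(1)
```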

View File

@@ -0,0 +1,197 @@
-- migrations/001_create_alert_tables.sql
-- Database schema for unified alerts and recommendations system
-- Main alerts table (stores both alerts and recommendations)
CREATE TABLE IF NOT EXISTS alerts (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
tenant_id UUID NOT NULL,
item_type VARCHAR(20) NOT NULL DEFAULT 'alert' CHECK (item_type IN ('alert', 'recommendation')),
alert_type VARCHAR(50) NOT NULL, -- Specific type like 'critical_stock_shortage', 'inventory_optimization'
severity VARCHAR(20) NOT NULL CHECK (severity IN ('urgent', 'high', 'medium', 'low')),
status VARCHAR(20) NOT NULL DEFAULT 'active' CHECK (status IN ('active', 'acknowledged', 'resolved')),
service VARCHAR(50) NOT NULL,
title VARCHAR(255) NOT NULL,
message TEXT NOT NULL,
actions JSONB DEFAULT '[]',
metadata JSONB DEFAULT '{}',
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
acknowledged_at TIMESTAMP WITH TIME ZONE,
acknowledged_by UUID,
resolved_at TIMESTAMP WITH TIME ZONE,
resolved_by UUID,
-- Add severity weight for sorting
severity_weight INT GENERATED ALWAYS AS (
CASE severity
WHEN 'urgent' THEN 4
WHEN 'high' THEN 3
WHEN 'medium' THEN 2
WHEN 'low' THEN 1
END
) STORED
);
-- Indexes for performance
CREATE INDEX IF NOT EXISTS idx_alerts_tenant_status ON alerts(tenant_id, status);
CREATE INDEX IF NOT EXISTS idx_alerts_created_at ON alerts(created_at DESC);
CREATE INDEX IF NOT EXISTS idx_alerts_severity ON alerts(severity_weight DESC);
CREATE INDEX IF NOT EXISTS idx_alerts_tenant_active ON alerts(tenant_id, status) WHERE status = 'active';
CREATE INDEX IF NOT EXISTS idx_alerts_item_type ON alerts(item_type);
CREATE INDEX IF NOT EXISTS idx_alerts_service ON alerts(service);
-- Composite index for common queries
CREATE INDEX IF NOT EXISTS idx_alerts_tenant_type_status ON alerts(tenant_id, item_type, status);
-- Alert history for audit trail (applies to both alerts and recommendations)
CREATE TABLE IF NOT EXISTS alert_history (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
alert_id UUID REFERENCES alerts(id) ON DELETE CASCADE,
tenant_id UUID NOT NULL,
action VARCHAR(50) NOT NULL,
performed_by UUID,
performed_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
details JSONB DEFAULT '{}'
);
CREATE INDEX IF NOT EXISTS idx_alert_history_alert ON alert_history(alert_id);
CREATE INDEX IF NOT EXISTS idx_alert_history_tenant ON alert_history(tenant_id);
-- Database triggers for immediate alerts (recommendations typically not triggered this way)
-- Stock critical trigger
CREATE OR REPLACE FUNCTION notify_stock_critical()
RETURNS TRIGGER AS $$
BEGIN
-- Only trigger for alerts, not recommendations
IF NEW.current_stock < NEW.minimum_stock AND
OLD.current_stock >= OLD.minimum_stock THEN
PERFORM pg_notify(
'stock_alerts',
json_build_object(
'tenant_id', NEW.tenant_id,
'ingredient_id', NEW.id,
'name', NEW.name,
'current_stock', NEW.current_stock,
'minimum_stock', NEW.minimum_stock,
'alert_type', 'critical_stock_shortage'
)::text
);
END IF;
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
-- Temperature breach trigger
CREATE OR REPLACE FUNCTION notify_temperature_breach()
RETURNS TRIGGER AS $$
BEGIN
IF NEW.temperature > NEW.max_threshold AND
NEW.breach_duration_minutes > 30 THEN
PERFORM pg_notify(
'temperature_alerts',
json_build_object(
'tenant_id', NEW.tenant_id,
'sensor_id', NEW.sensor_id,
'location', NEW.location,
'temperature', NEW.temperature,
'duration', NEW.breach_duration_minutes,
'alert_type', 'temperature_breach'
)::text
);
END IF;
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
-- Production delay trigger
CREATE OR REPLACE FUNCTION notify_production_delay()
RETURNS TRIGGER AS $$
BEGIN
IF NEW.status = 'delayed' AND OLD.status != 'delayed' THEN
PERFORM pg_notify(
'production_alerts',
json_build_object(
'tenant_id', NEW.tenant_id,
'batch_id', NEW.id,
'product_name', NEW.product_name,
'planned_completion', NEW.planned_completion_time,
'delay_minutes', EXTRACT(EPOCH FROM (NOW() - NEW.planned_completion_time))/60,
'alert_type', 'production_delay'
)::text
);
END IF;
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
-- Create placeholder tables for triggers (these would exist in respective services)
-- This is just for reference - actual tables should be in service-specific migrations
-- Inventory items table structure (for reference)
CREATE TABLE IF NOT EXISTS inventory_items (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
tenant_id UUID NOT NULL,
name VARCHAR(255) NOT NULL,
current_stock DECIMAL(10,2) DEFAULT 0,
minimum_stock DECIMAL(10,2) DEFAULT 0,
maximum_stock DECIMAL(10,2),
unit VARCHAR(50) DEFAULT 'kg',
updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
);
-- Temperature readings table structure (for reference)
CREATE TABLE IF NOT EXISTS temperature_readings (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
tenant_id UUID NOT NULL,
sensor_id VARCHAR(100) NOT NULL,
location VARCHAR(255) NOT NULL,
temperature DECIMAL(5,2) NOT NULL,
max_threshold DECIMAL(5,2) DEFAULT 25.0,
breach_duration_minutes INT DEFAULT 0,
recorded_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
);
-- Production batches table structure (for reference)
CREATE TABLE IF NOT EXISTS production_batches (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
tenant_id UUID NOT NULL,
product_name VARCHAR(255) NOT NULL,
status VARCHAR(50) DEFAULT 'planned',
planned_completion_time TIMESTAMP WITH TIME ZONE,
actual_completion_time TIMESTAMP WITH TIME ZONE,
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
);
-- Apply triggers (only if tables exist)
DO $$
BEGIN
-- Stock critical trigger
IF EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = 'inventory_items') THEN
DROP TRIGGER IF EXISTS stock_critical_trigger ON inventory_items;
CREATE TRIGGER stock_critical_trigger
AFTER UPDATE ON inventory_items
FOR EACH ROW
EXECUTE FUNCTION notify_stock_critical();
END IF;
-- Temperature breach trigger
IF EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = 'temperature_readings') THEN
DROP TRIGGER IF EXISTS temperature_breach_trigger ON temperature_readings;
CREATE TRIGGER temperature_breach_trigger
AFTER INSERT OR UPDATE ON temperature_readings
FOR EACH ROW
EXECUTE FUNCTION notify_temperature_breach();
END IF;
-- Production delay trigger
IF EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = 'production_batches') THEN
DROP TRIGGER IF EXISTS production_delay_trigger ON production_batches;
CREATE TRIGGER production_delay_trigger
AFTER UPDATE ON production_batches
FOR EACH ROW
EXECUTE FUNCTION notify_production_delay();
END IF;
END
$$;
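For reference, the `pg_notify` channels raised by these triggers can be consumed with asyncpg's `add_listener`, which is how the inventory service registers its handlers later in this commit. A self-contained sketch (the DSN and print-handler are illustrative only):

```python
# Sketch only: LISTEN/NOTIFY consumer for the channels defined above.
import asyncio
import json
import asyncpg

def on_alert(conn, pid, channel, payload):
    event = json.loads(payload)
    print(f"{channel}: {event['alert_type']} (tenant {event['tenant_id']})")

async def main():
    conn = await asyncpg.connect("postgresql://user:pass@localhost:5432/bakery")
    for channel in ("stock_alerts", "temperature_alerts", "production_alerts"):
        await conn.add_listener(channel, on_alert)
    await asyncio.Event().wait()  # keep the connection open to receive events

asyncio.run(main())
```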

View File

@@ -0,0 +1,26 @@
FROM python:3.11-slim
WORKDIR /app
# Install system dependencies
RUN apt-get update && apt-get install -y \
gcc \
&& rm -rf /var/lib/apt/lists/*
# Copy requirements and install dependencies
COPY services/alert_processor/requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# Copy shared libraries
COPY shared/ /app/shared/
# Copy application code
COPY services/alert_processor/app/ /app/app/
# Create non-root user
RUN useradd -m -u 1000 appuser && chown -R appuser:appuser /app
USER appuser
EXPOSE 8000
CMD ["python", "-m", "app.main"]

View File

@@ -0,0 +1 @@
# Alert Processor Service

View File

@@ -0,0 +1,49 @@
# services/alert_processor/app/config.py
"""
Alert Processor Service Configuration
"""
import os
from typing import List
from shared.config.base import BaseServiceSettings
class AlertProcessorConfig(BaseServiceSettings):
"""Configuration for Alert Processor Service"""
SERVICE_NAME: str = "alert-processor"
APP_NAME: str = "Alert Processor Service"
DESCRIPTION: str = "Central alert and recommendation processor"
# Alerts are stored in the notification database, since alerts and
# notifications are tightly coupled (alerts are routed into notifications)
DATABASE_URL: str = os.getenv(
"NOTIFICATION_DATABASE_URL",
"postgresql+asyncpg://notification_user:notification_pass123@notification-db:5432/notification_db"
)
# Use dedicated Redis DB for alert processing
REDIS_DB: int = int(os.getenv("ALERT_PROCESSOR_REDIS_DB", "6"))
# Alert processing configuration
BATCH_SIZE: int = int(os.getenv("ALERT_BATCH_SIZE", "10"))
PROCESSING_TIMEOUT: int = int(os.getenv("ALERT_PROCESSING_TIMEOUT", "30"))
# Deduplication settings
ALERT_DEDUPLICATION_WINDOW_MINUTES: int = int(os.getenv("ALERT_DEDUPLICATION_WINDOW_MINUTES", "15"))
RECOMMENDATION_DEDUPLICATION_WINDOW_MINUTES: int = int(os.getenv("RECOMMENDATION_DEDUPLICATION_WINDOW_MINUTES", "60"))
# Alert severity channel mappings (hardcoded for now to avoid config parsing issues)
@property
def urgent_channels(self) -> List[str]:
return ["whatsapp", "email", "push", "dashboard"]
@property
def high_channels(self) -> List[str]:
return ["whatsapp", "email", "dashboard"]
@property
def medium_channels(self) -> List[str]:
return ["email", "dashboard"]
@property
def low_channels(self) -> List[str]:
return ["dashboard"]

View File

@@ -0,0 +1,360 @@
# services/alert_processor/app/main.py
"""
Alert Processor Service - Central hub for processing alerts and recommendations
Consumes from RabbitMQ, stores in database, and routes to notification service
"""
import asyncio
import json
import signal
from datetime import datetime
from typing import Dict, Any
import structlog
import redis.asyncio as aioredis
from aio_pika import connect_robust, IncomingMessage, ExchangeType
from app.config import AlertProcessorConfig
from shared.database.base import create_database_manager
from shared.clients.base_service_client import BaseServiceClient
from shared.config.rabbitmq_config import RABBITMQ_CONFIG
# Setup logging
structlog.configure(
processors=[
structlog.stdlib.filter_by_level,
structlog.stdlib.add_logger_name,
structlog.stdlib.add_log_level,
structlog.stdlib.PositionalArgumentsFormatter(),
structlog.processors.TimeStamper(fmt="ISO"),
structlog.processors.StackInfoRenderer(),
structlog.processors.format_exc_info,
structlog.processors.JSONRenderer()
],
context_class=dict,
logger_factory=structlog.stdlib.LoggerFactory(),
wrapper_class=structlog.stdlib.BoundLogger,
cache_logger_on_first_use=True,
)
logger = structlog.get_logger()
class NotificationServiceClient(BaseServiceClient):
"""Client for notification service"""
def __init__(self, config: AlertProcessorConfig):
super().__init__("notification-service", config)
self.config = config
def get_service_base_path(self) -> str:
"""Return the base path for notification service APIs"""
return "/api/v1"
async def send_notification(self, tenant_id: str, notification: Dict[str, Any], channels: list) -> Dict[str, Any]:
"""Send notification via notification service"""
try:
response = await self.post(
"/api/v1/notifications/send",
json={
"tenant_id": tenant_id,
"notification": notification,
"channels": channels
}
)
return response
except Exception as e:
logger.error("Failed to send notification", error=str(e), tenant_id=tenant_id)
return {"status": "failed", "error": str(e)}
class AlertProcessorService:
"""
Central service for processing and routing alerts and recommendations
Integrates with notification service for multi-channel delivery
"""
def __init__(self, config: AlertProcessorConfig):
self.config = config
self.db_manager = create_database_manager(config.DATABASE_URL, "alert-processor")
self.notification_client = NotificationServiceClient(config)
self.redis = None
self.connection = None
self.channel = None
self.running = False
# Metrics
self.items_processed = 0
self.items_stored = 0
self.notifications_sent = 0
self.errors_count = 0
async def start(self):
"""Start the alert processor service"""
try:
logger.info("Starting Alert Processor Service")
# Connect to Redis for SSE publishing
self.redis = aioredis.from_url(self.config.REDIS_URL)
logger.info("Connected to Redis")
# Connect to RabbitMQ
await self._setup_rabbitmq()
# Start consuming messages
await self._start_consuming()
self.running = True
logger.info("Alert Processor Service started successfully")
except Exception as e:
logger.error("Failed to start Alert Processor Service", error=str(e))
raise
async def _setup_rabbitmq(self):
"""Setup RabbitMQ connection and configuration"""
self.connection = await connect_robust(
self.config.RABBITMQ_URL,
heartbeat=30,
connection_attempts=5
)
self.channel = await self.connection.channel()
await self.channel.set_qos(prefetch_count=10) # Process 10 messages at a time
# Setup exchange and queue based on config
exchange_config = RABBITMQ_CONFIG["exchanges"]["alerts"]
self.exchange = await self.channel.declare_exchange(
exchange_config["name"],
getattr(ExchangeType, exchange_config["type"].upper()),
durable=exchange_config["durable"]
)
queue_config = RABBITMQ_CONFIG["queues"]["alert_processing"]
self.queue = await self.channel.declare_queue(
queue_config["name"],
durable=queue_config["durable"],
arguments=queue_config["arguments"]
)
# Bind to all alert and recommendation routing keys
await self.queue.bind(self.exchange, routing_key="*.*.*")
logger.info("RabbitMQ setup completed")
async def _start_consuming(self):
"""Start consuming messages from RabbitMQ"""
await self.queue.consume(self.process_item)
logger.info("Started consuming alert messages")
async def process_item(self, message: IncomingMessage):
"""Process incoming alert or recommendation"""
async with message.process():
try:
# Parse message
item = json.loads(message.body.decode())
logger.info("Processing item",
item_type=item.get('item_type'),
alert_type=item.get('type'),
severity=item.get('severity'),
tenant_id=item.get('tenant_id'))
# Store in database
stored_item = await self.store_item(item)
self.items_stored += 1
# Determine delivery channels based on severity and type
channels = self.get_channels_by_severity_and_type(
item['severity'],
item['item_type']
)
# Send via notification service if channels are specified
if channels:
notification_result = await self.notification_client.send_notification(
tenant_id=item['tenant_id'],
notification={
'type': item['item_type'], # 'alert' or 'recommendation'
'id': item['id'],
'title': item['title'],
'message': item['message'],
'severity': item['severity'],
'metadata': item.get('metadata', {}),
'actions': item.get('actions', []),
'email': item.get('email'),
'phone': item.get('phone'),
'user_id': item.get('user_id')
},
channels=channels
)
if notification_result.get('status') == 'success':
self.notifications_sent += 1
# Stream to SSE for real-time dashboard (always)
await self.stream_to_sse(item['tenant_id'], stored_item)
self.items_processed += 1
logger.info("Item processed successfully",
item_id=item['id'],
channels=len(channels))
except Exception as e:
self.errors_count += 1
logger.error("Item processing failed", error=str(e))
raise
async def store_item(self, item: dict) -> dict:
"""Store alert or recommendation in database"""
from sqlalchemy import text
query = text("""
INSERT INTO alerts (
id, tenant_id, item_type, alert_type, severity, status,
service, title, message, actions, metadata,
created_at
) VALUES (:id, :tenant_id, :item_type, :alert_type, :severity, :status,
:service, :title, :message, :actions, :metadata, :created_at)
RETURNING *
""")
async with self.db_manager.get_session() as session:
result = await session.execute(
query,
{
'id': item['id'],
'tenant_id': item['tenant_id'],
'item_type': item['item_type'], # 'alert' or 'recommendation'
'alert_type': item['type'],
'severity': item['severity'],
'status': 'active',
'service': item['service'],
'title': item['title'],
'message': item['message'],
'actions': json.dumps(item.get('actions', [])),
'metadata': json.dumps(item.get('metadata', {})),
'created_at': item['timestamp']
}
)
row = result.fetchone()
await session.commit()
logger.debug("Item stored in database", item_id=item['id'])
return dict(row._mapping)
async def stream_to_sse(self, tenant_id: str, item: dict):
"""Publish item to Redis for SSE streaming"""
channel = f"alerts:{tenant_id}"
# Prepare message for SSE
sse_message = {
'id': item['id'],
'item_type': item['item_type'],
'type': item['alert_type'],
'severity': item['severity'],
'title': item['title'],
'message': item['message'],
'actions': json.loads(item['actions']) if isinstance(item['actions'], str) else item['actions'],
'metadata': json.loads(item['metadata']) if isinstance(item['metadata'], str) else item['metadata'],
'timestamp': item['created_at'].isoformat() if hasattr(item['created_at'], 'isoformat') else item['created_at'],
'status': item['status']
}
# Publish to Redis channel for SSE
await self.redis.publish(channel, json.dumps(sse_message))
logger.debug("Item published to SSE", tenant_id=tenant_id, item_id=item['id'])
def get_channels_by_severity_and_type(self, severity: str, item_type: str) -> list:
"""Determine notification channels based on severity, type, and time"""
current_hour = datetime.now().hour
channels = ['dashboard'] # Always include dashboard (SSE)
if item_type == 'alert':
if severity == 'urgent':
# Urgent alerts: All channels immediately
channels.extend(['whatsapp', 'email', 'push'])
elif severity == 'high':
# High alerts: WhatsApp and email during extended hours
if 6 <= current_hour <= 22:
channels.extend(['whatsapp', 'email'])
else:
channels.append('email') # Email only during night
elif severity == 'medium':
# Medium alerts: Email during business hours
if 7 <= current_hour <= 20:
channels.append('email')
# Low severity: Dashboard only
elif item_type == 'recommendation':
# Recommendations: Less urgent, limit channels and respect business hours
if severity in ['medium', 'high']:
if 8 <= current_hour <= 19: # Business hours for recommendations
channels.append('email')
# Low/urgent (rare for recs): Dashboard only
return channels
async def stop(self):
"""Stop the alert processor service"""
self.running = False
logger.info("Stopping Alert Processor Service")
try:
# Close RabbitMQ connection
if self.connection and not self.connection.is_closed:
await self.connection.close()
# Close Redis connection
if self.redis:
await self.redis.close()
logger.info("Alert Processor Service stopped")
except Exception as e:
logger.error("Error stopping service", error=str(e))
def get_metrics(self) -> Dict[str, Any]:
"""Get service metrics"""
return {
"items_processed": self.items_processed,
"items_stored": self.items_stored,
"notifications_sent": self.notifications_sent,
"errors_count": self.errors_count,
"running": self.running
}
async def main():
"""Main entry point"""
config = AlertProcessorConfig()
service = AlertProcessorService(config)
# Setup signal handlers for graceful shutdown
shutdown_event = asyncio.Event()
loop = asyncio.get_running_loop()
# Register handlers on the running loop; signal.signal() combined with
# asyncio.create_task is unreliable here, and sys.exit() inside a task
# would bypass the cleanup in the finally block below
for sig in (signal.SIGTERM, signal.SIGINT):
loop.add_signal_handler(sig, shutdown_event.set)
try:
# Start the service
await service.start()
# Run until a shutdown signal arrives
while service.running and not shutdown_event.is_set():
await asyncio.sleep(1)
except KeyboardInterrupt:
logger.info("Received keyboard interrupt")
except Exception as e:
logger.error("Service failed", error=str(e))
finally:
await service.stop()
if __name__ == "__main__":
asyncio.run(main())
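For reference, a publisher-side sketch of the payload `process_item()` and `store_item()` expect. The field names are taken from `store_item()`; the three-segment routing-key convention is an assumption inferred from the `*.*.*` binding:

```python
# Sketch only: the routing-key layout <service>.<item_type>.<severity>
# is an assumption, not confirmed project convention.
import json
import uuid
from datetime import datetime, timezone

item = {
    "id": str(uuid.uuid4()),
    "tenant_id": "11111111-1111-1111-1111-111111111111",  # illustrative
    "item_type": "alert",                # or "recommendation"
    "type": "critical_stock_shortage",   # persisted as alert_type
    "severity": "urgent",                # urgent | high | medium | low
    "service": "inventory",
    "title": "Stock crítico: Harina",
    "message": "Stock actual 2kg, mínimo 10kg.",
    "actions": ["Contactar proveedor"],
    "metadata": {"ingredient_id": "..."},
    "timestamp": datetime.now(timezone.utc).isoformat(),
}
body = json.dumps(item).encode()
routing_key = "inventory.alert.urgent"
```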

View File

@@ -0,0 +1,12 @@
fastapi==0.104.1
uvicorn[standard]==0.24.0
aio-pika==9.3.1
redis==5.0.1
asyncpg==0.29.0
sqlalchemy==2.0.23
structlog==23.2.0
prometheus-client==0.19.0
pydantic-settings==2.1.0
pydantic==2.5.2
httpx==0.25.2
python-jose[cryptography]==3.3.0

View File

@@ -1,129 +0,0 @@
# ================================================================
# services/auth/README.md
# ================================================================
# Authentication Service
Microservice for user authentication and authorization in the bakery forecasting platform.
## Features
- User registration and login
- JWT access and refresh tokens
- Password security validation
- Rate limiting and login attempt tracking
- Multi-tenant user management
- Session management
- Event publishing for user actions
## Quick Start
### Development
```bash
# Start dependencies
docker-compose up -d auth-db redis rabbitmq
# Install dependencies
pip install -r requirements.txt
# Run migrations
alembic upgrade head
# Start service
uvicorn app.main:app --reload --host 0.0.0.0 --port 8001
```
### With Docker
```bash
# Start everything
docker-compose up -d
# View logs
docker-compose logs -f auth-service
# Run tests
docker-compose exec auth-service pytest
```
## API Endpoints
### Authentication
- `POST /api/v1/auth/register` - Register new user
- `POST /api/v1/auth/login` - User login
- `POST /api/v1/auth/refresh` - Refresh access token
- `POST /api/v1/auth/verify` - Verify token
- `POST /api/v1/auth/logout` - Logout user
### User Management
- `GET /api/v1/users/me` - Get current user
- `PUT /api/v1/users/me` - Update current user
- `POST /api/v1/users/change-password` - Change password
### Health
- `GET /health` - Health check
- `GET /metrics` - Prometheus metrics
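By way of illustration only (the request fields below are assumptions, not the documented schema), a login call could look like:

```python
# Hypothetical login request; field names are assumed, not authoritative.
import httpx

resp = httpx.post(
    "http://localhost:8001/api/v1/auth/login",
    json={"email": "owner@bakery.es", "password": "s3cret"},
)
resp.raise_for_status()
tokens = resp.json()  # expected to include access and refresh tokens
```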
## Configuration
Set these environment variables:
```bash
DATABASE_URL=postgresql+asyncpg://auth_user:auth_pass123@auth-db:5432/auth_db
REDIS_URL=redis://redis:6379/0
RABBITMQ_URL=amqp://bakery:forecast123@rabbitmq:5672/
JWT_SECRET_KEY=your-super-secret-jwt-key-change-in-production
JWT_ACCESS_TOKEN_EXPIRE_MINUTES=30
JWT_REFRESH_TOKEN_EXPIRE_DAYS=7
MAX_LOGIN_ATTEMPTS=5
LOCKOUT_DURATION_MINUTES=30
```
## Testing
```bash
# Run all tests
pytest
# Run with coverage
pytest --cov=app
# Run specific test file
pytest tests/test_auth.py -v
```
## Database Migrations
```bash
# Create migration
alembic revision --autogenerate -m "description"
# Apply migrations
alembic upgrade head
# Rollback
alembic downgrade -1
```
## Monitoring
- Health endpoint: `/health`
- Metrics endpoint: `/metrics` (Prometheus format)
- Logs: Structured JSON logging
- Tracing: Request ID tracking
## Security Features
- Bcrypt password hashing
- JWT tokens with expiration
- Rate limiting on login attempts
- Account lockout protection
- IP and user agent tracking
- Token revocation support
## Events Published
- `user.registered` - When user registers
- `user.login` - When user logs in
- `user.logout` - When user logs out
- `user.password_changed` - When password changes

View File

@@ -1,169 +0,0 @@
# ================================================================
# Documentation: services/forecasting/README.md
# ================================================================
# Forecasting Service
AI-powered demand prediction service for bakery operations in Madrid, Spain.
## Overview
The Forecasting Service is a specialized microservice responsible for generating accurate demand predictions for bakery products. It integrates trained ML models with real-time weather and traffic data to provide actionable forecasts for business planning.
## Features
### Core Functionality
- **Single Product Forecasting**: Generate predictions for individual products
- **Batch Forecasting**: Process multiple products and time periods
- **Real-time Predictions**: On-demand forecasting with external data
- **Business Rules**: Spanish bakery-specific adjustments
- **Alert System**: Automated notifications for demand anomalies
### Integration Points
- **Training Service**: Loads trained Prophet models
- **Data Service**: Retrieves weather and traffic data
- **Notification Service**: Sends alerts and reports
- **Gateway Service**: Authentication and request routing
## API Endpoints
### Forecasts
- `POST /api/v1/forecasts/single` - Generate single forecast
- `POST /api/v1/forecasts/batch` - Generate batch forecasts
- `GET /api/v1/forecasts/list` - List historical forecasts
- `GET /api/v1/forecasts/alerts` - Get forecast alerts
- `PUT /api/v1/forecasts/alerts/{id}/acknowledge` - Acknowledge alert
### Predictions
- `POST /api/v1/predictions/realtime` - Real-time prediction
- `GET /api/v1/predictions/quick/{product}` - Quick multi-day forecast
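As a hedged sketch (the payload fields are illustrative assumptions, not the actual schema), a single-forecast request might be issued like this:

```python
# Hypothetical request; the JSON fields are assumptions for illustration.
import httpx

resp = httpx.post(
    "http://localhost:8003/api/v1/forecasts/single",
    headers={"Authorization": "Bearer <token>"},
    json={"product": "croissant", "forecast_days": 7},
)
resp.raise_for_status()
print(resp.json())
```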
## Business Logic
### Spanish Bakery Rules
- **Siesta Impact**: Reduced afternoon activity consideration
- **Weather Adjustments**: Rain reduces traffic, extreme temperatures affect product mix
- **Holiday Handling**: Spanish holiday calendar integration
- **Weekend Patterns**: Different demand patterns for weekends
### Business Types
- **Individual Bakery**: Single location with direct sales
- **Central Workshop**: Production facility supplying multiple locations
## Configuration
### Environment Variables
```bash
# Database
DATABASE_URL=postgresql+asyncpg://user:pass@host:port/db
# External Services
TRAINING_SERVICE_URL=http://training-service:8000
DATA_SERVICE_URL=http://data-service:8000
# Business Rules
WEEKEND_ADJUSTMENT_FACTOR=0.8
HOLIDAY_ADJUSTMENT_FACTOR=0.5
RAIN_IMPACT_FACTOR=0.7
```
### Performance Settings
```bash
MAX_FORECAST_DAYS=30
PREDICTION_CACHE_TTL_HOURS=6
FORECAST_BATCH_SIZE=100
```
## Development
### Setup
```bash
cd services/forecasting
pip install -r requirements.txt
```
### Testing
```bash
pytest tests/ -v --cov=app
```
### Running Locally
```bash
uvicorn app.main:app --reload --port 8000
```
## Deployment
### Docker
```bash
docker build -t forecasting-service .
docker run -p 8000:8000 forecasting-service
```
### Kubernetes
```bash
kubectl apply -f infrastructure/kubernetes/base/forecasting-service.yaml
```
## Monitoring
### Metrics
- `forecasts_generated_total` - Total forecasts generated
- `predictions_served_total` - Total predictions served
- `forecast_processing_time_seconds` - Processing time histogram
- `active_models_count` - Number of active models
### Health Checks
- `/health` - Service health status
- `/metrics` - Prometheus metrics endpoint
## Performance
### Benchmarks
- **Single Forecast**: < 2 seconds average
- **Batch Forecasting**: 100 products in < 30 seconds
- **Concurrent Load**: 95%+ success rate at 20 concurrent requests
### Optimization
- Model caching for faster predictions
- Feature preparation optimization
- Database query optimization
- Asynchronous external API calls
## Troubleshooting
### Common Issues
1. **No Model Found Error**
- Ensure training service has models for tenant/product
- Check model training logs in training service
2. **High Prediction Latency**
- Monitor model cache hit rate
- Check external service response times
- Review database query performance
3. **Inaccurate Predictions**
- Verify external data quality (weather/traffic)
- Check model performance metrics
- Review business rule configurations
### Logging
```bash
# View service logs
docker logs forecasting-service
# Debug level logging
LOG_LEVEL=DEBUG uvicorn app.main:app
```
## Contributing
1. Follow the existing code structure and patterns
2. Add tests for new functionality
3. Update documentation for API changes
4. Ensure performance benchmarks are maintained
## License
This service is part of the Bakery Forecasting Platform - MIT License

View File

@@ -14,6 +14,7 @@ import structlog
from app.core.config import settings
from app.core.database import init_db, close_db
from app.api import ingredients, stock, classification
from app.services.inventory_alert_service import InventoryAlertService
from shared.monitoring.health import router as health_router
from shared.monitoring.metrics import setup_metrics_early
# Auth decorators are used in endpoints, no global setup needed
@@ -32,6 +33,14 @@ async def lifespan(app: FastAPI):
await init_db()
logger.info("Database initialized successfully")
# Initialize alert service
alert_service = InventoryAlertService(settings)
await alert_service.start()
logger.info("Inventory alert service started")
# Store alert service in app state
app.state.alert_service = alert_service
# Setup metrics is already done early - no need to do it here
logger.info("Metrics setup completed")
@@ -44,6 +53,11 @@ async def lifespan(app: FastAPI):
# Shutdown
logger.info("Shutting down Inventory Service")
try:
# Stop alert service
if hasattr(app.state, 'alert_service'):
await app.state.alert_service.stop()
logger.info("Alert service stopped")
await close_db()
logger.info("Database connections closed")
except Exception as e:

View File

@@ -0,0 +1,710 @@
# services/inventory/app/services/inventory_alert_service.py
"""
Inventory-specific alert and recommendation detection service
Implements hybrid detection patterns for critical stock issues and optimization opportunities
"""
import asyncio
import json
from typing import List, Dict, Any, Optional
from uuid import UUID
from datetime import datetime, timedelta
import structlog
from apscheduler.triggers.cron import CronTrigger
from shared.alerts.base_service import BaseAlertService, AlertServiceMixin
from shared.alerts.templates import format_item_message
logger = structlog.get_logger()
class InventoryAlertService(BaseAlertService, AlertServiceMixin):
"""Inventory service alert and recommendation detection"""
def setup_scheduled_checks(self):
"""Inventory-specific scheduled checks for alerts and recommendations"""
# Critical stock checks - every 5 minutes (alerts)
self.scheduler.add_job(
self.check_stock_levels,
CronTrigger(minute='*/5'),
id='stock_levels',
misfire_grace_time=30,
max_instances=1
)
# Expiry checks - every 2 minutes (food safety critical, alerts)
self.scheduler.add_job(
self.check_expiring_products,
CronTrigger(minute='*/2'),
id='expiry_check',
misfire_grace_time=30,
max_instances=1
)
# Temperature checks - every 2 minutes (alerts)
self.scheduler.add_job(
self.check_temperature_breaches,
CronTrigger(minute='*/2'),
id='temperature_check',
misfire_grace_time=30,
max_instances=1
)
# Inventory optimization - every 30 minutes (recommendations)
self.scheduler.add_job(
self.generate_inventory_recommendations,
CronTrigger(minute='*/30'),
id='inventory_recs',
misfire_grace_time=120,
max_instances=1
)
# Waste reduction analysis - every hour (recommendations)
self.scheduler.add_job(
self.generate_waste_reduction_recommendations,
CronTrigger(minute='0'),
id='waste_reduction_recs',
misfire_grace_time=300,
max_instances=1
)
logger.info("Inventory alert schedules configured",
service=self.config.SERVICE_NAME)
async def check_stock_levels(self):
"""Batch check all stock levels for critical shortages (alerts)"""
try:
self._checks_performed += 1
query = """
WITH stock_analysis AS (
SELECT
i.*,
COALESCE(p.scheduled_quantity, 0) as tomorrow_needed,
COALESCE(s.avg_daily_usage, 0) as avg_daily_usage,
COALESCE(s.lead_time_days, 7) as lead_time_days,
CASE
WHEN i.current_stock < i.minimum_stock THEN 'critical'
WHEN i.current_stock < i.minimum_stock * 1.2 THEN 'low'
WHEN i.current_stock > i.maximum_stock THEN 'overstock'
ELSE 'normal'
END as status,
GREATEST(0, i.minimum_stock - i.current_stock) as shortage_amount
FROM inventory_items i
LEFT JOIN production_schedule p ON p.ingredient_id = i.id
AND p.date = CURRENT_DATE + INTERVAL '1 day'
LEFT JOIN supplier_items s ON s.ingredient_id = i.id
WHERE i.tenant_id = :tenant_id AND i.active = true
)
SELECT * FROM stock_analysis WHERE status != 'normal'
ORDER BY
CASE status
WHEN 'critical' THEN 1
WHEN 'low' THEN 2
WHEN 'overstock' THEN 3
END,
shortage_amount DESC
"""
tenants = await self.get_active_tenants()
for tenant_id in tenants:
try:
from sqlalchemy import text
async with self.db_manager.get_session() as session:
result = await session.execute(text(query), {"tenant_id": tenant_id})
issues = result.mappings().all()  # dict-style row access used below
for issue in issues:
await self._process_stock_issue(tenant_id, issue)
except Exception as e:
logger.error("Error checking stock for tenant",
tenant_id=str(tenant_id),
error=str(e))
logger.debug("Stock level check completed",
tenants_checked=len(tenants))
except Exception as e:
logger.error("Stock level check failed", error=str(e))
self._errors_count += 1
async def _process_stock_issue(self, tenant_id: UUID, issue: Dict[str, Any]):
"""Process individual stock issue"""
try:
if issue['status'] == 'critical':
# Critical stock shortage - immediate alert
template_data = self.format_spanish_message(
'critical_stock_shortage',
ingredient_name=issue["name"],
current_stock=issue["current_stock"],
required_stock=issue["tomorrow_needed"] or issue["minimum_stock"],
shortage_amount=issue["shortage_amount"]
)
await self.publish_item(tenant_id, {
'type': 'critical_stock_shortage',
'severity': 'urgent',
'title': template_data['title'],
'message': template_data['message'],
'actions': template_data['actions'],
'metadata': {
'ingredient_id': str(issue['id']),
'current_stock': float(issue['current_stock']),
'minimum_stock': float(issue['minimum_stock']),
'shortage_amount': float(issue['shortage_amount']),
'tomorrow_needed': float(issue['tomorrow_needed'] or 0),
'lead_time_days': issue['lead_time_days']
}
}, item_type='alert')
elif issue['status'] == 'low':
# Low stock - high priority alert
template_data = self.format_spanish_message(
'critical_stock_shortage',
ingredient_name=issue["name"],
current_stock=issue["current_stock"],
required_stock=issue["minimum_stock"]
)
severity = self.get_business_hours_severity('high')
await self.publish_item(tenant_id, {
'type': 'low_stock_warning',
'severity': severity,
'title': f'⚠️ Stock Bajo: {issue["name"]}',
'message': f'Stock actual {issue["current_stock"]}kg, mínimo {issue["minimum_stock"]}kg. Considerar pedido pronto.',
'actions': ['Revisar consumo', 'Programar pedido', 'Contactar proveedor'],
'metadata': {
'ingredient_id': str(issue['id']),
'current_stock': float(issue['current_stock']),
'minimum_stock': float(issue['minimum_stock'])
}
}, item_type='alert')
elif issue['status'] == 'overstock':
# Overstock - medium priority alert
severity = self.get_business_hours_severity('medium')
await self.publish_item(tenant_id, {
'type': 'overstock_warning',
'severity': severity,
'title': f'📦 Exceso de Stock: {issue["name"]}',
'message': f'Stock actual {issue["current_stock"]}kg excede máximo {issue["maximum_stock"]}kg. Revisar para evitar caducidad.',
'actions': ['Revisar caducidades', 'Aumentar producción', 'Ofertas especiales', 'Ajustar pedidos'],
'metadata': {
'ingredient_id': str(issue['id']),
'current_stock': float(issue['current_stock']),
'maximum_stock': float(issue['maximum_stock'])
}
}, item_type='alert')
except Exception as e:
logger.error("Error processing stock issue",
ingredient_id=str(issue.get('id')),
error=str(e))
async def check_expiring_products(self):
"""Check for products approaching expiry (alerts)"""
try:
self._checks_performed += 1
query = """
SELECT
i.id, i.name, i.current_stock, i.tenant_id,
b.id as batch_id, b.expiry_date, b.quantity,
(b.expiry_date::date - CURRENT_DATE) as days_to_expiry
FROM inventory_items i
JOIN inventory_batches b ON b.ingredient_id = i.id
WHERE b.expiry_date <= CURRENT_DATE + INTERVAL '7 days'
AND b.quantity > 0
AND b.status = 'active'
ORDER BY b.expiry_date ASC
"""
from sqlalchemy import text
async with self.db_manager.get_session() as session:
result = await session.execute(text(query))
expiring_items = result.mappings().all()
# Group by tenant
by_tenant = {}
for item in expiring_items:
tenant_id = item['tenant_id']
if tenant_id not in by_tenant:
by_tenant[tenant_id] = []
by_tenant[tenant_id].append(item)
for tenant_id, items in by_tenant.items():
await self._process_expiring_items(tenant_id, items)
except Exception as e:
logger.error("Expiry check failed", error=str(e))
self._errors_count += 1
async def _process_expiring_items(self, tenant_id: UUID, items: List[Dict[str, Any]]):
"""Process expiring items for a tenant"""
try:
# Group by urgency
expired = [i for i in items if i['days_to_expiry'] <= 0]
urgent = [i for i in items if 0 < i['days_to_expiry'] <= 2]
warning = [i for i in items if 2 < i['days_to_expiry'] <= 7]
# Process expired products (urgent alerts)
if expired:
product_count = len(expired)
product_names = [i['name'] for i in expired[:3]] # First 3 names
if len(expired) > 3:
product_names.append(f"y {len(expired) - 3} más")
template_data = self.format_spanish_message(
'expired_products',
product_count=product_count,
product_names=", ".join(product_names)
)
await self.publish_item(tenant_id, {
'type': 'expired_products',
'severity': 'urgent',
'title': template_data['title'],
'message': template_data['message'],
'actions': template_data['actions'],
'metadata': {
'expired_items': [
{
'id': str(item['id']),
'name': item['name'],
'batch_id': str(item['batch_id']),
'quantity': float(item['quantity']),
'days_expired': abs(item['days_to_expiry'])
} for item in expired
]
}
}, item_type='alert')
# Process urgent expiry (high alerts)
if urgent:
for item in urgent:
await self.publish_item(tenant_id, {
'type': 'urgent_expiry',
'severity': 'high',
'title': f'⏰ Caducidad Urgente: {item["name"]}',
'message': f'{item["name"]} caduca en {item["days_to_expiry"]} día(s). Usar prioritariamente.',
'actions': ['Usar inmediatamente', 'Promoción especial', 'Revisar recetas', 'Documentar'],
'metadata': {
'ingredient_id': str(item['id']),
'batch_id': str(item['batch_id']),
'days_to_expiry': item['days_to_expiry'],
'quantity': float(item['quantity'])
}
}, item_type='alert')
except Exception as e:
logger.error("Error processing expiring items",
tenant_id=str(tenant_id),
error=str(e))
async def check_temperature_breaches(self):
"""Check for temperature breaches (alerts)"""
try:
self._checks_performed += 1
query = """
SELECT
t.id, t.sensor_id, t.location, t.temperature,
t.max_threshold, t.tenant_id,
EXTRACT(EPOCH FROM (NOW() - t.first_breach_time))/60 as breach_duration_minutes
FROM temperature_readings t
WHERE t.temperature > t.max_threshold
AND t.breach_duration_minutes >= 30 -- Only after 30 minutes
AND (t.last_alert_sent IS NULL OR t.last_alert_sent < NOW() - INTERVAL '15 minutes') -- Avoid alert spam
ORDER BY t.temperature DESC, t.breach_duration_minutes DESC
"""
from sqlalchemy import text
async with self.db_manager.get_session() as session:
result = await session.execute(text(query))
breaches = result.mappings().all()
for breach in breaches:
await self._process_temperature_breach(breach)
except Exception as e:
logger.error("Temperature check failed", error=str(e))
self._errors_count += 1
async def _process_temperature_breach(self, breach: Dict[str, Any]):
"""Process temperature breach"""
try:
# Determine severity based on duration and temperature
duration_minutes = breach['breach_duration_minutes']
temp_excess = breach['temperature'] - breach['max_threshold']
if duration_minutes > 120 or temp_excess > 10:
severity = 'urgent'
elif duration_minutes > 60 or temp_excess > 5:
severity = 'high'
else:
severity = 'medium'
template_data = self.format_spanish_message(
'temperature_breach',
location=breach['location'],
temperature=breach['temperature'],
duration=duration_minutes
)
await self.publish_item(breach['tenant_id'], {
'type': 'temperature_breach',
'severity': severity,
'title': template_data['title'],
'message': template_data['message'],
'actions': template_data['actions'],
'metadata': {
'sensor_id': breach['sensor_id'],
'location': breach['location'],
'temperature': float(breach['temperature']),
'max_threshold': float(breach['max_threshold']),
'duration_minutes': duration_minutes,
'temperature_excess': temp_excess
}
}, item_type='alert')
# Update last alert sent time to avoid spam
await self.db_manager.execute(
"UPDATE temperature_readings SET last_alert_sent = NOW() WHERE id = $1",
breach['id']
)
except Exception as e:
logger.error("Error processing temperature breach",
sensor_id=breach.get('sensor_id'),
error=str(e))
async def generate_inventory_recommendations(self):
"""Generate optimization recommendations based on usage patterns"""
try:
self._checks_performed += 1
# Analyze stock levels vs usage patterns
query = """
WITH usage_analysis AS (
SELECT
i.id, i.name, i.tenant_id, i.minimum_stock, i.maximum_stock,
i.current_stock,
AVG(sm.quantity) FILTER (WHERE sm.movement_type = 'out'
AND sm.created_at > CURRENT_DATE - INTERVAL '30 days') as avg_daily_usage,
COUNT(sm.id) FILTER (WHERE sm.movement_type = 'out'
AND sm.created_at > CURRENT_DATE - INTERVAL '30 days') as usage_days,
MAX(sm.created_at) FILTER (WHERE sm.movement_type = 'out') as last_used
FROM inventory_items i
LEFT JOIN stock_movements sm ON sm.ingredient_id = i.id
WHERE i.active = true AND i.tenant_id = :tenant_id
GROUP BY i.id
HAVING COUNT(sm.id) FILTER (WHERE sm.movement_type = 'out'
AND sm.created_at > CURRENT_DATE - INTERVAL '30 days') >= 5
),
recommendations AS (
SELECT *,
CASE
WHEN avg_daily_usage * 7 > maximum_stock THEN 'increase_max'
WHEN avg_daily_usage * 3 < minimum_stock THEN 'decrease_min'
WHEN current_stock / NULLIF(avg_daily_usage, 0) > 14 THEN 'reduce_stock'
WHEN avg_daily_usage > 0 AND minimum_stock / avg_daily_usage < 3 THEN 'increase_min'
ELSE null
END as recommendation_type
FROM usage_analysis
WHERE avg_daily_usage > 0
)
SELECT * FROM recommendations WHERE recommendation_type IS NOT NULL
ORDER BY avg_daily_usage DESC
"""
tenants = await self.get_active_tenants()
for tenant_id in tenants:
try:
from sqlalchemy import text
async with self.db_manager.get_session() as session:
result = await session.execute(text(query), {"tenant_id": tenant_id})
recommendations = result.mappings().all()
for rec in recommendations:
await self._generate_stock_recommendation(tenant_id, rec)
except Exception as e:
logger.error("Error generating recommendations for tenant",
tenant_id=str(tenant_id),
error=str(e))
except Exception as e:
logger.error("Inventory recommendations failed", error=str(e))
self._errors_count += 1
async def _generate_stock_recommendation(self, tenant_id: UUID, rec: Dict[str, Any]):
"""Generate specific stock recommendation"""
try:
if not self.should_send_recommendation(tenant_id, rec['recommendation_type']):
return
rec_type = rec['recommendation_type']
if rec_type == 'increase_max':
suggested_max = rec['avg_daily_usage'] * 10 # 10 days supply
template_data = self.format_spanish_message(
'inventory_optimization',
ingredient_name=rec['name'],
period=30,
suggested_increase=suggested_max - rec['maximum_stock']
)
await self.publish_item(tenant_id, {
'type': 'inventory_optimization',
'severity': 'medium',
'title': template_data['title'],
'message': template_data['message'],
'actions': template_data['actions'],
'metadata': {
'ingredient_id': str(rec['id']),
'current_max': float(rec['maximum_stock']),
'suggested_max': float(suggested_max),
'avg_daily_usage': float(rec['avg_daily_usage']),
'recommendation_type': rec_type
}
}, item_type='recommendation')
elif rec_type == 'decrease_min':
suggested_min = rec['avg_daily_usage'] * 3 # 3 days safety stock
await self.publish_item(tenant_id, {
'type': 'inventory_optimization',
'severity': 'low',
'title': f'📉 Optimización de Stock Mínimo: {rec["name"]}',
'message': f'Uso promedio sugiere reducir stock mínimo de {rec["minimum_stock"]}kg a {suggested_min:.1f}kg.',
'actions': ['Revisar niveles mínimos', 'Analizar tendencias', 'Ajustar configuración'],
'metadata': {
'ingredient_id': str(rec['id']),
'current_min': float(rec['minimum_stock']),
'suggested_min': float(suggested_min),
'avg_daily_usage': float(rec['avg_daily_usage']),
'recommendation_type': rec_type
}
}, item_type='recommendation')
except Exception as e:
logger.error("Error generating stock recommendation",
ingredient_id=str(rec.get('id')),
error=str(e))
async def generate_waste_reduction_recommendations(self):
"""Generate waste reduction recommendations"""
try:
# Analyze waste patterns
query = """
SELECT
i.id, i.name, i.tenant_id,
SUM(w.quantity) as total_waste_30d,
COUNT(w.id) as waste_incidents,
AVG(w.quantity) as avg_waste_per_incident,
w.waste_reason
FROM inventory_items i
JOIN waste_logs w ON w.ingredient_id = i.id
WHERE w.created_at > CURRENT_DATE - INTERVAL '30 days'
AND i.tenant_id = :tenant_id
GROUP BY i.id, w.waste_reason
HAVING SUM(w.quantity) > 5 -- More than 5kg wasted
ORDER BY total_waste_30d DESC
"""
tenants = await self.get_active_tenants()
for tenant_id in tenants:
try:
from sqlalchemy import text
async with self.db_manager.get_session() as session:
result = await session.execute(text(query), {"tenant_id": tenant_id})
waste_data = result.mappings().all()
for waste in waste_data:
await self._generate_waste_recommendation(tenant_id, waste)
except Exception as e:
logger.error("Error generating waste recommendations",
tenant_id=str(tenant_id),
error=str(e))
except Exception as e:
logger.error("Waste reduction recommendations failed", error=str(e))
self._errors_count += 1
async def _generate_waste_recommendation(self, tenant_id: UUID, waste: Dict[str, Any]):
"""Generate waste reduction recommendation"""
try:
waste_percentage = (waste['total_waste_30d'] / (waste['total_waste_30d'] + 100)) * 100 # Simplified calculation
template_data = self.format_spanish_message(
'waste_reduction',
product=waste['name'],
waste_reduction_percent=waste_percentage
)
await self.publish_item(tenant_id, {
'type': 'waste_reduction',
'severity': 'low',
'title': template_data['title'],
'message': template_data['message'],
'actions': template_data['actions'],
'metadata': {
'ingredient_id': str(waste['id']),
'total_waste_30d': float(waste['total_waste_30d']),
'waste_incidents': waste['waste_incidents'],
'waste_reason': waste['waste_reason'],
'estimated_reduction_percent': waste_percentage
}
}, item_type='recommendation')
except Exception as e:
logger.error("Error generating waste recommendation",
ingredient_id=str(waste.get('id')),
error=str(e))
async def register_db_listeners(self, conn):
"""Register inventory-specific database listeners"""
try:
await conn.add_listener('stock_alerts', self.handle_stock_db_alert)
await conn.add_listener('temperature_alerts', self.handle_temperature_db_alert)
logger.info("Database listeners registered",
service=self.config.SERVICE_NAME)
except Exception as e:
logger.error("Failed to register database listeners",
service=self.config.SERVICE_NAME,
error=str(e))
async def handle_stock_db_alert(self, connection, pid, channel, payload):
"""Handle stock alert from database trigger"""
try:
data = json.loads(payload)
tenant_id = UUID(data['tenant_id'])
template_data = self.format_spanish_message(
'critical_stock_shortage',
ingredient_name=data['name'],
current_stock=data['current_stock'],
required_stock=data['minimum_stock']
)
await self.publish_item(tenant_id, {
'type': 'critical_stock_shortage',
'severity': 'urgent',
'title': template_data['title'],
'message': template_data['message'],
'actions': template_data['actions'],
'metadata': {
'ingredient_id': data['ingredient_id'],
'current_stock': data['current_stock'],
'minimum_stock': data['minimum_stock'],
'trigger_source': 'database'
}
}, item_type='alert')
except Exception as e:
logger.error("Error handling stock DB alert", error=str(e))
async def handle_temperature_db_alert(self, connection, pid, channel, payload):
"""Handle temperature alert from database trigger"""
try:
data = json.loads(payload)
tenant_id = UUID(data['tenant_id'])
template_data = self.format_spanish_message(
'temperature_breach',
location=data['location'],
temperature=data['temperature'],
duration=data['duration']
)
await self.publish_item(tenant_id, {
'type': 'temperature_breach',
'severity': 'high',
'title': template_data['title'],
'message': template_data['message'],
'actions': template_data['actions'],
'metadata': {
'sensor_id': data['sensor_id'],
'location': data['location'],
'temperature': data['temperature'],
'duration': data['duration'],
'trigger_source': 'database'
}
}, item_type='alert')
except Exception as e:
logger.error("Error handling temperature DB alert", error=str(e))
async def start_event_listener(self):
"""Listen for inventory-affecting events"""
try:
# Subscribe to order events that might affect inventory
await self.rabbitmq_client.consume_events(
"bakery_events",
f"inventory.orders.{self.config.SERVICE_NAME}",
"orders.placed",
self.handle_order_placed
)
logger.info("Event listeners started",
service=self.config.SERVICE_NAME)
except Exception as e:
logger.error("Failed to start event listeners",
service=self.config.SERVICE_NAME,
error=str(e))
async def handle_order_placed(self, message):
"""Check if order critically affects stock"""
try:
order = json.loads(message.body)
tenant_id = UUID(order['tenant_id'])
for item in order.get('items', []):
# Check stock impact
stock_info = await self.get_stock_after_order(item['ingredient_id'], item['quantity'])
if stock_info and stock_info['remaining'] < stock_info['minimum_stock']:
await self.publish_item(tenant_id, {
'type': 'stock_depleted_by_order',
'severity': 'high',
'title': f'⚠️ Pedido Agota Stock: {stock_info["name"]}',
'message': f'Pedido #{order["id"]} dejará stock en {stock_info["remaining"]}kg (mínimo {stock_info["minimum_stock"]}kg)',
'actions': ['Revisar pedido', 'Contactar proveedor', 'Ajustar producción', 'Usar stock reserva'],
'metadata': {
'order_id': order['id'],
'ingredient_id': item['ingredient_id'],
'order_quantity': item['quantity'],
'remaining_stock': stock_info['remaining'],
'minimum_stock': stock_info['minimum_stock']
}
}, item_type='alert')
except Exception as e:
logger.error("Error handling order placed event", error=str(e))
async def get_stock_after_order(self, ingredient_id: str, order_quantity: float) -> Optional[Dict[str, Any]]:
"""Get stock information after hypothetical order"""
try:
query = """
SELECT id, name, current_stock, minimum_stock,
(current_stock - $2) as remaining
FROM inventory_items
WHERE id = $1
"""
result = await self.db_manager.fetchrow(query, ingredient_id, order_quantity)
return dict(result) if result else None
except Exception as e:
logger.error("Error getting stock after order",
ingredient_id=ingredient_id,
error=str(e))
return None
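Standalone, the scheduling pattern from `setup_scheduled_checks()` reduces to the sketch below, assuming APScheduler's AsyncIOScheduler (available in the pinned APScheduler==3.10.4); the job body is a placeholder:

```python
# Minimal sketch of the cron-style check registration used above.
import asyncio
from apscheduler.schedulers.asyncio import AsyncIOScheduler
from apscheduler.triggers.cron import CronTrigger

async def check_stock_levels():
    print("checking stock levels...")

async def main():
    scheduler = AsyncIOScheduler()
    scheduler.add_job(
        check_stock_levels,
        CronTrigger(minute="*/5"),   # every 5 minutes
        id="stock_levels",
        max_instances=1,
        misfire_grace_time=30,
    )
    scheduler.start()
    await asyncio.Event().wait()  # keep the loop alive for the scheduler

asyncio.run(main())
```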

View File

@@ -30,8 +30,12 @@ passlib[bcrypt]==1.7.4
structlog==23.2.0
prometheus-client==0.19.0
# Message queues and Redis
aio-pika==9.3.1
redis>=4.0.0
# Scheduling
APScheduler==3.10.4
# Additional for inventory management
python-barcode==0.15.1

View File

@@ -1,321 +0,0 @@
## 🎯 **Complete Notification Service Implementation**
### **📁 File Structure Created**
```
services/notification/
├── app/
│ ├── main.py ✅ Complete FastAPI application
│ ├── core/
│ │ ├── config.py ✅ Configuration settings
│ │ └── database.py ✅ Database initialization
│ ├── models/
│ │ ├── notifications.py ✅ Core notification models
│ │ └── templates.py ✅ Template-specific models
│ ├── schemas/
│ │ └── notifications.py ✅ Pydantic schemas
│ ├── services/
│ │ ├── notification_service.py ✅ Main business logic
│ │ ├── email_service.py ✅ Email delivery
│ │ ├── whatsapp_service.py ✅ WhatsApp delivery
│ │ └── messaging.py ✅ RabbitMQ integration
│ └── api/
│ └── notifications.py ✅ Complete API routes
├── requirements.txt ✅ Python dependencies
├── Dockerfile ✅ Container configuration
└── .env.example ✅ Environment variables
```
### **🔧 Key Features Implemented**
#### **1. Complete Business Logic**
- **NotificationService**: Core orchestration of all notification operations
- **Multi-channel support**: Email, WhatsApp, Push (extensible)
- **Template processing**: Jinja2-based template rendering
- **Bulk notifications**: Batch processing with rate limiting
- **User preferences**: Granular notification controls
- **Scheduling**: Delayed notification delivery
#### **2. Email Service Integration**
- **SMTP support**: Configurable email providers (Gmail, SendGrid, etc.)
- **HTML + Text emails**: Rich email templates with fallbacks
- **Bulk email processing**: Rate-limited batch sending
- **Template system**: Pre-built Spanish templates for bakeries
- **Health checks**: SMTP connection monitoring
- **Attachment support**: File attachment capabilities
#### **3. WhatsApp Service Integration**
- **Twilio integration**: WhatsApp Business API support
- **Spanish phone formatting**: Automatic +34 country code handling
- **Template messages**: WhatsApp Business template support
- **Bulk WhatsApp**: Rate-limited batch messaging
- **Delivery status**: Webhook handling for delivery confirmations
#### **4. Database Models & Schemas**
- **Complete data model**: Notifications, templates, preferences, logs
- **Multi-tenant support**: Tenant-scoped notifications
- **Audit trail**: Detailed delivery attempt logging
- **Template management**: System and custom templates
- **User preferences**: Granular notification controls
#### **5. API Integration with Gateway**
- **Gateway authentication**: Uses shared auth decorators
- **Tenant isolation**: Automatic tenant scoping
- **Role-based access**: Admin/manager/user permissions
- **Complete CRUD**: Full notification management API
- **Webhook endpoints**: External delivery status handling
#### **6. RabbitMQ Event Integration**
- **Event consumers**: Listens for user registration, forecasts, training
- **Event publishers**: Publishes notification status events
- **Auto-notifications**: Triggers welcome emails, alerts, reports
- **Error handling**: Robust message processing with retry logic
#### **7. Spanish Bakery Templates**
- **Welcome email**: Professional onboarding email
- **Forecast alerts**: Demand variation notifications
- **Weekly reports**: Performance summary emails
- **Responsive HTML**: Mobile-optimized email designs
- **Spanish localization**: All content in Spanish
### **🚀 Integration with Your Architecture**
#### **Seamless Gateway Integration**
```python
# Gateway already routes to notification service
app.include_router(notification.router, prefix="/api/v1/notifications", tags=["notifications"])
# Authentication handled by gateway middleware
# Tenant isolation automatic
# User context passed via headers
```
#### **Shared Library Usage**
```python
# Uses your existing shared components
from shared.auth.decorators import get_current_user_dep, get_current_tenant_id_dep
from shared.messaging.rabbitmq import RabbitMQClient
from shared.monitoring.metrics import MetricsCollector
from shared.database.base import DatabaseManager
```
#### **Event-Driven Architecture**
```python
# Automatic notifications triggered by:
# - User registration → Welcome email
# - Forecast alerts → Alert emails + WhatsApp
# - Training completion → Status notifications
# - Data imports → Import confirmations
```
### **📊 Production Features**
#### **Health Monitoring**
- **Database health checks**: Connection monitoring
- **SMTP health checks**: Email service validation
- **WhatsApp health checks**: API connectivity tests
- **Prometheus metrics**: Delivery rates, response times
- **Structured logging**: Comprehensive error tracking
#### **Rate Limiting & Scaling**
- **Email rate limits**: 1000/hour (configurable)
- **WhatsApp rate limits**: 100/hour (Twilio limits)
- **Batch processing**: Configurable batch sizes
- **Retry logic**: Automatic retry with exponential backoff
- **Queue management**: Background task processing
#### **Security & Compliance**
- **User consent**: Preference-based opt-in/out
- **Tenant isolation**: Multi-tenant data separation
- **GDPR compliance**: User data control
- **Rate limiting**: DoS protection
- **Input validation**: Pydantic schema validation
### **🎯 Business-Specific Features**
#### **Bakery Use Cases**
```python
# Forecast alerts when demand varies >20%
# Daily production recommendations
# Weekly performance reports
# Stock shortage notifications
# Weather impact alerts
# Holiday/event notifications
```
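As an illustration of the first trigger above, a minimal sketch of the >20% variation check (the function and parameter names are hypothetical):

```python
# Hypothetical helper: flag a forecast alert when predicted demand deviates
# more than threshold_pct from the recent baseline.
def should_send_forecast_alert(predicted: float, baseline: float,
                               threshold_pct: float = 20.0) -> bool:
    if baseline <= 0:
        return False  # avoid division by zero on missing history
    variation = abs(predicted - baseline) / baseline * 100
    return variation > threshold_pct
```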
#### **Spanish Localization**
- **Spanish templates**: Native Spanish content
- **Madrid timezone**: Europe/Madrid default
- **Spanish phone format**: +34 prefix handling
- **Local business hours**: Quiet hours support
- **Cultural context**: Bakery-specific terminology
### **🔄 How to Deploy**
#### **1. Add to Docker Compose**
```yaml
# Already integrated in your docker-compose.yml
notification-service:
build: ./services/notification
ports:
- "8006:8000"
environment:
- DATABASE_URL=postgresql+asyncpg://notification_user:notification_pass123@notification-db:5432/notification_db
depends_on:
- notification-db
- redis
- rabbitmq
```
#### **2. Environment Setup**
```bash
# Copy environment template
cp services/notification/.env.example services/notification/.env
# Configure email provider
SMTP_USER=your-email@gmail.com
SMTP_PASSWORD=your-app-password
# Configure WhatsApp (optional)
WHATSAPP_API_KEY=your-twilio-sid:your-twilio-token
```
#### **3. Start Service**
```bash
# Service starts automatically with
docker-compose up -d
# Check health
curl http://localhost:8006/health
# View API docs
open http://localhost:8006/docs
```
### **📈 API Usage Examples**
#### **Send Welcome Email**
```http
POST /api/v1/notifications/send
{
"type": "email",
"recipient_email": "usuario@panaderia.com",
"template_id": "welcome_email",
"template_data": {
"user_name": "Juan Carlos",
"dashboard_url": "https://app.bakeryforecast.es/dashboard"
}
}
```
#### **Send Forecast Alert**
```http
POST /api/v1/notifications/send
{
"type": "email",
"template_id": "forecast_alert_email",
"template_data": {
"bakery_name": "Panadería San Miguel",
"product_name": "Pan integral",
"forecast_date": "2025-01-25",
"predicted_demand": 120,
"variation_percentage": 35,
"alert_message": "Aumento significativo esperado. Se recomienda incrementar producción."
},
"broadcast": true,
"priority": "high"
}
```
#### **Update User Preferences**
```http
PATCH /api/v1/notifications/preferences
{
"email_alerts": true,
"whatsapp_enabled": false,
"quiet_hours_start": "22:00",
"quiet_hours_end": "08:00",
"language": "es"
}
```
### **🎉 Key Benefits**
#### **✅ Production Ready**
- Complete error handling and logging
- Health checks and monitoring
- Rate limiting and security
- Multi-tenant architecture
- Scalable event-driven design
#### **✅ Business Focused**
- Spanish bakery templates
- Madrid timezone/localization
- Forecast-specific notifications
- Professional email designs
- WhatsApp support for urgent alerts
#### **✅ Developer Friendly**
- Comprehensive API documentation
- Type-safe Pydantic schemas
- Async/await throughout
- Structured logging
- Easy testing and debugging
#### **✅ Seamless Integration**
- Uses your shared libraries
- Integrates with gateway auth
- Follows your architectural patterns
- Maintains tenant isolation
- Publishes events to RabbitMQ
### **🚀 Next Steps**
#### **Immediate (Week 2)**
1. **Deploy the service**: Add to your docker-compose and start
2. **Configure SMTP**: Set up email provider credentials
3. **Test integration**: Send test notifications via API
4. **Event integration**: Verify RabbitMQ event handling
#### **Production Optimization**
1. **Email provider**: Consider SendGrid/Mailgun for production
2. **WhatsApp setup**: Configure Twilio Business API
3. **Template customization**: Add tenant-specific templates
4. **Analytics dashboard**: Add notification analytics to frontend
### **💡 Advanced Features Ready for Extension**
- **Push notifications**: Framework ready for mobile push
- **SMS support**: Easy to add SMS providers
- **A/B testing**: Template variant testing
- **Scheduled campaigns**: Marketing email campaigns
- **Analytics integration**: Detailed delivery analytics
**This notification service is now a complete, production-ready microservice that fully integrates with your bakery forecasting platform! It handles all notification needs from welcome emails to urgent forecast alerts, with proper Spanish localization and bakery-specific templates.** 🎯

View File

@@ -0,0 +1,189 @@
# services/notification/app/api/sse_routes.py
"""
SSE routes for real-time alert and recommendation streaming
"""
import asyncio
import json
from datetime import datetime
from typing import Optional
from fastapi import APIRouter, Request, Depends, HTTPException, BackgroundTasks
from sse_starlette.sse import EventSourceResponse
import structlog
from shared.auth.decorators import get_current_user
router = APIRouter(prefix="/sse", tags=["sse"])
logger = structlog.get_logger()
@router.get("/alerts/stream/{tenant_id}")
async def stream_alerts(
tenant_id: str,
request: Request,
background_tasks: BackgroundTasks,
current_user = Depends(get_current_user)
):
"""
SSE endpoint for real-time alert and recommendation streaming
Supports both alerts and recommendations through a unified stream
"""
# Verify user has access to this tenant
if not hasattr(current_user, 'has_access_to_tenant') or not current_user.has_access_to_tenant(tenant_id):
raise HTTPException(403, "Access denied to this tenant")
# Get SSE service from app state
sse_service = getattr(request.app.state, 'sse_service', None)
if not sse_service:
raise HTTPException(500, "SSE service not available")
async def event_generator():
"""Generate SSE events for the client"""
client_queue = asyncio.Queue(maxsize=100) # Limit queue size
try:
# Register client
await sse_service.add_client(tenant_id, client_queue)
logger.info("SSE client connected",
tenant_id=tenant_id,
user_id=getattr(current_user, 'id', 'unknown'))
# Stream events
while True:
# Check if client disconnected
if await request.is_disconnected():
logger.info("SSE client disconnected", tenant_id=tenant_id)
break
try:
# Wait for events with timeout for keepalive
event = await asyncio.wait_for(
client_queue.get(),
timeout=30.0
)
yield event
except asyncio.TimeoutError:
# Send keepalive ping
yield {
"event": "ping",
"data": json.dumps({
"timestamp": datetime.utcnow().isoformat(),
"status": "keepalive"
}),
"id": f"ping_{int(datetime.now().timestamp())}"
}
except Exception as e:
logger.error("Error in SSE event generator",
tenant_id=tenant_id,
error=str(e))
break
except Exception as e:
logger.error("SSE connection error",
tenant_id=tenant_id,
error=str(e))
finally:
# Clean up on disconnect
try:
await sse_service.remove_client(tenant_id, client_queue)
logger.info("SSE client cleanup completed", tenant_id=tenant_id)
except Exception as e:
logger.error("Error cleaning up SSE client",
tenant_id=tenant_id,
error=str(e))
return EventSourceResponse(
event_generator(),
media_type="text/event-stream",
headers={
"Cache-Control": "no-cache",
"Connection": "keep-alive",
"X-Accel-Buffering": "no", # Disable nginx buffering
}
)
@router.post("/items/{item_id}/acknowledge")
async def acknowledge_item(
item_id: str,
current_user = Depends(get_current_user)
):
"""Acknowledge an alert or recommendation"""
try:
# This would update the database
# For now, just return success
logger.info("Item acknowledged",
item_id=item_id,
user_id=getattr(current_user, 'id', 'unknown'))
return {
"status": "success",
"item_id": item_id,
"acknowledged_by": getattr(current_user, 'id', 'unknown'),
"acknowledged_at": datetime.utcnow().isoformat()
}
except Exception as e:
logger.error("Failed to acknowledge item", item_id=item_id, error=str(e))
raise HTTPException(500, "Failed to acknowledge item")
@router.post("/items/{item_id}/resolve")
async def resolve_item(
item_id: str,
current_user = Depends(get_current_user)
):
"""Resolve an alert or recommendation"""
try:
# This would update the database
# For now, just return success
logger.info("Item resolved",
item_id=item_id,
user_id=getattr(current_user, 'id', 'unknown'))
return {
"status": "success",
"item_id": item_id,
"resolved_by": getattr(current_user, 'id', 'unknown'),
"resolved_at": datetime.utcnow().isoformat()
}
except Exception as e:
logger.error("Failed to resolve item", item_id=item_id, error=str(e))
raise HTTPException(500, "Failed to resolve item")
@router.get("/status/{tenant_id}")
async def get_sse_status(
tenant_id: str,
request: Request,
current_user = Depends(get_current_user)
):
"""Get SSE connection status for a tenant"""
# Verify user has access to this tenant
if not hasattr(current_user, 'has_access_to_tenant') or not current_user.has_access_to_tenant(tenant_id):
raise HTTPException(403, "Access denied to this tenant")
try:
# Get SSE service from app state
sse_service = getattr(request.app.state, 'sse_service', None)
if not sse_service:
return {"status": "unavailable", "message": "SSE service not initialized"}
metrics = sse_service.get_metrics()
tenant_connections = len(sse_service.active_connections.get(tenant_id, set()))
return {
"status": "available",
"tenant_id": tenant_id,
"connections": tenant_connections,
"total_connections": metrics["total_connections"],
"active_tenants": metrics["active_tenants"]
}
except Exception as e:
logger.error("Failed to get SSE status", tenant_id=tenant_id, error=str(e))
raise HTTPException(500, "Failed to get SSE status")
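For reference, a client can consume the stream endpoint above with any SSE-capable HTTP client. A minimal sketch using httpx (the host, port, and token handling are assumptions for illustration):

```python
import asyncio
import httpx

async def listen_alerts(tenant_id: str, token: str) -> None:
    # Assumed dev deployment URL; the notification service listens on 8006
    url = f"http://localhost:8006/api/v1/sse/alerts/stream/{tenant_id}"
    headers = {"Authorization": f"Bearer {token}", "Accept": "text/event-stream"}
    async with httpx.AsyncClient(timeout=None) as client:
        async with client.stream("GET", url, headers=headers) as response:
            async for line in response.aiter_lines():
                if line.startswith("data:"):
                    print("payload:", line[len("data:"):].strip())

# asyncio.run(listen_alerts("tenant-123", "<jwt>"))  # invented example values
```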

View File

@@ -1,9 +1,9 @@
# ================================================================
# services/notification/app/main.py - COMPLETE IMPLEMENTATION
# services/notification/app/main.py - ENHANCED WITH SSE SUPPORT
# ================================================================
"""
Notification Service Main Application
Handles email and WhatsApp notifications with full integration
Handles email, WhatsApp notifications and SSE for real-time alerts/recommendations
"""
import structlog
@@ -15,7 +15,12 @@ from fastapi.responses import JSONResponse
from app.core.config import settings
from app.core.database import init_db
from app.api.notifications import router as notification_router
from app.api.sse_routes import router as sse_router
from app.services.messaging import setup_messaging, cleanup_messaging
from app.services.sse_service import SSEService
from app.services.notification_orchestrator import NotificationOrchestrator
from app.services.email_service import EmailService
from app.services.whatsapp_service import WhatsAppService
from shared.monitoring import setup_logging, HealthChecker
from shared.monitoring.metrics import setup_metrics_early
@@ -30,8 +35,8 @@ health_checker = None
# Create FastAPI app FIRST
app = FastAPI(
title="Bakery Notification Service",
description="Email and WhatsApp notification service for bakery forecasting platform",
version="1.0.0",
description="Email, WhatsApp and SSE notification service for bakery alerts and recommendations",
version="2.0.0",
docs_url="/docs",
redoc_url="/redoc"
)
@@ -56,12 +61,36 @@ async def lifespan(app: FastAPI):
await setup_messaging()
logger.info("Messaging initialized")
# Initialize services
email_service = EmailService()
whatsapp_service = WhatsAppService()
# Initialize SSE service
sse_service = SSEService(settings.REDIS_URL)
await sse_service.initialize()
logger.info("SSE service initialized")
# Create orchestrator
orchestrator = NotificationOrchestrator(
email_service=email_service,
whatsapp_service=whatsapp_service,
sse_service=sse_service
)
# Store services in app state
app.state.orchestrator = orchestrator
app.state.sse_service = sse_service
app.state.email_service = email_service
app.state.whatsapp_service = whatsapp_service
# Register custom metrics (metrics_collector already exists)
metrics_collector.register_counter("notifications_sent_total", "Total notifications sent", labels=["type", "status"])
metrics_collector.register_counter("notifications_sent_total", "Total notifications sent", labels=["type", "status", "channel"])
metrics_collector.register_counter("emails_sent_total", "Total emails sent", labels=["status"])
metrics_collector.register_counter("whatsapp_sent_total", "Total WhatsApp messages sent", labels=["status"])
metrics_collector.register_counter("sse_events_sent_total", "Total SSE events sent", labels=["tenant", "event_type"])
metrics_collector.register_histogram("notification_processing_duration_seconds", "Time spent processing notifications")
metrics_collector.register_gauge("notification_queue_size", "Current notification queue size")
metrics_collector.register_gauge("sse_active_connections", "Number of active SSE connections")
# Setup health checker
health_checker = HealthChecker("notification-service")
@@ -93,14 +122,22 @@ async def lifespan(app: FastAPI):
# Add WhatsApp service health check
async def check_whatsapp_service():
try:
from app.services.whatsapp_service import WhatsAppService
whatsapp_service = WhatsAppService()
return await whatsapp_service.health_check()
except Exception as e:
return f"WhatsApp service error: {e}"
health_checker.add_check("whatsapp_service", check_whatsapp_service, timeout=10.0, critical=False)
# Add SSE service health check
async def check_sse_service():
try:
metrics = sse_service.get_metrics()
return "healthy" if metrics["redis_connected"] else "Redis connection failed"
except Exception as e:
return f"SSE service error: {e}"
health_checker.add_check("sse_service", check_sse_service, timeout=5.0, critical=True)
# Add messaging health check
def check_messaging():
try:
@@ -115,7 +152,7 @@ async def lifespan(app: FastAPI):
# Store health checker in app state
app.state.health_checker = health_checker
logger.info("Notification Service started successfully")
logger.info("Notification Service with SSE support started successfully")
except Exception as e:
logger.error(f"Failed to start Notification Service: {e}")
@@ -126,10 +163,15 @@ async def lifespan(app: FastAPI):
# Shutdown
logger.info("Shutting down Notification Service...")
try:
# Shutdown SSE service
if hasattr(app.state, 'sse_service'):
await app.state.sse_service.shutdown()
logger.info("SSE service shutdown completed")
await cleanup_messaging()
logger.info("Messaging cleanup completed")
except Exception as e:
logger.error(f"Error during messaging cleanup: {e}")
logger.error(f"Error during shutdown: {e}")
# Set lifespan AFTER metrics setup
app.router.lifespan_context = lifespan
@@ -145,18 +187,30 @@ app.add_middleware(
# Include routers
app.include_router(notification_router, prefix="/api/v1", tags=["notifications"])
app.include_router(sse_router, prefix="/api/v1", tags=["sse"])
# Health check endpoint
@app.get("/health")
async def health_check():
"""Comprehensive health check endpoint"""
"""Comprehensive health check endpoint including SSE"""
if health_checker:
return await health_checker.check_health()
health_result = await health_checker.check_health()
# Add SSE metrics to health check
if hasattr(app.state, 'sse_service'):
try:
sse_metrics = app.state.sse_service.get_metrics()
health_result['sse_metrics'] = sse_metrics
except Exception as e:
health_result['sse_error'] = str(e)
return health_result
else:
return {
"service": "notification-service",
"status": "healthy",
"version": "1.0.0"
"version": "2.0.0",
"features": ["email", "whatsapp", "sse", "alerts", "recommendations"]
}
# Metrics endpoint

View File

@@ -276,14 +276,26 @@ class EmailService:
# Test SMTP connection
if self.smtp_ssl:
# Use implicit TLS/SSL connection (port 465 typically)
server = aiosmtplib.SMTP(hostname=self.smtp_host, port=self.smtp_port, use_tls=True)
await server.connect()
# No need for starttls() when using implicit TLS
else:
# Use plain connection, optionally upgrade with STARTTLS
server = aiosmtplib.SMTP(hostname=self.smtp_host, port=self.smtp_port)
await server.connect()
if self.smtp_tls:
await server.starttls()
await server.connect()
if self.smtp_tls:
# Try STARTTLS, but handle case where connection is already secure
try:
await server.starttls()
except Exception as starttls_error:
# If STARTTLS fails because connection is already using TLS, that's okay
if "already using TLS" in str(starttls_error) or "already secure" in str(starttls_error):
logger.debug("SMTP connection already secure, skipping STARTTLS")
else:
# Re-raise other STARTTLS errors
raise starttls_error
await server.login(self.smtp_user, self.smtp_password)
await server.quit()

View File

@@ -0,0 +1,279 @@
# services/notification/app/services/notification_orchestrator.py
"""
Notification orchestrator for managing delivery across all channels
Includes SSE integration for real-time dashboard updates
"""
from typing import List, Dict, Any
from datetime import datetime
import structlog
from .email_service import EmailService
from .whatsapp_service import WhatsAppService
from .sse_service import SSEService
logger = structlog.get_logger()
class NotificationOrchestrator:
"""
Orchestrates delivery across all notification channels
Now includes SSE for real-time dashboard updates, with support for recommendations
"""
def __init__(
self,
email_service: EmailService,
whatsapp_service: WhatsAppService,
sse_service: SSEService,
push_service=None # Optional push service
):
self.email_service = email_service
self.whatsapp_service = whatsapp_service
self.sse_service = sse_service
self.push_service = push_service
async def send_notification(
self,
tenant_id: str,
notification: Dict[str, Any],
channels: List[str]
) -> Dict[str, Any]:
"""
Send notification through specified channels
Channels can include: email, whatsapp, push, dashboard (SSE)
"""
results = {}
# Always send to dashboard for visibility (SSE)
if 'dashboard' in channels or notification.get('type') in ['alert', 'recommendation']:
try:
await self.sse_service.send_item_notification(
tenant_id,
notification
)
results['dashboard'] = {'status': 'sent', 'timestamp': datetime.utcnow().isoformat()}
logger.info("Item sent to dashboard via SSE",
tenant_id=tenant_id,
item_type=notification.get('type'),
item_id=notification.get('id'))
except Exception as e:
logger.error("Failed to send to dashboard",
tenant_id=tenant_id,
error=str(e))
results['dashboard'] = {'status': 'failed', 'error': str(e)}
# Send to email channel
if 'email' in channels:
try:
email_result = await self.email_service.send_notification_email(
to_email=notification.get('email'),
subject=notification.get('title'),
template_data={
'title': notification.get('title'),
'message': notification.get('message'),
'severity': notification.get('severity'),
'item_type': notification.get('type'),
'actions': notification.get('actions', []),
'metadata': notification.get('metadata', {}),
'timestamp': datetime.utcnow().isoformat()
},
notification_type=notification.get('type', 'alert')
)
results['email'] = email_result
except Exception as e:
logger.error("Failed to send email",
tenant_id=tenant_id,
error=str(e))
results['email'] = {'status': 'failed', 'error': str(e)}
# Send to WhatsApp channel
if 'whatsapp' in channels:
try:
whatsapp_result = await self.whatsapp_service.send_notification_message(
to_phone=notification.get('phone'),
message=self._format_whatsapp_message(notification),
notification_type=notification.get('type', 'alert')
)
results['whatsapp'] = whatsapp_result
except Exception as e:
logger.error("Failed to send WhatsApp",
tenant_id=tenant_id,
error=str(e))
results['whatsapp'] = {'status': 'failed', 'error': str(e)}
# Send to push notification channel
if 'push' in channels and self.push_service:
try:
push_result = await self.push_service.send_notification(
user_id=notification.get('user_id'),
title=notification.get('title'),
body=notification.get('message'),
data={
'item_type': notification.get('type'),
'severity': notification.get('severity'),
'item_id': notification.get('id'),
'metadata': notification.get('metadata', {})
}
)
results['push'] = push_result
except Exception as e:
logger.error("Failed to send push notification",
tenant_id=tenant_id,
error=str(e))
results['push'] = {'status': 'failed', 'error': str(e)}
# Log summary
successful_channels = [ch for ch, result in results.items() if result.get('status') == 'sent']
failed_channels = [ch for ch, result in results.items() if result.get('status') == 'failed']
logger.info("Notification delivery completed",
tenant_id=tenant_id,
item_type=notification.get('type'),
item_id=notification.get('id'),
successful_channels=successful_channels,
failed_channels=failed_channels,
total_channels=len(channels))
return {
'status': 'completed',
'successful_channels': successful_channels,
'failed_channels': failed_channels,
'results': results,
'timestamp': datetime.utcnow().isoformat()
}
def _format_whatsapp_message(self, notification: Dict[str, Any]) -> str:
"""Format message for WhatsApp with emojis and structure"""
item_type = notification.get('type', 'alert')
severity = notification.get('severity', 'medium')
# Get appropriate emoji
type_emoji = '🚨' if item_type == 'alert' else '💡'
severity_emoji = {
'urgent': '🔴',
'high': '🟡',
'medium': '🔵',
'low': '🟢'
}.get(severity, '🔵')
message = f"{type_emoji} {severity_emoji} *{notification.get('title', 'Notificación')}*\n\n"
message += f"{notification.get('message', '')}\n"
# Add actions if available
actions = notification.get('actions', [])
if actions and len(actions) > 0:
message += "\n*Acciones sugeridas:*\n"
for i, action in enumerate(actions[:3], 1): # Limit to 3 actions for WhatsApp
message += f"{i}. {action}\n"
# Add timestamp
message += f"\n_Enviado: {datetime.now().strftime('%H:%M, %d/%m/%Y')}_"
return message
def get_channels_by_severity(self, severity: str, item_type: str, hour: int = None) -> List[str]:
"""
Determine notification channels based on severity and item_type
Now includes 'dashboard' as a channel
"""
if hour is None:
hour = datetime.now().hour
# Dashboard always gets all items
channels = ['dashboard']
if item_type == 'alert':
if severity == 'urgent':
# Urgent alerts: All channels immediately
channels.extend(['email', 'whatsapp', 'push'])
elif severity == 'high':
# High alerts: Email and WhatsApp during extended hours
if 6 <= hour <= 22:
channels.extend(['email', 'whatsapp'])
else:
channels.append('email') # Email only during night
elif severity == 'medium':
# Medium alerts: Email during business hours
if 7 <= hour <= 20:
channels.append('email')
elif item_type == 'recommendation':
# Recommendations: Generally less urgent, respect business hours
if severity in ['medium', 'high']:
if 8 <= hour <= 19: # Stricter business hours for recommendations
channels.append('email')
# Low/urgent: Dashboard only (urgent rare for recommendations)
return channels
async def health_check(self) -> Dict[str, Any]:
"""Check health of all notification channels"""
health_status = {
'status': 'healthy',
'channels': {},
'timestamp': datetime.utcnow().isoformat()
}
# Check email service
try:
email_health = await self.email_service.health_check()
health_status['channels']['email'] = email_health
except Exception as e:
health_status['channels']['email'] = {'status': 'unhealthy', 'error': str(e)}
# Check WhatsApp service
try:
whatsapp_health = await self.whatsapp_service.health_check()
health_status['channels']['whatsapp'] = whatsapp_health
except Exception as e:
health_status['channels']['whatsapp'] = {'status': 'unhealthy', 'error': str(e)}
# Check SSE service
try:
sse_metrics = self.sse_service.get_metrics()
sse_status = 'healthy' if sse_metrics['redis_connected'] else 'unhealthy'
health_status['channels']['sse'] = {
'status': sse_status,
'metrics': sse_metrics
}
except Exception as e:
health_status['channels']['sse'] = {'status': 'unhealthy', 'error': str(e)}
# Check push service if available
if self.push_service:
try:
push_health = await self.push_service.health_check()
health_status['channels']['push'] = push_health
except Exception as e:
health_status['channels']['push'] = {'status': 'unhealthy', 'error': str(e)}
# Determine overall status
unhealthy_channels = [
ch for ch, status in health_status['channels'].items()
if status.get('status') != 'healthy'
]
if unhealthy_channels:
health_status['status'] = 'degraded' if len(unhealthy_channels) < len(health_status['channels']) else 'unhealthy'
health_status['unhealthy_channels'] = unhealthy_channels
return health_status
def get_metrics(self) -> Dict[str, Any]:
"""Get aggregated metrics from all services"""
metrics = {
'timestamp': datetime.utcnow().isoformat(),
'channels': {}
}
# Get SSE metrics
try:
metrics['channels']['sse'] = self.sse_service.get_metrics()
except Exception as e:
logger.error("Failed to get SSE metrics", error=str(e))
# Additional metrics could be added here for other services
return metrics
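To show how the orchestrator is meant to be driven, a hedged usage sketch (field names follow `send_notification` above; all values are invented):

```python
async def notify_low_stock(orchestrator) -> dict:
    # Pick channels per the policy above: at 23:00 a 'high' alert keeps
    # dashboard + email and drops WhatsApp.
    channels = orchestrator.get_channels_by_severity('high', 'alert', hour=23)
    return await orchestrator.send_notification(
        tenant_id="tenant-123",  # invented example values throughout
        notification={
            'id': 'alert-001',
            'type': 'alert',
            'severity': 'high',
            'title': 'Stock bajo: Harina',
            'message': 'El stock caerá por debajo del mínimo.',
            'email': 'gerente@panaderia.example',
            'actions': ['Contactar proveedor'],
        },
        channels=channels,
    )
```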

View File

@@ -0,0 +1,256 @@
# services/notification/app/services/sse_service.py
"""
Server-Sent Events service for real-time notifications
Integrated within the notification service for alerts and recommendations
"""
import asyncio
from redis.asyncio import Redis
import json
from typing import Dict, Set, Any
from datetime import datetime
import structlog
logger = structlog.get_logger()
class SSEService:
"""
Server-Sent Events service for real-time notifications
Handles both alerts and recommendations through unified SSE streams
"""
def __init__(self, redis_url: str):
self.redis_url = redis_url
self.redis = None
self.active_connections: Dict[str, Set[asyncio.Queue]] = {}
self.pubsub_tasks: Dict[str, asyncio.Task] = {}
async def initialize(self):
"""Initialize Redis connection"""
try:
self.redis = Redis.from_url(self.redis_url)
logger.info("SSE Service initialized with Redis connection")
except Exception as e:
logger.error("Failed to initialize SSE service", error=str(e))
raise
async def shutdown(self):
"""Clean shutdown"""
try:
# Cancel all pubsub tasks
for task in self.pubsub_tasks.values():
if not task.done():
task.cancel()
try:
await task
except asyncio.CancelledError:
pass
# Close all client connections
for tenant_id, connections in self.active_connections.items():
for queue in connections.copy():
try:
await queue.put({"event": "shutdown", "data": json.dumps({"status": "server_shutdown"})})
except Exception:
pass
# Close Redis connection
if self.redis:
await self.redis.close()
logger.info("SSE Service shutdown completed")
except Exception as e:
logger.error("Error during SSE shutdown", error=str(e))
async def add_client(self, tenant_id: str, client_queue: asyncio.Queue):
"""Add a new SSE client connection"""
try:
if tenant_id not in self.active_connections:
self.active_connections[tenant_id] = set()
# Start pubsub listener for this tenant if not exists
if tenant_id not in self.pubsub_tasks:
task = asyncio.create_task(self._listen_to_tenant_channel(tenant_id))
self.pubsub_tasks[tenant_id] = task
self.active_connections[tenant_id].add(client_queue)
client_count = len(self.active_connections[tenant_id])
logger.info("SSE client added",
tenant_id=tenant_id,
total_clients=client_count)
# Send connection confirmation
await client_queue.put({
"event": "connected",
"data": json.dumps({
"status": "connected",
"tenant_id": tenant_id,
"timestamp": datetime.utcnow().isoformat(),
"client_count": client_count
})
})
# Send any active items (alerts and recommendations)
active_items = await self.get_active_items(tenant_id)
if active_items:
await client_queue.put({
"event": "initial_items",
"data": json.dumps(active_items)
})
except Exception as e:
logger.error("Error adding SSE client", tenant_id=tenant_id, error=str(e))
async def remove_client(self, tenant_id: str, client_queue: asyncio.Queue):
"""Remove SSE client connection"""
try:
if tenant_id in self.active_connections:
self.active_connections[tenant_id].discard(client_queue)
# If no more clients for this tenant, stop the pubsub listener
if not self.active_connections[tenant_id]:
del self.active_connections[tenant_id]
if tenant_id in self.pubsub_tasks:
task = self.pubsub_tasks[tenant_id]
if not task.done():
task.cancel()
del self.pubsub_tasks[tenant_id]
logger.info("SSE client removed", tenant_id=tenant_id)
except Exception as e:
logger.error("Error removing SSE client", tenant_id=tenant_id, error=str(e))
async def _listen_to_tenant_channel(self, tenant_id: str):
"""Listen to Redis channel for tenant-specific items"""
try:
# Create a separate Redis connection for pubsub
pubsub_redis = Redis.from_url(self.redis_url)
pubsub = pubsub_redis.pubsub()
channel = f"alerts:{tenant_id}"
await pubsub.subscribe(channel)
logger.info("Started listening to tenant channel",
tenant_id=tenant_id,
channel=channel)
async for message in pubsub.listen():
if message["type"] == "message":
# Broadcast to all connected clients for this tenant
await self.broadcast_to_tenant(tenant_id, message["data"])
except asyncio.CancelledError:
logger.info("Stopped listening to tenant channel", tenant_id=tenant_id)
except Exception as e:
logger.error("Error in pubsub listener", tenant_id=tenant_id, error=str(e))
finally:
try:
await pubsub.unsubscribe(channel)
await pubsub_redis.close()
except Exception:
pass
async def broadcast_to_tenant(self, tenant_id: str, message: str):
"""Broadcast message to all connected clients of a tenant"""
if tenant_id not in self.active_connections:
return
try:
item_data = json.loads(message)
event = {
"event": item_data.get('item_type', 'item'), # 'alert' or 'recommendation'
"data": json.dumps(item_data),
"id": item_data.get("id")
}
# Send to all connected clients
disconnected = []
for client_queue in self.active_connections[tenant_id]:
try:
# Use put_nowait to avoid blocking
client_queue.put_nowait(event)
except asyncio.QueueFull:
logger.warning("Client queue full, dropping message", tenant_id=tenant_id)
disconnected.append(client_queue)
except Exception as e:
logger.warning("Failed to send to client", tenant_id=tenant_id, error=str(e))
disconnected.append(client_queue)
# Clean up disconnected clients
for queue in disconnected:
await self.remove_client(tenant_id, queue)
if disconnected:
logger.info("Cleaned up disconnected clients",
tenant_id=tenant_id,
count=len(disconnected))
except Exception as e:
logger.error("Error broadcasting to tenant", tenant_id=tenant_id, error=str(e))
async def send_item_notification(self, tenant_id: str, item: Dict[str, Any]):
"""
Send alert or recommendation via SSE (called by notification orchestrator)
"""
try:
# Publish to Redis for SSE streaming
channel = f"alerts:{tenant_id}"
item_message = {
'id': item.get('id'),
'item_type': item.get('type'), # 'alert' or 'recommendation'
'type': item.get('alert_type', item.get('type')),
'severity': item.get('severity'),
'title': item.get('title'),
'message': item.get('message'),
'actions': item.get('actions', []),
'metadata': item.get('metadata', {}),
'timestamp': item.get('timestamp', datetime.utcnow().isoformat()),
'status': 'active'
}
await self.redis.publish(channel, json.dumps(item_message))
logger.info("Item published to SSE",
tenant_id=tenant_id,
item_type=item.get('type'),
item_id=item.get('id'))
except Exception as e:
logger.error("Error sending item notification via SSE",
tenant_id=tenant_id,
error=str(e))
async def get_active_items(self, tenant_id: str) -> list:
"""Fetch active alerts and recommendations from database"""
try:
# This would integrate with the actual database
# For now, return empty list as placeholder
# In real implementation, this would query the alerts table
# Example query:
# query = """
# SELECT id, item_type, alert_type, severity, title, message,
# actions, metadata, created_at, status
# FROM alerts
# WHERE tenant_id = $1
# AND status = 'active'
# ORDER BY severity_weight DESC, created_at DESC
# LIMIT 50
# """
return [] # Placeholder
except Exception as e:
logger.error("Error fetching active items", tenant_id=tenant_id, error=str(e))
return []
def get_metrics(self) -> Dict[str, Any]:
"""Get SSE service metrics"""
return {
"active_tenants": len(self.active_connections),
"total_connections": sum(len(connections) for connections in self.active_connections.values()),
"active_listeners": len(self.pubsub_tasks),
"redis_connected": self.redis and not self.redis.closed
}
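Any upstream producer (for example the alert processor) can feed this stream by publishing to the tenant channel. A minimal sketch of the wire format that `broadcast_to_tenant` expects (field values invented):

```python
import json
from redis.asyncio import Redis

async def publish_alert(redis: Redis, tenant_id: str) -> None:
    payload = {
        "id": "alert-001",            # invented example values
        "item_type": "alert",
        "type": "low_stock",
        "severity": "high",
        "title": "Stock bajo: Harina",
        "message": "Quedan 4kg (mínimo 10kg)",
        "status": "active",
    }
    # Same channel naming used by _listen_to_tenant_channel above
    await redis.publish(f"alerts:{tenant_id}", json.dumps(payload))
```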

View File

@@ -30,6 +30,17 @@ class WhatsAppService:
self.from_number = settings.WHATSAPP_FROM_NUMBER
self.enabled = settings.ENABLE_WHATSAPP_NOTIFICATIONS
def _parse_api_credentials(self):
"""Parse API key into username and password for Twilio basic auth"""
if not self.api_key or ":" not in self.api_key:
raise ValueError("WhatsApp API key must be in format 'username:password'")
api_parts = self.api_key.split(":", 1)
if len(api_parts) != 2:
raise ValueError("Invalid WhatsApp API key format")
return api_parts[0], api_parts[1]
async def send_message(
self,
to_phone: str,
@@ -181,10 +192,22 @@ class WhatsAppService:
return False
# Test API connectivity with a simple request
# Parse API key via the shared helper (expected format: username:password)
try:
username, password = self._parse_api_credentials()
except ValueError as e:
logger.error(f"WhatsApp API key configuration error: {e}")
return False
async with httpx.AsyncClient(timeout=10.0) as client:
response = await client.get(
f"{self.base_url}/v1/Account", # Twilio account info endpoint
auth=(self.api_key.split(":")[0], self.api_key.split(":")[1])
auth=(username, password)
)
if response.status_code == 200:
@@ -206,6 +229,13 @@ class WhatsAppService:
async def _send_text_message(self, to_phone: str, message: str) -> bool:
"""Send regular text message via Twilio"""
try:
# Parse API credentials
try:
username, password = self._parse_api_credentials()
except ValueError as e:
logger.error(f"WhatsApp API key configuration error: {e}")
return False
# Prepare request data
data = {
"From": f"whatsapp:{self.from_number}",
@@ -216,9 +246,9 @@ class WhatsAppService:
# Send via Twilio API
async with httpx.AsyncClient(timeout=30.0) as client:
response = await client.post(
f"{self.base_url}/2010-04-01/Accounts/{self.api_key.split(':')[0]}/Messages.json",
f"{self.base_url}/2010-04-01/Accounts/{username}/Messages.json",
data=data,
auth=(self.api_key.split(":")[0], self.api_key.split(":")[1])
auth=(username, password)
)
if response.status_code == 201:
@@ -245,6 +275,13 @@ class WhatsAppService:
) -> bool:
"""Send WhatsApp template message via Twilio"""
try:
# Parse API credentials
try:
username, password = self._parse_api_credentials()
except ValueError as e:
logger.error(f"WhatsApp API key configuration error: {e}")
return False
# Prepare template data
content_variables = {str(i+1): param for i, param in enumerate(parameters)}
@@ -258,9 +295,9 @@ class WhatsAppService:
# Send via Twilio API
async with httpx.AsyncClient(timeout=30.0) as client:
response = await client.post(
f"{self.base_url}/2010-04-01/Accounts/{self.api_key.split(':')[0]}/Messages.json",
f"{self.base_url}/2010-04-01/Accounts/{username}/Messages.json",
data=data,
auth=(self.api_key.split(":")[0], self.api_key.split(":")[1])
auth=(username, password)
)
if response.status_code == 201:
@@ -315,10 +352,17 @@ class WhatsAppService:
async def _get_message_status(self, message_sid: str) -> Optional[str]:
"""Get message delivery status from Twilio"""
try:
# Parse API credentials
try:
username, password = self._parse_api_credentials()
except ValueError as e:
logger.error(f"WhatsApp API key configuration error: {e}")
return None
async with httpx.AsyncClient(timeout=10.0) as client:
response = await client.get(
f"{self.base_url}/2010-04-01/Accounts/{self.api_key.split(':')[0]}/Messages/{message_sid}.json",
auth=(self.api_key.split(":")[0], self.api_key.split(":")[1])
f"{self.base_url}/2010-04-01/Accounts/{username}/Messages/{message_sid}.json",
auth=(username, password)
)
if response.status_code == 200:

View File

@@ -3,6 +3,7 @@ fastapi==0.104.1
uvicorn[standard]==0.24.0
pydantic==2.5.0
pydantic-settings==2.1.0
sse-starlette==1.6.5
# Database
sqlalchemy==2.0.23
@@ -22,8 +23,9 @@ aiofiles==23.2.1
aiosmtplib==3.0.1
email-validator==2.1.0
# Messaging
# Messaging & Redis
aio-pika==9.3.1
redis==5.0.1
# Template Engine
jinja2==3.1.2

View File

@@ -1,248 +0,0 @@
# Orders Service
Customer orders and procurement planning service for the bakery management system.
## Overview
The Orders Service handles all order-related operations including:
- **Customer Management**: Complete customer lifecycle and relationship management
- **Order Processing**: End-to-end order management from creation to fulfillment
- **Procurement Planning**: Automated procurement requirement calculation and planning
- **Business Intelligence**: Order pattern analysis and business model detection
- **Dashboard Analytics**: Comprehensive reporting and metrics for order operations
## Features
### Core Capabilities
- Customer registration and management with detailed profiles
- Order creation, tracking, and status management
- Automated demand requirements calculation for production planning
- Procurement planning with supplier coordination
- Business model detection (individual bakery vs central bakery)
- Comprehensive dashboard with real-time metrics
- Integration with production, inventory, suppliers, and sales services
### API Endpoints
#### Dashboard & Analytics
- `GET /api/v1/tenants/{tenant_id}/orders/dashboard-summary` - Comprehensive dashboard data
- `GET /api/v1/tenants/{tenant_id}/orders/demand-requirements` - Demand analysis for production
- `GET /api/v1/tenants/{tenant_id}/orders/business-model` - Business model detection
#### Order Management
- `POST /api/v1/tenants/{tenant_id}/orders` - Create new customer order
- `GET /api/v1/tenants/{tenant_id}/orders` - List orders with filtering and pagination
- `GET /api/v1/tenants/{tenant_id}/orders/{order_id}` - Get order details with items
- `PUT /api/v1/tenants/{tenant_id}/orders/{order_id}/status` - Update order status
#### Customer Management
- `POST /api/v1/tenants/{tenant_id}/customers` - Create new customer
- `GET /api/v1/tenants/{tenant_id}/customers` - List customers with filtering
- `GET /api/v1/tenants/{tenant_id}/customers/{customer_id}` - Get customer details
#### Health & Status
- `GET /api/v1/tenants/{tenant_id}/orders/status` - Service status information
## Service Integration
### Shared Clients Used
- **InventoryServiceClient**: Stock levels, product availability validation
- **ProductionServiceClient**: Production notifications, capacity planning
- **SalesServiceClient**: Historical sales data for demand forecasting
- **NotificationServiceClient**: Customer notifications and alerts
### Authentication
Uses shared authentication patterns with tenant isolation:
- JWT token validation
- Tenant access verification
- User permission checks
## Configuration
Key configuration options in `app/core/config.py`:
### Order Processing
- `ORDER_PROCESSING_ENABLED`: Enable automatic order processing (default: true)
- `AUTO_APPROVE_ORDERS`: Automatically approve orders (default: false)
- `MAX_ORDER_ITEMS`: Maximum items per order (default: 50)
### Procurement Planning
- `PROCUREMENT_PLANNING_ENABLED`: Enable procurement planning (default: true)
- `PROCUREMENT_LEAD_TIME_DAYS`: Standard procurement lead time (default: 3)
- `DEMAND_FORECAST_DAYS`: Days for demand forecasting (default: 14)
- `SAFETY_STOCK_PERCENTAGE`: Safety stock buffer (default: 20%)
### Business Model Detection
- `ENABLE_BUSINESS_MODEL_DETECTION`: Enable automatic detection (default: true)
- `CENTRAL_BAKERY_ORDER_THRESHOLD`: Order threshold for central bakery (default: 20)
- `INDIVIDUAL_BAKERY_ORDER_THRESHOLD`: Order threshold for individual bakery (default: 5)
### Customer Management
- `CUSTOMER_VALIDATION_ENABLED`: Enable customer validation (default: true)
- `MAX_CUSTOMERS_PER_TENANT`: Maximum customers per tenant (default: 10000)
- `CUSTOMER_CREDIT_CHECK_ENABLED`: Enable credit checking (default: false)
### Order Validation
- `MIN_ORDER_VALUE`: Minimum order value (default: 0.0)
- `MAX_ORDER_VALUE`: Maximum order value (default: 100000.0)
- `VALIDATE_PRODUCT_AVAILABILITY`: Check product availability (default: true)
### Alert Thresholds
- `HIGH_VALUE_ORDER_THRESHOLD`: High-value order alert (default: 5000.0)
- `LARGE_QUANTITY_ORDER_THRESHOLD`: Large quantity alert (default: 100)
- `RUSH_ORDER_HOURS_THRESHOLD`: Rush order time threshold (default: 24)
- `PROCUREMENT_SHORTAGE_THRESHOLD`: Procurement shortage alert (default: 90%)
### Payment and Pricing
- `PAYMENT_VALIDATION_ENABLED`: Enable payment validation (default: true)
- `DYNAMIC_PRICING_ENABLED`: Enable dynamic pricing (default: false)
- `DISCOUNT_ENABLED`: Enable discounts (default: true)
- `MAX_DISCOUNT_PERCENTAGE`: Maximum discount allowed (default: 50%)
### Delivery and Fulfillment
- `DELIVERY_TRACKING_ENABLED`: Enable delivery tracking (default: true)
- `DEFAULT_DELIVERY_WINDOW_HOURS`: Default delivery window (default: 48)
- `PICKUP_ENABLED`: Enable pickup orders (default: true)
- `DELIVERY_ENABLED`: Enable delivery orders (default: true)
## Database Models
### Customer
- Complete customer profile with contact information
- Business type classification (individual, business, central_bakery)
- Payment terms and credit management
- Order history and metrics tracking
- Delivery preferences and special requirements
### CustomerOrder
- Comprehensive order tracking from creation to delivery
- Status management with full audit trail
- Financial calculations including discounts and taxes
- Delivery scheduling and fulfillment tracking
- Business model detection and categorization
- Customer communication preferences
### OrderItem
- Detailed line item tracking with product specifications
- Customization and special instruction support
- Production requirement integration
- Cost tracking and margin analysis
- Quality control integration
### OrderStatusHistory
- Complete audit trail of order status changes
- Event tracking with detailed context
- User attribution and change reasons
- Customer notification tracking
### ProcurementPlan
- Master procurement planning with business model context
- Supplier diversification and risk assessment
- Performance tracking and cost analysis
- Integration with demand forecasting
### ProcurementRequirement
- Detailed procurement requirements per product/ingredient
- Current inventory level integration
- Supplier preference and lead time management
- Quality specifications and special requirements
### OrderAlert
- Comprehensive alert system for order issues
- Multiple severity levels with appropriate routing
- Business impact assessment
- Resolution tracking and performance metrics
## Business Logic
### Order Processing Flow
1. **Order Creation**: Validate customer, calculate totals, create order record
2. **Item Processing**: Create order items with specifications and requirements
3. **Status Tracking**: Maintain complete audit trail of status changes
4. **Customer Metrics**: Update customer statistics and relationship data
5. **Business Model Detection**: Analyze patterns to determine bakery type
6. **Alert Generation**: Check for high-value, rush, or large orders
7. **Service Integration**: Notify production and inventory services
### Procurement Planning
1. **Demand Analysis**: Aggregate orders by delivery date and products
2. **Inventory Integration**: Check current stock levels and reservations
3. **Requirement Calculation**: Calculate net procurement needs with a safety buffer (see the sketch after this list)
4. **Supplier Coordination**: Match requirements with preferred suppliers
5. **Lead Time Planning**: Account for supplier lead times and delivery windows
6. **Risk Assessment**: Evaluate supply risks and backup options
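A sketch of the net-requirement arithmetic in step 3, using the configuration defaults listed above (the names mirror `SAFETY_STOCK_PERCENTAGE`; the exact formula is an assumption, not the service's published algorithm):

```python
def net_requirement(forecast_demand: float, on_hand: float,
                    reserved: float, safety_stock_pct: float = 20.0) -> float:
    # Buffer the demand by the safety stock percentage, then subtract
    # whatever usable stock is already available.
    buffered = forecast_demand * (1 + safety_stock_pct / 100)
    available = max(on_hand - reserved, 0.0)
    return max(buffered - available, 0.0)
```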
### Business Model Detection
- **Individual Bakery**: Low order volume, direct customer sales, standard products
- **Central Bakery**: High volume, wholesale operations, bulk orders
- **Detection Factors**: Order frequency, quantity, customer types, sales channels (a heuristic sketch follows below)
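An illustrative heuristic only (the actual detector may weigh more factors; the weekly window is an assumption):

```python
def detect_business_model(orders_per_week: int,
                          central_threshold: int = 20,
                          individual_threshold: int = 5) -> str:
    # Thresholds mirror CENTRAL_BAKERY_ORDER_THRESHOLD and
    # INDIVIDUAL_BAKERY_ORDER_THRESHOLD from the configuration above.
    if orders_per_week >= central_threshold:
        return "central_bakery"
    if orders_per_week <= individual_threshold:
        return "individual"
    return "undetermined"
```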
## Alert System
### Alert Types
- **High Value Orders**: Orders exceeding configured thresholds
- **Rush Orders**: Orders with tight delivery requirements
- **Large Quantity Orders**: Orders with unusually high item counts
- **Payment Issues**: Payment validation failures or credit problems
- **Procurement Shortages**: Insufficient inventory for order fulfillment
- **Customer Issues**: New customers, credit limit exceedances, special requirements
### Severity Levels
- **Critical**: WhatsApp + Email + Dashboard + SMS
- **High**: WhatsApp + Email + Dashboard
- **Medium**: Email + Dashboard
- **Low**: Dashboard only
## Development
### Setup
```bash
# Install dependencies
pip install -r requirements.txt
# Set up database
# Configure ORDERS_DATABASE_URL environment variable
# Run migrations
alembic upgrade head
# Start service
uvicorn app.main:app --reload
```
### Testing
```bash
# Run tests
pytest
# Run with coverage
pytest --cov=app
```
### Docker
```bash
# Build image
docker build -t orders-service .
# Run container
docker run -p 8000:8000 orders-service
```
## Deployment
The service is designed for containerized deployment with:
- Health checks at `/health`
- Structured logging
- Metrics collection
- Database migrations
- Service discovery integration
## Architecture
Follows Domain-Driven Microservices Architecture:
- Clean separation of concerns
- Repository pattern for data access
- Service layer for business logic
- API layer for external interface
- Shared infrastructure for cross-cutting concerns

View File

@@ -5,7 +5,7 @@
Orders Service Database Configuration
"""
from sqlalchemy import create_engine
from sqlalchemy import create_engine, text
from sqlalchemy.ext.asyncio import create_async_engine, async_sessionmaker, AsyncSession
from sqlalchemy.orm import sessionmaker, DeclarativeBase
import structlog
@@ -72,7 +72,7 @@ async def get_db_health() -> bool:
"""Check database health"""
try:
async with async_engine.begin() as conn:
await conn.execute("SELECT 1")
await conn.execute(text("SELECT 1"))
return True
except Exception as e:
logger.error("Database health check failed", error=str(e))

View File

@@ -1,138 +0,0 @@
# POS Integration Service
This service handles integration with external Point of Sale (POS) systems for the Bakery IA platform.
## Supported POS Systems
- **Square POS** - Popular payment and POS solution with strong API support
- **Toast POS** - Restaurant-focused POS system with comprehensive features
- **Lightspeed Restaurant** - Full-featured restaurant management system
## Features
- **Real-time webhook handling** from POS systems
- **Bidirectional data synchronization** with sales service
- **Secure credential management** with encryption
- **Multi-tenant support** with tenant-specific configurations
- **Comprehensive transaction logging** and audit trails
- **Automatic duplicate detection** and handling
- **Rate limiting and retry mechanisms** for reliability
## Architecture
The POS service follows the established microservices architecture:
```
POS Service
├── API Layer (FastAPI)
├── Business Logic (Services)
├── Data Access (Repositories)
├── External Integrations (POS Providers)
├── Webhook Handlers
└── Background Sync Jobs
```
## API Endpoints
### Configuration Management
- `GET /api/v1/tenants/{tenant_id}/pos/configurations` - List POS configurations
- `POST /api/v1/tenants/{tenant_id}/pos/configurations` - Create new configuration
- `PUT /api/v1/tenants/{tenant_id}/pos/configurations/{config_id}` - Update configuration
- `DELETE /api/v1/tenants/{tenant_id}/pos/configurations/{config_id}` - Delete configuration
### Webhook Handling
- `POST /api/v1/webhooks/{pos_system}` - Receive webhooks from POS systems
- `GET /api/v1/webhooks/{pos_system}/status` - Get webhook status
### Data Synchronization
- `POST /api/v1/tenants/{tenant_id}/pos/configurations/{config_id}/sync` - Trigger sync
- `GET /api/v1/tenants/{tenant_id}/pos/configurations/{config_id}/sync/status` - Get sync status
- `GET /api/v1/tenants/{tenant_id}/pos/transactions` - Get POS transactions
## Database Schema
### Core Tables
- `pos_configurations` - POS system configurations per tenant
- `pos_transactions` - Transaction data from POS systems
- `pos_transaction_items` - Individual items within transactions
- `pos_webhook_logs` - Webhook event logs
- `pos_sync_logs` - Synchronization operation logs
## Environment Variables
See `app/core/config.py` for all configuration options. Key variables include:
```bash
# Database
POS_DATABASE_URL=postgresql+asyncpg://pos_user:pos_pass123@pos-db:5432/pos_db
# POS Provider Credentials
SQUARE_APPLICATION_ID=your_square_app_id
SQUARE_ACCESS_TOKEN=your_square_token
TOAST_CLIENT_ID=your_toast_client_id
LIGHTSPEED_CLIENT_ID=your_lightspeed_client_id
# Webhook Configuration
WEBHOOK_BASE_URL=https://your-domain.com
WEBHOOK_SECRET=your_webhook_secret
```
## Development
### Running the Service
```bash
# Using Docker Compose (recommended)
docker-compose up pos-service
# Local development
cd services/pos
pip install -r requirements.txt
uvicorn app.main:app --reload --port 8000
```
### Database Migrations
```bash
# Create migration
alembic revision --autogenerate -m "Description"
# Apply migrations
alembic upgrade head
```
### Testing
```bash
# Run tests
pytest tests/
# Run with coverage
pytest --cov=app tests/
```
## Security Considerations
- POS credentials are encrypted before storage
- Webhook signatures are verified for authenticity (see the sketch below)
- All API endpoints require tenant-based authentication
- Rate limiting prevents abuse
- Sensitive data is logged with appropriate redaction
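A minimal sketch of HMAC signature verification against `WEBHOOK_SECRET` (the header name and digest scheme vary per POS provider and are assumptions here):

```python
import hashlib
import hmac

def verify_webhook_signature(body: bytes, signature: str, secret: str) -> bool:
    # Recompute HMAC-SHA256 of the raw request body and compare in
    # constant time; reject the webhook event on mismatch.
    expected = hmac.new(secret.encode(), body, hashlib.sha256).hexdigest()
    return hmac.compare_digest(expected, signature)
```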
## Monitoring
The service includes comprehensive monitoring:
- Health check endpoints
- Prometheus metrics
- Structured logging
- Performance tracking
- Error rate monitoring
## Integration Flow
1. **Configuration**: Set up POS system credentials via API
2. **Webhook Registration**: Register webhook URLs with POS providers
3. **Real-time Events**: Receive and process webhook events
4. **Data Sync**: Periodic synchronization of transaction data
5. **Sales Integration**: Forward processed data to sales service

View File

@@ -1,187 +0,0 @@
# Production Service
Production planning and batch management service for the bakery management system.
## Overview
The Production Service handles all production-related operations including:
- **Production Planning**: Calculate daily requirements using demand forecasts and inventory levels
- **Batch Management**: Track production batches from start to finish
- **Capacity Management**: Equipment, staff, and time scheduling
- **Quality Control**: Yield tracking, waste management, efficiency metrics
- **Alert System**: Comprehensive monitoring and notifications
## Features
### Core Capabilities
- Daily production requirements calculation
- Production batch lifecycle management
- Real-time capacity planning and utilization
- Quality control tracking and metrics
- Comprehensive alert system with multiple severity levels
- Integration with inventory, orders, recipes, and sales services
### API Endpoints
#### Dashboard & Planning
- `GET /api/v1/tenants/{tenant_id}/production/dashboard-summary` - Production dashboard data
- `GET /api/v1/tenants/{tenant_id}/production/daily-requirements` - Daily production planning
- `GET /api/v1/tenants/{tenant_id}/production/requirements` - Requirements for procurement
#### Batch Management
- `POST /api/v1/tenants/{tenant_id}/production/batches` - Create production batch
- `GET /api/v1/tenants/{tenant_id}/production/batches/active` - Get active batches
- `GET /api/v1/tenants/{tenant_id}/production/batches/{batch_id}` - Get batch details
- `PUT /api/v1/tenants/{tenant_id}/production/batches/{batch_id}/status` - Update batch status
#### Scheduling & Capacity
- `GET /api/v1/tenants/{tenant_id}/production/schedule` - Production schedule
- `GET /api/v1/tenants/{tenant_id}/production/capacity/status` - Capacity status
#### Alerts & Monitoring
- `GET /api/v1/tenants/{tenant_id}/production/alerts` - Production alerts
- `POST /api/v1/tenants/{tenant_id}/production/alerts/{alert_id}/acknowledge` - Acknowledge alerts
#### Analytics
- `GET /api/v1/tenants/{tenant_id}/production/metrics/yield` - Yield metrics
## Service Integration
### Shared Clients Used
- **InventoryServiceClient**: Stock levels, ingredient availability
- **OrdersServiceClient**: Demand requirements, customer orders
- **RecipesServiceClient**: Recipe requirements, ingredient calculations
- **SalesServiceClient**: Historical sales data
- **NotificationServiceClient**: Alert notifications
### Authentication
Uses shared authentication patterns with tenant isolation:
- JWT token validation
- Tenant access verification
- User permission checks
## Configuration
Key configuration options in `app/core/config.py`:
### Production Planning
- `PLANNING_HORIZON_DAYS`: Days ahead for planning (default: 7)
- `PRODUCTION_BUFFER_PERCENTAGE`: Safety buffer for production (default: 10%; see the sketch after this list)
- `MINIMUM_BATCH_SIZE`: Minimum batch size (default: 1.0)
- `MAXIMUM_BATCH_SIZE`: Maximum batch size (default: 100.0)
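A sketch of how these settings combine when sizing a batch (the formula is illustrative, not the service's exact algorithm):

```python
def planned_batch_size(forecast: float, buffer_pct: float = 10.0,
                       min_size: float = 1.0, max_size: float = 100.0) -> float:
    # Buffer the forecast, then clamp to the configured batch limits.
    buffered = forecast * (1 + buffer_pct / 100)
    return min(max(buffered, min_size), max_size)
```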
### Capacity Management
- `DEFAULT_WORKING_HOURS_PER_DAY`: Standard working hours (default: 12)
- `MAX_OVERTIME_HOURS`: Maximum overtime allowed (default: 4)
- `CAPACITY_UTILIZATION_TARGET`: Target utilization (default: 85%)
### Quality Control
- `MINIMUM_YIELD_PERCENTAGE`: Minimum acceptable yield (default: 85%)
- `QUALITY_SCORE_THRESHOLD`: Minimum quality score (default: 8.0)
### Alert Thresholds
- `CAPACITY_EXCEEDED_THRESHOLD`: Capacity alert threshold (default: 100%)
- `PRODUCTION_DELAY_THRESHOLD_MINUTES`: Delay alert threshold (default: 60)
- `LOW_YIELD_ALERT_THRESHOLD`: Low yield alert (default: 80%)
## Database Models
### ProductionBatch
- Complete batch tracking from planning to completion
- Status management (pending, in_progress, completed, etc.)
- Cost tracking and yield calculations
- Quality metrics integration
### ProductionSchedule
- Daily production scheduling
- Capacity planning and tracking
- Staff and equipment assignments
- Performance metrics
### ProductionCapacity
- Resource availability tracking
- Equipment and staff capacity
- Maintenance scheduling
- Utilization monitoring
### QualityCheck
- Quality control measurements
- Pass/fail tracking
- Defect recording
- Corrective action management
### ProductionAlert
- Comprehensive alert system
- Multiple severity levels
- Action recommendations
- Resolution tracking
## Alert System
### Alert Types
- **Capacity Exceeded**: When production requirements exceed available capacity
- **Production Delay**: When batches are delayed beyond thresholds
- **Cost Spike**: When production costs exceed normal ranges
- **Low Yield**: When yield percentages fall below targets
- **Quality Issues**: When quality scores consistently decline
- **Equipment Maintenance**: When equipment needs maintenance
### Severity Levels
- **Critical**: WhatsApp + Email + Dashboard + SMS
- **High**: WhatsApp + Email + Dashboard
- **Medium**: Email + Dashboard
- **Low**: Dashboard only
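The severity-to-channel mapping above can be expressed as a small lookup; this sketch is illustrative, and the actual dispatch lives in the notification service:
```python
# Illustrative severity-to-channel routing; real dispatch is elsewhere.
SEVERITY_CHANNELS = {
    "critical": ["whatsapp", "email", "dashboard", "sms"],
    "high":     ["whatsapp", "email", "dashboard"],
    "medium":   ["email", "dashboard"],
    "low":      ["dashboard"],
}

def channels_for(severity: str) -> list:
    """Return delivery channels for a severity, with dashboard as fallback."""
    return SEVERITY_CHANNELS.get(severity, ["dashboard"])
```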
## Development
### Setup
```bash
# Install dependencies
pip install -r requirements.txt
# Set up database
# Configure DATABASE_URL environment variable
# Run migrations
alembic upgrade head
# Start service
uvicorn app.main:app --reload
```
### Testing
```bash
# Run tests
pytest
# Run with coverage
pytest --cov=app
```
### Docker
```bash
# Build image
docker build -t production-service .
# Run container
docker run -p 8000:8000 production-service
```
## Deployment
The service is designed for containerized deployment with:
- Health checks at `/health`
- Structured logging
- Metrics collection
- Database migrations
- Service discovery integration
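A minimal sketch of the `/health` wiring, assuming the `get_db_health` helper imported in `app/main.py`; the response shape shown is illustrative:
```python
# Sketch of the /health endpoint; response shape is illustrative.
from fastapi import FastAPI, Request

from app.core.database import get_db_health

app = FastAPI()

@app.get("/health")
async def health(request: Request) -> dict:
    db = await get_db_health()
    alerts = await request.app.state.alert_service.health_check()
    healthy = db.get("status") == "healthy" and alerts.get("status") == "healthy"
    return {"status": "healthy" if healthy else "unhealthy",
            "database": db, "alert_service": alerts}
```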
## Architecture
Follows Domain-Driven Microservices Architecture:
- Clean separation of concerns
- Repository pattern for data access
- Service layer for business logic
- API layer for external interface
- Shared infrastructure for cross-cutting concerns

View File

@@ -14,6 +14,7 @@ import structlog
from app.core.config import settings
from app.core.database import init_database, get_db_health
from app.api.production import router as production_router
from app.services.production_alert_service import ProductionAlertService
# Configure logging
logger = structlog.get_logger()
@@ -25,6 +26,16 @@ async def lifespan(app: FastAPI):
# Startup
try:
await init_database()
logger.info("Database initialized")
# Initialize alert service
alert_service = ProductionAlertService(settings)
await alert_service.start()
logger.info("Production alert service started")
# Store alert service in app state
app.state.alert_service = alert_service
logger.info("Production service started successfully")
except Exception as e:
logger.error("Failed to initialize production service", error=str(e))
@@ -34,6 +45,13 @@ async def lifespan(app: FastAPI):
# Shutdown
logger.info("Production service shutting down")
try:
# Stop alert service
if hasattr(app.state, 'alert_service'):
await app.state.alert_service.stop()
logger.info("Alert service stopped")
except Exception as e:
logger.error("Error during shutdown", error=str(e))
# Create FastAPI application

View File

@@ -0,0 +1,795 @@
# services/production/app/services/production_alert_service.py
"""
Production-specific alert and recommendation detection service
Monitors production capacity, delays, quality issues, and optimization opportunities
"""
import json
from typing import List, Dict, Any, Optional
from uuid import UUID
from datetime import datetime, timedelta
import structlog
from apscheduler.triggers.cron import CronTrigger
from shared.alerts.base_service import BaseAlertService, AlertServiceMixin
from shared.alerts.templates import format_item_message
logger = structlog.get_logger()
class ProductionAlertService(BaseAlertService, AlertServiceMixin):
"""Production service alert and recommendation detection"""
def setup_scheduled_checks(self):
"""Production-specific scheduled checks for alerts and recommendations"""
# Production capacity checks - every 10 minutes during business hours (alerts)
self.scheduler.add_job(
self.check_production_capacity,
CronTrigger(minute='*/10', hour='6-20'),
id='capacity_check',
misfire_grace_time=60,
max_instances=1
)
# Production delays - every 5 minutes during production hours (alerts)
self.scheduler.add_job(
self.check_production_delays,
CronTrigger(minute='*/5', hour='4-22'),
id='delay_check',
misfire_grace_time=30,
max_instances=1
)
# Quality issues check - every 15 minutes (alerts)
self.scheduler.add_job(
self.check_quality_issues,
CronTrigger(minute='*/15'),
id='quality_check',
misfire_grace_time=60,
max_instances=1
)
# Equipment monitoring - every 3 minutes (alerts)
self.scheduler.add_job(
self.check_equipment_status,
CronTrigger(minute='*/3'),
id='equipment_check',
misfire_grace_time=30,
max_instances=1
)
# Efficiency recommendations - every 30 minutes (recommendations)
self.scheduler.add_job(
self.generate_efficiency_recommendations,
CronTrigger(minute='*/30'),
id='efficiency_recs',
misfire_grace_time=120,
max_instances=1
)
# Energy optimization - every hour (recommendations)
self.scheduler.add_job(
self.generate_energy_recommendations,
CronTrigger(minute='0'),
id='energy_recs',
misfire_grace_time=300,
max_instances=1
)
logger.info("Production alert schedules configured",
service=self.config.SERVICE_NAME)
async def check_production_capacity(self):
"""Check if production plan exceeds capacity (alerts)"""
try:
self._checks_performed += 1
# Full capacity query, kept for when the production_schedule and
# production_capacity tables exist; not executed yet (see fallback below)
query = """
WITH capacity_analysis AS (
SELECT
p.tenant_id,
p.planned_date,
SUM(p.planned_quantity) as total_planned,
MAX(pc.daily_capacity) as max_daily_capacity,
COUNT(DISTINCT p.equipment_id) as equipment_count,
AVG(pc.efficiency_percent) as avg_efficiency,
CASE
WHEN SUM(p.planned_quantity) > MAX(pc.daily_capacity) * 1.2 THEN 'severe_overload'
WHEN SUM(p.planned_quantity) > MAX(pc.daily_capacity) THEN 'overload'
WHEN SUM(p.planned_quantity) > MAX(pc.daily_capacity) * 0.9 THEN 'near_capacity'
ELSE 'normal'
END as capacity_status,
(SUM(p.planned_quantity) / MAX(pc.daily_capacity)) * 100 as capacity_percentage
FROM production_schedule p
JOIN production_capacity pc ON pc.equipment_id = p.equipment_id
WHERE p.planned_date >= CURRENT_DATE
AND p.planned_date <= CURRENT_DATE + INTERVAL '3 days'
AND p.status IN ('planned', 'in_progress')
AND p.tenant_id = $1
GROUP BY p.tenant_id, p.planned_date
)
SELECT * FROM capacity_analysis
WHERE capacity_status != 'normal'
ORDER BY capacity_percentage DESC
"""
# Fall back to a simplified check against production_batches only,
# since the schedule/capacity tables may not exist yet
try:
from sqlalchemy import text
# Simplified query using only existing production tables
simplified_query = text("""
SELECT
pb.tenant_id,
DATE(pb.planned_start_time) as planned_date,
COUNT(*) as batch_count,
SUM(pb.planned_quantity) as total_planned,
'capacity_check' as capacity_status
FROM production_batches pb
WHERE pb.planned_start_time >= CURRENT_DATE
AND pb.planned_start_time <= CURRENT_DATE + INTERVAL '3 days'
AND pb.status IN ('planned', 'pending', 'in_progress')
GROUP BY pb.tenant_id, DATE(pb.planned_start_time)
HAVING COUNT(*) > 10 -- Alert if more than 10 batches per day
ORDER BY total_planned DESC
""")
async with self.db_manager.get_session() as session:
result = await session.execute(simplified_query)
capacity_issues = result.fetchall()
for issue in capacity_issues:
await self._process_capacity_issue(issue.tenant_id, issue)
except Exception as e:
logger.debug("Simplified capacity check failed", error=str(e))
except Exception as e:
# Skip capacity checks if tables don't exist (graceful degradation)
if "does not exist" in str(e):
logger.debug("Capacity check skipped - missing tables", error=str(e))
else:
logger.error("Capacity check failed", error=str(e))
self._errors_count += 1
async def _process_capacity_issue(self, tenant_id: UUID, issue: Any):
"""Process capacity overload issue"""
try:
# Normalize SQLAlchemy Row objects to plain dicts for key-based access
if hasattr(issue, '_mapping'):
issue = dict(issue._mapping)
status = issue['capacity_status']
# The simplified fallback query computes no percentage; default to 0
# so those rows fall through the branches below instead of raising
percentage = issue.get('capacity_percentage', 0)
if status == 'severe_overload':
template_data = self.format_spanish_message(
'order_overload',
percentage=int(percentage - 100)
)
await self.publish_item(tenant_id, {
'type': 'severe_capacity_overload',
'severity': 'urgent',
'title': template_data['title'],
'message': template_data['message'],
'actions': template_data['actions'],
'metadata': {
'planned_date': issue['planned_date'].isoformat(),
'capacity_percentage': float(percentage),
'overload_percentage': float(percentage - 100),
'equipment_count': issue['equipment_count']
}
}, item_type='alert')
elif status == 'overload':
severity = self.get_business_hours_severity('high')
await self.publish_item(tenant_id, {
'type': 'capacity_overload',
'severity': severity,
'title': f'⚠️ Capacidad Excedida: {percentage:.0f}%',
'message': f'Producción planificada para {issue["planned_date"]} excede capacidad en {percentage-100:.0f}%.',
'actions': ['Redistribuir cargas', 'Ampliar turnos', 'Subcontratar', 'Posponer pedidos'],
'metadata': {
'planned_date': issue['planned_date'].isoformat(),
'capacity_percentage': float(percentage),
'equipment_count': issue['equipment_count']
}
}, item_type='alert')
elif status == 'near_capacity':
severity = self.get_business_hours_severity('medium')
await self.publish_item(tenant_id, {
'type': 'near_capacity',
'severity': severity,
'title': f'📊 Cerca de Capacidad Máxima: {percentage:.0f}%',
'message': f'Producción del {issue["planned_date"]} está al {percentage:.0f}% de capacidad. Monitorear de cerca.',
'actions': ['Revisar planificación', 'Preparar contingencias', 'Optimizar eficiencia'],
'metadata': {
'planned_date': issue['planned_date'].isoformat(),
'capacity_percentage': float(percentage)
}
}, item_type='alert')
except Exception as e:
logger.error("Error processing capacity issue", error=str(e))
async def check_production_delays(self):
"""Check for production delays (alerts)"""
try:
self._checks_performed += 1
# Simplified query without customer_orders dependency
query = """
SELECT
pb.id, pb.tenant_id, pb.product_name, pb.batch_number,
pb.planned_end_time as planned_completion_time, pb.actual_start_time,
pb.actual_end_time as estimated_completion_time, pb.status,
EXTRACT(minutes FROM (NOW() - pb.planned_end_time)) as delay_minutes,
COALESCE(pb.priority::text, 'medium') as priority_level,
1 as affected_orders -- Default to 1 since we can't count orders
FROM production_batches pb
WHERE pb.status IN ('in_progress', 'delayed')
AND (
(pb.planned_end_time < NOW() AND pb.status = 'in_progress')
OR pb.status = 'delayed'
)
AND pb.planned_end_time > NOW() - INTERVAL '24 hours'
ORDER BY
CASE COALESCE(pb.priority::text, 'medium')
WHEN 'urgent' THEN 1 WHEN 'high' THEN 2 ELSE 3
END,
delay_minutes DESC
"""
from sqlalchemy import text
async with self.db_manager.get_session() as session:
result = await session.execute(text(query))
delays = result.fetchall()
for delay in delays:
await self._process_production_delay(delay)
except Exception as e:
# Skip delay checks if tables don't exist (graceful degradation)
if "does not exist" in str(e):
logger.debug("Production delay check skipped - missing tables", error=str(e))
else:
logger.error("Production delay check failed", error=str(e))
self._errors_count += 1
async def _process_production_delay(self, delay: Any):
"""Process production delay"""
try:
# Normalize SQLAlchemy Row objects to plain dicts for key-based access
if hasattr(delay, '_mapping'):
delay = dict(delay._mapping)
delay_minutes = delay['delay_minutes']
priority = delay['priority_level']
affected_orders = delay['affected_orders']
# Determine severity based on delay time and priority
if delay_minutes > 120 or priority == 'urgent':
severity = 'urgent'
elif delay_minutes > 60 or priority == 'high':
severity = 'high'
elif delay_minutes > 30:
severity = 'medium'
else:
severity = 'low'
template_data = self.format_spanish_message(
'production_delay',
batch_name=f"{delay['product_name']} #{delay['batch_number']}",
delay_minutes=int(delay_minutes)
)
await self.publish_item(delay['tenant_id'], {
'type': 'production_delay',
'severity': severity,
'title': template_data['title'],
'message': template_data['message'],
'actions': template_data['actions'],
'metadata': {
'batch_id': str(delay['id']),
'product_name': delay['product_name'],
'batch_number': delay['batch_number'],
'delay_minutes': delay_minutes,
'priority_level': priority,
'affected_orders': affected_orders,
'planned_completion': delay['planned_completion_time'].isoformat()
}
}, item_type='alert')
except Exception as e:
logger.error("Error processing production delay",
batch_id=str(delay.get('id')),
error=str(e))
async def check_quality_issues(self):
"""Check for quality control issues (alerts)"""
try:
self._checks_performed += 1
# Fixed query using actual quality_checks table structure
query = """
SELECT
qc.id, qc.tenant_id, qc.batch_id, qc.check_type as test_type,
qc.quality_score as result_value,
qc.target_weight as min_acceptable,
(qc.target_weight * (1 + qc.tolerance_percentage/100)) as max_acceptable,
CASE
WHEN qc.pass_fail = false AND qc.defect_count > 5 THEN 'critical'
WHEN qc.pass_fail = false THEN 'major'
ELSE 'minor'
END as qc_severity,
qc.created_at,
pb.product_name, pb.batch_number,
COUNT(*) OVER (PARTITION BY qc.batch_id) as total_failures
FROM quality_checks qc
JOIN production_batches pb ON pb.id = qc.batch_id
WHERE qc.pass_fail = false -- Use pass_fail instead of status
AND qc.created_at > NOW() - INTERVAL '4 hours'
AND qc.corrective_action_needed = true -- Use this instead of acknowledged
ORDER BY
CASE
WHEN qc.pass_fail = false AND qc.defect_count > 5 THEN 1
WHEN qc.pass_fail = false THEN 2
ELSE 3
END,
qc.created_at DESC
"""
from sqlalchemy import text
async with self.db_manager.get_session() as session:
result = await session.execute(text(query))
quality_issues = result.fetchall()
for issue in quality_issues:
await self._process_quality_issue(issue)
except Exception as e:
# Skip quality checks if tables or columns don't exist (graceful degradation)
if "does not exist" in str(e).lower():
logger.debug("Quality check skipped - missing tables or columns", error=str(e))
else:
logger.error("Quality check failed", error=str(e))
self._errors_count += 1
async def _process_quality_issue(self, issue: Any):
"""Process quality control failure"""
try:
# Normalize SQLAlchemy Row objects to plain dicts for key-based access
if hasattr(issue, '_mapping'):
issue = dict(issue._mapping)
qc_severity = issue['qc_severity']
total_failures = issue['total_failures']
# Map QC severity to alert severity
if qc_severity == 'critical' or total_failures > 2:
severity = 'urgent'
elif qc_severity == 'major':
severity = 'high'
else:
severity = 'medium'
await self.publish_item(issue['tenant_id'], {
'type': 'quality_control_failure',
'severity': severity,
'title': f'❌ Fallo Control Calidad: {issue["product_name"]}',
'message': f'Lote {issue["batch_number"]} falló en {issue["test_type"]}. Valor: {issue["result_value"]} (rango: {issue["min_acceptable"]}-{issue["max_acceptable"]})',
'actions': ['Revisar lote', 'Repetir prueba', 'Ajustar proceso', 'Documentar causa'],
'metadata': {
'quality_check_id': str(issue['id']),
'batch_id': str(issue['batch_id']),
'test_type': issue['test_type'],
'result_value': float(issue['result_value']),
'min_acceptable': float(issue['min_acceptable']),
'max_acceptable': float(issue['max_acceptable']),
'qc_severity': qc_severity,
'total_failures': total_failures
}
}, item_type='alert')
# Clear the flag the detection query filters on so the same check is
# not re-alerted (the quality_checks table has no 'acknowledged' column)
from sqlalchemy import text
async with self.db_manager.get_session() as session:
await session.execute(
text("UPDATE quality_checks SET corrective_action_needed = false WHERE id = :id"),
{"id": issue['id']}
)
await session.commit()
except Exception as e:
logger.error("Error processing quality issue",
quality_check_id=str(issue.get('id')),
error=str(e))
async def check_equipment_status(self):
"""Check equipment status and failures (alerts)"""
# Equipment tables don't exist in production database - skip this check
logger.debug("Equipment check skipped - equipment tables not available in production database")
return
async def _process_equipment_issue(self, equipment: Dict[str, Any]):
"""Process equipment issue"""
try:
status = equipment['status']
efficiency = equipment.get('efficiency_percent', 100)
days_to_maintenance = equipment.get('days_to_maintenance', 30)
if status == 'error':
template_data = self.format_spanish_message(
'equipment_failure',
equipment_name=equipment['name']
)
await self.publish_item(equipment['tenant_id'], {
'type': 'equipment_failure',
'severity': 'urgent',
'title': template_data['title'],
'message': template_data['message'],
'actions': template_data['actions'],
'metadata': {
'equipment_id': str(equipment['id']),
'equipment_name': equipment['name'],
'equipment_type': equipment['type'],
'error_count': equipment.get('error_count', 0),
'last_reading': equipment.get('last_reading').isoformat() if equipment.get('last_reading') else None
}
}, item_type='alert')
elif status == 'maintenance_required' or days_to_maintenance <= 1:
severity = 'high' if days_to_maintenance <= 1 else 'medium'
await self.publish_item(equipment['tenant_id'], {
'type': 'maintenance_required',
'severity': severity,
'title': f'🔧 Mantenimiento Requerido: {equipment["name"]}',
'message': f'Equipo {equipment["name"]} requiere mantenimiento en {days_to_maintenance} días.',
'actions': ['Programar mantenimiento', 'Revisar historial', 'Preparar repuestos', 'Planificar parada'],
'metadata': {
'equipment_id': str(equipment['id']),
'days_to_maintenance': days_to_maintenance,
'last_maintenance': equipment.get('last_maintenance').isoformat() if equipment.get('last_maintenance') else None
}
}, item_type='alert')
elif efficiency < 80:
severity = 'medium' if efficiency < 70 else 'low'
await self.publish_item(equipment['tenant_id'], {
'type': 'low_equipment_efficiency',
'severity': severity,
'title': f'📉 Baja Eficiencia: {equipment["name"]}',
'message': f'Eficiencia del {equipment["name"]} bajó a {efficiency:.1f}%. Revisar funcionamiento.',
'actions': ['Revisar configuración', 'Limpiar equipo', 'Calibrar sensores', 'Revisar mantenimiento'],
'metadata': {
'equipment_id': str(equipment['id']),
'efficiency_percent': float(efficiency),
'temperature': equipment.get('temperature'),
'vibration_level': equipment.get('vibration_level')
}
}, item_type='alert')
except Exception as e:
logger.error("Error processing equipment issue",
equipment_id=str(equipment.get('id')),
error=str(e))
async def generate_efficiency_recommendations(self):
"""Generate production efficiency recommendations"""
try:
self._checks_performed += 1
# Analyze production patterns for efficiency opportunities
query = """
WITH efficiency_analysis AS (
SELECT
pb.tenant_id, pb.product_name,
AVG(EXTRACT(minutes FROM (pb.actual_end_time - pb.actual_start_time))) as avg_production_time,
AVG(pb.planned_duration_minutes) as avg_planned_duration,
COUNT(*) as batch_count,
AVG(pb.yield_percentage) as avg_yield,
EXTRACT(hour FROM pb.actual_start_time) as start_hour
FROM production_batches pb
WHERE pb.status = 'completed'
AND pb.actual_end_time > CURRENT_DATE - INTERVAL '30 days'
AND pb.tenant_id = :tenant_id
GROUP BY pb.tenant_id, pb.product_name, EXTRACT(hour FROM pb.actual_start_time)
HAVING COUNT(*) >= 3
),
recommendations AS (
SELECT *,
CASE
WHEN avg_production_time > avg_planned_duration * 1.2 THEN 'reduce_production_time'
WHEN avg_yield < 85 THEN 'improve_yield'
WHEN start_hour BETWEEN 14 AND 16 AND avg_production_time > avg_planned_duration * 1.1 THEN 'avoid_afternoon_production'
ELSE null
END as recommendation_type,
(avg_production_time - avg_planned_duration) / avg_planned_duration * 100 as efficiency_loss_percent
FROM efficiency_analysis
)
SELECT * FROM recommendations
WHERE recommendation_type IS NOT NULL
AND efficiency_loss_percent > 10
ORDER BY efficiency_loss_percent DESC
"""
tenants = await self.get_active_tenants()
for tenant_id in tenants:
try:
from sqlalchemy import text
async with self.db_manager.get_session() as session:
result = await session.execute(text(query), {"tenant_id": tenant_id})
recommendations = result.fetchall()
for rec in recommendations:
await self._generate_efficiency_recommendation(tenant_id, rec)
except Exception as e:
logger.error("Error generating efficiency recommendations",
tenant_id=str(tenant_id),
error=str(e))
except Exception as e:
logger.error("Efficiency recommendations failed", error=str(e))
self._errors_count += 1
async def _generate_efficiency_recommendation(self, tenant_id: UUID, rec: Any):
"""Generate specific efficiency recommendation"""
try:
# Normalize SQLAlchemy Row objects to plain dicts for key-based access
if hasattr(rec, '_mapping'):
rec = dict(rec._mapping)
if not self.should_send_recommendation(tenant_id, rec['recommendation_type']):
return
rec_type = rec['recommendation_type']
efficiency_loss = rec['efficiency_loss_percent']
if rec_type == 'reduce_production_time':
template_data = self.format_spanish_message(
'production_efficiency',
suggested_time=f"{rec['start_hour']:02d}:00",
savings_percent=efficiency_loss
)
await self.publish_item(tenant_id, {
'type': 'production_efficiency',
'severity': 'medium',
'title': template_data['title'],
'message': template_data['message'],
'actions': template_data['actions'],
'metadata': {
'product_name': rec['product_name'],
'avg_production_time': float(rec['avg_production_time']),
'avg_planned_duration': float(rec['avg_planned_duration']),
'efficiency_loss_percent': float(efficiency_loss),
'batch_count': rec['batch_count'],
'recommendation_type': rec_type
}
}, item_type='recommendation')
elif rec_type == 'improve_yield':
await self.publish_item(tenant_id, {
'type': 'yield_improvement',
'severity': 'medium',
'title': f'📈 Mejorar Rendimiento: {rec["product_name"]}',
'message': f'Rendimiento promedio del {rec["product_name"]} es {rec["avg_yield"]:.1f}%. Oportunidad de mejora.',
'actions': ['Revisar receta', 'Optimizar proceso', 'Entrenar personal', 'Verificar ingredientes'],
'metadata': {
'product_name': rec['product_name'],
'avg_yield': float(rec['avg_yield']),
'batch_count': rec['batch_count'],
'recommendation_type': rec_type
}
}, item_type='recommendation')
elif rec_type == 'avoid_afternoon_production':
await self.publish_item(tenant_id, {
'type': 'schedule_optimization',
'severity': 'low',
'title': f'⏰ Optimizar Horario: {rec["product_name"]}',
'message': f'Producción de {rec["product_name"]} en horario {rec["start_hour"]}:00 muestra menor eficiencia.',
'actions': ['Cambiar horario', 'Analizar causas', 'Revisar personal', 'Optimizar ambiente'],
'metadata': {
'product_name': rec['product_name'],
'start_hour': rec['start_hour'],
'efficiency_loss_percent': float(efficiency_loss),
'recommendation_type': rec_type
}
}, item_type='recommendation')
except Exception as e:
logger.error("Error generating efficiency recommendation",
product_name=rec.get('product_name'),
error=str(e))
async def generate_energy_recommendations(self):
"""Generate energy optimization recommendations"""
try:
# Analyze energy consumption patterns
query = """
SELECT
e.tenant_id, e.name as equipment_name, e.type,
AVG(ec.energy_consumption_kwh) as avg_energy,
EXTRACT(hour FROM ec.recorded_at) as hour_of_day,
COUNT(*) as readings_count
FROM equipment e
JOIN energy_consumption ec ON ec.equipment_id = e.id
WHERE ec.recorded_at > CURRENT_DATE - INTERVAL '30 days'
AND e.tenant_id = :tenant_id
GROUP BY e.tenant_id, e.id, EXTRACT(hour FROM ec.recorded_at)
HAVING COUNT(*) >= 10
ORDER BY avg_energy DESC
"""
tenants = await self.get_active_tenants()
for tenant_id in tenants:
try:
from sqlalchemy import text
async with self.db_manager.get_session() as session:
result = await session.execute(text(query), {"tenant_id": tenant_id})
energy_data = result.fetchall()
# Analyze for peak hours and optimization opportunities
await self._analyze_energy_patterns(tenant_id, energy_data)
except Exception as e:
logger.error("Error generating energy recommendations",
tenant_id=str(tenant_id),
error=str(e))
except Exception as e:
logger.error("Energy recommendations failed", error=str(e))
self._errors_count += 1
async def _analyze_energy_patterns(self, tenant_id: UUID, energy_data: List[Dict[str, Any]]):
"""Analyze energy consumption patterns for optimization"""
try:
if not energy_data:
return
# Group by equipment and find peak hours
equipment_data = {}
for record in energy_data:
# Normalize SQLAlchemy Row objects to plain dicts for key-based access
record = dict(record._mapping) if hasattr(record, '_mapping') else record
equipment = record['equipment_name']
if equipment not in equipment_data:
equipment_data[equipment] = []
equipment_data[equipment].append(record)
for equipment, records in equipment_data.items():
# Find peak consumption hours
peak_hour_record = max(records, key=lambda x: x['avg_energy'])
off_peak_records = [r for r in records if r['hour_of_day'] < 7 or r['hour_of_day'] > 22]
if off_peak_records and peak_hour_record['avg_energy'] > 0:
min_off_peak = min(off_peak_records, key=lambda x: x['avg_energy'])
potential_savings = ((peak_hour_record['avg_energy'] - min_off_peak['avg_energy']) /
peak_hour_record['avg_energy']) * 100
if potential_savings > 15: # More than 15% potential savings
template_data = self.format_spanish_message(
'energy_optimization',
start_time=f"{min_off_peak['hour_of_day']:02d}:00",
end_time=f"{min_off_peak['hour_of_day']+2:02d}:00",
savings_euros=potential_savings * 0.15 # Rough estimate
)
await self.publish_item(tenant_id, {
'type': 'energy_optimization',
'severity': 'low',
'title': template_data['title'],
'message': template_data['message'],
'actions': template_data['actions'],
'metadata': {
'equipment_name': equipment,
'peak_hour': peak_hour_record['hour_of_day'],
'optimal_hour': min_off_peak['hour_of_day'],
'potential_savings_percent': float(potential_savings),
'peak_consumption': float(peak_hour_record['avg_energy']),
'optimal_consumption': float(min_off_peak['avg_energy'])
}
}, item_type='recommendation')
except Exception as e:
logger.error("Error analyzing energy patterns", error=str(e))
async def register_db_listeners(self, conn):
"""Register production-specific database listeners"""
try:
await conn.add_listener('production_alerts', self.handle_production_db_alert)
logger.info("Database listeners registered",
service=self.config.SERVICE_NAME)
except Exception as e:
logger.error("Failed to register database listeners",
service=self.config.SERVICE_NAME,
error=str(e))
async def handle_production_db_alert(self, connection, pid, channel, payload):
"""Handle production alert from database trigger"""
try:
data = json.loads(payload)
tenant_id = UUID(data['tenant_id'])
template_data = self.format_spanish_message(
'production_delay',
batch_name=f"{data['product_name']} #{data.get('batch_number', 'N/A')}",
delay_minutes=data['delay_minutes']
)
await self.publish_item(tenant_id, {
'type': 'production_delay',
'severity': 'high',
'title': template_data['title'],
'message': template_data['message'],
'actions': template_data['actions'],
'metadata': {
'batch_id': data['batch_id'],
'delay_minutes': data['delay_minutes'],
'trigger_source': 'database'
}
}, item_type='alert')
except Exception as e:
logger.error("Error handling production DB alert", error=str(e))
async def start_event_listener(self):
"""Listen for production-affecting events"""
try:
# Subscribe to inventory events that might affect production
await self.rabbitmq_client.consume_events(
"bakery_events",
f"production.inventory.{self.config.SERVICE_NAME}",
"inventory.critical_shortage",
self.handle_inventory_shortage
)
logger.info("Event listeners started",
service=self.config.SERVICE_NAME)
except Exception as e:
logger.error("Failed to start event listeners",
service=self.config.SERVICE_NAME,
error=str(e))
async def handle_inventory_shortage(self, message):
"""Handle critical inventory shortage affecting production"""
try:
shortage = json.loads(message.body)
tenant_id = UUID(shortage['tenant_id'])
# Check if this ingredient affects any current production
affected_batches = await self.get_affected_production_batches(
shortage['ingredient_id']
)
if affected_batches:
await self.publish_item(tenant_id, {
'type': 'production_ingredient_shortage',
'severity': 'high',
'title': f'🚨 Falta Ingrediente para Producción',
'message': f'Escasez de {shortage["ingredient_name"]} afecta {len(affected_batches)} lotes en producción.',
'actions': ['Buscar ingrediente alternativo', 'Pausar producción', 'Contactar proveedor urgente', 'Reorganizar plan'],
'metadata': {
'ingredient_id': shortage['ingredient_id'],
'ingredient_name': shortage['ingredient_name'],
'affected_batches': [str(b) for b in affected_batches],
'shortage_amount': shortage.get('shortage_amount', 0)
}
}, item_type='alert')
except Exception as e:
logger.error("Error handling inventory shortage event", error=str(e))
async def get_affected_production_batches(self, ingredient_id: str) -> List[str]:
"""Get production batches affected by ingredient shortage"""
try:
query = """
SELECT DISTINCT pb.id
FROM production_batches pb
JOIN recipe_ingredients ri ON ri.recipe_id = pb.recipe_id
WHERE ri.ingredient_id = :ingredient_id
AND pb.status IN ('planned', 'in_progress')
AND pb.planned_end_time > NOW()
"""
from sqlalchemy import text
async with self.db_manager.get_session() as session:
result_rows = await session.execute(text(query), {"ingredient_id": ingredient_id})
result = result_rows.fetchall()
return [str(row.id) for row in result]
except Exception as e:
logger.error("Error getting affected production batches",
ingredient_id=ingredient_id,
error=str(e))
return []

View File

@@ -15,6 +15,14 @@ httpx==0.25.2
# Logging and monitoring
structlog==23.2.0
prometheus-client==0.19.0
# Message queues and Redis
aio-pika==9.3.1
redis>=4.0.0
# Scheduling
APScheduler==3.10.4
# Date and time utilities
python-dateutil==2.8.2

View File

@@ -1,220 +0,0 @@
## 🎯 **Migration Summary: Prophet Models to Training Service** (Continued)
I've created a complete migration of the Prophet ML training functionality from the monolithic backend to a dedicated training microservice. Here's a comprehensive summary of what has been implemented:
### **✅ What Was Migrated**
1. **Prophet Manager** (`prophet_manager.py`):
- Enhanced model training with bakery-specific configurations
- Spanish holidays integration
- Advanced model persistence and metadata storage
- Training metrics calculation
2. **ML Trainer** (`trainer.py`):
- Complete training orchestration for multiple products
- Single product training capability
- Model performance evaluation
- Async-first design replacing Celery complexity
3. **Data Processor** (`data_processor.py`):
- Advanced feature engineering for bakery forecasting
- Weather and traffic data integration
- Spanish holiday and school calendar detection
- Temporal feature extraction
4. **API Layer** (`training.py`):
- RESTful endpoints for training job management
- Real-time progress tracking
- Job cancellation and status monitoring
- Data validation before training
5. **Database Models** (`training.py`):
- `ModelTrainingLog`: Job execution tracking
- `TrainedModel`: Model registry and versioning
- `ModelPerformanceMetric`: Performance monitoring
- `TrainingJobQueue`: Job scheduling system
6. **Service Layer** (`training_service.py`):
- Business logic orchestration
- External service integration (data service)
- Job lifecycle management
- Error handling and recovery
7. **Messaging Integration** (`messaging.py`):
- Event-driven architecture with RabbitMQ
- Inter-service communication
- Real-time notifications
- Event publishing for other services
### **🔧 Key Improvements Over Old System**
#### **1. Eliminated Celery Complexity**
- **Before**: Complex Celery worker setup with sync/async mixing
- **After**: Pure async implementation with FastAPI background tasks
#### **2. Better Error Handling**
- **Before**: Celery task failures were hard to debug
- **After**: Detailed error tracking and recovery mechanisms
#### **3. Real-Time Progress Tracking**
- **Before**: Limited visibility into training progress
- **After**: Real-time updates with detailed step-by-step progress
#### **4. Service Isolation**
- **Before**: Training tightly coupled with main application
- **After**: Independent service that can scale separately
#### **5. Enhanced Model Management**
- **Before**: Basic model storage in filesystem
- **After**: Complete model lifecycle with versioning and metadata
### **🚀 New Capabilities**
#### **1. Advanced Training Features**
```python
# Support for different training modes
await trainer.train_tenant_models(...) # All products
await trainer.train_single_product(...) # Single product
await trainer.evaluate_model_performance(...) # Performance evaluation
```
#### **2. Real-Time Job Management**
```python
# Job lifecycle management
POST /training/jobs # Start training
GET /training/jobs/{id}/status # Get progress
POST /training/jobs/{id}/cancel # Cancel job
GET /training/jobs/{id}/logs # View detailed logs
```
#### **3. Data Validation**
```python
# Pre-training validation
POST /training/validate # Check data quality before training
```
#### **4. Event-Driven Architecture**
```python
# Automatic event publishing
await publish_job_started(job_id, tenant_id, config)
await publish_job_completed(job_id, tenant_id, results)
await publish_model_trained(model_id, tenant_id, product_name, metrics)
```
### **📊 Performance Improvements**
#### **1. Faster Training Startup**
- **Before**: 30-60 seconds Celery worker initialization
- **After**: <5 seconds direct async execution
#### **2. Better Resource Utilization**
- **Before**: Fixed Celery worker pools
- **After**: Dynamic scaling based on demand
#### **3. Improved Memory Management**
- **Before**: Memory leaks in long-running Celery workers
- **After**: Clean memory usage with proper cleanup
### **🔒 Enhanced Security & Monitoring**
#### **1. Authentication Integration**
```python
# Secure endpoints with tenant isolation
@router.post("/jobs")
async def start_training_job(
request: TrainingJobRequest,
tenant_id: str = Depends(get_current_tenant_id) # Automatic tenant isolation
):
```
#### **2. Comprehensive Monitoring**
```python
# Built-in metrics collection
metrics.increment_counter("training_jobs_started")
metrics.increment_counter("training_jobs_completed")
metrics.increment_counter("training_jobs_failed")
```
#### **3. Detailed Logging**
```python
# Structured logging with context
logger.info(f"Training job {job_id} completed successfully",
extra={"tenant_id": tenant_id, "models_trained": count})
```
### **🔄 Integration with Existing Architecture**
#### **1. Seamless API Integration**
The new training service integrates perfectly with the existing gateway:
```yaml
# API Gateway routes to training service
/api/training/* → http://training-service:8000/
```
#### **2. Event-Driven Communication**
```python
# Other services can listen to training events
"training.job.completed" forecasting-service (update models)
"training.job.completed" notification-service (send alerts)
"training.model.updated" tenant-service (update quotas)
```
#### **3. Database Independence**
- Training service has its own PostgreSQL database
- Clean separation from other service data
- Easy to scale and backup independently
### **📦 Deployment Ready**
#### **1. Docker Configuration**
- Optimized Dockerfile with proper security
- Non-root user execution
- Health checks included
#### **2. Requirements Management**
- Pinned dependency versions
- Separated development/production requirements
- Prophet and ML libraries properly configured
#### **3. Environment Configuration**
```python
# Flexible configuration management
MODEL_STORAGE_PATH=/app/models
MAX_TRAINING_TIME_MINUTES=30
MIN_TRAINING_DATA_DAYS=30
PROPHET_SEASONALITY_MODE=additive
```
### **🎯 Migration Benefits Summary**
| Aspect | Before (Celery) | After (Microservice) |
|--------|----------------|----------------------|
| **Startup Time** | 30-60 seconds | <5 seconds |
| **Error Handling** | Basic | Comprehensive |
| **Progress Tracking** | Limited | Real-time |
| **Scalability** | Fixed workers | Dynamic scaling |
| **Debugging** | Difficult | Easy with logs |
| **Testing** | Complex | Simple unit tests |
| **Deployment** | Monolithic | Independent |
| **Monitoring** | Basic | Full observability |
### **🔧 Ready for Production**
This training service is **production-ready** and provides:
1. **Robust Error Handling**: Graceful failure recovery
2. **Horizontal Scaling**: Can run multiple instances
3. **Performance Monitoring**: Built-in metrics and health checks
4. **Security**: Proper authentication and tenant isolation
5. **Maintainability**: Clean code structure and comprehensive tests
### **🚀 Next Steps**
The training service is now ready to be integrated into your microservices architecture. It completely replaces the old Celery-based training system while providing significant improvements in reliability, performance, and maintainability.
The implementation follows all the microservices best practices and integrates seamlessly with the broader platform architecture you're building for the Madrid bakery forecasting system.

View File

@@ -0,0 +1 @@
# shared/alerts/__init__.py

View File

@@ -0,0 +1,353 @@
# shared/alerts/base_service.py
"""
Base alert service pattern for all microservices
Supports both alerts and recommendations through unified detection patterns
"""
import asyncio
import json
import uuid
from typing import List, Dict, Any, Optional
from uuid import UUID
from datetime import datetime, timedelta
import structlog
from redis.asyncio import Redis
from apscheduler.schedulers.asyncio import AsyncIOScheduler
from apscheduler.triggers.cron import CronTrigger
from shared.messaging.rabbitmq import RabbitMQClient
from shared.database.base import DatabaseManager
from shared.config.rabbitmq_config import get_routing_key
logger = structlog.get_logger()
class BaseAlertService:
"""
Base class for service-specific alert and recommendation detection
Implements hybrid detection patterns: scheduled jobs, event-driven, and database triggers
"""
def __init__(self, config):
self.config = config
self.db_manager = DatabaseManager(config.DATABASE_URL)
self.rabbitmq_client = RabbitMQClient(config.RABBITMQ_URL, config.SERVICE_NAME)
self.redis = None
self.scheduler = AsyncIOScheduler()
self.is_leader = False
self.exchange = "alerts.exchange"
# Metrics
self._items_published = 0
self._checks_performed = 0
self._errors_count = 0
async def start(self):
"""Initialize all detection mechanisms"""
try:
# Connect to Redis for leader election and deduplication
self.redis = await Redis.from_url(self.config.REDIS_URL)
logger.info("Connected to Redis", service=self.config.SERVICE_NAME)
# Connect to RabbitMQ
await self.rabbitmq_client.connect()
logger.info("Connected to RabbitMQ", service=self.config.SERVICE_NAME)
# Start leader election for scheduled jobs
asyncio.create_task(self.maintain_leadership())
# Setup scheduled checks (runs only on leader)
self.setup_scheduled_checks()
# Start database listener (runs on all instances)
await self.start_database_listener()
# Start event listener (runs on all instances)
await self.start_event_listener()
logger.info("Alert service started", service=self.config.SERVICE_NAME)
except Exception as e:
logger.error("Failed to start alert service", service=self.config.SERVICE_NAME, error=str(e))
raise
async def stop(self):
"""Clean shutdown"""
try:
# Stop scheduler
if self.scheduler.running:
self.scheduler.shutdown()
# Close connections
if self.redis:
await self.redis.aclose() # Use aclose() for modern Redis client
await self.rabbitmq_client.disconnect()
logger.info("Alert service stopped", service=self.config.SERVICE_NAME)
except Exception as e:
logger.error("Error stopping alert service", service=self.config.SERVICE_NAME, error=str(e))
# PATTERN 1: Scheduled Background Jobs
def setup_scheduled_checks(self):
"""Configure scheduled alert checks - Override in service"""
raise NotImplementedError("Subclasses must implement setup_scheduled_checks")
async def maintain_leadership(self):
"""Leader election for scheduled jobs"""
lock_key = f"scheduler_lock:{self.config.SERVICE_NAME}"
lock_ttl = 60
while True:
try:
instance_id = getattr(self.config, 'INSTANCE_ID', 'default')
was_leader = self.is_leader
# Try to acquire new leadership if not currently leader
if not self.is_leader:
result = await self.redis.set(
lock_key,
instance_id,
ex=lock_ttl,
nx=True
)
self.is_leader = result is not None
else:
# Already leader - try to extend the lock
current_value = await self.redis.get(lock_key)
if current_value and current_value.decode() == instance_id:
# Still our lock, extend it
await self.redis.expire(lock_key, lock_ttl)
self.is_leader = True
else:
# Lock expired or taken by someone else
self.is_leader = False
# Handle leadership changes
if self.is_leader and not was_leader:
self.scheduler.start()
logger.info("Acquired scheduler leadership", service=self.config.SERVICE_NAME)
elif not self.is_leader and was_leader:
self.scheduler.shutdown()
logger.info("Lost scheduler leadership", service=self.config.SERVICE_NAME)
await asyncio.sleep(lock_ttl // 2)
except Exception as e:
logger.error("Leadership error", service=self.config.SERVICE_NAME, error=str(e))
self.is_leader = False
await asyncio.sleep(5)
# PATTERN 2: Event-Driven Detection
async def start_event_listener(self):
"""Listen for business events - Override in service"""
pass
# PATTERN 3: Database Triggers
async def start_database_listener(self):
"""Listen for database notifications"""
try:
import asyncpg
# Convert SQLAlchemy URL format to plain PostgreSQL for asyncpg
database_url = self.config.DATABASE_URL
if database_url.startswith('postgresql+asyncpg://'):
database_url = database_url.replace('postgresql+asyncpg://', 'postgresql://')
conn = await asyncpg.connect(database_url)
# Register listeners based on service
await self.register_db_listeners(conn)
logger.info("Database listeners registered", service=self.config.SERVICE_NAME)
except Exception as e:
logger.error("Failed to setup database listeners", service=self.config.SERVICE_NAME, error=str(e))
async def register_db_listeners(self, conn):
"""Register database listeners - Override in service"""
pass
# Publishing (Updated for type)
async def publish_item(self, tenant_id: UUID, item: Dict[str, Any], item_type: str = 'alert'):
"""Publish alert or recommendation to RabbitMQ with deduplication"""
try:
# Check for duplicate
item_key = f"{tenant_id}:{item_type}:{item['type']}:{item.get('metadata', {}).get('id', '')}"
if await self.is_duplicate_item(item_key):
logger.debug("Duplicate item skipped",
service=self.config.SERVICE_NAME,
item_type=item_type,
alert_type=item['type'])
return False
# Add metadata
item['id'] = str(uuid.uuid4())
item['tenant_id'] = str(tenant_id)
item['service'] = self.config.SERVICE_NAME
item['timestamp'] = datetime.utcnow().isoformat()
item['item_type'] = item_type # 'alert' or 'recommendation'
# Determine routing key based on severity and type
routing_key = get_routing_key(item_type, item['severity'], self.config.SERVICE_NAME)
# Publish to RabbitMQ
success = await self.rabbitmq_client.publish_event(
exchange_name=self.exchange,
routing_key=routing_key,
event_data=item
)
if success:
self._items_published += 1
logger.info("Item published successfully",
service=self.config.SERVICE_NAME,
item_type=item_type,
alert_type=item['type'],
severity=item['severity'],
routing_key=routing_key)
else:
self._errors_count += 1
logger.error("Failed to publish item",
service=self.config.SERVICE_NAME,
item_type=item_type,
alert_type=item['type'])
return success
except Exception as e:
self._errors_count += 1
logger.error("Error publishing item",
service=self.config.SERVICE_NAME,
error=str(e),
item_type=item_type)
return False
async def is_duplicate_item(self, item_key: str, window_minutes: int = 15) -> bool:
"""Prevent duplicate items within time window"""
key = f"item_sent:{item_key}"
try:
result = await self.redis.set(
key, "1",
ex=window_minutes * 60,
nx=True
)
return result is None # None means duplicate
except Exception as e:
logger.error("Error checking duplicate", error=str(e))
return False # Allow publishing if check fails
# Helper methods
async def get_active_tenants(self) -> List[UUID]:
"""Get list of active tenant IDs"""
try:
from sqlalchemy import text
query = text("SELECT DISTINCT tenant_id FROM tenants WHERE status = 'active'")
async with self.db_manager.get_session() as session:
result = await session.execute(query)
return [row.tenant_id for row in result.fetchall()]
except Exception as e:
# If tenants table doesn't exist, skip tenant-based processing
if "does not exist" in str(e):
logger.debug("Tenants table not found, skipping tenant-based alert processing")
return []
else:
logger.error("Error fetching active tenants", error=str(e))
return []
async def get_tenant_config(self, tenant_id: UUID) -> Dict[str, Any]:
"""Get tenant-specific configuration"""
try:
from sqlalchemy import text
query = text("SELECT config FROM tenants WHERE tenant_id = :tenant_id")
async with self.db_manager.get_session() as session:
result = await session.execute(query, {"tenant_id": tenant_id})
row = result.fetchone()
return json.loads(row.config) if row and row.config else {}
except Exception as e:
logger.error("Error fetching tenant config", tenant_id=str(tenant_id), error=str(e))
return {}
# Health and metrics
def get_metrics(self) -> Dict[str, Any]:
"""Get service metrics"""
return {
"items_published": self._items_published,
"checks_performed": self._checks_performed,
"errors_count": self._errors_count,
"is_leader": self.is_leader,
"scheduler_running": self.scheduler.running,
"redis_connected": self.redis and not self.redis.closed,
"rabbitmq_connected": self.rabbitmq_client.connected if self.rabbitmq_client else False
}
async def health_check(self) -> Dict[str, Any]:
"""Comprehensive health check"""
try:
# Check Redis
redis_healthy = False
if self.redis and not self.redis.closed:
await self.redis.ping()
redis_healthy = True
# Check RabbitMQ
rabbitmq_healthy = self.rabbitmq_client.connected if self.rabbitmq_client else False
# Check database
db_healthy = False
try:
from sqlalchemy import text
async with self.db_manager.get_session() as session:
await session.execute(text("SELECT 1"))
db_healthy = True
except Exception:
pass
status = "healthy" if all([redis_healthy, rabbitmq_healthy, db_healthy]) else "unhealthy"
return {
"status": status,
"service": self.config.SERVICE_NAME,
"components": {
"redis": "healthy" if redis_healthy else "unhealthy",
"rabbitmq": "healthy" if rabbitmq_healthy else "unhealthy",
"database": "healthy" if db_healthy else "unhealthy",
"scheduler": "running" if self.scheduler.running else "stopped"
},
"metrics": self.get_metrics()
}
except Exception as e:
return {
"status": "error",
"service": self.config.SERVICE_NAME,
"error": str(e)
}
class AlertServiceMixin:
"""Mixin providing common alert helper methods"""
def format_spanish_message(self, template_key: str, **kwargs) -> Dict[str, Any]:
"""Format Spanish alert message"""
from shared.alerts.templates import format_item_message
return format_item_message(template_key, 'es', **kwargs)
def get_business_hours_severity(self, base_severity: str) -> str:
"""Adjust severity based on business hours"""
current_hour = datetime.now().hour
# Reduce non-critical severity outside business hours (7-20)
if not (7 <= current_hour <= 20):
if base_severity == 'medium':
return 'low'
elif base_severity == 'high' and (current_hour < 6 or current_hour > 22):
return 'medium'
return base_severity
def should_send_recommendation(self, tenant_id: UUID, rec_type: str) -> bool:
"""Check if recommendation should be sent based on tenant preferences"""
# Implement tenant-specific recommendation frequency limits
# This is a simplified version
return True
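# Usage sketch (illustrative): a service-specific subclass implements
# setup_scheduled_checks and is started from the FastAPI lifespan, mirroring
# ProductionAlertService; the names here are examples only.
#
#   class MyAlertService(BaseAlertService, AlertServiceMixin):
#       def setup_scheduled_checks(self):
#           self.scheduler.add_job(self.check_something,
#                                  CronTrigger(minute='*/10'),
#                                  id='something_check', max_instances=1)
#
#   service = MyAlertService(settings)
#   await service.start()   # ...and await service.stop() on shutdown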

shared/alerts/templates.py Normal file
View File

@@ -0,0 +1,218 @@
# shared/alerts/templates.py
"""
Alert and recommendation templates in Spanish for the bakery platform
"""
from typing import Dict, Any
ITEM_TEMPLATES = {
# ALERTS - Critical Issues Requiring Immediate Action
'critical_stock_shortage': {
'es': {
'title': '🚨 Stock Crítico: {ingredient_name}',
'message': 'Solo {current_stock}kg disponibles, necesarios {required_stock}kg para producción de mañana. Acción inmediata requerida.',
'actions': ['Realizar pedido de emergencia', 'Contactar proveedor', 'Ajustar plan de producción']
},
'en': {
'title': '🚨 Critical Stock: {ingredient_name}',
'message': 'Only {current_stock}kg available, {required_stock}kg needed for tomorrow\'s production. Immediate action required.',
'actions': ['Place emergency order', 'Contact supplier', 'Adjust production plan']
}
},
'temperature_breach': {
'es': {
'title': '🌡️ ALERTA TEMPERATURA',
'message': '{location}: {temperature}°C durante {duration} minutos. Revisar productos inmediatamente para evitar deterioro.',
'actions': ['Verificar productos', 'Llamar técnico refrigeración', 'Documentar incidencia', 'Mover productos']
},
'en': {
'title': '🌡️ TEMPERATURE ALERT',
'message': '{location}: {temperature}°C for {duration} minutes. Check products immediately to prevent spoilage.',
'actions': ['Check products', 'Call refrigeration technician', 'Document incident', 'Move products']
}
},
'production_delay': {
'es': {
'title': '⏰ Retraso en Producción',
'message': 'Lote {batch_name} con {delay_minutes} minutos de retraso. Impacto en entregas del día.',
'actions': ['Acelerar producción', 'Notificar clientes', 'Reorganizar horarios', 'Buscar capacidad adicional']
}
},
'expired_products': {
'es': {
'title': '📅 Productos Caducados',
'message': '{product_count} productos han caducado hoy. Retirar inmediatamente por seguridad alimentaria.',
'actions': ['Retirar productos', 'Revisar inventario', 'Ajustar pedidos', 'Documentar pérdidas']
}
},
'equipment_failure': {
'es': {
'title': '⚙️ Fallo de Equipo',
'message': '{equipment_name} no está funcionando correctamente. Producción afectada.',
'actions': ['Parar producción', 'Llamar mantenimiento', 'Usar equipo alternativo', 'Documentar fallo']
}
},
'order_overload': {
'es': {
'title': '📋 Sobrecarga de Pedidos',
'message': 'Capacidad excedida en {percentage}%. Riesgo de no cumplir entregas.',
'actions': ['Priorizar pedidos', 'Aumentar turnos', 'Rechazar nuevos pedidos', 'Buscar ayuda externa']
}
},
'supplier_delay': {
'es': {
'title': '🚚 Retraso de Proveedor',
'message': 'Entrega de {supplier_name} retrasada {hours} horas. Impacto en producción de {products}.',
'actions': ['Contactar proveedor', 'Buscar alternativas', 'Ajustar producción', 'Usar stock reserva']
}
},
# RECOMMENDATIONS - Proactive Suggestions for Optimization
'inventory_optimization': {
'es': {
'title': '📈 Optimización de Stock: {ingredient_name}',
'message': 'Basado en tendencias de {period} días, sugerimos aumentar stock mínimo en {suggested_increase}kg para reducir costos.',
'actions': ['Revisar niveles mínimos', 'Analizar proveedores', 'Actualizar configuración', 'Programar pedido mayor']
},
'en': {
'title': '📈 Stock Optimization: {ingredient_name}',
'message': 'Based on {period} day trends, suggest increasing minimum stock by {suggested_increase}kg to reduce costs.',
'actions': ['Review minimum levels', 'Analyze suppliers', 'Update configuration', 'Schedule larger order']
}
},
'production_efficiency': {
'es': {
'title': '⚙️ Mejora de Eficiencia',
'message': 'Cambiar horarios de horneado a {suggested_time} puede reducir costos energéticos en {savings_percent}%.',
'actions': ['Revisar horarios', 'Consultar personal', 'Probar nuevo horario', 'Medir resultados']
}
},
'sales_opportunity': {
'es': {
'title': '💰 Oportunidad de Venta',
'message': '{product_name} tiene alta demanda los {days}. Incrementar producción puede aumentar ventas {increase_percent}%.',
'actions': ['Aumentar producción', 'Promocionar producto', 'Revisar precios', 'Planificar ingredientes']
}
},
'seasonal_adjustment': {
'es': {
'title': '🍂 Ajuste Estacional',
'message': 'Época de {season}: ajustar producción de {products} según patrones históricos.',
'actions': ['Revisar recetas estacionales', 'Ajustar inventario', 'Planificar promociones', 'Entrenar personal']
}
},
'cost_reduction': {
'es': {
'title': '💡 Reducción de Costos',
'message': 'Cambiar a proveedor {supplier_name} para {ingredient} puede ahorrar {savings_euros}€/mes.',
'actions': ['Evaluar calidad', 'Negociar precios', 'Probar muestras', 'Cambiar proveedor gradualmente']
}
},
'waste_reduction': {
'es': {
'title': '♻️ Reducción de Desperdicio',
'message': 'Ajustar tamaños de lote de {product} puede reducir desperdicio en {waste_reduction_percent}%.',
'actions': ['Analizar ventas', 'Ajustar recetas', 'Cambiar lotes', 'Monitorear resultados']
}
},
'quality_improvement': {
'es': {
'title': '⭐ Mejora de Calidad',
'message': 'Temperatura de horneado de {product} puede optimizarse para mejor textura y sabor.',
'actions': ['Probar temperaturas', 'Documentar cambios', 'Entrenar panaderos', 'Obtener feedback']
}
},
'customer_satisfaction': {
'es': {
'title': '😊 Satisfacción del Cliente',
'message': 'Clientes solicitan más {product} los {days}. Considerar aumentar disponibilidad.',
'actions': ['Revisar comentarios', 'Aumentar producción', 'Crear promociones', 'Mejorar exhibición']
}
},
'energy_optimization': {
'es': {
'title': '⚡ Optimización Energética',
'message': 'Consolidar horneado entre {start_time} y {end_time} puede reducir costos energéticos {savings_euros}€/día.',
'actions': ['Revisar horarios energía', 'Reorganizar producción', 'Optimizar hornos', 'Medir consumo']
}
},
'staff_optimization': {
'es': {
'title': '👥 Optimización de Personal',
'message': 'Picos de trabajo los {days} a las {hours}. Considerar ajustar turnos para mejor eficiencia.',
'actions': ['Analizar cargas trabajo', 'Reorganizar turnos', 'Entrenar polivalencia', 'Contratar temporal']
}
}
}
def format_item_message(template_key: str, language: str, **kwargs) -> Dict[str, Any]:
"""Format item message using template with validation"""
template = ITEM_TEMPLATES.get(template_key, {}).get(language, {})
if not template:
# Fallback for missing templates
return {
'title': f'Notificación: {template_key}',
'message': f'Información: {", ".join([f"{k}: {v}" for k, v in kwargs.items()])}',
'actions': ['Revisar', 'Documentar']
}
try:
# Format with provided kwargs, handling missing values gracefully
formatted_title = template['title'].format(**kwargs)
formatted_message = template['message'].format(**kwargs)
return {
'title': formatted_title,
'message': formatted_message,
'actions': template.get('actions', [])
}
except KeyError as e:
# Handle missing format parameters
return {
'title': template.get('title', f'Notificación: {template_key}'),
'message': f"Error en plantilla - parámetro faltante: {e}. Datos: {kwargs}",
'actions': template.get('actions', ['Revisar configuración'])
}
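# Usage sketch (illustrative): rendering the Spanish production-delay template.
#   format_item_message('production_delay', 'es',
#                       batch_name='Pan de Centeno #42', delay_minutes=35)
#   -> {'title': '⏰ Retraso en Producción',
#       'message': 'Lote Pan de Centeno #42 con 35 minutos de retraso. Impacto en entregas del día.',
#       'actions': ['Acelerar producción', 'Notificar clientes', 'Reorganizar horarios', 'Buscar capacidad adicional']}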
def get_severity_emoji(severity: str) -> str:
"""Get emoji for severity level"""
emoji_map = {
'urgent': '🚨',
'high': '⚠️',
'medium': '💡',
'low': 'ℹ️'
}
return emoji_map.get(severity, '📋')
def get_item_type_emoji(item_type: str) -> str:
"""Get emoji for item type"""
emoji_map = {
'alert': '🚨',
'recommendation': '💡'
}
return emoji_map.get(item_type, '📋')
def format_business_time(hour: int) -> str:
"""Format hour in Spanish business context"""
if hour == 0:
return "medianoche"
elif hour < 12:
return f"{hour}:00 AM"
elif hour == 12:
return "12:00 PM (mediodía)"
else:
return f"{hour-12}:00 PM"
def get_spanish_day_name(day_number: int) -> str:
"""Get Spanish day name (0=Monday)"""
days = ["lunes", "martes", "miércoles", "jueves", "viernes", "sábado", "domingo"]
return days[day_number] if 0 <= day_number <= 6 else "día desconocido"
def format_currency(amount: float) -> str:
"""Format currency in Spanish Euro format"""
return f"{amount:.2f}"
def format_percentage(value: float) -> str:
"""Format percentage in Spanish format"""
return f"{value:.1f}%"

View File

@@ -0,0 +1,82 @@
# shared/config/rabbitmq_config.py
"""
RabbitMQ configuration for the alert and recommendation system
Supports both alerts and recommendations through a unified topic exchange
"""
RABBITMQ_CONFIG = {
"exchanges": {
"alerts": {
"name": "alerts.exchange",
"type": "topic",
"durable": True,
"auto_delete": False
},
"dead_letter": {
"name": "dlx.exchange",
"type": "direct",
"durable": True,
"auto_delete": False
}
},
"queues": {
"alert_processing": {
"name": "alert.processing.queue",
"durable": True,
"arguments": {
"x-message-ttl": 3600000, # 1 hour TTL
"x-max-length": 10000, # Max 10k messages
"x-overflow": "reject-publish",
"x-dead-letter-exchange": "dlx.exchange",
"x-dead-letter-routing-key": "failed.items"
}
},
"dead_letter": {
"name": "alert.dead_letter.queue",
"durable": True,
"arguments": {
"x-message-ttl": 86400000 # 24 hours for dead letters
}
}
},
"bindings": [
{
"queue": "alert.processing.queue",
"exchange": "alerts.exchange",
"routing_key": "*.*.*" # alert/recommendation.severity.service
},
{
"queue": "alert.dead_letter.queue",
"exchange": "dlx.exchange",
"routing_key": "failed.items"
}
],
"routing_patterns": {
# alert/recommendation.severity.service_name
"alert": "alert.{severity}.{service}",
"recommendation": "recommendation.{severity}.{service}",
"all_alerts": "alert.*.*",
"all_recommendations": "recommendation.*.*",
"urgent_items": "*.urgent.*",
"high_items": "*.high.*"
}
}
def get_routing_key(item_type: str, severity: str, service: str) -> str:
"""Generate routing key for item publishing"""
return f"{item_type}.{severity}.{service}"
def get_binding_patterns(item_types: list = None, severities: list = None, services: list = None) -> list:
"""Generate binding patterns for selective consumption"""
patterns = []
item_types = item_types or ["alert", "recommendation"]
severities = severities or ["urgent", "high", "medium", "low"]
services = services or ["*"]
for item_type in item_types:
for severity in severities:
for service in services:
patterns.append(f"{item_type}.{severity}.{service}")
return patterns
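# Usage sketch (illustrative):
#   get_routing_key('alert', 'urgent', 'production')
#       -> 'alert.urgent.production'
#   get_binding_patterns(item_types=['alert'], severities=['urgent'])
#       -> ['alert.urgent.*']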

View File

@@ -112,7 +112,7 @@ class DatabaseUtils:
"checked_in": pool.checkedin(),
"checked_out": pool.checkedout(),
"overflow": pool.overflow(),
"invalid": pool.invalid()
"status": pool.status()
}
else:
return {"status": "no_pool"}

View File

@@ -0,0 +1,420 @@
# shared/monitoring/alert_metrics.py
"""
Metrics and monitoring for the alert and recommendation system
Provides comprehensive metrics for tracking system performance and effectiveness
"""
from prometheus_client import Counter, Histogram, Gauge, Summary, Info
from typing import Dict, Any
import inspect
import time
from functools import wraps
import structlog
logger = structlog.get_logger()
# =================================================================
# DETECTION METRICS
# =================================================================
# Alert and recommendation generation
items_published = Counter(
'alert_items_published_total',
'Total number of alerts and recommendations published',
['service', 'item_type', 'severity', 'type']
)
item_checks_performed = Counter(
'alert_checks_performed_total',
'Total number of alert checks performed',
['service', 'check_type', 'pattern']
)
item_check_duration = Histogram(
'alert_check_duration_seconds',
'Time taken to perform alert checks',
['service', 'check_type'],
buckets=[0.1, 0.5, 1, 2, 5, 10, 30, 60]
)
alert_detection_errors = Counter(
'alert_detection_errors_total',
'Total number of errors during alert detection',
['service', 'error_type', 'check_type']
)
# Deduplication metrics
duplicate_items_prevented = Counter(
'duplicate_items_prevented_total',
'Number of duplicate alerts/recommendations prevented',
['service', 'item_type', 'type']
)
# =================================================================
# PROCESSING METRICS
# =================================================================
# Alert processor metrics
items_processed = Counter(
'alert_items_processed_total',
'Total number of items processed by alert processor',
['item_type', 'severity', 'type', 'status']
)
item_processing_duration = Histogram(
'alert_processing_duration_seconds',
'Time taken to process alerts/recommendations',
['item_type', 'severity'],
buckets=[0.01, 0.05, 0.1, 0.5, 1, 2, 5]
)
database_storage_duration = Histogram(
'alert_database_storage_duration_seconds',
'Time taken to store items in database',
buckets=[0.01, 0.05, 0.1, 0.5, 1]
)
processing_errors = Counter(
'alert_processing_errors_total',
'Total number of processing errors',
['error_type', 'item_type']
)
# =================================================================
# DELIVERY METRICS
# =================================================================
# Notification delivery
notifications_sent = Counter(
'alert_notifications_sent_total',
'Total notifications sent through all channels',
['channel', 'item_type', 'severity', 'status']
)
notification_delivery_duration = Histogram(
'alert_notification_delivery_duration_seconds',
'Time from item generation to delivery',
['item_type', 'severity', 'channel'],
buckets=[0.1, 0.5, 1, 5, 10, 30, 60]
)
delivery_failures = Counter(
'alert_delivery_failures_total',
'Failed notification deliveries',
['channel', 'item_type', 'error_type']
)
# Channel-specific metrics
email_notifications = Counter(
'alert_email_notifications_total',
'Email notifications sent',
['status', 'item_type']
)
whatsapp_notifications = Counter(
'alert_whatsapp_notifications_total',
'WhatsApp notifications sent',
['status', 'item_type']
)
sse_events_sent = Counter(
'alert_sse_events_sent_total',
'SSE events sent to dashboard',
['tenant', 'event_type', 'item_type']
)
# =================================================================
# SSE METRICS
# =================================================================
# SSE connection metrics
sse_active_connections = Gauge(
'alert_sse_active_connections',
'Number of active SSE connections',
['tenant_id']
)
sse_connection_duration = Histogram(
'alert_sse_connection_duration_seconds',
'Duration of SSE connections',
buckets=[10, 30, 60, 300, 600, 1800, 3600]
)
sse_message_queue_size = Gauge(
'alert_sse_message_queue_size',
'Current size of SSE message queues',
['tenant_id']
)
sse_connection_errors = Counter(
'alert_sse_connection_errors_total',
'SSE connection errors',
['error_type', 'tenant_id']
)
# =================================================================
# SYSTEM HEALTH METRICS
# =================================================================
# Active items gauge
active_items_gauge = Gauge(
'alert_active_items_current',
'Current number of active alerts and recommendations',
['tenant_id', 'item_type', 'severity']
)
# System component health
system_component_health = Gauge(
'alert_system_component_health',
'Health status of alert system components (1=healthy, 0=unhealthy)',
['component', 'service']
)
# Leader election status
scheduler_leader_status = Gauge(
'alert_scheduler_leader_status',
'Leader election status for schedulers (1=leader, 0=follower)',
['service']
)
# Message queue health
rabbitmq_connection_status = Gauge(
'alert_rabbitmq_connection_status',
'RabbitMQ connection status (1=connected, 0=disconnected)',
['service']
)
redis_connection_status = Gauge(
'alert_redis_connection_status',
'Redis connection status (1=connected, 0=disconnected)',
['service']
)
# =================================================================
# BUSINESS METRICS
# =================================================================
# Alert response metrics
items_acknowledged = Counter(
'alert_items_acknowledged_total',
'Number of items acknowledged by users',
['item_type', 'severity', 'service']
)
items_resolved = Counter(
'alert_items_resolved_total',
'Number of items resolved by users',
['item_type', 'severity', 'service']
)
item_response_time = Histogram(
'alert_item_response_time_seconds',
'Time from item creation to acknowledgment',
['item_type', 'severity'],
buckets=[60, 300, 600, 1800, 3600, 7200, 14400]
)
# Recommendation adoption
recommendations_implemented = Counter(
'alert_recommendations_implemented_total',
'Number of recommendations marked as implemented',
['type', 'service']
)
# Effectiveness metrics
false_positive_rate = Gauge(
'alert_false_positive_rate',
'Rate of false positive alerts',
['service', 'alert_type']
)
# =================================================================
# PERFORMANCE DECORATORS
# =================================================================
def track_duration(metric: Histogram, **labels):
"""Decorator to track function execution time"""
def decorator(func):
@wraps(func)
async def async_wrapper(*args, **kwargs):
start_time = time.time()
try:
result = await func(*args, **kwargs)
metric.labels(**labels).observe(time.time() - start_time)
return result
            except Exception:
# Track error duration too
metric.labels(**labels).observe(time.time() - start_time)
raise
@wraps(func)
def sync_wrapper(*args, **kwargs):
start_time = time.time()
try:
result = func(*args, **kwargs)
metric.labels(**labels).observe(time.time() - start_time)
return result
            except Exception:
metric.labels(**labels).observe(time.time() - start_time)
raise
        return async_wrapper if inspect.iscoroutinefunction(func) else sync_wrapper
return decorator
def track_errors(error_counter: Counter, **labels):
"""Decorator to track errors in functions"""
def decorator(func):
@wraps(func)
async def async_wrapper(*args, **kwargs):
try:
return await func(*args, **kwargs)
except Exception as e:
error_counter.labels(error_type=type(e).__name__, **labels).inc()
raise
@wraps(func)
def sync_wrapper(*args, **kwargs):
try:
return func(*args, **kwargs)
except Exception as e:
error_counter.labels(error_type=type(e).__name__, **labels).inc()
raise
        return async_wrapper if inspect.iscoroutinefunction(func) else sync_wrapper
return decorator
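# Usage sketch: instrumenting a hypothetical async check with both decorators.
# `run_stock_check` and its label values are illustrative, not part of the system:
#
#   @track_duration(item_check_duration, service="forecasting", check_type="stock_level")
#   @track_errors(alert_detection_errors, service="forecasting", check_type="stock_level")
#   async def run_stock_check(tenant_id: str) -> None:
#       ...  # query inventory and publish alerts via record_item_published(...)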
# =================================================================
# UTILITY FUNCTIONS
# =================================================================
def record_item_published(service: str, item_type: str, severity: str, alert_type: str):
"""Record that an item was published"""
items_published.labels(
service=service,
item_type=item_type,
severity=severity,
type=alert_type
).inc()
def record_item_processed(item_type: str, severity: str, alert_type: str, status: str):
"""Record that an item was processed"""
items_processed.labels(
item_type=item_type,
severity=severity,
type=alert_type,
status=status
).inc()
def record_notification_sent(channel: str, item_type: str, severity: str, status: str):
"""Record notification delivery"""
notifications_sent.labels(
channel=channel,
item_type=item_type,
severity=severity,
status=status
).inc()
def update_active_items(tenant_id: str, item_type: str, severity: str, count: int):
"""Update active items gauge"""
active_items_gauge.labels(
tenant_id=tenant_id,
item_type=item_type,
severity=severity
).set(count)
def update_component_health(component: str, service: str, is_healthy: bool):
"""Update component health status"""
system_component_health.labels(
component=component,
service=service
).set(1 if is_healthy else 0)
def update_connection_status(connection_type: str, service: str, is_connected: bool):
"""Update connection status"""
if connection_type == 'rabbitmq':
rabbitmq_connection_status.labels(service=service).set(1 if is_connected else 0)
elif connection_type == 'redis':
redis_connection_status.labels(service=service).set(1 if is_connected else 0)
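# Usage sketch for the helpers above (label values are illustrative), e.g. from
# an alert-processor consume loop:
#
#   record_item_processed("alert", "urgent", "stockout_risk", status="stored")
#   record_notification_sent("whatsapp", "alert", "urgent", status="sent")
#   update_connection_status("rabbitmq", "alert-processor", is_connected=True)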
# =================================================================
# METRICS AGGREGATOR
# =================================================================
class AlertMetricsCollector:
"""Centralized metrics collector for alert system"""
def __init__(self, service_name: str):
self.service_name = service_name
def record_check_performed(self, check_type: str, pattern: str):
"""Record that a check was performed"""
item_checks_performed.labels(
service=self.service_name,
check_type=check_type,
pattern=pattern
).inc()
def record_detection_error(self, error_type: str, check_type: str):
"""Record detection error"""
alert_detection_errors.labels(
service=self.service_name,
error_type=error_type,
check_type=check_type
).inc()
def record_duplicate_prevented(self, item_type: str, alert_type: str):
"""Record prevented duplicate"""
duplicate_items_prevented.labels(
service=self.service_name,
item_type=item_type,
type=alert_type
).inc()
def update_leader_status(self, is_leader: bool):
"""Update leader election status"""
scheduler_leader_status.labels(service=self.service_name).set(1 if is_leader else 0)
    def get_service_metrics(self) -> Dict[str, Any]:
        """Get aggregate totals for this service across all label combinations"""
        return {
            'service': self.service_name,
            'items_published': _metric_total(items_published, service=self.service_name),
            'checks_performed': _metric_total(item_checks_performed, service=self.service_name),
            'detection_errors': _metric_total(alert_detection_errors, service=self.service_name),
            'duplicates_prevented': _metric_total(duplicate_items_prevented, service=self.service_name)
        }
# =================================================================
# DASHBOARD METRICS
# =================================================================
def _metric_total(metric, **label_filter) -> float:
    """Sum a metric's samples via the public collect() API, optionally filtered by labels.
    Labelled metrics expose no _value attribute, so reading private internals would fail."""
    total = 0.0
    for collected in metric.collect():
        for sample in collected.samples:
            if sample.name.endswith('_created'):
                continue  # skip the *_created timestamp samples counters also emit
            if all(sample.labels.get(k) == v for k, v in label_filter.items()):
                total += sample.value
    return total
def get_system_overview_metrics() -> Dict[str, Any]:
    """Get overview metrics for monitoring dashboard"""
    try:
        return {
            'total_items_published': _metric_total(items_published),
            'total_checks_performed': _metric_total(item_checks_performed),
            'total_notifications_sent': _metric_total(notifications_sent),
            'active_sse_connections': _metric_total(sse_active_connections),
            'processing_errors': _metric_total(processing_errors),
            'delivery_failures': _metric_total(delivery_failures),
            'timestamp': time.time()
        }
    except Exception as e:
        logger.error("Error collecting overview metrics", error=str(e))
        return {'error': str(e), 'timestamp': time.time()}
def get_tenant_metrics(tenant_id: str) -> Dict[str, Any]:
    """Get metrics for a specific tenant"""
    try:
        return {
            'tenant_id': tenant_id,
            'active_connections': _metric_total(sse_active_connections, tenant_id=tenant_id),
            'events_sent': _metric_total(sse_events_sent, tenant=tenant_id),
            'timestamp': time.time()
        }
    except Exception as e:
        logger.error("Error collecting tenant metrics", tenant_id=tenant_id, error=str(e))
        return {'tenant_id': tenant_id, 'error': str(e), 'timestamp': time.time()}
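# Usage sketch: exposing these metrics over HTTP with the standard
# prometheus_client exporter; the port and service name are illustrative.
#
#   from prometheus_client import start_http_server
#   start_http_server(9102)  # serves /metrics for Prometheus to scrape
#   collector = AlertMetricsCollector("forecasting-service")
#   collector.record_check_performed("stock_level", pattern="daily")
#   collector.update_leader_status(is_leader=True)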

View File

@@ -1,8 +0,0 @@
# ================================================================
# shared/notifications/__init__.py
# ================================================================
"""
Shared Notifications Module - Alert integration using existing notification service
"""
__all__ = []