Add new alert architecture

This commit is contained in:
Urtzi Alfaro
2025-08-23 10:19:58 +02:00
parent 1a9839240e
commit 4b4268d640
45 changed files with 6518 additions and 1590 deletions

View File

@@ -0,0 +1,26 @@
FROM python:3.11-slim
WORKDIR /app
# Install system dependencies
RUN apt-get update && apt-get install -y \
gcc \
&& rm -rf /var/lib/apt/lists/*
# Copy requirements and install dependencies
COPY services/alert_processor/requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# Copy shared libraries
COPY shared/ /app/shared/
# Copy application code
COPY services/alert_processor/app/ /app/app/
# Create non-root user
RUN useradd -m -u 1000 appuser && chown -R appuser:appuser /app
USER appuser
EXPOSE 8000
CMD ["python", "-m", "app.main"]

View File

@@ -0,0 +1 @@
# Alert Processor Service

View File

@@ -0,0 +1,49 @@
# services/alert_processor/app/config.py
"""
Alert Processor Service Configuration
"""
import os
from typing import List
from shared.config.base import BaseServiceSettings
class AlertProcessorConfig(BaseServiceSettings):
"""Configuration for Alert Processor Service"""
SERVICE_NAME: str = "alert-processor"
APP_NAME: str = "Alert Processor Service"
DESCRIPTION: str = "Central alert and recommendation processor"
# Use the notification database for alert storage
# This makes sense since alerts and notifications are closely related
DATABASE_URL: str = os.getenv(
"NOTIFICATION_DATABASE_URL",
"postgresql+asyncpg://notification_user:notification_pass123@notification-db:5432/notification_db"
)
# Use dedicated Redis DB for alert processing
REDIS_DB: int = int(os.getenv("ALERT_PROCESSOR_REDIS_DB", "6"))
# Alert processing configuration
BATCH_SIZE: int = int(os.getenv("ALERT_BATCH_SIZE", "10"))
PROCESSING_TIMEOUT: int = int(os.getenv("ALERT_PROCESSING_TIMEOUT", "30"))
# Deduplication settings
ALERT_DEDUPLICATION_WINDOW_MINUTES: int = int(os.getenv("ALERT_DEDUPLICATION_WINDOW_MINUTES", "15"))
RECOMMENDATION_DEDUPLICATION_WINDOW_MINUTES: int = int(os.getenv("RECOMMENDATION_DEDUPLICATION_WINDOW_MINUTES", "60"))
# Alert severity channel mappings (hardcoded for now to avoid config parsing issues)
@property
def urgent_channels(self) -> List[str]:
return ["whatsapp", "email", "push", "dashboard"]
@property
def high_channels(self) -> List[str]:
return ["whatsapp", "email", "dashboard"]
@property
def medium_channels(self) -> List[str]:
return ["email", "dashboard"]
@property
def low_channels(self) -> List[str]:
return ["dashboard"]

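The channel mappings above are hardcoded on purpose. As a point of reference, a minimal sketch of how they could later be made configurable follows, assuming a hypothetical `ALERT_CHANNEL_MAP` environment variable carrying a JSON object; none of this is part of the committed configuration.

```python
# Hypothetical alternative (not in this commit): drive the mapping from a JSON env var,
# e.g. ALERT_CHANNEL_MAP='{"urgent": ["whatsapp", "email", "push", "dashboard"]}'
import json
import os
from typing import Dict, List

_DEFAULT_CHANNELS: Dict[str, List[str]] = {
    "urgent": ["whatsapp", "email", "push", "dashboard"],
    "high": ["whatsapp", "email", "dashboard"],
    "medium": ["email", "dashboard"],
    "low": ["dashboard"],
}


def load_channel_map() -> Dict[str, List[str]]:
    """Merge optional ALERT_CHANNEL_MAP overrides with the defaults above."""
    try:
        overrides = json.loads(os.getenv("ALERT_CHANNEL_MAP", "{}"))
    except json.JSONDecodeError:
        overrides = {}
    return {**_DEFAULT_CHANNELS, **overrides}
```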
View File

@@ -0,0 +1,360 @@
# services/alert_processor/app/main.py
"""
Alert Processor Service - Central hub for processing alerts and recommendations
Consumes from RabbitMQ, stores in database, and routes to notification service
"""
import asyncio
import json
import signal
import sys
from datetime import datetime
from typing import Dict, Any
import structlog
import redis.asyncio as aioredis
from aio_pika import connect_robust, IncomingMessage, ExchangeType
from app.config import AlertProcessorConfig
from shared.database.base import create_database_manager
from shared.clients.base_service_client import BaseServiceClient
from shared.config.rabbitmq_config import RABBITMQ_CONFIG
# Setup logging
structlog.configure(
processors=[
structlog.stdlib.filter_by_level,
structlog.stdlib.add_logger_name,
structlog.stdlib.add_log_level,
structlog.stdlib.PositionalArgumentsFormatter(),
structlog.processors.TimeStamper(fmt="ISO"),
structlog.processors.StackInfoRenderer(),
structlog.processors.format_exc_info,
structlog.processors.JSONRenderer()
],
context_class=dict,
logger_factory=structlog.stdlib.LoggerFactory(),
wrapper_class=structlog.stdlib.BoundLogger,
cache_logger_on_first_use=True,
)
logger = structlog.get_logger()
class NotificationServiceClient(BaseServiceClient):
"""Client for notification service"""
def __init__(self, config: AlertProcessorConfig):
super().__init__("notification-service", config)
self.config = config
def get_service_base_path(self) -> str:
"""Return the base path for notification service APIs"""
return "/api/v1"
async def send_notification(self, tenant_id: str, notification: Dict[str, Any], channels: list) -> Dict[str, Any]:
"""Send notification via notification service"""
try:
response = await self.post(
"/api/v1/notifications/send",
json={
"tenant_id": tenant_id,
"notification": notification,
"channels": channels
}
)
return response
except Exception as e:
logger.error("Failed to send notification", error=str(e), tenant_id=tenant_id)
return {"status": "failed", "error": str(e)}
class AlertProcessorService:
"""
Central service for processing and routing alerts and recommendations
Integrates with notification service for multi-channel delivery
"""
def __init__(self, config: AlertProcessorConfig):
self.config = config
self.db_manager = create_database_manager(config.DATABASE_URL, "alert-processor")
self.notification_client = NotificationServiceClient(config)
self.redis = None
self.connection = None
self.channel = None
self.running = False
# Metrics
self.items_processed = 0
self.items_stored = 0
self.notifications_sent = 0
self.errors_count = 0
async def start(self):
"""Start the alert processor service"""
try:
logger.info("Starting Alert Processor Service")
# Connect to Redis for SSE publishing
self.redis = aioredis.from_url(self.config.REDIS_URL)
logger.info("Connected to Redis")
# Connect to RabbitMQ
await self._setup_rabbitmq()
# Start consuming messages
await self._start_consuming()
self.running = True
logger.info("Alert Processor Service started successfully")
except Exception as e:
logger.error("Failed to start Alert Processor Service", error=str(e))
raise
async def _setup_rabbitmq(self):
"""Setup RabbitMQ connection and configuration"""
self.connection = await connect_robust(
self.config.RABBITMQ_URL,
heartbeat=30,
connection_attempts=5
)
self.channel = await self.connection.channel()
await self.channel.set_qos(prefetch_count=10) # Process 10 messages at a time
# Setup exchange and queue based on config
exchange_config = RABBITMQ_CONFIG["exchanges"]["alerts"]
self.exchange = await self.channel.declare_exchange(
exchange_config["name"],
getattr(ExchangeType, exchange_config["type"].upper()),
durable=exchange_config["durable"]
)
queue_config = RABBITMQ_CONFIG["queues"]["alert_processing"]
self.queue = await self.channel.declare_queue(
queue_config["name"],
durable=queue_config["durable"],
arguments=queue_config["arguments"]
)
# Bind to all alert and recommendation routing keys
await self.queue.bind(self.exchange, routing_key="*.*.*")
logger.info("RabbitMQ setup completed")
async def _start_consuming(self):
"""Start consuming messages from RabbitMQ"""
await self.queue.consume(self.process_item)
logger.info("Started consuming alert messages")
async def process_item(self, message: IncomingMessage):
"""Process incoming alert or recommendation"""
async with message.process():
try:
# Parse message
item = json.loads(message.body.decode())
logger.info("Processing item",
item_type=item.get('item_type'),
alert_type=item.get('type'),
severity=item.get('severity'),
tenant_id=item.get('tenant_id'))
# Store in database
stored_item = await self.store_item(item)
self.items_stored += 1
# Determine delivery channels based on severity and type
channels = self.get_channels_by_severity_and_type(
item['severity'],
item['item_type']
)
# Send via notification service if channels are specified
if channels:
notification_result = await self.notification_client.send_notification(
tenant_id=item['tenant_id'],
notification={
'type': item['item_type'], # 'alert' or 'recommendation'
'id': item['id'],
'title': item['title'],
'message': item['message'],
'severity': item['severity'],
'metadata': item.get('metadata', {}),
'actions': item.get('actions', []),
'email': item.get('email'),
'phone': item.get('phone'),
'user_id': item.get('user_id')
},
channels=channels
)
if notification_result.get('status') == 'success':
self.notifications_sent += 1
# Stream to SSE for real-time dashboard (always)
await self.stream_to_sse(item['tenant_id'], stored_item)
self.items_processed += 1
logger.info("Item processed successfully",
item_id=item['id'],
channels=len(channels))
except Exception as e:
self.errors_count += 1
logger.error("Item processing failed", error=str(e))
raise
async def store_item(self, item: dict) -> dict:
"""Store alert or recommendation in database"""
from sqlalchemy import text
query = text("""
INSERT INTO alerts (
id, tenant_id, item_type, alert_type, severity, status,
service, title, message, actions, metadata,
created_at
) VALUES (:id, :tenant_id, :item_type, :alert_type, :severity, :status,
:service, :title, :message, :actions, :metadata, :created_at)
RETURNING *
""")
async with self.db_manager.get_session() as session:
result = await session.execute(
query,
{
'id': item['id'],
'tenant_id': item['tenant_id'],
'item_type': item['item_type'], # 'alert' or 'recommendation'
'alert_type': item['type'],
'severity': item['severity'],
'status': 'active',
'service': item['service'],
'title': item['title'],
'message': item['message'],
'actions': json.dumps(item.get('actions', [])),
'metadata': json.dumps(item.get('metadata', {})),
'created_at': item['timestamp']
}
)
row = result.fetchone()
await session.commit()
logger.debug("Item stored in database", item_id=item['id'])
return dict(row._mapping)
async def stream_to_sse(self, tenant_id: str, item: dict):
"""Publish item to Redis for SSE streaming"""
channel = f"alerts:{tenant_id}"
# Prepare message for SSE
sse_message = {
'id': item['id'],
'item_type': item['item_type'],
'type': item['alert_type'],
'severity': item['severity'],
'title': item['title'],
'message': item['message'],
'actions': json.loads(item['actions']) if isinstance(item['actions'], str) else item['actions'],
'metadata': json.loads(item['metadata']) if isinstance(item['metadata'], str) else item['metadata'],
'timestamp': item['created_at'].isoformat() if hasattr(item['created_at'], 'isoformat') else item['created_at'],
'status': item['status']
}
# Publish to Redis channel for SSE
await self.redis.publish(channel, json.dumps(sse_message))
logger.debug("Item published to SSE", tenant_id=tenant_id, item_id=item['id'])
def get_channels_by_severity_and_type(self, severity: str, item_type: str) -> list:
"""Determine notification channels based on severity, type, and time"""
current_hour = datetime.now().hour
channels = ['dashboard'] # Always include dashboard (SSE)
if item_type == 'alert':
if severity == 'urgent':
# Urgent alerts: All channels immediately
channels.extend(['whatsapp', 'email', 'push'])
elif severity == 'high':
# High alerts: WhatsApp and email during extended hours
if 6 <= current_hour <= 22:
channels.extend(['whatsapp', 'email'])
else:
channels.append('email') # Email only during night
elif severity == 'medium':
# Medium alerts: Email during business hours
if 7 <= current_hour <= 20:
channels.append('email')
# Low severity: Dashboard only
elif item_type == 'recommendation':
# Recommendations: Less urgent, limit channels and respect business hours
if severity in ['medium', 'high']:
if 8 <= current_hour <= 19: # Business hours for recommendations
channels.append('email')
# Low/urgent (rare for recs): Dashboard only
return channels
async def stop(self):
"""Stop the alert processor service"""
self.running = False
logger.info("Stopping Alert Processor Service")
try:
# Close RabbitMQ connection
if self.connection and not self.connection.is_closed:
await self.connection.close()
# Close Redis connection
if self.redis:
await self.redis.close()
logger.info("Alert Processor Service stopped")
except Exception as e:
logger.error("Error stopping service", error=str(e))
def get_metrics(self) -> Dict[str, Any]:
"""Get service metrics"""
return {
"items_processed": self.items_processed,
"items_stored": self.items_stored,
"notifications_sent": self.notifications_sent,
"errors_count": self.errors_count,
"running": self.running
}
async def main():
"""Main entry point"""
config = AlertProcessorConfig()
service = AlertProcessorService(config)
# Setup signal handlers for graceful shutdown
def request_shutdown():
logger.info("Received shutdown signal")
service.running = False
# Register handlers on the running event loop; clearing `running` lets main() fall through to service.stop()
loop = asyncio.get_running_loop()
for sig in (signal.SIGTERM, signal.SIGINT):
loop.add_signal_handler(sig, request_shutdown)
try:
# Start the service
await service.start()
# Keep running
while service.running:
await asyncio.sleep(1)
except KeyboardInterrupt:
logger.info("Received keyboard interrupt")
except Exception as e:
logger.error("Service failed", error=str(e))
finally:
await service.stop()
if __name__ == "__main__":
asyncio.run(main())

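For reference, a minimal sketch of how an upstream service could publish a message that this processor would consume. The AMQP URL, exchange name and routing key below are illustrative assumptions (the real values come from `RABBITMQ_CONFIG`); the payload fields mirror what `process_item()` and `store_item()` read, and any three-segment routing key matches the `*.*.*` binding above.

```python
# Hedged sketch: publish a test item into the alerts exchange so the processor picks it up.
import asyncio
import json
import uuid
from datetime import datetime, timezone

from aio_pika import ExchangeType, Message, connect_robust


async def publish_test_alert() -> None:
    connection = await connect_robust("amqp://guest:guest@rabbitmq:5672/")
    channel = await connection.channel()
    exchange = await channel.declare_exchange("alerts", ExchangeType.TOPIC, durable=True)

    # Field names mirror what process_item()/store_item() read from the message body.
    payload = {
        "id": str(uuid.uuid4()),
        "tenant_id": str(uuid.uuid4()),
        "item_type": "alert",  # or "recommendation"
        "type": "critical_stock_shortage",
        "severity": "urgent",
        "service": "inventory",
        "title": "Stock crítico: Harina",
        "message": "Stock por debajo del mínimo",
        "actions": ["Contactar proveedor"],
        "metadata": {"ingredient_id": "demo"},
        "timestamp": datetime.now(timezone.utc).isoformat(),
    }
    # Any three-segment key is matched by the processor's "*.*.*" binding.
    await exchange.publish(
        Message(body=json.dumps(payload).encode()),
        routing_key="inventory.alert.urgent",
    )
    await connection.close()


if __name__ == "__main__":
    asyncio.run(publish_test_alert())
```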
View File

@@ -0,0 +1,12 @@
fastapi==0.104.1
uvicorn[standard]==0.24.0
aio-pika==9.3.1
redis==5.0.1
asyncpg==0.29.0
sqlalchemy==2.0.23
structlog==23.2.0
prometheus-client==0.19.0
pydantic-settings==2.1.0
pydantic==2.5.2
httpx==0.25.2
python-jose[cryptography]==3.3.0

View File

@@ -1,129 +0,0 @@
# ================================================================
# services/auth/README.md
# ================================================================
# Authentication Service
Microservice for user authentication and authorization in the bakery forecasting platform.
## Features
- User registration and login
- JWT access and refresh tokens
- Password security validation
- Rate limiting and login attempt tracking
- Multi-tenant user management
- Session management
- Event publishing for user actions
## Quick Start
### Development
```bash
# Start dependencies
docker-compose up -d auth-db redis rabbitmq
# Install dependencies
pip install -r requirements.txt
# Run migrations
alembic upgrade head
# Start service
uvicorn app.main:app --reload --host 0.0.0.0 --port 8001
```
### With Docker
```bash
# Start everything
docker-compose up -d
# View logs
docker-compose logs -f auth-service
# Run tests
docker-compose exec auth-service pytest
```
## API Endpoints
### Authentication
- `POST /api/v1/auth/register` - Register new user
- `POST /api/v1/auth/login` - User login
- `POST /api/v1/auth/refresh` - Refresh access token
- `POST /api/v1/auth/verify` - Verify token
- `POST /api/v1/auth/logout` - Logout user
### User Management
- `GET /api/v1/users/me` - Get current user
- `PUT /api/v1/users/me` - Update current user
- `POST /api/v1/users/change-password` - Change password
### Health
- `GET /health` - Health check
- `GET /metrics` - Prometheus metrics
## Configuration
Set these environment variables:
```bash
DATABASE_URL=postgresql+asyncpg://auth_user:auth_pass123@auth-db:5432/auth_db
REDIS_URL=redis://redis:6379/0
RABBITMQ_URL=amqp://bakery:forecast123@rabbitmq:5672/
JWT_SECRET_KEY=your-super-secret-jwt-key-change-in-production
JWT_ACCESS_TOKEN_EXPIRE_MINUTES=30
JWT_REFRESH_TOKEN_EXPIRE_DAYS=7
MAX_LOGIN_ATTEMPTS=5
LOCKOUT_DURATION_MINUTES=30
```
## Testing
```bash
# Run all tests
pytest
# Run with coverage
pytest --cov=app
# Run specific test file
pytest tests/test_auth.py -v
```
## Database Migrations
```bash
# Create migration
alembic revision --autogenerate -m "description"
# Apply migrations
alembic upgrade head
# Rollback
alembic downgrade -1
```
## Monitoring
- Health endpoint: `/health`
- Metrics endpoint: `/metrics` (Prometheus format)
- Logs: Structured JSON logging
- Tracing: Request ID tracking
## Security Features
- Bcrypt password hashing
- JWT tokens with expiration
- Rate limiting on login attempts
- Account lockout protection
- IP and user agent tracking
- Token revocation support
## Events Published
- `user.registered` - When user registers
- `user.login` - When user logs in
- `user.logout` - When user logs out
- `user.password_changed` - When password changes

View File

@@ -1,169 +0,0 @@
# ================================================================
# Documentation: services/forecasting/README.md
# ================================================================
# Forecasting Service
AI-powered demand prediction service for bakery operations in Madrid, Spain.
## Overview
The Forecasting Service is a specialized microservice responsible for generating accurate demand predictions for bakery products. It integrates trained ML models with real-time weather and traffic data to provide actionable forecasts for business planning.
## Features
### Core Functionality
- **Single Product Forecasting**: Generate predictions for individual products
- **Batch Forecasting**: Process multiple products and time periods
- **Real-time Predictions**: On-demand forecasting with external data
- **Business Rules**: Spanish bakery-specific adjustments
- **Alert System**: Automated notifications for demand anomalies
### Integration Points
- **Training Service**: Loads trained Prophet models
- **Data Service**: Retrieves weather and traffic data
- **Notification Service**: Sends alerts and reports
- **Gateway Service**: Authentication and request routing
## API Endpoints
### Forecasts
- `POST /api/v1/forecasts/single` - Generate single forecast
- `POST /api/v1/forecasts/batch` - Generate batch forecasts
- `GET /api/v1/forecasts/list` - List historical forecasts
- `GET /api/v1/forecasts/alerts` - Get forecast alerts
- `PUT /api/v1/forecasts/alerts/{id}/acknowledge` - Acknowledge alert
### Predictions
- `POST /api/v1/predictions/realtime` - Real-time prediction
- `GET /api/v1/predictions/quick/{product}` - Quick multi-day forecast
## Business Logic
### Spanish Bakery Rules
- **Siesta Impact**: Reduced afternoon activity consideration
- **Weather Adjustments**: Rain reduces traffic, extreme temperatures affect product mix
- **Holiday Handling**: Spanish holiday calendar integration
- **Weekend Patterns**: Different demand patterns for weekends
### Business Types
- **Individual Bakery**: Single location with direct sales
- **Central Workshop**: Production facility supplying multiple locations
## Configuration
### Environment Variables
```bash
# Database
DATABASE_URL=postgresql+asyncpg://user:pass@host:port/db
# External Services
TRAINING_SERVICE_URL=http://training-service:8000
DATA_SERVICE_URL=http://data-service:8000
# Business Rules
WEEKEND_ADJUSTMENT_FACTOR=0.8
HOLIDAY_ADJUSTMENT_FACTOR=0.5
RAIN_IMPACT_FACTOR=0.7
```
### Performance Settings
```bash
MAX_FORECAST_DAYS=30
PREDICTION_CACHE_TTL_HOURS=6
FORECAST_BATCH_SIZE=100
```
## Development
### Setup
```bash
cd services/forecasting
pip install -r requirements.txt
```
### Testing
```bash
pytest tests/ -v --cov=app
```
### Running Locally
```bash
uvicorn app.main:app --reload --port 8000
```
## Deployment
### Docker
```bash
docker build -t forecasting-service .
docker run -p 8000:8000 forecasting-service
```
### Kubernetes
```bash
kubectl apply -f infrastructure/kubernetes/base/forecasting-service.yaml
```
## Monitoring
### Metrics
- `forecasts_generated_total` - Total forecasts generated
- `predictions_served_total` - Total predictions served
- `forecast_processing_time_seconds` - Processing time histogram
- `active_models_count` - Number of active models
### Health Checks
- `/health` - Service health status
- `/metrics` - Prometheus metrics endpoint
## Performance
### Benchmarks
- **Single Forecast**: < 2 seconds average
- **Batch Forecasting**: 100 products in < 30 seconds
- **Concurrent Load**: 95%+ success rate at 20 concurrent requests
### Optimization
- Model caching for faster predictions
- Feature preparation optimization
- Database query optimization
- Asynchronous external API calls
## Troubleshooting
### Common Issues
1. **No Model Found Error**
- Ensure training service has models for tenant/product
- Check model training logs in training service
2. **High Prediction Latency**
- Monitor model cache hit rate
- Check external service response times
- Review database query performance
3. **Inaccurate Predictions**
- Verify external data quality (weather/traffic)
- Check model performance metrics
- Review business rule configurations
### Logging
```bash
# View service logs
docker logs forecasting-service
# Debug level logging
LOG_LEVEL=DEBUG uvicorn app.main:app
```
## Contributing
1. Follow the existing code structure and patterns
2. Add tests for new functionality
3. Update documentation for API changes
4. Ensure performance benchmarks are maintained
## License
This service is part of the Bakery Forecasting Platform - MIT License

View File

@@ -14,6 +14,7 @@ import structlog
from app.core.config import settings
from app.core.database import init_db, close_db
from app.api import ingredients, stock, classification
from app.services.inventory_alert_service import InventoryAlertService
from shared.monitoring.health import router as health_router
from shared.monitoring.metrics import setup_metrics_early
# Auth decorators are used in endpoints, no global setup needed
@@ -32,6 +33,14 @@ async def lifespan(app: FastAPI):
await init_db()
logger.info("Database initialized successfully")
# Initialize alert service
alert_service = InventoryAlertService(settings)
await alert_service.start()
logger.info("Inventory alert service started")
# Store alert service in app state
app.state.alert_service = alert_service
# Metrics setup was already done early - no need to repeat it here
logger.info("Metrics setup completed")
@@ -44,6 +53,11 @@ async def lifespan(app: FastAPI):
# Shutdown
logger.info("Shutting down Inventory Service")
try:
# Stop alert service
if hasattr(app.state, 'alert_service'):
await app.state.alert_service.stop()
logger.info("Alert service stopped")
await close_db()
logger.info("Database connections closed")
except Exception as e:

View File

@@ -0,0 +1,710 @@
# services/inventory/app/services/inventory_alert_service.py
"""
Inventory-specific alert and recommendation detection service
Implements hybrid detection patterns for critical stock issues and optimization opportunities
"""
import asyncio
import json
from typing import List, Dict, Any, Optional
from uuid import UUID
from datetime import datetime, timedelta
import structlog
from apscheduler.triggers.cron import CronTrigger
from shared.alerts.base_service import BaseAlertService, AlertServiceMixin
from shared.alerts.templates import format_item_message
logger = structlog.get_logger()
class InventoryAlertService(BaseAlertService, AlertServiceMixin):
"""Inventory service alert and recommendation detection"""
def setup_scheduled_checks(self):
"""Inventory-specific scheduled checks for alerts and recommendations"""
# Critical stock checks - every 5 minutes (alerts)
self.scheduler.add_job(
self.check_stock_levels,
CronTrigger(minute='*/5'),
id='stock_levels',
misfire_grace_time=30,
max_instances=1
)
# Expiry checks - every 2 minutes (food safety critical, alerts)
self.scheduler.add_job(
self.check_expiring_products,
CronTrigger(minute='*/2'),
id='expiry_check',
misfire_grace_time=30,
max_instances=1
)
# Temperature checks - every 2 minutes (alerts)
self.scheduler.add_job(
self.check_temperature_breaches,
CronTrigger(minute='*/2'),
id='temperature_check',
misfire_grace_time=30,
max_instances=1
)
# Inventory optimization - every 30 minutes (recommendations)
self.scheduler.add_job(
self.generate_inventory_recommendations,
CronTrigger(minute='*/30'),
id='inventory_recs',
misfire_grace_time=120,
max_instances=1
)
# Waste reduction analysis - every hour (recommendations)
self.scheduler.add_job(
self.generate_waste_reduction_recommendations,
CronTrigger(minute='0'),
id='waste_reduction_recs',
misfire_grace_time=300,
max_instances=1
)
logger.info("Inventory alert schedules configured",
service=self.config.SERVICE_NAME)
async def check_stock_levels(self):
"""Batch check all stock levels for critical shortages (alerts)"""
try:
self._checks_performed += 1
query = """
WITH stock_analysis AS (
SELECT
i.*,
COALESCE(p.scheduled_quantity, 0) as tomorrow_needed,
COALESCE(s.avg_daily_usage, 0) as avg_daily_usage,
COALESCE(s.lead_time_days, 7) as lead_time_days,
CASE
WHEN i.current_stock < i.minimum_stock THEN 'critical'
WHEN i.current_stock < i.minimum_stock * 1.2 THEN 'low'
WHEN i.current_stock > i.maximum_stock THEN 'overstock'
ELSE 'normal'
END as status,
GREATEST(0, i.minimum_stock - i.current_stock) as shortage_amount
FROM inventory_items i
LEFT JOIN production_schedule p ON p.ingredient_id = i.id
AND p.date = CURRENT_DATE + INTERVAL '1 day'
LEFT JOIN supplier_items s ON s.ingredient_id = i.id
WHERE i.tenant_id = :tenant_id AND i.active = true
)
SELECT * FROM stock_analysis WHERE status != 'normal'
ORDER BY
CASE status
WHEN 'critical' THEN 1
WHEN 'low' THEN 2
WHEN 'overstock' THEN 3
END,
shortage_amount DESC
"""
tenants = await self.get_active_tenants()
for tenant_id in tenants:
try:
from sqlalchemy import text
async with self.db_manager.get_session() as session:
result = await session.execute(text(query), {"tenant_id": tenant_id})
issues = result.mappings().all()
for issue in issues:
await self._process_stock_issue(tenant_id, issue)
except Exception as e:
logger.error("Error checking stock for tenant",
tenant_id=str(tenant_id),
error=str(e))
logger.debug("Stock level check completed",
tenants_checked=len(tenants))
except Exception as e:
logger.error("Stock level check failed", error=str(e))
self._errors_count += 1
async def _process_stock_issue(self, tenant_id: UUID, issue: Dict[str, Any]):
"""Process individual stock issue"""
try:
if issue['status'] == 'critical':
# Critical stock shortage - immediate alert
template_data = self.format_spanish_message(
'critical_stock_shortage',
ingredient_name=issue["name"],
current_stock=issue["current_stock"],
required_stock=issue["tomorrow_needed"] or issue["minimum_stock"],
shortage_amount=issue["shortage_amount"]
)
await self.publish_item(tenant_id, {
'type': 'critical_stock_shortage',
'severity': 'urgent',
'title': template_data['title'],
'message': template_data['message'],
'actions': template_data['actions'],
'metadata': {
'ingredient_id': str(issue['id']),
'current_stock': float(issue['current_stock']),
'minimum_stock': float(issue['minimum_stock']),
'shortage_amount': float(issue['shortage_amount']),
'tomorrow_needed': float(issue['tomorrow_needed'] or 0),
'lead_time_days': issue['lead_time_days']
}
}, item_type='alert')
elif issue['status'] == 'low':
# Low stock - high priority alert
template_data = self.format_spanish_message(
'critical_stock_shortage',
ingredient_name=issue["name"],
current_stock=issue["current_stock"],
required_stock=issue["minimum_stock"]
)
severity = self.get_business_hours_severity('high')
await self.publish_item(tenant_id, {
'type': 'low_stock_warning',
'severity': severity,
'title': f'⚠️ Stock Bajo: {issue["name"]}',
'message': f'Stock actual {issue["current_stock"]}kg, mínimo {issue["minimum_stock"]}kg. Considerar pedido pronto.',
'actions': ['Revisar consumo', 'Programar pedido', 'Contactar proveedor'],
'metadata': {
'ingredient_id': str(issue['id']),
'current_stock': float(issue['current_stock']),
'minimum_stock': float(issue['minimum_stock'])
}
}, item_type='alert')
elif issue['status'] == 'overstock':
# Overstock - medium priority alert
severity = self.get_business_hours_severity('medium')
await self.publish_item(tenant_id, {
'type': 'overstock_warning',
'severity': severity,
'title': f'📦 Exceso de Stock: {issue["name"]}',
'message': f'Stock actual {issue["current_stock"]}kg excede máximo {issue["maximum_stock"]}kg. Revisar para evitar caducidad.',
'actions': ['Revisar caducidades', 'Aumentar producción', 'Ofertas especiales', 'Ajustar pedidos'],
'metadata': {
'ingredient_id': str(issue['id']),
'current_stock': float(issue['current_stock']),
'maximum_stock': float(issue['maximum_stock'])
}
}, item_type='alert')
except Exception as e:
logger.error("Error processing stock issue",
ingredient_id=str(issue.get('id')),
error=str(e))
async def check_expiring_products(self):
"""Check for products approaching expiry (alerts)"""
try:
self._checks_performed += 1
query = """
SELECT
i.id, i.name, i.current_stock, i.tenant_id,
b.id as batch_id, b.expiry_date, b.quantity,
(b.expiry_date::date - CURRENT_DATE) as days_to_expiry
FROM inventory_items i
JOIN inventory_batches b ON b.ingredient_id = i.id
WHERE b.expiry_date <= CURRENT_DATE + INTERVAL '7 days'
AND b.quantity > 0
AND b.status = 'active'
ORDER BY b.expiry_date ASC
"""
from sqlalchemy import text
async with self.db_manager.get_session() as session:
result = await session.execute(text(query))
expiring_items = result.mappings().all()
# Group by tenant
by_tenant = {}
for item in expiring_items:
tenant_id = item['tenant_id']
if tenant_id not in by_tenant:
by_tenant[tenant_id] = []
by_tenant[tenant_id].append(item)
for tenant_id, items in by_tenant.items():
await self._process_expiring_items(tenant_id, items)
except Exception as e:
logger.error("Expiry check failed", error=str(e))
self._errors_count += 1
async def _process_expiring_items(self, tenant_id: UUID, items: List[Dict[str, Any]]):
"""Process expiring items for a tenant"""
try:
# Group by urgency
expired = [i for i in items if i['days_to_expiry'] <= 0]
urgent = [i for i in items if 0 < i['days_to_expiry'] <= 2]
warning = [i for i in items if 2 < i['days_to_expiry'] <= 7]
# Process expired products (urgent alerts)
if expired:
product_count = len(expired)
product_names = [i['name'] for i in expired[:3]] # First 3 names
if len(expired) > 3:
product_names.append(f"y {len(expired) - 3} más")
template_data = self.format_spanish_message(
'expired_products',
product_count=product_count,
product_names=", ".join(product_names)
)
await self.publish_item(tenant_id, {
'type': 'expired_products',
'severity': 'urgent',
'title': template_data['title'],
'message': template_data['message'],
'actions': template_data['actions'],
'metadata': {
'expired_items': [
{
'id': str(item['id']),
'name': item['name'],
'batch_id': str(item['batch_id']),
'quantity': float(item['quantity']),
'days_expired': abs(item['days_to_expiry'])
} for item in expired
]
}
}, item_type='alert')
# Process urgent expiry (high alerts)
if urgent:
for item in urgent:
await self.publish_item(tenant_id, {
'type': 'urgent_expiry',
'severity': 'high',
'title': f'⏰ Caducidad Urgente: {item["name"]}',
'message': f'{item["name"]} caduca en {item["days_to_expiry"]} día(s). Usar prioritariamente.',
'actions': ['Usar inmediatamente', 'Promoción especial', 'Revisar recetas', 'Documentar'],
'metadata': {
'ingredient_id': str(item['id']),
'batch_id': str(item['batch_id']),
'days_to_expiry': item['days_to_expiry'],
'quantity': float(item['quantity'])
}
}, item_type='alert')
except Exception as e:
logger.error("Error processing expiring items",
tenant_id=str(tenant_id),
error=str(e))
async def check_temperature_breaches(self):
"""Check for temperature breaches (alerts)"""
try:
self._checks_performed += 1
query = """
SELECT
t.id, t.sensor_id, t.location, t.temperature,
t.max_threshold, t.tenant_id,
EXTRACT(epoch FROM (NOW() - t.first_breach_time)) / 60 as breach_duration_minutes
FROM temperature_readings t
WHERE t.temperature > t.max_threshold
AND NOW() - t.first_breach_time >= INTERVAL '30 minutes' -- Only after 30 minutes
AND t.last_alert_sent < NOW() - INTERVAL '15 minutes' -- Avoid spam
ORDER BY t.temperature DESC, t.breach_duration_minutes DESC
"""
from sqlalchemy import text
async with self.db_manager.get_session() as session:
result = await session.execute(text(query))
breaches = result.mappings().all()
for breach in breaches:
await self._process_temperature_breach(breach)
except Exception as e:
logger.error("Temperature check failed", error=str(e))
self._errors_count += 1
async def _process_temperature_breach(self, breach: Dict[str, Any]):
"""Process temperature breach"""
try:
# Determine severity based on duration and temperature
duration_minutes = breach['breach_duration_minutes']
temp_excess = breach['temperature'] - breach['max_threshold']
if duration_minutes > 120 or temp_excess > 10:
severity = 'urgent'
elif duration_minutes > 60 or temp_excess > 5:
severity = 'high'
else:
severity = 'medium'
template_data = self.format_spanish_message(
'temperature_breach',
location=breach['location'],
temperature=breach['temperature'],
duration=duration_minutes
)
await self.publish_item(breach['tenant_id'], {
'type': 'temperature_breach',
'severity': severity,
'title': template_data['title'],
'message': template_data['message'],
'actions': template_data['actions'],
'metadata': {
'sensor_id': breach['sensor_id'],
'location': breach['location'],
'temperature': float(breach['temperature']),
'max_threshold': float(breach['max_threshold']),
'duration_minutes': duration_minutes,
'temperature_excess': temp_excess
}
}, item_type='alert')
# Update last alert sent time to avoid spam
await self.db_manager.execute(
"UPDATE temperature_readings SET last_alert_sent = NOW() WHERE id = $1",
breach['id']
)
except Exception as e:
logger.error("Error processing temperature breach",
sensor_id=breach.get('sensor_id'),
error=str(e))
async def generate_inventory_recommendations(self):
"""Generate optimization recommendations based on usage patterns"""
try:
self._checks_performed += 1
# Analyze stock levels vs usage patterns
query = """
WITH usage_analysis AS (
SELECT
i.id, i.name, i.tenant_id, i.minimum_stock, i.maximum_stock,
i.current_stock,
AVG(sm.quantity) FILTER (WHERE sm.movement_type = 'out'
AND sm.created_at > CURRENT_DATE - INTERVAL '30 days') as avg_daily_usage,
COUNT(sm.id) FILTER (WHERE sm.movement_type = 'out'
AND sm.created_at > CURRENT_DATE - INTERVAL '30 days') as usage_days,
MAX(sm.created_at) FILTER (WHERE sm.movement_type = 'out') as last_used
FROM inventory_items i
LEFT JOIN stock_movements sm ON sm.ingredient_id = i.id
WHERE i.active = true AND i.tenant_id = :tenant_id
GROUP BY i.id
HAVING COUNT(sm.id) FILTER (WHERE sm.movement_type = 'out'
AND sm.created_at > CURRENT_DATE - INTERVAL '30 days') >= 5
),
recommendations AS (
SELECT *,
CASE
WHEN avg_daily_usage * 7 > maximum_stock THEN 'increase_max'
WHEN avg_daily_usage * 3 < minimum_stock THEN 'decrease_min'
WHEN current_stock / NULLIF(avg_daily_usage, 0) > 14 THEN 'reduce_stock'
WHEN avg_daily_usage > 0 AND minimum_stock / avg_daily_usage < 3 THEN 'increase_min'
ELSE null
END as recommendation_type
FROM usage_analysis
WHERE avg_daily_usage > 0
)
SELECT * FROM recommendations WHERE recommendation_type IS NOT NULL
ORDER BY avg_daily_usage DESC
"""
tenants = await self.get_active_tenants()
for tenant_id in tenants:
try:
from sqlalchemy import text
async with self.db_manager.get_session() as session:
result = await session.execute(text(query), {"tenant_id": tenant_id})
recommendations = result.mappings().all()
for rec in recommendations:
await self._generate_stock_recommendation(tenant_id, rec)
except Exception as e:
logger.error("Error generating recommendations for tenant",
tenant_id=str(tenant_id),
error=str(e))
except Exception as e:
logger.error("Inventory recommendations failed", error=str(e))
self._errors_count += 1
async def _generate_stock_recommendation(self, tenant_id: UUID, rec: Dict[str, Any]):
"""Generate specific stock recommendation"""
try:
if not self.should_send_recommendation(tenant_id, rec['recommendation_type']):
return
rec_type = rec['recommendation_type']
if rec_type == 'increase_max':
suggested_max = rec['avg_daily_usage'] * 10 # 10 days supply
template_data = self.format_spanish_message(
'inventory_optimization',
ingredient_name=rec['name'],
period=30,
suggested_increase=suggested_max - rec['maximum_stock']
)
await self.publish_item(tenant_id, {
'type': 'inventory_optimization',
'severity': 'medium',
'title': template_data['title'],
'message': template_data['message'],
'actions': template_data['actions'],
'metadata': {
'ingredient_id': str(rec['id']),
'current_max': float(rec['maximum_stock']),
'suggested_max': float(suggested_max),
'avg_daily_usage': float(rec['avg_daily_usage']),
'recommendation_type': rec_type
}
}, item_type='recommendation')
elif rec_type == 'decrease_min':
suggested_min = rec['avg_daily_usage'] * 3 # 3 days safety stock
await self.publish_item(tenant_id, {
'type': 'inventory_optimization',
'severity': 'low',
'title': f'📉 Optimización de Stock Mínimo: {rec["name"]}',
'message': f'Uso promedio sugiere reducir stock mínimo de {rec["minimum_stock"]}kg a {suggested_min:.1f}kg.',
'actions': ['Revisar niveles mínimos', 'Analizar tendencias', 'Ajustar configuración'],
'metadata': {
'ingredient_id': str(rec['id']),
'current_min': float(rec['minimum_stock']),
'suggested_min': float(suggested_min),
'avg_daily_usage': float(rec['avg_daily_usage']),
'recommendation_type': rec_type
}
}, item_type='recommendation')
except Exception as e:
logger.error("Error generating stock recommendation",
ingredient_id=str(rec.get('id')),
error=str(e))
async def generate_waste_reduction_recommendations(self):
"""Generate waste reduction recommendations"""
try:
# Analyze waste patterns
query = """
SELECT
i.id, i.name, i.tenant_id,
SUM(w.quantity) as total_waste_30d,
COUNT(w.id) as waste_incidents,
AVG(w.quantity) as avg_waste_per_incident,
w.waste_reason
FROM inventory_items i
JOIN waste_logs w ON w.ingredient_id = i.id
WHERE w.created_at > CURRENT_DATE - INTERVAL '30 days'
AND i.tenant_id = :tenant_id
GROUP BY i.id, w.waste_reason
HAVING SUM(w.quantity) > 5 -- More than 5kg wasted
ORDER BY total_waste_30d DESC
"""
tenants = await self.get_active_tenants()
for tenant_id in tenants:
try:
from sqlalchemy import text
async with self.db_manager.get_session() as session:
result = await session.execute(text(query), {"tenant_id": tenant_id})
waste_data = result.mappings().all()
for waste in waste_data:
await self._generate_waste_recommendation(tenant_id, waste)
except Exception as e:
logger.error("Error generating waste recommendations",
tenant_id=str(tenant_id),
error=str(e))
except Exception as e:
logger.error("Waste reduction recommendations failed", error=str(e))
self._errors_count += 1
async def _generate_waste_recommendation(self, tenant_id: UUID, waste: Dict[str, Any]):
"""Generate waste reduction recommendation"""
try:
waste_percentage = (waste['total_waste_30d'] / (waste['total_waste_30d'] + 100)) * 100 # Simplified calculation
template_data = self.format_spanish_message(
'waste_reduction',
product=waste['name'],
waste_reduction_percent=waste_percentage
)
await self.publish_item(tenant_id, {
'type': 'waste_reduction',
'severity': 'low',
'title': template_data['title'],
'message': template_data['message'],
'actions': template_data['actions'],
'metadata': {
'ingredient_id': str(waste['id']),
'total_waste_30d': float(waste['total_waste_30d']),
'waste_incidents': waste['waste_incidents'],
'waste_reason': waste['waste_reason'],
'estimated_reduction_percent': waste_percentage
}
}, item_type='recommendation')
except Exception as e:
logger.error("Error generating waste recommendation",
ingredient_id=str(waste.get('id')),
error=str(e))
async def register_db_listeners(self, conn):
"""Register inventory-specific database listeners"""
try:
await conn.add_listener('stock_alerts', self.handle_stock_db_alert)
await conn.add_listener('temperature_alerts', self.handle_temperature_db_alert)
logger.info("Database listeners registered",
service=self.config.SERVICE_NAME)
except Exception as e:
logger.error("Failed to register database listeners",
service=self.config.SERVICE_NAME,
error=str(e))
async def handle_stock_db_alert(self, connection, pid, channel, payload):
"""Handle stock alert from database trigger"""
try:
data = json.loads(payload)
tenant_id = UUID(data['tenant_id'])
template_data = self.format_spanish_message(
'critical_stock_shortage',
ingredient_name=data['name'],
current_stock=data['current_stock'],
required_stock=data['minimum_stock']
)
await self.publish_item(tenant_id, {
'type': 'critical_stock_shortage',
'severity': 'urgent',
'title': template_data['title'],
'message': template_data['message'],
'actions': template_data['actions'],
'metadata': {
'ingredient_id': data['ingredient_id'],
'current_stock': data['current_stock'],
'minimum_stock': data['minimum_stock'],
'trigger_source': 'database'
}
}, item_type='alert')
except Exception as e:
logger.error("Error handling stock DB alert", error=str(e))
async def handle_temperature_db_alert(self, connection, pid, channel, payload):
"""Handle temperature alert from database trigger"""
try:
data = json.loads(payload)
tenant_id = UUID(data['tenant_id'])
template_data = self.format_spanish_message(
'temperature_breach',
location=data['location'],
temperature=data['temperature'],
duration=data['duration']
)
await self.publish_item(tenant_id, {
'type': 'temperature_breach',
'severity': 'high',
'title': template_data['title'],
'message': template_data['message'],
'actions': template_data['actions'],
'metadata': {
'sensor_id': data['sensor_id'],
'location': data['location'],
'temperature': data['temperature'],
'duration': data['duration'],
'trigger_source': 'database'
}
}, item_type='alert')
except Exception as e:
logger.error("Error handling temperature DB alert", error=str(e))
async def start_event_listener(self):
"""Listen for inventory-affecting events"""
try:
# Subscribe to order events that might affect inventory
await self.rabbitmq_client.consume_events(
"bakery_events",
f"inventory.orders.{self.config.SERVICE_NAME}",
"orders.placed",
self.handle_order_placed
)
logger.info("Event listeners started",
service=self.config.SERVICE_NAME)
except Exception as e:
logger.error("Failed to start event listeners",
service=self.config.SERVICE_NAME,
error=str(e))
async def handle_order_placed(self, message):
"""Check if order critically affects stock"""
try:
order = json.loads(message.body)
tenant_id = UUID(order['tenant_id'])
for item in order.get('items', []):
# Check stock impact
stock_info = await self.get_stock_after_order(item['ingredient_id'], item['quantity'])
if stock_info and stock_info['remaining'] < stock_info['minimum_stock']:
await self.publish_item(tenant_id, {
'type': 'stock_depleted_by_order',
'severity': 'high',
'title': f'⚠️ Pedido Agota Stock: {stock_info["name"]}',
'message': f'Pedido #{order["id"]} dejará stock en {stock_info["remaining"]}kg (mínimo {stock_info["minimum_stock"]}kg)',
'actions': ['Revisar pedido', 'Contactar proveedor', 'Ajustar producción', 'Usar stock reserva'],
'metadata': {
'order_id': order['id'],
'ingredient_id': item['ingredient_id'],
'order_quantity': item['quantity'],
'remaining_stock': stock_info['remaining'],
'minimum_stock': stock_info['minimum_stock']
}
}, item_type='alert')
except Exception as e:
logger.error("Error handling order placed event", error=str(e))
async def get_stock_after_order(self, ingredient_id: str, order_quantity: float) -> Optional[Dict[str, Any]]:
"""Get stock information after hypothetical order"""
try:
query = """
SELECT id, name, current_stock, minimum_stock,
(current_stock - $2) as remaining
FROM inventory_items
WHERE id = $1
"""
result = await self.db_manager.fetchrow(query, ingredient_id, order_quantity)
return dict(result) if result else None
except Exception as e:
logger.error("Error getting stock after order",
ingredient_id=ingredient_id,
error=str(e))
return None

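The `register_db_listeners` path above expects PostgreSQL `NOTIFY` messages on the `stock_alerts` and `temperature_alerts` channels, normally raised by database triggers. A minimal sketch for exercising the stock listener follows; the DSN is an assumption, and the payload keys are the ones `handle_stock_db_alert()` reads.

```python
# Hedged sketch: fire a pg_notify on the "stock_alerts" channel that
# handle_stock_db_alert() is listening on. DSN and values are illustrative.
import asyncio
import json
import uuid

import asyncpg


async def notify_stock_alert() -> None:
    conn = await asyncpg.connect("postgresql://inventory_user:pass@inventory-db:5432/inventory_db")
    payload = {
        "tenant_id": str(uuid.uuid4()),
        "ingredient_id": str(uuid.uuid4()),
        "name": "Harina de trigo",
        "current_stock": 2.5,
        "minimum_stock": 10.0,
    }
    # pg_notify() allows the channel name and payload to be bound as parameters.
    await conn.execute("SELECT pg_notify($1, $2)", "stock_alerts", json.dumps(payload))
    await conn.close()


if __name__ == "__main__":
    asyncio.run(notify_stock_alert())
```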
View File

@@ -30,8 +30,12 @@ passlib[bcrypt]==1.7.4
structlog==23.2.0
prometheus-client==0.19.0
# Message queues
# Message queues and Redis
aio-pika==9.3.1
redis>=4.0.0
# Scheduling
APScheduler==3.10.4
# Additional for inventory management
python-barcode==0.15.1

View File

@@ -1,321 +0,0 @@
## 🎯 **Complete Notification Service Implementation**
### **📁 File Structure Created**
```
services/notification/
├── app/
│ ├── main.py ✅ Complete FastAPI application
│ ├── core/
│ │ ├── config.py ✅ Configuration settings
│ │ └── database.py ✅ Database initialization
│ ├── models/
│ │ ├── notifications.py ✅ Core notification models
│ │ └── templates.py ✅ Template-specific models
│ ├── schemas/
│ │ └── notifications.py ✅ Pydantic schemas
│ ├── services/
│ │ ├── notification_service.py ✅ Main business logic
│ │ ├── email_service.py ✅ Email delivery
│ │ ├── whatsapp_service.py ✅ WhatsApp delivery
│ │ └── messaging.py ✅ RabbitMQ integration
│ └── api/
│ └── notifications.py ✅ Complete API routes
├── requirements.txt ✅ Python dependencies
├── Dockerfile ✅ Container configuration
└── .env.example ✅ Environment variables
```
### **🔧 Key Features Implemented**
#### **1. Complete Business Logic**
- **NotificationService**: Core orchestration of all notification operations
- **Multi-channel support**: Email, WhatsApp, Push (extensible)
- **Template processing**: Jinja2-based template rendering
- **Bulk notifications**: Batch processing with rate limiting
- **User preferences**: Granular notification controls
- **Scheduling**: Delayed notification delivery
#### **2. Email Service Integration**
- **SMTP support**: Configurable email providers (Gmail, SendGrid, etc.)
- **HTML + Text emails**: Rich email templates with fallbacks
- **Bulk email processing**: Rate-limited batch sending
- **Template system**: Pre-built Spanish templates for bakeries
- **Health checks**: SMTP connection monitoring
- **Attachment support**: File attachment capabilities
#### **3. WhatsApp Service Integration**
- **Twilio integration**: WhatsApp Business API support
- **Spanish phone formatting**: Automatic +34 country code handling
- **Template messages**: WhatsApp Business template support
- **Bulk WhatsApp**: Rate-limited batch messaging
- **Delivery status**: Webhook handling for delivery confirmations
#### **4. Database Models & Schemas**
- **Complete data model**: Notifications, templates, preferences, logs
- **Multi-tenant support**: Tenant-scoped notifications
- **Audit trail**: Detailed delivery attempt logging
- **Template management**: System and custom templates
- **User preferences**: Granular notification controls
#### **5. API Integration with Gateway**
- **Gateway authentication**: Uses shared auth decorators
- **Tenant isolation**: Automatic tenant scoping
- **Role-based access**: Admin/manager/user permissions
- **Complete CRUD**: Full notification management API
- **Webhook endpoints**: External delivery status handling
#### **6. RabbitMQ Event Integration**
- **Event consumers**: Listens for user registration, forecasts, training
- **Event publishers**: Publishes notification status events
- **Auto-notifications**: Triggers welcome emails, alerts, reports
- **Error handling**: Robust message processing with retry logic
#### **7. Spanish Bakery Templates**
- **Welcome email**: Professional onboarding email
- **Forecast alerts**: Demand variation notifications
- **Weekly reports**: Performance summary emails
- **Responsive HTML**: Mobile-optimized email designs
- **Spanish localization**: All content in Spanish
### **🚀 Integration with Your Architecture**
#### **Seamless Gateway Integration**
```python
# Gateway already routes to notification service
app.include_router(notification.router, prefix="/api/v1/notifications", tags=["notifications"])
# Authentication handled by gateway middleware
# Tenant isolation automatic
# User context passed via headers
```
#### **Shared Library Usage**
```python
# Uses your existing shared components
from shared.auth.decorators import get_current_user_dep, get_current_tenant_id_dep
from shared.messaging.rabbitmq import RabbitMQClient
from shared.monitoring.metrics import MetricsCollector
from shared.database.base import DatabaseManager
```
#### **Event-Driven Architecture**
```python
# Automatic notifications triggered by:
# - User registration → Welcome email
# - Forecast alerts → Alert emails + WhatsApp
# - Training completion → Status notifications
# - Data imports → Import confirmations
```
### **📊 Production Features**
#### **Health Monitoring**
- **Database health checks**: Connection monitoring
- **SMTP health checks**: Email service validation
- **WhatsApp health checks**: API connectivity tests
- **Prometheus metrics**: Delivery rates, response times
- **Structured logging**: Comprehensive error tracking
#### **Rate Limiting & Scaling**
- **Email rate limits**: 1000/hour configurable
- **WhatsApp rate limits**: 100/hour (Twilio limits)
- **Batch processing**: Configurable batch sizes
- **Retry logic**: Automatic retry with exponential backoff
- **Queue management**: Background task processing
#### **Security & Compliance**
- **User consent**: Preference-based opt-in/out
- **Tenant isolation**: Multi-tenant data separation
- **GDPR compliance**: User data control
- **Rate limiting**: DoS protection
- **Input validation**: Pydantic schema validation
### **🎯 Business-Specific Features**
#### **Bakery Use Cases**
```python
# Forecast alerts when demand varies >20%
# Daily production recommendations
# Weekly performance reports
# Stock shortage notifications
# Weather impact alerts
# Holiday/event notifications
```
#### **Spanish Localization**
- **Spanish templates**: Native Spanish content
- **Madrid timezone**: Europe/Madrid default
- **Spanish phone format**: +34 prefix handling
- **Local business hours**: Quiet hours support
- **Cultural context**: Bakery-specific terminology
### **🔄 How to Deploy**
#### **1. Add to Docker Compose**
```yaml
# Already integrated in your docker-compose.yml
notification-service:
build: ./services/notification
ports:
- "8006:8000"
environment:
- DATABASE_URL=postgresql+asyncpg://notification_user:notification_pass123@notification-db:5432/notification_db
depends_on:
- notification-db
- redis
- rabbitmq
```
#### **2. Environment Setup**
```bash
# Copy environment template
cp services/notification/.env.example services/notification/.env
# Configure email provider
SMTP_USER=your-email@gmail.com
SMTP_PASSWORD=your-app-password
# Configure WhatsApp (optional)
WHATSAPP_API_KEY=your-twilio-sid:your-twilio-token
```
#### **3. Start Service**
```bash
# Service starts automatically with
docker-compose up -d
# Check health
curl http://localhost:8006/health
# View API docs
open http://localhost:8006/docs
```
### **📈 API Usage Examples**
#### **Send Welcome Email**
```python
POST /api/v1/notifications/send
{
"type": "email",
"recipient_email": "usuario@panaderia.com",
"template_id": "welcome_email",
"template_data": {
"user_name": "Juan Carlos",
"dashboard_url": "https://app.bakeryforecast.es/dashboard"
}
}
```
#### **Send Forecast Alert**
```python
POST /api/v1/notifications/send
{
"type": "email",
"template_id": "forecast_alert_email",
"template_data": {
"bakery_name": "Panadería San Miguel",
"product_name": "Pan integral",
"forecast_date": "2025-01-25",
"predicted_demand": 120,
"variation_percentage": 35,
"alert_message": "Aumento significativo esperado. Se recomienda incrementar producción."
},
"broadcast": true,
"priority": "high"
}
```
#### **Update User Preferences**
```python
PATCH /api/v1/notifications/preferences
{
"email_alerts": true,
"whatsapp_enabled": false,
"quiet_hours_start": "22:00",
"quiet_hours_end": "08:00",
"language": "es"
}
```
### **🎉 Key Benefits**
#### **✅ Production Ready**
- Complete error handling and logging
- Health checks and monitoring
- Rate limiting and security
- Multi-tenant architecture
- Scalable event-driven design
#### **✅ Business Focused**
- Spanish bakery templates
- Madrid timezone/localization
- Forecast-specific notifications
- Professional email designs
- WhatsApp support for urgent alerts
#### **✅ Developer Friendly**
- Comprehensive API documentation
- Type-safe Pydantic schemas
- Async/await throughout
- Structured logging
- Easy testing and debugging
#### **✅ Seamless Integration**
- Uses your shared libraries
- Integrates with gateway auth
- Follows your architectural patterns
- Maintains tenant isolation
- Publishes events to RabbitMQ
### **🚀 Next Steps**
#### **Immediate (Week 2)**
1. **Deploy the service**: Add to your docker-compose and start
2. **Configure SMTP**: Set up email provider credentials
3. **Test integration**: Send test notifications via API
4. **Event integration**: Verify RabbitMQ event handling
#### **Production Optimization**
1. **Email provider**: Consider SendGrid/Mailgun for production
2. **WhatsApp setup**: Configure Twilio Business API
3. **Template customization**: Add tenant-specific templates
4. **Analytics dashboard**: Add notification analytics to frontend
### **💡 Advanced Features Ready for Extension**
- **Push notifications**: Framework ready for mobile push
- **SMS support**: Easy to add SMS providers
- **A/B testing**: Template variant testing
- **Scheduled campaigns**: Marketing email campaigns
- **Analytics integration**: Detailed delivery analytics
**This notification service is now a complete, production-ready microservice that fully integrates with your bakery forecasting platform! It handles all notification needs from welcome emails to urgent forecast alerts, with proper Spanish localization and bakery-specific templates.** 🎯

View File

@@ -0,0 +1,189 @@
# services/notification/app/api/sse_routes.py
"""
SSE routes for real-time alert and recommendation streaming
"""
import asyncio
import json
from datetime import datetime
from typing import Optional
from fastapi import APIRouter, Request, Depends, HTTPException, BackgroundTasks
from sse_starlette.sse import EventSourceResponse
import structlog
from shared.auth.decorators import get_current_user
router = APIRouter(prefix="/sse", tags=["sse"])
logger = structlog.get_logger()
@router.get("/alerts/stream/{tenant_id}")
async def stream_alerts(
tenant_id: str,
request: Request,
background_tasks: BackgroundTasks,
current_user = Depends(get_current_user)
):
"""
SSE endpoint for real-time alert and recommendation streaming
Supports both alerts and recommendations through a unified stream
"""
# Verify user has access to this tenant
if not hasattr(current_user, 'has_access_to_tenant') or not current_user.has_access_to_tenant(tenant_id):
raise HTTPException(403, "Access denied to this tenant")
# Get SSE service from app state
sse_service = getattr(request.app.state, 'sse_service', None)
if not sse_service:
raise HTTPException(500, "SSE service not available")
async def event_generator():
"""Generate SSE events for the client"""
client_queue = asyncio.Queue(maxsize=100) # Limit queue size
try:
# Register client
await sse_service.add_client(tenant_id, client_queue)
logger.info("SSE client connected",
tenant_id=tenant_id,
user_id=getattr(current_user, 'id', 'unknown'))
# Stream events
while True:
# Check if client disconnected
if await request.is_disconnected():
logger.info("SSE client disconnected", tenant_id=tenant_id)
break
try:
# Wait for events with timeout for keepalive
event = await asyncio.wait_for(
client_queue.get(),
timeout=30.0
)
yield event
except asyncio.TimeoutError:
# Send keepalive ping
yield {
"event": "ping",
"data": json.dumps({
"timestamp": datetime.utcnow().isoformat(),
"status": "keepalive"
}),
"id": f"ping_{int(datetime.now().timestamp())}"
}
except Exception as e:
logger.error("Error in SSE event generator",
tenant_id=tenant_id,
error=str(e))
break
except Exception as e:
logger.error("SSE connection error",
tenant_id=tenant_id,
error=str(e))
finally:
# Clean up on disconnect
try:
await sse_service.remove_client(tenant_id, client_queue)
logger.info("SSE client cleanup completed", tenant_id=tenant_id)
except Exception as e:
logger.error("Error cleaning up SSE client",
tenant_id=tenant_id,
error=str(e))
return EventSourceResponse(
event_generator(),
media_type="text/event-stream",
headers={
"Cache-Control": "no-cache",
"Connection": "keep-alive",
"X-Accel-Buffering": "no", # Disable nginx buffering
}
)
@router.post("/items/{item_id}/acknowledge")
async def acknowledge_item(
item_id: str,
current_user = Depends(get_current_user)
):
"""Acknowledge an alert or recommendation"""
try:
# This would update the database
# For now, just return success
logger.info("Item acknowledged",
item_id=item_id,
user_id=getattr(current_user, 'id', 'unknown'))
return {
"status": "success",
"item_id": item_id,
"acknowledged_by": getattr(current_user, 'id', 'unknown'),
"acknowledged_at": datetime.utcnow().isoformat()
}
except Exception as e:
logger.error("Failed to acknowledge item", item_id=item_id, error=str(e))
raise HTTPException(500, "Failed to acknowledge item")
@router.post("/items/{item_id}/resolve")
async def resolve_item(
item_id: str,
current_user = Depends(get_current_user)
):
"""Resolve an alert or recommendation"""
try:
# This would update the database
# For now, just return success
logger.info("Item resolved",
item_id=item_id,
user_id=getattr(current_user, 'id', 'unknown'))
return {
"status": "success",
"item_id": item_id,
"resolved_by": getattr(current_user, 'id', 'unknown'),
"resolved_at": datetime.utcnow().isoformat()
}
except Exception as e:
logger.error("Failed to resolve item", item_id=item_id, error=str(e))
raise HTTPException(500, "Failed to resolve item")
@router.get("/status/{tenant_id}")
async def get_sse_status(
tenant_id: str,
request: Request,
current_user = Depends(get_current_user)
):
"""Get SSE connection status for a tenant"""
# Verify user has access to this tenant
if not hasattr(current_user, 'has_access_to_tenant') or not current_user.has_access_to_tenant(tenant_id):
raise HTTPException(403, "Access denied to this tenant")
try:
# Get SSE service from app state
sse_service = getattr(request.app.state, 'sse_service', None)
if not sse_service:
return {"status": "unavailable", "message": "SSE service not initialized"}
metrics = sse_service.get_metrics()
tenant_connections = len(sse_service.active_connections.get(tenant_id, set()))
return {
"status": "available",
"tenant_id": tenant_id,
"connections": tenant_connections,
"total_connections": metrics["total_connections"],
"active_tenants": metrics["active_tenants"]
}
except Exception as e:
logger.error("Failed to get SSE status", tenant_id=tenant_id, error=str(e))
raise HTTPException(500, "Failed to get SSE status")

View File

@@ -1,9 +1,9 @@
# ================================================================
# services/notification/app/main.py - COMPLETE IMPLEMENTATION
# services/notification/app/main.py - ENHANCED WITH SSE SUPPORT
# ================================================================
"""
Notification Service Main Application
Handles email and WhatsApp notifications with full integration
Handles email and WhatsApp notifications, plus SSE streaming for real-time alerts and recommendations
"""
import structlog
@@ -15,7 +15,12 @@ from fastapi.responses import JSONResponse
from app.core.config import settings
from app.core.database import init_db
from app.api.notifications import router as notification_router
from app.api.sse_routes import router as sse_router
from app.services.messaging import setup_messaging, cleanup_messaging
from app.services.sse_service import SSEService
from app.services.notification_orchestrator import NotificationOrchestrator
from app.services.email_service import EmailService
from app.services.whatsapp_service import WhatsAppService
from shared.monitoring import setup_logging, HealthChecker
from shared.monitoring.metrics import setup_metrics_early
@@ -30,8 +35,8 @@ health_checker = None
# Create FastAPI app FIRST
app = FastAPI(
title="Bakery Notification Service",
description="Email and WhatsApp notification service for bakery forecasting platform",
version="1.0.0",
description="Email, WhatsApp and SSE notification service for bakery alerts and recommendations",
version="2.0.0",
docs_url="/docs",
redoc_url="/redoc"
)
@@ -56,12 +61,36 @@ async def lifespan(app: FastAPI):
await setup_messaging()
logger.info("Messaging initialized")
# Initialize services
email_service = EmailService()
whatsapp_service = WhatsAppService()
# Initialize SSE service
sse_service = SSEService(settings.REDIS_URL)
await sse_service.initialize()
logger.info("SSE service initialized")
# Create orchestrator
orchestrator = NotificationOrchestrator(
email_service=email_service,
whatsapp_service=whatsapp_service,
sse_service=sse_service
)
# Store services in app state
app.state.orchestrator = orchestrator
app.state.sse_service = sse_service
app.state.email_service = email_service
app.state.whatsapp_service = whatsapp_service
# Register custom metrics (metrics_collector already exists)
metrics_collector.register_counter("notifications_sent_total", "Total notifications sent", labels=["type", "status"])
metrics_collector.register_counter("notifications_sent_total", "Total notifications sent", labels=["type", "status", "channel"])
metrics_collector.register_counter("emails_sent_total", "Total emails sent", labels=["status"])
metrics_collector.register_counter("whatsapp_sent_total", "Total WhatsApp messages sent", labels=["status"])
metrics_collector.register_counter("sse_events_sent_total", "Total SSE events sent", labels=["tenant", "event_type"])
metrics_collector.register_histogram("notification_processing_duration_seconds", "Time spent processing notifications")
metrics_collector.register_gauge("notification_queue_size", "Current notification queue size")
metrics_collector.register_gauge("sse_active_connections", "Number of active SSE connections")
# Setup health checker
health_checker = HealthChecker("notification-service")
@@ -93,14 +122,22 @@ async def lifespan(app: FastAPI):
# Add WhatsApp service health check
async def check_whatsapp_service():
try:
from app.services.whatsapp_service import WhatsAppService
whatsapp_service = WhatsAppService()
return await whatsapp_service.health_check()
except Exception as e:
return f"WhatsApp service error: {e}"
health_checker.add_check("whatsapp_service", check_whatsapp_service, timeout=10.0, critical=False)
# Add SSE service health check
async def check_sse_service():
try:
metrics = sse_service.get_metrics()
return "healthy" if metrics["redis_connected"] else "Redis connection failed"
except Exception as e:
return f"SSE service error: {e}"
health_checker.add_check("sse_service", check_sse_service, timeout=5.0, critical=True)
# Add messaging health check
def check_messaging():
try:
@@ -115,7 +152,7 @@ async def lifespan(app: FastAPI):
# Store health checker in app state
app.state.health_checker = health_checker
logger.info("Notification Service started successfully")
logger.info("Notification Service with SSE support started successfully")
except Exception as e:
logger.error(f"Failed to start Notification Service: {e}")
@@ -126,10 +163,15 @@ async def lifespan(app: FastAPI):
# Shutdown
logger.info("Shutting down Notification Service...")
try:
# Shutdown SSE service
if hasattr(app.state, 'sse_service'):
await app.state.sse_service.shutdown()
logger.info("SSE service shutdown completed")
await cleanup_messaging()
logger.info("Messaging cleanup completed")
except Exception as e:
logger.error(f"Error during messaging cleanup: {e}")
logger.error(f"Error during shutdown: {e}")
# Set lifespan AFTER metrics setup
app.router.lifespan_context = lifespan
@@ -145,18 +187,30 @@ app.add_middleware(
# Include routers
app.include_router(notification_router, prefix="/api/v1", tags=["notifications"])
app.include_router(sse_router, prefix="/api/v1", tags=["sse"])
# Health check endpoint
@app.get("/health")
async def health_check():
"""Comprehensive health check endpoint"""
"""Comprehensive health check endpoint including SSE"""
if health_checker:
return await health_checker.check_health()
health_result = await health_checker.check_health()
# Add SSE metrics to health check
if hasattr(app.state, 'sse_service'):
try:
sse_metrics = app.state.sse_service.get_metrics()
health_result['sse_metrics'] = sse_metrics
except Exception as e:
health_result['sse_error'] = str(e)
return health_result
else:
return {
"service": "notification-service",
"status": "healthy",
"version": "1.0.0"
"version": "2.0.0",
"features": ["email", "whatsapp", "sse", "alerts", "recommendations"]
}
# Metrics endpoint

View File

@@ -276,14 +276,26 @@ class EmailService:
# Test SMTP connection
if self.smtp_ssl:
# Use implicit TLS/SSL connection (port 465 typically)
server = aiosmtplib.SMTP(hostname=self.smtp_host, port=self.smtp_port, use_tls=True)
await server.connect()
# No need for starttls() when using implicit TLS
else:
# Use plain connection, optionally upgrade with STARTTLS
server = aiosmtplib.SMTP(hostname=self.smtp_host, port=self.smtp_port)
await server.connect()
if self.smtp_tls:
await server.starttls()
await server.connect()
if self.smtp_tls:
# Try STARTTLS, but handle case where connection is already secure
try:
await server.starttls()
except Exception as starttls_error:
# If STARTTLS fails because connection is already using TLS, that's okay
if "already using TLS" in str(starttls_error) or "already secure" in str(starttls_error):
logger.debug("SMTP connection already secure, skipping STARTTLS")
else:
# Re-raise other STARTTLS errors
raise starttls_error
await server.login(self.smtp_user, self.smtp_password)
await server.quit()

View File

@@ -0,0 +1,279 @@
# services/notification/app/services/notification_orchestrator.py
"""
Notification orchestrator for managing delivery across all channels
Includes SSE integration for real-time dashboard updates
"""
from typing import List, Dict, Any
from datetime import datetime
import structlog
from .email_service import EmailService
from .whatsapp_service import WhatsAppService
from .sse_service import SSEService
logger = structlog.get_logger()
class NotificationOrchestrator:
"""
Orchestrates delivery across all notification channels
Now includes SSE for real-time dashboard updates, with support for recommendations
"""
def __init__(
self,
email_service: EmailService,
whatsapp_service: WhatsAppService,
sse_service: SSEService,
push_service=None # Optional push service
):
self.email_service = email_service
self.whatsapp_service = whatsapp_service
self.sse_service = sse_service
self.push_service = push_service
async def send_notification(
self,
tenant_id: str,
notification: Dict[str, Any],
channels: List[str]
) -> Dict[str, Any]:
"""
Send notification through specified channels
Channels can include: email, whatsapp, push, dashboard (SSE)
"""
results = {}
# Always send to dashboard for visibility (SSE)
if 'dashboard' in channels or notification.get('type') in ['alert', 'recommendation']:
try:
await self.sse_service.send_item_notification(
tenant_id,
notification
)
results['dashboard'] = {'status': 'sent', 'timestamp': datetime.utcnow().isoformat()}
logger.info("Item sent to dashboard via SSE",
tenant_id=tenant_id,
item_type=notification.get('type'),
item_id=notification.get('id'))
except Exception as e:
logger.error("Failed to send to dashboard",
tenant_id=tenant_id,
error=str(e))
results['dashboard'] = {'status': 'failed', 'error': str(e)}
# Send to email channel
if 'email' in channels:
try:
email_result = await self.email_service.send_notification_email(
to_email=notification.get('email'),
subject=notification.get('title'),
template_data={
'title': notification.get('title'),
'message': notification.get('message'),
'severity': notification.get('severity'),
'item_type': notification.get('type'),
'actions': notification.get('actions', []),
'metadata': notification.get('metadata', {}),
'timestamp': datetime.utcnow().isoformat()
},
notification_type=notification.get('type', 'alert')
)
results['email'] = email_result
except Exception as e:
logger.error("Failed to send email",
tenant_id=tenant_id,
error=str(e))
results['email'] = {'status': 'failed', 'error': str(e)}
# Send to WhatsApp channel
if 'whatsapp' in channels:
try:
whatsapp_result = await self.whatsapp_service.send_notification_message(
to_phone=notification.get('phone'),
message=self._format_whatsapp_message(notification),
notification_type=notification.get('type', 'alert')
)
results['whatsapp'] = whatsapp_result
except Exception as e:
logger.error("Failed to send WhatsApp",
tenant_id=tenant_id,
error=str(e))
results['whatsapp'] = {'status': 'failed', 'error': str(e)}
# Send to push notification channel
if 'push' in channels and self.push_service:
try:
push_result = await self.push_service.send_notification(
user_id=notification.get('user_id'),
title=notification.get('title'),
body=notification.get('message'),
data={
'item_type': notification.get('type'),
'severity': notification.get('severity'),
'item_id': notification.get('id'),
'metadata': notification.get('metadata', {})
}
)
results['push'] = push_result
except Exception as e:
logger.error("Failed to send push notification",
tenant_id=tenant_id,
error=str(e))
results['push'] = {'status': 'failed', 'error': str(e)}
# Log summary
successful_channels = [ch for ch, result in results.items() if result.get('status') == 'sent']
failed_channels = [ch for ch, result in results.items() if result.get('status') == 'failed']
logger.info("Notification delivery completed",
tenant_id=tenant_id,
item_type=notification.get('type'),
item_id=notification.get('id'),
successful_channels=successful_channels,
failed_channels=failed_channels,
total_channels=len(channels))
return {
'status': 'completed',
'successful_channels': successful_channels,
'failed_channels': failed_channels,
'results': results,
'timestamp': datetime.utcnow().isoformat()
}
def _format_whatsapp_message(self, notification: Dict[str, Any]) -> str:
"""Format message for WhatsApp with emojis and structure"""
item_type = notification.get('type', 'alert')
severity = notification.get('severity', 'medium')
# Get appropriate emoji
type_emoji = '🚨' if item_type == 'alert' else '💡'
severity_emoji = {
'urgent': '🔴',
'high': '🟡',
'medium': '🔵',
'low': '🟢'
}.get(severity, '🔵')
message = f"{type_emoji} {severity_emoji} *{notification.get('title', 'Notificación')}*\n\n"
message += f"{notification.get('message', '')}\n"
# Add actions if available
actions = notification.get('actions', [])
if actions and len(actions) > 0:
message += "\n*Acciones sugeridas:*\n"
for i, action in enumerate(actions[:3], 1): # Limit to 3 actions for WhatsApp
message += f"{i}. {action}\n"
# Add timestamp
message += f"\n_Enviado: {datetime.now().strftime('%H:%M, %d/%m/%Y')}_"
return message
def get_channels_by_severity(self, severity: str, item_type: str, hour: int = None) -> List[str]:
"""
Determine notification channels based on severity and item_type
Now includes 'dashboard' as a channel
"""
if hour is None:
hour = datetime.now().hour
# Dashboard always gets all items
channels = ['dashboard']
if item_type == 'alert':
if severity == 'urgent':
# Urgent alerts: All channels immediately
channels.extend(['email', 'whatsapp', 'push'])
elif severity == 'high':
# High alerts: Email and WhatsApp during extended hours
if 6 <= hour <= 22:
channels.extend(['email', 'whatsapp'])
else:
channels.append('email') # Email only during night
elif severity == 'medium':
# Medium alerts: Email during business hours
if 7 <= hour <= 20:
channels.append('email')
elif item_type == 'recommendation':
# Recommendations: Generally less urgent, respect business hours
if severity in ['medium', 'high']:
if 8 <= hour <= 19: # Stricter business hours for recommendations
channels.append('email')
# Low/urgent: Dashboard only (urgent rare for recommendations)
return channels
async def health_check(self) -> Dict[str, Any]:
"""Check health of all notification channels"""
health_status = {
'status': 'healthy',
'channels': {},
'timestamp': datetime.utcnow().isoformat()
}
# Check email service
try:
email_health = await self.email_service.health_check()
health_status['channels']['email'] = email_health
except Exception as e:
health_status['channels']['email'] = {'status': 'unhealthy', 'error': str(e)}
# Check WhatsApp service
try:
whatsapp_health = await self.whatsapp_service.health_check()
health_status['channels']['whatsapp'] = whatsapp_health
except Exception as e:
health_status['channels']['whatsapp'] = {'status': 'unhealthy', 'error': str(e)}
# Check SSE service
try:
sse_metrics = self.sse_service.get_metrics()
sse_status = 'healthy' if sse_metrics['redis_connected'] else 'unhealthy'
health_status['channels']['sse'] = {
'status': sse_status,
'metrics': sse_metrics
}
except Exception as e:
health_status['channels']['sse'] = {'status': 'unhealthy', 'error': str(e)}
# Check push service if available
if self.push_service:
try:
push_health = await self.push_service.health_check()
health_status['channels']['push'] = push_health
except Exception as e:
health_status['channels']['push'] = {'status': 'unhealthy', 'error': str(e)}
# Determine overall status
unhealthy_channels = [
ch for ch, status in health_status['channels'].items()
if status.get('status') != 'healthy'
]
if unhealthy_channels:
health_status['status'] = 'degraded' if len(unhealthy_channels) < len(health_status['channels']) else 'unhealthy'
health_status['unhealthy_channels'] = unhealthy_channels
return health_status
def get_metrics(self) -> Dict[str, Any]:
"""Get aggregated metrics from all services"""
metrics = {
'timestamp': datetime.utcnow().isoformat(),
'channels': {}
}
# Get SSE metrics
try:
metrics['channels']['sse'] = self.sse_service.get_metrics()
except Exception as e:
logger.error("Failed to get SSE metrics", error=str(e))
# Additional metrics could be added here for other services
return metrics

View File

@@ -0,0 +1,256 @@
# services/notification/app/services/sse_service.py
"""
Server-Sent Events service for real-time notifications
Integrated within the notification service for alerts and recommendations
"""
import asyncio
from redis.asyncio import Redis
import json
from typing import Dict, Set, Any
from datetime import datetime
import structlog
logger = structlog.get_logger()
class SSEService:
"""
Server-Sent Events service for real-time notifications
Handles both alerts and recommendations through unified SSE streams
"""
def __init__(self, redis_url: str):
self.redis_url = redis_url
self.redis = None
self.active_connections: Dict[str, Set[asyncio.Queue]] = {}
self.pubsub_tasks: Dict[str, asyncio.Task] = {}
async def initialize(self):
"""Initialize Redis connection"""
try:
self.redis = Redis.from_url(self.redis_url)
logger.info("SSE Service initialized with Redis connection")
except Exception as e:
logger.error("Failed to initialize SSE service", error=str(e))
raise
async def shutdown(self):
"""Clean shutdown"""
try:
# Cancel all pubsub tasks
for task in self.pubsub_tasks.values():
if not task.done():
task.cancel()
try:
await task
except asyncio.CancelledError:
pass
# Close all client connections
for tenant_id, connections in self.active_connections.items():
for queue in connections.copy():
try:
await queue.put({"event": "shutdown", "data": json.dumps({"status": "server_shutdown"})})
except Exception:
pass
# Close Redis connection
if self.redis:
await self.redis.close()
logger.info("SSE Service shutdown completed")
except Exception as e:
logger.error("Error during SSE shutdown", error=str(e))
async def add_client(self, tenant_id: str, client_queue: asyncio.Queue):
"""Add a new SSE client connection"""
try:
if tenant_id not in self.active_connections:
self.active_connections[tenant_id] = set()
# Start pubsub listener for this tenant if not exists
if tenant_id not in self.pubsub_tasks:
task = asyncio.create_task(self._listen_to_tenant_channel(tenant_id))
self.pubsub_tasks[tenant_id] = task
self.active_connections[tenant_id].add(client_queue)
client_count = len(self.active_connections[tenant_id])
logger.info("SSE client added",
tenant_id=tenant_id,
total_clients=client_count)
# Send connection confirmation
await client_queue.put({
"event": "connected",
"data": json.dumps({
"status": "connected",
"tenant_id": tenant_id,
"timestamp": datetime.utcnow().isoformat(),
"client_count": client_count
})
})
# Send any active items (alerts and recommendations)
active_items = await self.get_active_items(tenant_id)
if active_items:
await client_queue.put({
"event": "initial_items",
"data": json.dumps(active_items)
})
except Exception as e:
logger.error("Error adding SSE client", tenant_id=tenant_id, error=str(e))
async def remove_client(self, tenant_id: str, client_queue: asyncio.Queue):
"""Remove SSE client connection"""
try:
if tenant_id in self.active_connections:
self.active_connections[tenant_id].discard(client_queue)
# If no more clients for this tenant, stop the pubsub listener
if not self.active_connections[tenant_id]:
del self.active_connections[tenant_id]
if tenant_id in self.pubsub_tasks:
task = self.pubsub_tasks[tenant_id]
if not task.done():
task.cancel()
del self.pubsub_tasks[tenant_id]
logger.info("SSE client removed", tenant_id=tenant_id)
except Exception as e:
logger.error("Error removing SSE client", tenant_id=tenant_id, error=str(e))
async def _listen_to_tenant_channel(self, tenant_id: str):
"""Listen to Redis channel for tenant-specific items"""
try:
# Create a separate Redis connection for pubsub
pubsub_redis = Redis.from_url(self.redis_url)
pubsub = pubsub_redis.pubsub()
channel = f"alerts:{tenant_id}"
await pubsub.subscribe(channel)
logger.info("Started listening to tenant channel",
tenant_id=tenant_id,
channel=channel)
async for message in pubsub.listen():
if message["type"] == "message":
# Broadcast to all connected clients for this tenant
await self.broadcast_to_tenant(tenant_id, message["data"])
except asyncio.CancelledError:
logger.info("Stopped listening to tenant channel", tenant_id=tenant_id)
except Exception as e:
logger.error("Error in pubsub listener", tenant_id=tenant_id, error=str(e))
finally:
try:
await pubsub.unsubscribe(channel)
await pubsub_redis.close()
except Exception:
pass
async def broadcast_to_tenant(self, tenant_id: str, message: str):
"""Broadcast message to all connected clients of a tenant"""
if tenant_id not in self.active_connections:
return
try:
item_data = json.loads(message)
event = {
"event": item_data.get('item_type', 'item'), # 'alert' or 'recommendation'
"data": json.dumps(item_data),
"id": item_data.get("id")
}
# Send to all connected clients
disconnected = []
for client_queue in self.active_connections[tenant_id]:
try:
# Use put_nowait to avoid blocking
client_queue.put_nowait(event)
except asyncio.QueueFull:
logger.warning("Client queue full, dropping message", tenant_id=tenant_id)
disconnected.append(client_queue)
except Exception as e:
logger.warning("Failed to send to client", tenant_id=tenant_id, error=str(e))
disconnected.append(client_queue)
# Clean up disconnected clients
for queue in disconnected:
await self.remove_client(tenant_id, queue)
if disconnected:
logger.info("Cleaned up disconnected clients",
tenant_id=tenant_id,
count=len(disconnected))
except Exception as e:
logger.error("Error broadcasting to tenant", tenant_id=tenant_id, error=str(e))
async def send_item_notification(self, tenant_id: str, item: Dict[str, Any]):
"""
Send alert or recommendation via SSE (called by notification orchestrator)
"""
try:
# Publish to Redis for SSE streaming
channel = f"alerts:{tenant_id}"
item_message = {
'id': item.get('id'),
'item_type': item.get('type'), # 'alert' or 'recommendation'
'type': item.get('alert_type', item.get('type')),
'severity': item.get('severity'),
'title': item.get('title'),
'message': item.get('message'),
'actions': item.get('actions', []),
'metadata': item.get('metadata', {}),
'timestamp': item.get('timestamp', datetime.utcnow().isoformat()),
'status': 'active'
}
await self.redis.publish(channel, json.dumps(item_message))
logger.info("Item published to SSE",
tenant_id=tenant_id,
item_type=item.get('type'),
item_id=item.get('id'))
except Exception as e:
logger.error("Error sending item notification via SSE",
tenant_id=tenant_id,
error=str(e))
async def get_active_items(self, tenant_id: str) -> list:
"""Fetch active alerts and recommendations from database"""
try:
# This would integrate with the actual database
# For now, return empty list as placeholder
# In real implementation, this would query the alerts table
# Example query:
# query = """
# SELECT id, item_type, alert_type, severity, title, message,
# actions, metadata, created_at, status
# FROM alerts
# WHERE tenant_id = $1
# AND status = 'active'
# ORDER BY severity_weight DESC, created_at DESC
# LIMIT 50
# """
return [] # Placeholder
except Exception as e:
logger.error("Error fetching active items", tenant_id=tenant_id, error=str(e))
return []
def get_metrics(self) -> Dict[str, Any]:
"""Get SSE service metrics"""
return {
"active_tenants": len(self.active_connections),
"total_connections": sum(len(connections) for connections in self.active_connections.values()),
"active_listeners": len(self.pubsub_tasks),
"redis_connected": self.redis and not self.redis.closed
}

View File

@@ -30,6 +30,17 @@ class WhatsAppService:
self.from_number = settings.WHATSAPP_FROM_NUMBER
self.enabled = settings.ENABLE_WHATSAPP_NOTIFICATIONS
def _parse_api_credentials(self):
"""Parse API key into username and password for Twilio basic auth"""
if not self.api_key or ":" not in self.api_key:
raise ValueError("WhatsApp API key must be in format 'username:password'")
api_parts = self.api_key.split(":", 1)
if len(api_parts) != 2:
raise ValueError("Invalid WhatsApp API key format")
return api_parts[0], api_parts[1]
async def send_message(
self,
to_phone: str,
@@ -181,10 +192,22 @@ class WhatsAppService:
return False
# Test API connectivity with a simple request
# Parse API key (expected format: username:password for Twilio basic auth)
if ":" not in self.api_key:
logger.error("WhatsApp API key must be in format 'username:password'")
return False
api_parts = self.api_key.split(":", 1) # Split on first : only
if len(api_parts) != 2:
logger.error("Invalid WhatsApp API key format")
return False
username, password = api_parts
async with httpx.AsyncClient(timeout=10.0) as client:
response = await client.get(
f"{self.base_url}/v1/Account", # Twilio account info endpoint
auth=(self.api_key.split(":")[0], self.api_key.split(":")[1])
auth=(username, password)
)
if response.status_code == 200:
@@ -206,6 +229,13 @@ class WhatsAppService:
async def _send_text_message(self, to_phone: str, message: str) -> bool:
"""Send regular text message via Twilio"""
try:
# Parse API credentials
try:
username, password = self._parse_api_credentials()
except ValueError as e:
logger.error(f"WhatsApp API key configuration error: {e}")
return False
# Prepare request data
data = {
"From": f"whatsapp:{self.from_number}",
@@ -216,9 +246,9 @@ class WhatsAppService:
# Send via Twilio API
async with httpx.AsyncClient(timeout=30.0) as client:
response = await client.post(
f"{self.base_url}/2010-04-01/Accounts/{self.api_key.split(':')[0]}/Messages.json",
f"{self.base_url}/2010-04-01/Accounts/{username}/Messages.json",
data=data,
auth=(self.api_key.split(":")[0], self.api_key.split(":")[1])
auth=(username, password)
)
if response.status_code == 201:
@@ -245,6 +275,13 @@ class WhatsAppService:
) -> bool:
"""Send WhatsApp template message via Twilio"""
try:
# Parse API credentials
try:
username, password = self._parse_api_credentials()
except ValueError as e:
logger.error(f"WhatsApp API key configuration error: {e}")
return False
# Prepare template data
content_variables = {str(i+1): param for i, param in enumerate(parameters)}
@@ -258,9 +295,9 @@ class WhatsAppService:
# Send via Twilio API
async with httpx.AsyncClient(timeout=30.0) as client:
response = await client.post(
f"{self.base_url}/2010-04-01/Accounts/{self.api_key.split(':')[0]}/Messages.json",
f"{self.base_url}/2010-04-01/Accounts/{username}/Messages.json",
data=data,
auth=(self.api_key.split(":")[0], self.api_key.split(":")[1])
auth=(username, password)
)
if response.status_code == 201:
@@ -315,10 +352,17 @@ class WhatsAppService:
async def _get_message_status(self, message_sid: str) -> Optional[str]:
"""Get message delivery status from Twilio"""
try:
# Parse API credentials
try:
username, password = self._parse_api_credentials()
except ValueError as e:
logger.error(f"WhatsApp API key configuration error: {e}")
return None
async with httpx.AsyncClient(timeout=10.0) as client:
response = await client.get(
f"{self.base_url}/2010-04-01/Accounts/{self.api_key.split(':')[0]}/Messages/{message_sid}.json",
auth=(self.api_key.split(":")[0], self.api_key.split(":")[1])
f"{self.base_url}/2010-04-01/Accounts/{username}/Messages/{message_sid}.json",
auth=(username, password)
)
if response.status_code == 200:

View File

@@ -3,6 +3,7 @@ fastapi==0.104.1
uvicorn[standard]==0.24.0
pydantic==2.5.0
pydantic-settings==2.1.0
sse-starlette==1.6.5
# Database
sqlalchemy==2.0.23
@@ -22,8 +23,9 @@ aiofiles==23.2.1
aiosmtplib==3.0.1
email-validator==2.1.0
# Messaging
# Messaging & Redis
aio-pika==9.3.1
redis==5.0.1
# Template Engine
jinja2==3.1.2

View File

@@ -1,248 +0,0 @@
# Orders Service
Customer orders and procurement planning service for the bakery management system.
## Overview
The Orders Service handles all order-related operations including:
- **Customer Management**: Complete customer lifecycle and relationship management
- **Order Processing**: End-to-end order management from creation to fulfillment
- **Procurement Planning**: Automated procurement requirement calculation and planning
- **Business Intelligence**: Order pattern analysis and business model detection
- **Dashboard Analytics**: Comprehensive reporting and metrics for order operations
## Features
### Core Capabilities
- Customer registration and management with detailed profiles
- Order creation, tracking, and status management
- Automated demand requirements calculation for production planning
- Procurement planning with supplier coordination
- Business model detection (individual bakery vs central bakery)
- Comprehensive dashboard with real-time metrics
- Integration with production, inventory, suppliers, and sales services
### API Endpoints
#### Dashboard & Analytics
- `GET /api/v1/tenants/{tenant_id}/orders/dashboard-summary` - Comprehensive dashboard data
- `GET /api/v1/tenants/{tenant_id}/orders/demand-requirements` - Demand analysis for production
- `GET /api/v1/tenants/{tenant_id}/orders/business-model` - Business model detection
#### Order Management
- `POST /api/v1/tenants/{tenant_id}/orders` - Create new customer order
- `GET /api/v1/tenants/{tenant_id}/orders` - List orders with filtering and pagination
- `GET /api/v1/tenants/{tenant_id}/orders/{order_id}` - Get order details with items
- `PUT /api/v1/tenants/{tenant_id}/orders/{order_id}/status` - Update order status
#### Customer Management
- `POST /api/v1/tenants/{tenant_id}/customers` - Create new customer
- `GET /api/v1/tenants/{tenant_id}/customers` - List customers with filtering
- `GET /api/v1/tenants/{tenant_id}/customers/{customer_id}` - Get customer details
#### Health & Status
- `GET /api/v1/tenants/{tenant_id}/orders/status` - Service status information
## Service Integration
### Shared Clients Used
- **InventoryServiceClient**: Stock levels, product availability validation
- **ProductionServiceClient**: Production notifications, capacity planning
- **SalesServiceClient**: Historical sales data for demand forecasting
- **NotificationServiceClient**: Customer notifications and alerts
### Authentication
Uses shared authentication patterns with tenant isolation:
- JWT token validation
- Tenant access verification
- User permission checks
## Configuration
Key configuration options in `app/core/config.py`:
### Order Processing
- `ORDER_PROCESSING_ENABLED`: Enable automatic order processing (default: true)
- `AUTO_APPROVE_ORDERS`: Automatically approve orders (default: false)
- `MAX_ORDER_ITEMS`: Maximum items per order (default: 50)
### Procurement Planning
- `PROCUREMENT_PLANNING_ENABLED`: Enable procurement planning (default: true)
- `PROCUREMENT_LEAD_TIME_DAYS`: Standard procurement lead time (default: 3)
- `DEMAND_FORECAST_DAYS`: Days for demand forecasting (default: 14)
- `SAFETY_STOCK_PERCENTAGE`: Safety stock buffer (default: 20%)
### Business Model Detection
- `ENABLE_BUSINESS_MODEL_DETECTION`: Enable automatic detection (default: true)
- `CENTRAL_BAKERY_ORDER_THRESHOLD`: Order threshold for central bakery (default: 20)
- `INDIVIDUAL_BAKERY_ORDER_THRESHOLD`: Order threshold for individual bakery (default: 5)
### Customer Management
- `CUSTOMER_VALIDATION_ENABLED`: Enable customer validation (default: true)
- `MAX_CUSTOMERS_PER_TENANT`: Maximum customers per tenant (default: 10000)
- `CUSTOMER_CREDIT_CHECK_ENABLED`: Enable credit checking (default: false)
### Order Validation
- `MIN_ORDER_VALUE`: Minimum order value (default: 0.0)
- `MAX_ORDER_VALUE`: Maximum order value (default: 100000.0)
- `VALIDATE_PRODUCT_AVAILABILITY`: Check product availability (default: true)
### Alert Thresholds
- `HIGH_VALUE_ORDER_THRESHOLD`: High-value order alert (default: 5000.0)
- `LARGE_QUANTITY_ORDER_THRESHOLD`: Large quantity alert (default: 100)
- `RUSH_ORDER_HOURS_THRESHOLD`: Rush order time threshold (default: 24)
- `PROCUREMENT_SHORTAGE_THRESHOLD`: Procurement shortage alert (default: 90%)
### Payment and Pricing
- `PAYMENT_VALIDATION_ENABLED`: Enable payment validation (default: true)
- `DYNAMIC_PRICING_ENABLED`: Enable dynamic pricing (default: false)
- `DISCOUNT_ENABLED`: Enable discounts (default: true)
- `MAX_DISCOUNT_PERCENTAGE`: Maximum discount allowed (default: 50%)
### Delivery and Fulfillment
- `DELIVERY_TRACKING_ENABLED`: Enable delivery tracking (default: true)
- `DEFAULT_DELIVERY_WINDOW_HOURS`: Default delivery window (default: 48)
- `PICKUP_ENABLED`: Enable pickup orders (default: true)
- `DELIVERY_ENABLED`: Enable delivery orders (default: true)
## Database Models
### Customer
- Complete customer profile with contact information
- Business type classification (individual, business, central_bakery)
- Payment terms and credit management
- Order history and metrics tracking
- Delivery preferences and special requirements
### CustomerOrder
- Comprehensive order tracking from creation to delivery
- Status management with full audit trail
- Financial calculations including discounts and taxes
- Delivery scheduling and fulfillment tracking
- Business model detection and categorization
- Customer communication preferences
### OrderItem
- Detailed line item tracking with product specifications
- Customization and special instruction support
- Production requirement integration
- Cost tracking and margin analysis
- Quality control integration
### OrderStatusHistory
- Complete audit trail of order status changes
- Event tracking with detailed context
- User attribution and change reasons
- Customer notification tracking
### ProcurementPlan
- Master procurement planning with business model context
- Supplier diversification and risk assessment
- Performance tracking and cost analysis
- Integration with demand forecasting
### ProcurementRequirement
- Detailed procurement requirements per product/ingredient
- Current inventory level integration
- Supplier preference and lead time management
- Quality specifications and special requirements
### OrderAlert
- Comprehensive alert system for order issues
- Multiple severity levels with appropriate routing
- Business impact assessment
- Resolution tracking and performance metrics
## Business Logic
### Order Processing Flow
1. **Order Creation**: Validate customer, calculate totals, create order record
2. **Item Processing**: Create order items with specifications and requirements
3. **Status Tracking**: Maintain complete audit trail of status changes
4. **Customer Metrics**: Update customer statistics and relationship data
5. **Business Model Detection**: Analyze patterns to determine bakery type
6. **Alert Generation**: Check for high-value, rush, or large orders
7. **Service Integration**: Notify production and inventory services
### Procurement Planning
1. **Demand Analysis**: Aggregate orders by delivery date and products
2. **Inventory Integration**: Check current stock levels and reservations
3. **Requirement Calculation**: Calculate net procurement needs with safety buffer
4. **Supplier Coordination**: Match requirements with preferred suppliers
5. **Lead Time Planning**: Account for supplier lead times and delivery windows
6. **Risk Assessment**: Evaluate supply risks and backup options
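
The requirement calculation in step 3 boils down to buffering forecast demand and netting out usable stock. A minimal sketch, assuming `SAFETY_STOCK_PERCENTAGE` and `PROCUREMENT_LEAD_TIME_DAYS` from the configuration above; function and field names are illustrative, not the service's exact API:

```python
from datetime import date, timedelta

def net_procurement_requirement(
    forecast_demand: float,   # aggregated demand for the delivery date (step 1)
    on_hand: float,           # current stock from the inventory service (step 2)
    reserved: float,          # stock already reserved for other orders
    needed_by: date,          # delivery date the requirement must cover
    safety_stock_pct: float = 20.0,   # SAFETY_STOCK_PERCENTAGE
    lead_time_days: int = 3,          # PROCUREMENT_LEAD_TIME_DAYS
) -> tuple[float, date]:
    """Return (quantity to procure, latest date the order must be placed)."""
    buffered_demand = forecast_demand * (1 + safety_stock_pct / 100)
    available = max(on_hand - reserved, 0.0)
    net_quantity = max(buffered_demand - available, 0.0)
    order_by = needed_by - timedelta(days=lead_time_days)
    return round(net_quantity, 2), order_by

# 120 units forecast, 40 on hand, 10 reserved -> procure 114.0 units, order 3 days ahead
print(net_procurement_requirement(120, 40, 10, date(2025, 9, 1)))
```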
### Business Model Detection
- **Individual Bakery**: Low order volume, direct customer sales, standard products
- **Central Bakery**: High volume, wholesale operations, bulk orders
- **Detection Factors**: Order frequency, quantity, customer types, sales channels
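
As a rough sketch of how these factors could combine, the thresholds correspond to `CENTRAL_BAKERY_ORDER_THRESHOLD` and `INDIVIDUAL_BAKERY_ORDER_THRESHOLD`; the wholesale-share cue is an illustrative assumption, not the service's exact rule:

```python
def detect_business_model(
    avg_daily_orders: float,
    wholesale_order_share: float,    # fraction of orders placed by business customers
    central_threshold: int = 20,     # CENTRAL_BAKERY_ORDER_THRESHOLD
    individual_threshold: int = 5,   # INDIVIDUAL_BAKERY_ORDER_THRESHOLD
) -> str:
    if avg_daily_orders >= central_threshold or wholesale_order_share >= 0.5:
        return "central_bakery"
    if avg_daily_orders <= individual_threshold:
        return "individual_bakery"
    return "undetermined"   # in-between volumes: keep collecting order history

print(detect_business_model(avg_daily_orders=32, wholesale_order_share=0.7))  # central_bakery
```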
## Alert System
### Alert Types
- **High Value Orders**: Orders exceeding configured thresholds
- **Rush Orders**: Orders with tight delivery requirements
- **Large Quantity Orders**: Orders with unusually high item counts
- **Payment Issues**: Payment validation failures or credit problems
- **Procurement Shortages**: Insufficient inventory for order fulfillment
- **Customer Issues**: New customers, credit limit exceedances, special requirements
### Severity Levels
- **Critical**: WhatsApp + Email + Dashboard + SMS
- **High**: WhatsApp + Email + Dashboard
- **Medium**: Email + Dashboard
- **Low**: Dashboard only
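
Expressed as data, this routing table could look like the following; the channel identifiers are illustrative, since the actual names live in the notification service:

```python
SEVERITY_CHANNELS = {
    "critical": ["whatsapp", "email", "dashboard", "sms"],
    "high":     ["whatsapp", "email", "dashboard"],
    "medium":   ["email", "dashboard"],
    "low":      ["dashboard"],
}

def channels_for(severity: str) -> list[str]:
    # unknown severities degrade gracefully to dashboard-only
    return SEVERITY_CHANNELS.get(severity, ["dashboard"])
```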
## Development
### Setup
```bash
# Install dependencies
pip install -r requirements.txt
# Set up database
# Configure ORDERS_DATABASE_URL environment variable
# Run migrations
alembic upgrade head
# Start service
uvicorn app.main:app --reload
```
### Testing
```bash
# Run tests
pytest
# Run with coverage
pytest --cov=app
```
### Docker
```bash
# Build image
docker build -t orders-service .
# Run container
docker run -p 8000:8000 orders-service
```
## Deployment
The service is designed for containerized deployment with:
- Health checks at `/health`
- Structured logging
- Metrics collection
- Database migrations
- Service discovery integration
## Architecture
Follows Domain-Driven Microservices Architecture:
- Clean separation of concerns
- Repository pattern for data access
- Service layer for business logic
- API layer for external interface
- Shared infrastructure for cross-cutting concerns

View File

@@ -5,7 +5,7 @@
Orders Service Database Configuration
"""
from sqlalchemy import create_engine
from sqlalchemy import create_engine, text
from sqlalchemy.ext.asyncio import create_async_engine, async_sessionmaker, AsyncSession
from sqlalchemy.orm import sessionmaker, DeclarativeBase
import structlog
@@ -72,7 +72,7 @@ async def get_db_health() -> bool:
"""Check database health"""
try:
async with async_engine.begin() as conn:
await conn.execute("SELECT 1")
await conn.execute(text("SELECT 1"))
return True
except Exception as e:
logger.error("Database health check failed", error=str(e))

View File

@@ -1,138 +0,0 @@
# POS Integration Service
This service handles integration with external Point of Sale (POS) systems for the Bakery IA platform.
## Supported POS Systems
- **Square POS** - Popular payment and POS solution with strong API support
- **Toast POS** - Restaurant-focused POS system with comprehensive features
- **Lightspeed Restaurant** - Full-featured restaurant management system
## Features
- **Real-time webhook handling** from POS systems
- **Bidirectional data synchronization** with sales service
- **Secure credential management** with encryption
- **Multi-tenant support** with tenant-specific configurations
- **Comprehensive transaction logging** and audit trails
- **Automatic duplicate detection** and handling
- **Rate limiting and retry mechanisms** for reliability
## Architecture
The POS service follows the established microservices architecture:
```
POS Service
├── API Layer (FastAPI)
├── Business Logic (Services)
├── Data Access (Repositories)
├── External Integrations (POS Providers)
├── Webhook Handlers
└── Background Sync Jobs
```
## API Endpoints
### Configuration Management
- `GET /api/v1/tenants/{tenant_id}/pos/configurations` - List POS configurations
- `POST /api/v1/tenants/{tenant_id}/pos/configurations` - Create new configuration
- `PUT /api/v1/tenants/{tenant_id}/pos/configurations/{config_id}` - Update configuration
- `DELETE /api/v1/tenants/{tenant_id}/pos/configurations/{config_id}` - Delete configuration
### Webhook Handling
- `POST /api/v1/webhooks/{pos_system}` - Receive webhooks from POS systems
- `GET /api/v1/webhooks/{pos_system}/status` - Get webhook status
### Data Synchronization
- `POST /api/v1/tenants/{tenant_id}/pos/configurations/{config_id}/sync` - Trigger sync
- `GET /api/v1/tenants/{tenant_id}/pos/configurations/{config_id}/sync/status` - Get sync status
- `GET /api/v1/tenants/{tenant_id}/pos/transactions` - Get POS transactions
## Database Schema
### Core Tables
- `pos_configurations` - POS system configurations per tenant
- `pos_transactions` - Transaction data from POS systems
- `pos_transaction_items` - Individual items within transactions
- `pos_webhook_logs` - Webhook event logs
- `pos_sync_logs` - Synchronization operation logs
## Environment Variables
See `app/core/config.py` for all configuration options. Key variables include:
```bash
# Database
POS_DATABASE_URL=postgresql+asyncpg://pos_user:pos_pass123@pos-db:5432/pos_db
# POS Provider Credentials
SQUARE_APPLICATION_ID=your_square_app_id
SQUARE_ACCESS_TOKEN=your_square_token
TOAST_CLIENT_ID=your_toast_client_id
LIGHTSPEED_CLIENT_ID=your_lightspeed_client_id
# Webhook Configuration
WEBHOOK_BASE_URL=https://your-domain.com
WEBHOOK_SECRET=your_webhook_secret
```
## Development
### Running the Service
```bash
# Using Docker Compose (recommended)
docker-compose up pos-service
# Local development
cd services/pos
pip install -r requirements.txt
uvicorn app.main:app --reload --port 8000
```
### Database Migrations
```bash
# Create migration
alembic revision --autogenerate -m "Description"
# Apply migrations
alembic upgrade head
```
### Testing
```bash
# Run tests
pytest tests/
# Run with coverage
pytest --cov=app tests/
```
## Security Considerations
- POS credentials are encrypted before storage
- Webhook signatures are verified for authenticity
- All API endpoints require tenant-based authentication
- Rate limiting prevents abuse
- Sensitive data is logged with appropriate redaction
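
Each provider signs webhooks slightly differently (header name, encoding), but the verification step is the same HMAC comparison against the shared secret. A minimal sketch, assuming an HMAC-SHA256 hex signature and the `WEBHOOK_SECRET` shown in the environment variables above:

```python
import hashlib
import hmac

def verify_webhook_signature(payload: bytes, received_signature: str, secret: str) -> bool:
    """Return True only if the request body matches the provider's signature."""
    expected = hmac.new(secret.encode(), payload, hashlib.sha256).hexdigest()
    # constant-time comparison avoids leaking information through timing
    return hmac.compare_digest(expected, received_signature)
```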
## Monitoring
The service includes comprehensive monitoring:
- Health check endpoints
- Prometheus metrics
- Structured logging
- Performance tracking
- Error rate monitoring
## Integration Flow
1. **Configuration**: Set up POS system credentials via API
2. **Webhook Registration**: Register webhook URLs with POS providers
3. **Real-time Events**: Receive and process webhook events
4. **Data Sync**: Periodic synchronization of transaction data
5. **Sales Integration**: Forward processed data to sales service
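
A simplified sketch of steps 3–5 for a single event, with `repo` standing in for the service's repository layer (its method names here are hypothetical):

```python
async def process_pos_event(event: dict, repo) -> str:
    """Idempotent handling of one webhook event."""
    tx_id = event["transaction_id"]
    if await repo.transaction_exists(tx_id):
        return "duplicate_ignored"          # POS providers may re-deliver events
    await repo.store_transaction(event)     # persist raw transaction and items
    await repo.forward_to_sales(event)      # hand off to the sales service
    return "processed"
```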

View File

@@ -1,187 +0,0 @@
# Production Service
Production planning and batch management service for the bakery management system.
## Overview
The Production Service handles all production-related operations including:
- **Production Planning**: Calculate daily requirements using demand forecasts and inventory levels
- **Batch Management**: Track production batches from start to finish
- **Capacity Management**: Equipment, staff, and time scheduling
- **Quality Control**: Yield tracking, waste management, efficiency metrics
- **Alert System**: Comprehensive monitoring and notifications
## Features
### Core Capabilities
- Daily production requirements calculation
- Production batch lifecycle management
- Real-time capacity planning and utilization
- Quality control tracking and metrics
- Comprehensive alert system with multiple severity levels
- Integration with inventory, orders, recipes, and sales services
### API Endpoints
#### Dashboard & Planning
- `GET /api/v1/tenants/{tenant_id}/production/dashboard-summary` - Production dashboard data
- `GET /api/v1/tenants/{tenant_id}/production/daily-requirements` - Daily production planning
- `GET /api/v1/tenants/{tenant_id}/production/requirements` - Requirements for procurement
#### Batch Management
- `POST /api/v1/tenants/{tenant_id}/production/batches` - Create production batch
- `GET /api/v1/tenants/{tenant_id}/production/batches/active` - Get active batches
- `GET /api/v1/tenants/{tenant_id}/production/batches/{batch_id}` - Get batch details
- `PUT /api/v1/tenants/{tenant_id}/production/batches/{batch_id}/status` - Update batch status
#### Scheduling & Capacity
- `GET /api/v1/tenants/{tenant_id}/production/schedule` - Production schedule
- `GET /api/v1/tenants/{tenant_id}/production/capacity/status` - Capacity status
#### Alerts & Monitoring
- `GET /api/v1/tenants/{tenant_id}/production/alerts` - Production alerts
- `POST /api/v1/tenants/{tenant_id}/production/alerts/{alert_id}/acknowledge` - Acknowledge alerts
#### Analytics
- `GET /api/v1/tenants/{tenant_id}/production/metrics/yield` - Yield metrics
## Service Integration
### Shared Clients Used
- **InventoryServiceClient**: Stock levels, ingredient availability
- **OrdersServiceClient**: Demand requirements, customer orders
- **RecipesServiceClient**: Recipe requirements, ingredient calculations
- **SalesServiceClient**: Historical sales data
- **NotificationServiceClient**: Alert notifications
### Authentication
Uses shared authentication patterns with tenant isolation:
- JWT token validation
- Tenant access verification
- User permission checks
## Configuration
Key configuration options in `app/core/config.py`:
### Production Planning
- `PLANNING_HORIZON_DAYS`: Days ahead for planning (default: 7)
- `PRODUCTION_BUFFER_PERCENTAGE`: Safety buffer for production (default: 10%)
- `MINIMUM_BATCH_SIZE`: Minimum batch size (default: 1.0)
- `MAXIMUM_BATCH_SIZE`: Maximum batch size (default: 100.0)
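
A minimal sketch of how these settings interact when turning forecast demand into a batch size (function and argument names are illustrative):

```python
def plan_batch_quantity(
    forecast_units: float,
    current_stock: float,
    buffer_pct: float = 10.0,   # PRODUCTION_BUFFER_PERCENTAGE
    min_batch: float = 1.0,     # MINIMUM_BATCH_SIZE
    max_batch: float = 100.0,   # MAXIMUM_BATCH_SIZE
) -> float:
    required = max(forecast_units * (1 + buffer_pct / 100) - current_stock, 0.0)
    if required == 0:
        return 0.0
    # clamp to the configured batch-size window
    return min(max(required, min_batch), max_batch)

print(plan_batch_quantity(forecast_units=80, current_stock=20))  # 68.0
```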
### Capacity Management
- `DEFAULT_WORKING_HOURS_PER_DAY`: Standard working hours (default: 12)
- `MAX_OVERTIME_HOURS`: Maximum overtime allowed (default: 4)
- `CAPACITY_UTILIZATION_TARGET`: Target utilization (default: 85%)
### Quality Control
- `MINIMUM_YIELD_PERCENTAGE`: Minimum acceptable yield (default: 85%)
- `QUALITY_SCORE_THRESHOLD`: Minimum quality score (default: 8.0)
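
For reference, the yield check reduces to a single ratio against `MINIMUM_YIELD_PERCENTAGE`:

```python
def yield_status(planned_qty: float, actual_qty: float, min_yield_pct: float = 85.0) -> tuple[float, bool]:
    """Return (yield percentage, whether it meets the configured minimum)."""
    yield_pct = (actual_qty / planned_qty) * 100 if planned_qty else 0.0
    return round(yield_pct, 1), yield_pct >= min_yield_pct

print(yield_status(100, 82))  # (82.0, False) -> would raise a low-yield alert
```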
### Alert Thresholds
- `CAPACITY_EXCEEDED_THRESHOLD`: Capacity alert threshold (default: 100%)
- `PRODUCTION_DELAY_THRESHOLD_MINUTES`: Delay alert threshold (default: 60)
- `LOW_YIELD_ALERT_THRESHOLD`: Low yield alert (default: 80%)
## Database Models
### ProductionBatch
- Complete batch tracking from planning to completion
- Status management (pending, in_progress, completed, etc.)
- Cost tracking and yield calculations
- Quality metrics integration
### ProductionSchedule
- Daily production scheduling
- Capacity planning and tracking
- Staff and equipment assignments
- Performance metrics
### ProductionCapacity
- Resource availability tracking
- Equipment and staff capacity
- Maintenance scheduling
- Utilization monitoring
### QualityCheck
- Quality control measurements
- Pass/fail tracking
- Defect recording
- Corrective action management
### ProductionAlert
- Comprehensive alert system
- Multiple severity levels
- Action recommendations
- Resolution tracking
## Alert System
### Alert Types
- **Capacity Exceeded**: When production requirements exceed available capacity
- **Production Delay**: When batches are delayed beyond thresholds
- **Cost Spike**: When production costs exceed normal ranges
- **Low Yield**: When yield percentages fall below targets
- **Quality Issues**: When quality scores consistently decline
- **Equipment Maintenance**: When equipment needs maintenance
### Severity Levels
- **Critical**: WhatsApp + Email + Dashboard + SMS
- **High**: WhatsApp + Email + Dashboard
- **Medium**: Email + Dashboard
- **Low**: Dashboard only
## Development
### Setup
```bash
# Install dependencies
pip install -r requirements.txt
# Set up database
# Configure DATABASE_URL environment variable
# Run migrations
alembic upgrade head
# Start service
uvicorn app.main:app --reload
```
### Testing
```bash
# Run tests
pytest
# Run with coverage
pytest --cov=app
```
### Docker
```bash
# Build image
docker build -t production-service .
# Run container
docker run -p 8000:8000 production-service
```
## Deployment
The service is designed for containerized deployment with:
- Health checks at `/health`
- Structured logging
- Metrics collection
- Database migrations
- Service discovery integration
## Architecture
Follows Domain-Driven Microservices Architecture:
- Clean separation of concerns
- Repository pattern for data access
- Service layer for business logic
- API layer for external interface
- Shared infrastructure for cross-cutting concerns

View File

@@ -14,6 +14,7 @@ import structlog
from app.core.config import settings
from app.core.database import init_database, get_db_health
from app.api.production import router as production_router
from app.services.production_alert_service import ProductionAlertService
# Configure logging
logger = structlog.get_logger()
@@ -25,6 +26,16 @@ async def lifespan(app: FastAPI):
# Startup
try:
await init_database()
logger.info("Database initialized")
# Initialize alert service
alert_service = ProductionAlertService(settings)
await alert_service.start()
logger.info("Production alert service started")
# Store alert service in app state
app.state.alert_service = alert_service
logger.info("Production service started successfully")
except Exception as e:
logger.error("Failed to initialize production service", error=str(e))
@@ -34,6 +45,13 @@ async def lifespan(app: FastAPI):
# Shutdown
logger.info("Production service shutting down")
try:
# Stop alert service
if hasattr(app.state, 'alert_service'):
await app.state.alert_service.stop()
logger.info("Alert service stopped")
except Exception as e:
logger.error("Error during shutdown", error=str(e))
# Create FastAPI application

View File

@@ -0,0 +1,795 @@
# services/production/app/services/production_alert_service.py
"""
Production-specific alert and recommendation detection service
Monitors production capacity, delays, quality issues, and optimization opportunities
"""
import json
from typing import List, Dict, Any, Optional
from uuid import UUID
from datetime import datetime, timedelta
import structlog
from apscheduler.triggers.cron import CronTrigger
from shared.alerts.base_service import BaseAlertService, AlertServiceMixin
from shared.alerts.templates import format_item_message
logger = structlog.get_logger()
class ProductionAlertService(BaseAlertService, AlertServiceMixin):
"""Production service alert and recommendation detection"""
def setup_scheduled_checks(self):
"""Production-specific scheduled checks for alerts and recommendations"""
# Production capacity checks - every 10 minutes during business hours (alerts)
self.scheduler.add_job(
self.check_production_capacity,
CronTrigger(minute='*/10', hour='6-20'),
id='capacity_check',
misfire_grace_time=60,
max_instances=1
)
# Production delays - every 5 minutes during production hours (alerts)
self.scheduler.add_job(
self.check_production_delays,
CronTrigger(minute='*/5', hour='4-22'),
id='delay_check',
misfire_grace_time=30,
max_instances=1
)
# Quality issues check - every 15 minutes (alerts)
self.scheduler.add_job(
self.check_quality_issues,
CronTrigger(minute='*/15'),
id='quality_check',
misfire_grace_time=60,
max_instances=1
)
# Equipment monitoring - every 3 minutes (alerts)
self.scheduler.add_job(
self.check_equipment_status,
CronTrigger(minute='*/3'),
id='equipment_check',
misfire_grace_time=30,
max_instances=1
)
# Efficiency recommendations - every 30 minutes (recommendations)
self.scheduler.add_job(
self.generate_efficiency_recommendations,
CronTrigger(minute='*/30'),
id='efficiency_recs',
misfire_grace_time=120,
max_instances=1
)
# Energy optimization - every hour (recommendations)
self.scheduler.add_job(
self.generate_energy_recommendations,
CronTrigger(minute='0'),
id='energy_recs',
misfire_grace_time=300,
max_instances=1
)
logger.info("Production alert schedules configured",
service=self.config.SERVICE_NAME)
async def check_production_capacity(self):
"""Check if production plan exceeds capacity (alerts)"""
try:
self._checks_performed += 1
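# NOTE: the full capacity query below is kept only as a reference for when the
# production_schedule / production_capacity tables exist; the simplified query
# further down is the one actually executed against production_batches.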
query = """
WITH capacity_analysis AS (
SELECT
p.tenant_id,
p.planned_date,
SUM(p.planned_quantity) as total_planned,
MAX(pc.daily_capacity) as max_daily_capacity,
COUNT(DISTINCT p.equipment_id) as equipment_count,
AVG(pc.efficiency_percent) as avg_efficiency,
CASE
WHEN SUM(p.planned_quantity) > MAX(pc.daily_capacity) * 1.2 THEN 'severe_overload'
WHEN SUM(p.planned_quantity) > MAX(pc.daily_capacity) THEN 'overload'
WHEN SUM(p.planned_quantity) > MAX(pc.daily_capacity) * 0.9 THEN 'near_capacity'
ELSE 'normal'
END as capacity_status,
(SUM(p.planned_quantity) / MAX(pc.daily_capacity)) * 100 as capacity_percentage
FROM production_schedule p
JOIN production_capacity pc ON pc.equipment_id = p.equipment_id
WHERE p.planned_date >= CURRENT_DATE
AND p.planned_date <= CURRENT_DATE + INTERVAL '3 days'
AND p.status IN ('planned', 'in_progress')
AND p.tenant_id = $1
GROUP BY p.tenant_id, p.planned_date
)
SELECT * FROM capacity_analysis
WHERE capacity_status != 'normal'
ORDER BY capacity_percentage DESC
"""
# Check production capacity without tenant dependencies
try:
from sqlalchemy import text
# Simplified query using only existing production tables
simplified_query = text("""
SELECT
pb.tenant_id,
DATE(pb.planned_start_time) as planned_date,
COUNT(*) as batch_count,
SUM(pb.planned_quantity) as total_planned,
'capacity_check' as capacity_status
FROM production_batches pb
WHERE pb.planned_start_time >= CURRENT_DATE
AND pb.planned_start_time <= CURRENT_DATE + INTERVAL '3 days'
AND pb.status IN ('planned', 'pending', 'in_progress')
GROUP BY pb.tenant_id, DATE(pb.planned_start_time)
HAVING COUNT(*) > 10 -- Alert if more than 10 batches per day
ORDER BY total_planned DESC
""")
async with self.db_manager.get_session() as session:
result = await session.execute(simplified_query)
capacity_issues = result.fetchall()
for issue in capacity_issues:
await self._process_capacity_issue(issue.tenant_id, issue)
except Exception as e:
logger.debug("Simplified capacity check failed", error=str(e))
except Exception as e:
# Skip capacity checks if tables don't exist (graceful degradation)
if "does not exist" in str(e):
logger.debug("Capacity check skipped - missing tables", error=str(e))
else:
logger.error("Capacity check failed", error=str(e))
self._errors_count += 1
async def _process_capacity_issue(self, tenant_id: UUID, issue: Dict[str, Any]):
"""Process capacity overload issue"""
try:
status = issue['capacity_status']
percentage = issue['capacity_percentage']
if status == 'severe_overload':
template_data = self.format_spanish_message(
'order_overload',
percentage=int(percentage - 100)
)
await self.publish_item(tenant_id, {
'type': 'severe_capacity_overload',
'severity': 'urgent',
'title': template_data['title'],
'message': template_data['message'],
'actions': template_data['actions'],
'metadata': {
'planned_date': issue['planned_date'].isoformat(),
'capacity_percentage': float(percentage),
'overload_percentage': float(percentage - 100),
'equipment_count': issue['equipment_count']
}
}, item_type='alert')
elif status == 'overload':
severity = self.get_business_hours_severity('high')
await self.publish_item(tenant_id, {
'type': 'capacity_overload',
'severity': severity,
'title': f'⚠️ Capacidad Excedida: {percentage:.0f}%',
'message': f'Producción planificada para {issue["planned_date"]} excede capacidad en {percentage-100:.0f}%.',
'actions': ['Redistribuir cargas', 'Ampliar turnos', 'Subcontratar', 'Posponer pedidos'],
'metadata': {
'planned_date': issue['planned_date'].isoformat(),
'capacity_percentage': float(percentage),
'equipment_count': issue['equipment_count']
}
}, item_type='alert')
elif status == 'near_capacity':
severity = self.get_business_hours_severity('medium')
await self.publish_item(tenant_id, {
'type': 'near_capacity',
'severity': severity,
'title': f'📊 Cerca de Capacidad Máxima: {percentage:.0f}%',
'message': f'Producción del {issue["planned_date"]} está al {percentage:.0f}% de capacidad. Monitorear de cerca.',
'actions': ['Revisar planificación', 'Preparar contingencias', 'Optimizar eficiencia'],
'metadata': {
'planned_date': issue['planned_date'].isoformat(),
'capacity_percentage': float(percentage)
}
}, item_type='alert')
except Exception as e:
logger.error("Error processing capacity issue", error=str(e))
async def check_production_delays(self):
"""Check for production delays (alerts)"""
try:
self._checks_performed += 1
# Simplified query without customer_orders dependency
query = """
SELECT
pb.id, pb.tenant_id, pb.product_name, pb.batch_number,
pb.planned_end_time as planned_completion_time, pb.actual_start_time,
pb.actual_end_time as estimated_completion_time, pb.status,
EXTRACT(EPOCH FROM (NOW() - pb.planned_end_time)) / 60 as delay_minutes,
COALESCE(pb.priority::text, 'medium') as priority_level,
1 as affected_orders -- Default to 1 since we can't count orders
FROM production_batches pb
WHERE pb.status IN ('in_progress', 'delayed')
AND (
(pb.planned_end_time < NOW() AND pb.status = 'in_progress')
OR pb.status = 'delayed'
)
AND pb.planned_end_time > NOW() - INTERVAL '24 hours'
ORDER BY
CASE COALESCE(pb.priority::text, 'medium')
WHEN 'urgent' THEN 1 WHEN 'high' THEN 2 ELSE 3
END,
delay_minutes DESC
"""
from sqlalchemy import text
async with self.db_manager.get_session() as session:
result = await session.execute(text(query))
delays = result.fetchall()
for delay in delays:
await self._process_production_delay(delay)
except Exception as e:
# Skip delay checks if tables don't exist (graceful degradation)
if "does not exist" in str(e):
logger.debug("Production delay check skipped - missing tables", error=str(e))
else:
logger.error("Production delay check failed", error=str(e))
self._errors_count += 1
async def _process_production_delay(self, delay: Dict[str, Any]):
"""Process production delay"""
try:
delay_minutes = delay['delay_minutes']
priority = delay['priority_level']
affected_orders = delay['affected_orders']
# Determine severity based on delay time and priority
if delay_minutes > 120 or priority == 'urgent':
severity = 'urgent'
elif delay_minutes > 60 or priority == 'high':
severity = 'high'
elif delay_minutes > 30:
severity = 'medium'
else:
severity = 'low'
template_data = self.format_spanish_message(
'production_delay',
batch_name=f"{delay['product_name']} #{delay['batch_number']}",
delay_minutes=int(delay_minutes)
)
await self.publish_item(delay['tenant_id'], {
'type': 'production_delay',
'severity': severity,
'title': template_data['title'],
'message': template_data['message'],
'actions': template_data['actions'],
'metadata': {
'batch_id': str(delay['id']),
'product_name': delay['product_name'],
'batch_number': delay['batch_number'],
'delay_minutes': delay_minutes,
'priority_level': priority,
'affected_orders': affected_orders,
'planned_completion': delay['planned_completion_time'].isoformat()
}
}, item_type='alert')
except Exception as e:
logger.error("Error processing production delay",
batch_id=str(delay.get('id')),
error=str(e))
async def check_quality_issues(self):
"""Check for quality control issues (alerts)"""
try:
self._checks_performed += 1
# Fixed query using actual quality_checks table structure
query = """
SELECT
qc.id, qc.tenant_id, qc.batch_id, qc.check_type as test_type,
qc.quality_score as result_value,
qc.target_weight as min_acceptable,
(qc.target_weight * (1 + qc.tolerance_percentage/100)) as max_acceptable,
CASE
WHEN qc.pass_fail = false AND qc.defect_count > 5 THEN 'critical'
WHEN qc.pass_fail = false THEN 'major'
ELSE 'minor'
END as qc_severity,
qc.created_at,
pb.product_name, pb.batch_number,
COUNT(*) OVER (PARTITION BY qc.batch_id) as total_failures
FROM quality_checks qc
JOIN production_batches pb ON pb.id = qc.batch_id
WHERE qc.pass_fail = false -- Use pass_fail instead of status
AND qc.created_at > NOW() - INTERVAL '4 hours'
AND qc.corrective_action_needed = true -- Use this instead of acknowledged
ORDER BY
CASE
WHEN qc.pass_fail = false AND qc.defect_count > 5 THEN 1
WHEN qc.pass_fail = false THEN 2
ELSE 3
END,
qc.created_at DESC
"""
from sqlalchemy import text
async with self.db_manager.get_session() as session:
result = await session.execute(text(query))
quality_issues = result.fetchall()
for issue in quality_issues:
await self._process_quality_issue(issue)
except Exception as e:
# Skip quality checks if tables don't exist (graceful degradation)
if "does not exist" in str(e) or "column" in str(e).lower() and "does not exist" in str(e).lower():
logger.debug("Quality check skipped - missing tables or columns", error=str(e))
else:
logger.error("Quality check failed", error=str(e))
self._errors_count += 1
async def _process_quality_issue(self, issue: Dict[str, Any]):
"""Process quality control failure"""
try:
qc_severity = issue['qc_severity']
total_failures = issue['total_failures']
# Map QC severity to alert severity
if qc_severity == 'critical' or total_failures > 2:
severity = 'urgent'
elif qc_severity == 'major':
severity = 'high'
else:
severity = 'medium'
await self.publish_item(issue['tenant_id'], {
'type': 'quality_control_failure',
'severity': severity,
'title': f'❌ Fallo Control Calidad: {issue["product_name"]}',
'message': f'Lote {issue["batch_number"]} falló en {issue["test_type"]}. Valor: {issue["result_value"]} (rango: {issue["min_acceptable"]}-{issue["max_acceptable"]})',
'actions': ['Revisar lote', 'Repetir prueba', 'Ajustar proceso', 'Documentar causa'],
'metadata': {
'quality_check_id': str(issue['id']),
'batch_id': str(issue['batch_id']),
'test_type': issue['test_type'],
'result_value': float(issue['result_value']),
'min_acceptable': float(issue['min_acceptable']),
'max_acceptable': float(issue['max_acceptable']),
'qc_severity': qc_severity,
'total_failures': total_failures
}
}, item_type='alert')
# Clear the corrective-action flag so the same failure is not re-alerted
await self.db_manager.execute(
"UPDATE quality_checks SET corrective_action_needed = false WHERE id = $1",
issue['id']
)
except Exception as e:
logger.error("Error processing quality issue",
quality_check_id=str(issue.get('id')),
error=str(e))
async def check_equipment_status(self):
"""Check equipment status and failures (alerts)"""
# Equipment tables don't exist in production database - skip this check
logger.debug("Equipment check skipped - equipment tables not available in production database")
return
async def _process_equipment_issue(self, equipment: Dict[str, Any]):
"""Process equipment issue"""
try:
status = equipment['status']
efficiency = equipment.get('efficiency_percent', 100)
days_to_maintenance = equipment.get('days_to_maintenance', 30)
if status == 'error':
template_data = self.format_spanish_message(
'equipment_failure',
equipment_name=equipment['name']
)
await self.publish_item(equipment['tenant_id'], {
'type': 'equipment_failure',
'severity': 'urgent',
'title': template_data['title'],
'message': template_data['message'],
'actions': template_data['actions'],
'metadata': {
'equipment_id': str(equipment['id']),
'equipment_name': equipment['name'],
'equipment_type': equipment['type'],
'error_count': equipment.get('error_count', 0),
'last_reading': equipment.get('last_reading').isoformat() if equipment.get('last_reading') else None
}
}, item_type='alert')
elif status == 'maintenance_required' or days_to_maintenance <= 1:
severity = 'high' if days_to_maintenance <= 1 else 'medium'
await self.publish_item(equipment['tenant_id'], {
'type': 'maintenance_required',
'severity': severity,
'title': f'🔧 Mantenimiento Requerido: {equipment["name"]}',
'message': f'Equipo {equipment["name"]} requiere mantenimiento en {days_to_maintenance} días.',
'actions': ['Programar mantenimiento', 'Revisar historial', 'Preparar repuestos', 'Planificar parada'],
'metadata': {
'equipment_id': str(equipment['id']),
'days_to_maintenance': days_to_maintenance,
'last_maintenance': equipment.get('last_maintenance').isoformat() if equipment.get('last_maintenance') else None
}
}, item_type='alert')
elif efficiency < 80:
severity = 'medium' if efficiency < 70 else 'low'
await self.publish_item(equipment['tenant_id'], {
'type': 'low_equipment_efficiency',
'severity': severity,
'title': f'📉 Baja Eficiencia: {equipment["name"]}',
'message': f'Eficiencia del {equipment["name"]} bajó a {efficiency:.1f}%. Revisar funcionamiento.',
'actions': ['Revisar configuración', 'Limpiar equipo', 'Calibrar sensores', 'Revisar mantenimiento'],
'metadata': {
'equipment_id': str(equipment['id']),
'efficiency_percent': float(efficiency),
'temperature': equipment.get('temperature'),
'vibration_level': equipment.get('vibration_level')
}
}, item_type='alert')
except Exception as e:
logger.error("Error processing equipment issue",
equipment_id=str(equipment.get('id')),
error=str(e))
async def generate_efficiency_recommendations(self):
"""Generate production efficiency recommendations"""
try:
self._checks_performed += 1
# Analyze production patterns for efficiency opportunities
query = """
WITH efficiency_analysis AS (
SELECT
pb.tenant_id, pb.product_name,
AVG(EXTRACT(EPOCH FROM (pb.actual_end_time - pb.actual_start_time)) / 60) as avg_production_time,
AVG(pb.planned_duration_minutes) as avg_planned_duration,
COUNT(*) as batch_count,
AVG(pb.yield_percentage) as avg_yield,
EXTRACT(hour FROM pb.actual_start_time) as start_hour
FROM production_batches pb
WHERE pb.status = 'completed'
AND pb.actual_end_time > CURRENT_DATE - INTERVAL '30 days'
AND pb.tenant_id = :tenant_id
GROUP BY pb.tenant_id, pb.product_name, EXTRACT(hour FROM pb.actual_start_time)
HAVING COUNT(*) >= 3
),
recommendations AS (
SELECT *,
CASE
WHEN avg_production_time > avg_planned_duration * 1.2 THEN 'reduce_production_time'
WHEN avg_yield < 85 THEN 'improve_yield'
WHEN start_hour BETWEEN 14 AND 16 AND avg_production_time > avg_planned_duration * 1.1 THEN 'avoid_afternoon_production'
ELSE null
END as recommendation_type,
(avg_production_time - avg_planned_duration) / avg_planned_duration * 100 as efficiency_loss_percent
FROM efficiency_analysis
)
SELECT * FROM recommendations
WHERE recommendation_type IS NOT NULL
AND efficiency_loss_percent > 10
ORDER BY efficiency_loss_percent DESC
"""
tenants = await self.get_active_tenants()
for tenant_id in tenants:
try:
from sqlalchemy import text
async with self.db_manager.get_session() as session:
result = await session.execute(text(query), {"tenant_id": tenant_id})
recommendations = result.fetchall()
for rec in recommendations:
await self._generate_efficiency_recommendation(tenant_id, rec)
except Exception as e:
logger.error("Error generating efficiency recommendations",
tenant_id=str(tenant_id),
error=str(e))
except Exception as e:
logger.error("Efficiency recommendations failed", error=str(e))
self._errors_count += 1
async def _generate_efficiency_recommendation(self, tenant_id: UUID, rec: Dict[str, Any]):
"""Generate specific efficiency recommendation"""
try:
if not self.should_send_recommendation(tenant_id, rec['recommendation_type']):
return
rec_type = rec['recommendation_type']
efficiency_loss = rec['efficiency_loss_percent']
if rec_type == 'reduce_production_time':
template_data = self.format_spanish_message(
'production_efficiency',
suggested_time=f"{rec['start_hour']:02d}:00",
savings_percent=efficiency_loss
)
await self.publish_item(tenant_id, {
'type': 'production_efficiency',
'severity': 'medium',
'title': template_data['title'],
'message': template_data['message'],
'actions': template_data['actions'],
'metadata': {
'product_name': rec['product_name'],
'avg_production_time': float(rec['avg_production_time']),
'avg_planned_duration': float(rec['avg_planned_duration']),
'efficiency_loss_percent': float(efficiency_loss),
'batch_count': rec['batch_count'],
'recommendation_type': rec_type
}
}, item_type='recommendation')
elif rec_type == 'improve_yield':
await self.publish_item(tenant_id, {
'type': 'yield_improvement',
'severity': 'medium',
'title': f'📈 Mejorar Rendimiento: {rec["product_name"]}',
'message': f'Rendimiento promedio del {rec["product_name"]} es {rec["avg_yield"]:.1f}%. Oportunidad de mejora.',
'actions': ['Revisar receta', 'Optimizar proceso', 'Entrenar personal', 'Verificar ingredientes'],
'metadata': {
'product_name': rec['product_name'],
'avg_yield': float(rec['avg_yield']),
'batch_count': rec['batch_count'],
'recommendation_type': rec_type
}
}, item_type='recommendation')
elif rec_type == 'avoid_afternoon_production':
await self.publish_item(tenant_id, {
'type': 'schedule_optimization',
'severity': 'low',
'title': f'⏰ Optimizar Horario: {rec["product_name"]}',
'message': f'Producción de {rec["product_name"]} en horario {rec["start_hour"]}:00 muestra menor eficiencia.',
'actions': ['Cambiar horario', 'Analizar causas', 'Revisar personal', 'Optimizar ambiente'],
'metadata': {
'product_name': rec['product_name'],
'start_hour': rec['start_hour'],
'efficiency_loss_percent': float(efficiency_loss),
'recommendation_type': rec_type
}
}, item_type='recommendation')
except Exception as e:
logger.error("Error generating efficiency recommendation",
product_name=rec.get('product_name'),
error=str(e))
async def generate_energy_recommendations(self):
"""Generate energy optimization recommendations"""
try:
# Analyze energy consumption patterns
query = """
SELECT
e.tenant_id, e.name as equipment_name, e.type,
AVG(ec.energy_consumption_kwh) as avg_energy,
EXTRACT(hour FROM ec.recorded_at) as hour_of_day,
COUNT(*) as readings_count
FROM equipment e
JOIN energy_consumption ec ON ec.equipment_id = e.id
WHERE ec.recorded_at > CURRENT_DATE - INTERVAL '30 days'
AND e.tenant_id = :tenant_id
GROUP BY e.tenant_id, e.id, EXTRACT(hour FROM ec.recorded_at)
HAVING COUNT(*) >= 10
ORDER BY avg_energy DESC
"""
tenants = await self.get_active_tenants()
for tenant_id in tenants:
try:
from sqlalchemy import text
async with self.db_manager.get_session() as session:
result = await session.execute(text(query), {"tenant_id": tenant_id})
energy_data = result.fetchall()
# Analyze for peak hours and optimization opportunities
await self._analyze_energy_patterns(tenant_id, energy_data)
except Exception as e:
logger.error("Error generating energy recommendations",
tenant_id=str(tenant_id),
error=str(e))
except Exception as e:
logger.error("Energy recommendations failed", error=str(e))
self._errors_count += 1
async def _analyze_energy_patterns(self, tenant_id: UUID, energy_data: List[Dict[str, Any]]):
"""Analyze energy consumption patterns for optimization"""
try:
if not energy_data:
return
# Group by equipment and find peak hours
equipment_data = {}
for record in energy_data:
equipment = record['equipment_name']
if equipment not in equipment_data:
equipment_data[equipment] = []
equipment_data[equipment].append(record)
for equipment, records in equipment_data.items():
# Find peak consumption hours
peak_hour_record = max(records, key=lambda x: x['avg_energy'])
off_peak_records = [r for r in records if r['hour_of_day'] < 7 or r['hour_of_day'] > 22]
if off_peak_records and peak_hour_record['avg_energy'] > 0:
min_off_peak = min(off_peak_records, key=lambda x: x['avg_energy'])
potential_savings = ((peak_hour_record['avg_energy'] - min_off_peak['avg_energy']) /
peak_hour_record['avg_energy']) * 100
if potential_savings > 15: # More than 15% potential savings
template_data = self.format_spanish_message(
'energy_optimization',
start_time=f"{min_off_peak['hour_of_day']:02d}:00",
end_time=f"{min_off_peak['hour_of_day']+2:02d}:00",
savings_euros=potential_savings * 0.15 # Rough estimate
)
await self.publish_item(tenant_id, {
'type': 'energy_optimization',
'severity': 'low',
'title': template_data['title'],
'message': template_data['message'],
'actions': template_data['actions'],
'metadata': {
'equipment_name': equipment,
'peak_hour': peak_hour_record['hour_of_day'],
'optimal_hour': min_off_peak['hour_of_day'],
'potential_savings_percent': float(potential_savings),
'peak_consumption': float(peak_hour_record['avg_energy']),
'optimal_consumption': float(min_off_peak['avg_energy'])
}
}, item_type='recommendation')
except Exception as e:
logger.error("Error analyzing energy patterns", error=str(e))
async def register_db_listeners(self, conn):
"""Register production-specific database listeners"""
try:
await conn.add_listener('production_alerts', self.handle_production_db_alert)
logger.info("Database listeners registered",
service=self.config.SERVICE_NAME)
except Exception as e:
logger.error("Failed to register database listeners",
service=self.config.SERVICE_NAME,
error=str(e))
async def handle_production_db_alert(self, connection, pid, channel, payload):
"""Handle production alert from database trigger"""
try:
data = json.loads(payload)
tenant_id = UUID(data['tenant_id'])
template_data = self.format_spanish_message(
'production_delay',
batch_name=f"{data['product_name']} #{data.get('batch_number', 'N/A')}",
delay_minutes=data['delay_minutes']
)
await self.publish_item(tenant_id, {
'type': 'production_delay',
'severity': 'high',
'title': template_data['title'],
'message': template_data['message'],
'actions': template_data['actions'],
'metadata': {
'batch_id': data['batch_id'],
'delay_minutes': data['delay_minutes'],
'trigger_source': 'database'
}
}, item_type='alert')
except Exception as e:
logger.error("Error handling production DB alert", error=str(e))
async def start_event_listener(self):
"""Listen for production-affecting events"""
try:
# Subscribe to inventory events that might affect production
await self.rabbitmq_client.consume_events(
"bakery_events",
f"production.inventory.{self.config.SERVICE_NAME}",
"inventory.critical_shortage",
self.handle_inventory_shortage
)
logger.info("Event listeners started",
service=self.config.SERVICE_NAME)
except Exception as e:
logger.error("Failed to start event listeners",
service=self.config.SERVICE_NAME,
error=str(e))
async def handle_inventory_shortage(self, message):
"""Handle critical inventory shortage affecting production"""
try:
shortage = json.loads(message.body)
tenant_id = UUID(shortage['tenant_id'])
# Check if this ingredient affects any current production
affected_batches = await self.get_affected_production_batches(
shortage['ingredient_id']
)
if affected_batches:
await self.publish_item(tenant_id, {
'type': 'production_ingredient_shortage',
'severity': 'high',
'title': f'🚨 Falta Ingrediente para Producción',
'message': f'Escasez de {shortage["ingredient_name"]} afecta {len(affected_batches)} lotes en producción.',
'actions': ['Buscar ingrediente alternativo', 'Pausar producción', 'Contactar proveedor urgente', 'Reorganizar plan'],
'metadata': {
'ingredient_id': shortage['ingredient_id'],
'ingredient_name': shortage['ingredient_name'],
'affected_batches': [str(b) for b in affected_batches],
'shortage_amount': shortage.get('shortage_amount', 0)
}
}, item_type='alert')
except Exception as e:
logger.error("Error handling inventory shortage event", error=str(e))
async def get_affected_production_batches(self, ingredient_id: str) -> List[str]:
"""Get production batches affected by ingredient shortage"""
try:
query = """
SELECT DISTINCT pb.id
FROM production_batches pb
JOIN recipe_ingredients ri ON ri.recipe_id = pb.recipe_id
WHERE ri.ingredient_id = :ingredient_id
AND pb.status IN ('planned', 'in_progress')
AND pb.planned_end_time > NOW()
"""
from sqlalchemy import text
async with self.db_manager.get_session() as session:
result_rows = await session.execute(text(query), {"ingredient_id": ingredient_id})
result = result_rows.fetchall()
return [str(row['id']) for row in result]
except Exception as e:
logger.error("Error getting affected production batches",
ingredient_id=ingredient_id,
error=str(e))
return []

View File

@@ -15,6 +15,14 @@ httpx==0.25.2
# Logging and monitoring
structlog==23.2.0
prometheus-client==0.19.0
# Message queues and Redis
aio-pika==9.3.1
redis>=4.0.0
# Scheduling
APScheduler==3.10.4
# Date and time utilities
python-dateutil==2.8.2

View File

@@ -1,220 +0,0 @@
This change completes the migration of the Prophet ML training functionality from the monolithic backend to a dedicated training microservice. Here's a comprehensive summary of what has been implemented:
## 🎯 **Migration Summary: Prophet Models to Training Service**
### **✅ What Was Migrated**
1. **Prophet Manager** (`prophet_manager.py`):
- Enhanced model training with bakery-specific configurations
- Spanish holidays integration
- Advanced model persistence and metadata storage
- Training metrics calculation
2. **ML Trainer** (`trainer.py`):
- Complete training orchestration for multiple products
- Single product training capability
- Model performance evaluation
- Async-first design replacing Celery complexity
3. **Data Processor** (`data_processor.py`):
- Advanced feature engineering for bakery forecasting
- Weather and traffic data integration
- Spanish holiday and school calendar detection
- Temporal feature extraction
4. **API Layer** (`training.py`):
- RESTful endpoints for training job management
- Real-time progress tracking
- Job cancellation and status monitoring
- Data validation before training
5. **Database Models** (`training.py`):
- `ModelTrainingLog`: Job execution tracking
- `TrainedModel`: Model registry and versioning
- `ModelPerformanceMetric`: Performance monitoring
- `TrainingJobQueue`: Job scheduling system
6. **Service Layer** (`training_service.py`):
- Business logic orchestration
- External service integration (data service)
- Job lifecycle management
- Error handling and recovery
7. **Messaging Integration** (`messaging.py`):
- Event-driven architecture with RabbitMQ
- Inter-service communication
- Real-time notifications
- Event publishing for other services
### **🔧 Key Improvements Over Old System**
#### **1. Eliminated Celery Complexity**
- **Before**: Complex Celery worker setup with sync/async mixing
- **After**: Pure async implementation with FastAPI background tasks
#### **2. Better Error Handling**
- **Before**: Celery task failures were hard to debug
- **After**: Detailed error tracking and recovery mechanisms
#### **3. Real-Time Progress Tracking**
- **Before**: Limited visibility into training progress
- **After**: Real-time updates with detailed step-by-step progress
#### **4. Service Isolation**
- **Before**: Training tightly coupled with main application
- **After**: Independent service that can scale separately
#### **5. Enhanced Model Management**
- **Before**: Basic model storage in filesystem
- **After**: Complete model lifecycle with versioning and metadata
### **🚀 New Capabilities**
#### **1. Advanced Training Features**
```python
# Support for different training modes
await trainer.train_tenant_models(...) # All products
await trainer.train_single_product(...) # Single product
await trainer.evaluate_model_performance(...) # Performance evaluation
```
#### **2. Real-Time Job Management**
```python
# Job lifecycle management
POST /training/jobs # Start training
GET /training/jobs/{id}/status # Get progress
POST /training/jobs/{id}/cancel # Cancel job
GET /training/jobs/{id}/logs # View detailed logs
```
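For example, a caller could poll the status endpoint with `httpx` (already one of the service dependencies). This is only a sketch: the base URL and the `status`/`progress` fields of the response are assumptions, not a confirmed schema.
```python
# Hypothetical client-side polling of GET /training/jobs/{id}/status.
# Base URL and the "status"/"progress" response fields are assumptions.
import asyncio
import httpx

async def wait_for_training_job(job_id: str,
                                base_url: str = "http://training-service:8000") -> dict:
    async with httpx.AsyncClient(base_url=base_url, timeout=10.0) as client:
        while True:
            resp = await client.get(f"/training/jobs/{job_id}/status")
            resp.raise_for_status()
            data = resp.json()
            if data.get("status") in ("completed", "failed", "cancelled"):
                return data
            await asyncio.sleep(5)  # poll every few seconds
```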
#### **3. Data Validation**
```python
# Pre-training validation
POST /training/validate # Check data quality before training
```
#### **4. Event-Driven Architecture**
```python
# Automatic event publishing
await publish_job_started(job_id, tenant_id, config)
await publish_job_completed(job_id, tenant_id, results)
await publish_model_trained(model_id, tenant_id, product_name, metrics)
```
### **📊 Performance Improvements**
#### **1. Faster Training Startup**
- **Before**: 30-60 seconds Celery worker initialization
- **After**: <5 seconds direct async execution
#### **2. Better Resource Utilization**
- **Before**: Fixed Celery worker pools
- **After**: Dynamic scaling based on demand
#### **3. Improved Memory Management**
- **Before**: Memory leaks in long-running Celery workers
- **After**: Clean memory usage with proper cleanup
### **🔒 Enhanced Security & Monitoring**
#### **1. Authentication Integration**
```python
# Secure endpoints with tenant isolation
@router.post("/jobs")
async def start_training_job(
request: TrainingJobRequest,
tenant_id: str = Depends(get_current_tenant_id) # Automatic tenant isolation
):
```
#### **2. Comprehensive Monitoring**
```python
# Built-in metrics collection
metrics.increment_counter("training_jobs_started")
metrics.increment_counter("training_jobs_completed")
metrics.increment_counter("training_jobs_failed")
```
#### **3. Detailed Logging**
```python
# Structured logging with context
logger.info(f"Training job {job_id} completed successfully",
extra={"tenant_id": tenant_id, "models_trained": count})
```
### **🔄 Integration with Existing Architecture**
#### **1. Seamless API Integration**
The new training service integrates perfectly with the existing gateway:
```yaml
# API Gateway routes to training service
/api/training/* → http://training-service:8000/
```
#### **2. Event-Driven Communication**
```python
# Other services can listen to training events
"training.job.completed" forecasting-service (update models)
"training.job.completed" notification-service (send alerts)
"training.model.updated" tenant-service (update quotas)
```
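On the consuming side, a service could bind a queue to these routing keys with `aio-pika`. A minimal sketch follows; the queue name and AMQP URL are assumptions, while the `bakery_events` topic exchange matches the one used elsewhere in this commit.
```python
# Sketch of a consumer for "training.job.completed"; queue name and AMQP URL are assumed.
import asyncio
import json
from aio_pika import connect_robust, ExchangeType, IncomingMessage

async def on_job_completed(message: IncomingMessage):
    async with message.process():  # ack on success, reject on error
        event = json.loads(message.body)
        print("Training job completed for tenant", event.get("tenant_id"))

async def listen_for_training_events():
    connection = await connect_robust("amqp://guest:guest@rabbitmq/")
    channel = await connection.channel()
    exchange = await channel.declare_exchange("bakery_events", ExchangeType.TOPIC, durable=True)
    queue = await channel.declare_queue("forecasting.training_events", durable=True)
    await queue.bind(exchange, routing_key="training.job.completed")
    await queue.consume(on_job_completed)
    await asyncio.Future()  # keep the consumer alive
```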
#### **3. Database Independence**
- Training service has its own PostgreSQL database
- Clean separation from other service data
- Easy to scale and backup independently
### **📦 Deployment Ready**
#### **1. Docker Configuration**
- Optimized Dockerfile with proper security
- Non-root user execution
- Health checks included
#### **2. Requirements Management**
- Pinned dependency versions
- Separated development/production requirements
- Prophet and ML libraries properly configured
#### **3. Environment Configuration**
```bash
# Flexible configuration management
MODEL_STORAGE_PATH=/app/models
MAX_TRAINING_TIME_MINUTES=30
MIN_TRAINING_DATA_DAYS=30
PROPHET_SEASONALITY_MODE=additive
```
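As an illustration only, these variables might be read through a small settings object; the class below is a hypothetical sketch, not part of the migrated code.
```python
# Hypothetical settings sketch mirroring the environment variables above.
import os

class TrainingSettings:
    MODEL_STORAGE_PATH: str = os.getenv("MODEL_STORAGE_PATH", "/app/models")
    MAX_TRAINING_TIME_MINUTES: int = int(os.getenv("MAX_TRAINING_TIME_MINUTES", "30"))
    MIN_TRAINING_DATA_DAYS: int = int(os.getenv("MIN_TRAINING_DATA_DAYS", "30"))
    PROPHET_SEASONALITY_MODE: str = os.getenv("PROPHET_SEASONALITY_MODE", "additive")
```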
### **🎯 Migration Benefits Summary**
| Aspect | Before (Celery) | After (Microservice) |
|--------|----------------|----------------------|
| **Startup Time** | 30-60 seconds | <5 seconds |
| **Error Handling** | Basic | Comprehensive |
| **Progress Tracking** | Limited | Real-time |
| **Scalability** | Fixed workers | Dynamic scaling |
| **Debugging** | Difficult | Easy with logs |
| **Testing** | Complex | Simple unit tests |
| **Deployment** | Monolithic | Independent |
| **Monitoring** | Basic | Full observability |
### **🔧 Ready for Production**
This training service is **production-ready** and provides:
1. **Robust Error Handling**: Graceful failure recovery
2. **Horizontal Scaling**: Can run multiple instances
3. **Performance Monitoring**: Built-in metrics and health checks
4. **Security**: Proper authentication and tenant isolation
5. **Maintainability**: Clean code structure and comprehensive tests
### **🚀 Next Steps**
The training service is now ready to be integrated into your microservices architecture. It completely replaces the old Celery-based training system while providing significant improvements in reliability, performance, and maintainability.
The implementation follows all the microservices best practices and integrates seamlessly with the broader platform architecture you're building for the Madrid bakery forecasting system.