Add new alert architecture

This commit is contained in:
Urtzi Alfaro
2025-08-23 10:19:58 +02:00
parent 1a9839240e
commit 4b4268d640
45 changed files with 6518 additions and 1590 deletions

View File

@@ -0,0 +1,26 @@
FROM python:3.11-slim
WORKDIR /app
# Install system dependencies
RUN apt-get update && apt-get install -y \
gcc \
&& rm -rf /var/lib/apt/lists/*
# Copy requirements and install dependencies
COPY services/alert_processor/requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# Copy shared libraries
COPY shared/ /app/shared/
# Copy application code
COPY services/alert_processor/app/ /app/app/
# Create non-root user
RUN useradd -m -u 1000 appuser && chown -R appuser:appuser /app
USER appuser
EXPOSE 8000
CMD ["python", "-m", "app.main"]

View File

@@ -0,0 +1 @@
# Alert Processor Service

View File

@@ -0,0 +1,49 @@
# services/alert_processor/app/config.py
"""
Alert Processor Service Configuration
"""
import os
from typing import List
from shared.config.base import BaseServiceSettings
class AlertProcessorConfig(BaseServiceSettings):
"""Configuration for Alert Processor Service"""
SERVICE_NAME: str = "alert-processor"
APP_NAME: str = "Alert Processor Service"
DESCRIPTION: str = "Central alert and recommendation processor"
# Use the notification database for alert storage
# This makes sense since alerts and notifications are closely related
DATABASE_URL: str = os.getenv(
"NOTIFICATION_DATABASE_URL",
"postgresql+asyncpg://notification_user:notification_pass123@notification-db:5432/notification_db"
)
# Use dedicated Redis DB for alert processing
REDIS_DB: int = int(os.getenv("ALERT_PROCESSOR_REDIS_DB", "6"))
# Alert processing configuration
BATCH_SIZE: int = int(os.getenv("ALERT_BATCH_SIZE", "10"))
PROCESSING_TIMEOUT: int = int(os.getenv("ALERT_PROCESSING_TIMEOUT", "30"))
# Deduplication settings
ALERT_DEDUPLICATION_WINDOW_MINUTES: int = int(os.getenv("ALERT_DEDUPLICATION_WINDOW_MINUTES", "15"))
RECOMMENDATION_DEDUPLICATION_WINDOW_MINUTES: int = int(os.getenv("RECOMMENDATION_DEDUPLICATION_WINDOW_MINUTES", "60"))
# Alert severity channel mappings (hardcoded for now to avoid config parsing issues)
@property
def urgent_channels(self) -> List[str]:
return ["whatsapp", "email", "push", "dashboard"]
@property
def high_channels(self) -> List[str]:
return ["whatsapp", "email", "dashboard"]
@property
def medium_channels(self) -> List[str]:
return ["email", "dashboard"]
@property
def low_channels(self) -> List[str]:
return ["dashboard"]

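The channel mappings above are hardcoded on purpose. As a point of reference, a minimal sketch of how they could later be made configurable follows, assuming a hypothetical `ALERT_CHANNEL_MAP` environment variable carrying a JSON object; none of this is part of the committed configuration.

```python
# Hypothetical alternative (not in this commit): drive the mapping from a JSON env var,
# e.g. ALERT_CHANNEL_MAP='{"urgent": ["whatsapp", "email", "push", "dashboard"]}'
import json
import os
from typing import Dict, List

_DEFAULT_CHANNELS: Dict[str, List[str]] = {
    "urgent": ["whatsapp", "email", "push", "dashboard"],
    "high": ["whatsapp", "email", "dashboard"],
    "medium": ["email", "dashboard"],
    "low": ["dashboard"],
}


def load_channel_map() -> Dict[str, List[str]]:
    """Merge optional ALERT_CHANNEL_MAP overrides with the defaults above."""
    try:
        overrides = json.loads(os.getenv("ALERT_CHANNEL_MAP", "{}"))
    except json.JSONDecodeError:
        overrides = {}
    return {**_DEFAULT_CHANNELS, **overrides}
```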
View File

@@ -0,0 +1,360 @@
# services/alert_processor/app/main.py
"""
Alert Processor Service - Central hub for processing alerts and recommendations
Consumes from RabbitMQ, stores in database, and routes to notification service
"""
import asyncio
import json
import signal
import sys
from datetime import datetime
from typing import Dict, Any
import structlog
import redis.asyncio as aioredis
from aio_pika import connect_robust, IncomingMessage, ExchangeType
from app.config import AlertProcessorConfig
from shared.database.base import create_database_manager
from shared.clients.base_service_client import BaseServiceClient
from shared.config.rabbitmq_config import RABBITMQ_CONFIG
# Setup logging
structlog.configure(
processors=[
structlog.stdlib.filter_by_level,
structlog.stdlib.add_logger_name,
structlog.stdlib.add_log_level,
structlog.stdlib.PositionalArgumentsFormatter(),
structlog.processors.TimeStamper(fmt="ISO"),
structlog.processors.StackInfoRenderer(),
structlog.processors.format_exc_info,
structlog.processors.JSONRenderer()
],
context_class=dict,
logger_factory=structlog.stdlib.LoggerFactory(),
wrapper_class=structlog.stdlib.BoundLogger,
cache_logger_on_first_use=True,
)
logger = structlog.get_logger()
class NotificationServiceClient(BaseServiceClient):
"""Client for notification service"""
def __init__(self, config: AlertProcessorConfig):
super().__init__("notification-service", config)
self.config = config
def get_service_base_path(self) -> str:
"""Return the base path for notification service APIs"""
return "/api/v1"
async def send_notification(self, tenant_id: str, notification: Dict[str, Any], channels: list) -> Dict[str, Any]:
"""Send notification via notification service"""
try:
response = await self.post(
"/api/v1/notifications/send",
json={
"tenant_id": tenant_id,
"notification": notification,
"channels": channels
}
)
return response
except Exception as e:
logger.error("Failed to send notification", error=str(e), tenant_id=tenant_id)
return {"status": "failed", "error": str(e)}
class AlertProcessorService:
"""
Central service for processing and routing alerts and recommendations
Integrates with notification service for multi-channel delivery
"""
def __init__(self, config: AlertProcessorConfig):
self.config = config
self.db_manager = create_database_manager(config.DATABASE_URL, "alert-processor")
self.notification_client = NotificationServiceClient(config)
self.redis = None
self.connection = None
self.channel = None
self.running = False
# Metrics
self.items_processed = 0
self.items_stored = 0
self.notifications_sent = 0
self.errors_count = 0
async def start(self):
"""Start the alert processor service"""
try:
logger.info("Starting Alert Processor Service")
# Connect to Redis for SSE publishing
self.redis = aioredis.from_url(self.config.REDIS_URL)
logger.info("Connected to Redis")
# Connect to RabbitMQ
await self._setup_rabbitmq()
# Start consuming messages
await self._start_consuming()
self.running = True
logger.info("Alert Processor Service started successfully")
except Exception as e:
logger.error("Failed to start Alert Processor Service", error=str(e))
raise
async def _setup_rabbitmq(self):
"""Setup RabbitMQ connection and configuration"""
self.connection = await connect_robust(
self.config.RABBITMQ_URL,
heartbeat=30,
connection_attempts=5
)
self.channel = await self.connection.channel()
await self.channel.set_qos(prefetch_count=10) # Process 10 messages at a time
# Setup exchange and queue based on config
exchange_config = RABBITMQ_CONFIG["exchanges"]["alerts"]
self.exchange = await self.channel.declare_exchange(
exchange_config["name"],
getattr(ExchangeType, exchange_config["type"].upper()),
durable=exchange_config["durable"]
)
queue_config = RABBITMQ_CONFIG["queues"]["alert_processing"]
self.queue = await self.channel.declare_queue(
queue_config["name"],
durable=queue_config["durable"],
arguments=queue_config["arguments"]
)
# Bind to all alert and recommendation routing keys
await self.queue.bind(self.exchange, routing_key="*.*.*")
logger.info("RabbitMQ setup completed")
async def _start_consuming(self):
"""Start consuming messages from RabbitMQ"""
await self.queue.consume(self.process_item)
logger.info("Started consuming alert messages")
async def process_item(self, message: IncomingMessage):
"""Process incoming alert or recommendation"""
async with message.process():
try:
# Parse message
item = json.loads(message.body.decode())
logger.info("Processing item",
item_type=item.get('item_type'),
alert_type=item.get('type'),
severity=item.get('severity'),
tenant_id=item.get('tenant_id'))
# Store in database
stored_item = await self.store_item(item)
self.items_stored += 1
# Determine delivery channels based on severity and type
channels = self.get_channels_by_severity_and_type(
item['severity'],
item['item_type']
)
# Send via notification service if channels are specified
if channels:
notification_result = await self.notification_client.send_notification(
tenant_id=item['tenant_id'],
notification={
'type': item['item_type'], # 'alert' or 'recommendation'
'id': item['id'],
'title': item['title'],
'message': item['message'],
'severity': item['severity'],
'metadata': item.get('metadata', {}),
'actions': item.get('actions', []),
'email': item.get('email'),
'phone': item.get('phone'),
'user_id': item.get('user_id')
},
channels=channels
)
if notification_result.get('status') == 'success':
self.notifications_sent += 1
# Stream to SSE for real-time dashboard (always)
await self.stream_to_sse(item['tenant_id'], stored_item)
self.items_processed += 1
logger.info("Item processed successfully",
item_id=item['id'],
channels=len(channels))
except Exception as e:
self.errors_count += 1
logger.error("Item processing failed", error=str(e))
raise
async def store_item(self, item: dict) -> dict:
"""Store alert or recommendation in database"""
from sqlalchemy import text
query = text("""
INSERT INTO alerts (
id, tenant_id, item_type, alert_type, severity, status,
service, title, message, actions, metadata,
created_at
) VALUES (:id, :tenant_id, :item_type, :alert_type, :severity, :status,
:service, :title, :message, :actions, :metadata, :created_at)
RETURNING *
""")
async with self.db_manager.get_session() as session:
result = await session.execute(
query,
{
'id': item['id'],
'tenant_id': item['tenant_id'],
'item_type': item['item_type'], # 'alert' or 'recommendation'
'alert_type': item['type'],
'severity': item['severity'],
'status': 'active',
'service': item['service'],
'title': item['title'],
'message': item['message'],
'actions': json.dumps(item.get('actions', [])),
'metadata': json.dumps(item.get('metadata', {})),
'created_at': item['timestamp']
}
)
row = result.fetchone()
await session.commit()
logger.debug("Item stored in database", item_id=item['id'])
return dict(row._mapping)
async def stream_to_sse(self, tenant_id: str, item: dict):
"""Publish item to Redis for SSE streaming"""
channel = f"alerts:{tenant_id}"
# Prepare message for SSE
sse_message = {
'id': item['id'],
'item_type': item['item_type'],
'type': item['alert_type'],
'severity': item['severity'],
'title': item['title'],
'message': item['message'],
'actions': json.loads(item['actions']) if isinstance(item['actions'], str) else item['actions'],
'metadata': json.loads(item['metadata']) if isinstance(item['metadata'], str) else item['metadata'],
'timestamp': item['created_at'].isoformat() if hasattr(item['created_at'], 'isoformat') else item['created_at'],
'status': item['status']
}
# Publish to Redis channel for SSE
await self.redis.publish(channel, json.dumps(sse_message))
logger.debug("Item published to SSE", tenant_id=tenant_id, item_id=item['id'])
def get_channels_by_severity_and_type(self, severity: str, item_type: str) -> list:
"""Determine notification channels based on severity, type, and time"""
current_hour = datetime.now().hour
channels = ['dashboard'] # Always include dashboard (SSE)
if item_type == 'alert':
if severity == 'urgent':
# Urgent alerts: All channels immediately
channels.extend(['whatsapp', 'email', 'push'])
elif severity == 'high':
# High alerts: WhatsApp and email during extended hours
if 6 <= current_hour <= 22:
channels.extend(['whatsapp', 'email'])
else:
channels.append('email') # Email only during night
elif severity == 'medium':
# Medium alerts: Email during business hours
if 7 <= current_hour <= 20:
channels.append('email')
# Low severity: Dashboard only
elif item_type == 'recommendation':
# Recommendations: Less urgent, limit channels and respect business hours
if severity in ['medium', 'high']:
if 8 <= current_hour <= 19: # Business hours for recommendations
channels.append('email')
# Low/urgent (rare for recs): Dashboard only
return channels
async def stop(self):
"""Stop the alert processor service"""
self.running = False
logger.info("Stopping Alert Processor Service")
try:
# Close RabbitMQ connection
if self.connection and not self.connection.is_closed:
await self.connection.close()
# Close Redis connection
if self.redis:
await self.redis.close()
logger.info("Alert Processor Service stopped")
except Exception as e:
logger.error("Error stopping service", error=str(e))
def get_metrics(self) -> Dict[str, Any]:
"""Get service metrics"""
return {
"items_processed": self.items_processed,
"items_stored": self.items_stored,
"notifications_sent": self.notifications_sent,
"errors_count": self.errors_count,
"running": self.running
}
async def main():
"""Main entry point"""
config = AlertProcessorConfig()
service = AlertProcessorService(config)
# Setup signal handlers for graceful shutdown
def request_shutdown():
logger.info("Received shutdown signal")
service.running = False
# Register handlers on the running event loop; clearing `running` lets main() fall through to service.stop()
loop = asyncio.get_running_loop()
for sig in (signal.SIGTERM, signal.SIGINT):
loop.add_signal_handler(sig, request_shutdown)
try:
# Start the service
await service.start()
# Keep running
while service.running:
await asyncio.sleep(1)
except KeyboardInterrupt:
logger.info("Received keyboard interrupt")
except Exception as e:
logger.error("Service failed", error=str(e))
finally:
await service.stop()
if __name__ == "__main__":
asyncio.run(main())

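For reference, a minimal sketch of how an upstream service could publish a message that this processor would consume. The AMQP URL, exchange name and routing key below are illustrative assumptions (the real values come from `RABBITMQ_CONFIG`); the payload fields mirror what `process_item()` and `store_item()` read, and any three-segment routing key matches the `*.*.*` binding above.

```python
# Hedged sketch: publish a test item into the alerts exchange so the processor picks it up.
import asyncio
import json
import uuid
from datetime import datetime, timezone

from aio_pika import ExchangeType, Message, connect_robust


async def publish_test_alert() -> None:
    connection = await connect_robust("amqp://guest:guest@rabbitmq:5672/")
    channel = await connection.channel()
    exchange = await channel.declare_exchange("alerts", ExchangeType.TOPIC, durable=True)

    # Field names mirror what process_item()/store_item() read from the message body.
    payload = {
        "id": str(uuid.uuid4()),
        "tenant_id": str(uuid.uuid4()),
        "item_type": "alert",  # or "recommendation"
        "type": "critical_stock_shortage",
        "severity": "urgent",
        "service": "inventory",
        "title": "Stock crítico: Harina",
        "message": "Stock por debajo del mínimo",
        "actions": ["Contactar proveedor"],
        "metadata": {"ingredient_id": "demo"},
        "timestamp": datetime.now(timezone.utc).isoformat(),
    }
    # Any three-segment key is matched by the processor's "*.*.*" binding.
    await exchange.publish(
        Message(body=json.dumps(payload).encode()),
        routing_key="inventory.alert.urgent",
    )
    await connection.close()


if __name__ == "__main__":
    asyncio.run(publish_test_alert())
```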
View File

@@ -0,0 +1,12 @@
fastapi==0.104.1
uvicorn[standard]==0.24.0
aio-pika==9.3.1
redis==5.0.1
asyncpg==0.29.0
sqlalchemy==2.0.23
structlog==23.2.0
prometheus-client==0.19.0
pydantic-settings==2.1.0
pydantic==2.5.2
httpx==0.25.2
python-jose[cryptography]==3.3.0

View File

@@ -1,129 +0,0 @@
# ================================================================
# services/auth/README.md
# ================================================================
# Authentication Service
Microservice for user authentication and authorization in the bakery forecasting platform.
## Features
- User registration and login
- JWT access and refresh tokens
- Password security validation
- Rate limiting and login attempt tracking
- Multi-tenant user management
- Session management
- Event publishing for user actions
## Quick Start
### Development
```bash
# Start dependencies
docker-compose up -d auth-db redis rabbitmq
# Install dependencies
pip install -r requirements.txt
# Run migrations
alembic upgrade head
# Start service
uvicorn app.main:app --reload --host 0.0.0.0 --port 8001
```
### With Docker
```bash
# Start everything
docker-compose up -d
# View logs
docker-compose logs -f auth-service
# Run tests
docker-compose exec auth-service pytest
```
## API Endpoints
### Authentication
- `POST /api/v1/auth/register` - Register new user
- `POST /api/v1/auth/login` - User login
- `POST /api/v1/auth/refresh` - Refresh access token
- `POST /api/v1/auth/verify` - Verify token
- `POST /api/v1/auth/logout` - Logout user
### User Management
- `GET /api/v1/users/me` - Get current user
- `PUT /api/v1/users/me` - Update current user
- `POST /api/v1/users/change-password` - Change password
### Health
- `GET /health` - Health check
- `GET /metrics` - Prometheus metrics
## Configuration
Set these environment variables:
```bash
DATABASE_URL=postgresql+asyncpg://auth_user:auth_pass123@auth-db:5432/auth_db
REDIS_URL=redis://redis:6379/0
RABBITMQ_URL=amqp://bakery:forecast123@rabbitmq:5672/
JWT_SECRET_KEY=your-super-secret-jwt-key-change-in-production
JWT_ACCESS_TOKEN_EXPIRE_MINUTES=30
JWT_REFRESH_TOKEN_EXPIRE_DAYS=7
MAX_LOGIN_ATTEMPTS=5
LOCKOUT_DURATION_MINUTES=30
```
## Testing
```bash
# Run all tests
pytest
# Run with coverage
pytest --cov=app
# Run specific test file
pytest tests/test_auth.py -v
```
## Database Migrations
```bash
# Create migration
alembic revision --autogenerate -m "description"
# Apply migrations
alembic upgrade head
# Rollback
alembic downgrade -1
```
## Monitoring
- Health endpoint: `/health`
- Metrics endpoint: `/metrics` (Prometheus format)
- Logs: Structured JSON logging
- Tracing: Request ID tracking
## Security Features
- Bcrypt password hashing
- JWT tokens with expiration
- Rate limiting on login attempts
- Account lockout protection
- IP and user agent tracking
- Token revocation support
## Events Published
- `user.registered` - When user registers
- `user.login` - When user logs in
- `user.logout` - When user logs out
- `user.password_changed` - When password changes

View File

@@ -1,169 +0,0 @@
# ================================================================
# Documentation: services/forecasting/README.md
# ================================================================
# Forecasting Service
AI-powered demand prediction service for bakery operations in Madrid, Spain.
## Overview
The Forecasting Service is a specialized microservice responsible for generating accurate demand predictions for bakery products. It integrates trained ML models with real-time weather and traffic data to provide actionable forecasts for business planning.
## Features
### Core Functionality
- **Single Product Forecasting**: Generate predictions for individual products
- **Batch Forecasting**: Process multiple products and time periods
- **Real-time Predictions**: On-demand forecasting with external data
- **Business Rules**: Spanish bakery-specific adjustments
- **Alert System**: Automated notifications for demand anomalies
### Integration Points
- **Training Service**: Loads trained Prophet models
- **Data Service**: Retrieves weather and traffic data
- **Notification Service**: Sends alerts and reports
- **Gateway Service**: Authentication and request routing
## API Endpoints
### Forecasts
- `POST /api/v1/forecasts/single` - Generate single forecast
- `POST /api/v1/forecasts/batch` - Generate batch forecasts
- `GET /api/v1/forecasts/list` - List historical forecasts
- `GET /api/v1/forecasts/alerts` - Get forecast alerts
- `PUT /api/v1/forecasts/alerts/{id}/acknowledge` - Acknowledge alert
### Predictions
- `POST /api/v1/predictions/realtime` - Real-time prediction
- `GET /api/v1/predictions/quick/{product}` - Quick multi-day forecast
## Business Logic
### Spanish Bakery Rules
- **Siesta Impact**: Reduced afternoon activity consideration
- **Weather Adjustments**: Rain reduces traffic, extreme temperatures affect product mix
- **Holiday Handling**: Spanish holiday calendar integration
- **Weekend Patterns**: Different demand patterns for weekends
### Business Types
- **Individual Bakery**: Single location with direct sales
- **Central Workshop**: Production facility supplying multiple locations
## Configuration
### Environment Variables
```bash
# Database
DATABASE_URL=postgresql+asyncpg://user:pass@host:port/db
# External Services
TRAINING_SERVICE_URL=http://training-service:8000
DATA_SERVICE_URL=http://data-service:8000
# Business Rules
WEEKEND_ADJUSTMENT_FACTOR=0.8
HOLIDAY_ADJUSTMENT_FACTOR=0.5
RAIN_IMPACT_FACTOR=0.7
```
### Performance Settings
```bash
MAX_FORECAST_DAYS=30
PREDICTION_CACHE_TTL_HOURS=6
FORECAST_BATCH_SIZE=100
```
## Development
### Setup
```bash
cd services/forecasting
pip install -r requirements.txt
```
### Testing
```bash
pytest tests/ -v --cov=app
```
### Running Locally
```bash
uvicorn app.main:app --reload --port 8000
```
## Deployment
### Docker
```bash
docker build -t forecasting-service .
docker run -p 8000:8000 forecasting-service
```
### Kubernetes
```bash
kubectl apply -f infrastructure/kubernetes/base/forecasting-service.yaml
```
## Monitoring
### Metrics
- `forecasts_generated_total` - Total forecasts generated
- `predictions_served_total` - Total predictions served
- `forecast_processing_time_seconds` - Processing time histogram
- `active_models_count` - Number of active models
### Health Checks
- `/health` - Service health status
- `/metrics` - Prometheus metrics endpoint
## Performance
### Benchmarks
- **Single Forecast**: < 2 seconds average
- **Batch Forecasting**: 100 products in < 30 seconds
- **Concurrent Load**: 95%+ success rate at 20 concurrent requests
### Optimization
- Model caching for faster predictions
- Feature preparation optimization
- Database query optimization
- Asynchronous external API calls
## Troubleshooting
### Common Issues
1. **No Model Found Error**
- Ensure training service has models for tenant/product
- Check model training logs in training service
2. **High Prediction Latency**
- Monitor model cache hit rate
- Check external service response times
- Review database query performance
3. **Inaccurate Predictions**
- Verify external data quality (weather/traffic)
- Check model performance metrics
- Review business rule configurations
### Logging
```bash
# View service logs
docker logs forecasting-service
# Debug level logging
LOG_LEVEL=DEBUG uvicorn app.main:app
```
## Contributing
1. Follow the existing code structure and patterns
2. Add tests for new functionality
3. Update documentation for API changes
4. Ensure performance benchmarks are maintained
## License
This service is part of the Bakery Forecasting Platform - MIT License

View File

@@ -14,6 +14,7 @@ import structlog
from app.core.config import settings
from app.core.database import init_db, close_db
from app.api import ingredients, stock, classification
from app.services.inventory_alert_service import InventoryAlertService
from shared.monitoring.health import router as health_router
from shared.monitoring.metrics import setup_metrics_early
# Auth decorators are used in endpoints, no global setup needed
@@ -32,6 +33,14 @@ async def lifespan(app: FastAPI):
await init_db()
logger.info("Database initialized successfully")
# Initialize alert service
alert_service = InventoryAlertService(settings)
await alert_service.start()
logger.info("Inventory alert service started")
# Store alert service in app state
app.state.alert_service = alert_service
# Metrics setup was already done early - no need to repeat it here
logger.info("Metrics setup completed")
@@ -44,6 +53,11 @@ async def lifespan(app: FastAPI):
# Shutdown
logger.info("Shutting down Inventory Service")
try:
# Stop alert service
if hasattr(app.state, 'alert_service'):
await app.state.alert_service.stop()
logger.info("Alert service stopped")
await close_db()
logger.info("Database connections closed")
except Exception as e:

View File

@@ -0,0 +1,710 @@
# services/inventory/app/services/inventory_alert_service.py
"""
Inventory-specific alert and recommendation detection service
Implements hybrid detection patterns for critical stock issues and optimization opportunities
"""
import asyncio
import json
from typing import List, Dict, Any, Optional
from uuid import UUID
from datetime import datetime, timedelta
import structlog
from apscheduler.triggers.cron import CronTrigger
from shared.alerts.base_service import BaseAlertService, AlertServiceMixin
from shared.alerts.templates import format_item_message
logger = structlog.get_logger()
class InventoryAlertService(BaseAlertService, AlertServiceMixin):
"""Inventory service alert and recommendation detection"""
def setup_scheduled_checks(self):
"""Inventory-specific scheduled checks for alerts and recommendations"""
# Critical stock checks - every 5 minutes (alerts)
self.scheduler.add_job(
self.check_stock_levels,
CronTrigger(minute='*/5'),
id='stock_levels',
misfire_grace_time=30,
max_instances=1
)
# Expiry checks - every 2 minutes (food safety critical, alerts)
self.scheduler.add_job(
self.check_expiring_products,
CronTrigger(minute='*/2'),
id='expiry_check',
misfire_grace_time=30,
max_instances=1
)
# Temperature checks - every 2 minutes (alerts)
self.scheduler.add_job(
self.check_temperature_breaches,
CronTrigger(minute='*/2'),
id='temperature_check',
misfire_grace_time=30,
max_instances=1
)
# Inventory optimization - every 30 minutes (recommendations)
self.scheduler.add_job(
self.generate_inventory_recommendations,
CronTrigger(minute='*/30'),
id='inventory_recs',
misfire_grace_time=120,
max_instances=1
)
# Waste reduction analysis - every hour (recommendations)
self.scheduler.add_job(
self.generate_waste_reduction_recommendations,
CronTrigger(minute='0'),
id='waste_reduction_recs',
misfire_grace_time=300,
max_instances=1
)
logger.info("Inventory alert schedules configured",
service=self.config.SERVICE_NAME)
async def check_stock_levels(self):
"""Batch check all stock levels for critical shortages (alerts)"""
try:
self._checks_performed += 1
query = """
WITH stock_analysis AS (
SELECT
i.*,
COALESCE(p.scheduled_quantity, 0) as tomorrow_needed,
COALESCE(s.avg_daily_usage, 0) as avg_daily_usage,
COALESCE(s.lead_time_days, 7) as lead_time_days,
CASE
WHEN i.current_stock < i.minimum_stock THEN 'critical'
WHEN i.current_stock < i.minimum_stock * 1.2 THEN 'low'
WHEN i.current_stock > i.maximum_stock THEN 'overstock'
ELSE 'normal'
END as status,
GREATEST(0, i.minimum_stock - i.current_stock) as shortage_amount
FROM inventory_items i
LEFT JOIN production_schedule p ON p.ingredient_id = i.id
AND p.date = CURRENT_DATE + INTERVAL '1 day'
LEFT JOIN supplier_items s ON s.ingredient_id = i.id
WHERE i.tenant_id = :tenant_id AND i.active = true
)
SELECT * FROM stock_analysis WHERE status != 'normal'
ORDER BY
CASE status
WHEN 'critical' THEN 1
WHEN 'low' THEN 2
WHEN 'overstock' THEN 3
END,
shortage_amount DESC
"""
tenants = await self.get_active_tenants()
for tenant_id in tenants:
try:
from sqlalchemy import text
async with self.db_manager.get_session() as session:
result = await session.execute(text(query), {"tenant_id": tenant_id})
issues = result.mappings().all()
for issue in issues:
await self._process_stock_issue(tenant_id, issue)
except Exception as e:
logger.error("Error checking stock for tenant",
tenant_id=str(tenant_id),
error=str(e))
logger.debug("Stock level check completed",
tenants_checked=len(tenants))
except Exception as e:
logger.error("Stock level check failed", error=str(e))
self._errors_count += 1
async def _process_stock_issue(self, tenant_id: UUID, issue: Dict[str, Any]):
"""Process individual stock issue"""
try:
if issue['status'] == 'critical':
# Critical stock shortage - immediate alert
template_data = self.format_spanish_message(
'critical_stock_shortage',
ingredient_name=issue["name"],
current_stock=issue["current_stock"],
required_stock=issue["tomorrow_needed"] or issue["minimum_stock"],
shortage_amount=issue["shortage_amount"]
)
await self.publish_item(tenant_id, {
'type': 'critical_stock_shortage',
'severity': 'urgent',
'title': template_data['title'],
'message': template_data['message'],
'actions': template_data['actions'],
'metadata': {
'ingredient_id': str(issue['id']),
'current_stock': float(issue['current_stock']),
'minimum_stock': float(issue['minimum_stock']),
'shortage_amount': float(issue['shortage_amount']),
'tomorrow_needed': float(issue['tomorrow_needed'] or 0),
'lead_time_days': issue['lead_time_days']
}
}, item_type='alert')
elif issue['status'] == 'low':
# Low stock - high priority alert
template_data = self.format_spanish_message(
'critical_stock_shortage',
ingredient_name=issue["name"],
current_stock=issue["current_stock"],
required_stock=issue["minimum_stock"]
)
severity = self.get_business_hours_severity('high')
await self.publish_item(tenant_id, {
'type': 'low_stock_warning',
'severity': severity,
'title': f'⚠️ Stock Bajo: {issue["name"]}',
'message': f'Stock actual {issue["current_stock"]}kg, mínimo {issue["minimum_stock"]}kg. Considerar pedido pronto.',
'actions': ['Revisar consumo', 'Programar pedido', 'Contactar proveedor'],
'metadata': {
'ingredient_id': str(issue['id']),
'current_stock': float(issue['current_stock']),
'minimum_stock': float(issue['minimum_stock'])
}
}, item_type='alert')
elif issue['status'] == 'overstock':
# Overstock - medium priority alert
severity = self.get_business_hours_severity('medium')
await self.publish_item(tenant_id, {
'type': 'overstock_warning',
'severity': severity,
'title': f'📦 Exceso de Stock: {issue["name"]}',
'message': f'Stock actual {issue["current_stock"]}kg excede máximo {issue["maximum_stock"]}kg. Revisar para evitar caducidad.',
'actions': ['Revisar caducidades', 'Aumentar producción', 'Ofertas especiales', 'Ajustar pedidos'],
'metadata': {
'ingredient_id': str(issue['id']),
'current_stock': float(issue['current_stock']),
'maximum_stock': float(issue['maximum_stock'])
}
}, item_type='alert')
except Exception as e:
logger.error("Error processing stock issue",
ingredient_id=str(issue.get('id')),
error=str(e))
async def check_expiring_products(self):
"""Check for products approaching expiry (alerts)"""
try:
self._checks_performed += 1
query = """
SELECT
i.id, i.name, i.current_stock, i.tenant_id,
b.id as batch_id, b.expiry_date, b.quantity,
(b.expiry_date::date - CURRENT_DATE) as days_to_expiry
FROM inventory_items i
JOIN inventory_batches b ON b.ingredient_id = i.id
WHERE b.expiry_date <= CURRENT_DATE + INTERVAL '7 days'
AND b.quantity > 0
AND b.status = 'active'
ORDER BY b.expiry_date ASC
"""
from sqlalchemy import text
async with self.db_manager.get_session() as session:
result = await session.execute(text(query))
expiring_items = result.mappings().all()
# Group by tenant
by_tenant = {}
for item in expiring_items:
tenant_id = item['tenant_id']
if tenant_id not in by_tenant:
by_tenant[tenant_id] = []
by_tenant[tenant_id].append(item)
for tenant_id, items in by_tenant.items():
await self._process_expiring_items(tenant_id, items)
except Exception as e:
logger.error("Expiry check failed", error=str(e))
self._errors_count += 1
async def _process_expiring_items(self, tenant_id: UUID, items: List[Dict[str, Any]]):
"""Process expiring items for a tenant"""
try:
# Group by urgency
expired = [i for i in items if i['days_to_expiry'] <= 0]
urgent = [i for i in items if 0 < i['days_to_expiry'] <= 2]
warning = [i for i in items if 2 < i['days_to_expiry'] <= 7]
# Process expired products (urgent alerts)
if expired:
product_count = len(expired)
product_names = [i['name'] for i in expired[:3]] # First 3 names
if len(expired) > 3:
product_names.append(f"y {len(expired) - 3} más")
template_data = self.format_spanish_message(
'expired_products',
product_count=product_count,
product_names=", ".join(product_names)
)
await self.publish_item(tenant_id, {
'type': 'expired_products',
'severity': 'urgent',
'title': template_data['title'],
'message': template_data['message'],
'actions': template_data['actions'],
'metadata': {
'expired_items': [
{
'id': str(item['id']),
'name': item['name'],
'batch_id': str(item['batch_id']),
'quantity': float(item['quantity']),
'days_expired': abs(item['days_to_expiry'])
} for item in expired
]
}
}, item_type='alert')
# Process urgent expiry (high alerts)
if urgent:
for item in urgent:
await self.publish_item(tenant_id, {
'type': 'urgent_expiry',
'severity': 'high',
'title': f'⏰ Caducidad Urgente: {item["name"]}',
'message': f'{item["name"]} caduca en {item["days_to_expiry"]} día(s). Usar prioritariamente.',
'actions': ['Usar inmediatamente', 'Promoción especial', 'Revisar recetas', 'Documentar'],
'metadata': {
'ingredient_id': str(item['id']),
'batch_id': str(item['batch_id']),
'days_to_expiry': item['days_to_expiry'],
'quantity': float(item['quantity'])
}
}, item_type='alert')
except Exception as e:
logger.error("Error processing expiring items",
tenant_id=str(tenant_id),
error=str(e))
async def check_temperature_breaches(self):
"""Check for temperature breaches (alerts)"""
try:
self._checks_performed += 1
query = """
SELECT
t.id, t.sensor_id, t.location, t.temperature,
t.max_threshold, t.tenant_id,
EXTRACT(epoch FROM (NOW() - t.first_breach_time)) / 60 as breach_duration_minutes
FROM temperature_readings t
WHERE t.temperature > t.max_threshold
AND NOW() - t.first_breach_time >= INTERVAL '30 minutes' -- Only after 30 minutes
AND t.last_alert_sent < NOW() - INTERVAL '15 minutes' -- Avoid spam
ORDER BY t.temperature DESC, t.breach_duration_minutes DESC
"""
from sqlalchemy import text
async with self.db_manager.get_session() as session:
result = await session.execute(text(query))
breaches = result.mappings().all()
for breach in breaches:
await self._process_temperature_breach(breach)
except Exception as e:
logger.error("Temperature check failed", error=str(e))
self._errors_count += 1
async def _process_temperature_breach(self, breach: Dict[str, Any]):
"""Process temperature breach"""
try:
# Determine severity based on duration and temperature
duration_minutes = breach['breach_duration_minutes']
temp_excess = breach['temperature'] - breach['max_threshold']
if duration_minutes > 120 or temp_excess > 10:
severity = 'urgent'
elif duration_minutes > 60 or temp_excess > 5:
severity = 'high'
else:
severity = 'medium'
template_data = self.format_spanish_message(
'temperature_breach',
location=breach['location'],
temperature=breach['temperature'],
duration=duration_minutes
)
await self.publish_item(breach['tenant_id'], {
'type': 'temperature_breach',
'severity': severity,
'title': template_data['title'],
'message': template_data['message'],
'actions': template_data['actions'],
'metadata': {
'sensor_id': breach['sensor_id'],
'location': breach['location'],
'temperature': float(breach['temperature']),
'max_threshold': float(breach['max_threshold']),
'duration_minutes': duration_minutes,
'temperature_excess': temp_excess
}
}, item_type='alert')
# Update last alert sent time to avoid spam
await self.db_manager.execute(
"UPDATE temperature_readings SET last_alert_sent = NOW() WHERE id = $1",
breach['id']
)
except Exception as e:
logger.error("Error processing temperature breach",
sensor_id=breach.get('sensor_id'),
error=str(e))
async def generate_inventory_recommendations(self):
"""Generate optimization recommendations based on usage patterns"""
try:
self._checks_performed += 1
# Analyze stock levels vs usage patterns
query = """
WITH usage_analysis AS (
SELECT
i.id, i.name, i.tenant_id, i.minimum_stock, i.maximum_stock,
i.current_stock,
AVG(sm.quantity) FILTER (WHERE sm.movement_type = 'out'
AND sm.created_at > CURRENT_DATE - INTERVAL '30 days') as avg_daily_usage,
COUNT(sm.id) FILTER (WHERE sm.movement_type = 'out'
AND sm.created_at > CURRENT_DATE - INTERVAL '30 days') as usage_days,
MAX(sm.created_at) FILTER (WHERE sm.movement_type = 'out') as last_used
FROM inventory_items i
LEFT JOIN stock_movements sm ON sm.ingredient_id = i.id
WHERE i.active = true AND i.tenant_id = :tenant_id
GROUP BY i.id
HAVING COUNT(sm.id) FILTER (WHERE sm.movement_type = 'out'
AND sm.created_at > CURRENT_DATE - INTERVAL '30 days') >= 5
),
recommendations AS (
SELECT *,
CASE
WHEN avg_daily_usage * 7 > maximum_stock THEN 'increase_max'
WHEN avg_daily_usage * 3 < minimum_stock THEN 'decrease_min'
WHEN current_stock / NULLIF(avg_daily_usage, 0) > 14 THEN 'reduce_stock'
WHEN avg_daily_usage > 0 AND minimum_stock / avg_daily_usage < 3 THEN 'increase_min'
ELSE null
END as recommendation_type
FROM usage_analysis
WHERE avg_daily_usage > 0
)
SELECT * FROM recommendations WHERE recommendation_type IS NOT NULL
ORDER BY avg_daily_usage DESC
"""
tenants = await self.get_active_tenants()
for tenant_id in tenants:
try:
from sqlalchemy import text
async with self.db_manager.get_session() as session:
result = await session.execute(text(query), {"tenant_id": tenant_id})
recommendations = result.mappings().all()
for rec in recommendations:
await self._generate_stock_recommendation(tenant_id, rec)
except Exception as e:
logger.error("Error generating recommendations for tenant",
tenant_id=str(tenant_id),
error=str(e))
except Exception as e:
logger.error("Inventory recommendations failed", error=str(e))
self._errors_count += 1
async def _generate_stock_recommendation(self, tenant_id: UUID, rec: Dict[str, Any]):
"""Generate specific stock recommendation"""
try:
if not self.should_send_recommendation(tenant_id, rec['recommendation_type']):
return
rec_type = rec['recommendation_type']
if rec_type == 'increase_max':
suggested_max = rec['avg_daily_usage'] * 10 # 10 days supply
template_data = self.format_spanish_message(
'inventory_optimization',
ingredient_name=rec['name'],
period=30,
suggested_increase=suggested_max - rec['maximum_stock']
)
await self.publish_item(tenant_id, {
'type': 'inventory_optimization',
'severity': 'medium',
'title': template_data['title'],
'message': template_data['message'],
'actions': template_data['actions'],
'metadata': {
'ingredient_id': str(rec['id']),
'current_max': float(rec['maximum_stock']),
'suggested_max': float(suggested_max),
'avg_daily_usage': float(rec['avg_daily_usage']),
'recommendation_type': rec_type
}
}, item_type='recommendation')
elif rec_type == 'decrease_min':
suggested_min = rec['avg_daily_usage'] * 3 # 3 days safety stock
await self.publish_item(tenant_id, {
'type': 'inventory_optimization',
'severity': 'low',
'title': f'📉 Optimización de Stock Mínimo: {rec["name"]}',
'message': f'Uso promedio sugiere reducir stock mínimo de {rec["minimum_stock"]}kg a {suggested_min:.1f}kg.',
'actions': ['Revisar niveles mínimos', 'Analizar tendencias', 'Ajustar configuración'],
'metadata': {
'ingredient_id': str(rec['id']),
'current_min': float(rec['minimum_stock']),
'suggested_min': float(suggested_min),
'avg_daily_usage': float(rec['avg_daily_usage']),
'recommendation_type': rec_type
}
}, item_type='recommendation')
except Exception as e:
logger.error("Error generating stock recommendation",
ingredient_id=str(rec.get('id')),
error=str(e))
async def generate_waste_reduction_recommendations(self):
"""Generate waste reduction recommendations"""
try:
# Analyze waste patterns
query = """
SELECT
i.id, i.name, i.tenant_id,
SUM(w.quantity) as total_waste_30d,
COUNT(w.id) as waste_incidents,
AVG(w.quantity) as avg_waste_per_incident,
w.waste_reason
FROM inventory_items i
JOIN waste_logs w ON w.ingredient_id = i.id
WHERE w.created_at > CURRENT_DATE - INTERVAL '30 days'
AND i.tenant_id = :tenant_id
GROUP BY i.id, w.waste_reason
HAVING SUM(w.quantity) > 5 -- More than 5kg wasted
ORDER BY total_waste_30d DESC
"""
tenants = await self.get_active_tenants()
for tenant_id in tenants:
try:
from sqlalchemy import text
async with self.db_manager.get_session() as session:
result = await session.execute(text(query), {"tenant_id": tenant_id})
waste_data = result.mappings().all()
for waste in waste_data:
await self._generate_waste_recommendation(tenant_id, waste)
except Exception as e:
logger.error("Error generating waste recommendations",
tenant_id=str(tenant_id),
error=str(e))
except Exception as e:
logger.error("Waste reduction recommendations failed", error=str(e))
self._errors_count += 1
async def _generate_waste_recommendation(self, tenant_id: UUID, waste: Dict[str, Any]):
"""Generate waste reduction recommendation"""
try:
waste_percentage = (waste['total_waste_30d'] / (waste['total_waste_30d'] + 100)) * 100 # Simplified calculation
template_data = self.format_spanish_message(
'waste_reduction',
product=waste['name'],
waste_reduction_percent=waste_percentage
)
await self.publish_item(tenant_id, {
'type': 'waste_reduction',
'severity': 'low',
'title': template_data['title'],
'message': template_data['message'],
'actions': template_data['actions'],
'metadata': {
'ingredient_id': str(waste['id']),
'total_waste_30d': float(waste['total_waste_30d']),
'waste_incidents': waste['waste_incidents'],
'waste_reason': waste['waste_reason'],
'estimated_reduction_percent': waste_percentage
}
}, item_type='recommendation')
except Exception as e:
logger.error("Error generating waste recommendation",
ingredient_id=str(waste.get('id')),
error=str(e))
async def register_db_listeners(self, conn):
"""Register inventory-specific database listeners"""
try:
await conn.add_listener('stock_alerts', self.handle_stock_db_alert)
await conn.add_listener('temperature_alerts', self.handle_temperature_db_alert)
logger.info("Database listeners registered",
service=self.config.SERVICE_NAME)
except Exception as e:
logger.error("Failed to register database listeners",
service=self.config.SERVICE_NAME,
error=str(e))
async def handle_stock_db_alert(self, connection, pid, channel, payload):
"""Handle stock alert from database trigger"""
try:
data = json.loads(payload)
tenant_id = UUID(data['tenant_id'])
template_data = self.format_spanish_message(
'critical_stock_shortage',
ingredient_name=data['name'],
current_stock=data['current_stock'],
required_stock=data['minimum_stock']
)
await self.publish_item(tenant_id, {
'type': 'critical_stock_shortage',
'severity': 'urgent',
'title': template_data['title'],
'message': template_data['message'],
'actions': template_data['actions'],
'metadata': {
'ingredient_id': data['ingredient_id'],
'current_stock': data['current_stock'],
'minimum_stock': data['minimum_stock'],
'trigger_source': 'database'
}
}, item_type='alert')
except Exception as e:
logger.error("Error handling stock DB alert", error=str(e))
async def handle_temperature_db_alert(self, connection, pid, channel, payload):
"""Handle temperature alert from database trigger"""
try:
data = json.loads(payload)
tenant_id = UUID(data['tenant_id'])
template_data = self.format_spanish_message(
'temperature_breach',
location=data['location'],
temperature=data['temperature'],
duration=data['duration']
)
await self.publish_item(tenant_id, {
'type': 'temperature_breach',
'severity': 'high',
'title': template_data['title'],
'message': template_data['message'],
'actions': template_data['actions'],
'metadata': {
'sensor_id': data['sensor_id'],
'location': data['location'],
'temperature': data['temperature'],
'duration': data['duration'],
'trigger_source': 'database'
}
}, item_type='alert')
except Exception as e:
logger.error("Error handling temperature DB alert", error=str(e))
async def start_event_listener(self):
"""Listen for inventory-affecting events"""
try:
# Subscribe to order events that might affect inventory
await self.rabbitmq_client.consume_events(
"bakery_events",
f"inventory.orders.{self.config.SERVICE_NAME}",
"orders.placed",
self.handle_order_placed
)
logger.info("Event listeners started",
service=self.config.SERVICE_NAME)
except Exception as e:
logger.error("Failed to start event listeners",
service=self.config.SERVICE_NAME,
error=str(e))
async def handle_order_placed(self, message):
"""Check if order critically affects stock"""
try:
order = json.loads(message.body)
tenant_id = UUID(order['tenant_id'])
for item in order.get('items', []):
# Check stock impact
stock_info = await self.get_stock_after_order(item['ingredient_id'], item['quantity'])
if stock_info and stock_info['remaining'] < stock_info['minimum_stock']:
await self.publish_item(tenant_id, {
'type': 'stock_depleted_by_order',
'severity': 'high',
'title': f'⚠️ Pedido Agota Stock: {stock_info["name"]}',
'message': f'Pedido #{order["id"]} dejará stock en {stock_info["remaining"]}kg (mínimo {stock_info["minimum_stock"]}kg)',
'actions': ['Revisar pedido', 'Contactar proveedor', 'Ajustar producción', 'Usar stock reserva'],
'metadata': {
'order_id': order['id'],
'ingredient_id': item['ingredient_id'],
'order_quantity': item['quantity'],
'remaining_stock': stock_info['remaining'],
'minimum_stock': stock_info['minimum_stock']
}
}, item_type='alert')
except Exception as e:
logger.error("Error handling order placed event", error=str(e))
async def get_stock_after_order(self, ingredient_id: str, order_quantity: float) -> Optional[Dict[str, Any]]:
"""Get stock information after hypothetical order"""
try:
query = """
SELECT id, name, current_stock, minimum_stock,
(current_stock - $2) as remaining
FROM inventory_items
WHERE id = $1
"""
result = await self.db_manager.fetchrow(query, ingredient_id, order_quantity)
return dict(result) if result else None
except Exception as e:
logger.error("Error getting stock after order",
ingredient_id=ingredient_id,
error=str(e))
return None

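The `register_db_listeners` path above expects PostgreSQL `NOTIFY` messages on the `stock_alerts` and `temperature_alerts` channels, normally raised by database triggers. A minimal sketch for exercising the stock listener follows; the DSN is an assumption, and the payload keys are the ones `handle_stock_db_alert()` reads.

```python
# Hedged sketch: fire a pg_notify on the "stock_alerts" channel that
# handle_stock_db_alert() is listening on. DSN and values are illustrative.
import asyncio
import json
import uuid

import asyncpg


async def notify_stock_alert() -> None:
    conn = await asyncpg.connect("postgresql://inventory_user:pass@inventory-db:5432/inventory_db")
    payload = {
        "tenant_id": str(uuid.uuid4()),
        "ingredient_id": str(uuid.uuid4()),
        "name": "Harina de trigo",
        "current_stock": 2.5,
        "minimum_stock": 10.0,
    }
    # pg_notify() allows the channel name and payload to be bound as parameters.
    await conn.execute("SELECT pg_notify($1, $2)", "stock_alerts", json.dumps(payload))
    await conn.close()


if __name__ == "__main__":
    asyncio.run(notify_stock_alert())
```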
View File

@@ -30,8 +30,12 @@ passlib[bcrypt]==1.7.4
structlog==23.2.0
prometheus-client==0.19.0
# Message queues
# Message queues and Redis
aio-pika==9.3.1
redis>=4.0.0
# Scheduling
APScheduler==3.10.4
# Additional for inventory management
python-barcode==0.15.1

View File

@@ -1,321 +0,0 @@
## 🎯 **Complete Notification Service Implementation**
### **📁 File Structure Created**
```
services/notification/
├── app/
│ ├── main.py ✅ Complete FastAPI application
│ ├── core/
│ │ ├── config.py ✅ Configuration settings
│ │ └── database.py ✅ Database initialization
│ ├── models/
│ │ ├── notifications.py ✅ Core notification models
│ │ └── templates.py ✅ Template-specific models
│ ├── schemas/
│ │ └── notifications.py ✅ Pydantic schemas
│ ├── services/
│ │ ├── notification_service.py ✅ Main business logic
│ │ ├── email_service.py ✅ Email delivery
│ │ ├── whatsapp_service.py ✅ WhatsApp delivery
│ │ └── messaging.py ✅ RabbitMQ integration
│ └── api/
│ └── notifications.py ✅ Complete API routes
├── requirements.txt ✅ Python dependencies
├── Dockerfile ✅ Container configuration
└── .env.example ✅ Environment variables
```
### **🔧 Key Features Implemented**
#### **1. Complete Business Logic**
- **NotificationService**: Core orchestration of all notification operations
- **Multi-channel support**: Email, WhatsApp, Push (extensible)
- **Template processing**: Jinja2-based template rendering
- **Bulk notifications**: Batch processing with rate limiting
- **User preferences**: Granular notification controls
- **Scheduling**: Delayed notification delivery
#### **2. Email Service Integration**
- **SMTP support**: Configurable email providers (Gmail, SendGrid, etc.)
- **HTML + Text emails**: Rich email templates with fallbacks
- **Bulk email processing**: Rate-limited batch sending
- **Template system**: Pre-built Spanish templates for bakeries
- **Health checks**: SMTP connection monitoring
- **Attachment support**: File attachment capabilities
#### **3. WhatsApp Service Integration**
- **Twilio integration**: WhatsApp Business API support
- **Spanish phone formatting**: Automatic +34 country code handling
- **Template messages**: WhatsApp Business template support
- **Bulk WhatsApp**: Rate-limited batch messaging
- **Delivery status**: Webhook handling for delivery confirmations
#### **4. Database Models & Schemas**
- **Complete data model**: Notifications, templates, preferences, logs
- **Multi-tenant support**: Tenant-scoped notifications
- **Audit trail**: Detailed delivery attempt logging
- **Template management**: System and custom templates
- **User preferences**: Granular notification controls
#### **5. API Integration with Gateway**
- **Gateway authentication**: Uses shared auth decorators
- **Tenant isolation**: Automatic tenant scoping
- **Role-based access**: Admin/manager/user permissions
- **Complete CRUD**: Full notification management API
- **Webhook endpoints**: External delivery status handling
#### **6. RabbitMQ Event Integration**
- **Event consumers**: Listens for user registration, forecasts, training
- **Event publishers**: Publishes notification status events
- **Auto-notifications**: Triggers welcome emails, alerts, reports
- **Error handling**: Robust message processing with retry logic
#### **7. Spanish Bakery Templates**
- **Welcome email**: Professional onboarding email
- **Forecast alerts**: Demand variation notifications
- **Weekly reports**: Performance summary emails
- **Responsive HTML**: Mobile-optimized email designs
- **Spanish localization**: All content in Spanish
### **🚀 Integration with Your Architecture**
#### **Seamless Gateway Integration**
```python
# Gateway already routes to notification service
app.include_router(notification.router, prefix="/api/v1/notifications", tags=["notifications"])
# Authentication handled by gateway middleware
# Tenant isolation automatic
# User context passed via headers
```
#### **Shared Library Usage**
```python
# Uses your existing shared components
from shared.auth.decorators import get_current_user_dep, get_current_tenant_id_dep
from shared.messaging.rabbitmq import RabbitMQClient
from shared.monitoring.metrics import MetricsCollector
from shared.database.base import DatabaseManager
```
#### **Event-Driven Architecture**
```python
# Automatic notifications triggered by:
# - User registration → Welcome email
# - Forecast alerts → Alert emails + WhatsApp
# - Training completion → Status notifications
# - Data imports → Import confirmations
```
### **📊 Production Features**
#### **Health Monitoring**
- **Database health checks**: Connection monitoring
- **SMTP health checks**: Email service validation
- **WhatsApp health checks**: API connectivity tests
- **Prometheus metrics**: Delivery rates, response times
- **Structured logging**: Comprehensive error tracking
#### **Rate Limiting & Scaling**
- **Email rate limits**: 1000/hour configurable
- **WhatsApp rate limits**: 100/hour (Twilio limits)
- **Batch processing**: Configurable batch sizes
- **Retry logic**: Automatic retry with exponential backoff
- **Queue management**: Background task processing
#### **Security & Compliance**
- **User consent**: Preference-based opt-in/out
- **Tenant isolation**: Multi-tenant data separation
- **GDPR compliance**: User data control
- **Rate limiting**: DoS protection
- **Input validation**: Pydantic schema validation
### **🎯 Business-Specific Features**
#### **Bakery Use Cases**
```python
# Forecast alerts when demand varies >20%
# Daily production recommendations
# Weekly performance reports
# Stock shortage notifications
# Weather impact alerts
# Holiday/event notifications
```
#### **Spanish Localization**
- **Spanish templates**: Native Spanish content
- **Madrid timezone**: Europe/Madrid default
- **Spanish phone format**: +34 prefix handling
- **Local business hours**: Quiet hours support
- **Cultural context**: Bakery-specific terminology
### **🔄 How to Deploy**
#### **1. Add to Docker Compose**
```yaml
# Already integrated in your docker-compose.yml
notification-service:
build: ./services/notification
ports:
- "8006:8000"
environment:
- DATABASE_URL=postgresql+asyncpg://notification_user:notification_pass123@notification-db:5432/notification_db
depends_on:
- notification-db
- redis
- rabbitmq
```
#### **2. Environment Setup**
```bash
# Copy environment template
cp services/notification/.env.example services/notification/.env
# Configure email provider
SMTP_USER=your-email@gmail.com
SMTP_PASSWORD=your-app-password
# Configure WhatsApp (optional)
WHATSAPP_API_KEY=your-twilio-sid:your-twilio-token
```
#### **3. Start Service**
```bash
# Service starts automatically with
docker-compose up -d
# Check health
curl http://localhost:8006/health
# View API docs
open http://localhost:8006/docs
```
### **📈 API Usage Examples**
#### **Send Welcome Email**
```python
POST /api/v1/notifications/send
{
"type": "email",
"recipient_email": "usuario@panaderia.com",
"template_id": "welcome_email",
"template_data": {
"user_name": "Juan Carlos",
"dashboard_url": "https://app.bakeryforecast.es/dashboard"
}
}
```
#### **Send Forecast Alert**
```python
POST /api/v1/notifications/send
{
"type": "email",
"template_id": "forecast_alert_email",
"template_data": {
"bakery_name": "Panadería San Miguel",
"product_name": "Pan integral",
"forecast_date": "2025-01-25",
"predicted_demand": 120,
"variation_percentage": 35,
"alert_message": "Aumento significativo esperado. Se recomienda incrementar producción."
},
"broadcast": true,
"priority": "high"
}
```
#### **Update User Preferences**
```python
PATCH /api/v1/notifications/preferences
{
"email_alerts": true,
"whatsapp_enabled": false,
"quiet_hours_start": "22:00",
"quiet_hours_end": "08:00",
"language": "es"
}
```
### **🎉 Key Benefits**
#### **✅ Production Ready**
- Complete error handling and logging
- Health checks and monitoring
- Rate limiting and security
- Multi-tenant architecture
- Scalable event-driven design
#### **✅ Business Focused**
- Spanish bakery templates
- Madrid timezone/localization
- Forecast-specific notifications
- Professional email designs
- WhatsApp support for urgent alerts
#### **✅ Developer Friendly**
- Comprehensive API documentation
- Type-safe Pydantic schemas
- Async/await throughout
- Structured logging
- Easy testing and debugging
#### **✅ Seamless Integration**
- Uses your shared libraries
- Integrates with gateway auth
- Follows your architectural patterns
- Maintains tenant isolation
- Publishes events to RabbitMQ
### **🚀 Next Steps**
#### **Immediate (Week 2)**
1. **Deploy the service**: Add to your docker-compose and start
2. **Configure SMTP**: Set up email provider credentials
3. **Test integration**: Send test notifications via API
4. **Event integration**: Verify RabbitMQ event handling
#### **Production Optimization**
1. **Email provider**: Consider SendGrid/Mailgun for production
2. **WhatsApp setup**: Configure Twilio Business API
3. **Template customization**: Add tenant-specific templates
4. **Analytics dashboard**: Add notification analytics to frontend
### **💡 Advanced Features Ready for Extension**
- **Push notifications**: Framework ready for mobile push
- **SMS support**: Easy to add SMS providers
- **A/B testing**: Template variant testing
- **Scheduled campaigns**: Marketing email campaigns
- **Analytics integration**: Detailed delivery analytics
**This notification service is now a complete, production-ready microservice that fully integrates with your bakery forecasting platform! It handles all notification needs from welcome emails to urgent forecast alerts, with proper Spanish localization and bakery-specific templates.** 🎯

View File

@@ -0,0 +1,189 @@
# services/notification/app/api/sse_routes.py
"""
SSE routes for real-time alert and recommendation streaming
"""
import asyncio
import json
from datetime import datetime
from typing import Optional
from fastapi import APIRouter, Request, Depends, HTTPException, BackgroundTasks
from sse_starlette.sse import EventSourceResponse
import structlog
from shared.auth.decorators import get_current_user
router = APIRouter(prefix="/sse", tags=["sse"])
logger = structlog.get_logger()
@router.get("/alerts/stream/{tenant_id}")
async def stream_alerts(
tenant_id: str,
request: Request,
background_tasks: BackgroundTasks,
current_user = Depends(get_current_user)
):
"""
SSE endpoint for real-time alert and recommendation streaming
Supports both alerts and recommendations through a unified stream
"""
# Verify user has access to this tenant
if not hasattr(current_user, 'has_access_to_tenant') or not current_user.has_access_to_tenant(tenant_id):
raise HTTPException(403, "Access denied to this tenant")
# Get SSE service from app state
sse_service = getattr(request.app.state, 'sse_service', None)
if not sse_service:
raise HTTPException(500, "SSE service not available")
async def event_generator():
"""Generate SSE events for the client"""
client_queue = asyncio.Queue(maxsize=100) # Limit queue size
try:
# Register client
await sse_service.add_client(tenant_id, client_queue)
logger.info("SSE client connected",
tenant_id=tenant_id,
user_id=getattr(current_user, 'id', 'unknown'))
# Stream events
while True:
# Check if client disconnected
if await request.is_disconnected():
logger.info("SSE client disconnected", tenant_id=tenant_id)
break
try:
# Wait for events with timeout for keepalive
event = await asyncio.wait_for(
client_queue.get(),
timeout=30.0
)
yield event
except asyncio.TimeoutError:
# Send keepalive ping
yield {
"event": "ping",
"data": json.dumps({
"timestamp": datetime.utcnow().isoformat(),
"status": "keepalive"
}),
"id": f"ping_{int(datetime.now().timestamp())}"
}
except Exception as e:
logger.error("Error in SSE event generator",
tenant_id=tenant_id,
error=str(e))
break
except Exception as e:
logger.error("SSE connection error",
tenant_id=tenant_id,
error=str(e))
finally:
# Clean up on disconnect
try:
await sse_service.remove_client(tenant_id, client_queue)
logger.info("SSE client cleanup completed", tenant_id=tenant_id)
except Exception as e:
logger.error("Error cleaning up SSE client",
tenant_id=tenant_id,
error=str(e))
return EventSourceResponse(
event_generator(),
media_type="text/event-stream",
headers={
"Cache-Control": "no-cache",
"Connection": "keep-alive",
"X-Accel-Buffering": "no", # Disable nginx buffering
}
)
@router.post("/items/{item_id}/acknowledge")
async def acknowledge_item(
item_id: str,
current_user = Depends(get_current_user)
):
"""Acknowledge an alert or recommendation"""
try:
# This would update the database
# For now, just return success
logger.info("Item acknowledged",
item_id=item_id,
user_id=getattr(current_user, 'id', 'unknown'))
return {
"status": "success",
"item_id": item_id,
"acknowledged_by": getattr(current_user, 'id', 'unknown'),
"acknowledged_at": datetime.utcnow().isoformat()
}
except Exception as e:
logger.error("Failed to acknowledge item", item_id=item_id, error=str(e))
raise HTTPException(500, "Failed to acknowledge item")
@router.post("/items/{item_id}/resolve")
async def resolve_item(
item_id: str,
current_user = Depends(get_current_user)
):
"""Resolve an alert or recommendation"""
try:
# This would update the database
# For now, just return success
logger.info("Item resolved",
item_id=item_id,
user_id=getattr(current_user, 'id', 'unknown'))
return {
"status": "success",
"item_id": item_id,
"resolved_by": getattr(current_user, 'id', 'unknown'),
"resolved_at": datetime.utcnow().isoformat()
}
except Exception as e:
logger.error("Failed to resolve item", item_id=item_id, error=str(e))
raise HTTPException(500, "Failed to resolve item")
@router.get("/status/{tenant_id}")
async def get_sse_status(
tenant_id: str,
request: Request,
current_user = Depends(get_current_user)
):
"""Get SSE connection status for a tenant"""
# Verify user has access to this tenant
if not hasattr(current_user, 'has_access_to_tenant') or not current_user.has_access_to_tenant(tenant_id):
raise HTTPException(403, "Access denied to this tenant")
try:
# Get SSE service from app state
sse_service = getattr(request.app.state, 'sse_service', None)
if not sse_service:
return {"status": "unavailable", "message": "SSE service not initialized"}
metrics = sse_service.get_metrics()
tenant_connections = len(sse_service.active_connections.get(tenant_id, set()))
return {
"status": "available",
"tenant_id": tenant_id,
"connections": tenant_connections,
"total_connections": metrics["total_connections"],
"active_tenants": metrics["active_tenants"]
}
except Exception as e:
logger.error("Failed to get SSE status", tenant_id=tenant_id, error=str(e))
raise HTTPException(500, "Failed to get SSE status")

View File

@@ -1,9 +1,9 @@
# ================================================================
# services/notification/app/main.py - COMPLETE IMPLEMENTATION
# services/notification/app/main.py - ENHANCED WITH SSE SUPPORT
# ================================================================
"""
Notification Service Main Application
Handles email and WhatsApp notifications with full integration
Handles email and WhatsApp notifications, plus SSE streaming for real-time alerts and recommendations
"""
import structlog
@@ -15,7 +15,12 @@ from fastapi.responses import JSONResponse
from app.core.config import settings
from app.core.database import init_db
from app.api.notifications import router as notification_router
from app.api.sse_routes import router as sse_router
from app.services.messaging import setup_messaging, cleanup_messaging
from app.services.sse_service import SSEService
from app.services.notification_orchestrator import NotificationOrchestrator
from app.services.email_service import EmailService
from app.services.whatsapp_service import WhatsAppService
from shared.monitoring import setup_logging, HealthChecker
from shared.monitoring.metrics import setup_metrics_early
@@ -30,8 +35,8 @@ health_checker = None
# Create FastAPI app FIRST
app = FastAPI(
title="Bakery Notification Service",
description="Email and WhatsApp notification service for bakery forecasting platform",
version="1.0.0",
description="Email, WhatsApp and SSE notification service for bakery alerts and recommendations",
version="2.0.0",
docs_url="/docs",
redoc_url="/redoc"
)
@@ -56,12 +61,36 @@ async def lifespan(app: FastAPI):
await setup_messaging()
logger.info("Messaging initialized")
# Initialize services
email_service = EmailService()
whatsapp_service = WhatsAppService()
# Initialize SSE service
sse_service = SSEService(settings.REDIS_URL)
await sse_service.initialize()
logger.info("SSE service initialized")
# Create orchestrator
orchestrator = NotificationOrchestrator(
email_service=email_service,
whatsapp_service=whatsapp_service,
sse_service=sse_service
)
# Store services in app state
app.state.orchestrator = orchestrator
app.state.sse_service = sse_service
app.state.email_service = email_service
app.state.whatsapp_service = whatsapp_service
# Register custom metrics (metrics_collector already exists)
metrics_collector.register_counter("notifications_sent_total", "Total notifications sent", labels=["type", "status"])
metrics_collector.register_counter("notifications_sent_total", "Total notifications sent", labels=["type", "status", "channel"])
metrics_collector.register_counter("emails_sent_total", "Total emails sent", labels=["status"])
metrics_collector.register_counter("whatsapp_sent_total", "Total WhatsApp messages sent", labels=["status"])
metrics_collector.register_counter("sse_events_sent_total", "Total SSE events sent", labels=["tenant", "event_type"])
metrics_collector.register_histogram("notification_processing_duration_seconds", "Time spent processing notifications")
metrics_collector.register_gauge("notification_queue_size", "Current notification queue size")
metrics_collector.register_gauge("sse_active_connections", "Number of active SSE connections")
# Setup health checker
health_checker = HealthChecker("notification-service")
@@ -93,14 +122,22 @@ async def lifespan(app: FastAPI):
# Add WhatsApp service health check
async def check_whatsapp_service():
try:
from app.services.whatsapp_service import WhatsAppService
whatsapp_service = WhatsAppService()
return await whatsapp_service.health_check()
except Exception as e:
return f"WhatsApp service error: {e}"
health_checker.add_check("whatsapp_service", check_whatsapp_service, timeout=10.0, critical=False)
# Add SSE service health check
async def check_sse_service():
try:
metrics = sse_service.get_metrics()
return "healthy" if metrics["redis_connected"] else "Redis connection failed"
except Exception as e:
return f"SSE service error: {e}"
health_checker.add_check("sse_service", check_sse_service, timeout=5.0, critical=True)
# Add messaging health check
def check_messaging():
try:
@@ -115,7 +152,7 @@ async def lifespan(app: FastAPI):
# Store health checker in app state
app.state.health_checker = health_checker
logger.info("Notification Service started successfully")
logger.info("Notification Service with SSE support started successfully")
except Exception as e:
logger.error(f"Failed to start Notification Service: {e}")
@@ -126,10 +163,15 @@ async def lifespan(app: FastAPI):
# Shutdown
logger.info("Shutting down Notification Service...")
try:
# Shutdown SSE service
if hasattr(app.state, 'sse_service'):
await app.state.sse_service.shutdown()
logger.info("SSE service shutdown completed")
await cleanup_messaging()
logger.info("Messaging cleanup completed")
except Exception as e:
logger.error(f"Error during messaging cleanup: {e}")
logger.error(f"Error during shutdown: {e}")
# Set lifespan AFTER metrics setup
app.router.lifespan_context = lifespan
@@ -145,18 +187,30 @@ app.add_middleware(
# Include routers
app.include_router(notification_router, prefix="/api/v1", tags=["notifications"])
app.include_router(sse_router, prefix="/api/v1", tags=["sse"])
# Health check endpoint
@app.get("/health")
async def health_check():
"""Comprehensive health check endpoint"""
"""Comprehensive health check endpoint including SSE"""
if health_checker:
return await health_checker.check_health()
health_result = await health_checker.check_health()
# Add SSE metrics to health check
if hasattr(app.state, 'sse_service'):
try:
sse_metrics = app.state.sse_service.get_metrics()
health_result['sse_metrics'] = sse_metrics
except Exception as e:
health_result['sse_error'] = str(e)
return health_result
else:
return {
"service": "notification-service",
"status": "healthy",
"version": "1.0.0"
"version": "2.0.0",
"features": ["email", "whatsapp", "sse", "alerts", "recommendations"]
}
# Metrics endpoint

View File

@@ -276,14 +276,26 @@ class EmailService:
# Test SMTP connection
if self.smtp_ssl:
# Use implicit TLS/SSL connection (port 465 typically)
server = aiosmtplib.SMTP(hostname=self.smtp_host, port=self.smtp_port, use_tls=True)
await server.connect()
# No need for starttls() when using implicit TLS
else:
# Use plain connection, optionally upgrade with STARTTLS
server = aiosmtplib.SMTP(hostname=self.smtp_host, port=self.smtp_port)
await server.connect()
if self.smtp_tls:
await server.starttls()
await server.connect()
if self.smtp_tls:
# Try STARTTLS, but handle case where connection is already secure
try:
await server.starttls()
except Exception as starttls_error:
# If STARTTLS fails because connection is already using TLS, that's okay
if "already using TLS" in str(starttls_error) or "already secure" in str(starttls_error):
logger.debug("SMTP connection already secure, skipping STARTTLS")
else:
# Re-raise other STARTTLS errors
raise starttls_error
await server.login(self.smtp_user, self.smtp_password)
await server.quit()

View File

@@ -0,0 +1,279 @@
# services/notification/app/services/notification_orchestrator.py
"""
Notification orchestrator for managing delivery across all channels
Includes SSE integration for real-time dashboard updates
"""
from typing import List, Dict, Any
from datetime import datetime
import structlog
from .email_service import EmailService
from .whatsapp_service import WhatsAppService
from .sse_service import SSEService
logger = structlog.get_logger()
class NotificationOrchestrator:
"""
Orchestrates delivery across all notification channels
Now includes SSE for real-time dashboard updates, with support for recommendations
"""
def __init__(
self,
email_service: EmailService,
whatsapp_service: WhatsAppService,
sse_service: SSEService,
push_service=None # Optional push service
):
self.email_service = email_service
self.whatsapp_service = whatsapp_service
self.sse_service = sse_service
self.push_service = push_service
async def send_notification(
self,
tenant_id: str,
notification: Dict[str, Any],
channels: List[str]
) -> Dict[str, Any]:
"""
Send notification through specified channels
Channels can include: email, whatsapp, push, dashboard (SSE)
"""
results = {}
# Always send to dashboard for visibility (SSE)
if 'dashboard' in channels or notification.get('type') in ['alert', 'recommendation']:
try:
await self.sse_service.send_item_notification(
tenant_id,
notification
)
results['dashboard'] = {'status': 'sent', 'timestamp': datetime.utcnow().isoformat()}
logger.info("Item sent to dashboard via SSE",
tenant_id=tenant_id,
item_type=notification.get('type'),
item_id=notification.get('id'))
except Exception as e:
logger.error("Failed to send to dashboard",
tenant_id=tenant_id,
error=str(e))
results['dashboard'] = {'status': 'failed', 'error': str(e)}
# Send to email channel
if 'email' in channels:
try:
email_result = await self.email_service.send_notification_email(
to_email=notification.get('email'),
subject=notification.get('title'),
template_data={
'title': notification.get('title'),
'message': notification.get('message'),
'severity': notification.get('severity'),
'item_type': notification.get('type'),
'actions': notification.get('actions', []),
'metadata': notification.get('metadata', {}),
'timestamp': datetime.utcnow().isoformat()
},
notification_type=notification.get('type', 'alert')
)
results['email'] = email_result
except Exception as e:
logger.error("Failed to send email",
tenant_id=tenant_id,
error=str(e))
results['email'] = {'status': 'failed', 'error': str(e)}
# Send to WhatsApp channel
if 'whatsapp' in channels:
try:
whatsapp_result = await self.whatsapp_service.send_notification_message(
to_phone=notification.get('phone'),
message=self._format_whatsapp_message(notification),
notification_type=notification.get('type', 'alert')
)
results['whatsapp'] = whatsapp_result
except Exception as e:
logger.error("Failed to send WhatsApp",
tenant_id=tenant_id,
error=str(e))
results['whatsapp'] = {'status': 'failed', 'error': str(e)}
# Send to push notification channel
if 'push' in channels and self.push_service:
try:
push_result = await self.push_service.send_notification(
user_id=notification.get('user_id'),
title=notification.get('title'),
body=notification.get('message'),
data={
'item_type': notification.get('type'),
'severity': notification.get('severity'),
'item_id': notification.get('id'),
'metadata': notification.get('metadata', {})
}
)
results['push'] = push_result
except Exception as e:
logger.error("Failed to send push notification",
tenant_id=tenant_id,
error=str(e))
results['push'] = {'status': 'failed', 'error': str(e)}
# Log summary
successful_channels = [ch for ch, result in results.items() if result.get('status') == 'sent']
failed_channels = [ch for ch, result in results.items() if result.get('status') == 'failed']
logger.info("Notification delivery completed",
tenant_id=tenant_id,
item_type=notification.get('type'),
item_id=notification.get('id'),
successful_channels=successful_channels,
failed_channels=failed_channels,
total_channels=len(channels))
return {
'status': 'completed',
'successful_channels': successful_channels,
'failed_channels': failed_channels,
'results': results,
'timestamp': datetime.utcnow().isoformat()
}
def _format_whatsapp_message(self, notification: Dict[str, Any]) -> str:
"""Format message for WhatsApp with emojis and structure"""
item_type = notification.get('type', 'alert')
severity = notification.get('severity', 'medium')
# Get appropriate emoji
type_emoji = '🚨' if item_type == 'alert' else '💡'
severity_emoji = {
'urgent': '🔴',
'high': '🟡',
'medium': '🔵',
'low': '🟢'
}.get(severity, '🔵')
message = f"{type_emoji} {severity_emoji} *{notification.get('title', 'Notificación')}*\n\n"
message += f"{notification.get('message', '')}\n"
# Add actions if available
actions = notification.get('actions', [])
if actions and len(actions) > 0:
message += "\n*Acciones sugeridas:*\n"
for i, action in enumerate(actions[:3], 1): # Limit to 3 actions for WhatsApp
message += f"{i}. {action}\n"
# Add timestamp
message += f"\n_Enviado: {datetime.now().strftime('%H:%M, %d/%m/%Y')}_"
return message
def get_channels_by_severity(self, severity: str, item_type: str, hour: int = None) -> List[str]:
"""
Determine notification channels based on severity and item_type
Now includes 'dashboard' as a channel
"""
if hour is None:
hour = datetime.now().hour
# Dashboard always gets all items
channels = ['dashboard']
if item_type == 'alert':
if severity == 'urgent':
# Urgent alerts: All channels immediately
channels.extend(['email', 'whatsapp', 'push'])
elif severity == 'high':
# High alerts: Email and WhatsApp during extended hours
if 6 <= hour <= 22:
channels.extend(['email', 'whatsapp'])
else:
channels.append('email') # Email only during night
elif severity == 'medium':
# Medium alerts: Email during business hours
if 7 <= hour <= 20:
channels.append('email')
elif item_type == 'recommendation':
# Recommendations: Generally less urgent, respect business hours
if severity in ['medium', 'high']:
if 8 <= hour <= 19: # Stricter business hours for recommendations
channels.append('email')
# Low/urgent: Dashboard only (urgent rare for recommendations)
return channels
async def health_check(self) -> Dict[str, Any]:
"""Check health of all notification channels"""
health_status = {
'status': 'healthy',
'channels': {},
'timestamp': datetime.utcnow().isoformat()
}
# Check email service
try:
email_health = await self.email_service.health_check()
health_status['channels']['email'] = email_health
except Exception as e:
health_status['channels']['email'] = {'status': 'unhealthy', 'error': str(e)}
# Check WhatsApp service
try:
whatsapp_health = await self.whatsapp_service.health_check()
health_status['channels']['whatsapp'] = whatsapp_health
except Exception as e:
health_status['channels']['whatsapp'] = {'status': 'unhealthy', 'error': str(e)}
# Check SSE service
try:
sse_metrics = self.sse_service.get_metrics()
sse_status = 'healthy' if sse_metrics['redis_connected'] else 'unhealthy'
health_status['channels']['sse'] = {
'status': sse_status,
'metrics': sse_metrics
}
except Exception as e:
health_status['channels']['sse'] = {'status': 'unhealthy', 'error': str(e)}
# Check push service if available
if self.push_service:
try:
push_health = await self.push_service.health_check()
health_status['channels']['push'] = push_health
except Exception as e:
health_status['channels']['push'] = {'status': 'unhealthy', 'error': str(e)}
# Determine overall status
unhealthy_channels = [
ch for ch, status in health_status['channels'].items()
if status.get('status') != 'healthy'
]
if unhealthy_channels:
health_status['status'] = 'degraded' if len(unhealthy_channels) < len(health_status['channels']) else 'unhealthy'
health_status['unhealthy_channels'] = unhealthy_channels
return health_status
def get_metrics(self) -> Dict[str, Any]:
"""Get aggregated metrics from all services"""
metrics = {
'timestamp': datetime.utcnow().isoformat(),
'channels': {}
}
# Get SSE metrics
try:
metrics['channels']['sse'] = self.sse_service.get_metrics()
except Exception as e:
logger.error("Failed to get SSE metrics", error=str(e))
# Additional metrics could be added here for other services
return metrics

View File

@@ -0,0 +1,256 @@
# services/notification/app/services/sse_service.py
"""
Server-Sent Events service for real-time notifications
Integrated within the notification service for alerts and recommendations
"""
import asyncio
from redis.asyncio import Redis
import json
from typing import Dict, Set, Any
from datetime import datetime
import structlog
logger = structlog.get_logger()
class SSEService:
"""
Server-Sent Events service for real-time notifications
Handles both alerts and recommendations through unified SSE streams
"""
def __init__(self, redis_url: str):
self.redis_url = redis_url
self.redis = None
self.active_connections: Dict[str, Set[asyncio.Queue]] = {}
self.pubsub_tasks: Dict[str, asyncio.Task] = {}
async def initialize(self):
"""Initialize Redis connection"""
try:
self.redis = Redis.from_url(self.redis_url)
logger.info("SSE Service initialized with Redis connection")
except Exception as e:
logger.error("Failed to initialize SSE service", error=str(e))
raise
async def shutdown(self):
"""Clean shutdown"""
try:
# Cancel all pubsub tasks
for task in self.pubsub_tasks.values():
if not task.done():
task.cancel()
try:
await task
except asyncio.CancelledError:
pass
# Close all client connections
for tenant_id, connections in self.active_connections.items():
for queue in connections.copy():
try:
await queue.put({"event": "shutdown", "data": json.dumps({"status": "server_shutdown"})})
except Exception:
pass
# Close Redis connection
if self.redis:
await self.redis.close()
logger.info("SSE Service shutdown completed")
except Exception as e:
logger.error("Error during SSE shutdown", error=str(e))
async def add_client(self, tenant_id: str, client_queue: asyncio.Queue):
"""Add a new SSE client connection"""
try:
if tenant_id not in self.active_connections:
self.active_connections[tenant_id] = set()
# Start pubsub listener for this tenant if not exists
if tenant_id not in self.pubsub_tasks:
task = asyncio.create_task(self._listen_to_tenant_channel(tenant_id))
self.pubsub_tasks[tenant_id] = task
self.active_connections[tenant_id].add(client_queue)
client_count = len(self.active_connections[tenant_id])
logger.info("SSE client added",
tenant_id=tenant_id,
total_clients=client_count)
# Send connection confirmation
await client_queue.put({
"event": "connected",
"data": json.dumps({
"status": "connected",
"tenant_id": tenant_id,
"timestamp": datetime.utcnow().isoformat(),
"client_count": client_count
})
})
# Send any active items (alerts and recommendations)
active_items = await self.get_active_items(tenant_id)
if active_items:
await client_queue.put({
"event": "initial_items",
"data": json.dumps(active_items)
})
except Exception as e:
logger.error("Error adding SSE client", tenant_id=tenant_id, error=str(e))
async def remove_client(self, tenant_id: str, client_queue: asyncio.Queue):
"""Remove SSE client connection"""
try:
if tenant_id in self.active_connections:
self.active_connections[tenant_id].discard(client_queue)
# If no more clients for this tenant, stop the pubsub listener
if not self.active_connections[tenant_id]:
del self.active_connections[tenant_id]
if tenant_id in self.pubsub_tasks:
task = self.pubsub_tasks[tenant_id]
if not task.done():
task.cancel()
del self.pubsub_tasks[tenant_id]
logger.info("SSE client removed", tenant_id=tenant_id)
except Exception as e:
logger.error("Error removing SSE client", tenant_id=tenant_id, error=str(e))
async def _listen_to_tenant_channel(self, tenant_id: str):
"""Listen to Redis channel for tenant-specific items"""
try:
# Create a separate Redis connection for pubsub
pubsub_redis = Redis.from_url(self.redis_url)
pubsub = pubsub_redis.pubsub()
channel = f"alerts:{tenant_id}"
await pubsub.subscribe(channel)
logger.info("Started listening to tenant channel",
tenant_id=tenant_id,
channel=channel)
async for message in pubsub.listen():
if message["type"] == "message":
# Broadcast to all connected clients for this tenant
await self.broadcast_to_tenant(tenant_id, message["data"])
except asyncio.CancelledError:
logger.info("Stopped listening to tenant channel", tenant_id=tenant_id)
except Exception as e:
logger.error("Error in pubsub listener", tenant_id=tenant_id, error=str(e))
finally:
try:
await pubsub.unsubscribe(channel)
await pubsub_redis.close()
except Exception:
pass
async def broadcast_to_tenant(self, tenant_id: str, message: str):
"""Broadcast message to all connected clients of a tenant"""
if tenant_id not in self.active_connections:
return
try:
item_data = json.loads(message)
event = {
"event": item_data.get('item_type', 'item'), # 'alert' or 'recommendation'
"data": json.dumps(item_data),
"id": item_data.get("id")
}
# Send to all connected clients
disconnected = []
for client_queue in self.active_connections[tenant_id]:
try:
# Use put_nowait to avoid blocking
client_queue.put_nowait(event)
except asyncio.QueueFull:
logger.warning("Client queue full, dropping message", tenant_id=tenant_id)
disconnected.append(client_queue)
except Exception as e:
logger.warning("Failed to send to client", tenant_id=tenant_id, error=str(e))
disconnected.append(client_queue)
# Clean up disconnected clients
for queue in disconnected:
await self.remove_client(tenant_id, queue)
if disconnected:
logger.info("Cleaned up disconnected clients",
tenant_id=tenant_id,
count=len(disconnected))
except Exception as e:
logger.error("Error broadcasting to tenant", tenant_id=tenant_id, error=str(e))
async def send_item_notification(self, tenant_id: str, item: Dict[str, Any]):
"""
Send alert or recommendation via SSE (called by notification orchestrator)
"""
try:
# Publish to Redis for SSE streaming
channel = f"alerts:{tenant_id}"
item_message = {
'id': item.get('id'),
'item_type': item.get('type'), # 'alert' or 'recommendation'
'type': item.get('alert_type', item.get('type')),
'severity': item.get('severity'),
'title': item.get('title'),
'message': item.get('message'),
'actions': item.get('actions', []),
'metadata': item.get('metadata', {}),
'timestamp': item.get('timestamp', datetime.utcnow().isoformat()),
'status': 'active'
}
await self.redis.publish(channel, json.dumps(item_message))
logger.info("Item published to SSE",
tenant_id=tenant_id,
item_type=item.get('type'),
item_id=item.get('id'))
except Exception as e:
logger.error("Error sending item notification via SSE",
tenant_id=tenant_id,
error=str(e))
async def get_active_items(self, tenant_id: str) -> list:
"""Fetch active alerts and recommendations from database"""
try:
# This would integrate with the actual database
# For now, return empty list as placeholder
# In real implementation, this would query the alerts table
# Example query:
# query = """
# SELECT id, item_type, alert_type, severity, title, message,
# actions, metadata, created_at, status
# FROM alerts
# WHERE tenant_id = $1
# AND status = 'active'
# ORDER BY severity_weight DESC, created_at DESC
# LIMIT 50
# """
return [] # Placeholder
except Exception as e:
logger.error("Error fetching active items", tenant_id=tenant_id, error=str(e))
return []
def get_metrics(self) -> Dict[str, Any]:
"""Get SSE service metrics"""
return {
"active_tenants": len(self.active_connections),
"total_connections": sum(len(connections) for connections in self.active_connections.values()),
"active_listeners": len(self.pubsub_tasks),
"redis_connected": self.redis and not self.redis.closed
}

View File

@@ -30,6 +30,17 @@ class WhatsAppService:
self.from_number = settings.WHATSAPP_FROM_NUMBER
self.enabled = settings.ENABLE_WHATSAPP_NOTIFICATIONS
def _parse_api_credentials(self):
"""Parse API key into username and password for Twilio basic auth"""
if not self.api_key or ":" not in self.api_key:
raise ValueError("WhatsApp API key must be in format 'username:password'")
api_parts = self.api_key.split(":", 1)
if len(api_parts) != 2:
raise ValueError("Invalid WhatsApp API key format")
return api_parts[0], api_parts[1]
async def send_message(
self,
to_phone: str,
@@ -181,10 +192,22 @@ class WhatsAppService:
return False
# Test API connectivity with a simple request
# Parse API key (expected format: username:password for Twilio basic auth)
if ":" not in self.api_key:
logger.error("WhatsApp API key must be in format 'username:password'")
return False
api_parts = self.api_key.split(":", 1) # Split on first : only
if len(api_parts) != 2:
logger.error("Invalid WhatsApp API key format")
return False
username, password = api_parts
async with httpx.AsyncClient(timeout=10.0) as client:
response = await client.get(
f"{self.base_url}/v1/Account", # Twilio account info endpoint
auth=(self.api_key.split(":")[0], self.api_key.split(":")[1])
auth=(username, password)
)
if response.status_code == 200:
@@ -206,6 +229,13 @@ class WhatsAppService:
async def _send_text_message(self, to_phone: str, message: str) -> bool:
"""Send regular text message via Twilio"""
try:
# Parse API credentials
try:
username, password = self._parse_api_credentials()
except ValueError as e:
logger.error(f"WhatsApp API key configuration error: {e}")
return False
# Prepare request data
data = {
"From": f"whatsapp:{self.from_number}",
@@ -216,9 +246,9 @@ class WhatsAppService:
# Send via Twilio API
async with httpx.AsyncClient(timeout=30.0) as client:
response = await client.post(
f"{self.base_url}/2010-04-01/Accounts/{self.api_key.split(':')[0]}/Messages.json",
f"{self.base_url}/2010-04-01/Accounts/{username}/Messages.json",
data=data,
auth=(self.api_key.split(":")[0], self.api_key.split(":")[1])
auth=(username, password)
)
if response.status_code == 201:
@@ -245,6 +275,13 @@ class WhatsAppService:
) -> bool:
"""Send WhatsApp template message via Twilio"""
try:
# Parse API credentials
try:
username, password = self._parse_api_credentials()
except ValueError as e:
logger.error(f"WhatsApp API key configuration error: {e}")
return False
# Prepare template data
content_variables = {str(i+1): param for i, param in enumerate(parameters)}
@@ -258,9 +295,9 @@ class WhatsAppService:
# Send via Twilio API
async with httpx.AsyncClient(timeout=30.0) as client:
response = await client.post(
f"{self.base_url}/2010-04-01/Accounts/{self.api_key.split(':')[0]}/Messages.json",
f"{self.base_url}/2010-04-01/Accounts/{username}/Messages.json",
data=data,
auth=(self.api_key.split(":")[0], self.api_key.split(":")[1])
auth=(username, password)
)
if response.status_code == 201:
@@ -315,10 +352,17 @@ class WhatsAppService:
async def _get_message_status(self, message_sid: str) -> Optional[str]:
"""Get message delivery status from Twilio"""
try:
# Parse API credentials
try:
username, password = self._parse_api_credentials()
except ValueError as e:
logger.error(f"WhatsApp API key configuration error: {e}")
return None
async with httpx.AsyncClient(timeout=10.0) as client:
response = await client.get(
f"{self.base_url}/2010-04-01/Accounts/{self.api_key.split(':')[0]}/Messages/{message_sid}.json",
auth=(self.api_key.split(":")[0], self.api_key.split(":")[1])
f"{self.base_url}/2010-04-01/Accounts/{username}/Messages/{message_sid}.json",
auth=(username, password)
)
if response.status_code == 200:

View File

@@ -3,6 +3,7 @@ fastapi==0.104.1
uvicorn[standard]==0.24.0
pydantic==2.5.0
pydantic-settings==2.1.0
sse-starlette==1.6.5
# Database
sqlalchemy==2.0.23
@@ -22,8 +23,9 @@ aiofiles==23.2.1
aiosmtplib==3.0.1
email-validator==2.1.0
# Messaging
# Messaging & Redis
aio-pika==9.3.1
redis==5.0.1
# Template Engine
jinja2==3.1.2

View File

@@ -1,248 +0,0 @@
# Orders Service
Customer orders and procurement planning service for the bakery management system.
## Overview
The Orders Service handles all order-related operations including:
- **Customer Management**: Complete customer lifecycle and relationship management
- **Order Processing**: End-to-end order management from creation to fulfillment
- **Procurement Planning**: Automated procurement requirement calculation and planning
- **Business Intelligence**: Order pattern analysis and business model detection
- **Dashboard Analytics**: Comprehensive reporting and metrics for order operations
## Features
### Core Capabilities
- Customer registration and management with detailed profiles
- Order creation, tracking, and status management
- Automated demand requirements calculation for production planning
- Procurement planning with supplier coordination
- Business model detection (individual bakery vs central bakery)
- Comprehensive dashboard with real-time metrics
- Integration with production, inventory, suppliers, and sales services
### API Endpoints
#### Dashboard & Analytics
- `GET /api/v1/tenants/{tenant_id}/orders/dashboard-summary` - Comprehensive dashboard data
- `GET /api/v1/tenants/{tenant_id}/orders/demand-requirements` - Demand analysis for production
- `GET /api/v1/tenants/{tenant_id}/orders/business-model` - Business model detection
#### Order Management
- `POST /api/v1/tenants/{tenant_id}/orders` - Create new customer order
- `GET /api/v1/tenants/{tenant_id}/orders` - List orders with filtering and pagination
- `GET /api/v1/tenants/{tenant_id}/orders/{order_id}` - Get order details with items
- `PUT /api/v1/tenants/{tenant_id}/orders/{order_id}/status` - Update order status
#### Customer Management
- `POST /api/v1/tenants/{tenant_id}/customers` - Create new customer
- `GET /api/v1/tenants/{tenant_id}/customers` - List customers with filtering
- `GET /api/v1/tenants/{tenant_id}/customers/{customer_id}` - Get customer details
#### Health & Status
- `GET /api/v1/tenants/{tenant_id}/orders/status` - Service status information
## Service Integration
### Shared Clients Used
- **InventoryServiceClient**: Stock levels, product availability validation
- **ProductionServiceClient**: Production notifications, capacity planning
- **SalesServiceClient**: Historical sales data for demand forecasting
- **NotificationServiceClient**: Customer notifications and alerts
### Authentication
Uses shared authentication patterns with tenant isolation:
- JWT token validation
- Tenant access verification
- User permission checks
## Configuration
Key configuration options in `app/core/config.py`:
### Order Processing
- `ORDER_PROCESSING_ENABLED`: Enable automatic order processing (default: true)
- `AUTO_APPROVE_ORDERS`: Automatically approve orders (default: false)
- `MAX_ORDER_ITEMS`: Maximum items per order (default: 50)
### Procurement Planning
- `PROCUREMENT_PLANNING_ENABLED`: Enable procurement planning (default: true)
- `PROCUREMENT_LEAD_TIME_DAYS`: Standard procurement lead time (default: 3)
- `DEMAND_FORECAST_DAYS`: Days for demand forecasting (default: 14)
- `SAFETY_STOCK_PERCENTAGE`: Safety stock buffer (default: 20%)
### Business Model Detection
- `ENABLE_BUSINESS_MODEL_DETECTION`: Enable automatic detection (default: true)
- `CENTRAL_BAKERY_ORDER_THRESHOLD`: Order threshold for central bakery (default: 20)
- `INDIVIDUAL_BAKERY_ORDER_THRESHOLD`: Order threshold for individual bakery (default: 5)
### Customer Management
- `CUSTOMER_VALIDATION_ENABLED`: Enable customer validation (default: true)
- `MAX_CUSTOMERS_PER_TENANT`: Maximum customers per tenant (default: 10000)
- `CUSTOMER_CREDIT_CHECK_ENABLED`: Enable credit checking (default: false)
### Order Validation
- `MIN_ORDER_VALUE`: Minimum order value (default: 0.0)
- `MAX_ORDER_VALUE`: Maximum order value (default: 100000.0)
- `VALIDATE_PRODUCT_AVAILABILITY`: Check product availability (default: true)
### Alert Thresholds
- `HIGH_VALUE_ORDER_THRESHOLD`: High-value order alert (default: 5000.0)
- `LARGE_QUANTITY_ORDER_THRESHOLD`: Large quantity alert (default: 100)
- `RUSH_ORDER_HOURS_THRESHOLD`: Rush order time threshold (default: 24)
- `PROCUREMENT_SHORTAGE_THRESHOLD`: Procurement shortage alert (default: 90%)
### Payment and Pricing
- `PAYMENT_VALIDATION_ENABLED`: Enable payment validation (default: true)
- `DYNAMIC_PRICING_ENABLED`: Enable dynamic pricing (default: false)
- `DISCOUNT_ENABLED`: Enable discounts (default: true)
- `MAX_DISCOUNT_PERCENTAGE`: Maximum discount allowed (default: 50%)
### Delivery and Fulfillment
- `DELIVERY_TRACKING_ENABLED`: Enable delivery tracking (default: true)
- `DEFAULT_DELIVERY_WINDOW_HOURS`: Default delivery window (default: 48)
- `PICKUP_ENABLED`: Enable pickup orders (default: true)
- `DELIVERY_ENABLED`: Enable delivery orders (default: true)
## Database Models
### Customer
- Complete customer profile with contact information
- Business type classification (individual, business, central_bakery)
- Payment terms and credit management
- Order history and metrics tracking
- Delivery preferences and special requirements
### CustomerOrder
- Comprehensive order tracking from creation to delivery
- Status management with full audit trail
- Financial calculations including discounts and taxes
- Delivery scheduling and fulfillment tracking
- Business model detection and categorization
- Customer communication preferences
### OrderItem
- Detailed line item tracking with product specifications
- Customization and special instruction support
- Production requirement integration
- Cost tracking and margin analysis
- Quality control integration
### OrderStatusHistory
- Complete audit trail of order status changes
- Event tracking with detailed context
- User attribution and change reasons
- Customer notification tracking
### ProcurementPlan
- Master procurement planning with business model context
- Supplier diversification and risk assessment
- Performance tracking and cost analysis
- Integration with demand forecasting
### ProcurementRequirement
- Detailed procurement requirements per product/ingredient
- Current inventory level integration
- Supplier preference and lead time management
- Quality specifications and special requirements
### OrderAlert
- Comprehensive alert system for order issues
- Multiple severity levels with appropriate routing
- Business impact assessment
- Resolution tracking and performance metrics
## Business Logic
### Order Processing Flow
1. **Order Creation**: Validate customer, calculate totals, create order record
2. **Item Processing**: Create order items with specifications and requirements
3. **Status Tracking**: Maintain complete audit trail of status changes
4. **Customer Metrics**: Update customer statistics and relationship data
5. **Business Model Detection**: Analyze patterns to determine bakery type
6. **Alert Generation**: Check for high-value, rush, or large orders
7. **Service Integration**: Notify production and inventory services
### Procurement Planning
1. **Demand Analysis**: Aggregate orders by delivery date and products
2. **Inventory Integration**: Check current stock levels and reservations
3. **Requirement Calculation**: Calculate net procurement needs with safety buffer
4. **Supplier Coordination**: Match requirements with preferred suppliers
5. **Lead Time Planning**: Account for supplier lead times and delivery windows
6. **Risk Assessment**: Evaluate supply risks and backup options
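
The requirement calculation in step 3 boils down to buffering forecast demand and netting out usable stock. A minimal sketch, assuming `SAFETY_STOCK_PERCENTAGE` and `PROCUREMENT_LEAD_TIME_DAYS` from the configuration above; function and field names are illustrative, not the service's exact API:

```python
from datetime import date, timedelta

def net_procurement_requirement(
    forecast_demand: float,   # aggregated demand for the delivery date (step 1)
    on_hand: float,           # current stock from the inventory service (step 2)
    reserved: float,          # stock already reserved for other orders
    needed_by: date,          # delivery date the requirement must cover
    safety_stock_pct: float = 20.0,   # SAFETY_STOCK_PERCENTAGE
    lead_time_days: int = 3,          # PROCUREMENT_LEAD_TIME_DAYS
) -> tuple[float, date]:
    """Return (quantity to procure, latest date the order must be placed)."""
    buffered_demand = forecast_demand * (1 + safety_stock_pct / 100)
    available = max(on_hand - reserved, 0.0)
    net_quantity = max(buffered_demand - available, 0.0)
    order_by = needed_by - timedelta(days=lead_time_days)
    return round(net_quantity, 2), order_by

# 120 units forecast, 40 on hand, 10 reserved -> procure 114.0 units, order 3 days ahead
print(net_procurement_requirement(120, 40, 10, date(2025, 9, 1)))
```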
### Business Model Detection
- **Individual Bakery**: Low order volume, direct customer sales, standard products
- **Central Bakery**: High volume, wholesale operations, bulk orders
- **Detection Factors**: Order frequency, quantity, customer types, sales channels
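
As a rough sketch of how these factors could combine, the thresholds correspond to `CENTRAL_BAKERY_ORDER_THRESHOLD` and `INDIVIDUAL_BAKERY_ORDER_THRESHOLD`; the wholesale-share cue is an illustrative assumption, not the service's exact rule:

```python
def detect_business_model(
    avg_daily_orders: float,
    wholesale_order_share: float,    # fraction of orders placed by business customers
    central_threshold: int = 20,     # CENTRAL_BAKERY_ORDER_THRESHOLD
    individual_threshold: int = 5,   # INDIVIDUAL_BAKERY_ORDER_THRESHOLD
) -> str:
    if avg_daily_orders >= central_threshold or wholesale_order_share >= 0.5:
        return "central_bakery"
    if avg_daily_orders <= individual_threshold:
        return "individual_bakery"
    return "undetermined"   # in-between volumes: keep collecting order history

print(detect_business_model(avg_daily_orders=32, wholesale_order_share=0.7))  # central_bakery
```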
## Alert System
### Alert Types
- **High Value Orders**: Orders exceeding configured thresholds
- **Rush Orders**: Orders with tight delivery requirements
- **Large Quantity Orders**: Orders with unusually high item counts
- **Payment Issues**: Payment validation failures or credit problems
- **Procurement Shortages**: Insufficient inventory for order fulfillment
- **Customer Issues**: New customers, credit limit exceedances, special requirements
### Severity Levels
- **Critical**: WhatsApp + Email + Dashboard + SMS
- **High**: WhatsApp + Email + Dashboard
- **Medium**: Email + Dashboard
- **Low**: Dashboard only
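
Expressed as data, this routing table could look like the following; the channel identifiers are illustrative, since the actual names live in the notification service:

```python
SEVERITY_CHANNELS = {
    "critical": ["whatsapp", "email", "dashboard", "sms"],
    "high":     ["whatsapp", "email", "dashboard"],
    "medium":   ["email", "dashboard"],
    "low":      ["dashboard"],
}

def channels_for(severity: str) -> list[str]:
    # unknown severities degrade gracefully to dashboard-only
    return SEVERITY_CHANNELS.get(severity, ["dashboard"])
```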
## Development
### Setup
```bash
# Install dependencies
pip install -r requirements.txt
# Set up database
# Configure ORDERS_DATABASE_URL environment variable
# Run migrations
alembic upgrade head
# Start service
uvicorn app.main:app --reload
```
### Testing
```bash
# Run tests
pytest
# Run with coverage
pytest --cov=app
```
### Docker
```bash
# Build image
docker build -t orders-service .
# Run container
docker run -p 8000:8000 orders-service
```
## Deployment
The service is designed for containerized deployment with:
- Health checks at `/health`
- Structured logging
- Metrics collection
- Database migrations
- Service discovery integration
## Architecture
Follows Domain-Driven Microservices Architecture:
- Clean separation of concerns
- Repository pattern for data access
- Service layer for business logic
- API layer for external interface
- Shared infrastructure for cross-cutting concerns

View File

@@ -5,7 +5,7 @@
Orders Service Database Configuration
"""
from sqlalchemy import create_engine
from sqlalchemy import create_engine, text
from sqlalchemy.ext.asyncio import create_async_engine, async_sessionmaker, AsyncSession
from sqlalchemy.orm import sessionmaker, DeclarativeBase
import structlog
@@ -72,7 +72,7 @@ async def get_db_health() -> bool:
"""Check database health"""
try:
async with async_engine.begin() as conn:
await conn.execute("SELECT 1")
await conn.execute(text("SELECT 1"))
return True
except Exception as e:
logger.error("Database health check failed", error=str(e))

View File

@@ -1,138 +0,0 @@
# POS Integration Service
This service handles integration with external Point of Sale (POS) systems for the Bakery IA platform.
## Supported POS Systems
- **Square POS** - Popular payment and POS solution with strong API support
- **Toast POS** - Restaurant-focused POS system with comprehensive features
- **Lightspeed Restaurant** - Full-featured restaurant management system
## Features
- **Real-time webhook handling** from POS systems
- **Bidirectional data synchronization** with sales service
- **Secure credential management** with encryption
- **Multi-tenant support** with tenant-specific configurations
- **Comprehensive transaction logging** and audit trails
- **Automatic duplicate detection** and handling
- **Rate limiting and retry mechanisms** for reliability
## Architecture
The POS service follows the established microservices architecture:
```
POS Service
├── API Layer (FastAPI)
├── Business Logic (Services)
├── Data Access (Repositories)
├── External Integrations (POS Providers)
├── Webhook Handlers
└── Background Sync Jobs
```
## API Endpoints
### Configuration Management
- `GET /api/v1/tenants/{tenant_id}/pos/configurations` - List POS configurations
- `POST /api/v1/tenants/{tenant_id}/pos/configurations` - Create new configuration
- `PUT /api/v1/tenants/{tenant_id}/pos/configurations/{config_id}` - Update configuration
- `DELETE /api/v1/tenants/{tenant_id}/pos/configurations/{config_id}` - Delete configuration
### Webhook Handling
- `POST /api/v1/webhooks/{pos_system}` - Receive webhooks from POS systems
- `GET /api/v1/webhooks/{pos_system}/status` - Get webhook status
### Data Synchronization
- `POST /api/v1/tenants/{tenant_id}/pos/configurations/{config_id}/sync` - Trigger sync
- `GET /api/v1/tenants/{tenant_id}/pos/configurations/{config_id}/sync/status` - Get sync status
- `GET /api/v1/tenants/{tenant_id}/pos/transactions` - Get POS transactions
## Database Schema
### Core Tables
- `pos_configurations` - POS system configurations per tenant
- `pos_transactions` - Transaction data from POS systems
- `pos_transaction_items` - Individual items within transactions
- `pos_webhook_logs` - Webhook event logs
- `pos_sync_logs` - Synchronization operation logs
## Environment Variables
See `app/core/config.py` for all configuration options. Key variables include:
```bash
# Database
POS_DATABASE_URL=postgresql+asyncpg://pos_user:pos_pass123@pos-db:5432/pos_db
# POS Provider Credentials
SQUARE_APPLICATION_ID=your_square_app_id
SQUARE_ACCESS_TOKEN=your_square_token
TOAST_CLIENT_ID=your_toast_client_id
LIGHTSPEED_CLIENT_ID=your_lightspeed_client_id
# Webhook Configuration
WEBHOOK_BASE_URL=https://your-domain.com
WEBHOOK_SECRET=your_webhook_secret
```
## Development
### Running the Service
```bash
# Using Docker Compose (recommended)
docker-compose up pos-service
# Local development
cd services/pos
pip install -r requirements.txt
uvicorn app.main:app --reload --port 8000
```
### Database Migrations
```bash
# Create migration
alembic revision --autogenerate -m "Description"
# Apply migrations
alembic upgrade head
```
### Testing
```bash
# Run tests
pytest tests/
# Run with coverage
pytest --cov=app tests/
```
## Security Considerations
- POS credentials are encrypted before storage
- Webhook signatures are verified for authenticity
- All API endpoints require tenant-based authentication
- Rate limiting prevents abuse
- Sensitive data is logged with appropriate redaction
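
Each provider signs webhooks slightly differently (header name, encoding), but the verification step is the same HMAC comparison against the shared secret. A minimal sketch, assuming an HMAC-SHA256 hex signature and the `WEBHOOK_SECRET` shown in the environment variables above:

```python
import hashlib
import hmac

def verify_webhook_signature(payload: bytes, received_signature: str, secret: str) -> bool:
    """Return True only if the request body matches the provider's signature."""
    expected = hmac.new(secret.encode(), payload, hashlib.sha256).hexdigest()
    # constant-time comparison avoids leaking information through timing
    return hmac.compare_digest(expected, received_signature)
```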
## Monitoring
The service includes comprehensive monitoring:
- Health check endpoints
- Prometheus metrics
- Structured logging
- Performance tracking
- Error rate monitoring
## Integration Flow
1. **Configuration**: Set up POS system credentials via API
2. **Webhook Registration**: Register webhook URLs with POS providers
3. **Real-time Events**: Receive and process webhook events
4. **Data Sync**: Periodic synchronization of transaction data
5. **Sales Integration**: Forward processed data to sales service
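
A simplified sketch of steps 3–5 for a single event, with `repo` standing in for the service's repository layer (its method names here are hypothetical):

```python
async def process_pos_event(event: dict, repo) -> str:
    """Idempotent handling of one webhook event."""
    tx_id = event["transaction_id"]
    if await repo.transaction_exists(tx_id):
        return "duplicate_ignored"          # POS providers may re-deliver events
    await repo.store_transaction(event)     # persist raw transaction and items
    await repo.forward_to_sales(event)      # hand off to the sales service
    return "processed"
```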

View File

@@ -1,187 +0,0 @@
# Production Service
Production planning and batch management service for the bakery management system.
## Overview
The Production Service handles all production-related operations including:
- **Production Planning**: Calculate daily requirements using demand forecasts and inventory levels
- **Batch Management**: Track production batches from start to finish
- **Capacity Management**: Equipment, staff, and time scheduling
- **Quality Control**: Yield tracking, waste management, efficiency metrics
- **Alert System**: Comprehensive monitoring and notifications
## Features
### Core Capabilities
- Daily production requirements calculation
- Production batch lifecycle management
- Real-time capacity planning and utilization
- Quality control tracking and metrics
- Comprehensive alert system with multiple severity levels
- Integration with inventory, orders, recipes, and sales services
### API Endpoints
#### Dashboard & Planning
- `GET /api/v1/tenants/{tenant_id}/production/dashboard-summary` - Production dashboard data
- `GET /api/v1/tenants/{tenant_id}/production/daily-requirements` - Daily production planning
- `GET /api/v1/tenants/{tenant_id}/production/requirements` - Requirements for procurement
#### Batch Management
- `POST /api/v1/tenants/{tenant_id}/production/batches` - Create production batch
- `GET /api/v1/tenants/{tenant_id}/production/batches/active` - Get active batches
- `GET /api/v1/tenants/{tenant_id}/production/batches/{batch_id}` - Get batch details
- `PUT /api/v1/tenants/{tenant_id}/production/batches/{batch_id}/status` - Update batch status
#### Scheduling & Capacity
- `GET /api/v1/tenants/{tenant_id}/production/schedule` - Production schedule
- `GET /api/v1/tenants/{tenant_id}/production/capacity/status` - Capacity status
#### Alerts & Monitoring
- `GET /api/v1/tenants/{tenant_id}/production/alerts` - Production alerts
- `POST /api/v1/tenants/{tenant_id}/production/alerts/{alert_id}/acknowledge` - Acknowledge alerts
#### Analytics
- `GET /api/v1/tenants/{tenant_id}/production/metrics/yield` - Yield metrics
## Service Integration
### Shared Clients Used
- **InventoryServiceClient**: Stock levels, ingredient availability
- **OrdersServiceClient**: Demand requirements, customer orders
- **RecipesServiceClient**: Recipe requirements, ingredient calculations
- **SalesServiceClient**: Historical sales data
- **NotificationServiceClient**: Alert notifications
### Authentication
Uses shared authentication patterns with tenant isolation:
- JWT token validation
- Tenant access verification
- User permission checks
## Configuration
Key configuration options in `app/core/config.py`:
### Production Planning
- `PLANNING_HORIZON_DAYS`: Days ahead for planning (default: 7)
- `PRODUCTION_BUFFER_PERCENTAGE`: Safety buffer for production (default: 10%)
- `MINIMUM_BATCH_SIZE`: Minimum batch size (default: 1.0)
- `MAXIMUM_BATCH_SIZE`: Maximum batch size (default: 100.0)
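
A minimal sketch of how these settings interact when turning forecast demand into a batch size (function and argument names are illustrative):

```python
def plan_batch_quantity(
    forecast_units: float,
    current_stock: float,
    buffer_pct: float = 10.0,   # PRODUCTION_BUFFER_PERCENTAGE
    min_batch: float = 1.0,     # MINIMUM_BATCH_SIZE
    max_batch: float = 100.0,   # MAXIMUM_BATCH_SIZE
) -> float:
    required = max(forecast_units * (1 + buffer_pct / 100) - current_stock, 0.0)
    if required == 0:
        return 0.0
    # clamp to the configured batch-size window
    return min(max(required, min_batch), max_batch)

print(plan_batch_quantity(forecast_units=80, current_stock=20))  # 68.0
```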
### Capacity Management
- `DEFAULT_WORKING_HOURS_PER_DAY`: Standard working hours (default: 12)
- `MAX_OVERTIME_HOURS`: Maximum overtime allowed (default: 4)
- `CAPACITY_UTILIZATION_TARGET`: Target utilization (default: 85%)
### Quality Control
- `MINIMUM_YIELD_PERCENTAGE`: Minimum acceptable yield (default: 85%)
- `QUALITY_SCORE_THRESHOLD`: Minimum quality score (default: 8.0)
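
For reference, the yield check reduces to a single ratio against `MINIMUM_YIELD_PERCENTAGE`:

```python
def yield_status(planned_qty: float, actual_qty: float, min_yield_pct: float = 85.0) -> tuple[float, bool]:
    """Return (yield percentage, whether it meets the configured minimum)."""
    yield_pct = (actual_qty / planned_qty) * 100 if planned_qty else 0.0
    return round(yield_pct, 1), yield_pct >= min_yield_pct

print(yield_status(100, 82))  # (82.0, False) -> would raise a low-yield alert
```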
### Alert Thresholds
- `CAPACITY_EXCEEDED_THRESHOLD`: Capacity alert threshold (default: 100%)
- `PRODUCTION_DELAY_THRESHOLD_MINUTES`: Delay alert threshold (default: 60)
- `LOW_YIELD_ALERT_THRESHOLD`: Low yield alert (default: 80%)
## Database Models
### ProductionBatch
- Complete batch tracking from planning to completion
- Status management (pending, in_progress, completed, etc.)
- Cost tracking and yield calculations
- Quality metrics integration
### ProductionSchedule
- Daily production scheduling
- Capacity planning and tracking
- Staff and equipment assignments
- Performance metrics
### ProductionCapacity
- Resource availability tracking
- Equipment and staff capacity
- Maintenance scheduling
- Utilization monitoring
### QualityCheck
- Quality control measurements
- Pass/fail tracking
- Defect recording
- Corrective action management
### ProductionAlert
- Comprehensive alert system
- Multiple severity levels
- Action recommendations
- Resolution tracking
## Alert System
### Alert Types
- **Capacity Exceeded**: When production requirements exceed available capacity
- **Production Delay**: When batches are delayed beyond thresholds
- **Cost Spike**: When production costs exceed normal ranges
- **Low Yield**: When yield percentages fall below targets
- **Quality Issues**: When quality scores consistently decline
- **Equipment Maintenance**: When equipment needs maintenance
### Severity Levels
- **Critical**: WhatsApp + Email + Dashboard + SMS
- **High**: WhatsApp + Email + Dashboard
- **Medium**: Email + Dashboard
- **Low**: Dashboard only
## Development
### Setup
```bash
# Install dependencies
pip install -r requirements.txt
# Set up database
# Configure DATABASE_URL environment variable
# Run migrations
alembic upgrade head
# Start service
uvicorn app.main:app --reload
```
### Testing
```bash
# Run tests
pytest
# Run with coverage
pytest --cov=app
```
### Docker
```bash
# Build image
docker build -t production-service .
# Run container
docker run -p 8000:8000 production-service
```
## Deployment
The service is designed for containerized deployment with:
- Health checks at `/health`
- Structured logging
- Metrics collection
- Database migrations
- Service discovery integration
## Architecture
Follows Domain-Driven Microservices Architecture:
- Clean separation of concerns
- Repository pattern for data access
- Service layer for business logic
- API layer for external interface
- Shared infrastructure for cross-cutting concerns

View File

@@ -14,6 +14,7 @@ import structlog
from app.core.config import settings
from app.core.database import init_database, get_db_health
from app.api.production import router as production_router
from app.services.production_alert_service import ProductionAlertService
# Configure logging
logger = structlog.get_logger()
@@ -25,6 +26,16 @@ async def lifespan(app: FastAPI):
# Startup
try:
await init_database()
logger.info("Database initialized")
# Initialize alert service
alert_service = ProductionAlertService(settings)
await alert_service.start()
logger.info("Production alert service started")
# Store alert service in app state
app.state.alert_service = alert_service
logger.info("Production service started successfully")
except Exception as e:
logger.error("Failed to initialize production service", error=str(e))
@@ -34,6 +45,13 @@ async def lifespan(app: FastAPI):
# Shutdown
logger.info("Production service shutting down")
try:
# Stop alert service
if hasattr(app.state, 'alert_service'):
await app.state.alert_service.stop()
logger.info("Alert service stopped")
except Exception as e:
logger.error("Error during shutdown", error=str(e))
# Create FastAPI application

View File

@@ -0,0 +1,795 @@
# services/production/app/services/production_alert_service.py
"""
Production-specific alert and recommendation detection service
Monitors production capacity, delays, quality issues, and optimization opportunities
"""
import json
from typing import List, Dict, Any, Optional
from uuid import UUID
from datetime import datetime, timedelta
import structlog
from apscheduler.triggers.cron import CronTrigger
from shared.alerts.base_service import BaseAlertService, AlertServiceMixin
from shared.alerts.templates import format_item_message
logger = structlog.get_logger()
class ProductionAlertService(BaseAlertService, AlertServiceMixin):
"""Production service alert and recommendation detection"""
def setup_scheduled_checks(self):
"""Production-specific scheduled checks for alerts and recommendations"""
# Production capacity checks - every 10 minutes during business hours (alerts)
self.scheduler.add_job(
self.check_production_capacity,
CronTrigger(minute='*/10', hour='6-20'),
id='capacity_check',
misfire_grace_time=60,
max_instances=1
)
# Production delays - every 5 minutes during production hours (alerts)
self.scheduler.add_job(
self.check_production_delays,
CronTrigger(minute='*/5', hour='4-22'),
id='delay_check',
misfire_grace_time=30,
max_instances=1
)
# Quality issues check - every 15 minutes (alerts)
self.scheduler.add_job(
self.check_quality_issues,
CronTrigger(minute='*/15'),
id='quality_check',
misfire_grace_time=60,
max_instances=1
)
# Equipment monitoring - every 3 minutes (alerts)
self.scheduler.add_job(
self.check_equipment_status,
CronTrigger(minute='*/3'),
id='equipment_check',
misfire_grace_time=30,
max_instances=1
)
# Efficiency recommendations - every 30 minutes (recommendations)
self.scheduler.add_job(
self.generate_efficiency_recommendations,
CronTrigger(minute='*/30'),
id='efficiency_recs',
misfire_grace_time=120,
max_instances=1
)
# Energy optimization - every hour (recommendations)
self.scheduler.add_job(
self.generate_energy_recommendations,
CronTrigger(minute='0'),
id='energy_recs',
misfire_grace_time=300,
max_instances=1
)
logger.info("Production alert schedules configured",
service=self.config.SERVICE_NAME)
async def check_production_capacity(self):
"""Check if production plan exceeds capacity (alerts)"""
try:
self._checks_performed += 1
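# NOTE: the full capacity query below is kept only as a reference for when the
# production_schedule / production_capacity tables exist; the simplified query
# further down is the one actually executed against production_batches.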
query = """
WITH capacity_analysis AS (
SELECT
p.tenant_id,
p.planned_date,
SUM(p.planned_quantity) as total_planned,
MAX(pc.daily_capacity) as max_daily_capacity,
COUNT(DISTINCT p.equipment_id) as equipment_count,
AVG(pc.efficiency_percent) as avg_efficiency,
CASE
WHEN SUM(p.planned_quantity) > MAX(pc.daily_capacity) * 1.2 THEN 'severe_overload'
WHEN SUM(p.planned_quantity) > MAX(pc.daily_capacity) THEN 'overload'
WHEN SUM(p.planned_quantity) > MAX(pc.daily_capacity) * 0.9 THEN 'near_capacity'
ELSE 'normal'
END as capacity_status,
(SUM(p.planned_quantity) / MAX(pc.daily_capacity)) * 100 as capacity_percentage
FROM production_schedule p
JOIN production_capacity pc ON pc.equipment_id = p.equipment_id
WHERE p.planned_date >= CURRENT_DATE
AND p.planned_date <= CURRENT_DATE + INTERVAL '3 days'
AND p.status IN ('planned', 'in_progress')
AND p.tenant_id = $1
GROUP BY p.tenant_id, p.planned_date
)
SELECT * FROM capacity_analysis
WHERE capacity_status != 'normal'
ORDER BY capacity_percentage DESC
"""
# Check production capacity without tenant dependencies
try:
from sqlalchemy import text
# Simplified query using only existing production tables
simplified_query = text("""
SELECT
pb.tenant_id,
DATE(pb.planned_start_time) as planned_date,
COUNT(*) as batch_count,
SUM(pb.planned_quantity) as total_planned,
'capacity_check' as capacity_status
FROM production_batches pb
WHERE pb.planned_start_time >= CURRENT_DATE
AND pb.planned_start_time <= CURRENT_DATE + INTERVAL '3 days'
AND pb.status IN ('planned', 'pending', 'in_progress')
GROUP BY pb.tenant_id, DATE(pb.planned_start_time)
HAVING COUNT(*) > 10 -- Alert if more than 10 batches per day
ORDER BY total_planned DESC
""")
async with self.db_manager.get_session() as session:
result = await session.execute(simplified_query)
capacity_issues = result.fetchall()
for issue in capacity_issues:
await self._process_capacity_issue(issue.tenant_id, issue)
except Exception as e:
logger.debug("Simplified capacity check failed", error=str(e))
except Exception as e:
# Skip capacity checks if tables don't exist (graceful degradation)
if "does not exist" in str(e):
logger.debug("Capacity check skipped - missing tables", error=str(e))
else:
logger.error("Capacity check failed", error=str(e))
self._errors_count += 1
async def _process_capacity_issue(self, tenant_id: UUID, issue: Dict[str, Any]):
"""Process capacity overload issue"""
try:
status = issue['capacity_status']
percentage = issue['capacity_percentage']
if status == 'severe_overload':
template_data = self.format_spanish_message(
'order_overload',
percentage=int(percentage - 100)
)
await self.publish_item(tenant_id, {
'type': 'severe_capacity_overload',
'severity': 'urgent',
'title': template_data['title'],
'message': template_data['message'],
'actions': template_data['actions'],
'metadata': {
'planned_date': issue['planned_date'].isoformat(),
'capacity_percentage': float(percentage),
'overload_percentage': float(percentage - 100),
'equipment_count': issue['equipment_count']
}
}, item_type='alert')
elif status == 'overload':
severity = self.get_business_hours_severity('high')
await self.publish_item(tenant_id, {
'type': 'capacity_overload',
'severity': severity,
'title': f'⚠️ Capacidad Excedida: {percentage:.0f}%',
'message': f'Producción planificada para {issue["planned_date"]} excede capacidad en {percentage-100:.0f}%.',
'actions': ['Redistribuir cargas', 'Ampliar turnos', 'Subcontratar', 'Posponer pedidos'],
'metadata': {
'planned_date': issue['planned_date'].isoformat(),
'capacity_percentage': float(percentage),
'equipment_count': issue['equipment_count']
}
}, item_type='alert')
elif status == 'near_capacity':
severity = self.get_business_hours_severity('medium')
await self.publish_item(tenant_id, {
'type': 'near_capacity',
'severity': severity,
'title': f'📊 Cerca de Capacidad Máxima: {percentage:.0f}%',
'message': f'Producción del {issue["planned_date"]} está al {percentage:.0f}% de capacidad. Monitorear de cerca.',
'actions': ['Revisar planificación', 'Preparar contingencias', 'Optimizar eficiencia'],
'metadata': {
'planned_date': issue['planned_date'].isoformat(),
'capacity_percentage': float(percentage)
}
}, item_type='alert')
except Exception as e:
logger.error("Error processing capacity issue", error=str(e))
async def check_production_delays(self):
"""Check for production delays (alerts)"""
try:
self._checks_performed += 1
# Simplified query without customer_orders dependency
query = """
SELECT
pb.id, pb.tenant_id, pb.product_name, pb.batch_number,
pb.planned_end_time as planned_completion_time, pb.actual_start_time,
pb.actual_end_time as estimated_completion_time, pb.status,
EXTRACT(EPOCH FROM (NOW() - pb.planned_end_time)) / 60 as delay_minutes,
COALESCE(pb.priority::text, 'medium') as priority_level,
1 as affected_orders -- Default to 1 since we can't count orders
FROM production_batches pb
WHERE pb.status IN ('in_progress', 'delayed')
AND (
(pb.planned_end_time < NOW() AND pb.status = 'in_progress')
OR pb.status = 'delayed'
)
AND pb.planned_end_time > NOW() - INTERVAL '24 hours'
ORDER BY
CASE COALESCE(pb.priority::text, 'medium')
WHEN 'urgent' THEN 1 WHEN 'high' THEN 2 ELSE 3
END,
delay_minutes DESC
"""
from sqlalchemy import text
async with self.db_manager.get_session() as session:
result = await session.execute(text(query))
delays = result.fetchall()
for delay in delays:
await self._process_production_delay(delay)
except Exception as e:
# Skip delay checks if tables don't exist (graceful degradation)
if "does not exist" in str(e):
logger.debug("Production delay check skipped - missing tables", error=str(e))
else:
logger.error("Production delay check failed", error=str(e))
self._errors_count += 1
async def _process_production_delay(self, delay: Dict[str, Any]):
"""Process production delay"""
try:
delay_minutes = delay['delay_minutes']
priority = delay['priority_level']
affected_orders = delay['affected_orders']
# Determine severity based on delay time and priority
if delay_minutes > 120 or priority == 'urgent':
severity = 'urgent'
elif delay_minutes > 60 or priority == 'high':
severity = 'high'
elif delay_minutes > 30:
severity = 'medium'
else:
severity = 'low'
template_data = self.format_spanish_message(
'production_delay',
batch_name=f"{delay['product_name']} #{delay['batch_number']}",
delay_minutes=int(delay_minutes)
)
await self.publish_item(delay['tenant_id'], {
'type': 'production_delay',
'severity': severity,
'title': template_data['title'],
'message': template_data['message'],
'actions': template_data['actions'],
'metadata': {
'batch_id': str(delay['id']),
'product_name': delay['product_name'],
'batch_number': delay['batch_number'],
'delay_minutes': delay_minutes,
'priority_level': priority,
'affected_orders': affected_orders,
'planned_completion': delay['planned_completion_time'].isoformat()
}
}, item_type='alert')
except Exception as e:
logger.error("Error processing production delay",
batch_id=str(delay.get('id')),
error=str(e))
async def check_quality_issues(self):
"""Check for quality control issues (alerts)"""
try:
self._checks_performed += 1
# Fixed query using actual quality_checks table structure
query = """
SELECT
qc.id, qc.tenant_id, qc.batch_id, qc.check_type as test_type,
qc.quality_score as result_value,
qc.target_weight as min_acceptable,
(qc.target_weight * (1 + qc.tolerance_percentage/100)) as max_acceptable,
CASE
WHEN qc.pass_fail = false AND qc.defect_count > 5 THEN 'critical'
WHEN qc.pass_fail = false THEN 'major'
ELSE 'minor'
END as qc_severity,
qc.created_at,
pb.product_name, pb.batch_number,
COUNT(*) OVER (PARTITION BY qc.batch_id) as total_failures
FROM quality_checks qc
JOIN production_batches pb ON pb.id = qc.batch_id
WHERE qc.pass_fail = false -- Use pass_fail instead of status
AND qc.created_at > NOW() - INTERVAL '4 hours'
AND qc.corrective_action_needed = true -- Use this instead of acknowledged
ORDER BY
CASE
WHEN qc.pass_fail = false AND qc.defect_count > 5 THEN 1
WHEN qc.pass_fail = false THEN 2
ELSE 3
END,
qc.created_at DESC
"""
from sqlalchemy import text
async with self.db_manager.get_session() as session:
result = await session.execute(text(query))
quality_issues = result.fetchall()
for issue in quality_issues:
await self._process_quality_issue(issue)
except Exception as e:
# Skip quality checks if tables don't exist (graceful degradation)
if "does not exist" in str(e) or "column" in str(e).lower() and "does not exist" in str(e).lower():
logger.debug("Quality check skipped - missing tables or columns", error=str(e))
else:
logger.error("Quality check failed", error=str(e))
self._errors_count += 1
async def _process_quality_issue(self, issue: Dict[str, Any]):
"""Process quality control failure"""
try:
qc_severity = issue['qc_severity']
total_failures = issue['total_failures']
# Map QC severity to alert severity
if qc_severity == 'critical' or total_failures > 2:
severity = 'urgent'
elif qc_severity == 'major':
severity = 'high'
else:
severity = 'medium'
await self.publish_item(issue['tenant_id'], {
'type': 'quality_control_failure',
'severity': severity,
'title': f'❌ Fallo Control Calidad: {issue["product_name"]}',
'message': f'Lote {issue["batch_number"]} falló en {issue["test_type"]}. Valor: {issue["result_value"]} (rango: {issue["min_acceptable"]}-{issue["max_acceptable"]})',
'actions': ['Revisar lote', 'Repetir prueba', 'Ajustar proceso', 'Documentar causa'],
'metadata': {
'quality_check_id': str(issue['id']),
'batch_id': str(issue['batch_id']),
'test_type': issue['test_type'],
'result_value': float(issue['result_value']),
'min_acceptable': float(issue['min_acceptable']),
'max_acceptable': float(issue['max_acceptable']),
'qc_severity': qc_severity,
'total_failures': total_failures
}
}, item_type='alert')
# Clear the corrective-action flag so the same failure is not re-alerted
await self.db_manager.execute(
"UPDATE quality_checks SET corrective_action_needed = false WHERE id = $1",
issue['id']
)
except Exception as e:
logger.error("Error processing quality issue",
quality_check_id=str(issue.get('id')),
error=str(e))
async def check_equipment_status(self):
"""Check equipment status and failures (alerts)"""
# Equipment tables don't exist in production database - skip this check
logger.debug("Equipment check skipped - equipment tables not available in production database")
return
async def _process_equipment_issue(self, equipment: Dict[str, Any]):
"""Process equipment issue"""
try:
status = equipment['status']
efficiency = equipment.get('efficiency_percent', 100)
days_to_maintenance = equipment.get('days_to_maintenance', 30)
if status == 'error':
template_data = self.format_spanish_message(
'equipment_failure',
equipment_name=equipment['name']
)
await self.publish_item(equipment['tenant_id'], {
'type': 'equipment_failure',
'severity': 'urgent',
'title': template_data['title'],
'message': template_data['message'],
'actions': template_data['actions'],
'metadata': {
'equipment_id': str(equipment['id']),
'equipment_name': equipment['name'],
'equipment_type': equipment['type'],
'error_count': equipment.get('error_count', 0),
'last_reading': equipment.get('last_reading').isoformat() if equipment.get('last_reading') else None
}
}, item_type='alert')
elif status == 'maintenance_required' or days_to_maintenance <= 1:
severity = 'high' if days_to_maintenance <= 1 else 'medium'
await self.publish_item(equipment['tenant_id'], {
'type': 'maintenance_required',
'severity': severity,
'title': f'🔧 Mantenimiento Requerido: {equipment["name"]}',
'message': f'Equipo {equipment["name"]} requiere mantenimiento en {days_to_maintenance} días.',
'actions': ['Programar mantenimiento', 'Revisar historial', 'Preparar repuestos', 'Planificar parada'],
'metadata': {
'equipment_id': str(equipment['id']),
'days_to_maintenance': days_to_maintenance,
'last_maintenance': equipment.get('last_maintenance').isoformat() if equipment.get('last_maintenance') else None
}
}, item_type='alert')
elif efficiency < 80:
severity = 'medium' if efficiency < 70 else 'low'
await self.publish_item(equipment['tenant_id'], {
'type': 'low_equipment_efficiency',
'severity': severity,
'title': f'📉 Baja Eficiencia: {equipment["name"]}',
'message': f'Eficiencia del {equipment["name"]} bajó a {efficiency:.1f}%. Revisar funcionamiento.',
'actions': ['Revisar configuración', 'Limpiar equipo', 'Calibrar sensores', 'Revisar mantenimiento'],
'metadata': {
'equipment_id': str(equipment['id']),
'efficiency_percent': float(efficiency),
'temperature': equipment.get('temperature'),
'vibration_level': equipment.get('vibration_level')
}
}, item_type='alert')
except Exception as e:
logger.error("Error processing equipment issue",
equipment_id=str(equipment.get('id')),
error=str(e))
async def generate_efficiency_recommendations(self):
"""Generate production efficiency recommendations"""
try:
self._checks_performed += 1
# Analyze production patterns for efficiency opportunities
query = """
WITH efficiency_analysis AS (
SELECT
pb.tenant_id, pb.product_name,
AVG(EXTRACT(EPOCH FROM (pb.actual_end_time - pb.actual_start_time)) / 60) as avg_production_time,
AVG(pb.planned_duration_minutes) as avg_planned_duration,
COUNT(*) as batch_count,
AVG(pb.yield_percentage) as avg_yield,
EXTRACT(hour FROM pb.actual_start_time) as start_hour
FROM production_batches pb
WHERE pb.status = 'completed'
AND pb.actual_end_time > CURRENT_DATE - INTERVAL '30 days'
AND pb.tenant_id = :tenant_id
GROUP BY pb.tenant_id, pb.product_name, EXTRACT(hour FROM pb.actual_start_time)
HAVING COUNT(*) >= 3
),
recommendations AS (
SELECT *,
CASE
WHEN avg_production_time > avg_planned_duration * 1.2 THEN 'reduce_production_time'
WHEN avg_yield < 85 THEN 'improve_yield'
WHEN start_hour BETWEEN 14 AND 16 AND avg_production_time > avg_planned_duration * 1.1 THEN 'avoid_afternoon_production'
ELSE null
END as recommendation_type,
(avg_production_time - avg_planned_duration) / avg_planned_duration * 100 as efficiency_loss_percent
FROM efficiency_analysis
)
SELECT * FROM recommendations
WHERE recommendation_type IS NOT NULL
AND efficiency_loss_percent > 10
ORDER BY efficiency_loss_percent DESC
"""
tenants = await self.get_active_tenants()
for tenant_id in tenants:
try:
from sqlalchemy import text
async with self.db_manager.get_session() as session:
result = await session.execute(text(query), {"tenant_id": tenant_id})
recommendations = result.fetchall()
for rec in recommendations:
await self._generate_efficiency_recommendation(tenant_id, rec)
except Exception as e:
logger.error("Error generating efficiency recommendations",
tenant_id=str(tenant_id),
error=str(e))
except Exception as e:
logger.error("Efficiency recommendations failed", error=str(e))
self._errors_count += 1
async def _generate_efficiency_recommendation(self, tenant_id: UUID, rec: Dict[str, Any]):
"""Generate specific efficiency recommendation"""
try:
if not self.should_send_recommendation(tenant_id, rec['recommendation_type']):
return
rec_type = rec['recommendation_type']
efficiency_loss = rec['efficiency_loss_percent']
if rec_type == 'reduce_production_time':
template_data = self.format_spanish_message(
'production_efficiency',
suggested_time=f"{rec['start_hour']:02d}:00",
savings_percent=efficiency_loss
)
await self.publish_item(tenant_id, {
'type': 'production_efficiency',
'severity': 'medium',
'title': template_data['title'],
'message': template_data['message'],
'actions': template_data['actions'],
'metadata': {
'product_name': rec['product_name'],
'avg_production_time': float(rec['avg_production_time']),
'avg_planned_duration': float(rec['avg_planned_duration']),
'efficiency_loss_percent': float(efficiency_loss),
'batch_count': rec['batch_count'],
'recommendation_type': rec_type
}
}, item_type='recommendation')
elif rec_type == 'improve_yield':
await self.publish_item(tenant_id, {
'type': 'yield_improvement',
'severity': 'medium',
'title': f'📈 Mejorar Rendimiento: {rec["product_name"]}',
'message': f'Rendimiento promedio del {rec["product_name"]} es {rec["avg_yield"]:.1f}%. Oportunidad de mejora.',
'actions': ['Revisar receta', 'Optimizar proceso', 'Entrenar personal', 'Verificar ingredientes'],
'metadata': {
'product_name': rec['product_name'],
'avg_yield': float(rec['avg_yield']),
'batch_count': rec['batch_count'],
'recommendation_type': rec_type
}
}, item_type='recommendation')
elif rec_type == 'avoid_afternoon_production':
await self.publish_item(tenant_id, {
'type': 'schedule_optimization',
'severity': 'low',
'title': f'⏰ Optimizar Horario: {rec["product_name"]}',
'message': f'Producción de {rec["product_name"]} en horario {rec["start_hour"]}:00 muestra menor eficiencia.',
'actions': ['Cambiar horario', 'Analizar causas', 'Revisar personal', 'Optimizar ambiente'],
'metadata': {
'product_name': rec['product_name'],
'start_hour': rec['start_hour'],
'efficiency_loss_percent': float(efficiency_loss),
'recommendation_type': rec_type
}
}, item_type='recommendation')
except Exception as e:
logger.error("Error generating efficiency recommendation",
product_name=rec.get('product_name'),
error=str(e))
async def generate_energy_recommendations(self):
"""Generate energy optimization recommendations"""
try:
# Analyze energy consumption patterns
query = """
SELECT
e.tenant_id, e.name as equipment_name, e.type,
AVG(ec.energy_consumption_kwh) as avg_energy,
EXTRACT(hour FROM ec.recorded_at) as hour_of_day,
COUNT(*) as readings_count
FROM equipment e
JOIN energy_consumption ec ON ec.equipment_id = e.id
WHERE ec.recorded_at > CURRENT_DATE - INTERVAL '30 days'
AND e.tenant_id = :tenant_id
GROUP BY e.tenant_id, e.id, EXTRACT(hour FROM ec.recorded_at)
HAVING COUNT(*) >= 10
ORDER BY avg_energy DESC
"""
tenants = await self.get_active_tenants()
for tenant_id in tenants:
try:
from sqlalchemy import text
async with self.db_manager.get_session() as session:
result = await session.execute(text(query), {"tenant_id": tenant_id})
energy_data = result.fetchall()
# Analyze for peak hours and optimization opportunities
await self._analyze_energy_patterns(tenant_id, energy_data)
except Exception as e:
logger.error("Error generating energy recommendations",
tenant_id=str(tenant_id),
error=str(e))
except Exception as e:
logger.error("Energy recommendations failed", error=str(e))
self._errors_count += 1
async def _analyze_energy_patterns(self, tenant_id: UUID, energy_data: List[Dict[str, Any]]):
"""Analyze energy consumption patterns for optimization"""
try:
if not energy_data:
return
# Group by equipment and find peak hours
equipment_data = {}
for record in energy_data:
equipment = record['equipment_name']
if equipment not in equipment_data:
equipment_data[equipment] = []
equipment_data[equipment].append(record)
for equipment, records in equipment_data.items():
# Find peak consumption hours
peak_hour_record = max(records, key=lambda x: x['avg_energy'])
off_peak_records = [r for r in records if r['hour_of_day'] < 7 or r['hour_of_day'] > 22]
if off_peak_records and peak_hour_record['avg_energy'] > 0:
min_off_peak = min(off_peak_records, key=lambda x: x['avg_energy'])
potential_savings = ((peak_hour_record['avg_energy'] - min_off_peak['avg_energy']) /
peak_hour_record['avg_energy']) * 100
if potential_savings > 15: # More than 15% potential savings
template_data = self.format_spanish_message(
'energy_optimization',
start_time=f"{min_off_peak['hour_of_day']:02d}:00",
end_time=f"{min_off_peak['hour_of_day']+2:02d}:00",
savings_euros=potential_savings * 0.15 # Rough estimate
)
await self.publish_item(tenant_id, {
'type': 'energy_optimization',
'severity': 'low',
'title': template_data['title'],
'message': template_data['message'],
'actions': template_data['actions'],
'metadata': {
'equipment_name': equipment,
'peak_hour': peak_hour_record['hour_of_day'],
'optimal_hour': min_off_peak['hour_of_day'],
'potential_savings_percent': float(potential_savings),
'peak_consumption': float(peak_hour_record['avg_energy']),
'optimal_consumption': float(min_off_peak['avg_energy'])
}
}, item_type='recommendation')
except Exception as e:
logger.error("Error analyzing energy patterns", error=str(e))
async def register_db_listeners(self, conn):
"""Register production-specific database listeners"""
try:
await conn.add_listener('production_alerts', self.handle_production_db_alert)
logger.info("Database listeners registered",
service=self.config.SERVICE_NAME)
except Exception as e:
logger.error("Failed to register database listeners",
service=self.config.SERVICE_NAME,
error=str(e))
async def handle_production_db_alert(self, connection, pid, channel, payload):
"""Handle production alert from database trigger"""
try:
data = json.loads(payload)
tenant_id = UUID(data['tenant_id'])
template_data = self.format_spanish_message(
'production_delay',
batch_name=f"{data['product_name']} #{data.get('batch_number', 'N/A')}",
delay_minutes=data['delay_minutes']
)
await self.publish_item(tenant_id, {
'type': 'production_delay',
'severity': 'high',
'title': template_data['title'],
'message': template_data['message'],
'actions': template_data['actions'],
'metadata': {
'batch_id': data['batch_id'],
'delay_minutes': data['delay_minutes'],
'trigger_source': 'database'
}
}, item_type='alert')
except Exception as e:
logger.error("Error handling production DB alert", error=str(e))
async def start_event_listener(self):
"""Listen for production-affecting events"""
try:
# Subscribe to inventory events that might affect production
await self.rabbitmq_client.consume_events(
"bakery_events",
f"production.inventory.{self.config.SERVICE_NAME}",
"inventory.critical_shortage",
self.handle_inventory_shortage
)
logger.info("Event listeners started",
service=self.config.SERVICE_NAME)
except Exception as e:
logger.error("Failed to start event listeners",
service=self.config.SERVICE_NAME,
error=str(e))
async def handle_inventory_shortage(self, message):
"""Handle critical inventory shortage affecting production"""
try:
shortage = json.loads(message.body)
tenant_id = UUID(shortage['tenant_id'])
# Check if this ingredient affects any current production
affected_batches = await self.get_affected_production_batches(
shortage['ingredient_id']
)
if affected_batches:
await self.publish_item(tenant_id, {
'type': 'production_ingredient_shortage',
'severity': 'high',
'title': f'🚨 Falta Ingrediente para Producción',
'message': f'Escasez de {shortage["ingredient_name"]} afecta {len(affected_batches)} lotes en producción.',
'actions': ['Buscar ingrediente alternativo', 'Pausar producción', 'Contactar proveedor urgente', 'Reorganizar plan'],
'metadata': {
'ingredient_id': shortage['ingredient_id'],
'ingredient_name': shortage['ingredient_name'],
'affected_batches': [str(b) for b in affected_batches],
'shortage_amount': shortage.get('shortage_amount', 0)
}
}, item_type='alert')
except Exception as e:
logger.error("Error handling inventory shortage event", error=str(e))
async def get_affected_production_batches(self, ingredient_id: str) -> List[str]:
"""Get production batches affected by ingredient shortage"""
try:
query = """
SELECT DISTINCT pb.id
FROM production_batches pb
JOIN recipe_ingredients ri ON ri.recipe_id = pb.recipe_id
WHERE ri.ingredient_id = :ingredient_id
AND pb.status IN ('planned', 'in_progress')
AND pb.planned_end_time > NOW()
"""
from sqlalchemy import text
async with self.db_manager.get_session() as session:
result_rows = await session.execute(text(query), {"ingredient_id": ingredient_id})
result = result_rows.fetchall()
return [str(row['id']) for row in result]
except Exception as e:
logger.error("Error getting affected production batches",
ingredient_id=ingredient_id,
error=str(e))
return []

View File

@@ -15,6 +15,14 @@ httpx==0.25.2
# Logging and monitoring
structlog==23.2.0
prometheus-client==0.19.0
# Message queues and Redis
aio-pika==9.3.1
redis>=4.0.0
# Scheduling
APScheduler==3.10.4
# Date and time utilities
python-dateutil==2.8.2

View File

@@ -1,220 +0,0 @@
This change completes the migration of the Prophet ML training functionality from the monolithic backend to a dedicated training microservice. Here's a comprehensive summary of what has been implemented:
## 🎯 **Migration Summary: Prophet Models to Training Service**
### **✅ What Was Migrated**
1. **Prophet Manager** (`prophet_manager.py`):
- Enhanced model training with bakery-specific configurations
- Spanish holidays integration
- Advanced model persistence and metadata storage
- Training metrics calculation
2. **ML Trainer** (`trainer.py`):
- Complete training orchestration for multiple products
- Single product training capability
- Model performance evaluation
- Async-first design replacing Celery complexity
3. **Data Processor** (`data_processor.py`):
- Advanced feature engineering for bakery forecasting
- Weather and traffic data integration
- Spanish holiday and school calendar detection
- Temporal feature extraction
4. **API Layer** (`training.py`):
- RESTful endpoints for training job management
- Real-time progress tracking
- Job cancellation and status monitoring
- Data validation before training
5. **Database Models** (`training.py`):
- `ModelTrainingLog`: Job execution tracking
- `TrainedModel`: Model registry and versioning
- `ModelPerformanceMetric`: Performance monitoring
- `TrainingJobQueue`: Job scheduling system
6. **Service Layer** (`training_service.py`):
- Business logic orchestration
- External service integration (data service)
- Job lifecycle management
- Error handling and recovery
7. **Messaging Integration** (`messaging.py`):
- Event-driven architecture with RabbitMQ
- Inter-service communication
- Real-time notifications
- Event publishing for other services
### **🔧 Key Improvements Over Old System**
#### **1. Eliminated Celery Complexity**
- **Before**: Complex Celery worker setup with sync/async mixing
- **After**: Pure async implementation with FastAPI background tasks
#### **2. Better Error Handling**
- **Before**: Celery task failures were hard to debug
- **After**: Detailed error tracking and recovery mechanisms
#### **3. Real-Time Progress Tracking**
- **Before**: Limited visibility into training progress
- **After**: Real-time updates with detailed step-by-step progress
#### **4. Service Isolation**
- **Before**: Training tightly coupled with main application
- **After**: Independent service that can scale separately
#### **5. Enhanced Model Management**
- **Before**: Basic model storage in filesystem
- **After**: Complete model lifecycle with versioning and metadata
### **🚀 New Capabilities**
#### **1. Advanced Training Features**
```python
# Support for different training modes
await trainer.train_tenant_models(...) # All products
await trainer.train_single_product(...) # Single product
await trainer.evaluate_model_performance(...) # Performance evaluation
```
#### **2. Real-Time Job Management**
```python
# Job lifecycle management
POST /training/jobs # Start training
GET /training/jobs/{id}/status # Get progress
POST /training/jobs/{id}/cancel # Cancel job
GET /training/jobs/{id}/logs # View detailed logs
```
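For example, a caller could poll the status endpoint with `httpx` (already one of the service dependencies). This is only a sketch: the base URL and the `status`/`progress` fields of the response are assumptions, not a confirmed schema.
```python
# Hypothetical client-side polling of GET /training/jobs/{id}/status.
# Base URL and the "status"/"progress" response fields are assumptions.
import asyncio
import httpx

async def wait_for_training_job(job_id: str,
                                base_url: str = "http://training-service:8000") -> dict:
    async with httpx.AsyncClient(base_url=base_url, timeout=10.0) as client:
        while True:
            resp = await client.get(f"/training/jobs/{job_id}/status")
            resp.raise_for_status()
            data = resp.json()
            if data.get("status") in ("completed", "failed", "cancelled"):
                return data
            await asyncio.sleep(5)  # poll every few seconds
```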
#### **3. Data Validation**
```python
# Pre-training validation
POST /training/validate # Check data quality before training
```
#### **4. Event-Driven Architecture**
```python
# Automatic event publishing
await publish_job_started(job_id, tenant_id, config)
await publish_job_completed(job_id, tenant_id, results)
await publish_model_trained(model_id, tenant_id, product_name, metrics)
```
### **📊 Performance Improvements**
#### **1. Faster Training Startup**
- **Before**: 30-60 seconds Celery worker initialization
- **After**: <5 seconds direct async execution
#### **2. Better Resource Utilization**
- **Before**: Fixed Celery worker pools
- **After**: Dynamic scaling based on demand
#### **3. Improved Memory Management**
- **Before**: Memory leaks in long-running Celery workers
- **After**: Clean memory usage with proper cleanup
### **🔒 Enhanced Security & Monitoring**
#### **1. Authentication Integration**
```python
# Secure endpoints with tenant isolation
@router.post("/jobs")
async def start_training_job(
request: TrainingJobRequest,
tenant_id: str = Depends(get_current_tenant_id) # Automatic tenant isolation
):
```
#### **2. Comprehensive Monitoring**
```python
# Built-in metrics collection
metrics.increment_counter("training_jobs_started")
metrics.increment_counter("training_jobs_completed")
metrics.increment_counter("training_jobs_failed")
```
#### **3. Detailed Logging**
```python
# Structured logging with context
logger.info(f"Training job {job_id} completed successfully",
extra={"tenant_id": tenant_id, "models_trained": count})
```
### **🔄 Integration with Existing Architecture**
#### **1. Seamless API Integration**
The new training service integrates perfectly with the existing gateway:
```yaml
# API Gateway routes to training service
/api/training/* → http://training-service:8000/
```
#### **2. Event-Driven Communication**
```python
# Other services can listen to training events
"training.job.completed" forecasting-service (update models)
"training.job.completed" notification-service (send alerts)
"training.model.updated" tenant-service (update quotas)
```
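On the consuming side, a service could bind a queue to these routing keys with `aio-pika`. A minimal sketch follows; the queue name and AMQP URL are assumptions, while the `bakery_events` topic exchange matches the one used elsewhere in this commit.
```python
# Sketch of a consumer for "training.job.completed"; queue name and AMQP URL are assumed.
import asyncio
import json
from aio_pika import connect_robust, ExchangeType, IncomingMessage

async def on_job_completed(message: IncomingMessage):
    async with message.process():  # ack on success, reject on error
        event = json.loads(message.body)
        print("Training job completed for tenant", event.get("tenant_id"))

async def listen_for_training_events():
    connection = await connect_robust("amqp://guest:guest@rabbitmq/")
    channel = await connection.channel()
    exchange = await channel.declare_exchange("bakery_events", ExchangeType.TOPIC, durable=True)
    queue = await channel.declare_queue("forecasting.training_events", durable=True)
    await queue.bind(exchange, routing_key="training.job.completed")
    await queue.consume(on_job_completed)
    await asyncio.Future()  # keep the consumer alive
```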
#### **3. Database Independence**
- Training service has its own PostgreSQL database
- Clean separation from other service data
- Easy to scale and backup independently
### **📦 Deployment Ready**
#### **1. Docker Configuration**
- Optimized Dockerfile with proper security
- Non-root user execution
- Health checks included
#### **2. Requirements Management**
- Pinned dependency versions
- Separated development/production requirements
- Prophet and ML libraries properly configured
#### **3. Environment Configuration**
```bash
# Flexible configuration management
MODEL_STORAGE_PATH=/app/models
MAX_TRAINING_TIME_MINUTES=30
MIN_TRAINING_DATA_DAYS=30
PROPHET_SEASONALITY_MODE=additive
```
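As an illustration only, these variables might be read through a small settings object; the class below is a hypothetical sketch, not part of the migrated code.
```python
# Hypothetical settings sketch mirroring the environment variables above.
import os

class TrainingSettings:
    MODEL_STORAGE_PATH: str = os.getenv("MODEL_STORAGE_PATH", "/app/models")
    MAX_TRAINING_TIME_MINUTES: int = int(os.getenv("MAX_TRAINING_TIME_MINUTES", "30"))
    MIN_TRAINING_DATA_DAYS: int = int(os.getenv("MIN_TRAINING_DATA_DAYS", "30"))
    PROPHET_SEASONALITY_MODE: str = os.getenv("PROPHET_SEASONALITY_MODE", "additive")
```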
### **🎯 Migration Benefits Summary**
| Aspect | Before (Celery) | After (Microservice) |
|--------|----------------|----------------------|
| **Startup Time** | 30-60 seconds | <5 seconds |
| **Error Handling** | Basic | Comprehensive |
| **Progress Tracking** | Limited | Real-time |
| **Scalability** | Fixed workers | Dynamic scaling |
| **Debugging** | Difficult | Easy with logs |
| **Testing** | Complex | Simple unit tests |
| **Deployment** | Monolithic | Independent |
| **Monitoring** | Basic | Full observability |
### **🔧 Ready for Production**
This training service is **production-ready** and provides:
1. **Robust Error Handling**: Graceful failure recovery
2. **Horizontal Scaling**: Can run multiple instances
3. **Performance Monitoring**: Built-in metrics and health checks
4. **Security**: Proper authentication and tenant isolation
5. **Maintainability**: Clean code structure and comprehensive tests
### **🚀 Next Steps**
The training service is now ready to be integrated into your microservices architecture. It completely replaces the old Celery-based training system while providing significant improvements in reliability, performance, and maintainability.
The implementation follows all the microservices best practices and integrates seamlessly with the broader platform architecture you're building for the Madrid bakery forecasting system.