bakery-ia/shared/alerts/base_service.py

# shared/alerts/base_service.py
"""
Base alert service pattern for all microservices
Supports both alerts and recommendations through unified detection patterns
"""

import asyncio
import json
import random
import uuid
from typing import List, Dict, Any, Optional
from uuid import UUID
from datetime import datetime, timedelta
import structlog
from redis.asyncio import Redis
from apscheduler.schedulers.asyncio import AsyncIOScheduler
from apscheduler.triggers.cron import CronTrigger

from shared.messaging.rabbitmq import RabbitMQClient
from shared.database.base import DatabaseManager
from shared.config.rabbitmq_config import get_routing_key

logger = structlog.get_logger()

class BaseAlertService:
    """
    Base class for service-specific alert and recommendation detection
    Implements hybrid detection patterns: scheduled jobs, event-driven, and database triggers
    """
    
    def __init__(self, config):
        self.config = config
        self.db_manager = DatabaseManager(config.DATABASE_URL)
        self.rabbitmq_client = RabbitMQClient(config.RABBITMQ_URL, config.SERVICE_NAME)
        self.redis = None
        self.scheduler = AsyncIOScheduler()
        self.is_leader = False
        self.exchange = "alerts.exchange"
        
        # Metrics
        self._items_published = 0
        self._checks_performed = 0
        self._errors_count = 0
        
    async def start(self):
        """Initialize all detection mechanisms"""
        try:
            # Connect to Redis for leader election and deduplication
            # Use the shared Redis URL which includes TLS configuration
            from redis.asyncio import from_url
            redis_url = self.config.REDIS_URL

            # Create Redis client from URL (supports TLS via rediss:// protocol)
            # For self-signed certificates, disable SSL verification
            redis_kwargs = {
                'decode_responses': True,
                'max_connections': 20
            }

            # If using SSL/TLS, add SSL parameters to handle self-signed certificates
            if redis_url.startswith('rediss://'):
                redis_kwargs.update({
                    'ssl_cert_reqs': None,  # Disable certificate verification
                    'ssl_ca_certs': None,   # Don't require CA certificates
                    'ssl_certfile': None,   # Don't require client cert
                    'ssl_keyfile': None     # Don't require client key
                })

            self.redis = await from_url(redis_url, **redis_kwargs)
            logger.info("Connected to Redis", service=self.config.SERVICE_NAME, redis_url=redis_url.split("@")[-1])
            
            # Connect to RabbitMQ
            await self.rabbitmq_client.connect()
            logger.info("Connected to RabbitMQ", service=self.config.SERVICE_NAME)
            
            # Start leader election for scheduled jobs
            asyncio.create_task(self.maintain_leadership())
            
            # Setup scheduled checks (runs only on leader)
            self.setup_scheduled_checks()
            
            # Start database listener (runs on all instances)
            await self.start_database_listener()
            
            # Start event listener (runs on all instances)
            await self.start_event_listener()
            
            logger.info("Alert service started", service=self.config.SERVICE_NAME)
            
        except Exception as e:
            logger.error("Failed to start alert service", service=self.config.SERVICE_NAME, error=str(e))
            raise
    
    async def stop(self):
        """Clean shutdown"""
        try:
            # Stop scheduler
            if self.scheduler.running:
                self.scheduler.shutdown()
            
            # Close connections
            if self.redis:
                await self.redis.aclose()  # Use aclose() for modern Redis client
                
            await self.rabbitmq_client.disconnect()
            
            logger.info("Alert service stopped", service=self.config.SERVICE_NAME)
            
        except Exception as e:
            logger.error("Error stopping alert service", service=self.config.SERVICE_NAME, error=str(e))
    
    # PATTERN 1: Scheduled Background Jobs
    def setup_scheduled_checks(self):
        """Configure scheduled alert checks - Override in service"""
        raise NotImplementedError("Subclasses must implement setup_scheduled_checks")
        
    async def maintain_leadership(self):
        """Leader election for scheduled jobs"""
        lock_key = f"scheduler_lock:{self.config.SERVICE_NAME}"
        lock_ttl = 60

        logger.info("DEBUG: maintain_leadership starting",
                   service=self.config.SERVICE_NAME,
                   redis_client_type=str(type(self.redis)))

        while True:
            try:
                instance_id = getattr(self.config, 'INSTANCE_ID', str(uuid.uuid4()))
                was_leader = self.is_leader
                
                # Add jitter to avoid thundering herd when multiple instances start
                if not was_leader:
                    await asyncio.sleep(random.uniform(0.1, 0.5))  # Small random delay before attempting to acquire
                
                # Try to acquire new leadership if not currently leader
                if not self.is_leader:
                    # Use atomic Redis operation to acquire lock
                    result = await self.redis.set(
                        lock_key,
                        instance_id,
                        ex=lock_ttl,
                        nx=True  # Only set if key doesn't exist
                    )
                    acquired = result is not None
                    self.is_leader = acquired
                else:
                    # Already leader - try to extend the lock
                    current_value = await self.redis.get(lock_key)
                    # Note: decode_responses=True means Redis returns strings, not bytes
                    if current_value and current_value == instance_id:
                        # Still our lock, extend it using a Lua script for atomicity
                        lua_script = """
                        if redis.call("GET", KEYS[1]) == ARGV[1] then
                            return redis.call("EXPIRE", KEYS[1], ARGV[2])
                        else
                            return 0
                        end
                        """
                        try:
                            extend_result = await self.redis.eval(
                                lua_script,
                                keys=[lock_key],
                                args=[instance_id, lock_ttl]
                            )
                            self.is_leader = extend_result == 1
                        except:
                            # If Lua script fails (Redis cluster), fall back to simple get/set
                            self.is_leader = True  # Keep current state if we can't verify
                    else:
                        # Lock expired or taken by someone else
                        self.is_leader = False
                
                # Handle leadership changes
                if self.is_leader and not was_leader:
                    # Add a small delay to allow other instances to detect leadership change
                    await asyncio.sleep(0.1)
                    if self.is_leader:  # Double-check we're still the leader
                        self.scheduler.start()
                        logger.info("Acquired scheduler leadership", service=self.config.SERVICE_NAME)
                elif not self.is_leader and was_leader:
                    if self.scheduler.running:
                        self.scheduler.shutdown()
                    logger.info("Lost scheduler leadership", service=self.config.SERVICE_NAME)
                    
                # Add jitter to reduce contention between instances
                await asyncio.sleep(lock_ttl // 2 + random.uniform(0, 2))
                
            except Exception as e:
                import traceback
                logger.error("Leadership error",
                           service=self.config.SERVICE_NAME,
                           error=str(e),
                           error_type=type(e).__name__,
                           traceback=traceback.format_exc())
                self.is_leader = False
                await asyncio.sleep(5)
    
    # PATTERN 2: Event-Driven Detection
    async def start_event_listener(self):
        """Listen for business events - Override in service"""
        pass
    
    # PATTERN 3: Database Triggers
    async def start_database_listener(self):
        """Listen for database notifications with connection management"""
        try:
            import asyncpg
            # Convert SQLAlchemy URL format to plain PostgreSQL for asyncpg
            database_url = self.config.DATABASE_URL
            if database_url.startswith('postgresql+asyncpg://'):
                database_url = database_url.replace('postgresql+asyncpg://', 'postgresql://')

            # Add connection timeout and retry logic
            max_retries = 3
            retry_count = 0
            conn = None

            while retry_count < max_retries and not conn:
                try:
                    conn = await asyncio.wait_for(
                        asyncpg.connect(database_url),
                        timeout=10.0
                    )
                    break
                except (asyncio.TimeoutError, Exception) as e:
                    retry_count += 1
                    if retry_count < max_retries:
                        logger.warning(f"DB listener connection attempt {retry_count} failed, retrying...",
                                     service=self.config.SERVICE_NAME, error=str(e))
                        await asyncio.sleep(2)
                    else:
                        raise

            if conn:
                # Register listeners based on service
                await self.register_db_listeners(conn)
                logger.info("Database listeners registered", service=self.config.SERVICE_NAME)

                # Keep connection alive with periodic ping
                asyncio.create_task(self._maintain_db_connection(conn))

        except Exception as e:
            logger.error("Failed to setup database listeners", service=self.config.SERVICE_NAME, error=str(e))

    async def _maintain_db_connection(self, conn):
        """Maintain database connection for listeners"""
        try:
            while not conn.is_closed():
                # Use a timeout to avoid hanging indefinitely
                try:
                    await asyncio.wait_for(
                        conn.fetchval("SELECT 1"), 
                        timeout=5.0
                    )
                    await asyncio.sleep(30)  # Check every 30 seconds
                except asyncio.TimeoutError:
                    logger.warning("DB ping timed out, connection may be dead", service=self.config.SERVICE_NAME)
                    break
                except Exception as e:
                    logger.error("DB listener connection lost", service=self.config.SERVICE_NAME, error=str(e))
                    break
        except Exception as e:
            logger.error("Error maintaining DB connection", service=self.config.SERVICE_NAME, error=str(e))
    
    async def register_db_listeners(self, conn):
        """Register database listeners - Override in service"""
        pass
    
    # Publishing (Updated for type)
    async def publish_item(self, tenant_id: UUID, item: Dict[str, Any], item_type: str = 'alert'):
        """Publish alert or recommendation to RabbitMQ with deduplication"""

        try:
            # Generate proper deduplication key based on alert type and specific identifiers
            unique_id = self._generate_unique_identifier(item)
            item_key = f"{tenant_id}:{item_type}:{item['type']}:{unique_id}"

            if await self.is_duplicate_item(item_key):
                logger.debug("Duplicate item skipped",
                           service=self.config.SERVICE_NAME,
                           item_type=item_type,
                           alert_type=item['type'],
                           dedup_key=item_key)
                return False
                
            # Add metadata
            item['id'] = str(uuid.uuid4())
            item['tenant_id'] = str(tenant_id)
            item['service'] = self.config.SERVICE_NAME
            item['timestamp'] = datetime.utcnow().isoformat()
            item['item_type'] = item_type  # 'alert' or 'recommendation'
            
            # Determine routing key based on severity and type
            routing_key = get_routing_key(item_type, item['severity'], self.config.SERVICE_NAME)
            
            # Publish to RabbitMQ with timeout to prevent blocking
            try:
                success = await asyncio.wait_for(
                    self.rabbitmq_client.publish_event(
                        exchange_name=self.exchange,
                        routing_key=routing_key,
                        event_data=item
                    ),
                    timeout=10.0  # 10 second timeout
                )
            except asyncio.TimeoutError:
                logger.error("RabbitMQ publish timed out", 
                           service=self.config.SERVICE_NAME,
                           item_type=item_type,
                           alert_type=item['type'])
                return False
            
            if success:
                self._items_published += 1
                logger.info("Item published successfully", 
                          service=self.config.SERVICE_NAME,
                          item_type=item_type,
                          alert_type=item['type'],
                          severity=item['severity'],
                          routing_key=routing_key)
            else:
                self._errors_count += 1
                logger.error("Failed to publish item", 
                           service=self.config.SERVICE_NAME,
                           item_type=item_type,
                           alert_type=item['type'])
            
            return success
            
        except Exception as e:
            self._errors_count += 1
            logger.error("Error publishing item", 
                       service=self.config.SERVICE_NAME,
                       error=str(e),
                       item_type=item_type)
            return False
    
    def _generate_unique_identifier(self, item: Dict[str, Any]) -> str:
        """Generate unique identifier for deduplication based on alert type and content"""
        alert_type = item.get('type', '')
        metadata = item.get('metadata', {})

        # Generate unique identifier based on alert type
        if alert_type == 'overstock_warning':
            return metadata.get('ingredient_id', '')
        elif alert_type == 'critical_stock_shortage' or alert_type == 'low_stock_warning':
            return metadata.get('ingredient_id', '')
        elif alert_type == 'expired_products':
            # For expired products alerts, create hash of all expired item IDs
            expired_items = metadata.get('expired_items', [])
            if expired_items:
                expired_ids = sorted([str(item.get('id', '')) for item in expired_items])
                import hashlib
                return hashlib.md5(':'.join(expired_ids).encode()).hexdigest()[:16]
            return ''
        elif alert_type == 'urgent_expiry':
            return f"{metadata.get('ingredient_id', '')}:{metadata.get('stock_id', '')}"
        elif alert_type == 'temperature_breach':
            return f"{metadata.get('sensor_id', '')}:{metadata.get('location', '')}"
        elif alert_type == 'stock_depleted_by_order':
            return f"{metadata.get('order_id', '')}:{metadata.get('ingredient_id', '')}"
        elif alert_type == 'expired_batches_auto_processed':
            # Use processing date and total batches as identifier
            processing_date = metadata.get('processing_date', '')[:10]  # Date only
            total_batches = metadata.get('total_batches_processed', 0)
            return f"{processing_date}:{total_batches}"
        elif alert_type == 'inventory_optimization':
            return f"opt:{metadata.get('ingredient_id', '')}:{metadata.get('recommendation_type', '')}"
        elif alert_type == 'waste_reduction':
            return f"waste:{metadata.get('ingredient_id', '')}"
        else:
            # Fallback to generic metadata.id or empty string
            return metadata.get('id', '')

    async def is_duplicate_item(self, item_key: str, window_minutes: int = 15) -> bool:
        """Prevent duplicate items within time window"""
        key = f"item_sent:{item_key}"
        try:
            result = await self.redis.set(
                key, "1",
                ex=window_minutes * 60,
                nx=True
            )
            return result is None  # None means duplicate
        except Exception as e:
            logger.error("Error checking duplicate", error=str(e))
            return False  # Allow publishing if check fails
    
    # Helper methods
    async def get_active_tenants(self) -> List[UUID]:
        """Get list of active tenant IDs"""
        try:
            from sqlalchemy import text
            query = text("SELECT DISTINCT tenant_id FROM tenants WHERE status = 'active'")
            async with self.db_manager.get_session() as session:
                result = await session.execute(query)
                return [row.tenant_id for row in result.fetchall()]
        except Exception as e:
            # If tenants table doesn't exist, skip tenant-based processing
            if "does not exist" in str(e):
                logger.debug("Tenants table not found, skipping tenant-based alert processing")
                return []
            else:
                logger.error("Error fetching active tenants", error=str(e))
                return []
    
    async def get_tenant_config(self, tenant_id: UUID) -> Dict[str, Any]:
        """Get tenant-specific configuration"""
        try:
            from sqlalchemy import text
            query = text("SELECT config FROM tenants WHERE tenant_id = :tenant_id")
            async with self.db_manager.get_session() as session:
                result = await session.execute(query, {"tenant_id": tenant_id})
                row = result.fetchone()
                return json.loads(row.config) if row and row.config else {}
        except Exception as e:
            logger.error("Error fetching tenant config", tenant_id=str(tenant_id), error=str(e))
            return {}
    
    # Health and metrics
    def get_metrics(self) -> Dict[str, Any]:
        """Get service metrics"""
        return {
            "items_published": self._items_published,
            "checks_performed": self._checks_performed,
            "errors_count": self._errors_count,
            "is_leader": self.is_leader,
            "scheduler_running": self.scheduler.running,
            "redis_connected": self.redis and not self.redis.closed,
            "rabbitmq_connected": self.rabbitmq_client.connected if self.rabbitmq_client else False
        }
    
    async def health_check(self) -> Dict[str, Any]:
        """Comprehensive health check"""
        try:
            # Check Redis
            redis_healthy = False
            if self.redis and not self.redis.closed:
                await self.redis.ping()
                redis_healthy = True
            
            # Check RabbitMQ
            rabbitmq_healthy = self.rabbitmq_client.connected if self.rabbitmq_client else False
            
            # Check database
            db_healthy = False
            try:
                from sqlalchemy import text
                async with self.db_manager.get_session() as session:
                    await session.execute(text("SELECT 1"))
                db_healthy = True
            except:
                pass
            
            status = "healthy" if all([redis_healthy, rabbitmq_healthy, db_healthy]) else "unhealthy"
            
            return {
                "status": status,
                "service": self.config.SERVICE_NAME,
                "components": {
                    "redis": "healthy" if redis_healthy else "unhealthy",
                    "rabbitmq": "healthy" if rabbitmq_healthy else "unhealthy", 
                    "database": "healthy" if db_healthy else "unhealthy",
                    "scheduler": "running" if self.scheduler.running else "stopped"
                },
                "metrics": self.get_metrics()
            }
            
        except Exception as e:
            return {
                "status": "error",
                "service": self.config.SERVICE_NAME,
                "error": str(e)
            }


class AlertServiceMixin:
    """Mixin providing common alert helper methods"""
    
    def format_spanish_message(self, template_key: str, **kwargs) -> Dict[str, Any]:
        """Format Spanish alert message"""
        from shared.alerts.templates import format_item_message
        return format_item_message(template_key, 'es', **kwargs)
    
    def get_business_hours_severity(self, base_severity: str) -> str:
        """Adjust severity based on business hours"""
        current_hour = datetime.now().hour
        
        # Reduce non-critical severity outside business hours (7-20)
        if not (7 <= current_hour <= 20):
            if base_severity == 'medium':
                return 'low'
            elif base_severity == 'high' and current_hour < 6 or current_hour > 22:
                return 'medium'
        
        return base_severity
    
    def should_send_recommendation(self, tenant_id: UUID, rec_type: str) -> bool:
        """Check if recommendation should be sent based on tenant preferences"""
        # Implement tenant-specific recommendation frequency limits
        # This is a simplified version
        return True
Add new alert architecture 2025-08-23 10:19:58 +02:00			`# shared/alerts/base_service.py`
			`"""`
			`Base alert service pattern for all microservices`
			`Supports both alerts and recommendations through unified detection patterns`
			`"""`

			`import asyncio`
			`import json`
Fix production deadlock 2025-09-25 21:00:40 +02:00			`import random`
Add new alert architecture 2025-08-23 10:19:58 +02:00			`import uuid`
			`from typing import List, Dict, Any, Optional`
			`from uuid import UUID`
			`from datetime import datetime, timedelta`
			`import structlog`
			`from redis.asyncio import Redis`
			`from apscheduler.schedulers.asyncio import AsyncIOScheduler`
			`from apscheduler.triggers.cron import CronTrigger`

			`from shared.messaging.rabbitmq import RabbitMQClient`
			`from shared.database.base import DatabaseManager`
			`from shared.config.rabbitmq_config import get_routing_key`

			`logger = structlog.get_logger()`

			`class BaseAlertService:`
			`"""`
			`Base class for service-specific alert and recommendation detection`
			`Implements hybrid detection patterns: scheduled jobs, event-driven, and database triggers`
			`"""`

			`def __init__(self, config):`
			`self.config = config`
			`self.db_manager = DatabaseManager(config.DATABASE_URL)`
			`self.rabbitmq_client = RabbitMQClient(config.RABBITMQ_URL, config.SERVICE_NAME)`
			`self.redis = None`
			`self.scheduler = AsyncIOScheduler()`
			`self.is_leader = False`
			`self.exchange = "alerts.exchange"`

			`# Metrics`
			`self._items_published = 0`
			`self._checks_performed = 0`
			`self._errors_count = 0`

			`async def start(self):`
			`"""Initialize all detection mechanisms"""`
			`try:`
			`# Connect to Redis for leader election and deduplication`
Improve teh securty of teh DB 2025-10-19 19:22:37 +02:00			`# Use the shared Redis URL which includes TLS configuration`
			`from redis.asyncio import from_url`
			`redis_url = self.config.REDIS_URL`
Fix startup issues 2025-10-01 12:17:59 +02:00
Improve teh securty of teh DB 2025-10-19 19:22:37 +02:00			`# Create Redis client from URL (supports TLS via rediss:// protocol)`
Improve AI logic 2025-11-05 13:34:56 +01:00			`# For self-signed certificates, disable SSL verification`
			`redis_kwargs = {`
			`'decode_responses': True,`
			`'max_connections': 20`
			`}`

			`# If using SSL/TLS, add SSL parameters to handle self-signed certificates`
			`if redis_url.startswith('rediss://'):`
			`redis_kwargs.update({`
			`'ssl_cert_reqs': None, # Disable certificate verification`
			`'ssl_ca_certs': None, # Don't require CA certificates`
			`'ssl_certfile': None, # Don't require client cert`
			`'ssl_keyfile': None # Don't require client key`
			`})`

			`self.redis = await from_url(redis_url, **redis_kwargs)`
Improve teh securty of teh DB 2025-10-19 19:22:37 +02:00			`logger.info("Connected to Redis", service=self.config.SERVICE_NAME, redis_url=redis_url.split("@")[-1])`
Add new alert architecture 2025-08-23 10:19:58 +02:00
			`# Connect to RabbitMQ`
			`await self.rabbitmq_client.connect()`
			`logger.info("Connected to RabbitMQ", service=self.config.SERVICE_NAME)`

			`# Start leader election for scheduled jobs`
			`asyncio.create_task(self.maintain_leadership())`

			`# Setup scheduled checks (runs only on leader)`
			`self.setup_scheduled_checks()`

			`# Start database listener (runs on all instances)`
			`await self.start_database_listener()`

			`# Start event listener (runs on all instances)`
			`await self.start_event_listener()`

			`logger.info("Alert service started", service=self.config.SERVICE_NAME)`

			`except Exception as e:`
			`logger.error("Failed to start alert service", service=self.config.SERVICE_NAME, error=str(e))`
			`raise`

			`async def stop(self):`
			`"""Clean shutdown"""`
			`try:`
			`# Stop scheduler`
			`if self.scheduler.running:`
			`self.scheduler.shutdown()`

			`# Close connections`
			`if self.redis:`
			`await self.redis.aclose() # Use aclose() for modern Redis client`

			`await self.rabbitmq_client.disconnect()`

			`logger.info("Alert service stopped", service=self.config.SERVICE_NAME)`

			`except Exception as e:`
			`logger.error("Error stopping alert service", service=self.config.SERVICE_NAME, error=str(e))`

			`# PATTERN 1: Scheduled Background Jobs`
			`def setup_scheduled_checks(self):`
			`"""Configure scheduled alert checks - Override in service"""`
			`raise NotImplementedError("Subclasses must implement setup_scheduled_checks")`

			`async def maintain_leadership(self):`
			`"""Leader election for scheduled jobs"""`
			`lock_key = f"scheduler_lock:{self.config.SERVICE_NAME}"`
			`lock_ttl = 60`
Fix startup issues 2025-10-01 12:17:59 +02:00
			`logger.info("DEBUG: maintain_leadership starting",`
			`service=self.config.SERVICE_NAME,`
			`redis_client_type=str(type(self.redis)))`

Add new alert architecture 2025-08-23 10:19:58 +02:00			`while True:`
			`try:`
Fix production deadlock 2025-09-25 21:00:40 +02:00			`instance_id = getattr(self.config, 'INSTANCE_ID', str(uuid.uuid4()))`
Add new alert architecture 2025-08-23 10:19:58 +02:00			`was_leader = self.is_leader`

Fix production deadlock 2025-09-25 21:00:40 +02:00			`# Add jitter to avoid thundering herd when multiple instances start`
			`if not was_leader:`
			`await asyncio.sleep(random.uniform(0.1, 0.5)) # Small random delay before attempting to acquire`

Add new alert architecture 2025-08-23 10:19:58 +02:00			`# Try to acquire new leadership if not currently leader`
			`if not self.is_leader:`
Fix production deadlock 2025-09-25 21:00:40 +02:00			`# Use atomic Redis operation to acquire lock`
Add new alert architecture 2025-08-23 10:19:58 +02:00			`result = await self.redis.set(`
			`lock_key,`
			`instance_id,`
			`ex=lock_ttl,`
Fix production deadlock 2025-09-25 21:00:40 +02:00			`nx=True # Only set if key doesn't exist`
Add new alert architecture 2025-08-23 10:19:58 +02:00			`)`
Fix production deadlock 2025-09-25 21:00:40 +02:00			`acquired = result is not None`
			`self.is_leader = acquired`
Add new alert architecture 2025-08-23 10:19:58 +02:00			`else:`
			`# Already leader - try to extend the lock`
			`current_value = await self.redis.get(lock_key)`
Add DEMO feature to the project 2025-10-03 14:09:34 +02:00			`# Note: decode_responses=True means Redis returns strings, not bytes`
			`if current_value and current_value == instance_id:`
Fix production deadlock 2025-09-25 21:00:40 +02:00			`# Still our lock, extend it using a Lua script for atomicity`
			`lua_script = """`
			`if redis.call("GET", KEYS[1]) == ARGV[1] then`
			`return redis.call("EXPIRE", KEYS[1], ARGV[2])`
			`else`
			`return 0`
			`end`
			`"""`
			`try:`
			`extend_result = await self.redis.eval(`
			`lua_script,`
			`keys=[lock_key],`
			`args=[instance_id, lock_ttl]`
			`)`
			`self.is_leader = extend_result == 1`
			`except:`
			`# If Lua script fails (Redis cluster), fall back to simple get/set`
			`self.is_leader = True # Keep current state if we can't verify`
Add new alert architecture 2025-08-23 10:19:58 +02:00			`else:`
			`# Lock expired or taken by someone else`
			`self.is_leader = False`

			`# Handle leadership changes`
			`if self.is_leader and not was_leader:`
Fix production deadlock 2025-09-25 21:00:40 +02:00			`# Add a small delay to allow other instances to detect leadership change`
			`await asyncio.sleep(0.1)`
			`if self.is_leader: # Double-check we're still the leader`
			`self.scheduler.start()`
			`logger.info("Acquired scheduler leadership", service=self.config.SERVICE_NAME)`
Add new alert architecture 2025-08-23 10:19:58 +02:00			`elif not self.is_leader and was_leader:`
Fix production deadlock 2025-09-25 21:00:40 +02:00			`if self.scheduler.running:`
			`self.scheduler.shutdown()`
Add new alert architecture 2025-08-23 10:19:58 +02:00			`logger.info("Lost scheduler leadership", service=self.config.SERVICE_NAME)`

Fix production deadlock 2025-09-25 21:00:40 +02:00			`# Add jitter to reduce contention between instances`
			`await asyncio.sleep(lock_ttl // 2 + random.uniform(0, 2))`
Add new alert architecture 2025-08-23 10:19:58 +02:00
			`except Exception as e:`
Fix startup issues 2025-10-01 12:17:59 +02:00			`import traceback`
			`logger.error("Leadership error",`
			`service=self.config.SERVICE_NAME,`
			`error=str(e),`
			`error_type=type(e).__name__,`
			`traceback=traceback.format_exc())`
Add new alert architecture 2025-08-23 10:19:58 +02:00			`self.is_leader = False`
			`await asyncio.sleep(5)`

			`# PATTERN 2: Event-Driven Detection`
			`async def start_event_listener(self):`
			`"""Listen for business events - Override in service"""`
			`pass`

			`# PATTERN 3: Database Triggers`
			`async def start_database_listener(self):`
Support subcription payments 2025-09-25 14:30:47 +02:00			`"""Listen for database notifications with connection management"""`
Add new alert architecture 2025-08-23 10:19:58 +02:00			`try:`
			`import asyncpg`
			`# Convert SQLAlchemy URL format to plain PostgreSQL for asyncpg`
			`database_url = self.config.DATABASE_URL`
			`if database_url.startswith('postgresql+asyncpg://'):`
			`database_url = database_url.replace('postgresql+asyncpg://', 'postgresql://')`
Support subcription payments 2025-09-25 14:30:47 +02:00
			`# Add connection timeout and retry logic`
			`max_retries = 3`
			`retry_count = 0`
			`conn = None`

			`while retry_count < max_retries and not conn:`
			`try:`
			`conn = await asyncio.wait_for(`
			`asyncpg.connect(database_url),`
			`timeout=10.0`
			`)`
			`break`
			`except (asyncio.TimeoutError, Exception) as e:`
			`retry_count += 1`
			`if retry_count < max_retries:`
			`logger.warning(f"DB listener connection attempt {retry_count} failed, retrying...",`
			`service=self.config.SERVICE_NAME, error=str(e))`
			`await asyncio.sleep(2)`
			`else:`
			`raise`

			`if conn:`
			`# Register listeners based on service`
			`await self.register_db_listeners(conn)`
			`logger.info("Database listeners registered", service=self.config.SERVICE_NAME)`

			`# Keep connection alive with periodic ping`
			`asyncio.create_task(self._maintain_db_connection(conn))`

Add new alert architecture 2025-08-23 10:19:58 +02:00			`except Exception as e:`
			`logger.error("Failed to setup database listeners", service=self.config.SERVICE_NAME, error=str(e))`
Support subcription payments 2025-09-25 14:30:47 +02:00
			`async def _maintain_db_connection(self, conn):`
			`"""Maintain database connection for listeners"""`
			`try:`
			`while not conn.is_closed():`
Fix production deadlock 2025-09-25 21:00:40 +02:00			`# Use a timeout to avoid hanging indefinitely`
Support subcription payments 2025-09-25 14:30:47 +02:00			`try:`
Fix production deadlock 2025-09-25 21:00:40 +02:00			`await asyncio.wait_for(`
			`conn.fetchval("SELECT 1"),`
			`timeout=5.0`
			`)`
			`await asyncio.sleep(30) # Check every 30 seconds`
			`except asyncio.TimeoutError:`
			`logger.warning("DB ping timed out, connection may be dead", service=self.config.SERVICE_NAME)`
			`break`
Support subcription payments 2025-09-25 14:30:47 +02:00			`except Exception as e:`
			`logger.error("DB listener connection lost", service=self.config.SERVICE_NAME, error=str(e))`
			`break`
			`except Exception as e:`
			`logger.error("Error maintaining DB connection", service=self.config.SERVICE_NAME, error=str(e))`
Add new alert architecture 2025-08-23 10:19:58 +02:00
			`async def register_db_listeners(self, conn):`
			`"""Register database listeners - Override in service"""`
			`pass`

			`# Publishing (Updated for type)`
			`async def publish_item(self, tenant_id: UUID, item: Dict[str, Any], item_type: str = 'alert'):`
			`"""Publish alert or recommendation to RabbitMQ with deduplication"""`
Fix few issues 2025-09-26 12:12:17 +02:00
Add new alert architecture 2025-08-23 10:19:58 +02:00			`try:`
Fix few issues 2025-09-26 12:12:17 +02:00			`# Generate proper deduplication key based on alert type and specific identifiers`
			`unique_id = self._generate_unique_identifier(item)`
			`item_key = f"{tenant_id}:{item_type}:{item['type']}:{unique_id}"`

Add new alert architecture 2025-08-23 10:19:58 +02:00			`if await self.is_duplicate_item(item_key):`
Fix few issues 2025-09-26 12:12:17 +02:00			`logger.debug("Duplicate item skipped",`
			`service=self.config.SERVICE_NAME,`
			`item_type=item_type,`
			`alert_type=item['type'],`
			`dedup_key=item_key)`
Add new alert architecture 2025-08-23 10:19:58 +02:00			`return False`

			`# Add metadata`
			`item['id'] = str(uuid.uuid4())`
			`item['tenant_id'] = str(tenant_id)`
			`item['service'] = self.config.SERVICE_NAME`
			`item['timestamp'] = datetime.utcnow().isoformat()`
			`item['item_type'] = item_type # 'alert' or 'recommendation'`

			`# Determine routing key based on severity and type`
			`routing_key = get_routing_key(item_type, item['severity'], self.config.SERVICE_NAME)`

Fix production deadlock 2025-09-25 21:00:40 +02:00			`# Publish to RabbitMQ with timeout to prevent blocking`
			`try:`
			`success = await asyncio.wait_for(`
			`self.rabbitmq_client.publish_event(`
			`exchange_name=self.exchange,`
			`routing_key=routing_key,`
			`event_data=item`
			`),`
			`timeout=10.0 # 10 second timeout`
			`)`
			`except asyncio.TimeoutError:`
			`logger.error("RabbitMQ publish timed out",`
			`service=self.config.SERVICE_NAME,`
			`item_type=item_type,`
			`alert_type=item['type'])`
			`return False`
Add new alert architecture 2025-08-23 10:19:58 +02:00
			`if success:`
			`self._items_published += 1`
			`logger.info("Item published successfully",`
			`service=self.config.SERVICE_NAME,`
			`item_type=item_type,`
			`alert_type=item['type'],`
			`severity=item['severity'],`
			`routing_key=routing_key)`
			`else:`
			`self._errors_count += 1`
			`logger.error("Failed to publish item",`
			`service=self.config.SERVICE_NAME,`
			`item_type=item_type,`
			`alert_type=item['type'])`

			`return success`

			`except Exception as e:`
			`self._errors_count += 1`
			`logger.error("Error publishing item",`
			`service=self.config.SERVICE_NAME,`
			`error=str(e),`
			`item_type=item_type)`
			`return False`

Fix few issues 2025-09-26 12:12:17 +02:00			`def _generate_unique_identifier(self, item: Dict[str, Any]) -> str:`
			`"""Generate unique identifier for deduplication based on alert type and content"""`
			`alert_type = item.get('type', '')`
			`metadata = item.get('metadata', {})`

			`# Generate unique identifier based on alert type`
			`if alert_type == 'overstock_warning':`
			`return metadata.get('ingredient_id', '')`
			`elif alert_type == 'critical_stock_shortage' or alert_type == 'low_stock_warning':`
			`return metadata.get('ingredient_id', '')`
			`elif alert_type == 'expired_products':`
			`# For expired products alerts, create hash of all expired item IDs`
			`expired_items = metadata.get('expired_items', [])`
			`if expired_items:`
			`expired_ids = sorted([str(item.get('id', '')) for item in expired_items])`
			`import hashlib`
			`return hashlib.md5(':'.join(expired_ids).encode()).hexdigest()[:16]`
			`return ''`
			`elif alert_type == 'urgent_expiry':`
			`return f"{metadata.get('ingredient_id', '')}:{metadata.get('stock_id', '')}"`
			`elif alert_type == 'temperature_breach':`
			`return f"{metadata.get('sensor_id', '')}:{metadata.get('location', '')}"`
			`elif alert_type == 'stock_depleted_by_order':`
			`return f"{metadata.get('order_id', '')}:{metadata.get('ingredient_id', '')}"`
			`elif alert_type == 'expired_batches_auto_processed':`
			`# Use processing date and total batches as identifier`
			`processing_date = metadata.get('processing_date', '')[:10] # Date only`
			`total_batches = metadata.get('total_batches_processed', 0)`
			`return f"{processing_date}:{total_batches}"`
			`elif alert_type == 'inventory_optimization':`
			`return f"opt:{metadata.get('ingredient_id', '')}:{metadata.get('recommendation_type', '')}"`
			`elif alert_type == 'waste_reduction':`
			`return f"waste:{metadata.get('ingredient_id', '')}"`
			`else:`
			`# Fallback to generic metadata.id or empty string`
			`return metadata.get('id', '')`

Add new alert architecture 2025-08-23 10:19:58 +02:00			`async def is_duplicate_item(self, item_key: str, window_minutes: int = 15) -> bool:`
			`"""Prevent duplicate items within time window"""`
			`key = f"item_sent:{item_key}"`
			`try:`
			`result = await self.redis.set(`
Fix few issues 2025-09-26 12:12:17 +02:00			`key, "1",`
Add new alert architecture 2025-08-23 10:19:58 +02:00			`ex=window_minutes * 60,`
			`nx=True`
			`)`
			`return result is None # None means duplicate`
			`except Exception as e:`
			`logger.error("Error checking duplicate", error=str(e))`
			`return False # Allow publishing if check fails`

			`# Helper methods`
			`async def get_active_tenants(self) -> List[UUID]:`
			`"""Get list of active tenant IDs"""`
			`try:`
			`from sqlalchemy import text`
			`query = text("SELECT DISTINCT tenant_id FROM tenants WHERE status = 'active'")`
			`async with self.db_manager.get_session() as session:`
			`result = await session.execute(query)`
			`return [row.tenant_id for row in result.fetchall()]`
			`except Exception as e:`
			`# If tenants table doesn't exist, skip tenant-based processing`
			`if "does not exist" in str(e):`
			`logger.debug("Tenants table not found, skipping tenant-based alert processing")`
			`return []`
			`else:`
			`logger.error("Error fetching active tenants", error=str(e))`
			`return []`

			`async def get_tenant_config(self, tenant_id: UUID) -> Dict[str, Any]:`
			`"""Get tenant-specific configuration"""`
			`try:`
			`from sqlalchemy import text`
			`query = text("SELECT config FROM tenants WHERE tenant_id = :tenant_id")`
			`async with self.db_manager.get_session() as session:`
			`result = await session.execute(query, {"tenant_id": tenant_id})`
			`row = result.fetchone()`
			`return json.loads(row.config) if row and row.config else {}`
			`except Exception as e:`
			`logger.error("Error fetching tenant config", tenant_id=str(tenant_id), error=str(e))`
			`return {}`

			`# Health and metrics`
			`def get_metrics(self) -> Dict[str, Any]:`
			`"""Get service metrics"""`
			`return {`
			`"items_published": self._items_published,`
			`"checks_performed": self._checks_performed,`
			`"errors_count": self._errors_count,`
			`"is_leader": self.is_leader,`
			`"scheduler_running": self.scheduler.running,`
			`"redis_connected": self.redis and not self.redis.closed,`
			`"rabbitmq_connected": self.rabbitmq_client.connected if self.rabbitmq_client else False`
			`}`

			`async def health_check(self) -> Dict[str, Any]:`
			`"""Comprehensive health check"""`
			`try:`
			`# Check Redis`
			`redis_healthy = False`
			`if self.redis and not self.redis.closed:`
			`await self.redis.ping()`
			`redis_healthy = True`

			`# Check RabbitMQ`
			`rabbitmq_healthy = self.rabbitmq_client.connected if self.rabbitmq_client else False`

			`# Check database`
			`db_healthy = False`
			`try:`
			`from sqlalchemy import text`
			`async with self.db_manager.get_session() as session:`
			`await session.execute(text("SELECT 1"))`
			`db_healthy = True`
			`except:`
			`pass`

			`status = "healthy" if all([redis_healthy, rabbitmq_healthy, db_healthy]) else "unhealthy"`

			`return {`
			`"status": status,`
			`"service": self.config.SERVICE_NAME,`
			`"components": {`
			`"redis": "healthy" if redis_healthy else "unhealthy",`
			`"rabbitmq": "healthy" if rabbitmq_healthy else "unhealthy",`
			`"database": "healthy" if db_healthy else "unhealthy",`
			`"scheduler": "running" if self.scheduler.running else "stopped"`
			`},`
			`"metrics": self.get_metrics()`
			`}`

			`except Exception as e:`
			`return {`
			`"status": "error",`
			`"service": self.config.SERVICE_NAME,`
			`"error": str(e)`
			`}`


			`class AlertServiceMixin:`
			`"""Mixin providing common alert helper methods"""`

			`def format_spanish_message(self, template_key: str, **kwargs) -> Dict[str, Any]:`
			`"""Format Spanish alert message"""`
			`from shared.alerts.templates import format_item_message`
			`return format_item_message(template_key, 'es', **kwargs)`

			`def get_business_hours_severity(self, base_severity: str) -> str:`
			`"""Adjust severity based on business hours"""`
			`current_hour = datetime.now().hour`

			`# Reduce non-critical severity outside business hours (7-20)`
			`if not (7 <= current_hour <= 20):`
			`if base_severity == 'medium':`
			`return 'low'`
			`elif base_severity == 'high' and current_hour < 6 or current_hour > 22:`
			`return 'medium'`

			`return base_severity`

			`def should_send_recommendation(self, tenant_id: UUID, rec_type: str) -> bool:`
			`"""Check if recommendation should be sent based on tenant preferences"""`
			`# Implement tenant-specific recommendation frequency limits`
			`# This is a simplified version`
			`return True`