2025-08-23 10:19:58 +02:00
|
|
|
# shared/alerts/base_service.py
|
|
|
|
|
"""
|
|
|
|
|
Base alert service pattern for all microservices
|
|
|
|
|
Supports both alerts and recommendations through unified detection patterns
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
import asyncio
|
|
|
|
|
import json
|
2025-09-25 21:00:40 +02:00
|
|
|
import random
|
2025-08-23 10:19:58 +02:00
|
|
|
import uuid
|
|
|
|
|
from typing import List, Dict, Any, Optional
|
|
|
|
|
from uuid import UUID
|
|
|
|
|
from datetime import datetime, timedelta
|
|
|
|
|
import structlog
|
|
|
|
|
from redis.asyncio import Redis
|
|
|
|
|
from apscheduler.schedulers.asyncio import AsyncIOScheduler
|
|
|
|
|
from apscheduler.triggers.cron import CronTrigger
|
|
|
|
|
|
|
|
|
|
from shared.messaging.rabbitmq import RabbitMQClient
|
|
|
|
|
from shared.database.base import DatabaseManager
|
|
|
|
|
from shared.config.rabbitmq_config import get_routing_key
|
|
|
|
|
|
|
|
|
|
# Module-level structlog logger used by the classes in this file.
logger = structlog.get_logger()
|
|
|
|
|
|
|
|
|
|
class BaseAlertService:
    """
    Base class for service-specific alert and recommendation detection.

    Implements hybrid detection patterns: scheduled jobs (run only on the
    elected leader), event-driven detection, and database triggers.

    Subclasses must override ``setup_scheduled_checks`` and may override
    ``start_event_listener`` and ``register_db_listeners``.
    """

    # Compare-and-expire: refresh the TTL only while the lock still carries our
    # instance id. A plain ``SET ... GET`` would overwrite another owner's lock.
    _EXTEND_LOCK_SCRIPT = (
        "if redis.call('get', KEYS[1]) == ARGV[1] then "
        "return redis.call('expire', KEYS[1], ARGV[2]) "
        "else return 0 end"
    )

    def __init__(self, config):
        """Create the clients and counters; no network I/O is started here.

        Args:
            config: Settings object; this class reads ``DATABASE_URL``,
                ``RABBITMQ_URL``, ``SERVICE_NAME``, ``REDIS_URL`` and,
                optionally, ``INSTANCE_ID``.
        """
        self.config = config
        self.db_manager = DatabaseManager(config.DATABASE_URL)
        self.rabbitmq_client = RabbitMQClient(config.RABBITMQ_URL, config.SERVICE_NAME)
        self.redis = None
        self.scheduler = AsyncIOScheduler()
        self.is_leader = False
        self.exchange = "alerts.exchange"

        # Metrics
        self._items_published = 0
        self._checks_performed = 0
        self._errors_count = 0

        # Strong references to fire-and-forget tasks: the event loop only keeps
        # weak references, so an unreferenced task may be garbage-collected.
        self._background_tasks = set()
        # asyncpg connection used for LISTEN/NOTIFY, closed again in stop().
        self._db_listener_conn = None

    def _spawn_background_task(self, coro) -> "asyncio.Task":
        """Schedule ``coro`` as a task and keep a reference until it finishes."""
        task = asyncio.create_task(coro)
        self._background_tasks.add(task)
        task.add_done_callback(self._background_tasks.discard)
        return task

    async def start(self):
        """Initialize all detection mechanisms.

        Connects to Redis and RabbitMQ, registers the scheduled checks, starts
        the leader-election loop and then the database and event listeners.

        Raises:
            Exception: Any start-up failure is logged and re-raised.
        """
        try:
            # Connect to Redis for leader election and deduplication.
            # Use the shared Redis URL which includes TLS configuration.
            from redis.asyncio import from_url

            redis_url = self.config.REDIS_URL

            # Create Redis client from URL (supports TLS via rediss:// protocol)
            redis_kwargs = {
                'decode_responses': True,
                'max_connections': 20
            }

            if redis_url.startswith('rediss://'):
                # SECURITY NOTE(review): certificate verification is disabled to
                # accept self-signed certificates. The link is encrypted but the
                # server is not authenticated - revisit once a CA is available.
                redis_kwargs.update({
                    'ssl_cert_reqs': None,  # Disable certificate verification
                    'ssl_ca_certs': None,   # Don't require CA certificates
                    'ssl_certfile': None,   # Don't require client cert
                    'ssl_keyfile': None     # Don't require client key
                })

            self.redis = await from_url(redis_url, **redis_kwargs)
            # Log only the part after "@" so credentials never reach the logs.
            logger.info("Connected to Redis", service=self.config.SERVICE_NAME, redis_url=redis_url.split("@")[-1])

            # Connect to RabbitMQ
            await self.rabbitmq_client.connect()
            logger.info("Connected to RabbitMQ", service=self.config.SERVICE_NAME)

            # Setup scheduled checks (runs only on leader). Done before the
            # election task is created so a failure here leaves no orphan task.
            self.setup_scheduled_checks()

            # Start leader election for scheduled jobs
            self._spawn_background_task(self.maintain_leadership())

            # Start database listener (runs on all instances)
            await self.start_database_listener()

            # Start event listener (runs on all instances)
            await self.start_event_listener()

            logger.info("Alert service started", service=self.config.SERVICE_NAME)

        except Exception as e:
            logger.error("Failed to start alert service", service=self.config.SERVICE_NAME, error=str(e))
            raise

    async def stop(self):
        """Clean shutdown.

        Cancels background tasks, stops the scheduler and closes the database
        listener, Redis and RabbitMQ connections. Errors are logged, not raised.
        """
        try:
            # Cancel the leadership loop and the DB keep-alive loop first so
            # they cannot touch connections that are about to be closed.
            pending = list(self._background_tasks)
            for task in pending:
                task.cancel()
            if pending:
                await asyncio.gather(*pending, return_exceptions=True)
            self.is_leader = False

            # Stop scheduler
            if self.scheduler.running:
                self.scheduler.shutdown()

            # Close connections
            conn, self._db_listener_conn = self._db_listener_conn, None
            if conn is not None and not conn.is_closed():
                await conn.close(timeout=5.0)

            if self.redis:
                await self.redis.aclose()  # Use aclose() for modern Redis client

            await self.rabbitmq_client.disconnect()

            logger.info("Alert service stopped", service=self.config.SERVICE_NAME)

        except Exception as e:
            logger.error("Error stopping alert service", service=self.config.SERVICE_NAME, error=str(e))

    # PATTERN 1: Scheduled Background Jobs
    def setup_scheduled_checks(self):
        """Configure scheduled alert checks - Override in service"""
        raise NotImplementedError("Subclasses must implement setup_scheduled_checks")

    async def _extend_leadership_lock(self, lock_key: str, instance_id: str, lock_ttl: int) -> bool:
        """Refresh the lock TTL if, and only if, this instance still owns it.

        Returns:
            True when the TTL was extended, False when the key is missing or
            holds another instance's id (the key is left untouched then).
        """
        extended = await self.redis.eval(self._EXTEND_LOCK_SCRIPT, 1, lock_key, instance_id, lock_ttl)
        return bool(extended)

    def _stop_scheduler_if_running(self) -> None:
        """Shut the scheduler down, tolerating it already being stopped."""
        try:
            if self.scheduler.running:
                self.scheduler.shutdown()
        except Exception as e:
            logger.warning("Failed to stop scheduler", service=self.config.SERVICE_NAME, error=str(e))

    async def maintain_leadership(self):
        """Leader election for scheduled jobs.

        Runs until cancelled. A non-leader tries to take the Redis lock with
        ``SET NX EX``; the leader refreshes it about every half TTL. The
        scheduler runs only while this instance believes it holds the lock.
        """
        lock_key = f"scheduler_lock:{self.config.SERVICE_NAME}"
        lock_ttl = 60

        # Generate instance_id once for the lifetime of this leadership loop.
        # IMPORTANT: Don't regenerate on each iteration or lock extension will always fail!
        instance_id = getattr(self.config, 'INSTANCE_ID', str(uuid.uuid4()))

        logger.info("DEBUG: maintain_leadership starting",
                    service=self.config.SERVICE_NAME,
                    instance_id=instance_id,
                    redis_client_type=str(type(self.redis)))

        while True:
            try:
                was_leader = self.is_leader

                if not was_leader:
                    # Add jitter to avoid thundering herd when multiple instances start
                    await asyncio.sleep(random.uniform(0.1, 0.5))

                    # Use atomic Redis operation to acquire lock
                    result = await self.redis.set(
                        lock_key,
                        instance_id,
                        ex=lock_ttl,
                        nx=True  # Only set if key doesn't exist
                    )
                    self.is_leader = result is not None
                else:
                    # Already leader - extend the lock without ever overwriting
                    # a lock that another instance acquired in the meantime.
                    try:
                        self.is_leader = await self._extend_leadership_lock(lock_key, instance_id, lock_ttl)
                        if self.is_leader:
                            logger.debug("Lock extended successfully",
                                         service=self.config.SERVICE_NAME,
                                         instance_id=instance_id,
                                         ttl=lock_ttl)
                        else:
                            # Lock was taken by someone else or expired
                            logger.info("Lost lock ownership during extension",
                                        service=self.config.SERVICE_NAME,
                                        instance_id=instance_id)
                    except Exception as e:
                        # If extend fails, try to verify we still have the lock
                        logger.warning("Failed to extend lock, verifying ownership",
                                       service=self.config.SERVICE_NAME,
                                       error=str(e))
                        current_check = await self.redis.get(lock_key)
                        self.is_leader = current_check == instance_id

                # Handle leadership changes
                if self.is_leader and not was_leader:
                    # Add a small delay to allow other instances to detect leadership change
                    await asyncio.sleep(0.1)
                    if self.is_leader:  # Double-check we're still the leader
                        # Guard: starting a running scheduler raises.
                        if not self.scheduler.running:
                            self.scheduler.start()
                        logger.info("Acquired scheduler leadership", service=self.config.SERVICE_NAME)
                elif not self.is_leader and was_leader:
                    self._stop_scheduler_if_running()
                    logger.info("Lost scheduler leadership", service=self.config.SERVICE_NAME)

                # Add jitter to reduce contention between instances
                await asyncio.sleep(lock_ttl // 2 + random.uniform(0, 2))

            except Exception as e:
                import traceback
                logger.error("Leadership error",
                             service=self.config.SERVICE_NAME,
                             error=str(e),
                             error_type=type(e).__name__,
                             traceback=traceback.format_exc())
                self.is_leader = False
                # Leadership is unknown now: stop the scheduler so jobs cannot
                # run here while another instance takes over the lock.
                self._stop_scheduler_if_running()
                await asyncio.sleep(5)

    # PATTERN 2: Event-Driven Detection
    async def start_event_listener(self):
        """Listen for business events - Override in service"""
        pass

    # PATTERN 3: Database Triggers
    async def start_database_listener(self):
        """Listen for database notifications with connection management.

        Opens a dedicated asyncpg connection (up to 3 attempts, 10 s timeout
        each), lets the subclass register its listeners and starts a keep-alive
        task. Best-effort: failures are logged and swallowed.
        """
        try:
            import asyncpg

            # Convert SQLAlchemy URL format to plain PostgreSQL for asyncpg
            database_url = self.config.DATABASE_URL
            if database_url.startswith('postgresql+asyncpg://'):
                database_url = database_url.replace('postgresql+asyncpg://', 'postgresql://', 1)

            # Connection timeout and retry logic
            max_retries = 3
            conn = None
            for attempt in range(1, max_retries + 1):
                try:
                    conn = await asyncio.wait_for(
                        asyncpg.connect(database_url),
                        timeout=10.0
                    )
                    break
                except Exception as e:
                    if attempt == max_retries:
                        raise
                    logger.warning(f"DB listener connection attempt {attempt} failed, retrying...",
                                   service=self.config.SERVICE_NAME, error=str(e))
                    await asyncio.sleep(2)

            # Register listeners based on service
            try:
                await self.register_db_listeners(conn)
            except Exception:
                # Do not leak the connection when registration fails.
                conn.terminate()
                raise

            self._db_listener_conn = conn
            logger.info("Database listeners registered", service=self.config.SERVICE_NAME)

            # Keep connection alive with periodic ping
            self._spawn_background_task(self._maintain_db_connection(conn))

        except Exception as e:
            logger.error("Failed to setup database listeners", service=self.config.SERVICE_NAME, error=str(e))

    async def _maintain_db_connection(self, conn):
        """Maintain database connection for listeners.

        Pings ``conn`` with ``SELECT 1`` every 30 seconds and returns when the
        connection is closed, a ping times out (5 s) or a ping fails.
        """
        try:
            while not conn.is_closed():
                try:
                    # Use a timeout to avoid hanging indefinitely
                    await asyncio.wait_for(
                        conn.fetchval("SELECT 1"),
                        timeout=5.0
                    )
                    await asyncio.sleep(30)  # Check every 30 seconds
                except asyncio.TimeoutError:
                    logger.warning("DB ping timed out, connection may be dead", service=self.config.SERVICE_NAME)
                    break
                except Exception as e:
                    logger.error("DB listener connection lost", service=self.config.SERVICE_NAME, error=str(e))
                    break
        except Exception as e:
            logger.error("Error maintaining DB connection", service=self.config.SERVICE_NAME, error=str(e))

    async def register_db_listeners(self, conn):
        """Register database listeners - Override in service"""
        pass

    # Publishing (Updated for type)
    async def publish_item(self, tenant_id: UUID, item: Dict[str, Any], item_type: str = 'alert'):
        """Publish alert or recommendation to RabbitMQ with deduplication and validation.

        Args:
            tenant_id: Tenant the item belongs to.
            item: Payload; ``type`` and ``severity`` are read here and
                ``id``, ``tenant_id``, ``service``, ``timestamp`` and
                ``item_type`` are written into it before publishing.
            item_type: ``'alert'`` or ``'recommendation'``.

        Returns:
            The truthy result of the RabbitMQ publish on success; False when
            the item is invalid, a duplicate, or publishing failed.
        """
        try:
            # Validate alert structure before publishing
            from shared.schemas.alert_types import RawAlert
            try:
                RawAlert(
                    tenant_id=str(tenant_id),
                    alert_type=item.get('type'),
                    title=item.get('title'),
                    message=item.get('message'),
                    service=self.config.SERVICE_NAME,
                    actions=item.get('actions', []),
                    alert_metadata=item.get('metadata', {}),
                    item_type=item_type
                )
                logger.debug("Alert schema validation passed",
                             service=self.config.SERVICE_NAME,
                             alert_type=item.get('type'))
            except Exception as validation_error:
                logger.error("Alert schema validation failed",
                             service=self.config.SERVICE_NAME,
                             alert_type=item.get('type'),
                             error=str(validation_error))
                self._errors_count += 1
                return False

            # Determine routing key based on severity and type. Resolved before
            # the dedup marker is written so a payload without 'severity' cannot
            # leave a marker behind that suppresses the corrected retry.
            routing_key = get_routing_key(item_type, item['severity'], self.config.SERVICE_NAME)

            # Generate proper deduplication key based on alert type and specific identifiers
            unique_id = self._generate_unique_identifier(item)
            item_key = f"{tenant_id}:{item_type}:{item['type']}:{unique_id}"

            if await self.is_duplicate_item(item_key):
                logger.debug("Duplicate item skipped",
                             service=self.config.SERVICE_NAME,
                             item_type=item_type,
                             alert_type=item['type'],
                             dedup_key=item_key)
                return False

            # Add metadata
            item['id'] = str(uuid.uuid4())
            item['tenant_id'] = str(tenant_id)
            item['service'] = self.config.SERVICE_NAME
            item['timestamp'] = datetime.utcnow().isoformat()
            item['item_type'] = item_type  # 'alert' or 'recommendation'

            # Publish to RabbitMQ with timeout to prevent blocking
            try:
                success = await asyncio.wait_for(
                    self.rabbitmq_client.publish_event(
                        exchange_name=self.exchange,
                        routing_key=routing_key,
                        event_data=item
                    ),
                    timeout=10.0  # 10 second timeout
                )
            except asyncio.TimeoutError:
                self._errors_count += 1
                # Nothing was confirmed as sent: let the next attempt through.
                await self._release_dedup_marker(item_key)
                logger.error("RabbitMQ publish timed out",
                             service=self.config.SERVICE_NAME,
                             item_type=item_type,
                             alert_type=item['type'])
                return False
            except Exception:
                await self._release_dedup_marker(item_key)
                raise

            if success:
                self._items_published += 1
                logger.info("Item published successfully",
                            service=self.config.SERVICE_NAME,
                            item_type=item_type,
                            alert_type=item['type'],
                            severity=item['severity'],
                            routing_key=routing_key)
            else:
                self._errors_count += 1
                await self._release_dedup_marker(item_key)
                logger.error("Failed to publish item",
                             service=self.config.SERVICE_NAME,
                             item_type=item_type,
                             alert_type=item['type'])

            return success

        except Exception as e:
            self._errors_count += 1
            logger.error("Error publishing item",
                         service=self.config.SERVICE_NAME,
                         error=str(e),
                         item_type=item_type)
            return False

    async def _release_dedup_marker(self, item_key: str) -> None:
        """Delete the marker written by ``is_duplicate_item`` after a failed publish."""
        try:
            await self.redis.delete(f"item_sent:{item_key}")
        except Exception as e:
            logger.warning("Failed to release deduplication marker", dedup_key=item_key, error=str(e))

    def _generate_unique_identifier(self, item: Dict[str, Any]) -> str:
        """Generate unique identifier for deduplication based on alert type and content.

        Args:
            item: Payload; only ``type`` and ``metadata`` are read.

        Returns:
            A string built from the metadata fields that identify one
            occurrence of the alert type; ``''`` when nothing is available.
        """
        import hashlib

        alert_type = item.get('type', '')
        metadata = item.get('metadata') or {}  # tolerate an explicit None

        def field(name: str, default: Any = '') -> Any:
            return metadata.get(name, default)

        def date_part(name: str) -> str:
            # First 10 characters, i.e. the date of an ISO timestamp; also
            # accepts None and date/datetime objects.
            return str(metadata.get(name) or '')[:10]

        def ids_digest(entries: Any, id_key: str) -> str:
            # Order-independent fingerprint of a group of ids (not security related).
            if not entries:
                return ''
            ids = sorted(str(entry.get(id_key, '')) for entry in entries)
            return hashlib.md5(':'.join(ids).encode()).hexdigest()[:16]

        # Inventory alerts
        if alert_type in ('overstock_warning', 'critical_stock_shortage', 'low_stock_warning'):
            return str(field('ingredient_id'))
        if alert_type == 'expired_products':
            return ids_digest(field('expired_items', []), 'id')
        if alert_type == 'urgent_expiry':
            return f"{field('ingredient_id')}:{field('stock_id')}"
        if alert_type == 'temperature_breach':
            return f"{field('sensor_id')}:{field('location')}"
        if alert_type == 'stock_depleted_by_order':
            return f"{field('order_id')}:{field('ingredient_id')}"
        if alert_type == 'expired_batches_auto_processed':
            # Use processing date and total batches as identifier
            return f"{date_part('processing_date')}:{field('total_batches_processed', 0)}"
        if alert_type == 'inventory_optimization':
            return f"opt:{field('ingredient_id')}:{field('recommendation_type')}"
        if alert_type == 'waste_reduction':
            return f"waste:{field('ingredient_id')}"

        # Procurement alerts
        if alert_type == 'procurement_pos_pending_approval':
            # Use hash of PO IDs for grouped alerts
            return ids_digest(field('pos', []), 'po_id')
        if alert_type in ('procurement_approval_reminder', 'procurement_critical_po', 'procurement_po_approved'):
            return str(field('po_id'))
        if alert_type == 'procurement_auto_approval_summary':
            # Daily summary - use date as identifier
            return f"summary:{date_part('summary_date')}"

        # Production alerts
        if alert_type in ('severe_capacity_overload', 'capacity_overload', 'near_capacity'):
            return f"capacity:{field('planned_date')}"
        if alert_type == 'production_delay':
            return str(field('batch_id'))
        if alert_type == 'quality_control_failure':
            return str(field('quality_check_id'))
        if alert_type in ('equipment_failure', 'maintenance_required', 'low_equipment_efficiency'):
            return str(field('equipment_id'))
        if alert_type == 'production_ingredient_shortage':
            return str(field('ingredient_id'))

        # Forecasting alerts
        if alert_type in ('demand_surge_weekend', 'holiday_preparation', 'demand_spike_detected', 'unexpected_demand_spike'):
            return f"{alert_type}:{field('product_name')}:{field('forecast_date')}"
        if alert_type == 'weather_impact_alert':
            return f"weather:{field('forecast_date')}"
        if alert_type == 'severe_weather_impact':
            return f"severe_weather:{field('weather_type')}:{field('duration_hours')}"

        # Fallback to generic metadata.id or empty string
        return str(field('id'))

    async def is_duplicate_item(self, item_key: str, window_minutes: int = 15) -> bool:
        """Prevent duplicate items within time window.

        Atomically writes ``item_sent:<item_key>`` with ``SET NX EX``.

        Returns:
            True when the marker already existed. False when it was created
            now, or when Redis failed (fail-open so alerts are not dropped).
        """
        key = f"item_sent:{item_key}"
        try:
            result = await self.redis.set(
                key, "1",
                ex=window_minutes * 60,
                nx=True
            )
            return result is None  # None means duplicate
        except Exception as e:
            logger.error("Error checking duplicate", error=str(e))
            return False  # Allow publishing if check fails

    # Helper methods
    async def get_active_tenants(self) -> List[UUID]:
        """Get list of active tenant IDs.

        Returns:
            The ``tenant_id`` values of rows in ``tenants`` whose status is
            ``'active'``; an empty list on any database error.
        """
        try:
            from sqlalchemy import text
            query = text("SELECT DISTINCT tenant_id FROM tenants WHERE status = 'active'")
            async with self.db_manager.get_session() as session:
                result = await session.execute(query)
                return [row.tenant_id for row in result.fetchall()]
        except Exception as e:
            # If tenants table doesn't exist, skip tenant-based processing
            if "does not exist" in str(e):
                logger.debug("Tenants table not found, skipping tenant-based alert processing")
            else:
                logger.error("Error fetching active tenants", error=str(e))
            return []

    @staticmethod
    def _parse_tenant_config(raw_config: Any) -> Dict[str, Any]:
        """Normalize a ``tenants.config`` value.

        JSON text is decoded; a value the driver already decoded is returned
        unchanged; empty or missing values become ``{}``.
        """
        if not raw_config:
            return {}
        if isinstance(raw_config, (str, bytes, bytearray)):
            return json.loads(raw_config)
        return raw_config

    async def get_tenant_config(self, tenant_id: UUID) -> Dict[str, Any]:
        """Get tenant-specific configuration.

        Returns:
            The tenant's configuration, or ``{}`` when the tenant has none or
            the lookup fails (the failure is logged).
        """
        try:
            from sqlalchemy import text
            query = text("SELECT config FROM tenants WHERE tenant_id = :tenant_id")
            async with self.db_manager.get_session() as session:
                result = await session.execute(query, {"tenant_id": tenant_id})
                row = result.fetchone()
                return self._parse_tenant_config(row.config) if row else {}
        except Exception as e:
            logger.error("Error fetching tenant config", tenant_id=str(tenant_id), error=str(e))
            return {}

    # Health and metrics
    def _redis_client_open(self) -> bool:
        """Return True when a Redis client exists and is not flagged closed.

        ``closed`` is read defensively because not every Redis client class
        exposes that attribute.
        """
        return self.redis is not None and not getattr(self.redis, "closed", False)

    def get_metrics(self) -> Dict[str, Any]:
        """Get service metrics (counters plus connection and leadership flags)."""
        return {
            "items_published": self._items_published,
            "checks_performed": self._checks_performed,
            "errors_count": self._errors_count,
            "is_leader": self.is_leader,
            "scheduler_running": self.scheduler.running,
            "redis_connected": self._redis_client_open(),
            "rabbitmq_connected": self.rabbitmq_client.connected if self.rabbitmq_client else False
        }

    async def health_check(self) -> Dict[str, Any]:
        """Comprehensive health check.

        Returns:
            A dict with ``status`` (``'healthy'`` only when Redis, RabbitMQ and
            the database all respond, otherwise ``'unhealthy'``), per-component
            states and the metrics; ``status='error'`` on unexpected failure.
        """
        try:
            # Check Redis. A failed ping marks only this component unhealthy.
            redis_healthy = False
            if self._redis_client_open():
                try:
                    await self.redis.ping()
                    redis_healthy = True
                except Exception as e:
                    logger.warning("Redis health check failed", service=self.config.SERVICE_NAME, error=str(e))

            # Check RabbitMQ
            rabbitmq_healthy = self.rabbitmq_client.connected if self.rabbitmq_client else False

            # Check database
            db_healthy = False
            try:
                from sqlalchemy import text
                async with self.db_manager.get_session() as session:
                    await session.execute(text("SELECT 1"))
                    db_healthy = True
            except Exception as e:
                logger.warning("Database health check failed", service=self.config.SERVICE_NAME, error=str(e))

            status = "healthy" if all([redis_healthy, rabbitmq_healthy, db_healthy]) else "unhealthy"

            return {
                "status": status,
                "service": self.config.SERVICE_NAME,
                "components": {
                    "redis": "healthy" if redis_healthy else "unhealthy",
                    "rabbitmq": "healthy" if rabbitmq_healthy else "unhealthy",
                    "database": "healthy" if db_healthy else "unhealthy",
                    "scheduler": "running" if self.scheduler.running else "stopped"
                },
                "metrics": self.get_metrics()
            }

        except Exception as e:
            return {
                "status": "error",
                "service": self.config.SERVICE_NAME,
                "error": str(e)
            }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class AlertServiceMixin:
    """Mixin providing common alert helper methods"""

    def get_business_hours_severity(self, base_severity: str, current_hour: Optional[int] = None) -> str:
        """Adjust severity based on business hours.

        Inside business hours (07:00-20:59) the severity is returned unchanged.
        Outside them ``'medium'`` becomes ``'low'``, and ``'high'`` becomes
        ``'medium'`` only deep at night (before 06:00 or from 23:00). Every
        other severity is never altered.

        Args:
            base_severity: Severity computed by the detector.
            current_hour: Hour of day (0-23) to evaluate; defaults to the
                current local hour.

        Returns:
            The possibly reduced severity.
        """
        if current_hour is None:
            current_hour = datetime.now().hour

        # Business hours (7-20): never touch the severity.
        if 7 <= current_hour <= 20:
            return base_severity

        if base_severity == 'medium':
            return 'low'

        # Parenthesised on purpose: without the parentheses the trailing
        # "or current_hour > 22" applied to every severity, so even
        # 'critical' was rewritten to 'medium' at 23:00.
        if base_severity == 'high' and (current_hour < 6 or current_hour > 22):
            return 'medium'

        return base_severity

    def should_send_recommendation(self, tenant_id: UUID, rec_type: str) -> bool:
        """Check if recommendation should be sent based on tenant preferences.

        Simplified placeholder: tenant-specific frequency limits are not
        implemented yet, so every recommendation is allowed.
        """
        return True
|