New alert system and control panel page

This commit is contained in:
Urtzi Alfaro
2025-11-27 15:52:40 +01:00
parent 1a2f4602f3
commit e902419b6e
178 changed files with 20982 additions and 6944 deletions

View File

@@ -19,14 +19,33 @@ from shared.database.base import create_database_manager
from shared.clients.base_service_client import BaseServiceClient
from shared.config.rabbitmq_config import RABBITMQ_CONFIG
# Import enrichment services
from app.services.enrichment import (
PriorityScoringService,
ContextEnrichmentService,
TimingIntelligenceService,
OrchestratorClient
)
from shared.schemas.alert_types import RawAlert
# Setup logging
import logging
# Configure Python's standard logging first (required for structlog.stdlib.LoggerFactory)
logging.basicConfig(
format="%(message)s",
stream=sys.stdout,
level=logging.INFO,
)
# Configure structlog to use the standard logging backend
structlog.configure(
processors=[
structlog.stdlib.filter_by_level,
structlog.stdlib.add_logger_name,
structlog.stdlib.add_log_level,
structlog.stdlib.PositionalArgumentsFormatter(),
structlog.processors.TimeStamper(fmt="ISO"),
structlog.processors.TimeStamper(fmt="iso"),
structlog.processors.StackInfoRenderer(),
structlog.processors.format_exc_info,
structlog.processors.JSONRenderer()
@@ -81,12 +100,19 @@ class AlertProcessorService:
self.connection = None
self.channel = None
self.running = False
# Initialize enrichment services (context_enrichment initialized after Redis connection)
self.orchestrator_client = OrchestratorClient(config.ORCHESTRATOR_SERVICE_URL)
self.context_enrichment = None # Initialized in start() after Redis connection
self.priority_scoring = PriorityScoringService(config)
self.timing_intelligence = TimingIntelligenceService(config)
# Metrics
self.items_processed = 0
self.items_stored = 0
self.notifications_sent = 0
self.errors_count = 0
self.enrichments_count = 0
async def start(self):
"""Start the alert processor service"""
@@ -97,16 +123,20 @@ class AlertProcessorService:
await initialize_redis(self.config.REDIS_URL, db=0, max_connections=20)
self.redis = await get_redis_client()
logger.info("Connected to Redis")
# Initialize context enrichment service now that Redis is available
self.context_enrichment = ContextEnrichmentService(self.config, self.db_manager, self.redis)
logger.info("Initialized context enrichment service")
# Connect to RabbitMQ
await self._setup_rabbitmq()
# Start consuming messages
await self._start_consuming()
self.running = True
logger.info("Alert Processor Service started successfully")
except Exception as e:
logger.error("Failed to start Alert Processor Service", error=str(e))
raise
@@ -152,102 +182,202 @@ class AlertProcessorService:
try:
# Parse message
item = json.loads(message.body.decode())
logger.info("Processing item",
logger.info("Processing item",
item_type=item.get('item_type'),
alert_type=item.get('type'),
severity=item.get('severity'),
priority_level=item.get('priority_level', 'standard'),
tenant_id=item.get('tenant_id'))
# Store in database
stored_item = await self.store_item(item)
# ENRICH ALERT BEFORE STORAGE
enriched_item = await self.enrich_alert(item)
self.enrichments_count += 1
# Store enriched alert in database
stored_item = await self.store_enriched_item(enriched_item)
self.items_stored += 1
# Determine delivery channels based on severity and type
channels = self.get_channels_by_severity_and_type(
item['severity'],
item['item_type']
)
# Determine delivery channels based on priority score (not severity)
channels = self.get_channels_by_priority(enriched_item['priority_score'])
# Send via notification service if channels are specified
if channels:
notification_result = await self.notification_client.send_notification(
tenant_id=item['tenant_id'],
tenant_id=enriched_item['tenant_id'],
notification={
'type': item['item_type'], # 'alert' or 'recommendation'
'id': item['id'],
'title': item['title'],
'message': item['message'],
'severity': item['severity'],
'metadata': item.get('metadata', {}),
'actions': item.get('actions', []),
'email': item.get('email'),
'phone': item.get('phone'),
'user_id': item.get('user_id')
'type': enriched_item['item_type'],
'id': enriched_item['id'],
'title': enriched_item['title'],
'message': enriched_item['message'],
'priority_score': enriched_item['priority_score'],
'priority_level': enriched_item['priority_level'],
'type_class': enriched_item['type_class'],
'metadata': enriched_item.get('metadata', {}),
'actions': enriched_item.get('smart_actions', []),
'ai_reasoning_summary': enriched_item.get('ai_reasoning_summary'),
'email': enriched_item.get('email'),
'phone': enriched_item.get('phone'),
'user_id': enriched_item.get('user_id')
},
channels=channels
)
if notification_result and notification_result.get('status') == 'success':
self.notifications_sent += 1
# Stream to SSE for real-time dashboard (always)
await self.stream_to_sse(item['tenant_id'], stored_item)
# Stream enriched alert to SSE for real-time dashboard (always)
await self.stream_to_sse(enriched_item['tenant_id'], stored_item)
self.items_processed += 1
logger.info("Item processed successfully",
item_id=item['id'],
logger.info("Item processed successfully",
item_id=enriched_item['id'],
priority_score=enriched_item['priority_score'],
priority_level=enriched_item['priority_level'],
channels=len(channels))
except Exception as e:
self.errors_count += 1
logger.error("Item processing failed", error=str(e))
raise
async def store_item(self, item: dict) -> dict:
"""Store alert or recommendation in database and cache in Redis"""
from app.models.alerts import Alert, AlertSeverity, AlertStatus
async def enrich_alert(self, item: dict) -> dict:
    """
    Enrich alert with priority scoring, context, and smart actions.
    All alerts MUST be enriched - no legacy support.

    Any failure inside the enrichment pipeline is caught and replaced with
    _create_fallback_enrichment() defaults, so a broken enrichment service
    never drops an alert.
    """
    try:
        # Build the RawAlert input model. Incoming dicts may use either the
        # legacy keys ('type' / 'metadata') or the new ones
        # ('alert_type' / 'alert_metadata'); prefer the legacy key when present.
        raw_alert = RawAlert(
            tenant_id=item['tenant_id'],
            alert_type=item.get('type', item.get('alert_type', 'unknown')),
            title=item['title'],
            message=item['message'],
            service=item['service'],
            actions=item.get('actions', []),
            alert_metadata=item.get('metadata', item.get('alert_metadata', {})),
            item_type=item.get('item_type', 'alert'),
        )

        # Orchestrator-backed context enrichment (AI actions, business impact).
        enriched = await self.context_enrichment.enrich_alert(raw_alert)

        # Serialize the enriched model back to a plain dict.
        # mode='json' converts datetime objects into ISO-8601 strings.
        if hasattr(enriched, 'model_dump'):
            result = enriched.model_dump(mode='json')
        else:
            result = dict(enriched)

        # Carry identity/routing fields over from the original message.
        result['id'] = item['id']                                   # preserve original ID
        result['item_type'] = item.get('item_type', 'alert')        # preserve item_type
        result['type'] = result.get('alert_type', item.get('type', 'unknown'))
        result['timestamp'] = item.get('timestamp', datetime.utcnow().isoformat())
        result.setdefault('timing_decision', 'send_now')            # default timing decision

        # The storage layer reads 'smart_actions'; mirror 'actions' into it
        # when the enriched payload only provided the legacy key.
        if 'actions' in result and 'smart_actions' not in result:
            result['smart_actions'] = result['actions']

        logger.info("Alert enriched successfully",
                    alert_id=result['id'],
                    alert_type=result.get('alert_type'),
                    priority_score=result['priority_score'],
                    priority_level=result['priority_level'],
                    type_class=result['type_class'],
                    actions_count=len(result.get('actions', [])),
                    smart_actions_count=len(result.get('smart_actions', [])))

        return result

    except Exception as e:
        logger.error("Alert enrichment failed, using fallback", error=str(e), alert_id=item.get('id'))
        # Fallback: basic enrichment with defaults
        return self._create_fallback_enrichment(item)
def _create_fallback_enrichment(self, item: dict) -> dict:
"""
Create fallback enrichment when enrichment services fail.
Ensures all alerts have required enrichment fields.
"""
return {
**item,
'item_type': item.get('item_type', 'alert'), # Ensure item_type is preserved
'type': item.get('type', 'unknown'), # Ensure type field is preserved
'alert_type': item.get('type', item.get('alert_type', 'unknown')), # Ensure alert_type exists
'priority_score': 50,
'priority_level': 'standard',
'type_class': 'action_needed',
'orchestrator_context': None,
'business_impact': None,
'urgency_context': None,
'user_agency': None,
'trend_context': None,
'smart_actions': item.get('actions', []),
'ai_reasoning_summary': None,
'confidence_score': 0.5,
'timing_decision': 'send_now',
'scheduled_send_time': None,
'placement': ['dashboard']
}
async def store_enriched_item(self, enriched_item: dict) -> dict:
"""Store enriched alert in database with all enrichment fields"""
from app.models.events import Alert, AlertStatus
from sqlalchemy import select
async with self.db_manager.get_session() as session:
# Create alert instance
# Create enriched alert instance
alert = Alert(
id=item['id'],
tenant_id=item['tenant_id'],
item_type=item['item_type'], # 'alert' or 'recommendation'
alert_type=item['type'],
severity=AlertSeverity(item['severity'].lower()),
status=AlertStatus.ACTIVE,
service=item['service'],
title=item['title'],
message=item['message'],
actions=item.get('actions', []),
alert_metadata=item.get('metadata', {}),
created_at=datetime.fromisoformat(item['timestamp']) if isinstance(item['timestamp'], str) else item['timestamp']
id=enriched_item['id'],
tenant_id=enriched_item['tenant_id'],
item_type=enriched_item['item_type'],
alert_type=enriched_item['type'],
status='active',
service=enriched_item['service'],
title=enriched_item['title'],
message=enriched_item['message'],
# Enrichment fields (REQUIRED)
priority_score=enriched_item['priority_score'],
priority_level=enriched_item['priority_level'],
type_class=enriched_item['type_class'],
# Context enrichment (JSONB)
orchestrator_context=enriched_item.get('orchestrator_context'),
business_impact=enriched_item.get('business_impact'),
urgency_context=enriched_item.get('urgency_context'),
user_agency=enriched_item.get('user_agency'),
trend_context=enriched_item.get('trend_context'),
# Smart actions
smart_actions=enriched_item.get('smart_actions', []),
# AI reasoning
ai_reasoning_summary=enriched_item.get('ai_reasoning_summary'),
confidence_score=enriched_item.get('confidence_score', 0.8),
# Timing intelligence
timing_decision=enriched_item.get('timing_decision', 'send_now'),
scheduled_send_time=enriched_item.get('scheduled_send_time'),
# Placement
placement=enriched_item.get('placement', ['dashboard']),
# Metadata (legacy)
alert_metadata=enriched_item.get('metadata', {}),
# Timestamp
created_at=datetime.fromisoformat(enriched_item['timestamp']) if isinstance(enriched_item['timestamp'], str) else enriched_item['timestamp']
)
session.add(alert)
await session.commit()
await session.refresh(alert)
logger.debug("Item stored in database", item_id=item['id'])
logger.debug("Enriched item stored in database",
item_id=enriched_item['id'],
priority_score=alert.priority_score,
type_class=alert.type_class)
# Convert to dict for return
alert_dict = {
'id': str(alert.id),
'tenant_id': str(alert.tenant_id),
'item_type': alert.item_type,
'alert_type': alert.alert_type,
'severity': alert.severity.value,
'status': alert.status.value,
'service': alert.service,
'title': alert.title,
'message': alert.message,
'actions': alert.actions,
'metadata': alert.alert_metadata,
'created_at': alert.created_at
}
# Convert to enriched dict for return
alert_dict = alert.to_dict()
# Cache active alerts in Redis for SSE initial_items
await self._cache_active_alerts(str(alert.tenant_id))
@@ -263,7 +393,7 @@ class AlertProcessorService:
Analytics endpoints should query the database directly for historical data.
"""
try:
from app.models.alerts import Alert, AlertStatus
from app.models.events import Alert, AlertStatus
from sqlalchemy import select
async with self.db_manager.get_session() as session:
@@ -281,21 +411,10 @@ class AlertProcessorService:
result = await session.execute(query)
alerts = result.scalars().all()
# Convert to JSON-serializable format
# Convert to enriched JSON-serializable format
active_items = []
for alert in alerts:
active_items.append({
'id': str(alert.id),
'item_type': alert.item_type,
'type': alert.alert_type,
'severity': alert.severity.value,
'title': alert.title,
'message': alert.message,
'actions': alert.actions or [],
'metadata': alert.alert_metadata or {},
'timestamp': alert.created_at.isoformat() if alert.created_at else datetime.utcnow().isoformat(),
'status': alert.status.value
})
active_items.append(alert.to_dict())
# Cache in Redis with 1 hour TTL
cache_key = f"active_alerts:{tenant_id}"
@@ -316,57 +435,51 @@ class AlertProcessorService:
error=str(e))
async def stream_to_sse(self, tenant_id: str, item: dict):
    """Publish enriched item to Redis for SSE streaming.

    Args:
        tenant_id: Tenant whose SSE channel receives the item.
        item: Enriched alert dict as returned by store_enriched_item.
    """
    # NOTE(review): this span contained both the pre- and post-change bodies
    # interleaved by the diff; this is the reconstructed post-change version
    # (spread the already-enriched dict instead of rebuilding it field by field).
    channel = f"alerts:{tenant_id}"

    # Item is already enriched dict from store_enriched_item.
    # Just ensure timestamp is serializable (created_at may be a datetime).
    sse_message = {
        **item,
        'timestamp': item['created_at'].isoformat() if hasattr(item['created_at'], 'isoformat') else item['created_at']
    }

    # Publish to Redis channel for SSE
    await self.redis.publish(channel, json.dumps(sse_message))

    logger.debug("Enriched item published to SSE",
                 tenant_id=tenant_id,
                 item_id=item['id'],
                 priority_score=item.get('priority_score'))
def get_channels_by_priority(self, priority_score: int) -> list:
    """
    Determine notification channels based on priority score and timing.
    Uses multi-factor priority score (0-100) instead of legacy severity.

    Thresholds are read from config (CRITICAL_THRESHOLD, IMPORTANT_THRESHOLD,
    STANDARD_THRESHOLD); quiet-hours gating uses the local wall-clock hour.

    Returns:
        List of channel names; always contains 'dashboard'.
    """
    # NOTE(review): this span interleaved the removed severity-based method
    # with the new priority-based one; this is the reconstructed new method.
    current_hour = datetime.now().hour
    channels = ['dashboard']  # Always include dashboard (SSE)

    # Critical priority (90-100): All channels immediately
    if priority_score >= self.config.CRITICAL_THRESHOLD:
        channels.extend(['whatsapp', 'email', 'push'])
    # Important priority (70-89): WhatsApp and email during extended hours
    elif priority_score >= self.config.IMPORTANT_THRESHOLD:
        if 6 <= current_hour <= 22:
            channels.extend(['whatsapp', 'email'])
        else:
            channels.append('email')  # Email only during night
    # Standard priority (50-69): Email during business hours
    elif priority_score >= self.config.STANDARD_THRESHOLD:
        if 7 <= current_hour <= 20:
            channels.append('email')
    # Info priority (0-49): Dashboard only

    return channels
async def stop(self):
@@ -392,6 +505,7 @@ class AlertProcessorService:
return {
"items_processed": self.items_processed,
"items_stored": self.items_stored,
"enrichments_count": self.enrichments_count,
"notifications_sent": self.notifications_sent,
"errors_count": self.errors_count,
"running": self.running
@@ -399,27 +513,32 @@ class AlertProcessorService:
async def main():
"""Main entry point"""
print("STARTUP: Inside main() function", file=sys.stderr, flush=True)
config = AlertProcessorConfig()
print("STARTUP: Config created", file=sys.stderr, flush=True)
service = AlertProcessorService(config)
print("STARTUP: Service created", file=sys.stderr, flush=True)
# Setup signal handlers for graceful shutdown
async def shutdown():
logger.info("Received shutdown signal")
await service.stop()
sys.exit(0)
# Register signal handlers
for sig in (signal.SIGTERM, signal.SIGINT):
signal.signal(sig, lambda s, f: asyncio.create_task(shutdown()))
try:
# Start the service
print("STARTUP: About to start service", file=sys.stderr, flush=True)
await service.start()
print("STARTUP: Service started successfully", file=sys.stderr, flush=True)
# Keep running
while service.running:
await asyncio.sleep(1)
except KeyboardInterrupt:
logger.info("Received keyboard interrupt")
except Exception as e:
@@ -428,4 +547,13 @@ async def main():
await service.stop()
if __name__ == "__main__":
    # Stderr breadcrumbs are printed (not logged) so container startup
    # failures are visible even if structlog never initializes.
    # NOTE(review): the diff left the old bare asyncio.run(main()) line
    # alongside the new guarded version; this is the reconstructed new block.
    print("STARTUP: Entering main block", file=sys.stderr, flush=True)
    try:
        print("STARTUP: About to run main()", file=sys.stderr, flush=True)
        asyncio.run(main())
        print("STARTUP: main() completed", file=sys.stderr, flush=True)
    except Exception as e:
        print(f"STARTUP: FATAL ERROR: {e}", file=sys.stderr, flush=True)
        import traceback
        traceback.print_exc(file=sys.stderr)
        raise