New alert system and control panel page
@@ -19,14 +19,33 @@ from shared.database.base import create_database_manager
from shared.clients.base_service_client import BaseServiceClient
from shared.config.rabbitmq_config import RABBITMQ_CONFIG

+# Import enrichment services
+from app.services.enrichment import (
+    PriorityScoringService,
+    ContextEnrichmentService,
+    TimingIntelligenceService,
+    OrchestratorClient
+)
+from shared.schemas.alert_types import RawAlert

# Setup logging
+import logging
+
+# Configure Python's standard logging first (required for structlog.stdlib.LoggerFactory)
+logging.basicConfig(
+    format="%(message)s",
+    stream=sys.stdout,
+    level=logging.INFO,
+)
+
+# Configure structlog to use the standard logging backend
structlog.configure(
    processors=[
        structlog.stdlib.filter_by_level,
        structlog.stdlib.add_logger_name,
        structlog.stdlib.add_log_level,
        structlog.stdlib.PositionalArgumentsFormatter(),
-        structlog.processors.TimeStamper(fmt="ISO"),
+        structlog.processors.TimeStamper(fmt="iso"),
        structlog.processors.StackInfoRenderer(),
        structlog.processors.format_exc_info,
        structlog.processors.JSONRenderer()
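Note on the fmt change above: structlog's TimeStamper treats the lowercase string "iso" as a special value meaning ISO 8601, while any other string is passed through to strftime(). "ISO" contains no % directives, so the old configuration would have stamped every log line with the literal text "ISO". A minimal sketch of the difference (timestamp value is illustrative):

    import structlog

    ts_iso = structlog.processors.TimeStamper(fmt="iso", utc=True)
    ts_bad = structlog.processors.TimeStamper(fmt="ISO", utc=True)

    # Processors are plain callables: (logger, method_name, event_dict) -> event_dict
    print(ts_iso(None, "info", {}))  # {'timestamp': '2025-01-01T12:00:00.000000Z'}
    print(ts_bad(None, "info", {}))  # {'timestamp': 'ISO'}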
@@ -81,12 +100,19 @@ class AlertProcessorService:
        self.connection = None
        self.channel = None
        self.running = False

+        # Initialize enrichment services (context_enrichment initialized after Redis connection)
+        self.orchestrator_client = OrchestratorClient(config.ORCHESTRATOR_SERVICE_URL)
+        self.context_enrichment = None  # Initialized in start() after Redis connection
+        self.priority_scoring = PriorityScoringService(config)
+        self.timing_intelligence = TimingIntelligenceService(config)
+
        # Metrics
        self.items_processed = 0
        self.items_stored = 0
        self.notifications_sent = 0
        self.errors_count = 0
+        self.enrichments_count = 0

    async def start(self):
        """Start the alert processor service"""
@@ -97,16 +123,20 @@ class AlertProcessorService:
            await initialize_redis(self.config.REDIS_URL, db=0, max_connections=20)
            self.redis = await get_redis_client()
            logger.info("Connected to Redis")

+            # Initialize context enrichment service now that Redis is available
+            self.context_enrichment = ContextEnrichmentService(self.config, self.db_manager, self.redis)
+            logger.info("Initialized context enrichment service")
+
            # Connect to RabbitMQ
            await self._setup_rabbitmq()

            # Start consuming messages
            await self._start_consuming()

            self.running = True
            logger.info("Alert Processor Service started successfully")

        except Exception as e:
            logger.error("Failed to start Alert Processor Service", error=str(e))
            raise
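The two-phase wiring above (context_enrichment is None in __init__, created in start() once Redis is connected) is a standard deferred-dependency pattern. A minimal standalone sketch of the idea, with hypothetical names (connect_redis is an assumed helper) and an explicit guard so misuse fails loudly:

    class EnrichmentPipeline:
        """Sketch only: defer a Redis-backed dependency until start()."""

        def __init__(self, config):
            self.config = config
            self.redis = None
            self.context_enrichment = None  # needs a live Redis client

        async def start(self):
            self.redis = await connect_redis(self.config.REDIS_URL)  # assumed helper
            self.context_enrichment = ContextEnrichmentService(self.config, None, self.redis)

        async def enrich(self, item: dict) -> dict:
            if self.context_enrichment is None:
                raise RuntimeError("call start() before enrich()")
            return await self.context_enrichment.enrich_alert(item)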
@@ -152,102 +182,202 @@ class AlertProcessorService:
        try:
            # Parse message
            item = json.loads(message.body.decode())

-            logger.info("Processing item",
+            logger.info("Processing item",
                        item_type=item.get('item_type'),
                        alert_type=item.get('type'),
                        severity=item.get('severity'),
+                        priority_level=item.get('priority_level', 'standard'),
                        tenant_id=item.get('tenant_id'))

-            # Store in database
-            stored_item = await self.store_item(item)
+            # ENRICH ALERT BEFORE STORAGE
+            enriched_item = await self.enrich_alert(item)
+            self.enrichments_count += 1
+
+            # Store enriched alert in database
+            stored_item = await self.store_enriched_item(enriched_item)
+            self.items_stored += 1

-            # Determine delivery channels based on severity and type
-            channels = self.get_channels_by_severity_and_type(
-                item['severity'],
-                item['item_type']
-            )
+            # Determine delivery channels based on priority score (not severity)
+            channels = self.get_channels_by_priority(enriched_item['priority_score'])

            # Send via notification service if channels are specified
            if channels:
                notification_result = await self.notification_client.send_notification(
-                    tenant_id=item['tenant_id'],
+                    tenant_id=enriched_item['tenant_id'],
                    notification={
-                        'type': item['item_type'],  # 'alert' or 'recommendation'
-                        'id': item['id'],
-                        'title': item['title'],
-                        'message': item['message'],
-                        'severity': item['severity'],
-                        'metadata': item.get('metadata', {}),
-                        'actions': item.get('actions', []),
-                        'email': item.get('email'),
-                        'phone': item.get('phone'),
-                        'user_id': item.get('user_id')
+                        'type': enriched_item['item_type'],
+                        'id': enriched_item['id'],
+                        'title': enriched_item['title'],
+                        'message': enriched_item['message'],
+                        'priority_score': enriched_item['priority_score'],
+                        'priority_level': enriched_item['priority_level'],
+                        'type_class': enriched_item['type_class'],
+                        'metadata': enriched_item.get('metadata', {}),
+                        'actions': enriched_item.get('smart_actions', []),
+                        'ai_reasoning_summary': enriched_item.get('ai_reasoning_summary'),
+                        'email': enriched_item.get('email'),
+                        'phone': enriched_item.get('phone'),
+                        'user_id': enriched_item.get('user_id')
                    },
                    channels=channels
                )

                if notification_result and notification_result.get('status') == 'success':
                    self.notifications_sent += 1

-            # Stream to SSE for real-time dashboard (always)
-            await self.stream_to_sse(item['tenant_id'], stored_item)
+            # Stream enriched alert to SSE for real-time dashboard (always)
+            await self.stream_to_sse(enriched_item['tenant_id'], stored_item)

            self.items_processed += 1

-            logger.info("Item processed successfully",
-                        item_id=item['id'],
+            logger.info("Item processed successfully",
+                        item_id=enriched_item['id'],
+                        priority_score=enriched_item['priority_score'],
+                        priority_level=enriched_item['priority_level'],
                        channels=len(channels))

        except Exception as e:
            self.errors_count += 1
            logger.error("Item processing failed", error=str(e))
            raise

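For orientation, the notification payload assembled above now carries the enrichment fields end to end. A representative example (every value below is illustrative, not taken from the codebase):

    example_notification = {
        'type': 'alert',
        'id': 'a1b2c3',
        'title': 'Inventory low',
        'message': 'SKU 1234 is below its reorder point',
        'priority_score': 82,
        'priority_level': 'important',
        'type_class': 'action_needed',
        'metadata': {'sku': '1234'},
        'actions': [{'label': 'Reorder', 'action': 'reorder_sku'}],
        'ai_reasoning_summary': 'High sell-through; stockout likely within 3 days',
        'email': 'owner@example.com',
        'phone': None,
        'user_id': 'u-42',
    }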
-    async def store_item(self, item: dict) -> dict:
-        """Store alert or recommendation in database and cache in Redis"""
-        from app.models.alerts import Alert, AlertSeverity, AlertStatus

+    async def enrich_alert(self, item: dict) -> dict:
+        """
+        Enrich alert with priority scoring, context, and smart actions.
+        All alerts MUST be enriched - no legacy support.
+        """
+        try:
+            # Convert dict to RawAlert model
+            # Map 'type' to 'alert_type' and 'metadata' to 'alert_metadata'
+            raw_alert = RawAlert(
+                tenant_id=item['tenant_id'],
+                alert_type=item.get('type', item.get('alert_type', 'unknown')),
+                title=item['title'],
+                message=item['message'],
+                service=item['service'],
+                actions=item.get('actions', []),
+                alert_metadata=item.get('metadata', item.get('alert_metadata', {})),
+                item_type=item.get('item_type', 'alert')
+            )
+
+            # Enrich with orchestrator context (AI actions, business impact)
+            enriched = await self.context_enrichment.enrich_alert(raw_alert)
+
+            # Convert EnrichedAlert back to dict and merge with original item
+            # Use mode='json' to properly serialize datetime objects to ISO strings
+            enriched_dict = enriched.model_dump(mode='json') if hasattr(enriched, 'model_dump') else dict(enriched)
+            enriched_dict['id'] = item['id']  # Preserve original ID
+            enriched_dict['item_type'] = item.get('item_type', 'alert')  # Preserve item_type
+            enriched_dict['type'] = enriched_dict.get('alert_type', item.get('type', 'unknown'))  # Preserve type field
+            enriched_dict['timestamp'] = item.get('timestamp', datetime.utcnow().isoformat())
+            enriched_dict['timing_decision'] = enriched_dict.get('timing_decision', 'send_now')  # Default timing decision
+            # Map 'actions' to 'smart_actions' for database storage
+            if 'actions' in enriched_dict and 'smart_actions' not in enriched_dict:
+                enriched_dict['smart_actions'] = enriched_dict['actions']
+
+            logger.info("Alert enriched successfully",
+                        alert_id=enriched_dict['id'],
+                        alert_type=enriched_dict.get('alert_type'),
+                        priority_score=enriched_dict['priority_score'],
+                        priority_level=enriched_dict['priority_level'],
+                        type_class=enriched_dict['type_class'],
+                        actions_count=len(enriched_dict.get('actions', [])),
+                        smart_actions_count=len(enriched_dict.get('smart_actions', [])))
+
+            return enriched_dict
+
+        except Exception as e:
+            logger.error("Alert enrichment failed, using fallback", error=str(e), alert_id=item.get('id'))
+            # Fallback: basic enrichment with defaults
+            return self._create_fallback_enrichment(item)
+
+    def _create_fallback_enrichment(self, item: dict) -> dict:
+        """
+        Create fallback enrichment when enrichment services fail.
+        Ensures all alerts have required enrichment fields.
+        """
+        return {
+            **item,
+            'item_type': item.get('item_type', 'alert'),  # Ensure item_type is preserved
+            'type': item.get('type', 'unknown'),  # Ensure type field is preserved
+            'alert_type': item.get('type', item.get('alert_type', 'unknown')),  # Ensure alert_type exists
+            'priority_score': 50,
+            'priority_level': 'standard',
+            'type_class': 'action_needed',
+            'orchestrator_context': None,
+            'business_impact': None,
+            'urgency_context': None,
+            'user_agency': None,
+            'trend_context': None,
+            'smart_actions': item.get('actions', []),
+            'ai_reasoning_summary': None,
+            'confidence_score': 0.5,
+            'timing_decision': 'send_now',
+            'scheduled_send_time': None,
+            'placement': ['dashboard']
+        }

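A quick illustration of the fallback contract: whatever minimal dict comes in, the defaults above guarantee the enrichment fields exist. The input item here is hypothetical:

    item = {
        'id': 'a1', 'tenant_id': 't1', 'type': 'inventory_low',
        'title': 'Low stock', 'message': 'SKU 1234 low', 'service': 'inventory',
        'actions': [{'label': 'Reorder'}],
    }
    fallback = service._create_fallback_enrichment(item)
    assert fallback['priority_score'] == 50
    assert fallback['priority_level'] == 'standard'
    assert fallback['smart_actions'] == item['actions']
    assert fallback['placement'] == ['dashboard']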
+    async def store_enriched_item(self, enriched_item: dict) -> dict:
+        """Store enriched alert in database with all enrichment fields"""
+        from app.models.events import Alert, AlertStatus
        from sqlalchemy import select

        async with self.db_manager.get_session() as session:
-            # Create alert instance
+            # Create enriched alert instance
            alert = Alert(
-                id=item['id'],
-                tenant_id=item['tenant_id'],
-                item_type=item['item_type'],  # 'alert' or 'recommendation'
-                alert_type=item['type'],
-                severity=AlertSeverity(item['severity'].lower()),
-                status=AlertStatus.ACTIVE,
-                service=item['service'],
-                title=item['title'],
-                message=item['message'],
-                actions=item.get('actions', []),
-                alert_metadata=item.get('metadata', {}),
-                created_at=datetime.fromisoformat(item['timestamp']) if isinstance(item['timestamp'], str) else item['timestamp']
+                id=enriched_item['id'],
+                tenant_id=enriched_item['tenant_id'],
+                item_type=enriched_item['item_type'],
+                alert_type=enriched_item['type'],
+                status='active',
+                service=enriched_item['service'],
+                title=enriched_item['title'],
+                message=enriched_item['message'],
+
+                # Enrichment fields (REQUIRED)
+                priority_score=enriched_item['priority_score'],
+                priority_level=enriched_item['priority_level'],
+                type_class=enriched_item['type_class'],
+
+                # Context enrichment (JSONB)
+                orchestrator_context=enriched_item.get('orchestrator_context'),
+                business_impact=enriched_item.get('business_impact'),
+                urgency_context=enriched_item.get('urgency_context'),
+                user_agency=enriched_item.get('user_agency'),
+                trend_context=enriched_item.get('trend_context'),
+
+                # Smart actions
+                smart_actions=enriched_item.get('smart_actions', []),
+
+                # AI reasoning
+                ai_reasoning_summary=enriched_item.get('ai_reasoning_summary'),
+                confidence_score=enriched_item.get('confidence_score', 0.8),
+
+                # Timing intelligence
+                timing_decision=enriched_item.get('timing_decision', 'send_now'),
+                scheduled_send_time=enriched_item.get('scheduled_send_time'),
+
+                # Placement
+                placement=enriched_item.get('placement', ['dashboard']),
+
+                # Metadata (legacy)
+                alert_metadata=enriched_item.get('metadata', {}),
+
+                # Timestamp
+                created_at=datetime.fromisoformat(enriched_item['timestamp']) if isinstance(enriched_item['timestamp'], str) else enriched_item['timestamp']
            )

            session.add(alert)
            await session.commit()
            await session.refresh(alert)

-            logger.debug("Item stored in database", item_id=item['id'])
+            logger.debug("Enriched item stored in database",
+                         item_id=enriched_item['id'],
+                         priority_score=alert.priority_score,
+                         type_class=alert.type_class)

-            # Convert to dict for return
-            alert_dict = {
-                'id': str(alert.id),
-                'tenant_id': str(alert.tenant_id),
-                'item_type': alert.item_type,
-                'alert_type': alert.alert_type,
-                'severity': alert.severity.value,
-                'status': alert.status.value,
-                'service': alert.service,
-                'title': alert.title,
-                'message': alert.message,
-                'actions': alert.actions,
-                'metadata': alert.alert_metadata,
-                'created_at': alert.created_at
-            }
+            # Convert to enriched dict for return
+            alert_dict = alert.to_dict()

            # Cache active alerts in Redis for SSE initial_items
            await self._cache_active_alerts(str(alert.tenant_id))
@@ -263,7 +393,7 @@ class AlertProcessorService:
        Analytics endpoints should query the database directly for historical data.
        """
        try:
-            from app.models.alerts import Alert, AlertStatus
+            from app.models.events import Alert, AlertStatus
            from sqlalchemy import select

            async with self.db_manager.get_session() as session:
@@ -281,21 +411,10 @@ class AlertProcessorService:
                result = await session.execute(query)
                alerts = result.scalars().all()

-                # Convert to JSON-serializable format
+                # Convert to enriched JSON-serializable format
                active_items = []
                for alert in alerts:
-                    active_items.append({
-                        'id': str(alert.id),
-                        'item_type': alert.item_type,
-                        'type': alert.alert_type,
-                        'severity': alert.severity.value,
-                        'title': alert.title,
-                        'message': alert.message,
-                        'actions': alert.actions or [],
-                        'metadata': alert.alert_metadata or {},
-                        'timestamp': alert.created_at.isoformat() if alert.created_at else datetime.utcnow().isoformat(),
-                        'status': alert.status.value
-                    })
+                    active_items.append(alert.to_dict())

                # Cache in Redis with 1 hour TTL
                cache_key = f"active_alerts:{tenant_id}"
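Both storage paths now delegate serialization to Alert.to_dict(), which is not shown in this diff. A plausible sketch of what that method would need to return, assuming the enrichment columns introduced above (the field list is an assumption, not the actual model code):

    def to_dict(self) -> dict:
        # Hypothetical sketch of the model helper this diff relies on.
        return {
            'id': str(self.id),
            'tenant_id': str(self.tenant_id),
            'item_type': self.item_type,
            'type': self.alert_type,
            'status': self.status,
            'service': self.service,
            'title': self.title,
            'message': self.message,
            'priority_score': self.priority_score,
            'priority_level': self.priority_level,
            'type_class': self.type_class,
            'smart_actions': self.smart_actions or [],
            'ai_reasoning_summary': self.ai_reasoning_summary,
            'metadata': self.alert_metadata or {},
            'created_at': self.created_at.isoformat() if self.created_at else None,
        }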
@@ -316,57 +435,51 @@ class AlertProcessorService:
                        error=str(e))

    async def stream_to_sse(self, tenant_id: str, item: dict):
-        """Publish item to Redis for SSE streaming"""
+        """Publish enriched item to Redis for SSE streaming"""
        channel = f"alerts:{tenant_id}"

-        # Prepare message for SSE
+        # Item is already enriched dict from store_enriched_item
+        # Just ensure timestamp is serializable
        sse_message = {
-            'id': item['id'],
-            'item_type': item['item_type'],
-            'type': item['alert_type'],
-            'severity': item['severity'],
-            'title': item['title'],
-            'message': item['message'],
-            'actions': json.loads(item['actions']) if isinstance(item['actions'], str) else item['actions'],
-            'metadata': json.loads(item['metadata']) if isinstance(item['metadata'], str) else item['metadata'],
-            'timestamp': item['created_at'].isoformat() if hasattr(item['created_at'], 'isoformat') else item['created_at'],
-            'status': item['status']
+            **item,
+            'timestamp': item['created_at'].isoformat() if hasattr(item['created_at'], 'isoformat') else item['created_at']
        }

        # Publish to Redis channel for SSE
        await self.redis.publish(channel, json.dumps(sse_message))

-        logger.debug("Item published to SSE", tenant_id=tenant_id, item_id=item['id'])
+        logger.debug("Enriched item published to SSE",
+                     tenant_id=tenant_id,
+                     item_id=item['id'],
+                     priority_score=item.get('priority_score'))

-    def get_channels_by_severity_and_type(self, severity: str, item_type: str) -> list:
-        """Determine notification channels based on severity, type, and time"""
+    def get_channels_by_priority(self, priority_score: int) -> list:
+        """
+        Determine notification channels based on priority score and timing.
+        Uses multi-factor priority score (0-100) instead of legacy severity.
+        """
        current_hour = datetime.now().hour

        channels = ['dashboard']  # Always include dashboard (SSE)

-        if item_type == 'alert':
-            if severity == 'urgent':
-                # Urgent alerts: All channels immediately
-                channels.extend(['whatsapp', 'email', 'push'])
-            elif severity == 'high':
-                # High alerts: WhatsApp and email during extended hours
-                if 6 <= current_hour <= 22:
-                    channels.extend(['whatsapp', 'email'])
-                else:
-                    channels.append('email')  # Email only during night
-            elif severity == 'medium':
-                # Medium alerts: Email during business hours
-                if 7 <= current_hour <= 20:
-                    channels.append('email')
-            # Low severity: Dashboard only
-
-        elif item_type == 'recommendation':
-            # Recommendations: Less urgent, limit channels and respect business hours
-            if severity in ['medium', 'high']:
-                if 8 <= current_hour <= 19:  # Business hours for recommendations
-                    channels.append('email')
-            # Low/urgent (rare for recs): Dashboard only
+        # Critical priority (90-100): All channels immediately
+        if priority_score >= self.config.CRITICAL_THRESHOLD:
+            channels.extend(['whatsapp', 'email', 'push'])
+
+        # Important priority (70-89): WhatsApp and email during extended hours
+        elif priority_score >= self.config.IMPORTANT_THRESHOLD:
+            if 6 <= current_hour <= 22:
+                channels.extend(['whatsapp', 'email'])
+            else:
+                channels.append('email')  # Email only during night
+
+        # Standard priority (50-69): Email during business hours
+        elif priority_score >= self.config.STANDARD_THRESHOLD:
+            if 7 <= current_hour <= 20:
+                channels.append('email')
+
+        # Info priority (0-49): Dashboard only

        return channels

    async def stop(self):
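Assuming the thresholds line up with the band comments above (CRITICAL_THRESHOLD=90, IMPORTANT_THRESHOLD=70, STANDARD_THRESHOLD=50; the config values themselves are not shown in this diff), the routing behaves like this:

    service.get_channels_by_priority(95)  # ['dashboard', 'whatsapp', 'email', 'push'], any hour
    service.get_channels_by_priority(75)  # adds whatsapp+email 06:00-22:00, email only overnight
    service.get_channels_by_priority(55)  # adds email 07:00-20:00, dashboard only otherwise
    service.get_channels_by_priority(30)  # ['dashboard'] always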
@@ -392,6 +505,7 @@ class AlertProcessorService:
        return {
            "items_processed": self.items_processed,
            "items_stored": self.items_stored,
+            "enrichments_count": self.enrichments_count,
            "notifications_sent": self.notifications_sent,
            "errors_count": self.errors_count,
            "running": self.running
@@ -399,27 +513,32 @@ class AlertProcessorService:

async def main():
    """Main entry point"""
+    print("STARTUP: Inside main() function", file=sys.stderr, flush=True)
    config = AlertProcessorConfig()
+    print("STARTUP: Config created", file=sys.stderr, flush=True)
    service = AlertProcessorService(config)

+    print("STARTUP: Service created", file=sys.stderr, flush=True)

    # Setup signal handlers for graceful shutdown
    async def shutdown():
        logger.info("Received shutdown signal")
        await service.stop()
        sys.exit(0)

    # Register signal handlers
    for sig in (signal.SIGTERM, signal.SIGINT):
        signal.signal(sig, lambda s, f: asyncio.create_task(shutdown()))

    try:
        # Start the service
+        print("STARTUP: About to start service", file=sys.stderr, flush=True)
        await service.start()

+        print("STARTUP: Service started successfully", file=sys.stderr, flush=True)

        # Keep running
        while service.running:
            await asyncio.sleep(1)

    except KeyboardInterrupt:
        logger.info("Received keyboard interrupt")
    except Exception as e:
@@ -428,4 +547,13 @@ async def main():
        await service.stop()

if __name__ == "__main__":
-    asyncio.run(main())
+    print("STARTUP: Entering main block", file=sys.stderr, flush=True)
+    try:
+        print("STARTUP: About to run main()", file=sys.stderr, flush=True)
+        asyncio.run(main())
+        print("STARTUP: main() completed", file=sys.stderr, flush=True)
+    except Exception as e:
+        print(f"STARTUP: FATAL ERROR: {e}", file=sys.stderr, flush=True)
+        import traceback
+        traceback.print_exc(file=sys.stderr)
+        raise
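One caveat on the shutdown wiring in main() above: signal.signal() runs its handler outside the event loop's scheduling, and asyncio.create_task() requires a running loop in the calling context, so the lambda can be fragile and fail to schedule the shutdown coroutine reliably. On Unix, asyncio's own signal integration is the safer pattern; a minimal sketch:

    import asyncio
    import signal

    async def main():
        loop = asyncio.get_running_loop()
        stop = asyncio.Event()

        # add_signal_handler invokes the callback inside the loop,
        # so setting an asyncio.Event (or creating tasks) is safe here.
        for sig in (signal.SIGTERM, signal.SIGINT):
            loop.add_signal_handler(sig, stop.set)

        await stop.wait()   # park until a signal arrives
        # ... await service.stop() here before returning

    asyncio.run(main())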