New alert service

Urtzi Alfaro
2025-12-05 20:07:01 +01:00
parent 1fe3a73549
commit 667e6e0404
393 changed files with 26002 additions and 61033 deletions


@@ -1,559 +1,137 @@
# services/alert_processor/app/main.py
"""
Alert Processor Service v2.0 - central hub for processing alerts and recommendations.
Consumes events from RabbitMQ, enriches and stores them in the database, and routes
them to the notification service. Main FastAPI application with RabbitMQ consumer
lifecycle management.
"""
import asyncio
import json
import signal
import sys
from datetime import datetime
from typing import Dict, Any
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from contextlib import asynccontextmanager
import structlog
from shared.redis_utils import initialize_redis, close_redis, get_redis_client
from aio_pika import connect_robust, IncomingMessage, ExchangeType
from app.config import AlertProcessorConfig
from shared.database.base import create_database_manager
from shared.clients.base_service_client import BaseServiceClient
from shared.config.rabbitmq_config import RABBITMQ_CONFIG
from app.core.config import settings
from app.consumer.event_consumer import EventConsumer
from app.api import alerts, sse
# Import enrichment services
from app.services.enrichment import (
PriorityScoringService,
ContextEnrichmentService,
TimingIntelligenceService,
OrchestratorClient
)
from shared.schemas.alert_types import RawAlert
# Setup logging
import logging
# Configure Python's standard logging first (required for structlog.stdlib.LoggerFactory)
logging.basicConfig(
format="%(message)s",
stream=sys.stdout,
level=logging.INFO,
)
# Configure structlog to emit structured JSON via the standard logging backend
structlog.configure(
    processors=[
        structlog.stdlib.filter_by_level,
        structlog.stdlib.add_logger_name,
        structlog.stdlib.add_log_level,
        structlog.stdlib.PositionalArgumentsFormatter(),
        structlog.processors.TimeStamper(fmt="iso"),
        structlog.processors.StackInfoRenderer(),
        structlog.processors.format_exc_info,
        structlog.processors.JSONRenderer()
    ],
    context_class=dict,
    logger_factory=structlog.stdlib.LoggerFactory(),
    wrapper_class=structlog.stdlib.BoundLogger,
    cache_logger_on_first_use=True,
)
logger = structlog.get_logger()
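# Example (illustrative): with the JSON renderer configured above, a call such as
#   logger.info("item_processed", tenant_id="tenant-1", priority_score=87)
# emits one JSON object per line, roughly:
#   {"event": "item_processed", "tenant_id": "tenant-1", "priority_score": 87,
#    "level": "info", "timestamp": "2025-12-05T19:07:01Z", ...}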
# Global consumer instance
consumer: EventConsumer | None = None
class NotificationServiceClient(BaseServiceClient):
"""Client for notification service"""
def __init__(self, config: AlertProcessorConfig):
super().__init__("notification-service", config)
self.config = config
def get_service_base_path(self) -> str:
"""Return the base path for notification service APIs"""
return "/api/v1"
async def send_notification(self, tenant_id: str, notification: Dict[str, Any], channels: list) -> Dict[str, Any]:
"""Send notification via notification service"""
try:
response = await self.post(
"notifications/send",
data={
"tenant_id": tenant_id,
"notification": notification,
"channels": channels
}
)
return response if response else {"status": "failed", "error": "No response from notification service"}
except Exception as e:
logger.error("Failed to send notification", error=str(e), tenant_id=tenant_id)
return {"status": "failed", "error": str(e)}
class AlertProcessorService:
    """
    Central service for processing and routing alerts and recommendations.
    Integrates with the notification service for multi-channel delivery.
    """
def __init__(self, config: AlertProcessorConfig):
self.config = config
self.db_manager = create_database_manager(config.DATABASE_URL, "alert-processor")
self.notification_client = NotificationServiceClient(config)
self.redis = None
self.connection = None
self.channel = None
self.running = False
# Initialize enrichment services (context_enrichment initialized after Redis connection)
self.orchestrator_client = OrchestratorClient(config.ORCHESTRATOR_SERVICE_URL)
self.context_enrichment = None # Initialized in start() after Redis connection
self.priority_scoring = PriorityScoringService(config)
self.timing_intelligence = TimingIntelligenceService(config)
# Metrics
self.items_processed = 0
self.items_stored = 0
self.notifications_sent = 0
self.errors_count = 0
self.enrichments_count = 0
async def start(self):
"""Start the alert processor service"""
try:
logger.info("Starting Alert Processor Service")
# Initialize shared Redis connection for SSE publishing
await initialize_redis(self.config.REDIS_URL, db=0, max_connections=20)
self.redis = await get_redis_client()
logger.info("Connected to Redis")
# Initialize context enrichment service now that Redis is available
self.context_enrichment = ContextEnrichmentService(self.config, self.db_manager, self.redis)
logger.info("Initialized context enrichment service")
# Connect to RabbitMQ
await self._setup_rabbitmq()
# Start consuming messages
await self._start_consuming()
self.running = True
logger.info("Alert Processor Service started successfully")
except Exception as e:
logger.error("Failed to start Alert Processor Service", error=str(e))
raise
async def _setup_rabbitmq(self):
"""Setup RabbitMQ connection and configuration"""
self.connection = await connect_robust(
self.config.RABBITMQ_URL,
heartbeat=30,
connection_attempts=5
)
self.channel = await self.connection.channel()
        await self.channel.set_qos(prefetch_count=10)  # Allow up to 10 unacknowledged messages in flight
# Setup exchange and queue based on config
exchange_config = RABBITMQ_CONFIG["exchanges"]["alerts"]
self.exchange = await self.channel.declare_exchange(
exchange_config["name"],
getattr(ExchangeType, exchange_config["type"].upper()),
durable=exchange_config["durable"]
)
queue_config = RABBITMQ_CONFIG["queues"]["alert_processing"]
self.queue = await self.channel.declare_queue(
queue_config["name"],
durable=queue_config["durable"],
arguments=queue_config["arguments"]
)
# Bind to all alert and recommendation routing keys
await self.queue.bind(self.exchange, routing_key="*.*.*")
logger.info("RabbitMQ setup completed")
async def _start_consuming(self):
"""Start consuming messages from RabbitMQ"""
await self.queue.consume(self.process_item)
logger.info("Started consuming alert messages")
async def process_item(self, message: IncomingMessage):
"""Process incoming alert or recommendation"""
async with message.process():
try:
# Parse message
item = json.loads(message.body.decode())
logger.info("Processing item",
item_type=item.get('item_type'),
alert_type=item.get('type'),
priority_level=item.get('priority_level', 'standard'),
tenant_id=item.get('tenant_id'))
# ENRICH ALERT BEFORE STORAGE
enriched_item = await self.enrich_alert(item)
self.enrichments_count += 1
# Store enriched alert in database
stored_item = await self.store_enriched_item(enriched_item)
self.items_stored += 1
# Determine delivery channels based on priority score (not severity)
channels = self.get_channels_by_priority(enriched_item['priority_score'])
# Send via notification service if channels are specified
if channels:
notification_result = await self.notification_client.send_notification(
tenant_id=enriched_item['tenant_id'],
notification={
'type': enriched_item['item_type'],
'id': enriched_item['id'],
'title': enriched_item['title'],
'message': enriched_item['message'],
'priority_score': enriched_item['priority_score'],
'priority_level': enriched_item['priority_level'],
'type_class': enriched_item['type_class'],
'metadata': enriched_item.get('metadata', {}),
'actions': enriched_item.get('smart_actions', []),
'ai_reasoning_summary': enriched_item.get('ai_reasoning_summary'),
'email': enriched_item.get('email'),
'phone': enriched_item.get('phone'),
'user_id': enriched_item.get('user_id')
},
channels=channels
)
if notification_result and notification_result.get('status') == 'success':
self.notifications_sent += 1
# Stream enriched alert to SSE for real-time dashboard (always)
await self.stream_to_sse(enriched_item['tenant_id'], stored_item)
self.items_processed += 1
logger.info("Item processed successfully",
item_id=enriched_item['id'],
priority_score=enriched_item['priority_score'],
priority_level=enriched_item['priority_level'],
channels=len(channels))
except Exception as e:
self.errors_count += 1
logger.error("Item processing failed", error=str(e))
raise
async def enrich_alert(self, item: dict) -> dict:
"""
Enrich alert with priority scoring, context, and smart actions.
All alerts MUST be enriched - no legacy support.
"""
try:
# Convert dict to RawAlert model
# Map 'type' to 'alert_type' and 'metadata' to 'alert_metadata'
raw_alert = RawAlert(
tenant_id=item['tenant_id'],
alert_type=item.get('type', item.get('alert_type', 'unknown')),
title=item['title'],
message=item['message'],
service=item['service'],
actions=item.get('actions', []),
alert_metadata=item.get('metadata', item.get('alert_metadata', {})),
item_type=item.get('item_type', 'alert')
)
# Enrich with orchestrator context (AI actions, business impact)
enriched = await self.context_enrichment.enrich_alert(raw_alert)
# Convert EnrichedAlert back to dict and merge with original item
# Use mode='json' to properly serialize datetime objects to ISO strings
enriched_dict = enriched.model_dump(mode='json') if hasattr(enriched, 'model_dump') else dict(enriched)
enriched_dict['id'] = item['id'] # Preserve original ID
enriched_dict['item_type'] = item.get('item_type', 'alert') # Preserve item_type
enriched_dict['type'] = enriched_dict.get('alert_type', item.get('type', 'unknown')) # Preserve type field
enriched_dict['timestamp'] = item.get('timestamp', datetime.utcnow().isoformat())
enriched_dict['timing_decision'] = enriched_dict.get('timing_decision', 'send_now') # Default timing decision
# Map 'actions' to 'smart_actions' for database storage
if 'actions' in enriched_dict and 'smart_actions' not in enriched_dict:
enriched_dict['smart_actions'] = enriched_dict['actions']
logger.info("Alert enriched successfully",
alert_id=enriched_dict['id'],
alert_type=enriched_dict.get('alert_type'),
priority_score=enriched_dict['priority_score'],
priority_level=enriched_dict['priority_level'],
type_class=enriched_dict['type_class'],
actions_count=len(enriched_dict.get('actions', [])),
smart_actions_count=len(enriched_dict.get('smart_actions', [])))
return enriched_dict
except Exception as e:
logger.error("Alert enrichment failed, using fallback", error=str(e), alert_id=item.get('id'))
# Fallback: basic enrichment with defaults
return self._create_fallback_enrichment(item)
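    # Illustrative mapping (hypothetical input): a raw item such as
    #   {"id": "a1", "tenant_id": "t1", "type": "low_stock", "service": "inventory",
    #    "title": "...", "message": "...", "metadata": {...}}
    # becomes a RawAlert with alert_type="low_stock" and alert_metadata={...};
    # after enrichment the dict regains 'id', 'item_type', 'type', and 'timestamp',
    # and 'actions' is mirrored into 'smart_actions' for database storage.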
def _create_fallback_enrichment(self, item: dict) -> dict:
"""
Create fallback enrichment when enrichment services fail.
Ensures all alerts have required enrichment fields.
"""
return {
**item,
'item_type': item.get('item_type', 'alert'), # Ensure item_type is preserved
'type': item.get('type', 'unknown'), # Ensure type field is preserved
'alert_type': item.get('type', item.get('alert_type', 'unknown')), # Ensure alert_type exists
'priority_score': 50,
'priority_level': 'standard',
'type_class': 'action_needed',
'orchestrator_context': None,
'business_impact': None,
'urgency_context': None,
'user_agency': None,
'trend_context': None,
'smart_actions': item.get('actions', []),
'ai_reasoning_summary': None,
'confidence_score': 0.5,
'timing_decision': 'send_now',
'scheduled_send_time': None,
'placement': ['dashboard']
}
async def store_enriched_item(self, enriched_item: dict) -> dict:
"""Store enriched alert in database with all enrichment fields"""
from app.models.events import Alert, AlertStatus
async with self.db_manager.get_session() as session:
# Create enriched alert instance
alert = Alert(
id=enriched_item['id'],
tenant_id=enriched_item['tenant_id'],
item_type=enriched_item['item_type'],
alert_type=enriched_item['type'],
            status=AlertStatus.ACTIVE,
service=enriched_item['service'],
title=enriched_item['title'],
message=enriched_item['message'],
# Enrichment fields (REQUIRED)
priority_score=enriched_item['priority_score'],
priority_level=enriched_item['priority_level'],
type_class=enriched_item['type_class'],
# Context enrichment (JSONB)
orchestrator_context=enriched_item.get('orchestrator_context'),
business_impact=enriched_item.get('business_impact'),
urgency_context=enriched_item.get('urgency_context'),
user_agency=enriched_item.get('user_agency'),
trend_context=enriched_item.get('trend_context'),
# Smart actions
smart_actions=enriched_item.get('smart_actions', []),
# AI reasoning
ai_reasoning_summary=enriched_item.get('ai_reasoning_summary'),
confidence_score=enriched_item.get('confidence_score', 0.8),
# Timing intelligence
timing_decision=enriched_item.get('timing_decision', 'send_now'),
scheduled_send_time=enriched_item.get('scheduled_send_time'),
# Placement
placement=enriched_item.get('placement', ['dashboard']),
# Metadata (legacy)
alert_metadata=enriched_item.get('metadata', {}),
# Timestamp
created_at=datetime.fromisoformat(enriched_item['timestamp']) if isinstance(enriched_item['timestamp'], str) else enriched_item['timestamp']
)
session.add(alert)
await session.commit()
await session.refresh(alert)
logger.debug("Enriched item stored in database",
item_id=enriched_item['id'],
priority_score=alert.priority_score,
type_class=alert.type_class)
# Convert to enriched dict for return
alert_dict = alert.to_dict()
# Cache active alerts in Redis for SSE initial_items
await self._cache_active_alerts(str(alert.tenant_id))
return alert_dict
async def _cache_active_alerts(self, tenant_id: str):
"""
Cache today's active alerts for a tenant in Redis for quick SSE access
Only caches alerts from today (00:00 UTC onwards) to avoid flooding
the dashboard with historical alerts on initial connection.
Analytics endpoints should query the database directly for historical data.
"""
try:
from app.models.events import Alert, AlertStatus
from sqlalchemy import select
async with self.db_manager.get_session() as session:
# Calculate start of today (UTC) to filter only today's alerts
today_start = datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0)
# Query only today's active alerts for this tenant
# This prevents showing yesterday's alerts on dashboard initial load
query = select(Alert).where(
Alert.tenant_id == tenant_id,
Alert.status == AlertStatus.ACTIVE,
Alert.created_at >= today_start # Only today's alerts
).order_by(Alert.created_at.desc()).limit(50)
result = await session.execute(query)
alerts = result.scalars().all()
# Convert to enriched JSON-serializable format
active_items = []
for alert in alerts:
active_items.append(alert.to_dict())
# Cache in Redis with 1 hour TTL
cache_key = f"active_alerts:{tenant_id}"
await self.redis.setex(
cache_key,
3600, # 1 hour TTL
json.dumps(active_items)
)
logger.debug("Cached today's active alerts in Redis",
tenant_id=tenant_id,
count=len(active_items),
filter_date=today_start.isoformat())
except Exception as e:
logger.error("Failed to cache active alerts",
tenant_id=tenant_id,
error=str(e))
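    # Assumption: the SSE endpoint (app.api.sse) reads "active_alerts:{tenant_id}"
    # on client connect to seed the dashboard's initial_items; once the 1-hour TTL
    # expires, a new connection simply starts from an empty initial list.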
async def stream_to_sse(self, tenant_id: str, item: dict):
"""Publish enriched item to Redis for SSE streaming"""
channel = f"alerts:{tenant_id}"
# Item is already enriched dict from store_enriched_item
# Just ensure timestamp is serializable
sse_message = {
**item,
'timestamp': item['created_at'].isoformat() if hasattr(item['created_at'], 'isoformat') else item['created_at']
}
# Publish to Redis channel for SSE
await self.redis.publish(channel, json.dumps(sse_message))
logger.debug("Enriched item published to SSE",
tenant_id=tenant_id,
item_id=item['id'],
priority_score=item.get('priority_score'))
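    # Quick way to observe the live stream (illustrative):
    #   redis-cli SUBSCRIBE alerts:<tenant_id>
    # Each published message is the enriched alert serialized as a JSON object.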
def get_channels_by_priority(self, priority_score: int) -> list:
"""
Determine notification channels based on priority score and timing.
Uses multi-factor priority score (0-100) instead of legacy severity.
"""
current_hour = datetime.now().hour
channels = ['dashboard'] # Always include dashboard (SSE)
# Critical priority (90-100): All channels immediately
if priority_score >= self.config.CRITICAL_THRESHOLD:
channels.extend(['whatsapp', 'email', 'push'])
# Important priority (70-89): WhatsApp and email during extended hours
elif priority_score >= self.config.IMPORTANT_THRESHOLD:
if 6 <= current_hour <= 22:
channels.extend(['whatsapp', 'email'])
else:
channels.append('email') # Email only during night
# Standard priority (50-69): Email during business hours
elif priority_score >= self.config.STANDARD_THRESHOLD:
if 7 <= current_hour <= 20:
channels.append('email')
# Info priority (0-49): Dashboard only
return channels
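    # Example routing (thresholds are config-driven; the values below assume the
    # ranges stated in the comments above -- CRITICAL=90, IMPORTANT=70, STANDARD=50):
    #   score 95 at 14:00 -> ['dashboard', 'whatsapp', 'email', 'push']
    #   score 75 at 23:30 -> ['dashboard', 'email']
    #   score 55 at 10:00 -> ['dashboard', 'email']
    #   score 30 any time -> ['dashboard']
    # Note: current_hour is server-local time (datetime.now()), not UTC.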
async def stop(self):
"""Stop the alert processor service"""
self.running = False
logger.info("Stopping Alert Processor Service")
try:
# Close RabbitMQ connection
if self.connection and not self.connection.is_closed:
await self.connection.close()
# Close shared Redis connection
await close_redis()
logger.info("Alert Processor Service stopped")
except Exception as e:
logger.error("Error stopping service", error=str(e))
def get_metrics(self) -> Dict[str, Any]:
"""Get service metrics"""
return {
"items_processed": self.items_processed,
"items_stored": self.items_stored,
"enrichments_count": self.enrichments_count,
"notifications_sent": self.notifications_sent,
"errors_count": self.errors_count,
"running": self.running
}
async def main():
    """Standalone entry point: run the processor service without the HTTP API."""
    config = AlertProcessorConfig()
    service = AlertProcessorService(config)

    # Setup signal handlers for graceful shutdown
    async def shutdown():
        logger.info("Received shutdown signal")
        await service.stop()
        sys.exit(0)

    for sig in (signal.SIGTERM, signal.SIGINT):
        signal.signal(sig, lambda s, f: asyncio.create_task(shutdown()))

    try:
        # Start the service and keep running until stopped
        await service.start()
        while service.running:
            await asyncio.sleep(1)
    except KeyboardInterrupt:
        logger.info("Received keyboard interrupt")
    except Exception as e:
        logger.error("Service failed", error=str(e))
    finally:
        await service.stop()

@asynccontextmanager
async def lifespan(app: FastAPI):
    """
    Application lifecycle manager.
    Startup: initialize Redis and the RabbitMQ event consumer.
    Shutdown: stop the consumer and close Redis connections.
    """
    global consumer
    logger.info("alert_processor_starting", version=settings.VERSION)
    # Startup: initialize Redis and start the consumer
    try:
        await initialize_redis(
            settings.REDIS_URL,
            db=settings.REDIS_DB,
            max_connections=settings.REDIS_MAX_CONNECTIONS
        )
        logger.info("redis_initialized")
        consumer = EventConsumer()
        await consumer.start()
        logger.info("alert_processor_started")
    except Exception as e:
        logger.error("alert_processor_startup_failed", error=str(e))
        raise
    yield
    # Shutdown: stop consumer and close Redis
    try:
        if consumer:
            await consumer.stop()
        await close_redis()
        logger.info("alert_processor_shutdown")
    except Exception as e:
        logger.error("alert_processor_shutdown_failed", error=str(e))
# Create FastAPI app
app = FastAPI(
title="Alert Processor Service",
description="Event processing, enrichment, and alert management system",
version=settings.VERSION,
lifespan=lifespan,
debug=settings.DEBUG
)
# CORS middleware
app.add_middleware(
CORSMiddleware,
allow_origins=["*"], # Configure appropriately for production
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
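# Example (illustrative): for production, replace the wildcard with explicit
# origins, e.g. allow_origins=["https://dashboard.example.com"], and narrow
# allow_methods/allow_headers to what the dashboard actually uses.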
# Include routers
app.include_router(
alerts.router,
prefix="/api/v1/tenants/{tenant_id}",
tags=["alerts"]
)
app.include_router(
sse.router,
prefix="/api/v1",
tags=["sse"]
)
@app.get("/health")
async def health_check():
"""
Health check endpoint.
Returns service status and version.
"""
return {
"status": "healthy",
"service": settings.SERVICE_NAME,
"version": settings.VERSION
}
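# Example (illustrative, assuming the default port from the __main__ block):
#   curl http://localhost:8000/health
#   -> {"status": "healthy", "service": "<name>", "version": "<version>"}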
@app.get("/")
async def root():
"""Root endpoint with service info"""
return {
"service": settings.SERVICE_NAME,
"version": settings.VERSION,
"description": "Event processing, enrichment, and alert management system"
}
if __name__ == "__main__":
print("STARTUP: Entering main block", file=sys.stderr, flush=True)
try:
print("STARTUP: About to run main()", file=sys.stderr, flush=True)
asyncio.run(main())
print("STARTUP: main() completed", file=sys.stderr, flush=True)
except Exception as e:
print(f"STARTUP: FATAL ERROR: {e}", file=sys.stderr, flush=True)
import traceback
traceback.print_exc(file=sys.stderr)
raise
import uvicorn
uvicorn.run(
"app.main:app",
host="0.0.0.0",
port=8000,
reload=settings.DEBUG
)