Improve monitoring 5
This commit is contained in:
@@ -4,90 +4,28 @@ Alert Processor Service v2.0
|
||||
Main FastAPI application with RabbitMQ consumer lifecycle management.
|
||||
"""
|
||||
|
||||
from fastapi import FastAPI, Response
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from contextlib import asynccontextmanager
|
||||
import structlog
|
||||
import os
|
||||
|
||||
from app.core.config import settings
|
||||
from app.consumer.event_consumer import EventConsumer
|
||||
from app.api import alerts, sse
|
||||
from shared.redis_utils import initialize_redis, close_redis
|
||||
from shared.monitoring.logging import setup_logging
|
||||
from shared.monitoring.metrics import MetricsCollector, add_metrics_middleware
|
||||
from shared.monitoring.system_metrics import SystemMetricsCollector
|
||||
from shared.service_base import StandardFastAPIService
|
||||
|
||||
# OpenTelemetry imports
|
||||
from opentelemetry import trace
|
||||
from opentelemetry.sdk.trace import TracerProvider
|
||||
from opentelemetry.sdk.trace.export import BatchSpanProcessor
|
||||
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
|
||||
from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
|
||||
from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor
|
||||
from opentelemetry.instrumentation.redis import RedisInstrumentor
|
||||
from opentelemetry.instrumentation.sqlalchemy import SQLAlchemyInstrumentor
|
||||
from opentelemetry.sdk.resources import Resource
|
||||
|
||||
# Configure OpenTelemetry tracing
|
||||
def setup_tracing(service_name: str = "alert-processor"):
|
||||
"""Initialize OpenTelemetry tracing with OTLP exporter for Jaeger"""
|
||||
resource = Resource.create({"service.name": service_name})
|
||||
|
||||
otlp_exporter = OTLPSpanExporter(
|
||||
endpoint=os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT", "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4317"),
|
||||
insecure=True
|
||||
)
|
||||
|
||||
provider = TracerProvider(resource=resource)
|
||||
processor = BatchSpanProcessor(otlp_exporter)
|
||||
provider.add_span_processor(processor)
|
||||
trace.set_tracer_provider(provider)
|
||||
|
||||
return provider
|
||||
|
||||
# Initialize tracing
|
||||
tracer_provider = setup_tracing("alert-processor")
|
||||
|
||||
# Setup logging
|
||||
setup_logging("alert-processor", getattr(settings, 'LOG_LEVEL', 'INFO'))
|
||||
|
||||
# Setup OpenTelemetry logging export if enabled
|
||||
if os.getenv("OTEL_LOGS_EXPORTER", "").lower() == "otlp":
|
||||
try:
|
||||
from shared.monitoring.logs_exporter import setup_otel_logging
|
||||
result = setup_otel_logging("alert-processor", settings.VERSION)
|
||||
if result:
|
||||
logger = structlog.get_logger()
|
||||
logger.info("OpenTelemetry logs export enabled for alert-processor")
|
||||
else:
|
||||
logger = structlog.get_logger()
|
||||
logger.warning("OpenTelemetry logs export setup returned None")
|
||||
except Exception as e:
|
||||
logger = structlog.get_logger()
|
||||
logger.error(f"Failed to setup OpenTelemetry logs export: {e}", exc_info=True)
|
||||
else:
|
||||
logger = structlog.get_logger()
|
||||
logger.info("OpenTelemetry logs export disabled - OTEL_LOGS_EXPORTER not set to otlp")
|
||||
# Initialize logger
|
||||
logger = structlog.get_logger()
|
||||
|
||||
# Global consumer instance
|
||||
consumer: EventConsumer = None
|
||||
|
||||
|
||||
@asynccontextmanager
|
||||
async def lifespan(app: FastAPI):
|
||||
"""
|
||||
Application lifecycle manager.
|
||||
class AlertProcessorService(StandardFastAPIService):
|
||||
"""Alert Processor Service with standardized monitoring setup and RabbitMQ consumer"""
|
||||
|
||||
Startup: Initialize Redis and RabbitMQ consumer
|
||||
Shutdown: Close consumer and Redis connections
|
||||
"""
|
||||
global consumer
|
||||
async def on_startup(self, app):
|
||||
"""Custom startup logic for Alert Processor"""
|
||||
global consumer
|
||||
|
||||
logger.info("alert_processor_starting", version=settings.VERSION)
|
||||
|
||||
# Startup: Initialize Redis and start consumer
|
||||
try:
|
||||
# Initialize Redis connection
|
||||
await initialize_redis(
|
||||
settings.REDIS_URL,
|
||||
@@ -96,69 +34,48 @@ async def lifespan(app: FastAPI):
|
||||
)
|
||||
logger.info("redis_initialized")
|
||||
|
||||
# Start RabbitMQ consumer
|
||||
consumer = EventConsumer()
|
||||
await consumer.start()
|
||||
logger.info("alert_processor_started")
|
||||
logger.info("rabbitmq_consumer_started")
|
||||
|
||||
# Initialize system metrics collection
|
||||
system_metrics = SystemMetricsCollector("alert-processor")
|
||||
logger.info("System metrics collection started")
|
||||
await super().on_startup(app)
|
||||
|
||||
# Note: Metrics are exported via OpenTelemetry OTLP to SigNoz - no metrics server needed
|
||||
logger.info("Metrics export configured via OpenTelemetry OTLP")
|
||||
except Exception as e:
|
||||
logger.error("alert_processor_startup_failed", error=str(e))
|
||||
raise
|
||||
async def on_shutdown(self, app):
|
||||
"""Custom shutdown logic for Alert Processor"""
|
||||
global consumer
|
||||
|
||||
yield
|
||||
await super().on_shutdown(app)
|
||||
|
||||
# Shutdown: Stop consumer and close Redis
|
||||
try:
|
||||
# Stop RabbitMQ consumer
|
||||
if consumer:
|
||||
await consumer.stop()
|
||||
logger.info("rabbitmq_consumer_stopped")
|
||||
|
||||
# Close Redis
|
||||
await close_redis()
|
||||
logger.info("alert_processor_shutdown")
|
||||
except Exception as e:
|
||||
logger.error("alert_processor_shutdown_failed", error=str(e))
|
||||
logger.info("redis_closed")
|
||||
|
||||
|
||||
# Create FastAPI app
|
||||
app = FastAPI(
|
||||
title="Alert Processor Service",
|
||||
# Create service instance
|
||||
service = AlertProcessorService(
|
||||
service_name="alert-processor",
|
||||
app_name="Alert Processor Service",
|
||||
description="Event processing, enrichment, and alert management system",
|
||||
version=settings.VERSION,
|
||||
lifespan=lifespan,
|
||||
debug=settings.DEBUG
|
||||
log_level=getattr(settings, 'LOG_LEVEL', 'INFO'),
|
||||
cors_origins=["*"], # Configure appropriately for production
|
||||
api_prefix="/api/v1",
|
||||
enable_metrics=True,
|
||||
enable_health_checks=True,
|
||||
enable_tracing=True,
|
||||
enable_cors=True
|
||||
)
|
||||
|
||||
# Instrument FastAPI with OpenTelemetry
|
||||
FastAPIInstrumentor.instrument_app(app)
|
||||
# Create FastAPI app
|
||||
app = service.create_app(debug=settings.DEBUG)
|
||||
|
||||
# Instrument httpx for outgoing requests
|
||||
HTTPXClientInstrumentor().instrument()
|
||||
|
||||
# Instrument Redis
|
||||
RedisInstrumentor().instrument()
|
||||
|
||||
# Instrument SQLAlchemy
|
||||
SQLAlchemyInstrumentor().instrument()
|
||||
|
||||
# Initialize metrics collector
|
||||
metrics_collector = MetricsCollector("alert-processor")
|
||||
|
||||
# Add metrics middleware to track HTTP requests
|
||||
add_metrics_middleware(app, metrics_collector)
|
||||
|
||||
# CORS middleware
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=["*"], # Configure appropriately for production
|
||||
allow_credentials=True,
|
||||
allow_methods=["*"],
|
||||
allow_headers=["*"],
|
||||
)
|
||||
|
||||
# Include routers
|
||||
# Add service-specific routers
|
||||
app.include_router(
|
||||
alerts.router,
|
||||
prefix="/api/v1/tenants/{tenant_id}",
|
||||
@@ -172,34 +89,6 @@ app.include_router(
|
||||
)
|
||||
|
||||
|
||||
@app.get("/health")
|
||||
async def health_check():
|
||||
"""
|
||||
Health check endpoint.
|
||||
|
||||
Returns service status and version.
|
||||
"""
|
||||
return {
|
||||
"status": "healthy",
|
||||
"service": settings.SERVICE_NAME,
|
||||
"version": settings.VERSION
|
||||
}
|
||||
|
||||
|
||||
@app.get("/")
|
||||
async def root():
|
||||
"""Root endpoint with service info"""
|
||||
return {
|
||||
"service": settings.SERVICE_NAME,
|
||||
"version": settings.VERSION,
|
||||
"description": "Event processing, enrichment, and alert management system"
|
||||
}
|
||||
|
||||
|
||||
# Note: Metrics are exported via OpenTelemetry OTLP to SigNoz
|
||||
# The /metrics endpoint is not needed as metrics are pushed automatically
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import uvicorn
|
||||
|
||||
|
||||
Reference in New Issue
Block a user