Improve monitoring 5

This commit is contained in:
Urtzi Alfaro
2026-01-09 23:14:12 +01:00
parent 22dab143ba
commit c05538cafb
23 changed files with 4737 additions and 1932 deletions

View File

@@ -3,192 +3,74 @@ Demo Session Service - Main Application
Manages isolated demo sessions with ephemeral data
"""
from fastapi import FastAPI, Request, Response
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
import structlog
from contextlib import asynccontextmanager
import os
from app.core import settings, DatabaseManager
from app.api import demo_sessions, demo_accounts, demo_operations, internal
from shared.redis_utils import initialize_redis, close_redis
from shared.monitoring.logging import setup_logging
from shared.monitoring.metrics import MetricsCollector, add_metrics_middleware
from shared.monitoring.system_metrics import SystemMetricsCollector
from shared.service_base import StandardFastAPIService
# OpenTelemetry imports
from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor
from opentelemetry.instrumentation.redis import RedisInstrumentor
from opentelemetry.sdk.resources import Resource
# Initialize logger
logger = structlog.get_logger()
# Configure OpenTelemetry tracing
def setup_tracing(service_name: str = "demo-session"):
    """Install a global OpenTelemetry tracer provider that exports via OTLP/gRPC.

    The collector endpoint is read from ``OTEL_EXPORTER_OTLP_ENDPOINT``; when
    that variable is unset, the in-cluster SigNoz collector address below is
    used. The exporter is created with ``insecure=True``.

    Args:
        service_name: Value stored under the ``service.name`` resource
            attribute attached to every span.

    Returns:
        The ``TracerProvider`` that was registered as the global provider.
    """
    service_resource = Resource.create({"service.name": service_name})

    collector_endpoint = os.getenv(
        "OTEL_EXPORTER_OTLP_ENDPOINT",
        "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4317",
    )
    span_exporter = OTLPSpanExporter(endpoint=collector_endpoint, insecure=True)

    sdk_provider = TracerProvider(resource=service_resource)
    # Spans are handed to the exporter through a batching processor.
    sdk_provider.add_span_processor(BatchSpanProcessor(span_exporter))

    trace.set_tracer_provider(sdk_provider)
    return sdk_provider
# Tracing is installed first, at import time.
tracer_provider = setup_tracing("demo-session")

# Logging setup; falls back to INFO when settings has no LOG_LEVEL attribute.
setup_logging("demo-session", getattr(settings, 'LOG_LEVEL', 'INFO'))

# OTLP log export is opt-in through the OTEL_LOGS_EXPORTER environment variable.
if os.getenv("OTEL_LOGS_EXPORTER", "").lower() != "otlp":
    logger = structlog.get_logger()
    logger.info("OpenTelemetry logs export disabled - OTEL_LOGS_EXPORTER not set to otlp")
else:
    try:
        # Imported lazily so the exporter module is only loaded when requested.
        from shared.monitoring.logs_exporter import setup_otel_logging

        result = setup_otel_logging("demo-session", settings.VERSION)
        # Re-fetch the logger after the exporter setup has run.
        logger = structlog.get_logger()
        if not result:
            logger.warning("OpenTelemetry logs export setup returned None")
        else:
            logger.info("OpenTelemetry logs export enabled for demo-session")
    except Exception as e:
        # Best effort: a failing log exporter must not stop the service from starting.
        logger = structlog.get_logger()
        logger.error(f"Failed to setup OpenTelemetry logs export: {e}", exc_info=True)
# Initialize database
# Initialize database manager
db_manager = DatabaseManager()
@asynccontextmanager
async def lifespan(app: FastAPI):
"""Application lifespan handler"""
logger.info("Starting Demo Session Service", version=settings.VERSION)
class DemoSessionService(StandardFastAPIService):
"""Demo Session Service with standardized monitoring setup"""
# Initialize database
db_manager.initialize()
async def on_startup(self, app):
"""Custom startup logic for Demo Session"""
# Initialize database
db_manager.initialize()
logger.info("Database initialized")
# Initialize Redis using shared implementation
await initialize_redis(
redis_url=settings.REDIS_URL,
db=0,
max_connections=50
)
# Initialize Redis
await initialize_redis(
redis_url=settings.REDIS_URL,
db=0,
max_connections=50
)
logger.info("Redis initialized")
# Initialize system metrics collection
system_metrics = SystemMetricsCollector("demo-session")
logger.info("System metrics collection started")
await super().on_startup(app)
# Note: Metrics are exported via OpenTelemetry OTLP to SigNoz - no metrics server needed
logger.info("Metrics export configured via OpenTelemetry OTLP")
async def on_shutdown(self, app):
"""Custom shutdown logic for Demo Session"""
await super().on_shutdown(app)
logger.info("Demo Session Service started successfully")
yield
# Cleanup on shutdown
await db_manager.close()
await close_redis()
logger.info("Demo Session Service stopped")
# Cleanup
await db_manager.close()
await close_redis()
logger.info("Database and Redis connections closed")
app = FastAPI(
title="Demo Session Service",
# Create service instance
service = DemoSessionService(
service_name="demo-session",
app_name="Demo Session Service",
description="Manages isolated demo sessions for prospect users",
version=settings.VERSION,
lifespan=lifespan
log_level=getattr(settings, 'LOG_LEVEL', 'INFO'),
cors_origins=["*"], # Configure appropriately for production
api_prefix="/api/v1",
enable_metrics=True,
enable_health_checks=True,
enable_tracing=True,
enable_cors=True
)
# Instrument FastAPI with OpenTelemetry
FastAPIInstrumentor.instrument_app(app)
# Create FastAPI app
app = service.create_app(debug=settings.DEBUG)
# Instrument httpx for outgoing requests
HTTPXClientInstrumentor().instrument()
# Instrument Redis
RedisInstrumentor().instrument()
# Initialize metrics collector
metrics_collector = MetricsCollector("demo-session")
# Add metrics middleware to track HTTP requests
add_metrics_middleware(app, metrics_collector)
# CORS middleware
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
@app.exception_handler(Exception)
async def global_exception_handler(request: Request, exc: Exception):
    """Log an otherwise-unhandled exception and reply with a generic HTTP 500.

    Args:
        request: The request being processed when the exception escaped.
        exc: The exception that no more specific handler dealt with.

    Returns:
        A ``JSONResponse`` with status 500 and a fixed, non-revealing detail
        message; exception details go to the log only, never to the client.
    """
    logger.error(
        "Unhandled exception",
        path=request.url.path,
        method=request.method,
        error=str(exc),
        # str(exc) alone is often empty or ambiguous; record the class too.
        error_type=type(exc).__name__,
        # Pass the exception itself so the traceback is attached to the log
        # event without relying on sys.exc_info() being set here.
        exc_info=exc,
    )
    return JSONResponse(
        status_code=500,
        content={"detail": "Internal server error"},
    )
# Include routers
# Add service-specific routers
app.include_router(demo_sessions.router)
app.include_router(demo_accounts.router)
app.include_router(demo_operations.router)
app.include_router(internal.router)
@app.get("/")
async def root():
    """Return the service name, configured version and a static running status."""
    return dict(
        service="demo-session",
        version=settings.VERSION,
        status="running",
    )
@app.get("/health")
async def health():
    """Report service health based on the Redis manager's health check.

    Returns:
        A dict with ``status`` ("healthy" or "degraded"), the service name,
        the configured version and ``redis`` ("connected" or "disconnected").
    """
    # Imported here rather than at module level, as in the original handler.
    from shared.redis_utils import get_redis_manager

    manager = await get_redis_manager()
    redis_ok = await manager.health_check()

    if redis_ok:
        overall_status, redis_state = "healthy", "connected"
    else:
        overall_status, redis_state = "degraded", "disconnected"

    return {
        "status": overall_status,
        "service": "demo-session",
        "version": settings.VERSION,
        "redis": redis_state,
    }
# Note: Metrics are exported via OpenTelemetry OTLP to SigNoz
# The /metrics endpoint is not needed as metrics are pushed automatically
if __name__ == "__main__":
import uvicorn
uvicorn.run(