Add SigNoz

This commit is contained in:
Urtzi Alfaro
2026-01-08 12:58:00 +01:00
parent 07178f8972
commit dfb7e4b237
40 changed files with 2049 additions and 3935 deletions

View File

@@ -1,22 +1,50 @@
"""Main FastAPI application for AI Insights Service."""
from fastapi import FastAPI
from fastapi import FastAPI, Response
from fastapi.middleware.cors import CORSMiddleware
from contextlib import asynccontextmanager
import structlog
import os
from app.core.config import settings
from app.core.database import init_db, close_db
from app.api import insights
from shared.monitoring.logging import setup_logging
from shared.monitoring.metrics import MetricsCollector, add_metrics_middleware
# Configure structured logging
structlog.configure(
processors=[
structlog.processors.TimeStamper(fmt="iso"),
structlog.processors.JSONRenderer()
]
)
# OpenTelemetry imports
from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor
from opentelemetry.instrumentation.redis import RedisInstrumentor
from opentelemetry.instrumentation.sqlalchemy import SQLAlchemyInstrumentor
from opentelemetry.sdk.resources import Resource
# Configure OpenTelemetry tracing
def setup_tracing(service_name: str = "ai-insights"):
    """Install a global OpenTelemetry tracer provider for this service.

    Spans are exported over OTLP/gRPC to the collector named by the
    OTEL_EXPORTER_OTLP_ENDPOINT environment variable (falling back to the
    in-cluster otel-collector service). Returns the provider so the caller
    can keep a handle to it (e.g. for shutdown).
    """
    endpoint = os.getenv(
        "OTEL_EXPORTER_OTLP_ENDPOINT",
        "http://otel-collector.monitoring.svc.cluster.local:4317",
    )
    # insecure=True: plain (non-TLS) gRPC, suitable for in-cluster traffic.
    exporter = OTLPSpanExporter(endpoint=endpoint, insecure=True)
    provider = TracerProvider(
        resource=Resource.create({"service.name": service_name})
    )
    provider.add_span_processor(BatchSpanProcessor(exporter))
    trace.set_tracer_provider(provider)
    return provider
# Initialize tracing
tracer_provider = setup_tracing("ai-insights")
# Setup logging
setup_logging("ai-insights", getattr(settings, 'LOG_LEVEL', 'INFO'))
logger = structlog.get_logger()
@@ -28,6 +56,10 @@ async def lifespan(app: FastAPI):
await init_db()
logger.info("Database initialized")
# Start metrics server
metrics_collector.start_metrics_server(8080)
logger.info("Metrics server started on port 8080")
yield
# Shutdown
@@ -44,6 +76,24 @@ app = FastAPI(
lifespan=lifespan
)
# Auto-instrumentation: these instrumentors attach to the global tracer
# provider installed by setup_tracing() above, so order matters.
# Instrument FastAPI with OpenTelemetry
FastAPIInstrumentor.instrument_app(app)
# Instrument httpx for outgoing requests
HTTPXClientInstrumentor().instrument()
# Instrument Redis
RedisInstrumentor().instrument()
# Instrument SQLAlchemy
SQLAlchemyInstrumentor().instrument()
# Initialize metrics collector (also referenced by lifespan and /metrics)
metrics_collector = MetricsCollector("ai-insights")
# Add metrics middleware to track HTTP requests
add_metrics_middleware(app, metrics_collector)
# CORS middleware
app.add_middleware(
CORSMiddleware,
@@ -81,6 +131,15 @@ async def health_check():
}
@app.get("/metrics")
async def metrics():
    """Expose collected metrics in the Prometheus text exposition format."""
    payload = metrics_collector.get_metrics()
    return Response(
        content=payload,
        media_type="text/plain; version=0.0.4; charset=utf-8",
    )
if __name__ == "__main__":
import uvicorn

View File

@@ -29,6 +29,16 @@ pytz==2023.3
# Logging
structlog==23.2.0
# Monitoring and Observability
prometheus-client==0.23.1
opentelemetry-api==1.27.0
opentelemetry-sdk==1.27.0
opentelemetry-instrumentation-fastapi==0.48b0
opentelemetry-exporter-otlp-proto-grpc==1.27.0
opentelemetry-instrumentation-httpx==0.48b0
opentelemetry-instrumentation-redis==0.48b0
opentelemetry-instrumentation-sqlalchemy==0.48b0
# Machine Learning (for confidence scoring and impact estimation)
numpy==1.26.2
pandas==2.1.3

View File

@@ -4,25 +4,52 @@ Alert Processor Service v2.0
Main FastAPI application with RabbitMQ consumer lifecycle management.
"""
from fastapi import FastAPI
from fastapi import FastAPI, Response
from fastapi.middleware.cors import CORSMiddleware
from contextlib import asynccontextmanager
import structlog
import os
from app.core.config import settings
from app.consumer.event_consumer import EventConsumer
from app.api import alerts, sse
from shared.redis_utils import initialize_redis, close_redis
from shared.monitoring.logging import setup_logging
from shared.monitoring.metrics import MetricsCollector, add_metrics_middleware
# Configure structured logging
structlog.configure(
processors=[
structlog.processors.TimeStamper(fmt="iso"),
structlog.processors.add_log_level,
structlog.processors.JSONRenderer()
]
)
# OpenTelemetry imports
from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor
from opentelemetry.instrumentation.redis import RedisInstrumentor
from opentelemetry.instrumentation.sqlalchemy import SQLAlchemyInstrumentor
from opentelemetry.sdk.resources import Resource
# Configure OpenTelemetry tracing
def setup_tracing(service_name: str = "alert-processor"):
    """Install a global OpenTelemetry tracer provider for this service.

    Spans are exported over OTLP/gRPC to the collector named by the
    OTEL_EXPORTER_OTLP_ENDPOINT environment variable (falling back to the
    in-cluster otel-collector service). Returns the provider so the caller
    can keep a handle to it (e.g. for shutdown).
    """
    endpoint = os.getenv(
        "OTEL_EXPORTER_OTLP_ENDPOINT",
        "http://otel-collector.monitoring.svc.cluster.local:4317",
    )
    # insecure=True: plain (non-TLS) gRPC, suitable for in-cluster traffic.
    exporter = OTLPSpanExporter(endpoint=endpoint, insecure=True)
    provider = TracerProvider(
        resource=Resource.create({"service.name": service_name})
    )
    provider.add_span_processor(BatchSpanProcessor(exporter))
    trace.set_tracer_provider(provider)
    return provider
# Initialize tracing
tracer_provider = setup_tracing("alert-processor")
# Setup logging
setup_logging("alert-processor", getattr(settings, 'LOG_LEVEL', 'INFO'))
logger = structlog.get_logger()
# Global consumer instance
@@ -54,6 +81,10 @@ async def lifespan(app: FastAPI):
consumer = EventConsumer()
await consumer.start()
logger.info("alert_processor_started")
# Start metrics server
metrics_collector.start_metrics_server(8080)
logger.info("Metrics server started on port 8080")
except Exception as e:
logger.error("alert_processor_startup_failed", error=str(e))
raise
@@ -79,6 +110,24 @@ app = FastAPI(
debug=settings.DEBUG
)
# Auto-instrumentation: these instrumentors attach to the global tracer
# provider installed by setup_tracing() above, so order matters.
# Instrument FastAPI with OpenTelemetry
FastAPIInstrumentor.instrument_app(app)
# Instrument httpx for outgoing requests
HTTPXClientInstrumentor().instrument()
# Instrument Redis
RedisInstrumentor().instrument()
# Instrument SQLAlchemy
SQLAlchemyInstrumentor().instrument()
# Initialize metrics collector (also referenced by lifespan and /metrics)
metrics_collector = MetricsCollector("alert-processor")
# Add metrics middleware to track HTTP requests
add_metrics_middleware(app, metrics_collector)
# CORS middleware
app.add_middleware(
CORSMiddleware,
@@ -126,6 +175,15 @@ async def root():
}
@app.get("/metrics")
async def metrics():
    """Expose collected metrics in the Prometheus text exposition format."""
    payload = metrics_collector.get_metrics()
    return Response(
        content=payload,
        media_type="text/plain; version=0.0.4; charset=utf-8",
    )
if __name__ == "__main__":
import uvicorn

View File

@@ -32,3 +32,13 @@ python-dateutil==2.8.2
# Authentication
python-jose[cryptography]==3.3.0
# Monitoring and Observability
prometheus-client==0.23.1
opentelemetry-api==1.27.0
opentelemetry-sdk==1.27.0
opentelemetry-instrumentation-fastapi==0.48b0
opentelemetry-exporter-otlp-proto-grpc==1.27.0
opentelemetry-instrumentation-httpx==0.48b0
opentelemetry-instrumentation-redis==0.48b0
opentelemetry-instrumentation-sqlalchemy==0.48b0

View File

@@ -3,16 +3,51 @@ Demo Session Service - Main Application
Manages isolated demo sessions with ephemeral data
"""
from fastapi import FastAPI, Request
from fastapi import FastAPI, Request, Response
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
import structlog
from contextlib import asynccontextmanager
import os
from app.core import settings, DatabaseManager
from app.api import demo_sessions, demo_accounts, demo_operations, internal
from shared.redis_utils import initialize_redis, close_redis
from shared.monitoring.logging import setup_logging
from shared.monitoring.metrics import MetricsCollector, add_metrics_middleware
# OpenTelemetry imports
from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor
from opentelemetry.instrumentation.redis import RedisInstrumentor
from opentelemetry.sdk.resources import Resource
# Configure OpenTelemetry tracing
def setup_tracing(service_name: str = "demo-session"):
    """Install a global OpenTelemetry tracer provider for this service.

    Spans are exported over OTLP/gRPC to the collector named by the
    OTEL_EXPORTER_OTLP_ENDPOINT environment variable (falling back to the
    in-cluster otel-collector service). Returns the provider so the caller
    can keep a handle to it (e.g. for shutdown).
    """
    endpoint = os.getenv(
        "OTEL_EXPORTER_OTLP_ENDPOINT",
        "http://otel-collector.monitoring.svc.cluster.local:4317",
    )
    # insecure=True: plain (non-TLS) gRPC, suitable for in-cluster traffic.
    exporter = OTLPSpanExporter(endpoint=endpoint, insecure=True)
    provider = TracerProvider(
        resource=Resource.create({"service.name": service_name})
    )
    provider.add_span_processor(BatchSpanProcessor(exporter))
    trace.set_tracer_provider(provider)
    return provider
# Initialize tracing
tracer_provider = setup_tracing("demo-session")
# Setup logging
setup_logging("demo-session", getattr(settings, 'LOG_LEVEL', 'INFO'))
logger = structlog.get_logger()
# Initialize database
@@ -34,6 +69,10 @@ async def lifespan(app: FastAPI):
max_connections=50
)
# Start metrics server
metrics_collector.start_metrics_server(8080)
logger.info("Metrics server started on port 8080")
logger.info("Demo Session Service started successfully")
yield
@@ -52,6 +91,21 @@ app = FastAPI(
lifespan=lifespan
)
# Auto-instrumentation: these instrumentors attach to the global tracer
# provider installed by setup_tracing() above, so order matters.
# Instrument FastAPI with OpenTelemetry
FastAPIInstrumentor.instrument_app(app)
# Instrument httpx for outgoing requests
HTTPXClientInstrumentor().instrument()
# Instrument Redis
RedisInstrumentor().instrument()
# Initialize metrics collector (also referenced by lifespan and /metrics)
metrics_collector = MetricsCollector("demo-session")
# Add metrics middleware to track HTTP requests
add_metrics_middleware(app, metrics_collector)
# CORS middleware
app.add_middleware(
CORSMiddleware,
@@ -110,6 +164,15 @@ async def health():
}
@app.get("/metrics")
async def metrics():
    """Expose collected metrics in the Prometheus text exposition format."""
    payload = metrics_collector.get_metrics()
    return Response(
        content=payload,
        media_type="text/plain; version=0.0.4; charset=utf-8",
    )
if __name__ == "__main__":
import uvicorn
uvicorn.run(

View File

@@ -18,3 +18,11 @@ prometheus-client==0.23.1
aio-pika==9.4.3
email-validator==2.2.0
pytz==2024.2
# OpenTelemetry for distributed tracing
opentelemetry-api==1.27.0
opentelemetry-sdk==1.27.0
opentelemetry-instrumentation-fastapi==0.48b0
opentelemetry-exporter-otlp-proto-grpc==1.27.0
opentelemetry-instrumentation-httpx==0.48b0
opentelemetry-instrumentation-redis==0.48b0