Improve monitoring 5
This commit is contained in:
@@ -3,17 +3,38 @@ OpenTelemetry distributed tracing integration
|
||||
Provides end-to-end request tracking across all services
|
||||
"""
|
||||
|
||||
import os
|
||||
import structlog
|
||||
from typing import Optional
|
||||
from opentelemetry import trace
|
||||
from opentelemetry.sdk.trace import TracerProvider
|
||||
from opentelemetry.sdk.trace.export import BatchSpanProcessor
|
||||
from opentelemetry.sdk.resources import Resource, SERVICE_NAME, SERVICE_VERSION
|
||||
from opentelemetry.sdk.resources import Resource
|
||||
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
|
||||
|
||||
# Core instrumentations (should always be available)
|
||||
from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
|
||||
from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor
|
||||
from opentelemetry.instrumentation.redis import RedisInstrumentor
|
||||
from opentelemetry.instrumentation.sqlalchemy import SQLAlchemyInstrumentor
|
||||
|
||||
# Optional instrumentations (may not be installed in all services)
|
||||
try:
|
||||
from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor
|
||||
HTTPX_AVAILABLE = True
|
||||
except ImportError:
|
||||
HTTPX_AVAILABLE = False
|
||||
|
||||
try:
|
||||
from opentelemetry.instrumentation.redis import RedisInstrumentor
|
||||
REDIS_AVAILABLE = True
|
||||
except ImportError:
|
||||
REDIS_AVAILABLE = False
|
||||
|
||||
try:
|
||||
from opentelemetry.instrumentation.sqlalchemy import SQLAlchemyInstrumentor
|
||||
SQLALCHEMY_AVAILABLE = True
|
||||
except ImportError:
|
||||
SQLALCHEMY_AVAILABLE = False
|
||||
|
||||
from .otel_config import OTelConfig
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
@@ -22,8 +43,8 @@ def setup_tracing(
|
||||
app,
|
||||
service_name: str,
|
||||
service_version: str = "1.0.0",
|
||||
otel_endpoint: str = "http://signoz-otel-collector.bakery-ia:4318"
|
||||
):
|
||||
otel_endpoint: Optional[str] = None
|
||||
) -> Optional[TracerProvider]:
|
||||
"""
|
||||
Setup OpenTelemetry distributed tracing for a FastAPI service.
|
||||
|
||||
@@ -33,35 +54,56 @@ def setup_tracing(
|
||||
- Redis operations
|
||||
- PostgreSQL/SQLAlchemy queries
|
||||
|
||||
Uses gRPC protocol (port 4317) for sending traces to SigNoz.
|
||||
|
||||
Args:
|
||||
app: FastAPI application instance
|
||||
service_name: Name of the service (e.g., "auth-service")
|
||||
service_version: Version of the service
|
||||
otel_endpoint: OpenTelemetry collector endpoint (SigNoz)
|
||||
otel_endpoint: Optional override for OTLP endpoint (gRPC format: host:port)
|
||||
|
||||
Returns:
|
||||
TracerProvider instance if successful, None otherwise
|
||||
|
||||
Example:
|
||||
from shared.monitoring.tracing import setup_tracing
|
||||
|
||||
app = FastAPI(title="Auth Service")
|
||||
setup_tracing(app, "auth-service")
|
||||
tracer_provider = setup_tracing(app, "auth-service", "1.0.0")
|
||||
"""
|
||||
|
||||
# Check if tracing is enabled
|
||||
if not OTelConfig.is_enabled("traces"):
|
||||
logger.info(
|
||||
"Distributed tracing disabled",
|
||||
service=service_name,
|
||||
reason="ENABLE_TRACING not set to 'true'"
|
||||
)
|
||||
return None
|
||||
|
||||
try:
|
||||
# Create resource with service information
|
||||
resource = Resource(attributes={
|
||||
SERVICE_NAME: service_name,
|
||||
SERVICE_VERSION: service_version,
|
||||
"deployment.environment": "production"
|
||||
})
|
||||
# Get endpoints from centralized config
|
||||
endpoints = OTelConfig.get_endpoints()
|
||||
|
||||
# Use provided endpoint or get from config
|
||||
if otel_endpoint:
|
||||
# Clean user-provided endpoint for gRPC
|
||||
grpc_endpoint = OTelConfig._clean_grpc_endpoint(otel_endpoint)
|
||||
else:
|
||||
grpc_endpoint = endpoints.traces_grpc
|
||||
|
||||
# Get resource attributes
|
||||
resource_attrs = OTelConfig.get_resource_attributes(service_name, service_version)
|
||||
resource = Resource(attributes=resource_attrs)
|
||||
|
||||
# Configure tracer provider
|
||||
tracer_provider = TracerProvider(resource=resource)
|
||||
trace.set_tracer_provider(tracer_provider)
|
||||
|
||||
# Configure OTLP exporter to send to SigNoz
|
||||
# Configure OTLP gRPC exporter for traces
|
||||
otlp_exporter = OTLPSpanExporter(
|
||||
endpoint=otel_endpoint,
|
||||
insecure=True # Use TLS in production
|
||||
endpoint=grpc_endpoint,
|
||||
insecure=True # Use insecure=False in production with proper TLS
|
||||
)
|
||||
|
||||
# Add span processor with batching for performance
|
||||
@@ -75,40 +117,46 @@ def setup_tracing(
|
||||
excluded_urls="health,metrics" # Don't trace health/metrics endpoints
|
||||
)
|
||||
|
||||
# Auto-instrument HTTPX (inter-service communication)
|
||||
HTTPXClientInstrumentor().instrument(tracer_provider=tracer_provider)
|
||||
# Auto-instrument HTTPX (inter-service communication) if available
|
||||
if HTTPX_AVAILABLE:
|
||||
try:
|
||||
HTTPXClientInstrumentor().instrument(tracer_provider=tracer_provider)
|
||||
logger.debug("HTTPX instrumentation enabled")
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to instrument HTTPX: {e}")
|
||||
|
||||
# Auto-instrument Redis
|
||||
try:
|
||||
RedisInstrumentor().instrument(tracer_provider=tracer_provider)
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to instrument Redis: {e}")
|
||||
# Auto-instrument Redis if available
|
||||
if REDIS_AVAILABLE:
|
||||
try:
|
||||
RedisInstrumentor().instrument(tracer_provider=tracer_provider)
|
||||
logger.debug("Redis instrumentation enabled")
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to instrument Redis: {e}")
|
||||
|
||||
# Auto-instrument PostgreSQL (psycopg2) - skip if not available
|
||||
# Most services use asyncpg instead of psycopg2
|
||||
# try:
|
||||
# Psycopg2Instrumentor().instrument(tracer_provider=tracer_provider)
|
||||
# except Exception as e:
|
||||
# logger.warning(f"Failed to instrument Psycopg2: {e}")
|
||||
|
||||
# Auto-instrument SQLAlchemy
|
||||
try:
|
||||
SQLAlchemyInstrumentor().instrument(tracer_provider=tracer_provider)
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to instrument SQLAlchemy: {e}")
|
||||
# Auto-instrument SQLAlchemy if available
|
||||
if SQLALCHEMY_AVAILABLE:
|
||||
try:
|
||||
SQLAlchemyInstrumentor().instrument(tracer_provider=tracer_provider)
|
||||
logger.debug("SQLAlchemy instrumentation enabled")
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to instrument SQLAlchemy: {e}")
|
||||
|
||||
logger.info(
|
||||
"Distributed tracing configured",
|
||||
"Distributed tracing configured successfully",
|
||||
service=service_name,
|
||||
otel_endpoint=otel_endpoint
|
||||
grpc_endpoint=grpc_endpoint,
|
||||
protocol="grpc"
|
||||
)
|
||||
|
||||
return tracer_provider
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"Failed to setup tracing - continuing without it",
|
||||
service=service_name,
|
||||
error=str(e)
|
||||
)
|
||||
return None
|
||||
|
||||
|
||||
def get_current_trace_id() -> Optional[str]:
|
||||
|
||||
Reference in New Issue
Block a user