2025-10-15 16:12:49 +02:00
|
|
|
"""
|
|
|
|
|
OpenTelemetry distributed tracing integration
|
|
|
|
|
Provides end-to-end request tracking across all services
|
|
|
|
|
"""
|
|
|
|
|
|
2026-01-09 23:14:12 +01:00
|
|
|
import os
|
2025-10-15 16:12:49 +02:00
|
|
|
import structlog
|
|
|
|
|
from typing import Optional
|
|
|
|
|
from opentelemetry import trace
|
|
|
|
|
from opentelemetry.sdk.trace import TracerProvider
|
|
|
|
|
from opentelemetry.sdk.trace.export import BatchSpanProcessor
|
2026-01-09 23:14:12 +01:00
|
|
|
from opentelemetry.sdk.resources import Resource
|
2025-10-15 16:12:49 +02:00
|
|
|
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
|
2026-01-09 23:14:12 +01:00
|
|
|
|
|
|
|
|
# Core instrumentations (should always be available)
|
2025-10-15 16:12:49 +02:00
|
|
|
from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
|
2026-01-09 23:14:12 +01:00
|
|
|
|
|
|
|
|
# Optional instrumentations (may not be installed in all services)
|
|
|
|
|
try:
|
|
|
|
|
from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor
|
|
|
|
|
HTTPX_AVAILABLE = True
|
|
|
|
|
except ImportError:
|
|
|
|
|
HTTPX_AVAILABLE = False
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
from opentelemetry.instrumentation.redis import RedisInstrumentor
|
|
|
|
|
REDIS_AVAILABLE = True
|
|
|
|
|
except ImportError:
|
|
|
|
|
REDIS_AVAILABLE = False
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
from opentelemetry.instrumentation.sqlalchemy import SQLAlchemyInstrumentor
|
|
|
|
|
SQLALCHEMY_AVAILABLE = True
|
|
|
|
|
except ImportError:
|
|
|
|
|
SQLALCHEMY_AVAILABLE = False
|
|
|
|
|
|
|
|
|
|
from .otel_config import OTelConfig
|
2025-10-15 16:12:49 +02:00
|
|
|
|
|
|
|
|
logger = structlog.get_logger()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _instrument_optional(label: str, instrumentor_cls, tracer_provider) -> None:
    """Best-effort activation of a single optional auto-instrumentation.

    Any failure is logged as a warning and swallowed on purpose: a broken
    optional instrumentation must not prevent the rest of tracing setup.
    """
    try:
        instrumentor_cls().instrument(tracer_provider=tracer_provider)
        logger.debug(f"{label} instrumentation enabled")
    except Exception as e:
        logger.warning(f"Failed to instrument {label}: {e}")


def setup_tracing(
    app,
    service_name: str,
    service_version: str = "1.0.0",
    otel_endpoint: Optional[str] = None,
) -> Optional[TracerProvider]:
    """
    Setup OpenTelemetry distributed tracing for a FastAPI service.

    Automatically instruments:
    - FastAPI endpoints
    - HTTPX client requests (inter-service calls), if the package is installed
    - Redis operations, if the package is installed
    - PostgreSQL/SQLAlchemy queries, if the package is installed

    Uses gRPC protocol (port 4317) for sending traces to SigNoz.

    Setup never raises: any error is logged and ``None`` is returned so the
    service can keep running without tracing.

    Args:
        app: FastAPI application instance
        service_name: Name of the service (e.g., "auth-service")
        service_version: Version of the service
        otel_endpoint: Optional override for OTLP endpoint (gRPC format: host:port)

    Returns:
        TracerProvider instance if successful, None if tracing is disabled
        or setup failed

    Example:
        from shared.monitoring.tracing import setup_tracing

        app = FastAPI(title="Auth Service")
        tracer_provider = setup_tracing(app, "auth-service", "1.0.0")
    """
    # Check if tracing is enabled
    if not OTelConfig.is_enabled("traces"):
        logger.info(
            "Distributed tracing disabled",
            service=service_name,
            reason="ENABLE_TRACING not set to 'true'"
        )
        return None

    try:
        # Get endpoints from centralized config
        endpoints = OTelConfig.get_endpoints()

        # Use provided endpoint (cleaned for gRPC) or fall back to config
        if otel_endpoint:
            grpc_endpoint = OTelConfig._clean_grpc_endpoint(otel_endpoint)
        else:
            grpc_endpoint = endpoints.traces_grpc

        # Get resource attributes
        resource_attrs = OTelConfig.get_resource_attributes(service_name, service_version)
        resource = Resource(attributes=resource_attrs)

        # Build the provider and its export pipeline first ...
        tracer_provider = TracerProvider(resource=resource)

        # Configure OTLP gRPC exporter for traces
        otlp_exporter = OTLPSpanExporter(
            endpoint=grpc_endpoint,
            insecure=True  # Set insecure=False in production with proper TLS
        )

        # Add span processor with batching for performance
        span_processor = BatchSpanProcessor(otlp_exporter)
        tracer_provider.add_span_processor(span_processor)

        # ... and only then register it globally. Registering earlier would
        # leave a provider without any exporter installed process-wide if the
        # exporter construction above failed.
        trace.set_tracer_provider(tracer_provider)

        # Auto-instrument FastAPI
        FastAPIInstrumentor.instrument_app(
            app,
            tracer_provider=tracer_provider,
            excluded_urls="health,metrics"  # Don't trace health/metrics endpoints
        )

        # Optional instrumentations: the class names only exist when the
        # corresponding import at module top succeeded, hence the flag checks.
        if HTTPX_AVAILABLE:
            _instrument_optional("HTTPX", HTTPXClientInstrumentor, tracer_provider)

        if REDIS_AVAILABLE:
            _instrument_optional("Redis", RedisInstrumentor, tracer_provider)

        if SQLALCHEMY_AVAILABLE:
            _instrument_optional("SQLAlchemy", SQLAlchemyInstrumentor, tracer_provider)

        logger.info(
            "Distributed tracing configured successfully",
            service=service_name,
            grpc_endpoint=grpc_endpoint,
            protocol="grpc"
        )

        return tracer_provider

    except Exception as e:
        # Deliberate top-level boundary: tracing is optional infrastructure.
        logger.error(
            "Failed to setup tracing - continuing without it",
            service=service_name,
            error=str(e)
        )
        return None
|
2025-10-15 16:12:49 +02:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_current_trace_id() -> Optional[str]:
    """
    Return the ID of the trace the current span belongs to.

    Intended for correlating log records with traces.

    Returns:
        The trace ID rendered as a zero-padded, 32-character lowercase hex
        string, or None when there is no current span with a valid context.
    """
    current = trace.get_current_span()
    if not current:
        return None

    context = current.get_span_context()
    if not context.is_valid:
        return None

    return format(context.trace_id, '032x')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_current_span_id() -> Optional[str]:
    """
    Return the ID of the current span.

    Returns:
        The span ID rendered as a zero-padded, 16-character lowercase hex
        string, or None when there is no current span with a valid context.
    """
    current = trace.get_current_span()
    if not current:
        return None

    context = current.get_span_context()
    if not context.is_valid:
        return None

    return format(context.span_id, '016x')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def add_trace_attributes(**attributes):
    """
    Attach custom key/value attributes to the current span.

    Every value is converted with ``str()`` before it is set. The call is a
    no-op when there is no current span with a valid context.

    Example:
        add_trace_attributes(
            user_id="123",
            tenant_id="abc",
            operation="user_registration"
        )
    """
    current = trace.get_current_span()
    if not (current and current.get_span_context().is_valid):
        return

    for attr_name in attributes:
        current.set_attribute(attr_name, str(attributes[attr_name]))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def add_trace_event(name: str, **attributes):
    """
    Add an event to the current span (for important operations).

    Attribute values that are already str, bool, int, float, bytes, or a
    list/tuple are forwarded unchanged. Any other value (e.g. UUID, datetime,
    None, dict) is converted with ``str()``, matching what
    ``add_trace_attributes`` does, so that it is recorded on the event rather
    than rejected as an unsupported attribute type.

    The call is a no-op when there is no current span with a valid context.

    Args:
        name: Name of the event
        **attributes: Key/value pairs to attach to the event

    Example:
        add_trace_event("user_authenticated", user_id="123", method="jwt")
    """
    span = trace.get_current_span()
    if not (span and span.get_span_context().is_valid):
        return

    # Types forwarded as-is; previously valid inputs therefore behave exactly
    # as before. Sequences are left for the SDK to validate.
    passthrough_types = (str, bool, int, float, bytes, list, tuple)
    event_attributes = {
        key: value if isinstance(value, passthrough_types) else str(value)
        for key, value in attributes.items()
    }
    span.add_event(name, event_attributes)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def record_exception(exception: Exception):
    """
    Record an exception on the current span and mark the span as failed.

    The span status is set to ERROR with ``str(exception)`` as description.
    The call is a no-op when there is no current span with a valid context.

    Args:
        exception: The exception to record
    """
    current = trace.get_current_span()
    if not (current and current.get_span_context().is_valid):
        return

    current.record_exception(exception)
    error_status = trace.Status(trace.StatusCode.ERROR, str(exception))
    current.set_status(error_status)
|