"""
Unified OpenTelemetry Telemetry Setup

Provides a single entry point to configure all telemetry signals:
- Traces: Distributed tracing across services
- Metrics: OTLP metrics export + system metrics collection
- Logs: Structured logs with trace correlation

All signals are exported to SigNoz via OTLP.
"""
|
||
|
|
|
||
|
|
import os
|
||
|
|
import structlog
|
||
|
|
from typing import Optional, Dict, Any, Tuple
|
||
|
|
from dataclasses import dataclass
|
||
|
|
|
||
|
|
from .otel_config import OTelConfig
|
||
|
|
from .tracing import setup_tracing
|
||
|
|
from .metrics_exporter import setup_otel_metrics
|
||
|
|
from .logs_exporter import setup_otel_logging
|
||
|
|
from .system_metrics import setup_all_metrics, SystemMetricsCollector, ApplicationMetricsCollector
|
||
|
|
|
||
|
|
# Module-level structlog logger; setup_telemetry uses it to report setup progress.
logger = structlog.get_logger()
|
||
|
|
|
||
|
|
|
||
|
|
@dataclass
class TelemetryProviders:
    """
    Container for all OpenTelemetry providers and collectors.

    Every field defaults to ``None``. ``setup_telemetry`` assigns a field only
    when the corresponding signal was enabled and its setup call returned a
    value, so callers should check a field for ``None`` before using it.

    Attributes:
        tracer_provider: Provider for distributed tracing
        meter_provider: Provider for metrics export
        logging_handler: Handler for structured logs
        system_metrics: Collector for system-level metrics (CPU, memory, disk, network)
        app_metrics: Collector for application-level metrics (HTTP, DB)
    """

    tracer_provider: Optional[Any] = None
    meter_provider: Optional[Any] = None
    logging_handler: Optional[Any] = None
    system_metrics: Optional[SystemMetricsCollector] = None
    app_metrics: Optional[ApplicationMetricsCollector] = None
|
||
|
|
|
||
|
|
|
||
|
|
def setup_telemetry(
    app,
    service_name: str,
    service_version: str = "1.0.0",
    enable_traces: bool = True,
    enable_metrics: bool = True,
    enable_logs: bool = True,
    enable_system_metrics: bool = True,
    metrics_protocol: Optional[str] = None,  # "grpc" or "http", defaults to grpc
    export_interval_millis: int = 60000
) -> TelemetryProviders:
    """
    Setup all OpenTelemetry telemetry signals (traces, metrics, logs) for a service.

    This is the UNIFIED setup function that configures everything:
    - Distributed tracing (gRPC, port 4317)
    - Metrics export (gRPC by default, port 4317)
    - System metrics collection (CPU, memory, disk, network)
    - Application metrics (HTTP requests, DB queries)
    - Structured logs export (HTTP, port 4318)

    All signals use the centralized OTelConfig for endpoint management.

    A signal is configured only when its ``enable_*`` flag is true AND
    ``OTelConfig.is_enabled`` reports it as enabled. Exceptions raised while
    configuring a signal are logged and swallowed, so a failing signal never
    prevents the others (or the service) from starting; the corresponding
    field on the returned object simply stays ``None``.

    The system/application metrics collectors are created only after the
    OTLP meter provider was initialized successfully.

    Args:
        app: FastAPI application instance
        service_name: Name of the service (e.g., "auth-service")
        service_version: Version of the service
        enable_traces: Enable distributed tracing (default: True)
        enable_metrics: Enable metrics export to OTLP (default: True)
        enable_logs: Enable logs export to OTLP (default: True)
        enable_system_metrics: Enable system metrics collection (default: True, can be disabled via ENABLE_SYSTEM_METRICS env)
        metrics_protocol: Protocol for metrics ("grpc" or "http", default: "grpc")
        export_interval_millis: How often to export metrics in milliseconds

    Returns:
        TelemetryProviders containing all initialized providers and collectors

    Example:
        from shared.monitoring.telemetry import setup_telemetry

        app = FastAPI(title="Auth Service")
        providers = setup_telemetry(
            app,
            service_name="auth-service",
            service_version="1.0.0"
        )

        # All telemetry is now configured:
        # - Traces automatically captured for HTTP requests
        # - System metrics automatically collected
        # - Application metrics via providers.app_metrics
        # - Logs automatically correlated with traces
    """
    logger.info(
        "Setting up unified OpenTelemetry telemetry",
        service=service_name,
        version=service_version,
        traces=enable_traces,
        metrics=enable_metrics,
        logs=enable_logs,
        system_metrics=enable_system_metrics
    )

    providers = TelemetryProviders()

    # The OTelConfig checks stay outside the helpers' try blocks on purpose:
    # a failure while reading the configuration itself is not swallowed.
    if enable_traces and OTelConfig.is_enabled("traces"):
        _configure_tracing(app, providers, service_name, service_version)

    if enable_metrics and OTelConfig.is_enabled("metrics"):
        _configure_metrics(
            providers,
            service_name,
            service_version,
            metrics_protocol,
            export_interval_millis,
            enable_system_metrics
        )

    if enable_logs and OTelConfig.is_enabled("logs"):
        _configure_logs(providers, service_name, service_version)

    _log_setup_summary(providers, service_name, service_version, metrics_protocol)

    return providers


def _configure_tracing(app, providers: TelemetryProviders, service_name: str, service_version: str) -> None:
    """Initialize distributed tracing and store the provider; failures are logged, not raised."""
    try:
        providers.tracer_provider = setup_tracing(
            app,
            service_name=service_name,
            service_version=service_version
        )
        if providers.tracer_provider:
            logger.info("✓ Distributed tracing configured", service=service_name)
        else:
            logger.warning("✗ Distributed tracing setup returned None", service=service_name)
    except Exception as e:
        logger.error("✗ Failed to setup distributed tracing", service=service_name, error=str(e))


def _configure_metrics(
    providers: TelemetryProviders,
    service_name: str,
    service_version: str,
    metrics_protocol: Optional[str],
    export_interval_millis: int,
    enable_system_metrics: bool
) -> None:
    """Initialize OTLP metrics export and, on success, the metrics collectors; failures are logged, not raised."""
    try:
        providers.meter_provider = setup_otel_metrics(
            service_name=service_name,
            service_version=service_version,
            protocol=metrics_protocol,
            export_interval_millis=export_interval_millis
        )
        if not providers.meter_provider:
            logger.warning("✗ OTLP metrics export setup returned None", service=service_name)
            return

        logger.info("✓ OTLP metrics export configured", service=service_name)

        # Collectors are attached to the meter provider, so they are only
        # attempted once that provider exists.
        if enable_system_metrics:
            _configure_metric_collectors(providers, service_name, service_version)
    except Exception as e:
        logger.error("✗ Failed to setup OTLP metrics export", service=service_name, error=str(e))


def _configure_metric_collectors(providers: TelemetryProviders, service_name: str, service_version: str) -> None:
    """Create the system and application metrics collectors unless ENABLE_SYSTEM_METRICS opts out."""
    # Environment override: any value other than "true" (case-insensitive) disables the collectors.
    if os.getenv("ENABLE_SYSTEM_METRICS", "true").lower() != "true":
        return

    try:
        providers.system_metrics, providers.app_metrics = setup_all_metrics(
            service_name=service_name,
            service_version=service_version,
            meter_provider=providers.meter_provider
        )
        logger.info(
            "✓ System and application metrics collectors initialized",
            service=service_name,
            system_metrics=["cpu", "memory", "disk", "network"],
            app_metrics=["http_requests", "db_queries"]
        )
    except Exception as e:
        # Collector failure is only a warning: OTLP metrics export itself is already configured.
        logger.warning("✗ Failed to setup metrics collectors", service=service_name, error=str(e))


def _configure_logs(providers: TelemetryProviders, service_name: str, service_version: str) -> None:
    """Initialize structured log export and store the handler; failures are logged, not raised."""
    try:
        providers.logging_handler = setup_otel_logging(
            service_name=service_name,
            service_version=service_version
        )
        if providers.logging_handler:
            logger.info("✓ Structured logs export configured", service=service_name)
        else:
            logger.warning("✗ Logs export setup returned None", service=service_name)
    except Exception as e:
        logger.error("✗ Failed to setup logs export", service=service_name, error=str(e))


def _log_setup_summary(
    providers: TelemetryProviders,
    service_name: str,
    service_version: str,
    metrics_protocol: Optional[str]
) -> None:
    """Log which signals ended up enabled and which endpoint each one uses (best effort)."""
    try:
        endpoints = OTelConfig.get_endpoints()

        # Endpoint attributes are read only for signals that were actually initialized.
        if providers.tracer_provider:
            traces_endpoint = endpoints.traces_grpc
        else:
            traces_endpoint = "disabled"

        # NOTE(review): anything other than the exact string "http" (including
        # None) is reported as the gRPC endpoint. If setup_otel_metrics resolves
        # a None protocol from configuration, this summary may not match the
        # endpoint really in use - confirm against setup_otel_metrics.
        if not providers.meter_provider:
            metrics_endpoint = "disabled"
        elif metrics_protocol != "http":
            metrics_endpoint = endpoints.metrics_grpc
        else:
            metrics_endpoint = endpoints.metrics_http

        if providers.logging_handler:
            logs_endpoint = endpoints.logs_http
        else:
            logs_endpoint = "disabled"

        summary = {
            "service": service_name,
            "version": service_version,
            "traces": {
                "enabled": bool(providers.tracer_provider),
                "endpoint": traces_endpoint
            },
            "metrics": {
                "enabled": bool(providers.meter_provider),
                "endpoint": metrics_endpoint,
                "system_metrics": bool(providers.system_metrics),
                "app_metrics": bool(providers.app_metrics)
            },
            "logs": {
                "enabled": bool(providers.logging_handler),
                "endpoint": logs_endpoint
            }
        }
        logger.info("🎉 Telemetry setup complete", **summary)
    except Exception as e:
        logger.warning("Could not log endpoint summary", error=str(e))
|
||
|
|
|
||
|
|
|
||
|
|
def setup_telemetry_simple(
    app,
    service_name: str,
    service_version: str = "1.0.0"
) -> TelemetryProviders:
    """
    Simplified telemetry setup with all defaults.

    Thin wrapper around ``setup_telemetry`` that forwards only the application
    and service identity, leaving every other option at its default value.

    Uses:
    - gRPC for traces (port 4317)
    - gRPC for metrics (port 4317)
    - HTTP for logs (port 4318)

    All settings are read from environment variables and OTelConfig.

    Args:
        app: FastAPI application instance
        service_name: Name of the service
        service_version: Version of the service

    Returns:
        TelemetryProviders containing all initialized providers

    Example:
        from shared.monitoring.telemetry import setup_telemetry_simple

        app = FastAPI(title="Auth Service")
        providers = setup_telemetry_simple(app, "auth-service")
    """
    return setup_telemetry(
        app=app,
        service_name=service_name,
        service_version=service_version
    )
|
||
|
|
|
||
|
|
|
||
|
|
def get_telemetry_status() -> Dict[str, Any]:
    """
    Get current telemetry configuration status.

    The result is built solely from ``OTelConfig``: it describes what the
    configuration enables and which endpoints it points at, not whether
    ``setup_telemetry`` was called or whether any provider was actually
    initialized.

    Returns:
        Dictionary with telemetry status information, keyed by signal:
        - "traces": "enabled", "protocol" (always "grpc"), "endpoint"
        - "metrics": "enabled", "protocol", "grpc_endpoint", "http_endpoint"
        - "logs": "enabled", "protocol" (always "http"), "endpoint"

    Example:
        from shared.monitoring.telemetry import get_telemetry_status

        status = get_telemetry_status()
        print(f"Tracing enabled: {status['traces']['enabled']}")
    """
    endpoints = OTelConfig.get_endpoints()

    return {
        "traces": {
            "enabled": OTelConfig.is_enabled("traces"),
            "protocol": "grpc",
            "endpoint": endpoints.traces_grpc
        },
        "metrics": {
            "enabled": OTelConfig.is_enabled("metrics"),
            "protocol": OTelConfig.get_protocol("metrics"),
            # Both endpoints are reported; "protocol" tells which one applies.
            "grpc_endpoint": endpoints.metrics_grpc,
            "http_endpoint": endpoints.metrics_http
        },
        "logs": {
            "enabled": OTelConfig.is_enabled("logs"),
            "protocol": "http",
            "endpoint": endpoints.logs_http
        }
    }
|