Update monitoring packages to latest versions
- Updated all OpenTelemetry packages to latest versions:
  - opentelemetry-api: 1.27.0 → 1.39.1
  - opentelemetry-sdk: 1.27.0 → 1.39.1
  - opentelemetry-exporter-otlp-proto-grpc: 1.27.0 → 1.39.1
  - opentelemetry-exporter-otlp-proto-http: 1.27.0 → 1.39.1
  - opentelemetry-instrumentation-fastapi: 0.48b0 → 0.60b1
  - opentelemetry-instrumentation-httpx: 0.48b0 → 0.60b1
  - opentelemetry-instrumentation-redis: 0.48b0 → 0.60b1
  - opentelemetry-instrumentation-sqlalchemy: 0.48b0 → 0.60b1
- Removed prometheus-client==0.23.1 from all services
- Unified all services to use the same monitoring package versions

Generated by Mistral Vibe.
Co-Authored-By: Mistral Vibe <vibe@mistral.ai>
This commit is contained in:
250
shared/monitoring/metrics_exporter.py
Normal file
250
shared/monitoring/metrics_exporter.py
Normal file
@@ -0,0 +1,250 @@
|
||||
"""
|
||||
OpenTelemetry Metrics Integration for SigNoz
|
||||
Exports metrics to SigNoz via OpenTelemetry Collector in addition to Prometheus
|
||||
"""
|
||||
|
||||
import os
|
||||
import structlog
|
||||
from typing import Optional
|
||||
from opentelemetry import metrics
|
||||
from opentelemetry.sdk.metrics import MeterProvider
|
||||
from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
|
||||
from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter
|
||||
from opentelemetry.sdk.resources import Resource, SERVICE_NAME, SERVICE_VERSION
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
|
||||
def setup_otel_metrics(
    service_name: str,
    service_version: str = "1.0.0",
    otel_endpoint: Optional[str] = None,
    export_interval_millis: int = 60000  # Export every 60 seconds
) -> Optional[MeterProvider]:
    """
    Configure periodic OTLP (gRPC) metric export and install it globally.

    Builds a ``MeterProvider`` carrying service/deployment resource
    attributes, attaches a ``PeriodicExportingMetricReader`` backed by the
    gRPC ``OTLPMetricExporter`` imported by this module, and registers the
    provider with ``metrics.set_meter_provider``.

    This function only sets up the OTLP push path; it does not create or
    expose any Prometheus endpoint.

    Args:
        service_name: Name of the service (e.g., "auth-service")
        service_version: Version of the service
        otel_endpoint: Collector endpoint. When omitted it is read from
            ``OTEL_EXPORTER_OTLP_ENDPOINT``, then ``OTEL_COLLECTOR_ENDPOINT``,
            then falls back to the in-cluster SigNoz collector on the OTLP
            gRPC port (4317). A trailing ``/v1/metrics`` (OTLP/HTTP path) is
            removed because it has no meaning for a gRPC target.
        export_interval_millis: How often to push metrics (default 60s)

    Returns:
        The configured MeterProvider, or None when export is disabled via
        ``ENABLE_OTEL_METRICS`` or when setup raised an exception.

    Example:
        from shared.monitoring.metrics_exporter import setup_otel_metrics

        # Setup during service initialization
        meter_provider = setup_otel_metrics("auth-service", "1.0.0")

        # The provider is None when export is disabled or setup failed
        if meter_provider is not None:
            meter = meter_provider.get_meter(__name__)
            request_counter = meter.create_counter(
                "http.server.requests",
                description="Total HTTP requests",
                unit="1"
            )
            request_counter.add(1, {"method": "GET", "status": "200"})
    """

    # Export is on by default; any value other than "true" (case-insensitive)
    # switches it off.
    enable_otel_metrics = os.getenv("ENABLE_OTEL_METRICS", "true").lower() == "true"
    if not enable_otel_metrics:
        logger.info(
            "OpenTelemetry metrics export disabled",
            service=service_name,
            reason="ENABLE_OTEL_METRICS not set to 'true'"
        )
        return None

    # Resolve the endpoint: explicit argument wins, then the environment.
    # The default targets port 4317 because the exporter used below speaks
    # OTLP over gRPC; 4318 is the OTLP/HTTP port and would reject it.
    if otel_endpoint is None:
        otel_endpoint = os.getenv(
            "OTEL_EXPORTER_OTLP_ENDPOINT",
            os.getenv("OTEL_COLLECTOR_ENDPOINT", "http://signoz-otel-collector.signoz:4317")
        )

    # "/v1/metrics" is an OTLP/HTTP route. Appending it to a gRPC target is at
    # best ignored and at worst (scheme-less "host:port" values) produces an
    # invalid target, so normalise HTTP-style values instead.
    http_metrics_path = "/v1/metrics"
    otel_endpoint = otel_endpoint.rstrip("/")
    if otel_endpoint.endswith(http_metrics_path):
        otel_endpoint = otel_endpoint[: -len(http_metrics_path)]

    try:
        # Resource attributes identify this process on every exported metric.
        resource = Resource(attributes={
            SERVICE_NAME: service_name,
            SERVICE_VERSION: service_version,
            "deployment.environment": os.getenv("ENVIRONMENT", "development"),
            "k8s.namespace.name": os.getenv("K8S_NAMESPACE", "bakery-ia"),
            "k8s.pod.name": os.getenv("HOSTNAME", "unknown"),
        })

        # Configure OTLP exporter for metrics
        otlp_exporter = OTLPMetricExporter(
            endpoint=otel_endpoint,
            timeout=10
        )

        # Push accumulated metrics on a fixed interval
        metric_reader = PeriodicExportingMetricReader(
            exporter=otlp_exporter,
            export_interval_millis=export_interval_millis
        )

        meter_provider = MeterProvider(
            resource=resource,
            metric_readers=[metric_reader]
        )

        # Make the provider the process-wide default so metrics.get_meter()
        # callers export through it as well.
        metrics.set_meter_provider(meter_provider)

        logger.info(
            "OpenTelemetry metrics export configured",
            service=service_name,
            otel_endpoint=otel_endpoint,
            export_interval_seconds=export_interval_millis / 1000
        )

        return meter_provider

    except Exception as e:
        # Deliberately best-effort: a telemetry failure must not stop the
        # service from starting, so log and report "no provider".
        logger.error(
            "Failed to setup OpenTelemetry metrics export",
            service=service_name,
            error=str(e),
            reason="Will continue with Prometheus-only metrics"
        )
        return None
|
||||
|
||||
|
||||
class OTelMetricsCollector:
    """
    Thin wrapper over an OpenTelemetry meter with a name-based interface
    (create/increment/observe/set) similar to the Prometheus MetricsCollector.

    Instruments are cached per short name and registered with the meter under
    ``<service_name with '-' replaced by '_'>_<name>``. Recording against a
    name that was never created is silently ignored.
    """

    def __init__(self, service_name: str, meter_provider: "MeterProvider"):
        """
        Args:
            service_name: Service name; used as instrument-name prefix and as
                the default ``service`` attribute on every measurement.
            meter_provider: Provider from which this collector's meter is
                obtained.
        """
        self.service_name = service_name
        self.meter_provider = meter_provider
        self.meter = meter_provider.get_meter(__name__)

        # Instrument caches keyed by the short (un-prefixed) name
        self._counters = {}
        self._histograms = {}
        # Gauge state dicts (name/description/unit/value/attributes)
        self._gauges = {}

    def _qualified_name(self, name: str) -> str:
        """Return the instrument name prefixed with the normalised service name."""
        return f"{self.service_name.replace('-', '_')}_{name}"

    def _with_service(self, attributes: Optional[dict]) -> dict:
        """Return a copy of *attributes* with a ``service`` key ensured.

        A copy is used so the caller's dict is never modified; an existing
        ``service`` value supplied by the caller is kept.
        """
        merged = dict(attributes) if attributes else {}
        merged.setdefault("service", self.service_name)
        return merged

    def create_counter(self, name: str, description: str = "", unit: str = "1"):
        """Create or get an OpenTelemetry Counter"""
        if name not in self._counters:
            self._counters[name] = self.meter.create_counter(
                name=self._qualified_name(name),
                description=description,
                unit=unit
            )
        return self._counters[name]

    def create_histogram(self, name: str, description: str = "", unit: str = "1"):
        """Create or get an OpenTelemetry Histogram"""
        if name not in self._histograms:
            self._histograms[name] = self.meter.create_histogram(
                name=self._qualified_name(name),
                description=description,
                unit=unit
            )
        return self._histograms[name]

    def create_gauge(self, name: str, description: str = "", unit: str = "1"):
        """
        Create or get a gauge and register it as an OpenTelemetry observable gauge.

        The returned dict holds the gauge's current state (``name``,
        ``description``, ``unit``, ``value``, ``attributes``). The registered
        callback reports the stored ``value`` with the stored ``attributes``;
        until ``set_gauge`` has supplied attributes it reports with only the
        ``service`` attribute. Only the most recently set value/attribute pair
        is kept per gauge.
        """
        if name not in self._gauges:
            state = {
                "name": self._qualified_name(name),
                "description": description,
                "unit": unit,
                "value": 0,
                "attributes": {}
            }

            def _observe(_options, _state=state):
                # Invoked by the SDK at collection time; reads whatever
                # set_gauge stored last.
                attrs = _state["attributes"] or {"service": self.service_name}
                return [metrics.Observation(_state["value"], attrs)]

            # Without this registration the stored value is never exported.
            self.meter.create_observable_gauge(
                name=state["name"],
                callbacks=[_observe],
                unit=unit,
                description=description
            )
            self._gauges[name] = state
        return self._gauges[name]

    def increment_counter(self, name: str, value: int = 1, attributes: Optional[dict] = None):
        """Increment a previously created counter; unknown names are ignored."""
        if name in self._counters:
            self._counters[name].add(value, self._with_service(attributes))

    def observe_histogram(self, name: str, value: float, attributes: Optional[dict] = None):
        """Record an observation on a previously created histogram; unknown names are ignored."""
        if name in self._histograms:
            self._histograms[name].record(value, self._with_service(attributes))

    def set_gauge(self, name: str, value: float, attributes: Optional[dict] = None):
        """Store a gauge value and attributes for the next collection callback."""
        if name in self._gauges:
            state = self._gauges[name]
            state["value"] = value
            state["attributes"] = self._with_service(attributes)
|
||||
|
||||
|
||||
def create_dual_metrics_collector(service_name: str, service_version: str = "1.0.0"):
    """
    Build the Prometheus collector and, when available, its OpenTelemetry twin.

    The Prometheus ``MetricsCollector`` is always created. OTLP export is then
    set up through ``setup_otel_metrics``; if that yields a provider, an
    ``OTelMetricsCollector`` is wrapped around it, otherwise the second
    element of the result is None.

    Returns a tuple: (prometheus_collector, otel_collector)
    The two collectors are independent objects; otel_collector may be None.

    Example:
        from shared.monitoring.metrics_exporter import create_dual_metrics_collector

        prom_collector, otel_collector = create_dual_metrics_collector("auth-service")

        # Prometheus counter
        prom_collector.register_counter("requests_total", "Total requests")
        prom_collector.increment_counter("requests_total", labels={"status": "200"})

        # OpenTelemetry counter (only when OTLP export is active)
        if otel_collector is not None:
            counter = otel_collector.create_counter("requests_total", "Total requests")
            counter.add(1, {"status": "200"})
    """
    # Imported here rather than at module level, as in the original code.
    from shared.monitoring.metrics import MetricsCollector

    prometheus_side = MetricsCollector(service_name)

    # A falsy provider means OTLP export is disabled or failed to initialise.
    provider = setup_otel_metrics(service_name, service_version)
    otel_side = OTelMetricsCollector(service_name, provider) if provider else None

    return prometheus_side, otel_side
|
||||
Reference in New Issue
Block a user