Improve monitoring 5
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
"""
|
||||
OpenTelemetry Metrics Integration for SigNoz
|
||||
Exports metrics to SigNoz via OpenTelemetry Collector in addition to Prometheus
|
||||
Exports metrics to SigNoz via OpenTelemetry Collector using gRPC protocol
|
||||
"""
|
||||
|
||||
import os
|
||||
@@ -9,8 +9,24 @@ from typing import Optional
|
||||
from opentelemetry import metrics
|
||||
from opentelemetry.sdk.metrics import MeterProvider
|
||||
from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
|
||||
from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter
|
||||
from opentelemetry.sdk.resources import Resource, SERVICE_NAME, SERVICE_VERSION
|
||||
from opentelemetry.sdk.resources import Resource
|
||||
|
||||
# Import both gRPC and HTTP exporters
|
||||
try:
|
||||
from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter as GrpcMetricExporter
|
||||
GRPC_AVAILABLE = True
|
||||
except ImportError:
|
||||
GRPC_AVAILABLE = False
|
||||
GrpcMetricExporter = None
|
||||
|
||||
try:
|
||||
from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter as HttpMetricExporter
|
||||
HTTP_AVAILABLE = True
|
||||
except ImportError:
|
||||
HTTP_AVAILABLE = False
|
||||
HttpMetricExporter = None
|
||||
|
||||
from .otel_config import OTelConfig
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
@@ -19,20 +35,21 @@ def setup_otel_metrics(
|
||||
service_name: str,
|
||||
service_version: str = "1.0.0",
|
||||
otel_endpoint: Optional[str] = None,
|
||||
export_interval_millis: int = 60000 # Export every 60 seconds
|
||||
export_interval_millis: int = 60000, # Export every 60 seconds
|
||||
protocol: Optional[str] = None # "grpc" or "http", defaults to grpc
|
||||
) -> Optional[MeterProvider]:
|
||||
"""
|
||||
Setup OpenTelemetry metrics to export to SigNoz.
|
||||
|
||||
This creates a dual-export strategy:
|
||||
- Prometheus exposition format at /metrics (for Prometheus scraping)
|
||||
- OTLP push to SigNoz collector (for direct ingestion)
|
||||
Supports both gRPC (recommended, port 4317) and HTTP (port 4318) protocols.
|
||||
Default protocol is gRPC for better performance.
|
||||
|
||||
Args:
|
||||
service_name: Name of the service (e.g., "auth-service")
|
||||
service_version: Version of the service
|
||||
otel_endpoint: OpenTelemetry collector endpoint (default from env)
|
||||
export_interval_millis: How often to push metrics (default 60s)
|
||||
otel_endpoint: Optional override for OTLP endpoint
|
||||
export_interval_millis: How often to push metrics in milliseconds (default 60s)
|
||||
protocol: Protocol to use ("grpc" or "http"). Defaults to "grpc"
|
||||
|
||||
Returns:
|
||||
MeterProvider instance if successful, None otherwise
|
||||
@@ -40,9 +57,12 @@ def setup_otel_metrics(
|
||||
Example:
|
||||
from shared.monitoring.metrics_exporter import setup_otel_metrics
|
||||
|
||||
# Setup during service initialization
|
||||
# Setup with gRPC (default)
|
||||
meter_provider = setup_otel_metrics("auth-service", "1.0.0")
|
||||
|
||||
# Or with HTTP
|
||||
meter_provider = setup_otel_metrics("auth-service", "1.0.0", protocol="http")
|
||||
|
||||
# Create meters for your metrics
|
||||
meter = meter_provider.get_meter(__name__)
|
||||
request_counter = meter.create_counter(
|
||||
@@ -56,8 +76,7 @@ def setup_otel_metrics(
|
||||
"""
|
||||
|
||||
# Check if metrics export is enabled
|
||||
enable_otel_metrics = os.getenv("ENABLE_OTEL_METRICS", "true").lower() == "true"
|
||||
if not enable_otel_metrics:
|
||||
if not OTelConfig.is_enabled("metrics"):
|
||||
logger.info(
|
||||
"OpenTelemetry metrics export disabled",
|
||||
service=service_name,
|
||||
@@ -65,32 +84,66 @@ def setup_otel_metrics(
|
||||
)
|
||||
return None
|
||||
|
||||
# Get OTLP endpoint from environment or parameter
|
||||
if otel_endpoint is None:
|
||||
otel_endpoint = os.getenv(
|
||||
"OTEL_EXPORTER_OTLP_ENDPOINT",
|
||||
os.getenv("OTEL_COLLECTOR_ENDPOINT", "http://signoz-otel-collector.bakery-ia:4318")
|
||||
)
|
||||
# Determine protocol to use
|
||||
if protocol is None:
|
||||
protocol = OTelConfig.get_protocol("metrics")
|
||||
|
||||
# Ensure endpoint has /v1/metrics path for HTTP
|
||||
if not otel_endpoint.endswith("/v1/metrics"):
|
||||
otel_endpoint = f"{otel_endpoint}/v1/metrics"
|
||||
# Validate protocol is available
|
||||
if protocol == "grpc" and not GRPC_AVAILABLE:
|
||||
logger.warning(
|
||||
"gRPC exporter not available, falling back to HTTP",
|
||||
service=service_name
|
||||
)
|
||||
protocol = "http"
|
||||
elif protocol == "http" and not HTTP_AVAILABLE:
|
||||
logger.warning(
|
||||
"HTTP exporter not available, falling back to gRPC",
|
||||
service=service_name
|
||||
)
|
||||
protocol = "grpc"
|
||||
|
||||
if protocol not in ["grpc", "http"]:
|
||||
logger.error(
|
||||
"Invalid protocol specified",
|
||||
service=service_name,
|
||||
protocol=protocol
|
||||
)
|
||||
return None
|
||||
|
||||
try:
|
||||
# Create resource with service information
|
||||
resource = Resource(attributes={
|
||||
SERVICE_NAME: service_name,
|
||||
SERVICE_VERSION: service_version,
|
||||
"deployment.environment": os.getenv("ENVIRONMENT", "development"),
|
||||
"k8s.namespace.name": os.getenv("K8S_NAMESPACE", "bakery-ia"),
|
||||
"k8s.pod.name": os.getenv("HOSTNAME", "unknown"),
|
||||
})
|
||||
# Get endpoints from centralized config
|
||||
endpoints = OTelConfig.get_endpoints()
|
||||
|
||||
# Configure OTLP exporter for metrics
|
||||
otlp_exporter = OTLPMetricExporter(
|
||||
endpoint=otel_endpoint,
|
||||
timeout=10
|
||||
)
|
||||
# Determine which endpoint to use
|
||||
if otel_endpoint:
|
||||
# User provided override
|
||||
if protocol == "grpc":
|
||||
endpoint = OTelConfig._clean_grpc_endpoint(otel_endpoint)
|
||||
else:
|
||||
endpoint = OTelConfig._ensure_http_endpoint(otel_endpoint, "/v1/metrics")
|
||||
else:
|
||||
# Use config-determined endpoint
|
||||
if protocol == "grpc":
|
||||
endpoint = endpoints.metrics_grpc
|
||||
else:
|
||||
endpoint = endpoints.metrics_http
|
||||
|
||||
# Get resource attributes
|
||||
resource_attrs = OTelConfig.get_resource_attributes(service_name, service_version)
|
||||
resource = Resource(attributes=resource_attrs)
|
||||
|
||||
# Configure OTLP exporter based on protocol
|
||||
if protocol == "grpc":
|
||||
otlp_exporter = GrpcMetricExporter(
|
||||
endpoint=endpoint,
|
||||
insecure=True, # Use insecure=False in production with proper TLS
|
||||
timeout=10
|
||||
)
|
||||
else: # http
|
||||
otlp_exporter = HttpMetricExporter(
|
||||
endpoint=endpoint,
|
||||
timeout=10
|
||||
)
|
||||
|
||||
# Create periodic metric reader
|
||||
metric_reader = PeriodicExportingMetricReader(
|
||||
@@ -108,9 +161,10 @@ def setup_otel_metrics(
|
||||
metrics.set_meter_provider(meter_provider)
|
||||
|
||||
logger.info(
|
||||
"OpenTelemetry metrics export configured",
|
||||
"OpenTelemetry metrics export configured successfully",
|
||||
service=service_name,
|
||||
otel_endpoint=otel_endpoint,
|
||||
endpoint=endpoint,
|
||||
protocol=protocol,
|
||||
export_interval_seconds=export_interval_millis / 1000
|
||||
)
|
||||
|
||||
@@ -121,7 +175,7 @@ def setup_otel_metrics(
|
||||
"Failed to setup OpenTelemetry metrics export",
|
||||
service=service_name,
|
||||
error=str(e),
|
||||
reason="Will continue with Prometheus-only metrics"
|
||||
protocol=protocol
|
||||
)
|
||||
return None
|
||||
|
||||
|
||||
Reference in New Issue
Block a user