Improve monitoring 5
This commit is contained in:
286
shared/monitoring/otel_config.py
Normal file
286
shared/monitoring/otel_config.py
Normal file
@@ -0,0 +1,286 @@
|
||||
"""
|
||||
Centralized OpenTelemetry Configuration
|
||||
Manages OTEL endpoints and settings for traces, metrics, and logs
|
||||
"""
|
||||
|
||||
import os
|
||||
from typing import Optional, Tuple
|
||||
from dataclasses import dataclass
|
||||
import structlog
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
|
||||
@dataclass
class OTelEndpoints:
    """
    Resolved OpenTelemetry export targets, one field per signal/protocol pair.

    Protocols SigNoz uses for each signal:
    - Traces: gRPC (port 4317)
    - Metrics: gRPC (port 4317) or HTTP (port 4318)
    - Logs: HTTP (port 4318)
    """

    # Traces over gRPC, e.g. "host:4317"
    traces_grpc: str

    # Metrics over gRPC, e.g. "host:4317"
    metrics_grpc: str

    # Metrics over HTTP, e.g. "http://host:4318/v1/metrics"
    metrics_http: str

    # Logs over HTTP, e.g. "http://host:4318/v1/logs"
    logs_http: str
|
||||
|
||||
|
||||
class OTelConfig:
    """
    Centralized configuration for OpenTelemetry exporters.

    This class manages endpoint URLs and ensures proper protocol usage:
    - gRPC endpoints: host:port (no protocol prefix)
    - HTTP endpoints: http://host:port/path (with protocol and path)

    Every method reads its settings from environment variables at call time;
    nothing is cached on the class.
    """

    # Default base endpoint (can be overridden by environment variables)
    DEFAULT_OTEL_COLLECTOR_HOST = "signoz-otel-collector.bakery-ia.svc.cluster.local"
    DEFAULT_GRPC_PORT = 4317
    DEFAULT_HTTP_PORT = 4318

    @classmethod
    def get_endpoints(cls) -> "OTelEndpoints":
        """
        Resolve the OpenTelemetry endpoints from environment variables.

        Environment variables:
            OTEL_EXPORTER_OTLP_ENDPOINT - Base endpoint. Any protocol prefix
                and path are stripped to obtain the gRPC "host:port" form.
                When unset or empty, the default collector host is used.
            OTEL_EXPORTER_OTLP_TRACES_ENDPOINT - Overrides the base endpoint
                for traces.
            OTEL_EXPORTER_OTLP_METRICS_ENDPOINT - Overrides the base endpoint
                for metrics (the HTTP metrics endpoint is derived from it).
            OTEL_EXPORTER_OTLP_LOGS_ENDPOINT - Overrides the base endpoint
                for logs.

        A signal-specific variable that is unset or empty falls back to the
        base endpoint. The legacy OTEL_COLLECTOR_ENDPOINT variable is not
        read by this method.

        Returns:
            OTelEndpoints with all configured endpoints
        """
        base_endpoint = os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT")

        if base_endpoint:
            base_grpc = cls._clean_grpc_endpoint(base_endpoint)
        else:
            base_grpc = f"{cls.DEFAULT_OTEL_COLLECTOR_HOST}:{cls.DEFAULT_GRPC_PORT}"

        # `or` (rather than a getenv default) so that a variable that is set
        # but empty also falls back to the base endpoint instead of
        # producing a host-less ":4317".
        traces_endpoint = os.getenv("OTEL_EXPORTER_OTLP_TRACES_ENDPOINT") or base_grpc
        metrics_endpoint = os.getenv("OTEL_EXPORTER_OTLP_METRICS_ENDPOINT") or base_grpc
        logs_endpoint = os.getenv("OTEL_EXPORTER_OTLP_LOGS_ENDPOINT")

        # Build final endpoints
        traces_grpc = cls._clean_grpc_endpoint(traces_endpoint)
        metrics_grpc = cls._clean_grpc_endpoint(metrics_endpoint)

        # The HTTP metrics endpoint targets the same host as the gRPC one
        metrics_http = cls._grpc_to_http_endpoint(metrics_grpc, "/v1/metrics")

        # Logs are exported over HTTP
        if logs_endpoint:
            logs_http = cls._ensure_http_endpoint(logs_endpoint, "/v1/logs")
        else:
            logs_http = cls._grpc_to_http_endpoint(base_grpc, "/v1/logs")

        endpoints = OTelEndpoints(
            traces_grpc=traces_grpc,
            metrics_grpc=metrics_grpc,
            metrics_http=metrics_http,
            logs_http=logs_http,
        )

        logger.info(
            "OpenTelemetry endpoints configured",
            traces_grpc=endpoints.traces_grpc,
            metrics_grpc=endpoints.metrics_grpc,
            metrics_http=endpoints.metrics_http,
            logs_http=endpoints.logs_http,
        )

        return endpoints

    @staticmethod
    def _clean_grpc_endpoint(endpoint: str) -> str:
        """
        Clean endpoint for gRPC usage (remove protocol, paths).

        Args:
            endpoint: Raw endpoint string

        Returns:
            Cleaned endpoint in format "host:port". An existing port is kept;
            DEFAULT_GRPC_PORT is appended when none is present.
        """
        # Remove protocol prefixes
        endpoint = endpoint.replace("http://", "").replace("https://", "")

        # Remove paths (gRPC doesn't use paths)
        endpoint = endpoint.split("/", 1)[0]

        # Ensure it has a port
        if ":" not in endpoint:
            endpoint = f"{endpoint}:{OTelConfig.DEFAULT_GRPC_PORT}"

        return endpoint

    @staticmethod
    def _extract_host(endpoint: str) -> str:
        """
        Extract host and convert to HTTP endpoint.

        Args:
            endpoint: Raw endpoint string

        Returns:
            HTTP endpoint without path (e.g., "http://host:4318"). Any port
            in the input is replaced by DEFAULT_HTTP_PORT.
        """
        # Remove protocol if present
        clean = endpoint.replace("http://", "").replace("https://", "")

        # Drop the path, then the port, leaving only the host
        host = clean.split("/", 1)[0].split(":", 1)[0]

        return f"http://{host}:{OTelConfig.DEFAULT_HTTP_PORT}"

    @staticmethod
    def _grpc_to_http_endpoint(grpc_endpoint: str, path: str) -> str:
        """
        Convert gRPC endpoint to HTTP endpoint with path.

        Args:
            grpc_endpoint: gRPC endpoint (e.g., "host:4317")
            path: HTTP path (e.g., "/v1/metrics")

        Returns:
            HTTP endpoint (e.g., "http://host:4318/v1/metrics")
        """
        # Keep only the host; the HTTP receiver listens on its own port
        host = grpc_endpoint.split(":", 1)[0]

        return f"http://{host}:{OTelConfig.DEFAULT_HTTP_PORT}{path}"

    @staticmethod
    def _ensure_http_endpoint(endpoint: str, path: str) -> str:
        """
        Ensure endpoint is in HTTP format with proper path.

        The scheme (http or https) of the input is kept, "http://" is
        assumed when none is given, the port is always set to
        DEFAULT_HTTP_PORT and any existing path is replaced by ``path``.

        Args:
            endpoint: Raw endpoint string
            path: Required path (e.g., "/v1/logs")

        Returns:
            HTTP endpoint with protocol and path
        """
        # Add protocol if missing
        if not endpoint.startswith(("http://", "https://")):
            endpoint = f"http://{endpoint}"

        scheme, _, remainder = endpoint.partition("://")

        # Everything up to the first "/" is host[:port]; the rest is a path
        # that gets replaced below.
        authority = remainder.split("/", 1)[0]

        # Split the port off from the right so that only the port is
        # dropped. Port detection must look at the authority alone: the
        # colon in "http://" is not a port separator.
        if ":" in authority:
            host = authority.rsplit(":", 1)[0]
        else:
            host = authority

        return f"{scheme}://{host}:{OTelConfig.DEFAULT_HTTP_PORT}{path}"

    @classmethod
    def get_resource_attributes(
        cls,
        service_name: str,
        service_version: str = "1.0.0"
    ) -> dict:
        """
        Get common resource attributes for all OTEL signals.

        Environment, namespace, pod and cluster names are read from the
        ENVIRONMENT, K8S_NAMESPACE, HOSTNAME and K8S_CLUSTER_NAME environment
        variables, each with a fallback default.

        Args:
            service_name: Name of the service
            service_version: Version of the service

        Returns:
            Dictionary of resource attributes
        """
        return {
            "service.name": service_name,
            "service.version": service_version,
            "deployment.environment": os.getenv("ENVIRONMENT", "development"),
            "k8s.namespace.name": os.getenv("K8S_NAMESPACE", "bakery-ia"),
            "k8s.pod.name": os.getenv("HOSTNAME", "unknown"),
            "k8s.cluster.name": os.getenv("K8S_CLUSTER_NAME", "bakery-ia-cluster"),
        }

    @classmethod
    def is_enabled(cls, signal: str) -> bool:
        """
        Check if a specific telemetry signal is enabled.

        Traces and metrics are enabled unless ENABLE_TRACING /
        ENABLE_OTEL_METRICS is set to something other than "true"
        (case-insensitive). Logs are enabled only when OTEL_LOGS_EXPORTER is
        "otlp" (case-insensitive).

        Args:
            signal: One of "traces", "metrics", "logs" (case-insensitive)

        Returns:
            True if signal is enabled, False otherwise (including for an
            unknown signal name)
        """
        signal = signal.lower()

        if signal == "traces":
            return os.getenv("ENABLE_TRACING", "true").lower() == "true"
        elif signal == "metrics":
            return os.getenv("ENABLE_OTEL_METRICS", "true").lower() == "true"
        elif signal == "logs":
            return os.getenv("OTEL_LOGS_EXPORTER", "").lower() == "otlp"
        else:
            return False

    @classmethod
    def get_protocol(cls, signal: str) -> str:
        """
        Get the preferred protocol for a signal.

        The base value comes from OTEL_EXPORTER_OTLP_PROTOCOL (default
        "grpc"); OTEL_EXPORTER_OTLP_TRACES_PROTOCOL and
        OTEL_EXPORTER_OTLP_METRICS_PROTOCOL override it per signal.

        Args:
            signal: One of "traces", "metrics", "logs" (case-insensitive)

        Returns:
            Protocol name. Environment values are returned as configured,
            without normalization; logs always return "http". An unknown
            signal returns the base protocol.
        """
        protocol = os.getenv("OTEL_EXPORTER_OTLP_PROTOCOL", "grpc")

        # Match is_enabled(): accept the signal name in any letter case
        signal = signal.lower()

        # Signal-specific overrides
        if signal == "traces":
            return os.getenv("OTEL_EXPORTER_OTLP_TRACES_PROTOCOL", protocol)
        elif signal == "metrics":
            return os.getenv("OTEL_EXPORTER_OTLP_METRICS_PROTOCOL", protocol)
        elif signal == "logs":
            # Logs always use HTTP in our setup
            return "http"

        return protocol
|
||||
Reference in New Issue
Block a user