Update monitoring packages to latest versions

- Updated all OpenTelemetry packages to the latest versions:
  - opentelemetry-api: 1.27.0 → 1.39.1
  - opentelemetry-sdk: 1.27.0 → 1.39.1
  - opentelemetry-exporter-otlp-proto-grpc: 1.27.0 → 1.39.1
  - opentelemetry-exporter-otlp-proto-http: 1.27.0 → 1.39.1
  - opentelemetry-instrumentation-fastapi: 0.48b0 → 0.60b1
  - opentelemetry-instrumentation-httpx: 0.48b0 → 0.60b1
  - opentelemetry-instrumentation-redis: 0.48b0 → 0.60b1
  - opentelemetry-instrumentation-sqlalchemy: 0.48b0 → 0.60b1

- Removed prometheus-client==0.23.1 from all services
- Unified all services to use the same monitoring package versions

Generated by Mistral Vibe.
Co-Authored-By: Mistral Vibe <vibe@mistral.ai>
This commit is contained in:
Urtzi Alfaro
2026-01-08 19:25:52 +01:00
parent dfb7e4b237
commit 29d19087f1
129 changed files with 5718 additions and 1821 deletions

View File

@@ -6,7 +6,10 @@
global:
storageClass: "standard"
domain: "localhost"
domain: "monitoring.bakery-ia.local"
# Docker Hub credentials for pulling images
imagePullSecrets:
- name: dockerhub-creds
# Frontend Configuration
frontend:
@@ -27,7 +30,7 @@ frontend:
nginx.ingress.kubernetes.io/rewrite-target: /$2
nginx.ingress.kubernetes.io/use-regex: "true"
hosts:
- host: localhost
- host: monitoring.bakery-ia.local
paths:
- path: /signoz(/|$)(.*)
pathType: ImplementationSpecific
@@ -35,8 +38,8 @@ frontend:
resources:
requests:
cpu: 50m
memory: 128Mi
cpu: 25m # Reduced for local dev
memory: 64Mi # Reduced for local dev
limits:
cpu: 200m
memory: 256Mi
@@ -44,6 +47,8 @@ frontend:
env:
- name: FRONTEND_REFRESH_INTERVAL
value: "30000"
- name: BASE_URL
value: "https://monitoring.bakery-ia.local/signoz"
# Query Service Configuration
queryService:
@@ -59,8 +64,8 @@ queryService:
resources:
requests:
cpu: 100m
memory: 256Mi
cpu: 50m # Reduced for local dev
memory: 128Mi # Reduced for local dev
limits:
cpu: 500m
memory: 512Mi
@@ -90,8 +95,8 @@ alertmanager:
resources:
requests:
cpu: 50m
memory: 128Mi
cpu: 25m # Reduced for local dev
memory: 64Mi # Reduced for local dev
limits:
cpu: 200m
memory: 256Mi
@@ -115,76 +120,59 @@ alertmanager:
# Add email, slack, webhook configs here
# ClickHouse Configuration - Time Series Database
# Minimal resources for local development on constrained Kind cluster
clickhouse:
replicaCount: 1
image:
repository: clickhouse/clickhouse-server
tag: 24.1.2-alpine
pullPolicy: IfNotPresent
enabled: true
installCustomStorageClass: false
service:
type: ClusterIP
httpPort: 8123
tcpPort: 9000
# Reduce ClickHouse resource requests for local dev
clickhouse:
resources:
requests:
cpu: 200m # Reduced from default 500m
memory: 512Mi
limits:
cpu: 1000m
memory: 1Gi
resources:
requests:
cpu: 500m
memory: 512Mi
limits:
cpu: 1000m
memory: 1Gi
persistence:
enabled: true
size: 10Gi
storageClass: "standard"
# ClickHouse configuration
config:
logger:
level: information
max_connections: 1024
max_concurrent_queries: 100
# Data retention (7 days for dev)
merge_tree:
parts_to_delay_insert: 150
parts_to_throw_insert: 300
# OpenTelemetry Collector - Integrated with SigNoz
# OpenTelemetry Collector - Data ingestion endpoint for all telemetry
otelCollector:
enabled: true
replicaCount: 1
image:
repository: signoz/signoz-otel-collector
tag: 0.102.8
pullPolicy: IfNotPresent
# Service configuration - expose both gRPC and HTTP endpoints
service:
type: ClusterIP
ports:
otlpGrpc: 4317
otlpHttp: 4318
metrics: 8888
healthCheck: 13133
# gRPC receivers
- name: otlp-grpc
port: 4317
targetPort: 4317
protocol: TCP
# HTTP receivers
- name: otlp-http
port: 4318
targetPort: 4318
protocol: TCP
# Prometheus remote write
- name: prometheus
port: 8889
targetPort: 8889
protocol: TCP
resources:
requests:
cpu: 100m
memory: 256Mi
cpu: 50m # Reduced from 100m
memory: 128Mi # Reduced from 256Mi
limits:
cpu: 500m
memory: 512Mi
# Full OTEL Collector Configuration
# OpenTelemetry Collector configuration
config:
extensions:
health_check:
endpoint: 0.0.0.0:13133
zpages:
endpoint: 0.0.0.0:55679
receivers:
# OTLP receivers for traces, metrics, and logs from applications
# All application telemetry is pushed via OTLP protocol
otlp:
protocols:
grpc:
@@ -193,105 +181,119 @@ otelCollector:
endpoint: 0.0.0.0:4318
cors:
allowed_origins:
- "http://localhost"
- "https://localhost"
- "*"
# Prometheus receiver for scraping metrics
prometheus:
config:
scrape_configs:
- job_name: 'otel-collector'
scrape_interval: 30s
static_configs:
- targets: ['localhost:8888']
# PostgreSQL receivers for database metrics
# Collects metrics directly from PostgreSQL databases
postgresql/auth:
endpoint: auth-db-service.bakery-ia:5432
username: ${POSTGRES_MONITOR_USER}
password: ${POSTGRES_MONITOR_PASSWORD}
databases:
- auth_db
collection_interval: 60s
tls:
insecure: false
postgresql/inventory:
endpoint: inventory-db-service.bakery-ia:5432
username: ${POSTGRES_MONITOR_USER}
password: ${POSTGRES_MONITOR_PASSWORD}
databases:
- inventory_db
collection_interval: 60s
tls:
insecure: false
postgresql/orders:
endpoint: orders-db-service.bakery-ia:5432
username: ${POSTGRES_MONITOR_USER}
password: ${POSTGRES_MONITOR_PASSWORD}
databases:
- orders_db
collection_interval: 60s
tls:
insecure: false
# Add more PostgreSQL databases as needed
# postgresql/SERVICE:
# endpoint: SERVICE-db-service.bakery-ia:5432
# ...
# Redis receiver for cache metrics
redis:
endpoint: redis-service.bakery-ia:6379
password: ${REDIS_PASSWORD}
collection_interval: 60s
tls:
insecure: false
cert_file: /etc/redis-tls/redis-cert.pem
key_file: /etc/redis-tls/redis-key.pem
ca_file: /etc/redis-tls/ca-cert.pem
# RabbitMQ receiver via management API
rabbitmq:
endpoint: http://rabbitmq-service.bakery-ia:15672
username: ${RABBITMQ_USER}
password: ${RABBITMQ_PASSWORD}
collection_interval: 60s
processors:
# Batch processor for better performance
batch:
timeout: 10s
send_batch_size: 1024
# Memory limiter to prevent OOM
memory_limiter:
check_interval: 1s
limit_mib: 400
spike_limit_mib: 100
# Resource detection for K8s
# Resource detection
resourcedetection:
detectors: [env, system, docker]
detectors: [env, system]
timeout: 5s
# Add resource attributes
resource:
attributes:
- key: deployment.environment
value: development
action: upsert
exporters:
# Export to SigNoz ClickHouse
# ClickHouse exporter for traces
clickhousetraces:
datasource: tcp://clickhouse:9000/?database=signoz_traces
datasource: tcp://signoz-clickhouse:9000/?database=signoz_traces
timeout: 10s
# ClickHouse exporter for metrics
clickhousemetricswrite:
endpoint: tcp://clickhouse:9000/?database=signoz_metrics
endpoint: tcp://signoz-clickhouse:9000/?database=signoz_metrics
timeout: 10s
# ClickHouse exporter for logs
clickhouselogsexporter:
dsn: tcp://clickhouse:9000/?database=signoz_logs
dsn: tcp://signoz-clickhouse:9000/?database=signoz_logs
timeout: 10s
# Debug logging
# Logging exporter for debugging (optional)
logging:
loglevel: info
sampling_initial: 5
sampling_thereafter: 200
service:
extensions: [health_check, zpages]
pipelines:
# Traces pipeline
traces:
receivers: [otlp]
processors: [memory_limiter, batch, resourcedetection, resource]
exporters: [clickhousetraces, logging]
processors: [memory_limiter, batch, resourcedetection]
exporters: [clickhousetraces]
# Metrics pipeline
metrics:
receivers: [otlp, prometheus]
processors: [memory_limiter, batch, resourcedetection, resource]
receivers: [otlp, postgresql/auth, postgresql/inventory, postgresql/orders, redis, rabbitmq]
processors: [memory_limiter, batch, resourcedetection]
exporters: [clickhousemetricswrite]
# Logs pipeline
logs:
receivers: [otlp]
processors: [memory_limiter, batch, resourcedetection, resource]
exporters: [clickhouselogsexporter, logging]
# OpenTelemetry Collector Deployment Mode
otelCollectorDeployment:
enabled: true
mode: deployment
# Node Exporter for infrastructure metrics (optional)
nodeExporter:
enabled: true
service:
type: ClusterIP
port: 9100
resources:
requests:
cpu: 50m
memory: 64Mi
limits:
cpu: 100m
memory: 128Mi
# Schemamanager - Manages ClickHouse schema
schemamanager:
enabled: true
image:
repository: signoz/signoz-schema-migrator
tag: 0.52.3
pullPolicy: IfNotPresent
processors: [memory_limiter, batch, resourcedetection]
exporters: [clickhouselogsexporter]
# Additional Configuration
serviceAccount: