Improve monitoring 2

This commit is contained in:
Urtzi Alfaro
2026-01-09 07:26:11 +01:00
parent 4af860c010
commit 8ca5d9c100
39 changed files with 1035 additions and 376 deletions

View File

@@ -1,11 +1,13 @@
# SigNoz Helm Chart Values - Development Environment
# Optimized for local development with minimal resource usage
# DEPLOYED IN bakery-ia NAMESPACE - Ingress managed by bakery-ingress
#
# Official Chart: https://github.com/SigNoz/charts
# Install Command: helm install signoz signoz/signoz -n signoz --create-namespace -f signoz-values-dev.yaml
# Install Command: helm install signoz signoz/signoz -n bakery-ia -f signoz-values-dev.yaml
global:
storageClass: "standard"
clusterName: "bakery-ia-dev"
domain: "monitoring.bakery-ia.local"
# Docker Hub credentials - applied to all sub-charts (including Zookeeper, ClickHouse, etc)
imagePullSecrets:
@@ -23,17 +25,10 @@ signoz:
type: ClusterIP
port: 8080
# DISABLE built-in ingress - using unified bakery-ingress instead
# Route configured in infrastructure/kubernetes/overlays/dev/dev-ingress.yaml
ingress:
enabled: true
className: nginx
annotations: {}
hosts:
- host: monitoring.bakery-ia.local
paths:
- path: /
pathType: Prefix
port: 8080
tls: []
enabled: false
resources:
requests:
@@ -43,6 +38,17 @@ signoz:
cpu: 1000m
memory: 1Gi
# Environment variables (new format - replaces configVars)
env:
signoz_telemetrystore_provider: "clickhouse"
dot_metrics_enabled: "true"
signoz_emailing_enabled: "false"
signoz_alertmanager_provider: "signoz"
# Retention for dev (7 days)
signoz_traces_ttl_duration_hrs: "168"
signoz_metrics_ttl_duration_hrs: "168"
signoz_logs_ttl_duration_hrs: "168"
persistence:
enabled: true
size: 5Gi
@@ -92,6 +98,11 @@ clickhouse:
enabled: true
installCustomStorageClass: false
image:
registry: docker.io
repository: clickhouse/clickhouse-server
tag: 25.5.6 # Official recommended version
# Reduce ClickHouse resource requests for local dev
clickhouse:
resources:
@@ -102,15 +113,39 @@ clickhouse:
cpu: 1000m
memory: 1Gi
persistence:
enabled: true
size: 20Gi
# Zookeeper Configuration (required by ClickHouse)
zookeeper:
enabled: true
replicaCount: 1 # Single replica for dev
image:
tag: 3.7.1 # Official recommended version
resources:
requests:
cpu: 100m
memory: 256Mi
limits:
cpu: 500m
memory: 512Mi
persistence:
enabled: true
size: 5Gi
# OpenTelemetry Collector - Data ingestion endpoint for all telemetry
otelCollector:
enabled: true
replicaCount: 1
image:
repository: signoz/signoz-otel-collector
tag: v0.129.12 # Latest recommended version
# Service configuration - expose both gRPC and HTTP endpoints
service:
type: ClusterIP
@@ -130,6 +165,11 @@ otelCollector:
port: 8889
targetPort: 8889
protocol: TCP
# Metrics
- name: metrics
port: 8888
targetPort: 8888
protocol: TCP
resources:
requests:
@@ -210,10 +250,11 @@ otelCollector:
collection_interval: 60s
processors:
# Batch processor for better performance
# Batch processor for better performance (optimized for high throughput)
batch:
timeout: 10s
send_batch_size: 1024
timeout: 1s
send_batch_size: 10000 # Increased from 1024 for better performance
send_batch_max_size: 10000
# Memory limiter to prevent OOM
memory_limiter:
@@ -223,35 +264,57 @@ otelCollector:
# Resource detection
resourcedetection:
detectors: [env, system]
detectors: [env, system, docker]
timeout: 5s
# Span metrics processor for automatic service metrics
# `metrics_exporter` names the exporter that receives the generated metrics;
# it matches the `signozclickhousemetrics` entry in the exporters section.
# NOTE(review): confirm the collector image in use still registers a
# `spanmetrics` processor (upstream Collector Contrib appears to have moved
# this functionality to a connector — TODO verify against the image tag).
spanmetrics:
metrics_exporter: signozclickhousemetrics
latency_histogram_buckets: [2ms, 4ms, 6ms, 8ms, 10ms, 50ms, 100ms, 200ms, 400ms, 800ms, 1s, 1400ms, 2s, 5s, 10s, 15s]
dimensions_cache_size: 10000
exporters:
# ClickHouse exporter for traces
clickhousetraces:
datasource: tcp://signoz-clickhouse:9000/?database=signoz_traces
timeout: 10s
retry_on_failure:
enabled: true
initial_interval: 5s
max_interval: 30s
max_elapsed_time: 300s
# ClickHouse exporter for metrics
# NOTE(review): the DSN below embeds the ClickHouse username and password in
# plain text inside a committed values file. Consider sourcing the credentials
# from a Kubernetes Secret / environment variable instead, and rotating this
# password since it is now in version-control history.
signozclickhousemetrics:
dsn: "tcp://admin:27ff0399-0d3a-4bd8-919d-17c2181e6fb9@signoz-clickhouse:9000/signoz_metrics"
timeout: 10s
# Retry failed exports with backoff from 5s up to 30s, giving up after 300s.
retry_on_failure:
enabled: true
initial_interval: 5s
max_interval: 30s
max_elapsed_time: 300s
# ClickHouse exporter for logs
clickhouselogsexporter:
dsn: tcp://signoz-clickhouse:9000/?database=signoz_logs
timeout: 10s
retry_on_failure:
enabled: true
initial_interval: 5s
max_interval: 30s
# Debug exporter for debugging (optional)
debug:
verbosity: detailed
sampling_initial: 5
sampling_thereafter: 200
service:
pipelines:
# Traces pipeline
traces:
receivers: [otlp]
processors: [memory_limiter, batch, resourcedetection]
processors: [memory_limiter, batch, spanmetrics, resourcedetection]
exporters: [clickhousetraces]
# Metrics pipeline