Improve monitoring 3

This commit is contained in:
Urtzi Alfaro
2026-01-09 11:18:20 +01:00
parent 8ca5d9c100
commit 43a3f35bd1
27 changed files with 1279 additions and 32 deletions

View File

@@ -269,6 +269,15 @@ otelCollector:
# Full OTEL Collector Configuration
config:
# Connectors - bridge between pipelines
# signozmeter is listed as an exporter of the traces pipeline and as the
# receiver of the metrics/meter pipeline in the service section of this config.
connectors:
signozmeter:
# Attribute names configured as dimensions for this connector.
dimensions:
- name: service.name
- name: deployment.environment
- name: host.name
# NOTE(review): presumably how often the connector emits its aggregated
# metrics - confirm against the SigNoz collector documentation.
metrics_flush_interval: 1h
extensions:
health_check:
endpoint: 0.0.0.0:13133
@@ -304,6 +313,12 @@ otelCollector:
send_batch_size: 50000 # Increased from 2048 (official recommendation for traces)
send_batch_max_size: 50000
# Batch processor for meter data
# Referenced by the metrics/meter pipeline (processors: [batch/meter]).
batch/meter:
timeout: 1s
# The max size (25000) is set above the regular batch size (20000).
send_batch_size: 20000
send_batch_max_size: 25000
memory_limiter:
check_interval: 1s
limit_mib: 1500 # ~73% of container memory (2Gi = 2048Mi)
@@ -324,11 +339,19 @@ otelCollector:
value: bakery-ia-prod
action: upsert
# Span metrics processor for automatic service performance metrics
# NOTE(review): this span comes from a diff whose +/- markers were stripped.
# The bare `spanmetrics:` key and the first `latency_histogram_buckets` list
# look like the removed side; `signozspanmetrics/delta:` and the second bucket
# list look like the added side - confirm against the commit before treating
# the repeated key as a real duplicate.
spanmetrics:
# SigNoz span metrics processor with delta aggregation (recommended)
# Generates RED metrics (Rate, Error, Duration) from trace spans
signozspanmetrics/delta:
aggregation_temporality: AGGREGATION_TEMPORALITY_DELTA
# Same name as the exporter used by the metrics pipeline.
metrics_exporter: signozclickhousemetrics
latency_histogram_buckets: [2ms, 4ms, 6ms, 8ms, 10ms, 50ms, 100ms, 200ms, 400ms, 800ms, 1s, 1400ms, 2s, 5s, 10s, 15s]
latency_histogram_buckets: [100us, 1ms, 2ms, 6ms, 10ms, 50ms, 100ms, 250ms, 500ms, 1000ms, 1400ms, 2000ms, 5s, 10s, 20s, 40s, 60s]
dimensions_cache_size: 100000
# Extra dimensions: the first two carry a `default` value,
# signoz.collector.id is listed without one.
dimensions:
- name: service.namespace
default: default
- name: deployment.environment
default: production
- name: signoz.collector.id
exporters:
# Export to SigNoz ClickHouse
@@ -350,6 +373,13 @@ otelCollector:
max_interval: 30s
max_elapsed_time: 300s
# ClickHouse exporter for meter data (usage metrics)
# Listed as the exporter of the metrics/meter pipeline.
signozclickhousemeter:
# Points at the signoz_meter database on clickhouse:9000.
dsn: "tcp://clickhouse:9000/?database=signoz_meter"
timeout: 45s
# The sending queue is explicitly disabled for this exporter.
sending_queue:
enabled: false
clickhouselogsexporter:
dsn: tcp://clickhouse:9000/?database=signoz_logs
timeout: 10s
@@ -359,6 +389,13 @@ otelCollector:
max_interval: 30s
max_elapsed_time: 300s
# Metadata exporter for service metadata
# Listed among the exporters of the traces pipeline.
metadataexporter:
# Points at the signoz_metadata database on clickhouse:9000.
dsn: "tcp://clickhouse:9000/?database=signoz_metadata"
timeout: 10s
# Cache provider is set to in_memory.
cache:
provider: in_memory
# Debug exporter for debugging (replaces deprecated logging exporter)
debug:
verbosity: detailed
@@ -368,16 +405,25 @@ otelCollector:
service:
extensions: [health_check, zpages]
pipelines:
# Traces pipeline - exports to ClickHouse and signozmeter connector
# NOTE(review): diff markers were stripped from this span. The first
# processors/exporters pair (spanmetrics, clickhousetraces only) looks like the
# removed side; the second pair (signozspanmetrics/delta, plus metadataexporter
# and signozmeter) looks like the added side - confirm against the commit.
traces:
receivers: [otlp]
processors: [memory_limiter, batch, spanmetrics, resourcedetection, resource]
exporters: [clickhousetraces]
processors: [memory_limiter, batch, signozspanmetrics/delta, resourcedetection, resource]
exporters: [clickhousetraces, metadataexporter, signozmeter]
# Metrics pipeline
# Receives from otlp and prometheus; exports to signozclickhousemetrics.
metrics:
receivers: [otlp, prometheus]
processors: [memory_limiter, batch, resourcedetection, resource]
exporters: [signozclickhousemetrics]
# Meter pipeline - receives from signozmeter connector
# The same signozmeter name appears in the traces pipeline's exporters list.
# Unlike the other pipelines shown here, only batch/meter is configured as a
# processor (no memory_limiter, resourcedetection or resource).
metrics/meter:
receivers: [signozmeter]
processors: [batch/meter]
exporters: [signozclickhousemeter]
# Logs pipeline
logs:
receivers: [otlp]
processors: [memory_limiter, batch, resourcedetection, resource]