Files
bakery-ia/infrastructure/helm/signoz-values-dev.yaml

683 lines
21 KiB
YAML
Raw Normal View History

# SigNoz Helm Chart Values - Development Environment
# Optimized for local development with minimal resource usage
# DEPLOYED IN bakery-ia NAMESPACE - Ingress managed by bakery-ingress
#
# Official Chart: https://github.com/SigNoz/charts
# Install Command: helm install signoz signoz/signoz -n bakery-ia -f signoz-values-dev.yaml
# Values shared across the chart: storage class, cluster name, domain and
# the image pull secret list.
global:
  storageClass: "standard"
  clusterName: "bakery-ia-dev"
  domain: "monitoring.bakery-ia.local"
  # Docker Hub credentials - applied to all sub-charts (including Zookeeper, ClickHouse, etc)
  imagePullSecrets:
    - dockerhub-creds
2026-01-08 12:58:00 +01:00
2026-01-09 06:57:18 +01:00
# Docker Hub credentials for pulling images (root level for SigNoz components)
imagePullSecrets:
  - dockerhub-creds
# SigNoz Main Component (includes frontend and query service)
signoz:
  replicaCount: 1

  service:
    type: ClusterIP
    port: 8080

  # DISABLE built-in ingress - using unified bakery-ingress instead
  # Route configured in infrastructure/kubernetes/overlays/dev/dev-ingress.yaml
  ingress:
    enabled: false

  resources:
    requests:
      cpu: 100m  # Combined frontend + query service
      memory: 256Mi
    limits:
      cpu: 1000m
      memory: 1Gi

  # Environment variables (new format - replaces configVars)
  # All values are quoted so they reach the container as strings.
  env:
    signoz_telemetrystore_provider: "clickhouse"
    dot_metrics_enabled: "true"
    signoz_emailing_enabled: "false"
    signoz_alertmanager_provider: "signoz"
    # Retention for dev (7 days = 168 hours)
    signoz_traces_ttl_duration_hrs: "168"
    signoz_metrics_ttl_duration_hrs: "168"
    signoz_logs_ttl_duration_hrs: "168"
    # OpAMP Server Configuration
    signoz_opamp_server_enabled: "true"
    signoz_opamp_server_endpoint: "0.0.0.0:4320"

  persistence:
    enabled: true
    size: 5Gi
    storageClass: "standard"
# AlertManager Configuration
alertmanager:
  replicaCount: 1

  image:
    repository: signoz/alertmanager
    # Quoted so the version can never be re-typed as a number.
    tag: "0.23.5"
    pullPolicy: IfNotPresent

  service:
    type: ClusterIP
    port: 9093

  resources:
    requests:
      cpu: 25m  # Reduced for local dev
      memory: 64Mi  # Reduced for local dev
    limits:
      cpu: 200m
      memory: 256Mi

  persistence:
    enabled: true
    size: 2Gi
    storageClass: "standard"

  # Alert routing: everything goes to the single 'default' receiver,
  # which has no notification channel configured yet.
  config:
    global:
      resolve_timeout: 5m
    route:
      group_by: ['alertname', 'cluster', 'service']
      group_wait: 10s
      group_interval: 10s
      repeat_interval: 12h
      receiver: 'default'
    receivers:
      - name: 'default'
        # Add email, slack, webhook configs here
# ClickHouse Configuration - Time Series Database
# Minimal resources for local development on constrained Kind cluster
clickhouse:
  enabled: true
  installCustomStorageClass: false

  image:
    registry: docker.io
    repository: clickhouse/clickhouse-server
    tag: "25.5.6"  # Official recommended version

  # Reduce ClickHouse resource requests for local dev
  # NOTE(review): the nested `clickhouse:` key is kept as written in the
  # original; confirm against the chart whether resources are read from
  # `clickhouse.clickhouse.resources` or directly from `clickhouse.resources`.
  clickhouse:
    resources:
      requests:
        cpu: 200m  # Reduced from default 500m
        memory: 512Mi
      limits:
        cpu: 1000m
        memory: 1Gi

  persistence:
    enabled: true
    size: 20Gi
2026-01-09 06:57:18 +01:00
# Zookeeper Configuration (required by ClickHouse)
zookeeper:
  enabled: true
  replicaCount: 1  # Single replica for dev

  image:
    tag: "3.7.1"  # Official recommended version

  resources:
    requests:
      cpu: 100m
      memory: 256Mi
    limits:
      cpu: 500m
      memory: 512Mi

  persistence:
    enabled: true
    size: 5Gi
2026-01-09 06:57:18 +01:00
# OpenTelemetry Collector - Data ingestion endpoint for all telemetry
otelCollector:
  enabled: true
  replicaCount: 1

  image:
    repository: signoz/signoz-otel-collector
    tag: v0.129.12  # Latest recommended version

  # OpAMP Configuration - Enabled for dynamic configuration management
  # Note: OpAMP allows remote configuration management via SigNoz backend
  # This replaces the manual kubectl patch approach

  # Init containers for the Otel Collector pod
  initContainers:
    # Copies the PostgreSQL TLS material from the read-only Secret mount into
    # a writable emptyDir and sets the file modes there.
    fix-postgres-tls:
      enabled: true
      image:
        registry: docker.io
        repository: busybox
        # Quoted: an unquoted 1.35 is parsed as a float, not an image tag.
        tag: "1.35"
        pullPolicy: IfNotPresent
      command:
        - sh
        - -c
        - |
          echo "Fixing PostgreSQL TLS file permissions..."
          cp /etc/postgres-tls-source/* /etc/postgres-tls/
          chmod 600 /etc/postgres-tls/server-key.pem
          chmod 644 /etc/postgres-tls/server-cert.pem
          chmod 644 /etc/postgres-tls/ca-cert.pem
          echo "PostgreSQL TLS permissions fixed"
      volumeMounts:
        # Must reference a volume declared in extraVolumes below; the Secret
        # volume is named `postgres-tls` (there is no `postgres-tls-source`).
        - name: postgres-tls
          mountPath: /etc/postgres-tls-source
          readOnly: true
        - name: postgres-tls-fixed
          mountPath: /etc/postgres-tls
          readOnly: false

  # Service configuration - expose both gRPC and HTTP endpoints
  service:
    type: ClusterIP
    ports:
      # gRPC receivers
      - name: otlp-grpc
        port: 4317
        targetPort: 4317
        protocol: TCP
      # HTTP receivers
      - name: otlp-http
        port: 4318
        targetPort: 4318
        protocol: TCP
      # Prometheus remote write
      - name: prometheus
        port: 8889
        targetPort: 8889
        protocol: TCP
      # Metrics
      - name: metrics
        port: 8888
        targetPort: 8888
        protocol: TCP

  resources:
    requests:
      cpu: 50m  # Reduced from 100m
      memory: 128Mi  # Reduced from 256Mi
    limits:
      cpu: 500m
      memory: 512Mi

  # Additional environment variables for receivers
  # Referenced from the receiver config below via ${env:NAME}.
  # NOTE(review): these credentials are committed in plain text. The receiver
  # comments below mention redis-secrets / rabbitmq-secrets; consider sourcing
  # the values from those Kubernetes Secrets instead - confirm the key names
  # and that the chart accepts `valueFrom` entries here.
  additionalEnvs:
    POSTGRES_MONITOR_USER: "monitoring"
    POSTGRES_MONITOR_PASSWORD: "monitoring_369f9c001f242b07ef9e2826e17169ca"
    REDIS_PASSWORD: "OxdmdJjdVNXp37MNC2IFoMnTpfGGFv1k"
    RABBITMQ_USER: "bakery"
    RABBITMQ_PASSWORD: "forecast123"

  # Mount TLS certificates for secure connections
  extraVolumes:
    - name: redis-tls
      secret:
        secretName: redis-tls-secret
    - name: postgres-tls
      secret:
        secretName: postgres-tls
    # Writable scratch volume filled by the fix-postgres-tls init container.
    - name: postgres-tls-fixed
      emptyDir: {}

  extraVolumeMounts:
    - name: redis-tls
      mountPath: /etc/redis-tls
      readOnly: true
    - name: postgres-tls
      mountPath: /etc/postgres-tls-source
      readOnly: true
    - name: postgres-tls-fixed
      mountPath: /etc/postgres-tls
      readOnly: false

  # Enable OpAMP for dynamic configuration management
  # NOTE(review): if the chart appends extraArgs to its own default args,
  # --config / --manager-config may end up passed twice - confirm against the
  # rendered Deployment.
  command:
    name: /signoz-otel-collector
    extraArgs:
      - --config=/conf/otel-collector-config.yaml
      - --manager-config=/conf/otel-collector-opamp-config.yaml
      - --feature-gates=-pkg.translator.prometheus.NormalizeName

  # OpenTelemetry Collector configuration
  config:
    # Connectors - bridge between pipelines
    connectors:
      signozmeter:
        dimensions:
          - name: service.name
          - name: deployment.environment
          - name: host.name
        metrics_flush_interval: 1h

    receivers:
      # OTLP receivers for traces, metrics, and logs from applications
      # All application telemetry is pushed via OTLP protocol
      otlp:
        protocols:
          grpc:
            endpoint: 0.0.0.0:4317
          http:
            endpoint: 0.0.0.0:4318
            cors:
              allowed_origins:
                - "*"

      # PostgreSQL receivers for database metrics
      # ENABLED: Monitor users configured and credentials stored in secrets
      # Collects metrics directly from PostgreSQL databases with proper TLS
      # One receiver per service database; all share the same monitor
      # credentials, 60s interval and TLS files under /etc/postgres-tls.
      postgresql/auth:
        endpoint: auth-db-service.bakery-ia:5432
        username: ${env:POSTGRES_MONITOR_USER}
        password: ${env:POSTGRES_MONITOR_PASSWORD}
        databases:
          - auth_db
        collection_interval: 60s
        tls:
          insecure: false
          cert_file: /etc/postgres-tls/server-cert.pem
          key_file: /etc/postgres-tls/server-key.pem
          ca_file: /etc/postgres-tls/ca-cert.pem

      postgresql/inventory:
        endpoint: inventory-db-service.bakery-ia:5432
        username: ${env:POSTGRES_MONITOR_USER}
        password: ${env:POSTGRES_MONITOR_PASSWORD}
        databases:
          - inventory_db
        collection_interval: 60s
        tls:
          insecure: false
          cert_file: /etc/postgres-tls/server-cert.pem
          key_file: /etc/postgres-tls/server-key.pem
          ca_file: /etc/postgres-tls/ca-cert.pem

      postgresql/orders:
        endpoint: orders-db-service.bakery-ia:5432
        username: ${env:POSTGRES_MONITOR_USER}
        password: ${env:POSTGRES_MONITOR_PASSWORD}
        databases:
          - orders_db
        collection_interval: 60s
        tls:
          insecure: false
          cert_file: /etc/postgres-tls/server-cert.pem
          key_file: /etc/postgres-tls/server-key.pem
          ca_file: /etc/postgres-tls/ca-cert.pem

      postgresql/ai-insights:
        endpoint: ai-insights-db-service.bakery-ia:5432
        username: ${env:POSTGRES_MONITOR_USER}
        password: ${env:POSTGRES_MONITOR_PASSWORD}
        databases:
          - ai_insights_db
        collection_interval: 60s
        tls:
          insecure: false
          cert_file: /etc/postgres-tls/server-cert.pem
          key_file: /etc/postgres-tls/server-key.pem
          ca_file: /etc/postgres-tls/ca-cert.pem

      postgresql/alert-processor:
        endpoint: alert-processor-db-service.bakery-ia:5432
        username: ${env:POSTGRES_MONITOR_USER}
        password: ${env:POSTGRES_MONITOR_PASSWORD}
        databases:
          - alert_processor_db
        collection_interval: 60s
        tls:
          insecure: false
          cert_file: /etc/postgres-tls/server-cert.pem
          key_file: /etc/postgres-tls/server-key.pem
          ca_file: /etc/postgres-tls/ca-cert.pem

      postgresql/distribution:
        endpoint: distribution-db-service.bakery-ia:5432
        username: ${env:POSTGRES_MONITOR_USER}
        password: ${env:POSTGRES_MONITOR_PASSWORD}
        databases:
          - distribution_db
        collection_interval: 60s
        tls:
          insecure: false
          cert_file: /etc/postgres-tls/server-cert.pem
          key_file: /etc/postgres-tls/server-key.pem
          ca_file: /etc/postgres-tls/ca-cert.pem

      postgresql/external:
        endpoint: external-db-service.bakery-ia:5432
        username: ${env:POSTGRES_MONITOR_USER}
        password: ${env:POSTGRES_MONITOR_PASSWORD}
        databases:
          - external_db
        collection_interval: 60s
        tls:
          insecure: false
          cert_file: /etc/postgres-tls/server-cert.pem
          key_file: /etc/postgres-tls/server-key.pem
          ca_file: /etc/postgres-tls/ca-cert.pem

      postgresql/forecasting:
        endpoint: forecasting-db-service.bakery-ia:5432
        username: ${env:POSTGRES_MONITOR_USER}
        password: ${env:POSTGRES_MONITOR_PASSWORD}
        databases:
          - forecasting_db
        collection_interval: 60s
        tls:
          insecure: false
          cert_file: /etc/postgres-tls/server-cert.pem
          key_file: /etc/postgres-tls/server-key.pem
          ca_file: /etc/postgres-tls/ca-cert.pem

      postgresql/notification:
        endpoint: notification-db-service.bakery-ia:5432
        username: ${env:POSTGRES_MONITOR_USER}
        password: ${env:POSTGRES_MONITOR_PASSWORD}
        databases:
          - notification_db
        collection_interval: 60s
        tls:
          insecure: false
          cert_file: /etc/postgres-tls/server-cert.pem
          key_file: /etc/postgres-tls/server-key.pem
          ca_file: /etc/postgres-tls/ca-cert.pem

      postgresql/orchestrator:
        endpoint: orchestrator-db-service.bakery-ia:5432
        username: ${env:POSTGRES_MONITOR_USER}
        password: ${env:POSTGRES_MONITOR_PASSWORD}
        databases:
          - orchestrator_db
        collection_interval: 60s
        tls:
          insecure: false
          cert_file: /etc/postgres-tls/server-cert.pem
          key_file: /etc/postgres-tls/server-key.pem
          ca_file: /etc/postgres-tls/ca-cert.pem

      postgresql/pos:
        endpoint: pos-db-service.bakery-ia:5432
        username: ${env:POSTGRES_MONITOR_USER}
        password: ${env:POSTGRES_MONITOR_PASSWORD}
        databases:
          - pos_db
        collection_interval: 60s
        tls:
          insecure: false
          cert_file: /etc/postgres-tls/server-cert.pem
          key_file: /etc/postgres-tls/server-key.pem
          ca_file: /etc/postgres-tls/ca-cert.pem

      postgresql/procurement:
        endpoint: procurement-db-service.bakery-ia:5432
        username: ${env:POSTGRES_MONITOR_USER}
        password: ${env:POSTGRES_MONITOR_PASSWORD}
        databases:
          - procurement_db
        collection_interval: 60s
        tls:
          insecure: false
          cert_file: /etc/postgres-tls/server-cert.pem
          key_file: /etc/postgres-tls/server-key.pem
          ca_file: /etc/postgres-tls/ca-cert.pem

      postgresql/production:
        endpoint: production-db-service.bakery-ia:5432
        username: ${env:POSTGRES_MONITOR_USER}
        password: ${env:POSTGRES_MONITOR_PASSWORD}
        databases:
          - production_db
        collection_interval: 60s
        tls:
          insecure: false
          cert_file: /etc/postgres-tls/server-cert.pem
          key_file: /etc/postgres-tls/server-key.pem
          ca_file: /etc/postgres-tls/ca-cert.pem

      postgresql/recipes:
        endpoint: recipes-db-service.bakery-ia:5432
        username: ${env:POSTGRES_MONITOR_USER}
        password: ${env:POSTGRES_MONITOR_PASSWORD}
        databases:
          - recipes_db
        collection_interval: 60s
        tls:
          insecure: false
          cert_file: /etc/postgres-tls/server-cert.pem
          key_file: /etc/postgres-tls/server-key.pem
          ca_file: /etc/postgres-tls/ca-cert.pem

      postgresql/sales:
        endpoint: sales-db-service.bakery-ia:5432
        username: ${env:POSTGRES_MONITOR_USER}
        password: ${env:POSTGRES_MONITOR_PASSWORD}
        databases:
          - sales_db
        collection_interval: 60s
        tls:
          insecure: false
          cert_file: /etc/postgres-tls/server-cert.pem
          key_file: /etc/postgres-tls/server-key.pem
          ca_file: /etc/postgres-tls/ca-cert.pem

      postgresql/suppliers:
        endpoint: suppliers-db-service.bakery-ia:5432
        username: ${env:POSTGRES_MONITOR_USER}
        password: ${env:POSTGRES_MONITOR_PASSWORD}
        databases:
          - suppliers_db
        collection_interval: 60s
        tls:
          insecure: false
          cert_file: /etc/postgres-tls/server-cert.pem
          key_file: /etc/postgres-tls/server-key.pem
          ca_file: /etc/postgres-tls/ca-cert.pem

      postgresql/tenant:
        endpoint: tenant-db-service.bakery-ia:5432
        username: ${env:POSTGRES_MONITOR_USER}
        password: ${env:POSTGRES_MONITOR_PASSWORD}
        databases:
          - tenant_db
        collection_interval: 60s
        tls:
          insecure: false
          cert_file: /etc/postgres-tls/server-cert.pem
          key_file: /etc/postgres-tls/server-key.pem
          ca_file: /etc/postgres-tls/ca-cert.pem

      postgresql/training:
        endpoint: training-db-service.bakery-ia:5432
        username: ${env:POSTGRES_MONITOR_USER}
        password: ${env:POSTGRES_MONITOR_PASSWORD}
        databases:
          - training_db
        collection_interval: 60s
        tls:
          insecure: false
          cert_file: /etc/postgres-tls/server-cert.pem
          key_file: /etc/postgres-tls/server-key.pem
          ca_file: /etc/postgres-tls/ca-cert.pem

      # Redis receiver for cache metrics
      # ENABLED: Using existing credentials from redis-secrets with TLS
      redis:
        endpoint: redis-service.bakery-ia:6379
        password: ${env:REDIS_PASSWORD}
        collection_interval: 60s
        transport: tcp
        tls:
          insecure_skip_verify: false
          cert_file: /etc/redis-tls/redis-cert.pem
          key_file: /etc/redis-tls/redis-key.pem
          ca_file: /etc/redis-tls/ca-cert.pem
        metrics:
          redis.maxmemory:
            enabled: true
          redis.cmd.latency:
            enabled: true

      # RabbitMQ receiver via management API
      # ENABLED: Using existing credentials from rabbitmq-secrets
      rabbitmq:
        endpoint: http://rabbitmq-service.bakery-ia:15672
        username: ${env:RABBITMQ_USER}
        password: ${env:RABBITMQ_PASSWORD}
        collection_interval: 30s

    processors:
      # Batch processor for better performance (optimized for high throughput)
      batch:
        timeout: 1s
        send_batch_size: 10000  # Increased from 1024 for better performance
        send_batch_max_size: 10000

      # Batch processor for meter data
      batch/meter:
        timeout: 1s
        send_batch_size: 20000
        send_batch_max_size: 25000

      # Memory limiter to prevent OOM
      memory_limiter:
        check_interval: 1s
        limit_mib: 400
        spike_limit_mib: 100

      # Resource detection
      resourcedetection:
        detectors: [env, system, docker]
        timeout: 5s

      # SigNoz span metrics processor with delta aggregation (recommended)
      # Generates RED metrics (Rate, Error, Duration) from trace spans
      signozspanmetrics/delta:
        aggregation_temporality: AGGREGATION_TEMPORALITY_DELTA
        metrics_exporter: signozclickhousemetrics
        latency_histogram_buckets: [100us, 1ms, 2ms, 6ms, 10ms, 50ms, 100ms, 250ms, 500ms, 1000ms, 1400ms, 2000ms, 5s, 10s, 20s, 40s, 60s]
        dimensions_cache_size: 100000
        dimensions:
          - name: service.namespace
            default: default
          - name: deployment.environment
            default: default
          - name: signoz.collector.id

    # NOTE(review): every ClickHouse DSN below embeds the admin password in
    # plain text; consider injecting it from a Secret-backed env var.
    exporters:
      # ClickHouse exporter for traces
      clickhousetraces:
        datasource: tcp://admin:27ff0399-0d3a-4bd8-919d-17c2181e6fb9@signoz-clickhouse:9000/?database=signoz_traces
        timeout: 10s
        retry_on_failure:
          enabled: true
          initial_interval: 5s
          max_interval: 30s
          max_elapsed_time: 300s

      # ClickHouse exporter for metrics
      signozclickhousemetrics:
        dsn: "tcp://admin:27ff0399-0d3a-4bd8-919d-17c2181e6fb9@signoz-clickhouse:9000/signoz_metrics"
        timeout: 10s
        retry_on_failure:
          enabled: true
          initial_interval: 5s
          max_interval: 30s
          max_elapsed_time: 300s

      # ClickHouse exporter for meter data (usage metrics)
      signozclickhousemeter:
        dsn: "tcp://admin:27ff0399-0d3a-4bd8-919d-17c2181e6fb9@signoz-clickhouse:9000/signoz_meter"
        timeout: 45s
        sending_queue:
          enabled: false

      # ClickHouse exporter for logs
      clickhouselogsexporter:
        dsn: tcp://admin:27ff0399-0d3a-4bd8-919d-17c2181e6fb9@signoz-clickhouse:9000/?database=signoz_logs
        timeout: 10s
        retry_on_failure:
          enabled: true
          initial_interval: 5s
          max_interval: 30s

      # Metadata exporter for service metadata
      metadataexporter:
        dsn: "tcp://admin:27ff0399-0d3a-4bd8-919d-17c2181e6fb9@signoz-clickhouse:9000/signoz_metadata"
        timeout: 10s
        cache:
          provider: in_memory

      # Debug exporter for debugging (optional)
      # Defined here but not referenced by any pipeline below.
      debug:
        verbosity: detailed
        sampling_initial: 5
        sampling_thereafter: 200

    service:
      pipelines:
        # Traces pipeline - exports to ClickHouse and signozmeter connector
        traces:
          receivers: [otlp]
          processors: [memory_limiter, batch, signozspanmetrics/delta, resourcedetection]
          exporters: [clickhousetraces, metadataexporter, signozmeter]

        # Metrics pipeline
        metrics:
          receivers: [otlp,
                      postgresql/auth, postgresql/inventory, postgresql/orders,
                      postgresql/ai-insights, postgresql/alert-processor, postgresql/distribution,
                      postgresql/external, postgresql/forecasting, postgresql/notification,
                      postgresql/orchestrator, postgresql/pos, postgresql/procurement,
                      postgresql/production, postgresql/recipes, postgresql/sales,
                      postgresql/suppliers, postgresql/tenant, postgresql/training,
                      redis, rabbitmq]
          processors: [memory_limiter, batch, resourcedetection]
          exporters: [signozclickhousemetrics]

        # Meter pipeline - receives from signozmeter connector
        metrics/meter:
          receivers: [signozmeter]
          processors: [batch/meter]
          exporters: [signozclickhousemeter]

        # Logs pipeline
        logs:
          receivers: [otlp]
          processors: [memory_limiter, batch, resourcedetection]
          exporters: [clickhouselogsexporter]
2026-01-08 12:58:00 +01:00
# Additional Configuration
# Service account is created by the chart; an empty name leaves naming to it.
serviceAccount:
  create: true
  annotations: {}
  name: ""
# Security Context
# Run as non-root UID 1000 with fsGroup 1000.
securityContext:
  runAsNonRoot: true
  runAsUser: 1000
  fsGroup: 1000
# Network Policies (disabled for dev)
networkPolicy:
  enabled: false
# Monitoring SigNoz itself
# Self-monitoring is on; the ServiceMonitor resource is left disabled.
selfMonitoring:
  enabled: true
  serviceMonitor:
    enabled: false