2026-01-08 12:58:00 +01:00
|
|
|
# SigNoz Helm Chart Values - Production Environment
|
|
|
|
|
# High-availability configuration with resource optimization
|
2026-01-09 07:26:11 +01:00
|
|
|
# DEPLOYED IN bakery-ia NAMESPACE - Ingress managed by bakery-ingress-prod
|
2026-01-08 12:58:00 +01:00
|
|
|
#
|
|
|
|
|
# Official Chart: https://github.com/SigNoz/charts
|
2026-01-09 07:26:11 +01:00
|
|
|
# Install Command: helm install signoz signoz/signoz -n bakery-ia -f signoz-values-prod.yaml
|
2026-01-08 12:58:00 +01:00
|
|
|
|
|
|
|
|
global:
|
2026-01-17 22:42:40 +01:00
|
|
|
storageClass: "microk8s-hostpath" # For MicroK8s, use "microk8s-hostpath" or custom storage class
|
2026-01-09 07:26:11 +01:00
|
|
|
clusterName: "bakery-ia-prod"
|
2026-01-08 12:58:00 +01:00
|
|
|
domain: "monitoring.bakewise.ai"
|
2026-01-09 06:57:18 +01:00
|
|
|
# Docker Hub credentials - applied to all sub-charts (including Zookeeper, ClickHouse, etc)
|
|
|
|
|
imagePullSecrets:
|
|
|
|
|
- dockerhub-creds
|
|
|
|
|
|
|
|
|
|
# Docker Hub credentials for pulling images (root level for SigNoz components)
|
|
|
|
|
imagePullSecrets:
|
|
|
|
|
- dockerhub-creds
|
2026-01-08 12:58:00 +01:00
|
|
|
|
2026-01-09 07:26:11 +01:00
|
|
|
# SigNoz Main Component (unified frontend + query service)
|
|
|
|
|
# BREAKING CHANGE: v0.89.0+ uses unified component instead of separate frontend/queryService
|
|
|
|
|
signoz:
|
2026-01-08 12:58:00 +01:00
|
|
|
replicaCount: 2
|
2026-01-09 07:26:11 +01:00
|
|
|
|
2026-01-08 12:58:00 +01:00
|
|
|
image:
|
2026-01-09 07:26:11 +01:00
|
|
|
repository: signoz/signoz
|
|
|
|
|
tag: v0.106.0 # Latest stable version
|
2026-01-08 12:58:00 +01:00
|
|
|
pullPolicy: IfNotPresent
|
|
|
|
|
|
|
|
|
|
service:
|
|
|
|
|
type: ClusterIP
|
2026-01-09 07:26:11 +01:00
|
|
|
port: 8080 # HTTP/API port
|
|
|
|
|
internalPort: 8085 # Internal gRPC port
|
2026-01-08 12:58:00 +01:00
|
|
|
|
2026-01-09 07:26:11 +01:00
|
|
|
# DISABLE built-in ingress - using unified bakery-ingress-prod instead
|
|
|
|
|
# Route configured in infrastructure/kubernetes/overlays/prod/prod-ingress.yaml
|
2026-01-08 12:58:00 +01:00
|
|
|
ingress:
|
2026-01-09 07:26:11 +01:00
|
|
|
enabled: false
|
2026-01-08 12:58:00 +01:00
|
|
|
|
|
|
|
|
resources:
|
|
|
|
|
requests:
|
|
|
|
|
cpu: 500m
|
|
|
|
|
memory: 1Gi
|
|
|
|
|
limits:
|
2026-01-09 07:26:11 +01:00
|
|
|
cpu: 2000m
|
|
|
|
|
memory: 4Gi
|
2026-01-08 12:58:00 +01:00
|
|
|
|
|
|
|
|
# Pod Anti-affinity for HA
|
|
|
|
|
affinity:
|
|
|
|
|
podAntiAffinity:
|
|
|
|
|
preferredDuringSchedulingIgnoredDuringExecution:
|
|
|
|
|
- weight: 100
|
|
|
|
|
podAffinityTerm:
|
|
|
|
|
labelSelector:
|
2026-01-09 07:26:11 +01:00
|
|
|
matchLabels:
|
|
|
|
|
app.kubernetes.io/component: query-service
|
2026-01-08 12:58:00 +01:00
|
|
|
topologyKey: kubernetes.io/hostname
|
|
|
|
|
|
2026-01-09 07:26:11 +01:00
|
|
|
# Environment variables (new format - replaces configVars)
|
2026-01-08 12:58:00 +01:00
|
|
|
env:
|
2026-01-09 07:26:11 +01:00
|
|
|
signoz_telemetrystore_provider: "clickhouse"
|
|
|
|
|
dot_metrics_enabled: "true"
|
|
|
|
|
signoz_emailing_enabled: "true"
|
|
|
|
|
signoz_alertmanager_provider: "signoz"
|
|
|
|
|
# Retention configuration (30 days for prod)
|
|
|
|
|
signoz_traces_ttl_duration_hrs: "720"
|
|
|
|
|
signoz_metrics_ttl_duration_hrs: "720"
|
|
|
|
|
signoz_logs_ttl_duration_hrs: "720"
|
2026-01-09 23:14:12 +01:00
|
|
|
# OpAMP Server Configuration
|
|
|
|
|
# WARNING: OpAMP can cause gRPC instability and collector reloads
|
|
|
|
|
# Only enable if you have a stable OpAMP backend server
|
|
|
|
|
signoz_opamp_server_enabled: "false"
|
|
|
|
|
# signoz_opamp_server_endpoint: "0.0.0.0:4320"
|
2026-01-19 11:55:17 +01:00
|
|
|
# SMTP configuration for email alerts - now using Mailu as SMTP server
|
2026-01-09 07:26:11 +01:00
|
|
|
signoz_smtp_enabled: "true"
|
2026-01-19 13:57:50 +01:00
|
|
|
signoz_smtp_host: "mailu-postfix.bakery-ia.svc.cluster.local"
|
2026-01-09 07:26:11 +01:00
|
|
|
signoz_smtp_port: "587"
|
|
|
|
|
signoz_smtp_from: "alerts@bakewise.ai"
|
|
|
|
|
signoz_smtp_username: "alerts@bakewise.ai"
|
|
|
|
|
# Password should be set via secret: signoz_smtp_password
|
2026-01-08 12:58:00 +01:00
|
|
|
|
|
|
|
|
persistence:
|
|
|
|
|
enabled: true
|
|
|
|
|
size: 20Gi
|
|
|
|
|
storageClass: "standard"
|
|
|
|
|
|
|
|
|
|
# Horizontal Pod Autoscaler
|
|
|
|
|
autoscaling:
|
|
|
|
|
enabled: true
|
|
|
|
|
minReplicas: 2
|
|
|
|
|
maxReplicas: 5
|
|
|
|
|
targetCPUUtilizationPercentage: 70
|
|
|
|
|
targetMemoryUtilizationPercentage: 80
|
|
|
|
|
|
|
|
|
|
# AlertManager Configuration
|
|
|
|
|
alertmanager:
|
2026-01-09 07:26:11 +01:00
|
|
|
enabled: true
|
2026-01-08 12:58:00 +01:00
|
|
|
replicaCount: 2
|
2026-01-09 07:26:11 +01:00
|
|
|
|
2026-01-08 12:58:00 +01:00
|
|
|
image:
|
|
|
|
|
repository: signoz/alertmanager
|
|
|
|
|
tag: 0.23.5
|
|
|
|
|
pullPolicy: IfNotPresent
|
|
|
|
|
|
|
|
|
|
service:
|
|
|
|
|
type: ClusterIP
|
|
|
|
|
port: 9093
|
|
|
|
|
|
|
|
|
|
resources:
|
|
|
|
|
requests:
|
2026-01-09 07:26:11 +01:00
|
|
|
cpu: 100m
|
|
|
|
|
memory: 128Mi
|
2026-01-08 12:58:00 +01:00
|
|
|
limits:
|
|
|
|
|
cpu: 500m
|
2026-01-09 07:26:11 +01:00
|
|
|
memory: 512Mi
|
2026-01-08 12:58:00 +01:00
|
|
|
|
|
|
|
|
# Pod Anti-affinity for HA
|
|
|
|
|
affinity:
|
|
|
|
|
podAntiAffinity:
|
|
|
|
|
preferredDuringSchedulingIgnoredDuringExecution:
|
|
|
|
|
- weight: 100
|
|
|
|
|
podAffinityTerm:
|
|
|
|
|
labelSelector:
|
|
|
|
|
matchExpressions:
|
|
|
|
|
- key: app
|
|
|
|
|
operator: In
|
|
|
|
|
values:
|
|
|
|
|
- signoz-alertmanager
|
|
|
|
|
topologyKey: kubernetes.io/hostname
|
|
|
|
|
|
|
|
|
|
persistence:
|
|
|
|
|
enabled: true
|
|
|
|
|
size: 5Gi
|
|
|
|
|
storageClass: "standard"
|
|
|
|
|
|
|
|
|
|
config:
|
|
|
|
|
global:
|
|
|
|
|
resolve_timeout: 5m
|
2026-01-19 13:57:50 +01:00
|
|
|
smtp_smarthost: 'mailu-postfix.bakery-ia.svc.cluster.local:587'
|
2026-01-08 12:58:00 +01:00
|
|
|
smtp_from: 'alerts@bakewise.ai'
|
|
|
|
|
smtp_auth_username: 'alerts@bakewise.ai'
|
|
|
|
|
smtp_auth_password: '${SMTP_PASSWORD}'
|
|
|
|
|
smtp_require_tls: true
|
|
|
|
|
|
|
|
|
|
route:
|
|
|
|
|
group_by: ['alertname', 'cluster', 'service', 'severity']
|
|
|
|
|
group_wait: 10s
|
|
|
|
|
group_interval: 10s
|
|
|
|
|
repeat_interval: 12h
|
|
|
|
|
receiver: 'critical-alerts'
|
|
|
|
|
routes:
|
|
|
|
|
- match:
|
|
|
|
|
severity: critical
|
|
|
|
|
receiver: 'critical-alerts'
|
|
|
|
|
continue: true
|
|
|
|
|
- match:
|
|
|
|
|
severity: warning
|
|
|
|
|
receiver: 'warning-alerts'
|
|
|
|
|
|
|
|
|
|
receivers:
|
|
|
|
|
- name: 'critical-alerts'
|
|
|
|
|
email_configs:
|
|
|
|
|
- to: 'critical-alerts@bakewise.ai'
|
|
|
|
|
headers:
|
|
|
|
|
Subject: '[CRITICAL] {{ .GroupLabels.alertname }} - Bakery IA'
|
|
|
|
|
# Slack webhook for critical alerts
|
|
|
|
|
slack_configs:
|
|
|
|
|
- api_url: '${SLACK_WEBHOOK_URL}'
|
|
|
|
|
channel: '#alerts-critical'
|
|
|
|
|
title: '[CRITICAL] {{ .GroupLabels.alertname }}'
|
|
|
|
|
text: '{{ range .Alerts }}{{ .Annotations.description }}{{ end }}'
|
|
|
|
|
|
|
|
|
|
- name: 'warning-alerts'
|
|
|
|
|
email_configs:
|
|
|
|
|
- to: 'oncall@bakewise.ai'
|
|
|
|
|
headers:
|
|
|
|
|
Subject: '[WARNING] {{ .GroupLabels.alertname }} - Bakery IA'
|
|
|
|
|
|
|
|
|
|
# ClickHouse Configuration - Time Series Database
|
|
|
|
|
clickhouse:
|
2026-01-09 07:26:11 +01:00
|
|
|
enabled: true
|
|
|
|
|
installCustomStorageClass: false
|
|
|
|
|
|
2026-01-08 12:58:00 +01:00
|
|
|
image:
|
2026-01-09 07:26:11 +01:00
|
|
|
registry: docker.io
|
2026-01-08 12:58:00 +01:00
|
|
|
repository: clickhouse/clickhouse-server
|
2026-01-09 07:26:11 +01:00
|
|
|
tag: 25.5.6 # Updated to official recommended version
|
2026-01-08 12:58:00 +01:00
|
|
|
pullPolicy: IfNotPresent
|
|
|
|
|
|
2026-01-09 07:26:11 +01:00
|
|
|
# ClickHouse resources (nested config)
|
|
|
|
|
clickhouse:
|
|
|
|
|
resources:
|
|
|
|
|
requests:
|
|
|
|
|
cpu: 1000m
|
|
|
|
|
memory: 2Gi
|
|
|
|
|
limits:
|
|
|
|
|
cpu: 4000m
|
|
|
|
|
memory: 8Gi
|
2026-01-08 12:58:00 +01:00
|
|
|
|
|
|
|
|
# Pod Anti-affinity for HA
|
|
|
|
|
affinity:
|
|
|
|
|
podAntiAffinity:
|
|
|
|
|
requiredDuringSchedulingIgnoredDuringExecution:
|
|
|
|
|
- labelSelector:
|
|
|
|
|
matchExpressions:
|
|
|
|
|
- key: app
|
|
|
|
|
operator: In
|
|
|
|
|
values:
|
|
|
|
|
- signoz-clickhouse
|
|
|
|
|
topologyKey: kubernetes.io/hostname
|
|
|
|
|
|
|
|
|
|
persistence:
|
|
|
|
|
enabled: true
|
|
|
|
|
size: 100Gi
|
|
|
|
|
storageClass: "standard"
|
|
|
|
|
|
2026-01-09 07:26:11 +01:00
|
|
|
# Cold storage configuration for better disk space management
|
|
|
|
|
coldStorage:
|
2026-01-08 12:58:00 +01:00
|
|
|
enabled: true
|
2026-01-09 07:26:11 +01:00
|
|
|
defaultKeepFreeSpaceBytes: 10737418240 # Keep 10GB free
|
|
|
|
|
ttl:
|
|
|
|
|
deleteTTLDays: 30 # NOTE(review): key name indicates data is deleted (not moved to cold storage) after 30 days — confirm against the chart
|
|
|
|
|
|
|
|
|
|
# Zookeeper Configuration (required by ClickHouse for coordination)
|
|
|
|
|
zookeeper:
|
|
|
|
|
enabled: true
|
|
|
|
|
replicaCount: 3 # CRITICAL: Always use 3 replicas for production HA
|
|
|
|
|
|
|
|
|
|
image:
|
|
|
|
|
tag: 3.7.1 # Official recommended version
|
|
|
|
|
|
|
|
|
|
resources:
|
|
|
|
|
requests:
|
|
|
|
|
cpu: 100m
|
|
|
|
|
memory: 256Mi
|
|
|
|
|
limits:
|
|
|
|
|
cpu: 500m
|
|
|
|
|
memory: 512Mi
|
|
|
|
|
|
|
|
|
|
persistence:
|
|
|
|
|
enabled: true
|
|
|
|
|
size: 10Gi
|
|
|
|
|
storageClass: "standard"
|
2026-01-08 12:58:00 +01:00
|
|
|
|
|
|
|
|
# OpenTelemetry Collector - Integrated with SigNoz
|
|
|
|
|
otelCollector:
|
|
|
|
|
enabled: true
|
|
|
|
|
replicaCount: 2
|
2026-01-09 07:26:11 +01:00
|
|
|
|
2026-01-08 12:58:00 +01:00
|
|
|
image:
|
|
|
|
|
repository: signoz/signoz-otel-collector
|
2026-01-09 07:26:11 +01:00
|
|
|
tag: v0.129.12 # Updated to latest recommended version
|
2026-01-08 12:58:00 +01:00
|
|
|
pullPolicy: IfNotPresent
|
|
|
|
|
|
2026-01-09 23:14:12 +01:00
|
|
|
# Init containers for the Otel Collector pod
|
|
|
|
|
initContainers:
  # Copies the PostgreSQL TLS files from the read-only Secret mount into the
  # writable "postgres-tls-fixed" emptyDir and sets the file modes there, so
  # the collector reads them from /etc/postgres-tls.
  fix-postgres-tls:
    enabled: true
    image:
      registry: docker.io
      repository: busybox
      # Quoted so the tag stays a string; a bare 1.35 is parsed as a float.
      tag: "1.35"
      pullPolicy: IfNotPresent
    command:
      - sh
      - -c
      - |
        # Stop at the first failing command; without this a failed cp/chmod
        # would still end with the final echo and report success.
        set -e
        echo "Fixing PostgreSQL TLS file permissions..."
        cp /etc/postgres-tls-source/* /etc/postgres-tls/
        chmod 600 /etc/postgres-tls/server-key.pem
        chmod 644 /etc/postgres-tls/server-cert.pem
        chmod 644 /etc/postgres-tls/ca-cert.pem
        echo "PostgreSQL TLS permissions fixed"
    volumeMounts:
      # The volume name must match an entry under extraVolumes. The Secret
      # volume is declared there as "postgres-tls"; no volume named
      # "postgres-tls-source" exists, only the mount path uses that suffix.
      - name: postgres-tls
        mountPath: /etc/postgres-tls-source
        readOnly: true
      - name: postgres-tls-fixed
        mountPath: /etc/postgres-tls
        readOnly: false
|
|
|
|
|
|
2026-01-08 12:58:00 +01:00
|
|
|
service:
|
|
|
|
|
type: ClusterIP
|
|
|
|
|
ports:
|
2026-01-09 07:26:11 +01:00
|
|
|
- name: otlp-grpc
|
|
|
|
|
port: 4317
|
2026-01-09 23:14:12 +01:00
|
|
|
targetPort: 4317
|
|
|
|
|
protocol: TCP
|
2026-01-09 07:26:11 +01:00
|
|
|
- name: otlp-http
|
|
|
|
|
port: 4318
|
2026-01-09 23:14:12 +01:00
|
|
|
targetPort: 4318
|
|
|
|
|
protocol: TCP
|
|
|
|
|
- name: prometheus
|
|
|
|
|
port: 8889
|
|
|
|
|
targetPort: 8889
|
|
|
|
|
protocol: TCP
|
2026-01-09 07:26:11 +01:00
|
|
|
- name: metrics
|
|
|
|
|
port: 8888
|
2026-01-09 23:14:12 +01:00
|
|
|
targetPort: 8888
|
|
|
|
|
protocol: TCP
|
2026-01-08 12:58:00 +01:00
|
|
|
|
|
|
|
|
resources:
|
|
|
|
|
requests:
|
|
|
|
|
cpu: 500m
|
|
|
|
|
memory: 512Mi
|
|
|
|
|
limits:
|
2026-01-09 07:26:11 +01:00
|
|
|
cpu: 2000m
|
|
|
|
|
memory: 2Gi
|
2026-01-08 12:58:00 +01:00
|
|
|
|
2026-01-09 23:14:12 +01:00
|
|
|
# Additional environment variables for receivers
|
|
|
|
|
additionalEnvs:
  # Credentials consumed by the postgresql/*, redis and rabbitmq receivers via
  # ${env:...}. Passwords are read from Kubernetes Secrets instead of being
  # committed in this file; the literal values that used to live here were
  # exposed in version control and should be rotated.
  # NOTE(review): the Secret names and keys below are assumptions — confirm
  # them against the Secrets that actually exist in the bakery-ia namespace
  # (the receiver comments mention "redis-secrets" and "rabbitmq-secrets").
  # NOTE(review): assumes the chart renders map values as-is (valueFrom) for
  # additionalEnvs — confirm for the deployed chart version.
  POSTGRES_MONITOR_USER: "monitoring"
  POSTGRES_MONITOR_PASSWORD:
    valueFrom:
      secretKeyRef:
        name: postgres-monitor-secrets
        key: POSTGRES_MONITOR_PASSWORD
  REDIS_PASSWORD:
    valueFrom:
      secretKeyRef:
        name: redis-secrets
        key: REDIS_PASSWORD
  RABBITMQ_USER: "bakery"
  RABBITMQ_PASSWORD:
    valueFrom:
      secretKeyRef:
        name: rabbitmq-secrets
        key: RABBITMQ_PASSWORD
|
|
|
|
|
|
|
|
|
|
# Mount TLS certificates for secure connections
|
|
|
|
|
extraVolumes:
|
|
|
|
|
- name: redis-tls
|
|
|
|
|
secret:
|
|
|
|
|
secretName: redis-tls-secret
|
|
|
|
|
- name: postgres-tls
|
|
|
|
|
secret:
|
|
|
|
|
secretName: postgres-tls
|
|
|
|
|
- name: postgres-tls-fixed
|
|
|
|
|
emptyDir: {}
|
|
|
|
|
- name: varlogpods
|
|
|
|
|
hostPath:
|
|
|
|
|
path: /var/log/pods
|
|
|
|
|
|
|
|
|
|
extraVolumeMounts:
|
|
|
|
|
- name: redis-tls
|
|
|
|
|
mountPath: /etc/redis-tls
|
|
|
|
|
readOnly: true
|
|
|
|
|
- name: postgres-tls
|
|
|
|
|
mountPath: /etc/postgres-tls-source
|
|
|
|
|
readOnly: true
|
|
|
|
|
- name: postgres-tls-fixed
|
|
|
|
|
mountPath: /etc/postgres-tls
|
|
|
|
|
readOnly: false
|
|
|
|
|
- name: varlogpods
|
|
|
|
|
mountPath: /var/log/pods
|
|
|
|
|
readOnly: true
|
|
|
|
|
|
|
|
|
|
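# Collector entrypoint and arguments; --manager-config points at the OpAMP manager config. NOTE(review): signoz_opamp_server_enabled is "false" in the signoz env block — confirm this flag is still wanted.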
|
|
|
|
|
command:
|
|
|
|
|
name: /signoz-otel-collector
|
|
|
|
|
extraArgs:
|
|
|
|
|
- --config=/conf/otel-collector-config.yaml
|
|
|
|
|
- --manager-config=/conf/otel-collector-opamp-config.yaml
|
|
|
|
|
- --feature-gates=-pkg.translator.prometheus.NormalizeName
|
|
|
|
|
|
2026-01-08 12:58:00 +01:00
|
|
|
# Full OTEL Collector Configuration
|
|
|
|
|
config:
|
2026-01-09 11:18:20 +01:00
|
|
|
# Connectors - bridge between pipelines
|
|
|
|
|
connectors:
|
|
|
|
|
signozmeter:
|
|
|
|
|
dimensions:
|
|
|
|
|
- name: service.name
|
|
|
|
|
- name: deployment.environment
|
|
|
|
|
- name: host.name
|
|
|
|
|
metrics_flush_interval: 1h
|
|
|
|
|
|
2026-01-08 12:58:00 +01:00
|
|
|
extensions:
|
|
|
|
|
health_check:
|
|
|
|
|
endpoint: 0.0.0.0:13133
|
|
|
|
|
zpages:
|
|
|
|
|
endpoint: 0.0.0.0:55679
|
|
|
|
|
|
|
|
|
|
receivers:
|
|
|
|
|
otlp:
|
|
|
|
|
protocols:
|
|
|
|
|
grpc:
|
|
|
|
|
endpoint: 0.0.0.0:4317
|
2026-01-09 07:26:11 +01:00
|
|
|
max_recv_msg_size_mib: 32 # Increased for larger payloads
|
2026-01-08 12:58:00 +01:00
|
|
|
http:
|
|
|
|
|
endpoint: 0.0.0.0:4318
|
|
|
|
|
cors:
|
|
|
|
|
allowed_origins:
|
|
|
|
|
- "https://monitoring.bakewise.ai"
|
|
|
|
|
- "https://*.bakewise.ai"
|
|
|
|
|
|
2026-01-09 23:14:12 +01:00
|
|
|
# Filelog receiver for Kubernetes pod logs
|
|
|
|
|
# Collects container stdout/stderr from /var/log/pods
|
|
|
|
|
filelog:
|
|
|
|
|
include:
|
|
|
|
|
- /var/log/pods/*/*/*.log
|
|
|
|
|
exclude:
|
|
|
|
|
# Exclude SigNoz's own logs to avoid recursive collection
|
|
|
|
|
- /var/log/pods/bakery-ia_signoz-*/*/*.log
|
|
|
|
|
include_file_path: true
|
|
|
|
|
include_file_name: false
|
|
|
|
|
operators:
|
|
|
|
|
# Parse CRI-O / containerd log format
|
|
|
|
|
- type: regex_parser
|
|
|
|
|
regex: '^(?P<time>[^ ]+) (?P<stream>stdout|stderr) (?P<logtag>[^ ]*) (?P<log>.*)$'
|
|
|
|
|
timestamp:
|
|
|
|
|
parse_from: attributes.time
|
|
|
|
|
layout: '%Y-%m-%dT%H:%M:%S.%LZ'
|
|
|
|
|
# Fix timestamp parsing - extract from the parsed time field
|
|
|
|
|
- type: move
|
|
|
|
|
from: attributes.time
|
|
|
|
|
to: attributes.timestamp
|
|
|
|
|
# Extract Kubernetes metadata from file path
|
|
|
|
|
- type: regex_parser
|
|
|
|
|
id: extract_metadata_from_filepath
|
|
|
|
|
regex: '^.*\/(?P<namespace>[^_]+)_(?P<pod_name>[^_]+)_(?P<uid>[^\/]+)\/(?P<container_name>[^\._]+)\/(?P<restart_count>\d+)\.log$'
|
|
|
|
|
parse_from: attributes["log.file.path"]
|
|
|
|
|
# Move metadata to resource attributes
|
|
|
|
|
- type: move
|
|
|
|
|
from: attributes.namespace
|
|
|
|
|
to: resource["k8s.namespace.name"]
|
|
|
|
|
- type: move
|
|
|
|
|
from: attributes.pod_name
|
|
|
|
|
to: resource["k8s.pod.name"]
|
|
|
|
|
- type: move
|
|
|
|
|
from: attributes.container_name
|
|
|
|
|
to: resource["k8s.container.name"]
|
|
|
|
|
- type: move
|
|
|
|
|
from: attributes.log
|
|
|
|
|
to: body
|
|
|
|
|
|
|
|
|
|
# Kubernetes Cluster Receiver - Collects cluster-level metrics
|
|
|
|
|
# Provides information about nodes, namespaces, pods, and other cluster resources
|
|
|
|
|
k8s_cluster:
|
|
|
|
|
collection_interval: 30s
|
|
|
|
|
node_conditions_to_report:
|
|
|
|
|
- Ready
|
|
|
|
|
- MemoryPressure
|
|
|
|
|
- DiskPressure
|
|
|
|
|
- PIDPressure
|
|
|
|
|
- NetworkUnavailable
|
|
|
|
|
allocatable_types_to_report:
|
|
|
|
|
- cpu
|
|
|
|
|
- memory
|
|
|
|
|
- pods
|
|
|
|
|
|
2026-01-08 12:58:00 +01:00
|
|
|
# Prometheus receiver for scraping metrics
|
|
|
|
|
prometheus:
  config:
    scrape_configs:
      # Per-node container metrics (cAdvisor), fetched through the API server
      # proxy: the target address is rewritten to the API server and the
      # metrics path to the node's proxy endpoint.
      - job_name: 'kubernetes-nodes-cadvisor'
        scrape_interval: 30s
        scrape_timeout: 10s
        scheme: https
        tls_config:
          # Verify the API server certificate against the cluster CA mounted
          # with the service account instead of skipping verification.
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        kubernetes_sd_configs:
          - role: node
        relabel_configs:
          - action: labelmap
            regex: __meta_kubernetes_node_label_(.+)
          - target_label: __address__
            replacement: kubernetes.default.svc:443
          - source_labels: [__meta_kubernetes_node_name]
            regex: (.+)
            target_label: __metrics_path__
            # "$$" escapes "$" for the collector's config loader; a bare
            # "${1}" would be treated as an environment-variable reference
            # rather than the regex capture group.
            replacement: /api/v1/nodes/$${1}/proxy/metrics/cadvisor
      # API server metrics: keep only the default/kubernetes service's https
      # endpoint port.
      - job_name: 'kubernetes-apiserver'
        scrape_interval: 30s
        scrape_timeout: 10s
        scheme: https
        tls_config:
          # Same cluster-CA verification as the cAdvisor job above.
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        kubernetes_sd_configs:
          - role: endpoints
        relabel_configs:
          - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
            action: keep
            regex: default;kubernetes;https
|
|
|
|
|
|
|
|
|
|
# Redis receiver for cache metrics
|
|
|
|
|
# ENABLED: Using existing credentials from redis-secrets with TLS
|
|
|
|
|
redis:
|
|
|
|
|
endpoint: redis-service.bakery-ia:6379
|
|
|
|
|
password: ${env:REDIS_PASSWORD}
|
|
|
|
|
collection_interval: 60s
|
|
|
|
|
transport: tcp
|
|
|
|
|
tls:
|
|
|
|
|
insecure_skip_verify: false
|
|
|
|
|
cert_file: /etc/redis-tls/redis-cert.pem
|
|
|
|
|
key_file: /etc/redis-tls/redis-key.pem
|
|
|
|
|
ca_file: /etc/redis-tls/ca-cert.pem
|
|
|
|
|
metrics:
|
|
|
|
|
redis.maxmemory:
|
|
|
|
|
enabled: true
|
|
|
|
|
redis.cmd.latency:
|
|
|
|
|
enabled: true
|
|
|
|
|
|
|
|
|
|
# RabbitMQ receiver via management API
|
|
|
|
|
# ENABLED: Using existing credentials from rabbitmq-secrets
|
|
|
|
|
rabbitmq:
|
|
|
|
|
endpoint: http://rabbitmq-service.bakery-ia:15672
|
|
|
|
|
username: ${env:RABBITMQ_USER}
|
|
|
|
|
password: ${env:RABBITMQ_PASSWORD}
|
|
|
|
|
collection_interval: 30s
|
|
|
|
|
|
|
|
|
|
# PostgreSQL receivers for database metrics
|
|
|
|
|
# Monitor all databases with proper TLS configuration
|
|
|
|
|
postgresql/auth:
|
|
|
|
|
endpoint: auth-db-service.bakery-ia:5432
|
|
|
|
|
username: ${env:POSTGRES_MONITOR_USER}
|
|
|
|
|
password: ${env:POSTGRES_MONITOR_PASSWORD}
|
|
|
|
|
databases:
|
|
|
|
|
- auth_db
|
|
|
|
|
collection_interval: 60s
|
|
|
|
|
tls:
|
|
|
|
|
insecure: false
|
|
|
|
|
cert_file: /etc/postgres-tls/server-cert.pem
|
|
|
|
|
key_file: /etc/postgres-tls/server-key.pem
|
|
|
|
|
ca_file: /etc/postgres-tls/ca-cert.pem
|
|
|
|
|
|
|
|
|
|
postgresql/inventory:
|
|
|
|
|
endpoint: inventory-db-service.bakery-ia:5432
|
|
|
|
|
username: ${env:POSTGRES_MONITOR_USER}
|
|
|
|
|
password: ${env:POSTGRES_MONITOR_PASSWORD}
|
|
|
|
|
databases:
|
|
|
|
|
- inventory_db
|
|
|
|
|
collection_interval: 60s
|
|
|
|
|
tls:
|
|
|
|
|
insecure: false
|
|
|
|
|
cert_file: /etc/postgres-tls/server-cert.pem
|
|
|
|
|
key_file: /etc/postgres-tls/server-key.pem
|
|
|
|
|
ca_file: /etc/postgres-tls/ca-cert.pem
|
|
|
|
|
|
|
|
|
|
postgresql/orders:
|
|
|
|
|
endpoint: orders-db-service.bakery-ia:5432
|
|
|
|
|
username: ${env:POSTGRES_MONITOR_USER}
|
|
|
|
|
password: ${env:POSTGRES_MONITOR_PASSWORD}
|
|
|
|
|
databases:
|
|
|
|
|
- orders_db
|
|
|
|
|
collection_interval: 60s
|
|
|
|
|
tls:
|
|
|
|
|
insecure: false
|
|
|
|
|
cert_file: /etc/postgres-tls/server-cert.pem
|
|
|
|
|
key_file: /etc/postgres-tls/server-key.pem
|
|
|
|
|
ca_file: /etc/postgres-tls/ca-cert.pem
|
|
|
|
|
|
|
|
|
|
postgresql/ai-insights:
|
|
|
|
|
endpoint: ai-insights-db-service.bakery-ia:5432
|
|
|
|
|
username: ${env:POSTGRES_MONITOR_USER}
|
|
|
|
|
password: ${env:POSTGRES_MONITOR_PASSWORD}
|
|
|
|
|
databases:
|
|
|
|
|
- ai_insights_db
|
|
|
|
|
collection_interval: 60s
|
|
|
|
|
tls:
|
|
|
|
|
insecure: false
|
|
|
|
|
cert_file: /etc/postgres-tls/server-cert.pem
|
|
|
|
|
key_file: /etc/postgres-tls/server-key.pem
|
|
|
|
|
ca_file: /etc/postgres-tls/ca-cert.pem
|
|
|
|
|
|
|
|
|
|
postgresql/alert-processor:
|
|
|
|
|
endpoint: alert-processor-db-service.bakery-ia:5432
|
|
|
|
|
username: ${env:POSTGRES_MONITOR_USER}
|
|
|
|
|
password: ${env:POSTGRES_MONITOR_PASSWORD}
|
|
|
|
|
databases:
|
|
|
|
|
- alert_processor_db
|
|
|
|
|
collection_interval: 60s
|
|
|
|
|
tls:
|
|
|
|
|
insecure: false
|
|
|
|
|
cert_file: /etc/postgres-tls/server-cert.pem
|
|
|
|
|
key_file: /etc/postgres-tls/server-key.pem
|
|
|
|
|
ca_file: /etc/postgres-tls/ca-cert.pem
|
|
|
|
|
|
|
|
|
|
postgresql/distribution:
|
|
|
|
|
endpoint: distribution-db-service.bakery-ia:5432
|
|
|
|
|
username: ${env:POSTGRES_MONITOR_USER}
|
|
|
|
|
password: ${env:POSTGRES_MONITOR_PASSWORD}
|
|
|
|
|
databases:
|
|
|
|
|
- distribution_db
|
|
|
|
|
collection_interval: 60s
|
|
|
|
|
tls:
|
|
|
|
|
insecure: false
|
|
|
|
|
cert_file: /etc/postgres-tls/server-cert.pem
|
|
|
|
|
key_file: /etc/postgres-tls/server-key.pem
|
|
|
|
|
ca_file: /etc/postgres-tls/ca-cert.pem
|
|
|
|
|
|
|
|
|
|
postgresql/external:
|
|
|
|
|
endpoint: external-db-service.bakery-ia:5432
|
|
|
|
|
username: ${env:POSTGRES_MONITOR_USER}
|
|
|
|
|
password: ${env:POSTGRES_MONITOR_PASSWORD}
|
|
|
|
|
databases:
|
|
|
|
|
- external_db
|
|
|
|
|
collection_interval: 60s
|
|
|
|
|
tls:
|
|
|
|
|
insecure: false
|
|
|
|
|
cert_file: /etc/postgres-tls/server-cert.pem
|
|
|
|
|
key_file: /etc/postgres-tls/server-key.pem
|
|
|
|
|
ca_file: /etc/postgres-tls/ca-cert.pem
|
|
|
|
|
|
|
|
|
|
postgresql/forecasting:
|
|
|
|
|
endpoint: forecasting-db-service.bakery-ia:5432
|
|
|
|
|
username: ${env:POSTGRES_MONITOR_USER}
|
|
|
|
|
password: ${env:POSTGRES_MONITOR_PASSWORD}
|
|
|
|
|
databases:
|
|
|
|
|
- forecasting_db
|
|
|
|
|
collection_interval: 60s
|
|
|
|
|
tls:
|
|
|
|
|
insecure: false
|
|
|
|
|
cert_file: /etc/postgres-tls/server-cert.pem
|
|
|
|
|
key_file: /etc/postgres-tls/server-key.pem
|
|
|
|
|
ca_file: /etc/postgres-tls/ca-cert.pem
|
|
|
|
|
|
|
|
|
|
postgresql/notification:
|
|
|
|
|
endpoint: notification-db-service.bakery-ia:5432
|
|
|
|
|
username: ${env:POSTGRES_MONITOR_USER}
|
|
|
|
|
password: ${env:POSTGRES_MONITOR_PASSWORD}
|
|
|
|
|
databases:
|
|
|
|
|
- notification_db
|
|
|
|
|
collection_interval: 60s
|
|
|
|
|
tls:
|
|
|
|
|
insecure: false
|
|
|
|
|
cert_file: /etc/postgres-tls/server-cert.pem
|
|
|
|
|
key_file: /etc/postgres-tls/server-key.pem
|
|
|
|
|
ca_file: /etc/postgres-tls/ca-cert.pem
|
|
|
|
|
|
|
|
|
|
postgresql/orchestrator:
|
|
|
|
|
endpoint: orchestrator-db-service.bakery-ia:5432
|
|
|
|
|
username: ${env:POSTGRES_MONITOR_USER}
|
|
|
|
|
password: ${env:POSTGRES_MONITOR_PASSWORD}
|
|
|
|
|
databases:
|
|
|
|
|
- orchestrator_db
|
|
|
|
|
collection_interval: 60s
|
|
|
|
|
tls:
|
|
|
|
|
insecure: false
|
|
|
|
|
cert_file: /etc/postgres-tls/server-cert.pem
|
|
|
|
|
key_file: /etc/postgres-tls/server-key.pem
|
|
|
|
|
ca_file: /etc/postgres-tls/ca-cert.pem
|
|
|
|
|
|
|
|
|
|
postgresql/pos:
|
|
|
|
|
endpoint: pos-db-service.bakery-ia:5432
|
|
|
|
|
username: ${env:POSTGRES_MONITOR_USER}
|
|
|
|
|
password: ${env:POSTGRES_MONITOR_PASSWORD}
|
|
|
|
|
databases:
|
|
|
|
|
- pos_db
|
|
|
|
|
collection_interval: 60s
|
|
|
|
|
tls:
|
|
|
|
|
insecure: false
|
|
|
|
|
cert_file: /etc/postgres-tls/server-cert.pem
|
|
|
|
|
key_file: /etc/postgres-tls/server-key.pem
|
|
|
|
|
ca_file: /etc/postgres-tls/ca-cert.pem
|
|
|
|
|
|
|
|
|
|
postgresql/procurement:
|
|
|
|
|
endpoint: procurement-db-service.bakery-ia:5432
|
|
|
|
|
username: ${env:POSTGRES_MONITOR_USER}
|
|
|
|
|
password: ${env:POSTGRES_MONITOR_PASSWORD}
|
|
|
|
|
databases:
|
|
|
|
|
- procurement_db
|
|
|
|
|
collection_interval: 60s
|
|
|
|
|
tls:
|
|
|
|
|
insecure: false
|
|
|
|
|
cert_file: /etc/postgres-tls/server-cert.pem
|
|
|
|
|
key_file: /etc/postgres-tls/server-key.pem
|
|
|
|
|
ca_file: /etc/postgres-tls/ca-cert.pem
|
|
|
|
|
|
|
|
|
|
postgresql/production:
|
|
|
|
|
endpoint: production-db-service.bakery-ia:5432
|
|
|
|
|
username: ${env:POSTGRES_MONITOR_USER}
|
|
|
|
|
password: ${env:POSTGRES_MONITOR_PASSWORD}
|
|
|
|
|
databases:
|
|
|
|
|
- production_db
|
|
|
|
|
collection_interval: 60s
|
|
|
|
|
tls:
|
|
|
|
|
insecure: false
|
|
|
|
|
cert_file: /etc/postgres-tls/server-cert.pem
|
|
|
|
|
key_file: /etc/postgres-tls/server-key.pem
|
|
|
|
|
ca_file: /etc/postgres-tls/ca-cert.pem
|
|
|
|
|
|
|
|
|
|
postgresql/recipes:
|
|
|
|
|
endpoint: recipes-db-service.bakery-ia:5432
|
|
|
|
|
username: ${env:POSTGRES_MONITOR_USER}
|
|
|
|
|
password: ${env:POSTGRES_MONITOR_PASSWORD}
|
|
|
|
|
databases:
|
|
|
|
|
- recipes_db
|
|
|
|
|
collection_interval: 60s
|
|
|
|
|
tls:
|
|
|
|
|
insecure: false
|
|
|
|
|
cert_file: /etc/postgres-tls/server-cert.pem
|
|
|
|
|
key_file: /etc/postgres-tls/server-key.pem
|
|
|
|
|
ca_file: /etc/postgres-tls/ca-cert.pem
|
|
|
|
|
|
|
|
|
|
postgresql/sales:
|
|
|
|
|
endpoint: sales-db-service.bakery-ia:5432
|
|
|
|
|
username: ${env:POSTGRES_MONITOR_USER}
|
|
|
|
|
password: ${env:POSTGRES_MONITOR_PASSWORD}
|
|
|
|
|
databases:
|
|
|
|
|
- sales_db
|
|
|
|
|
collection_interval: 60s
|
|
|
|
|
tls:
|
|
|
|
|
insecure: false
|
|
|
|
|
cert_file: /etc/postgres-tls/server-cert.pem
|
|
|
|
|
key_file: /etc/postgres-tls/server-key.pem
|
|
|
|
|
ca_file: /etc/postgres-tls/ca-cert.pem
|
|
|
|
|
|
|
|
|
|
postgresql/suppliers:
|
|
|
|
|
endpoint: suppliers-db-service.bakery-ia:5432
|
|
|
|
|
username: ${env:POSTGRES_MONITOR_USER}
|
|
|
|
|
password: ${env:POSTGRES_MONITOR_PASSWORD}
|
|
|
|
|
databases:
|
|
|
|
|
- suppliers_db
|
|
|
|
|
collection_interval: 60s
|
|
|
|
|
tls:
|
|
|
|
|
insecure: false
|
|
|
|
|
cert_file: /etc/postgres-tls/server-cert.pem
|
|
|
|
|
key_file: /etc/postgres-tls/server-key.pem
|
|
|
|
|
ca_file: /etc/postgres-tls/ca-cert.pem
|
|
|
|
|
|
|
|
|
|
postgresql/tenant:
|
|
|
|
|
endpoint: tenant-db-service.bakery-ia:5432
|
|
|
|
|
username: ${env:POSTGRES_MONITOR_USER}
|
|
|
|
|
password: ${env:POSTGRES_MONITOR_PASSWORD}
|
|
|
|
|
databases:
|
|
|
|
|
- tenant_db
|
|
|
|
|
collection_interval: 60s
|
|
|
|
|
tls:
|
|
|
|
|
insecure: false
|
|
|
|
|
cert_file: /etc/postgres-tls/server-cert.pem
|
|
|
|
|
key_file: /etc/postgres-tls/server-key.pem
|
|
|
|
|
ca_file: /etc/postgres-tls/ca-cert.pem
|
|
|
|
|
|
|
|
|
|
postgresql/training:
|
|
|
|
|
endpoint: training-db-service.bakery-ia:5432
|
|
|
|
|
username: ${env:POSTGRES_MONITOR_USER}
|
|
|
|
|
password: ${env:POSTGRES_MONITOR_PASSWORD}
|
|
|
|
|
databases:
|
|
|
|
|
- training_db
|
|
|
|
|
collection_interval: 60s
|
|
|
|
|
tls:
|
|
|
|
|
insecure: false
|
|
|
|
|
cert_file: /etc/postgres-tls/server-cert.pem
|
|
|
|
|
key_file: /etc/postgres-tls/server-key.pem
|
|
|
|
|
ca_file: /etc/postgres-tls/ca-cert.pem
|
2026-01-08 12:58:00 +01:00
|
|
|
|
|
|
|
|
processors:
|
2026-01-09 07:26:11 +01:00
|
|
|
# High-performance batch processing (official recommendation)
|
2026-01-08 12:58:00 +01:00
|
|
|
batch:
|
2026-01-09 07:26:11 +01:00
|
|
|
timeout: 1s # Reduced from 10s for faster processing
|
|
|
|
|
send_batch_size: 50000 # Increased from 2048 (official recommendation for traces)
|
|
|
|
|
send_batch_max_size: 50000
|
2026-01-08 12:58:00 +01:00
|
|
|
|
2026-01-09 11:18:20 +01:00
|
|
|
# Batch processor for meter data
|
|
|
|
|
batch/meter:
|
|
|
|
|
timeout: 1s
|
|
|
|
|
send_batch_size: 20000
|
|
|
|
|
send_batch_max_size: 25000
|
|
|
|
|
|
2026-01-08 12:58:00 +01:00
|
|
|
memory_limiter:
|
|
|
|
|
check_interval: 1s
|
2026-01-09 07:26:11 +01:00
|
|
|
limit_mib: 1500 # 75% of container memory (2Gi = ~2048Mi)
|
|
|
|
|
spike_limit_mib: 300
|
2026-01-08 12:58:00 +01:00
|
|
|
|
|
|
|
|
# Resource detection for K8s
|
|
|
|
|
resourcedetection:
|
2026-01-09 23:14:12 +01:00
|
|
|
detectors: [env, system, docker]
|
2026-01-08 12:58:00 +01:00
|
|
|
timeout: 5s
|
|
|
|
|
|
|
|
|
|
# Add resource attributes
|
|
|
|
|
resource:
|
|
|
|
|
attributes:
|
|
|
|
|
- key: deployment.environment
|
|
|
|
|
value: production
|
|
|
|
|
action: upsert
|
|
|
|
|
- key: cluster.name
|
|
|
|
|
value: bakery-ia-prod
|
|
|
|
|
action: upsert
|
|
|
|
|
|
2026-01-09 23:14:12 +01:00
|
|
|
# Kubernetes attributes processor - CRITICAL for logs
|
|
|
|
|
# Enriches telemetry with pod, namespace and container metadata looked up from the Kubernetes API (service-account auth)
|
|
|
|
|
k8sattributes:
|
|
|
|
|
auth_type: "serviceAccount"
|
|
|
|
|
passthrough: false
|
|
|
|
|
extract:
|
|
|
|
|
metadata:
|
|
|
|
|
- k8s.pod.name
|
|
|
|
|
- k8s.pod.uid
|
|
|
|
|
- k8s.deployment.name
|
|
|
|
|
- k8s.namespace.name
|
|
|
|
|
- k8s.node.name
|
|
|
|
|
- k8s.container.name
|
|
|
|
|
labels:
|
|
|
|
|
- tag_name: "app"
|
|
|
|
|
- tag_name: "pod-template-hash"
|
|
|
|
|
- tag_name: "version"
|
|
|
|
|
annotations:
|
|
|
|
|
- tag_name: "description"
|
|
|
|
|
|
2026-01-09 11:18:20 +01:00
|
|
|
# SigNoz span metrics processor with delta aggregation (recommended)
|
|
|
|
|
# Generates RED metrics (Rate, Error, Duration) from trace spans
|
|
|
|
|
signozspanmetrics/delta:
|
|
|
|
|
aggregation_temporality: AGGREGATION_TEMPORALITY_DELTA
|
2026-01-09 07:26:11 +01:00
|
|
|
metrics_exporter: signozclickhousemetrics
|
2026-01-09 11:18:20 +01:00
|
|
|
latency_histogram_buckets: [100us, 1ms, 2ms, 6ms, 10ms, 50ms, 100ms, 250ms, 500ms, 1000ms, 1400ms, 2000ms, 5s, 10s, 20s, 40s, 60s]
|
2026-01-09 07:26:11 +01:00
|
|
|
dimensions_cache_size: 100000
|
2026-01-09 11:18:20 +01:00
|
|
|
dimensions:
|
|
|
|
|
- name: service.namespace
|
|
|
|
|
default: default
|
|
|
|
|
- name: deployment.environment
|
|
|
|
|
default: production
|
|
|
|
|
- name: signoz.collector.id
|
2026-01-09 07:26:11 +01:00
|
|
|
|
2026-01-08 12:58:00 +01:00
|
|
|
exporters:
  # All ClickHouse DSNs take the user and password from the collector's
  # environment rather than embedding them in this file.
  # NOTE(review): assumes the SigNoz chart injects CLICKHOUSE_USER and
  # CLICKHOUSE_PASSWORD into the collector container (the chart's default
  # collector config uses the same ${env:...} form) — confirm for the
  # deployed chart version.

  # ClickHouse exporter for traces
  clickhousetraces:
    datasource: "tcp://${env:CLICKHOUSE_USER}:${env:CLICKHOUSE_PASSWORD}@signoz-clickhouse:9000/?database=signoz_traces"
    timeout: 10s
    retry_on_failure:
      enabled: true
      initial_interval: 5s
      max_interval: 30s
      max_elapsed_time: 300s

  # ClickHouse exporter for metrics
  signozclickhousemetrics:
    dsn: "tcp://${env:CLICKHOUSE_USER}:${env:CLICKHOUSE_PASSWORD}@signoz-clickhouse:9000/signoz_metrics"
    timeout: 10s
    retry_on_failure:
      enabled: true
      initial_interval: 5s
      max_interval: 30s
      max_elapsed_time: 300s

  # ClickHouse exporter for meter data (usage metrics)
  signozclickhousemeter:
    dsn: "tcp://${env:CLICKHOUSE_USER}:${env:CLICKHOUSE_PASSWORD}@signoz-clickhouse:9000/signoz_meter"
    timeout: 45s
    sending_queue:
      enabled: false

  # ClickHouse exporter for logs
  clickhouselogsexporter:
    dsn: "tcp://${env:CLICKHOUSE_USER}:${env:CLICKHOUSE_PASSWORD}@signoz-clickhouse:9000/?database=signoz_logs"
    timeout: 10s
    retry_on_failure:
      enabled: true
      initial_interval: 5s
      max_interval: 30s

  # Metadata exporter for service metadata
  metadataexporter:
    dsn: "tcp://${env:CLICKHOUSE_USER}:${env:CLICKHOUSE_PASSWORD}@signoz-clickhouse:9000/signoz_metadata"
    timeout: 10s
    cache:
      provider: in_memory

  # Debug exporter for debugging (optional)
  debug:
    verbosity: detailed
    sampling_initial: 5
    sampling_thereafter: 200
|
2026-01-08 12:58:00 +01:00
|
|
|
|
|
|
|
|
service:
  extensions: [health_check, zpages]

  pipelines:
    # Traces pipeline - exports to ClickHouse, the metadata exporter and the
    # signozmeter connector (which feeds the metrics/meter pipeline below).
    traces:
      receivers: [otlp]
      processors:
        - memory_limiter
        - batch
        - signozspanmetrics/delta
        - resourcedetection
        - resource
      exporters: [clickhousetraces, metadataexporter, signozmeter]

    # Metrics pipeline - OTLP plus all infrastructure receivers
    # (one postgresql receiver per service database, redis, rabbitmq,
    # the Kubernetes cluster receiver and prometheus scraping).
    metrics:
      receivers:
        - otlp
        - postgresql/auth
        - postgresql/inventory
        - postgresql/orders
        - postgresql/ai-insights
        - postgresql/alert-processor
        - postgresql/distribution
        - postgresql/external
        - postgresql/forecasting
        - postgresql/notification
        - postgresql/orchestrator
        - postgresql/pos
        - postgresql/procurement
        - postgresql/production
        - postgresql/recipes
        - postgresql/sales
        - postgresql/suppliers
        - postgresql/tenant
        - postgresql/training
        - redis
        - rabbitmq
        - k8s_cluster
        - prometheus
      processors: [memory_limiter, batch, resourcedetection, resource]
      exporters: [signozclickhousemetrics]

    # Meter pipeline - receives from the signozmeter connector
    metrics/meter:
      receivers: [signozmeter]
      processors: [batch/meter]
      exporters: [signozclickhousemeter]

    # Logs pipeline - includes both OTLP and Kubernetes pod logs (filelog).
    # k8sattributes must run BEFORE batch: the batch processor does not carry
    # the client connection context forward, and k8sattributes relies on that
    # context to associate OTLP data with the sending pod. batch is therefore
    # placed last; the relative order of the enrichment processors is unchanged.
    logs:
      receivers: [otlp, filelog]
      processors:
        - memory_limiter
        - resourcedetection
        - resource
        - k8sattributes
        - batch
      exporters: [clickhouselogsexporter]
|
2026-01-08 12:58:00 +01:00
|
|
|
|
|
|
|
|
# Horizontal Pod Autoscaler for the OTel Collector: scales between 2 and 10
# replicas on CPU (70%) and memory (80%) utilization targets.
# NOTE(review): the metrics pipeline also contains scraping receivers
# (postgresql/*, redis, rabbitmq, k8s_cluster, prometheus). Presumably every
# replica runs them, which would duplicate those metrics - confirm, and
# consider moving scrapers to a single-replica collector if so.
autoscaling:
  enabled: true
  minReplicas: 2
  maxReplicas: 10
  targetCPUUtilizationPercentage: 70
  targetMemoryUtilizationPercentage: 80
|
|
|
|
|
|
2026-01-10 13:43:38 +01:00
|
|
|
# ClusterRole used by the OTel Collector for Kubernetes monitoring.
# CRITICAL: the k8s_cluster receiver needs these permissions to reach the
# Kubernetes API; without them no k8s metrics appear in the SigNoz UI.
clusterRole:
  create: true
  name: signoz-otel-collector-bakery-ia
  annotations: {}
  # Complete RBAC rule set required by k8sclusterreceiver (per the
  # OpenTelemetry and SigNoz documentation). Every rule is read-only:
  # get / list / watch.
  rules:
    # Core API group - fundamental Kubernetes resources.
    # NOTE(review): nodes/proxy is a broad grant (kubelet API access);
    # confirm a receiver in this deployment actually needs it.
    - apiGroups: [""]
      resources:
        - events
        - namespaces
        - nodes
        - nodes/proxy
        - nodes/metrics
        - nodes/spec
        - pods
        - pods/status
        - replicationcontrollers
        - replicationcontrollers/status
        - resourcequotas
        - services
        - endpoints
      verbs: [get, list, watch]

    # Apps API group - modern workload controllers
    - apiGroups: [apps]
      resources:
        - deployments
        - daemonsets
        - statefulsets
        - replicasets
      verbs: [get, list, watch]

    # Batch API group - job management
    - apiGroups: [batch]
      resources:
        - jobs
        - cronjobs
      verbs: [get, list, watch]

    # Autoscaling API group - HPA metrics (CRITICAL)
    - apiGroups: [autoscaling]
      resources:
        - horizontalpodautoscalers
      verbs: [get, list, watch]

    # Extensions API group - legacy support
    - apiGroups: [extensions]
      resources:
        - deployments
        - daemonsets
        - replicasets
      verbs: [get, list, watch]

    # Metrics API group - resource metrics
    - apiGroups: [metrics.k8s.io]
      resources:
        - nodes
        - pods
      verbs: [get, list, watch]
|
|
|
|
|
# Binding for the collector's ClusterRole; it carries the same explicit name
# as the role.
# NOTE(review): the nesting level of this key could not be recovered from this
# view - confirm it sits where the chart expects it.
clusterRoleBinding:
  annotations: {}
  name: signoz-otel-collector-bakery-ia
|
|
|
|
|
|
2026-01-09 07:26:11 +01:00
|
|
|
# Schema Migrator - manages ClickHouse schema migrations
schemaMigrator:
  enabled: true

  image:
    repository: signoz/signoz-schema-migrator
    # Pinned migrator image version
    tag: "v0.129.12"
    pullPolicy: IfNotPresent

  # Run the migrator through Helm hooks so upgrades are handled properly
  upgradeHelmHooks: true
|
|
|
|
|
|
2026-01-08 12:58:00 +01:00
|
|
|
# Additional Configuration
# Service account: created by the chart under the fixed name "signoz",
# with no extra annotations.
serviceAccount:
  create: true
  annotations: {}
  name: signoz
|
|
|
|
|
|
|
|
|
|
# Security Context: refuse to run as root; run as UID 1000 and use GID 1000
# as the filesystem group.
securityContext:
  runAsNonRoot: true
  runAsUser: 1000
  fsGroup: 1000
|
|
|
|
|
|
|
|
|
|
# Pod Disruption Budgets for HA: each listed component keeps at least one pod
# available during voluntary disruptions.
# NOTE(review): the header of this file says v0.89.0+ replaced the separate
# frontend/queryService with a unified `signoz` component - confirm the chart
# still reads the `frontend` and `queryService` keys below.
podDisruptionBudget:
  frontend:
    enabled: true
    minAvailable: 1

  queryService:
    enabled: true
    minAvailable: 1

  alertmanager:
    enabled: true
    minAvailable: 1

  clickhouse:
    enabled: true
    minAvailable: 1
|
|
|
|
|
|
|
|
|
|
# Network Policies for security: enabled for both traffic directions.
networkPolicy:
  enabled: true
  policyTypes: [Ingress, Egress]
|
|
|
|
|
|
|
|
|
|
# Monitoring SigNoz itself: self-monitoring is on, with a ServiceMonitor
# configured at a 30-second interval.
selfMonitoring:
  enabled: true

  serviceMonitor:
    enabled: true
    interval: 30s
|