2026-01-08 12:58:00 +01:00
|
|
|
# SigNoz Helm Chart Values - Production Environment
|
|
|
|
|
# High-availability configuration with resource optimization
|
2026-01-22 12:31:10 +01:00
|
|
|
# DEPLOYED IN bakery-ia NAMESPACE - Ingress managed by SigNoz Helm chart
|
2026-01-08 12:58:00 +01:00
|
|
|
#
|
|
|
|
|
# Official Chart: https://github.com/SigNoz/charts
|
2026-01-25 20:07:37 +01:00
|
|
|
# Install Command: helm upgrade --install signoz signoz/signoz -n bakery-ia -f signoz-values-prod.yaml
|
|
|
|
|
#
|
|
|
|
|
# IMPORTANT: This chart works together with k8s-infra chart for infrastructure monitoring
|
|
|
|
|
# Deploy k8s-infra after this: helm upgrade --install k8s-infra signoz/k8s-infra -n bakery-ia -f k8s-infra-values-prod.yaml
|
|
|
|
|
#
|
|
|
|
|
# MEMORY OPTIMIZATION NOTES:
|
|
|
|
|
# - ClickHouse memory limit increased to 8Gi (request stays at 4Gi) to prevent OOM errors
|
|
|
|
|
# - Retention reduced to 3 days for traces, 7 days for metrics/logs
|
2026-01-08 12:58:00 +01:00
|
|
|
|
|
|
|
|
global:
  # Storage class, cluster name and domain for this deployment.
  storageClass: "microk8s-hostpath"
  clusterName: "bakery-ia-prod"
  domain: "monitoring.bakewise.ai"
|
2026-01-22 12:31:10 +01:00
|
|
|
|
2026-01-22 16:24:03 +01:00
|
|
|
# Ingress configuration for SigNoz Frontend
|
|
|
|
|
signoz:
  ingress:
    enabled: true
    className: nginx
    annotations:
      # Redirect plain HTTP to HTTPS.
      nginx.ingress.kubernetes.io/ssl-redirect: "true"
      nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
      # Request body size cap and proxy read/send timeouts.
      nginx.ingress.kubernetes.io/proxy-body-size: "100m"
      nginx.ingress.kubernetes.io/proxy-read-timeout: "3600"
      nginx.ingress.kubernetes.io/proxy-send-timeout: "3600"
      # Certificate issuer used by cert-manager for the TLS secret below.
      cert-manager.io/cluster-issuer: "letsencrypt-production"
      # Rate and connection limits applied by the nginx ingress controller.
      nginx.ingress.kubernetes.io/limit-rps: "50"
      nginx.ingress.kubernetes.io/limit-connections: "25"
    hosts:
      - host: monitoring.bakewise.ai
        paths:
          - path: /
            pathType: ImplementationSpecific
            port: 8080
    tls:
      - hosts:
          - monitoring.bakewise.ai
        secretName: bakery-ia-prod-tls-cert
|
|
|
|
|
|
2026-01-25 20:07:37 +01:00
|
|
|
# ============================================================================
|
|
|
|
|
# CLICKHOUSE CONFIGURATION
|
|
|
|
|
# Memory limit increased to 8Gi to prevent OOM errors (was 4Gi, which caused ClickHouse error code 241, MEMORY_LIMIT_EXCEEDED)
|
|
|
|
|
# ============================================================================
|
2026-01-22 12:31:10 +01:00
|
|
|
clickhouse:
  persistence:
    # Size of the ClickHouse data volume.
    size: 20Gi
  resources:
    # The memory and CPU requests are half of the corresponding limits.
    requests:
      memory: "4Gi"
      cpu: "1000m"
    limits:
      memory: "8Gi"
      cpu: "2000m"

  # Server-level settings only (NOT user-level settings like max_threads).
  # User-level settings must go in the profiles section below.
  settings:
    # Max server memory usage: 6.4 GB, roughly 75% of the 8Gi container limit
    max_server_memory_usage: "6400000000"
    # Mark cache size (256 MiB)
    mark_cache_size: "268435456"
    # Uncompressed cache size (256 MiB)
    uncompressed_cache_size: "268435456"
    # Max concurrent queries
    max_concurrent_queries: "100"

  # User-level settings go in profiles.
  # NOTE(review): the chart may expect flat "default/<setting>" keys here
  # rather than a nested "default:" mapping - confirm against the chart's
  # values.yaml.
  profiles:
    default:
      # Max memory per query: 2 GB
      max_memory_usage: "2000000000"
      # Max threads per query
      max_threads: "4"
      # Upper bound (1 GiB) on the total size of parts combined in a single
      # background merge; this limits merge size, not memory.
      # NOTE(review): ClickHouse documents this as a MergeTree setting, not a
      # user-level one - confirm it takes effect from a profile.
      max_bytes_to_merge_at_max_space_in_pool: "1073741824"

  coldStorage:
    enabled: false
|
|
|
|
|
|
|
|
|
|
# ============================================================================
|
|
|
|
|
# DATA RETENTION CONFIGURATION
|
|
|
|
|
# Reduced retention to minimize storage and memory pressure
|
|
|
|
|
# ============================================================================
|
|
|
|
|
queryService:
  resources:
    # The memory and CPU requests are half of the corresponding limits.
    requests:
      memory: "1Gi"
      cpu: "500m"
    limits:
      memory: "2Gi"
      cpu: "1000m"

  # Retention configuration via environment variables.
  # NOTE(review): confirm the deployed chart/app version actually reads these
  # variable names; they cannot be verified from this file.
  configVars:
    # Trace retention: 3 days (72 hours)
    SIGNOZ_TRACE_TTL_DURATION_HOURS: "72"
    # Logs retention: 7 days (168 hours)
    SIGNOZ_LOGS_TTL_DURATION_HOURS: "168"
    # Metrics retention: 7 days (168 hours)
    SIGNOZ_METRICS_TTL_DURATION_HOURS: "168"
|
2026-01-22 12:31:10 +01:00
|
|
|
|
2026-01-25 20:07:37 +01:00
|
|
|
# ============================================================================
|
|
|
|
|
# OTEL COLLECTOR CONFIGURATION
|
|
|
|
|
# This collector receives data from:
|
|
|
|
|
# - Application services (traces, logs, metrics via OTLP)
|
|
|
|
|
# - k8s-infra chart (infrastructure metrics)
|
|
|
|
|
# ============================================================================
|
|
|
|
|
otelCollector:
  resources:
    # The memory and CPU requests are half of the corresponding limits.
    requests:
      memory: "1Gi"
      cpu: "500m"
    limits:
      memory: "2Gi"
      cpu: "1000m"
|
|
|
|
|
|
2026-01-25 20:07:37 +01:00
|
|
|
# ============================================================================
|
|
|
|
|
# ALERTMANAGER CONFIGURATION
|
|
|
|
|
# ============================================================================
|
2026-01-22 12:31:10 +01:00
|
|
|
alertmanager:
  resources:
    # The memory and CPU requests are half of the corresponding limits.
    requests:
      memory: "512Mi"
      cpu: "250m"
    limits:
      memory: "1Gi"
      cpu: "500m"
|
2026-01-25 20:07:37 +01:00
|
|
|
|
|
|
|
|
# ============================================================================
|
|
|
|
|
# ZOOKEEPER CONFIGURATION
|
|
|
|
|
# ============================================================================
|
|
|
|
|
# NOTE(review): some SigNoz chart versions configure ZooKeeper as a subchart
# under "clickhouse.zookeeper"; confirm this top-level key is honoured by the
# deployed chart version.
zookeeper:
  resources:
    # The memory and CPU requests are half of the corresponding limits.
    requests:
      memory: "512Mi"
      cpu: "250m"
    limits:
      memory: "1Gi"
      cpu: "500m"
  persistence:
    # Size of the ZooKeeper data volume.
    size: 5Gi
|