# Source file: bakery-ia/infrastructure/helm/signoz-values-dev.yaml
# Snapshot: 2026-01-09 07:26:11 +01:00 (353 lines, 8.9 KiB, YAML)

# SigNoz Helm Chart Values - Development Environment
# Optimized for local development with minimal resource usage
# DEPLOYED IN bakery-ia NAMESPACE - Ingress managed by bakery-ingress
#
# Official Chart: https://github.com/SigNoz/charts
# Install Command: helm install signoz signoz/signoz -n bakery-ia -f signoz-values-dev.yaml
# Settings shared by the SigNoz chart and its sub-charts.
global:
  storageClass: 'standard'
  clusterName: 'bakery-ia-dev'
  domain: 'monitoring.bakery-ia.local'

  # Docker Hub pull secret, propagated to every sub-chart
  # (Zookeeper, ClickHouse, etc).
  imagePullSecrets:
    - dockerhub-creds
# Root-level Docker Hub pull secret, used by the SigNoz components themselves.
imagePullSecrets:
  - dockerhub-creds
# SigNoz main component (bundles the frontend and the query service).
signoz:
  replicaCount: 1

  service:
    type: ClusterIP
    port: 8080

  # Built-in ingress is switched off: traffic is routed by the unified
  # bakery-ingress instead.
  # Route configured in infrastructure/kubernetes/overlays/dev/dev-ingress.yaml
  ingress:
    enabled: false

  resources:
    requests:
      cpu: 100m  # Covers frontend + query service together
      memory: 256Mi
    limits:
      cpu: 1000m
      memory: 1Gi

  # Environment variables (new format - replaces configVars).
  # Values are kept as strings on purpose.
  env:
    signoz_telemetrystore_provider: 'clickhouse'
    dot_metrics_enabled: 'true'
    signoz_emailing_enabled: 'false'
    signoz_alertmanager_provider: 'signoz'
    # Dev retention: 168 hours = 7 days for traces, metrics and logs
    signoz_traces_ttl_duration_hrs: '168'
    signoz_metrics_ttl_duration_hrs: '168'
    signoz_logs_ttl_duration_hrs: '168'

  persistence:
    enabled: true
    size: 5Gi
    storageClass: 'standard'
# AlertManager: single replica with a small persistent volume.
alertmanager:
  replicaCount: 1

  image:
    repository: signoz/alertmanager
    tag: '0.23.5'
    pullPolicy: IfNotPresent

  service:
    type: ClusterIP
    port: 9093

  resources:
    requests:
      cpu: 25m  # Kept low for local dev
      memory: 64Mi  # Kept low for local dev
    limits:
      cpu: 200m
      memory: 256Mi

  persistence:
    enabled: true
    size: 2Gi
    storageClass: 'standard'

  # Alert routing: everything goes to the single "default" receiver.
  config:
    global:
      resolve_timeout: 5m
    route:
      group_by:
        - alertname
        - cluster
        - service
      group_wait: 10s
      group_interval: 10s
      repeat_interval: 12h
      receiver: default
    receivers:
      - name: default
        # Add email, slack, webhook configs here
# ClickHouse - time series database backing SigNoz.
# Sized down for local development on a constrained Kind cluster.
clickhouse:
  enabled: true
  installCustomStorageClass: false

  image:
    registry: docker.io
    repository: clickhouse/clickhouse-server
    tag: '25.5.6'  # Official recommended version

  # Lower ClickHouse resource requests for local dev
  clickhouse:
    resources:
      requests:
        cpu: 200m  # Down from the default 500m
        memory: 512Mi
      limits:
        cpu: 1000m
        memory: 1Gi

  # NOTE(review): presumably a sibling of the nested `clickhouse` key rather
  # than a child of it - confirm against the chart's values.
  persistence:
    enabled: true
    size: 20Gi
# Zookeeper (required by ClickHouse): one replica is enough for dev.
# NOTE(review): confirm whether the chart reads these settings at the top
# level or nested under `clickhouse.zookeeper`.
zookeeper:
  enabled: true
  replicaCount: 1  # Single replica for dev

  image:
    tag: '3.7.1'  # Official recommended version

  resources:
    requests:
      cpu: 100m
      memory: 256Mi
    limits:
      cpu: 500m
      memory: 512Mi

  persistence:
    enabled: true
    size: 5Gi
# OpenTelemetry Collector - data ingestion endpoint for all telemetry.
# Receives OTLP from the applications, scrapes PostgreSQL / Redis / RabbitMQ
# metrics, and writes traces, metrics and logs to ClickHouse.
otelCollector:
  enabled: true
  replicaCount: 1

  image:
    repository: signoz/signoz-otel-collector
    tag: v0.129.12  # Latest recommended version

  # Service configuration - expose both gRPC and HTTP endpoints.
  # NOTE(review): presumably the chart accepts `service.ports` as a list;
  # verify against the chart's values.yaml (it may expect a `ports` map).
  service:
    type: ClusterIP
    ports:
      # gRPC receivers
      - name: otlp-grpc
        port: 4317
        targetPort: 4317
        protocol: TCP
      # HTTP receivers
      - name: otlp-http
        port: 4318
        targetPort: 4318
        protocol: TCP
      # Prometheus remote write
      - name: prometheus
        port: 8889
        targetPort: 8889
        protocol: TCP
      # Metrics
      - name: metrics
        port: 8888
        targetPort: 8888
        protocol: TCP

  resources:
    requests:
      cpu: 50m  # Reduced from 100m
      memory: 128Mi  # Reduced from 256Mi
    limits:
      cpu: 500m
      memory: 512Mi

  # OpenTelemetry Collector configuration
  config:
    # NOTE(review): the ${...} variables used by the receivers below are not
    # defined anywhere in this block - confirm they are injected into the
    # collector pod (e.g. from a Secret), otherwise the scrapers cannot
    # authenticate.
    receivers:
      # OTLP receivers for traces, metrics, and logs from applications.
      # All application telemetry is pushed via OTLP protocol.
      otlp:
        protocols:
          grpc:
            endpoint: 0.0.0.0:4317
          http:
            endpoint: 0.0.0.0:4318
            cors:
              # Any origin is allowed - acceptable for dev only.
              allowed_origins:
                - "*"

      # PostgreSQL receivers for database metrics.
      # Collects metrics directly from PostgreSQL databases.
      postgresql/auth:
        endpoint: auth-db-service.bakery-ia:5432
        username: "${POSTGRES_MONITOR_USER}"
        password: "${POSTGRES_MONITOR_PASSWORD}"
        databases:
          - auth_db
        collection_interval: 60s
        tls:
          insecure: false

      postgresql/inventory:
        endpoint: inventory-db-service.bakery-ia:5432
        username: "${POSTGRES_MONITOR_USER}"
        password: "${POSTGRES_MONITOR_PASSWORD}"
        databases:
          - inventory_db
        collection_interval: 60s
        tls:
          insecure: false

      postgresql/orders:
        endpoint: orders-db-service.bakery-ia:5432
        username: "${POSTGRES_MONITOR_USER}"
        password: "${POSTGRES_MONITOR_PASSWORD}"
        databases:
          - orders_db
        collection_interval: 60s
        tls:
          insecure: false

      # Add more PostgreSQL databases as needed
      # postgresql/SERVICE:
      #   endpoint: SERVICE-db-service.bakery-ia:5432
      #   ...

      # Redis receiver for cache metrics.
      # NOTE(review): the certificate files under /etc/redis-tls are not
      # mounted by anything in this block - confirm a volume provides them.
      redis:
        endpoint: redis-service.bakery-ia:6379
        password: "${REDIS_PASSWORD}"
        collection_interval: 60s
        tls:
          insecure: false
          cert_file: /etc/redis-tls/redis-cert.pem
          key_file: /etc/redis-tls/redis-key.pem
          ca_file: /etc/redis-tls/ca-cert.pem

      # RabbitMQ receiver via management API
      rabbitmq:
        endpoint: http://rabbitmq-service.bakery-ia:15672
        username: "${RABBITMQ_USER}"
        password: "${RABBITMQ_PASSWORD}"
        collection_interval: 60s

    processors:
      # Batch processor for better performance (optimized for high throughput)
      batch:
        timeout: 1s
        send_batch_size: 10000  # Increased from 1024 for better performance
        send_batch_max_size: 10000

      # Memory limiter to prevent OOM; limit_mib stays below the 512Mi
      # container memory limit set under `resources` above.
      memory_limiter:
        check_interval: 1s
        limit_mib: 400
        spike_limit_mib: 100

      # Resource detection.
      # NOTE(review): the `docker` detector presumably needs access to a
      # Docker daemon, which a Kubernetes pod normally lacks - confirm it is
      # wanted here.
      resourcedetection:
        detectors: [env, system, docker]
        timeout: 5s

      # Span metrics processor for automatic service metrics.
      # NOTE(review): confirm this collector image still ships a processor
      # named `spanmetrics`.
      spanmetrics:
        metrics_exporter: signozclickhousemetrics
        latency_histogram_buckets:
          [2ms, 4ms, 6ms, 8ms, 10ms, 50ms, 100ms, 200ms, 400ms, 800ms,
           1s, 1400ms, 2s, 5s, 10s, 15s]
        dimensions_cache_size: 10000

    exporters:
      # ClickHouse exporter for traces.
      # NOTE(review): this DSN and the logs DSN carry no credentials while
      # the metrics DSN does - confirm the unauthenticated connections are
      # intended.
      clickhousetraces:
        datasource: tcp://signoz-clickhouse:9000/?database=signoz_traces
        timeout: 10s
        retry_on_failure:
          enabled: true
          initial_interval: 5s
          max_interval: 30s
          max_elapsed_time: 300s

      # ClickHouse exporter for metrics.
      # Credentials are taken from the pod environment rather than being
      # committed in this file.
      # NOTE(review): assumes the chart injects CLICKHOUSE_USER and
      # CLICKHOUSE_PASSWORD into the collector pod - confirm.
      signozclickhousemetrics:
        dsn: "tcp://${env:CLICKHOUSE_USER}:${env:CLICKHOUSE_PASSWORD}@signoz-clickhouse:9000/signoz_metrics"
        timeout: 10s
        retry_on_failure:
          enabled: true
          initial_interval: 5s
          max_interval: 30s
          max_elapsed_time: 300s

      # ClickHouse exporter for logs
      clickhouselogsexporter:
        dsn: tcp://signoz-clickhouse:9000/?database=signoz_logs
        timeout: 10s
        retry_on_failure:
          enabled: true
          initial_interval: 5s
          max_interval: 30s
          # Stated explicitly to match the other two exporters.
          max_elapsed_time: 300s

      # Debug exporter for debugging (optional; not wired into any pipeline)
      debug:
        verbosity: detailed
        sampling_initial: 5
        sampling_thereafter: 200

    service:
      # Processor order matters: memory_limiter runs first so it can apply
      # back-pressure early, and batch runs last so it batches fully
      # processed data.
      pipelines:
        # Traces pipeline
        traces:
          receivers: [otlp]
          processors: [memory_limiter, resourcedetection, spanmetrics, batch]
          exporters: [clickhousetraces]

        # Metrics pipeline
        metrics:
          receivers:
            - otlp
            - postgresql/auth
            - postgresql/inventory
            - postgresql/orders
            - redis
            - rabbitmq
          processors: [memory_limiter, resourcedetection, batch]
          exporters: [signozclickhousemetrics]

        # Logs pipeline
        logs:
          receivers: [otlp]
          processors: [memory_limiter, resourcedetection, batch]
          exporters: [clickhouselogsexporter]
# Additional configuration: create a service account with no annotations
# and no explicit name.
serviceAccount:
  create: true
  annotations: {}
  name: ''
# Security context: run as non-root with UID 1000 and fsGroup 1000.
securityContext:
  runAsNonRoot: true
  runAsUser: 1000
  fsGroup: 1000
# Network policies are turned off in the dev environment.
networkPolicy:
  enabled: false
# Self-monitoring of SigNoz: enabled, but without a ServiceMonitor.
selfMonitoring:
  enabled: true
  serviceMonitor:
    enabled: false