Improve monitoring 3

This commit is contained in:
Urtzi Alfaro
2026-01-09 11:18:20 +01:00
parent 8ca5d9c100
commit 43a3f35bd1
27 changed files with 1279 additions and 32 deletions

View File

@@ -0,0 +1,141 @@
#!/bin/bash
# Generate Test Traffic to Services
# This script generates API calls to verify telemetry data collection.
#
# Requires kubectl access to the target namespace. When a running gateway pod
# is found, requests are issued from inside that pod against the in-cluster
# service URL; otherwise a local port-forward to svc/gateway-service is started
# and requests go to http://localhost:8000.
set -e

NAMESPACE="bakery-ia"
GREEN='\033[0;32m'
BLUE='\033[0;34m'
YELLOW='\033[1;33m'
NC='\033[0m'

# PID of the background port-forward; stays empty unless one is started below.
PORT_FORWARD_PID=""

# Stop the background port-forward (if any) on every exit path, so that an
# early abort under `set -e` does not leave it holding local port 8000.
cleanup_port_forward() {
  if [[ -n "$PORT_FORWARD_PID" ]]; then
    kill "$PORT_FORWARD_PID" 2>/dev/null || true
  fi
}
trap cleanup_port_forward EXIT

echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
echo -e "${BLUE} Generating Test Traffic for SigNoz Verification${NC}"
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
echo ""

# Check if ingress is accessible
echo -e "${BLUE}Step 1: Verifying Gateway Access${NC}"
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"

# The jsonpath index `.items[0]` makes kubectl exit non-zero when no pod
# matches. `|| true` keeps `set -e` from aborting here, so the empty result
# can select the port-forward fallback below.
GATEWAY_POD=$(kubectl get pods -n "$NAMESPACE" -l app=gateway \
  --field-selector=status.phase=Running \
  -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || true)

if [[ -z "$GATEWAY_POD" ]]; then
  echo -e "${YELLOW}⚠ Gateway pod not running. Starting port-forward...${NC}"
  # Port forward in background
  kubectl port-forward -n "$NAMESPACE" svc/gateway-service 8000:8000 &
  PORT_FORWARD_PID=$!
  # Give the port-forward a moment to start listening.
  sleep 3
  API_URL="http://localhost:8000"
else
  echo -e "${GREEN}✓ Gateway is running: $GATEWAY_POD${NC}"
  # Use internal service
  API_URL="http://gateway-service.$NAMESPACE.svc.cluster.local:8000"
fi
echo ""
#######################################
# Issue one GET request against the gateway and report the outcome.
# The request runs inside the gateway pod when GATEWAY_POD is set, otherwise
# from the local machine.
# Globals:   NAMESPACE, GATEWAY_POD, API_URL, BLUE, GREEN, YELLOW, NC (read)
# Arguments: $1 - endpoint path appended to API_URL (e.g. /health)
#            $2 - human-readable description for the log line
# Outputs:   progress and result lines on stdout
# Returns:   always 0; a failed request is reported, not fatal
#######################################
make_request() {
  local endpoint=$1
  local description=$2
  local response=""
  local http_code=""
  local status=0

  echo -e "${BLUE}→ Testing: $description${NC}"
  echo " Endpoint: $endpoint"

  # Capture the exit status separately: a failing curl still prints the -w
  # trailer, so the captured text alone cannot tell success from failure.
  if [[ -n "$GATEWAY_POD" ]]; then
    # Make request from inside the gateway pod
    response=$(kubectl exec -n "$NAMESPACE" "$GATEWAY_POD" -- \
      curl -s -w "\nHTTP_CODE:%{http_code}" "$API_URL$endpoint" 2>/dev/null) || status=$?
  else
    # Make request from localhost
    response=$(curl -s -w "\nHTTP_CODE:%{http_code}" "$API_URL$endpoint" 2>/dev/null) || status=$?
  fi

  # The status code is whatever follows the last HTTP_CODE: marker, so a
  # response body that happens to contain the marker cannot confuse parsing.
  if [[ "$response" == *HTTP_CODE:* ]]; then
    http_code=${response##*HTTP_CODE:}
  fi

  # curl reports 000 when no HTTP response was received at all.
  if (( status != 0 )) || [[ -z "$http_code" || "$http_code" == "000" ]]; then
    echo -e " ${YELLOW}⚠ Request failed${NC}"
  else
    case "$http_code" in
      200 | 401 | 404)
        # 401/404 still prove the request reached a service.
        echo -e " ${GREEN}✓ Response received (HTTP $http_code)${NC}"
        ;;
      *)
        echo -e " ${YELLOW}⚠ Unexpected response (HTTP $http_code)${NC}"
        ;;
    esac
  fi
  echo ""
  # Pace the requests.
  sleep 1
}
# Step 2: send one request per service through the gateway so that each of
# them handles at least one call.
echo -e "${BLUE}Step 2: Generating Traffic to Services${NC}"
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
echo ""

# "endpoint|description" pairs, requested in the order listed:
# gateway and API health checks first, then one health endpoint per service.
TRAFFIC_TARGETS=(
  "/health|Gateway Health Check"
  "/api/health|API Health Check"
  "/api/auth/health|Auth Service Health"
  "/api/tenants/health|Tenant Service Health"
  "/api/inventory/health|Inventory Service Health"
  "/api/orders/health|Orders Service Health"
  "/api/forecasting/health|Forecasting Service Health"
)

for target in "${TRAFFIC_TARGETS[@]}"; do
  # Split on the first '|': left part is the endpoint, right the description.
  make_request "${target%%|*}" "${target#*|}"
done
# Print the name of the first Running pod labelled app=$1 in $NAMESPACE, or
# nothing when there is none. The jsonpath index `.items[0]` makes kubectl
# exit non-zero on an empty pod list; `|| true` turns that into an empty
# result so a missing service does not abort the script under `set -e`.
running_pod_for_app() {
  kubectl get pods -n "$NAMESPACE" -l "app=$1" \
    --field-selector=status.phase=Running \
    -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || true
}

echo -e "${BLUE}Step 3: Checking Service Logs for Telemetry${NC}"
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
echo ""

# Check a few service pods for tracing logs
SERVICES=("auth-service" "inventory-service" "gateway")
for service in "${SERVICES[@]}"; do
  POD=$(running_pod_for_app "$service")
  if [[ -n "$POD" ]]; then
    echo -e "${BLUE}Checking $service ($POD)...${NC}"
    # Only the last 100 log lines are searched; at most two matches are shown.
    TRACING_LOG=$(kubectl logs -n "$NAMESPACE" "$POD" --tail=100 2>/dev/null \
      | grep -i "tracing\|otel" | head -n 2 || true)
    if [[ -n "$TRACING_LOG" ]]; then
      echo -e "${GREEN}✓ Tracing configured:${NC}"
      echo "$TRACING_LOG" | sed 's/^/ /'
    else
      echo -e "${YELLOW}⚠ No tracing logs found${NC}"
    fi
    echo ""
  else
    # Previously skipped silently; say so, so the gap is visible.
    echo -e "${YELLOW}⚠ No running pod found for $service${NC}"
    echo ""
  fi
done
# Step 4: pause with a visible countdown before the user goes looking for
# the generated data.
echo -e "${BLUE}Step 4: Waiting for Data Processing${NC}"
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
echo "Waiting 30 seconds for telemetry data to be processed..."
remaining=30
while (( remaining >= 1 )); do
  # \r rewrites the same terminal line on every tick.
  printf '\r %s seconds remaining...' "$remaining"
  sleep 1
  remaining=$(( remaining - 1 ))
done
printf '\n\n'

# Stop the port-forward if one was started earlier; errors are ignored
# because the process may already have exited.
[[ -z "$PORT_FORWARD_PID" ]] || kill "$PORT_FORWARD_PID" 2>/dev/null || true

echo -e "${GREEN}✓ Test traffic generation complete!${NC}"
echo ""
echo -e "${BLUE}Next Steps:${NC}"
# Quoted delimiter: the text below is printed literally.
cat <<'EOF'
1. Run the verification script to check for collected data:
 ./infrastructure/helm/verify-signoz-telemetry.sh

2. Access SigNoz UI to visualize the data:
 https://monitoring.bakery-ia.local
 or
 kubectl port-forward -n bakery-ia svc/signoz 3301:8080
 Then go to: http://localhost:3301

EOF
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"

View File

@@ -181,6 +181,15 @@ otelCollector:
# OpenTelemetry Collector configuration
config:
# Connectors - bridge between pipelines
connectors:
signozmeter:
dimensions:
- name: service.name
- name: deployment.environment
- name: host.name
metrics_flush_interval: 1h
receivers:
# OTLP receivers for traces, metrics, and logs from applications
# All application telemetry is pushed via OTLP protocol
@@ -256,6 +265,12 @@ otelCollector:
send_batch_size: 10000 # Increased from 1024 for better performance
send_batch_max_size: 10000
# Batch processor for meter data
batch/meter:
timeout: 1s
send_batch_size: 20000
send_batch_max_size: 25000
# Memory limiter to prevent OOM
memory_limiter:
check_interval: 1s
@@ -267,11 +282,19 @@ otelCollector:
detectors: [env, system, docker]
timeout: 5s
# Span metrics processor for automatic service metrics
spanmetrics:
# SigNoz span metrics processor with delta aggregation (recommended)
# Generates RED metrics (Rate, Error, Duration) from trace spans
signozspanmetrics/delta:
aggregation_temporality: AGGREGATION_TEMPORALITY_DELTA
metrics_exporter: signozclickhousemetrics
latency_histogram_buckets: [2ms, 4ms, 6ms, 8ms, 10ms, 50ms, 100ms, 200ms, 400ms, 800ms, 1s, 1400ms, 2s, 5s, 10s, 15s]
dimensions_cache_size: 10000
latency_histogram_buckets: [100us, 1ms, 2ms, 6ms, 10ms, 50ms, 100ms, 250ms, 500ms, 1000ms, 1400ms, 2000ms, 5s, 10s, 20s, 40s, 60s]
dimensions_cache_size: 100000
dimensions:
- name: service.namespace
default: default
- name: deployment.environment
default: default
- name: signoz.collector.id
exporters:
# ClickHouse exporter for traces
@@ -294,6 +317,13 @@ otelCollector:
max_interval: 30s
max_elapsed_time: 300s
# ClickHouse exporter for meter data (usage metrics)
# NOTE(review): the DSN below embeds the ClickHouse admin password in plain
# text in this values file; the same literal is the fallback password in the
# telemetry verification script. Consider supplying it from a Kubernetes
# Secret instead — TODO confirm what the chart supports.
signozclickhousemeter:
dsn: "tcp://admin:27ff0399-0d3a-4bd8-919d-17c2181e6fb9@signoz-clickhouse:9000/signoz_meter"
timeout: 45s
sending_queue:
enabled: false
# ClickHouse exporter for logs
clickhouselogsexporter:
dsn: tcp://signoz-clickhouse:9000/?database=signoz_logs
@@ -303,6 +333,13 @@ otelCollector:
initial_interval: 5s
max_interval: 30s
# Metadata exporter for service metadata
# NOTE(review): the DSN below embeds the ClickHouse admin password in plain
# text in this values file. Consider supplying it from a Kubernetes Secret
# instead — TODO confirm what the chart supports.
metadataexporter:
dsn: "tcp://admin:27ff0399-0d3a-4bd8-919d-17c2181e6fb9@signoz-clickhouse:9000/signoz_metadata"
timeout: 10s
cache:
provider: in_memory
# Debug exporter for debugging (optional)
debug:
verbosity: detailed
@@ -311,11 +348,11 @@ otelCollector:
service:
pipelines:
# Traces pipeline
# Traces pipeline - exports to ClickHouse and signozmeter connector
traces:
receivers: [otlp]
processors: [memory_limiter, batch, spanmetrics, resourcedetection]
exporters: [clickhousetraces]
processors: [memory_limiter, batch, signozspanmetrics/delta, resourcedetection]
exporters: [clickhousetraces, metadataexporter, signozmeter]
# Metrics pipeline
metrics:
@@ -323,6 +360,12 @@ otelCollector:
processors: [memory_limiter, batch, resourcedetection]
exporters: [signozclickhousemetrics]
# Meter pipeline - receives from signozmeter connector
metrics/meter:
receivers: [signozmeter]
processors: [batch/meter]
exporters: [signozclickhousemeter]
# Logs pipeline
logs:
receivers: [otlp]

View File

@@ -269,6 +269,15 @@ otelCollector:
# Full OTEL Collector Configuration
config:
# Connectors - bridge between pipelines
connectors:
signozmeter:
dimensions:
- name: service.name
- name: deployment.environment
- name: host.name
metrics_flush_interval: 1h
extensions:
health_check:
endpoint: 0.0.0.0:13133
@@ -304,6 +313,12 @@ otelCollector:
send_batch_size: 50000 # Increased from 2048 (official recommendation for traces)
send_batch_max_size: 50000
# Batch processor for meter data
batch/meter:
timeout: 1s
send_batch_size: 20000
send_batch_max_size: 25000
memory_limiter:
check_interval: 1s
limit_mib: 1500 # 75% of container memory (2Gi = ~2048Mi)
@@ -324,11 +339,19 @@ otelCollector:
value: bakery-ia-prod
action: upsert
# Span metrics processor for automatic service performance metrics
spanmetrics:
# SigNoz span metrics processor with delta aggregation (recommended)
# Generates RED metrics (Rate, Error, Duration) from trace spans
signozspanmetrics/delta:
aggregation_temporality: AGGREGATION_TEMPORALITY_DELTA
metrics_exporter: signozclickhousemetrics
latency_histogram_buckets: [2ms, 4ms, 6ms, 8ms, 10ms, 50ms, 100ms, 200ms, 400ms, 800ms, 1s, 1400ms, 2s, 5s, 10s, 15s]
latency_histogram_buckets: [100us, 1ms, 2ms, 6ms, 10ms, 50ms, 100ms, 250ms, 500ms, 1000ms, 1400ms, 2000ms, 5s, 10s, 20s, 40s, 60s]
dimensions_cache_size: 100000
dimensions:
- name: service.namespace
default: default
- name: deployment.environment
default: production
- name: signoz.collector.id
exporters:
# Export to SigNoz ClickHouse
@@ -350,6 +373,13 @@ otelCollector:
max_interval: 30s
max_elapsed_time: 300s
# ClickHouse exporter for meter data (usage metrics)
signozclickhousemeter:
dsn: "tcp://clickhouse:9000/?database=signoz_meter"
timeout: 45s
sending_queue:
enabled: false
clickhouselogsexporter:
dsn: tcp://clickhouse:9000/?database=signoz_logs
timeout: 10s
@@ -359,6 +389,13 @@ otelCollector:
max_interval: 30s
max_elapsed_time: 300s
# Metadata exporter for service metadata
metadataexporter:
dsn: "tcp://clickhouse:9000/?database=signoz_metadata"
timeout: 10s
cache:
provider: in_memory
# Debug exporter for debugging (replaces deprecated logging exporter)
debug:
verbosity: detailed
@@ -368,16 +405,25 @@ otelCollector:
service:
extensions: [health_check, zpages]
pipelines:
# Traces pipeline - exports to ClickHouse and signozmeter connector
traces:
receivers: [otlp]
processors: [memory_limiter, batch, spanmetrics, resourcedetection, resource]
exporters: [clickhousetraces]
processors: [memory_limiter, batch, signozspanmetrics/delta, resourcedetection, resource]
exporters: [clickhousetraces, metadataexporter, signozmeter]
# Metrics pipeline
metrics:
receivers: [otlp, prometheus]
processors: [memory_limiter, batch, resourcedetection, resource]
exporters: [signozclickhousemetrics]
# Meter pipeline - receives from signozmeter connector
metrics/meter:
receivers: [signozmeter]
processors: [batch/meter]
exporters: [signozclickhousemeter]
# Logs pipeline
logs:
receivers: [otlp]
processors: [memory_limiter, batch, resourcedetection, resource]

View File

@@ -0,0 +1,177 @@
#!/bin/bash
# SigNoz Telemetry Verification Script
# This script verifies that services are correctly sending metrics, logs, and traces to SigNoz
# and that SigNoz is collecting them properly.
#
# Requires kubectl access to the target namespace.
set -e

NAMESPACE="bakery-ia"
GREEN='\033[0;32m'
RED='\033[0;31m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Print the name of the first Running pod matching label selector $1 in
# $NAMESPACE, or nothing when no pod matches. The jsonpath index `.items[0]`
# makes kubectl exit non-zero on an empty list; `|| true` converts that into
# an empty result so `set -e` does not abort before the diagnostics below.
first_running_pod() {
  kubectl get pods -n "$NAMESPACE" -l "$1" \
    --field-selector=status.phase=Running \
    -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || true
}

echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
echo -e "${BLUE} SigNoz Telemetry Verification Script${NC}"
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
echo ""

# Step 1: Verify SigNoz Components are Running
echo -e "${BLUE}[1/7] Checking SigNoz Components Status...${NC}"
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
OTEL_POD=$(first_running_pod "app.kubernetes.io/name=signoz,app.kubernetes.io/component=otel-collector")
SIGNOZ_POD=$(first_running_pod "app.kubernetes.io/name=signoz,app.kubernetes.io/component=signoz")
CLICKHOUSE_POD=$(first_running_pod "clickhouse.altinity.com/chi=signoz-clickhouse")

if [[ -n "$OTEL_POD" && -n "$SIGNOZ_POD" && -n "$CLICKHOUSE_POD" ]]; then
  echo -e "${GREEN}✓ All SigNoz components are running${NC}"
  echo " - OTel Collector: $OTEL_POD"
  echo " - SigNoz Frontend: $SIGNOZ_POD"
  echo " - ClickHouse: $CLICKHOUSE_POD"
else
  echo -e "${RED}✗ Some SigNoz components are not running${NC}"
  # Name the components that are missing.
  [[ -n "$OTEL_POD" ]] || echo " - OTel Collector: not running"
  [[ -n "$SIGNOZ_POD" ]] || echo " - SigNoz Frontend: not running"
  [[ -n "$CLICKHOUSE_POD" ]] || echo " - ClickHouse: not running"
  # `|| true`: grep exits non-zero when nothing matches, which under `set -e`
  # would otherwise end the script here instead of at the explicit exit.
  kubectl get pods -n "$NAMESPACE" | grep signoz || true
  exit 1
fi
echo ""
# Step 2: show the collector Service's cluster IP and its exposed ports.
echo -e "${BLUE}[2/7] Verifying OTel Collector Endpoints...${NC}"
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
OTEL_SVC=$(kubectl get svc -n "$NAMESPACE" signoz-otel-collector -o jsonpath='{.spec.clusterIP}')
printf 'OTel Collector Service IP: %s\n\n' "$OTEL_SVC"
printf '%s\n' "Available endpoints:"
# One "name<TAB>port" row per Service port, aligned by column(1).
kubectl get svc -n "$NAMESPACE" signoz-otel-collector \
  -o jsonpath='{range .spec.ports[*]}{.name}{"\t"}{.port}{"\n"}{end}' \
  | column -t
printf '\n'
echo -e "${GREEN}✓ OTel Collector endpoints are exposed${NC}"
printf '\n'

# Step 3: look for telemetry-related lines in the collector's recent logs.
echo -e "${BLUE}[3/7] Checking OTel Collector for Recent Activity...${NC}"
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
printf '%s\n' "Recent OTel Collector logs (last 20 lines):"
if ! kubectl logs -n "$NAMESPACE" "$OTEL_POD" --tail=20 \
  | grep -E "received|exported|traces|metrics|logs"; then
  # grep matched nothing in the last 20 lines.
  printf '%s\n' "No recent telemetry data found in logs"
fi
printf '\n'
# Step 4: read the telemetry settings from the bakery-config ConfigMap.
echo -e "${BLUE}[4/7] Verifying Service Telemetry Configuration...${NC}"
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"

# Print the value stored under key $1 in the bakery-config ConfigMap.
bakery_config_value() {
  kubectl get configmap bakery-config -n "$NAMESPACE" -o jsonpath="{.data.$1}"
}

OTEL_ENDPOINT=$(bakery_config_value OTEL_EXPORTER_OTLP_ENDPOINT)
ENABLE_TRACING=$(bakery_config_value ENABLE_TRACING)
ENABLE_METRICS=$(bakery_config_value ENABLE_METRICS)
ENABLE_LOGS=$(bakery_config_value ENABLE_LOGS)

printf '%s\n' \
  "Configuration from bakery-config ConfigMap:" \
  " OTEL_EXPORTER_OTLP_ENDPOINT: $OTEL_ENDPOINT" \
  " ENABLE_TRACING: $ENABLE_TRACING" \
  " ENABLE_METRICS: $ENABLE_METRICS" \
  " ENABLE_LOGS: $ENABLE_LOGS" \
  ""

# Telemetry counts as enabled only when all three switches are exactly "true".
telemetry_enabled=true
for flag in "$ENABLE_TRACING" "$ENABLE_METRICS" "$ENABLE_LOGS"; do
  [[ "$flag" == "true" ]] || telemetry_enabled=false
done

if [[ "$telemetry_enabled" == "true" ]]; then
  echo -e "${GREEN}✓ Telemetry is enabled in configuration${NC}"
else
  echo -e "${YELLOW}⚠ Some telemetry features may be disabled${NC}"
fi
echo ""
# Step 5: query the collector's health endpoint from inside its own pod.
echo -e "${BLUE}[5/7] Testing OTel Collector Health Endpoint...${NC}"
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
# Any failure of the exec/wget is recorded as the literal string FAILED.
HEALTH_CHECK=$(kubectl exec -n $NAMESPACE $OTEL_POD -- wget -qO- http://localhost:13133/ 2>/dev/null || echo "FAILED")
case "$HEALTH_CHECK" in
  *"Server available"* | "{}")
    # Either a body mentioning "Server available" or an empty JSON object passes.
    echo -e "${GREEN}✓ OTel Collector health check passed${NC}"
    ;;
  *)
    echo -e "${RED}✗ OTel Collector health check failed${NC}"
    echo "Response: $HEALTH_CHECK"
    ;;
esac
echo ""
# Step 6: Query ClickHouse for Telemetry Data

# Fallback admin password used when the signoz-clickhouse secret cannot be read.
# NOTE(review): this is a credential hardcoded in the script; prefer exporting
# CLICKHOUSE_PASSWORD or relying on the Kubernetes secret.
CH_DEFAULT_PASSWORD="27ff0399-0d3a-4bd8-919d-17c2181e6fb9"

#######################################
# Print the ClickHouse admin password.
# Resolution order: $CLICKHOUSE_PASSWORD (if set), the admin-password key of
# the signoz-clickhouse secret, then CH_DEFAULT_PASSWORD.
# Globals: NAMESPACE, CLICKHOUSE_PASSWORD, CH_DEFAULT_PASSWORD (read)
# Outputs: the password on stdout, without trailing newline
#######################################
get_clickhouse_password() {
  local encoded=""
  local decoded=""

  if [[ -n "${CLICKHOUSE_PASSWORD:-}" ]]; then
    printf '%s' "$CLICKHOUSE_PASSWORD"
    return 0
  fi

  encoded=$(kubectl get secret -n "$NAMESPACE" signoz-clickhouse \
    -o jsonpath='{.data.admin-password}' 2>/dev/null || true)
  # Decode only when something was returned: `base64 -d` succeeds on empty
  # input, so piping straight into it would never reach the fallback.
  if [[ -n "$encoded" ]]; then
    decoded=$(printf '%s' "$encoded" | base64 -d 2>/dev/null || true)
  fi
  printf '%s' "${decoded:-$CH_DEFAULT_PASSWORD}"
}

#######################################
# Run a count query in the ClickHouse pod and print the result.
# Globals:   NAMESPACE, CLICKHOUSE_POD, CH_PASSWORD (read)
# Arguments: $1 - SQL query expected to return a single integer
# Outputs:   the count on stdout; 0 when the query fails or the output is
#            not a plain non-negative integer
#######################################
clickhouse_count() {
  local query=$1
  local result=""

  result=$(kubectl exec -n "$NAMESPACE" "$CLICKHOUSE_POD" -- \
    clickhouse-client --user=admin --password="$CH_PASSWORD" \
    --query="$query" 2>/dev/null || true)
  # Normalise so the numeric comparisons further down cannot choke on
  # partial or error output.
  if [[ "$result" =~ ^(0|[1-9][0-9]*)$ ]]; then
    printf '%s\n' "$result"
  else
    printf '0\n'
  fi
}

echo -e "${BLUE}[6/7] Querying ClickHouse for Telemetry Data...${NC}"
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"

# Get ClickHouse credentials
CH_PASSWORD=$(get_clickhouse_password)

# NOTE(review): the table and column names below depend on the deployed SigNoz
# schema version — verify them against the running ClickHouse instance.
echo "Checking for traces in ClickHouse..."
TRACES_COUNT=$(clickhouse_count "SELECT count() FROM signoz_traces.signoz_index_v2 WHERE timestamp >= now() - INTERVAL 1 HOUR")
echo " Traces in last hour: $TRACES_COUNT"

echo "Checking for metrics in ClickHouse..."
METRICS_COUNT=$(clickhouse_count "SELECT count() FROM signoz_metrics.samples_v4 WHERE unix_milli >= toUnixTimestamp(now() - INTERVAL 1 HOUR) * 1000")
echo " Metrics in last hour: $METRICS_COUNT"

echo "Checking for logs in ClickHouse..."
LOGS_COUNT=$(clickhouse_count "SELECT count() FROM signoz_logs.logs WHERE timestamp >= now() - INTERVAL 1 HOUR")
echo " Logs in last hour: $LOGS_COUNT"
echo ""

if (( TRACES_COUNT > 0 || METRICS_COUNT > 0 || LOGS_COUNT > 0 )); then
  echo -e "${GREEN}✓ Telemetry data found in ClickHouse!${NC}"
else
  echo -e "${YELLOW}⚠ No telemetry data found in the last hour${NC}"
  echo " This might be normal if:"
  echo " - Services were just deployed"
  echo " - No traffic has been generated yet"
  echo " - Services haven't finished initializing"
fi
echo ""
# Step 7: tell the user how to reach the SigNoz UI and the collector metrics.
echo -e "${BLUE}[7/7] SigNoz UI Access Information${NC}"
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
# %b expands the colour escape sequences; %s prints plain text untouched.
printf '\n%s\n' "SigNoz is accessible via ingress at:"
printf '%b\n\n' " ${GREEN}https://monitoring.bakery-ia.local${NC}"
printf '%s\n' "Or via port-forward:"
printf '%b\n' " ${YELLOW}kubectl port-forward -n $NAMESPACE svc/signoz 3301:8080${NC}"
printf '%s\n\n' " Then access: http://localhost:3301"
printf '%s\n' "To view OTel Collector metrics:"
printf '%b\n' " ${YELLOW}kubectl port-forward -n $NAMESPACE svc/signoz-otel-collector 8888:8888${NC}"
printf '%s\n\n' " Then access: http://localhost:8888/metrics"
# Summary

#######################################
# Print the component status section of the summary, based on what the
# earlier steps actually observed instead of unconditional check marks.
# Globals: HEALTH_CHECK, ENABLE_TRACING, ENABLE_METRICS, ENABLE_LOGS (read)
# Outputs: status lines on stdout
#######################################
print_component_status() {
  echo "Component Status:"
  # Reaching the summary implies step 1 found all components running
  # (the script exits there otherwise).
  echo " ✓ SigNoz components running"

  # Same acceptance rule as the health check step.
  if [[ "${HEALTH_CHECK:-}" == *"Server available"* || "${HEALTH_CHECK:-}" == "{}" ]]; then
    echo " ✓ OTel Collector healthy"
  else
    echo " ✗ OTel Collector health check failed"
  fi

  # Same rule as the configuration step: all three switches must be "true".
  if [[ "${ENABLE_TRACING:-}" == "true" && "${ENABLE_METRICS:-}" == "true" && "${ENABLE_LOGS:-}" == "true" ]]; then
    echo " ✓ Configuration correct"
  else
    echo " ⚠ Some telemetry features may be disabled"
  fi
}

echo ""
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
echo -e "${BLUE} Verification Summary${NC}"
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
echo ""
print_component_status
echo ""
echo "Data Collection (last hour):"
echo " Traces: $TRACES_COUNT"
echo " Metrics: $METRICS_COUNT"
echo " Logs: $LOGS_COUNT"
echo ""
# Unset counters are treated as 0.
if [[ "${TRACES_COUNT:-0}" -gt 0 || "${METRICS_COUNT:-0}" -gt 0 || "${LOGS_COUNT:-0}" -gt 0 ]]; then
  echo -e "${GREEN}✓ SigNoz is collecting telemetry data successfully!${NC}"
else
  echo -e "${YELLOW}⚠ To generate telemetry data, try:${NC}"
  echo ""
  echo "1. Generate traffic to your services:"
  echo " curl http://localhost/api/health"
  echo ""
  echo "2. Check service logs for tracing initialization:"
  echo " kubectl logs -n $NAMESPACE <service-pod> | grep -i 'tracing\\|otel\\|signoz'"
  echo ""
  echo "3. Wait a few minutes and run this script again"
fi
echo ""
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"

View File

@@ -99,7 +99,10 @@ spec:
- name: OTEL_COLLECTOR_ENDPOINT
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
- name: OTEL_EXPORTER_OTLP_ENDPOINT
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
valueFrom:
configMapKeyRef:
name: bakery-config
key: OTEL_EXPORTER_OTLP_ENDPOINT
- name: OTEL_SERVICE_NAME
value: "ai-insights-service"
- name: ENABLE_TRACING

View File

@@ -100,7 +100,10 @@ spec:
- name: OTEL_COLLECTOR_ENDPOINT
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
- name: OTEL_EXPORTER_OTLP_ENDPOINT
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
valueFrom:
configMapKeyRef:
name: bakery-config
key: OTEL_EXPORTER_OTLP_ENDPOINT
- name: OTEL_SERVICE_NAME
value: "auth-service"
- name: ENABLE_TRACING

View File

@@ -64,7 +64,10 @@ spec:
- name: OTEL_COLLECTOR_ENDPOINT
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
- name: OTEL_EXPORTER_OTLP_ENDPOINT
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
valueFrom:
configMapKeyRef:
name: bakery-config
key: OTEL_EXPORTER_OTLP_ENDPOINT
- name: OTEL_SERVICE_NAME
value: "distribution-service"
- name: ENABLE_TRACING

View File

@@ -92,7 +92,10 @@ spec:
- name: OTEL_COLLECTOR_ENDPOINT
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
- name: OTEL_EXPORTER_OTLP_ENDPOINT
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
valueFrom:
configMapKeyRef:
name: bakery-config
key: OTEL_EXPORTER_OTLP_ENDPOINT
- name: OTEL_SERVICE_NAME
value: "external-service"
- name: ENABLE_TRACING

View File

@@ -99,7 +99,10 @@ spec:
- name: OTEL_COLLECTOR_ENDPOINT
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
- name: OTEL_EXPORTER_OTLP_ENDPOINT
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
valueFrom:
configMapKeyRef:
name: bakery-config
key: OTEL_EXPORTER_OTLP_ENDPOINT
- name: OTEL_SERVICE_NAME
value: "forecasting-service"
- name: ENABLE_TRACING

View File

@@ -52,7 +52,10 @@ spec:
name: whatsapp-secrets
env:
- name: OTEL_EXPORTER_OTLP_ENDPOINT
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4317"
valueFrom:
configMapKeyRef:
name: bakery-config
key: OTEL_EXPORTER_OTLP_ENDPOINT
resources:
requests:
memory: "256Mi"

View File

@@ -99,7 +99,10 @@ spec:
- name: OTEL_COLLECTOR_ENDPOINT
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
- name: OTEL_EXPORTER_OTLP_ENDPOINT
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
valueFrom:
configMapKeyRef:
name: bakery-config
key: OTEL_EXPORTER_OTLP_ENDPOINT
- name: OTEL_SERVICE_NAME
value: "inventory-service"
- name: ENABLE_TRACING

View File

@@ -99,7 +99,10 @@ spec:
- name: OTEL_COLLECTOR_ENDPOINT
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
- name: OTEL_EXPORTER_OTLP_ENDPOINT
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
valueFrom:
configMapKeyRef:
name: bakery-config
key: OTEL_EXPORTER_OTLP_ENDPOINT
- name: OTEL_SERVICE_NAME
value: "notification-service"
- name: ENABLE_TRACING

View File

@@ -99,7 +99,10 @@ spec:
- name: OTEL_COLLECTOR_ENDPOINT
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
- name: OTEL_EXPORTER_OTLP_ENDPOINT
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
valueFrom:
configMapKeyRef:
name: bakery-config
key: OTEL_EXPORTER_OTLP_ENDPOINT
- name: OTEL_SERVICE_NAME
value: "orchestrator-service"
- name: ENABLE_TRACING

View File

@@ -99,7 +99,10 @@ spec:
- name: OTEL_COLLECTOR_ENDPOINT
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
- name: OTEL_EXPORTER_OTLP_ENDPOINT
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
valueFrom:
configMapKeyRef:
name: bakery-config
key: OTEL_EXPORTER_OTLP_ENDPOINT
- name: OTEL_SERVICE_NAME
value: "orders-service"
- name: ENABLE_TRACING

View File

@@ -99,7 +99,10 @@ spec:
- name: OTEL_COLLECTOR_ENDPOINT
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
- name: OTEL_EXPORTER_OTLP_ENDPOINT
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
valueFrom:
configMapKeyRef:
name: bakery-config
key: OTEL_EXPORTER_OTLP_ENDPOINT
- name: OTEL_SERVICE_NAME
value: "pos-service"
- name: ENABLE_TRACING

View File

@@ -99,7 +99,10 @@ spec:
- name: OTEL_COLLECTOR_ENDPOINT
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
- name: OTEL_EXPORTER_OTLP_ENDPOINT
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
valueFrom:
configMapKeyRef:
name: bakery-config
key: OTEL_EXPORTER_OTLP_ENDPOINT
- name: OTEL_SERVICE_NAME
value: "procurement-service"
- name: ENABLE_TRACING

View File

@@ -99,7 +99,10 @@ spec:
- name: OTEL_COLLECTOR_ENDPOINT
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
- name: OTEL_EXPORTER_OTLP_ENDPOINT
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
valueFrom:
configMapKeyRef:
name: bakery-config
key: OTEL_EXPORTER_OTLP_ENDPOINT
- name: OTEL_SERVICE_NAME
value: "production-service"
- name: ENABLE_TRACING

View File

@@ -99,7 +99,10 @@ spec:
- name: OTEL_COLLECTOR_ENDPOINT
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
- name: OTEL_EXPORTER_OTLP_ENDPOINT
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
valueFrom:
configMapKeyRef:
name: bakery-config
key: OTEL_EXPORTER_OTLP_ENDPOINT
- name: OTEL_SERVICE_NAME
value: "recipes-service"
- name: ENABLE_TRACING

View File

@@ -99,7 +99,10 @@ spec:
- name: OTEL_COLLECTOR_ENDPOINT
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
- name: OTEL_EXPORTER_OTLP_ENDPOINT
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
valueFrom:
configMapKeyRef:
name: bakery-config
key: OTEL_EXPORTER_OTLP_ENDPOINT
- name: OTEL_SERVICE_NAME
value: "sales-service"
- name: ENABLE_TRACING

View File

@@ -99,7 +99,10 @@ spec:
- name: OTEL_COLLECTOR_ENDPOINT
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
- name: OTEL_EXPORTER_OTLP_ENDPOINT
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
valueFrom:
configMapKeyRef:
name: bakery-config
key: OTEL_EXPORTER_OTLP_ENDPOINT
- name: OTEL_SERVICE_NAME
value: "suppliers-service"
- name: ENABLE_TRACING

View File

@@ -99,7 +99,10 @@ spec:
- name: OTEL_COLLECTOR_ENDPOINT
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
- name: OTEL_EXPORTER_OTLP_ENDPOINT
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
valueFrom:
configMapKeyRef:
name: bakery-config
key: OTEL_EXPORTER_OTLP_ENDPOINT
- name: OTEL_SERVICE_NAME
value: "tenant-service"
- name: ENABLE_TRACING

View File

@@ -99,7 +99,10 @@ spec:
- name: OTEL_COLLECTOR_ENDPOINT
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
- name: OTEL_EXPORTER_OTLP_ENDPOINT
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
valueFrom:
configMapKeyRef:
name: bakery-config
key: OTEL_EXPORTER_OTLP_ENDPOINT
- name: OTEL_SERVICE_NAME
value: "training-service"
- name: ENABLE_TRACING

View File

@@ -385,7 +385,8 @@ data:
# OBSERVABILITY - SigNoz (Unified Monitoring)
# ================================================================
# OpenTelemetry Configuration - Direct to SigNoz
OTEL_EXPORTER_OTLP_ENDPOINT: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4317"
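# NOTE: the gRPC endpoint is given as host:port without a scheme. NOTE(review): the OpenTelemetry spec also permits an http:// or https:// URL here — confirm which form the services' exporter setup expects.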
OTEL_EXPORTER_OTLP_ENDPOINT: "signoz-otel-collector.bakery-ia.svc.cluster.local:4317"
OTEL_EXPORTER_OTLP_PROTOCOL: "grpc"
OTEL_SERVICE_NAME: "bakery-ia"
OTEL_RESOURCE_ATTRIBUTES: "deployment.environment=development"

View File

@@ -0,0 +1,60 @@
#!/bin/bash
# Fix OTEL endpoint configuration in all service manifests.
# Hardcoded OTEL_EXPORTER_OTLP_ENDPOINT values are replaced with references
# to the central bakery-config ConfigMap.
set -e

# ANSI colour sequences, expanded at print time by `echo -e` / `printf %b`.
GREEN='\033[0;32m'
BLUE='\033[0;34m'
NC='\033[0m'

# Announcement line followed by one blank line.
printf '%b\n\n' "${BLUE}Fixing OTEL endpoint configuration in all services...${NC}"
#######################################
# Rewrite one manifest so that every hardcoded OTEL_EXPORTER_OTLP_ENDPOINT
# `value:` becomes a configMapKeyRef to bakery-config.
# The replacement is indented to match the `name:` key it belongs to, and the
# following line is only replaced when it really is a `value:` line, so the
# function is idempotent and never drops unrelated lines.
# Arguments: $1 - path to the manifest
# Returns:   0 if the file was changed, 2 if there was nothing to change,
#            1 on error (file left untouched)
#######################################
fix_otel_endpoint_file() {
  local file=$1
  local tmp_file

  tmp_file=$(mktemp) || return 1

  if ! awk '
    /name: OTEL_EXPORTER_OTLP_ENDPOINT/ {
      print
      # Build the indentation: everything before "name:" turned into blanks,
      # so sibling keys line up with "name:" (also after a "- " list marker).
      pad = $0
      sub(/name:.*/, "", pad)
      gsub(/[^ \t]/, " ", pad)
      if ((getline nxt) > 0) {
        if (nxt ~ /^[ \t]*value:/) {
          print pad "valueFrom:"
          print pad "  configMapKeyRef:"
          print pad "    name: bakery-config"
          print pad "    key: OTEL_EXPORTER_OTLP_ENDPOINT"
        } else {
          # Not a hardcoded value (e.g. already valueFrom): keep it.
          print nxt
        }
      }
      next
    }
    { print }
  ' "$file" > "$tmp_file"; then
    rm -f -- "$tmp_file"
    return 1
  fi

  if cmp -s -- "$file" "$tmp_file"; then
    rm -f -- "$tmp_file"
    return 2
  fi

  # Write back through redirection instead of `mv`: mktemp files are created
  # with mode 0600, and moving one over the manifest would change its mode.
  if ! cat -- "$tmp_file" > "$file"; then
    rm -f -- "$tmp_file"
    return 1
  fi
  rm -f -- "$tmp_file"
  return 0
}

# Find all service YAML files.
# Kept as a plain assignment so that, under the script's `set -e`, a failing
# find (e.g. run from outside the repository root) still aborts the script.
SERVICE_FILES=$(find infrastructure/kubernetes/base/components -name "*-service.yaml")

# Read line by line so paths containing spaces or glob characters stay intact.
while IFS= read -r file; do
  [[ -n "$file" ]] || continue
  # Skip manifests that do not define the variable at all.
  grep -q "name: OTEL_EXPORTER_OTLP_ENDPOINT" "$file" || continue

  rc=0
  fix_otel_endpoint_file "$file" || rc=$?
  case "$rc" in
    0)
      echo -e "${BLUE}→ Fixing $file${NC}"
      echo -e "${GREEN} ✓ Fixed${NC}"
      ;;
    2)
      echo -e "${GREEN}$file already using ConfigMap${NC}"
      ;;
    *)
      echo "error: could not rewrite $file" >&2
      exit 1
      ;;
  esac
done <<< "$SERVICE_FILES"

echo ""
echo -e "${GREEN}✓ All service files processed!${NC}"
echo ""
echo "Next steps:"
echo "1. Review changes: git diff infrastructure/kubernetes/base/components"
echo "2. Apply changes: kubectl apply -k infrastructure/kubernetes/overlays/dev"
echo "3. Restart services: kubectl rollout restart deployment -n bakery-ia --all"

View File

@@ -23,7 +23,8 @@ data:
ENABLE_LOGS: "true"
# OpenTelemetry Configuration - Direct to SigNoz
OTEL_EXPORTER_OTLP_ENDPOINT: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4317"
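# NOTE: the gRPC endpoint is given as host:port without a scheme. NOTE(review): the OpenTelemetry spec also permits an http:// or https:// URL here — confirm which form the services' exporter setup expects.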
OTEL_EXPORTER_OTLP_ENDPOINT: "signoz-otel-collector.bakery-ia.svc.cluster.local:4317"
OTEL_EXPORTER_OTLP_PROTOCOL: "grpc"
OTEL_SERVICE_NAME: "bakery-ia"
OTEL_RESOURCE_ATTRIBUTES: "deployment.environment=production,cluster.name=bakery-ia-prod"