Files
bakery-ia/infrastructure/helm/verify-signoz-telemetry.sh
2026-01-09 11:18:20 +01:00

178 lines
9.3 KiB
Bash
Executable File

#!/bin/bash
#
# SigNoz Telemetry Verification Script
#
# Checks that services are sending metrics, logs, and traces to SigNoz and
# that SigNoz is collecting them. The checks run as seven numbered steps,
# followed by a summary.

# Abort on the first unhandled command failure.
set -e

# Kubernetes namespace targeted by every kubectl call in this script.
NAMESPACE="bakery-ia"

# ANSI colour codes. They are stored as literal backslash sequences (single
# quotes), so they only become colours when printed with `echo -e` or
# printf's %b.
NC='\033[0m' # No Color
BLUE='\033[0;34m'
YELLOW='\033[1;33m'
RED='\033[0;31m'
GREEN='\033[0;32m'

# Opening banner: title framed by two horizontal rules, then a blank line.
banner_rule="━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
printf '%b\n' \
  "${BLUE}${banner_rule}${NC}" \
  "${BLUE} SigNoz Telemetry Verification Script${NC}" \
  "${BLUE}${banner_rule}${NC}"
printf '\n'
# Step 1: Verify SigNoz Components are Running
echo -e "${BLUE}[1/7] Checking SigNoz Components Status...${NC}"
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"

# Prints the name of the first Running pod in $NAMESPACE that matches the
# label selector in $1, or nothing when no such pod exists.
#
# When the pod list is empty, the `{.items[0]...}` jsonpath fails and kubectl
# exits non-zero. Under `set -e` that would abort the script at the assignment
# below, before the diagnostic branch can run, so the failure is swallowed
# here and reported through the empty result instead.
first_running_pod() {
  kubectl get pods -n "$NAMESPACE" -l "$1" \
    --field-selector=status.phase=Running \
    -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || true
}

OTEL_POD=$(first_running_pod "app.kubernetes.io/name=signoz,app.kubernetes.io/component=otel-collector")
SIGNOZ_POD=$(first_running_pod "app.kubernetes.io/name=signoz,app.kubernetes.io/component=signoz")
CLICKHOUSE_POD=$(first_running_pod "clickhouse.altinity.com/chi=signoz-clickhouse")

if [[ -n "$OTEL_POD" && -n "$SIGNOZ_POD" && -n "$CLICKHOUSE_POD" ]]; then
  echo -e "${GREEN}✓ All SigNoz components are running${NC}"
  echo " - OTel Collector: $OTEL_POD"
  echo " - SigNoz Frontend: $SIGNOZ_POD"
  echo " - ClickHouse: $CLICKHOUSE_POD"
else
  echo -e "${RED}✗ Some SigNoz components are not running${NC}"
  # Name the components that have no Running pod.
  [[ -n "$OTEL_POD" ]] || echo " - OTel Collector: no running pod found"
  [[ -n "$SIGNOZ_POD" ]] || echo " - SigNoz Frontend: no running pod found"
  [[ -n "$CLICKHOUSE_POD" ]] || echo " - ClickHouse: no running pod found"
  # Best-effort pod listing for context; an empty grep result must not
  # pre-empt the explicit exit below.
  kubectl get pods -n "$NAMESPACE" | grep signoz || true
  exit 1
fi
echo ""
# Step 2: Check OTel Collector Endpoints
echo -e "${BLUE}[2/7] Verifying OTel Collector Endpoints...${NC}"
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"

# Cluster IP of the collector Service. With `set -e` in effect, a failed
# lookup here stops the script.
OTEL_SVC=$(kubectl get svc -n "$NAMESPACE" signoz-otel-collector -o jsonpath='{.spec.clusterIP}')
printf '%s\n' "OTel Collector Service IP: $OTEL_SVC" "" "Available endpoints:"

# One "name<TAB>port" row per Service port, aligned into columns.
kubectl get svc -n "$NAMESPACE" signoz-otel-collector \
  -o jsonpath='{range .spec.ports[*]}{.name}{"\t"}{.port}{"\n"}{end}' \
  | column -t

printf '\n'
echo -e "${GREEN}✓ OTel Collector endpoints are exposed${NC}"
printf '\n'
# Step 3: Check OTel Collector Logs for Data Reception
echo -e "${BLUE}[3/7] Checking OTel Collector for Recent Activity...${NC}"
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
echo "Recent OTel Collector logs (last 20 lines):"

# Show only the tail lines that mention telemetry activity; when none match
# (grep exits non-zero), print a placeholder instead.
activity_pattern="received|exported|traces|metrics|logs"
if ! kubectl logs -n "$NAMESPACE" "$OTEL_POD" --tail=20 | grep -E "$activity_pattern"; then
  echo "No recent telemetry data found in logs"
fi
echo ""
# Step 4: Check Service Configurations
echo -e "${BLUE}[4/7] Verifying Service Telemetry Configuration...${NC}"
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"

# Prints the value stored under data key $1 of the bakery-config ConfigMap.
# The exit status is kubectl's, so under `set -e` a failed lookup still
# aborts the assignment that called it.
bakery_config_value() {
  kubectl get configmap bakery-config -n "$NAMESPACE" -o jsonpath="{.data.$1}"
}

# Check ConfigMap for OTEL settings
OTEL_ENDPOINT=$(bakery_config_value OTEL_EXPORTER_OTLP_ENDPOINT)
ENABLE_TRACING=$(bakery_config_value ENABLE_TRACING)
ENABLE_METRICS=$(bakery_config_value ENABLE_METRICS)
ENABLE_LOGS=$(bakery_config_value ENABLE_LOGS)

echo "Configuration from bakery-config ConfigMap:"
printf ' %s: %s\n' \
  "OTEL_EXPORTER_OTLP_ENDPOINT" "$OTEL_ENDPOINT" \
  "ENABLE_TRACING" "$ENABLE_TRACING" \
  "ENABLE_METRICS" "$ENABLE_METRICS" \
  "ENABLE_LOGS" "$ENABLE_LOGS"
printf '\n'

# All three switches must be exactly "true"; anything else only warns.
if [[ "$ENABLE_TRACING" != "true" || "$ENABLE_METRICS" != "true" || "$ENABLE_LOGS" != "true" ]]; then
  echo -e "${YELLOW}⚠ Some telemetry features may be disabled${NC}"
else
  echo -e "${GREEN}✓ Telemetry is enabled in configuration${NC}"
fi
printf '\n'
# Step 5: Test OTel Collector Health
echo -e "${BLUE}[5/7] Testing OTel Collector Health Endpoint...${NC}"
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"

# Fetch http://localhost:13133/ from inside the collector pod. Any failure of
# the exec/wget chain is turned into the marker "FAILED" rather than aborting.
HEALTH_CHECK=$(kubectl exec -n "$NAMESPACE" "$OTEL_POD" -- wget -qO- http://localhost:13133/ 2>/dev/null || echo "FAILED")

# Healthy means the body mentions "Server available" or is exactly "{}".
case "$HEALTH_CHECK" in
  *"Server available"* | "{}")
    echo -e "${GREEN}✓ OTel Collector health check passed${NC}"
    ;;
  *)
    echo -e "${RED}✗ OTel Collector health check failed${NC}"
    echo "Response: $HEALTH_CHECK"
    ;;
esac
echo ""
# Step 6: Query ClickHouse for Telemetry Data
echo -e "${BLUE}[6/7] Querying ClickHouse for Telemetry Data...${NC}"
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"

# Reduces raw query output ($1) to a non-negative decimal integer on stdout.
# Surrounding whitespace is ignored; anything that is not a plain run of
# digits (empty output, an error message, ...) becomes 0, so the value is
# always safe to use in a numeric comparison.
normalize_count() {
  if [[ "$1" =~ ^[[:space:]]*([0-9]+)[[:space:]]*$ ]]; then
    # 10# forces base 10 so a leading zero is never read as octal.
    printf '%d\n' "$((10#${BASH_REMATCH[1]}))"
  else
    printf '0\n'
  fi
}

# Runs the SQL in $1 through clickhouse-client inside $CLICKHOUSE_POD as user
# "admin" and prints the result as an integer. Prints 0 when the query fails
# or returns something non-numeric; never returns non-zero, so it is safe
# under `set -e`.
clickhouse_count() {
  local raw
  raw=$(kubectl exec -n "$NAMESPACE" "$CLICKHOUSE_POD" -- \
    clickhouse-client --user=admin --password="$CH_PASSWORD" --query="$1" \
    2>/dev/null) || raw=""
  normalize_count "$raw"
}

# Get ClickHouse credentials
#
# The lookup and the base64 decode are separate steps on purpose. Chained in
# one pipeline, the pipeline status is base64's, and decoding empty input
# succeeds, so a failed lookup would yield an empty password instead of the
# fallback.
# NOTE(review): this fallback looks like a chart default password; confirm,
# and consider dropping the hard-coded credential altogether.
CH_FALLBACK_PASSWORD="27ff0399-0d3a-4bd8-919d-17c2181e6fb9"
ch_password_b64=$(kubectl get secret -n "$NAMESPACE" signoz-clickhouse -o jsonpath='{.data.admin-password}' 2>/dev/null) || ch_password_b64=""
CH_PASSWORD=""
if [[ -n "$ch_password_b64" ]]; then
  CH_PASSWORD=$(printf '%s' "$ch_password_b64" | base64 -d 2>/dev/null) || CH_PASSWORD=""
fi
if [[ -z "$CH_PASSWORD" ]]; then
  echo -e "${YELLOW}⚠ Could not read admin-password from secret signoz-clickhouse; using fallback password${NC}" >&2
  CH_PASSWORD="$CH_FALLBACK_PASSWORD"
fi

echo "Checking for traces in ClickHouse..."
TRACES_COUNT=$(clickhouse_count "SELECT count() FROM signoz_traces.signoz_index_v2 WHERE timestamp >= now() - INTERVAL 1 HOUR")
echo " Traces in last hour: $TRACES_COUNT"

echo "Checking for metrics in ClickHouse..."
METRICS_COUNT=$(clickhouse_count "SELECT count() FROM signoz_metrics.samples_v4 WHERE unix_milli >= toUnixTimestamp(now() - INTERVAL 1 HOUR) * 1000")
echo " Metrics in last hour: $METRICS_COUNT"

echo "Checking for logs in ClickHouse..."
# NOTE(review): this compares `timestamp` with a DateTime; confirm the column
# type of signoz_logs.logs.timestamp matches, or the filter may not restrict
# to the last hour.
LOGS_COUNT=$(clickhouse_count "SELECT count() FROM signoz_logs.logs WHERE timestamp >= now() - INTERVAL 1 HOUR")
echo " Logs in last hour: $LOGS_COUNT"
echo ""

# The counts are normalized integers at this point, so -gt cannot choke.
if [[ "$TRACES_COUNT" -gt 0 || "$METRICS_COUNT" -gt 0 || "$LOGS_COUNT" -gt 0 ]]; then
  echo -e "${GREEN}✓ Telemetry data found in ClickHouse!${NC}"
else
  echo -e "${YELLOW}⚠ No telemetry data found in the last hour${NC}"
  echo " This might be normal if:"
  echo " - Services were just deployed"
  echo " - No traffic has been generated yet"
  echo " - Services haven't finished initializing"
fi
echo ""
# Step 7: Access Information
# Purely informational: prints the ingress URL and the port-forward commands
# for reaching the SigNoz UI and the collector's own metrics endpoint.
printf '%b\n' "${BLUE}[7/7] SigNoz UI Access Information${NC}"
printf '%s\n' "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" ""

printf '%s\n' "SigNoz is accessible via ingress at:"
printf '%b\n' " ${GREEN}https://monitoring.bakery-ia.local${NC}"
printf '\n'

printf '%s\n' "Or via port-forward:"
printf '%b\n' " ${YELLOW}kubectl port-forward -n $NAMESPACE svc/signoz 3301:8080${NC}"
printf '%s\n' " Then access: http://localhost:3301" ""

printf '%s\n' "To view OTel Collector metrics:"
printf '%b\n' " ${YELLOW}kubectl port-forward -n $NAMESPACE svc/signoz-otel-collector 8888:8888${NC}"
printf '%s\n' " Then access: http://localhost:8888/metrics" ""
# Summary
# Reports the outcome of the earlier steps using the values they actually
# recorded (HEALTH_CHECK, ENABLE_*, *_COUNT) rather than fixed check marks.

# Succeeds when $1 is a plain decimal integer greater than zero. Empty or
# non-numeric values (e.g. an error message captured in place of a count)
# simply count as "no data" instead of breaking the arithmetic test.
is_positive_count() {
  [[ "$1" =~ ^[0-9]+$ ]] && ((10#$1 > 0))
}

echo ""
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
echo -e "${BLUE} Verification Summary${NC}"
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
echo ""
echo "Component Status:"
# Step 1 exits the script when a component is missing, so reaching this
# line means all three pods were found.
echo " ✓ SigNoz components running"
# Same acceptance rule as the health step: body mentions "Server available"
# or is exactly "{}".
if [[ "${HEALTH_CHECK:-}" == *"Server available"* || "${HEALTH_CHECK:-}" == "{}" ]]; then
  echo " ✓ OTel Collector healthy"
else
  echo " ✗ OTel Collector health check failed"
fi
# Same rule as the configuration step: all three switches must be "true".
if [[ "${ENABLE_TRACING:-}" == "true" && "${ENABLE_METRICS:-}" == "true" && "${ENABLE_LOGS:-}" == "true" ]]; then
  echo " ✓ Configuration correct"
else
  echo " ⚠ Some telemetry features may be disabled"
fi
echo ""
echo "Data Collection (last hour):"
echo " Traces: ${TRACES_COUNT:-}"
echo " Metrics: ${METRICS_COUNT:-}"
echo " Logs: ${LOGS_COUNT:-}"
echo ""
if is_positive_count "${TRACES_COUNT:-}" || is_positive_count "${METRICS_COUNT:-}" || is_positive_count "${LOGS_COUNT:-}"; then
  echo -e "${GREEN}✓ SigNoz is collecting telemetry data successfully!${NC}"
else
  echo -e "${YELLOW}⚠ To generate telemetry data, try:${NC}"
  echo ""
  echo "1. Generate traffic to your services:"
  echo " curl http://localhost/api/health"
  echo ""
  echo "2. Check service logs for tracing initialization:"
  # "\\|" prints as "\|", the alternation operator in grep's basic regex.
  echo " kubectl logs -n $NAMESPACE <service-pod> | grep -i 'tracing\\|otel\\|signoz'"
  echo ""
  echo "3. Wait a few minutes and run this script again"
fi
echo ""
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"