178 lines
9.3 KiB
Bash
178 lines
9.3 KiB
Bash
|
|
#!/bin/bash
#
# SigNoz telemetry verification.
#
# Runs a sequence of checks to confirm that services are shipping metrics,
# logs, and traces to SigNoz, and that SigNoz is actually storing them.

# Stop at the first command that fails outside a conditional context.
set -e

# Namespace passed to every kubectl invocation in this script.
NAMESPACE="bakery-ia"

# ANSI colour escapes. They are stored as literal backslash sequences and
# only turned into control characters when printed with escape processing.
GREEN='\033[0;32m'
RED='\033[0;31m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Title banner plus one trailing blank line; printf re-applies the format to
# each argument, so every argument becomes its own output line.
printf '%b\n' \
  "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}" \
  "${BLUE} SigNoz Telemetry Verification Script${NC}" \
  "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}" \
  ""
|
||
|
|
|
||
|
|
# Step 1: Verify SigNoz Components are Running
echo -e "${BLUE}[1/7] Checking SigNoz Components Status...${NC}"
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"

#######################################
# Print the name of the first Running pod matching a label selector.
# Globals:   NAMESPACE (read)
# Arguments: $1 - label selector passed to `kubectl get pods -l`
# Outputs:   the pod name on stdout, or nothing when no pod matches
# Returns:   always 0
#######################################
first_running_pod() {
  # kubectl exits non-zero when `.items[0]` is evaluated against an empty
  # list. Without `|| true`, `set -e` would end the script right at the
  # assignment (with stderr discarded, so silently) and the diagnostic branch
  # below could never report the missing component.
  kubectl get pods -n "$NAMESPACE" -l "$1" \
    --field-selector=status.phase=Running \
    -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || true
}

OTEL_POD=$(first_running_pod 'app.kubernetes.io/name=signoz,app.kubernetes.io/component=otel-collector')
SIGNOZ_POD=$(first_running_pod 'app.kubernetes.io/name=signoz,app.kubernetes.io/component=signoz')
CLICKHOUSE_POD=$(first_running_pod 'clickhouse.altinity.com/chi=signoz-clickhouse')

if [[ -n "$OTEL_POD" && -n "$SIGNOZ_POD" && -n "$CLICKHOUSE_POD" ]]; then
  echo -e "${GREEN}✓ All SigNoz components are running${NC}"
  echo " - OTel Collector: $OTEL_POD"
  echo " - SigNoz Frontend: $SIGNOZ_POD"
  echo " - ClickHouse: $CLICKHOUSE_POD"
else
  echo -e "${RED}✗ Some SigNoz components are not running${NC}"
  # Show which lookups came back empty so the failure is actionable.
  echo " - OTel Collector: ${OTEL_POD:-<not running>}"
  echo " - SigNoz Frontend: ${SIGNOZ_POD:-<not running>}"
  echo " - ClickHouse: ${CLICKHOUSE_POD:-<not running>}"
  # Best-effort listing: grep exits 1 when nothing matches, and that must not
  # pre-empt the explicit exit below.
  kubectl get pods -n "$NAMESPACE" | grep signoz || true
  exit 1
fi
echo ""
|
||
|
|
|
||
|
|
# Step 2: Check OTel Collector Endpoints
printf '%b\n' "${BLUE}[2/7] Verifying OTel Collector Endpoints...${NC}"
printf '%s\n' "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"

# Cluster IP of the collector Service. A failed lookup aborts the script
# here because the assignment inherits kubectl's exit status under `set -e`.
OTEL_SVC=$(kubectl get svc -n "$NAMESPACE" signoz-otel-collector -o jsonpath='{.spec.clusterIP}')
printf '%s\n' \
  "OTel Collector Service IP: $OTEL_SVC" \
  "" \
  "Available endpoints:"

# One "<port name> <port number>" row per Service port, aligned by column(1).
kubectl get svc -n "$NAMESPACE" signoz-otel-collector \
  -o jsonpath='{range .spec.ports[*]}{.name}{"\t"}{.port}{"\n"}{end}' \
  | column -t

printf '\n'
printf '%b\n' "${GREEN}✓ OTel Collector endpoints are exposed${NC}"
printf '\n'
|
||
|
|
|
||
|
|
# Step 3: Check OTel Collector Logs for Data Reception
printf '%b\n' "${BLUE}[3/7] Checking OTel Collector for Recent Activity...${NC}"
printf '%s\n' "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"

printf '%s\n' "Recent OTel Collector logs (last 20 lines):"
# Keep only the lines that mention telemetry activity. The pipeline's status
# is grep's, so the fallback message prints when no line matched.
if ! kubectl logs -n "$NAMESPACE" "$OTEL_POD" --tail=20 \
  | grep -E "received|exported|traces|metrics|logs"; then
  printf '%s\n' "No recent telemetry data found in logs"
fi
printf '\n'
|
||
|
|
|
||
|
|
# Step 4: Check Service Configurations
printf '%b\n' "${BLUE}[4/7] Verifying Service Telemetry Configuration...${NC}"
printf '%s\n' "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"

# Print the value stored under key $1 in the bakery-config ConfigMap.
# The exit status is kubectl's, so a failed lookup still aborts the script
# at the calling assignment under `set -e`.
bakery_config_value() {
  kubectl get configmap bakery-config -n "$NAMESPACE" -o jsonpath="{.data.$1}"
}

# Check ConfigMap for OTEL settings
OTEL_ENDPOINT=$(bakery_config_value OTEL_EXPORTER_OTLP_ENDPOINT)
ENABLE_TRACING=$(bakery_config_value ENABLE_TRACING)
ENABLE_METRICS=$(bakery_config_value ENABLE_METRICS)
ENABLE_LOGS=$(bakery_config_value ENABLE_LOGS)

printf '%s\n' \
  "Configuration from bakery-config ConfigMap:" \
  " OTEL_EXPORTER_OTLP_ENDPOINT: $OTEL_ENDPOINT" \
  " ENABLE_TRACING: $ENABLE_TRACING" \
  " ENABLE_METRICS: $ENABLE_METRICS" \
  " ENABLE_LOGS: $ENABLE_LOGS" \
  ""

# Warn unless all three switches are exactly the string "true".
if [[ "$ENABLE_TRACING" != "true" || "$ENABLE_METRICS" != "true" || "$ENABLE_LOGS" != "true" ]]; then
  printf '%b\n' "${YELLOW}⚠ Some telemetry features may be disabled${NC}"
else
  printf '%b\n' "${GREEN}✓ Telemetry is enabled in configuration${NC}"
fi
printf '\n'
|
||
|
|
|
||
|
|
# Step 5: Test OTel Collector Health
printf '%b\n' "${BLUE}[5/7] Testing OTel Collector Health Endpoint...${NC}"
printf '%s\n' "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"

# Fetch http://localhost:13133/ from inside the collector pod. Any failure of
# the exec/wget chain appends the sentinel "FAILED" instead of aborting.
HEALTH_CHECK=$(
  kubectl exec -n "$NAMESPACE" "$OTEL_POD" -- wget -qO- http://localhost:13133/ 2>/dev/null \
    || echo "FAILED"
)

# Healthy when the body mentions "Server available" or is exactly "{}".
case "$HEALTH_CHECK" in
  *"Server available"* | "{}")
    printf '%b\n' "${GREEN}✓ OTel Collector health check passed${NC}"
    ;;
  *)
    printf '%b\n' "${RED}✗ OTel Collector health check failed${NC}"
    printf '%s\n' "Response: $HEALTH_CHECK"
    ;;
esac
printf '\n'
|
||
|
|
|
||
|
|
# Step 6: Query ClickHouse for Telemetry Data
echo -e "${BLUE}[6/7] Querying ClickHouse for Telemetry Data...${NC}"
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"

# ClickHouse admin password, resolved in this order:
#   1. SIGNOZ_CLICKHOUSE_PASSWORD from the caller's environment, if non-empty;
#   2. the admin-password key of the signoz-clickhouse Secret.
# No default is embedded in the script: a credential must not live in source
# control. Each lookup stage is checked on its own because, without pipefail,
# a `kubectl | base64 -d || fallback` pipeline only ever sees base64's status.
CH_PASSWORD=${SIGNOZ_CLICKHOUSE_PASSWORD:-}
if [[ -z "$CH_PASSWORD" ]]; then
  ch_password_b64=$(kubectl get secret -n "$NAMESPACE" signoz-clickhouse \
    -o jsonpath='{.data.admin-password}' 2>/dev/null) || ch_password_b64=""
  CH_PASSWORD=$(printf '%s' "$ch_password_b64" | base64 -d 2>/dev/null) || CH_PASSWORD=""
fi
if [[ -z "$CH_PASSWORD" ]]; then
  echo -e "${YELLOW}⚠ ClickHouse admin password not found (set SIGNOZ_CLICKHOUSE_PASSWORD to override); queries may fail${NC}"
fi

#######################################
# Run a count query in the ClickHouse pod and print the result.
# Globals:   NAMESPACE, CLICKHOUSE_POD, CH_PASSWORD (read)
# Arguments: $1 - SQL statement expected to return a single integer
# Outputs:   the count on stdout; "0" when the query fails or the output is
#            not a plain non-negative integer
# Returns:   always 0
#######################################
ch_row_count() {
  local result
  result=$(kubectl exec -n "$NAMESPACE" "$CLICKHOUSE_POD" -- \
    clickhouse-client --user=admin --password="$CH_PASSWORD" \
    --query="$1" 2>/dev/null) || result=0
  # Drop stray whitespace, then refuse anything that is not a bare integer so
  # later arithmetic comparisons cannot fail on unexpected text.
  result=${result//[[:space:]]/}
  [[ "$result" =~ ^[0-9]+$ ]] || result=0
  printf '%s\n' "$result"
}

echo "Checking for traces in ClickHouse..."
TRACES_COUNT=$(ch_row_count "SELECT count() FROM signoz_traces.signoz_index_v2 WHERE timestamp >= now() - INTERVAL 1 HOUR")
echo " Traces in last hour: $TRACES_COUNT"

echo "Checking for metrics in ClickHouse..."
METRICS_COUNT=$(ch_row_count "SELECT count() FROM signoz_metrics.samples_v4 WHERE unix_milli >= toUnixTimestamp(now() - INTERVAL 1 HOUR) * 1000")
echo " Metrics in last hour: $METRICS_COUNT"

echo "Checking for logs in ClickHouse..."
LOGS_COUNT=$(ch_row_count "SELECT count() FROM signoz_logs.logs WHERE timestamp >= now() - INTERVAL 1 HOUR")
echo " Logs in last hour: $LOGS_COUNT"
echo ""

# The counts are guaranteed numeric by ch_row_count; 10# forces base 10 so a
# value with leading zeros is never read as octal.
if (( 10#$TRACES_COUNT > 0 || 10#$METRICS_COUNT > 0 || 10#$LOGS_COUNT > 0 )); then
  echo -e "${GREEN}✓ Telemetry data found in ClickHouse!${NC}"
else
  echo -e "${YELLOW}⚠ No telemetry data found in the last hour${NC}"
  echo " This might be normal if:"
  echo " - Services were just deployed"
  echo " - No traffic has been generated yet"
  echo " - Services haven't finished initializing"
fi
echo ""
|
||
|
|
|
||
|
|
# Step 7: Access Information
# Purely informational: prints how to reach the SigNoz UI and the collector's
# metrics endpoint. Coloured lines go through %b so the escape sequences in
# the colour variables are interpreted; plain lines use %s.
printf '%b\n' "${BLUE}[7/7] SigNoz UI Access Information${NC}"
printf '%s\n' \
  "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" \
  "" \
  "SigNoz is accessible via ingress at:"
printf '%b\n' " ${GREEN}https://monitoring.bakery-ia.local${NC}"
printf '%s\n' \
  "" \
  "Or via port-forward:"
printf '%b\n' " ${YELLOW}kubectl port-forward -n $NAMESPACE svc/signoz 3301:8080${NC}"
printf '%s\n' \
  " Then access: http://localhost:3301" \
  "" \
  "To view OTel Collector metrics:"
printf '%b\n' " ${YELLOW}kubectl port-forward -n $NAMESPACE svc/signoz-otel-collector 8888:8888${NC}"
printf '%s\n' \
  " Then access: http://localhost:8888/metrics" \
  ""
|
||
|
|
|
||
|
|
# Summary
# Recaps the earlier checks. Every status line is derived from the values the
# earlier steps stored, so the summary cannot contradict what was reported.
echo ""
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
echo -e "${BLUE} Verification Summary${NC}"
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
echo ""

#######################################
# Test whether a query result represents at least one row.
# Arguments: $1 - value to inspect
# Returns:   0 when $1 is a plain decimal integer greater than zero;
#            non-zero for 0, an empty string, or any non-numeric text
#######################################
has_rows() {
  # The regex guard keeps arbitrary text out of the arithmetic context;
  # 10# forces base 10 so leading zeros are not parsed as octal.
  [[ "$1" =~ ^[0-9]+$ ]] && (( 10#$1 > 0 ))
}

echo "Component Status:"
# The component check exits the script when a pod is missing, so reaching
# this line means all three components were found.
echo " ✓ SigNoz components running"
# Same acceptance rule as the health step: body mentions "Server available"
# or is exactly "{}".
if [[ "${HEALTH_CHECK:-}" == *"Server available"* || "${HEALTH_CHECK:-}" == "{}" ]]; then
  echo " ✓ OTel Collector healthy"
else
  echo " ✗ OTel Collector health check failed"
fi
# Same rule as the configuration step: all three switches must be "true".
if [[ "${ENABLE_TRACING:-}" == "true" && "${ENABLE_METRICS:-}" == "true" && "${ENABLE_LOGS:-}" == "true" ]]; then
  echo " ✓ Configuration correct"
else
  echo " ⚠ Some telemetry features may be disabled"
fi
echo ""
echo "Data Collection (last hour):"
echo " Traces: ${TRACES_COUNT:-}"
echo " Metrics: ${METRICS_COUNT:-}"
echo " Logs: ${LOGS_COUNT:-}"
echo ""

if has_rows "${TRACES_COUNT:-}" || has_rows "${METRICS_COUNT:-}" || has_rows "${LOGS_COUNT:-}"; then
  echo -e "${GREEN}✓ SigNoz is collecting telemetry data successfully!${NC}"
else
  echo -e "${YELLOW}⚠ To generate telemetry data, try:${NC}"
  echo ""
  echo "1. Generate traffic to your services:"
  echo " curl http://localhost/api/health"
  echo ""
  echo "2. Check service logs for tracing initialization:"
  echo " kubectl logs -n $NAMESPACE <service-pod> | grep -i 'tracing\\|otel\\|signoz'"
  echo ""
  echo "3. Wait a few minutes and run this script again"
fi
echo ""
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
|