Improve monitoring 3
This commit is contained in:
141
infrastructure/helm/generate-test-traffic.sh
Executable file
141
infrastructure/helm/generate-test-traffic.sh
Executable file
@@ -0,0 +1,141 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Generate Test Traffic to Services
|
||||
# This script generates API calls to verify telemetry data collection
|
||||
|
||||
set -e
|
||||
|
||||
NAMESPACE="bakery-ia"
|
||||
GREEN='\033[0;32m'
|
||||
BLUE='\033[0;34m'
|
||||
YELLOW='\033[1;33m'
|
||||
NC='\033[0m'
|
||||
|
||||
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
|
||||
echo -e "${BLUE} Generating Test Traffic for SigNoz Verification${NC}"
|
||||
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
|
||||
echo ""
|
||||
|
||||
# Check if ingress is accessible
|
||||
echo -e "${BLUE}Step 1: Verifying Gateway Access${NC}"
|
||||
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
|
||||
|
||||
# Look up the first Running gateway pod.
# kubectl exits non-zero when the jsonpath index `items[0]` does not exist
# (i.e. no pod matched); under `set -e` that would abort the script before the
# fallback below could run, so a failed lookup is mapped to an empty name.
GATEWAY_POD=$(kubectl get pods -n "$NAMESPACE" -l app=gateway \
  --field-selector=status.phase=Running \
  -o jsonpath='{.items[0].metadata.name}' 2>/dev/null) || GATEWAY_POD=""

if [[ -z "$GATEWAY_POD" ]]; then
  echo -e "${YELLOW}⚠ Gateway pod not running. Starting port-forward...${NC}"
  # Port forward in background
  kubectl port-forward -n "$NAMESPACE" svc/gateway-service 8000:8000 &
  PORT_FORWARD_PID=$!
  # Make sure the background port-forward is stopped on every exit path,
  # including an early abort caused by `set -e`.
  trap 'kill "$PORT_FORWARD_PID" 2>/dev/null || true' EXIT
  # Give the port-forward a moment to start listening.
  sleep 3
  API_URL="http://localhost:8000"
else
  echo -e "${GREEN}✓ Gateway is running: $GATEWAY_POD${NC}"
  # Use internal service
  API_URL="http://gateway-service.$NAMESPACE.svc.cluster.local:8000"
fi
|
||||
echo ""
|
||||
|
||||
# Function to make API call from inside cluster
|
||||
#######################################
# Issue one GET request against the gateway and report the outcome.
# Globals:
#   GATEWAY_POD (read) - when non-empty, curl is run inside this pod via
#                        `kubectl exec`; otherwise curl runs locally.
#   NAMESPACE   (read) - namespace used for `kubectl exec`.
#   API_URL     (read) - base URL the endpoint is appended to.
#   BLUE, GREEN, YELLOW, NC (read) - colour escape sequences.
# Arguments:
#   $1 - endpoint path appended to API_URL (e.g. "/health")
#   $2 - human-readable description printed before the request
# Outputs:
#   Progress and result lines on stdout.
# Returns:
#   0 always; failures are reported, not propagated, so the remaining
#   requests still run under `set -e`.
#######################################
make_request() {
  local endpoint=$1
  local description=$2
  local response=""
  local status=0
  local http_code=""

  echo -e "${BLUE}→ Testing: $description${NC}"
  echo " Endpoint: $endpoint"

  # The exit status is captured separately from the output: curl still emits
  # its --write-out text when the transfer fails, so comparing the captured
  # output against a sentinel string cannot detect failures reliably.
  if [[ -n "$GATEWAY_POD" ]]; then
    # Make request from inside the gateway pod
    response=$(kubectl exec -n "$NAMESPACE" "$GATEWAY_POD" -- \
      curl -s -w "\nHTTP_CODE:%{http_code}" "${API_URL}${endpoint}" 2>/dev/null) || status=$?
  else
    # Make request from localhost
    response=$(curl -s -w "\nHTTP_CODE:%{http_code}" "${API_URL}${endpoint}" 2>/dev/null) || status=$?
  fi

  # The status marker is the last line written; take the last match so a
  # response body that happens to contain "HTTP_CODE:" cannot confuse parsing.
  http_code=$(printf '%s\n' "$response" | sed -n 's/^HTTP_CODE://p' | tail -n 1)

  if (( status != 0 )) || [[ -z "$http_code" || "$http_code" == "000" ]]; then
    echo -e " ${YELLOW}⚠ Request failed${NC}"
  else
    case "$http_code" in
      200 | 401 | 404)
        # 401/404 still prove the request reached a service and produced telemetry.
        echo -e " ${GREEN}✓ Response received (HTTP $http_code)${NC}"
        ;;
      *)
        echo -e " ${YELLOW}⚠ Unexpected response (HTTP $http_code)${NC}"
        ;;
    esac
  fi
  echo ""
  # Pace the requests.
  sleep 1
}
|
||||
|
||||
# Generate traffic to various endpoints
|
||||
echo -e "${BLUE}Step 2: Generating Traffic to Services${NC}"
|
||||
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
|
||||
echo ""
|
||||
|
||||
# Health checks (should generate traces)
|
||||
make_request "/health" "Gateway Health Check"
|
||||
make_request "/api/health" "API Health Check"
|
||||
|
||||
# Auth service endpoints
|
||||
make_request "/api/auth/health" "Auth Service Health"
|
||||
|
||||
# Tenant service endpoints
|
||||
make_request "/api/tenants/health" "Tenant Service Health"
|
||||
|
||||
# Inventory service endpoints
|
||||
make_request "/api/inventory/health" "Inventory Service Health"
|
||||
|
||||
# Orders service endpoints
|
||||
make_request "/api/orders/health" "Orders Service Health"
|
||||
|
||||
# Forecasting service endpoints
|
||||
make_request "/api/forecasting/health" "Forecasting Service Health"
|
||||
|
||||
echo -e "${BLUE}Step 3: Checking Service Logs for Telemetry${NC}"
|
||||
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
|
||||
echo ""
|
||||
|
||||
# Check a few service pods for tracing logs
|
||||
# Services whose recent logs are scanned for tracing/OTel initialisation lines.
SERVICES=("auth-service" "inventory-service" "gateway")

for service in "${SERVICES[@]}"; do
  # kubectl exits non-zero when jsonpath `items[0]` does not exist (no Running
  # pod for this label); map that to an empty name so `set -e` does not abort
  # the whole script and the remaining services are still checked.
  POD=$(kubectl get pods -n "$NAMESPACE" -l "app=$service" \
    --field-selector=status.phase=Running \
    -o jsonpath='{.items[0].metadata.name}' 2>/dev/null) || POD=""

  # Services without a running pod are skipped silently.
  if [[ -z "$POD" ]]; then
    continue
  fi

  echo -e "${BLUE}Checking $service ($POD)...${NC}"
  # Keep at most two matching lines from the last 100 log lines; an empty
  # result (no match or log retrieval failure) is handled below.
  TRACING_LOG=$(kubectl logs -n "$NAMESPACE" "$POD" --tail=100 2>/dev/null | grep -i "tracing\|otel" | head -n 2 || echo "")
  if [[ -n "$TRACING_LOG" ]]; then
    echo -e "${GREEN}✓ Tracing configured:${NC}"
    echo "$TRACING_LOG" | sed 's/^/ /'
  else
    echo -e "${YELLOW}⚠ No tracing logs found${NC}"
  fi
  echo ""
done
|
||||
|
||||
# Wait for data to be processed
|
||||
echo -e "${BLUE}Step 4: Waiting for Data Processing${NC}"
|
||||
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
|
||||
echo "Waiting 30 seconds for telemetry data to be processed..."
|
||||
for i in {30..1}; do
|
||||
echo -ne "\r ${i} seconds remaining..."
|
||||
sleep 1
|
||||
done
|
||||
echo -e "\n"
|
||||
|
||||
# Cleanup port-forward if started
|
||||
if [[ -n "$PORT_FORWARD_PID" ]]; then
|
||||
kill $PORT_FORWARD_PID 2>/dev/null || true
|
||||
fi
|
||||
|
||||
echo -e "${GREEN}✓ Test traffic generation complete!${NC}"
|
||||
echo ""
|
||||
echo -e "${BLUE}Next Steps:${NC}"
|
||||
echo "1. Run the verification script to check for collected data:"
|
||||
echo " ./infrastructure/helm/verify-signoz-telemetry.sh"
|
||||
echo ""
|
||||
echo "2. Access SigNoz UI to visualize the data:"
|
||||
echo " https://monitoring.bakery-ia.local"
|
||||
echo " or"
|
||||
echo " kubectl port-forward -n bakery-ia svc/signoz 3301:8080"
|
||||
echo " Then go to: http://localhost:3301"
|
||||
echo ""
|
||||
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
|
||||
@@ -181,6 +181,15 @@ otelCollector:
|
||||
|
||||
# OpenTelemetry Collector configuration
|
||||
config:
|
||||
# Connectors - bridge between pipelines
|
||||
connectors:
|
||||
signozmeter:
|
||||
dimensions:
|
||||
- name: service.name
|
||||
- name: deployment.environment
|
||||
- name: host.name
|
||||
metrics_flush_interval: 1h
|
||||
|
||||
receivers:
|
||||
# OTLP receivers for traces, metrics, and logs from applications
|
||||
# All application telemetry is pushed via OTLP protocol
|
||||
@@ -256,6 +265,12 @@ otelCollector:
|
||||
send_batch_size: 10000 # Increased from 1024 for better performance
|
||||
send_batch_max_size: 10000
|
||||
|
||||
# Batch processor for meter data
|
||||
batch/meter:
|
||||
timeout: 1s
|
||||
send_batch_size: 20000
|
||||
send_batch_max_size: 25000
|
||||
|
||||
# Memory limiter to prevent OOM
|
||||
memory_limiter:
|
||||
check_interval: 1s
|
||||
@@ -267,11 +282,19 @@ otelCollector:
|
||||
detectors: [env, system, docker]
|
||||
timeout: 5s
|
||||
|
||||
# Span metrics processor for automatic service metrics
|
||||
spanmetrics:
|
||||
# SigNoz span metrics processor with delta aggregation (recommended)
|
||||
# Generates RED metrics (Rate, Error, Duration) from trace spans
|
||||
signozspanmetrics/delta:
|
||||
aggregation_temporality: AGGREGATION_TEMPORALITY_DELTA
|
||||
metrics_exporter: signozclickhousemetrics
|
||||
latency_histogram_buckets: [2ms, 4ms, 6ms, 8ms, 10ms, 50ms, 100ms, 200ms, 400ms, 800ms, 1s, 1400ms, 2s, 5s, 10s, 15s]
|
||||
dimensions_cache_size: 10000
|
||||
latency_histogram_buckets: [100us, 1ms, 2ms, 6ms, 10ms, 50ms, 100ms, 250ms, 500ms, 1000ms, 1400ms, 2000ms, 5s, 10s, 20s, 40s, 60s]
|
||||
dimensions_cache_size: 100000
|
||||
dimensions:
|
||||
- name: service.namespace
|
||||
default: default
|
||||
- name: deployment.environment
|
||||
default: default
|
||||
- name: signoz.collector.id
|
||||
|
||||
exporters:
|
||||
# ClickHouse exporter for traces
|
||||
@@ -294,6 +317,13 @@ otelCollector:
|
||||
max_interval: 30s
|
||||
max_elapsed_time: 300s
|
||||
|
||||
# ClickHouse exporter for meter data (usage metrics)
|
||||
signozclickhousemeter:
|
||||
dsn: "tcp://admin:27ff0399-0d3a-4bd8-919d-17c2181e6fb9@signoz-clickhouse:9000/signoz_meter"
|
||||
timeout: 45s
|
||||
sending_queue:
|
||||
enabled: false
|
||||
|
||||
# ClickHouse exporter for logs
|
||||
clickhouselogsexporter:
|
||||
dsn: tcp://signoz-clickhouse:9000/?database=signoz_logs
|
||||
@@ -303,6 +333,13 @@ otelCollector:
|
||||
initial_interval: 5s
|
||||
max_interval: 30s
|
||||
|
||||
# Metadata exporter for service metadata
|
||||
metadataexporter:
|
||||
dsn: "tcp://admin:27ff0399-0d3a-4bd8-919d-17c2181e6fb9@signoz-clickhouse:9000/signoz_metadata"
|
||||
timeout: 10s
|
||||
cache:
|
||||
provider: in_memory
|
||||
|
||||
# Debug exporter for debugging (optional)
|
||||
debug:
|
||||
verbosity: detailed
|
||||
@@ -311,11 +348,11 @@ otelCollector:
|
||||
|
||||
service:
|
||||
pipelines:
|
||||
# Traces pipeline
|
||||
# Traces pipeline - exports to ClickHouse and signozmeter connector
|
||||
traces:
|
||||
receivers: [otlp]
|
||||
processors: [memory_limiter, batch, spanmetrics, resourcedetection]
|
||||
exporters: [clickhousetraces]
|
||||
processors: [memory_limiter, batch, signozspanmetrics/delta, resourcedetection]
|
||||
exporters: [clickhousetraces, metadataexporter, signozmeter]
|
||||
|
||||
# Metrics pipeline
|
||||
metrics:
|
||||
@@ -323,6 +360,12 @@ otelCollector:
|
||||
processors: [memory_limiter, batch, resourcedetection]
|
||||
exporters: [signozclickhousemetrics]
|
||||
|
||||
# Meter pipeline - receives from signozmeter connector
|
||||
metrics/meter:
|
||||
receivers: [signozmeter]
|
||||
processors: [batch/meter]
|
||||
exporters: [signozclickhousemeter]
|
||||
|
||||
# Logs pipeline
|
||||
logs:
|
||||
receivers: [otlp]
|
||||
|
||||
@@ -269,6 +269,15 @@ otelCollector:
|
||||
|
||||
# Full OTEL Collector Configuration
|
||||
config:
|
||||
# Connectors - bridge between pipelines
|
||||
connectors:
|
||||
signozmeter:
|
||||
dimensions:
|
||||
- name: service.name
|
||||
- name: deployment.environment
|
||||
- name: host.name
|
||||
metrics_flush_interval: 1h
|
||||
|
||||
extensions:
|
||||
health_check:
|
||||
endpoint: 0.0.0.0:13133
|
||||
@@ -304,6 +313,12 @@ otelCollector:
|
||||
send_batch_size: 50000 # Increased from 2048 (official recommendation for traces)
|
||||
send_batch_max_size: 50000
|
||||
|
||||
# Batch processor for meter data
|
||||
batch/meter:
|
||||
timeout: 1s
|
||||
send_batch_size: 20000
|
||||
send_batch_max_size: 25000
|
||||
|
||||
memory_limiter:
|
||||
check_interval: 1s
|
||||
limit_mib: 1500 # 75% of container memory (2Gi = ~2048Mi)
|
||||
@@ -324,11 +339,19 @@ otelCollector:
|
||||
value: bakery-ia-prod
|
||||
action: upsert
|
||||
|
||||
# Span metrics processor for automatic service performance metrics
|
||||
spanmetrics:
|
||||
# SigNoz span metrics processor with delta aggregation (recommended)
|
||||
# Generates RED metrics (Rate, Error, Duration) from trace spans
|
||||
signozspanmetrics/delta:
|
||||
aggregation_temporality: AGGREGATION_TEMPORALITY_DELTA
|
||||
metrics_exporter: signozclickhousemetrics
|
||||
latency_histogram_buckets: [2ms, 4ms, 6ms, 8ms, 10ms, 50ms, 100ms, 200ms, 400ms, 800ms, 1s, 1400ms, 2s, 5s, 10s, 15s]
|
||||
latency_histogram_buckets: [100us, 1ms, 2ms, 6ms, 10ms, 50ms, 100ms, 250ms, 500ms, 1000ms, 1400ms, 2000ms, 5s, 10s, 20s, 40s, 60s]
|
||||
dimensions_cache_size: 100000
|
||||
dimensions:
|
||||
- name: service.namespace
|
||||
default: default
|
||||
- name: deployment.environment
|
||||
default: production
|
||||
- name: signoz.collector.id
|
||||
|
||||
exporters:
|
||||
# Export to SigNoz ClickHouse
|
||||
@@ -350,6 +373,13 @@ otelCollector:
|
||||
max_interval: 30s
|
||||
max_elapsed_time: 300s
|
||||
|
||||
# ClickHouse exporter for meter data (usage metrics)
|
||||
signozclickhousemeter:
|
||||
dsn: "tcp://clickhouse:9000/?database=signoz_meter"
|
||||
timeout: 45s
|
||||
sending_queue:
|
||||
enabled: false
|
||||
|
||||
clickhouselogsexporter:
|
||||
dsn: tcp://clickhouse:9000/?database=signoz_logs
|
||||
timeout: 10s
|
||||
@@ -359,6 +389,13 @@ otelCollector:
|
||||
max_interval: 30s
|
||||
max_elapsed_time: 300s
|
||||
|
||||
# Metadata exporter for service metadata
|
||||
metadataexporter:
|
||||
dsn: "tcp://clickhouse:9000/?database=signoz_metadata"
|
||||
timeout: 10s
|
||||
cache:
|
||||
provider: in_memory
|
||||
|
||||
# Debug exporter for debugging (replaces deprecated logging exporter)
|
||||
debug:
|
||||
verbosity: detailed
|
||||
@@ -368,16 +405,25 @@ otelCollector:
|
||||
service:
|
||||
extensions: [health_check, zpages]
|
||||
pipelines:
|
||||
# Traces pipeline - exports to ClickHouse and signozmeter connector
|
||||
traces:
|
||||
receivers: [otlp]
|
||||
processors: [memory_limiter, batch, spanmetrics, resourcedetection, resource]
|
||||
exporters: [clickhousetraces]
|
||||
processors: [memory_limiter, batch, signozspanmetrics/delta, resourcedetection, resource]
|
||||
exporters: [clickhousetraces, metadataexporter, signozmeter]
|
||||
|
||||
# Metrics pipeline
|
||||
metrics:
|
||||
receivers: [otlp, prometheus]
|
||||
processors: [memory_limiter, batch, resourcedetection, resource]
|
||||
exporters: [signozclickhousemetrics]
|
||||
|
||||
# Meter pipeline - receives from signozmeter connector
|
||||
metrics/meter:
|
||||
receivers: [signozmeter]
|
||||
processors: [batch/meter]
|
||||
exporters: [signozclickhousemeter]
|
||||
|
||||
# Logs pipeline
|
||||
logs:
|
||||
receivers: [otlp]
|
||||
processors: [memory_limiter, batch, resourcedetection, resource]
|
||||
|
||||
177
infrastructure/helm/verify-signoz-telemetry.sh
Executable file
177
infrastructure/helm/verify-signoz-telemetry.sh
Executable file
@@ -0,0 +1,177 @@
|
||||
#!/bin/bash
|
||||
|
||||
# SigNoz Telemetry Verification Script
|
||||
# This script verifies that services are correctly sending metrics, logs, and traces to SigNoz
|
||||
# and that SigNoz is collecting them properly.
|
||||
|
||||
set -e
|
||||
|
||||
NAMESPACE="bakery-ia"
|
||||
GREEN='\033[0;32m'
|
||||
RED='\033[0;31m'
|
||||
YELLOW='\033[1;33m'
|
||||
BLUE='\033[0;34m'
|
||||
NC='\033[0m' # No Color
|
||||
|
||||
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
|
||||
echo -e "${BLUE} SigNoz Telemetry Verification Script${NC}"
|
||||
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
|
||||
echo ""
|
||||
|
||||
# Step 1: Verify SigNoz Components are Running
|
||||
echo -e "${BLUE}[1/7] Checking SigNoz Components Status...${NC}"
|
||||
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
|
||||
|
||||
#######################################
# Print the name of the first Running pod matching a label selector.
# Globals:   NAMESPACE (read)
# Arguments: $1 - label selector passed to `kubectl get pods -l`
# Outputs:   pod name on stdout, or nothing when no pod matches
# Returns:   0 always. kubectl exits non-zero when jsonpath `items[0]` does
#            not exist; swallowing that keeps `set -e` from aborting the
#            script before the diagnostic below is printed.
#######################################
find_running_pod() {
  local selector=$1
  kubectl get pods -n "$NAMESPACE" -l "$selector" \
    --field-selector=status.phase=Running \
    -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || true
}

OTEL_POD=$(find_running_pod "app.kubernetes.io/name=signoz,app.kubernetes.io/component=otel-collector")
SIGNOZ_POD=$(find_running_pod "app.kubernetes.io/name=signoz,app.kubernetes.io/component=signoz")
CLICKHOUSE_POD=$(find_running_pod "clickhouse.altinity.com/chi=signoz-clickhouse")

if [[ -n "$OTEL_POD" && -n "$SIGNOZ_POD" && -n "$CLICKHOUSE_POD" ]]; then
  echo -e "${GREEN}✓ All SigNoz components are running${NC}"
  echo " - OTel Collector: $OTEL_POD"
  echo " - SigNoz Frontend: $SIGNOZ_POD"
  echo " - ClickHouse: $CLICKHOUSE_POD"
else
  echo -e "${RED}✗ Some SigNoz components are not running${NC}"
  # Best-effort listing for troubleshooting; a grep with no match (or a
  # kubectl failure) must not pre-empt the explicit exit below.
  kubectl get pods -n "$NAMESPACE" | grep signoz || true
  exit 1
fi
|
||||
echo ""
|
||||
|
||||
# Step 2: Check OTel Collector Endpoints
|
||||
echo -e "${BLUE}[2/7] Verifying OTel Collector Endpoints...${NC}"
|
||||
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
|
||||
|
||||
OTEL_SVC=$(kubectl get svc -n $NAMESPACE signoz-otel-collector -o jsonpath='{.spec.clusterIP}')
|
||||
echo "OTel Collector Service IP: $OTEL_SVC"
|
||||
echo ""
|
||||
echo "Available endpoints:"
|
||||
kubectl get svc -n $NAMESPACE signoz-otel-collector -o jsonpath='{range .spec.ports[*]}{.name}{"\t"}{.port}{"\n"}{end}' | column -t
|
||||
echo ""
|
||||
echo -e "${GREEN}✓ OTel Collector endpoints are exposed${NC}"
|
||||
echo ""
|
||||
|
||||
# Step 3: Check OTel Collector Logs for Data Reception
|
||||
echo -e "${BLUE}[3/7] Checking OTel Collector for Recent Activity...${NC}"
|
||||
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
|
||||
|
||||
echo "Recent OTel Collector logs (last 20 lines):"
|
||||
kubectl logs -n $NAMESPACE $OTEL_POD --tail=20 | grep -E "received|exported|traces|metrics|logs" || echo "No recent telemetry data found in logs"
|
||||
echo ""
|
||||
|
||||
# Step 4: Check Service Configurations
|
||||
echo -e "${BLUE}[4/7] Verifying Service Telemetry Configuration...${NC}"
|
||||
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
|
||||
|
||||
# Check ConfigMap for OTEL settings
|
||||
OTEL_ENDPOINT=$(kubectl get configmap bakery-config -n $NAMESPACE -o jsonpath='{.data.OTEL_EXPORTER_OTLP_ENDPOINT}')
|
||||
ENABLE_TRACING=$(kubectl get configmap bakery-config -n $NAMESPACE -o jsonpath='{.data.ENABLE_TRACING}')
|
||||
ENABLE_METRICS=$(kubectl get configmap bakery-config -n $NAMESPACE -o jsonpath='{.data.ENABLE_METRICS}')
|
||||
ENABLE_LOGS=$(kubectl get configmap bakery-config -n $NAMESPACE -o jsonpath='{.data.ENABLE_LOGS}')
|
||||
|
||||
echo "Configuration from bakery-config ConfigMap:"
|
||||
echo " OTEL_EXPORTER_OTLP_ENDPOINT: $OTEL_ENDPOINT"
|
||||
echo " ENABLE_TRACING: $ENABLE_TRACING"
|
||||
echo " ENABLE_METRICS: $ENABLE_METRICS"
|
||||
echo " ENABLE_LOGS: $ENABLE_LOGS"
|
||||
echo ""
|
||||
|
||||
if [[ "$ENABLE_TRACING" == "true" && "$ENABLE_METRICS" == "true" && "$ENABLE_LOGS" == "true" ]]; then
|
||||
echo -e "${GREEN}✓ Telemetry is enabled in configuration${NC}"
|
||||
else
|
||||
echo -e "${YELLOW}⚠ Some telemetry features may be disabled${NC}"
|
||||
fi
|
||||
echo ""
|
||||
|
||||
# Step 5: Test OTel Collector Health
|
||||
echo -e "${BLUE}[5/7] Testing OTel Collector Health Endpoint...${NC}"
|
||||
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
|
||||
|
||||
HEALTH_CHECK=$(kubectl exec -n $NAMESPACE $OTEL_POD -- wget -qO- http://localhost:13133/ 2>/dev/null || echo "FAILED")
|
||||
if [[ "$HEALTH_CHECK" == *"Server available"* ]] || [[ "$HEALTH_CHECK" == "{}" ]]; then
|
||||
echo -e "${GREEN}✓ OTel Collector health check passed${NC}"
|
||||
else
|
||||
echo -e "${RED}✗ OTel Collector health check failed${NC}"
|
||||
echo "Response: $HEALTH_CHECK"
|
||||
fi
|
||||
echo ""
|
||||
|
||||
# Step 6: Query ClickHouse for Telemetry Data
|
||||
echo -e "${BLUE}[6/7] Querying ClickHouse for Telemetry Data...${NC}"
|
||||
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
|
||||
|
||||
# Get ClickHouse credentials
# The secret lookup and the base64 decode are checked separately: in a
# `kubectl ... | base64 -d || fallback` pipeline the status is that of
# `base64`, which succeeds on empty input, so the fallback would never run.
CH_PASSWORD_B64=$(kubectl get secret -n "$NAMESPACE" signoz-clickhouse \
  -o jsonpath='{.data.admin-password}' 2>/dev/null) || CH_PASSWORD_B64=""
CH_PASSWORD=""
if [[ -n "$CH_PASSWORD_B64" ]]; then
  CH_PASSWORD=$(printf '%s' "$CH_PASSWORD_B64" | base64 -d 2>/dev/null) || CH_PASSWORD=""
fi
if [[ -z "$CH_PASSWORD" ]]; then
  # NOTE(security): the literal default below is a credential committed to the
  # repository. Prefer supplying SIGNOZ_CLICKHOUSE_PASSWORD in the environment
  # and rotate/remove the hard-coded value.
  CH_PASSWORD="${SIGNOZ_CLICKHOUSE_PASSWORD:-27ff0399-0d3a-4bd8-919d-17c2181e6fb9}"
fi

#######################################
# Run a count query in ClickHouse and print the resulting number.
# Globals:   NAMESPACE, CLICKHOUSE_POD, CH_PASSWORD (read)
# Arguments: $1 - SQL query expected to return a single integer
# Outputs:   the integer on stdout; "0" when the query fails or the output
#            is not a plain non-negative integer
# Returns:   0 always
#######################################
ch_count() {
  local query=$1
  local result
  # NOTE: the password is passed on the command line and is therefore visible
  # in the process list of the machine running kubectl.
  result=$(kubectl exec -n "$NAMESPACE" "$CLICKHOUSE_POD" -- \
    clickhouse-client --user=admin --password="$CH_PASSWORD" \
    --query="$query" 2>/dev/null) || result=""
  result=${result//[[:space:]]/}
  # Guarantee a numeric value so later `-gt` comparisons cannot choke on
  # error text or empty output.
  if [[ "$result" =~ ^[0-9]+$ ]]; then
    printf '%s\n' "$result"
  else
    printf '0\n'
  fi
}

echo "Checking for traces in ClickHouse..."
TRACES_COUNT=$(ch_count "SELECT count() FROM signoz_traces.signoz_index_v2 WHERE timestamp >= now() - INTERVAL 1 HOUR")
echo " Traces in last hour: $TRACES_COUNT"

echo "Checking for metrics in ClickHouse..."
METRICS_COUNT=$(ch_count "SELECT count() FROM signoz_metrics.samples_v4 WHERE unix_milli >= toUnixTimestamp(now() - INTERVAL 1 HOUR) * 1000")
echo " Metrics in last hour: $METRICS_COUNT"

echo "Checking for logs in ClickHouse..."
LOGS_COUNT=$(ch_count "SELECT count() FROM signoz_logs.logs WHERE timestamp >= now() - INTERVAL 1 HOUR")
echo " Logs in last hour: $LOGS_COUNT"
|
||||
echo ""
|
||||
|
||||
if [[ "$TRACES_COUNT" -gt "0" || "$METRICS_COUNT" -gt "0" || "$LOGS_COUNT" -gt "0" ]]; then
|
||||
echo -e "${GREEN}✓ Telemetry data found in ClickHouse!${NC}"
|
||||
else
|
||||
echo -e "${YELLOW}⚠ No telemetry data found in the last hour${NC}"
|
||||
echo " This might be normal if:"
|
||||
echo " - Services were just deployed"
|
||||
echo " - No traffic has been generated yet"
|
||||
echo " - Services haven't finished initializing"
|
||||
fi
|
||||
echo ""
|
||||
|
||||
# Step 7: Access Information
|
||||
echo -e "${BLUE}[7/7] SigNoz UI Access Information${NC}"
|
||||
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
|
||||
echo ""
|
||||
echo "SigNoz is accessible via ingress at:"
|
||||
echo -e " ${GREEN}https://monitoring.bakery-ia.local${NC}"
|
||||
echo ""
|
||||
echo "Or via port-forward:"
|
||||
echo -e " ${YELLOW}kubectl port-forward -n $NAMESPACE svc/signoz 3301:8080${NC}"
|
||||
echo " Then access: http://localhost:3301"
|
||||
echo ""
|
||||
echo "To view OTel Collector metrics:"
|
||||
echo -e " ${YELLOW}kubectl port-forward -n $NAMESPACE svc/signoz-otel-collector 8888:8888${NC}"
|
||||
echo " Then access: http://localhost:8888/metrics"
|
||||
echo ""
|
||||
|
||||
# Summary
|
||||
echo ""
|
||||
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
|
||||
echo -e "${BLUE} Verification Summary${NC}"
|
||||
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
|
||||
echo ""
|
||||
echo "Component Status:"
|
||||
echo " ✓ SigNoz components running"
|
||||
echo " ✓ OTel Collector healthy"
|
||||
echo " ✓ Configuration correct"
|
||||
echo ""
|
||||
echo "Data Collection (last hour):"
|
||||
echo " Traces: $TRACES_COUNT"
|
||||
echo " Metrics: $METRICS_COUNT"
|
||||
echo " Logs: $LOGS_COUNT"
|
||||
echo ""
|
||||
|
||||
if [[ "$TRACES_COUNT" -gt "0" || "$METRICS_COUNT" -gt "0" || "$LOGS_COUNT" -gt "0" ]]; then
|
||||
echo -e "${GREEN}✓ SigNoz is collecting telemetry data successfully!${NC}"
|
||||
else
|
||||
echo -e "${YELLOW}⚠ To generate telemetry data, try:${NC}"
|
||||
echo ""
|
||||
echo "1. Generate traffic to your services:"
|
||||
echo " curl http://localhost/api/health"
|
||||
echo ""
|
||||
echo "2. Check service logs for tracing initialization:"
|
||||
echo " kubectl logs -n $NAMESPACE <service-pod> | grep -i 'tracing\\|otel\\|signoz'"
|
||||
echo ""
|
||||
echo "3. Wait a few minutes and run this script again"
|
||||
fi
|
||||
echo ""
|
||||
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
|
||||
@@ -99,7 +99,10 @@ spec:
|
||||
- name: OTEL_COLLECTOR_ENDPOINT
|
||||
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||
- name: OTEL_EXPORTER_OTLP_ENDPOINT
|
||||
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: bakery-config
|
||||
key: OTEL_EXPORTER_OTLP_ENDPOINT
|
||||
- name: OTEL_SERVICE_NAME
|
||||
value: "ai-insights-service"
|
||||
- name: ENABLE_TRACING
|
||||
|
||||
@@ -100,7 +100,10 @@ spec:
|
||||
- name: OTEL_COLLECTOR_ENDPOINT
|
||||
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||
- name: OTEL_EXPORTER_OTLP_ENDPOINT
|
||||
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: bakery-config
|
||||
key: OTEL_EXPORTER_OTLP_ENDPOINT
|
||||
- name: OTEL_SERVICE_NAME
|
||||
value: "auth-service"
|
||||
- name: ENABLE_TRACING
|
||||
|
||||
@@ -64,7 +64,10 @@ spec:
|
||||
- name: OTEL_COLLECTOR_ENDPOINT
|
||||
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||
- name: OTEL_EXPORTER_OTLP_ENDPOINT
|
||||
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: bakery-config
|
||||
key: OTEL_EXPORTER_OTLP_ENDPOINT
|
||||
- name: OTEL_SERVICE_NAME
|
||||
value: "distribution-service"
|
||||
- name: ENABLE_TRACING
|
||||
|
||||
@@ -92,7 +92,10 @@ spec:
|
||||
- name: OTEL_COLLECTOR_ENDPOINT
|
||||
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||
- name: OTEL_EXPORTER_OTLP_ENDPOINT
|
||||
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: bakery-config
|
||||
key: OTEL_EXPORTER_OTLP_ENDPOINT
|
||||
- name: OTEL_SERVICE_NAME
|
||||
value: "external-service"
|
||||
- name: ENABLE_TRACING
|
||||
|
||||
@@ -99,7 +99,10 @@ spec:
|
||||
- name: OTEL_COLLECTOR_ENDPOINT
|
||||
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||
- name: OTEL_EXPORTER_OTLP_ENDPOINT
|
||||
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: bakery-config
|
||||
key: OTEL_EXPORTER_OTLP_ENDPOINT
|
||||
- name: OTEL_SERVICE_NAME
|
||||
value: "forecasting-service"
|
||||
- name: ENABLE_TRACING
|
||||
|
||||
@@ -52,7 +52,10 @@ spec:
|
||||
name: whatsapp-secrets
|
||||
env:
|
||||
- name: OTEL_EXPORTER_OTLP_ENDPOINT
|
||||
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4317"
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: bakery-config
|
||||
key: OTEL_EXPORTER_OTLP_ENDPOINT
|
||||
resources:
|
||||
requests:
|
||||
memory: "256Mi"
|
||||
|
||||
@@ -99,7 +99,10 @@ spec:
|
||||
- name: OTEL_COLLECTOR_ENDPOINT
|
||||
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||
- name: OTEL_EXPORTER_OTLP_ENDPOINT
|
||||
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: bakery-config
|
||||
key: OTEL_EXPORTER_OTLP_ENDPOINT
|
||||
- name: OTEL_SERVICE_NAME
|
||||
value: "inventory-service"
|
||||
- name: ENABLE_TRACING
|
||||
|
||||
@@ -99,7 +99,10 @@ spec:
|
||||
- name: OTEL_COLLECTOR_ENDPOINT
|
||||
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||
- name: OTEL_EXPORTER_OTLP_ENDPOINT
|
||||
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: bakery-config
|
||||
key: OTEL_EXPORTER_OTLP_ENDPOINT
|
||||
- name: OTEL_SERVICE_NAME
|
||||
value: "notification-service"
|
||||
- name: ENABLE_TRACING
|
||||
|
||||
@@ -99,7 +99,10 @@ spec:
|
||||
- name: OTEL_COLLECTOR_ENDPOINT
|
||||
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||
- name: OTEL_EXPORTER_OTLP_ENDPOINT
|
||||
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: bakery-config
|
||||
key: OTEL_EXPORTER_OTLP_ENDPOINT
|
||||
- name: OTEL_SERVICE_NAME
|
||||
value: "orchestrator-service"
|
||||
- name: ENABLE_TRACING
|
||||
|
||||
@@ -99,7 +99,10 @@ spec:
|
||||
- name: OTEL_COLLECTOR_ENDPOINT
|
||||
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||
- name: OTEL_EXPORTER_OTLP_ENDPOINT
|
||||
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: bakery-config
|
||||
key: OTEL_EXPORTER_OTLP_ENDPOINT
|
||||
- name: OTEL_SERVICE_NAME
|
||||
value: "orders-service"
|
||||
- name: ENABLE_TRACING
|
||||
|
||||
@@ -99,7 +99,10 @@ spec:
|
||||
- name: OTEL_COLLECTOR_ENDPOINT
|
||||
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||
- name: OTEL_EXPORTER_OTLP_ENDPOINT
|
||||
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: bakery-config
|
||||
key: OTEL_EXPORTER_OTLP_ENDPOINT
|
||||
- name: OTEL_SERVICE_NAME
|
||||
value: "pos-service"
|
||||
- name: ENABLE_TRACING
|
||||
|
||||
@@ -99,7 +99,10 @@ spec:
|
||||
- name: OTEL_COLLECTOR_ENDPOINT
|
||||
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||
- name: OTEL_EXPORTER_OTLP_ENDPOINT
|
||||
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: bakery-config
|
||||
key: OTEL_EXPORTER_OTLP_ENDPOINT
|
||||
- name: OTEL_SERVICE_NAME
|
||||
value: "procurement-service"
|
||||
- name: ENABLE_TRACING
|
||||
|
||||
@@ -99,7 +99,10 @@ spec:
|
||||
- name: OTEL_COLLECTOR_ENDPOINT
|
||||
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||
- name: OTEL_EXPORTER_OTLP_ENDPOINT
|
||||
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: bakery-config
|
||||
key: OTEL_EXPORTER_OTLP_ENDPOINT
|
||||
- name: OTEL_SERVICE_NAME
|
||||
value: "production-service"
|
||||
- name: ENABLE_TRACING
|
||||
|
||||
@@ -99,7 +99,10 @@ spec:
|
||||
- name: OTEL_COLLECTOR_ENDPOINT
|
||||
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||
- name: OTEL_EXPORTER_OTLP_ENDPOINT
|
||||
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: bakery-config
|
||||
key: OTEL_EXPORTER_OTLP_ENDPOINT
|
||||
- name: OTEL_SERVICE_NAME
|
||||
value: "recipes-service"
|
||||
- name: ENABLE_TRACING
|
||||
|
||||
@@ -99,7 +99,10 @@ spec:
|
||||
- name: OTEL_COLLECTOR_ENDPOINT
|
||||
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||
- name: OTEL_EXPORTER_OTLP_ENDPOINT
|
||||
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: bakery-config
|
||||
key: OTEL_EXPORTER_OTLP_ENDPOINT
|
||||
- name: OTEL_SERVICE_NAME
|
||||
value: "sales-service"
|
||||
- name: ENABLE_TRACING
|
||||
|
||||
@@ -99,7 +99,10 @@ spec:
|
||||
- name: OTEL_COLLECTOR_ENDPOINT
|
||||
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||
- name: OTEL_EXPORTER_OTLP_ENDPOINT
|
||||
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: bakery-config
|
||||
key: OTEL_EXPORTER_OTLP_ENDPOINT
|
||||
- name: OTEL_SERVICE_NAME
|
||||
value: "suppliers-service"
|
||||
- name: ENABLE_TRACING
|
||||
|
||||
@@ -99,7 +99,10 @@ spec:
|
||||
- name: OTEL_COLLECTOR_ENDPOINT
|
||||
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||
- name: OTEL_EXPORTER_OTLP_ENDPOINT
|
||||
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: bakery-config
|
||||
key: OTEL_EXPORTER_OTLP_ENDPOINT
|
||||
- name: OTEL_SERVICE_NAME
|
||||
value: "tenant-service"
|
||||
- name: ENABLE_TRACING
|
||||
|
||||
@@ -99,7 +99,10 @@ spec:
|
||||
- name: OTEL_COLLECTOR_ENDPOINT
|
||||
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||
- name: OTEL_EXPORTER_OTLP_ENDPOINT
|
||||
value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: bakery-config
|
||||
key: OTEL_EXPORTER_OTLP_ENDPOINT
|
||||
- name: OTEL_SERVICE_NAME
|
||||
value: "training-service"
|
||||
- name: ENABLE_TRACING
|
||||
|
||||
@@ -385,7 +385,8 @@ data:
|
||||
# OBSERVABILITY - SigNoz (Unified Monitoring)
|
||||
# ================================================================
|
||||
# OpenTelemetry Configuration - Direct to SigNoz
|
||||
OTEL_EXPORTER_OTLP_ENDPOINT: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4317"
|
||||
# IMPORTANT: gRPC endpoints should NOT include http:// prefix
|
||||
OTEL_EXPORTER_OTLP_ENDPOINT: "signoz-otel-collector.bakery-ia.svc.cluster.local:4317"
|
||||
OTEL_EXPORTER_OTLP_PROTOCOL: "grpc"
|
||||
OTEL_SERVICE_NAME: "bakery-ia"
|
||||
OTEL_RESOURCE_ATTRIBUTES: "deployment.environment=development"
|
||||
|
||||
60
infrastructure/kubernetes/fix-otel-endpoints.sh
Executable file
60
infrastructure/kubernetes/fix-otel-endpoints.sh
Executable file
@@ -0,0 +1,60 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Fix OTEL endpoint configuration in all service manifests
|
||||
# This script replaces hardcoded OTEL_EXPORTER_OTLP_ENDPOINT values
|
||||
# with references to the central bakery-config ConfigMap
|
||||
|
||||
set -e
|
||||
|
||||
GREEN='\033[0;32m'
|
||||
BLUE='\033[0;34m'
|
||||
NC='\033[0m'
|
||||
|
||||
echo -e "${BLUE}Fixing OTEL endpoint configuration in all services...${NC}"
|
||||
echo ""
|
||||
|
||||
# Find all service YAML files
|
||||
#######################################
# Replace a literal `value:` for OTEL_EXPORTER_OTLP_ENDPOINT in one manifest
# with a configMapKeyRef to bakery-config.
# Arguments: $1 - path of the manifest to rewrite in place
# Outputs:   none
# Returns:   0 on success, non-zero if the temp file, awk or write-back fails
#######################################
rewrite_otel_endpoint() {
  local manifest=$1
  local tmp_file
  tmp_file=$(mktemp) || return 1

  # The indentation of the inserted keys is derived from the column of
  # "name:" on the matched line, so the output lines up with the manifest
  # instead of relying on a fixed indent. The following line is replaced only
  # when it really is a `value:` line; anything else is passed through.
  if ! awk '
    /name: OTEL_EXPORTER_OTLP_ENDPOINT/ {
      print
      pad = substr($0, 1, index($0, "name:") - 1)
      gsub(/[^ \t]/, " ", pad)
      if ((getline following) > 0) {
        if (following ~ /^[ \t]*value:/) {
          print pad "valueFrom:"
          print pad "  configMapKeyRef:"
          print pad "    name: bakery-config"
          print pad "    key: OTEL_EXPORTER_OTLP_ENDPOINT"
        } else {
          print following
        }
      }
      next
    }
    { print }
  ' "$manifest" > "$tmp_file"; then
    rm -f -- "$tmp_file"
    return 1
  fi

  # Write through the existing file instead of `mv`-ing the temp file over
  # it: mktemp creates the file with mode 0600, which `mv` would carry over
  # to the manifest.
  if cat -- "$tmp_file" > "$manifest"; then
    rm -f -- "$tmp_file"
    return 0
  fi
  rm -f -- "$tmp_file"
  return 1
}

# Find all service YAML files
SERVICE_FILES=$(find infrastructure/kubernetes/base/components -name "*-service.yaml")

# Read one path per line so paths containing spaces are not word-split.
while IFS= read -r file; do
  # A here-string of an empty list still yields one empty line.
  [[ -n "$file" ]] || continue

  # Only files that set OTEL_EXPORTER_OTLP_ENDPOINT are of interest.
  if ! grep -q "name: OTEL_EXPORTER_OTLP_ENDPOINT" "$file"; then
    continue
  fi

  # Check if it's already using configMapKeyRef
  if grep -A 3 "name: OTEL_EXPORTER_OTLP_ENDPOINT" "$file" | grep -q "configMapKeyRef"; then
    echo -e "${GREEN}✓ $file already using ConfigMap${NC}"
  else
    echo -e "${BLUE}→ Fixing $file${NC}"
    rewrite_otel_endpoint "$file"
    echo -e "${GREEN} ✓ Fixed${NC}"
  fi
done <<< "$SERVICE_FILES"
|
||||
|
||||
echo ""
|
||||
echo -e "${GREEN}✓ All service files processed!${NC}"
|
||||
echo ""
|
||||
echo "Next steps:"
|
||||
echo "1. Review changes: git diff infrastructure/kubernetes/base/components"
|
||||
echo "2. Apply changes: kubectl apply -k infrastructure/kubernetes/overlays/dev"
|
||||
echo "3. Restart services: kubectl rollout restart deployment -n bakery-ia --all"
|
||||
@@ -23,7 +23,8 @@ data:
|
||||
ENABLE_LOGS: "true"
|
||||
|
||||
# OpenTelemetry Configuration - Direct to SigNoz
|
||||
OTEL_EXPORTER_OTLP_ENDPOINT: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4317"
|
||||
# IMPORTANT: gRPC endpoints should NOT include http:// prefix
|
||||
OTEL_EXPORTER_OTLP_ENDPOINT: "signoz-otel-collector.bakery-ia.svc.cluster.local:4317"
|
||||
OTEL_EXPORTER_OTLP_PROTOCOL: "grpc"
|
||||
OTEL_SERVICE_NAME: "bakery-ia"
|
||||
OTEL_RESOURCE_ATTRIBUTES: "deployment.environment=production,cluster.name=bakery-ia-prod"
|
||||
|
||||
Reference in New Issue
Block a user