Improve monitoring 2
This commit is contained in:
@@ -26,7 +26,7 @@ show_help() {
|
||||
echo ""
|
||||
echo "Options:
|
||||
-h, --help Show this help message
|
||||
-n, --namespace NAMESPACE Specify namespace (default: signoz)"
|
||||
-n, --namespace NAMESPACE Specify namespace (default: bakery-ia)"
|
||||
echo ""
|
||||
echo "Examples:
|
||||
$0 dev # Verify development deployment
|
||||
@@ -35,7 +35,7 @@ show_help() {
|
||||
}
|
||||
|
||||
# Parse command line arguments
|
||||
NAMESPACE="signoz"
|
||||
NAMESPACE="bakery-ia"
|
||||
|
||||
while [[ $# -gt 0 ]]; do
|
||||
case $1 in
|
||||
@@ -224,20 +224,28 @@ verify_deployment() {
|
||||
# Function for development-specific verification
|
||||
verify_dev_specific() {
|
||||
echo "${BLUE}8. Development-specific checks...${NC}"
|
||||
|
||||
# Check if localhost ingress is configured
|
||||
if kubectl get ingress -n "$NAMESPACE" | grep -q "localhost"; then
|
||||
echo "${GREEN}✅ Localhost ingress configured${NC}"
|
||||
|
||||
# Check if ingress is configured
|
||||
if kubectl get ingress -n "$NAMESPACE" 2>/dev/null | grep -q "monitoring.bakery-ia.local"; then
|
||||
echo "${GREEN}✅ Development ingress configured${NC}"
|
||||
else
|
||||
echo "${YELLOW}⚠️ Localhost ingress not found${NC}"
|
||||
echo "${YELLOW}⚠️ Development ingress not found${NC}"
|
||||
fi
|
||||
|
||||
# Check resource limits (should be lower for dev)
|
||||
local query_service=$(kubectl get deployment -n "$NAMESPACE" signoz-query-service -o jsonpath='{.spec.template.spec.containers[0].resources.limits.memory}' 2>/dev/null || echo "")
|
||||
if [[ -n "$query_service" && "$query_service" == "512Mi" ]]; then
|
||||
echo "${GREEN}✅ Development resource limits applied${NC}"
|
||||
|
||||
# Check unified signoz component resource limits (should be lower for dev)
|
||||
local signoz_mem=$(kubectl get deployment -n "$NAMESPACE" -l app.kubernetes.io/component=query-service -o jsonpath='{.items[0].spec.template.spec.containers[0].resources.limits.memory}' 2>/dev/null || echo "")
|
||||
if [[ -n "$signoz_mem" ]]; then
|
||||
echo "${GREEN}✅ SigNoz component found (memory limit: $signoz_mem)${NC}"
|
||||
else
|
||||
echo "${YELLOW}⚠️ Resource limits may not be optimized for development${NC}"
|
||||
echo "${YELLOW}⚠️ Could not verify SigNoz component resources${NC}"
|
||||
fi
|
||||
|
||||
# Check single replica setup for dev
|
||||
local replicas=$(kubectl get deployment -n "$NAMESPACE" -l app.kubernetes.io/component=query-service -o jsonpath='{.items[0].spec.replicas}' 2>/dev/null || echo "0")
|
||||
if [[ $replicas -eq 1 ]]; then
|
||||
echo "${GREEN}✅ Single replica configuration (appropriate for dev)${NC}"
|
||||
else
|
||||
echo "${YELLOW}⚠️ Multiple replicas detected (replicas: $replicas)${NC}"
|
||||
fi
|
||||
echo ""
|
||||
}
|
||||
@@ -245,28 +253,54 @@ verify_dev_specific() {
|
||||
# Function for production-specific verification
|
||||
verify_prod_specific() {
|
||||
echo "${BLUE}8. Production-specific checks...${NC}"
|
||||
|
||||
|
||||
# Check if TLS is configured
|
||||
if kubectl get ingress -n "$NAMESPACE" | grep -q "signoz-tls-cert"; then
|
||||
if kubectl get ingress -n "$NAMESPACE" 2>/dev/null | grep -q "signoz-tls"; then
|
||||
echo "${GREEN}✅ TLS certificate configured${NC}"
|
||||
else
|
||||
echo "${YELLOW}⚠️ TLS certificate not found${NC}"
|
||||
fi
|
||||
|
||||
# Check if multiple replicas are running
|
||||
local query_replicas=$(kubectl get deployment -n "$NAMESPACE" signoz-query-service -o jsonpath='{.spec.replicas}' 2>/dev/null || echo "1")
|
||||
if [[ $query_replicas -gt 1 ]]; then
|
||||
echo "${GREEN}✅ High availability configured ($query_replicas replicas)${NC}"
|
||||
|
||||
# Check if multiple replicas are running for HA
|
||||
local signoz_replicas=$(kubectl get deployment -n "$NAMESPACE" -l app.kubernetes.io/component=query-service -o jsonpath='{.items[0].spec.replicas}' 2>/dev/null || echo "1")
|
||||
if [[ $signoz_replicas -gt 1 ]]; then
|
||||
echo "${GREEN}✅ High availability configured ($signoz_replicas SigNoz replicas)${NC}"
|
||||
else
|
||||
echo "${YELLOW}⚠️ Single replica detected (not highly available)${NC}"
|
||||
echo "${YELLOW}⚠️ Single SigNoz replica detected (not highly available)${NC}"
|
||||
fi
|
||||
|
||||
# Check resource limits (should be higher for prod)
|
||||
local query_service=$(kubectl get deployment -n "$NAMESPACE" signoz-query-service -o jsonpath='{.spec.template.spec.containers[0].resources.limits.memory}' 2>/dev/null || echo "")
|
||||
if [[ -n "$query_service" && "$query_service" == "2Gi" ]]; then
|
||||
echo "${GREEN}✅ Production resource limits applied${NC}"
|
||||
|
||||
# Check Zookeeper replicas (critical for production)
|
||||
local zk_replicas=$(kubectl get statefulset -n "$NAMESPACE" -l app.kubernetes.io/component=zookeeper -o jsonpath='{.items[0].spec.replicas}' 2>/dev/null || echo "0")
|
||||
if [[ $zk_replicas -eq 3 ]]; then
|
||||
echo "${GREEN}✅ Zookeeper properly configured with 3 replicas${NC}"
|
||||
elif [[ $zk_replicas -gt 0 ]]; then
|
||||
echo "${YELLOW}⚠️ Zookeeper has $zk_replicas replicas (recommend 3 for production)${NC}"
|
||||
else
|
||||
echo "${YELLOW}⚠️ Resource limits may not be optimized for production${NC}"
|
||||
echo "${RED}❌ Zookeeper not found${NC}"
|
||||
fi
|
||||
|
||||
# Check OTel Collector replicas
|
||||
local otel_replicas=$(kubectl get deployment -n "$NAMESPACE" -l app.kubernetes.io/component=otel-collector -o jsonpath='{.items[0].spec.replicas}' 2>/dev/null || echo "1")
|
||||
if [[ $otel_replicas -gt 1 ]]; then
|
||||
echo "${GREEN}✅ OTel Collector HA configured ($otel_replicas replicas)${NC}"
|
||||
else
|
||||
echo "${YELLOW}⚠️ Single OTel Collector replica${NC}"
|
||||
fi
|
||||
|
||||
# Check resource limits (should be higher for prod)
|
||||
local signoz_mem=$(kubectl get deployment -n "$NAMESPACE" -l app.kubernetes.io/component=query-service -o jsonpath='{.items[0].spec.template.spec.containers[0].resources.limits.memory}' 2>/dev/null || echo "")
|
||||
if [[ -n "$signoz_mem" ]]; then
|
||||
echo "${GREEN}✅ Production resource limits applied (memory: $signoz_mem)${NC}"
|
||||
else
|
||||
echo "${YELLOW}⚠️ Could not verify resource limits${NC}"
|
||||
fi
|
||||
|
||||
# Check HPA (Horizontal Pod Autoscaler)
|
||||
local hpa_count=$(kubectl get hpa -n "$NAMESPACE" 2>/dev/null | grep -c signoz || echo "0")
|
||||
if [[ $hpa_count -gt 0 ]]; then
|
||||
echo "${GREEN}✅ Horizontal Pod Autoscaler configured${NC}"
|
||||
else
|
||||
echo "${YELLOW}⚠️ No HPA found (consider enabling for production)${NC}"
|
||||
fi
|
||||
echo ""
|
||||
}
|
||||
@@ -278,39 +312,50 @@ show_access_info() {
|
||||
echo "📋 Access Information"
|
||||
echo "=========================================="
|
||||
echo "${NC}"
|
||||
|
||||
|
||||
if [[ "$ENVIRONMENT" == "dev" ]]; then
|
||||
echo "SigNoz UI: https://localhost/signoz"
|
||||
echo "SigNoz API: https://localhost/signoz-api"
|
||||
echo "SigNoz UI: http://monitoring.bakery-ia.local"
|
||||
echo ""
|
||||
echo "OpenTelemetry Collector:"
|
||||
echo " gRPC: localhost:4317"
|
||||
echo " HTTP: localhost:4318"
|
||||
echo " Metrics: localhost:8888"
|
||||
echo "OpenTelemetry Collector (within cluster):"
|
||||
echo " gRPC: signoz-otel-collector.$NAMESPACE.svc.cluster.local:4317"
|
||||
echo " HTTP: signoz-otel-collector.$NAMESPACE.svc.cluster.local:4318"
|
||||
echo ""
|
||||
echo "Port-forward for local access:"
|
||||
echo " kubectl port-forward -n $NAMESPACE svc/signoz 8080:8080"
|
||||
echo " kubectl port-forward -n $NAMESPACE svc/signoz-otel-collector 4317:4317"
|
||||
echo " kubectl port-forward -n $NAMESPACE svc/signoz-otel-collector 4318:4318"
|
||||
else
|
||||
echo "SigNoz UI: https://monitoring.bakewise.ai/signoz"
|
||||
echo "SigNoz API: https://monitoring.bakewise.ai/signoz-api"
|
||||
echo "SigNoz Alerts: https://monitoring.bakewise.ai/signoz-alerts"
|
||||
echo "SigNoz UI: https://monitoring.bakewise.ai"
|
||||
echo ""
|
||||
echo "OpenTelemetry Collector:"
|
||||
echo " gRPC: monitoring.bakewise.ai:4317"
|
||||
echo " HTTP: monitoring.bakewise.ai:4318"
|
||||
echo "OpenTelemetry Collector (within cluster):"
|
||||
echo " gRPC: signoz-otel-collector.$NAMESPACE.svc.cluster.local:4317"
|
||||
echo " HTTP: signoz-otel-collector.$NAMESPACE.svc.cluster.local:4318"
|
||||
fi
|
||||
|
||||
|
||||
echo ""
|
||||
echo "Default Credentials:"
|
||||
echo " Username: admin"
|
||||
echo " Username: admin@example.com"
|
||||
echo " Password: admin"
|
||||
echo ""
|
||||
|
||||
echo "⚠️ IMPORTANT: Change default password after first login!"
|
||||
echo ""
|
||||
|
||||
# Show connection test commands
|
||||
echo "Connection Test Commands:"
|
||||
if [[ "$ENVIRONMENT" == "dev" ]]; then
|
||||
echo " curl -k https://localhost/signoz"
|
||||
echo " curl -k https://localhost/signoz-api/health"
|
||||
echo " # Test SigNoz UI"
|
||||
echo " curl http://monitoring.bakery-ia.local"
|
||||
echo ""
|
||||
echo " # Test via port-forward"
|
||||
echo " kubectl port-forward -n $NAMESPACE svc/signoz 8080:8080"
|
||||
echo " curl http://localhost:8080"
|
||||
else
|
||||
echo " curl https://monitoring.bakewise.ai/signoz"
|
||||
echo " curl https://monitoring.bakewise.ai/signoz-api/health"
|
||||
echo " # Test SigNoz UI"
|
||||
echo " curl https://monitoring.bakewise.ai"
|
||||
echo ""
|
||||
echo " # Test API health"
|
||||
echo " kubectl port-forward -n $NAMESPACE svc/signoz 8080:8080"
|
||||
echo " curl http://localhost:8080/api/v1/health"
|
||||
fi
|
||||
echo ""
|
||||
}
|
||||
@@ -322,36 +367,43 @@ run_connectivity_tests() {
|
||||
echo "🔗 Running Connectivity Tests"
|
||||
echo "=========================================="
|
||||
echo "${NC}"
|
||||
|
||||
if [[ "$ENVIRONMENT" == "dev" ]]; then
|
||||
# Test frontend
|
||||
echo "Testing SigNoz frontend..."
|
||||
if curl -k -s -o /dev/null -w "%{http_code}" https://localhost/signoz | grep -q "200\|302"; then
|
||||
echo "${GREEN}✅ Frontend accessible${NC}"
|
||||
else
|
||||
echo "${RED}❌ Frontend not accessible${NC}"
|
||||
fi
|
||||
|
||||
# Test API
|
||||
echo "Testing SigNoz API..."
|
||||
if curl -k -s -o /dev/null -w "%{http_code}" https://localhost/signoz-api/health | grep -q "200"; then
|
||||
echo "${GREEN}✅ API accessible${NC}"
|
||||
else
|
||||
echo "${RED}❌ API not accessible${NC}"
|
||||
fi
|
||||
|
||||
# Test OTEL collector
|
||||
echo "Testing OpenTelemetry collector..."
|
||||
if curl -s -o /dev/null -w "%{http_code}" http://localhost:8888/metrics | grep -q "200"; then
|
||||
echo "${GREEN}✅ OTEL collector accessible${NC}"
|
||||
else
|
||||
echo "${YELLOW}⚠️ OTEL collector not accessible (may not be exposed)${NC}"
|
||||
fi
|
||||
|
||||
# Test pod readiness first
|
||||
echo "Checking pod readiness..."
|
||||
local ready_pods=$(kubectl get pods -n "$NAMESPACE" -l app.kubernetes.io/instance=signoz --field-selector=status.phase=Running 2>/dev/null | grep "Running" | grep -c "1/1\|2/2" || echo "0")
|
||||
local total_pods=$(kubectl get pods -n "$NAMESPACE" -l app.kubernetes.io/instance=signoz 2>/dev/null | grep -v "NAME" | wc -l | tr -d ' ' || echo "0")
|
||||
|
||||
if [[ $ready_pods -eq $total_pods && $total_pods -gt 0 ]]; then
|
||||
echo "${GREEN}✅ All pods are ready ($ready_pods/$total_pods)${NC}"
|
||||
else
|
||||
echo "${YELLOW}⚠️ Production connectivity tests require valid DNS and TLS${NC}"
|
||||
echo " Please ensure monitoring.bakewise.ai resolves to your cluster"
|
||||
echo "${YELLOW}⚠️ Some pods not ready ($ready_pods/$total_pods)${NC}"
|
||||
fi
|
||||
echo ""
|
||||
|
||||
# Test internal service connectivity
|
||||
echo "Testing internal service connectivity..."
|
||||
local signoz_svc=$(kubectl get svc -n "$NAMESPACE" signoz -o jsonpath='{.spec.clusterIP}' 2>/dev/null || echo "")
|
||||
if [[ -n "$signoz_svc" ]]; then
|
||||
echo "${GREEN}✅ SigNoz service accessible at $signoz_svc:8080${NC}"
|
||||
else
|
||||
echo "${RED}❌ SigNoz service not found${NC}"
|
||||
fi
|
||||
|
||||
local otel_svc=$(kubectl get svc -n "$NAMESPACE" signoz-otel-collector -o jsonpath='{.spec.clusterIP}' 2>/dev/null || echo "")
|
||||
if [[ -n "$otel_svc" ]]; then
|
||||
echo "${GREEN}✅ OTel Collector service accessible at $otel_svc:4317 (gRPC), $otel_svc:4318 (HTTP)${NC}"
|
||||
else
|
||||
echo "${RED}❌ OTel Collector service not found${NC}"
|
||||
fi
|
||||
echo ""
|
||||
|
||||
if [[ "$ENVIRONMENT" == "prod" ]]; then
|
||||
echo "${YELLOW}⚠️ Production connectivity tests require valid DNS and TLS${NC}"
|
||||
echo " Please ensure monitoring.bakewise.ai resolves to your cluster"
|
||||
echo ""
|
||||
echo "Manual test:"
|
||||
echo " curl -I https://monitoring.bakewise.ai"
|
||||
fi
|
||||
}
|
||||
|
||||
# Main execution
|
||||
|
||||
Reference in New Issue
Block a user