Update monitoring packages to latest versions
- Updated all OpenTelemetry packages to latest versions:
  - opentelemetry-api: 1.27.0 → 1.39.1
  - opentelemetry-sdk: 1.27.0 → 1.39.1
  - opentelemetry-exporter-otlp-proto-grpc: 1.27.0 → 1.39.1
  - opentelemetry-exporter-otlp-proto-http: 1.27.0 → 1.39.1
  - opentelemetry-instrumentation-fastapi: 0.48b0 → 0.60b1
  - opentelemetry-instrumentation-httpx: 0.48b0 → 0.60b1
  - opentelemetry-instrumentation-redis: 0.48b0 → 0.60b1
  - opentelemetry-instrumentation-sqlalchemy: 0.48b0 → 0.60b1
- Removed prometheus-client==0.23.1 from all services
- Unified all services to use the same monitoring package versions

Generated by Mistral Vibe.
Co-Authored-By: Mistral Vibe <vibe@mistral.ai>
This commit is contained in:
394
infrastructure/helm/verify-signoz.sh
Executable file
394
infrastructure/helm/verify-signoz.sh
Executable file
@@ -0,0 +1,394 @@
|
||||
#!/bin/bash

# ============================================================================
# SigNoz Verification Script for Bakery IA
# ============================================================================
# This script verifies that SigNoz is properly deployed and functioning
# ============================================================================

set -e

# Color codes for output.
# ANSI-C quoting ($'...') stores the real ESC byte in each variable, so the
# colors render with plain `echo "${RED}..."` as well as with `echo -e`.
# (Single-quoted '\033[...' would be printed literally by plain echo.)
RED=$'\033[0;31m'
GREEN=$'\033[0;32m'
YELLOW=$'\033[1;33m'
BLUE=$'\033[0;34m'
NC=$'\033[0m' # No Color

# Function to display help
# Prints the usage text (arguments, options, examples) to stdout.
# $0 is expanded so the examples show the name the script was invoked with.
show_help() {
    cat <<EOF
Usage: $0 [OPTIONS] ENVIRONMENT

Verify SigNoz deployment for Bakery IA

Arguments:
  ENVIRONMENT                 Environment to verify (dev|prod)

Options:
  -h, --help                  Show this help message
  -n, --namespace NAMESPACE   Specify namespace (default: signoz)

Examples:
  $0 dev                          # Verify development deployment
  $0 prod                         # Verify production deployment
  $0 --namespace monitoring dev   # Verify with custom namespace
EOF
}

# Parse command line arguments
# Recognised: -h/--help, -n/--namespace NAMESPACE, and the positional
# environment (dev|prod). Anything else prints the help text and exits 1.
NAMESPACE="signoz"

while [[ $# -gt 0 ]]; do
    case "$1" in
        -h|--help)
            show_help
            exit 0
            ;;
        -n|--namespace)
            # Guard the option value: with a bare trailing "-n", `shift 2`
            # fails and `set -e` would abort the script without any message.
            if [[ $# -lt 2 || -z "$2" ]]; then
                echo "Error: $1 requires a NAMESPACE value."
                show_help
                exit 1
            fi
            NAMESPACE="$2"
            shift 2
            ;;
        dev|prod)
            ENVIRONMENT="$1"
            shift
            ;;
        *)
            echo "Unknown argument: $1"
            show_help
            exit 1
            ;;
    esac
done

# Validate environment
if [[ -z "${ENVIRONMENT:-}" ]]; then
    echo "Error: Environment not specified. Use 'dev' or 'prod'."
    show_help
    exit 1
fi

# ENVIRONMENT is not initialised by this script, so a value inherited from the
# caller's environment can reach this point; reject anything but dev/prod.
if [[ "$ENVIRONMENT" != "dev" && "$ENVIRONMENT" != "prod" ]]; then
    echo "Error: Invalid environment. Use 'dev' or 'prod'."
    exit 1
fi

# Function to check if kubectl is configured
# Runs `kubectl cluster-info` (output discarded); on failure prints an error
# and exits the script with status 1. Prints nothing on success.
check_kubectl() {
    if ! kubectl cluster-info &> /dev/null; then
        # echo -e so the color escape sequences are interpreted, not printed
        echo -e "${RED}Error: kubectl is not configured or cannot connect to cluster.${NC}"
        echo "Please ensure you have access to a Kubernetes cluster."
        exit 1
    fi
}

# Function to check namespace exists
# Looks up $NAMESPACE with `kubectl get namespace` (output discarded); when the
# lookup fails, prints an error plus a deploy hint and exits with status 1.
check_namespace() {
    if ! kubectl get namespace "$NAMESPACE" &> /dev/null; then
        # echo -e so the color escape sequences are interpreted, not printed
        echo -e "${RED}Error: Namespace $NAMESPACE does not exist.${NC}"
        echo "Please deploy SigNoz first using: ./deploy-signoz.sh $ENVIRONMENT"
        exit 1
    fi
}

# Function to verify SigNoz deployment

# Helper: print the number of resources of kind $1 in $NAMESPACE that carry
# the SigNoz instance label. Prints 0 when kubectl fails or nothing matches
# (the pipeline status is awk's, so a kubectl failure does not trip set -e).
_count_signoz_resources() {
    kubectl get "$1" -n "$NAMESPACE" -l app.kubernetes.io/instance=signoz -o name 2>/dev/null \
        | awk 'NF { n++ } END { print n + 0 }'
}

# Runs the numbered deployment checks (Helm release, pods, services, ingress,
# PVCs, resource usage, log scan), then the environment-specific checks and
# the access information.
# Globals read: NAMESPACE, ENVIRONMENT, color variables.
# Exits 1 when the Helm release or the SigNoz pods are missing; every other
# finding is reported as a warning/error line without stopping the script.
verify_deployment() {
    local selector="app.kubernetes.io/instance=signoz"

    echo -e "${BLUE}"
    echo "=========================================="
    echo "🔍 Verifying SigNoz Deployment"
    echo "=========================================="
    echo "Environment: $ENVIRONMENT"
    echo "Namespace: $NAMESPACE"
    echo -e "${NC}"
    echo ""

    # Check if SigNoz helm release exists
    echo -e "${BLUE}1. Checking Helm release...${NC}"
    if helm list -n "$NAMESPACE" | grep -q signoz; then
        echo -e "${GREEN}✅ SigNoz Helm release found${NC}"
    else
        echo -e "${RED}❌ SigNoz Helm release not found${NC}"
        echo "Please deploy SigNoz first using: ./deploy-signoz.sh $ENVIRONMENT"
        exit 1
    fi
    echo ""

    # Check pod status
    echo -e "${BLUE}2. Checking pod status...${NC}"
    # Fetch the pod table once and derive all three counters from it. awk
    # always prints exactly one integer, unlike `grep -c ... || echo 0`, which
    # emits "0" twice when nothing matches and breaks the numeric tests below.
    local pod_table total_pods running_pods ready_pods
    pod_table=$(kubectl get pods -n "$NAMESPACE" -l "$selector" --no-headers 2>/dev/null || true)
    total_pods=$(awk 'NF { n++ } END { print n + 0 }' <<< "$pod_table")
    running_pods=$(awk '$3 == "Running" { n++ } END { print n + 0 }' <<< "$pod_table")
    # A pod is ready when every container is ready (READY column "x/x"), so
    # multi-container pods such as 2/2 are counted too, not only 1/1.
    ready_pods=$(awk '$3 == "Running" { split($2, r, "/"); if (r[1] == r[2]) n++ } END { print n + 0 }' <<< "$pod_table")

    echo "Total pods: $total_pods"
    echo "Running pods: $running_pods"
    echo "Ready pods: $ready_pods"

    if [[ $total_pods -eq 0 ]]; then
        echo -e "${RED}❌ No SigNoz pods found${NC}"
        exit 1
    fi

    if [[ $running_pods -eq $total_pods ]]; then
        echo -e "${GREEN}✅ All pods are running${NC}"
    else
        echo -e "${YELLOW}⚠️ Some pods are not running${NC}"
    fi

    if [[ $ready_pods -eq $total_pods ]]; then
        echo -e "${GREEN}✅ All pods are ready${NC}"
    else
        echo -e "${YELLOW}⚠️ Some pods are not ready${NC}"
    fi
    echo ""

    # Show pod details
    echo -e "${BLUE}Pod Details:${NC}"
    kubectl get pods -n "$NAMESPACE" -l "$selector"
    echo ""

    # Check services
    echo -e "${BLUE}3. Checking services...${NC}"
    local service_count
    service_count=$(_count_signoz_resources svc)

    if [[ $service_count -gt 0 ]]; then
        echo -e "${GREEN}✅ Services found ($service_count services)${NC}"
        kubectl get svc -n "$NAMESPACE" -l "$selector"
    else
        echo -e "${RED}❌ No services found${NC}"
    fi
    echo ""

    # Check ingress
    echo -e "${BLUE}4. Checking ingress...${NC}"
    local ingress_count
    ingress_count=$(_count_signoz_resources ingress)

    if [[ $ingress_count -gt 0 ]]; then
        echo -e "${GREEN}✅ Ingress found ($ingress_count ingress resources)${NC}"
        kubectl get ingress -n "$NAMESPACE" -l "$selector"
    else
        echo -e "${YELLOW}⚠️ No ingress found (may be configured in main namespace)${NC}"
    fi
    echo ""

    # Check PVCs
    echo -e "${BLUE}5. Checking persistent volume claims...${NC}"
    local pvc_count
    pvc_count=$(_count_signoz_resources pvc)

    if [[ $pvc_count -gt 0 ]]; then
        echo -e "${GREEN}✅ PVCs found ($pvc_count PVCs)${NC}"
        kubectl get pvc -n "$NAMESPACE" -l "$selector"
    else
        echo -e "${YELLOW}⚠️ No PVCs found (may not be required for all components)${NC}"
    fi
    echo ""

    # Check resource usage
    echo -e "${BLUE}6. Checking resource usage...${NC}"
    # Capture the output of the one call that is tested, so a second,
    # un-guarded `kubectl top` cannot fail and abort the script under set -e.
    local top_output
    if top_output=$(kubectl top pods -n "$NAMESPACE" -l "$selector" 2>/dev/null); then
        echo -e "${GREEN}✅ Resource usage:${NC}"
        printf '%s\n' "$top_output"
    else
        echo -e "${YELLOW}⚠️ Metrics server not available or no resource usage data${NC}"
    fi
    echo ""

    # Check logs for errors
    echo -e "${BLUE}7. Checking for errors in logs...${NC}"
    local error_found=false
    local pod pod_errors

    # Check each pod for errors
    while IFS= read -r pod; do
        [[ -n "$pod" ]] || continue
        # grep -c prints the count itself (including 0); `|| true` only
        # neutralises its exit status 1 on zero matches.
        pod_errors=$(kubectl logs -n "$NAMESPACE" "$pod" 2>/dev/null | grep -ciE 'error|exception|fail|crash' || true)
        if [[ "${pod_errors:-0}" -gt 0 ]]; then
            echo -e "${RED}❌ Errors found in pod $pod ($pod_errors errors)${NC}"
            error_found=true
        fi
    done < <(kubectl get pods -n "$NAMESPACE" -l "$selector" -o name | sed 's|pod/||')

    if [[ "$error_found" == false ]]; then
        echo -e "${GREEN}✅ No errors found in logs${NC}"
    fi
    echo ""

    # Environment-specific checks
    if [[ "$ENVIRONMENT" == "dev" ]]; then
        verify_dev_specific
    else
        verify_prod_specific
    fi

    # Show access information
    show_access_info
}

# Function for development-specific verification
# Reports, as check 8, whether an ingress in $NAMESPACE mentions "localhost"
# and whether the signoz-query-service deployment has a 512Mi memory limit.
# Both findings are informational; the function itself never exits.
verify_dev_specific() {
    echo -e "${BLUE}8. Development-specific checks...${NC}"

    # Check if localhost ingress is configured
    if kubectl get ingress -n "$NAMESPACE" | grep -q "localhost"; then
        echo -e "${GREEN}✅ Localhost ingress configured${NC}"
    else
        echo -e "${YELLOW}⚠️ Localhost ingress not found${NC}"
    fi

    # Check resource limits (should be lower for dev)
    # Declared separately from the assignment; `|| true` keeps a missing
    # deployment from aborting the script under set -e (value stays empty).
    local memory_limit
    memory_limit=$(kubectl get deployment -n "$NAMESPACE" signoz-query-service -o jsonpath='{.spec.template.spec.containers[0].resources.limits.memory}' 2>/dev/null || true)
    if [[ "$memory_limit" == "512Mi" ]]; then
        echo -e "${GREEN}✅ Development resource limits applied${NC}"
    else
        echo -e "${YELLOW}⚠️ Resource limits may not be optimized for development${NC}"
    fi
    echo ""
}

# Function for production-specific verification
# Reports, as check 8, whether an ingress in $NAMESPACE references the
# signoz-tls-cert TLS secret, whether signoz-query-service has more than one
# replica, and whether its memory limit is 2Gi.
# All findings are informational; the function itself never exits.
verify_prod_specific() {
    echo -e "${BLUE}8. Production-specific checks...${NC}"

    # Check if TLS is configured
    # The default `kubectl get ingress` table has no column for TLS secrets,
    # so the secret names are read from .spec.tls of every ingress instead.
    local tls_secrets
    tls_secrets=$(kubectl get ingress -n "$NAMESPACE" -o jsonpath='{.items[*].spec.tls[*].secretName}' 2>/dev/null || true)
    if [[ "$tls_secrets" == *"signoz-tls-cert"* ]]; then
        echo -e "${GREEN}✅ TLS certificate configured${NC}"
    else
        echo -e "${YELLOW}⚠️ TLS certificate not found${NC}"
    fi

    # Check if multiple replicas are running
    local query_replicas
    query_replicas=$(kubectl get deployment -n "$NAMESPACE" signoz-query-service -o jsonpath='{.spec.replicas}' 2>/dev/null || echo "1")
    # Only compare numerically when kubectl really returned a number.
    if [[ "$query_replicas" =~ ^[0-9]+$ && "$query_replicas" -gt 1 ]]; then
        echo -e "${GREEN}✅ High availability configured ($query_replicas replicas)${NC}"
    else
        echo -e "${YELLOW}⚠️ Single replica detected (not highly available)${NC}"
    fi

    # Check resource limits (should be higher for prod)
    local memory_limit
    memory_limit=$(kubectl get deployment -n "$NAMESPACE" signoz-query-service -o jsonpath='{.spec.template.spec.containers[0].resources.limits.memory}' 2>/dev/null || true)
    if [[ "$memory_limit" == "2Gi" ]]; then
        echo -e "${GREEN}✅ Production resource limits applied${NC}"
    else
        echo -e "${YELLOW}⚠️ Resource limits may not be optimized for production${NC}"
    fi
    echo ""
}

# Function to show access information
# Prints the UI/API/collector endpoints for $ENVIRONMENT (localhost for dev,
# monitoring.bakewise.ai otherwise), the default credentials, and the curl
# commands a user can run by hand. Only prints; performs no network access.
show_access_info() {
    echo -e "${BLUE}"
    echo "=========================================="
    echo "📋 Access Information"
    echo "=========================================="
    echo -e "${NC}"

    if [[ "$ENVIRONMENT" == "dev" ]]; then
        echo "SigNoz UI: https://localhost/signoz"
        echo "SigNoz API: https://localhost/signoz-api"
        echo ""
        echo "OpenTelemetry Collector:"
        echo " gRPC: localhost:4317"
        echo " HTTP: localhost:4318"
        echo " Metrics: localhost:8888"
    else
        echo "SigNoz UI: https://monitoring.bakewise.ai/signoz"
        echo "SigNoz API: https://monitoring.bakewise.ai/signoz-api"
        echo "SigNoz Alerts: https://monitoring.bakewise.ai/signoz-alerts"
        echo ""
        echo "OpenTelemetry Collector:"
        echo " gRPC: monitoring.bakewise.ai:4317"
        echo " HTTP: monitoring.bakewise.ai:4318"
    fi

    echo ""
    echo "Default Credentials:"
    echo " Username: admin"
    echo " Password: admin"
    echo ""

    # Show connection test commands
    echo "Connection Test Commands:"
    if [[ "$ENVIRONMENT" == "dev" ]]; then
        echo " curl -k https://localhost/signoz"
        echo " curl -k https://localhost/signoz-api/health"
    else
        echo " curl https://monitoring.bakewise.ai/signoz"
        echo " curl https://monitoring.bakewise.ai/signoz-api/health"
    fi
    echo ""
}

# Function to run connectivity tests

# Helper: print the HTTP status code curl reports for URL $1; any further
# arguments are passed to curl as extra options. --max-time bounds each probe
# so an unresponsive endpoint cannot stall the script, and `|| true` keeps a
# failed transfer (curl then reports 000) from aborting it under set -e.
_signoz_http_code() {
    local url="$1"
    shift
    curl -s -o /dev/null -w "%{http_code}" --max-time 10 "$@" "$url" || true
}

# For dev, probes the SigNoz frontend, API health endpoint and the collector
# metrics port on localhost and reports each as accessible or not. For any
# other environment it only prints a note that DNS/TLS must be in place.
# Never exits; all results are informational.
run_connectivity_tests() {
    echo -e "${BLUE}"
    echo "=========================================="
    echo "🔗 Running Connectivity Tests"
    echo "=========================================="
    echo -e "${NC}"

    if [[ "$ENVIRONMENT" == "dev" ]]; then
        local status

        # Test frontend
        echo "Testing SigNoz frontend..."
        status=$(_signoz_http_code https://localhost/signoz -k)
        if [[ "$status" == "200" || "$status" == "302" ]]; then
            echo -e "${GREEN}✅ Frontend accessible${NC}"
        else
            echo -e "${RED}❌ Frontend not accessible${NC}"
        fi

        # Test API
        echo "Testing SigNoz API..."
        status=$(_signoz_http_code https://localhost/signoz-api/health -k)
        if [[ "$status" == "200" ]]; then
            echo -e "${GREEN}✅ API accessible${NC}"
        else
            echo -e "${RED}❌ API not accessible${NC}"
        fi

        # Test OTEL collector
        echo "Testing OpenTelemetry collector..."
        status=$(_signoz_http_code http://localhost:8888/metrics)
        if [[ "$status" == "200" ]]; then
            echo -e "${GREEN}✅ OTEL collector accessible${NC}"
        else
            echo -e "${YELLOW}⚠️ OTEL collector not accessible (may not be exposed)${NC}"
        fi
    else
        echo -e "${YELLOW}⚠️ Production connectivity tests require valid DNS and TLS${NC}"
        echo " Please ensure monitoring.bakewise.ai resolves to your cluster"
    fi
    echo ""
}

# Main execution
# Prints the banner, runs the prerequisite checks, the deployment verification
# and the connectivity tests in that order, then prints a summary with
# suggested next steps. Reads ENVIRONMENT and NAMESPACE set by the argument
# parser; takes no arguments of its own.
main() {
    echo -e "${BLUE}"
    echo "=========================================="
    echo "🔍 SigNoz Verification for Bakery IA"
    echo "=========================================="
    echo -e "${NC}"

    # Check prerequisites
    check_kubectl
    check_namespace

    # Verify deployment
    verify_deployment

    # Run connectivity tests
    run_connectivity_tests

    echo -e "${GREEN}"
    echo "=========================================="
    echo "✅ Verification Complete"
    echo "=========================================="
    echo -e "${NC}"

    echo "Summary:"
    echo " Environment: $ENVIRONMENT"
    echo " Namespace: $NAMESPACE"
    echo ""
    echo "Next Steps:"
    echo " 1. Access SigNoz UI and verify dashboards"
    echo " 2. Configure alert rules for your services"
    echo " 3. Instrument your applications with OpenTelemetry"
    echo " 4. Set up custom dashboards for key metrics"
    echo ""
}

# Run main function
# Command-line arguments were already consumed by the parser at the top of the
# script, so main is invoked without any.
main
Reference in New Issue
Block a user