Initial commit - production deployment

This commit is contained in:
2026-01-21 17:17:16 +01:00
commit c23d00dd92
2289 changed files with 638440 additions and 0 deletions

View File

@@ -0,0 +1,152 @@
#!/bin/bash
# Colors for output.
# The values are stored as literal backslash sequences; they only become real
# terminal escapes when printed by an escape-interpreting command (as the
# print_* helpers in this script do).
RED="\033[0;31m"
GREEN="\033[0;32m"
YELLOW="\033[1;33m"
BLUE="\033[0;34m"
NC="\033[0m" # No Color (reset)
# Function to print colored output
#
# Print an informational message prefixed with a blue "[INFO]" tag.
# Globals:   BLUE, NC (read)
# Arguments: $1 - message text
# Outputs:   one line on stdout
print_status() {
  # %b expands the escapes stored in the color variables; %s prints the
  # message verbatim so backslashes in it are never interpreted.
  printf '%b[INFO]%b %s\n' "$BLUE" "$NC" "$1"
}
# Print a success message prefixed with a green "[SUCCESS]" tag.
# Globals:   GREEN, NC (read)
# Arguments: $1 - message text
# Outputs:   one line on stdout
print_success() {
  # %b expands the escapes stored in the color variables; %s prints the
  # message verbatim so backslashes in it are never interpreted.
  printf '%b[SUCCESS]%b %s\n' "$GREEN" "$NC" "$1"
}
# Print a warning message prefixed with a yellow "[WARNING]" tag.
# Globals:   YELLOW, NC (read)
# Arguments: $1 - message text
# Outputs:   one line on stdout
print_warning() {
  # %b expands the escapes stored in the color variables; %s prints the
  # message verbatim so backslashes in it are never interpreted.
  printf '%b[WARNING]%b %s\n' "$YELLOW" "$NC" "$1"
}
# Print an error message prefixed with a red "[ERROR]" tag.
# Globals:   RED, NC (read)
# Arguments: $1 - message text
# Outputs:   one line on stdout (same stream as the other print_* helpers)
print_error() {
  # %b expands the escapes stored in the color variables; %s prints the
  # message verbatim so backslashes in it are never interpreted.
  printf '%b[ERROR]%b %s\n' "$RED" "$NC" "$1"
}
# Script banner followed by one blank line.
printf '%s\n' \
  "=======================================" \
  "Registry Verification Script" \
  "=======================================" \
  ""
# 1. Check if registry container is running
print_status "Checking if kind-registry container is running..."
# Ask Docker about this exact container rather than grepping `docker ps`:
# a substring grep also matches any other row that merely contains
# "kind-registry" (a differently named container, an image name, ...).
if [ "$(docker inspect --type container -f '{{.State.Running}}' kind-registry 2>/dev/null)" = "true" ]; then
  print_success "Registry container is running"
  # Anchored name filter so only this container's status line is shown.
  REGISTRY_STATUS=$(docker ps --filter 'name=^/?kind-registry$' --format "{{.Status}}")
  echo " Status: $REGISTRY_STATUS"
else
  print_error "Registry container is not running!"
  echo " Run: ./kubernetes_restart.sh setup"
  exit 1
fi
# 2. Check if registry is accessible on localhost:5001
print_status "Checking if registry is accessible on localhost:5001..."
# -f makes curl exit non-zero on HTTP error statuses, so a registry that
# answers with an error page is not reported as accessible. The response is
# captured once and reused for the catalog line instead of fetching it twice.
if CATALOG=$(curl -sf http://localhost:5001/v2/_catalog 2>/dev/null); then
  print_success "Registry is accessible"
  echo " Catalog: $CATALOG"
else
  print_error "Registry is not accessible on localhost:5001"
  exit 1
fi
# 3. Check if registry is connected to Kind network
print_status "Checking if registry is connected to Kind network..."
# The template prints the JSON for the "kind" network entry; it is "null"
# when the container is not attached, and empty when the inspect call fails.
NETWORK_CHECK=$(docker inspect -f='{{json .NetworkSettings.Networks.kind}}' kind-registry 2>/dev/null)
if [ -z "$NETWORK_CHECK" ] || [ "$NETWORK_CHECK" = "null" ]; then
  print_warning "Registry is not connected to Kind network"
  print_status "Connecting registry to Kind network..."
  if docker network connect "kind" "kind-registry"; then
    print_success "Registry connected successfully"
  else
    print_error "Failed to connect registry to Kind network"
    exit 1
  fi
else
  print_success "Registry is connected to Kind network"
fi
# 4. Check if Kind cluster exists
print_status "Checking if Kind cluster exists..."
# `kind get clusters` prints one cluster name per line; -F -x requires an
# exact whole-line match so a name such as "bakery-ia-local-2" is not
# mistaken for the expected cluster.
if kind get clusters | grep -Fxq "bakery-ia-local"; then
  print_success "Kind cluster 'bakery-ia-local' exists"
else
  print_error "Kind cluster 'bakery-ia-local' not found"
  echo " Run: ./kubernetes_restart.sh setup"
  exit 1
fi
# 5. Check if registry is documented in cluster
print_status "Checking if registry is documented in cluster..."
if kubectl get configmap -n kube-public local-registry-hosting &>/dev/null; then
  print_success "Registry is documented in cluster"
  # Extract the quoted value of the `host:` entry from the ConfigMap payload.
  REG_HOST=$(kubectl get configmap -n kube-public local-registry-hosting -o jsonpath='{.data.localRegistryHosting\.v1}' 2>/dev/null | grep -o 'host: "[^"]*"' | cut -d'"' -f2)
  echo " Registry host: $REG_HOST"
else
  print_warning "Registry ConfigMap not found in cluster"
  print_status "Creating ConfigMap..."
  # Quoted delimiter: the manifest is literal text with no shell expansions.
  if kubectl apply -f - <<'EOF'
apiVersion: v1
kind: ConfigMap
metadata:
  name: local-registry-hosting
  namespace: kube-public
data:
  localRegistryHosting.v1: |
    host: "localhost:5001"
    help: "https://kind.sigs.k8s.io/docs/user/local-registry/"
EOF
  then
    print_success "ConfigMap created successfully"
  else
    # Report the failure instead of ignoring it. The script still continues,
    # as it did before, so the remaining checks run.
    print_warning "Failed to create registry ConfigMap"
  fi
fi
# 6. Test pushing a test image
TEST_SOURCE_IMAGE="busybox:latest"
TEST_TARGET_IMAGE="localhost:5001/test/busybox:latest"
BUSYBOX_PREEXISTING=false

# Remove the images this test created. The busybox source image is removed
# only when it was not already present before the test started.
cleanup_test_images() {
  docker rmi "$TEST_TARGET_IMAGE" > /dev/null 2>&1
  if [ "$BUSYBOX_PREEXISTING" != "true" ]; then
    docker rmi "$TEST_SOURCE_IMAGE" > /dev/null 2>&1
  fi
}

print_status "Testing image push to registry..."
# Remember whether the user already had the image so cleanup leaves it alone.
if docker image inspect "$TEST_SOURCE_IMAGE" > /dev/null 2>&1; then
  BUSYBOX_PREEXISTING=true
fi
print_status "Pulling busybox image..."
if ! docker pull "$TEST_SOURCE_IMAGE" > /dev/null 2>&1; then
  if [ "$BUSYBOX_PREEXISTING" = "true" ]; then
    print_warning "Could not pull $TEST_SOURCE_IMAGE; using the existing local copy"
  else
    # Without a source image the later tag/push would fail with a
    # misleading "Failed to push" message, so stop here.
    print_error "Failed to pull $TEST_SOURCE_IMAGE"
    exit 1
  fi
fi
print_status "Tagging image for local registry..."
if ! docker tag "$TEST_SOURCE_IMAGE" "$TEST_TARGET_IMAGE"; then
  print_error "Failed to tag image for local registry"
  cleanup_test_images
  exit 1
fi
print_status "Pushing image to local registry..."
if docker push "$TEST_TARGET_IMAGE" > /dev/null 2>&1; then
  print_success "Successfully pushed test image to registry"
else
  print_error "Failed to push image to registry"
  cleanup_test_images
  exit 1
fi
print_status "Verifying image in registry catalog..."
CATALOG=$(curl -s http://localhost:5001/v2/_catalog)
if echo "$CATALOG" | grep -q "test/busybox"; then
  print_success "Test image found in registry catalog"
else
  print_warning "Test image not found in catalog, but push succeeded"
fi
# 7. Clean up test image
print_status "Cleaning up test images..."
cleanup_test_images
# Closing banner, static summary and next-step hints.
printf '\n'
printf '%s\n' "======================================="
print_success "Registry verification completed!"
printf '%s\n' "=======================================" ""
print_status "Summary:"
printf '%s\n' \
  " - Registry URL: localhost:5001" \
  " - Registry container: kind-registry" \
  " - Connected to Kind network: Yes" \
  " - Accessible from host: Yes" \
  " - Test push: Successful" \
  ""
print_status "Next steps:"
printf '%s\n' \
  " 1. Ensure your Tiltfile has: default_registry('localhost:5001')" \
  " 2. Run: tilt up" \
  " 3. Images will be automatically pushed to localhost:5001/bakery/<service>" \
  ""

View File

@@ -0,0 +1,446 @@
#!/bin/bash
# ============================================================================
# SigNoz Verification Script for Bakery IA
# ============================================================================
# This script verifies that SigNoz is properly deployed and functioning
# ============================================================================
set -e
# Color codes for output.
# ANSI-C quoting ($'...') stores the real ESC byte in each variable. The
# functions in this script print with plain `echo` (no -e), which would show
# a single-quoted '\033[...' value as literal text instead of coloring it.
RED=$'\033[0;31m'
GREEN=$'\033[0;32m'
YELLOW=$'\033[1;33m'
BLUE=$'\033[0;34m'
NC=$'\033[0m' # No Color
# Function to display help
#
# Prints the usage text (arguments, options and examples) to stdout.
# $0 is expanded so the examples show the name the script was invoked with.
show_help() {
  printf '%s\n' \
    "Usage: $0 [OPTIONS] ENVIRONMENT" \
    "" \
    "Verify SigNoz deployment for Bakery IA" \
    "" \
    "Arguments:" \
    "ENVIRONMENT Environment to verify (dev|prod)" \
    "" \
    "Options:" \
    "-h, --help Show this help message" \
    "-n, --namespace NAMESPACE Specify namespace (default: bakery-ia)" \
    "" \
    "Examples:" \
    "$0 dev # Verify development deployment" \
    "$0 prod # Verify production deployment" \
    "$0 --namespace monitoring dev # Verify with custom namespace"
}
# Parse command line arguments
# Recognised: -h/--help, -n/--namespace NAMESPACE, and the positional
# environment (dev|prod). Anything else prints the help text and exits 1.
NAMESPACE="bakery-ia"
while [[ $# -gt 0 ]]; do
  case $1 in
    -h|--help)
      show_help
      exit 0
      ;;
    -n|--namespace)
      # Without this guard a trailing "-n" makes `shift 2` fail, and under
      # `set -e` the script would exit without any message.
      if [[ $# -lt 2 || -z "$2" ]]; then
        echo "Error: $1 requires a namespace argument."
        show_help
        exit 1
      fi
      NAMESPACE="$2"
      shift 2
      ;;
    dev|prod)
      ENVIRONMENT="$1"
      shift
      ;;
    *)
      echo "Unknown argument: $1"
      show_help
      exit 1
      ;;
  esac
done
# Validate environment
if [[ -z "$ENVIRONMENT" ]]; then
  echo "Error: Environment not specified. Use 'dev' or 'prod'."
  show_help
  exit 1
fi
# ENVIRONMENT is never initialised above, so a value inherited from the
# calling environment can reach this point without passing the case above.
if [[ "$ENVIRONMENT" != "dev" && "$ENVIRONMENT" != "prod" ]]; then
  echo "Error: Invalid environment. Use 'dev' or 'prod'."
  exit 1
fi
# Function to check if kubectl is configured
#
# Returns quietly when `kubectl cluster-info` succeeds; otherwise prints an
# error and terminates the script with status 1.
check_kubectl() {
  if kubectl cluster-info > /dev/null 2>&1; then
    return 0
  fi
  echo "${RED}Error: kubectl is not configured or cannot connect to cluster.${NC}"
  echo "Please ensure you have access to a Kubernetes cluster."
  exit 1
}
# Function to check namespace exists
#
# Returns quietly when $NAMESPACE exists in the cluster; otherwise prints an
# error with the suggested deploy command and terminates with status 1.
check_namespace() {
  if kubectl get namespace "$NAMESPACE" > /dev/null 2>&1; then
    return 0
  fi
  echo "${RED}Error: Namespace $NAMESPACE does not exist.${NC}"
  echo "Please deploy SigNoz first using: ./deploy-signoz.sh $ENVIRONMENT"
  exit 1
}
# Print how many non-blank lines arrive on stdin, always as one integer
# (prints 0 for empty input).
_signoz_count_rows() {
  awk 'NF { n++ } END { print n + 0 }'
}

# Function to verify SigNoz deployment
#
# Runs the generic checks (Helm release, pods, services, ingress, PVCs,
# resource usage, log scan), then the environment-specific checks and the
# access information.
# Globals: NAMESPACE, ENVIRONMENT and the color variables (read)
# Exits:   1 when the Helm release is missing or no SigNoz pods exist
verify_deployment() {
  local selector="app.kubernetes.io/instance=signoz"
  local pod_table total_pods running_pods ready_pods
  local service_count ingress_count pvc_count
  local error_found=false pod pod_errors

  echo "${BLUE}"
  echo "=========================================="
  echo "🔍 Verifying SigNoz Deployment"
  echo "=========================================="
  echo "Environment: $ENVIRONMENT"
  echo "Namespace: $NAMESPACE"
  echo "${NC}"
  echo ""

  # Check if SigNoz helm release exists
  echo "${BLUE}1. Checking Helm release...${NC}"
  if helm list -n "$NAMESPACE" | grep -q signoz; then
    echo "${GREEN}✅ SigNoz Helm release found${NC}"
  else
    echo "${RED}❌ SigNoz Helm release not found${NC}"
    echo "Please deploy SigNoz first using: ./deploy-signoz.sh $ENVIRONMENT"
    exit 1
  fi
  echo ""

  # Check pod status
  echo "${BLUE}2. Checking pod status...${NC}"
  # Fetch the pod table once (NAME READY STATUS ...) and derive every counter
  # from it. Each awk program always prints exactly one integer, unlike
  # `grep -c ... || echo 0`, which yields "0" twice when nothing matches.
  pod_table=$(kubectl get pods -n "$NAMESPACE" -l "$selector" --no-headers 2>/dev/null || true)
  total_pods=$(printf '%s\n' "$pod_table" | _signoz_count_rows)
  running_pods=$(printf '%s\n' "$pod_table" \
    | awk '$3 == "Running" { n++ } END { print n + 0 }')
  # A pod is ready when both halves of its READY column (x/y) are equal, so
  # pods with any number of containers are handled.
  ready_pods=$(printf '%s\n' "$pod_table" \
    | awk '$3 == "Running" { split($2, r, "/"); if (r[1] == r[2]) n++ } END { print n + 0 }')
  echo "Total pods: $total_pods"
  echo "Running pods: $running_pods"
  echo "Ready pods: $ready_pods"
  if [[ $total_pods -eq 0 ]]; then
    echo "${RED}❌ No SigNoz pods found${NC}"
    exit 1
  fi
  if [[ $running_pods -eq $total_pods ]]; then
    echo "${GREEN}✅ All pods are running${NC}"
  else
    echo "${YELLOW}⚠️ Some pods are not running${NC}"
  fi
  if [[ $ready_pods -eq $total_pods ]]; then
    echo "${GREEN}✅ All pods are ready${NC}"
  else
    echo "${YELLOW}⚠️ Some pods are not ready${NC}"
  fi
  echo ""

  # Show pod details
  echo "${BLUE}Pod Details:${NC}"
  kubectl get pods -n "$NAMESPACE" -l "$selector"
  echo ""

  # Check services
  echo "${BLUE}3. Checking services...${NC}"
  service_count=$(kubectl get svc -n "$NAMESPACE" -l "$selector" --no-headers 2>/dev/null \
    | _signoz_count_rows)
  if [[ $service_count -gt 0 ]]; then
    echo "${GREEN}✅ Services found ($service_count services)${NC}"
    kubectl get svc -n "$NAMESPACE" -l "$selector"
  else
    echo "${RED}❌ No services found${NC}"
  fi
  echo ""

  # Check ingress
  echo "${BLUE}4. Checking ingress...${NC}"
  ingress_count=$(kubectl get ingress -n "$NAMESPACE" -l "$selector" --no-headers 2>/dev/null \
    | _signoz_count_rows)
  if [[ $ingress_count -gt 0 ]]; then
    echo "${GREEN}✅ Ingress found ($ingress_count ingress resources)${NC}"
    kubectl get ingress -n "$NAMESPACE" -l "$selector"
  else
    echo "${YELLOW}⚠️ No ingress found (may be configured in main namespace)${NC}"
  fi
  echo ""

  # Check PVCs
  echo "${BLUE}5. Checking persistent volume claims...${NC}"
  pvc_count=$(kubectl get pvc -n "$NAMESPACE" -l "$selector" --no-headers 2>/dev/null \
    | _signoz_count_rows)
  if [[ $pvc_count -gt 0 ]]; then
    echo "${GREEN}✅ PVCs found ($pvc_count PVCs)${NC}"
    kubectl get pvc -n "$NAMESPACE" -l "$selector"
  else
    echo "${YELLOW}⚠️ No PVCs found (may not be required for all components)${NC}"
  fi
  echo ""

  # Check resource usage
  echo "${BLUE}6. Checking resource usage...${NC}"
  if command -v kubectl &> /dev/null && kubectl top pods -n "$NAMESPACE" &> /dev/null; then
    echo "${GREEN}✅ Resource usage:${NC}"
    kubectl top pods -n "$NAMESPACE" -l "$selector"
  else
    echo "${YELLOW}⚠️ Metrics server not available or no resource usage data${NC}"
  fi
  echo ""

  # Check logs for errors
  echo "${BLUE}7. Checking for errors in logs...${NC}"
  # Check each pod for errors
  while IFS= read -r pod; do
    [[ -n "$pod" ]] || continue
    # grep -c prints the count even when it is 0 (and then exits 1); the
    # `|| true` only keeps `set -e` from aborting on that exit status.
    pod_errors=$(kubectl logs -n "$NAMESPACE" "$pod" 2>/dev/null \
      | grep -ci "error\|exception\|fail\|crash" || true)
    if [[ ${pod_errors:-0} -gt 0 ]]; then
      echo "${RED}❌ Errors found in pod $pod ($pod_errors errors)${NC}"
      error_found=true
    fi
  done < <(kubectl get pods -n "$NAMESPACE" -l "$selector" -o name | sed 's|pod/||')
  if [[ "$error_found" == false ]]; then
    echo "${GREEN}✅ No errors found in logs${NC}"
  fi
  echo ""

  # Environment-specific checks
  if [[ "$ENVIRONMENT" == "dev" ]]; then
    verify_dev_specific
  else
    verify_prod_specific
  fi

  # Show access information
  show_access_info
}
# Function for development-specific verification
#
# Checks the dev ingress host, the memory limit of the query-service
# deployment and that the deployment runs a single replica.
# Globals: NAMESPACE and the color variables (read)
verify_dev_specific() {
  local component="app.kubernetes.io/component=query-service"
  local signoz_mem replicas

  echo "${BLUE}8. Development-specific checks...${NC}"

  # Check if ingress is configured (-F: the host name is matched literally)
  if kubectl get ingress -n "$NAMESPACE" 2>/dev/null | grep -Fq "monitoring.bakery-ia.local"; then
    echo "${GREEN}✅ Development ingress configured${NC}"
  else
    echo "${YELLOW}⚠️ Development ingress not found${NC}"
  fi

  # Check unified signoz component resource limits (should be lower for dev)
  signoz_mem=$(kubectl get deployment -n "$NAMESPACE" -l "$component" \
    -o jsonpath='{.items[0].spec.template.spec.containers[0].resources.limits.memory}' 2>/dev/null || true)
  if [[ -n "$signoz_mem" ]]; then
    echo "${GREEN}✅ SigNoz component found (memory limit: $signoz_mem)${NC}"
  else
    echo "${YELLOW}⚠️ Could not verify SigNoz component resources${NC}"
  fi

  # Check single replica setup for dev
  replicas=$(kubectl get deployment -n "$NAMESPACE" -l "$component" \
    -o jsonpath='{.items[0].spec.replicas}' 2>/dev/null || true)
  if [[ ! "$replicas" =~ ^[0-9]+$ ]]; then
    # The lookup failed or returned nothing; do not report that as
    # "multiple replicas".
    echo "${YELLOW}⚠️ Could not determine replica count${NC}"
  elif [[ $replicas -eq 1 ]]; then
    echo "${GREEN}✅ Single replica configuration (appropriate for dev)${NC}"
  elif [[ $replicas -gt 1 ]]; then
    echo "${YELLOW}⚠️ Multiple replicas detected (replicas: $replicas)${NC}"
  else
    echo "${YELLOW}⚠️ No replicas configured (replicas: $replicas)${NC}"
  fi
  echo ""
}
# Function for production-specific verification
#
# Checks TLS on the ingress, replica counts for query-service, Zookeeper and
# the OTel Collector, the memory limit, and whether a SigNoz HPA exists.
# Globals: NAMESPACE and the color variables (read)
verify_prod_specific() {
  local signoz_replicas zk_replicas otel_replicas signoz_mem hpa_count

  echo "${BLUE}8. Production-specific checks...${NC}"

  # Check if TLS is configured
  if kubectl get ingress -n "$NAMESPACE" 2>/dev/null | grep -q "signoz-tls"; then
    echo "${GREEN}✅ TLS certificate configured${NC}"
  else
    echo "${YELLOW}⚠️ TLS certificate not found${NC}"
  fi

  # Check if multiple replicas are running for HA
  signoz_replicas=$(kubectl get deployment -n "$NAMESPACE" -l app.kubernetes.io/component=query-service \
    -o jsonpath='{.items[0].spec.replicas}' 2>/dev/null || echo "1")
  if [[ ${signoz_replicas:-1} -gt 1 ]]; then
    echo "${GREEN}✅ High availability configured ($signoz_replicas SigNoz replicas)${NC}"
  else
    echo "${YELLOW}⚠️ Single SigNoz replica detected (not highly available)${NC}"
  fi

  # Check Zookeeper replicas (critical for production)
  zk_replicas=$(kubectl get statefulset -n "$NAMESPACE" -l app.kubernetes.io/component=zookeeper \
    -o jsonpath='{.items[0].spec.replicas}' 2>/dev/null || echo "0")
  if [[ ${zk_replicas:-0} -eq 3 ]]; then
    echo "${GREEN}✅ Zookeeper properly configured with 3 replicas${NC}"
  elif [[ ${zk_replicas:-0} -gt 0 ]]; then
    echo "${YELLOW}⚠️ Zookeeper has $zk_replicas replicas (recommend 3 for production)${NC}"
  else
    echo "${RED}❌ Zookeeper not found${NC}"
  fi

  # Check OTel Collector replicas
  otel_replicas=$(kubectl get deployment -n "$NAMESPACE" -l app.kubernetes.io/component=otel-collector \
    -o jsonpath='{.items[0].spec.replicas}' 2>/dev/null || echo "1")
  if [[ ${otel_replicas:-1} -gt 1 ]]; then
    echo "${GREEN}✅ OTel Collector HA configured ($otel_replicas replicas)${NC}"
  else
    echo "${YELLOW}⚠️ Single OTel Collector replica${NC}"
  fi

  # Check resource limits (should be higher for prod)
  signoz_mem=$(kubectl get deployment -n "$NAMESPACE" -l app.kubernetes.io/component=query-service \
    -o jsonpath='{.items[0].spec.template.spec.containers[0].resources.limits.memory}' 2>/dev/null || true)
  if [[ -n "$signoz_mem" ]]; then
    echo "${GREEN}✅ Production resource limits applied (memory: $signoz_mem)${NC}"
  else
    echo "${YELLOW}⚠️ Could not verify resource limits${NC}"
  fi

  # Check HPA (Horizontal Pod Autoscaler)
  # grep -c already prints "0" when nothing matches (and exits 1). Appending
  # `|| echo "0"` produced "0" twice and broke the numeric test below, so the
  # exit status is neutralised with `|| true` instead.
  hpa_count=$(kubectl get hpa -n "$NAMESPACE" 2>/dev/null | grep -c signoz || true)
  if [[ ${hpa_count:-0} -gt 0 ]]; then
    echo "${GREEN}✅ Horizontal Pod Autoscaler configured${NC}"
  else
    echo "${YELLOW}⚠️ No HPA found (consider enabling for production)${NC}"
  fi
  echo ""
}
# Function to show access information
#
# Prints the UI URL, in-cluster collector endpoints, default credentials and
# example connection-test commands for the selected ENVIRONMENT.
# Globals: ENVIRONMENT, NAMESPACE, BLUE, NC (read)
show_access_info() {
  local collector="signoz-otel-collector.$NAMESPACE.svc.cluster.local"
  local forward="kubectl port-forward -n $NAMESPACE"

  printf '%s\n' \
    "${BLUE}" \
    "==========================================" \
    "📋 Access Information" \
    "==========================================" \
    "${NC}"

  case "$ENVIRONMENT" in
    dev)
      printf '%s\n' \
        "SigNoz UI: http://monitoring.bakery-ia.local" \
        "" \
        "OpenTelemetry Collector (within cluster):" \
        " gRPC: ${collector}:4317" \
        " HTTP: ${collector}:4318" \
        "" \
        "Port-forward for local access:" \
        " ${forward} svc/signoz 8080:8080" \
        " ${forward} svc/signoz-otel-collector 4317:4317" \
        " ${forward} svc/signoz-otel-collector 4318:4318"
      ;;
    *)
      printf '%s\n' \
        "SigNoz UI: https://monitoring.bakewise.ai" \
        "" \
        "OpenTelemetry Collector (within cluster):" \
        " gRPC: ${collector}:4317" \
        " HTTP: ${collector}:4318"
      ;;
  esac

  printf '%s\n' \
    "" \
    "Default Credentials:" \
    " Username: admin@example.com" \
    " Password: admin" \
    "" \
    "⚠️ IMPORTANT: Change default password after first login!" \
    ""

  # Show connection test commands
  printf '%s\n' "Connection Test Commands:"
  case "$ENVIRONMENT" in
    dev)
      printf '%s\n' \
        " # Test SigNoz UI" \
        " curl http://monitoring.bakery-ia.local" \
        "" \
        " # Test via port-forward" \
        " ${forward} svc/signoz 8080:8080" \
        " curl http://localhost:8080"
      ;;
    *)
      printf '%s\n' \
        " # Test SigNoz UI" \
        " curl https://monitoring.bakewise.ai" \
        "" \
        " # Test API health" \
        " ${forward} svc/signoz 8080:8080" \
        " curl http://localhost:8080/api/v1/health"
      ;;
  esac
  printf '\n'
}
# Function to run connectivity tests
#
# Reports pod readiness and looks up the cluster IPs of the signoz and
# signoz-otel-collector services. For prod it also prints a manual test hint.
# Globals: NAMESPACE, ENVIRONMENT and the color variables (read)
run_connectivity_tests() {
  local pod_table ready_pods total_pods signoz_svc otel_svc

  echo "${BLUE}"
  echo "=========================================="
  echo "🔗 Running Connectivity Tests"
  echo "=========================================="
  echo "${NC}"

  # Test pod readiness first
  echo "Checking pod readiness..."
  # One kubectl call feeds both counters. Each awk program always prints a
  # single integer, unlike `grep -c ... || echo 0`, which yields "0" twice
  # when nothing matches.
  pod_table=$(kubectl get pods -n "$NAMESPACE" -l app.kubernetes.io/instance=signoz \
    --no-headers 2>/dev/null || true)
  total_pods=$(printf '%s\n' "$pod_table" | awk 'NF { n++ } END { print n + 0 }')
  # Ready means both halves of the READY column (x/y) are equal, which covers
  # pods with any number of containers rather than only 1/1 and 2/2.
  ready_pods=$(printf '%s\n' "$pod_table" \
    | awk '$3 == "Running" { split($2, r, "/"); if (r[1] == r[2]) n++ } END { print n + 0 }')
  if [[ $ready_pods -eq $total_pods && $total_pods -gt 0 ]]; then
    echo "${GREEN}✅ All pods are ready ($ready_pods/$total_pods)${NC}"
  else
    echo "${YELLOW}⚠️ Some pods not ready ($ready_pods/$total_pods)${NC}"
  fi
  echo ""

  # Test internal service connectivity
  echo "Testing internal service connectivity..."
  signoz_svc=$(kubectl get svc -n "$NAMESPACE" signoz \
    -o jsonpath='{.spec.clusterIP}' 2>/dev/null || true)
  if [[ -n "$signoz_svc" ]]; then
    echo "${GREEN}✅ SigNoz service accessible at $signoz_svc:8080${NC}"
  else
    echo "${RED}❌ SigNoz service not found${NC}"
  fi
  otel_svc=$(kubectl get svc -n "$NAMESPACE" signoz-otel-collector \
    -o jsonpath='{.spec.clusterIP}' 2>/dev/null || true)
  if [[ -n "$otel_svc" ]]; then
    echo "${GREEN}✅ OTel Collector service accessible at $otel_svc:4317 (gRPC), $otel_svc:4318 (HTTP)${NC}"
  else
    echo "${RED}❌ OTel Collector service not found${NC}"
  fi
  echo ""

  if [[ "$ENVIRONMENT" == "prod" ]]; then
    echo "${YELLOW}⚠️ Production connectivity tests require valid DNS and TLS${NC}"
    echo " Please ensure monitoring.bakewise.ai resolves to your cluster"
    echo ""
    echo "Manual test:"
    echo " curl -I https://monitoring.bakewise.ai"
  fi
}
# Main execution
#
# Prints the title banner, runs the prerequisite checks, the deployment
# verification and the connectivity tests in that order, then prints the
# closing summary.
# Globals: ENVIRONMENT, NAMESPACE, BLUE, GREEN, NC (read)
main() {
  local rule="=========================================="

  printf '%s\n' \
    "${BLUE}" \
    "$rule" \
    "🔍 SigNoz Verification for Bakery IA" \
    "$rule" \
    "${NC}"

  # Check prerequisites
  check_kubectl
  check_namespace

  # Verify deployment
  verify_deployment

  # Run connectivity tests
  run_connectivity_tests

  printf '%s\n' \
    "${GREEN}" \
    "$rule" \
    "✅ Verification Complete" \
    "$rule" \
    "${NC}" \
    "Summary:" \
    " Environment: $ENVIRONMENT" \
    " Namespace: $NAMESPACE" \
    "" \
    "Next Steps:" \
    " 1. Access SigNoz UI and verify dashboards" \
    " 2. Configure alert rules for your services" \
    " 3. Instrument your applications with OpenTelemetry" \
    " 4. Set up custom dashboards for key metrics" \
    ""
}
# Run main function
# The command-line arguments were already consumed by the parsing loop near
# the top of the script, so main is invoked without arguments.
main