Files
bakery-ia/infrastructure/scripts/maintenance/kubernetes_restart.sh
2026-01-19 11:55:17 +01:00

369 lines
12 KiB
Bash
Executable File

#!/bin/bash
# ANSI color sequences for terminal output.
# The values are kept as literal backslash sequences (not real escape bytes);
# they only become control characters when printed by something that expands
# backslash escapes, such as `echo -e`.
RED="\033[0;31m"
GREEN="\033[0;32m"
YELLOW="\033[1;33m"
BLUE="\033[0;34m"
NC="\033[0m" # "No Color": resets every attribute set by the codes above
# Print an informational line to stdout, tagged with a blue "[INFO]" prefix.
# Globals:   BLUE, NC (read)
# Arguments: $1 - message text; backslash escapes in it are expanded
print_status() {
  local message=$1
  printf '%b\n' "${BLUE}[INFO]${NC} ${message}"
}
# Print a success line to stdout, tagged with a green "[SUCCESS]" prefix.
# Globals:   GREEN, NC (read)
# Arguments: $1 - message text; backslash escapes in it are expanded
print_success() {
  local message=$1
  printf '%b\n' "${GREEN}[SUCCESS]${NC} ${message}"
}
# Print a warning line to stderr, tagged with a yellow "[WARNING]" prefix.
#
# Warnings are diagnostics, so they go to stderr and never mix into stdout
# that a caller may capture. The message is printed literally (%s): only the
# color codes are escape-expanded, so user-supplied text such as an unknown
# command-line option cannot inject escape sequences or truncate the line.
#
# Globals:   YELLOW, NC (read)
# Arguments: $1 - message text
print_warning() {
  printf '%b[WARNING]%b %s\n' "${YELLOW}" "${NC}" "${1-}" >&2
}
# Print an error line to stderr, tagged with a red "[ERROR]" prefix.
#
# Errors are diagnostics, so they go to stderr and stay visible when stdout
# is redirected or captured. The message is printed literally (%s): only the
# color codes are escape-expanded.
#
# Globals:   RED, NC (read)
# Arguments: $1 - message text
print_error() {
  printf '%b[ERROR]%b %s\n' "${RED}" "${NC}" "${1-}" >&2
}
# Wait until pods matching a label selector exist and report Ready.
#
# Makes up to 30 attempts, sleeping 5 seconds between them. On each attempt it
# first checks that at least one matching pod exists (kubectl wait fails
# immediately when nothing matches), then blocks in `kubectl wait` for up to
# the given timeout. The worst-case total duration is therefore roughly
# 30 * (timeout + 5) seconds, which is why the final error reports the time
# that actually elapsed rather than a fixed figure.
#
# Arguments:
#   $1 - namespace to look in
#   $2 - label selector (e.g. "app.kubernetes.io/component=controller")
#   $3 - timeout in seconds for each `kubectl wait` call (default: 300)
# Returns:
#   0 as soon as the pods are Ready, 1 when all attempts are exhausted.
wait_for_pods() {
  local namespace=$1
  local selector=$2
  local timeout=${3:-300}
  local max_retries=30
  local retry_interval=5
  local retry_count=0
  local started_at=$SECONDS
  local pods

  print_status "Waiting for pods with selector '$selector' in namespace '$namespace'..."
  while ((retry_count < max_retries)); do
    # "-o name" prints one "pod/<name>" line per pod and nothing at all when
    # there are none, so no header/"No resources found" filtering is needed
    # and pod names can never be mistaken for the header row.
    pods=$(kubectl get pods -n "$namespace" --selector="$selector" -o name 2>/dev/null)
    retry_count=$((retry_count + 1))

    if [[ -n "$pods" ]]; then
      if kubectl wait --namespace "$namespace" \
        --for=condition=ready pod \
        --selector="$selector" \
        --timeout="${timeout}s" 2>/dev/null; then
        print_success "Pods are ready"
        return 0
      fi
      print_status "Pods exist but are not ready yet... (attempt $retry_count/$max_retries)"
    else
      print_status "Waiting for pods to be created... (attempt $retry_count/$max_retries)"
    fi

    sleep "$retry_interval"
  done

  print_error "Timed out waiting for pods after $((SECONDS - started_at)) seconds ($max_retries attempts)"
  return 1
}
# Tear down the local environment: the bakery-ia namespace, the Kind cluster,
# the local registry container and the Colima VM.
#
# Every step is best-effort: a resource that is absent is reported and
# skipped, and a failing step does not abort the remaining ones. Resource
# names are matched exactly so look-alike names (e.g. "bakery-ia-local-2" or
# "my-kind-registry-mirror") never trigger a deletion of the wrong thing.
#
# Returns: 0 always.
cleanup() {
  local namespace='bakery-ia'
  local cluster_name='bakery-ia-local'
  local registry_name='kind-registry'
  local colima_profile='k8s-local'
  local delete_pid

  print_status "Starting cleanup process..."

  # Delete Kubernetes namespace with timeout
  print_status "Deleting namespace ${namespace}..."
  if kubectl get namespace "$namespace" &>/dev/null; then
    # The delete runs in the background and is given a 2 second grace period.
    # The whole cluster is removed below anyway, so there is no point in
    # blocking on finalizers here.
    kubectl delete namespace "$namespace" 2>/dev/null &
    delete_pid=$!
    sleep 2
    if kill -0 "$delete_pid" 2>/dev/null; then
      print_warning "kubectl delete namespace command taking too long, forcing termination..."
      kill "$delete_pid" 2>/dev/null
    fi
    # Reap the job so it does not linger and bash does not print a
    # "Terminated" notice later in the run.
    wait "$delete_pid" 2>/dev/null || true
    print_success "Namespace deletion attempted"
  else
    print_status "Namespace ${namespace} not found"
  fi

  # Delete Kind cluster
  print_status "Deleting Kind cluster..."
  if kind get clusters | grep -qxF -- "$cluster_name"; then
    if kind delete cluster --name "$cluster_name"; then
      print_success "Kind cluster deleted"
    else
      print_warning "Failed to delete Kind cluster ${cluster_name}"
    fi
  else
    print_status "Kind cluster ${cluster_name} not found"
  fi

  # Stop local registry
  print_status "Stopping local registry..."
  if docker ps -a --format '{{.Names}}' | grep -qxF -- "$registry_name"; then
    # Failures are tolerated on purpose: the container may already be stopped.
    docker stop "$registry_name" 2>/dev/null || true
    docker rm "$registry_name" 2>/dev/null || true
    print_success "Local registry removed"
  else
    print_status "Local registry not found"
  fi

  # Stop Colima
  print_status "Stopping Colima..."
  # Compare the first column (profile name) exactly instead of grepping the
  # whole table.
  if colima list \
    | awk -v profile="$colima_profile" '$1 == profile { found = 1 } END { exit !found }'; then
    if colima stop --profile "$colima_profile"; then
      print_success "Colima stopped"
    else
      print_warning "Failed to stop Colima profile ${colima_profile}"
    fi
  else
    print_status "Colima profile ${colima_profile} not found"
  fi

  print_success "Cleanup completed!"
  echo "----------------------------------------"
}
# Verify that the files setup relies on are present in the current directory.
#
# kind-config.yaml is mandatory: when it is missing the script exits with
# status 1. The encryption directory is optional: it is only looked for when
# kind-config.yaml mentions it, and its absence produces warnings only.
check_config_files() {
  print_status "Checking for required configuration files..."

  # Mandatory: the Kind cluster definition.
  if ! [ -f kind-config.yaml ]; then
    print_error "kind-config.yaml not found in current directory!"
    print_error "Please ensure kind-config.yaml exists with your cluster configuration."
    exit 1
  fi

  # Optional: the encryption directory, relevant only if the config refers to it.
  if grep -q "infrastructure/kubernetes/encryption" kind-config.yaml \
    && ! [ -d "./infrastructure/kubernetes/encryption" ]; then
    print_warning "Encryption directory './infrastructure/kubernetes/encryption' not found"
    print_warning "Some encryption configurations may not work properly"
  fi

  print_success "Configuration files check completed"
}
# Ensure the local Docker registry container (kind-registry) is running and
# published on 127.0.0.1:5001.
#
# Three situations are handled:
#   - container running          -> nothing to do
#   - container exists, stopped  -> start it (a plain `docker run --name`
#                                   would fail with a name conflict)
#   - container does not exist   -> create it from the registry:2 image
#
# Outputs: status messages, then a final "<name>:<port>" line on stdout.
# Exits with status 1 when the container can be neither started nor created.
create_local_registry() {
  local reg_name='kind-registry'
  local reg_port='5001'
  local running

  print_status "Setting up local Docker registry..."

  # 'true' / 'false' when the container exists; empty when inspect fails
  # because there is no such container.
  running=$(docker inspect -f '{{.State.Running}}' "${reg_name}" 2>/dev/null || true)

  case "$running" in
    true)
      print_success "Local registry already running at localhost:${reg_port}"
      ;;
    false)
      print_status "Starting existing registry container..."
      if docker start "${reg_name}" >/dev/null; then
        print_success "Local registry started at localhost:${reg_port}"
      else
        print_error "Failed to start existing local registry"
        exit 1
      fi
      ;;
    *)
      print_status "Creating registry container on port ${reg_port}..."
      if docker run \
        -d --restart=always \
        -p "127.0.0.1:${reg_port}:5000" \
        --name "${reg_name}" \
        registry:2; then
        print_success "Local registry created at localhost:${reg_port}"
      else
        print_error "Failed to create local registry"
        exit 1
      fi
      ;;
  esac

  # Emit the registry coordinates. NOTE(review): no caller in this file
  # captures this line; it is kept because it is part of the function's output.
  echo "${reg_name}:${reg_port}"
}
# Wire the local registry into the Kind cluster:
#   1. attach the registry container to the "kind" Docker network,
#   2. write a containerd hosts.toml on the control-plane node so that
#      localhost:5001 resolves to the registry container,
#   3. restart containerd on the node,
#   4. publish the local-registry-hosting ConfigMap in kube-public.
#
# Steps 1-3 are required for image pulls to work, so a failure in any of them
# prints an error and exits with status 1 instead of reporting success.
# Step 4 is documentation only and merely warns on failure.
connect_registry_to_kind() {
  local reg_name='kind-registry'
  local reg_port='5001'
  local node='bakery-ia-local-control-plane'
  local registry_dir="/etc/containerd/certs.d/localhost:${reg_port}"
  local kind_network

  print_status "Connecting registry to Kind network..."
  # Prints the JSON of the registry's "kind" network attachment, or the
  # literal string "null" when it is not attached. A failing inspect means the
  # registry container is missing, which must not be read as "already connected".
  if ! kind_network=$(docker inspect -f='{{json .NetworkSettings.Networks.kind}}' "${reg_name}"); then
    print_error "Registry container '${reg_name}' not found; cannot connect it to Kind"
    exit 1
  fi
  if [ "$kind_network" = 'null' ]; then
    if ! docker network connect "kind" "${reg_name}"; then
      print_error "Failed to connect registry to Kind network"
      exit 1
    fi
    print_success "Registry connected to Kind network"
  else
    print_success "Registry already connected to Kind network"
  fi

  # Configure containerd in the Kind node to use the registry
  print_status "Configuring containerd to use local registry..."
  if ! docker exec "$node" mkdir -p "$registry_dir"; then
    print_error "Failed to create ${registry_dir} on ${node}"
    exit 1
  fi
  # The file content is streamed over stdin (docker exec -i), which avoids
  # nesting a heredoc inside a quoted "sh -c" string.
  if ! docker exec -i "$node" sh -c "cat > '${registry_dir}/hosts.toml'" <<EOF
server = "http://localhost:${reg_port}"
[host."http://${reg_name}:5000"]
  capabilities = ["pull", "resolve", "push"]
  skip_verify = true
EOF
  then
    print_error "Failed to write containerd registry configuration on ${node}"
    exit 1
  fi

  # Restart containerd to pick up new configuration
  if ! docker exec "$node" systemctl restart containerd; then
    print_error "Failed to restart containerd on ${node}"
    exit 1
  fi
  print_success "Containerd configured for local registry"

  # Document the local registry
  print_status "Documenting local registry in cluster..."
  if kubectl apply -f - <<EOF
apiVersion: v1
kind: ConfigMap
metadata:
  name: local-registry-hosting
  namespace: kube-public
data:
  localRegistryHosting.v1: |
    host: "localhost:${reg_port}"
    help: "https://kind.sigs.k8s.io/docs/user/local-registry/"
EOF
  then
    print_success "Registry documented in cluster"
  else
    print_warning "Failed to document registry (non-critical)"
  fi
}
# Provision the complete local environment and print an access summary.
#
# Steps, in order: validate config files, start the Colima VM, create the
# local registry, create the Kind cluster from kind-config.yaml, connect the
# registry to the cluster, install the NGINX ingress controller and wait for
# it to become ready.
#
# Globals written: CLUSTER_NAME, HTTP_HOST_PORT, HTTPS_HOST_PORT (values read
#   from kind-config.yaml, falling back to bakery-ia-local / 80 / 443 when the
#   file does not declare them).
# Exits with status 1 as soon as a required step fails.
setup() {
  local colima_profile='k8s-local'
  local kind_config='kind-config.yaml'
  # NOTE(review): this URL tracks the ingress-nginx "main" branch, so the
  # installed version is not pinned.
  local ingress_manifest='https://raw.githubusercontent.com/kubernetes/ingress-nginx/main/deploy/static/provider/kind/deploy.yaml'
  local inotify_ok=true

  print_status "Starting setup process..."

  # Check for required config files
  check_config_files

  # 1. Start Colima with adequate resources for SigNoz
  print_status "Starting Colima with 8 CPU, 16GB memory, 120GB disk..."
  if ! colima start --cpu 8 --memory 16 --disk 120 --runtime docker --profile "$colima_profile"; then
    print_error "Failed to start Colima"
    exit 1
  fi
  print_success "Colima started successfully"

  # Increase inotify limits for Colima to prevent "too many open files" errors
  print_status "Increasing inotify limits in Colima VM..."
  colima ssh --profile "$colima_profile" "sudo sysctl -w fs.inotify.max_user_watches=524288" \
    || inotify_ok=false
  colima ssh --profile "$colima_profile" "sudo sysctl -w fs.inotify.max_user_instances=512" \
    || inotify_ok=false
  if [ "$inotify_ok" = true ]; then
    print_success "Inotify limits increased"
  else
    print_warning "Could not increase inotify limits (continuing)"
  fi

  # 2. Create local registry before Kind cluster
  create_local_registry

  # 3. Create Kind cluster using existing configuration with registry support
  print_status "Creating Kind cluster with registry configuration..."
  if [ ! -f "$kind_config" ]; then
    print_error "kind-config.yaml file not found!"
    exit 1
  fi
  print_status "Using kind-config.yaml with local registry support"

  # Extract cluster name from config for verification: take the first line
  # containing "name:". The default is applied with ${var:-...} because a
  # "|| echo default" after the pipeline never fires (the pipeline's status is
  # that of its last command, which succeeds even on empty input).
  CLUSTER_NAME=$(grep -E 'name:[[:space:]]*' "$kind_config" \
    | head -n 1 \
    | sed 's/name:[[:space:]]*//' \
    | tr -d '[:space:]')
  CLUSTER_NAME=${CLUSTER_NAME:-bakery-ia-local}
  print_status "Creating cluster: $CLUSTER_NAME"

  if ! kind create cluster --config "$kind_config"; then
    print_error "Failed to create Kind cluster"
    exit 1
  fi
  print_success "Kind cluster created successfully"

  # 4. Connect registry to Kind network
  connect_registry_to_kind

  # 5. Install NGINX Ingress Controller
  print_status "Installing NGINX Ingress Controller..."
  if ! kubectl apply -f "$ingress_manifest"; then
    print_error "Failed to apply NGINX Ingress Controller manifest"
    exit 1
  fi
  print_success "NGINX Ingress Controller manifest applied"

  # Wait for ingress-nginx pods to be ready with retry logic
  if ! wait_for_pods "ingress-nginx" "app.kubernetes.io/component=controller" 300; then
    print_error "NGINX Ingress Controller failed to become ready"
    print_status "Checking pod status for debugging..."
    kubectl get pods -n ingress-nginx
    kubectl describe pods -n ingress-nginx
    exit 1
  fi
  print_success "NGINX Ingress Controller ready (using Kind's built-in NodePort configuration)"

  # 6. Verify port mappings from kind-config.yaml
  print_status "Verifying port mappings from configuration..."
  # The hostPort is expected on the line right after the matching
  # containerPort; defaults apply when the mapping is absent.
  HTTP_HOST_PORT=$(grep -A1 "containerPort: 30080" "$kind_config" \
    | awk '/hostPort:/ { print $2; exit }')
  HTTP_HOST_PORT=${HTTP_HOST_PORT:-80}
  HTTPS_HOST_PORT=$(grep -A1 "containerPort: 30443" "$kind_config" \
    | awk '/hostPort:/ { print $2; exit }')
  HTTPS_HOST_PORT=${HTTPS_HOST_PORT:-443}

  # Print cluster info
  echo ""
  print_success "Setup completed successfully!"
  echo "----------------------------------------"
  print_status "Cluster Information:"
  echo " - Colima profile: k8s-local"
  echo " - Kind cluster: $CLUSTER_NAME"
  echo " - Local registry: localhost:5001"
  echo ""
  print_status "Port Mappings (configured in kind-config.yaml):"
  echo " - HTTP Ingress: localhost:${HTTP_HOST_PORT} -> Kind NodePort 30080"
  echo " - HTTPS Ingress: localhost:${HTTPS_HOST_PORT} -> Kind NodePort 30443"
  echo " - Frontend Direct: localhost:3000 -> container:30300"
  echo " - Gateway Direct: localhost:8000 -> container:30800"
  echo ""
  print_status "How to access your application:"
  echo " 1. Start Tilt: tilt up"
  echo " 2. Access via:"
  echo " - Ingress: http://localhost (or https://localhost)"
  echo " - Direct: http://localhost:3000 (frontend), http://localhost:8000 (gateway)"
  echo " - Tilt UI: http://localhost:10350"
  echo "----------------------------------------"
  print_status "Local Registry Information:"
  echo " - Registry URL: localhost:5001"
  echo " - Images pushed to: localhost:5001/bakery/<service>"
  echo " - Tiltfile already configured: default_registry('localhost:5001')"
  echo "----------------------------------------"
}
# Print the command-line help text (options and requirements) to stdout.
# The script name shown in the first line is taken from $0.
usage() {
  cat <<EOF
Usage: $0 [option]

Options:
 cleanup Clean up all resources (namespace, cluster, colima)
 setup Set up the complete environment
 full Clean up first, then set up (default)
 help Show this help message

Requirements:
 - kind-config.yaml must exist in current directory
 - For encryption: ./infrastructure/kubernetes/encryption directory
EOF
}
# Entry point: dispatch on the first command-line argument.
#   cleanup            tear everything down
#   setup              provision the environment
#   full (default)     cleanup followed by setup
#   help | -h | --help print usage
# Any other value prints a warning plus the usage text and exits with status 1.
main() {
  local action=${1:-full}

  case "$action" in
    cleanup)
      cleanup
      ;;
    setup)
      setup
      ;;
    full)
      cleanup
      setup
      ;;
    help | -h | --help)
      usage
      ;;
    *)
      print_warning "Unknown option: $1"
      echo ""
      usage
      exit 1
      ;;
  esac
}

main "$@"