diff --git a/docs/PILOT_LAUNCH_GUIDE.md b/docs/PILOT_LAUNCH_GUIDE.md index fa847b2f..9156c04b 100644 --- a/docs/PILOT_LAUNCH_GUIDE.md +++ b/docs/PILOT_LAUNCH_GUIDE.md @@ -2,27 +2,29 @@ **Complete guide for deploying to production for a 10-tenant pilot program** -**Last Updated:** 2026-01-07 +**Last Updated:** 2026-01-11 **Target Environment:** clouding.io VPS with MicroK8s **Estimated Cost:** €41-81/month -**Time to Deploy:** 2-4 hours (first time) +**Time to Deploy:** 3-5 hours (first time, including fixes) +**Status:** ⚠️ REQUIRES PRE-DEPLOYMENT FIXES - See [Production VPS Deployment Fixes](../PRODUCTION_VPS_DEPLOYMENT_FIXES.md) --- ## Table of Contents 1. [Executive Summary](#executive-summary) -2. [Pre-Launch Checklist](#pre-launch-checklist) -3. [VPS Provisioning](#vps-provisioning) -4. [Infrastructure Setup](#infrastructure-setup) -5. [Domain & DNS Configuration](#domain--dns-configuration) -6. [TLS/SSL Certificates](#tlsssl-certificates) -7. [Email & Communication Setup](#email--communication-setup) -8. [Kubernetes Deployment](#kubernetes-deployment) -9. [Configuration & Secrets](#configuration--secrets) -10. [Database Migrations](#database-migrations) -11. [Verification & Testing](#verification--testing) -12. [Post-Deployment](#post-deployment) +2. [⚠️ CRITICAL: Pre-Deployment Fixes](#critical-pre-deployment-fixes) +3. [Pre-Launch Checklist](#pre-launch-checklist) +4. [VPS Provisioning](#vps-provisioning) +5. [Infrastructure Setup](#infrastructure-setup) +6. [Domain & DNS Configuration](#domain--dns-configuration) +7. [TLS/SSL Certificates](#tlsssl-certificates) +8. [Email & Communication Setup](#email--communication-setup) +9. [Kubernetes Deployment](#kubernetes-deployment) +10. [Configuration & Secrets](#configuration--secrets) +11. [Database Migrations](#database-migrations) +12. [Verification & Testing](#verification--testing) +13. 
[Post-Deployment](#post-deployment) --- @@ -62,6 +64,162 @@ A complete multi-tenant SaaS platform with: --- +## ⚠️ CRITICAL: Pre-Deployment Configuration + +**READ THIS FIRST:** The Kubernetes configuration requires updates for secure production deployment. + +### 🔴 Configuration Status + +Your manifests need the following updates before deploying to production: + +### Required Configuration Changes + +#### 1. Remove imagePullSecrets (BLOCKING) +**Why:** Images are public/don't require authentication +**Impact if skipped:** All pods fail with ImagePullBackOff + +#### 2. Update Image Tags to Semantic Versions (BLOCKING) +**Why:** Using 'latest' causes non-deterministic deployments +**Impact if skipped:** Unpredictable behavior, impossible rollbacks + +#### 3. Fix SigNoz Namespace References (BLOCKING) - ✅ **ALREADY FIXED** +**Why:** SigNoz must be in bakery-ia namespace +**Impact if skipped:** Kustomize apply fails +**Status:** ✅ Fixed in latest commit + +#### 4. Generate Production Secrets (HIGH PRIORITY) +**Why:** Default secrets are placeholders and insecure +**Impact if skipped:** CRITICAL security vulnerability + +#### 5. 
Update Cert-Manager Email (HIGH PRIORITY) +**Why:** Receive Let's Encrypt renewal notifications +**Impact if skipped:** Won't receive SSL expiry warnings + +### ✅ Already Correct (No Changes Needed) + +- **Storage Class** - `microk8s-hostpath` is correct for MicroK8s +- **Domain Names** - `bakewise.ai` is your production domain +- **Service Types** - ClusterIP + Ingress is correct architecture +- **Network Policies** - Not required for single-namespace deployment +- **SigNoz Namespace** - ✅ Fixed to use bakery-ia namespace + +### Step-by-Step Configuration Script + +Run these commands on your **local machine** before deployment: + +```bash +# Navigate to repository root +cd /path/to/bakery-ia + +# ======================================== +# STEP 1: Remove imagePullSecrets +# ======================================== +echo "Step 1: Removing imagePullSecrets..." +chmod +x infrastructure/kubernetes/remove-imagepullsecrets.sh +./infrastructure/kubernetes/remove-imagepullsecrets.sh + +# Verify removal +grep -r "imagePullSecrets" infrastructure/kubernetes/base/ && \ + echo "⚠️ WARNING: Some files still have imagePullSecrets" || \ + echo "✅ imagePullSecrets removed" + +# ======================================== +# STEP 2: Update Image Tags +# ======================================== +echo -e "\nStep 2: Updating image tags..." +export VERSION="1.0.0" # Change this to your version +sed -i.bak "s/newTag: latest/newTag: v${VERSION}/g" infrastructure/kubernetes/overlays/prod/kustomization.yaml + +# Verify no 'latest' tags remain +grep "newTag:" infrastructure/kubernetes/overlays/prod/kustomization.yaml | grep "latest" && \ + echo "⚠️ WARNING: Some images still use 'latest'" || \ + echo "✅ All images now use version v${VERSION}" + +# ======================================== +# STEP 3: Generate Production Secrets +# ======================================== +echo -e "\nStep 3: Generating production secrets..." 
+echo "Copy these values to infrastructure/kubernetes/base/secrets.yaml" +echo "================================================================" + +# JWT and API secrets +echo -e "\n### JWT and API Keys ###" +export JWT_SECRET=$(openssl rand -base64 32) +export JWT_REFRESH_SECRET=$(openssl rand -base64 32) +export SERVICE_API_KEY=$(openssl rand -hex 32) + +echo "JWT_SECRET_KEY: $(echo -n $JWT_SECRET | base64)" +echo "JWT_REFRESH_SECRET_KEY: $(echo -n $JWT_REFRESH_SECRET | base64)" +echo "SERVICE_API_KEY: $(echo -n $SERVICE_API_KEY | base64)" + +# Database passwords +echo -e "\n### Database Passwords ###" +for db in auth tenant inventory sales orders procurement forecasting analytics notification monitoring users products recipes stock menu demo_session orchestrator cleanup; do + password=$(openssl rand -base64 24) + echo "${db^^}_DB_PASSWORD: $(echo -n $password | base64)" +done + +echo -e "\n================================================================" +echo "⚠️ SAVE THESE SECRETS SECURELY!" +echo "Update infrastructure/kubernetes/base/secrets.yaml with the values above" +echo "Press Enter when you've updated secrets.yaml..." +read + +# ======================================== +# STEP 4: Update Cert-Manager Email +# ======================================== +echo -e "\nStep 4: Updating cert-manager email..." +sed -i.bak 's/admin@bakery-ia.local/admin@bakewise.ai/g' \ + infrastructure/kubernetes/base/components/cert-manager/cluster-issuer-production.yaml + +grep "admin@bakewise.ai" infrastructure/kubernetes/base/components/cert-manager/cluster-issuer-production.yaml && \ + echo "✅ Cert-manager email updated" || \ + echo "⚠️ WARNING: Email not updated" + +# ======================================== +# FINAL VALIDATION +# ======================================== +echo -e "\n========================================" +echo "Pre-Deployment Configuration Complete!" 
+echo "========================================" +echo "" +echo "Validation Checklist:" +echo " ✅ imagePullSecrets removed" +echo " ✅ Image tags updated to v${VERSION}" +echo " ✅ SigNoz namespace fixed (bakery-ia)" +echo " ⚠️ Production secrets updated in secrets.yaml (manual verification required)" +echo " ✅ Cert-manager email updated" +echo "" +echo "Next: Copy manifests to VPS and begin deployment" +``` + +### Manual Verification + +After running the script above: + +1. **Verify secrets.yaml updated:** + ```bash + # Check that JWT_SECRET_KEY is not the placeholder + grep "JWT_SECRET_KEY" infrastructure/kubernetes/base/secrets.yaml + # Should NOT show the old placeholder value + ``` + +2. **Check image tags:** + ```bash + grep "newTag:" infrastructure/kubernetes/overlays/prod/kustomization.yaml + # All should show v1.0.0 (or your version), NOT 'latest' + ``` + +3. **Verify SigNoz namespace:** + ```bash + grep -A 3 "name: signoz" infrastructure/kubernetes/overlays/prod/kustomization.yaml + # All should show: namespace: bakery-ia + ``` + +**⏱️ Estimated Time:** 30-45 minutes + +--- + ## Pre-Launch Checklist ### Required Accounts & Services @@ -192,10 +350,15 @@ ls infrastructure/kubernetes/overlays/prod/ ### Step 1: Install MicroK8s +**Using MicroK8s for production VPS deployment on clouding.io** + ```bash # SSH into your VPS ssh root@$VPS_IP +# Update system +apt update && apt upgrade -y + # Install MicroK8s snap install microk8s --classic --channel=1.28/stable @@ -208,28 +371,45 @@ newgrp microk8s microk8s status --wait-ready ``` -### Step 2: Enable Required Add-ons +### Step 2: Enable Required MicroK8s Addons + +**All required components are available as MicroK8s addons:** ```bash -# Enable core add-ons -microk8s enable dns -microk8s enable hostpath-storage -microk8s enable ingress -microk8s enable cert-manager -microk8s enable metrics-server -microk8s enable rbac - -# Optional but recommended -microk8s enable prometheus # For monitoring -microk8s enable 
registry # If using local registry +# Enable core addons +microk8s enable dns # DNS resolution within cluster +microk8s enable hostpath-storage # Provides microk8s-hostpath storage class +microk8s enable ingress # Nginx ingress controller +microk8s enable cert-manager # Let's Encrypt SSL certificates +microk8s enable metrics-server # For HPA autoscaling +microk8s enable rbac # Role-based access control # Setup kubectl alias echo "alias kubectl='microk8s kubectl'" >> ~/.bashrc source ~/.bashrc -# Verify +# Verify all components are running kubectl get nodes +# Should show: Ready + +kubectl get storageclass +# Should show: microk8s-hostpath (default) + kubectl get pods -A +# Should show pods in: kube-system, ingress-nginx, cert-manager namespaces + +# Verify metrics-server is working +kubectl top nodes +# Should return CPU/Memory metrics +``` + +**Optional but Recommended:** +```bash +# Enable Prometheus for additional monitoring (optional) +microk8s enable prometheus + +# Enable registry if you want local image storage (optional) +microk8s enable registry ``` ### Step 3: Configure Firewall @@ -584,44 +764,106 @@ docker push YOUR_VPS_IP:32000/bakery/auth-service ### Step 2: Update Production Configuration -The production configuration is already set up for **bakewise.ai** domain: +**⚠️ CRITICAL:** The default configuration uses **bakewise.ai** domain. You MUST update this before deployment if using a different domain. 
+ +#### Required Configuration Updates + +**Step 2.1: Remove imagePullSecrets** + +```bash +# On your local machine +cd bakery-ia + +# Remove imagePullSecrets from all deployment files +find infrastructure/kubernetes/base -name "*.yaml" -type f -exec sed -i.bak '/imagePullSecrets:/,+1d' {} \; + +# Verify removal +grep -r "imagePullSecrets" infrastructure/kubernetes/base/ +# Should return NO results +``` + +**Step 2.2: Update Image Tags (Use Semantic Versions)** + +```bash +# Edit kustomization.yaml to replace 'latest' with actual version +nano infrastructure/kubernetes/overlays/prod/kustomization.yaml + +# Find the images section (lines 163-196) and update: +# BEFORE: +# - name: bakery/auth-service +# newTag: latest +# AFTER: +# - name: bakery/auth-service +# newTag: v1.0.0 + +# Do this for ALL 22 services, or use this helper: +export VERSION="1.0.0" # Your version + +# Create a script to update all image tags +cat > /tmp/update-tags.sh <<'EOF' +#!/bin/bash +VERSION="${1:-1.0.0}" +sed -i "s/newTag: latest/newTag: v${VERSION}/g" infrastructure/kubernetes/overlays/prod/kustomization.yaml +EOF + +chmod +x /tmp/update-tags.sh +/tmp/update-tags.sh ${VERSION} + +# Verify no 'latest' tags remain +grep "newTag:" infrastructure/kubernetes/overlays/prod/kustomization.yaml | grep -c "latest" +# Should return: 0 +``` + +**Step 2.3: Fix SigNoz Namespace References** + +```bash +# Update SigNoz patches to use bakery-ia namespace instead of signoz +sed -i 's/namespace: signoz/namespace: bakery-ia/g' infrastructure/kubernetes/overlays/prod/kustomization.yaml + +# Verify changes (should show bakery-ia in all 3 patches) +grep -A 3 "name: signoz" infrastructure/kubernetes/overlays/prod/kustomization.yaml +``` + +**Step 2.4: Update Cert-Manager Email** + +```bash +# Update Let's Encrypt notification email to your production email +sed -i "s/admin@bakery-ia.local/admin@bakewise.ai/g" \ + infrastructure/kubernetes/base/components/cert-manager/cluster-issuer-production.yaml +``` + 
+**Step 2.5: Generate and Update Production Secrets** + +```bash +# Generate JWT secrets +export JWT_SECRET=$(openssl rand -base64 32) +export JWT_REFRESH_SECRET=$(openssl rand -base64 32) +export SERVICE_API_KEY=$(openssl rand -hex 32) + +# Display base64-encoded values for secrets.yaml +echo "=== JWT Secrets (copy these to secrets.yaml) ===" +echo "JWT_SECRET_KEY: $(echo -n $JWT_SECRET | base64)" +echo "JWT_REFRESH_SECRET_KEY: $(echo -n $JWT_REFRESH_SECRET | base64)" +echo "SERVICE_API_KEY: $(echo -n $SERVICE_API_KEY | base64)" +echo "" + +# Generate strong database passwords for all 18 databases +echo "=== Database Passwords (copy these to secrets.yaml) ===" +for db in auth tenant inventory sales orders procurement forecasting analytics notification monitoring users products recipes stock menu demo_session orchestrator cleanup; do + password=$(openssl rand -base64 24) + echo "${db}_DB_PASSWORD: $(echo -n $password | base64)" +done + +# Now manually update infrastructure/kubernetes/base/secrets.yaml with the generated values +nano infrastructure/kubernetes/base/secrets.yaml +``` **Production URLs:** - **Main Application:** https://bakewise.ai - **API Endpoints:** https://bakewise.ai/api/v1/... 
-- **Monitoring Dashboard:** https://monitoring.bakewise.ai/grafana -- **Prometheus:** https://monitoring.bakewise.ai/prometheus -- **SigNoz (Traces/Metrics/Logs):** https://monitoring.bakewise.ai/signoz +- **SigNoz (Monitoring):** https://monitoring.bakewise.ai/signoz - **AlertManager:** https://monitoring.bakewise.ai/alertmanager -```bash -# Verify the configuration is correct: -cat infrastructure/kubernetes/overlays/prod/prod-ingress.yaml | grep -A 3 "host:" - -# Expected output should show: -# - host: bakewise.ai -# - host: monitoring.bakewise.ai - -# Verify CORS configuration -cat infrastructure/kubernetes/overlays/prod/prod-configmap.yaml | grep CORS - -# Expected: CORS_ORIGINS: "https://bakewise.ai" -``` - -**If using a different domain**, update these files: -```bash -# 1. Update domain names -nano infrastructure/kubernetes/overlays/prod/prod-ingress.yaml -# Replace bakewise.ai with your domain - -# 2. Update ConfigMap -nano infrastructure/kubernetes/overlays/prod/prod-configmap.yaml -# Update CORS_ORIGINS - -# 3. Verify image names (if using custom registry) -nano infrastructure/kubernetes/overlays/prod/kustomization.yaml -``` - --- ## Configuration & Secrets @@ -681,34 +923,75 @@ echo -n "your-password-here" | base64 **CRITICAL:** Never commit real secrets to git! Use `.gitignore` for secrets files. -### Step 3: Apply Secrets +### Step 3: Apply Application Secrets ```bash -# Copy manifests to VPS -scp -r infrastructure/kubernetes user@YOUR_VPS_IP:~/ +# Copy manifests to VPS (from local machine) +scp -r infrastructure/kubernetes root@YOUR_VPS_IP:~/ # SSH to VPS -ssh user@YOUR_VPS_IP +ssh root@YOUR_VPS_IP -# Apply secrets -kubectl apply -f ~/infrastructure/kubernetes/base/secrets.yaml +# Apply application secrets +kubectl apply -f ~/infrastructure/kubernetes/base/secrets.yaml -n bakery-ia # Verify secrets created kubectl get secrets -n bakery-ia +# Should show multiple secrets including postgres-tls, redis-tls, app-secrets, etc. 
``` --- ## Database Migrations -### Step 1: Deploy Databases +### Step 0: Deploy SigNoz Monitoring (BEFORE Application) + +**⚠️ CRITICAL:** SigNoz must be deployed BEFORE the application into the **bakery-ia namespace** because the production kustomization patches SigNoz resources. ```bash # On VPS -kubectl apply -k ~/kubernetes/overlays/prod +# 1. Ensure bakery-ia namespace exists +kubectl get namespace bakery-ia || kubectl create namespace bakery-ia + +# 2. Add Helm repo +helm repo add signoz https://charts.signoz.io +helm repo update + +# 3. Install SigNoz into bakery-ia namespace (NOT separate signoz namespace) +helm install signoz signoz/signoz \ + -n bakery-ia \ + --set frontend.service.type=ClusterIP \ + --set clickhouse.persistence.size=20Gi \ + --set clickhouse.persistence.storageClass=microk8s-hostpath + +# 4. Wait for SigNoz to be ready (this may take 10-15 minutes) +kubectl wait --for=condition=ready pod \ + -l app.kubernetes.io/instance=signoz \ + -n bakery-ia \ + --timeout=900s + +# 5. Verify SigNoz components running in bakery-ia namespace +kubectl get pods -n bakery-ia -l app.kubernetes.io/instance=signoz +# Should show: signoz-0, signoz-otel-collector, signoz-clickhouse, signoz-zookeeper, signoz-alertmanager + +# 6. Verify StatefulSets exist (kustomization will patch these) +kubectl get statefulset -n bakery-ia | grep signoz +# Should show: signoz, signoz-clickhouse +``` + +**⚠️ Important:** Do NOT create a separate `signoz` namespace. SigNoz must be in `bakery-ia` namespace for the overlays to work correctly. 
+### Step 1: Deploy Application and Databases + +```bash +# On VPS +kubectl apply -k ~/infrastructure/kubernetes/overlays/prod # Wait for databases to be ready (5-10 minutes) -kubectl wait --for=condition=ready pod -l app.kubernetes.io/component=database -n bakery-ia --timeout=600s +kubectl wait --for=condition=ready pod \ + -l app.kubernetes.io/component=database \ + -n bakery-ia \ + --timeout=600s # Check status kubectl get pods -n bakery-ia -l app.kubernetes.io/component=database @@ -1503,18 +1786,41 @@ kubectl scale deployment monitoring -n bakery-ia --replicas=0 ## Summary Checklist -Before going live, ensure: +### Pre-Deployment Configuration (LOCAL MACHINE) +- [ ] **imagePullSecrets removed** - Deleted from all 67 manifests +- [ ] **Image tags updated** - Changed all 'latest' to v1.0.0 (semantic version) +- [ ] **SigNoz namespace fixed** - ✅ Already done (bakery-ia namespace) +- [ ] **Production secrets generated** - JWT, database passwords, API keys +- [ ] **secrets.yaml updated** - Replaced all placeholder values +- [ ] **Cert-manager email updated** - admin@bakewise.ai +- [ ] **Manifests validated** - No 'latest' tags, no imagePullSecrets remaining +### Infrastructure Setup - [ ] VPS provisioned and accessible -- [ ] MicroK8s installed and configured +- [ ] MicroK8s installed and required addons enabled +- [ ] Ingress controller enabled (`microk8s enable ingress`) +- [ ] metrics-server enabled and working (`kubectl top nodes` returns metrics) +- [ ] cert-manager enabled (`microk8s enable cert-manager`) +- [ ] hostpath-storage enabled (`microk8s-hostpath` storage class available) - [ ] Domain registered and DNS configured -- [ ] Cloudflare protection enabled -- [ ] TLS certificates generated +- [ ] Cloudflare protection enabled (optional but recommended) + +### Secrets and Configuration +- [ ] TLS certificates generated (postgres, redis) - [ ] Email service configured and tested - [ ] WhatsApp API setup (optional for launch) -- [ ] Container images built and pushed -- [ ] Production configs updated (domains, CORS, etc.) -- [ ] Secrets generated (strong passwords!) 
+- [ ] Container images built and pushed with version tags +- [ ] Production configs verified (domains, CORS, storage class) +- [ ] Strong passwords generated for all services +- [ ] No registry pull secret required (imagePullSecrets removed; images are public) +- [ ] Application secrets applied + +### Monitoring +- [ ] SigNoz deployed via Helm +- [ ] SigNoz pods running and healthy +- [ ] SigNoz installed in the `bakery-ia` namespace (no separate `signoz` namespace) + +### Application Deployment - [ ] All pods running successfully - [ ] Databases accepting TLS connections - [ ] Let's Encrypt certificates issued @@ -1522,11 +1828,15 @@ Before going live, ensure: - [ ] API health check passing - [ ] Test user can login - [ ] Email delivery working -- [ ] Monitoring dashboards loading +- [ ] SigNoz monitoring accessible +- [ ] Metrics flowing to SigNoz + +### Post-Deployment - [ ] Backups configured and tested - [ ] Team trained on operations - [ ] Documentation complete - [ ] Emergency procedures documented +- [ ] Monitoring alerts configured --- @@ -1537,6 +1847,16 @@ Before going live, ensure: --- -**Document Version:** 1.0 -**Last Updated:** 2026-01-07 +**Document Version:** 2.0 +**Last Updated:** 2026-01-11 **Maintained By:** DevOps Team +**Changes in v2.0:** +- Added critical pre-deployment fixes section +- Updated infrastructure setup to use MicroK8s addons (ingress, cert-manager, metrics-server, hostpath-storage) +- Added required component installation (nginx-ingress, metrics-server, etc.) 
+- Updated configuration steps with domain replacement +- Added Docker registry secret creation +- Added SigNoz Helm deployment before application +- Updated storage class configuration +- Added image tag version requirements +- Expanded verification checklist diff --git a/infrastructure/kubernetes/overlays/prod/kustomization.yaml b/infrastructure/kubernetes/overlays/prod/kustomization.yaml index 6acc7a0c..43ebe17c 100644 --- a/infrastructure/kubernetes/overlays/prod/kustomization.yaml +++ b/infrastructure/kubernetes/overlays/prod/kustomization.yaml @@ -105,7 +105,7 @@ patches: version: v1 kind: StatefulSet name: signoz-clickhouse - namespace: signoz + namespace: bakery-ia patch: |- - op: replace path: /spec/replicas @@ -125,7 +125,7 @@ patches: version: v1 kind: StatefulSet name: signoz - namespace: signoz + namespace: bakery-ia patch: |- - op: replace path: /spec/replicas @@ -145,7 +145,7 @@ patches: version: v1 kind: Deployment name: signoz-alertmanager - namespace: signoz + namespace: bakery-ia patch: |- - op: replace path: /spec/replicas
diff --git a/infrastructure/kubernetes/remove-imagepullsecrets.sh b/infrastructure/kubernetes/remove-imagepullsecrets.sh
new file mode 100755
index 00000000..560bbe0b
--- /dev/null
+++ b/infrastructure/kubernetes/remove-imagepullsecrets.sh
@@ -0,0 +1,84 @@
+#!/bin/bash
+# Remove every imagePullSecrets block from the Kubernetes base manifests.
+#
+# Usage (run from the repository root):
+#   ./infrastructure/kubernetes/remove-imagepullsecrets.sh
+#
+# Each manifest that contains the key is copied to <file>.bak before it is
+# rewritten. Exit status: 0 when no *.yaml manifest contains the key afterwards;
+# 1 when the base directory is missing, a rewrite fails, or the key remains.
+
+set -euo pipefail
+
+readonly BASE_DIR="infrastructure/kubernetes/base"
+# Matches the key only where it starts a mapping entry, not inside a comment.
+readonly KEY_REGEX='^ *imagePullSecrets:'
+
+# Print manifest $1 with each imagePullSecrets key and its list entries removed.
+# Entries are the following non-blank lines that are indented deeper than the
+# key, or "- " items at the same indentation as the key (both are valid YAML),
+# so lists holding more than one secret are removed completely.
+strip_image_pull_secrets() {
+  awk '
+    function indent_of(line,    rest) {
+      rest = line
+      sub(/^ +/, "", rest)
+      return length(line) - length(rest)
+    }
+    skipping {
+      depth = indent_of($0)
+      is_item = (depth == key_depth && $0 ~ /^ *-( |$)/)
+      if ($0 !~ /^ *$/ && (depth > key_depth || is_item)) {
+        next
+      }
+      skipping = 0
+    }
+    /^ *imagePullSecrets:/ {
+      key_depth = indent_of($0)
+      skipping = 1
+      next
+    }
+    { print }
+  ' "$1"
+}
+
+if [[ ! -d "$BASE_DIR" ]]; then
+  echo "ERROR: $BASE_DIR not found - run this script from the repository root" >&2
+  exit 1
+fi
+
+echo "Removing imagePullSecrets from all Kubernetes manifests..."
+
+changed=0
+# NUL-delimited paths survive spaces and backslashes; process substitution keeps
+# the loop in this shell so the counter is still set after the loop.
+while IFS= read -r -d '' file; do
+  grep -q -e "$KEY_REGEX" -- "$file" || continue
+
+  cp -- "$file" "$file.bak"
+  # Read the backup and write through the original path: this keeps the file
+  # mode and avoids "sed -i" without a suffix, which BSD/macOS sed rejects.
+  if ! strip_image_pull_secrets "$file.bak" > "$file"; then
+    cp -- "$file.bak" "$file"
+    echo "ERROR: failed to rewrite $file (restored from backup)" >&2
+    exit 1
+  fi
+
+  echo "Processed: $file"
+  changed=$((changed + 1))
+done < <(find "$BASE_DIR" -name "*.yaml" -type f -print0)
+
+echo ""
+echo "Verify removal:"
+# Only inspect *.yaml: the .bak backups still contain the key by design, so an
+# unfiltered recursive grep would report a failure after every successful run.
+if grep -r -n --include='*.yaml' -e "$KEY_REGEX" "$BASE_DIR"; then
+  echo "⚠️ WARNING: Some files still contain imagePullSecrets" >&2
+  exit 1
+fi
+
+echo "✅ Done! Removed imagePullSecrets from $changed manifest(s)"
+if (( changed > 0 )); then
+  echo "Backup files created with .bak extension"
+fi
+echo "✅ All imagePullSecrets removed successfully"