Add new infra architecture 13

This commit is contained in:
Urtzi Alfaro
2026-01-21 23:16:19 +01:00
parent 66dfd50fbc
commit aeff6b1537
22 changed files with 552 additions and 151 deletions

View File

@@ -427,15 +427,41 @@ kubectl get namespaces
# kubectl apply -f infrastructure/namespaces/tekton-pipelines.yaml
```
### Step 3.2: Deploy Cert-Manager ClusterIssuers
### Step 3.2: Install Cert-Manager and Deploy ClusterIssuers
> **Note:** The MicroK8s `cert-manager` addon may only create the namespace without installing the actual components. Install cert-manager manually to ensure it works correctly.
```bash
# Apply cert-manager configuration
kubectl apply -k infrastructure/platform/cert-manager/
# Check if cert-manager pods exist
kubectl get pods -n cert-manager
# If no pods are running, install cert-manager manually:
kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.14.4/cert-manager.yaml
# Wait for all cert-manager pods to be ready (this may take 1-2 minutes)
kubectl wait --for=condition=ready pod --all -n cert-manager --timeout=300s
# Verify all 3 components are running
kubectl get pods -n cert-manager
# Expected output:
# NAME READY STATUS RESTARTS AGE
# cert-manager-xxxxxxxxxx-xxxxx 1/1 Running 0 1m
# cert-manager-cainjector-xxxxxxxxxx-xxxxx 1/1 Running 0 1m
# cert-manager-webhook-xxxxxxxxxx-xxxxx 1/1 Running 0 1m
```
**Deploy ClusterIssuers:**
```bash
# Wait for webhook to be fully initialized
sleep 10
# Apply ClusterIssuers for Let's Encrypt
kubectl apply -f infrastructure/platform/cert-manager/cluster-issuer-staging.yaml
kubectl apply -f infrastructure/platform/cert-manager/cluster-issuer-production.yaml
# Verify ClusterIssuers are ready
kubectl get clusterissuer
kubectl describe clusterissuer letsencrypt-production
# Expected output:
# NAME READY AGE
@@ -443,6 +469,16 @@ kubectl describe clusterissuer letsencrypt-production
# letsencrypt-staging True 1m
```
**If you get webhook errors:**
```bash
# The webhook may need more time to initialize
# Wait and retry:
sleep 30
kubectl apply -f infrastructure/platform/cert-manager/cluster-issuer-staging.yaml
kubectl apply -f infrastructure/platform/cert-manager/cluster-issuer-production.yaml
```
> **Note:** Common configs (secrets, configmaps) and TLS secrets are automatically included when you apply the prod kustomization in Phase 6. No manual application needed.
---
@@ -551,13 +587,17 @@ kubectl wait --for=condition=ready pod -l app.kubernetes.io/part-of=tekton-trigg
# Verify Tekton is installed
kubectl get pods -n tekton-pipelines
# Step 3: Get Gitea password and generate webhook token
# Step 3: Create flux-system namespace (required by Tekton helm chart)
# The Tekton chart creates a secret for Flux in this namespace
kubectl create namespace flux-system --dry-run=client -o yaml | kubectl apply -f -
# Step 4: Get Gitea password and generate webhook token
export GITEA_ADMIN_PASSWORD=$(kubectl get secret gitea-admin-secret -n gitea -o jsonpath='{.data.password}' | base64 -d)
export TEKTON_WEBHOOK_TOKEN=$(openssl rand -hex 32)
echo "Tekton Webhook Token: $TEKTON_WEBHOOK_TOKEN"
echo "⚠️ SAVE THIS TOKEN - needed to configure Gitea webhook!"
# Step 4: Deploy Bakery-IA CI/CD pipelines and tasks
# Step 5: Deploy Bakery-IA CI/CD pipelines and tasks
helm upgrade --install tekton-cicd infrastructure/cicd/tekton-helm \
-n tekton-pipelines \
-f infrastructure/cicd/tekton-helm/values.yaml \
@@ -617,13 +657,38 @@ flux get kustomizations -n flux-system
## Phase 5: Pre-Pull and Push Base Images to Gitea Registry
> **Critical Step:** This phase must be completed after Gitea is configured (Phase 5) and before deploying application services (Phase 6). It ensures all required base images are available in the Gitea registry.
> **Critical Step:** This phase must be completed after Gitea is configured (Phase 4) and before deploying application services (Phase 6). It ensures all required base images are available in the Gitea registry.
### Overview
This phase involves two main steps:
1. **Step 5.6.1-5.6.4:** Pre-pull base images from Docker Hub and push them to Gitea registry
2. **Step 5.6.5:** Build and push all service images (first-time deployment only)
1. **Step 5.1-5.4:** Pre-pull base images from Docker Hub and push them to Gitea registry
2. **Step 5.5:** Build and push all service images (first-time deployment only)
### Prerequisites: Install Docker and Create kubectl Symlink
> **Important:** MicroK8s uses containerd, not Docker. You need to install Docker separately for building and pushing images. Also, scripts need `kubectl` to be available in PATH.
```bash
# Step 1: Install Docker
apt-get update
apt-get install -y docker.io
# Start and enable Docker service
systemctl enable docker
systemctl start docker
# Verify Docker installation
docker --version
# Expected: Docker version 28.x.x or similar
# Step 2: Create kubectl symlink (required for scripts)
# MicroK8s bundles its own kubectl, but scripts need it in PATH
sudo ln -sf /snap/microk8s/current/microk8s-kubectl.wrapper /usr/local/bin/kubectl
# Verify kubectl works
kubectl version --client
```
### Base Images Required
@@ -652,8 +717,8 @@ cd /root/bakery-ia/scripts
chmod +x prepull-base-images-for-prod.sh
# Run the prepull script in production mode WITH push enabled
# IMPORTANT: Use --push-images flag to push to Gitea registry
./prepull-base-images-for-prod.sh -e prod --push-images
# IMPORTANT: Use -r flag to specify the external registry URL
./prepull-base-images-for-prod.sh -e prod --push-images -r registry.bakewise.ai
# The script will:
# 1. Authenticate with Docker Hub (uses embedded credentials or env vars)
@@ -937,6 +1002,27 @@ kubectl wait --for=condition=available --timeout=900s deployment --all -n bakery
# Monitor deployment progress
kubectl get pods -n bakery-ia --watch
# if fails
# From your Mac
rsync -avz --progress --delete \
--exclude='.git' \
--exclude='node_modules' \
--exclude='__pycache__' \
--exclude='.venv' \
/Users/urtzialfaro/Documents/bakery-ia/ \
bakery-vps:/root/bakery-ia/
# On the VPS
kubectl delete deployments --all -n bakery-ia
kubectl delete jobs --all -n bakery-ia
kubectl delete statefulsets --all -n bakery-ia
sleep 30
kubectl apply -k infrastructure/environments/prod/k8s-manifests
kubectl get pods -n bakery-ia -w
kubectl get pods -n bakery-ia
kubectl describe node | grep -A 10 "Allocated resources"
```
### Step 6.3: Verify Application Health

View File

@@ -1983,10 +1983,10 @@ The CI/CD infrastructure has been configured with production security in mind:
- Minimum 16-character password requirement
- Password hidden from terminal output
2. **Internal Cluster Communication**
- All CI/CD components communicate via internal cluster DNS
- GitOps updates use `gitea-http.gitea.svc.cluster.local:3000`
- No hardcoded external URLs in pipeline tasks
2. **Registry Communication**
- Git operations (clone, push) use internal cluster DNS: `gitea-http.gitea.svc.cluster.local:3000`
- Image references use external HTTPS URL: `registry.bakewise.ai` (containerd requires HTTPS for auth)
- This ensures image pulls work correctly while git operations stay internal
3. **Credential Isolation**
- Secrets are passed via `--set` flags, never committed to git

View File

@@ -11,36 +11,29 @@ ingress:
className: nginx
annotations:
nginx.ingress.kubernetes.io/ssl-redirect: "true"
nginx.ingress.kubernetes.io/proxy-body-size: "500m"
nginx.ingress.kubernetes.io/proxy-body-size: "2G"
nginx.ingress.kubernetes.io/proxy-connect-timeout: "600"
nginx.ingress.kubernetes.io/proxy-send-timeout: "600"
nginx.ingress.kubernetes.io/proxy-read-timeout: "600"
nginx.ingress.kubernetes.io/proxy-send-timeout: "600"
cert-manager.io/cluster-issuer: "letsencrypt-production"
hosts:
- host: gitea.bakewise.ai
paths:
- path: /
pathType: Prefix
- host: registry.bakewise.ai
paths:
- path: /
pathType: Prefix
tls:
- secretName: gitea-tls-cert
hosts:
- gitea.bakewise.ai
apiIngress:
enabled: true
className: nginx
annotations:
nginx.ingress.kubernetes.io/ssl-redirect: "true"
nginx.ingress.kubernetes.io/proxy-body-size: "500m"
cert-manager.io/cluster-issuer: "letsencrypt-production"
hosts:
- host: registry.bakewise.ai
paths:
- path: /
pathType: Prefix
tls:
- secretName: registry-tls-cert
hosts:
- registry.bakewise.ai
- registry.bakewise.ai
# NOTE: The Gitea Helm chart (v12.4.0) does not natively support separate registry ingress.
# For registry access, we include registry.bakewise.ai in the main ingress above.
# This works because Gitea serves both UI and registry on the same port (3000).
gitea:
admin:
@@ -62,4 +55,4 @@ resources:
# Larger storage for production
persistence:
size: 50Gi
size: 50Gi

View File

@@ -32,7 +32,7 @@ ingress:
className: nginx
annotations:
nginx.ingress.kubernetes.io/ssl-redirect: "true"
nginx.ingress.kubernetes.io/proxy-body-size: "500m"
nginx.ingress.kubernetes.io/proxy-body-size: "2G"
nginx.ingress.kubernetes.io/proxy-connect-timeout: "600"
nginx.ingress.kubernetes.io/proxy-send-timeout: "600"
nginx.ingress.kubernetes.io/proxy-read-timeout: "600"

View File

@@ -29,7 +29,7 @@ spec:
- name: base-registry
type: string
description: Base image registry URL (e.g., docker.io, ghcr.io/org)
default: "gitea-http.gitea.svc.cluster.local:3000/bakery-admin"
default: "registry.bakewise.ai/bakery-admin"
- name: python-image
type: string
description: Python base image name and tag

View File

@@ -23,7 +23,7 @@ spec:
default: "false"
steps:
- name: run-unit-tests
image: gitea-http.gitea.svc.cluster.local:3000/bakery-admin/python_3.11-slim:latest
image: registry.bakewise.ai/bakery-admin/python:3.11-slim
workingDir: $(workspaces.source.path)
script: |
#!/bin/bash
@@ -57,7 +57,7 @@ spec:
cpu: 200m
memory: 512Mi
- name: run-integration-tests
image: gitea-http.gitea.svc.cluster.local:3000/bakery-admin/python_3.11-slim:latest
image: registry.bakewise.ai/bakery-admin/python:3.11-slim
workingDir: $(workspaces.source.path)
script: |
#!/bin/bash

View File

@@ -16,6 +16,11 @@
# Global settings for production
global:
# Registry configuration - use external HTTPS URL for image references
# containerd/Docker requires HTTPS for authenticated registries
registry:
url: "registry.bakewise.ai/bakery-admin"
# Git configuration
git:
userEmail: "ci@bakewise.ai"
@@ -25,6 +30,8 @@ pipeline:
# Build configuration
build:
verbosity: "warn" # Less verbose in production
# Use external registry URL for base images (HTTPS required)
baseRegistry: "registry.bakewise.ai/bakery-admin"
# Test configuration
test:
@@ -72,7 +79,9 @@ secrets:
registry:
username: "bakery-admin"
password: "" # MUST be set via --set flag
registryUrl: "gitea-http.gitea.svc.cluster.local:3000"
# Use external HTTPS URL for image references (containerd requires HTTPS for auth)
# Kaniko can still push via HTTP internally, but image refs must use HTTPS
registryUrl: "registry.bakewise.ai"
# Git credentials for GitOps updates
# Override with: --set secrets.git.password=$GITEA_ADMIN_PASSWORD

View File

@@ -4,8 +4,9 @@
# Global settings
global:
# Registry configuration
# NOTE: Use external HTTPS URL - containerd requires HTTPS for authenticated registries
registry:
url: "gitea-http.gitea.svc.cluster.local:3000/bakery-admin"
url: "registry.bakewise.ai/bakery-admin"
# Git configuration
git:
@@ -20,10 +21,9 @@ pipeline:
cacheTTL: "24h"
verbosity: "info"
# Base image registry configuration
# For dev: localhost:5000 with python_3.11-slim
# For prod: gitea registry with python_3.11-slim
baseRegistry: "gitea-http.gitea.svc.cluster.local:3000/bakery-admin"
pythonImage: "python_3.11-slim"
# NOTE: Use external HTTPS URL - containerd requires HTTPS for authenticated registries
baseRegistry: "registry.bakewise.ai/bakery-admin"
pythonImage: "python:3.11-slim"
# Test configuration
test:
@@ -74,10 +74,11 @@ secrets:
# Registry credentials for pushing images
# Uses the same credentials as Gitea admin for consistency
# NOTE: Use external HTTPS URL - containerd requires HTTPS for authenticated registries
registry:
username: "bakery-admin"
password: "" # Will be populated from gitea-admin-secret
registryUrl: "gitea-http.gitea.svc.cluster.local:3000"
registryUrl: "registry.bakewise.ai"
# Git credentials for GitOps updates
# Uses the same credentials as Gitea admin for consistency

View File

@@ -207,141 +207,160 @@ patches:
images:
# Application services
- name: bakery/auth-service
newName: gitea-http.gitea.svc.cluster.local:3000/bakery-admin/auth-service
newName: registry.bakewise.ai/bakery-admin/auth-service
newTag: latest
- name: bakery/tenant-service
newName: gitea-http.gitea.svc.cluster.local:3000/bakery-admin/tenant-service
newName: registry.bakewise.ai/bakery-admin/tenant-service
newTag: latest
- name: bakery/training-service
newName: gitea-http.gitea.svc.cluster.local:3000/bakery-admin/training-service
newName: registry.bakewise.ai/bakery-admin/training-service
newTag: latest
- name: bakery/forecasting-service
newName: gitea-http.gitea.svc.cluster.local:3000/bakery-admin/forecasting-service
newName: registry.bakewise.ai/bakery-admin/forecasting-service
newTag: latest
- name: bakery/sales-service
newName: gitea-http.gitea.svc.cluster.local:3000/bakery-admin/sales-service
newName: registry.bakewise.ai/bakery-admin/sales-service
newTag: latest
- name: bakery/external-service
newName: gitea-http.gitea.svc.cluster.local:3000/bakery-admin/external-service
newName: registry.bakewise.ai/bakery-admin/external-service
newTag: latest
- name: bakery/notification-service
newName: gitea-http.gitea.svc.cluster.local:3000/bakery-admin/notification-service
newName: registry.bakewise.ai/bakery-admin/notification-service
newTag: latest
- name: bakery/inventory-service
newName: gitea-http.gitea.svc.cluster.local:3000/bakery-admin/inventory-service
newName: registry.bakewise.ai/bakery-admin/inventory-service
newTag: latest
- name: bakery/recipes-service
newName: gitea-http.gitea.svc.cluster.local:3000/bakery-admin/recipes-service
newName: registry.bakewise.ai/bakery-admin/recipes-service
newTag: latest
- name: bakery/suppliers-service
newName: gitea-http.gitea.svc.cluster.local:3000/bakery-admin/suppliers-service
newName: registry.bakewise.ai/bakery-admin/suppliers-service
newTag: latest
- name: bakery/pos-service
newName: gitea-http.gitea.svc.cluster.local:3000/bakery-admin/pos-service
newName: registry.bakewise.ai/bakery-admin/pos-service
newTag: latest
- name: bakery/orders-service
newName: gitea-http.gitea.svc.cluster.local:3000/bakery-admin/orders-service
newName: registry.bakewise.ai/bakery-admin/orders-service
newTag: latest
- name: bakery/production-service
newName: gitea-http.gitea.svc.cluster.local:3000/bakery-admin/production-service
newName: registry.bakewise.ai/bakery-admin/production-service
newTag: latest
- name: bakery/alert-processor
newName: gitea-http.gitea.svc.cluster.local:3000/bakery-admin/alert-processor
newName: registry.bakewise.ai/bakery-admin/alert-processor
newTag: latest
- name: bakery/gateway
newName: gitea-http.gitea.svc.cluster.local:3000/bakery-admin/gateway
newName: registry.bakewise.ai/bakery-admin/gateway
newTag: latest
- name: bakery/dashboard
newName: gitea-http.gitea.svc.cluster.local:3000/bakery-admin/dashboard
newName: registry.bakewise.ai/bakery-admin/dashboard
newTag: latest
# Missing services (added to fix ImagePullBackOff errors)
- name: bakery/ai-insights-service
newName: registry.bakewise.ai/bakery-admin/ai-insights-service
newTag: latest
- name: bakery/demo-session-service
newName: registry.bakewise.ai/bakery-admin/demo-session-service
newTag: latest
- name: bakery/distribution-service
newName: registry.bakewise.ai/bakery-admin/distribution-service
newTag: latest
- name: bakery/orchestrator-service
newName: registry.bakewise.ai/bakery-admin/orchestrator-service
newTag: latest
- name: bakery/procurement-service
newName: registry.bakewise.ai/bakery-admin/procurement-service
newTag: latest
# =============================================================================
# Database images (cached in gitea registry for consistency)
- name: postgres
newName: gitea-http.gitea.svc.cluster.local:3000/bakery-admin/postgres
newName: registry.bakewise.ai/bakery-admin/postgres
newTag: "17-alpine"
- name: redis
newName: gitea-http.gitea.svc.cluster.local:3000/bakery-admin/redis
newName: registry.bakewise.ai/bakery-admin/redis
newTag: "7.4-alpine"
- name: rabbitmq
newName: gitea-http.gitea.svc.cluster.local:3000/bakery-admin/rabbitmq
newName: registry.bakewise.ai/bakery-admin/rabbitmq
newTag: "4.1-management-alpine"
# Utility images
- name: busybox
newName: gitea-http.gitea.svc.cluster.local:3000/bakery-admin/busybox
newName: registry.bakewise.ai/bakery-admin/busybox
newTag: "1.36"
- name: curlimages/curl
newName: gitea-http.gitea.svc.cluster.local:3000/bakery-admin/curlimages-curl
newName: registry.bakewise.ai/bakery-admin/curlimages_curl
newTag: latest
- name: bitnami/kubectl
newName: gitea-http.gitea.svc.cluster.local:3000/bakery-admin/bitnami-kubectl
newName: registry.bakewise.ai/bakery-admin/bitnami_kubectl
newTag: latest
# Alpine variants
- name: alpine
newName: gitea-http.gitea.svc.cluster.local:3000/bakery-admin/alpine
newName: registry.bakewise.ai/bakery-admin/alpine
newTag: "3.19"
- name: alpine/git
newName: gitea-http.gitea.svc.cluster.local:3000/bakery-admin/alpine-git
newName: registry.bakewise.ai/bakery-admin/alpine_git
newTag: 2.43.0
# CI/CD images (cached in gitea registry for consistency)
- name: gcr.io/kaniko-project/executor
newName: gitea-http.gitea.svc.cluster.local:3000/bakery-admin/gcr.io-kaniko-project-executor
newName: registry.bakewise.ai/bakery-admin/gcr.io_kaniko-project_executor
newTag: v1.23.0
- name: gcr.io/go-containerregistry/crane
newName: gitea-http.gitea.svc.cluster.local:3000/bakery-admin/gcr.io-go-containerregistry-crane
newName: registry.bakewise.ai/bakery-admin/gcr.io_go-containerregistry_crane
newTag: latest
- name: registry.k8s.io/kustomize/kustomize
newName: gitea-http.gitea.svc.cluster.local:3000/bakery-admin/registry.k8s.io-kustomize-kustomize
newName: registry.bakewise.ai/bakery-admin/registry.k8s.io_kustomize_kustomize
newTag: v5.3.0
# Storage images
- name: minio/minio
newName: gitea-http.gitea.svc.cluster.local:3000/bakery-admin/minio-minio
newName: registry.bakewise.ai/bakery-admin/minio_minio
newTag: RELEASE.2024-11-07T00-52-20Z
- name: minio/mc
newName: gitea-http.gitea.svc.cluster.local:3000/bakery-admin/minio-mc
newName: registry.bakewise.ai/bakery-admin/minio_mc
newTag: RELEASE.2024-11-17T19-35-25Z
# NOTE: nominatim image override removed - nominatim is now deployed via Helm
# Python base image
- name: python
newName: gitea-http.gitea.svc.cluster.local:3000/bakery-admin/python
newName: registry.bakewise.ai/bakery-admin/python
newTag: 3.11-slim
# Replica counts for single-node VPS deployment (8 CPU cores)
# Set to 1 replica per service to fit resource constraints
# Scale up when adding more nodes to the cluster
replicas:
- name: auth-service
count: 3
count: 1
- name: tenant-service
count: 2
count: 1
- name: training-service
count: 3 # Safe with MinIO storage - no PVC conflicts
count: 1
- name: forecasting-service
count: 3
count: 1
- name: sales-service
count: 2
count: 1
- name: external-service
count: 2
count: 1
- name: notification-service
count: 3
count: 1
- name: inventory-service
count: 2
count: 1
- name: recipes-service
count: 2
count: 1
- name: suppliers-service
count: 2
count: 1
- name: pos-service
count: 2
count: 1
- name: orders-service
count: 3
count: 1
- name: production-service
count: 2
count: 1
- name: alert-processor
count: 3
count: 1
- name: procurement-service
count: 2
count: 1
- name: orchestrator-service
count: 2
count: 1
- name: ai-insights-service
count: 2
count: 1
- name: gateway
count: 3
count: 1
- name: frontend
count: 2
count: 1

View File

@@ -49,6 +49,9 @@ spec:
- secretRef:
name: whatsapp-secrets
env:
# Gateway doesn't use a database but base config requires this for production validation
- name: DATABASE_URL
value: "postgresql://gateway:unused@localhost:5432/unused"
- name: OTEL_EXPORTER_OTLP_ENDPOINT
valueFrom:
configMapKeyRef:

View File

@@ -119,9 +119,21 @@ fi
# Wait for Unbound to be ready
kubectl wait --for=condition=ready pod -l app.kubernetes.io/name=unbound -n "$NAMESPACE" --timeout=120s
# Get Unbound service IP
UNBOUND_IP=$(kubectl get svc unbound-dns -n "$NAMESPACE" -o jsonpath='{.spec.clusterIP}')
echo "Unbound DNS service IP: $UNBOUND_IP"
# Get Unbound service IP (dynamic resolution)
echo "Waiting for Unbound service to get assigned IP..."
for i in {1..30}; do
UNBOUND_IP=$(kubectl get svc unbound-dns -n "$NAMESPACE" -o jsonpath='{.spec.clusterIP}' 2>/dev/null || echo "")
if [ -n "$UNBOUND_IP" ] && [ "$UNBOUND_IP" != "<none>" ]; then
echo "Unbound DNS service IP: $UNBOUND_IP"
break
fi
if [ $i -eq 30 ]; then
print_error "Failed to get Unbound service IP"
exit 1
fi
sleep 2
echo "Waiting for Unbound service IP... (attempt $i/30)"
done
# =============================================================================
# Step 2: Configure CoreDNS to Forward to Unbound
@@ -134,12 +146,43 @@ CURRENT_FORWARD=$(kubectl get configmap coredns -n kube-system -o jsonpath='{.da
if [ "$CURRENT_FORWARD" != "$UNBOUND_IP" ]; then
echo "Updating CoreDNS to forward to Unbound ($UNBOUND_IP)..."
# Create a temporary file with the CoreDNS configuration
TEMP_COREFILE=$(mktemp)
cat > "$TEMP_COREFILE" <<EOF
.:53 {
errors
health {
lameduck 5s
}
ready
kubernetes cluster.local in-addr.arpa ip6.arpa {
pods insecure
fallthrough in-addr.arpa ip6.arpa
ttl 30
}
prometheus :9153
forward . $UNBOUND_IP {
max_concurrent 1000
}
cache 30 {
disable success cluster.local
disable denial cluster.local
}
loop
reload
loadbalance
}
EOF
# Apply the configuration
kubectl patch configmap coredns -n kube-system --type merge -p "{
\"data\": {
\"Corefile\": \".:53 {\\n errors\\n health {\\n lameduck 5s\\n }\\n ready\\n kubernetes cluster.local in-addr.arpa ip6.arpa {\\n pods insecure\\n fallthrough in-addr.arpa ip6.arpa\\n ttl 30\\n }\\n prometheus :9153\\n forward . $UNBOUND_IP {\\n max_concurrent 1000\\n }\\n cache 30 {\\n disable success cluster.local\\n disable denial cluster.local\\n }\\n loop\\n reload\\n loadbalance\\n}\\n\"
\"Corefile\": \"$(cat "$TEMP_COREFILE" | sed 's/\\/\\\\/g' | sed ':a;N;$!ba;s/\n/\\\\n/g')\"
}
}"
rm -f "$TEMP_COREFILE"
# Restart CoreDNS
kubectl rollout restart deployment coredns -n kube-system
kubectl rollout status deployment coredns -n kube-system --timeout=60s
@@ -208,13 +251,19 @@ print_step "Step 5: Deploying Mailu via Helm..."
helm repo add mailu https://mailu.github.io/helm-charts 2>/dev/null || true
helm repo update mailu
# Deploy Mailu
# Create temporary values file with dynamic DNS server
TEMP_VALUES=$(mktemp)
cat "$MAILU_HELM_DIR/values.yaml" | sed "s/# custom_dns_servers: \"\" # Will be set dynamically by deployment script/custom_dns_servers: \"$UNBOUND_IP\"/" > "$TEMP_VALUES"
# Deploy Mailu with dynamic DNS configuration
helm upgrade --install mailu mailu/mailu \
-n "$NAMESPACE" \
-f "$MAILU_HELM_DIR/values.yaml" \
-f "$TEMP_VALUES" \
-f "$MAILU_HELM_DIR/prod/values.yaml" \
--timeout 10m
rm -f "$TEMP_VALUES"
print_success "Mailu Helm release deployed (admin user will be created automatically)"
# =============================================================================

View File

@@ -0,0 +1,209 @@
#!/bin/bash
# =============================================================================
# Phase 7: Deploy Optional Services - Fixed Version
# =============================================================================
# This script deploys the optional services for production:
#   1. Unbound DNS (with dynamic IP resolution)
#   2. CoreDNS configuration for DNSSEC
#   3. Mailu Email Server
#   4. SigNoz Monitoring
#
# Fixed issues:
#   - Removed static ClusterIP that caused CIDR range conflicts
#   - Implemented dynamic IP resolution for Unbound DNS
#   - Updated CoreDNS patching to use dynamic IP
#   - Updated Mailu configuration to use dynamic DNS server
#
# Requirements: kubectl and helm in PATH; run from the repository root
# (all infrastructure/... paths below are relative to it).
# =============================================================================

# Fail fast: abort on command errors, unset variables, and mid-pipeline
# failures (the original only used -e).
set -euo pipefail

# Colors for output
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
# shellcheck disable=SC2034  # kept for parity with sibling scripts
readonly YELLOW='\033[1;33m'
readonly BLUE='\033[0;34m'
readonly NC='\033[0m' # No Color

readonly NAMESPACE="bakery-ia"
readonly DOMAIN="bakewise.ai"

# Print a highlighted step banner to stdout.
print_step() {
  echo -e "\n${BLUE}==>${NC} ${GREEN}$1${NC}"
}

# Print an error message. Diagnostics go to stderr so they are not lost
# when stdout is piped or captured.
print_error() {
  echo -e "${RED}ERROR:${NC} $1" >&2
}

# Print a success message with a green check mark.
# NOTE(review): the original emitted "${GREEN}${NC} $1" with nothing between
# the color codes — presumably a "✓" glyph was lost (the summary section
# below prints literal "✓" lines); restored here — confirm intended output.
print_success() {
  echo -e "${GREEN}✓${NC} $1"
}

# Temp files are created later; an EXIT trap guarantees cleanup even when a
# helm/kubectl step fails before the inline rm -f lines are reached.
TEMP_COREFILE=""
TEMP_VALUES=""
cleanup() { rm -f -- "$TEMP_COREFILE" "$TEMP_VALUES"; }
trap cleanup EXIT

# =============================================================================
# Step 7.1: Deploy Unbound DNS (with dynamic IP)
# =============================================================================
print_step "Step 7.1: Deploying Unbound DNS resolver (dynamic IP)..."

# NOTE(review): assumes the Helm release creates a Deployment named exactly
# "unbound" — verify against the chart's templates.
if kubectl get deployment unbound -n "$NAMESPACE" &>/dev/null; then
  print_success "Unbound already deployed"
else
  helm upgrade --install unbound infrastructure/platform/networking/dns/unbound-helm \
    -n "$NAMESPACE" \
    -f infrastructure/platform/networking/dns/unbound-helm/values.yaml \
    -f infrastructure/platform/networking/dns/unbound-helm/prod/values.yaml \
    --timeout 5m \
    --wait
  print_success "Unbound deployed"
fi

# Poll for the dynamically assigned ClusterIP (30 attempts x 2s = ~60s max).
print_step "Waiting for Unbound service to get assigned IP..."
UNBOUND_IP=""
for i in {1..30}; do
  UNBOUND_IP=$(kubectl get svc unbound-dns -n "$NAMESPACE" -o jsonpath='{.spec.clusterIP}' 2>/dev/null || echo "")
  if [ -n "$UNBOUND_IP" ] && [ "$UNBOUND_IP" != "<none>" ]; then
    echo "Unbound DNS service IP: $UNBOUND_IP"
    break
  fi
  if [ "$i" -eq 30 ]; then
    print_error "Failed to get Unbound service IP"
    exit 1
  fi
  sleep 2
  echo "Waiting for Unbound service IP... (attempt $i/30)"
done

# =============================================================================
# Step 7.2: Configure CoreDNS for DNSSEC (dynamic IP)
# =============================================================================
print_step "Step 7.2: Configuring CoreDNS for DNSSEC validation..."

# Extract the IP CoreDNS currently forwards to; empty when no forward stanza
# matches (|| echo "" keeps set -e/pipefail from aborting on a miss).
CURRENT_FORWARD=$(kubectl get configmap coredns -n kube-system -o jsonpath='{.data.Corefile}' | grep -o 'forward \. [0-9.]*' | awk '{print $3}' || echo "")

if [ "$CURRENT_FORWARD" != "$UNBOUND_IP" ]; then
  echo "Updating CoreDNS to forward to Unbound ($UNBOUND_IP)..."

  # Write the desired Corefile to a temp file; the heredoc expands $UNBOUND_IP.
  TEMP_COREFILE=$(mktemp)
  cat > "$TEMP_COREFILE" <<EOF
.:53 {
    errors
    health {
        lameduck 5s
    }
    ready
    kubernetes cluster.local in-addr.arpa ip6.arpa {
        pods insecure
        fallthrough in-addr.arpa ip6.arpa
        ttl 30
    }
    prometheus :9153
    forward . $UNBOUND_IP {
        max_concurrent 1000
    }
    cache 30 {
        disable success cluster.local
        disable denial cluster.local
    }
    loop
    reload
    loadbalance
}
EOF

  # JSON-escape the Corefile (double backslashes first, then fold newlines
  # into literal \n) and merge-patch it into the coredns ConfigMap.
  # NOTE(review): double quotes are NOT escaped — safe only while the
  # Corefile template above contains none.
  kubectl patch configmap coredns -n kube-system --type merge -p "{
  \"data\": {
    \"Corefile\": \"$(sed 's/\\/\\\\/g' "$TEMP_COREFILE" | sed ':a;N;$!ba;s/\n/\\\\n/g')\"
  }
}"
  rm -f "$TEMP_COREFILE"

  # Restart CoreDNS so the new Corefile takes effect and wait for the rollout.
  kubectl rollout restart deployment coredns -n kube-system
  kubectl rollout status deployment coredns -n kube-system --timeout=60s
  print_success "CoreDNS configured to forward to Unbound"
else
  print_success "CoreDNS already configured for Unbound"
fi

# =============================================================================
# Step 7.3: Deploy Mailu Email Server (dynamic DNS)
# =============================================================================
print_step "Step 7.3: Deploying Mailu Email Server..."

# Add Mailu Helm repository (idempotent; ignore "already exists").
helm repo add mailu https://mailu.github.io/helm-charts 2>/dev/null || true
helm repo update mailu

# Substitute the dynamic DNS server into a temporary values file.
# NOTE(review): the sed pattern must match the placeholder comment in
# values.yaml byte-for-byte — keep the two files in sync.
TEMP_VALUES=$(mktemp)
sed "s/# custom_dns_servers: \"\" # Will be set dynamically by deployment script/custom_dns_servers: \"$UNBOUND_IP\"/" \
  infrastructure/platform/mail/mailu-helm/values.yaml > "$TEMP_VALUES"

# Deploy Mailu with dynamic DNS configuration
helm upgrade --install mailu mailu/mailu \
  -n "$NAMESPACE" \
  -f "$TEMP_VALUES" \
  -f infrastructure/platform/mail/mailu-helm/prod/values.yaml \
  --timeout 10m

rm -f "$TEMP_VALUES"
print_success "Mailu Helm release deployed"

# Wait for Mailu pods to be ready
echo "Waiting for Mailu pods to be ready (this may take 5-10 minutes)..."
kubectl wait --for=condition=ready pod -l app.kubernetes.io/component=admin -n "$NAMESPACE" --timeout=300s || {
  print_error "Admin pod failed to start. Checking logs..."
  kubectl logs -n "$NAMESPACE" -l app.kubernetes.io/component=admin --tail=50
  exit 1
}

print_success "Mailu deployment completed"

# =============================================================================
# Step 7.4: Deploy SigNoz Monitoring
# =============================================================================
print_step "Step 7.4: Deploying SigNoz Monitoring..."

# Add SigNoz Helm repository (idempotent; ignore "already exists").
helm repo add signoz https://charts.signoz.io 2>/dev/null || true
helm repo update signoz

# Install SigNoz. Use upgrade --install so re-running the script does not
# fail with "cannot re-use a name" (the original used plain `helm install`).
helm upgrade --install signoz signoz/signoz \
  -n "$NAMESPACE" \
  -f infrastructure/monitoring/signoz/signoz-values-prod.yaml \
  --set global.storageClass="microk8s-hostpath" \
  --set clickhouse.persistence.enabled=true \
  --set clickhouse.persistence.size=50Gi \
  --timeout 15m

# Wait for SigNoz frontend to be available before declaring success.
kubectl wait --for=condition=available --timeout=600s deployment/signoz-frontend -n "$NAMESPACE"

print_success "SigNoz deployment completed"

# =============================================================================
# Summary
# =============================================================================
echo ""
echo "=============================================="
echo -e "${GREEN}Phase 7 Deployment Complete!${NC}"
echo "=============================================="
echo ""
echo "Deployed Services:"
echo "  ✓ Unbound DNS (IP: $UNBOUND_IP)"
echo "  ✓ CoreDNS (configured for DNSSEC)"
echo "  ✓ Mailu Email Server"
echo "  ✓ SigNoz Monitoring"
echo ""
echo "Next Steps:"
echo "  1. Configure DNS records for mail.$DOMAIN"
echo "  2. Set up Mailgun relay credentials"
echo "  3. Configure Ingress for monitoring.$DOMAIN"
echo "  4. Verify all services are accessible"
echo ""

View File

@@ -5,8 +5,8 @@
global:
# Using Unbound DNS resolver directly for DNSSEC validation
# Unbound service is available at unbound-dns.bakery-ia.svc.cluster.local
# Static ClusterIP configured in unbound-helm/values.yaml
custom_dns_servers: "10.96.53.53" # Unbound DNS static ClusterIP
# DNS server IP will be dynamically resolved during deployment
# custom_dns_servers: "" # Will be set dynamically by deployment script
# Domain configuration
domain: "DOMAIN_PLACEHOLDER"

View File

@@ -41,10 +41,10 @@ affinity:
# Production probe settings (more conservative)
probes:
readiness:
initialDelaySeconds: 15
initialDelaySeconds: 20
periodSeconds: 30
command: "drill @127.0.0.1 -p 53 example.org || echo 'DNS query test'"
command: "sh -c 'echo \"\" | nc -w 3 127.0.0.1 53 || exit 1'"
liveness:
initialDelaySeconds: 45
initialDelaySeconds: 60
periodSeconds: 60
command: "drill @127.0.0.1 -p 53 example.org || echo 'DNS query test'"
command: "sh -c 'echo \"\" | nc -w 3 127.0.0.1 53 || exit 1'"

View File

@@ -34,10 +34,8 @@ securityContext:
# Service configuration
service:
type: "ClusterIP"
# Static ClusterIP for predictable DNS configuration
# This allows other services (like Mailu) to reference a stable IP
# Must be within the cluster's service CIDR range (typically 10.96.0.0/12)
clusterIP: "10.96.53.53"
# Dynamic ClusterIP - Kubernetes will assign automatically
# clusterIP: "" # Leave empty for automatic assignment
ports:
dnsUdp: 53
dnsTcp: 53
@@ -46,14 +44,22 @@ service:
probes:
readiness:
enabled: true
initialDelaySeconds: 10
initialDelaySeconds: 15
periodSeconds: 30
command: "drill @127.0.0.1 -p 53 example.org || echo 'DNS query test'"
# Simple TCP connectivity check - more reliable than DNS queries
# Tests if the DNS port is listening and responding
command: "sh -c 'echo \"\" | nc -w 2 127.0.0.1 53 || exit 1'"
# Alternative: use curl if available
# command: "curl -s --max-time 2 http://127.0.0.1:53 || exit 1"
liveness:
enabled: true
initialDelaySeconds: 30
initialDelaySeconds: 45
periodSeconds: 60
command: "drill @127.0.0.1 -p 53 example.org || echo 'DNS query test'"
# Simple TCP connectivity check - more reliable than DNS queries
# Tests if the DNS port is listening and responding
command: "sh -c 'echo \"\" | nc -w 2 127.0.0.1 53 || exit 1'"
# Alternative: use curl if available
# command: "curl -s --max-time 2 http://127.0.0.1:53 || exit 1"
# Additional environment variables
env: {}

View File

@@ -10,7 +10,7 @@ metadata:
# Nginx ingress controller annotations
nginx.ingress.kubernetes.io/ssl-redirect: "true"
nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
nginx.ingress.kubernetes.io/proxy-body-size: "500m"
nginx.ingress.kubernetes.io/proxy-body-size: "2GB"
nginx.ingress.kubernetes.io/proxy-connect-timeout: "600"
nginx.ingress.kubernetes.io/proxy-send-timeout: "3600"
nginx.ingress.kubernetes.io/proxy-read-timeout: "3600"

View File

@@ -25,12 +25,12 @@ if ! GITEA_ADMIN_PASSWORD=$(kubectl get secret gitea-admin-secret -n gitea -o js
exit 1
fi
# Login to Gitea registry
# Login to Gitea registry (use external HTTPS URL)
echo "Logging in to Gitea registry..."
docker login gitea-http.gitea.svc.cluster.local:3000 -u bakery-admin -p "$GITEA_ADMIN_PASSWORD"
docker login registry.bakewise.ai -u bakery-admin -p "$GITEA_ADMIN_PASSWORD"
# Define the registry URL
REGISTRY="gitea-http.gitea.svc.cluster.local:3000/bakery-admin"
# Define the registry URL (use external HTTPS URL - containerd requires HTTPS for auth)
REGISTRY="registry.bakewise.ai/bakery-admin"
# Define all services to build
# Format: "directory_name:image_name"

View File

@@ -112,7 +112,7 @@ if [ "$ENVIRONMENT" = "prod" ]; then
else
echo "Error: Could not detect Gitea registry automatically"
echo "Please specify the registry with -r/--registry option"
echo "Example: $0 -e prod -r gitea-http.gitea.svc.cluster.local:3000"
echo "Example: $0 -e prod -r registry.bakewise.ai"
exit 1
fi
fi

View File

@@ -113,7 +113,7 @@ if [ "$ENVIRONMENT" = "prod" ]; then
else
echo "Error: Could not detect Gitea registry automatically"
echo "Please specify the registry with -r/--registry option"
echo "Example: $0 -e prod -r gitea-http.gitea.svc.cluster.local:3000"
echo "Example: $0 -e prod -r registry.bakewise.ai"
exit 1
fi
fi

View File

@@ -4,6 +4,7 @@ Implements SetupIntent-first architecture for secure payment flows
Implements PaymentProvider interface for easy SDK swapping
"""
import os
import stripe
import uuid
import logging
@@ -30,12 +31,13 @@ class StripeClient(PaymentProvider):
def __init__(self):
"""Initialize Stripe client with configuration"""
settings = BaseServiceSettings()
stripe.api_key = settings.STRIPE_SECRET_KEY
# Read Stripe settings directly from environment to avoid BaseServiceSettings validation
stripe.api_key = os.environ.get("STRIPE_SECRET_KEY")
# Let the SDK use its default pinned API version (2025-12-15.clover for v14.1.0)
# Only override if explicitly set in environment
if settings.STRIPE_API_VERSION:
stripe.api_version = settings.STRIPE_API_VERSION
stripe_api_version = os.environ.get("STRIPE_API_VERSION")
if stripe_api_version:
stripe.api_version = stripe_api_version
async def create_setup_intent_for_verification(
self,

View File

@@ -100,9 +100,33 @@ class BaseServiceSettings(BaseSettings):
# DATABASE CONFIGURATION
# ================================================================
# Note: DATABASE_URL is defined as a property in each service-specific config
# to construct the URL from secure environment variables
# DATABASE_URL as a property - can be overridden by service-specific configs
# Base implementation reads from environment variable or builds from components
@property
def DATABASE_URL(self) -> str:
    """Build the database connection URL.

    Resolution order:
      1. A complete ``DATABASE_URL`` environment variable, used verbatim.
      2. Standard components (``DB_USER``, ``DB_PASSWORD``, ``DB_HOST``,
         ``DB_PORT``, ``DB_NAME``) assembled into a postgresql+asyncpg URL.
      3. An empty string when neither is configured (expected to fail
         validation in production).

    Service-specific configs should override this property to use their
    own service-specific environment variables (e.g. TENANT_DATABASE_URL).
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import quote

    # A fully-specified URL always wins.
    complete_url = os.getenv("DATABASE_URL")
    if complete_url:
        return complete_url

    # Otherwise try to assemble from the standard component variables.
    user = os.getenv("DB_USER", "")
    password = os.getenv("DB_PASSWORD", "")
    host = os.getenv("DB_HOST", "localhost")
    port = os.getenv("DB_PORT", "5432")
    name = os.getenv("DB_NAME", "")
    if user and password and name:
        # Percent-encode credentials so passwords containing URL
        # metacharacters (@ : / %) still yield a valid SQLAlchemy URL.
        # For plain alphanumeric credentials this is a no-op, so the
        # resulting URL is unchanged for existing deployments.
        return (
            f"postgresql+asyncpg://{quote(user, safe='')}:"
            f"{quote(password, safe='')}@{host}:{port}/{name}"
        )

    # Not configured — downstream validation is expected to reject this.
    return ""
# Database connection settings
DB_POOL_SIZE: int = int(os.getenv("DB_POOL_SIZE", "10"))
DB_MAX_OVERFLOW: int = int(os.getenv("DB_MAX_OVERFLOW", "20"))

View File

@@ -11,7 +11,7 @@ metadata:
# - PYTHON_IMAGE: Python image name and tag
#
# Dev (default): BASE_REGISTRY=localhost:5000, PYTHON_IMAGE=python_3_11_slim
# Prod: BASE_REGISTRY=gitea-http.gitea.svc.cluster.local:3000/bakery-admin, PYTHON_IMAGE=python_3_11_slim
# Prod: BASE_REGISTRY=registry.bakewise.ai/bakery-admin, PYTHON_IMAGE=python_3_11_slim
#
# Usage:
# skaffold dev # Uses dev settings (local registry)
@@ -320,7 +320,7 @@ profiles:
docker:
dockerfile: gateway/Dockerfile
buildArgs:
BASE_REGISTRY: gitea-http.gitea.svc.cluster.local:3000/bakery-admin
BASE_REGISTRY: registry.bakewise.ai/bakery-admin
PYTHON_IMAGE: "python_3_11_slim"
# Frontend
@@ -329,13 +329,13 @@ profiles:
docker:
dockerfile: Dockerfile.kubernetes
# Microservices - Production base images (gitea-http.gitea.svc.cluster.local:3000/bakery-admin/python_3.11-slim)
# Microservices - Production base images (registry.bakewise.ai/bakery-admin/python_3_11_slim)
- image: bakery/auth-service
context: .
docker:
dockerfile: services/auth/Dockerfile
buildArgs:
BASE_REGISTRY: gitea-http.gitea.svc.cluster.local:3000/bakery-admin
BASE_REGISTRY: registry.bakewise.ai/bakery-admin
PYTHON_IMAGE: "python_3_11_slim"
- image: bakery/tenant-service
@@ -343,7 +343,7 @@ profiles:
docker:
dockerfile: services/tenant/Dockerfile
buildArgs:
BASE_REGISTRY: gitea-http.gitea.svc.cluster.local:3000/bakery-admin
BASE_REGISTRY: registry.bakewise.ai/bakery-admin
PYTHON_IMAGE: "python_3_11_slim"
- image: bakery/training-service
@@ -457,7 +457,7 @@ profiles:
# ---------------------------------------------------------------------------
# GITEA PROFILE - Development with Gitea registry for base images
# Uses internal cluster service name for base image registry
# Uses external HTTPS registry URL (containerd requires HTTPS for auth)
# ---------------------------------------------------------------------------
# Usage:
# skaffold run -p gitea # Gitea registry mode
@@ -477,7 +477,7 @@ profiles:
docker:
dockerfile: gateway/Dockerfile
buildArgs:
BASE_REGISTRY: gitea-http.gitea.svc.cluster.local:3000
BASE_REGISTRY: registry.bakewise.ai/bakery-admin
PYTHON_IMAGE: "python:3.11-slim"
# Frontend
@@ -492,7 +492,7 @@ profiles:
docker:
dockerfile: services/auth/Dockerfile
buildArgs:
BASE_REGISTRY: gitea-http.gitea.svc.cluster.local:3000
BASE_REGISTRY: registry.bakewise.ai/bakery-admin
PYTHON_IMAGE: "python:3.11-slim"
- image: bakery/tenant-service
@@ -500,7 +500,7 @@ profiles:
docker:
dockerfile: services/tenant/Dockerfile
buildArgs:
BASE_REGISTRY: gitea-http.gitea.svc.cluster.local:3000
BASE_REGISTRY: registry.bakewise.ai/bakery-admin
PYTHON_IMAGE: "python:3.11-slim"
- image: bakery/training-service
@@ -508,7 +508,7 @@ profiles:
docker:
dockerfile: services/training/Dockerfile
buildArgs:
BASE_REGISTRY: gitea-http.gitea.svc.cluster.local:3000
BASE_REGISTRY: registry.bakewise.ai/bakery-admin
PYTHON_IMAGE: "python:3.11-slim"
- image: bakery/forecasting-service
@@ -516,7 +516,7 @@ profiles:
docker:
dockerfile: services/forecasting/Dockerfile
buildArgs:
BASE_REGISTRY: gitea-http.gitea.svc.cluster.local:3000
BASE_REGISTRY: registry.bakewise.ai/bakery-admin
PYTHON_IMAGE: "python:3.11-slim"
- image: bakery/sales-service
@@ -524,7 +524,7 @@ profiles:
docker:
dockerfile: services/sales/Dockerfile
buildArgs:
BASE_REGISTRY: gitea-http.gitea.svc.cluster.local:3000
BASE_REGISTRY: registry.bakewise.ai/bakery-admin
PYTHON_IMAGE: "python:3.11-slim"
- image: bakery/external-service
@@ -532,7 +532,7 @@ profiles:
docker:
dockerfile: services/external/Dockerfile
buildArgs:
BASE_REGISTRY: gitea-http.gitea.svc.cluster.local:3000
BASE_REGISTRY: registry.bakewise.ai/bakery-admin
PYTHON_IMAGE: "python:3.11-slim"
- image: bakery/notification-service
@@ -540,7 +540,7 @@ profiles:
docker:
dockerfile: services/notification/Dockerfile
buildArgs:
BASE_REGISTRY: gitea-http.gitea.svc.cluster.local:3000
BASE_REGISTRY: registry.bakewise.ai/bakery-admin
PYTHON_IMAGE: "python:3.11-slim"
- image: bakery/inventory-service
@@ -548,7 +548,7 @@ profiles:
docker:
dockerfile: services/inventory/Dockerfile
buildArgs:
BASE_REGISTRY: gitea-http.gitea.svc.cluster.local:3000
BASE_REGISTRY: registry.bakewise.ai/bakery-admin
PYTHON_IMAGE: "python:3.11-slim"
- image: bakery/recipes-service
@@ -556,7 +556,7 @@ profiles:
docker:
dockerfile: services/recipes/Dockerfile
buildArgs:
BASE_REGISTRY: gitea-http.gitea.svc.cluster.local:3000
BASE_REGISTRY: registry.bakewise.ai/bakery-admin
PYTHON_IMAGE: "python:3.11-slim"
- image: bakery/procurement-service
@@ -564,7 +564,7 @@ profiles:
docker:
dockerfile: services/procurement/Dockerfile
buildArgs:
BASE_REGISTRY: gitea-http.gitea.svc.cluster.local:3000
BASE_REGISTRY: registry.bakewise.ai/bakery-admin
PYTHON_IMAGE: "python:3.11-slim"
- image: bakery/distribution-service
@@ -572,7 +572,7 @@ profiles:
docker:
dockerfile: services/distribution/Dockerfile
buildArgs:
BASE_REGISTRY: gitea-http.gitea.svc.cluster.local:3000
BASE_REGISTRY: registry.bakewise.ai/bakery-admin
PYTHON_IMAGE: "python:3.11-slim"
- image: bakery/production-service
@@ -580,7 +580,7 @@ profiles:
docker:
dockerfile: services/production/Dockerfile
buildArgs:
BASE_REGISTRY: gitea-http.gitea.svc.cluster.local:3000
BASE_REGISTRY: registry.bakewise.ai/bakery-admin
PYTHON_IMAGE: "python:3.11-slim"
- image: bakery/pos-service
@@ -588,7 +588,7 @@ profiles:
docker:
dockerfile: services/pos/Dockerfile
buildArgs:
BASE_REGISTRY: gitea-http.gitea.svc.cluster.local:3000
BASE_REGISTRY: registry.bakewise.ai/bakery-admin
PYTHON_IMAGE: "python:3.11-slim"
- image: bakery/orders-service
@@ -596,7 +596,7 @@ profiles:
docker:
dockerfile: services/orders/Dockerfile
buildArgs:
BASE_REGISTRY: gitea-http.gitea.svc.cluster.local:3000
BASE_REGISTRY: registry.bakewise.ai/bakery-admin
PYTHON_IMAGE: "python:3.11-slim"
- image: bakery/alert-processor
@@ -604,7 +604,7 @@ profiles:
docker:
dockerfile: services/alert_processor/Dockerfile
buildArgs:
BASE_REGISTRY: gitea-http.gitea.svc.cluster.local:3000
BASE_REGISTRY: registry.bakewise.ai/bakery-admin
PYTHON_IMAGE: "python:3.11-slim"
- image: bakery/demo-session-service
@@ -612,7 +612,7 @@ profiles:
docker:
dockerfile: services/demo_session/Dockerfile
buildArgs:
BASE_REGISTRY: gitea-http.gitea.svc.cluster.local:3000
BASE_REGISTRY: registry.bakewise.ai/bakery-admin
PYTHON_IMAGE: "python:3.11-slim"
deploy: