Add new infra architecture 13

Urtzi Alfaro
2026-01-21 23:16:19 +01:00
parent 66dfd50fbc
commit aeff6b1537
22 changed files with 552 additions and 151 deletions

View File

@@ -11,36 +11,29 @@ ingress:
className: nginx
annotations:
nginx.ingress.kubernetes.io/ssl-redirect: "true"
nginx.ingress.kubernetes.io/proxy-body-size: "500m"
nginx.ingress.kubernetes.io/proxy-body-size: "2G"
nginx.ingress.kubernetes.io/proxy-connect-timeout: "600"
nginx.ingress.kubernetes.io/proxy-send-timeout: "600"
nginx.ingress.kubernetes.io/proxy-read-timeout: "600"
cert-manager.io/cluster-issuer: "letsencrypt-production"
hosts:
- host: gitea.bakewise.ai
paths:
- path: /
pathType: Prefix
- host: registry.bakewise.ai
paths:
- path: /
pathType: Prefix
tls:
- secretName: gitea-tls-cert
hosts:
- gitea.bakewise.ai
apiIngress:
enabled: true
className: nginx
annotations:
nginx.ingress.kubernetes.io/ssl-redirect: "true"
nginx.ingress.kubernetes.io/proxy-body-size: "500m"
cert-manager.io/cluster-issuer: "letsencrypt-production"
hosts:
- host: registry.bakewise.ai
paths:
- path: /
pathType: Prefix
tls:
- secretName: registry-tls-cert
hosts:
- registry.bakewise.ai
- registry.bakewise.ai
# NOTE: The Gitea Helm chart (v12.4.0) does not natively support separate registry ingress.
# For registry access, we include registry.bakewise.ai in the main ingress above.
# This works because Gitea serves both UI and registry on the same port (3000).
gitea:
admin:
@@ -62,4 +55,4 @@ resources:
# Larger storage for production
persistence:
size: 50Gi
size: 50Gi
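Since Gitea serves the web UI and the container registry on the same port (3000), routing registry.bakewise.ai through the main ingress can be sanity-checked with a plain registry v2 API call once the TLS certificate is issued (a minimal sketch; exact headers depend on the Gitea version):

# Unauthenticated probe of the OCI/Docker v2 endpoint; a 401 response with a
# WWW-Authenticate header means the registry is reachable behind the ingress.
curl -si https://registry.bakewise.ai/v2/ | head -n 5
# Authenticated login (credentials assumed to match the Gitea admin account):
docker login registry.bakewise.ai -u bakery-admin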

View File

@@ -32,7 +32,7 @@ ingress:
className: nginx
annotations:
nginx.ingress.kubernetes.io/ssl-redirect: "true"
nginx.ingress.kubernetes.io/proxy-body-size: "500m"
nginx.ingress.kubernetes.io/proxy-body-size: "2G"
nginx.ingress.kubernetes.io/proxy-connect-timeout: "600"
nginx.ingress.kubernetes.io/proxy-send-timeout: "600"
nginx.ingress.kubernetes.io/proxy-read-timeout: "600"

View File

@@ -29,7 +29,7 @@ spec:
- name: base-registry
type: string
description: Base image registry URL (e.g., docker.io, ghcr.io/org)
default: "gitea-http.gitea.svc.cluster.local:3000/bakery-admin"
default: "registry.bakewise.ai/bakery-admin"
- name: python-image
type: string
description: Python base image name and tag
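With the default now pointing at the external registry, the parameter can still be overridden per run. A hedged sketch using the Tekton CLI; the pipeline, workspace, and PVC names below are assumptions, not taken from this commit:

# Start the build pipeline with an explicit base registry and Python image.
tkn pipeline start bakery-build \
  --param base-registry=registry.bakewise.ai/bakery-admin \
  --param python-image=python:3.11-slim \
  --workspace name=source,claimName=source-pvc \
  -n bakery-ia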

View File

@@ -23,7 +23,7 @@ spec:
default: "false"
steps:
- name: run-unit-tests
image: gitea-http.gitea.svc.cluster.local:3000/bakery-admin/python_3.11-slim:latest
image: registry.bakewise.ai/bakery-admin/python:3.11-slim
workingDir: $(workspaces.source.path)
script: |
#!/bin/bash
@@ -57,7 +57,7 @@ spec:
cpu: 200m
memory: 512Mi
- name: run-integration-tests
image: gitea-http.gitea.svc.cluster.local:3000/bakery-admin/python_3.11-slim:latest
image: registry.bakewise.ai/bakery-admin/python:3.11-slim
workingDir: $(workspaces.source.path)
script: |
#!/bin/bash

View File

@@ -16,6 +16,11 @@
# Global settings for production
global:
# Registry configuration - use external HTTPS URL for image references
# containerd/Docker requires HTTPS for authenticated registries
registry:
url: "registry.bakewise.ai/bakery-admin"
# Git configuration
git:
userEmail: "ci@bakewise.ai"
@@ -25,6 +30,8 @@ pipeline:
# Build configuration
build:
verbosity: "warn" # Less verbose in production
# Use external registry URL for base images (HTTPS required)
baseRegistry: "registry.bakewise.ai/bakery-admin"
# Test configuration
test:
@@ -72,7 +79,9 @@ secrets:
registry:
username: "bakery-admin"
password: "" # MUST be set via --set flag
registryUrl: "gitea-http.gitea.svc.cluster.local:3000"
# Use external HTTPS URL for image references (containerd requires HTTPS for auth)
# Kaniko can still push via HTTP internally, but image refs must use HTTPS
registryUrl: "registry.bakewise.ai"
# Git credentials for GitOps updates
# Override with: --set secrets.git.password=$GITEA_ADMIN_PASSWORD
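Because image references now use the external HTTPS URL, whatever pulls or pushes these images needs credentials for registry.bakewise.ai. A generic sketch of creating the dockerconfigjson pull secret (the secret name and namespace are assumptions):

# Idempotent create-or-update of the registry pull secret.
kubectl create secret docker-registry gitea-registry-cred \
  --docker-server=registry.bakewise.ai \
  --docker-username=bakery-admin \
  --docker-password="$GITEA_ADMIN_PASSWORD" \
  -n bakery-ia \
  --dry-run=client -o yaml | kubectl apply -f -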

View File

@@ -4,8 +4,9 @@
# Global settings
global:
# Registry configuration
# NOTE: Use external HTTPS URL - containerd requires HTTPS for authenticated registries
registry:
url: "gitea-http.gitea.svc.cluster.local:3000/bakery-admin"
url: "registry.bakewise.ai/bakery-admin"
# Git configuration
git:
@@ -20,10 +21,9 @@ pipeline:
cacheTTL: "24h"
verbosity: "info"
# Base image registry configuration
# For dev: localhost:5000 with python_3.11-slim
# For prod: gitea registry with python_3.11-slim
baseRegistry: "gitea-http.gitea.svc.cluster.local:3000/bakery-admin"
pythonImage: "python_3.11-slim"
# NOTE: Use external HTTPS URL - containerd requires HTTPS for authenticated registries
baseRegistry: "registry.bakewise.ai/bakery-admin"
pythonImage: "python:3.11-slim"
# Test configuration
test:
@@ -74,10 +74,11 @@ secrets:
# Registry credentials for pushing images
# Uses the same credentials as Gitea admin for consistency
# NOTE: Use external HTTPS URL - containerd requires HTTPS for authenticated registries
registry:
username: "bakery-admin"
password: "" # Will be populated from gitea-admin-secret
registryUrl: "gitea-http.gitea.svc.cluster.local:3000"
registryUrl: "registry.bakewise.ai"
# Git credentials for GitOps updates
# Uses the same credentials as Gitea admin for consistency
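The rename from python_3.11-slim to python:3.11-slim assumes the upstream base image has been mirrored into the Gitea registry under that repository. One way to seed it, sketched with crane (the commit caches a crane image elsewhere, but this exact mirroring step is not part of the diff):

# Mirror the upstream Python base image into the external registry.
crane auth login registry.bakewise.ai -u bakery-admin -p "$GITEA_ADMIN_PASSWORD"
crane copy python:3.11-slim registry.bakewise.ai/bakery-admin/python:3.11-slim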

View File

@@ -207,141 +207,160 @@ patches:
images:
# Application services
- name: bakery/auth-service
newName: gitea-http.gitea.svc.cluster.local:3000/bakery-admin/auth-service
newName: registry.bakewise.ai/bakery-admin/auth-service
newTag: latest
- name: bakery/tenant-service
newName: gitea-http.gitea.svc.cluster.local:3000/bakery-admin/tenant-service
newName: registry.bakewise.ai/bakery-admin/tenant-service
newTag: latest
- name: bakery/training-service
newName: gitea-http.gitea.svc.cluster.local:3000/bakery-admin/training-service
newName: registry.bakewise.ai/bakery-admin/training-service
newTag: latest
- name: bakery/forecasting-service
newName: gitea-http.gitea.svc.cluster.local:3000/bakery-admin/forecasting-service
newName: registry.bakewise.ai/bakery-admin/forecasting-service
newTag: latest
- name: bakery/sales-service
newName: gitea-http.gitea.svc.cluster.local:3000/bakery-admin/sales-service
newName: registry.bakewise.ai/bakery-admin/sales-service
newTag: latest
- name: bakery/external-service
newName: gitea-http.gitea.svc.cluster.local:3000/bakery-admin/external-service
newName: registry.bakewise.ai/bakery-admin/external-service
newTag: latest
- name: bakery/notification-service
newName: gitea-http.gitea.svc.cluster.local:3000/bakery-admin/notification-service
newName: registry.bakewise.ai/bakery-admin/notification-service
newTag: latest
- name: bakery/inventory-service
newName: gitea-http.gitea.svc.cluster.local:3000/bakery-admin/inventory-service
newName: registry.bakewise.ai/bakery-admin/inventory-service
newTag: latest
- name: bakery/recipes-service
newName: gitea-http.gitea.svc.cluster.local:3000/bakery-admin/recipes-service
newName: registry.bakewise.ai/bakery-admin/recipes-service
newTag: latest
- name: bakery/suppliers-service
newName: gitea-http.gitea.svc.cluster.local:3000/bakery-admin/suppliers-service
newName: registry.bakewise.ai/bakery-admin/suppliers-service
newTag: latest
- name: bakery/pos-service
newName: gitea-http.gitea.svc.cluster.local:3000/bakery-admin/pos-service
newName: registry.bakewise.ai/bakery-admin/pos-service
newTag: latest
- name: bakery/orders-service
newName: gitea-http.gitea.svc.cluster.local:3000/bakery-admin/orders-service
newName: registry.bakewise.ai/bakery-admin/orders-service
newTag: latest
- name: bakery/production-service
newName: gitea-http.gitea.svc.cluster.local:3000/bakery-admin/production-service
newName: registry.bakewise.ai/bakery-admin/production-service
newTag: latest
- name: bakery/alert-processor
newName: gitea-http.gitea.svc.cluster.local:3000/bakery-admin/alert-processor
newName: registry.bakewise.ai/bakery-admin/alert-processor
newTag: latest
- name: bakery/gateway
newName: gitea-http.gitea.svc.cluster.local:3000/bakery-admin/gateway
newName: registry.bakewise.ai/bakery-admin/gateway
newTag: latest
- name: bakery/dashboard
newName: gitea-http.gitea.svc.cluster.local:3000/bakery-admin/dashboard
newName: registry.bakewise.ai/bakery-admin/dashboard
newTag: latest
# Missing services (added to fix ImagePullBackOff errors)
- name: bakery/ai-insights-service
newName: registry.bakewise.ai/bakery-admin/ai-insights-service
newTag: latest
- name: bakery/demo-session-service
newName: registry.bakewise.ai/bakery-admin/demo-session-service
newTag: latest
- name: bakery/distribution-service
newName: registry.bakewise.ai/bakery-admin/distribution-service
newTag: latest
- name: bakery/orchestrator-service
newName: registry.bakewise.ai/bakery-admin/orchestrator-service
newTag: latest
- name: bakery/procurement-service
newName: registry.bakewise.ai/bakery-admin/procurement-service
newTag: latest
# =============================================================================
# Database images (cached in gitea registry for consistency)
- name: postgres
newName: gitea-http.gitea.svc.cluster.local:3000/bakery-admin/postgres
newName: registry.bakewise.ai/bakery-admin/postgres
newTag: "17-alpine"
- name: redis
newName: gitea-http.gitea.svc.cluster.local:3000/bakery-admin/redis
newName: registry.bakewise.ai/bakery-admin/redis
newTag: "7.4-alpine"
- name: rabbitmq
newName: gitea-http.gitea.svc.cluster.local:3000/bakery-admin/rabbitmq
newName: registry.bakewise.ai/bakery-admin/rabbitmq
newTag: "4.1-management-alpine"
# Utility images
- name: busybox
newName: gitea-http.gitea.svc.cluster.local:3000/bakery-admin/busybox
newName: registry.bakewise.ai/bakery-admin/busybox
newTag: "1.36"
- name: curlimages/curl
newName: gitea-http.gitea.svc.cluster.local:3000/bakery-admin/curlimages-curl
newName: registry.bakewise.ai/bakery-admin/curlimages_curl
newTag: latest
- name: bitnami/kubectl
newName: gitea-http.gitea.svc.cluster.local:3000/bakery-admin/bitnami-kubectl
newName: registry.bakewise.ai/bakery-admin/bitnami_kubectl
newTag: latest
# Alpine variants
- name: alpine
newName: gitea-http.gitea.svc.cluster.local:3000/bakery-admin/alpine
newName: registry.bakewise.ai/bakery-admin/alpine
newTag: "3.19"
- name: alpine/git
newName: gitea-http.gitea.svc.cluster.local:3000/bakery-admin/alpine-git
newName: registry.bakewise.ai/bakery-admin/alpine_git
newTag: 2.43.0
# CI/CD images (cached in gitea registry for consistency)
- name: gcr.io/kaniko-project/executor
newName: gitea-http.gitea.svc.cluster.local:3000/bakery-admin/gcr.io-kaniko-project-executor
newName: registry.bakewise.ai/bakery-admin/gcr.io_kaniko-project_executor
newTag: v1.23.0
- name: gcr.io/go-containerregistry/crane
newName: gitea-http.gitea.svc.cluster.local:3000/bakery-admin/gcr.io-go-containerregistry-crane
newName: registry.bakewise.ai/bakery-admin/gcr.io_go-containerregistry_crane
newTag: latest
- name: registry.k8s.io/kustomize/kustomize
newName: gitea-http.gitea.svc.cluster.local:3000/bakery-admin/registry.k8s.io-kustomize-kustomize
newName: registry.bakewise.ai/bakery-admin/registry.k8s.io_kustomize_kustomize
newTag: v5.3.0
# Storage images
- name: minio/minio
newName: gitea-http.gitea.svc.cluster.local:3000/bakery-admin/minio-minio
newName: registry.bakewise.ai/bakery-admin/minio_minio
newTag: RELEASE.2024-11-07T00-52-20Z
- name: minio/mc
newName: gitea-http.gitea.svc.cluster.local:3000/bakery-admin/minio-mc
newName: registry.bakewise.ai/bakery-admin/minio_mc
newTag: RELEASE.2024-11-17T19-35-25Z
# NOTE: nominatim image override removed - nominatim is now deployed via Helm
# Python base image
- name: python
newName: gitea-http.gitea.svc.cluster.local:3000/bakery-admin/python
newName: registry.bakewise.ai/bakery-admin/python
newTag: 3.11-slim
# Replica counts for single-node VPS deployment (8 CPU cores)
# Set to 1 replica per service to fit resource constraints
# Scale up when adding more nodes to the cluster
replicas:
- name: auth-service
count: 3
count: 1
- name: tenant-service
count: 2
count: 1
- name: training-service
count: 3 # Safe with MinIO storage - no PVC conflicts
count: 1
- name: forecasting-service
count: 3
count: 1
- name: sales-service
count: 2
count: 1
- name: external-service
count: 2
count: 1
- name: notification-service
count: 3
count: 1
- name: inventory-service
count: 2
count: 1
- name: recipes-service
count: 2
count: 1
- name: suppliers-service
count: 2
count: 1
- name: pos-service
count: 2
count: 1
- name: orders-service
count: 3
count: 1
- name: production-service
count: 2
count: 1
- name: alert-processor
count: 3
count: 1
- name: procurement-service
count: 2
count: 1
- name: orchestrator-service
count: 2
count: 1
- name: ai-insights-service
count: 2
count: 1
- name: gateway
count: 3
count: 1
- name: frontend
count: 2
count: 1
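With every image override now pointing at registry.bakewise.ai and all replicas pinned to 1 for the single-node VPS, the rendered overlay can be spot-checked before applying (the overlay path below is illustrative):

# Every image: line should reference registry.bakewise.ai/bakery-admin/...
kustomize build overlays/prod | grep -E 'image:|replicas:' | sort | uniq -c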

View File

@@ -49,6 +49,9 @@ spec:
- secretRef:
name: whatsapp-secrets
env:
# Gateway doesn't use a database but base config requires this for production validation
- name: DATABASE_URL
value: "postgresql://gateway:unused@localhost:5432/unused"
- name: OTEL_EXPORTER_OTLP_ENDPOINT
valueFrom:
configMapKeyRef:

View File

@@ -119,9 +119,21 @@ fi
# Wait for Unbound to be ready
kubectl wait --for=condition=ready pod -l app.kubernetes.io/name=unbound -n "$NAMESPACE" --timeout=120s
# Get Unbound service IP
UNBOUND_IP=$(kubectl get svc unbound-dns -n "$NAMESPACE" -o jsonpath='{.spec.clusterIP}')
echo "Unbound DNS service IP: $UNBOUND_IP"
# Get Unbound service IP (dynamic resolution)
echo "Waiting for Unbound service to get assigned IP..."
for i in {1..30}; do
UNBOUND_IP=$(kubectl get svc unbound-dns -n "$NAMESPACE" -o jsonpath='{.spec.clusterIP}' 2>/dev/null || echo "")
if [ -n "$UNBOUND_IP" ] && [ "$UNBOUND_IP" != "<none>" ]; then
echo "Unbound DNS service IP: $UNBOUND_IP"
break
fi
if [ $i -eq 30 ]; then
print_error "Failed to get Unbound service IP"
exit 1
fi
sleep 2
echo "Waiting for Unbound service IP... (attempt $i/30)"
done
# =============================================================================
# Step 2: Configure CoreDNS to Forward to Unbound
@@ -134,12 +146,43 @@ CURRENT_FORWARD=$(kubectl get configmap coredns -n kube-system -o jsonpath='{.da
if [ "$CURRENT_FORWARD" != "$UNBOUND_IP" ]; then
echo "Updating CoreDNS to forward to Unbound ($UNBOUND_IP)..."
# Create a temporary file with the CoreDNS configuration
TEMP_COREFILE=$(mktemp)
cat > "$TEMP_COREFILE" <<EOF
.:53 {
errors
health {
lameduck 5s
}
ready
kubernetes cluster.local in-addr.arpa ip6.arpa {
pods insecure
fallthrough in-addr.arpa ip6.arpa
ttl 30
}
prometheus :9153
forward . $UNBOUND_IP {
max_concurrent 1000
}
cache 30 {
disable success cluster.local
disable denial cluster.local
}
loop
reload
loadbalance
}
EOF
# Apply the configuration
kubectl patch configmap coredns -n kube-system --type merge -p "{
\"data\": {
\"Corefile\": \".:53 {\\n errors\\n health {\\n lameduck 5s\\n }\\n ready\\n kubernetes cluster.local in-addr.arpa ip6.arpa {\\n pods insecure\\n fallthrough in-addr.arpa ip6.arpa\\n ttl 30\\n }\\n prometheus :9153\\n forward . $UNBOUND_IP {\\n max_concurrent 1000\\n }\\n cache 30 {\\n disable success cluster.local\\n disable denial cluster.local\\n }\\n loop\\n reload\\n loadbalance\\n}\\n\"
\"Corefile\": \"$(cat "$TEMP_COREFILE" | sed 's/\\/\\\\/g' | sed ':a;N;$!ba;s/\n/\\\\n/g')\"
}
}"
rm -f "$TEMP_COREFILE"
# Restart CoreDNS
kubectl rollout restart deployment coredns -n kube-system
kubectl rollout status deployment coredns -n kube-system --timeout=60s
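The sed-based escaping above is easy to break if the Corefile ever gains quotes or backslashes; an equivalent, hedged alternative is to let kubectl do the JSON encoding by regenerating the ConfigMap straight from the temp file:

# Re-render the coredns ConfigMap from the file and apply it in place.
kubectl create configmap coredns -n kube-system \
  --from-file=Corefile="$TEMP_COREFILE" \
  --dry-run=client -o yaml | kubectl apply -f -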
@@ -208,13 +251,19 @@ print_step "Step 5: Deploying Mailu via Helm..."
helm repo add mailu https://mailu.github.io/helm-charts 2>/dev/null || true
helm repo update mailu
# Deploy Mailu
# Create temporary values file with dynamic DNS server
TEMP_VALUES=$(mktemp)
cat "$MAILU_HELM_DIR/values.yaml" | sed "s/# custom_dns_servers: \"\" # Will be set dynamically by deployment script/custom_dns_servers: \"$UNBOUND_IP\"/" > "$TEMP_VALUES"
# Deploy Mailu with dynamic DNS configuration
helm upgrade --install mailu mailu/mailu \
-n "$NAMESPACE" \
-f "$MAILU_HELM_DIR/values.yaml" \
-f "$TEMP_VALUES" \
-f "$MAILU_HELM_DIR/prod/values.yaml" \
--timeout 10m
rm -f "$TEMP_VALUES"
print_success "Mailu Helm release deployed (admin user will be created automatically)"
# =============================================================================
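If the Mailu chart exposes the key at global.custom_dns_servers, as the values layout in this commit suggests, the sed templating could also be replaced by a --set override. A sketch under that assumption:

helm upgrade --install mailu mailu/mailu \
  -n "$NAMESPACE" \
  -f "$MAILU_HELM_DIR/values.yaml" \
  -f "$MAILU_HELM_DIR/prod/values.yaml" \
  --set global.custom_dns_servers="$UNBOUND_IP" \
  --timeout 10m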

View File

@@ -0,0 +1,209 @@
#!/bin/bash
# =============================================================================
# Phase 7: Deploy Optional Services - Fixed Version
# =============================================================================
# This script deploys the optional services for production:
# 1. Unbound DNS (with dynamic IP resolution)
# 2. CoreDNS configuration for DNSSEC
# 3. Mailu Email Server
# 4. SigNoz Monitoring
#
# Fixed issues:
# - Removed static ClusterIP that caused CIDR range conflicts
# - Implemented dynamic IP resolution for Unbound DNS
# - Updated CoreDNS patching to use dynamic IP
# - Updated Mailu configuration to use dynamic DNS server
# =============================================================================
set -e
# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color
NAMESPACE="bakery-ia"
DOMAIN="bakewise.ai"
print_step() {
echo -e "\n${BLUE}==>${NC} ${GREEN}$1${NC}"
}
print_error() {
echo -e "${RED}ERROR:${NC} $1"
}
print_success() {
echo -e "${GREEN}${NC} $1"
}
# =============================================================================
# Step 7.1: Deploy Unbound DNS (with dynamic IP)
# =============================================================================
print_step "Step 7.1: Deploying Unbound DNS resolver (dynamic IP)..."
if kubectl get deployment unbound -n "$NAMESPACE" &>/dev/null; then
print_success "Unbound already deployed"
else
helm upgrade --install unbound infrastructure/platform/networking/dns/unbound-helm \
-n "$NAMESPACE" \
-f infrastructure/platform/networking/dns/unbound-helm/values.yaml \
-f infrastructure/platform/networking/dns/unbound-helm/prod/values.yaml \
--timeout 5m \
--wait
print_success "Unbound deployed"
fi
# Wait for Unbound service to get assigned IP
print_step "Waiting for Unbound service to get assigned IP..."
for i in {1..30}; do
UNBOUND_IP=$(kubectl get svc unbound-dns -n "$NAMESPACE" -o jsonpath='{.spec.clusterIP}' 2>/dev/null || echo "")
if [ -n "$UNBOUND_IP" ] && [ "$UNBOUND_IP" != "<none>" ]; then
echo "Unbound DNS service IP: $UNBOUND_IP"
break
fi
if [ $i -eq 30 ]; then
print_error "Failed to get Unbound service IP"
exit 1
fi
sleep 2
echo "Waiting for Unbound service IP... (attempt $i/30)"
done
# =============================================================================
# Step 7.2: Configure CoreDNS for DNSSEC (dynamic IP)
# =============================================================================
print_step "Step 7.2: Configuring CoreDNS for DNSSEC validation..."
# Check current CoreDNS forward configuration
CURRENT_FORWARD=$(kubectl get configmap coredns -n kube-system -o jsonpath='{.data.Corefile}' | grep -o 'forward \. [0-9.]*' | awk '{print $3}' || echo "")
if [ "$CURRENT_FORWARD" != "$UNBOUND_IP" ]; then
echo "Updating CoreDNS to forward to Unbound ($UNBOUND_IP)..."
# Create a temporary file with the CoreDNS configuration
TEMP_COREFILE=$(mktemp)
cat > "$TEMP_COREFILE" <<EOF
.:53 {
errors
health {
lameduck 5s
}
ready
kubernetes cluster.local in-addr.arpa ip6.arpa {
pods insecure
fallthrough in-addr.arpa ip6.arpa
ttl 30
}
prometheus :9153
forward . $UNBOUND_IP {
max_concurrent 1000
}
cache 30 {
disable success cluster.local
disable denial cluster.local
}
loop
reload
loadbalance
}
EOF
# Apply the configuration
kubectl patch configmap coredns -n kube-system --type merge -p "{
\"data\": {
\"Corefile\": \"$(cat "$TEMP_COREFILE" | sed 's/\\/\\\\/g' | sed ':a;N;$!ba;s/\n/\\\\n/g')\"
}
}"
rm -f "$TEMP_COREFILE"
# Restart CoreDNS
kubectl rollout restart deployment coredns -n kube-system
kubectl rollout status deployment coredns -n kube-system --timeout=60s
print_success "CoreDNS configured to forward to Unbound"
else
print_success "CoreDNS already configured for Unbound"
fi
# =============================================================================
# Step 7.3: Deploy Mailu Email Server (dynamic DNS)
# =============================================================================
print_step "Step 7.3: Deploying Mailu Email Server..."
# Add Mailu Helm repository
helm repo add mailu https://mailu.github.io/helm-charts 2>/dev/null || true
helm repo update mailu
# Create temporary values file with dynamic DNS server
TEMP_VALUES=$(mktemp)
cat infrastructure/platform/mail/mailu-helm/values.yaml | sed "s/# custom_dns_servers: \"\" # Will be set dynamically by deployment script/custom_dns_servers: \"$UNBOUND_IP\"/" > "$TEMP_VALUES"
# Deploy Mailu with dynamic DNS configuration
helm upgrade --install mailu mailu/mailu \
-n "$NAMESPACE" \
-f "$TEMP_VALUES" \
-f infrastructure/platform/mail/mailu-helm/prod/values.yaml \
--timeout 10m
rm -f "$TEMP_VALUES"
print_success "Mailu Helm release deployed"
# Wait for Mailu pods to be ready
echo "Waiting for Mailu pods to be ready (this may take 5-10 minutes)..."
kubectl wait --for=condition=ready pod -l app.kubernetes.io/component=admin -n "$NAMESPACE" --timeout=300s || {
print_error "Admin pod failed to start. Checking logs..."
kubectl logs -n "$NAMESPACE" -l app.kubernetes.io/component=admin --tail=50
exit 1
}
print_success "Mailu deployment completed"
# =============================================================================
# Step 7.4: Deploy SigNoz Monitoring
# =============================================================================
print_step "Step 7.4: Deploying SigNoz Monitoring..."
# Add SigNoz Helm repository
helm repo add signoz https://charts.signoz.io 2>/dev/null || true
helm repo update
# Install SigNoz
helm install signoz signoz/signoz \
-n "$NAMESPACE" \
-f infrastructure/monitoring/signoz/signoz-values-prod.yaml \
--set global.storageClass="microk8s-hostpath" \
--set clickhouse.persistence.enabled=true \
--set clickhouse.persistence.size=50Gi \
--timeout 15m
# Wait for SigNoz to be ready
kubectl wait --for=condition=available --timeout=600s deployment/signoz-frontend -n "$NAMESPACE"
print_success "SigNoz deployment completed"
# =============================================================================
# Summary
# =============================================================================
echo ""
echo "=============================================="
echo -e "${GREEN}Phase 7 Deployment Complete!${NC}"
echo "=============================================="
echo ""
echo "Deployed Services:"
echo " ✓ Unbound DNS (IP: $UNBOUND_IP)"
echo " ✓ CoreDNS (configured for DNSSEC)"
echo " ✓ Mailu Email Server"
echo " ✓ SigNoz Monitoring"
echo ""
echo "Next Steps:"
echo " 1. Configure DNS records for mail.$DOMAIN"
echo " 2. Set up Mailgun relay credentials"
echo " 3. Configure Ingress for monitoring.$DOMAIN"
echo " 4. Verify all services are accessible"
echo ""

View File

@@ -5,8 +5,8 @@
global:
# Using Unbound DNS resolver directly for DNSSEC validation
# Unbound service is available at unbound-dns.bakery-ia.svc.cluster.local
# Static ClusterIP configured in unbound-helm/values.yaml
custom_dns_servers: "10.96.53.53" # Unbound DNS static ClusterIP
# DNS server IP will be dynamically resolved during deployment
# custom_dns_servers: "" # Will be set dynamically by deployment script
# Domain configuration
domain: "DOMAIN_PLACEHOLDER"

View File

@@ -41,10 +41,10 @@ affinity:
# Production probe settings (more conservative)
probes:
readiness:
initialDelaySeconds: 15
initialDelaySeconds: 20
periodSeconds: 30
command: "drill @127.0.0.1 -p 53 example.org || echo 'DNS query test'"
command: "sh -c 'echo \"\" | nc -w 3 127.0.0.1 53 || exit 1'"
liveness:
initialDelaySeconds: 45
initialDelaySeconds: 60
periodSeconds: 60
command: "drill @127.0.0.1 -p 53 example.org || echo 'DNS query test'"
command: "sh -c 'echo \"\" | nc -w 3 127.0.0.1 53 || exit 1'"

View File

@@ -34,10 +34,8 @@ securityContext:
# Service configuration
service:
type: "ClusterIP"
# Static ClusterIP for predictable DNS configuration
# This allows other services (like Mailu) to reference a stable IP
# Must be within the cluster's service CIDR range (typically 10.96.0.0/12)
clusterIP: "10.96.53.53"
# Dynamic ClusterIP - Kubernetes will assign automatically
# clusterIP: "" # Leave empty for automatic assignment
ports:
dnsUdp: 53
dnsTcp: 53
@@ -46,14 +44,22 @@ service:
probes:
readiness:
enabled: true
initialDelaySeconds: 10
initialDelaySeconds: 15
periodSeconds: 30
command: "drill @127.0.0.1 -p 53 example.org || echo 'DNS query test'"
# Simple TCP connectivity check - more reliable than DNS queries
# Tests if the DNS port is listening and responding
command: "sh -c 'echo \"\" | nc -w 2 127.0.0.1 53 || exit 1'"
# Alternative: use curl if available
# command: "curl -s --max-time 2 http://127.0.0.1:53 || exit 1"
liveness:
enabled: true
initialDelaySeconds: 30
initialDelaySeconds: 45
periodSeconds: 60
command: "drill @127.0.0.1 -p 53 example.org || echo 'DNS query test'"
# Simple TCP connectivity check - more reliable than DNS queries
# Tests if the DNS port is listening and responding
command: "sh -c 'echo \"\" | nc -w 2 127.0.0.1 53 || exit 1'"
# Alternative: use curl if available
# command: "curl -s --max-time 2 http://127.0.0.1:53 || exit 1"
# Additional environment variables
env: {}
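The nc-based probes only confirm that something is listening on TCP/53; to run the same check by hand against a live pod (deployment name taken from the deploy script, and assuming the image ships nc, as the probes themselves require):

kubectl exec -n bakery-ia deploy/unbound -- \
  sh -c 'echo "" | nc -w 2 127.0.0.1 53 && echo "port 53 accepting connections"'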

View File

@@ -10,7 +10,7 @@ metadata:
# Nginx ingress controller annotations
nginx.ingress.kubernetes.io/ssl-redirect: "true"
nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
nginx.ingress.kubernetes.io/proxy-body-size: "500m"
nginx.ingress.kubernetes.io/proxy-body-size: "2G"
nginx.ingress.kubernetes.io/proxy-connect-timeout: "600"
nginx.ingress.kubernetes.io/proxy-send-timeout: "3600"
nginx.ingress.kubernetes.io/proxy-read-timeout: "3600"
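To confirm the raised limit actually reaches nginx, the rendered controller config can be grepped for the client_max_body_size directive that proxy-body-size maps to (controller namespace and workload name vary by install and are assumptions here):

# Upstream ingress-nginx shown; microk8s installs use the 'ingress' namespace
# and a daemonset named nginx-ingress-microk8s-controller instead.
kubectl exec -n ingress-nginx deploy/ingress-nginx-controller -- \
  grep -ci 'client_max_body_size 2g' /etc/nginx/nginx.conf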