Fix resources issues 5

This commit is contained in:
2026-01-22 11:15:11 +01:00
parent 6505044f24
commit 0183f3ab72
20 changed files with 399 additions and 1193 deletions

View File

@@ -71,7 +71,7 @@ A complete multi-tenant SaaS platform consisting of:
│ PostgreSQL (18 DBs) │ Redis │ RabbitMQ │ MinIO │
├─────────────────────────────────────────────────────────────────────────────┤
│ LAYER 2: NETWORK & SECURITY │
Unbound DNS │ CoreDNS │ Ingress Controller │ Cert-Manager │ TLS
CoreDNS (DNS-over-TLS) │ Ingress Controller │ Cert-Manager │ TLS │
├─────────────────────────────────────────────────────────────────────────────┤
│ LAYER 1: FOUNDATION │
│ Namespaces │ Storage Classes │ RBAC │ ConfigMaps │ Secrets │
@@ -1045,107 +1045,84 @@ kubectl exec -n bakery-ia deployment/gateway -- curl -s http://localhost:8000/he
## Phase 7: Deploy Optional Services
### Step 7.1: Deploy Unbound DNS (Required for Mailu)
### Step 7.1: Configure CoreDNS with DNS-over-TLS for DNSSEC
> **Why Unbound?** Mailu requires DNSSEC validation for email security (DKIM/SPF/DMARC via rspamd).
> CoreDNS does NOT support DNSSEC natively, so Unbound provides this capability.
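> **DNS Architecture:** CoreDNS forwards all external queries over DNS-over-TLS to Cloudflare (1.1.1.1 / 1.0.0.1). DNSSEC validation is performed upstream by Cloudflare's resolver; CoreDNS itself only relays the answers over an encrypted channel.
> This gives Mailu DNSSEC-validated resolution without running an additional DNS resolver pod (such as Unbound) in the cluster.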
```bash
# Clean up any stuck Unbound deployments from previous attempts
kubectl delete deployment -n bakery-ia -l app.kubernetes.io/name=unbound --ignore-not-found
# Check if CoreDNS is already configured with DNS-over-TLS
kubectl get configmap coredns -n kube-system -o jsonpath='{.data.Corefile}' | grep -o 'tls://1.1.1.1' || echo "Not configured"
# Deploy Unbound DNS resolver with minimal resources
# Note: prod/values.yaml uses 50m CPU, 64Mi memory - very lightweight
helm upgrade --install unbound infrastructure/platform/networking/dns/unbound-helm \
-n bakery-ia \
-f infrastructure/platform/networking/dns/unbound-helm/values.yaml \
-f infrastructure/platform/networking/dns/unbound-helm/prod/values.yaml \
--timeout 5m \
--wait
# If not configured, update CoreDNS to use DNS-over-TLS with Cloudflare
cat > /tmp/coredns-corefile.yaml << 'EOF'
apiVersion: v1
kind: ConfigMap
metadata:
name: coredns
namespace: kube-system
data:
Corefile: |
.:53 {
errors
health {
lameduck 5s
}
ready
kubernetes cluster.local in-addr.arpa ip6.arpa {
pods insecure
fallthrough in-addr.arpa ip6.arpa
ttl 30
}
prometheus :9153
forward . tls://1.1.1.1 tls://1.0.0.1 {
tls_servername cloudflare-dns.com
health_check 5s
}
cache 30 {
disable success cluster.local
disable denial cluster.local
}
loop
reload
loadbalance
}
EOF
# Verify Unbound pod is running
kubectl get pods -n bakery-ia -l app.kubernetes.io/name=unbound
kubectl apply -f /tmp/coredns-corefile.yaml
# Restart CoreDNS to apply changes
kubectl rollout restart deployment coredns -n kube-system
kubectl rollout status deployment coredns -n kube-system --timeout=60s
# Verify CoreDNS is running
kubectl get pods -n kube-system -l k8s-app=kube-dns
# Expected: 1/1 Running
# Get Unbound service IP (will be used in subsequent steps)
UNBOUND_IP=$(kubectl get svc unbound-dns -n bakery-ia -o jsonpath='{.spec.clusterIP}')
echo "Unbound DNS IP: $UNBOUND_IP"
# Save this IP - you'll need it for Step 7.2 and 7.3
# Get CoreDNS service IP (will be used for Mailu)
COREDNS_IP=$(kubectl get svc kube-dns -n kube-system -o jsonpath='{.spec.clusterIP}')
echo "CoreDNS IP: $COREDNS_IP"
# Save this IP - you'll need it for Step 7.2
# Test Unbound is working (from inside the cluster)
kubectl run -it --rm dns-test --image=busybox --restart=Never -- \
nslookup google.com $UNBOUND_IP
# Test DNS resolution is working
kubectl run -it --rm dns-test --image=busybox --restart=Never -- nslookup google.com
# Expected: Should resolve google.com successfully
```
**Troubleshooting Unbound:**
**Troubleshooting CoreDNS:**
```bash
# If pod is Pending, check resources
kubectl describe pod -n bakery-ia -l app.kubernetes.io/name=unbound | grep -A 5 Events
# Check CoreDNS logs
kubectl logs -n kube-system -l k8s-app=kube-dns
# Check node resource availability
kubectl describe node | grep -A 10 "Allocated resources"
# Check CoreDNS configuration
kubectl get configmap coredns -n kube-system -o yaml
# If resources are exhausted, scale down non-critical services temporarily
kubectl scale deployment signoz-frontend -n bakery-ia --replicas=0 --ignore-not-found
# Verify external names resolve through the DNS-over-TLS forwarder (this lookup fails if the TLS upstream is unreachable)
kubectl run -it --rm dns-test --image=busybox --restart=Never -- nslookup cloudflare.com
```
### Step 7.2: Configure CoreDNS (Choose ONE Option)
> **Architecture Decision:** You have two options for DNS configuration.
> Choose based on your cluster size and requirements.
#### Option A: Mailu-Only DNSSEC (Recommended for Single-Node)
Only Mailu pods use Unbound for DNSSEC. CoreDNS uses public DNS for everything else.
This is simpler and avoids making Unbound a single point of failure for the entire cluster.
```bash
# Ensure CoreDNS uses public DNS (8.8.8.8, 1.1.1.1)
# This is likely already the default, but verify:
kubectl get configmap coredns -n kube-system -o yaml | grep forward
# If it shows forwarding to Unbound IP, restore to public DNS:
kubectl patch configmap coredns -n kube-system --type merge -p '{
"data": {
"Corefile": ".:53 {\n errors\n health {\n lameduck 5s\n }\n ready\n kubernetes cluster.local in-addr.arpa ip6.arpa {\n pods insecure\n fallthrough in-addr.arpa ip6.arpa\n ttl 30\n }\n prometheus :9153\n forward . 8.8.8.8 1.1.1.1 {\n max_concurrent 1000\n }\n cache 30\n loop\n reload\n loadbalance\n}\n"
}
}'
kubectl rollout restart deployment coredns -n kube-system
kubectl rollout status deployment coredns -n kube-system --timeout=60s
```
#### Option B: Cluster-Wide DNSSEC (For Multi-Node HA)
All cluster DNS queries go through Unbound. Provides DNSSEC for all pods.
Only use this if you have multiple Unbound replicas for high availability.
```bash
# Get Unbound IP
UNBOUND_IP=$(kubectl get svc unbound-dns -n bakery-ia -o jsonpath='{.spec.clusterIP}')
# Patch CoreDNS to forward ALL external queries to Unbound
kubectl patch configmap coredns -n kube-system --type merge -p "{
\"data\": {
\"Corefile\": \".:53 {\\n errors\\n health {\\n lameduck 5s\\n }\\n ready\\n kubernetes cluster.local in-addr.arpa ip6.arpa {\\n pods insecure\\n fallthrough in-addr.arpa ip6.arpa\\n ttl 30\\n }\\n prometheus :9153\\n forward . $UNBOUND_IP {\\n max_concurrent 1000\\n }\\n cache 30\\n loop\\n reload\\n loadbalance\\n}\\n\"
}
}"
kubectl rollout restart deployment coredns -n kube-system
kubectl rollout status deployment coredns -n kube-system --timeout=60s
```
**Verify DNS is working:**
```bash
# Test DNS resolution from a pod
kubectl run -it --rm dns-test --image=busybox --restart=Never -- nslookup google.com
# Expected: Should resolve successfully
```
### Step 7.3: Deploy Mailu Email Server
### Step 7.2: Deploy Mailu Email Server
```bash
# Add Mailu Helm repository
@@ -1156,18 +1133,18 @@ helm repo update
kubectl apply -f infrastructure/platform/mail/mailu-helm/configs/mailu-admin-credentials-secret.yaml -n bakery-ia
kubectl apply -f infrastructure/platform/mail/mailu-helm/configs/mailu-certificates-secret.yaml -n bakery-ia
# Get Unbound DNS IP dynamically
UNBOUND_IP=$(kubectl get svc unbound-dns -n bakery-ia -o jsonpath='{.spec.clusterIP}')
echo "Using Unbound DNS IP: $UNBOUND_IP"
# Get CoreDNS service IP dynamically
COREDNS_IP=$(kubectl get svc kube-dns -n kube-system -o jsonpath='{.spec.clusterIP}')
echo "Using CoreDNS IP: $COREDNS_IP"
# Install Mailu with production configuration
# The --set flag dynamically passes the Unbound IP for DNSSEC validation
# The --set flag dynamically passes the CoreDNS IP for DNS resolution
# DNSSEC validation is performed upstream by Cloudflare; CoreDNS reaches it via DNS-over-TLS
helm upgrade --install mailu mailu/mailu \
-n bakery-ia \
-f infrastructure/platform/mail/mailu-helm/values.yaml \
-f infrastructure/platform/mail/mailu-helm/prod/values.yaml \
--set global.custom_dns_servers="$UNBOUND_IP" \
--set admin.dnsConfig.nameservers[0]="$UNBOUND_IP" \
--set global.custom_dns_servers="$COREDNS_IP" \
--timeout 10m
# Wait for Mailu to be ready (may take 5-10 minutes)