Fix resources issues
This commit is contained in:
@@ -1047,8 +1047,15 @@ kubectl exec -n bakery-ia deployment/gateway -- curl -s http://localhost:8000/he
|
||||
|
||||
### Step 7.1: Deploy Unbound DNS (Required for Mailu)
|
||||
|
||||
> **Why Unbound?** Mailu requires DNSSEC validation for email security (DKIM/SPF/DMARC via rspamd).
|
||||
> CoreDNS does NOT perform DNSSEC validation (its `dnssec` plugin only signs responses), so Unbound provides this capability.
|
||||
|
||||
```bash
|
||||
# Deploy Unbound DNS resolver
|
||||
# Clean up any stuck Unbound deployments from previous attempts
|
||||
kubectl delete deployment -n bakery-ia -l app.kubernetes.io/name=unbound --ignore-not-found
|
||||
|
||||
# Deploy Unbound DNS resolver with minimal resources
|
||||
# Note: prod/values.yaml uses 50m CPU, 64Mi memory - very lightweight
|
||||
helm upgrade --install unbound infrastructure/platform/networking/dns/unbound-helm \
|
||||
-n bakery-ia \
|
||||
-f infrastructure/platform/networking/dns/unbound-helm/values.yaml \
|
||||
@@ -1056,26 +1063,88 @@ helm upgrade --install unbound infrastructure/platform/networking/dns/unbound-he
|
||||
--timeout 5m \
|
||||
--wait
|
||||
|
||||
# Get Unbound service IP
|
||||
# Verify Unbound pod is running
|
||||
kubectl get pods -n bakery-ia -l app.kubernetes.io/name=unbound
|
||||
# Expected: 1/1 Running
|
||||
|
||||
# Get Unbound service IP (will be used in subsequent steps)
|
||||
UNBOUND_IP=$(kubectl get svc unbound-dns -n bakery-ia -o jsonpath='{.spec.clusterIP}')
|
||||
echo "Unbound DNS IP: $UNBOUND_IP"
|
||||
# Save this IP - you'll need it for Step 7.2 and 7.3
|
||||
|
||||
# Test Unbound is working (from inside the cluster)
|
||||
kubectl run -it --rm dns-test --image=busybox --restart=Never -- \
|
||||
nslookup google.com $UNBOUND_IP
|
||||
# Expected: Should resolve google.com successfully
|
||||
```
|
||||
|
||||
### Step 7.2: Configure CoreDNS for DNSSEC
|
||||
**Troubleshooting Unbound:**
|
||||
|
||||
```bash
|
||||
# Patch CoreDNS to forward to Unbound
|
||||
# If pod is Pending, check resources
|
||||
kubectl describe pod -n bakery-ia -l app.kubernetes.io/name=unbound | grep -A 5 Events
|
||||
|
||||
# Check node resource availability
|
||||
kubectl describe node | grep -A 10 "Allocated resources"
|
||||
|
||||
# If resources are exhausted, scale down non-critical services temporarily
|
||||
kubectl scale deployment signoz-frontend -n bakery-ia --replicas=0 || echo "signoz-frontend could not be scaled down (it may not exist) - skipping"
|
||||
```
|
||||
|
||||
### Step 7.2: Configure CoreDNS (Choose ONE Option)
|
||||
|
||||
> **Architecture Decision:** You have two options for DNS configuration.
|
||||
> Choose based on your cluster size and requirements.
|
||||
|
||||
#### Option A: Mailu-Only DNSSEC (Recommended for Single-Node)
|
||||
|
||||
Only Mailu pods use Unbound for DNSSEC. CoreDNS uses public DNS for everything else.
|
||||
This is simpler and avoids making Unbound a single point of failure for the entire cluster.
|
||||
|
||||
```bash
|
||||
# Ensure CoreDNS uses public DNS (8.8.8.8, 1.1.1.1)
|
||||
# This is likely already the default, but verify:
|
||||
kubectl get configmap coredns -n kube-system -o yaml | grep forward
|
||||
|
||||
# If it shows forwarding to Unbound IP, restore to public DNS:
|
||||
kubectl patch configmap coredns -n kube-system --type merge -p '{
|
||||
"data": {
|
||||
"Corefile": ".:53 {\n errors\n health {\n lameduck 5s\n }\n ready\n kubernetes cluster.local in-addr.arpa ip6.arpa {\n pods insecure\n fallthrough in-addr.arpa ip6.arpa\n ttl 30\n }\n prometheus :9153\n forward . 8.8.8.8 1.1.1.1 {\n max_concurrent 1000\n }\n cache 30\n loop\n reload\n loadbalance\n}\n"
|
||||
}
|
||||
}'
|
||||
|
||||
kubectl rollout restart deployment coredns -n kube-system
|
||||
kubectl rollout status deployment coredns -n kube-system --timeout=60s
|
||||
```
|
||||
|
||||
#### Option B: Cluster-Wide DNSSEC (For Multi-Node HA)
|
||||
|
||||
All cluster DNS queries go through Unbound. Provides DNSSEC for all pods.
|
||||
Only use this if you have multiple Unbound replicas for high availability.
|
||||
|
||||
```bash
|
||||
# Get Unbound IP
|
||||
UNBOUND_IP=$(kubectl get svc unbound-dns -n bakery-ia -o jsonpath='{.spec.clusterIP}')
|
||||
|
||||
# Patch CoreDNS to forward ALL external queries to Unbound
|
||||
kubectl patch configmap coredns -n kube-system --type merge -p "{
|
||||
\"data\": {
|
||||
\"Corefile\": \".:53 {\\n errors\\n health {\\n lameduck 5s\\n }\\n ready\\n kubernetes cluster.local in-addr.arpa ip6.arpa {\\n pods insecure\\n fallthrough in-addr.arpa ip6.arpa\\n ttl 30\\n }\\n prometheus :9153\\n forward . $UNBOUND_IP {\\n max_concurrent 1000\\n }\\n cache 30\\n loop\\n reload\\n loadbalance\\n}\\n\"
|
||||
}
|
||||
}"
|
||||
|
||||
# Restart CoreDNS
|
||||
kubectl rollout restart deployment coredns -n kube-system
|
||||
kubectl rollout status deployment coredns -n kube-system --timeout=60s
|
||||
```
|
||||
|
||||
**Verify DNS is working:**
|
||||
|
||||
```bash
|
||||
# Test DNS resolution from a pod
|
||||
kubectl run -it --rm dns-test --image=busybox --restart=Never -- nslookup google.com
|
||||
# Expected: Should resolve successfully
|
||||
```
|
||||
|
||||
### Step 7.3: Deploy Mailu Email Server
|
||||
|
||||
```bash
|
||||
@@ -1084,27 +1153,34 @@ helm repo add mailu https://mailu.github.io/helm-charts
|
||||
helm repo update
|
||||
|
||||
# Apply Mailu configuration secrets
|
||||
# These are pre-configured with secure defaults
|
||||
kubectl apply -f infrastructure/platform/mail/mailu-helm/configs/mailu-admin-credentials-secret.yaml -n bakery-ia
|
||||
kubectl apply -f infrastructure/platform/mail/mailu-helm/configs/mailu-certificates-secret.yaml -n bakery-ia
|
||||
|
||||
# Get Unbound DNS IP dynamically
|
||||
UNBOUND_IP=$(kubectl get svc unbound-dns -n bakery-ia -o jsonpath='{.spec.clusterIP}')
|
||||
echo "Using Unbound DNS IP: $UNBOUND_IP"
|
||||
|
||||
# Install Mailu with production configuration
|
||||
# The Helm chart uses the pre-configured secrets for admin credentials and TLS certificates
|
||||
# The --set flag dynamically passes the Unbound IP for DNSSEC validation
|
||||
helm upgrade --install mailu mailu/mailu \
|
||||
-n bakery-ia \
|
||||
-f infrastructure/platform/mail/mailu-helm/values.yaml \
|
||||
-f infrastructure/platform/mail/mailu-helm/prod/values.yaml \
|
||||
--set global.custom_dns_servers="$UNBOUND_IP" \
|
||||
--set "admin.dnsConfig.nameservers[0]=$UNBOUND_IP" \
|
||||
--timeout 10m
|
||||
|
||||
# Wait for Mailu to be ready
|
||||
# Wait for Mailu to be ready (may take 5-10 minutes)
|
||||
kubectl wait --for=condition=available --timeout=600s deployment/mailu-front -n bakery-ia
|
||||
|
||||
# Verify Mailu pods are running
|
||||
kubectl get pods -n bakery-ia | grep mailu
|
||||
|
||||
# Get the admin password from the pre-configured secret
|
||||
# Get the admin password
|
||||
MAILU_ADMIN_PASSWORD=$(kubectl get secret mailu-admin-credentials -n bakery-ia -o jsonpath='{.data.password}' | base64 -d)
|
||||
echo "============================================"
|
||||
echo "Mailu Admin Password: $MAILU_ADMIN_PASSWORD"
|
||||
echo "============================================"
|
||||
echo "⚠️ SAVE THIS PASSWORD SECURELY!"
|
||||
|
||||
# Check Mailu initialization status
|
||||
@@ -1113,53 +1189,46 @@ kubectl logs -n bakery-ia deployment/mailu-front --tail=10
|
||||
|
||||
> **Important Notes about Mailu Deployment:**
|
||||
>
|
||||
> 1. **Pre-Configured Secrets:** Mailu uses pre-configured secrets for admin credentials and TLS certificates. These are defined in the configuration files.
|
||||
> 1. **Pre-Configured Secrets:** Mailu uses pre-configured secrets for admin credentials and TLS certificates.
|
||||
>
|
||||
> 2. **Password Management:** The admin password is stored in `mailu-admin-credentials-secret.yaml`. For production, you should update this with a secure password before deployment.
|
||||
> 2. **Password Management:** Update `mailu-admin-credentials-secret.yaml` with a secure password before deployment.
|
||||
>
|
||||
> 3. **TLS Certificates:** The self-signed certificates in `mailu-certificates-secret.yaml` are for initial setup. For production, replace these with proper certificates from cert-manager (see Step 7.3.1).
|
||||
> 3. **TLS Certificates:** Self-signed certificates are used internally. External traffic uses Let's Encrypt via Ingress.
|
||||
>
|
||||
> 4. **Initialization Time:** Mailu may take 5-10 minutes to fully initialize. During this time, some pods may restart as the system configures itself.
|
||||
> 4. **Initialization Time:** Mailu may take 5-10 minutes to fully initialize. Pods may restart during setup.
|
||||
>
|
||||
> 5. **Accessing Mailu:**
|
||||
> - Webmail: `https://mail.bakewise.ai/webmail`
|
||||
> - Admin Interface: `https://mail.bakewise.ai/admin`
|
||||
> - Username: `admin@bakewise.ai`
|
||||
> - Password: (from `mailu-admin-credentials-secret.yaml`)
|
||||
> - Password: (from secret above)
|
||||
>
|
||||
> 6. **Mailgun Relay:** The production configuration includes Mailgun SMTP relay. Configure your Mailgun credentials in `mailu-mailgun-credentials-secret.yaml` before deployment.
|
||||
> 6. **Mailgun Relay:** Configure credentials in `mailu-mailgun-credentials-secret.yaml` before deployment.
|
||||
|
||||
### Step 7.3.1: Mailu Configuration Notes
|
||||
|
||||
> **Important Information about Mailu Certificates:**
|
||||
> **Certificate Architecture:**
|
||||
>
|
||||
> 1. **Dual Certificate Architecture:**
|
||||
> - **Internal Communication:** Uses self-signed certificates (`mailu-certificates-secret.yaml`)
|
||||
> - **External Communication:** Uses Let's Encrypt certificates via NGINX Ingress (`bakery-ia-prod-tls-cert`)
|
||||
> ```
|
||||
> External Client → NGINX Ingress (Let's Encrypt) → Internal Network → Mailu Services (Self-signed)
|
||||
> ```
|
||||
>
|
||||
> 2. **No Certificate Replacement Needed:** The self-signed certificates are only used for internal communication between Mailu services. External clients connect through the NGINX Ingress Controller which uses the publicly trusted Let's Encrypt certificates.
|
||||
>
|
||||
> 3. **Certificate Flow:**
|
||||
> ```
|
||||
> External Client → NGINX Ingress (Let's Encrypt) → Internal Network → Mailu Services (Self-signed)
|
||||
> ```
|
||||
>
|
||||
> 4. **Security:** This architecture is secure because:
|
||||
> - External connections use publicly trusted certificates
|
||||
> - Internal connections are still encrypted (even if self-signed)
|
||||
> - Ingress terminates TLS, reducing load on Mailu services
|
||||
>
|
||||
> 5. **Mailgun Relay Configuration:** For outbound email delivery, configure your Mailgun credentials:
|
||||
> ```bash
|
||||
> # Edit the Mailgun credentials secret
|
||||
> nano infrastructure/platform/mail/mailu-helm/configs/mailu-mailgun-credentials-secret.yaml
|
||||
>
|
||||
> # Apply the secret
|
||||
> kubectl apply -f infrastructure/platform/mail/mailu-helm/configs/mailu-mailgun-credentials-secret.yaml -n bakery-ia
|
||||
>
|
||||
> # Restart Mailu to pick up the new relay configuration
|
||||
> kubectl rollout restart deployment -n bakery-ia -l app.kubernetes.io/instance=mailu
|
||||
> ```
|
||||
> - **External:** Uses publicly trusted Let's Encrypt certificates via NGINX Ingress
|
||||
> - **Internal:** Uses self-signed certificates for inter-service communication
|
||||
> - **No replacement needed:** This dual-certificate architecture is intentional and secure
|
||||
|
||||
**Configure Mailgun Relay (for outbound email):**
|
||||
|
||||
```bash
|
||||
# Edit the Mailgun credentials secret
|
||||
nano infrastructure/platform/mail/mailu-helm/configs/mailu-mailgun-credentials-secret.yaml
|
||||
|
||||
# Apply the secret
|
||||
kubectl apply -f infrastructure/platform/mail/mailu-helm/configs/mailu-mailgun-credentials-secret.yaml -n bakery-ia
|
||||
|
||||
# Restart Mailu to pick up the new relay configuration
|
||||
kubectl rollout restart deployment -n bakery-ia -l app.kubernetes.io/instance=mailu
|
||||
```
|
||||
|
||||
### Step 7.4: Deploy SigNoz Monitoring
|
||||
|
||||
|
||||
@@ -204,6 +204,115 @@ patches:
|
||||
memory: "1Gi"
|
||||
cpu: "500m"
|
||||
|
||||
# =============================================================================
|
||||
# CPU Request Optimization for Production
|
||||
# Reduce CPU requests to match actual usage (was 100m, actual ~5-10m)
|
||||
# This prevents scheduler rejections due to overcommitted requests
|
||||
# =============================================================================
|
||||
|
||||
# Database deployments - reduce CPU request from 100m to 25m
|
||||
- target:
|
||||
group: apps
|
||||
version: v1
|
||||
kind: Deployment
|
||||
name: ".*-db$"
|
||||
namespace: bakery-ia
|
||||
patch: |-
|
||||
- op: replace
|
||||
path: /spec/template/spec/containers/0/resources/requests/cpu
|
||||
value: "25m"
|
||||
|
||||
# Microservice deployments - reduce CPU request from 100m to 25m
|
||||
- target:
|
||||
group: apps
|
||||
version: v1
|
||||
kind: Deployment
|
||||
name: ".*-service$"
|
||||
namespace: bakery-ia
|
||||
patch: |-
|
||||
- op: replace
|
||||
path: /spec/template/spec/containers/0/resources/requests/cpu
|
||||
value: "25m"
|
||||
|
||||
# Other core services
|
||||
- target:
|
||||
group: apps
|
||||
version: v1
|
||||
kind: Deployment
|
||||
name: gateway
|
||||
namespace: bakery-ia
|
||||
patch: |-
|
||||
- op: replace
|
||||
path: /spec/template/spec/containers/0/resources/requests/cpu
|
||||
value: "25m"
|
||||
|
||||
- target:
|
||||
group: apps
|
||||
version: v1
|
||||
kind: Deployment
|
||||
name: alert-processor
|
||||
namespace: bakery-ia
|
||||
patch: |-
|
||||
- op: replace
|
||||
path: /spec/template/spec/containers/0/resources/requests/cpu
|
||||
value: "25m"
|
||||
|
||||
- target:
|
||||
group: apps
|
||||
version: v1
|
||||
kind: Deployment
|
||||
name: frontend
|
||||
namespace: bakery-ia
|
||||
patch: |-
|
||||
- op: replace
|
||||
path: /spec/template/spec/containers/0/resources/requests/cpu
|
||||
value: "50m"
|
||||
|
||||
- target:
|
||||
group: apps
|
||||
version: v1
|
||||
kind: Deployment
|
||||
name: redis
|
||||
namespace: bakery-ia
|
||||
patch: |-
|
||||
- op: replace
|
||||
path: /spec/template/spec/containers/0/resources/requests/cpu
|
||||
value: "25m"
|
||||
|
||||
- target:
|
||||
group: apps
|
||||
version: v1
|
||||
kind: Deployment
|
||||
name: rabbitmq
|
||||
namespace: bakery-ia
|
||||
patch: |-
|
||||
- op: replace
|
||||
path: /spec/template/spec/containers/0/resources/requests/cpu
|
||||
value: "50m"
|
||||
|
||||
- target:
|
||||
group: apps
|
||||
version: v1
|
||||
kind: Deployment
|
||||
name: minio
|
||||
namespace: bakery-ia
|
||||
patch: |-
|
||||
- op: replace
|
||||
path: /spec/template/spec/containers/0/resources/requests/cpu
|
||||
value: "50m"
|
||||
|
||||
# Migration jobs - reduce CPU request from 100m to 25m
|
||||
- target:
|
||||
group: batch
|
||||
version: v1
|
||||
kind: Job
|
||||
name: ".*-migration$"
|
||||
namespace: bakery-ia
|
||||
patch: |-
|
||||
- op: replace
|
||||
path: /spec/template/spec/containers/0/resources/requests/cpu
|
||||
value: "25m"
|
||||
|
||||
images:
|
||||
# Application services
|
||||
- name: bakery/auth-service
|
||||
|
||||
@@ -1,9 +1,11 @@
|
||||
# Development-tuned Mailu configuration
|
||||
global:
|
||||
# Using Unbound DNS for DNSSEC validation (required by Mailu admin)
|
||||
# Unbound service is available at unbound-dns.bakery-ia.svc.cluster.local
|
||||
# Static ClusterIP configured in unbound-helm/values.yaml
|
||||
custom_dns_servers: "10.96.53.53" # Unbound DNS static ClusterIP
|
||||
# This value is dynamically set via --set during helm install:
|
||||
# UNBOUND_IP=$(kubectl get svc unbound-dns -n bakery-ia -o jsonpath='{.spec.clusterIP}')
|
||||
# helm upgrade --install mailu ... --set global.custom_dns_servers="$UNBOUND_IP"
|
||||
# Default fallback to Kubernetes DNS (will be overridden by --set)
|
||||
custom_dns_servers: "10.96.0.10" # Override with Unbound IP via --set
|
||||
|
||||
# Redis configuration - use built-in Mailu Redis (no authentication needed)
|
||||
externalRedis:
|
||||
@@ -11,11 +13,12 @@ externalRedis:
|
||||
|
||||
# Component-specific DNS configuration
|
||||
# Admin requires DNSSEC validation - use Unbound DNS (forwards cluster.local to kube-dns)
|
||||
# NOTE: dnsConfig.nameservers is dynamically set via --set during helm install
|
||||
admin:
|
||||
dnsPolicy: "None"
|
||||
dnsConfig:
|
||||
nameservers:
|
||||
- "10.96.53.53" # Unbound DNS static ClusterIP (forwards cluster.local to kube-dns)
|
||||
- "10.96.0.10" # Override with Unbound IP via --set admin.dnsConfig.nameservers[0]
|
||||
searches:
|
||||
- "bakery-ia.svc.cluster.local"
|
||||
- "svc.cluster.local"
|
||||
|
||||
@@ -1,5 +1,18 @@
|
||||
# Production-specific values for unbound DNS resolver
|
||||
# Overrides for the production environment
|
||||
#
|
||||
# ARCHITECTURE NOTE:
|
||||
# Unbound provides DNSSEC validation required by Mailu (rspamd for DKIM/SPF/DMARC).
|
||||
# CoreDNS does NOT validate DNSSEC (its dnssec plugin only signs responses), so we need Unbound as a dedicated validating resolver.
|
||||
#
|
||||
# Two deployment options:
|
||||
# 1. Mailu-only: Only Mailu pods use Unbound (via dnsPolicy: None)
|
||||
# - CoreDNS forwards to public DNS (8.8.8.8, 1.1.1.1)
|
||||
# - Lower resource usage, simpler architecture
|
||||
#
|
||||
# 2. Cluster-wide: CoreDNS forwards ALL external queries to Unbound
|
||||
# - All pods get DNSSEC validation
|
||||
# - Higher resource usage, single point of failure for DNS
|
||||
|
||||
# Use official image for production
|
||||
image:
|
||||
@@ -7,44 +20,47 @@ image:
|
||||
tag: "latest"
|
||||
pullPolicy: "IfNotPresent"
|
||||
|
||||
# Production resource settings (higher limits for reliability)
|
||||
# Production resource settings - MINIMAL for single-node clusters
|
||||
# Unbound is very lightweight - DNS queries use minimal CPU
|
||||
resources:
|
||||
requests:
|
||||
cpu: "50m"
|
||||
memory: "64Mi"
|
||||
limits:
|
||||
cpu: "200m"
|
||||
memory: "256Mi"
|
||||
limits:
|
||||
cpu: "500m"
|
||||
memory: "512Mi"
|
||||
|
||||
# Production-specific settings
|
||||
replicaCount: 2
|
||||
# Single replica for single-node clusters (saves resources)
|
||||
# Increase to 2 for multi-node HA deployments
|
||||
replicaCount: 1
|
||||
|
||||
# Production annotations
|
||||
podAnnotations:
|
||||
environment: "production"
|
||||
critical: "true"
|
||||
|
||||
# Anti-affinity for high availability in production
|
||||
affinity:
|
||||
podAntiAffinity:
|
||||
preferredDuringSchedulingIgnoredDuringExecution:
|
||||
- weight: 100
|
||||
podAffinityTerm:
|
||||
labelSelector:
|
||||
matchExpressions:
|
||||
- key: app.kubernetes.io/name
|
||||
operator: In
|
||||
values:
|
||||
- unbound
|
||||
topologyKey: "kubernetes.io/hostname"
|
||||
# Anti-affinity disabled for single-node clusters
|
||||
# Uncomment for multi-node HA deployments
|
||||
# affinity:
|
||||
# podAntiAffinity:
|
||||
# preferredDuringSchedulingIgnoredDuringExecution:
|
||||
# - weight: 100
|
||||
# podAffinityTerm:
|
||||
# labelSelector:
|
||||
# matchExpressions:
|
||||
# - key: app.kubernetes.io/name
|
||||
# operator: In
|
||||
# values:
|
||||
# - unbound
|
||||
# topologyKey: "kubernetes.io/hostname"
|
||||
|
||||
# Production probe settings (more conservative)
|
||||
probes:
|
||||
readiness:
|
||||
initialDelaySeconds: 20
|
||||
initialDelaySeconds: 10
|
||||
periodSeconds: 30
|
||||
command: "sh -c 'echo \"\" | nc -w 3 127.0.0.1 53 || exit 1'"
|
||||
liveness:
|
||||
initialDelaySeconds: 60
|
||||
initialDelaySeconds: 30
|
||||
periodSeconds: 60
|
||||
command: "sh -c 'echo \"\" | nc -w 3 127.0.0.1 53 || exit 1'"
|
||||
@@ -1,6 +1,10 @@
|
||||
# Default values for unbound DNS resolver
|
||||
# This is a YAML-formatted file.
|
||||
# Declare variables to be passed into your templates.
|
||||
#
|
||||
# PURPOSE: Provides DNSSEC validation for Mailu email server
|
||||
# CoreDNS does NOT validate DNSSEC (its dnssec plugin only signs responses), so Unbound fills this gap.
|
||||
# Mailu's rspamd requires DNSSEC for DKIM/SPF/DMARC validation.
|
||||
|
||||
# Global settings
|
||||
global:
|
||||
@@ -18,13 +22,14 @@ image:
|
||||
replicaCount: 1
|
||||
|
||||
# Resource limits and requests
|
||||
# Unbound is very lightweight - these minimal resources are sufficient
|
||||
resources:
|
||||
requests:
|
||||
cpu: "25m"
|
||||
memory: "32Mi"
|
||||
limits:
|
||||
cpu: "100m"
|
||||
memory: "128Mi"
|
||||
limits:
|
||||
cpu: "300m"
|
||||
memory: "384Mi"
|
||||
|
||||
# Security context
|
||||
securityContext:
|
||||
|
||||
Reference in New Issue
Block a user