From b78399da2c458e775bb95c16374f5ee7f98197ff Mon Sep 17 00:00:00 2001 From: Urtzi Alfaro Date: Mon, 19 Jan 2026 15:15:04 +0100 Subject: [PATCH] Add new infra architecture 5 --- MAILU_DEPLOYMENT_ARCHITECTURE.md | 338 ++++++ Tiltfile | 6 +- docs/PILOT_LAUNCH_GUIDE.md | 177 +++- docs/PRODUCTION_OPERATIONS_GUIDE.md | 146 ++- .../cicd/tekton-helm/templates/secrets.yaml | 4 + .../dev/k8s-manifests/kustomization.yaml | 4 + .../prod/k8s-manifests/kustomization.yaml | 3 + .../monitoring/signoz/signoz-values-dev.yaml | 841 --------------- .../monitoring/signoz/signoz-values-prod.yaml | 986 ------------------ .../infrastructure/gateway-service.yaml | 2 - .../infrastructure/kustomization.yaml | 1 + .../nominatim/nominatim-init-job.yaml | 2 - .../infrastructure/unbound/unbound.yaml | 81 ++ .../platform/mail/mailu-helm/dev/values.yaml | 87 +- .../platform/mail/mailu-helm/prod/values.yaml | 95 +- .../platform/mail/mailu-helm/values.yaml | 21 +- .../storage/postgres/postgres-template.yaml | 2 - .../platform/storage/redis/redis.yaml | 2 - .../scripts/setup/setup-dockerhub-secrets.sh | 65 -- .../scripts/setup/setup-ghcr-secrets.sh | 67 -- .../services/databases/ai-insights-db.yaml | 2 - .../databases/alert-processor-db.yaml | 2 - .../services/databases/auth-db.yaml | 2 - .../services/databases/demo-session-db.yaml | 2 - .../services/databases/distribution-db.yaml | 2 - .../services/databases/external-db.yaml | 2 - .../services/databases/forecasting-db.yaml | 2 - .../services/databases/inventory-db.yaml | 2 - .../services/databases/notification-db.yaml | 2 - .../services/databases/orchestrator-db.yaml | 2 - .../services/databases/orders-db.yaml | 2 - infrastructure/services/databases/pos-db.yaml | 2 - .../services/databases/procurement-db.yaml | 2 - .../services/databases/production-db.yaml | 2 - .../services/databases/rabbitmq.yaml | 2 - .../services/databases/recipes-db.yaml | 2 - .../services/databases/sales-db.yaml | 2 - .../services/databases/suppliers-db.yaml | 2 - .../services/databases/tenant-db.yaml | 2 - .../services/databases/training-db.yaml | 2 - .../ai-insights/ai-insights-service.yaml | 2 - .../migrations/ai-insights-migration-job.yaml | 2 - .../alert-processor-migration-job.yaml | 2 - .../microservices/auth/auth-service.yaml | 2 - .../auth/migrations/auth-migration-job.yaml | 3 - .../cronjobs/demo-cleanup-cronjob.yaml | 2 - .../demo-session/demo-cleanup-worker.yaml | 2 - .../demo-session-migration-job.yaml | 2 - .../distribution/distribution-service.yaml | 2 - .../distribution-migration-job.yaml | 2 - .../external-data-rotation-cronjob.yaml | 2 - .../external/external-service.yaml | 2 - .../migrations/external-data-init-job.yaml | 2 - .../migrations/external-migration-job.yaml | 3 - .../forecasting/forecasting-service.yaml | 2 - .../migrations/forecasting-migration-job.yaml | 2 - .../frontend/frontend-service.yaml | 2 - .../inventory/inventory-service.yaml | 2 - .../migrations/inventory-migration-job.yaml | 2 - .../notification-migration-job.yaml | 2 - .../notification/notification-service.yaml | 2 - .../orchestrator-migration-job.yaml | 2 - .../orchestrator/orchestrator-service.yaml | 2 - .../migrations/orders-migration-job.yaml | 2 - .../microservices/orders/orders-service.yaml | 2 - .../pos/migrations/pos-migration-job.yaml | 2 - .../microservices/pos/pos-service.yaml | 2 - .../migrations/procurement-migration-job.yaml | 2 - .../procurement/procurement-service.yaml | 2 - .../migrations/production-migration-job.yaml | 2 - .../production/production-service.yaml | 2 - 
.../migrations/recipes-migration-job.yaml | 2 - .../recipes/recipes-service.yaml | 2 - .../sales/migrations/sales-migration-job.yaml | 2 - .../microservices/sales/sales-service.yaml | 2 - .../migrations/suppliers-migration-job.yaml | 2 - .../suppliers/suppliers-service.yaml | 2 - .../migrations/tenant-migration-job.yaml | 3 - .../microservices/tenant/tenant-service.yaml | 3 - .../migrations/training-migration-job.yaml | 2 - .../training/training-service.yaml | 2 - scripts/prepull-base-images.sh | 2 + secrets_test.yaml | 72 ++ test_secrets.yaml | 22 + 84 files changed, 1027 insertions(+), 2125 deletions(-) create mode 100644 MAILU_DEPLOYMENT_ARCHITECTURE.md create mode 100644 infrastructure/platform/infrastructure/unbound/unbound.yaml delete mode 100755 infrastructure/scripts/setup/setup-dockerhub-secrets.sh delete mode 100644 infrastructure/scripts/setup/setup-ghcr-secrets.sh create mode 100644 secrets_test.yaml create mode 100644 test_secrets.yaml diff --git a/MAILU_DEPLOYMENT_ARCHITECTURE.md b/MAILU_DEPLOYMENT_ARCHITECTURE.md new file mode 100644 index 00000000..bf1d72a4 --- /dev/null +++ b/MAILU_DEPLOYMENT_ARCHITECTURE.md @@ -0,0 +1,338 @@ +# Mailu Deployment Architecture for Bakery-IA Project + +## Executive Summary + +This document outlines the recommended architecture for deploying Mailu email services across development and production environments for the Bakery-IA project. The solution addresses DNSSEC validation requirements while maintaining consistency across different Kubernetes platforms. + +## Environment Overview + +### Development Environment +- **Platform**: Kind (Kubernetes in Docker) or Colima +- **Purpose**: Local development and testing +- **Characteristics**: Ephemeral, single-node, resource-constrained + +### Production Environment +- **Platform**: MicroK8s on Ubuntu VPS +- **Purpose**: Production email services +- **Characteristics**: Single-node or small cluster, persistent storage, production-grade reliability + +## Core Requirements + +1. **DNSSEC Validation**: Mailu v1.9+ requires DNSSEC-validating resolver +2. **Cross-Environment Consistency**: Unified approach for dev and prod +3. **Resource Efficiency**: Optimized for constrained environments +4. **Reliability**: Production-grade availability and monitoring + +## Architectural Solution + +### Unified DNS Resolution Strategy + +**Recommended Approach**: Deploy Unbound as a dedicated DNSSEC-validating resolver pod in both environments + +#### Benefits: +- ✅ Consistent behavior across dev and prod +- ✅ Meets Mailu's DNSSEC requirements +- ✅ Privacy-preserving (no external DNS queries) +- ✅ Avoids rate-limiting from public DNS providers +- ✅ Full control over DNS resolution + +### Implementation Components + +#### 1. 
Unbound Deployment Manifest + +```yaml +# unbound.yaml - Cross-environment compatible +apiVersion: apps/v1 +kind: Deployment +metadata: + name: unbound-resolver + namespace: mailu + labels: + app: unbound + component: dns +spec: + replicas: 1 # Scale to 2+ in production with anti-affinity + selector: + matchLabels: + app: unbound + template: + metadata: + labels: + app: unbound + component: dns + spec: + containers: + - name: unbound + image: mvance/unbound:latest + ports: + - containerPort: 53 + name: dns-udp + protocol: UDP + - containerPort: 53 + name: dns-tcp + protocol: TCP + resources: + requests: + cpu: "100m" + memory: "128Mi" + limits: + cpu: "300m" + memory: "384Mi" + readinessProbe: + exec: + command: ["drill", "@127.0.0.1", "-p", "53", "+dnssec", "example.org"] + initialDelaySeconds: 10 + periodSeconds: 30 + securityContext: + capabilities: + add: ["NET_BIND_SERVICE"] + +--- +apiVersion: v1 +kind: Service +metadata: + name: unbound-dns + namespace: mailu +spec: + selector: + app: unbound + ports: + - name: dns-udp + port: 53 + targetPort: 53 + protocol: UDP + - name: dns-tcp + port: 53 + targetPort: 53 + protocol: TCP +``` + +#### 2. Mailu Configuration (values.yaml) + +```yaml +# Production-tuned Mailu configuration +dnsPolicy: None +dnsConfig: + nameservers: + - "10.152.183.x" # Replace with actual unbound service IP + +# Component-specific DNS configuration +admin: + dnsPolicy: None + dnsConfig: + nameservers: + - "10.152.183.x" + +rspamd: + dnsPolicy: None + dnsConfig: + nameservers: + - "10.152.183.x" + +# Environment-specific configurations +persistence: + enabled: true + # Development: use default storage class + # Production: use microk8s-hostpath or longhorn + storageClass: "standard" + +replicas: 1 # Increase in production as needed + +# Security settings +secretKey: "generate-strong-key-here" + +# Ingress configuration +# Use existing Bakery-IA ingress controller +``` + +### Environment-Specific Adaptations + +#### Development (Kind/Colima) + +**Optimizations:** +- Use hostPath volumes for persistence +- Reduce resource requests/limits +- Disable or simplify monitoring +- Use NodePort for external access + +**Deployment:** +```bash +# Apply unbound +kubectl apply -f unbound.yaml + +# Get unbound service IP +UNBOUND_IP=$(kubectl get svc unbound-dns -n mailu -o jsonpath='{.spec.clusterIP}') + +# Deploy Mailu with dev-specific values +helm upgrade --install mailu mailu/mailu \ + --namespace mailu \ + -f values-dev.yaml \ + --set dnsConfig.nameservers[0]=$UNBOUND_IP +``` + +#### Production (MicroK8s/Ubuntu) + +**Enhancements:** +- Use Longhorn or OpenEBS for storage +- Enable monitoring and logging +- Configure proper ingress with TLS +- Set up backup solutions + +**Deployment:** +```bash +# Enable required MicroK8s addons +microk8s enable dns storage ingress metallb + +# Apply unbound +kubectl apply -f unbound.yaml + +# Get unbound service IP +UNBOUND_IP=$(kubectl get svc unbound-dns -n mailu -o jsonpath='{.spec.clusterIP}') + +# Deploy Mailu with production values +helm upgrade --install mailu mailu/mailu \ + --namespace mailu \ + -f values-prod.yaml \ + --set dnsConfig.nameservers[0]=$UNBOUND_IP +``` + +## Verification Procedures + +### DNSSEC Validation Test + +```bash +# From within a Mailu pod +kubectl exec -it -n mailu deploy/mailu-admin -- bash + +# Test DNSSEC validation +dig @unbound-dns +short +dnssec +adflag example.org A + +# Should show AD flag in response +``` + +### Service Health Checks + +```bash +# Check unbound service +kubectl get pods -n mailu -l 
app=unbound +kubectl logs -n mailu -l app=unbound + +# Check Mailu components +kubectl get pods -n mailu +kubectl logs -n mailu -l app.kubernetes.io/name=mailu +``` + +## Monitoring and Maintenance + +### Production Monitoring Setup + +```yaml +# Example monitoring configuration for production +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: unbound-monitor + namespace: mailu +spec: + selector: + matchLabels: + app: unbound + endpoints: + - port: dns-tcp + interval: 30s + path: /metrics +``` + +### Backup Strategy + +**Production:** +- Daily Velero backups of Mailu namespace +- Weekly database dumps +- Monthly full cluster snapshots + +**Development:** +- On-demand backups before major changes +- Volume snapshots for critical data + +## Troubleshooting Guide + +### Common Issues and Solutions + +**Issue: DNSSEC validation failures** +- Verify unbound pod logs +- Check network policies +- Test DNS resolution from within pods + +**Issue: Mailu pods failing to start** +- Confirm DNS configuration in values.yaml +- Verify unbound service is reachable +- Check resource availability + +**Issue: Performance problems** +- Monitor CPU/memory usage +- Adjust resource limits +- Consider scaling replicas + +## Migration Path + +### From Development to Production + +1. **Configuration Migration** + - Update storage class from hostPath to production storage + - Adjust resource requests/limits + - Enable monitoring and logging + +2. **Data Migration** + - Export development data + - Import into production environment + - Verify data integrity + +3. **DNS Configuration** + - Update DNS records to point to production + - Verify TLS certificates + - Test email delivery + +## Security Considerations + +### Production Security Hardening + +1. **Network Security** + - Implement network policies + - Restrict ingress/egress traffic + - Use TLS for all external communications + +2. **Access Control** + - Implement RBAC for Mailu namespace + - Restrict admin access + - Use strong authentication + +3. **Monitoring and Alerting** + - Set up anomaly detection + - Configure alert thresholds + - Implement log retention policies + +## Cost Optimization + +### Resource Management + +**Development:** +- Use minimal resource allocations +- Scale down when not in use +- Clean up unused resources + +**Production:** +- Right-size resource requests +- Implement auto-scaling where possible +- Monitor and optimize usage patterns + +## Conclusion + +This architecture provides a robust, consistent solution for deploying Mailu across development and production environments. By using Unbound as a dedicated DNSSEC-validating resolver, we ensure compliance with Mailu's requirements while maintaining flexibility and reliability across different Kubernetes platforms. + +The solution is designed to be: +- **Consistent**: Same core architecture across environments +- **Reliable**: Production-grade availability and monitoring +- **Efficient**: Optimized resource usage +- **Maintainable**: Clear documentation and troubleshooting guides + +This approach aligns with the Bakery-IA project's requirements for a secure, reliable email infrastructure that can be consistently deployed across different environments. 
diff --git a/Tiltfile b/Tiltfile index 55907fd1..8b812a0f 100644 --- a/Tiltfile +++ b/Tiltfile @@ -473,13 +473,16 @@ k8s_image_json_path( # Redis & RabbitMQ k8s_resource('redis', resource_deps=['security-setup'], labels=['01-infrastructure']) -k8s_resource('rabbitmq', labels=['01-infrastructure']) +k8s_resource('rabbitmq', resource_deps=['security-setup'], labels=['01-infrastructure']) k8s_resource('nominatim', labels=['01-infrastructure']) # MinIO Storage k8s_resource('minio', resource_deps=['security-setup'], labels=['01-infrastructure']) k8s_resource('minio-bucket-init', resource_deps=['minio'], labels=['01-infrastructure']) +# Unbound DNSSEC Resolver - Infrastructure component for Mailu DNS validation +k8s_resource('unbound-resolver', resource_deps=['security-setup'], labels=['01-infrastructure']) + # Mail Infrastructure (Mailu) - Manual trigger for Helm deployment local_resource( 'mailu-helm', @@ -542,6 +545,7 @@ local_resource( auto_init=False, # Manual trigger only ) + # ============================================================================= # MONITORING RESOURCES - SigNoz (Unified Observability) # ============================================================================= diff --git a/docs/PILOT_LAUNCH_GUIDE.md b/docs/PILOT_LAUNCH_GUIDE.md index 52c56a4d..8b6154aa 100644 --- a/docs/PILOT_LAUNCH_GUIDE.md +++ b/docs/PILOT_LAUNCH_GUIDE.md @@ -433,6 +433,45 @@ microk8s enable prometheus microk8s enable registry ``` +### Step 3: Enhanced Infrastructure Components + +**The platform includes additional infrastructure components that enhance security, monitoring, and operations:** + +```bash +# The platform includes Mailu for email services +# Deploy Mailu via Helm (optional but recommended for production): +kubectl create namespace bakery-ia --dry-run=client -o yaml | kubectl apply -f - +helm repo add mailu https://mailu.github.io/helm-charts +helm repo update +helm install mailu mailu/mailu \ + -n bakery-ia \ + -f infrastructure/platform/mail/mailu-helm/values.yaml \ + --timeout 10m \ + --wait + +# Verify Mailu deployment +kubectl get pods -n bakery-ia | grep mailu +``` + +**For development environments, ensure the prepull-base-images script is run:** +```bash +# On your local machine, run the prepull script to cache base images +cd bakery-ia +chmod +x scripts/prepull-base-images.sh +./scripts/prepull-base-images.sh +``` + +**For production environments, ensure CI/CD infrastructure is properly configured:** +```bash +# Tekton Pipelines for CI/CD (optional - can be deployed separately) +kubectl create namespace tekton-pipelines +kubectl apply -f https://storage.googleapis.com/tekton-releases/pipeline/latest/release.yaml +kubectl apply -f https://storage.googleapis.com/tekton-releases/triggers/latest/release.yaml + +# Flux CD for GitOps (already enabled in MicroK8s if needed) +# flux install --namespace=flux-system --network-policy=false +``` + ### Step 3: Configure Firewall ```bash @@ -917,7 +956,34 @@ echo -n "your-value-here" | base64 **CRITICAL:** Never commit real secrets to git! The secrets.yaml file should be in `.gitignore`. 
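For reference, the base64 values produced above go into the `data:` field of a Kubernetes Secret, while `stringData:` accepts plain text and avoids the manual encoding step. The manifest below is only an illustration — the secret name and keys are hypothetical, not the repository's actual `secrets.yaml`.

```yaml
# Illustration only - name and keys are hypothetical, not the repo's secrets.yaml
apiVersion: v1
kind: Secret
metadata:
  name: bakery-app-secrets
  namespace: bakery-ia
type: Opaque
stringData:                       # plain text, encoded by the API server
  DATABASE_PASSWORD: "your-value-here"
data:                             # pre-encoded: echo -n "your-value-here" | base64
  API_KEY: eW91ci12YWx1ZS1oZXJl
```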
-### Step 2: Apply Application Secrets +### Step 2: CI/CD Secrets Configuration + +**For production CI/CD setup, additional secrets are required:** + +```bash +# Create Docker Hub credentials secret (for image pulls) +kubectl create secret docker-registry dockerhub-creds \ + --docker-server=docker.io \ + --docker-username=YOUR_DOCKERHUB_USERNAME \ + --docker-password=YOUR_DOCKERHUB_TOKEN \ + --docker-email=your-email@example.com \ + -n bakery-ia + +# Create Gitea registry credentials (if using Gitea for CI/CD) +kubectl create secret docker-registry gitea-registry-credentials \ + -n tekton-pipelines \ + --docker-server=gitea.bakery-ia.local:5000 \ + --docker-username=your-username \ + --docker-password=your-password + +# Create Git credentials for Flux (if using GitOps) +kubectl create secret generic gitea-credentials \ + -n flux-system \ + --from-literal=username=your-username \ + --from-literal=password=your-password +``` + +### Step 3: Apply Application Secrets ```bash # Copy manifests to VPS (from local machine) @@ -938,7 +1004,30 @@ kubectl get secrets -n bakery-ia ## Database Migrations -### Step 0: Deploy SigNoz Monitoring (BEFORE Application) +### Step 0: Deploy CI/CD Infrastructure (Optional but Recommended) + +**For production environments, deploy CI/CD infrastructure components:** + +```bash +# Deploy Tekton Pipelines for CI/CD (optional but recommended for production) +kubectl create namespace tekton-pipelines + +# Install Tekton Pipelines +kubectl apply -f https://storage.googleapis.com/tekton-releases/pipeline/latest/release.yaml + +# Install Tekton Triggers +kubectl apply -f https://storage.googleapis.com/tekton-releases/triggers/latest/release.yaml + +# Apply Tekton configurations +kubectl apply -f ~/infrastructure/cicd/tekton/tasks/ +kubectl apply -f ~/infrastructure/cicd/tekton/pipelines/ +kubectl apply -f ~/infrastructure/cicd/tekton/triggers/ + +# Verify Tekton deployment +kubectl get pods -n tekton-pipelines +``` + +### Step 1: Deploy SigNoz Monitoring (BEFORE Application) **⚠️ CRITICAL:** SigNoz must be deployed BEFORE the application into the **bakery-ia namespace** because the production kustomization patches SigNoz resources. @@ -975,7 +1064,7 @@ kubectl get statefulset -n bakery-ia | grep signoz **⚠️ Important:** Do NOT create a separate `signoz` namespace. SigNoz must be in `bakery-ia` namespace for the overlays to work correctly. 
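The namespace requirement exists because the production kustomization patches SigNoz objects by name, and those targets must land in the namespace the overlay expects. The fragment below is schematic only — the resource name and replica patch are illustrative, not the repository's actual overlay under `infrastructure/kubernetes/overlays/prod/`:

```yaml
# Schematic only - the real overlay lives under infrastructure/kubernetes/overlays/prod/
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: bakery-ia              # patch targets are resolved in this namespace
resources:
  - ../../base
patches:
  - target:
      kind: Deployment
      name: signoz-otel-collector # must already exist in bakery-ia for the patch to apply
    patch: |-
      - op: replace
        path: /spec/replicas
        value: 2
```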
-### Step 1: Deploy Application and Databases +### Step 2: Deploy Application and Databases ```bash # On VPS @@ -1271,6 +1360,88 @@ kubectl logs -n bakery-ia deployment/signoz-otel-collector --tail=50 | grep -i " kubectl logs -n bakery-ia deployment/signoz-otel-collector | grep filelog ``` +### Step 2: Configure CI/CD Infrastructure (Optional but Recommended) + +If you deployed the CI/CD infrastructure, configure it for your workflow: + +#### Gitea Setup (Git Server + Registry) +```bash +# Access Gitea at: http://gitea.bakery-ia.local (for dev) or http://gitea.bakewise.ai (for prod) +# Make sure to add the appropriate hostname to /etc/hosts or configure DNS + +# Create your repositories for each service +# Configure webhook to trigger Tekton pipelines +``` + +#### Tekton Pipeline Configuration +```bash +# Verify Tekton pipelines are running +kubectl get pods -n tekton-pipelines + +# Create a PipelineRun manually to test: +kubectl create -f - <[^ ]+) (?Pstdout|stderr) (?P[^ ]*) (?P.*)$' - timestamp: - parse_from: attributes.time - layout: '%Y-%m-%dT%H:%M:%S.%LZ' - # Fix timestamp parsing - extract from the parsed time field - - type: move - from: attributes.time - to: attributes.timestamp - # Extract Kubernetes metadata from file path - - type: regex_parser - id: extract_metadata_from_filepath - regex: '^.*\/(?P[^_]+)_(?P[^_]+)_(?P[^\/]+)\/(?P[^\._]+)\/(?P\d+)\.log$' - parse_from: attributes["log.file.path"] - # Move metadata to resource attributes - - type: move - from: attributes.namespace - to: resource["k8s.namespace.name"] - - type: move - from: attributes.pod_name - to: resource["k8s.pod.name"] - - type: move - from: attributes.container_name - to: resource["k8s.container.name"] - - type: move - from: attributes.log - to: body - - # Kubernetes Cluster Receiver - Collects cluster-level metrics - # Provides information about nodes, namespaces, pods, and other cluster resources - k8s_cluster: - collection_interval: 30s - node_conditions_to_report: - - Ready - - MemoryPressure - - DiskPressure - - PIDPressure - - NetworkUnavailable - allocatable_types_to_report: - - cpu - - memory - - pods - - - - # PostgreSQL receivers for database metrics - # ENABLED: Monitor users configured and credentials stored in secrets - # Collects metrics directly from PostgreSQL databases with proper TLS - postgresql/auth: - endpoint: auth-db-service.bakery-ia:5432 - username: ${env:POSTGRES_MONITOR_USER} - password: ${env:POSTGRES_MONITOR_PASSWORD} - databases: - - auth_db - collection_interval: 60s - tls: - insecure: false - cert_file: /etc/postgres-tls/server-cert.pem - key_file: /etc/postgres-tls/server-key.pem - ca_file: /etc/postgres-tls/ca-cert.pem - - postgresql/inventory: - endpoint: inventory-db-service.bakery-ia:5432 - username: ${env:POSTGRES_MONITOR_USER} - password: ${env:POSTGRES_MONITOR_PASSWORD} - databases: - - inventory_db - collection_interval: 60s - tls: - insecure: false - cert_file: /etc/postgres-tls/server-cert.pem - key_file: /etc/postgres-tls/server-key.pem - ca_file: /etc/postgres-tls/ca-cert.pem - - postgresql/orders: - endpoint: orders-db-service.bakery-ia:5432 - username: ${env:POSTGRES_MONITOR_USER} - password: ${env:POSTGRES_MONITOR_PASSWORD} - databases: - - orders_db - collection_interval: 60s - tls: - insecure: false - cert_file: /etc/postgres-tls/server-cert.pem - key_file: /etc/postgres-tls/server-key.pem - ca_file: /etc/postgres-tls/ca-cert.pem - - postgresql/ai-insights: - endpoint: ai-insights-db-service.bakery-ia:5432 - username: ${env:POSTGRES_MONITOR_USER} - password: 
${env:POSTGRES_MONITOR_PASSWORD} - databases: - - ai_insights_db - collection_interval: 60s - tls: - insecure: false - cert_file: /etc/postgres-tls/server-cert.pem - key_file: /etc/postgres-tls/server-key.pem - ca_file: /etc/postgres-tls/ca-cert.pem - - postgresql/alert-processor: - endpoint: alert-processor-db-service.bakery-ia:5432 - username: ${env:POSTGRES_MONITOR_USER} - password: ${env:POSTGRES_MONITOR_PASSWORD} - databases: - - alert_processor_db - collection_interval: 60s - tls: - insecure: false - cert_file: /etc/postgres-tls/server-cert.pem - key_file: /etc/postgres-tls/server-key.pem - ca_file: /etc/postgres-tls/ca-cert.pem - - postgresql/distribution: - endpoint: distribution-db-service.bakery-ia:5432 - username: ${env:POSTGRES_MONITOR_USER} - password: ${env:POSTGRES_MONITOR_PASSWORD} - databases: - - distribution_db - collection_interval: 60s - tls: - insecure: false - cert_file: /etc/postgres-tls/server-cert.pem - key_file: /etc/postgres-tls/server-key.pem - ca_file: /etc/postgres-tls/ca-cert.pem - - postgresql/external: - endpoint: external-db-service.bakery-ia:5432 - username: ${env:POSTGRES_MONITOR_USER} - password: ${env:POSTGRES_MONITOR_PASSWORD} - databases: - - external_db - collection_interval: 60s - tls: - insecure: false - cert_file: /etc/postgres-tls/server-cert.pem - key_file: /etc/postgres-tls/server-key.pem - ca_file: /etc/postgres-tls/ca-cert.pem - - postgresql/forecasting: - endpoint: forecasting-db-service.bakery-ia:5432 - username: ${env:POSTGRES_MONITOR_USER} - password: ${env:POSTGRES_MONITOR_PASSWORD} - databases: - - forecasting_db - collection_interval: 60s - tls: - insecure: false - cert_file: /etc/postgres-tls/server-cert.pem - key_file: /etc/postgres-tls/server-key.pem - ca_file: /etc/postgres-tls/ca-cert.pem - - postgresql/notification: - endpoint: notification-db-service.bakery-ia:5432 - username: ${env:POSTGRES_MONITOR_USER} - password: ${env:POSTGRES_MONITOR_PASSWORD} - databases: - - notification_db - collection_interval: 60s - tls: - insecure: false - cert_file: /etc/postgres-tls/server-cert.pem - key_file: /etc/postgres-tls/server-key.pem - ca_file: /etc/postgres-tls/ca-cert.pem - - postgresql/orchestrator: - endpoint: orchestrator-db-service.bakery-ia:5432 - username: ${env:POSTGRES_MONITOR_USER} - password: ${env:POSTGRES_MONITOR_PASSWORD} - databases: - - orchestrator_db - collection_interval: 60s - tls: - insecure: false - cert_file: /etc/postgres-tls/server-cert.pem - key_file: /etc/postgres-tls/server-key.pem - ca_file: /etc/postgres-tls/ca-cert.pem - - postgresql/pos: - endpoint: pos-db-service.bakery-ia:5432 - username: ${env:POSTGRES_MONITOR_USER} - password: ${env:POSTGRES_MONITOR_PASSWORD} - databases: - - pos_db - collection_interval: 60s - tls: - insecure: false - cert_file: /etc/postgres-tls/server-cert.pem - key_file: /etc/postgres-tls/server-key.pem - ca_file: /etc/postgres-tls/ca-cert.pem - - postgresql/procurement: - endpoint: procurement-db-service.bakery-ia:5432 - username: ${env:POSTGRES_MONITOR_USER} - password: ${env:POSTGRES_MONITOR_PASSWORD} - databases: - - procurement_db - collection_interval: 60s - tls: - insecure: false - cert_file: /etc/postgres-tls/server-cert.pem - key_file: /etc/postgres-tls/server-key.pem - ca_file: /etc/postgres-tls/ca-cert.pem - - postgresql/production: - endpoint: production-db-service.bakery-ia:5432 - username: ${env:POSTGRES_MONITOR_USER} - password: ${env:POSTGRES_MONITOR_PASSWORD} - databases: - - production_db - collection_interval: 60s - tls: - insecure: false - cert_file: 
/etc/postgres-tls/server-cert.pem - key_file: /etc/postgres-tls/server-key.pem - ca_file: /etc/postgres-tls/ca-cert.pem - - postgresql/recipes: - endpoint: recipes-db-service.bakery-ia:5432 - username: ${env:POSTGRES_MONITOR_USER} - password: ${env:POSTGRES_MONITOR_PASSWORD} - databases: - - recipes_db - collection_interval: 60s - tls: - insecure: false - cert_file: /etc/postgres-tls/server-cert.pem - key_file: /etc/postgres-tls/server-key.pem - ca_file: /etc/postgres-tls/ca-cert.pem - - postgresql/sales: - endpoint: sales-db-service.bakery-ia:5432 - username: ${env:POSTGRES_MONITOR_USER} - password: ${env:POSTGRES_MONITOR_PASSWORD} - databases: - - sales_db - collection_interval: 60s - tls: - insecure: false - cert_file: /etc/postgres-tls/server-cert.pem - key_file: /etc/postgres-tls/server-key.pem - ca_file: /etc/postgres-tls/ca-cert.pem - - postgresql/suppliers: - endpoint: suppliers-db-service.bakery-ia:5432 - username: ${env:POSTGRES_MONITOR_USER} - password: ${env:POSTGRES_MONITOR_PASSWORD} - databases: - - suppliers_db - collection_interval: 60s - tls: - insecure: false - cert_file: /etc/postgres-tls/server-cert.pem - key_file: /etc/postgres-tls/server-key.pem - ca_file: /etc/postgres-tls/ca-cert.pem - - postgresql/tenant: - endpoint: tenant-db-service.bakery-ia:5432 - username: ${env:POSTGRES_MONITOR_USER} - password: ${env:POSTGRES_MONITOR_PASSWORD} - databases: - - tenant_db - collection_interval: 60s - tls: - insecure: false - cert_file: /etc/postgres-tls/server-cert.pem - key_file: /etc/postgres-tls/server-key.pem - ca_file: /etc/postgres-tls/ca-cert.pem - - postgresql/training: - endpoint: training-db-service.bakery-ia:5432 - username: ${env:POSTGRES_MONITOR_USER} - password: ${env:POSTGRES_MONITOR_PASSWORD} - databases: - - training_db - collection_interval: 60s - tls: - insecure: false - cert_file: /etc/postgres-tls/server-cert.pem - key_file: /etc/postgres-tls/server-key.pem - ca_file: /etc/postgres-tls/ca-cert.pem - - # Redis receiver for cache metrics - # ENABLED: Using existing credentials from redis-secrets with TLS - redis: - endpoint: redis-service.bakery-ia:6379 - password: ${env:REDIS_PASSWORD} - collection_interval: 60s - transport: tcp - tls: - insecure_skip_verify: false - cert_file: /etc/redis-tls/redis-cert.pem - key_file: /etc/redis-tls/redis-key.pem - ca_file: /etc/redis-tls/ca-cert.pem - metrics: - redis.maxmemory: - enabled: true - redis.cmd.latency: - enabled: true - - # RabbitMQ receiver via management API - # ENABLED: Using existing credentials from rabbitmq-secrets - rabbitmq: - endpoint: http://rabbitmq-service.bakery-ia:15672 - username: ${env:RABBITMQ_USER} - password: ${env:RABBITMQ_PASSWORD} - collection_interval: 30s - - # Prometheus Receiver - Scrapes metrics from Kubernetes API - # Simplified configuration using only Kubernetes API metrics - prometheus: - config: - scrape_configs: - - job_name: 'kubernetes-nodes-cadvisor' - scrape_interval: 30s - scrape_timeout: 10s - scheme: https - tls_config: - insecure_skip_verify: true - bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token - kubernetes_sd_configs: - - role: node - relabel_configs: - - action: labelmap - regex: __meta_kubernetes_node_label_(.+) - - target_label: __address__ - replacement: kubernetes.default.svc:443 - - source_labels: [__meta_kubernetes_node_name] - regex: (.+) - target_label: __metrics_path__ - replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor - - job_name: 'kubernetes-apiserver' - scrape_interval: 30s - scrape_timeout: 10s - scheme: https - 
tls_config: - insecure_skip_verify: true - bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token - kubernetes_sd_configs: - - role: endpoints - relabel_configs: - - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] - action: keep - regex: default;kubernetes;https - - processors: - # Batch processor for better performance (optimized for high throughput) - batch: - timeout: 1s - send_batch_size: 10000 # Increased from 1024 for better performance - send_batch_max_size: 10000 - - # Batch processor for meter data - batch/meter: - timeout: 1s - send_batch_size: 20000 - send_batch_max_size: 25000 - - # Memory limiter to prevent OOM - memory_limiter: - check_interval: 1s - limit_mib: 400 - spike_limit_mib: 100 - - # Resource detection - resourcedetection: - detectors: [env, system, docker] - timeout: 5s - - # Kubernetes attributes processor - CRITICAL for logs - # Extracts pod, namespace, container metadata from log attributes - k8sattributes: - auth_type: "serviceAccount" - passthrough: false - extract: - metadata: - - k8s.pod.name - - k8s.pod.uid - - k8s.deployment.name - - k8s.namespace.name - - k8s.node.name - - k8s.container.name - labels: - - tag_name: "app" - - tag_name: "pod-template-hash" - annotations: - - tag_name: "description" - - # SigNoz span metrics processor with delta aggregation (recommended) - # Generates RED metrics (Rate, Error, Duration) from trace spans - signozspanmetrics/delta: - aggregation_temporality: AGGREGATION_TEMPORALITY_DELTA - metrics_exporter: signozclickhousemetrics - latency_histogram_buckets: [100us, 1ms, 2ms, 6ms, 10ms, 50ms, 100ms, 250ms, 500ms, 1000ms, 1400ms, 2000ms, 5s, 10s, 20s, 40s, 60s] - dimensions_cache_size: 100000 - dimensions: - - name: service.namespace - default: default - - name: deployment.environment - default: default - - name: signoz.collector.id - - exporters: - # ClickHouse exporter for traces - clickhousetraces: - datasource: tcp://admin:27ff0399-0d3a-4bd8-919d-17c2181e6fb9@signoz-clickhouse:9000/?database=signoz_traces - timeout: 10s - retry_on_failure: - enabled: true - initial_interval: 5s - max_interval: 30s - max_elapsed_time: 300s - - # ClickHouse exporter for metrics - signozclickhousemetrics: - dsn: "tcp://admin:27ff0399-0d3a-4bd8-919d-17c2181e6fb9@signoz-clickhouse:9000/signoz_metrics" - timeout: 10s - retry_on_failure: - enabled: true - initial_interval: 5s - max_interval: 30s - max_elapsed_time: 300s - - # ClickHouse exporter for meter data (usage metrics) - signozclickhousemeter: - dsn: "tcp://admin:27ff0399-0d3a-4bd8-919d-17c2181e6fb9@signoz-clickhouse:9000/signoz_meter" - timeout: 45s - sending_queue: - enabled: false - - # ClickHouse exporter for logs - clickhouselogsexporter: - dsn: tcp://admin:27ff0399-0d3a-4bd8-919d-17c2181e6fb9@signoz-clickhouse:9000/?database=signoz_logs - timeout: 10s - retry_on_failure: - enabled: true - initial_interval: 5s - max_interval: 30s - - # Metadata exporter for service metadata - metadataexporter: - dsn: "tcp://admin:27ff0399-0d3a-4bd8-919d-17c2181e6fb9@signoz-clickhouse:9000/signoz_metadata" - timeout: 10s - cache: - provider: in_memory - - # Debug exporter for debugging (optional) - debug: - verbosity: detailed - sampling_initial: 5 - sampling_thereafter: 200 - - service: - pipelines: - # Traces pipeline - exports to ClickHouse and signozmeter connector - traces: - receivers: [otlp] - processors: [memory_limiter, batch, signozspanmetrics/delta, resourcedetection] - exporters: [clickhousetraces, 
metadataexporter, signozmeter] - - # Metrics pipeline - metrics: - receivers: [otlp, - postgresql/auth, postgresql/inventory, postgresql/orders, - postgresql/ai-insights, postgresql/alert-processor, postgresql/distribution, - postgresql/external, postgresql/forecasting, postgresql/notification, - postgresql/orchestrator, postgresql/pos, postgresql/procurement, - postgresql/production, postgresql/recipes, postgresql/sales, - postgresql/suppliers, postgresql/tenant, postgresql/training, - redis, rabbitmq, k8s_cluster, prometheus] - processors: [memory_limiter, batch, resourcedetection] - exporters: [signozclickhousemetrics] - - # Meter pipeline - receives from signozmeter connector - metrics/meter: - receivers: [signozmeter] - processors: [batch/meter] - exporters: [signozclickhousemeter] - - # Logs pipeline - includes both OTLP and Kubernetes pod logs - logs: - receivers: [otlp, filelog] - processors: [memory_limiter, batch, resourcedetection, k8sattributes] - exporters: [clickhouselogsexporter] - - # ClusterRole configuration for Kubernetes monitoring - # CRITICAL: Required for k8s_cluster receiver to access Kubernetes API - # Without these permissions, k8s metrics will not appear in SigNoz UI - clusterRole: - create: true - name: "signoz-otel-collector-bakery-ia" - annotations: {} - # Complete RBAC rules required by k8sclusterreceiver - # Based on OpenTelemetry and SigNoz official documentation - rules: - # Core API group - fundamental Kubernetes resources - - apiGroups: [""] - resources: - - "events" - - "namespaces" - - "nodes" - - "nodes/proxy" - - "nodes/metrics" - - "nodes/spec" - - "pods" - - "pods/status" - - "replicationcontrollers" - - "replicationcontrollers/status" - - "resourcequotas" - - "services" - - "endpoints" - verbs: ["get", "list", "watch"] - # Apps API group - modern workload controllers - - apiGroups: ["apps"] - resources: ["deployments", "daemonsets", "statefulsets", "replicasets"] - verbs: ["get", "list", "watch"] - # Batch API group - job management - - apiGroups: ["batch"] - resources: ["jobs", "cronjobs"] - verbs: ["get", "list", "watch"] - # Autoscaling API group - HPA metrics (CRITICAL) - - apiGroups: ["autoscaling"] - resources: ["horizontalpodautoscalers"] - verbs: ["get", "list", "watch"] - # Extensions API group - legacy support - - apiGroups: ["extensions"] - resources: ["deployments", "daemonsets", "replicasets"] - verbs: ["get", "list", "watch"] - # Metrics API group - resource metrics - - apiGroups: ["metrics.k8s.io"] - resources: ["nodes", "pods"] - verbs: ["get", "list", "watch"] - clusterRoleBinding: - annotations: {} - name: "signoz-otel-collector-bakery-ia" - -# Additional Configuration -serviceAccount: - create: true - annotations: {} - name: "signoz-otel-collector" - -# Security Context -securityContext: - runAsNonRoot: true - runAsUser: 1000 - fsGroup: 1000 - -# Network Policies (disabled for dev) -networkPolicy: - enabled: false - -# Monitoring SigNoz itself -selfMonitoring: - enabled: true - serviceMonitor: - enabled: false diff --git a/infrastructure/monitoring/signoz/signoz-values-prod.yaml b/infrastructure/monitoring/signoz/signoz-values-prod.yaml index f43342a4..a47e4535 100644 --- a/infrastructure/monitoring/signoz/signoz-values-prod.yaml +++ b/infrastructure/monitoring/signoz/signoz-values-prod.yaml @@ -10,989 +10,3 @@ global: clusterName: "bakery-ia-prod" domain: "monitoring.bakewise.ai" # Docker Hub credentials - applied to all sub-charts (including Zookeeper, ClickHouse, etc) - imagePullSecrets: - - dockerhub-creds - -# Docker Hub 
credentials for pulling images (root level for SigNoz components) -imagePullSecrets: - - dockerhub-creds - -# SigNoz Main Component (unified frontend + query service) -# BREAKING CHANGE: v0.89.0+ uses unified component instead of separate frontend/queryService -signoz: - replicaCount: 2 - - image: - repository: signoz/signoz - tag: v0.106.0 # Latest stable version - pullPolicy: IfNotPresent - - service: - type: ClusterIP - port: 8080 # HTTP/API port - internalPort: 8085 # Internal gRPC port - - # DISABLE built-in ingress - using unified bakery-ingress-prod instead - # Route configured in infrastructure/kubernetes/overlays/prod/prod-ingress.yaml - ingress: - enabled: false - - resources: - requests: - cpu: 500m - memory: 1Gi - limits: - cpu: 2000m - memory: 4Gi - - # Pod Anti-affinity for HA - affinity: - podAntiAffinity: - preferredDuringSchedulingIgnoredDuringExecution: - - weight: 100 - podAffinityTerm: - labelSelector: - matchLabels: - app.kubernetes.io/component: query-service - topologyKey: kubernetes.io/hostname - - # Environment variables (new format - replaces configVars) - env: - signoz_telemetrystore_provider: "clickhouse" - dot_metrics_enabled: "true" - signoz_emailing_enabled: "true" - signoz_alertmanager_provider: "signoz" - # Retention configuration (30 days for prod) - signoz_traces_ttl_duration_hrs: "720" - signoz_metrics_ttl_duration_hrs: "720" - signoz_logs_ttl_duration_hrs: "720" - # OpAMP Server Configuration - # WARNING: OpAMP can cause gRPC instability and collector reloads - # Only enable if you have a stable OpAMP backend server - signoz_opamp_server_enabled: "false" - # signoz_opamp_server_endpoint: "0.0.0.0:4320" - # SMTP configuration for email alerts - now using Mailu as SMTP server - signoz_smtp_enabled: "true" - signoz_smtp_host: "mailu-postfix.bakery-ia.svc.cluster.local" - signoz_smtp_port: "587" - signoz_smtp_from: "alerts@bakewise.ai" - signoz_smtp_username: "alerts@bakewise.ai" - # Password should be set via secret: signoz_smtp_password - - persistence: - enabled: true - size: 20Gi - storageClass: "standard" - - # Horizontal Pod Autoscaler - autoscaling: - enabled: true - minReplicas: 2 - maxReplicas: 5 - targetCPUUtilizationPercentage: 70 - targetMemoryUtilizationPercentage: 80 - -# AlertManager Configuration -alertmanager: - enabled: true - replicaCount: 2 - - image: - repository: signoz/alertmanager - tag: 0.23.5 - pullPolicy: IfNotPresent - - service: - type: ClusterIP - port: 9093 - - resources: - requests: - cpu: 100m - memory: 128Mi - limits: - cpu: 500m - memory: 512Mi - - # Pod Anti-affinity for HA - affinity: - podAntiAffinity: - preferredDuringSchedulingIgnoredDuringExecution: - - weight: 100 - podAffinityTerm: - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - signoz-alertmanager - topologyKey: kubernetes.io/hostname - - persistence: - enabled: true - size: 5Gi - storageClass: "standard" - - config: - global: - resolve_timeout: 5m - smtp_smarthost: 'mailu-postfix.bakery-ia.svc.cluster.local:587' - smtp_from: 'alerts@bakewise.ai' - smtp_auth_username: 'alerts@bakewise.ai' - smtp_auth_password: '${SMTP_PASSWORD}' - smtp_require_tls: true - - route: - group_by: ['alertname', 'cluster', 'service', 'severity'] - group_wait: 10s - group_interval: 10s - repeat_interval: 12h - receiver: 'critical-alerts' - routes: - - match: - severity: critical - receiver: 'critical-alerts' - continue: true - - match: - severity: warning - receiver: 'warning-alerts' - - receivers: - - name: 'critical-alerts' - email_configs: - - to: 
'critical-alerts@bakewise.ai' - headers: - Subject: '[CRITICAL] {{ .GroupLabels.alertname }} - Bakery IA' - # Slack webhook for critical alerts - slack_configs: - - api_url: '${SLACK_WEBHOOK_URL}' - channel: '#alerts-critical' - title: '[CRITICAL] {{ .GroupLabels.alertname }}' - text: '{{ range .Alerts }}{{ .Annotations.description }}{{ end }}' - - - name: 'warning-alerts' - email_configs: - - to: 'oncall@bakewise.ai' - headers: - Subject: '[WARNING] {{ .GroupLabels.alertname }} - Bakery IA' - -# ClickHouse Configuration - Time Series Database -clickhouse: - enabled: true - installCustomStorageClass: false - - image: - registry: docker.io - repository: clickhouse/clickhouse-server - tag: 25.5.6 # Updated to official recommended version - pullPolicy: IfNotPresent - - # ClickHouse resources (nested config) - clickhouse: - resources: - requests: - cpu: 1000m - memory: 2Gi - limits: - cpu: 4000m - memory: 8Gi - - # Pod Anti-affinity for HA - affinity: - podAntiAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - signoz-clickhouse - topologyKey: kubernetes.io/hostname - - persistence: - enabled: true - size: 100Gi - storageClass: "standard" - - # Cold storage configuration for better disk space management - coldStorage: - enabled: true - defaultKeepFreeSpaceBytes: 10737418240 # Keep 10GB free - ttl: - deleteTTLDays: 30 # Move old data to cold storage after 30 days - -# Zookeeper Configuration (required by ClickHouse for coordination) -zookeeper: - enabled: true - replicaCount: 3 # CRITICAL: Always use 3 replicas for production HA - - image: - tag: 3.7.1 # Official recommended version - - resources: - requests: - cpu: 100m - memory: 256Mi - limits: - cpu: 500m - memory: 512Mi - - persistence: - enabled: true - size: 10Gi - storageClass: "standard" - -# OpenTelemetry Collector - Integrated with SigNoz -otelCollector: - enabled: true - replicaCount: 2 - - image: - repository: signoz/signoz-otel-collector - tag: v0.129.12 # Updated to latest recommended version - pullPolicy: IfNotPresent - - # Init containers for the Otel Collector pod - initContainers: - fix-postgres-tls: - enabled: true - image: - registry: docker.io - repository: busybox - tag: 1.35 - pullPolicy: IfNotPresent - command: - - sh - - -c - - | - echo "Fixing PostgreSQL TLS file permissions..." 
- cp /etc/postgres-tls-source/* /etc/postgres-tls/ - chmod 600 /etc/postgres-tls/server-key.pem - chmod 644 /etc/postgres-tls/server-cert.pem - chmod 644 /etc/postgres-tls/ca-cert.pem - echo "PostgreSQL TLS permissions fixed" - volumeMounts: - - name: postgres-tls-source - mountPath: /etc/postgres-tls-source - readOnly: true - - name: postgres-tls-fixed - mountPath: /etc/postgres-tls - readOnly: false - - service: - type: ClusterIP - ports: - - name: otlp-grpc - port: 4317 - targetPort: 4317 - protocol: TCP - - name: otlp-http - port: 4318 - targetPort: 4318 - protocol: TCP - - name: prometheus - port: 8889 - targetPort: 8889 - protocol: TCP - - name: metrics - port: 8888 - targetPort: 8888 - protocol: TCP - - resources: - requests: - cpu: 500m - memory: 512Mi - limits: - cpu: 2000m - memory: 2Gi - - # Additional environment variables for receivers - additionalEnvs: - POSTGRES_MONITOR_USER: "monitoring" - POSTGRES_MONITOR_PASSWORD: "monitoring_369f9c001f242b07ef9e2826e17169ca" - REDIS_PASSWORD: "OxdmdJjdVNXp37MNC2IFoMnTpfGGFv1k" - RABBITMQ_USER: "bakery" - RABBITMQ_PASSWORD: "forecast123" - - # Mount TLS certificates for secure connections - extraVolumes: - - name: redis-tls - secret: - secretName: redis-tls-secret - - name: postgres-tls - secret: - secretName: postgres-tls - - name: postgres-tls-fixed - emptyDir: {} - - name: varlogpods - hostPath: - path: /var/log/pods - - extraVolumeMounts: - - name: redis-tls - mountPath: /etc/redis-tls - readOnly: true - - name: postgres-tls - mountPath: /etc/postgres-tls-source - readOnly: true - - name: postgres-tls-fixed - mountPath: /etc/postgres-tls - readOnly: false - - name: varlogpods - mountPath: /var/log/pods - readOnly: true - - # Enable OpAMP for dynamic configuration management - command: - name: /signoz-otel-collector - extraArgs: - - --config=/conf/otel-collector-config.yaml - - --manager-config=/conf/otel-collector-opamp-config.yaml - - --feature-gates=-pkg.translator.prometheus.NormalizeName - - # Full OTEL Collector Configuration - config: - # Connectors - bridge between pipelines - connectors: - signozmeter: - dimensions: - - name: service.name - - name: deployment.environment - - name: host.name - metrics_flush_interval: 1h - - extensions: - health_check: - endpoint: 0.0.0.0:13133 - zpages: - endpoint: 0.0.0.0:55679 - - receivers: - otlp: - protocols: - grpc: - endpoint: 0.0.0.0:4317 - max_recv_msg_size_mib: 32 # Increased for larger payloads - http: - endpoint: 0.0.0.0:4318 - cors: - allowed_origins: - - "https://monitoring.bakewise.ai" - - "https://*.bakewise.ai" - - # Filelog receiver for Kubernetes pod logs - # Collects container stdout/stderr from /var/log/pods - filelog: - include: - - /var/log/pods/*/*/*.log - exclude: - # Exclude SigNoz's own logs to avoid recursive collection - - /var/log/pods/bakery-ia_signoz-*/*/*.log - include_file_path: true - include_file_name: false - operators: - # Parse CRI-O / containerd log format - - type: regex_parser - regex: '^(?P