Fix redis ssl issues 3
This commit is contained in:
@@ -31,6 +31,7 @@
|
|||||||
- [Step 5.6: Verify Service Images](#step-56-verify-all-service-images-are-available)
|
- [Step 5.6: Verify Service Images](#step-56-verify-all-service-images-are-available)
|
||||||
9. [Phase 6: Deploy Application Services](#phase-6-deploy-application-services)
|
9. [Phase 6: Deploy Application Services](#phase-6-deploy-application-services)
|
||||||
10. [Phase 7: Deploy Optional Services](#phase-7-deploy-optional-services)
|
10. [Phase 7: Deploy Optional Services](#phase-7-deploy-optional-services)
|
||||||
|
- [Step 7.5: Deploy Kubernetes Infrastructure Monitoring](#step-75-deploy-kubernetes-infrastructure-monitoring-required-for-signoz-infrastructure-view)
|
||||||
11. [Phase 8: Verification & Validation](#phase-8-verification--validation)
|
11. [Phase 8: Verification & Validation](#phase-8-verification--validation)
|
||||||
12. [Post-Deployment Operations](#post-deployment-operations)
|
12. [Post-Deployment Operations](#post-deployment-operations)
|
||||||
13. [Troubleshooting Guide](#troubleshooting-guide)
|
13. [Troubleshooting Guide](#troubleshooting-guide)
|
||||||
@@ -1385,6 +1386,77 @@ kubectl wait --for=condition=available --timeout=600s deployment/signoz-frontend
|
|||||||
kubectl get pods -n bakery-ia -l app.kubernetes.io/instance=signoz
|
kubectl get pods -n bakery-ia -l app.kubernetes.io/instance=signoz
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Step 7.5: Deploy Kubernetes Infrastructure Monitoring (Required for SigNoz Infrastructure View)
|
||||||
|
|
||||||
|
> **Purpose:** Deploy kube-state-metrics and node-exporter to enable Kubernetes infrastructure metrics in SigNoz. Without these components, the SigNoz Infrastructure section will be empty.
|
||||||
|
|
||||||
|
**Components Deployed:**
|
||||||
|
|
||||||
|
| Component | Purpose | Metrics |
|
||||||
|
|-----------|---------|---------|
|
||||||
|
| **kube-state-metrics** | Kubernetes object metrics | Pods, Deployments, Nodes, PVCs, etc. |
|
||||||
|
| **node-exporter** | Host-level metrics | CPU, Memory, Disk, Network |
|
||||||
|
|
||||||
|
**Deploy using the automated script:**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Navigate to the repository root (the script paths below are relative to it)
|
||||||
|
cd /root/bakery-ia
|
||||||
|
|
||||||
|
# Make the script executable (if not already)
|
||||||
|
chmod +x infrastructure/monitoring/k8s-infra/deploy-k8s-infra-monitoring.sh
|
||||||
|
|
||||||
|
# Deploy kube-state-metrics and node-exporter
|
||||||
|
./infrastructure/monitoring/k8s-infra/deploy-k8s-infra-monitoring.sh --microk8s install
|
||||||
|
```
|
||||||
|
|
||||||
|
**Upgrade SigNoz to scrape the new metrics:**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# The signoz-values-prod.yaml already includes the Prometheus receiver configuration
|
||||||
|
# Upgrade SigNoz to apply the scraping configuration
|
||||||
|
microk8s helm3 upgrade signoz signoz/signoz \
|
||||||
|
-n bakery-ia \
|
||||||
|
-f infrastructure/monitoring/signoz/signoz-values-prod.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
**Verify deployment:**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Check pods are running
|
||||||
|
microk8s kubectl get pods -n bakery-ia | grep -E "(kube-state|node-exporter)"
|
||||||
|
|
||||||
|
# Expected output:
|
||||||
|
# kube-state-metrics-xxxxxxxxxx-xxxxx 1/1 Running 0 1m
|
||||||
|
# node-exporter-prometheus-node-exporter-xxxxx 1/1 Running 0 1m
|
||||||
|
|
||||||
|
# Check status
|
||||||
|
./infrastructure/monitoring/k8s-infra/deploy-k8s-infra-monitoring.sh --microk8s status
|
||||||
|
```
|
||||||
|
|
||||||
|
**Verify metrics in SigNoz:**
|
||||||
|
|
||||||
|
After a few minutes, you should see:
|
||||||
|
- **Infrastructure → Kubernetes**: Pod status, deployments, nodes, PVCs
|
||||||
|
- **Infrastructure → Hosts**: CPU, memory, disk, network usage
|
||||||
|
|
||||||
|
**Troubleshooting:**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Check that kube-state-metrics is exposing metrics (port-forward, then query the endpoint)
|
||||||
|
microk8s kubectl port-forward svc/kube-state-metrics 8080:8080 -n bakery-ia &
|
||||||
|
curl localhost:8080/metrics | head -20
|
||||||
|
|
||||||
|
# Check OTel Collector logs for scraping errors
|
||||||
|
microk8s kubectl logs -l app.kubernetes.io/name=signoz-otel-collector -n bakery-ia --tail=50
|
||||||
|
```
|
||||||
|
|
||||||
|
> **Files Location:**
|
||||||
|
> - Helm values: `infrastructure/monitoring/k8s-infra/kube-state-metrics-values.yaml`
|
||||||
|
> - Helm values: `infrastructure/monitoring/k8s-infra/node-exporter-values.yaml`
|
||||||
|
> - Deploy script: `infrastructure/monitoring/k8s-infra/deploy-k8s-infra-monitoring.sh`
|
||||||
|
> - Documentation: `infrastructure/monitoring/k8s-infra/README.md`
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## Phase 8: Verification & Validation
|
## Phase 8: Verification & Validation
|
||||||
|
|||||||
87
Tiltfile
87
Tiltfile
@@ -46,6 +46,10 @@ if use_dockerhub:
|
|||||||
base_registry = 'docker.io'
|
base_registry = 'docker.io'
|
||||||
python_image = 'python:3.11-slim'
|
python_image = 'python:3.11-slim'
|
||||||
|
|
||||||
|
# Git commit hash for migration job names (extracted from manifest to match CI/CD updates)
|
||||||
|
# We read from a manifest file rather than git HEAD because CI/CD commits may not be checked out locally
|
||||||
|
git_commit_short = str(local("sed -n 's/.*name: auth-migration-\\([a-f0-9]*\\).*/\\1/p' infrastructure/services/microservices/auth/migrations/auth-migration-job.yaml | head -1", quiet=True)).strip()
|
||||||
|
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# PREPULL BASE IMAGES - RUNS AFTER SECURITY SETUP
|
# PREPULL BASE IMAGES - RUNS AFTER SECURITY SETUP
|
||||||
@@ -1189,79 +1193,80 @@ k8s_resource('demo-session-db', resource_deps=['security-setup'], labels=['06-da
|
|||||||
# =============================================================================
|
# =============================================================================
|
||||||
# MIGRATION JOBS
|
# MIGRATION JOBS
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
# Migration job names include git commit hash (set by CI/CD in manifests)
|
||||||
|
|
||||||
# Core Service Migrations
|
# Core Service Migrations
|
||||||
k8s_resource('auth-migration', resource_deps=['auth-db'], labels=['07-migrations'])
|
k8s_resource('auth-migration-' + git_commit_short, resource_deps=['auth-db'], labels=['07-migrations'])
|
||||||
k8s_resource('tenant-migration', resource_deps=['tenant-db'], labels=['07-migrations'])
|
k8s_resource('tenant-migration-' + git_commit_short, resource_deps=['tenant-db'], labels=['07-migrations'])
|
||||||
|
|
||||||
# Data & Analytics Migrations
|
# Data & Analytics Migrations
|
||||||
k8s_resource('training-migration', resource_deps=['training-db'], labels=['07-migrations'])
|
k8s_resource('training-migration-' + git_commit_short, resource_deps=['training-db'], labels=['07-migrations'])
|
||||||
k8s_resource('forecasting-migration', resource_deps=['forecasting-db'], labels=['07-migrations'])
|
k8s_resource('forecasting-migration-' + git_commit_short, resource_deps=['forecasting-db'], labels=['07-migrations'])
|
||||||
k8s_resource('ai-insights-migration', resource_deps=['ai-insights-db'], labels=['07-migrations'])
|
k8s_resource('ai-insights-migration-' + git_commit_short, resource_deps=['ai-insights-db'], labels=['07-migrations'])
|
||||||
|
|
||||||
# Operations Migrations
|
# Operations Migrations
|
||||||
k8s_resource('sales-migration', resource_deps=['sales-db'], labels=['07-migrations'])
|
k8s_resource('sales-migration-' + git_commit_short, resource_deps=['sales-db'], labels=['07-migrations'])
|
||||||
k8s_resource('inventory-migration', resource_deps=['inventory-db'], labels=['07-migrations'])
|
k8s_resource('inventory-migration-' + git_commit_short, resource_deps=['inventory-db'], labels=['07-migrations'])
|
||||||
k8s_resource('production-migration', resource_deps=['production-db'], labels=['07-migrations'])
|
k8s_resource('production-migration-' + git_commit_short, resource_deps=['production-db'], labels=['07-migrations'])
|
||||||
k8s_resource('procurement-migration', resource_deps=['procurement-db'], labels=['07-migrations'])
|
k8s_resource('procurement-migration-' + git_commit_short, resource_deps=['procurement-db'], labels=['07-migrations'])
|
||||||
k8s_resource('distribution-migration', resource_deps=['distribution-db'], labels=['07-migrations'])
|
k8s_resource('distribution-migration-' + git_commit_short, resource_deps=['distribution-db'], labels=['07-migrations'])
|
||||||
|
|
||||||
# Supporting Service Migrations
|
# Supporting Service Migrations
|
||||||
k8s_resource('recipes-migration', resource_deps=['recipes-db'], labels=['07-migrations'])
|
k8s_resource('recipes-migration-' + git_commit_short, resource_deps=['recipes-db'], labels=['07-migrations'])
|
||||||
k8s_resource('suppliers-migration', resource_deps=['suppliers-db'], labels=['07-migrations'])
|
k8s_resource('suppliers-migration-' + git_commit_short, resource_deps=['suppliers-db'], labels=['07-migrations'])
|
||||||
k8s_resource('pos-migration', resource_deps=['pos-db'], labels=['07-migrations'])
|
k8s_resource('pos-migration-' + git_commit_short, resource_deps=['pos-db'], labels=['07-migrations'])
|
||||||
k8s_resource('orders-migration', resource_deps=['orders-db'], labels=['07-migrations'])
|
k8s_resource('orders-migration-' + git_commit_short, resource_deps=['orders-db'], labels=['07-migrations'])
|
||||||
k8s_resource('external-migration', resource_deps=['external-db'], labels=['07-migrations'])
|
k8s_resource('external-migration-' + git_commit_short, resource_deps=['external-db'], labels=['07-migrations'])
|
||||||
|
|
||||||
# Platform Service Migrations
|
# Platform Service Migrations
|
||||||
k8s_resource('notification-migration', resource_deps=['notification-db'], labels=['07-migrations'])
|
k8s_resource('notification-migration-' + git_commit_short, resource_deps=['notification-db'], labels=['07-migrations'])
|
||||||
k8s_resource('alert-processor-migration', resource_deps=['alert-processor-db'], labels=['07-migrations'])
|
k8s_resource('alert-processor-migration-' + git_commit_short, resource_deps=['alert-processor-db'], labels=['07-migrations'])
|
||||||
k8s_resource('orchestrator-migration', resource_deps=['orchestrator-db'], labels=['07-migrations'])
|
k8s_resource('orchestrator-migration-' + git_commit_short, resource_deps=['orchestrator-db'], labels=['07-migrations'])
|
||||||
|
|
||||||
# Demo Service Migrations
|
# Demo Service Migrations
|
||||||
k8s_resource('demo-session-migration', resource_deps=['demo-session-db'], labels=['07-migrations'])
|
k8s_resource('demo-session-migration-' + git_commit_short, resource_deps=['demo-session-db'], labels=['07-migrations'])
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# DATA INITIALIZATION JOBS
|
# DATA INITIALIZATION JOBS
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
|
||||||
k8s_resource('external-data-init', resource_deps=['external-migration', 'redis'], labels=['08-data-init'])
|
k8s_resource('external-data-init-' + git_commit_short, resource_deps=['external-migration-' + git_commit_short, 'redis'], labels=['08-data-init'])
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# APPLICATION SERVICES
|
# APPLICATION SERVICES
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
|
||||||
# Core Services
|
# Core Services
|
||||||
k8s_resource('auth-service', resource_deps=['auth-migration', 'redis'], labels=['09-services-core'])
|
k8s_resource('auth-service', resource_deps=['auth-migration-' + git_commit_short, 'redis'], labels=['09-services-core'])
|
||||||
k8s_resource('tenant-service', resource_deps=['tenant-migration', 'redis'], labels=['09-services-core'])
|
k8s_resource('tenant-service', resource_deps=['tenant-migration-' + git_commit_short, 'redis'], labels=['09-services-core'])
|
||||||
|
|
||||||
# Data & Analytics Services
|
# Data & Analytics Services
|
||||||
k8s_resource('training-service', resource_deps=['training-migration', 'redis'], labels=['10-services-analytics'])
|
k8s_resource('training-service', resource_deps=['training-migration-' + git_commit_short, 'redis'], labels=['10-services-analytics'])
|
||||||
k8s_resource('forecasting-service', resource_deps=['forecasting-migration', 'redis'], labels=['10-services-analytics'])
|
k8s_resource('forecasting-service', resource_deps=['forecasting-migration-' + git_commit_short, 'redis'], labels=['10-services-analytics'])
|
||||||
k8s_resource('ai-insights-service', resource_deps=['ai-insights-migration', 'redis', 'forecasting-service', 'production-service', 'procurement-service'], labels=['10-services-analytics'])
|
k8s_resource('ai-insights-service', resource_deps=['ai-insights-migration-' + git_commit_short, 'redis', 'forecasting-service', 'production-service', 'procurement-service'], labels=['10-services-analytics'])
|
||||||
|
|
||||||
# Operations Services
|
# Operations Services
|
||||||
k8s_resource('sales-service', resource_deps=['sales-migration', 'redis'], labels=['11-services-operations'])
|
k8s_resource('sales-service', resource_deps=['sales-migration-' + git_commit_short, 'redis'], labels=['11-services-operations'])
|
||||||
k8s_resource('inventory-service', resource_deps=['inventory-migration', 'redis'], labels=['11-services-operations'])
|
k8s_resource('inventory-service', resource_deps=['inventory-migration-' + git_commit_short, 'redis'], labels=['11-services-operations'])
|
||||||
k8s_resource('production-service', resource_deps=['production-migration', 'redis'], labels=['11-services-operations'])
|
k8s_resource('production-service', resource_deps=['production-migration-' + git_commit_short, 'redis'], labels=['11-services-operations'])
|
||||||
k8s_resource('procurement-service', resource_deps=['procurement-migration', 'redis'], labels=['11-services-operations'])
|
k8s_resource('procurement-service', resource_deps=['procurement-migration-' + git_commit_short, 'redis'], labels=['11-services-operations'])
|
||||||
k8s_resource('distribution-service', resource_deps=['distribution-migration', 'redis', 'rabbitmq'], labels=['11-services-operations'])
|
k8s_resource('distribution-service', resource_deps=['distribution-migration-' + git_commit_short, 'redis', 'rabbitmq'], labels=['11-services-operations'])
|
||||||
|
|
||||||
# Supporting Services
|
# Supporting Services
|
||||||
k8s_resource('recipes-service', resource_deps=['recipes-migration', 'redis'], labels=['12-services-supporting'])
|
k8s_resource('recipes-service', resource_deps=['recipes-migration-' + git_commit_short, 'redis'], labels=['12-services-supporting'])
|
||||||
k8s_resource('suppliers-service', resource_deps=['suppliers-migration', 'redis'], labels=['12-services-supporting'])
|
k8s_resource('suppliers-service', resource_deps=['suppliers-migration-' + git_commit_short, 'redis'], labels=['12-services-supporting'])
|
||||||
k8s_resource('pos-service', resource_deps=['pos-migration', 'redis'], labels=['12-services-supporting'])
|
k8s_resource('pos-service', resource_deps=['pos-migration-' + git_commit_short, 'redis'], labels=['12-services-supporting'])
|
||||||
k8s_resource('orders-service', resource_deps=['orders-migration', 'redis'], labels=['12-services-supporting'])
|
k8s_resource('orders-service', resource_deps=['orders-migration-' + git_commit_short, 'redis'], labels=['12-services-supporting'])
|
||||||
k8s_resource('external-service', resource_deps=['external-migration', 'external-data-init', 'redis'], labels=['12-services-supporting'])
|
k8s_resource('external-service', resource_deps=['external-migration-' + git_commit_short, 'external-data-init-' + git_commit_short, 'redis'], labels=['12-services-supporting'])
|
||||||
|
|
||||||
# Platform Services
|
# Platform Services
|
||||||
k8s_resource('notification-service', resource_deps=['notification-migration', 'redis', 'rabbitmq'], labels=['13-services-platform'])
|
k8s_resource('notification-service', resource_deps=['notification-migration-' + git_commit_short, 'redis', 'rabbitmq'], labels=['13-services-platform'])
|
||||||
k8s_resource('alert-processor', resource_deps=['alert-processor-migration', 'redis', 'rabbitmq'], labels=['13-services-platform'])
|
k8s_resource('alert-processor', resource_deps=['alert-processor-migration-' + git_commit_short, 'redis', 'rabbitmq'], labels=['13-services-platform'])
|
||||||
k8s_resource('orchestrator-service', resource_deps=['orchestrator-migration', 'redis'], labels=['13-services-platform'])
|
k8s_resource('orchestrator-service', resource_deps=['orchestrator-migration-' + git_commit_short, 'redis'], labels=['13-services-platform'])
|
||||||
|
|
||||||
# Demo Services
|
# Demo Services
|
||||||
k8s_resource('demo-session-service', resource_deps=['demo-session-migration', 'redis'], labels=['14-services-demo'])
|
k8s_resource('demo-session-service', resource_deps=['demo-session-migration-' + git_commit_short, 'redis'], labels=['14-services-demo'])
|
||||||
k8s_resource('demo-cleanup-worker', resource_deps=['demo-session-service', 'redis'], labels=['14-services-demo'])
|
k8s_resource('demo-cleanup-worker-' + git_commit_short, resource_deps=['demo-session-service', 'redis'], labels=['14-services-demo'])
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# FRONTEND & GATEWAY
|
# FRONTEND & GATEWAY
|
||||||
@@ -1275,7 +1280,7 @@ k8s_resource('frontend', resource_deps=['gateway'], labels=['15-frontend'])
|
|||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
|
||||||
k8s_resource('demo-session-cleanup', resource_deps=['demo-session-service'], labels=['16-cronjobs'])
|
k8s_resource('demo-session-cleanup', resource_deps=['demo-session-service'], labels=['16-cronjobs'])
|
||||||
k8s_resource('external-data-rotation', resource_deps=['external-service'], labels=['16-cronjobs'])
|
k8s_resource('external-data-rotation-' + git_commit_short, resource_deps=['external-service'], labels=['16-cronjobs'])
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# WATCH SETTINGS
|
# WATCH SETTINGS
|
||||||
|
|||||||
121
infrastructure/monitoring/k8s-infra/README.md
Normal file
121
infrastructure/monitoring/k8s-infra/README.md
Normal file
@@ -0,0 +1,121 @@
|
|||||||
|
# Kubernetes Infrastructure Monitoring
|
||||||
|
|
||||||
|
This directory contains configurations for deploying Kubernetes infrastructure monitoring components that integrate with SigNoz.
|
||||||
|
|
||||||
|
## Components
|
||||||
|
|
||||||
|
| Component | Purpose | Metrics Endpoint |
|
||||||
|
|-----------|---------|------------------|
|
||||||
|
| **kube-state-metrics** | Kubernetes object metrics (pods, deployments, nodes, etc.) | `:8080/metrics` |
|
||||||
|
| **node-exporter** | Host-level metrics (CPU, memory, disk, network) | `:9100/metrics` |
|
||||||
|
|
||||||
|
## Quick Start (MicroK8s Production)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 1. Deploy infrastructure monitoring components
|
||||||
|
./deploy-k8s-infra-monitoring.sh --microk8s install
|
||||||
|
|
||||||
|
# 2. Upgrade SigNoz to scrape the new metrics
|
||||||
|
microk8s helm3 upgrade signoz signoz/signoz \
|
||||||
|
-n bakery-ia \
|
||||||
|
-f ../signoz/signoz-values-prod.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
### Install
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Standard Kubernetes
|
||||||
|
./deploy-k8s-infra-monitoring.sh install
|
||||||
|
|
||||||
|
# MicroK8s
|
||||||
|
./deploy-k8s-infra-monitoring.sh --microk8s install
|
||||||
|
```
|
||||||
|
|
||||||
|
### Upgrade
|
||||||
|
|
||||||
|
```bash
|
||||||
|
./deploy-k8s-infra-monitoring.sh --microk8s upgrade
|
||||||
|
```
|
||||||
|
|
||||||
|
### Uninstall
|
||||||
|
|
||||||
|
```bash
|
||||||
|
./deploy-k8s-infra-monitoring.sh --microk8s uninstall
|
||||||
|
```
|
||||||
|
|
||||||
|
### Check Status
|
||||||
|
|
||||||
|
```bash
|
||||||
|
./deploy-k8s-infra-monitoring.sh --microk8s status
|
||||||
|
```
|
||||||
|
|
||||||
|
### Dry Run
|
||||||
|
|
||||||
|
```bash
|
||||||
|
./deploy-k8s-infra-monitoring.sh --microk8s --dry-run install
|
||||||
|
```
|
||||||
|
|
||||||
|
## Files
|
||||||
|
|
||||||
|
- `kube-state-metrics-values.yaml` - Helm values for kube-state-metrics
|
||||||
|
- `node-exporter-values.yaml` - Helm values for node-exporter
|
||||||
|
- `deploy-k8s-infra-monitoring.sh` - Deployment automation script
|
||||||
|
|
||||||
|
## SigNoz Integration
|
||||||
|
|
||||||
|
The SigNoz OTel Collector is configured (in `signoz-values-prod.yaml`) to scrape metrics from:
|
||||||
|
|
||||||
|
- `kube-state-metrics.bakery-ia.svc.cluster.local:8080`
|
||||||
|
- `node-exporter-prometheus-node-exporter.bakery-ia.svc.cluster.local:9100`
|
||||||
|
|
||||||
|
After deploying these components, metrics will appear in SigNoz under:
|
||||||
|
- **Infrastructure** > **Kubernetes** (for K8s object metrics)
|
||||||
|
- **Infrastructure** > **Hosts** (for node metrics)
|
||||||
|
|
||||||
|
## Metrics Available
|
||||||
|
|
||||||
|
### From kube-state-metrics
|
||||||
|
|
||||||
|
- Pod status, phase, restarts
|
||||||
|
- Deployment replicas (desired vs available)
|
||||||
|
- Node conditions and capacity
|
||||||
|
- PVC status and capacity
|
||||||
|
- Resource requests and limits
|
||||||
|
- Job/CronJob status
|
||||||
|
|
||||||
|
### From node-exporter
|
||||||
|
|
||||||
|
- CPU usage per core
|
||||||
|
- Memory usage (total, free, cached)
|
||||||
|
- Disk I/O and space
|
||||||
|
- Network traffic (bytes in/out)
|
||||||
|
- System load average
|
||||||
|
- Filesystem usage
|
||||||
|
|
||||||
|
## Troubleshooting
|
||||||
|
|
||||||
|
### Check if metrics are being scraped
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Port-forward to kube-state-metrics
|
||||||
|
microk8s kubectl port-forward svc/kube-state-metrics 8080:8080 -n bakery-ia &
|
||||||
|
curl localhost:8080/metrics | head -50
|
||||||
|
|
||||||
|
# Port-forward to node-exporter
|
||||||
|
microk8s kubectl port-forward svc/node-exporter-prometheus-node-exporter 9100:9100 -n bakery-ia &
|
||||||
|
curl localhost:9100/metrics | head -50
|
||||||
|
```
|
||||||
|
|
||||||
|
### Check OTel Collector logs
|
||||||
|
|
||||||
|
```bash
|
||||||
|
microk8s kubectl logs -l app.kubernetes.io/name=signoz-otel-collector -n bakery-ia --tail=100
|
||||||
|
```
|
||||||
|
|
||||||
|
### Verify pods are running
|
||||||
|
|
||||||
|
```bash
|
||||||
|
microk8s kubectl get pods -n bakery-ia | grep -E "(kube-state|node-exporter)"
|
||||||
|
```
|
||||||
347
infrastructure/monitoring/k8s-infra/deploy-k8s-infra-monitoring.sh
Executable file
347
infrastructure/monitoring/k8s-infra/deploy-k8s-infra-monitoring.sh
Executable file
@@ -0,0 +1,347 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# Kubernetes Infrastructure Monitoring Deployment Script
|
||||||
|
# ============================================================================
|
||||||
|
# Deploys kube-state-metrics and node-exporter for Kubernetes infrastructure
|
||||||
|
# monitoring in SigNoz
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
# Abort the script as soon as any command exits with a non-zero status.
set -o errexit

# --- Configuration ----------------------------------------------------------
# Target namespace; the -n/--namespace option parsed further down may replace it.
NAMESPACE="bakery-ia"
# Absolute path of the directory holding this script; the install functions
# resolve their Helm values files relative to it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# --- Output colours ---------------------------------------------------------
# Stored as literal backslash sequences; they are interpreted by `echo -e`.
NC='\033[0m' # No Color (reset)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
|
||||||
|
|
||||||
|
# Function to display help
|
||||||
|
# Print the usage text (commands, options and examples) to stdout.
# $0 is expanded inside the here-document so the examples show the name the
# script was invoked with.
show_help() {
    cat <<EOF
Usage: $0 [OPTIONS] [COMMAND]

Deploy Kubernetes infrastructure monitoring components

Commands:
 install Install kube-state-metrics and node-exporter (default)
 upgrade Upgrade existing deployments
 uninstall Remove all infrastructure monitoring components
 status Show deployment status

Options:
 -h, --help Show this help message
 -d, --dry-run Show what would be done without executing
 -n, --namespace NS Specify namespace (default: bakery-ia)
 --microk8s Use microk8s helm3 command (for MicroK8s clusters)

Examples:
 $0 install # Install on standard k8s
 $0 --microk8s install # Install on MicroK8s
 $0 --microk8s upgrade # Upgrade on MicroK8s
 $0 --microk8s uninstall # Remove from MicroK8s
 $0 status # Check deployment status
EOF
}
|
||||||
|
|
||||||
|
# Parse arguments
|
||||||
|
# Defaults; each one may be replaced by the command-line arguments below.
DRY_RUN=false        # true  -> only print what would be done
USE_MICROK8S=false   # true  -> use the MicroK8s-wrapped helm3/kubectl
COMMAND="install"    # one of: install | upgrade | uninstall | status

# Consume the argument list left to right. Options and the command word may
# appear in any order; if several command words are given the last one wins.
while [[ $# -gt 0 ]]; do
    case "$1" in
        -h|--help)
            show_help
            exit 0
            ;;
        -d|--dry-run)
            DRY_RUN=true
            shift
            ;;
        -n|--namespace)
            # Guard against a missing value: without it, a trailing "-n" makes
            # `shift 2` fail and `set -e` ends the script with no message, and
            # "-n --microk8s" would silently use "--microk8s" as the namespace.
            if [[ $# -lt 2 || -z "$2" || "$2" == -* ]]; then
                echo -e "${RED}Error: $1 requires a namespace argument.${NC}"
                show_help
                exit 1
            fi
            NAMESPACE="$2"
            shift 2
            ;;
        --microk8s)
            USE_MICROK8S=true
            shift
            ;;
        install|upgrade|uninstall|status)
            COMMAND="$1"
            shift
            ;;
        *)
            echo -e "${RED}Unknown argument: $1${NC}"
            show_help
            exit 1
            ;;
    esac
done
|
||||||
|
|
||||||
|
# Set helm and kubectl commands based on environment
|
||||||
|
# Start from the standalone CLIs and switch both to the MicroK8s-bundled
# wrappers when --microk8s was requested. The values are expanded unquoted by
# the functions below so that "microk8s helm3" splits into command + argument.
HELM_CMD="helm"
KUBECTL_CMD="kubectl"
if [[ "$USE_MICROK8S" == true ]]; then
    HELM_CMD="microk8s helm3"
    KUBECTL_CMD="microk8s kubectl"
fi
|
||||||
|
|
||||||
|
# Function to check prerequisites
|
||||||
|
# Verify that the required tooling is usable before anything is deployed.
#
# Reads:  USE_MICROK8S, KUBECTL_CMD and the colour variables.
# Exits:  with status 1 if Helm is unavailable or the cluster cannot be reached.
check_prerequisites() {
    echo -e "${BLUE}Checking prerequisites...${NC}"

    # Helm: on a standard cluster it only has to be on PATH; on MicroK8s the
    # helm3 addon must actually respond to `version`.
    if [[ "$USE_MICROK8S" != true ]]; then
        if ! command -v helm &> /dev/null; then
            echo -e "${RED}Error: Helm is not installed.${NC}"
            exit 1
        fi
    elif microk8s helm3 version &> /dev/null; then
        echo -e "${GREEN}MicroK8s helm3 is available.${NC}"
    else
        echo -e "${RED}Error: MicroK8s helm3 addon is not working.${NC}"
        echo "Enable it with: microk8s enable helm3"
        exit 1
    fi

    # Cluster connectivity, checked through whichever kubectl was selected.
    if ! $KUBECTL_CMD cluster-info &> /dev/null; then
        echo -e "${RED}Error: Cannot connect to Kubernetes cluster.${NC}"
        exit 1
    fi

    echo -e "${GREEN}Prerequisites check passed.${NC}"
    echo ""
}
|
||||||
|
|
||||||
|
# Function to setup Helm repository
|
||||||
|
# Make the prometheus-community chart repository available to Helm.
#
# Reads:  DRY_RUN, HELM_CMD and the colour variables.
# In dry-run mode it only reports the intended action and returns.
setup_helm_repo() {
    echo -e "${BLUE}Setting up Prometheus Community Helm repository...${NC}"

    if [[ "$DRY_RUN" == true ]]; then
        echo " (dry-run) Would add prometheus-community Helm repository"
        return
    fi

    # The pipeline's status is grep's, so a failing `repo list` (its stderr is
    # discarded) simply counts as "repository not yet added".
    if ! $HELM_CMD repo list 2>/dev/null | grep -q "prometheus-community"; then
        # First use: register the repository, then refresh all indexes.
        $HELM_CMD repo add prometheus-community https://prometheus-community.github.io/helm-charts
        $HELM_CMD repo update
    else
        # Already registered: refresh only this repository's index.
        echo -e "${BLUE}Repository already added, updating...${NC}"
        $HELM_CMD repo update prometheus-community
    fi

    echo -e "${GREEN}Helm repository ready.${NC}"
    echo ""
}
|
||||||
|
|
||||||
|
# Function to ensure namespace exists
|
||||||
|
# Create the target namespace unless it is already present.
#
# Reads:  NAMESPACE, DRY_RUN, KUBECTL_CMD and the colour variables.
# In dry-run mode it only reports the intended action and returns.
ensure_namespace() {
    echo -e "${BLUE}Ensuring namespace $NAMESPACE exists...${NC}"

    if [[ "$DRY_RUN" == true ]]; then
        echo " (dry-run) Would create namespace if needed"
        return
    fi

    # A successful `get namespace` means there is nothing to create.
    if $KUBECTL_CMD get namespace "$NAMESPACE" &> /dev/null; then
        echo -e "${BLUE}Namespace $NAMESPACE already exists.${NC}"
    else
        $KUBECTL_CMD create namespace "$NAMESPACE"
        echo -e "${GREEN}Namespace $NAMESPACE created.${NC}"
    fi
    echo ""
}
|
||||||
|
|
||||||
|
# Function to install kube-state-metrics
|
||||||
|
# Install (or upgrade in place) the kube-state-metrics Helm release.
#
# Reads:  SCRIPT_DIR, NAMESPACE, DRY_RUN, HELM_CMD and the colour variables.
# Exits:  with status 1 if the values file next to this script is missing.
# In dry-run mode the Helm command is printed instead of executed.
install_kube_state_metrics() {
    echo -e "${BLUE}Installing kube-state-metrics...${NC}"

    local values_path="$SCRIPT_DIR/kube-state-metrics-values.yaml"
    if [[ ! -f "$values_path" ]]; then
        echo -e "${RED}Error: Values file not found: $values_path${NC}"
        exit 1
    fi

    # Arguments shared by the dry-run preview and the real invocation.
    local -a helm_args=(
        upgrade --install kube-state-metrics
        prometheus-community/kube-state-metrics
        -n "$NAMESPACE"
        -f "$values_path"
    )

    if [[ "$DRY_RUN" == true ]]; then
        echo " (dry-run) Would install kube-state-metrics"
        echo " Command: $HELM_CMD ${helm_args[*]}"
        return
    fi

    # --wait blocks until the release's resources are ready (5 minute limit).
    $HELM_CMD "${helm_args[@]}" --wait --timeout 5m

    echo -e "${GREEN}kube-state-metrics installed successfully.${NC}"
    echo ""
}
|
||||||
|
|
||||||
|
# Function to install node-exporter
|
||||||
|
# Install (or upgrade in place) the node-exporter Helm release.
#
# Reads:  SCRIPT_DIR, NAMESPACE, DRY_RUN, HELM_CMD and the colour variables.
# Exits:  with status 1 if the values file next to this script is missing.
# In dry-run mode the Helm command is printed instead of executed.
install_node_exporter() {
    echo -e "${BLUE}Installing node-exporter...${NC}"

    local values_path="$SCRIPT_DIR/node-exporter-values.yaml"
    if [[ ! -f "$values_path" ]]; then
        echo -e "${RED}Error: Values file not found: $values_path${NC}"
        exit 1
    fi

    # Arguments shared by the dry-run preview and the real invocation.
    local -a helm_args=(
        upgrade --install node-exporter
        prometheus-community/prometheus-node-exporter
        -n "$NAMESPACE"
        -f "$values_path"
    )

    if [[ "$DRY_RUN" == true ]]; then
        echo " (dry-run) Would install node-exporter"
        echo " Command: $HELM_CMD ${helm_args[*]}"
        return
    fi

    # --wait blocks until the release's resources are ready (5 minute limit).
    $HELM_CMD "${helm_args[@]}" --wait --timeout 5m

    echo -e "${GREEN}node-exporter installed successfully.${NC}"
    echo ""
}
|
||||||
|
|
||||||
|
# Function to uninstall components
|
||||||
|
# Remove the kube-state-metrics and node-exporter Helm releases.
#
# Reads:  NAMESPACE, DRY_RUN, HELM_CMD and the colour variables.
# In dry-run mode it only reports the intended action and returns.
# A release that is not installed is reported and skipped, not treated as an
# error.
uninstall_components() {
    echo -e "${BLUE}Uninstalling Kubernetes infrastructure monitoring components...${NC}"

    if [[ "$DRY_RUN" == true ]]; then
        echo " (dry-run) Would uninstall kube-state-metrics and node-exporter"
        return
    fi

    local release
    for release in kube-state-metrics node-exporter; do
        # Look the release up by its exact name. Grepping the `helm list`
        # table for a substring also matches the chart column or other
        # releases (e.g. any release built from the prometheus-node-exporter
        # chart), after which `helm uninstall` fails and `set -e` aborts the
        # script part-way through.
        # NOTE(review): `helm status` presumably also fails when the cluster
        # is unreachable, which is then reported as "not found" — confirm
        # that is acceptable.
        if $HELM_CMD status "$release" -n "$NAMESPACE" &> /dev/null; then
            echo -e "${BLUE}Removing ${release}...${NC}"
            $HELM_CMD uninstall "$release" -n "$NAMESPACE" --wait
            echo -e "${GREEN}${release} removed.${NC}"
        else
            echo -e "${YELLOW}${release} not found.${NC}"
        fi
    done

    echo ""
}
|
||||||
|
|
||||||
|
# Function to show deployment status
|
||||||
|
show_status() {
    # Print a summary of the monitoring components in $NAMESPACE: Helm
    # releases, pods, services and the in-cluster endpoints to scrape.
    # Each listing falls back to a "No ... found" line when the lookup
    # produces no match or fails.
    #
    # Reads: NAMESPACE, HELM_CMD, KUBECTL_CMD and the colour variables.
    local name_pattern="(kube-state-metrics|node-exporter)"
    local pod_selector='app.kubernetes.io/name in (kube-state-metrics, prometheus-node-exporter)'

    echo -e "${BLUE}=== Kubernetes Infrastructure Monitoring Status ===${NC}"
    echo ""

    echo -e "${BLUE}Helm Releases:${NC}"
    if ! $HELM_CMD list -n "$NAMESPACE" | grep -E "$name_pattern"; then
        echo " No releases found"
    fi
    echo ""

    echo -e "${BLUE}Pods:${NC}"
    # kubectl's own error output is discarded; the fallback line covers failures.
    if ! $KUBECTL_CMD get pods -n "$NAMESPACE" -l "$pod_selector" 2>/dev/null; then
        echo " No pods found"
    fi
    echo ""

    echo -e "${BLUE}Services:${NC}"
    if ! $KUBECTL_CMD get svc -n "$NAMESPACE" | grep -E "$name_pattern"; then
        echo " No services found"
    fi
    echo ""

    echo -e "${BLUE}Endpoints (for SigNoz scraping):${NC}"
    echo " kube-state-metrics: kube-state-metrics.$NAMESPACE.svc.cluster.local:8080"
    echo " node-exporter: node-exporter-prometheus-node-exporter.$NAMESPACE.svc.cluster.local:9100"
    echo ""
}
|
||||||
|
|
||||||
|
# Function to show post-install instructions
|
||||||
|
show_post_install_instructions() {
    # Print the SigNoz OTel Collector configuration needed to scrape the two
    # exporters, followed by the Helm command that applies it.
    #
    # Reads: NAMESPACE, USE_MICROK8S and the colour variables.
    echo -e "${BLUE}=== Post-Installation Instructions ===${NC}"
    echo ""
    echo "To enable SigNoz to scrape these metrics, update your SigNoz OTel Collector config."
    echo ""
    echo "Add the following to your signoz-values-prod.yaml under otelCollector.config:"
    echo ""

    # The heredoc delimiter is unquoted on purpose so that $NAMESPACE expands:
    # the scrape targets must point at the namespace the exporters were
    # installed into, matching the endpoints printed by show_status.
    cat << EOF
otelCollector:
  config:
    receivers:
      prometheus:
        config:
          scrape_configs:
            - job_name: 'kube-state-metrics'
              static_configs:
                - targets: ['kube-state-metrics.${NAMESPACE}.svc.cluster.local:8080']
              scrape_interval: 30s
            - job_name: 'node-exporter'
              static_configs:
                - targets: ['node-exporter-prometheus-node-exporter.${NAMESPACE}.svc.cluster.local:9100']
              scrape_interval: 30s
    service:
      pipelines:
        metrics:
          receivers: [otlp, prometheus]
EOF

    echo ""
    echo "Then upgrade SigNoz:"

    # Show the Helm invocation that matches the selected cluster flavour.
    if [[ "$USE_MICROK8S" == true ]]; then
        echo " microk8s helm3 upgrade signoz signoz/signoz -n $NAMESPACE -f infrastructure/monitoring/signoz/signoz-values-prod.yaml"
    else
        echo " helm upgrade signoz signoz/signoz -n $NAMESPACE -f infrastructure/monitoring/signoz/signoz-values-prod.yaml"
    fi
    echo ""
}
|
||||||
|
|
||||||
|
# Main execution
|
||||||
|
main() {
    # Entry point: print the banner, verify prerequisites, then dispatch on
    # $COMMAND (install | upgrade | uninstall | status).
    #
    # Reads: COMMAND and the colour variables.
    # Exits: with status 1 when COMMAND is not one of the supported values.
    echo -e "${BLUE}"
    echo "=========================================="
    echo "Kubernetes Infrastructure Monitoring"
    echo "=========================================="
    echo -e "${NC}"

    check_prerequisites

    # Quote the word so an empty or whitespace-containing value is matched as
    # a single string instead of being word-split.
    case "$COMMAND" in
        install)
            setup_helm_repo
            ensure_namespace
            install_kube_state_metrics
            install_node_exporter
            show_status
            show_post_install_instructions
            echo -e "${GREEN}Installation completed successfully!${NC}"
            ;;
        upgrade)
            setup_helm_repo
            install_kube_state_metrics
            install_node_exporter
            show_status
            echo -e "${GREEN}Upgrade completed successfully!${NC}"
            ;;
        uninstall)
            uninstall_components
            echo -e "${GREEN}Uninstallation completed.${NC}"
            ;;
        status)
            show_status
            ;;
        *)
            # Without this branch an unrecognised command fell through the
            # case statement and the script exited 0 having done nothing.
            echo -e "${RED}Error: Unknown command: ${COMMAND}${NC}" >&2
            exit 1
            ;;
    esac
}
|
||||||
|
|
||||||
|
# Run main function
|
||||||
|
main
|
||||||
@@ -0,0 +1,109 @@
|
|||||||
|
# Kube-State-Metrics Helm Values for Bakery IA
|
||||||
|
# Chart: prometheus-community/kube-state-metrics
|
||||||
|
# Documentation: https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-state-metrics
|
||||||
|
#
|
||||||
|
# Install Command:
|
||||||
|
# helm install kube-state-metrics prometheus-community/kube-state-metrics \
|
||||||
|
# -n bakery-ia -f kube-state-metrics-values.yaml
|
||||||
|
|
||||||
|
# Image configuration
|
||||||
|
image:
|
||||||
|
registry: registry.k8s.io
|
||||||
|
repository: kube-state-metrics/kube-state-metrics
|
||||||
|
tag: "" # Uses chart default (latest stable)
|
||||||
|
pullPolicy: IfNotPresent
|
||||||
|
|
||||||
|
# Replicas - single instance is sufficient for most clusters
|
||||||
|
replicas: 1
|
||||||
|
|
||||||
|
# Resource limits optimized for MicroK8s VPS
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
cpu: 10m
|
||||||
|
memory: 32Mi
|
||||||
|
limits:
|
||||||
|
cpu: 100m
|
||||||
|
memory: 128Mi
|
||||||
|
|
||||||
|
# Service configuration
|
||||||
|
service:
|
||||||
|
type: ClusterIP
|
||||||
|
port: 8080
|
||||||
|
annotations: {}
|
||||||
|
|
||||||
|
# Prometheus scrape annotations
|
||||||
|
prometheusScrape: true
|
||||||
|
|
||||||
|
# Which Kubernetes resources to collect metrics for
|
||||||
|
# Full list available, but we focus on most useful ones
|
||||||
|
collectors:
|
||||||
|
- certificatesigningrequests
|
||||||
|
- configmaps
|
||||||
|
- cronjobs
|
||||||
|
- daemonsets
|
||||||
|
- deployments
|
||||||
|
- endpoints
|
||||||
|
- horizontalpodautoscalers
|
||||||
|
- ingresses
|
||||||
|
- jobs
|
||||||
|
- leases
|
||||||
|
- limitranges
|
||||||
|
- namespaces
|
||||||
|
- networkpolicies
|
||||||
|
- nodes
|
||||||
|
- persistentvolumeclaims
|
||||||
|
- persistentvolumes
|
||||||
|
- poddisruptionbudgets
|
||||||
|
- pods
|
||||||
|
- replicasets
|
||||||
|
- replicationcontrollers
|
||||||
|
- resourcequotas
|
||||||
|
- secrets
|
||||||
|
- services
|
||||||
|
- statefulsets
|
||||||
|
- storageclasses
|
||||||
|
|
||||||
|
# Namespace to watch (empty = all namespaces)
|
||||||
|
namespaces: ""
|
||||||
|
|
||||||
|
# Node selector for scheduling
|
||||||
|
nodeSelector: {}
|
||||||
|
|
||||||
|
# Tolerations
|
||||||
|
tolerations: []
|
||||||
|
|
||||||
|
# Affinity rules
|
||||||
|
affinity: {}
|
||||||
|
|
||||||
|
# Pod security context
|
||||||
|
podSecurityContext:
|
||||||
|
runAsNonRoot: true
|
||||||
|
runAsUser: 65534
|
||||||
|
fsGroup: 65534
|
||||||
|
|
||||||
|
# Container security context
|
||||||
|
securityContext:
|
||||||
|
allowPrivilegeEscalation: false
|
||||||
|
capabilities:
|
||||||
|
drop:
|
||||||
|
- ALL
|
||||||
|
readOnlyRootFilesystem: true
|
||||||
|
|
||||||
|
# Self-monitoring metrics
|
||||||
|
selfMonitor:
|
||||||
|
enabled: true
|
||||||
|
|
||||||
|
# Kubernetes API access
|
||||||
|
kubeconfig:
|
||||||
|
enabled: false
|
||||||
|
|
||||||
|
# RBAC configuration
|
||||||
|
rbac:
|
||||||
|
create: true
|
||||||
|
useClusterRole: true
|
||||||
|
|
||||||
|
# Service account
|
||||||
|
serviceAccount:
|
||||||
|
create: true
|
||||||
|
name: ""
|
||||||
|
annotations: {}
|
||||||
@@ -0,0 +1,97 @@
|
|||||||
|
# Prometheus Node Exporter Helm Values for Bakery IA
|
||||||
|
# Chart: prometheus-community/prometheus-node-exporter
|
||||||
|
# Documentation: https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus-node-exporter
|
||||||
|
#
|
||||||
|
# Install Command:
|
||||||
|
# helm install node-exporter prometheus-community/prometheus-node-exporter \
|
||||||
|
# -n bakery-ia -f node-exporter-values.yaml
|
||||||
|
|
||||||
|
# Image configuration
|
||||||
|
image:
|
||||||
|
registry: quay.io
|
||||||
|
repository: prometheus/node-exporter
|
||||||
|
tag: "" # Uses chart default (latest stable)
|
||||||
|
pullPolicy: IfNotPresent
|
||||||
|
|
||||||
|
# Resource limits optimized for MicroK8s VPS
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
cpu: 10m
|
||||||
|
memory: 32Mi
|
||||||
|
limits:
|
||||||
|
cpu: 200m
|
||||||
|
memory: 64Mi
|
||||||
|
|
||||||
|
# Service configuration
|
||||||
|
service:
|
||||||
|
type: ClusterIP
|
||||||
|
port: 9100
|
||||||
|
targetPort: 9100
|
||||||
|
annotations:
|
||||||
|
prometheus.io/scrape: "true"
|
||||||
|
|
||||||
|
# DaemonSet update strategy
|
||||||
|
updateStrategy:
|
||||||
|
type: RollingUpdate
|
||||||
|
rollingUpdate:
|
||||||
|
maxUnavailable: 1
|
||||||
|
|
||||||
|
# Host network - required for accurate network metrics
|
||||||
|
hostNetwork: true
|
||||||
|
hostPID: true
|
||||||
|
hostRootFsMount:
|
||||||
|
enabled: true
|
||||||
|
mountPropagation: HostToContainer
|
||||||
|
|
||||||
|
# Node selector
|
||||||
|
nodeSelector: {}
|
||||||
|
|
||||||
|
# Tolerations - allow scheduling on all nodes including control plane
|
||||||
|
tolerations:
|
||||||
|
- effect: NoSchedule
|
||||||
|
operator: Exists
|
||||||
|
|
||||||
|
# Affinity rules
|
||||||
|
affinity: {}
|
||||||
|
|
||||||
|
# Pod security context
|
||||||
|
podSecurityContext:
|
||||||
|
fsGroup: 65534
|
||||||
|
runAsGroup: 65534
|
||||||
|
runAsNonRoot: true
|
||||||
|
runAsUser: 65534
|
||||||
|
|
||||||
|
# Container security context
|
||||||
|
securityContext:
|
||||||
|
readOnlyRootFilesystem: true
|
||||||
|
|
||||||
|
# RBAC configuration
|
||||||
|
rbac:
|
||||||
|
create: true
|
||||||
|
pspEnabled: false
|
||||||
|
|
||||||
|
# Service account
|
||||||
|
serviceAccount:
|
||||||
|
create: true
|
||||||
|
name: ""
|
||||||
|
annotations: {}
|
||||||
|
|
||||||
|
# Prometheus Operator ServiceMonitor (kept disabled)
|
||||||
|
prometheus:
|
||||||
|
monitor:
|
||||||
|
enabled: false # We use SigNoz OTel collector scraping instead
|
||||||
|
|
||||||
|
# Extra arguments for node-exporter
|
||||||
|
extraArgs:
|
||||||
|
- --collector.filesystem.mount-points-exclude=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/.+)($|/)
|
||||||
|
- --collector.filesystem.fs-types-exclude=^(autofs|binfmt_misc|bpf|cgroup2?|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|iso9660|mqueue|nsfs|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|selinuxfs|squashfs|sysfs|tracefs)$
|
||||||
|
|
||||||
|
# The default collector set is used (no --collector.* / --no-collector.* flags are passed in extraArgs)
|
||||||
|
# No additional host paths are mounted into the node-exporter container
|
||||||
|
extraHostVolumeMounts: []
|
||||||
|
|
||||||
|
# Sidecar containers
|
||||||
|
sidecars: []
|
||||||
|
|
||||||
|
# Init containers
|
||||||
|
initContainers: []
|
||||||
@@ -60,6 +60,34 @@ otelCollector:
|
|||||||
limits:
|
limits:
|
||||||
memory: "2Gi"
|
memory: "2Gi"
|
||||||
cpu: "1000m"
|
cpu: "1000m"
|
||||||
|
# Additional config for Kubernetes infrastructure metrics scraping
|
||||||
|
config:
|
||||||
|
receivers:
|
||||||
|
prometheus:
|
||||||
|
config:
|
||||||
|
scrape_configs:
|
||||||
|
# Kube-state-metrics - Kubernetes object metrics
|
||||||
|
- job_name: 'kube-state-metrics'
|
||||||
|
static_configs:
|
||||||
|
- targets: ['kube-state-metrics.bakery-ia.svc.cluster.local:8080']
|
||||||
|
scrape_interval: 30s
|
||||||
|
metric_relabel_configs:
|
||||||
|
- source_labels: [__name__]
|
||||||
|
regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset|replicaset|job|cronjob|persistentvolume|persistentvolumeclaim|resourcequota|service|configmap|secret).*'
|
||||||
|
action: keep
|
||||||
|
# Node-exporter - Host-level metrics
|
||||||
|
- job_name: 'node-exporter'
|
||||||
|
static_configs:
|
||||||
|
- targets: ['node-exporter-prometheus-node-exporter.bakery-ia.svc.cluster.local:9100']
|
||||||
|
scrape_interval: 30s
|
||||||
|
metric_relabel_configs:
|
||||||
|
- source_labels: [__name__]
|
||||||
|
regex: 'node_(cpu|memory|disk|filesystem|network|load).*'
|
||||||
|
action: keep
|
||||||
|
service:
|
||||||
|
pipelines:
|
||||||
|
metrics:
|
||||||
|
receivers: [otlp, prometheus]
|
||||||
|
|
||||||
queryService:
|
queryService:
|
||||||
resources:
|
resources:
|
||||||
|
|||||||
@@ -165,16 +165,9 @@ class BaseServiceSettings(BaseSettings):
|
|||||||
|
|
||||||
if password:
|
if password:
|
||||||
url = f"{protocol}://:{password}@{host}:{port}"
|
url = f"{protocol}://:{password}@{host}:{port}"
|
||||||
if use_tls:
|
|
||||||
# Use ssl_cert_reqs=none for self-signed certs in internal cluster
|
|
||||||
# Still encrypted, just skips cert validation
|
|
||||||
url += "?ssl_cert_reqs=none"
|
|
||||||
print(f"[DEBUG REDIS_URL] Returning URL with auth and TLS: {url}", file=sys.stderr)
|
print(f"[DEBUG REDIS_URL] Returning URL with auth and TLS: {url}", file=sys.stderr)
|
||||||
return url
|
return url
|
||||||
url = f"{protocol}://{host}:{port}"
|
url = f"{protocol}://{host}:{port}"
|
||||||
if use_tls:
|
|
||||||
# Use ssl_cert_reqs=none for self-signed certs in internal cluster
|
|
||||||
url += "?ssl_cert_reqs=none"
|
|
||||||
print(f"[DEBUG REDIS_URL] Returning URL without auth: {url}", file=sys.stderr)
|
print(f"[DEBUG REDIS_URL] Returning URL without auth: {url}", file=sys.stderr)
|
||||||
return url
|
return url
|
||||||
|
|
||||||
|
|||||||
@@ -134,22 +134,70 @@ class RedisConnectionManager:
|
|||||||
self._redis_url = redis_url
|
self._redis_url = redis_url
|
||||||
|
|
||||||
# Create connection pool with SSL handling for self-signed certificates
|
# Create connection pool with SSL handling for self-signed certificates
|
||||||
connection_kwargs = {
|
# For Redis 6.4.0+, we need to handle SSL parameters correctly
|
||||||
'db': db,
|
if redis_url.startswith("rediss://"):
|
||||||
'max_connections': max_connections,
|
# Extract connection parameters from URL
|
||||||
'decode_responses': decode_responses,
|
from urllib.parse import urlparse
|
||||||
'retry_on_timeout': retry_on_timeout,
|
|
||||||
'socket_keepalive': socket_keepalive,
|
|
||||||
'health_check_interval': health_check_interval
|
|
||||||
}
|
|
||||||
|
|
||||||
# Add SSL kwargs for self-signed certificates (using shared helper)
|
parsed_url = urlparse(redis_url)
|
||||||
connection_kwargs.update(get_ssl_kwargs_for_url(redis_url))
|
|
||||||
|
|
||||||
self._pool = redis.ConnectionPool.from_url(
|
# Build connection parameters for ConnectionPool
|
||||||
redis_url,
|
connection_params = {
|
||||||
**connection_kwargs
|
'db': db,
|
||||||
)
|
'max_connections': max_connections,
|
||||||
|
'retry_on_timeout': retry_on_timeout,
|
||||||
|
'socket_keepalive': socket_keepalive,
|
||||||
|
'health_check_interval': health_check_interval
|
||||||
|
}
|
||||||
|
|
||||||
|
# Add password if present
|
||||||
|
if parsed_url.password:
|
||||||
|
connection_params['password'] = parsed_url.password
|
||||||
|
|
||||||
|
# Create connection pool (without SSL parameters - they go to the client)
|
||||||
|
self._pool = redis.ConnectionPool(
|
||||||
|
host=parsed_url.hostname,
|
||||||
|
port=parsed_url.port or 6379,
|
||||||
|
**connection_params
|
||||||
|
)
|
||||||
|
|
||||||
|
# Get SSL configuration for self-signed certificates
|
||||||
|
ssl_kwargs = get_ssl_kwargs_for_url(redis_url)
|
||||||
|
|
||||||
|
# Create Redis client with SSL parameters
|
||||||
|
client_params = {
|
||||||
|
'connection_pool': self._pool,
|
||||||
|
'decode_responses': decode_responses
|
||||||
|
}
|
||||||
|
|
||||||
|
if ssl_kwargs:
|
||||||
|
client_params['ssl'] = True
|
||||||
|
client_params['ssl_cert_reqs'] = ssl_kwargs.get('ssl_cert_reqs', ssl.CERT_NONE)
|
||||||
|
client_params['ssl_ca_certs'] = ssl_kwargs.get('ssl_ca_certs')
|
||||||
|
client_params['ssl_certfile'] = ssl_kwargs.get('ssl_certfile')
|
||||||
|
client_params['ssl_keyfile'] = ssl_kwargs.get('ssl_keyfile')
|
||||||
|
|
||||||
|
self._client = redis.Redis(**client_params)
|
||||||
|
else:
|
||||||
|
# For non-TLS connections, use the original approach
|
||||||
|
connection_kwargs = {
|
||||||
|
'db': db,
|
||||||
|
'max_connections': max_connections,
|
||||||
|
'decode_responses': decode_responses,
|
||||||
|
'retry_on_timeout': retry_on_timeout,
|
||||||
|
'socket_keepalive': socket_keepalive,
|
||||||
|
'health_check_interval': health_check_interval
|
||||||
|
}
|
||||||
|
|
||||||
|
# Add SSL kwargs for self-signed certificates (using shared helper)
|
||||||
|
connection_kwargs.update(get_ssl_kwargs_for_url(redis_url))
|
||||||
|
|
||||||
|
self._pool = redis.ConnectionPool.from_url(
|
||||||
|
redis_url,
|
||||||
|
**connection_kwargs
|
||||||
|
)
|
||||||
|
|
||||||
|
self._client = redis.Redis(connection_pool=self._pool)
|
||||||
|
|
||||||
# Create Redis client with pool
|
# Create Redis client with pool
|
||||||
self._client = redis.Redis(connection_pool=self._pool)
|
self._client = redis.Redis(connection_pool=self._pool)
|
||||||
|
|||||||
Reference in New Issue
Block a user