Improve monitoring for prod
This commit is contained in:
@@ -56,6 +56,19 @@ data:
|
||||
cluster: 'bakery-ia'
|
||||
environment: 'production'
|
||||
|
||||
# AlertManager configuration
|
||||
alerting:
|
||||
alertmanagers:
|
||||
- static_configs:
|
||||
- targets:
|
||||
- alertmanager-0.alertmanager.monitoring.svc.cluster.local:9093
|
||||
- alertmanager-1.alertmanager.monitoring.svc.cluster.local:9093
|
||||
- alertmanager-2.alertmanager.monitoring.svc.cluster.local:9093
|
||||
|
||||
# Load alert rules
|
||||
rule_files:
|
||||
- '/etc/prometheus/rules/*.yml'
|
||||
|
||||
scrape_configs:
|
||||
# Scrape Prometheus itself
|
||||
- job_name: 'prometheus'
|
||||
@@ -114,16 +127,42 @@ data:
|
||||
target_label: __metrics_path__
|
||||
replacement: /api/v1/nodes/${1}/proxy/metrics
|
||||
|
||||
# Scrape AlertManager
|
||||
- job_name: 'alertmanager'
|
||||
static_configs:
|
||||
- targets:
|
||||
- alertmanager-0.alertmanager.monitoring.svc.cluster.local:9093
|
||||
- alertmanager-1.alertmanager.monitoring.svc.cluster.local:9093
|
||||
- alertmanager-2.alertmanager.monitoring.svc.cluster.local:9093
|
||||
|
||||
# Scrape PostgreSQL exporter
|
||||
- job_name: 'postgres-exporter'
|
||||
static_configs:
|
||||
- targets: ['postgres-exporter.monitoring.svc.cluster.local:9187']
|
||||
|
||||
# Scrape Node Exporter
|
||||
- job_name: 'node-exporter'
|
||||
kubernetes_sd_configs:
|
||||
- role: node
|
||||
relabel_configs:
|
||||
- source_labels: [__address__]
|
||||
regex: '(.*):10250'
|
||||
replacement: '${1}:9100'
|
||||
target_label: __address__
|
||||
- source_labels: [__meta_kubernetes_node_name]
|
||||
target_label: node
|
||||
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
kind: StatefulSet
|
||||
metadata:
|
||||
name: prometheus
|
||||
namespace: monitoring
|
||||
labels:
|
||||
app: prometheus
|
||||
spec:
|
||||
replicas: 1
|
||||
serviceName: prometheus
|
||||
replicas: 2
|
||||
selector:
|
||||
matchLabels:
|
||||
app: prometheus
|
||||
@@ -133,6 +172,18 @@ spec:
|
||||
app: prometheus
|
||||
spec:
|
||||
serviceAccountName: prometheus
|
||||
affinity:
|
||||
podAntiAffinity:
|
||||
preferredDuringSchedulingIgnoredDuringExecution:
|
||||
- weight: 100
|
||||
podAffinityTerm:
|
||||
labelSelector:
|
||||
matchExpressions:
|
||||
- key: app
|
||||
operator: In
|
||||
values:
|
||||
- prometheus
|
||||
topologyKey: kubernetes.io/hostname
|
||||
containers:
|
||||
- name: prometheus
|
||||
image: prom/prometheus:v3.0.1
|
||||
@@ -149,6 +200,8 @@ spec:
|
||||
volumeMounts:
|
||||
- name: prometheus-config
|
||||
mountPath: /etc/prometheus
|
||||
- name: prometheus-rules
|
||||
mountPath: /etc/prometheus/rules
|
||||
- name: prometheus-storage
|
||||
mountPath: /prometheus
|
||||
resources:
|
||||
@@ -174,22 +227,18 @@ spec:
|
||||
- name: prometheus-config
|
||||
configMap:
|
||||
name: prometheus-config
|
||||
- name: prometheus-storage
|
||||
persistentVolumeClaim:
|
||||
claimName: prometheus-storage
|
||||
- name: prometheus-rules
|
||||
configMap:
|
||||
name: prometheus-alert-rules
|
||||
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: PersistentVolumeClaim
|
||||
metadata:
|
||||
name: prometheus-storage
|
||||
namespace: monitoring
|
||||
spec:
|
||||
accessModes:
|
||||
- ReadWriteOnce
|
||||
resources:
|
||||
requests:
|
||||
storage: 20Gi
|
||||
volumeClaimTemplates:
|
||||
- metadata:
|
||||
name: prometheus-storage
|
||||
spec:
|
||||
accessModes: [ "ReadWriteOnce" ]
|
||||
resources:
|
||||
requests:
|
||||
storage: 20Gi
|
||||
|
||||
---
|
||||
apiVersion: v1
|
||||
@@ -199,6 +248,25 @@ metadata:
|
||||
namespace: monitoring
|
||||
labels:
|
||||
app: prometheus
|
||||
spec:
|
||||
type: ClusterIP
|
||||
clusterIP: None
|
||||
ports:
|
||||
- port: 9090
|
||||
targetPort: 9090
|
||||
protocol: TCP
|
||||
name: web
|
||||
selector:
|
||||
app: prometheus
|
||||
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: prometheus-external
|
||||
namespace: monitoring
|
||||
labels:
|
||||
app: prometheus
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
|
||||
Reference in New Issue
Block a user