Files
bakery-ia/infrastructure/kubernetes/base/components/monitoring/prometheus.yaml
2025-10-15 16:12:49 +02:00

211 lines
4.9 KiB
YAML

---
apiVersion: v1
kind: ServiceAccount
metadata:
name: prometheus
namespace: monitoring
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: prometheus
rules:
- apiGroups: [""]
resources:
- nodes
- nodes/proxy
- services
- endpoints
- pods
verbs: ["get", "list", "watch"]
- apiGroups:
- extensions
resources:
- ingresses
verbs: ["get", "list", "watch"]
- nonResourceURLs: ["/metrics"]
verbs: ["get"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: prometheus
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: prometheus
subjects:
- kind: ServiceAccount
name: prometheus
namespace: monitoring
---
apiVersion: v1
kind: ConfigMap
metadata:
name: prometheus-config
namespace: monitoring
data:
prometheus.yml: |
global:
scrape_interval: 30s
evaluation_interval: 30s
external_labels:
cluster: 'bakery-ia'
environment: 'production'
scrape_configs:
# Scrape Prometheus itself
- job_name: 'prometheus'
static_configs:
- targets: ['localhost:9090']
# Scrape all bakery-ia services
- job_name: 'bakery-services'
kubernetes_sd_configs:
- role: pod
namespaces:
names:
- bakery-ia
relabel_configs:
# Only scrape pods with metrics port
- source_labels: [__meta_kubernetes_pod_container_port_name]
action: keep
regex: http
# Add service name label
- source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_name]
target_label: service
# Add component label
- source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_component]
target_label: component
# Add pod name
- source_labels: [__meta_kubernetes_pod_name]
target_label: pod
# Set metrics path
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
action: replace
target_label: __metrics_path__
regex: (.+)
# Set scrape port
- source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
action: replace
regex: ([^:]+)(?::\d+)?;(\d+)
replacement: $1:$2
target_label: __address__
# Scrape Kubernetes nodes
- job_name: 'kubernetes-nodes'
kubernetes_sd_configs:
- role: node
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- target_label: __address__
replacement: kubernetes.default.svc:443
- source_labels: [__meta_kubernetes_node_name]
regex: (.+)
target_label: __metrics_path__
replacement: /api/v1/nodes/${1}/proxy/metrics
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: prometheus
namespace: monitoring
labels:
app: prometheus
spec:
replicas: 1
selector:
matchLabels:
app: prometheus
template:
metadata:
labels:
app: prometheus
spec:
serviceAccountName: prometheus
containers:
- name: prometheus
image: prom/prometheus:v2.48.0
args:
- '--config.file=/etc/prometheus/prometheus.yml'
- '--storage.tsdb.path=/prometheus'
- '--storage.tsdb.retention.time=30d'
- '--web.console.libraries=/usr/share/prometheus/console_libraries'
- '--web.console.templates=/usr/share/prometheus/consoles'
- '--web.enable-lifecycle'
ports:
- containerPort: 9090
name: web
volumeMounts:
- name: prometheus-config
mountPath: /etc/prometheus
- name: prometheus-storage
mountPath: /prometheus
resources:
requests:
memory: "1Gi"
cpu: "500m"
limits:
memory: "2Gi"
cpu: "1"
livenessProbe:
httpGet:
path: /-/healthy
port: 9090
initialDelaySeconds: 30
periodSeconds: 10
readinessProbe:
httpGet:
path: /-/ready
port: 9090
initialDelaySeconds: 5
periodSeconds: 5
volumes:
- name: prometheus-config
configMap:
name: prometheus-config
- name: prometheus-storage
persistentVolumeClaim:
claimName: prometheus-storage
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: prometheus-storage
namespace: monitoring
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 20Gi
---
apiVersion: v1
kind: Service
metadata:
name: prometheus
namespace: monitoring
labels:
app: prometheus
spec:
type: ClusterIP
ports:
- port: 9090
targetPort: 9090
protocol: TCP
name: web
selector:
app: prometheus