# File-viewer metadata captured with this listing: 307 lines, 9.0 KiB, YAML
---
# Deployment: runs one postgres-exporter pod in the "monitoring" namespace.
# - The PostgreSQL connection string comes from the Secret "postgres-exporter"
#   (key "data-source-name"); it is never written into this manifest.
# - Custom queries are mounted from the ConfigMap "postgres-exporter-queries".
# - Metrics are served on container port 9187 (named "metrics").
apiVersion: apps/v1
kind: Deployment
metadata:
  name: postgres-exporter
  namespace: monitoring
  labels:
    app: postgres-exporter
spec:
  replicas: 1
  selector:
    matchLabels:
      app: postgres-exporter
  template:
    metadata:
      labels:
        app: postgres-exporter
    spec:
      containers:
        - name: postgres-exporter
          image: prometheuscommunity/postgres-exporter:v0.15.0
          ports:
            - containerPort: 9187
              name: metrics
          env:
            - name: DATA_SOURCE_NAME
              valueFrom:
                secretKeyRef:
                  name: postgres-exporter
                  key: data-source-name
            # Load the custom queries mounted from the ConfigMap below.
            # NOTE(review): upstream marks extend.query-path as deprecated;
            # confirm it is still honoured before bumping the image version.
            - name: PG_EXPORTER_EXTEND_QUERY_PATH
              value: "/etc/postgres-exporter/queries.yaml"
            # Keep the built-in default metrics enabled alongside the custom
            # queries ("true" would disable them).
            - name: PG_EXPORTER_DISABLE_DEFAULT_METRICS
              value: "false"
            # Keep the pg_settings metrics enabled ("true" would disable them;
            # consider that if they turn out to be too noisy).
            - name: PG_EXPORTER_DISABLE_SETTINGS_METRICS
              value: "false"
          volumeMounts:
            - name: queries
              mountPath: /etc/postgres-exporter
              readOnly: true
          # Hardening: the exporter only needs to listen on an unprivileged
          # port and read its mounted config.
          # NOTE(review): 65534 is assumed to be the image's "nobody" user;
          # a numeric UID is required for runAsNonRoot to be verifiable.
          securityContext:
            runAsNonRoot: true
            runAsUser: 65534
            allowPrivilegeEscalation: false
            readOnlyRootFilesystem: true
            capabilities:
              drop:
                - ALL
            seccompProfile:
              type: RuntimeDefault
          resources:
            requests:
              memory: "64Mi"
              cpu: "50m"
            limits:
              memory: "128Mi"
              cpu: "200m"
          # Both probes hit the exporter's HTTP root on the named metrics
          # port, so the port number is declared in exactly one place.
          livenessProbe:
            httpGet:
              path: /
              port: metrics
            initialDelaySeconds: 30
            periodSeconds: 10
          readinessProbe:
            httpGet:
              path: /
              port: metrics
            initialDelaySeconds: 5
            periodSeconds: 5
      volumes:
        - name: queries
          configMap:
            name: postgres-exporter-queries

---
# ConfigMap: custom query definitions for postgres-exporter.
# The single key "queries.yaml" is mounted into the exporter pod at
# /etc/postgres-exporter/queries.yaml. Each top-level entry in that file is a
# metric namespace; every selected column is mapped to a LABEL, GAUGE or
# COUNTER under "metrics".
apiVersion: v1
kind: ConfigMap
metadata:
  name: postgres-exporter-queries
  namespace: monitoring
  labels:
    app: postgres-exporter
data:
  queries.yaml: |
    # Custom PostgreSQL queries for bakery-ia metrics

    # Per-database activity counters from pg_stat_database, excluding the
    # template databases and the "postgres" maintenance database.
    pg_database:
      query: |
        SELECT
          datname,
          numbackends as connections,
          xact_commit as transactions_committed,
          xact_rollback as transactions_rolled_back,
          blks_read as blocks_read,
          blks_hit as blocks_hit,
          tup_returned as tuples_returned,
          tup_fetched as tuples_fetched,
          tup_inserted as tuples_inserted,
          tup_updated as tuples_updated,
          tup_deleted as tuples_deleted,
          conflicts as conflicts,
          temp_files as temp_files,
          temp_bytes as temp_bytes,
          deadlocks as deadlocks
        FROM pg_stat_database
        WHERE datname NOT IN ('template0', 'template1', 'postgres')
      metrics:
        - datname:
            usage: "LABEL"
            description: "Name of the database"
        - connections:
            usage: "GAUGE"
            description: "Number of backends currently connected to this database"
        - transactions_committed:
            usage: "COUNTER"
            description: "Number of transactions in this database that have been committed"
        - transactions_rolled_back:
            usage: "COUNTER"
            description: "Number of transactions in this database that have been rolled back"
        - blocks_read:
            usage: "COUNTER"
            description: "Number of disk blocks read in this database"
        - blocks_hit:
            usage: "COUNTER"
            description: "Number of times disk blocks were found in the buffer cache"
        - tuples_returned:
            usage: "COUNTER"
            description: "Number of rows returned by queries in this database"
        - tuples_fetched:
            usage: "COUNTER"
            description: "Number of rows fetched by queries in this database"
        - tuples_inserted:
            usage: "COUNTER"
            description: "Number of rows inserted by queries in this database"
        - tuples_updated:
            usage: "COUNTER"
            description: "Number of rows updated by queries in this database"
        - tuples_deleted:
            usage: "COUNTER"
            description: "Number of rows deleted by queries in this database"
        - conflicts:
            usage: "COUNTER"
            description: "Number of queries canceled due to conflicts with recovery"
        - temp_files:
            usage: "COUNTER"
            description: "Number of temporary files created by queries"
        - temp_bytes:
            usage: "COUNTER"
            description: "Total amount of data written to temporary files by queries"
        - deadlocks:
            usage: "COUNTER"
            description: "Number of deadlocks detected in this database"

    # Replica flag and replay lag.
    # NOTE(review): pg_last_xact_replay_timestamp() is NULL on a primary, so
    # lag_seconds carries no numeric value there - confirm dashboards and
    # alerts tolerate that.
    pg_replication:
      query: |
        SELECT
          CASE WHEN pg_is_in_recovery() THEN 1 ELSE 0 END as is_replica,
          EXTRACT(EPOCH FROM (now() - pg_last_xact_replay_timestamp()))::INT as lag_seconds
      metrics:
        - is_replica:
            usage: "GAUGE"
            description: "1 if this is a replica, 0 if primary"
        - lag_seconds:
            usage: "GAUGE"
            description: "Replication lag in seconds (only on replicas)"

    # Non-idle sessions whose current query started more than 30 seconds ago,
    # grouped by database, user and state. Queries that mention
    # pg_stat_activity are excluded.
    pg_slow_queries:
      query: |
        SELECT
          datname,
          usename,
          state,
          COUNT(*) as count,
          MAX(EXTRACT(EPOCH FROM (now() - query_start))) as max_duration_seconds
        FROM pg_stat_activity
        WHERE state != 'idle'
          AND query NOT LIKE '%pg_stat_activity%'
          AND query_start < now() - interval '30 seconds'
        GROUP BY datname, usename, state
      metrics:
        - datname:
            usage: "LABEL"
            description: "Database name"
        - usename:
            usage: "LABEL"
            description: "User name"
        - state:
            usage: "LABEL"
            description: "Query state"
        - count:
            usage: "GAUGE"
            description: "Number of slow queries"
        - max_duration_seconds:
            usage: "GAUGE"
            description: "Maximum query duration in seconds"

    # Access and maintenance statistics for the 20 tables in schema "public"
    # with the highest estimated live row count.
    pg_table_stats:
      query: |
        SELECT
          schemaname,
          relname,
          seq_scan,
          seq_tup_read,
          idx_scan,
          idx_tup_fetch,
          n_tup_ins,
          n_tup_upd,
          n_tup_del,
          n_tup_hot_upd,
          n_live_tup,
          n_dead_tup,
          n_mod_since_analyze,
          last_vacuum,
          last_autovacuum,
          last_analyze,
          last_autoanalyze
        FROM pg_stat_user_tables
        WHERE schemaname = 'public'
        ORDER BY n_live_tup DESC
        LIMIT 20
      metrics:
        - schemaname:
            usage: "LABEL"
            description: "Schema name"
        - relname:
            usage: "LABEL"
            description: "Table name"
        - seq_scan:
            usage: "COUNTER"
            description: "Number of sequential scans"
        - seq_tup_read:
            usage: "COUNTER"
            description: "Number of tuples read by sequential scans"
        - idx_scan:
            usage: "COUNTER"
            description: "Number of index scans"
        - idx_tup_fetch:
            usage: "COUNTER"
            description: "Number of tuples fetched by index scans"
        - n_tup_ins:
            usage: "COUNTER"
            description: "Number of tuples inserted"
        - n_tup_upd:
            usage: "COUNTER"
            description: "Number of tuples updated"
        - n_tup_del:
            usage: "COUNTER"
            description: "Number of tuples deleted"
        - n_tup_hot_upd:
            usage: "COUNTER"
            description: "Number of tuples HOT updated"
        - n_live_tup:
            usage: "GAUGE"
            description: "Estimated number of live rows"
        - n_dead_tup:
            usage: "GAUGE"
            description: "Estimated number of dead rows"
        - n_mod_since_analyze:
            usage: "GAUGE"
            description: "Number of rows modified since last analyze"
        # The four timestamp columns are mapped explicitly so they are
        # exported as typed, described gauges rather than as unmapped columns.
        - last_vacuum:
            usage: "GAUGE"
            description: "Last time this table was manually vacuumed (Unix timestamp)"
        - last_autovacuum:
            usage: "GAUGE"
            description: "Last time this table was vacuumed by the autovacuum daemon (Unix timestamp)"
        - last_analyze:
            usage: "GAUGE"
            description: "Last time this table was manually analyzed (Unix timestamp)"
        - last_autoanalyze:
            usage: "GAUGE"
            description: "Last time this table was analyzed by the autovacuum daemon (Unix timestamp)"

    # Current lock counts grouped by lock mode and lock type.
    pg_locks:
      query: |
        SELECT
          mode,
          locktype,
          COUNT(*) as count
        FROM pg_locks
        GROUP BY mode, locktype
      metrics:
        - mode:
            usage: "LABEL"
            description: "Lock mode"
        - locktype:
            usage: "LABEL"
            description: "Lock type"
        - count:
            usage: "GAUGE"
            description: "Number of locks"

    # Session counts per state, plus the longest time any session has spent
    # in its current state.
    pg_connection_pool:
      query: |
        SELECT
          state,
          COUNT(*) as count,
          MAX(EXTRACT(EPOCH FROM (now() - state_change))) as max_state_duration_seconds
        FROM pg_stat_activity
        GROUP BY state
      metrics:
        - state:
            usage: "LABEL"
            description: "Connection state"
        - count:
            usage: "GAUGE"
            description: "Number of connections in this state"
        - max_state_duration_seconds:
            usage: "GAUGE"
            description: "Maximum time a connection has been in this state"

---
# Service: cluster-internal (ClusterIP) endpoint for the exporter's metrics.
# Port 9187 on the Service forwards to port 9187 on the selected pods.
apiVersion: v1
kind: Service
metadata:
  name: postgres-exporter
  namespace: monitoring
  labels:
    app: postgres-exporter
spec:
  type: ClusterIP
  # Traffic is routed to pods labelled app=postgres-exporter.
  selector:
    app: postgres-exporter
  ports:
    - name: metrics
      protocol: TCP
      port: 9187
      targetPort: 9187