---
# Deployment: runs a single postgres-exporter pod that serves Prometheus
# metrics on port 9187. The connection string is read from the
# "postgres-exporter" Secret and the custom queries from the
# "postgres-exporter-queries" ConfigMap defined in this file.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: postgres-exporter
  namespace: monitoring
  labels:
    app: postgres-exporter
spec:
  replicas: 1
  selector:
    matchLabels:
      app: postgres-exporter
  template:
    metadata:
      labels:
        app: postgres-exporter
    spec:
      containers:
        - name: postgres-exporter
          image: prometheuscommunity/postgres-exporter:v0.15.0
          ports:
            - containerPort: 9187
              name: metrics
          env:
            # Connection string; kept in a Secret so credentials stay out of
            # this manifest.
            - name: DATA_SOURCE_NAME
              valueFrom:
                secretKeyRef:
                  name: postgres-exporter
                  key: data-source-name
            # Load the custom queries mounted from the ConfigMap volume below.
            - name: PG_EXPORTER_EXTEND_QUERY_PATH
              value: "/etc/postgres-exporter/queries.yaml"
            # "false" keeps the exporter's default metrics ENABLED alongside
            # the custom queries; set to "true" to export custom queries only.
            - name: PG_EXPORTER_DISABLE_DEFAULT_METRICS
              value: "false"
            # "false" keeps the pg_settings metrics ENABLED; set to "true" if
            # they prove too noisy.
            - name: PG_EXPORTER_DISABLE_SETTINGS_METRICS
              value: "false"
          volumeMounts:
            - name: queries
              mountPath: /etc/postgres-exporter
          resources:
            requests:
              memory: "64Mi"
              cpu: "50m"
            limits:
              memory: "128Mi"
              cpu: "200m"
          # Both probes hit "/" rather than "/metrics", so a probe does not
          # trigger a database scrape.
          livenessProbe:
            httpGet:
              path: /
              port: 9187
            initialDelaySeconds: 30
            periodSeconds: 10
          readinessProbe:
            httpGet:
              path: /
              port: 9187
            initialDelaySeconds: 5
            periodSeconds: 5
      volumes:
        - name: queries
          configMap:
            name: postgres-exporter-queries
---
# ConfigMap: custom query definitions consumed by the exporter through
# PG_EXPORTER_EXTEND_QUERY_PATH. Each top-level key inside queries.yaml is a
# metric namespace; each entry under "metrics" maps a result column to a
# label or a metric.
# NOTE(review): default metrics stay enabled in the Deployment, so namespaces
# such as pg_replication and pg_locks may overlap with the exporter's built-in
# collectors - confirm there are no duplicate-metric errors on /metrics.
apiVersion: v1
kind: ConfigMap
metadata:
  name: postgres-exporter-queries
  namespace: monitoring
data:
  queries.yaml: |
    # Custom PostgreSQL queries for bakery-ia metrics

    # Per-database activity counters, excluding the system databases.
    pg_database:
      query: |
        SELECT
          datname,
          numbackends as connections,
          xact_commit as transactions_committed,
          xact_rollback as transactions_rolled_back,
          blks_read as blocks_read,
          blks_hit as blocks_hit,
          tup_returned as tuples_returned,
          tup_fetched as tuples_fetched,
          tup_inserted as tuples_inserted,
          tup_updated as tuples_updated,
          tup_deleted as tuples_deleted,
          conflicts as conflicts,
          temp_files as temp_files,
          temp_bytes as temp_bytes,
          deadlocks as deadlocks
        FROM pg_stat_database
        WHERE datname NOT IN ('template0', 'template1', 'postgres')
      metrics:
        - datname:
            usage: "LABEL"
            description: "Name of the database"
        - connections:
            usage: "GAUGE"
            description: "Number of backends currently connected to this database"
        - transactions_committed:
            usage: "COUNTER"
            description: "Number of transactions in this database that have been committed"
        - transactions_rolled_back:
            usage: "COUNTER"
            description: "Number of transactions in this database that have been rolled back"
        - blocks_read:
            usage: "COUNTER"
            description: "Number of disk blocks read in this database"
        - blocks_hit:
            usage: "COUNTER"
            description: "Number of times disk blocks were found in the buffer cache"
        - tuples_returned:
            usage: "COUNTER"
            description: "Number of rows returned by queries in this database"
        - tuples_fetched:
            usage: "COUNTER"
            description: "Number of rows fetched by queries in this database"
        - tuples_inserted:
            usage: "COUNTER"
            description: "Number of rows inserted by queries in this database"
        - tuples_updated:
            usage: "COUNTER"
            description: "Number of rows updated by queries in this database"
        - tuples_deleted:
            usage: "COUNTER"
            description: "Number of rows deleted by queries in this database"
        - conflicts:
            usage: "COUNTER"
            description: "Number of queries canceled due to conflicts with recovery"
        - temp_files:
            usage: "COUNTER"
            description: "Number of temporary files created by queries"
        - temp_bytes:
            usage: "COUNTER"
            description: "Total amount of data written to temporary files by queries"
        - deadlocks:
            usage: "COUNTER"
            description: "Number of deadlocks detected in this database"

    # Replica role and replay lag. pg_last_xact_replay_timestamp() is NULL on
    # a primary, so the lag is forced to 0 there (and clamped at 0 against
    # clock skew) to keep the gauge numeric.
    pg_replication:
      query: |
        SELECT
          CASE WHEN pg_is_in_recovery() THEN 1 ELSE 0 END as is_replica,
          CASE
            WHEN NOT pg_is_in_recovery() THEN 0
            ELSE GREATEST(
              0,
              COALESCE(
                EXTRACT(EPOCH FROM (now() - pg_last_xact_replay_timestamp())),
                0
              )
            )::INT
          END as lag_seconds
      metrics:
        - is_replica:
            usage: "GAUGE"
            description: "1 if this is a replica, 0 if primary"
        - lag_seconds:
            usage: "GAUGE"
            description: "Replication lag in seconds (0 on a primary)"

    # Non-idle sessions whose current query started more than 30 seconds ago,
    # grouped by database, user and state.
    pg_slow_queries:
      query: |
        SELECT
          datname,
          usename,
          state,
          COUNT(*) as count,
          MAX(EXTRACT(EPOCH FROM (now() - query_start))) as max_duration_seconds
        FROM pg_stat_activity
        WHERE state != 'idle'
          AND query NOT LIKE '%pg_stat_activity%'
          AND query_start < now() - interval '30 seconds'
        GROUP BY datname, usename, state
      metrics:
        - datname:
            usage: "LABEL"
            description: "Database name"
        - usename:
            usage: "LABEL"
            description: "User name"
        - state:
            usage: "LABEL"
            description: "Query state"
        - count:
            usage: "GAUGE"
            description: "Number of slow queries"
        - max_duration_seconds:
            usage: "GAUGE"
            description: "Maximum query duration in seconds"

    # Statistics for the 20 tables in schema "public" with the most live rows.
    # NOTE(review): the last_* timestamp columns are selected but have no
    # entry under "metrics" - confirm how the exporter treats unmapped columns.
    pg_table_stats:
      query: |
        SELECT
          schemaname,
          relname,
          seq_scan,
          seq_tup_read,
          idx_scan,
          idx_tup_fetch,
          n_tup_ins,
          n_tup_upd,
          n_tup_del,
          n_tup_hot_upd,
          n_live_tup,
          n_dead_tup,
          n_mod_since_analyze,
          last_vacuum,
          last_autovacuum,
          last_analyze,
          last_autoanalyze
        FROM pg_stat_user_tables
        WHERE schemaname = 'public'
        ORDER BY n_live_tup DESC
        LIMIT 20
      metrics:
        - schemaname:
            usage: "LABEL"
            description: "Schema name"
        - relname:
            usage: "LABEL"
            description: "Table name"
        - seq_scan:
            usage: "COUNTER"
            description: "Number of sequential scans"
        - seq_tup_read:
            usage: "COUNTER"
            description: "Number of tuples read by sequential scans"
        - idx_scan:
            usage: "COUNTER"
            description: "Number of index scans"
        - idx_tup_fetch:
            usage: "COUNTER"
            description: "Number of tuples fetched by index scans"
        - n_tup_ins:
            usage: "COUNTER"
            description: "Number of tuples inserted"
        - n_tup_upd:
            usage: "COUNTER"
            description: "Number of tuples updated"
        - n_tup_del:
            usage: "COUNTER"
            description: "Number of tuples deleted"
        - n_tup_hot_upd:
            usage: "COUNTER"
            description: "Number of tuples HOT updated"
        - n_live_tup:
            usage: "GAUGE"
            description: "Estimated number of live rows"
        - n_dead_tup:
            usage: "GAUGE"
            description: "Estimated number of dead rows"
        - n_mod_since_analyze:
            usage: "GAUGE"
            description: "Number of rows modified since last analyze"

    # Lock counts grouped by lock mode and lock type.
    pg_locks:
      query: |
        SELECT
          mode,
          locktype,
          COUNT(*) as count
        FROM pg_locks
        GROUP BY mode, locktype
      metrics:
        - mode:
            usage: "LABEL"
            description: "Lock mode"
        - locktype:
            usage: "LABEL"
            description: "Lock type"
        - count:
            usage: "GAUGE"
            description: "Number of locks"

    # Session counts per state, with the longest time spent in that state.
    pg_connection_pool:
      query: |
        SELECT
          state,
          COUNT(*) as count,
          MAX(EXTRACT(EPOCH FROM (now() - state_change))) as max_state_duration_seconds
        FROM pg_stat_activity
        GROUP BY state
      metrics:
        - state:
            usage: "LABEL"
            description: "Connection state"
        - count:
            usage: "GAUGE"
            description: "Number of connections in this state"
        - max_state_duration_seconds:
            usage: "GAUGE"
            description: "Maximum time a connection has been in this state"
---
# Service: stable in-cluster address (ClusterIP) for the exporter pod's
# metrics port, selected by the app=postgres-exporter label.
apiVersion: v1
kind: Service
metadata:
  name: postgres-exporter
  namespace: monitoring
  labels:
    app: postgres-exporter
spec:
  type: ClusterIP
  ports:
    - port: 9187
      targetPort: 9187
      protocol: TCP
      name: metrics
  selector:
    app: postgres-exporter