{ "description": "Comprehensive infrastructure monitoring dashboard for Bakery IA Kubernetes cluster", "tags": ["infrastructure", "kubernetes", "k8s", "system"], "name": "bakery-ia-infrastructure-monitoring", "title": "Bakery IA - Infrastructure Monitoring", "uploadedGrafana": false, "uuid": "bakery-ia-infra-01", "version": "v4", "collapsableRowsMigrated": true, "layout": [ { "x": 0, "y": 0, "w": 6, "h": 3, "i": "pod-count", "moved": false, "static": false }, { "x": 6, "y": 0, "w": 6, "h": 3, "i": "pod-phase", "moved": false, "static": false }, { "x": 0, "y": 3, "w": 6, "h": 3, "i": "container-restarts", "moved": false, "static": false }, { "x": 6, "y": 3, "w": 6, "h": 3, "i": "node-condition", "moved": false, "static": false }, { "x": 0, "y": 6, "w": 12, "h": 3, "i": "deployment-status", "moved": false, "static": false } ], "variables": { "namespace": { "id": "namespace-var", "name": "namespace", "description": "Filter by Kubernetes namespace", "type": "QUERY", "queryValue": "SELECT DISTINCT(resource_attrs['k8s.namespace.name']) as value FROM signoz_metrics.distributed_time_series_v4_1day WHERE metric_name = 'k8s.pod.phase' AND value != '' ORDER BY value", "customValue": "", "textboxValue": "", "showALLOption": true, "multiSelect": false, "order": 1, "modificationUUID": "", "sort": "ASC", "selectedValue": "bakery-ia" } }, "widgets": [ { "id": "pod-count", "title": "Total Pods", "description": "Total number of pods in the namespace", "isStacked": false, "nullZeroValues": "zero", "opacity": "1", "panelTypes": "value", "query": { "builder": { "queryData": [ { "dataSource": "metrics", "queryName": "A", "aggregateOperator": "count", "aggregateAttribute": { "key": "k8s.pod.phase", "dataType": "int64", "type": "Gauge", "isColumn": false }, "timeAggregation": "latest", "spaceAggregation": "sum", "functions": [], "filters": { "items": [ { "id": "filter-k8s-namespace", "key": { "id": "k8s.namespace.name--string--tag--false", "key": "k8s.namespace.name", "dataType": 
"string", "type": "tag", "isColumn": false }, "op": "=", "value": "{{.namespace}}" } ], "op": "AND" }, "expression": "A", "disabled": false, "having": [], "stepInterval": 60, "limit": null, "orderBy": [], "groupBy": [], "legend": "Total Pods", "reduceTo": "sum" } ], "queryFormulas": [] }, "queryType": "builder" }, "fillSpans": false, "yAxisUnit": "none" }, { "id": "pod-phase", "title": "Pod Phase Distribution", "description": "Pods by phase (Running, Pending, Failed, etc.)", "isStacked": true, "nullZeroValues": "zero", "opacity": "1", "panelTypes": "graph", "query": { "builder": { "queryData": [ { "dataSource": "metrics", "queryName": "A", "aggregateOperator": "sum", "aggregateAttribute": { "key": "k8s.pod.phase", "dataType": "int64", "type": "Gauge", "isColumn": false }, "timeAggregation": "latest", "spaceAggregation": "sum", "functions": [], "filters": { "items": [ { "id": "filter-k8s-namespace", "key": { "id": "k8s.namespace.name--string--tag--false", "key": "k8s.namespace.name", "dataType": "string", "type": "tag", "isColumn": false }, "op": "=", "value": "{{.namespace}}" } ], "op": "AND" }, "expression": "A", "disabled": false, "having": [], "stepInterval": 60, "limit": null, "orderBy": [], "groupBy": [ { "key": "phase", "dataType": "string", "type": "tag", "isColumn": false } ], "legend": "{{phase}}", "reduceTo": "sum" } ], "queryFormulas": [] }, "queryType": "builder" }, "fillSpans": false, "yAxisUnit": "none" }, { "id": "container-restarts", "title": "Container Restarts", "description": "Container restart count over time", "isStacked": false, "nullZeroValues": "zero", "opacity": "1", "panelTypes": "graph", "query": { "builder": { "queryData": [ { "dataSource": "metrics", "queryName": "A", "aggregateOperator": "sum", "aggregateAttribute": { "key": "k8s.container.restarts", "dataType": "int64", "type": "Gauge", "isColumn": false }, "timeAggregation": "increase", "spaceAggregation": "sum", "functions": [], "filters": { "items": [ { "id": 
"filter-k8s-namespace", "key": { "id": "k8s.namespace.name--string--tag--false", "key": "k8s.namespace.name", "dataType": "string", "type": "tag", "isColumn": false }, "op": "=", "value": "{{.namespace}}" } ], "op": "AND" }, "expression": "A", "disabled": false, "having": [], "stepInterval": 60, "limit": null, "orderBy": [], "groupBy": [ { "id": "k8s.pod.name--string--tag--false", "key": "k8s.pod.name", "dataType": "string", "type": "tag", "isColumn": false } ], "legend": "{{k8s.pod.name}}", "reduceTo": "sum" } ], "queryFormulas": [] }, "queryType": "builder" }, "fillSpans": false, "yAxisUnit": "none" }, { "id": "node-condition", "title": "Node Conditions", "description": "Node Ready condition status per node (only k8s.node.condition_ready is queried; MemoryPressure, DiskPressure, etc. are not included)", "isStacked": true, "nullZeroValues": "zero", "opacity": "1", "panelTypes": "graph", "query": { "builder": { "queryData": [ { "dataSource": "metrics", "queryName": "A", "aggregateOperator": "sum", "aggregateAttribute": { "key": "k8s.node.condition_ready", "dataType": "int64", "type": "Gauge", "isColumn": false }, "timeAggregation": "latest", "spaceAggregation": "sum", "functions": [], "filters": { "items": [], "op": "AND" }, "expression": "A", "disabled": false, "having": [], "stepInterval": 60, "limit": null, "orderBy": [], "groupBy": [ { "id": "k8s.node.name--string--tag--false", "key": "k8s.node.name", "dataType": "string", "type": "tag", "isColumn": false } ], "legend": "{{k8s.node.name}} Ready", "reduceTo": "sum" } ], "queryFormulas": [] }, "queryType": "builder" }, "fillSpans": false, "yAxisUnit": "none" }, { "id": "deployment-status", "title": "Deployment Status (Desired vs Available)", "description": "Deployment replicas: desired vs available", "isStacked": false, "nullZeroValues": "zero", "opacity": "1", "panelTypes": "graph", "query": { "builder": { "queryData": [ { "dataSource": "metrics", "queryName": "A", "aggregateOperator": "avg", "aggregateAttribute": { "key": "k8s.deployment.desired", "dataType": "int64", "type": 
"Gauge", "isColumn": false }, "timeAggregation": "latest", "spaceAggregation": "avg", "functions": [], "filters": { "items": [ { "id": "filter-k8s-namespace", "key": { "id": "k8s.namespace.name--string--tag--false", "key": "k8s.namespace.name", "dataType": "string", "type": "tag", "isColumn": false }, "op": "=", "value": "{{.namespace}}" } ], "op": "AND" }, "expression": "A", "disabled": false, "having": [], "stepInterval": 60, "limit": null, "orderBy": [], "groupBy": [ { "id": "k8s.deployment.name--string--tag--false", "key": "k8s.deployment.name", "dataType": "string", "type": "tag", "isColumn": false } ], "legend": "{{k8s.deployment.name}} (desired)", "reduceTo": "avg" }, { "dataSource": "metrics", "queryName": "B", "aggregateOperator": "avg", "aggregateAttribute": { "key": "k8s.deployment.available", "dataType": "int64", "type": "Gauge", "isColumn": false }, "timeAggregation": "latest", "spaceAggregation": "avg", "functions": [], "filters": { "items": [ { "id": "filter-k8s-namespace", "key": { "id": "k8s.namespace.name--string--tag--false", "key": "k8s.namespace.name", "dataType": "string", "type": "tag", "isColumn": false }, "op": "=", "value": "{{.namespace}}" } ], "op": "AND" }, "expression": "B", "disabled": false, "having": [], "stepInterval": 60, "limit": null, "orderBy": [], "groupBy": [ { "id": "k8s.deployment.name--string--tag--false", "key": "k8s.deployment.name", "dataType": "string", "type": "tag", "isColumn": false } ], "legend": "{{k8s.deployment.name}} (available)", "reduceTo": "avg" } ], "queryFormulas": [] }, "queryType": "builder" }, "fillSpans": false, "yAxisUnit": "none" } ] }