Improve monitoring 5

This commit is contained in:
Urtzi Alfaro
2026-01-09 23:14:12 +01:00
parent 22dab143ba
commit c05538cafb
23 changed files with 4737 additions and 1932 deletions

View File

@@ -1,92 +1,295 @@
{
"dashboard": {
"title": "Bakery IA - System Health",
"description": "Comprehensive system health monitoring dashboard",
"tags": ["system", "health", "monitoring"],
"panels": [
{
"title": "System Availability",
"type": "stat",
"query": {
"metric": "system_availability",
"aggregate": "avg",
"filters": [
{
"key": "namespace",
"operator": "=",
"value": "${namespace}"
}
]
},
"unit": "percent"
},
{
"title": "Service Health Score",
"type": "stat",
"query": {
"metric": "service_health_score",
"aggregate": "avg",
"filters": [
{
"key": "namespace",
"operator": "=",
"value": "${namespace}"
}
]
},
"unit": "number"
},
{
"title": "CPU Usage",
"type": "timeseries",
"query": {
"metric": "system_cpu_usage",
"aggregate": "avg",
"filters": [
{
"key": "namespace",
"operator": "=",
"value": "${namespace}"
}
]
},
"unit": "percent"
},
{
"title": "Memory Usage",
"type": "timeseries",
"query": {
"metric": "system_memory_usage",
"aggregate": "avg",
"filters": [
{
"key": "namespace",
"operator": "=",
"value": "${namespace}"
}
]
},
"unit": "percent"
}
],
"variables": [
{
"name": "namespace",
"label": "Namespace",
"type": "dropdown",
"default": "bakery-ia",
"values": ["bakery-ia", "default"]
}
],
"layout": {
"type": "grid",
"columns": 12,
"gap": [16, 16]
"description": "Comprehensive system health monitoring dashboard",
"tags": ["system", "health", "monitoring"],
"name": "bakery-ia-system-health",
"title": "Bakery IA - System Health",
"uploadedGrafana": false,
"uuid": "bakery-ia-health-01",
"version": "v4",
"collapsableRowsMigrated": true,
"layout": [
{
"x": 0,
"y": 0,
"w": 6,
"h": 3,
"i": "system-availability",
"moved": false,
"static": false
},
"refresh": "30s",
"time": {
"from": "now-1h",
"to": "now"
{
"x": 6,
"y": 0,
"w": 6,
"h": 3,
"i": "health-score",
"moved": false,
"static": false
},
{
"x": 0,
"y": 3,
"w": 6,
"h": 3,
"i": "cpu-usage",
"moved": false,
"static": false
},
{
"x": 6,
"y": 3,
"w": 6,
"h": 3,
"i": "memory-usage",
"moved": false,
"static": false
}
}
],
"variables": {
"namespace": {
"id": "namespace-var",
"name": "namespace",
"description": "Filter by Kubernetes namespace",
"type": "QUERY",
"queryValue": "SELECT DISTINCT(resource_attrs['k8s.namespace.name']) as value FROM signoz_metrics.distributed_time_series_v4_1day WHERE metric_name = 'system_availability' AND value != '' ORDER BY value",
"customValue": "",
"textboxValue": "",
"showALLOption": true,
"multiSelect": false,
"order": 1,
"modificationUUID": "",
"sort": "ASC",
"selectedValue": "bakery-ia"
}
},
"widgets": [
{
"id": "system-availability",
"title": "System Availability",
"description": "Overall system availability percentage",
"isStacked": false,
"nullZeroValues": "zero",
"opacity": "1",
"panelTypes": "value",
"query": {
"builder": {
"queryData": [
{
"dataSource": "metrics",
"queryName": "A",
"aggregateOperator": "avg",
"aggregateAttribute": {
"key": "system_availability",
"dataType": "float64",
"type": "Gauge",
"isColumn": false
},
"timeAggregation": "latest",
"spaceAggregation": "avg",
"functions": [],
"filters": {
"items": [
{
"key": {
"key": "k8s.namespace.name",
"dataType": "string",
"type": "resource",
"isColumn": false
},
"op": "=",
"value": "{{.namespace}}"
}
],
"op": "AND"
},
"expression": "A",
"disabled": false,
"having": [],
"stepInterval": 60,
"limit": null,
"orderBy": [],
"groupBy": [],
"legend": "System Availability",
"reduceTo": "avg"
}
],
"queryFormulas": []
},
"queryType": "builder"
},
"fillSpans": false,
"yAxisUnit": "percent"
},
{
"id": "health-score",
"title": "Service Health Score",
"description": "Overall service health score",
"isStacked": false,
"nullZeroValues": "zero",
"opacity": "1",
"panelTypes": "value",
"query": {
"builder": {
"queryData": [
{
"dataSource": "metrics",
"queryName": "A",
"aggregateOperator": "avg",
"aggregateAttribute": {
"key": "service_health_score",
"dataType": "float64",
"type": "Gauge",
"isColumn": false
},
"timeAggregation": "latest",
"spaceAggregation": "avg",
"functions": [],
"filters": {
"items": [
{
"key": {
"key": "k8s.namespace.name",
"dataType": "string",
"type": "resource",
"isColumn": false
},
"op": "=",
"value": "{{.namespace}}"
}
],
"op": "AND"
},
"expression": "A",
"disabled": false,
"having": [],
"stepInterval": 60,
"limit": null,
"orderBy": [],
"groupBy": [],
"legend": "Health Score",
"reduceTo": "avg"
}
],
"queryFormulas": []
},
"queryType": "builder"
},
"fillSpans": false,
"yAxisUnit": "none"
},
{
"id": "cpu-usage",
"title": "CPU Usage",
"description": "System CPU usage over time",
"isStacked": false,
"nullZeroValues": "zero",
"opacity": "1",
"panelTypes": "graph",
"query": {
"builder": {
"queryData": [
{
"dataSource": "metrics",
"queryName": "A",
"aggregateOperator": "avg",
"aggregateAttribute": {
"key": "system_cpu_usage",
"dataType": "float64",
"type": "Gauge",
"isColumn": false
},
"timeAggregation": "avg",
"spaceAggregation": "avg",
"functions": [],
"filters": {
"items": [
{
"key": {
"key": "k8s.namespace.name",
"dataType": "string",
"type": "resource",
"isColumn": false
},
"op": "=",
"value": "{{.namespace}}"
}
],
"op": "AND"
},
"expression": "A",
"disabled": false,
"having": [],
"stepInterval": 60,
"limit": null,
"orderBy": [],
"groupBy": [],
"legend": "CPU Usage",
"reduceTo": "avg"
}
],
"queryFormulas": []
},
"queryType": "builder"
},
"fillSpans": false,
"yAxisUnit": "percent"
},
{
"id": "memory-usage",
"title": "Memory Usage",
"description": "System memory usage over time",
"isStacked": false,
"nullZeroValues": "zero",
"opacity": "1",
"panelTypes": "graph",
"query": {
"builder": {
"queryData": [
{
"dataSource": "metrics",
"queryName": "A",
"aggregateOperator": "avg",
"aggregateAttribute": {
"key": "system_memory_usage",
"dataType": "float64",
"type": "Gauge",
"isColumn": false
},
"timeAggregation": "avg",
"spaceAggregation": "avg",
"functions": [],
"filters": {
"items": [
{
"key": {
"key": "k8s.namespace.name",
"dataType": "string",
"type": "resource",
"isColumn": false
},
"op": "=",
"value": "{{.namespace}}"
}
],
"op": "AND"
},
"expression": "A",
"disabled": false,
"having": [],
"stepInterval": 60,
"limit": null,
"orderBy": [],
"groupBy": [],
"legend": "Memory Usage",
"reduceTo": "avg"
}
],
"queryFormulas": []
},
"queryType": "builder"
},
"fillSpans": false,
"yAxisUnit": "percent"
}
]
}