Add role-based filtering and improve code
This commit is contained in:
@@ -0,0 +1,177 @@
|
||||
# ConfigMap `grafana-dashboards` (namespace `monitoring`) holding three
# dashboard definitions as JSON documents, one per data key:
#   - gateway-metrics.json
#   - services-overview.json
#   - circuit-breakers.json
# Each value is a literal block scalar (`|`), so the JSON is stored verbatim.
# `#` comments cannot live inside those scalars; notes sit above each key.
#
# NOTE(review): every document nests the model under a top-level "dashboard"
# key. That looks like the HTTP-API import envelope; file-based provisioning
# presumably expects the bare model -- confirm against how this ConfigMap is
# mounted/consumed before relying on it.
apiVersion: v1
kind: ConfigMap
metadata:
  name: grafana-dashboards
  namespace: monitoring
data:
  # Gateway dashboard: five panels filtered on service="gateway" (panels 1-4)
  # plus an auth ratio built from the gateway_auth_* counters (panel 5).
  #
  # Panel 2 aggregates bucket rates with `sum by (endpoint, le)` before
  # histogram_quantile so there is one p95 series per endpoint, matching its
  # "{{endpoint}} p95" legend and the P99 panel in services-overview.json.
  #
  # Panel 5 sums each counter's rate before dividing so the stat shows a single
  # value and the division does not depend on both metrics carrying identical
  # label sets.
  #
  # NOTE(review): panel 4 is titled "Active Requests" but its query is a
  # 1-minute request rate, not an in-flight count -- confirm the intended title
  # or metric.
  gateway-metrics.json: |
    {
      "dashboard": {
        "title": "Bakery IA - Gateway Metrics",
        "tags": ["bakery-ia", "gateway"],
        "timezone": "browser",
        "panels": [
          {
            "id": 1,
            "title": "Request Rate by Endpoint",
            "type": "graph",
            "gridPos": {"x": 0, "y": 0, "w": 12, "h": 8},
            "targets": [{
              "expr": "rate(http_requests_total{service=\"gateway\"}[5m])",
              "legendFormat": "{{method}} {{endpoint}}"
            }]
          },
          {
            "id": 2,
            "title": "P95 Request Latency",
            "type": "graph",
            "gridPos": {"x": 12, "y": 0, "w": 12, "h": 8},
            "targets": [{
              "expr": "histogram_quantile(0.95, sum by (endpoint, le) (rate(http_request_duration_seconds_bucket{service=\"gateway\"}[5m])))",
              "legendFormat": "{{endpoint}} p95"
            }]
          },
          {
            "id": 3,
            "title": "Error Rate (5xx)",
            "type": "graph",
            "gridPos": {"x": 0, "y": 8, "w": 12, "h": 8},
            "targets": [{
              "expr": "rate(http_requests_total{service=\"gateway\",status_code=~\"5..\"}[5m])",
              "legendFormat": "{{endpoint}} errors"
            }]
          },
          {
            "id": 4,
            "title": "Active Requests",
            "type": "stat",
            "gridPos": {"x": 12, "y": 8, "w": 6, "h": 4},
            "targets": [{
              "expr": "sum(rate(http_requests_total{service=\"gateway\"}[1m]))"
            }]
          },
          {
            "id": 5,
            "title": "Authentication Success Rate",
            "type": "stat",
            "gridPos": {"x": 18, "y": 8, "w": 6, "h": 4},
            "targets": [{
              "expr": "sum(rate(gateway_auth_responses_total[5m])) / sum(rate(gateway_auth_requests_total[5m])) * 100"
            }]
          }
        ],
        "refresh": "10s",
        "schemaVersion": 16,
        "version": 1
      }
    }

  # Cross-service dashboard: request rate, p99 latency and 5xx rate grouped by
  # the `service` label, plus an instant table of `up` for the
  # "bakery-services" job with columns renamed via an "organize" transformation.
  services-overview.json: |
    {
      "dashboard": {
        "title": "Bakery IA - Services Overview",
        "tags": ["bakery-ia", "services"],
        "timezone": "browser",
        "panels": [
          {
            "id": 1,
            "title": "Request Rate by Service",
            "type": "graph",
            "gridPos": {"x": 0, "y": 0, "w": 12, "h": 8},
            "targets": [{
              "expr": "sum by (service) (rate(http_requests_total[5m]))",
              "legendFormat": "{{service}}"
            }]
          },
          {
            "id": 2,
            "title": "P99 Latency by Service",
            "type": "graph",
            "gridPos": {"x": 12, "y": 0, "w": 12, "h": 8},
            "targets": [{
              "expr": "histogram_quantile(0.99, sum by (service, le) (rate(http_request_duration_seconds_bucket[5m])))",
              "legendFormat": "{{service}} p99"
            }]
          },
          {
            "id": 3,
            "title": "Error Rate by Service",
            "type": "graph",
            "gridPos": {"x": 0, "y": 8, "w": 24, "h": 8},
            "targets": [{
              "expr": "sum by (service) (rate(http_requests_total{status_code=~\"5..\"}[5m]))",
              "legendFormat": "{{service}}"
            }]
          },
          {
            "id": 4,
            "title": "Service Health Status",
            "type": "table",
            "gridPos": {"x": 0, "y": 16, "w": 24, "h": 8},
            "targets": [{
              "expr": "up{job=\"bakery-services\"}",
              "format": "table",
              "instant": true
            }],
            "transformations": [{
              "id": "organize",
              "options": {
                "excludeByName": {},
                "indexByName": {},
                "renameByName": {
                  "service": "Service Name",
                  "Value": "Status"
                }
              }
            }]
          }
        ],
        "refresh": "30s",
        "schemaVersion": 16,
        "version": 1
      }
    }

  # Circuit-breaker dashboard: current circuit_breaker_state as a stat, plus
  # 5-minute rates of the circuit_breaker_opened_total and
  # circuit_breaker_rejected_total counters.
  circuit-breakers.json: |
    {
      "dashboard": {
        "title": "Bakery IA - Circuit Breakers",
        "tags": ["bakery-ia", "reliability"],
        "timezone": "browser",
        "panels": [
          {
            "id": 1,
            "title": "Circuit Breaker States",
            "type": "stat",
            "gridPos": {"x": 0, "y": 0, "w": 24, "h": 4},
            "targets": [{
              "expr": "circuit_breaker_state",
              "legendFormat": "{{service}} - {{state}}"
            }]
          },
          {
            "id": 2,
            "title": "Circuit Breaker Trips",
            "type": "graph",
            "gridPos": {"x": 0, "y": 4, "w": 12, "h": 8},
            "targets": [{
              "expr": "rate(circuit_breaker_opened_total[5m])",
              "legendFormat": "{{service}}"
            }]
          },
          {
            "id": 3,
            "title": "Rejected Requests",
            "type": "graph",
            "gridPos": {"x": 12, "y": 4, "w": 12, "h": 8},
            "targets": [{
              "expr": "rate(circuit_breaker_rejected_total[5m])",
              "legendFormat": "{{service}}"
            }]
          }
        ],
        "refresh": "10s",
        "schemaVersion": 16,
        "version": 1
      }
    }
|
||||
Reference in New Issue
Block a user