Files
bakery-ia/infrastructure/monitoring/prometheus/forecasting-service.yml

43 lines
1.3 KiB
YAML
Raw Normal View History

2025-07-21 19:48:56 +02:00
# ================================================================
# Monitoring Configuration: infrastructure/monitoring/prometheus/forecasting-service.yml
# ================================================================
groups:
  # Alerting rules for the forecasting service. Every rule below belongs to
  # this single rule group.
  - name: forecasting-service
    rules:
      # Fires when the scrape target for job "forecasting-service" has
      # reported up == 0 for a full minute.
      # NOTE(review): `up == 0` only matches targets that are still being
      # scraped; if the target can vanish from service discovery entirely,
      # consider pairing this with absent(up{job="forecasting-service"}).
      - alert: ForecastingServiceDown
        expr: up{job="forecasting-service"} == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "Forecasting service is down"
          description: "Forecasting service has been down for more than 1 minute"

      # Fires when the 95th-percentile processing time stays above 10 for
      # five minutes. histogram_quantile() must be fed the per-bucket series
      # (the ones carrying the `le` label), so the quantile is computed over
      # the 5m rate of the `_bucket` series aggregated by `le`.
      # NOTE(review): assumes forecast_processing_time_seconds is exported as
      # a Prometheus histogram (i.e. a `_bucket` series exists) — confirm
      # against the service's instrumentation.
      - alert: HighForecastingLatency
        expr: |
          histogram_quantile(
            0.95,
            sum by (le) (rate(forecast_processing_time_seconds_bucket[5m]))
          ) > 10
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High forecasting latency"
          description: "95th percentile forecasting latency is {{ $value }}s"

      # Fires when the per-second increase of forecasting_errors_total,
      # averaged over a 5m window, stays above 0.1 for five minutes.
      # The comparison is evaluated per series (no aggregation is applied).
      - alert: ForecastingErrorRate
        expr: rate(forecasting_errors_total[5m]) > 0.1
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "High forecasting error rate"
          description: "Forecasting error rate is {{ $value }} errors/sec"

      # Fires when the average of model_accuracy_score across all of its
      # series stays below 0.7 for ten minutes.
      - alert: LowModelAccuracy
        expr: avg(model_accuracy_score) < 0.7
        for: 10m
        labels:
          severity: warning
        annotations:
          summary: "Low model accuracy detected"
          description: "Average model accuracy is {{ $value }}"