---
# ================================================================
# Monitoring Configuration: infrastructure/monitoring/prometheus/forecasting-service.yml
# ================================================================
# Prometheus alerting rules for the forecasting service. Each rule fires
# once its `expr` has held continuously for the duration given in `for`.
groups:
  - name: forecasting-service
    rules:
      # Fires when the scrape target for job "forecasting-service" reports
      # up == 0 for one minute.
      - alert: ForecastingServiceDown
        expr: up{job="forecasting-service"} == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "Forecasting service is down"
          description: "Forecasting service has been down for more than 1 minute"

      # Fires when the 95th-percentile processing time stays above 10 for
      # five minutes. histogram_quantile() must be fed the per-bucket rates
      # aggregated by the `le` label; passing the bare metric name yields no
      # result, so the alert could never fire.
      # NOTE(review): assumes forecast_processing_time_seconds is exported as
      # a classic Prometheus histogram (with a `_bucket` series) -- confirm.
      - alert: HighForecastingLatency
        expr: >-
          histogram_quantile(0.95,
          sum by (le) (rate(forecast_processing_time_seconds_bucket[5m]))) > 10
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High forecasting latency"
          description: "95th percentile forecasting latency is {{ $value }}s"

      # Fires when the per-second rate of forecasting_errors_total, taken
      # over a 5-minute window, stays above 0.1 for five minutes. The
      # comparison is evaluated per series (no aggregation).
      - alert: ForecastingErrorRate
        expr: rate(forecasting_errors_total[5m]) > 0.1
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "High forecasting error rate"
          description: "Forecasting error rate is {{ $value }} errors/sec"

      # Fires when the average of model_accuracy_score across all series
      # stays below 0.7 for ten minutes.
      - alert: LowModelAccuracy
        expr: avg(model_accuracy_score) < 0.7
        for: 10m
        labels:
          severity: warning
        annotations:
          summary: "Low model accuracy detected"
          description: "Average model accuracy is {{ $value }}"