# ================================================================
# Monitoring Configuration: infrastructure/monitoring/prometheus/forecasting-service.yml
# ================================================================
# Top-level list of rule groups loaded from this file.
groups:
# Single group holding every alerting rule for the forecasting service.
- name: forecasting-service
  # Each entry below is one alert: a PromQL condition (`expr`), how long it
  # must hold (`for`), routing labels, and human-readable annotations.
  rules:
  # Critical alert for an unhealthy forecasting-service scrape job. It covers
  # two cases: a target that reports up == 0, and the job having no `up`
  # series at all (for example, the target dropped out of service discovery).
  # A bare `up == 0` returns an empty result in the second case, so without
  # the absent() branch a fully missing service would never raise this alert.
  - alert: ForecastingServiceDown
    expr: >-
      up{job="forecasting-service"} == 0
      or
      absent(up{job="forecasting-service"})
    # The condition must hold continuously for 1 minute before firing.
    for: 1m
    labels:
      severity: critical
    annotations:
      summary: "Forecasting service is down"
      description: "Forecasting service has been down for more than 1 minute"

  # Warning alert raised when the 95th-percentile forecast processing time
  # stays above 10 seconds for 5 minutes.
  #
  # histogram_quantile() interpolates over the per-bucket counter series (the
  # `_bucket` suffix, each carrying an `le` label), and those counters must
  # first be turned into per-second rates so the quantile reflects the recent
  # window rather than the lifetime of the process. Passing the bare metric
  # name gives the function no buckets to work with.
  # NOTE(review): assumes forecast_processing_time_seconds is exported as a
  # classic (bucketed) Prometheus histogram - confirm against the service.
  # The quantile is computed per label set; wrap the rate in `sum by (le)`
  # if a single service-wide figure is wanted instead.
  - alert: HighForecastingLatency
    expr: histogram_quantile(0.95, rate(forecast_processing_time_seconds_bucket[5m])) > 10
    for: 5m
    labels:
      severity: warning
    annotations:
      summary: "High forecasting latency"
      description: "95th percentile forecasting latency is {{ $value }}s"

  # Critical alert raised when forecasting_errors_total grows faster than
  # 0.1 per second, measured as a per-second rate over a 5-minute window,
  # and the condition persists for a further 5 minutes.
  - alert: ForecastingErrorRate
    for: 5m
    expr: 'rate(forecasting_errors_total[5m]) > 0.1'
    annotations:
      description: 'Forecasting error rate is {{ $value }} errors/sec'
      summary: 'High forecasting error rate'
    labels:
      severity: 'critical'

  # Warning alert raised when the average of model_accuracy_score across all
  # reporting series stays below 0.7 for 10 minutes.
  # NOTE(review): the 0.7 threshold presumes the score is on a 0-1 scale -
  # confirm against whatever exports the metric.
  - alert: LowModelAccuracy
    for: 10m
    expr: 'avg(model_accuracy_score) < 0.7'
    annotations:
      description: 'Average model accuracy is {{ $value }}'
      summary: 'Low model accuracy detected'
    labels:
      severity: 'warning'