New alert system and panel de control page
This commit is contained in:
@@ -41,7 +41,7 @@ spec:
|
||||
cpu: "500m"
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /
|
||||
path: /health
|
||||
port: 3000
|
||||
initialDelaySeconds: 60
|
||||
timeoutSeconds: 10
|
||||
@@ -49,7 +49,7 @@ spec:
|
||||
failureThreshold: 3
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /
|
||||
path: /health
|
||||
port: 3000
|
||||
initialDelaySeconds: 20
|
||||
timeoutSeconds: 5
|
||||
|
||||
@@ -187,6 +187,33 @@ data:
|
||||
ALERT_DEDUPLICATION_WINDOW_MINUTES: "15"
|
||||
RECOMMENDATION_DEDUPLICATION_WINDOW_MINUTES: "60"
|
||||
|
||||
# Alert Enrichment Configuration (Unified Alert Service)
|
||||
# Priority scoring weights (must sum to 1.0)
|
||||
BUSINESS_IMPACT_WEIGHT: "0.4"
|
||||
URGENCY_WEIGHT: "0.3"
|
||||
USER_AGENCY_WEIGHT: "0.2"
|
||||
CONFIDENCE_WEIGHT: "0.1"
|
||||
|
||||
# Priority thresholds (0-100 scale)
|
||||
CRITICAL_THRESHOLD: "90"
|
||||
IMPORTANT_THRESHOLD: "70"
|
||||
STANDARD_THRESHOLD: "50"
|
||||
|
||||
# Timing intelligence
|
||||
BUSINESS_HOURS_START: "6"
|
||||
BUSINESS_HOURS_END: "22"
|
||||
PEAK_HOURS_START: "7"
|
||||
PEAK_HOURS_END: "11"
|
||||
PEAK_HOURS_EVENING_START: "17"
|
||||
PEAK_HOURS_EVENING_END: "19"
|
||||
|
||||
# Alert grouping
|
||||
GROUPING_TIME_WINDOW_MINUTES: "15"
|
||||
MAX_ALERTS_PER_GROUP: "5"
|
||||
|
||||
# Email digest
|
||||
DIGEST_SEND_TIME: "18:00"
|
||||
|
||||
# ================================================================
|
||||
# CHECK FREQUENCIES (CRON EXPRESSIONS)
|
||||
# ================================================================
|
||||
|
||||
@@ -0,0 +1,120 @@
|
||||
# CronJob that runs the alert-processor image's
# `app.jobs.priority_recalculation` module once per hour.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: alert-priority-recalculation
  namespace: bakery-ia
  labels:
    app: alert-priority-recalculation
    component: cron
    service: alert-processor
spec:
  # Schedule: every hour at minute 15
  schedule: "15 * * * *"

  # Keep last 3 successful jobs and 1 failed job for debugging
  successfulJobsHistoryLimit: 3
  failedJobsHistoryLimit: 1

  # Don't start a new job if the previous one is still running
  concurrencyPolicy: Forbid

  # A run that misses its scheduled time may still be started up to
  # 10 minutes late; after that the missed run is skipped. This is a
  # start deadline, not a completion deadline (see activeDeadlineSeconds).
  startingDeadlineSeconds: 600

  jobTemplate:
    spec:
      # Retry up to 2 times if job fails
      backoffLimit: 2

      # Job must complete within 30 minutes
      activeDeadlineSeconds: 1800

      template:
        metadata:
          labels:
            app: alert-priority-recalculation
            component: cron
        spec:
          restartPolicy: OnFailure

          # Use alert-processor service image
          containers:
            - name: priority-recalc
              image: bakery/alert-processor:latest
              imagePullPolicy: Always

              command:
                - python3
                - -m
                - app.jobs.priority_recalculation

              env:
                # Database connection
                - name: DATABASE_URL
                  valueFrom:
                    secretKeyRef:
                      name: database-secrets
                      key: ALERT_PROCESSOR_DATABASE_URL

                # Redis connection
                # NOTE(review): `ssl_cert_reqs=none` presumably disables TLS
                # certificate verification in the Redis client - confirm this
                # is intended outside of development.
                - name: REDIS_URL
                  value: "rediss://redis-service:6379/0?ssl_cert_reqs=none"

                # Alert processor settings
                # Priority scoring weights (0.40 + 0.30 + 0.20 + 0.10 = 1.0)
                - name: BUSINESS_IMPACT_WEIGHT
                  value: "0.40"

                - name: URGENCY_WEIGHT
                  value: "0.30"

                - name: USER_AGENCY_WEIGHT
                  value: "0.20"

                - name: CONFIDENCE_WEIGHT
                  value: "0.10"

                # Priority thresholds
                - name: CRITICAL_THRESHOLD
                  value: "90"

                - name: IMPORTANT_THRESHOLD
                  value: "70"

                - name: STANDARD_THRESHOLD
                  value: "50"

                # Escalation thresholds (hours)
                - name: ESCALATION_THRESHOLD_48H
                  value: "48"

                - name: ESCALATION_THRESHOLD_72H
                  value: "72"

                # Service settings
                - name: LOG_LEVEL
                  value: "INFO"

                - name: PYTHONUNBUFFERED
                  value: "1"

              resources:
                requests:
                  memory: "256Mi"
                  cpu: "100m"
                limits:
                  memory: "512Mi"
                  cpu: "500m"
|
||||
|
||||
---
|
||||
# Descriptive settings for the alert priority recalculation job.
# NOTE(review): nothing in the accompanying CronJob manifest mounts or
# references this ConfigMap - confirm which component reads these keys.
apiVersion: v1
kind: ConfigMap
metadata:
  name: alert-priority-recalculation-config
  namespace: bakery-ia
data:
  # Human-readable summary of the job
  schedule: 'Hourly at minute 15'
  description: 'Recalculates alert priorities with time-based escalation'

  # Priority boost values, stored as strings
  escalation_48h_boost: '10'
  escalation_72h_boost: '20'
  deadline_24h_boost: '15'
  deadline_6h_boost: '30'
  max_boost: '30'
|
||||
@@ -0,0 +1,176 @@
|
||||
# CronJob that runs an inline Python script from the orchestrator image
# once per hour, calling DeliveryTrackingService.check_expected_deliveries
# for each configured tenant.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: delivery-tracking
  namespace: bakery-ia
  labels:
    app: delivery-tracking
    component: cron
    service: orchestrator
spec:
  # Schedule: every hour at minute 30
  schedule: "30 * * * *"

  # Keep last 3 successful jobs and 1 failed job for debugging
  successfulJobsHistoryLimit: 3
  failedJobsHistoryLimit: 1

  # Don't start a new job if the previous one is still running
  concurrencyPolicy: Forbid

  # A run that misses its scheduled time may still be started up to
  # 10 minutes late; after that the missed run is skipped. This is a
  # start deadline, not a completion deadline (see activeDeadlineSeconds).
  startingDeadlineSeconds: 600

  jobTemplate:
    spec:
      # Retry up to 2 times if job fails
      backoffLimit: 2

      # Job must complete within 30 minutes
      activeDeadlineSeconds: 1800

      template:
        metadata:
          labels:
            app: delivery-tracking
            component: cron
        spec:
          restartPolicy: OnFailure

          # Use orchestrator service image
          containers:
            - name: delivery-tracker
              image: bakery/orchestrator-service:latest
              imagePullPolicy: Always

              command:
                - python3
                - -c
                - |
                  import asyncio
                  import os
                  from app.services.delivery_tracking_service import DeliveryTrackingService
                  from shared.database.base import create_database_manager
                  from app.core.config import settings
                  from shared.messaging.rabbitmq import RabbitMQClient
                  import structlog

                  logger = structlog.get_logger()

                  # Used when ACTIVE_TENANT_IDS is unset or contains no usable IDs
                  DEFAULT_TENANT_ID = '00000000-0000-0000-0000-000000000001'

                  async def run_delivery_tracking():
                      """Run delivery tracking for all tenants.

                      A failure for one tenant is logged and does not stop the
                      remaining tenants or fail the job; Redis initialisation
                      and service construction errors are logged and re-raised.
                      """
                      from shared.redis_utils import initialize_redis, get_redis_client

                      config = settings  # Use the global settings instance
                      db_manager = create_database_manager(config.DATABASE_URL, "orchestrator")

                      try:
                          # Initialize Redis - This is an async function
                          await initialize_redis(config.REDIS_URL, db=2, max_connections=10)  # Using db 2 for orchestrator
                          redis_client = await get_redis_client()
                      except Exception as e:
                          logger.error("Failed to initialize Redis", error=str(e))
                          raise

                      try:
                          rabbitmq_client = RabbitMQClient(config.RABBITMQ_URL, "delivery-tracking-job")

                          service = DeliveryTrackingService(
                              config=config,
                              db_manager=db_manager,
                              redis_client=redis_client,
                              rabbitmq_client=rabbitmq_client
                          )

                          logger.info("Starting delivery tracking job")

                          # Get active tenant IDs from environment variable.
                          # Parse first, then fall back, so a value made only
                          # of blanks/commas still uses the default tenant
                          # instead of silently processing nothing.
                          raw_tenant_ids = os.environ.get('ACTIVE_TENANT_IDS', '')
                          tenant_ids = [tid.strip() for tid in raw_tenant_ids.split(',') if tid.strip()]
                          if not tenant_ids:
                              tenant_ids = [DEFAULT_TENANT_ID]  # Default single tenant

                          for tenant_id in tenant_ids:
                              try:
                                  result = await service.check_expected_deliveries(tenant_id)
                                  logger.info("Delivery tracking completed", tenant_id=tenant_id, **result)
                              except Exception as e:
                                  # Best effort: keep going with the other tenants
                                  logger.error("Delivery tracking failed", tenant_id=tenant_id, error=str(e))

                          logger.info("Delivery tracking job completed")
                      except Exception as e:
                          logger.error("Delivery tracking service error", error=str(e))
                          raise

                  if __name__ == "__main__":
                      asyncio.run(run_delivery_tracking())

              env:
                # Database connection
                - name: DATABASE_URL
                  valueFrom:
                    secretKeyRef:
                      name: database-secrets
                      key: ORCHESTRATOR_DATABASE_URL

                # Redis connection
                - name: REDIS_URL
                  valueFrom:
                    secretKeyRef:
                      name: database-secrets
                      key: REDIS_URL

                # Service URLs
                # NOTE(review): the demo-seed-alerts Job probes
                # alert-processor-api on port 8010, not 8000 - confirm which
                # port the service actually exposes.
                - name: ALERT_PROCESSOR_URL
                  value: "http://alert-processor-api:8000"

                - name: PROCUREMENT_SERVICE_URL
                  value: "http://procurement-service:8000"

                # Active tenants (comma-separated UUIDs)
                - name: ACTIVE_TENANT_IDS
                  value: "00000000-0000-0000-0000-000000000001"

                # Orchestrator settings
                - name: ORCHESTRATOR_CONTEXT_CACHE_TTL
                  value: "300"

                # Delivery tracking settings
                - name: ARRIVING_SOON_HOURS_BEFORE
                  value: "2"

                - name: OVERDUE_MINUTES_AFTER
                  value: "30"

                - name: DEFAULT_DELIVERY_WINDOW_HOURS
                  value: "4"

                # Service settings
                - name: LOG_LEVEL
                  value: "INFO"

                - name: PYTHONUNBUFFERED
                  value: "1"

              resources:
                requests:
                  memory: "256Mi"
                  cpu: "100m"
                limits:
                  memory: "512Mi"
                  cpu: "500m"
|
||||
|
||||
---
|
||||
# Descriptive settings for the delivery tracking job.
# NOTE(review): nothing in the accompanying CronJob manifest mounts or
# references this ConfigMap - confirm which component reads these keys.
apiVersion: v1
kind: ConfigMap
metadata:
  name: delivery-tracking-config
  namespace: bakery-ia
data:
  # Human-readable summary of the job
  schedule: 'Hourly at minute 30'
  description: 'Checks expected deliveries and generates proactive alerts'

  # Timing values, stored as strings; units are given by the key names
  arriving_soon_hours: '2'
  overdue_minutes: '30'
  delivery_window_hours: '4'
|
||||
@@ -0,0 +1,67 @@
|
||||
# One-off Job that seeds demo alerts by running
# /app/scripts/demo/seed_demo_alerts.py from the alert-processor image
# once alert-processor-api answers its /health endpoint.
apiVersion: batch/v1
kind: Job
metadata:
  name: demo-seed-alerts
  namespace: bakery-ia
  labels:
    app: demo-seed
    component: initialization
  annotations:
    "helm.sh/hook": post-install,post-upgrade
    "helm.sh/hook-weight": "28"  # After orchestration runs (27), as alerts reference recent data
spec:
  # Remove the finished Job after one hour
  ttlSecondsAfterFinished: 3600
  template:
    metadata:
      labels:
        app: demo-seed-alerts
    spec:
      initContainers:
        # NOTE(review): this is a fixed 30-second delay, not a real check
        # that the migration has finished.
        - name: wait-for-alert-processor-migration
          image: busybox:1.36
          command:
            - sh
            - -c
            - |
              echo "Waiting 30 seconds for alert-processor-migration to complete..."
              sleep 30
        # Polls the API health endpoint every 5 seconds until it succeeds.
        # NOTE(review): the image uses the floating `latest` tag - consider
        # pinning a version for reproducible runs.
        - name: wait-for-alert-processor-api
          image: curlimages/curl:latest
          command:
            - sh
            - -c
            - |
              echo "Waiting for alert-processor-api to be ready..."
              # --max-time bounds each attempt so a hung connection cannot
              # stall the loop indefinitely.
              until curl -f --max-time 5 http://alert-processor-api.bakery-ia.svc.cluster.local:8010/health > /dev/null 2>&1; do
                echo "alert-processor-api not ready yet, waiting..."
                sleep 5
              done
              echo "alert-processor-api is ready!"
      containers:
        - name: seed-alerts
          image: bakery/alert-processor:latest
          command: ["python", "/app/scripts/demo/seed_demo_alerts.py"]
          env:
            # Both variables are populated from the same secret key
            - name: ALERT_PROCESSOR_DATABASE_URL
              valueFrom:
                secretKeyRef:
                  name: database-secrets
                  key: ALERT_PROCESSOR_DATABASE_URL
            - name: DATABASE_URL
              valueFrom:
                secretKeyRef:
                  name: database-secrets
                  key: ALERT_PROCESSOR_DATABASE_URL
            - name: DEMO_MODE
              value: "production"
            - name: LOG_LEVEL
              value: "INFO"
          resources:
            requests:
              memory: "256Mi"
              cpu: "100m"
            limits:
              memory: "512Mi"
              cpu: "500m"
      restartPolicy: OnFailure
      serviceAccountName: demo-seed-sa
|
||||
@@ -62,6 +62,7 @@ resources:
|
||||
- jobs/demo-seed-forecasts-job.yaml
|
||||
- jobs/demo-seed-pos-configs-job.yaml
|
||||
- jobs/demo-seed-orchestration-runs-job.yaml
|
||||
- jobs/demo-seed-alerts-job.yaml
|
||||
|
||||
# External data initialization job (v2.0)
|
||||
- jobs/external-data-init-job.yaml
|
||||
@@ -70,6 +71,8 @@ resources:
|
||||
- cronjobs/demo-cleanup-cronjob.yaml
|
||||
- cronjobs/external-data-rotation-cronjob.yaml
|
||||
- cronjobs/usage-tracker-cronjob.yaml
|
||||
- cronjobs/alert-priority-recalculation-cronjob.yaml
|
||||
- cronjobs/delivery-tracking-cronjob.yaml
|
||||
|
||||
# Infrastructure components
|
||||
- components/databases/redis.yaml
|
||||
|
||||
Reference in New Issue
Block a user