New alert system and control panel page

Urtzi Alfaro
2025-11-27 15:52:40 +01:00
parent 1a2f4602f3
commit e902419b6e
178 changed files with 20982 additions and 6944 deletions


@@ -0,0 +1,120 @@
apiVersion: batch/v1
kind: CronJob
metadata:
  name: alert-priority-recalculation
  namespace: bakery-ia
  labels:
    app: alert-priority-recalculation
    component: cron
    service: alert-processor
spec:
  # Schedule: every hour at minute 15
  schedule: "15 * * * *"
  # Keep last 3 successful jobs and 1 failed job for debugging
  successfulJobsHistoryLimit: 3
  failedJobsHistoryLimit: 1
  # Don't start a new job if the previous one is still running
  concurrencyPolicy: Forbid
  # Skip the run if it cannot start within 10 minutes of its scheduled time
  startingDeadlineSeconds: 600
  jobTemplate:
    spec:
      # Retry up to 2 times if the job fails
      backoffLimit: 2
      # Job must complete within 30 minutes
      activeDeadlineSeconds: 1800
      template:
        metadata:
          labels:
            app: alert-priority-recalculation
            component: cron
        spec:
          restartPolicy: OnFailure
          # Use the alert-processor service image
          containers:
            - name: priority-recalc
              image: bakery/alert-processor:latest
              imagePullPolicy: Always
              command:
                - python3
                - -m
                - app.jobs.priority_recalculation
              env:
                # Database connection
                - name: DATABASE_URL
                  valueFrom:
                    secretKeyRef:
                      name: database-secrets
                      key: ALERT_PROCESSOR_DATABASE_URL
                # Redis connection
                - name: REDIS_URL
                  value: "rediss://redis-service:6379/0?ssl_cert_reqs=none"
                # Alert processor settings
                - name: BUSINESS_IMPACT_WEIGHT
                  value: "0.40"
                - name: URGENCY_WEIGHT
                  value: "0.30"
                - name: USER_AGENCY_WEIGHT
                  value: "0.20"
                - name: CONFIDENCE_WEIGHT
                  value: "0.10"
                - name: CRITICAL_THRESHOLD
                  value: "90"
                - name: IMPORTANT_THRESHOLD
                  value: "70"
                - name: STANDARD_THRESHOLD
                  value: "50"
                # Escalation thresholds (hours)
                - name: ESCALATION_THRESHOLD_48H
                  value: "48"
                - name: ESCALATION_THRESHOLD_72H
                  value: "72"
                # Service settings
                - name: LOG_LEVEL
                  value: "INFO"
                - name: PYTHONUNBUFFERED
                  value: "1"
              resources:
                requests:
                  memory: "256Mi"
                  cpu: "100m"
                limits:
                  memory: "512Mi"
                  cpu: "500m"
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: alert-priority-recalculation-config
  namespace: bakery-ia
data:
  schedule: "Hourly at minute 15"
  description: "Recalculates alert priorities with time-based escalation"
  escalation_48h_boost: "10"
  escalation_72h_boost: "20"
  deadline_24h_boost: "15"
  deadline_6h_boost: "30"
  max_boost: "30"
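
The weights, thresholds, and escalation boosts defined above are consumed by the app.jobs.priority_recalculation module that the CronJob invokes; that module is not included in this diff. Purely for orientation, the minimal sketch below shows one way a weighted 0-100 score with time-based escalation could be assembled from these settings. Everything in it (compute_priority, the 0-100 factor scale, the exact boost rules) is an illustrative assumption, not the actual implementation.

    import os
    from typing import Optional, Tuple

    def compute_priority(business_impact: float, urgency: float, user_agency: float,
                         confidence: float, age_hours: float,
                         hours_to_deadline: Optional[float] = None) -> Tuple[float, str]:
        """Hypothetical sketch: combine 0-100 factors into a priority score and tier."""
        # Weighted base score; weights default to the values set in the CronJob env
        weights = [float(os.environ.get(name, default)) for name, default in (
            ("BUSINESS_IMPACT_WEIGHT", "0.40"), ("URGENCY_WEIGHT", "0.30"),
            ("USER_AGENCY_WEIGHT", "0.20"), ("CONFIDENCE_WEIGHT", "0.10"))]
        factors = [business_impact, urgency, user_agency, confidence]
        base = sum(w * f for w, f in zip(weights, factors))

        # Time-based escalation, mirroring the ConfigMap defaults, capped at max_boost
        boost = 0.0
        if age_hours >= 72:
            boost = 20.0                    # escalation_72h_boost
        elif age_hours >= 48:
            boost = 10.0                    # escalation_48h_boost
        if hours_to_deadline is not None:
            if hours_to_deadline <= 6:
                boost = max(boost, 30.0)    # deadline_6h_boost
            elif hours_to_deadline <= 24:
                boost = max(boost, 15.0)    # deadline_24h_boost
        score = min(base + min(boost, 30.0), 100.0)  # max_boost = 30

        # Map the score onto the CRITICAL/IMPORTANT/STANDARD thresholds (90/70/50)
        if score >= 90:
            return score, "critical"
        if score >= 70:
            return score, "important"
        if score >= 50:
            return score, "standard"
        return score, "low"

Under these assumptions, an alert with a weighted base score of 65 that has been open for 72 hours lands at 85, i.e. important but not yet critical.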


@@ -0,0 +1,176 @@
apiVersion: batch/v1
kind: CronJob
metadata:
  name: delivery-tracking
  namespace: bakery-ia
  labels:
    app: delivery-tracking
    component: cron
    service: orchestrator
spec:
  # Schedule: every hour at minute 30
  schedule: "30 * * * *"
  # Keep last 3 successful jobs and 1 failed job for debugging
  successfulJobsHistoryLimit: 3
  failedJobsHistoryLimit: 1
  # Don't start a new job if the previous one is still running
  concurrencyPolicy: Forbid
  # Skip the run if it cannot start within 10 minutes of its scheduled time
  startingDeadlineSeconds: 600
  jobTemplate:
    spec:
      # Retry up to 2 times if the job fails
      backoffLimit: 2
      # Job must complete within 30 minutes
      activeDeadlineSeconds: 1800
      template:
        metadata:
          labels:
            app: delivery-tracking
            component: cron
        spec:
          restartPolicy: OnFailure
          # Use the orchestrator service image
          containers:
            - name: delivery-tracker
              image: bakery/orchestrator-service:latest
              imagePullPolicy: Always
              command:
                - python3
                - -c
                - |
                  import asyncio
                  import os
                  from app.services.delivery_tracking_service import DeliveryTrackingService
                  from shared.database.base import create_database_manager
                  from app.core.config import settings
                  from shared.messaging.rabbitmq import RabbitMQClient
                  import structlog

                  logger = structlog.get_logger()

                  async def run_delivery_tracking():
                      """Run delivery tracking for all tenants"""
                      import redis.asyncio as redis
                      from shared.redis_utils import initialize_redis, get_redis_client

                      config = settings  # Use the global settings instance
                      db_manager = create_database_manager(config.DATABASE_URL, "orchestrator")
                      try:
                          # Initialize Redis - this is an async function
                          await initialize_redis(config.REDIS_URL, db=2, max_connections=10)  # Using db 2 for orchestrator
                          redis_client = await get_redis_client()
                      except Exception as e:
                          logger.error("Failed to initialize Redis", error=str(e))
                          raise

                      try:
                          rabbitmq_client = RabbitMQClient(config.RABBITMQ_URL, "delivery-tracking-job")
                          service = DeliveryTrackingService(
                              config=config,
                              db_manager=db_manager,
                              redis_client=redis_client,
                              rabbitmq_client=rabbitmq_client
                          )
                          logger.info("Starting delivery tracking job")

                          # Get active tenant IDs from environment variable
                          active_tenant_ids = os.environ.get('ACTIVE_TENANT_IDS', '')
                          if active_tenant_ids:
                              tenant_ids = [tid.strip() for tid in active_tenant_ids.split(',') if tid.strip()]
                          else:
                              tenant_ids = ['00000000-0000-0000-0000-000000000001']  # Default single tenant

                          for tenant_id in tenant_ids:
                              try:
                                  result = await service.check_expected_deliveries(tenant_id)
                                  logger.info("Delivery tracking completed", tenant_id=tenant_id, **result)
                              except Exception as e:
                                  logger.error("Delivery tracking failed", tenant_id=tenant_id, error=str(e))
                          logger.info("Delivery tracking job completed")
                      except Exception as e:
                          logger.error("Delivery tracking service error", error=str(e))
                          raise

                  if __name__ == "__main__":
                      asyncio.run(run_delivery_tracking())
              env:
                # Database connection
                - name: DATABASE_URL
                  valueFrom:
                    secretKeyRef:
                      name: database-secrets
                      key: ORCHESTRATOR_DATABASE_URL
                # Redis connection
                - name: REDIS_URL
                  valueFrom:
                    secretKeyRef:
                      name: database-secrets
                      key: REDIS_URL
                # Service URLs
                - name: ALERT_PROCESSOR_URL
                  value: "http://alert-processor-api:8000"
                - name: PROCUREMENT_SERVICE_URL
                  value: "http://procurement-service:8000"
                # Active tenants (comma-separated UUIDs)
                - name: ACTIVE_TENANT_IDS
                  value: "00000000-0000-0000-0000-000000000001"
                # Orchestrator settings
                - name: ORCHESTRATOR_CONTEXT_CACHE_TTL
                  value: "300"
                # Delivery tracking settings
                - name: ARRIVING_SOON_HOURS_BEFORE
                  value: "2"
                - name: OVERDUE_MINUTES_AFTER
                  value: "30"
                - name: DEFAULT_DELIVERY_WINDOW_HOURS
                  value: "4"
                # Service settings
                - name: LOG_LEVEL
                  value: "INFO"
                - name: PYTHONUNBUFFERED
                  value: "1"
              resources:
                requests:
                  memory: "256Mi"
                  cpu: "100m"
                limits:
                  memory: "512Mi"
                  cpu: "500m"
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: delivery-tracking-config
  namespace: bakery-ia
data:
  schedule: "Hourly at minute 30"
  description: "Checks expected deliveries and generates proactive alerts"
  arriving_soon_hours: "2"
  overdue_minutes: "30"
  delivery_window_hours: "4"
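
The three tracking knobs above (ARRIVING_SOON_HOURS_BEFORE, OVERDUE_MINUTES_AFTER, DEFAULT_DELIVERY_WINDOW_HOURS) are read by DeliveryTrackingService.check_expected_deliveries, whose implementation is not part of this diff. As a sketch of one plausible reading of those settings only, a classification helper could look like the following; the function name, the state labels, and the exact placement of the overdue cut-off are assumptions.

    from datetime import datetime, timedelta

    def classify_delivery(expected_at: datetime, now: datetime,
                          arriving_soon_hours: int = 2,    # ARRIVING_SOON_HOURS_BEFORE
                          overdue_minutes: int = 30,       # OVERDUE_MINUTES_AFTER
                          window_hours: int = 4) -> str:   # DEFAULT_DELIVERY_WINDOW_HOURS
        """Hypothetical sketch: bucket an expected delivery into an alerting state."""
        window_end = expected_at + timedelta(hours=window_hours)
        if now < expected_at - timedelta(hours=arriving_soon_hours):
            return "upcoming"        # too early to raise anything
        if now < expected_at:
            return "arriving_soon"   # proactive "arriving soon" alert
        if now <= window_end + timedelta(minutes=overdue_minutes):
            return "in_window"       # still within the tolerated delivery window
        return "overdue"             # proactive "overdue" alert

Since the job runs hourly at minute 30, an overdue delivery would be flagged at most one hour after it crosses the overdue cut-off.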