New alert service

This commit is contained in:
Urtzi Alfaro
2025-12-05 20:07:01 +01:00
parent 1fe3a73549
commit 667e6e0404
393 changed files with 26002 additions and 61033 deletions

View File

@@ -1,175 +0,0 @@
# Deployment that runs the alert-processor worker pods in the bakery-ia
# namespace.
apiVersion: apps/v1
kind: Deployment
metadata:
name: alert-processor-service
namespace: bakery-ia
labels:
app.kubernetes.io/name: alert-processor-service
app.kubernetes.io/component: worker
app.kubernetes.io/part-of: bakery-ia
spec:
replicas: 2
# Pods are selected on name + component only; the pod template labels below
# repeat exactly these two keys (part-of is not part of the selector).
selector:
matchLabels:
app.kubernetes.io/name: alert-processor-service
app.kubernetes.io/component: worker
template:
metadata:
labels:
app.kubernetes.io/name: alert-processor-service
app.kubernetes.io/component: worker
spec:
# Dependency gates (Redis, RabbitMQ, database) that must each exit 0 before
# the main container is started.
initContainers:
# Wait for Redis to be ready
# Loops until `redis-cli ... ping` over TLS prints PONG, sleeping 2 seconds
# between attempts. There is no retry limit, so the pod stays in Init for as
# long as Redis does not answer.
- name: wait-for-redis
image: redis:7.4-alpine
# NOTE(review): the password is passed with `-a` on the command line, where
# it is visible in the process list; redis-cli can read it from the
# REDISCLI_AUTH environment variable instead — confirm and consider switching.
command:
- sh
- -c
- |
echo "Waiting for Redis to be ready..."
until redis-cli -h $REDIS_HOST -p $REDIS_PORT --tls --cert /tls/redis-cert.pem --key /tls/redis-key.pem --cacert /tls/ca-cert.pem -a "$REDIS_PASSWORD" ping | grep -q PONG; do
echo "Redis not ready yet, waiting..."
sleep 2
done
echo "Redis is ready!"
# Host and port come from the shared bakery-config ConfigMap; the password
# comes from the redis-secrets Secret.
env:
- name: REDIS_HOST
valueFrom:
configMapKeyRef:
name: bakery-config
key: REDIS_HOST
- name: REDIS_PORT
valueFrom:
configMapKeyRef:
name: bakery-config
key: REDIS_PORT
- name: REDIS_PASSWORD
valueFrom:
secretKeyRef:
name: redis-secrets
key: REDIS_PASSWORD
# The client certificate, key and CA file used by the command above are read
# from the redis-tls volume mounted at /tls.
volumeMounts:
- name: redis-tls
mountPath: /tls
readOnly: true
# Wait for RabbitMQ to be ready
# Polls the management HTTP API on port 15672 every 2 seconds until
# /api/healthchecks/node returns a non-error status (`curl -f` fails on HTTP
# errors); all curl output is discarded. There is no retry limit.
# NOTE(review): verify that /api/healthchecks/node exists on the deployed
# RabbitMQ version.
- name: wait-for-rabbitmq
# NOTE(review): `latest` is a floating tag, so the image can change between
# pod starts — consider pinning a version.
image: curlimages/curl:latest
# Credentials are sent as HTTP basic auth over plain http://.
command:
- sh
- -c
- |
echo "Waiting for RabbitMQ to be ready..."
until curl -f -u "$RABBITMQ_USER:$RABBITMQ_PASSWORD" http://$RABBITMQ_HOST:15672/api/healthchecks/node > /dev/null 2>&1; do
echo "RabbitMQ not ready yet, waiting..."
sleep 2
done
echo "RabbitMQ is ready!"
# Host comes from bakery-config; user and password from rabbitmq-secrets.
env:
- name: RABBITMQ_HOST
valueFrom:
configMapKeyRef:
name: bakery-config
key: RABBITMQ_HOST
- name: RABBITMQ_USER
valueFrom:
secretKeyRef:
name: rabbitmq-secrets
key: RABBITMQ_USER
- name: RABBITMQ_PASSWORD
valueFrom:
secretKeyRef:
name: rabbitmq-secrets
key: RABBITMQ_PASSWORD
# Waits until the alert-processor PostgreSQL server accepts connections
# (pg_isready, retried every 2 seconds), then sleeps a fixed 10 seconds.
# NOTE(review): nothing here checks that migrations actually finished; the
# 10-second sleep is only a grace period.
- name: wait-for-migration
image: postgres:17-alpine
command:
- sh
- -c
- |
echo "Waiting for alert-processor database and migrations to be ready..."
# Wait for database to be accessible
until pg_isready -h $ALERT_PROCESSOR_DB_HOST -p $ALERT_PROCESSOR_DB_PORT -U $ALERT_PROCESSOR_DB_USER; do
echo "Database not ready yet, waiting..."
sleep 2
done
echo "Database is ready!"
# Give migrations extra time to complete after DB is ready
echo "Waiting for migrations to complete..."
sleep 10
echo "Ready to start service"
env:
- name: ALERT_PROCESSOR_DB_HOST
valueFrom:
configMapKeyRef:
name: bakery-config
key: ALERT_PROCESSOR_DB_HOST
# The port is read from the generic DB_PORT key, not a per-service key.
- name: ALERT_PROCESSOR_DB_PORT
valueFrom:
configMapKeyRef:
name: bakery-config
key: DB_PORT
- name: ALERT_PROCESSOR_DB_USER
valueFrom:
secretKeyRef:
name: database-secrets
key: ALERT_PROCESSOR_DB_USER
# Main worker container. No command is set here, so the image's own
# entrypoint is used.
containers:
- name: alert-processor-service
# NOTE(review): the "-dirty" suffix suggests an image built from an
# uncommitted working tree — confirm this tag is intended for deployment.
image: bakery/alert-processor:f246381-dirty
# Every key of bakery-config and of the ten Secrets below is injected as an
# environment variable.
# NOTE(review): this includes payment, email, POS and WhatsApp secrets —
# confirm the worker needs all of them.
envFrom:
- configMapRef:
name: bakery-config
- secretRef:
name: database-secrets
- secretRef:
name: redis-secrets
- secretRef:
name: rabbitmq-secrets
- secretRef:
name: jwt-secrets
- secretRef:
name: external-api-secrets
- secretRef:
name: payment-secrets
- secretRef:
name: email-secrets
- secretRef:
name: monitoring-secrets
- secretRef:
name: pos-integration-secrets
- secretRef:
name: whatsapp-secrets
resources:
requests:
memory: "128Mi"
cpu: "50m"
limits:
memory: "256Mi"
cpu: "200m"
# This probe only starts a Python interpreter that exits 0 immediately. It
# succeeds whenever `python` can run in the container and does not inspect
# the worker process or its connections.
readinessProbe:
exec:
command:
- python
- -c
- "import sys; sys.exit(0)"
initialDelaySeconds: 10
periodSeconds: 10
timeoutSeconds: 5
failureThreshold: 3
# Same always-exit-0 command as the readiness probe, so a hung worker is not
# detected by this liveness check.
livenessProbe:
exec:
command:
- python
- -c
- "import sys; sys.exit(0)"
initialDelaySeconds: 30
periodSeconds: 10
timeoutSeconds: 5
failureThreshold: 3
# Secret-backed volume with the Redis TLS material; the wait-for-redis init
# container mounts it read-only at /tls.
volumes:
- name: redis-tls
secret:
secretName: redis-tls-secret
# Unquoted 0400 relies on octal parsing (owner read-only, decimal 256).
# NOTE(review): a strict YAML 1.2 parser would read this as decimal 400 —
# confirm against the tooling that renders this manifest.
defaultMode: 0400

View File

@@ -1,25 +1,54 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: alert-processor-api
name: alert-processor
namespace: bakery-ia
labels:
app.kubernetes.io/name: alert-processor-api
app.kubernetes.io/component: api
app.kubernetes.io/name: alert-processor
app.kubernetes.io/component: service
app.kubernetes.io/part-of: bakery-ia
spec:
replicas: 2
selector:
matchLabels:
app.kubernetes.io/name: alert-processor-api
app.kubernetes.io/component: api
app.kubernetes.io/name: alert-processor
app.kubernetes.io/component: service
template:
metadata:
labels:
app.kubernetes.io/name: alert-processor-api
app.kubernetes.io/component: api
app.kubernetes.io/name: alert-processor
app.kubernetes.io/component: service
spec:
initContainers:
# Wait for RabbitMQ to be ready
- name: wait-for-rabbitmq
image: curlimages/curl:latest
command:
- sh
- -c
- |
echo "Waiting for RabbitMQ to be ready..."
until curl -f -u "$RABBITMQ_USER:$RABBITMQ_PASSWORD" http://$RABBITMQ_HOST:15672/api/healthchecks/node > /dev/null 2>&1; do
echo "RabbitMQ not ready yet, waiting..."
sleep 2
done
echo "RabbitMQ is ready!"
env:
- name: RABBITMQ_HOST
valueFrom:
configMapKeyRef:
name: bakery-config
key: RABBITMQ_HOST
- name: RABBITMQ_USER
valueFrom:
secretKeyRef:
name: rabbitmq-secrets
key: RABBITMQ_USER
- name: RABBITMQ_PASSWORD
valueFrom:
secretKeyRef:
name: rabbitmq-secrets
key: RABBITMQ_PASSWORD
- name: wait-for-migration
image: postgres:17-alpine
command:
@@ -34,7 +63,7 @@ spec:
echo "Database is ready!"
echo "Waiting for migrations to complete..."
sleep 10
echo "Ready to start API service"
echo "Ready to start service"
env:
- name: ALERT_PROCESSOR_DB_HOST
valueFrom:
@@ -52,11 +81,11 @@ spec:
name: database-secrets
key: ALERT_PROCESSOR_DB_USER
containers:
- name: alert-processor-api
- name: alert-processor
image: bakery/alert-processor:latest
command: ["python", "-m", "uvicorn", "app.api_server:app", "--host", "0.0.0.0", "--port", "8010"]
command: ["python", "-m", "uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
ports:
- containerPort: 8010
- containerPort: 8000
name: http
envFrom:
- configMapRef:
@@ -65,6 +94,8 @@ spec:
name: database-secrets
- secretRef:
name: redis-secrets
- secretRef:
name: rabbitmq-secrets
- secretRef:
name: jwt-secrets
resources:
@@ -77,7 +108,7 @@ spec:
readinessProbe:
httpGet:
path: /health
port: 8010
port: 8000
initialDelaySeconds: 10
periodSeconds: 10
timeoutSeconds: 5
@@ -85,7 +116,7 @@ spec:
livenessProbe:
httpGet:
path: /health
port: 8010
port: 8000
initialDelaySeconds: 30
periodSeconds: 10
timeoutSeconds: 5
@@ -94,19 +125,19 @@ spec:
apiVersion: v1
kind: Service
metadata:
name: alert-processor-api
name: alert-processor
namespace: bakery-ia
labels:
app.kubernetes.io/name: alert-processor-api
app.kubernetes.io/component: api
app.kubernetes.io/name: alert-processor
app.kubernetes.io/component: service
app.kubernetes.io/part-of: bakery-ia
spec:
selector:
app.kubernetes.io/name: alert-processor-api
app.kubernetes.io/component: api
app.kubernetes.io/name: alert-processor
app.kubernetes.io/component: service
ports:
- name: http
port: 8010
targetPort: 8010
port: 8000
targetPort: 8000
protocol: TCP
type: ClusterIP

View File

@@ -24,9 +24,14 @@ spec:
ports:
- containerPort: 8000
name: http
envFrom:
- configMapRef:
name: bakery-config
env:
- name: SERVICE_NAME
value: "demo-session-service"
- name: ALERT_PROCESSOR_SERVICE_URL
value: "http://alert-processor:8000"
- name: DEMO_SESSION_DATABASE_URL
valueFrom:
secretKeyRef:

View File

@@ -101,7 +101,7 @@ data:
POS_SERVICE_URL: "http://pos-service:8000"
ORDERS_SERVICE_URL: "http://orders-service:8000"
PRODUCTION_SERVICE_URL: "http://production-service:8000"
ALERT_PROCESSOR_SERVICE_URL: "http://alert-processor-api:8010"
ALERT_PROCESSOR_SERVICE_URL: "http://alert-processor:8000"
ORCHESTRATOR_SERVICE_URL: "http://orchestrator-service:8000"
AI_INSIGHTS_SERVICE_URL: "http://ai-insights-service:8000"
DISTRIBUTION_SERVICE_URL: "http://distribution-service:8000"

View File

@@ -1,120 +0,0 @@
# CronJob that runs the app.jobs.priority_recalculation module from the
# alert-processor image once per hour.
apiVersion: batch/v1
kind: CronJob
metadata:
name: alert-priority-recalculation
namespace: bakery-ia
labels:
app: alert-priority-recalculation
component: cron
service: alert-processor
spec:
# Schedule: Every hour at minute 15
schedule: "15 * * * *"
# Keep last 3 successful jobs and 1 failed job for debugging
successfulJobsHistoryLimit: 3
failedJobsHistoryLimit: 1
# Don't start new job if previous one is still running
concurrencyPolicy: Forbid
# A run that misses its scheduled time may still be started up to 10 minutes
# late; this is a start deadline, not a limit on how long the job may run.
startingDeadlineSeconds: 600
jobTemplate:
spec:
# Retry up to 2 times if job fails
backoffLimit: 2
# Job must complete within 30 minutes
activeDeadlineSeconds: 1800
template:
metadata:
labels:
app: alert-priority-recalculation
component: cron
spec:
restartPolicy: OnFailure
# Use alert-processor service image
containers:
- name: priority-recalc
# Floating `latest` tag, re-pulled on every run (imagePullPolicy: Always).
image: bakery/alert-processor:latest
imagePullPolicy: Always
command:
- python3
- -m
- app.jobs.priority_recalculation
env:
# Database connection
- name: DATABASE_URL
valueFrom:
secretKeyRef:
name: database-secrets
key: ALERT_PROCESSOR_DATABASE_URL
# Redis connection
# Hard-coded URL rather than a Secret/ConfigMap reference; it carries no
# password.
# NOTE(review): ssl_cert_reqs=none presumably disables TLS certificate
# verification in the client — confirm this is acceptable.
- name: REDIS_URL
value: rediss://redis-service:6379/0?ssl_cert_reqs=none
# Alert processor settings
# The four *_WEIGHT values below sum to 1.00.
- name: BUSINESS_IMPACT_WEIGHT
value: "0.40"
- name: URGENCY_WEIGHT
value: "0.30"
- name: USER_AGENCY_WEIGHT
value: "0.20"
- name: CONFIDENCE_WEIGHT
value: "0.10"
- name: CRITICAL_THRESHOLD
value: "90"
- name: IMPORTANT_THRESHOLD
value: "70"
- name: STANDARD_THRESHOLD
value: "50"
# Escalation thresholds (hours)
- name: ESCALATION_THRESHOLD_48H
value: "48"
- name: ESCALATION_THRESHOLD_72H
value: "72"
# Service settings
- name: LOG_LEVEL
value: "INFO"
- name: PYTHONUNBUFFERED
value: "1"
resources:
requests:
memory: "256Mi"
cpu: "100m"
limits:
memory: "512Mi"
cpu: "500m"
---
# ConfigMap describing the priority-recalculation job.
# NOTE(review): the alert-priority-recalculation CronJob manifest has no
# envFrom, configMapKeyRef or volume pointing at this ConfigMap, so these
# values are presumably informational only — confirm before relying on them.
apiVersion: v1
kind: ConfigMap
metadata:
name: alert-priority-recalculation-config
namespace: bakery-ia
data:
schedule: "Hourly at minute 15"
description: "Recalculates alert priorities with time-based escalation"
escalation_48h_boost: "10"
escalation_72h_boost: "20"
deadline_24h_boost: "15"
# deadline_6h_boost is equal to max_boost.
deadline_6h_boost: "30"
max_boost: "30"

View File

@@ -1,176 +0,0 @@
# CronJob that runs an inline Python delivery-tracking script in the
# orchestrator image once per hour.
apiVersion: batch/v1
kind: CronJob
metadata:
name: delivery-tracking
namespace: bakery-ia
labels:
app: delivery-tracking
component: cron
service: orchestrator
spec:
# Schedule: Every hour at minute 30
schedule: "30 * * * *"
# Keep last 3 successful jobs and 1 failed job for debugging
successfulJobsHistoryLimit: 3
failedJobsHistoryLimit: 1
# Don't start new job if previous one is still running
concurrencyPolicy: Forbid
# A run that misses its scheduled time may still be started up to 10 minutes
# late; this is a start deadline, not a limit on how long the job may run.
startingDeadlineSeconds: 600
jobTemplate:
spec:
# Retry up to 2 times if job fails
backoffLimit: 2
# Job must complete within 30 minutes
activeDeadlineSeconds: 1800
template:
metadata:
labels:
app: delivery-tracking
component: cron
spec:
restartPolicy: OnFailure
# Use orchestrator service image
containers:
- name: delivery-tracker
# Floating `latest` tag, re-pulled on every run (imagePullPolicy: Always).
image: bakery/orchestrator-service:latest
imagePullPolicy: Always
# Inline Python program passed to `python3 -c`. As written it:
#   - builds a database manager from settings.DATABASE_URL,
#   - initialises Redis from settings.REDIS_URL with db=2,
#   - constructs a RabbitMQClient from settings.RABBITMQ_URL,
#   - calls DeliveryTrackingService.check_expected_deliveries once per tenant
#     id listed in ACTIVE_TENANT_IDS (comma-separated), falling back to the
#     single default tenant id when the variable is empty.
# A failure for one tenant is logged and not re-raised.
# NOTE(review): because per-tenant errors are only logged, the job can
# presumably exit 0 even when every tenant fails — confirm this is intended.
# NOTE(review): `import redis.asyncio as redis` is not referenced again in the
# script, and no connect/close call on the RabbitMQ client is visible here.
command:
- python3
- -c
- |
import asyncio
import os
from app.services.delivery_tracking_service import DeliveryTrackingService
from shared.database.base import create_database_manager
from app.core.config import settings
from shared.messaging.rabbitmq import RabbitMQClient
import structlog
logger = structlog.get_logger()
async def run_delivery_tracking():
"""Run delivery tracking for all tenants"""
import redis.asyncio as redis
from shared.redis_utils import initialize_redis, get_redis_client
config = settings # Use the global settings instance
db_manager = create_database_manager(config.DATABASE_URL, "orchestrator")
try:
# Initialize Redis - This is an async function
await initialize_redis(config.REDIS_URL, db=2, max_connections=10) # Using db 2 for orchestrator
redis_client = await get_redis_client()
except Exception as e:
logger.error("Failed to initialize Redis", error=str(e))
raise
try:
rabbitmq_client = RabbitMQClient(config.RABBITMQ_URL, "delivery-tracking-job")
service = DeliveryTrackingService(
config=config,
db_manager=db_manager,
redis_client=redis_client,
rabbitmq_client=rabbitmq_client
)
logger.info("Starting delivery tracking job")
# Get active tenant IDs from environment variable
active_tenant_ids = os.environ.get('ACTIVE_TENANT_IDS', '')
if active_tenant_ids:
tenant_ids = [tid.strip() for tid in active_tenant_ids.split(',') if tid.strip()]
else:
tenant_ids = ['00000000-0000-0000-0000-000000000001'] # Default single tenant
for tenant_id in tenant_ids:
try:
result = await service.check_expected_deliveries(tenant_id)
logger.info("Delivery tracking completed", tenant_id=tenant_id, **result)
except Exception as e:
logger.error("Delivery tracking failed", tenant_id=tenant_id, error=str(e))
logger.info("Delivery tracking job completed")
except Exception as e:
logger.error("Delivery tracking service error", error=str(e))
raise
if __name__ == "__main__":
asyncio.run(run_delivery_tracking())
# Environment for the delivery-tracker container.
# NOTE(review): the inline script reads settings.RABBITMQ_URL, but no
# RABBITMQ_URL variable is set here — presumably the settings class supplies a
# default; confirm.
env:
# Database connection
- name: DATABASE_URL
valueFrom:
secretKeyRef:
name: database-secrets
key: ORCHESTRATOR_DATABASE_URL
# Redis connection
# Read from the database-secrets Secret (key REDIS_URL), not redis-secrets.
- name: REDIS_URL
valueFrom:
secretKeyRef:
name: database-secrets
key: REDIS_URL
# Service URLs
# NOTE(review): this points at alert-processor-api on port 8000, while the
# alert-processor-api Service and the ALERT_PROCESSOR_SERVICE_URL config value
# elsewhere in this change set used port 8010 — verify the port.
- name: ALERT_PROCESSOR_URL
value: "http://alert-processor-api:8000"
- name: PROCUREMENT_SERVICE_URL
value: "http://procurement-service:8000"
# Active tenants (comma-separated UUIDs)
# The single id below is the same value the inline script falls back to when
# this variable is empty.
- name: ACTIVE_TENANT_IDS
value: "00000000-0000-0000-0000-000000000001"
# Orchestrator settings
- name: ORCHESTRATOR_CONTEXT_CACHE_TTL
value: "300"
# Delivery tracking settings
- name: ARRIVING_SOON_HOURS_BEFORE
value: "2"
- name: OVERDUE_MINUTES_AFTER
value: "30"
- name: DEFAULT_DELIVERY_WINDOW_HOURS
value: "4"
# Service settings
- name: LOG_LEVEL
value: "INFO"
- name: PYTHONUNBUFFERED
value: "1"
resources:
requests:
memory: "256Mi"
cpu: "100m"
limits:
memory: "512Mi"
cpu: "500m"
---
# ConfigMap describing the delivery-tracking job. The three numeric values
# repeat the ARRIVING_SOON_HOURS_BEFORE, OVERDUE_MINUTES_AFTER and
# DEFAULT_DELIVERY_WINDOW_HOURS values set directly in the CronJob's env.
# NOTE(review): the delivery-tracking CronJob manifest does not reference this
# ConfigMap, so the two copies presumably have to be kept in sync by hand —
# confirm.
apiVersion: v1
kind: ConfigMap
metadata:
name: delivery-tracking-config
namespace: bakery-ia
data:
schedule: "Hourly at minute 30"
description: "Checks expected deliveries and generates proactive alerts"
arriving_soon_hours: "2"
overdue_minutes: "30"
delivery_window_hours: "4"

View File

@@ -1,99 +0,0 @@
# CronJob that runs /app/scripts/track_daily_usage.py once per day.
apiVersion: batch/v1
kind: CronJob
metadata:
name: usage-tracker
namespace: bakery-ia
labels:
app: usage-tracker
component: cron
spec:
# Schedule: Daily at 2 AM UTC
schedule: "0 2 * * *"
# Keep last 3 successful jobs and 1 failed job for debugging
successfulJobsHistoryLimit: 3
failedJobsHistoryLimit: 1
# Don't start new job if previous one is still running
concurrencyPolicy: Forbid
# A run that misses its scheduled time may still be started up to 30 minutes
# late; this is a start deadline, not a limit on how long the job may run.
startingDeadlineSeconds: 1800
jobTemplate:
spec:
# Retry up to 2 times if job fails
backoffLimit: 2
# Job must complete within 20 minutes
activeDeadlineSeconds: 1200
template:
metadata:
labels:
app: usage-tracker
component: cron
spec:
restartPolicy: OnFailure
# Use tenant service image (it has access to all models)
containers:
- name: usage-tracker
# NOTE(review): "your-registry" looks like an unreplaced placeholder, and the
# other images in this change set use the bakery/ prefix — confirm the image.
image: your-registry/bakery-ia-tenant-service:latest
imagePullPolicy: Always
command:
- python3
- /app/scripts/track_daily_usage.py
env:
# Database connection
# NOTE(review): the other manifests in this change set read database URLs
# from the database-secrets Secret; verify that database-credentials exists.
- name: DATABASE_URL
valueFrom:
secretKeyRef:
name: database-credentials
key: url
# Redis connection
# NOTE(review): the other manifests use the bakery-config ConfigMap; verify
# that app-config with key redis-url exists.
- name: REDIS_URL
valueFrom:
configMapKeyRef:
name: app-config
key: redis-url
# Service settings
- name: LOG_LEVEL
value: "INFO"
- name: PYTHONUNBUFFERED
value: "1"
resources:
requests:
memory: "256Mi"
cpu: "100m"
limits:
memory: "512Mi"
cpu: "500m"
# Liveness probe: passes while pgrep finds a process whose command line
# matches track_daily_usage.py. It checks that the script is still running,
# not that it completes successfully.
# NOTE(review): `pgrep -f` may also match the probe's own `sh -c` command
# line, which would make the probe always pass — verify.
livenessProbe:
exec:
command:
- /bin/sh
- -c
- pgrep -f track_daily_usage.py
initialDelaySeconds: 10
periodSeconds: 60
failureThreshold: 3
---
# ConfigMap holding a human-readable description of the usage-tracker job.
# NOTE(review): the usage-tracker CronJob manifest does not reference this
# ConfigMap, so it is presumably informational only — confirm.
apiVersion: v1
kind: ConfigMap
metadata:
name: usage-tracker-config
namespace: bakery-ia
data:
# You can add additional configuration here if needed
schedule: "Daily at 2 AM UTC"
description: "Tracks daily usage snapshots for predictive analytics"

View File

@@ -25,18 +25,18 @@ spec:
- |
echo "Waiting 30 seconds for alert-processor-migration to complete..."
sleep 30
- name: wait-for-alert-processor-api
- name: wait-for-alert-processor
image: curlimages/curl:latest
command:
- sh
- -c
- |
echo "Waiting for alert-processor-api to be ready..."
until curl -f http://alert-processor-api.bakery-ia.svc.cluster.local:8010/health > /dev/null 2>&1; do
echo "alert-processor-api not ready yet, waiting..."
echo "Waiting for alert-processor to be ready..."
until curl -f http://alert-processor.bakery-ia.svc.cluster.local:8000/health > /dev/null 2>&1; do
echo "alert-processor not ready yet, waiting..."
sleep 5
done
echo "alert-processor-api is ready!"
echo "alert-processor is ready!"
containers:
- name: seed-alerts
image: bakery/alert-processor:latest

View File

@@ -18,18 +18,18 @@ spec:
app: demo-seed-alerts-retail
spec:
initContainers:
- name: wait-for-alert-processor-service
- name: wait-for-alert-processor
image: curlimages/curl:latest
command:
- sh
- -c
- |
echo "Waiting for alert-processor-api to be ready..."
until curl -f http://alert-processor-api.bakery-ia.svc.cluster.local:8010/health > /dev/null 2>&1; do
echo "alert-processor-api not ready yet, waiting..."
echo "Waiting for alert-processor to be ready..."
until curl -f http://alert-processor.bakery-ia.svc.cluster.local:8000/health > /dev/null 2>&1; do
echo "alert-processor not ready yet, waiting..."
sleep 5
done
echo "alert-processor-api is ready!"
echo "alert-processor is ready!"
containers:
- name: seed-alerts-retail
image: bakery/alert-processor:latest

View File

@@ -64,7 +64,7 @@ resources:
- jobs/demo-seed-forecasts-job.yaml
- jobs/demo-seed-pos-configs-job.yaml
- jobs/demo-seed-orchestration-runs-job.yaml
- jobs/demo-seed-alerts-job.yaml
# - jobs/demo-seed-alerts-job.yaml # Commented out: Alert processor v2 uses event-driven architecture; services emit events via RabbitMQ
# Phase 2: Child retail seed jobs (for enterprise demo)
- jobs/demo-seed-inventory-retail-job.yaml
@@ -73,7 +73,7 @@ resources:
- jobs/demo-seed-customers-retail-job.yaml
- jobs/demo-seed-pos-retail-job.yaml
- jobs/demo-seed-forecasts-retail-job.yaml
- jobs/demo-seed-alerts-retail-job.yaml
# - jobs/demo-seed-alerts-retail-job.yaml # Commented out: Alert processor v2 uses event-driven architecture; services emit events via RabbitMQ
- jobs/demo-seed-distribution-history-job.yaml
# External data initialization job (v2.0)
@@ -82,9 +82,6 @@ resources:
# CronJobs
- cronjobs/demo-cleanup-cronjob.yaml
- cronjobs/external-data-rotation-cronjob.yaml
- cronjobs/usage-tracker-cronjob.yaml
- cronjobs/alert-priority-recalculation-cronjob.yaml
- cronjobs/delivery-tracking-cronjob.yaml
# Infrastructure components
- components/databases/redis.yaml
@@ -147,8 +144,7 @@ resources:
- components/production/production-service.yaml
- components/procurement/procurement-service.yaml
- components/orchestrator/orchestrator-service.yaml
- components/alert-processor/alert-processor-service.yaml
- components/alert-processor/alert-processor-api.yaml
- components/alert-processor/alert-processor.yaml
- components/ai-insights/ai-insights-service.yaml
# Frontend

View File

@@ -37,6 +37,11 @@ spec:
secretKeyRef:
name: database-secrets
key: ALERT_PROCESSOR_DATABASE_URL
- name: DATABASE_URL
valueFrom:
secretKeyRef:
name: database-secrets
key: ALERT_PROCESSOR_DATABASE_URL
- name: DB_FORCE_RECREATE
valueFrom:
configMapKeyRef: