REFACTOR external service and improve websocket training

This commit is contained in:
Urtzi Alfaro
2025-10-09 14:11:02 +02:00
parent 7c72f83c51
commit 3c689b4f98
111 changed files with 13289 additions and 2374 deletions

View File

@@ -1,3 +1,5 @@
# infrastructure/kubernetes/base/components/external/external-service.yaml
# External Data Service v2.0 - Optimized city-based architecture
apiVersion: apps/v1
kind: Deployment
metadata:
@@ -7,8 +9,9 @@ metadata:
app.kubernetes.io/name: external-service
app.kubernetes.io/component: microservice
app.kubernetes.io/part-of: bakery-ia
version: "2.0"
spec:
replicas: 1
replicas: 2
selector:
matchLabels:
app.kubernetes.io/name: external-service
@@ -18,41 +21,30 @@ spec:
labels:
app.kubernetes.io/name: external-service
app.kubernetes.io/component: microservice
version: "2.0"
spec:
initContainers:
- name: wait-for-migration
- name: check-data-initialized
image: postgres:15-alpine
command:
- sh
- -c
- |
echo "Waiting for external database and migrations to be ready..."
# Wait for database to be accessible
until pg_isready -h $EXTERNAL_DB_HOST -p $EXTERNAL_DB_PORT -U $EXTERNAL_DB_USER; do
echo "Database not ready yet, waiting..."
sleep 2
done
echo "Database is ready!"
# Give migrations extra time to complete after DB is ready
echo "Waiting for migrations to complete..."
sleep 10
echo "Ready to start service"
- sh
- -c
- |
echo "Checking if data initialization is complete..."
# Convert asyncpg URL to psql-compatible format
DB_URL=$(echo "$DATABASE_URL" | sed 's/postgresql+asyncpg:/postgresql:/')
until psql "$DB_URL" -c "SELECT COUNT(*) FROM city_weather_data LIMIT 1;" > /dev/null 2>&1; do
echo "Waiting for initial data load..."
sleep 10
done
echo "Data is initialized"
env:
- name: EXTERNAL_DB_HOST
valueFrom:
configMapKeyRef:
name: bakery-config
key: EXTERNAL_DB_HOST
- name: EXTERNAL_DB_PORT
valueFrom:
configMapKeyRef:
name: bakery-config
key: DB_PORT
- name: EXTERNAL_DB_USER
valueFrom:
secretKeyRef:
name: database-secrets
key: EXTERNAL_DB_USER
- name: DATABASE_URL
valueFrom:
secretKeyRef:
name: database-secrets
key: EXTERNAL_DATABASE_URL
containers:
- name: external-service
image: bakery/external-service:latest

View File

@@ -82,6 +82,10 @@ spec:
name: pos-integration-secrets
- secretRef:
name: whatsapp-secrets
volumeMounts:
- name: model-storage
mountPath: /app/models
readOnly: true # Forecasting only reads models
resources:
requests:
memory: "256Mi"
@@ -105,6 +109,11 @@ spec:
timeoutSeconds: 3
periodSeconds: 5
failureThreshold: 5
volumes:
- name: model-storage
persistentVolumeClaim:
claimName: model-storage
readOnly: true # Forecasting only reads models
---
apiVersion: v1

View File

@@ -85,6 +85,8 @@ spec:
volumeMounts:
- name: tmp-storage
mountPath: /tmp
- name: model-storage
mountPath: /app/models
resources:
requests:
memory: "512Mi"
@@ -112,6 +114,9 @@ spec:
- name: tmp-storage
emptyDir:
sizeLimit: 2Gi
- name: model-storage
persistentVolumeClaim:
claimName: model-storage
---
apiVersion: v1

View File

@@ -0,0 +1,16 @@
# PersistentVolumeClaim "model-storage" (namespace bakery-ia).
# Workloads that reference claimName: model-storage mount it at /app/models.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: model-storage
  namespace: bakery-ia
  labels:
    app.kubernetes.io/name: model-storage
    app.kubernetes.io/component: storage
    app.kubernetes.io/part-of: bakery-ia
spec:
  # Storage class expected to map to the default local-path provisioner.
  storageClassName: standard
  accessModes:
    # Single node access (works with local Kubernetes).
    # NOTE(review): with ReadWriteOnce every pod mounting this claim has to
    # run on the same node - confirm this holds for all consumers.
    - ReadWriteOnce
  resources:
    requests:
      # Requested capacity; adjust based on your needs.
      storage: 10Gi

View File

@@ -127,8 +127,8 @@ data:
# EXTERNAL API CONFIGURATION
# ================================================================
AEMET_BASE_URL: "https://opendata.aemet.es/opendata"
AEMET_TIMEOUT: "60"
AEMET_RETRY_ATTEMPTS: "3"
AEMET_TIMEOUT: "90"
AEMET_RETRY_ATTEMPTS: "5"
MADRID_OPENDATA_BASE_URL: "https://datos.madrid.es"
MADRID_OPENDATA_TIMEOUT: "30"
@@ -327,4 +327,12 @@ data:
# ================================================================
NOMINATIM_PBF_URL: "http://download.geofabrik.de/europe/spain-latest.osm.pbf"
NOMINATIM_MEMORY_LIMIT: "8G"
NOMINATIM_CPU_LIMIT: "4"
NOMINATIM_CPU_LIMIT: "4"
# ================================================================
# EXTERNAL DATA SERVICE V2 SETTINGS
# ================================================================
EXTERNAL_ENABLED_CITIES: "madrid"
EXTERNAL_RETENTION_MONTHS: "6" # Reduced from 24 to avoid memory issues during init
EXTERNAL_CACHE_TTL_DAYS: "7"
EXTERNAL_REDIS_URL: "redis://redis-service:6379/0"

View File

@@ -0,0 +1,66 @@
# infrastructure/kubernetes/base/cronjobs/external-data-rotation-cronjob.yaml
# Monthly CronJob that rotates the external-data sliding window
# (runs on the 1st of each month at 02:00 UTC).
# NOTE(review): the window length is presumably driven by
# EXTERNAL_RETENTION_MONTHS (currently "6", reduced from 24) - confirm
# against app.jobs.rotate_data.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: external-data-rotation
  namespace: bakery-ia
  labels:
    app: external-service
    component: data-rotation
spec:
  # Fields: minute hour day-of-month month day-of-week.
  schedule: "0 2 1 * *"
  # Pin the schedule to UTC. Without an explicit time zone the schedule is
  # interpreted in the kube-controller-manager's local time zone, which would
  # not necessarily be the UTC run time documented above.
  timeZone: "Etc/UTC"
  successfulJobsHistoryLimit: 3
  failedJobsHistoryLimit: 3
  # Never start a new rotation while a previous one is still running.
  concurrencyPolicy: Forbid
  jobTemplate:
    metadata:
      labels:
        app: external-service
        job: data-rotation
    spec:
      # Finished Jobs are garbage-collected after 48 hours.
      ttlSecondsAfterFinished: 172800
      backoffLimit: 2
      template:
        metadata:
          labels:
            app: external-service
            cronjob: data-rotation
        spec:
          restartPolicy: OnFailure
          containers:
            - name: data-rotator
              image: bakery/external-service:latest
              imagePullPolicy: Always
              # Runs the rotation entry point shipped in the service image.
              command:
                - python
                - -m
                - app.jobs.rotate_data
              args:
                - "--log-level=INFO"
                - "--notify-slack=true"
              # Configuration and credentials are injected as environment
              # variables from the shared ConfigMap and Secrets.
              envFrom:
                - configMapRef:
                    name: bakery-config
                - secretRef:
                    name: database-secrets
                - secretRef:
                    name: external-api-secrets
                - secretRef:
                    name: monitoring-secrets
              resources:
                requests:
                  memory: "512Mi"
                  cpu: "250m"
                limits:
                  memory: "1Gi"
                  cpu: "500m"

View File

@@ -0,0 +1,68 @@
# infrastructure/kubernetes/base/jobs/external-data-init-job.yaml
# One-time job to initialize historical data for all enabled cities.
# The amount of history loaded is set by the --months argument below
# (currently 6; reduced from 24 to avoid memory/rate limit issues).
apiVersion: batch/v1
kind: Job
metadata:
  name: external-data-init
  namespace: bakery-ia
  labels:
    app: external-service
    component: data-initialization
spec:
  # The finished Job is garbage-collected after 24 hours.
  ttlSecondsAfterFinished: 86400
  backoffLimit: 3
  template:
    metadata:
      labels:
        app: external-service
        job: data-init
    spec:
      restartPolicy: OnFailure
      initContainers:
        # Blocks the data loader until the database accepts connections.
        - name: wait-for-db
          image: postgres:15-alpine
          command:
            - sh
            - -c
            - |
              # Expansions are quoted so an empty or unusual value cannot be
              # word-split into extra pg_isready arguments.
              until pg_isready -h "$EXTERNAL_DB_HOST" -p "$DB_PORT" -U "$EXTERNAL_DB_USER"; do
                echo "Waiting for database..."
                sleep 2
              done
              echo "Database is ready"
          # EXTERNAL_DB_HOST, DB_PORT and EXTERNAL_DB_USER are expected to
          # come from these sources.
          envFrom:
            - configMapRef:
                name: bakery-config
            - secretRef:
                name: database-secrets
      containers:
        - name: data-loader
          image: bakery/external-service:latest
          imagePullPolicy: Always
          # Runs the initialization entry point shipped in the service image.
          command:
            - python
            - -m
            - app.jobs.initialize_data
          args:
            - "--months=6"  # Reduced from 24 to avoid memory/rate limit issues
            - "--log-level=INFO"
          envFrom:
            - configMapRef:
                name: bakery-config
            - secretRef:
                name: database-secrets
            - secretRef:
                name: external-api-secrets
          resources:
            requests:
              memory: "2Gi"  # Increased from 1Gi
              cpu: "500m"
            limits:
              memory: "4Gi"  # Increased from 2Gi
              cpu: "1000m"

View File

@@ -39,14 +39,21 @@ resources:
- jobs/demo-seed-inventory-job.yaml
- jobs/demo-seed-ai-models-job.yaml
# Demo cleanup cronjob
# External data initialization job (v2.0)
- jobs/external-data-init-job.yaml
# CronJobs
- cronjobs/demo-cleanup-cronjob.yaml
- cronjobs/external-data-rotation-cronjob.yaml
# Infrastructure components
- components/databases/redis.yaml
- components/databases/rabbitmq.yaml
- components/infrastructure/gateway-service.yaml
# Persistent storage
- components/volumes/model-storage-pvc.yaml
# Database services
- components/databases/auth-db.yaml
- components/databases/tenant-db.yaml

View File

@@ -113,7 +113,7 @@ metadata:
app.kubernetes.io/component: external-apis
type: Opaque
data:
AEMET_API_KEY: ZXlKaGJHY2lPaUpJVXpJMU5pSjkuZXlKemRXSWlPaUoxWVd4bVlYSnZRR2R0WVdsc0xtTnZiU0lzSW1wMGFTSTZJbVJqWldWbU5URXdMVGRtWXpFdE5HTXhOeTFoT0RaaUxXUTROemRsWkRjNVpEbGxOeUlzSW1semN5STZJa0ZGVFVWVUlpd2lhV0YwSWpveE56VXlPRE13TURnM0xDSjFjMlZ5U1dRaU9pSmtZMlZsWmpVeE1DMDNabU14TFRSak1UY3RZVGcyWkMxa09EYzNaV1EzT1dRNVpUY2lMQ0p5YjJ4bElqb2lJbjAuQzA0N2dhaUVoV2hINEl0RGdrSFN3ZzhIektUend3ODdUT1BUSTJSZ01mOGotMnc=
AEMET_API_KEY: ZXlKaGJHY2lPaUpJVXpJMU5pSjkuZXlKemRXSWlPaUoxWVd4bVlYSnZRR2R0WVdsc0xtTnZiU0lzSW1wMGFTSTZJakV3TjJObE9XVmlMVGxoTm1ZdE5EQmpZeTA1WWpoaUxUTTFOV05pWkRZNU5EazJOeUlzSW1semN5STZJa0ZGVFVWVUlpd2lhV0YwSWpveE56VTVPREkwT0RNekxDSjFjMlZ5U1dRaU9pSXhNRGRqWlRsbFlpMDVZVFptTFRRd1kyTXRPV0k0WWkwek5UVmpZbVEyT1RRNU5qY2lMQ0p5YjJ4bElqb2lJbjAuamtjX3hCc0pDc204ZmRVVnhESW1mb2x5UE5pazF4MTd6c1UxZEZKR09iWQ==
MADRID_OPENDATA_API_KEY: eW91ci1tYWRyaWQtb3BlbmRhdGEta2V5LWhlcmU= # your-madrid-opendata-key-here
---

View File

@@ -0,0 +1,34 @@
# infrastructure/rabbitmq/rabbitmq.conf
# RabbitMQ configuration file (new-style "key = value" format).
# Keys that are not part of the RabbitMQ configuration schema make the node
# refuse to start, so only recognized settings are listed here.

# Network settings
listeners.tcp.default = 5672
management.tcp.port = 15672

# Heartbeat settings - increase to prevent timeout disconnections.
# The value is the heartbeat timeout in seconds. A peer is treated as
# unreachable after two missed heartbeats; that threshold is fixed and has no
# configuration key, so the former heartbeat_timeout_threshold_multiplier
# entry (not a recognized RabbitMQ setting) has been removed.
heartbeat = 600

# Memory and disk thresholds
vm_memory_high_watermark.relative = 0.6
disk_free_limit.relative = 2.0

# Default user (will be overridden by environment variables)
# NOTE(review): these credentials are committed in plain text - confirm they
# are always overridden outside local development.
default_user = bakery
default_pass = forecast123
default_vhost = /

# Management plugin
management.load_definitions = /etc/rabbitmq/definitions.json

# Logging
log.console = true
log.console.level = info
log.file = false

# Queue settings
queue_master_locator = min-masters

# Connection settings
# Maximum number of channels negotiated per connection. This replaces the
# unrecognized connection.max_channels_per_connection key.
channel_max = 100

View File

@@ -5,6 +5,11 @@
listeners.tcp.default = 5672
management.tcp.port = 15672
# Heartbeat settings - increase to prevent timeout disconnections.
# The value is the heartbeat timeout in seconds. A peer is treated as
# unreachable after two missed heartbeats; that threshold is fixed and has no
# configuration key, so the heartbeat_timeout_threshold_multiplier entry
# (not a recognized RabbitMQ setting) has been removed.
heartbeat = 600
# Memory and disk thresholds
vm_memory_high_watermark.relative = 0.6
disk_free_limit.relative = 2.0
@@ -23,4 +28,7 @@ log.console.level = info
log.file = false
# Queue settings
queue_master_locator = min-masters
queue_master_locator = min-masters
# Connection settings
# Maximum number of channels negotiated per connection. This replaces the
# unrecognized connection.max_channels_per_connection key.
channel_max = 100