# Deployment for the training-service microservice.
# Two init containers gate startup: one blocks until Redis answers PING over
# TLS, the other blocks until the training database accepts connections.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: training-service
  namespace: bakery-ia
  labels:
    app.kubernetes.io/name: training-service
    app.kubernetes.io/component: microservice
    app.kubernetes.io/part-of: bakery-ia
spec:
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/name: training-service
      app.kubernetes.io/component: microservice
  template:
    metadata:
      labels:
        app.kubernetes.io/name: training-service
        app.kubernetes.io/component: microservice
    spec:
      imagePullSecrets:
        - name: dockerhub-creds
      initContainers:
        # Wait for Redis to be ready
        - name: wait-for-redis
          image: redis:7.4-alpine
          command:
            - sh
            - -c
            - |
              echo "Waiting for Redis to be ready..."
              # Hand the password to redis-cli through REDISCLI_AUTH instead of
              # `-a`, so it never appears in the process argument list and
              # redis-cli does not print its insecure-password warning.
              export REDISCLI_AUTH="$REDIS_PASSWORD"
              until redis-cli -h "$REDIS_HOST" -p "$REDIS_PORT" \
                --tls \
                --cert /tls/redis-cert.pem \
                --key /tls/redis-key.pem \
                --cacert /tls/ca-cert.pem \
                ping | grep -q PONG; do
                echo "Redis not ready yet, waiting..."
                sleep 2
              done
              echo "Redis is ready!"
          env:
            - name: REDIS_HOST
              valueFrom:
                configMapKeyRef:
                  name: bakery-config
                  key: REDIS_HOST
            - name: REDIS_PORT
              valueFrom:
                configMapKeyRef:
                  name: bakery-config
                  key: REDIS_PORT
            - name: REDIS_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: redis-secrets
                  key: REDIS_PASSWORD
          volumeMounts:
            # Client certificate, key and CA used by redis-cli --tls above.
            - name: redis-tls
              mountPath: /tls
              readOnly: true
        # Wait for the training database to accept connections.
        # NOTE(review): this container does not check migration state; after
        # the database is reachable it only sleeps for a fixed 10 seconds.
        - name: wait-for-migration
          image: postgres:17-alpine
          command:
            - sh
            - -c
            - |
              echo "Waiting for training database and migrations to be ready..."
              # Wait for database to be accessible
              until pg_isready -h "$TRAINING_DB_HOST" -p "$TRAINING_DB_PORT" -U "$TRAINING_DB_USER"; do
                echo "Database not ready yet, waiting..."
                sleep 2
              done
              echo "Database is ready!"
              # Give migrations extra time to complete after DB is ready
              echo "Waiting for migrations to complete..."
              sleep 10
              echo "Ready to start service"
          env:
            - name: TRAINING_DB_HOST
              valueFrom:
                configMapKeyRef:
                  name: bakery-config
                  key: TRAINING_DB_HOST
            # The port is read from the DB_PORT key of bakery-config.
            - name: TRAINING_DB_PORT
              valueFrom:
                configMapKeyRef:
                  name: bakery-config
                  key: DB_PORT
            - name: TRAINING_DB_USER
              valueFrom:
                secretKeyRef:
                  name: database-secrets
                  key: TRAINING_DB_USER
      containers:
        - name: training-service
          image: bakery/training-service:79c869aaa529b2aaf2bbe77d2a2506e3ebdaf2abac3c83505ddfad29f3dbf99e
          ports:
            - containerPort: 8000
              name: http
          env:
            # OpenTelemetry Configuration
            - name: OTEL_COLLECTOR_ENDPOINT
              value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
            - name: OTEL_EXPORTER_OTLP_ENDPOINT
              value: "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
            - name: OTEL_SERVICE_NAME
              value: "training-service"
            - name: ENABLE_TRACING
              value: "true"
            # Logging Configuration
            - name: OTEL_LOGS_EXPORTER
              value: "otlp"
            - name: OTEL_PYTHON_LOGGING_AUTO_INSTRUMENTATION_ENABLED
              value: "true"
            # Metrics Configuration
            - name: ENABLE_OTEL_METRICS
              value: "true"
            - name: ENABLE_SYSTEM_METRICS
              value: "true"
          # Every key of the ConfigMap and Secrets below is injected as an
          # environment variable of the container.
          envFrom:
            - configMapRef:
                name: bakery-config
            - secretRef:
                name: database-secrets
            - secretRef:
                name: redis-secrets
            - secretRef:
                name: rabbitmq-secrets
            - secretRef:
                name: jwt-secrets
            - secretRef:
                name: external-api-secrets
            - secretRef:
                name: payment-secrets
            - secretRef:
                name: email-secrets
            - secretRef:
                name: monitoring-secrets
            - secretRef:
                name: pos-integration-secrets
            - secretRef:
                name: whatsapp-secrets
          volumeMounts:
            - name: tmp-storage
              mountPath: /tmp
            - name: model-storage
              mountPath: /app/models
          resources:
            requests:
              memory: "512Mi"
              cpu: "200m"
            limits:
              memory: "4Gi"
              cpu: "2000m"
          livenessProbe:
            httpGet:
              path: /health/live
              port: 8000
            initialDelaySeconds: 60
            timeoutSeconds: 30
            periodSeconds: 30
            failureThreshold: 5
          readinessProbe:
            httpGet:
              path: /health/ready
              port: 8000
            initialDelaySeconds: 30
            timeoutSeconds: 15
            periodSeconds: 15
            failureThreshold: 5
      volumes:
        - name: redis-tls
          secret:
            secretName: redis-tls-secret
            # Decimal 256 == octal 0400 (read-only for the owner). Written in
            # decimal so the value is the same under YAML 1.1 and 1.2 parsers.
            defaultMode: 256
        - name: tmp-storage
          emptyDir:
            sizeLimit: 4Gi  # Increased from 2Gi to handle cmdstan temp files during optimization
        - name: model-storage
          persistentVolumeClaim:
            claimName: model-storage
---
# ClusterIP Service exposing the training-service pods on port 8000.
apiVersion: v1
kind: Service
metadata:
  name: training-service
  namespace: bakery-ia
  labels:
    app.kubernetes.io/name: training-service
    app.kubernetes.io/component: microservice
spec:
  type: ClusterIP
  ports:
    - port: 8000
      targetPort: 8000
      protocol: TCP
      name: http
  # Matches the labels set on the Deployment's pod template.
  selector:
    app.kubernetes.io/name: training-service
    app.kubernetes.io/component: microservice