"""Add horizontal scaling constraints for multi-pod deployment Revision ID: add_horizontal_scaling Revises: 26a665cd5348 Create Date: 2025-01-18 This migration adds database-level constraints to prevent race conditions when running multiple training service pods: 1. Partial unique index on model_training_logs to prevent duplicate active jobs per tenant 2. Index to speed up active job lookups """ from typing import Sequence, Union from alembic import op import sqlalchemy as sa # revision identifiers, used by Alembic. revision: str = 'add_horizontal_scaling' down_revision: Union[str, None] = '26a665cd5348' branch_labels: Union[str, Sequence[str], None] = None depends_on: Union[str, Sequence[str], None] = None def upgrade() -> None: # Add partial unique index to prevent duplicate active training jobs per tenant # This ensures only ONE job can be in 'pending' or 'running' status per tenant at a time # The constraint is enforced at the database level, preventing race conditions # between multiple pods checking and creating jobs simultaneously op.execute(""" CREATE UNIQUE INDEX IF NOT EXISTS idx_unique_active_training_per_tenant ON model_training_logs (tenant_id) WHERE status IN ('pending', 'running') """) # Add index to speed up active job lookups (used by deduplication check) op.create_index( 'idx_training_logs_tenant_status', 'model_training_logs', ['tenant_id', 'status'], unique=False, if_not_exists=True ) # Add index for job recovery queries (find stale running jobs) op.create_index( 'idx_training_logs_status_updated', 'model_training_logs', ['status', 'updated_at'], unique=False, if_not_exists=True ) def downgrade() -> None: # Remove the indexes in reverse order op.execute("DROP INDEX IF EXISTS idx_training_logs_status_updated") op.execute("DROP INDEX IF EXISTS idx_training_logs_tenant_status") op.execute("DROP INDEX IF EXISTS idx_unique_active_training_per_tenant")