61 lines
2.0 KiB
Python
61 lines
2.0 KiB
Python
"""Add horizontal scaling constraints for multi-pod deployment
|
|
|
|
Revision ID: add_horizontal_scaling
|
|
Revises: 26a665cd5348
|
|
Create Date: 2025-01-18
|
|
|
|
This migration adds database-level constraints to prevent race conditions
|
|
when running multiple training service pods:
|
|
|
|
1. Partial unique index on model_training_logs to prevent duplicate active jobs per tenant
|
|
2. Index to speed up active job lookups
|
|
"""
|
|
from typing import Sequence, Union
|
|
|
|
from alembic import op
|
|
import sqlalchemy as sa
|
|
|
|
|
|
# Revision identifiers, used by Alembic.
# `revision` is this migration's own ID; `down_revision` is the ID of the
# migration it is applied on top of.
revision: str = 'add_horizontal_scaling'
down_revision: Union[str, None] = '26a665cd5348'
# No branch labels and no dependencies on other revisions are declared.
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
|
|
|
|
|
|
def upgrade() -> None:
    """Create the indexes on ``model_training_logs`` used for multi-pod safety.

    Three indexes are created, each guarded so that an already-existing
    index with the same name is left in place:

    * ``idx_unique_active_training_per_tenant`` -- partial unique index on
      ``tenant_id`` restricted to rows whose status is 'pending' or 'running'.
    * ``idx_training_logs_tenant_status`` -- non-unique index on
      ``(tenant_id, status)``.
    * ``idx_training_logs_status_updated`` -- non-unique index on
      ``(status, updated_at)``.
    """
    # Partial unique index: at most ONE row per tenant may be in 'pending' or
    # 'running' status at a time. Enforcing this in the database (rather than
    # in application code) closes the check-then-insert race between multiple
    # pods creating jobs simultaneously.
    # NOTE(review): building a unique index fails if the table already holds
    # more than one pending/running row for the same tenant -- confirm the
    # existing data is clean before running this migration.
    op.execute("""
        CREATE UNIQUE INDEX IF NOT EXISTS idx_unique_active_training_per_tenant
        ON model_training_logs (tenant_id)
        WHERE status IN ('pending', 'running')
    """)

    # Speeds up active job lookups (used by the deduplication check).
    op.create_index(
        'idx_training_logs_tenant_status',
        'model_training_logs',
        ['tenant_id', 'status'],
        unique=False,
        if_not_exists=True,
    )

    # Speeds up job recovery queries (finding stale running jobs).
    op.create_index(
        'idx_training_logs_status_updated',
        'model_training_logs',
        ['status', 'updated_at'],
        unique=False,
        if_not_exists=True,
    )
|
|
|
|
|
|
def downgrade() -> None:
    """Drop the three indexes created by ``upgrade()``.

    The indexes are removed in the reverse of their creation order. Each
    statement uses ``IF EXISTS`` so a missing index does not abort the
    downgrade.
    """
    op.execute("DROP INDEX IF EXISTS idx_training_logs_status_updated")
    op.execute("DROP INDEX IF EXISTS idx_training_logs_tenant_status")
    op.execute("DROP INDEX IF EXISTS idx_unique_active_training_per_tenant")
|