Add CI/CD and fix multi-pod issues

This commit is contained in:
Urtzi Alfaro
2026-01-18 09:02:27 +01:00
parent 3c4b5c2a06
commit 21d35ea92b
27 changed files with 3779 additions and 73 deletions

View File

@@ -1,10 +1,16 @@
"""
Distributed Locking Mechanisms
Prevents concurrent training jobs for the same product
HORIZONTAL SCALING FIX:
- Uses SHA256 for stable hash across all Python processes/pods
- Python's built-in hash() varies between processes due to hash randomization (Python 3.3+)
- This ensures all pods compute the same lock ID for the same lock name
"""
import asyncio
import time
import hashlib
from typing import Optional
import logging
from contextlib import asynccontextmanager
@@ -39,9 +45,20 @@ class DatabaseLock:
self.lock_id = self._hash_lock_name(lock_name)
def _hash_lock_name(self, name: str) -> int:
    """
    Convert lock name to integer ID for PostgreSQL advisory lock.

    CRITICAL: Uses SHA256 for a stable hash across all Python processes/pods.
    Python's built-in hash() varies between processes due to hash randomization
    (PYTHONHASHSEED, enabled by default since Python 3.3), which would cause
    different pods to compute different lock IDs for the same lock name,
    defeating the purpose of distributed locking.

    Args:
        name: Human-readable lock name; encoded as UTF-8 before hashing.

    Returns:
        A deterministic integer in the range [0, 2**31).
    """
    # SHA256 depends only on the input bytes, never on the interpreter's
    # per-process hash seed, so every process derives the same digest.
    digest = hashlib.sha256(name.encode('utf-8')).digest()
    # Take the first 4 bytes (big-endian) and fold them into a non-negative
    # 31-bit integer (PostgreSQL advisory locks accept a bigint key, but the
    # ID is kept within 31 bits for safety).
    return int.from_bytes(digest[:4], 'big') % (2**31)
@asynccontextmanager
async def acquire(self, session: AsyncSession):