# File: bakery-ia/shared/database/init_manager.py
# Listing metadata (from the file viewer): 381 lines, 14 KiB, Python, executable file

"""
Database Initialization Manager
Handles Alembic-based database setup from migration files that already exist:
1. Services (default): verify that migrations have been applied; never run them
2. Migration jobs: run pending migrations (alembic upgrade head)
3. Development reset: drop tables and re-apply the existing migrations
"""
import os
import asyncio
import structlog
from typing import Optional, List, Dict, Any
from pathlib import Path
from sqlalchemy import text, inspect
from sqlalchemy.ext.asyncio import AsyncSession
from alembic.config import Config
from alembic import command
from alembic.runtime.migration import MigrationContext
from alembic.script import ScriptDirectory
from .base import DatabaseManager, Base
# Module-level structlog logger. DatabaseInitManager binds the service name onto
# it per instance; create_init_manager uses it directly.
logger = structlog.get_logger()
class DatabaseInitManager:
    """
    Manages database initialization using Alembic migrations exclusively.

    Two modes:
    1. Migration mode (for migration jobs): Runs alembic upgrade head
    2. Verification mode (for services): Only verifies database is ready
    """

    def __init__(
        self,
        database_manager: DatabaseManager,
        service_name: str,
        alembic_ini_path: Optional[str] = None,
        models_module: Optional[str] = None,
        verify_only: bool = True,  # Default: services only verify
        force_recreate: bool = False
    ):
        """
        Args:
            database_manager: Provides get_session(), async_engine and database_url.
            service_name: Used in error messages and bound to the logger.
            alembic_ini_path: Path to alembic.ini; both modes fail if it is
                missing or does not exist on disk.
            models_module: Dotted path of the service's models module. Stored on
                the instance; no method of this class reads it.
            verify_only: True = only verify the database, False = run migrations.
            force_recreate: Drop tables and re-apply migrations. Only honoured
                when verify_only is False.
        """
        self.database_manager = database_manager
        self.service_name = service_name
        self.alembic_ini_path = alembic_ini_path
        self.models_module = models_module
        self.verify_only = verify_only
        self.force_recreate = force_recreate
        self.logger = logger.bind(service=service_name)

    async def initialize_database(self) -> Dict[str, Any]:
        """
        Main initialization method.

        Two modes:
        1. verify_only=True (default, for services):
           - Verifies database is ready
           - Checks tables exist
           - Checks alembic_version exists
           - DOES NOT run migrations
        2. verify_only=False (for migration jobs only):
           - Runs alembic upgrade head
           - Applies pending migrations
           - Can force recreate if needed

        Returns:
            Result dict produced by the selected mode.
        """
        if self.verify_only:
            if self.force_recreate:
                # Previously this combination was ignored without any trace.
                self.logger.warning(
                    "force_recreate is ignored in verification mode"
                )
            self.logger.info("Database verification mode - checking database is ready")
            return await self._verify_database_ready()

        self.logger.info("Migration mode - running database migrations")
        return await self._run_migrations_mode()

    def _require_alembic_config(self) -> None:
        """Raise if no alembic.ini path is configured or the file does not exist."""
        if not self.alembic_ini_path or not os.path.exists(self.alembic_ini_path):
            raise Exception(f"Alembic configuration not found at {self.alembic_ini_path}")

    async def _verify_database_ready(self) -> Dict[str, Any]:
        """
        Verify database is ready for service startup.

        Services should NOT run migrations - only verify they've been applied.

        Returns:
            Dict with "action", "message", "current_revision",
            "migration_count" and "table_count".

        Raises:
            Exception: if the alembic config is missing, no migration files
                exist, the database has no tables, the alembic_version table
                is missing, or it holds no revision.
        """
        try:
            # Check alembic configuration exists
            self._require_alembic_config()

            # Check database state
            db_state = await self._check_database_state()
            self.logger.info("Database state checked", state=db_state)

            # Verify migrations exist
            if not db_state["has_migrations"]:
                raise Exception(
                    f"No migration files found for {self.service_name}. "
                    "Migrations must be generated and included in the Docker image."
                )

            # Verify database is not empty
            if db_state["is_empty"]:
                raise Exception(
                    "Database is empty. Migration job must run before service startup. "
                    "Ensure migration job completes successfully before starting services."
                )

            # Verify alembic_version table exists
            if not db_state["has_alembic_version"]:
                raise Exception(
                    "No alembic_version table found. Migration job must run before service startup."
                )

            # Verify current revision exists
            if not db_state["current_revision"]:
                raise Exception(
                    "No current migration revision found. Database may not be properly initialized."
                )

            table_count = len(db_state["existing_tables"])
            self.logger.info(
                "Database verification successful",
                migration_count=db_state["migration_count"],
                current_revision=db_state["current_revision"],
                table_count=table_count
            )
            return {
                "action": "verified",
                "message": "Database verified successfully - ready for service",
                "current_revision": db_state["current_revision"],
                "migration_count": db_state["migration_count"],
                "table_count": table_count
            }
        except Exception as e:
            self.logger.error("Database verification failed", error=str(e))
            raise

    async def _run_migrations_mode(self) -> Dict[str, Any]:
        """
        Run migrations mode - for migration jobs only.

        Returns:
            Result dict from the force-recreate or the normal migration path.

        Raises:
            Exception: if the alembic config is missing, no migration files
                exist (normal path only), or running migrations fails.
        """
        try:
            self._require_alembic_config()

            # Check current database state
            db_state = await self._check_database_state()
            self.logger.info("Database state checked", state=db_state)

            # Handle force recreate (takes precedence over the migration-file check)
            if self.force_recreate:
                return await self._handle_force_recreate()

            # Check migrations exist
            if not db_state["has_migrations"]:
                raise Exception(
                    f"No migration files found for {self.service_name}. "
                    "Generate migrations using regenerate_migrations_k8s.sh script."
                )

            # Run migrations
            result = await self._handle_run_migrations()
            self.logger.info("Migration mode completed", result=result)
            return result
        except Exception as e:
            self.logger.error("Migration mode failed", error=str(e))
            raise

    async def _check_database_state(self) -> Dict[str, Any]:
        """
        Check the current state of migrations.

        Best-effort: any error is logged as a warning and the fields not yet
        filled in keep their defaults.

        Returns:
            Dict with keys "has_migrations", "migration_count", "is_empty",
            "existing_tables", "has_alembic_version" and "current_revision".
        """
        state: Dict[str, Any] = {
            "has_migrations": False,
            "migration_count": 0,
            "is_empty": False,
            "existing_tables": [],
            "has_alembic_version": False,
            "current_revision": None
        }
        try:
            # Check if migration files exist (skip __init__.py and other
            # underscore-prefixed modules)
            migrations_dir = self._get_migrations_versions_dir()
            if migrations_dir.exists():
                migration_files = [
                    f for f in migrations_dir.glob("*.py")
                    if not f.name.startswith("_")
                ]
                state["migration_count"] = len(migration_files)
                state["has_migrations"] = len(migration_files) > 0
                self.logger.info("Found migration files", count=len(migration_files))

            # Check database tables
            async with self.database_manager.get_session() as session:
                existing_tables = await self._get_existing_tables(session)
                state["existing_tables"] = existing_tables
                state["is_empty"] = len(existing_tables) == 0

                # Check alembic_version table
                if "alembic_version" in existing_tables:
                    state["has_alembic_version"] = True
                    result = await session.execute(text("SELECT version_num FROM alembic_version"))
                    state["current_revision"] = result.scalar()
        except Exception as e:
            self.logger.warning("Error checking database state", error=str(e))
        return state

    async def _handle_run_migrations(self) -> Dict[str, Any]:
        """Handle normal migration scenario - run pending migrations"""
        self.logger.info("Running pending migrations")
        try:
            await self._run_migrations()
            return {
                "action": "migrations_applied",
                "message": "Pending migrations applied successfully"
            }
        except Exception as e:
            self.logger.error("Failed to run migrations", error=str(e))
            raise

    async def _handle_force_recreate(self) -> Dict[str, Any]:
        """Handle development reset scenario - drop and recreate tables using existing migrations"""
        self.logger.info("Force recreate: dropping tables and rerunning migrations")
        try:
            # Drop all tables (including Alembic's version table, so the
            # upgrade below really starts from scratch)
            await self._drop_all_tables()

            # Apply migrations from scratch
            await self._run_migrations()
            return {
                "action": "force_recreate",
                "tables_dropped": True,
                "migrations_applied": True,
                "message": "Database recreated from existing migrations"
            }
        except Exception as e:
            self.logger.error("Failed to force recreate", error=str(e))
            raise

    async def _run_migrations(self) -> None:
        """
        Run pending Alembic migrations (upgrade head).

        The blocking Alembic call runs in the default executor. While it runs,
        the process working directory is switched to the directory containing
        alembic.ini and restored afterwards.
        """
        try:
            # Resolve to an absolute path BEFORE changing directory; a relative
            # path such as "services/x/alembic.ini" would otherwise be looked
            # up relative to the new working directory and not be found.
            ini_path = Path(self.alembic_ini_path).resolve()
            alembic_dir = ini_path.parent

            # set_main_option goes through ConfigParser interpolation, so a raw
            # '%' (e.g. from a URL-encoded password) must be escaped as '%%'.
            database_url = str(self.database_manager.database_url).replace("%", "%%")

            def run_alembic_upgrade():
                original_cwd = os.getcwd()
                try:
                    # Ensure we're in the correct working directory
                    os.chdir(alembic_dir)
                    alembic_cfg = Config(str(ini_path))
                    # Set the SQLAlchemy URL from the database manager
                    alembic_cfg.set_main_option("sqlalchemy.url", database_url)
                    # Run upgrade
                    command.upgrade(alembic_cfg, "head")
                finally:
                    os.chdir(original_cwd)

            # Run in executor to avoid blocking the event loop
            await asyncio.get_running_loop().run_in_executor(None, run_alembic_upgrade)
            self.logger.info("Migrations applied successfully")
        except Exception as e:
            self.logger.error("Failed to run migrations", error=str(e))
            raise

    async def _drop_all_tables(self) -> None:
        """
        Drop all tables (for development reset).

        Drops the tables registered on Base.metadata and then the
        alembic_version table, all within one engine.begin() block.
        """
        try:
            async with self.database_manager.async_engine.begin() as conn:
                # NOTE(review): drop_all only covers tables registered on
                # Base.metadata - confirm the models are imported by the caller.
                await conn.run_sync(Base.metadata.drop_all)
                # alembic_version is not part of Base.metadata. If it survived,
                # Alembic would still consider the database up to date and the
                # following upgrade would recreate nothing.
                await conn.execute(text("DROP TABLE IF EXISTS alembic_version"))
            self.logger.info("All tables dropped")
        except Exception as e:
            self.logger.error("Failed to drop tables", error=str(e))
            raise

    def _get_migrations_versions_dir(self) -> Path:
        """Get the migrations/versions directory path (next to alembic.ini)"""
        alembic_path = Path(self.alembic_ini_path).parent
        return alembic_path / "migrations" / "versions"

    async def _get_existing_tables(self, session: AsyncSession) -> List[str]:
        """Get list of existing tables in the database"""
        def get_tables_sync(connection):
            # Runs against the sync connection handed over by run_sync.
            return inspect(connection).get_table_names()

        connection = await session.connection()
        return await connection.run_sync(get_tables_sync)
def create_init_manager(
    database_manager: DatabaseManager,
    service_name: str,
    service_path: Optional[str] = None,
    verify_only: bool = True,
    force_recreate: bool = False
) -> DatabaseInitManager:
    """
    Build a DatabaseInitManager, working out the Alembic paths automatically.

    Args:
        database_manager: DatabaseManager instance
        service_name: Name of the service
        service_path: Path to service directory (auto-detected if None)
        verify_only: True = verify DB ready (services), False = run migrations (jobs only)
        force_recreate: Force recreate tables (requires verify_only=False)

    Returns:
        A DatabaseInitManager. Its alembic_ini_path is None when no
        alembic.ini exists at the expected location (a warning is logged).
    """
    # Without an explicit path, an alembic.ini in the working directory means
    # the Docker container layout; otherwise use the development layout.
    root = service_path
    if root is None:
        root = "." if os.path.exists("alembic.ini") else f"services/{service_name}"

    container_layout = root == "."
    ini_candidate = "alembic.ini" if container_layout else f"{root}/alembic.ini"
    models_dotted_path = (
        "app.models" if container_layout else f"services.{service_name}.app.models"
    )

    # Pass None instead of a path that does not exist on disk.
    ini_path: Optional[str] = ini_candidate
    if not os.path.exists(ini_candidate):
        logger.warning("Alembic config not found", path=ini_candidate)
        ini_path = None

    manager_kwargs = {
        "database_manager": database_manager,
        "service_name": service_name,
        "alembic_ini_path": ini_path,
        "models_module": models_dotted_path,
        "verify_only": verify_only,
        "force_recreate": force_recreate,
    }
    return DatabaseInitManager(**manager_kwargs)
async def initialize_service_database(
    database_manager: DatabaseManager,
    service_name: str,
    verify_only: bool = True,
    force_recreate: bool = False
) -> Dict[str, Any]:
    """
    One-call helper: build an init manager and run database initialization.

    Args:
        database_manager: DatabaseManager instance
        service_name: Name of the service
        verify_only: True = verify DB ready (default, services), False = run migrations (jobs only)
        force_recreate: Force recreate tables (requires verify_only=False)

    Returns:
        Dict with initialization results
    """
    # Paths are auto-detected by the factory (service_path is left at its default).
    manager = create_init_manager(
        database_manager,
        service_name,
        verify_only=verify_only,
        force_recreate=force_recreate,
    )
    outcome = await manager.initialize_database()
    return outcome