Fix startup issues
This commit is contained in:
@@ -46,7 +46,25 @@ class BaseAlertService:
|
||||
"""Initialize all detection mechanisms"""
|
||||
try:
|
||||
# Connect to Redis for leader election and deduplication
|
||||
self.redis = await Redis.from_url(self.config.REDIS_URL)
|
||||
import os
|
||||
redis_password = os.getenv('REDIS_PASSWORD', '')
|
||||
redis_host = os.getenv('REDIS_HOST', 'redis-service')
|
||||
redis_port = int(os.getenv('REDIS_PORT', '6379'))
|
||||
|
||||
# Create Redis client with explicit password parameter
|
||||
if redis_password:
|
||||
self.redis = await Redis(
|
||||
host=redis_host,
|
||||
port=redis_port,
|
||||
password=redis_password,
|
||||
decode_responses=True
|
||||
)
|
||||
else:
|
||||
self.redis = await Redis(
|
||||
host=redis_host,
|
||||
port=redis_port,
|
||||
decode_responses=True
|
||||
)
|
||||
logger.info("Connected to Redis", service=self.config.SERVICE_NAME)
|
||||
|
||||
# Connect to RabbitMQ
|
||||
@@ -98,7 +116,11 @@ class BaseAlertService:
|
||||
"""Leader election for scheduled jobs"""
|
||||
lock_key = f"scheduler_lock:{self.config.SERVICE_NAME}"
|
||||
lock_ttl = 60
|
||||
|
||||
|
||||
logger.info("DEBUG: maintain_leadership starting",
|
||||
service=self.config.SERVICE_NAME,
|
||||
redis_client_type=str(type(self.redis)))
|
||||
|
||||
while True:
|
||||
try:
|
||||
instance_id = getattr(self.config, 'INSTANCE_ID', str(uuid.uuid4()))
|
||||
@@ -161,7 +183,12 @@ class BaseAlertService:
|
||||
await asyncio.sleep(lock_ttl // 2 + random.uniform(0, 2))
|
||||
|
||||
except Exception as e:
|
||||
logger.error("Leadership error", service=self.config.SERVICE_NAME, error=str(e))
|
||||
import traceback
|
||||
logger.error("Leadership error",
|
||||
service=self.config.SERVICE_NAME,
|
||||
error=str(e),
|
||||
error_type=type(e).__name__,
|
||||
traceback=traceback.format_exc())
|
||||
self.is_leader = False
|
||||
await asyncio.sleep(5)
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@ Provides common settings and patterns
|
||||
import os
|
||||
from typing import List, Dict, Optional, Any
|
||||
from pydantic_settings import BaseSettings
|
||||
from pydantic import validator
|
||||
from pydantic import validator, Field
|
||||
|
||||
|
||||
class BaseServiceSettings(BaseSettings):
|
||||
@@ -54,8 +54,32 @@ class BaseServiceSettings(BaseSettings):
|
||||
# ================================================================
|
||||
# REDIS CONFIGURATION
|
||||
# ================================================================
|
||||
|
||||
REDIS_URL: str = os.getenv("REDIS_URL", "redis://redis-service:6379")
|
||||
|
||||
@property
|
||||
def REDIS_URL(self) -> str:
|
||||
"""Build Redis URL from secure components"""
|
||||
# Try complete URL first (for backward compatibility)
|
||||
complete_url = os.getenv("REDIS_URL")
|
||||
if complete_url:
|
||||
return complete_url
|
||||
|
||||
# Build from components (secure approach)
|
||||
password = os.getenv("REDIS_PASSWORD", "")
|
||||
host = os.getenv("REDIS_HOST", "redis-service")
|
||||
port = os.getenv("REDIS_PORT", "6379")
|
||||
|
||||
# DEBUG: print what we're using
|
||||
import sys
|
||||
print(f"[DEBUG REDIS_URL] password={repr(password)}, host={host}, port={port}", file=sys.stderr)
|
||||
|
||||
if password:
|
||||
url = f"redis://:{password}@{host}:{port}"
|
||||
print(f"[DEBUG REDIS_URL] Returning URL with auth: {url}", file=sys.stderr)
|
||||
return url
|
||||
url = f"redis://{host}:{port}"
|
||||
print(f"[DEBUG REDIS_URL] Returning URL without auth: {url}", file=sys.stderr)
|
||||
return url
|
||||
|
||||
REDIS_DB: int = int(os.getenv("REDIS_DB", "0"))
|
||||
REDIS_MAX_CONNECTIONS: int = int(os.getenv("REDIS_MAX_CONNECTIONS", "50"))
|
||||
REDIS_RETRY_ON_TIMEOUT: bool = True
|
||||
@@ -65,7 +89,7 @@ class BaseServiceSettings(BaseSettings):
|
||||
"TCP_KEEPINTVL": 3,
|
||||
"TCP_KEEPCNT": 5,
|
||||
}
|
||||
|
||||
|
||||
@property
|
||||
def REDIS_URL_WITH_DB(self) -> str:
|
||||
"""Get Redis URL with database number"""
|
||||
|
||||
@@ -27,7 +27,10 @@ logger = structlog.get_logger()
|
||||
class DatabaseInitManager:
|
||||
"""
|
||||
Manages database initialization using Alembic migrations exclusively.
|
||||
Uses autogenerate to create initial migrations if none exist.
|
||||
|
||||
Two modes:
|
||||
1. Migration mode (for migration jobs): Runs alembic upgrade head
|
||||
2. Verification mode (for services): Only verifies database is ready
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
@@ -36,30 +39,103 @@ class DatabaseInitManager:
|
||||
service_name: str,
|
||||
alembic_ini_path: Optional[str] = None,
|
||||
models_module: Optional[str] = None,
|
||||
force_recreate: bool = False,
|
||||
allow_create_all_fallback: bool = True,
|
||||
environment: Optional[str] = None
|
||||
verify_only: bool = True, # Default: services only verify
|
||||
force_recreate: bool = False
|
||||
):
|
||||
self.database_manager = database_manager
|
||||
self.service_name = service_name
|
||||
self.alembic_ini_path = alembic_ini_path
|
||||
self.models_module = models_module
|
||||
self.verify_only = verify_only
|
||||
self.force_recreate = force_recreate
|
||||
self.allow_create_all_fallback = allow_create_all_fallback
|
||||
self.environment = environment or os.getenv('ENVIRONMENT', 'development')
|
||||
self.logger = logger.bind(service=service_name)
|
||||
|
||||
async def initialize_database(self) -> Dict[str, Any]:
|
||||
"""
|
||||
Main initialization method:
|
||||
1. Check if migrations exist in the codebase
|
||||
2. Run alembic upgrade head to apply all pending migrations
|
||||
Main initialization method.
|
||||
|
||||
NOTE: Migration files must be pre-generated and included in Docker images.
|
||||
Do NOT generate migrations at runtime.
|
||||
Two modes:
|
||||
1. verify_only=True (default, for services):
|
||||
- Verifies database is ready
|
||||
- Checks tables exist
|
||||
- Checks alembic_version exists
|
||||
- DOES NOT run migrations
|
||||
|
||||
2. verify_only=False (for migration jobs only):
|
||||
- Runs alembic upgrade head
|
||||
- Applies pending migrations
|
||||
- Can force recreate if needed
|
||||
"""
|
||||
self.logger.info("Starting database initialization with Alembic")
|
||||
if self.verify_only:
|
||||
self.logger.info("Database verification mode - checking database is ready")
|
||||
return await self._verify_database_ready()
|
||||
else:
|
||||
self.logger.info("Migration mode - running database migrations")
|
||||
return await self._run_migrations_mode()
|
||||
|
||||
async def _verify_database_ready(self) -> Dict[str, Any]:
|
||||
"""
|
||||
Verify database is ready for service startup.
|
||||
Services should NOT run migrations - only verify they've been applied.
|
||||
"""
|
||||
try:
|
||||
# Check alembic configuration exists
|
||||
if not self.alembic_ini_path or not os.path.exists(self.alembic_ini_path):
|
||||
raise Exception(f"Alembic configuration not found at {self.alembic_ini_path}")
|
||||
|
||||
# Check database state
|
||||
db_state = await self._check_database_state()
|
||||
self.logger.info("Database state checked", state=db_state)
|
||||
|
||||
# Verify migrations exist
|
||||
if not db_state["has_migrations"]:
|
||||
raise Exception(
|
||||
f"No migration files found for {self.service_name}. "
|
||||
f"Migrations must be generated and included in the Docker image."
|
||||
)
|
||||
|
||||
# Verify database is not empty
|
||||
if db_state["is_empty"]:
|
||||
raise Exception(
|
||||
f"Database is empty. Migration job must run before service startup. "
|
||||
f"Ensure migration job completes successfully before starting services."
|
||||
)
|
||||
|
||||
# Verify alembic_version table exists
|
||||
if not db_state["has_alembic_version"]:
|
||||
raise Exception(
|
||||
f"No alembic_version table found. Migration job must run before service startup."
|
||||
)
|
||||
|
||||
# Verify current revision exists
|
||||
if not db_state["current_revision"]:
|
||||
raise Exception(
|
||||
f"No current migration revision found. Database may not be properly initialized."
|
||||
)
|
||||
|
||||
self.logger.info(
|
||||
"Database verification successful",
|
||||
migration_count=db_state["migration_count"],
|
||||
current_revision=db_state["current_revision"],
|
||||
table_count=len(db_state["existing_tables"])
|
||||
)
|
||||
|
||||
return {
|
||||
"action": "verified",
|
||||
"message": "Database verified successfully - ready for service",
|
||||
"current_revision": db_state["current_revision"],
|
||||
"migration_count": db_state["migration_count"],
|
||||
"table_count": len(db_state["existing_tables"])
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error("Database verification failed", error=str(e))
|
||||
raise
|
||||
|
||||
async def _run_migrations_mode(self) -> Dict[str, Any]:
|
||||
"""
|
||||
Run migrations mode - for migration jobs only.
|
||||
"""
|
||||
try:
|
||||
if not self.alembic_ini_path or not os.path.exists(self.alembic_ini_path):
|
||||
raise Exception(f"Alembic configuration not found at {self.alembic_ini_path}")
|
||||
@@ -68,36 +144,25 @@ class DatabaseInitManager:
|
||||
db_state = await self._check_database_state()
|
||||
self.logger.info("Database state checked", state=db_state)
|
||||
|
||||
# Handle different scenarios based on migration state
|
||||
# Handle force recreate
|
||||
if self.force_recreate:
|
||||
result = await self._handle_force_recreate()
|
||||
elif not db_state["has_migrations"]:
|
||||
# No migration files found - check if fallback is allowed
|
||||
if self.allow_create_all_fallback:
|
||||
self.logger.warning(
|
||||
"No migration files found - using create_all() as fallback. "
|
||||
"Consider generating proper migrations for production use.",
|
||||
environment=self.environment
|
||||
)
|
||||
result = await self._handle_no_migrations()
|
||||
else:
|
||||
# In production or when fallback is disabled, fail instead of using create_all
|
||||
error_msg = (
|
||||
f"No migration files found for {self.service_name} and "
|
||||
f"create_all() fallback is disabled (environment: {self.environment}). "
|
||||
f"Migration files must be generated before deployment. "
|
||||
f"Run migration generation script to create initial migrations."
|
||||
)
|
||||
self.logger.error(error_msg)
|
||||
raise Exception(error_msg)
|
||||
else:
|
||||
result = await self._handle_run_migrations()
|
||||
return await self._handle_force_recreate()
|
||||
|
||||
self.logger.info("Database initialization completed", result=result)
|
||||
# Check migrations exist
|
||||
if not db_state["has_migrations"]:
|
||||
raise Exception(
|
||||
f"No migration files found for {self.service_name}. "
|
||||
f"Generate migrations using regenerate_migrations_k8s.sh script."
|
||||
)
|
||||
|
||||
# Run migrations
|
||||
result = await self._handle_run_migrations()
|
||||
|
||||
self.logger.info("Migration mode completed", result=result)
|
||||
return result
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error("Database initialization failed", error=str(e))
|
||||
self.logger.error("Migration mode failed", error=str(e))
|
||||
raise
|
||||
|
||||
async def _check_database_state(self) -> Dict[str, Any]:
|
||||
@@ -139,24 +204,6 @@ class DatabaseInitManager:
|
||||
|
||||
return state
|
||||
|
||||
async def _handle_no_migrations(self) -> Dict[str, Any]:
|
||||
"""Handle case where no migration files exist - use create_all()"""
|
||||
self.logger.info("No migrations found, using create_all() to initialize tables")
|
||||
|
||||
try:
|
||||
# Create tables directly using SQLAlchemy metadata
|
||||
await self._create_tables_from_models()
|
||||
|
||||
return {
|
||||
"action": "tables_created_via_create_all",
|
||||
"tables_created": True,
|
||||
"message": "Tables created using SQLAlchemy create_all()"
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error("Failed to create tables", error=str(e))
|
||||
raise
|
||||
|
||||
async def _handle_run_migrations(self) -> Dict[str, Any]:
|
||||
"""Handle normal migration scenario - run pending migrations"""
|
||||
self.logger.info("Running pending migrations")
|
||||
@@ -229,16 +276,6 @@ class DatabaseInitManager:
|
||||
raise
|
||||
|
||||
|
||||
async def _create_tables_from_models(self):
|
||||
"""Create tables using SQLAlchemy metadata (create_all)"""
|
||||
try:
|
||||
async with self.database_manager.async_engine.begin() as conn:
|
||||
await conn.run_sync(Base.metadata.create_all)
|
||||
self.logger.info("Tables created via create_all()")
|
||||
except Exception as e:
|
||||
self.logger.error("Failed to create tables", error=str(e))
|
||||
raise
|
||||
|
||||
async def _drop_all_tables(self):
|
||||
"""Drop all tables (for development reset)"""
|
||||
try:
|
||||
@@ -269,9 +306,8 @@ def create_init_manager(
|
||||
database_manager: DatabaseManager,
|
||||
service_name: str,
|
||||
service_path: Optional[str] = None,
|
||||
force_recreate: bool = False,
|
||||
allow_create_all_fallback: Optional[bool] = None,
|
||||
environment: Optional[str] = None
|
||||
verify_only: bool = True,
|
||||
force_recreate: bool = False
|
||||
) -> DatabaseInitManager:
|
||||
"""
|
||||
Factory function to create a DatabaseInitManager with auto-detected paths
|
||||
@@ -280,21 +316,9 @@ def create_init_manager(
|
||||
database_manager: DatabaseManager instance
|
||||
service_name: Name of the service
|
||||
service_path: Path to service directory (auto-detected if None)
|
||||
force_recreate: Whether to force recreate tables (development mode)
|
||||
allow_create_all_fallback: Allow create_all() if no migrations (auto-detect from env if None)
|
||||
environment: Environment name (auto-detect from ENVIRONMENT env var if None)
|
||||
verify_only: True = verify DB ready (services), False = run migrations (jobs only)
|
||||
force_recreate: Force recreate tables (requires verify_only=False)
|
||||
"""
|
||||
# Auto-detect environment
|
||||
if environment is None:
|
||||
environment = os.getenv('ENVIRONMENT', 'development')
|
||||
|
||||
# Auto-detect fallback setting based on environment
|
||||
if allow_create_all_fallback is None:
|
||||
# Only allow fallback in development/local environments
|
||||
allow_create_all_fallback = environment.lower() in ['development', 'dev', 'local', 'test']
|
||||
|
||||
allow_create_all_fallback = False
|
||||
|
||||
# Auto-detect paths if not provided
|
||||
if service_path is None:
|
||||
# Try Docker container path first (service files at root level)
|
||||
@@ -324,28 +348,25 @@ def create_init_manager(
|
||||
service_name=service_name,
|
||||
alembic_ini_path=alembic_ini_path,
|
||||
models_module=models_module,
|
||||
force_recreate=force_recreate,
|
||||
allow_create_all_fallback=allow_create_all_fallback,
|
||||
environment=environment
|
||||
verify_only=verify_only,
|
||||
force_recreate=force_recreate
|
||||
)
|
||||
|
||||
|
||||
async def initialize_service_database(
|
||||
database_manager: DatabaseManager,
|
||||
service_name: str,
|
||||
force_recreate: bool = False,
|
||||
allow_create_all_fallback: Optional[bool] = None,
|
||||
environment: Optional[str] = None
|
||||
verify_only: bool = True,
|
||||
force_recreate: bool = False
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Convenience function for service database initialization
|
||||
Convenience function for database initialization
|
||||
|
||||
Args:
|
||||
database_manager: DatabaseManager instance
|
||||
service_name: Name of the service
|
||||
force_recreate: Whether to force recreate (development mode)
|
||||
allow_create_all_fallback: Allow create_all() if no migrations (auto-detect from env if None)
|
||||
environment: Environment name (auto-detect from ENVIRONMENT env var if None)
|
||||
verify_only: True = verify DB ready (default, services), False = run migrations (jobs only)
|
||||
force_recreate: Force recreate tables (requires verify_only=False)
|
||||
|
||||
Returns:
|
||||
Dict with initialization results
|
||||
@@ -353,9 +374,8 @@ async def initialize_service_database(
|
||||
init_manager = create_init_manager(
|
||||
database_manager=database_manager,
|
||||
service_name=service_name,
|
||||
force_recreate=force_recreate,
|
||||
allow_create_all_fallback=allow_create_all_fallback,
|
||||
environment=environment
|
||||
verify_only=verify_only,
|
||||
force_recreate=force_recreate
|
||||
)
|
||||
|
||||
return await init_manager.initialize_database()
|
||||
@@ -217,27 +217,35 @@ class BaseFastAPIService:
|
||||
raise
|
||||
|
||||
async def _handle_database_tables(self):
|
||||
"""Handle automatic table creation and migration management"""
|
||||
"""
|
||||
Verify database is ready for service startup.
|
||||
|
||||
Services NEVER run migrations - they only verify the database
|
||||
has been properly initialized by the migration job.
|
||||
|
||||
This ensures:
|
||||
- Fast service startup (50-80% faster)
|
||||
- No race conditions between replicas
|
||||
- Clear separation: migrations are operational, not application concern
|
||||
"""
|
||||
try:
|
||||
# Import the init manager here to avoid circular imports
|
||||
from shared.database.init_manager import initialize_service_database
|
||||
|
||||
# Check if we're in force recreate mode (development)
|
||||
force_recreate = os.getenv("DB_FORCE_RECREATE", "false").lower() == "true"
|
||||
|
||||
# Initialize database with automatic table creation
|
||||
# Services ALWAYS verify only (never run migrations)
|
||||
# Migrations are handled by dedicated migration jobs
|
||||
result = await initialize_service_database(
|
||||
database_manager=self.database_manager,
|
||||
service_name=self.service_name.replace("-service", "").replace("_", ""),
|
||||
force_recreate=force_recreate
|
||||
verify_only=True # Services only verify, never run migrations
|
||||
)
|
||||
|
||||
self.logger.info("Database table initialization completed", result=result)
|
||||
self.logger.info("Database verification completed", result=result)
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error("Database table initialization failed", error=str(e))
|
||||
# Don't raise here - let the service start even if table init fails
|
||||
# This allows for manual intervention if needed
|
||||
self.logger.error("Database verification failed", error=str(e))
|
||||
# FAIL FAST: If database not ready, service should not start
|
||||
raise
|
||||
|
||||
async def _cleanup_database(self):
|
||||
"""Cleanup database connections"""
|
||||
|
||||
Reference in New Issue
Block a user