Fix Alembic issue

This commit is contained in:
Urtzi Alfaro
2025-10-01 11:24:06 +02:00
parent 7cc4b957a5
commit 2eeebfc1e0
62 changed files with 6114 additions and 3676 deletions

View File

@@ -1,7 +1,6 @@
"""Alembic environment configuration for training service"""
import asyncio
import logging
import os
import sys
from logging.config import fileConfig
@@ -25,7 +24,7 @@ try:
from shared.database.base import Base
# Import all models to ensure they are registered with Base.metadata
from app.models import * # Import all models
from app.models import * # noqa: F401, F403
except ImportError as e:
print(f"Import error in migrations env.py: {e}")
@@ -35,12 +34,19 @@ except ImportError as e:
# this is the Alembic Config object
config = context.config
# Set database URL from environment variables or settings
# Try service-specific DATABASE_URL first, then fall back to generic
database_url = os.getenv('TRAINING_DATABASE_URL') or os.getenv('DATABASE_URL')
# Determine service name from file path
service_name = os.path.basename(os.path.dirname(os.path.dirname(__file__)))
service_name_upper = service_name.upper().replace('-', '_')
# Set database URL from environment variables with multiple fallback strategies
database_url = (
os.getenv(f'{service_name_upper}_DATABASE_URL') or # Service-specific
os.getenv('DATABASE_URL') # Generic fallback
)
# If DATABASE_URL is not set, construct from individual components
if not database_url:
# Try generic PostgreSQL environment variables first
postgres_host = os.getenv('POSTGRES_HOST')
postgres_port = os.getenv('POSTGRES_PORT', '5432')
postgres_db = os.getenv('POSTGRES_DB')
@@ -50,11 +56,28 @@ if not database_url:
if all([postgres_host, postgres_db, postgres_user, postgres_password]):
database_url = f"postgresql+asyncpg://{postgres_user}:{postgres_password}@{postgres_host}:{postgres_port}/{postgres_db}"
else:
# Fallback to settings
database_url = getattr(settings, 'DATABASE_URL', None)
# Try service-specific environment variables
db_host = os.getenv(f'{service_name_upper}_DB_HOST', f'{service_name}-db-service')
db_port = os.getenv(f'{service_name_upper}_DB_PORT', '5432')
db_name = os.getenv(f'{service_name_upper}_DB_NAME', f'{service_name.replace("-", "_")}_db')
db_user = os.getenv(f'{service_name_upper}_DB_USER', f'{service_name.replace("-", "_")}_user')
db_password = os.getenv(f'{service_name_upper}_DB_PASSWORD')
if database_url:
config.set_main_option("sqlalchemy.url", database_url)
if db_password:
database_url = f"postgresql+asyncpg://{db_user}:{db_password}@{db_host}:{db_port}/{db_name}"
else:
# Final fallback: try to get from settings object
try:
database_url = getattr(settings, 'DATABASE_URL', None)
except Exception:
pass
if not database_url:
error_msg = f"ERROR: No database URL configured for {service_name} service"
print(error_msg)
raise Exception(error_msg)
config.set_main_option("sqlalchemy.url", database_url)
# Interpret the config file for Python logging
if config.config_file_name is not None:
@@ -63,6 +86,7 @@ if config.config_file_name is not None:
# Set target metadata
target_metadata = Base.metadata
def run_migrations_offline() -> None:
"""Run migrations in 'offline' mode."""
url = config.get_main_option("sqlalchemy.url")
@@ -78,7 +102,9 @@ def run_migrations_offline() -> None:
with context.begin_transaction():
context.run_migrations()
def do_run_migrations(connection: Connection) -> None:
"""Execute migrations with the given connection."""
context.configure(
connection=connection,
target_metadata=target_metadata,
@@ -89,8 +115,9 @@ def do_run_migrations(connection: Connection) -> None:
with context.begin_transaction():
context.run_migrations()
async def run_async_migrations() -> None:
"""Run migrations in 'online' mode."""
"""Run migrations in 'online' mode with async support."""
connectable = async_engine_from_config(
config.get_section(config.config_ini_section, {}),
prefix="sqlalchemy.",
@@ -102,10 +129,12 @@ async def run_async_migrations() -> None:
await connectable.dispose()
def run_migrations_online() -> None:
    """Run migrations in 'online' mode.

    Synchronous entry point: builds the ``run_async_migrations`` coroutine
    and hands it to ``asyncio.run``, which blocks until it has finished.
    """
    migration_coroutine = run_async_migrations()
    asyncio.run(migration_coroutine)
if context.is_offline_mode():
run_migrations_offline()
else:

View File

@@ -1,78 +0,0 @@
"""Initial schema for training service
Revision ID: 00001
Revises:
Create Date: 2025-09-30 18:00:00.0000
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
# Identifier of this migration script.
revision = '00001'
# None: the module docstring's "Revises:" field is empty, so no parent
# revision is declared for this script.
down_revision = None
# No branch labels and no dependencies on other revisions are declared.
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Create the ``training_jobs`` and ``ml_models`` tables with their indexes.

    Each table is created first and is immediately followed by its four
    non-unique, single-column indexes.
    """
    # --- training_jobs -----------------------------------------------------
    op.create_table(
        'training_jobs',
        sa.Column('id', sa.UUID(), nullable=False),
        sa.Column('tenant_id', sa.UUID(), nullable=False),
        sa.Column('model_id', sa.UUID(), nullable=False),
        sa.Column('job_name', sa.String(255), nullable=False),
        sa.Column('job_type', sa.String(100), nullable=False),
        sa.Column('status', sa.String(50), nullable=True),
        sa.Column('progress', sa.Float(), nullable=True),
        sa.Column('parameters', postgresql.JSON(astext_type=sa.Text()), nullable=True),
        sa.Column('metrics', postgresql.JSON(astext_type=sa.Text()), nullable=True),
        sa.Column('training_data_path', sa.String(500), nullable=True),
        sa.Column('model_path', sa.String(500), nullable=True),
        sa.Column('started_at', sa.DateTime(), nullable=True),
        sa.Column('completed_at', sa.DateTime(), nullable=True),
        sa.Column('created_at', sa.DateTime(), server_default=sa.text('now()'), nullable=True),
        sa.Column('updated_at', sa.DateTime(), nullable=True),
        sa.PrimaryKeyConstraint('id'),
    )
    # Index names stay as full literals so they remain searchable.
    for index_name, column in (
        ('ix_training_jobs_tenant_id', 'tenant_id'),
        ('ix_training_jobs_model_id', 'model_id'),
        ('ix_training_jobs_status', 'status'),
        ('ix_training_jobs_job_type', 'job_type'),
    ):
        op.create_index(op.f(index_name), 'training_jobs', [column], unique=False)

    # --- ml_models ---------------------------------------------------------
    op.create_table(
        'ml_models',
        sa.Column('id', sa.UUID(), nullable=False),
        sa.Column('tenant_id', sa.UUID(), nullable=False),
        sa.Column('name', sa.String(255), nullable=False),
        sa.Column('version', sa.String(50), nullable=False),
        sa.Column('model_type', sa.String(100), nullable=False),
        sa.Column('description', sa.Text(), nullable=True),
        sa.Column('status', sa.String(50), nullable=True),
        sa.Column('accuracy', sa.Float(), nullable=True),
        sa.Column('f1_score', sa.Float(), nullable=True),
        sa.Column('precision', sa.Float(), nullable=True),
        sa.Column('recall', sa.Float(), nullable=True),
        sa.Column('model_path', sa.String(500), nullable=True),
        sa.Column('hyperparameters', postgresql.JSON(astext_type=sa.Text()), nullable=True),
        sa.Column('training_data_info', postgresql.JSON(astext_type=sa.Text()), nullable=True),
        sa.Column('created_at', sa.DateTime(), server_default=sa.text('now()'), nullable=True),
        sa.Column('updated_at', sa.DateTime(), nullable=True),
        sa.PrimaryKeyConstraint('id'),
    )
    for index_name, column in (
        ('ix_ml_models_tenant_id', 'tenant_id'),
        ('ix_ml_models_name', 'name'),
        ('ix_ml_models_version', 'version'),
        ('ix_ml_models_status', 'status'),
    ):
        op.create_index(op.f(index_name), 'ml_models', [column], unique=False)
def downgrade() -> None:
    """Drop the ``ml_models`` and ``training_jobs`` tables and their indexes.

    For each table the indexes are dropped first, then the table itself.
    ``ml_models`` is removed before ``training_jobs``.
    """
    for index_name in (
        'ix_ml_models_status',
        'ix_ml_models_version',
        'ix_ml_models_name',
        'ix_ml_models_tenant_id',
    ):
        op.drop_index(op.f(index_name), table_name='ml_models')
    op.drop_table('ml_models')

    for index_name in (
        'ix_training_jobs_job_type',
        'ix_training_jobs_status',
        'ix_training_jobs_model_id',
        'ix_training_jobs_tenant_id',
    ):
        op.drop_index(op.f(index_name), table_name='training_jobs')
    op.drop_table('training_jobs')

View File

@@ -0,0 +1,159 @@
"""initial_schema_20251001_1118
Revision ID: 121e47ff97c4
Revises:
Create Date: 2025-10-01 11:18:37.223786+02:00
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
# Identifier of this migration; same value as "Revision ID" in the module docstring.
revision: str = '121e47ff97c4'
# None: the module docstring's "Revises:" field is empty, so no parent
# revision is declared for this script.
down_revision: Union[str, None] = None
# No branch labels and no dependencies on other revisions are declared.
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Create the initial schema: five tables, each followed by its indexes.

    Creation order is ``model_artifacts``, ``model_performance_metrics``,
    ``model_training_logs``, ``trained_models`` and ``training_job_queue``.
    The ``job_id`` indexes on ``model_training_logs`` and
    ``training_job_queue`` are unique; every other index is non-unique.
    """
    # ### commands auto generated by Alembic - please adjust! ###

    # --- model_artifacts ---------------------------------------------------
    op.create_table(
        'model_artifacts',
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('model_id', sa.String(length=255), nullable=False),
        sa.Column('tenant_id', sa.UUID(), nullable=False),
        sa.Column('artifact_type', sa.String(length=50), nullable=False),
        sa.Column('file_path', sa.String(length=1000), nullable=False),
        sa.Column('file_size_bytes', sa.Integer(), nullable=True),
        sa.Column('checksum', sa.String(length=255), nullable=True),
        sa.Column('storage_location', sa.String(length=100), nullable=False),
        sa.Column('compression', sa.String(length=50), nullable=True),
        sa.Column('created_at', sa.DateTime(timezone=True), nullable=True),
        sa.Column('expires_at', sa.DateTime(timezone=True), nullable=True),
        sa.PrimaryKeyConstraint('id'),
    )
    # Index names stay as full literals so they remain searchable.
    for index_name, column, is_unique in (
        ('ix_model_artifacts_id', 'id', False),
        ('ix_model_artifacts_model_id', 'model_id', False),
        ('ix_model_artifacts_tenant_id', 'tenant_id', False),
    ):
        op.create_index(op.f(index_name), 'model_artifacts', [column], unique=is_unique)

    # --- model_performance_metrics -----------------------------------------
    op.create_table(
        'model_performance_metrics',
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('model_id', sa.String(length=255), nullable=False),
        sa.Column('tenant_id', sa.UUID(), nullable=False),
        sa.Column('inventory_product_id', sa.UUID(), nullable=False),
        sa.Column('mae', sa.Float(), nullable=True),
        sa.Column('mse', sa.Float(), nullable=True),
        sa.Column('rmse', sa.Float(), nullable=True),
        sa.Column('mape', sa.Float(), nullable=True),
        sa.Column('r2_score', sa.Float(), nullable=True),
        sa.Column('accuracy_percentage', sa.Float(), nullable=True),
        sa.Column('prediction_confidence', sa.Float(), nullable=True),
        sa.Column('evaluation_period_start', sa.DateTime(), nullable=True),
        sa.Column('evaluation_period_end', sa.DateTime(), nullable=True),
        sa.Column('evaluation_samples', sa.Integer(), nullable=True),
        sa.Column('measured_at', sa.DateTime(timezone=True), nullable=True),
        sa.Column('created_at', sa.DateTime(timezone=True), nullable=True),
        sa.PrimaryKeyConstraint('id'),
    )
    for index_name, column, is_unique in (
        ('ix_model_performance_metrics_id', 'id', False),
        ('ix_model_performance_metrics_inventory_product_id', 'inventory_product_id', False),
        ('ix_model_performance_metrics_model_id', 'model_id', False),
        ('ix_model_performance_metrics_tenant_id', 'tenant_id', False),
    ):
        op.create_index(op.f(index_name), 'model_performance_metrics', [column], unique=is_unique)

    # --- model_training_logs -----------------------------------------------
    op.create_table(
        'model_training_logs',
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('job_id', sa.String(length=255), nullable=False),
        sa.Column('tenant_id', sa.UUID(), nullable=False),
        sa.Column('status', sa.String(length=50), nullable=False),
        sa.Column('progress', sa.Integer(), nullable=True),
        sa.Column('current_step', sa.String(length=500), nullable=True),
        sa.Column('start_time', sa.DateTime(timezone=True), nullable=True),
        sa.Column('end_time', sa.DateTime(timezone=True), nullable=True),
        sa.Column('config', sa.JSON(), nullable=True),
        sa.Column('results', sa.JSON(), nullable=True),
        sa.Column('error_message', sa.Text(), nullable=True),
        sa.Column('created_at', sa.DateTime(timezone=True), nullable=True),
        sa.Column('updated_at', sa.DateTime(timezone=True), nullable=True),
        sa.PrimaryKeyConstraint('id'),
    )
    for index_name, column, is_unique in (
        ('ix_model_training_logs_id', 'id', False),
        ('ix_model_training_logs_job_id', 'job_id', True),
        ('ix_model_training_logs_tenant_id', 'tenant_id', False),
    ):
        op.create_index(op.f(index_name), 'model_training_logs', [column], unique=is_unique)

    # --- trained_models ----------------------------------------------------
    op.create_table(
        'trained_models',
        sa.Column('id', sa.UUID(), nullable=False),
        sa.Column('tenant_id', sa.UUID(), nullable=False),
        sa.Column('inventory_product_id', sa.UUID(), nullable=False),
        sa.Column('model_type', sa.String(), nullable=True),
        sa.Column('model_version', sa.String(), nullable=True),
        sa.Column('job_id', sa.String(), nullable=False),
        sa.Column('model_path', sa.String(), nullable=False),
        sa.Column('metadata_path', sa.String(), nullable=True),
        sa.Column('mape', sa.Float(), nullable=True),
        sa.Column('mae', sa.Float(), nullable=True),
        sa.Column('rmse', sa.Float(), nullable=True),
        sa.Column('r2_score', sa.Float(), nullable=True),
        sa.Column('training_samples', sa.Integer(), nullable=True),
        sa.Column('hyperparameters', sa.JSON(), nullable=True),
        sa.Column('features_used', sa.JSON(), nullable=True),
        sa.Column('normalization_params', sa.JSON(), nullable=True),
        sa.Column('is_active', sa.Boolean(), nullable=True),
        sa.Column('is_production', sa.Boolean(), nullable=True),
        sa.Column('created_at', sa.DateTime(timezone=True), nullable=True),
        sa.Column('updated_at', sa.DateTime(timezone=True), nullable=True),
        sa.Column('last_used_at', sa.DateTime(timezone=True), nullable=True),
        sa.Column('training_start_date', sa.DateTime(timezone=True), nullable=True),
        sa.Column('training_end_date', sa.DateTime(timezone=True), nullable=True),
        sa.Column('data_quality_score', sa.Float(), nullable=True),
        sa.Column('notes', sa.Text(), nullable=True),
        sa.Column('created_by', sa.String(), nullable=True),
        sa.PrimaryKeyConstraint('id'),
    )
    for index_name, column, is_unique in (
        ('ix_trained_models_inventory_product_id', 'inventory_product_id', False),
        ('ix_trained_models_tenant_id', 'tenant_id', False),
    ):
        op.create_index(op.f(index_name), 'trained_models', [column], unique=is_unique)

    # --- training_job_queue ------------------------------------------------
    op.create_table(
        'training_job_queue',
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('job_id', sa.String(length=255), nullable=False),
        sa.Column('tenant_id', sa.UUID(), nullable=False),
        sa.Column('job_type', sa.String(length=50), nullable=False),
        sa.Column('priority', sa.Integer(), nullable=True),
        sa.Column('config', sa.JSON(), nullable=True),
        sa.Column('scheduled_at', sa.DateTime(), nullable=True),
        sa.Column('started_at', sa.DateTime(), nullable=True),
        sa.Column('estimated_duration_minutes', sa.Integer(), nullable=True),
        sa.Column('status', sa.String(length=50), nullable=False),
        sa.Column('retry_count', sa.Integer(), nullable=True),
        sa.Column('max_retries', sa.Integer(), nullable=True),
        sa.Column('created_at', sa.DateTime(timezone=True), nullable=True),
        sa.Column('updated_at', sa.DateTime(timezone=True), nullable=True),
        sa.Column('cancelled_by', sa.String(), nullable=True),
        sa.PrimaryKeyConstraint('id'),
    )
    for index_name, column, is_unique in (
        ('ix_training_job_queue_id', 'id', False),
        ('ix_training_job_queue_job_id', 'job_id', True),
        ('ix_training_job_queue_tenant_id', 'tenant_id', False),
    ):
        op.create_index(op.f(index_name), 'training_job_queue', [column], unique=is_unique)
    # ### end Alembic commands ###
def downgrade() -> None:
    """Drop every table created by ``upgrade``, along with its indexes.

    Tables are processed in the reverse of their creation order. For each
    table the indexes are dropped first, then the table itself.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    # (table name, indexes to drop in order) - listed in teardown order.
    teardown_plan = (
        ('training_job_queue', (
            'ix_training_job_queue_tenant_id',
            'ix_training_job_queue_job_id',
            'ix_training_job_queue_id',
        )),
        ('trained_models', (
            'ix_trained_models_tenant_id',
            'ix_trained_models_inventory_product_id',
        )),
        ('model_training_logs', (
            'ix_model_training_logs_tenant_id',
            'ix_model_training_logs_job_id',
            'ix_model_training_logs_id',
        )),
        ('model_performance_metrics', (
            'ix_model_performance_metrics_tenant_id',
            'ix_model_performance_metrics_model_id',
            'ix_model_performance_metrics_inventory_product_id',
            'ix_model_performance_metrics_id',
        )),
        ('model_artifacts', (
            'ix_model_artifacts_tenant_id',
            'ix_model_artifacts_model_id',
            'ix_model_artifacts_id',
        )),
    )
    for table_name, index_names in teardown_plan:
        for index_name in index_names:
            op.drop_index(op.f(index_name), table_name=table_name)
        op.drop_table(table_name)
    # ### end Alembic commands ###