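# Repository file-viewer header (not part of the migration module itself):
#   path:      bakery-ia/services/training/migrations/versions/26a665cd5348_initial_schema.py
#   timestamp: 2025-11-05 13:34:56 +01:00
#   size:      251 lines, 15 KiB
#   language:  Python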

"""Initial schema with all training tables and columns
Revision ID: 26a665cd5348
Revises:
Create Date: 2025-10-15 12:29:01.717552+02:00
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# Revision identifiers, used by Alembic.
# `revision` matches the "Revision ID" given in the module docstring.
revision: str = '26a665cd5348'
# None: no parent revision (the docstring's "Revises:" field is empty).
down_revision: Union[str, None] = None
# No branch labels are declared for this revision.
branch_labels: Union[str, Sequence[str], None] = None
# No dependencies on other revisions are declared.
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Create all tables and indexes of the initial training schema.

    Each table is created and then immediately followed by its indexes, in
    this order: ``audit_logs``, ``trained_models``, ``model_training_logs``,
    ``model_performance_metrics``, ``training_job_queue``,
    ``model_artifacts``, ``training_performance_metrics``.

    Index names prefixed ``ix_`` are wrapped in ``op.f()``; the composite
    ``idx_audit_*`` names are passed as plain strings.
    """
    # ------------------------------------------------------------------
    # audit_logs
    # ------------------------------------------------------------------
    op.create_table(
        'audit_logs',
        sa.Column('id', sa.UUID(), nullable=False),
        sa.Column('tenant_id', sa.UUID(), nullable=False),
        sa.Column('user_id', sa.UUID(), nullable=False),
        sa.Column('action', sa.String(length=100), nullable=False),
        sa.Column('resource_type', sa.String(length=100), nullable=False),
        sa.Column('resource_id', sa.String(length=255), nullable=True),
        sa.Column('severity', sa.String(length=20), nullable=False),
        sa.Column('service_name', sa.String(length=100), nullable=False),
        sa.Column('description', sa.Text(), nullable=True),
        sa.Column('changes', postgresql.JSON(astext_type=sa.Text()), nullable=True),
        sa.Column('audit_metadata', postgresql.JSON(astext_type=sa.Text()), nullable=True),
        sa.Column('ip_address', sa.String(length=45), nullable=True),
        sa.Column('user_agent', sa.Text(), nullable=True),
        sa.Column('endpoint', sa.String(length=255), nullable=True),
        sa.Column('method', sa.String(length=10), nullable=True),
        sa.Column('created_at', sa.DateTime(timezone=True), nullable=False),
        sa.PrimaryKeyConstraint('id'),
    )
    # Composite (two-column) indexes, named explicitly without op.f().
    for index_name, indexed_columns in (
        ('idx_audit_resource_type_action', ['resource_type', 'action']),
        ('idx_audit_service_created', ['service_name', 'created_at']),
        ('idx_audit_severity_created', ['severity', 'created_at']),
        ('idx_audit_tenant_created', ['tenant_id', 'created_at']),
        ('idx_audit_user_created', ['user_id', 'created_at']),
    ):
        op.create_index(index_name, 'audit_logs', indexed_columns, unique=False)
    # Single-column indexes.
    for index_name, column_name in (
        ('ix_audit_logs_action', 'action'),
        ('ix_audit_logs_created_at', 'created_at'),
        ('ix_audit_logs_resource_id', 'resource_id'),
        ('ix_audit_logs_resource_type', 'resource_type'),
        ('ix_audit_logs_service_name', 'service_name'),
        ('ix_audit_logs_severity', 'severity'),
        ('ix_audit_logs_tenant_id', 'tenant_id'),
        ('ix_audit_logs_user_id', 'user_id'),
    ):
        op.create_index(op.f(index_name), 'audit_logs', [column_name], unique=False)

    # ------------------------------------------------------------------
    # trained_models
    # ------------------------------------------------------------------
    op.create_table(
        'trained_models',
        sa.Column('id', sa.UUID(), nullable=False),
        sa.Column('tenant_id', sa.UUID(), nullable=False),
        sa.Column('inventory_product_id', sa.UUID(), nullable=False),
        sa.Column('model_type', sa.String(), nullable=True),
        sa.Column('model_version', sa.String(), nullable=True),
        sa.Column('job_id', sa.String(), nullable=False),
        sa.Column('model_path', sa.String(), nullable=False),
        sa.Column('metadata_path', sa.String(), nullable=True),
        sa.Column('mape', sa.Float(), nullable=True),
        sa.Column('mae', sa.Float(), nullable=True),
        sa.Column('rmse', sa.Float(), nullable=True),
        sa.Column('r2_score', sa.Float(), nullable=True),
        sa.Column('training_samples', sa.Integer(), nullable=True),
        sa.Column('hyperparameters', sa.JSON(), nullable=True),
        sa.Column('features_used', sa.JSON(), nullable=True),
        sa.Column('normalization_params', sa.JSON(), nullable=True),
        sa.Column('is_active', sa.Boolean(), nullable=True),
        sa.Column('is_production', sa.Boolean(), nullable=True),
        sa.Column('created_at', sa.DateTime(timezone=True), nullable=True),
        sa.Column('updated_at', sa.DateTime(timezone=True), nullable=True),
        sa.Column('last_used_at', sa.DateTime(timezone=True), nullable=True),
        sa.Column('training_start_date', sa.DateTime(timezone=True), nullable=True),
        sa.Column('training_end_date', sa.DateTime(timezone=True), nullable=True),
        sa.Column('data_quality_score', sa.Float(), nullable=True),
        sa.Column('notes', sa.Text(), nullable=True),
        sa.Column('created_by', sa.String(), nullable=True),
        sa.Column('product_category', sa.String(), nullable=True),
        sa.PrimaryKeyConstraint('id'),
    )
    for index_name, column_name in (
        ('ix_trained_models_inventory_product_id', 'inventory_product_id'),
        ('ix_trained_models_tenant_id', 'tenant_id'),
    ):
        op.create_index(op.f(index_name), 'trained_models', [column_name], unique=False)

    # ------------------------------------------------------------------
    # model_training_logs
    # ------------------------------------------------------------------
    op.create_table(
        'model_training_logs',
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('job_id', sa.String(length=255), nullable=False),
        sa.Column('tenant_id', sa.UUID(), nullable=False),
        sa.Column('status', sa.String(length=50), nullable=False),
        sa.Column('progress', sa.Integer(), nullable=True),
        sa.Column('current_step', sa.String(length=500), nullable=True),
        sa.Column('start_time', sa.DateTime(timezone=True), nullable=True),
        sa.Column('end_time', sa.DateTime(timezone=True), nullable=True),
        sa.Column('config', sa.JSON(), nullable=True),
        sa.Column('results', sa.JSON(), nullable=True),
        sa.Column('error_message', sa.Text(), nullable=True),
        sa.Column('created_at', sa.DateTime(timezone=True), nullable=True),
        sa.Column('updated_at', sa.DateTime(timezone=True), nullable=True),
        sa.PrimaryKeyConstraint('id'),
    )
    # job_id is the only unique index on this table.
    # NOTE(review): the `id` index duplicates the primary key's own index.
    for index_name, column_name, is_unique in (
        ('ix_model_training_logs_id', 'id', False),
        ('ix_model_training_logs_job_id', 'job_id', True),
        ('ix_model_training_logs_tenant_id', 'tenant_id', False),
    ):
        op.create_index(
            op.f(index_name), 'model_training_logs', [column_name], unique=is_unique
        )

    # ------------------------------------------------------------------
    # model_performance_metrics
    # ------------------------------------------------------------------
    op.create_table(
        'model_performance_metrics',
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('model_id', sa.String(length=255), nullable=False),
        sa.Column('tenant_id', sa.UUID(), nullable=False),
        sa.Column('inventory_product_id', sa.UUID(), nullable=False),
        sa.Column('mae', sa.Float(), nullable=True),
        sa.Column('mse', sa.Float(), nullable=True),
        sa.Column('rmse', sa.Float(), nullable=True),
        sa.Column('mape', sa.Float(), nullable=True),
        sa.Column('r2_score', sa.Float(), nullable=True),
        sa.Column('accuracy_percentage', sa.Float(), nullable=True),
        sa.Column('prediction_confidence', sa.Float(), nullable=True),
        # NOTE(review): the two evaluation_period_* columns omit timezone=True,
        # unlike measured_at / created_at below -- confirm this is intended.
        sa.Column('evaluation_period_start', sa.DateTime(), nullable=True),
        sa.Column('evaluation_period_end', sa.DateTime(), nullable=True),
        sa.Column('evaluation_samples', sa.Integer(), nullable=True),
        sa.Column('measured_at', sa.DateTime(timezone=True), nullable=True),
        sa.Column('created_at', sa.DateTime(timezone=True), nullable=True),
        sa.PrimaryKeyConstraint('id'),
    )
    for index_name, column_name in (
        ('ix_model_performance_metrics_id', 'id'),
        ('ix_model_performance_metrics_inventory_product_id', 'inventory_product_id'),
        ('ix_model_performance_metrics_model_id', 'model_id'),
        ('ix_model_performance_metrics_tenant_id', 'tenant_id'),
    ):
        op.create_index(
            op.f(index_name), 'model_performance_metrics', [column_name], unique=False
        )

    # ------------------------------------------------------------------
    # training_job_queue
    # ------------------------------------------------------------------
    op.create_table(
        'training_job_queue',
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('job_id', sa.String(length=255), nullable=False),
        sa.Column('tenant_id', sa.UUID(), nullable=False),
        sa.Column('job_type', sa.String(length=50), nullable=False),
        sa.Column('priority', sa.Integer(), nullable=True),
        sa.Column('config', sa.JSON(), nullable=True),
        # NOTE(review): scheduled_at / started_at omit timezone=True, unlike
        # created_at / updated_at below -- confirm this is intended.
        sa.Column('scheduled_at', sa.DateTime(), nullable=True),
        sa.Column('started_at', sa.DateTime(), nullable=True),
        sa.Column('estimated_duration_minutes', sa.Integer(), nullable=True),
        sa.Column('status', sa.String(length=50), nullable=False),
        sa.Column('retry_count', sa.Integer(), nullable=True),
        sa.Column('max_retries', sa.Integer(), nullable=True),
        sa.Column('created_at', sa.DateTime(timezone=True), nullable=True),
        sa.Column('updated_at', sa.DateTime(timezone=True), nullable=True),
        sa.Column('cancelled_by', sa.String(), nullable=True),
        sa.PrimaryKeyConstraint('id'),
    )
    # job_id is the only unique index on this table.
    for index_name, column_name, is_unique in (
        ('ix_training_job_queue_id', 'id', False),
        ('ix_training_job_queue_job_id', 'job_id', True),
        ('ix_training_job_queue_tenant_id', 'tenant_id', False),
    ):
        op.create_index(
            op.f(index_name), 'training_job_queue', [column_name], unique=is_unique
        )

    # ------------------------------------------------------------------
    # model_artifacts
    # ------------------------------------------------------------------
    op.create_table(
        'model_artifacts',
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('model_id', sa.String(length=255), nullable=False),
        sa.Column('tenant_id', sa.UUID(), nullable=False),
        sa.Column('artifact_type', sa.String(length=50), nullable=False),
        sa.Column('file_path', sa.String(length=1000), nullable=False),
        sa.Column('file_size_bytes', sa.Integer(), nullable=True),
        sa.Column('checksum', sa.String(length=255), nullable=True),
        sa.Column('storage_location', sa.String(length=100), nullable=False),
        sa.Column('compression', sa.String(length=50), nullable=True),
        sa.Column('created_at', sa.DateTime(timezone=True), nullable=True),
        sa.Column('expires_at', sa.DateTime(timezone=True), nullable=True),
        sa.PrimaryKeyConstraint('id'),
    )
    for index_name, column_name in (
        ('ix_model_artifacts_id', 'id'),
        ('ix_model_artifacts_model_id', 'model_id'),
        ('ix_model_artifacts_tenant_id', 'tenant_id'),
    ):
        op.create_index(op.f(index_name), 'model_artifacts', [column_name], unique=False)

    # ------------------------------------------------------------------
    # training_performance_metrics
    # ------------------------------------------------------------------
    op.create_table(
        'training_performance_metrics',
        sa.Column('id', sa.UUID(), nullable=False),
        sa.Column('tenant_id', sa.UUID(), nullable=False),
        sa.Column('job_id', sa.String(length=255), nullable=False),
        sa.Column('total_products', sa.Integer(), nullable=False),
        sa.Column('successful_products', sa.Integer(), nullable=False),
        sa.Column('failed_products', sa.Integer(), nullable=False),
        sa.Column('total_duration_seconds', sa.Float(), nullable=False),
        sa.Column('avg_time_per_product', sa.Float(), nullable=False),
        sa.Column('data_analysis_time_seconds', sa.Float(), nullable=True),
        sa.Column('training_time_seconds', sa.Float(), nullable=True),
        sa.Column('finalization_time_seconds', sa.Float(), nullable=True),
        sa.Column('completed_at', sa.DateTime(timezone=True), nullable=False),
        sa.Column('created_at', sa.DateTime(timezone=True), nullable=True),
        sa.PrimaryKeyConstraint('id'),
    )
    for index_name, column_name in (
        ('ix_training_performance_metrics_job_id', 'job_id'),
        ('ix_training_performance_metrics_tenant_id', 'tenant_id'),
    ):
        op.create_index(
            op.f(index_name), 'training_performance_metrics', [column_name], unique=False
        )
def downgrade() -> None:
    """Drop every index and table created by ``upgrade``, in reverse order.

    For each table, its ``ix_`` indexes (names wrapped in ``op.f()``) are
    dropped first, then any plainly named ``idx_`` indexes, and finally the
    table itself.
    """
    # Each entry: (table, indexes whose names go through op.f(), plain-named indexes).
    # Tables are listed in the reverse of their creation order in upgrade().
    teardown_plan = (
        (
            'training_performance_metrics',
            (
                'ix_training_performance_metrics_tenant_id',
                'ix_training_performance_metrics_job_id',
            ),
            (),
        ),
        (
            'model_artifacts',
            (
                'ix_model_artifacts_tenant_id',
                'ix_model_artifacts_model_id',
                'ix_model_artifacts_id',
            ),
            (),
        ),
        (
            'training_job_queue',
            (
                'ix_training_job_queue_tenant_id',
                'ix_training_job_queue_job_id',
                'ix_training_job_queue_id',
            ),
            (),
        ),
        (
            'model_performance_metrics',
            (
                'ix_model_performance_metrics_tenant_id',
                'ix_model_performance_metrics_model_id',
                'ix_model_performance_metrics_inventory_product_id',
                'ix_model_performance_metrics_id',
            ),
            (),
        ),
        (
            'model_training_logs',
            (
                'ix_model_training_logs_tenant_id',
                'ix_model_training_logs_job_id',
                'ix_model_training_logs_id',
            ),
            (),
        ),
        (
            # Includes the product_category column created in upgrade().
            'trained_models',
            (
                'ix_trained_models_tenant_id',
                'ix_trained_models_inventory_product_id',
            ),
            (),
        ),
        (
            'audit_logs',
            (
                'ix_audit_logs_user_id',
                'ix_audit_logs_tenant_id',
                'ix_audit_logs_severity',
                'ix_audit_logs_service_name',
                'ix_audit_logs_resource_type',
                'ix_audit_logs_resource_id',
                'ix_audit_logs_created_at',
                'ix_audit_logs_action',
            ),
            (
                'idx_audit_user_created',
                'idx_audit_tenant_created',
                'idx_audit_severity_created',
                'idx_audit_service_created',
                'idx_audit_resource_type_action',
            ),
        ),
    )

    for table_name, convention_indexes, plain_indexes in teardown_plan:
        for index_name in convention_indexes:
            op.drop_index(op.f(index_name), table_name=table_name)
        for index_name in plain_indexes:
            op.drop_index(index_name, table_name=table_name)
        op.drop_table(table_name)