Add user role

This commit is contained in:
Urtzi Alfaro
2025-08-02 09:41:50 +02:00
parent d4687e6375
commit 277e8bec73
13 changed files with 1051 additions and 28 deletions

View File

@@ -12,9 +12,15 @@ from app.core.database import get_db
from app.schemas.training import TrainedModelResponse, ModelMetricsResponse
from app.services.training_service import TrainingService
from datetime import datetime
from sqlalchemy import select, delete, func
import uuid
import shutil
from app.services.messaging import publish_models_deleted_event
from shared.auth.decorators import (
    get_current_tenant_id_dep,
    get_current_user_dep,
    require_admin_role
)
logger = structlog.get_logger()
@@ -212,4 +218,244 @@ async def list_models(
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail="Failed to retrieve models"
)
async def _rollback_after_error(db, tenant_id):
    """Roll back ``db`` after a failed statement so later steps can reuse it.

    A failed execute/commit leaves the session's transaction unusable until it
    is rolled back. The rollback is best-effort: if it fails too, the failure
    is logged and swallowed so the caller's best-effort step does not turn
    into a crash.
    """
    try:
        await db.rollback()
    except Exception as e:
        logger.warning("Failed to roll back session after error",
                       tenant_id=tenant_id,
                       error=str(e))


def _remove_artifact_file(file_path):
    """Delete one artifact file and, if that leaves it empty, its parent directory.

    Args:
        file_path: ``Path`` of the artifact file.

    Returns:
        The size in bytes of the deleted file, or ``None`` when the file did
        not exist and nothing was deleted.

    Raises:
        OSError: If the file exists but cannot be inspected or removed.
    """
    if not file_path.exists():
        return None

    file_size = file_path.stat().st_size
    file_path.unlink()
    logger.debug("Deleted artifact file",
                 file_path=str(file_path),
                 size_bytes=file_size)

    # Removing the now-empty parent directory is purely cosmetic, so a
    # filesystem error here must not fail the file deletion itself.
    parent_dir = file_path.parent
    try:
        if parent_dir.exists() and not any(parent_dir.iterdir()):
            parent_dir.rmdir()
    except OSError:
        pass

    return file_size


async def _count_and_delete(db, model, tenant_uuid):
    """Count, then bulk-delete, every ``model`` row belonging to ``tenant_uuid``.

    The statements are executed on ``db`` but not committed; the caller owns
    the transaction.

    Returns:
        The row count reported by the count query issued before the delete.
    """
    tenant_filter = model.tenant_id == tenant_uuid
    count_result = await db.execute(
        select(func.count(model.id)).where(tenant_filter)
    )
    row_count = count_result.scalar()
    await db.execute(delete(model).where(tenant_filter))
    return row_count


@router.delete("/models/tenant/{tenant_id}")
async def delete_tenant_models_complete(
    tenant_id: str,
    current_user = Depends(get_current_user_dep),
    _admin_check = Depends(require_admin_role),
    db: AsyncSession = Depends(get_db)
):
    """
    Delete all trained models and artifacts for a tenant.

    **WARNING: This operation is irreversible!**

    This endpoint:
    1. Cancels any active training jobs for the tenant
    2. Deletes all model artifacts (files) from storage
    3. Deletes model records from database
    4. Deletes training logs and performance metrics
    5. Publishes deletion event

    Used by admin user deletion process to clean up all training data.

    Steps 1, 2, 4 and 5 are best-effort: their failures are recorded in
    ``deletion_details["errors"]`` (or logged) and processing continues.
    Only a failure while deleting the database records aborts the request.

    Args:
        tenant_id: Tenant identifier from the URL; must parse as a UUID.
        current_user: Resolved by ``get_current_user_dep``; not read here.
        _admin_check: Resolved by ``require_admin_role``; not read here.
        db: Async database session used for every query in this request.

    Returns:
        A dict with ``success``, ``message`` and ``deletion_details`` (the
        per-category counters and the list of non-fatal errors).

    Raises:
        HTTPException: 400 if ``tenant_id`` is not a valid UUID; 500 if the
            database records cannot be deleted or an unexpected error occurs.
    """
    try:
        tenant_uuid = uuid.UUID(tenant_id)
    except ValueError:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Invalid tenant ID format"
        )

    try:
        from app.models.training import (
            ModelTrainingLog,
            TrainedModel,
            ModelArtifact,
            ModelPerformanceMetric,
            TrainingJobQueue
        )
        from app.core.config import settings

        deletion_stats = {
            "tenant_id": tenant_id,
            "deleted_at": datetime.utcnow().isoformat(),
            "jobs_cancelled": 0,
            "models_deleted": 0,
            "artifacts_deleted": 0,
            "artifacts_files_deleted": 0,
            "training_logs_deleted": 0,
            "performance_metrics_deleted": 0,
            "storage_freed_bytes": 0,
            "errors": []
        }

        # Step 1: Cancel active training jobs
        try:
            active_jobs_query = select(TrainingJobQueue).where(
                TrainingJobQueue.tenant_id == tenant_uuid,
                TrainingJobQueue.status.in_(["queued", "running", "pending"])
            )
            active_jobs_result = await db.execute(active_jobs_query)
            active_jobs = active_jobs_result.scalars().all()

            for job in active_jobs:
                job.status = "cancelled"
                job.updated_at = datetime.utcnow()

            if active_jobs:
                await db.commit()
                # Only report jobs as cancelled once the change is committed.
                deletion_stats["jobs_cancelled"] = len(active_jobs)
                logger.info("Cancelled active training jobs",
                            tenant_id=tenant_id,
                            count=len(active_jobs))
        except Exception as e:
            # Without a rollback the failed transaction would make every
            # later statement on this session fail as well.
            await _rollback_after_error(db, tenant_id)
            error_msg = f"Error cancelling training jobs: {str(e)}"
            deletion_stats["errors"].append(error_msg)
            logger.error(error_msg)

        # Step 2: Delete model artifact files from storage
        try:
            artifacts_query = select(ModelArtifact).where(
                ModelArtifact.tenant_id == tenant_uuid
            )
            artifacts_result = await db.execute(artifacts_query)
            artifacts = artifacts_result.scalars().all()

            storage_freed = 0
            files_deleted = 0

            for artifact in artifacts:
                try:
                    freed = _remove_artifact_file(Path(artifact.file_path))
                    if freed is not None:
                        storage_freed += freed
                        files_deleted += 1
                except Exception as e:
                    # One undeletable file must not stop the remaining ones.
                    error_msg = f"Error deleting artifact file {artifact.file_path}: {str(e)}"
                    deletion_stats["errors"].append(error_msg)
                    logger.warning(error_msg)

            deletion_stats["artifacts_files_deleted"] = files_deleted
            deletion_stats["storage_freed_bytes"] = storage_freed

            logger.info("Deleted artifact files",
                        tenant_id=tenant_id,
                        files_deleted=files_deleted,
                        storage_freed_mb=storage_freed / (1024 * 1024))
        except Exception as e:
            # Reset the session in case the artifact query itself failed.
            await _rollback_after_error(db, tenant_id)
            error_msg = f"Error processing artifact files: {str(e)}"
            deletion_stats["errors"].append(error_msg)
            logger.error(error_msg)

        # Step 3: Delete database records (single transaction, all-or-nothing)
        try:
            # Delete model performance metrics
            metrics_count = await _count_and_delete(
                db, ModelPerformanceMetric, tenant_uuid
            )
            deletion_stats["performance_metrics_deleted"] = metrics_count

            # Delete model artifacts records
            artifacts_count = await _count_and_delete(
                db, ModelArtifact, tenant_uuid
            )
            deletion_stats["artifacts_deleted"] = artifacts_count

            # Delete trained models
            models_count = await _count_and_delete(
                db, TrainedModel, tenant_uuid
            )
            deletion_stats["models_deleted"] = models_count

            # Delete training logs
            logs_count = await _count_and_delete(
                db, ModelTrainingLog, tenant_uuid
            )
            deletion_stats["training_logs_deleted"] = logs_count

            # Delete job queue entries
            queue_delete_query = delete(TrainingJobQueue).where(
                TrainingJobQueue.tenant_id == tenant_uuid
            )
            await db.execute(queue_delete_query)

            await db.commit()

            logger.info("Deleted training database records",
                        tenant_id=tenant_id,
                        models=models_count,
                        artifacts=artifacts_count,
                        logs=logs_count,
                        metrics=metrics_count)
        except Exception as e:
            await db.rollback()
            error_msg = f"Error deleting database records: {str(e)}"
            deletion_stats["errors"].append(error_msg)
            logger.error(error_msg)
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail=error_msg
            ) from e

        # Step 4: Clean up tenant model directory
        try:
            # uuid.UUID() also accepts braces, a "urn:uuid:" prefix and
            # hyphen-less hex, so build the path from the canonical form
            # rather than from the raw path parameter.
            tenant_model_dir = Path(settings.MODEL_STORAGE_PATH) / str(tenant_uuid)
            if tenant_model_dir.exists():
                shutil.rmtree(tenant_model_dir)
                logger.info("Deleted tenant model directory",
                            directory=str(tenant_model_dir))
        except Exception as e:
            error_msg = f"Error deleting model directory: {str(e)}"
            deletion_stats["errors"].append(error_msg)
            logger.warning(error_msg)

        # Step 5: Publish deletion event
        try:
            await publish_models_deleted_event(tenant_id, deletion_stats)
        except Exception as e:
            logger.warning("Failed to publish models deletion event", error=str(e))

        return {
            "success": True,
            "message": f"All training data for tenant {tenant_id} deleted successfully",
            "deletion_details": deletion_stats
        }

    except HTTPException:
        raise
    except Exception as e:
        logger.error("Unexpected error deleting tenant models",
                     tenant_id=tenant_id,
                     error=str(e))
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to delete tenant models: {str(e)}"
        ) from e