Add user role
This commit is contained in:
@@ -12,9 +12,15 @@ from app.core.database import get_db
|
||||
from app.schemas.training import TrainedModelResponse, ModelMetricsResponse
|
||||
from app.services.training_service import TrainingService
|
||||
from datetime import datetime
|
||||
from sqlalchemy import select, delete, func
|
||||
import uuid
|
||||
import shutil
|
||||
|
||||
from app.services.messaging import publish_models_deleted_event
|
||||
|
||||
from shared.auth.decorators import (
|
||||
get_current_tenant_id_dep
|
||||
get_current_user_dep,
|
||||
require_admin_role
|
||||
)
|
||||
|
||||
logger = structlog.get_logger()
|
||||
@@ -212,4 +218,244 @@ async def list_models(
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail="Failed to retrieve models"
|
||||
)
|
||||
|
||||
async def _count_and_delete(db, model, tenant_uuid):
    """Delete every ``model`` row owned by ``tenant_uuid``; return the prior row count.

    The count is taken with a SELECT issued immediately before the DELETE.
    Nothing is committed here: the caller owns the transaction.
    """
    count_result = await db.execute(
        select(func.count(model.id)).where(model.tenant_id == tenant_uuid)
    )
    row_count = count_result.scalar()
    await db.execute(delete(model).where(model.tenant_id == tenant_uuid))
    return row_count


def _prune_empty_dir(directory):
    """Best-effort removal of ``directory`` when it exists and has no entries."""
    try:
        if directory.exists() and not any(directory.iterdir()):
            directory.rmdir()
    except OSError:
        # Deliberately ignored: a leftover empty directory is harmless, and the
        # tenant directory is removed wholesale later on anyway.
        pass


@router.delete("/models/tenant/{tenant_id}")
async def delete_tenant_models_complete(
    tenant_id: str,
    current_user = Depends(get_current_user_dep),
    _admin_check = Depends(require_admin_role),
    db: AsyncSession = Depends(get_db)
):
    """
    Delete all trained models and artifacts for a tenant.

    **WARNING: This operation is irreversible!**

    This endpoint:
    1. Cancels any active training jobs for the tenant
    2. Deletes all model artifacts (files) from storage
    3. Deletes model records from database
    4. Deletes training logs and performance metrics
    5. Publishes deletion event

    Used by admin user deletion process to clean up all training data.

    Args:
        tenant_id: Tenant identifier; must parse as a UUID.
        current_user: Resolved by ``get_current_user_dep`` (not read in the body).
        _admin_check: Resolved by ``require_admin_role`` (not read in the body).
        db: Async database session.

    Returns:
        A dict with ``success``, ``message`` and ``deletion_details``. The
        details hold per-category counters plus an ``errors`` list with the
        non-fatal problems hit while cancelling jobs or removing files.

    Raises:
        HTTPException: 400 when ``tenant_id`` is not a valid UUID; 500 when
            the database records cannot be deleted or on any unexpected error.
    """

    try:
        tenant_uuid = uuid.UUID(tenant_id)
    except ValueError:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Invalid tenant ID format"
        )

    try:
        from app.models.training import (
            ModelTrainingLog,
            TrainedModel,
            ModelArtifact,
            ModelPerformanceMetric,
            TrainingJobQueue
        )
        from app.core.config import settings

        # NOTE(review): datetime.utcnow() yields a naive timestamp; kept as-is
        # because the column/consumer expectations are not visible from here.
        deletion_stats = {
            "tenant_id": tenant_id,
            "deleted_at": datetime.utcnow().isoformat(),
            "jobs_cancelled": 0,
            "models_deleted": 0,
            "artifacts_deleted": 0,
            "artifacts_files_deleted": 0,
            "training_logs_deleted": 0,
            "performance_metrics_deleted": 0,
            "storage_freed_bytes": 0,
            "errors": []
        }

        # Step 1: Cancel active training jobs
        try:
            active_jobs_query = select(TrainingJobQueue).where(
                TrainingJobQueue.tenant_id == tenant_uuid,
                TrainingJobQueue.status.in_(["queued", "running", "pending"])
            )
            active_jobs_result = await db.execute(active_jobs_query)
            active_jobs = active_jobs_result.scalars().all()

            for job in active_jobs:
                job.status = "cancelled"
                job.updated_at = datetime.utcnow()
                deletion_stats["jobs_cancelled"] += 1

            if active_jobs:
                await db.commit()
                logger.info("Cancelled active training jobs",
                            tenant_id=tenant_id,
                            count=len(active_jobs))

        except Exception as e:
            # Reset the session so the following steps do not run inside a
            # failed transaction.
            await db.rollback()
            error_msg = f"Error cancelling training jobs: {str(e)}"
            deletion_stats["errors"].append(error_msg)
            logger.error(error_msg)

        # Step 2: Delete model artifact files from storage
        try:
            artifacts_query = select(ModelArtifact).where(
                ModelArtifact.tenant_id == tenant_uuid
            )
            artifacts_result = await db.execute(artifacts_query)
            artifacts = artifacts_result.scalars().all()

            storage_freed = 0
            files_deleted = 0

            for artifact in artifacts:
                # Each file is handled independently: one failure is recorded
                # and the loop moves on to the next artifact.
                try:
                    file_path = Path(artifact.file_path)
                    if file_path.exists():
                        file_size = file_path.stat().st_size
                        file_path.unlink()  # Delete file
                        storage_freed += file_size
                        files_deleted += 1
                        logger.debug("Deleted artifact file",
                                     file_path=str(file_path),
                                     size_bytes=file_size)

                    # Also try to delete parent directories if empty
                    _prune_empty_dir(file_path.parent)

                except Exception as e:
                    error_msg = f"Error deleting artifact file {artifact.file_path}: {str(e)}"
                    deletion_stats["errors"].append(error_msg)
                    logger.warning(error_msg)

            deletion_stats["artifacts_files_deleted"] = files_deleted
            deletion_stats["storage_freed_bytes"] = storage_freed

            logger.info("Deleted artifact files",
                        tenant_id=tenant_id,
                        files_deleted=files_deleted,
                        storage_freed_mb=storage_freed / (1024 * 1024))

        except Exception as e:
            # A failed artifact query would leave the transaction unusable for
            # Step 3; nothing is pending here, so the rollback discards no work.
            await db.rollback()
            error_msg = f"Error processing artifact files: {str(e)}"
            deletion_stats["errors"].append(error_msg)
            logger.error(error_msg)

        # Step 3: Delete database records
        try:
            # Delete model performance metrics
            metrics_count = await _count_and_delete(db, ModelPerformanceMetric, tenant_uuid)
            deletion_stats["performance_metrics_deleted"] = metrics_count

            # Delete model artifacts records
            artifacts_count = await _count_and_delete(db, ModelArtifact, tenant_uuid)
            deletion_stats["artifacts_deleted"] = artifacts_count

            # Delete trained models
            models_count = await _count_and_delete(db, TrainedModel, tenant_uuid)
            deletion_stats["models_deleted"] = models_count

            # Delete training logs
            logs_count = await _count_and_delete(db, ModelTrainingLog, tenant_uuid)
            deletion_stats["training_logs_deleted"] = logs_count

            # Delete job queue entries
            queue_delete_query = delete(TrainingJobQueue).where(
                TrainingJobQueue.tenant_id == tenant_uuid
            )
            await db.execute(queue_delete_query)

            # All deletes above are committed together.
            await db.commit()

            logger.info("Deleted training database records",
                        tenant_id=tenant_id,
                        models=models_count,
                        artifacts=artifacts_count,
                        logs=logs_count,
                        metrics=metrics_count)

        except Exception as e:
            await db.rollback()
            error_msg = f"Error deleting database records: {str(e)}"
            deletion_stats["errors"].append(error_msg)
            logger.error(error_msg)
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail=error_msg
            )

        # Step 4: Clean up tenant model directory
        try:
            tenant_model_dir = Path(settings.MODEL_STORAGE_PATH) / tenant_id
            if tenant_model_dir.exists():
                shutil.rmtree(tenant_model_dir)
                logger.info("Deleted tenant model directory",
                            directory=str(tenant_model_dir))
        except Exception as e:
            error_msg = f"Error deleting model directory: {str(e)}"
            deletion_stats["errors"].append(error_msg)
            logger.warning(error_msg)

        # Step 5: Publish deletion event
        try:
            await publish_models_deleted_event(tenant_id, deletion_stats)
        except Exception as e:
            # A publish failure is logged only; it does not fail the request.
            logger.warning("Failed to publish models deletion event", error=str(e))

        # Non-fatal problems are reported via deletion_details["errors"];
        # "success" is True whenever the database records were removed.
        return {
            "success": True,
            "message": f"All training data for tenant {tenant_id} deleted successfully",
            "deletion_details": deletion_stats
        }

    except HTTPException:
        # Let deliberate HTTP errors (e.g. the Step 3 failure) pass through.
        raise
    except Exception as e:
        logger.error("Unexpected error deleting tenant models",
                     tenant_id=tenant_id,
                     error=str(e))
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to delete tenant models: {str(e)}"
        )
|
||||
Reference in New Issue
Block a user