340 lines
13 KiB
Python
340 lines
13 KiB
Python
# services/training/app/services/tenant_deletion_service.py
"""
Tenant Data Deletion Service for Training Service
Handles deletion of all training-related data for a tenant
"""
|
|
|
|
from typing import Dict
|
|
from sqlalchemy import select, func, delete
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
import structlog
|
|
|
|
from shared.services.tenant_deletion import (
|
|
BaseTenantDataDeletionService,
|
|
TenantDataDeletionResult
|
|
)
|
|
from app.models import (
|
|
TrainedModel,
|
|
ModelTrainingLog,
|
|
ModelPerformanceMetric,
|
|
TrainingJobQueue,
|
|
ModelArtifact,
|
|
AuditLog
|
|
)
|
|
|
|
logger = structlog.get_logger(__name__)
|
|
|
|
|
|
class TrainingTenantDeletionService(BaseTenantDataDeletionService):
    """Service for deleting all training-related data for a tenant.

    Covers the rows of TrainedModel, ModelArtifact, ModelTrainingLog,
    ModelPerformanceMetric, TrainingJobQueue and AuditLog whose
    ``tenant_id`` matches, plus the files referenced by the artifact and
    model rows.
    """

    # (result key, ORM model) pairs, in the order the preview reports them.
    _PREVIEW_ENTITIES = (
        ("trained_models", TrainedModel),
        ("model_artifacts", ModelArtifact),
        ("model_training_logs", ModelTrainingLog),
        ("model_performance_metrics", ModelPerformanceMetric),
        ("training_job_queue", TrainingJobQueue),
        ("audit_logs", AuditLog),
    )

    # (result key, ORM model, "starting" log event, "finished" log event),
    # in deletion order: rows that reference models go before the models.
    _DELETION_STEPS = (
        (
            "model_artifacts",
            ModelArtifact,
            "training.tenant_deletion.deleting_artifacts",
            "training.tenant_deletion.artifacts_deleted",
        ),
        (
            "model_performance_metrics",
            ModelPerformanceMetric,
            "training.tenant_deletion.deleting_metrics",
            "training.tenant_deletion.metrics_deleted",
        ),
        (
            "model_training_logs",
            ModelTrainingLog,
            "training.tenant_deletion.deleting_logs",
            "training.tenant_deletion.logs_deleted",
        ),
        (
            "training_job_queue",
            TrainingJobQueue,
            "training.tenant_deletion.deleting_queue",
            "training.tenant_deletion.queue_deleted",
        ),
        (
            "trained_models",
            TrainedModel,
            "training.tenant_deletion.deleting_models",
            "training.tenant_deletion.models_deleted",
        ),
        (
            "audit_logs",
            AuditLog,
            "training.tenant_deletion.deleting_audit_logs",
            "training.tenant_deletion.audit_logs_deleted",
        ),
    )

    # (attribute name, success log message, counts towards files-deleted).
    # Attributes are looked up defensively because a TrainedModel may not
    # define all of them. Metadata files are removed but not counted.
    _MODEL_FILE_FIELDS = (
        ("model_path", "Deleted model file", True),
        ("model_file_path", "Deleted model file", True),
        ("metadata_path", "Deleted metadata file", False),
    )

    def __init__(self, db: AsyncSession):
        """
        Bind the service to a database session.

        Args:
            db: AsyncSession used for every query and for commit/rollback
        """
        # NOTE(review): the base-class initializer is not invoked here;
        # confirm BaseTenantDataDeletionService does not require it.
        self.db = db
        self.service_name = "training"

    async def get_tenant_data_preview(self, tenant_id: str) -> Dict[str, int]:
        """
        Get counts of what would be deleted for a tenant (dry-run)

        Args:
            tenant_id: The tenant ID to preview deletion for

        Returns:
            Dictionary with entity names and their counts

        Raises:
            Exception: any error raised while counting is logged and
                re-raised unchanged
        """
        logger.info("training.tenant_deletion.preview", tenant_id=tenant_id)
        preview = {}

        try:
            for count_key, entity in self._PREVIEW_ENTITIES:
                row_total = await self.db.scalar(
                    select(func.count(entity.id)).where(
                        entity.tenant_id == tenant_id
                    )
                )
                # A missing scalar result is reported as zero rows.
                preview[count_key] = row_total or 0

            logger.info(
                "training.tenant_deletion.preview_complete",
                tenant_id=tenant_id,
                preview=preview
            )

        except Exception as e:
            logger.error(
                "training.tenant_deletion.preview_error",
                tenant_id=tenant_id,
                error=str(e),
                exc_info=True
            )
            raise

        return preview

    async def _collect_artifact_files(self, tenant_id: str) -> list:
        """Return (artifact id, path, success message, counted) per artifact file."""
        rows = await self.db.execute(
            select(ModelArtifact).where(ModelArtifact.tenant_id == tenant_id)
        )
        return [
            (artifact.id, artifact.file_path, "Deleted artifact file", True)
            for artifact in rows.scalars()
            if artifact.file_path
        ]

    async def _collect_model_files(self, tenant_id: str) -> list:
        """Return (model id, path, success message, counted) per model file."""
        rows = await self.db.execute(
            select(TrainedModel).where(TrainedModel.tenant_id == tenant_id)
        )
        file_refs = []
        for model in rows.scalars():
            for field_name, success_message, counted in self._MODEL_FILE_FIELDS:
                path = getattr(model, field_name, None)
                if path:
                    file_refs.append((model.id, path, success_message, counted))
        return file_refs

    @staticmethod
    def _remove_files(file_refs, id_field: str, failure_message: str) -> tuple:
        """Best-effort removal of files; returns (removed, failed) counts."""
        # Function-local import, as in the original implementation, so the
        # module-level import block stays untouched.
        import os

        removed = 0
        failed = 0
        for owner_id, path, success_message, counted in file_refs:
            try:
                os.remove(path)
            except FileNotFoundError:
                # Already absent: neither a deletion nor a failure.
                continue
            except Exception as e:
                # Deliberately broad: one bad file must not abort the
                # tenant deletion; it is counted and logged instead.
                failed += 1
                logger.warning(
                    failure_message,
                    path=path,
                    **{id_field: owner_id},
                    error=str(e)
                )
                continue

            if counted:
                removed += 1
            logger.info(success_message, path=path, **{id_field: owner_id})

        return removed, failed

    async def delete_tenant_data(self, tenant_id: str) -> TenantDataDeletionResult:
        """
        Permanently delete all training data for a tenant

        Deletion order:
        1. ModelArtifact (references models)
        2. ModelPerformanceMetric (references models)
        3. ModelTrainingLog (independent job logs)
        4. TrainingJobQueue (independent queue entries)
        5. TrainedModel (parent model records)
        6. AuditLog (independent)

        Note: This also deletes physical model files from disk/storage.
        File paths are captured before the rows are deleted, and the files
        are removed only after the transaction has committed, so a rolled
        back deletion never leaves rows pointing at missing files. File
        removal is best-effort: failures are logged and counted, not raised.

        Args:
            tenant_id: The tenant ID to delete data for

        Returns:
            TenantDataDeletionResult with deletion counts and any errors.
            Counts are recorded only once the transaction has committed;
            on failure ``success`` is False and ``errors`` holds a message.
        """
        logger.info("training.tenant_deletion.started", tenant_id=tenant_id)
        result = TenantDataDeletionResult(tenant_id=tenant_id, service_name=self.service_name)

        try:
            # Capture plain (id, path) values now: the rows are about to be
            # deleted and the files are only touched after the commit.
            artifact_files = await self._collect_artifact_files(tenant_id)
            model_files = await self._collect_model_files(tenant_id)

            row_counts = {}
            for count_key, entity, start_event, done_event in self._DELETION_STEPS:
                logger.info(start_event, tenant_id=tenant_id)
                outcome = await self.db.execute(
                    delete(entity).where(entity.tenant_id == tenant_id)
                )
                row_counts[count_key] = outcome.rowcount
                logger.info(
                    done_event,
                    tenant_id=tenant_id,
                    count=outcome.rowcount
                )

            # Commit the transaction
            await self.db.commit()

            # Publish row counts only now that the deletions are durable.
            for count_key, row_count in row_counts.items():
                result.deleted_counts[count_key] = row_count

            # NOTE(review): os.remove is a blocking call made from a
            # coroutine; consider offloading it if tenants own many files.
            artifact_files_deleted, artifact_files_failed = self._remove_files(
                artifact_files,
                id_field="artifact_id",
                failure_message="Failed to delete artifact file",
            )
            logger.info("Artifact files deletion complete",
                        deleted_files=artifact_files_deleted,
                        failed_files=artifact_files_failed)
            result.deleted_counts["artifact_files_deleted"] = artifact_files_deleted
            result.deleted_counts["artifact_files_failed"] = artifact_files_failed

            model_files_deleted, model_files_failed = self._remove_files(
                model_files,
                id_field="model_id",
                failure_message="Failed to delete model file",
            )
            logger.info("Model files deletion complete",
                        deleted_files=model_files_deleted,
                        failed_files=model_files_failed)
            result.deleted_counts["model_files_deleted"] = model_files_deleted
            result.deleted_counts["model_files_failed"] = model_files_failed

            # Failure counters describe what was NOT deleted, so they are
            # kept out of the total.
            total_deleted = sum(
                count
                for count_key, count in result.deleted_counts.items()
                if not count_key.endswith("_failed")
            )

            logger.info(
                "training.tenant_deletion.completed",
                tenant_id=tenant_id,
                total_deleted=total_deleted,
                breakdown=result.deleted_counts,
                files_failed=artifact_files_failed + model_files_failed
            )

            result.success = True

        except Exception as e:
            await self.db.rollback()
            error_msg = f"Failed to delete training data for tenant {tenant_id}: {str(e)}"
            logger.error(
                "training.tenant_deletion.failed",
                tenant_id=tenant_id,
                error=str(e),
                exc_info=True
            )
            result.errors.append(error_msg)
            result.success = False

        return result
|
|
|
|
|
|
def get_training_tenant_deletion_service(
    db: AsyncSession
) -> TrainingTenantDeletionService:
    """
    Build a TrainingTenantDeletionService bound to the given session.

    Args:
        db: AsyncSession the new service will use for its queries

    Returns:
        A freshly constructed TrainingTenantDeletionService
    """
    service = TrainingTenantDeletionService(db)
    return service
|