# services/training/app/services/tenant_deletion_service.py
"""
Tenant Data Deletion Service for Training Service
Handles deletion of all training-related data for a tenant
"""

import os
from typing import Dict, List, Tuple

from sqlalchemy import select, func, delete
from sqlalchemy.ext.asyncio import AsyncSession
import structlog

from shared.services.tenant_deletion import (
    BaseTenantDataDeletionService,
    TenantDataDeletionResult
)
from app.models import (
    TrainedModel,
    ModelTrainingLog,
    ModelPerformanceMetric,
    TrainingJobQueue,
    ModelArtifact,
    AuditLog
)

logger = structlog.get_logger(__name__)

# (preview key, ORM model) pairs, in the order they are reported by
# get_tenant_data_preview.
_PREVIEW_ENTITIES = (
    ("trained_models", TrainedModel),
    ("model_artifacts", ModelArtifact),
    ("model_training_logs", ModelTrainingLog),
    ("model_performance_metrics", ModelPerformanceMetric),
    ("training_job_queue", TrainingJobQueue),
    ("audit_logs", AuditLog),
)

# (ORM model, deleted_counts key, log-event step name), in deletion order.
# Dependent tables come first; TrainedModel goes after artifacts and metrics.
# NOTE(review): presumably artifacts/metrics reference trained models --
# confirm against the schema before reordering.
_DELETION_STEPS = (
    (ModelArtifact, "model_artifacts", "artifacts"),
    (ModelPerformanceMetric, "model_performance_metrics", "metrics"),
    (ModelTrainingLog, "model_training_logs", "logs"),
    (TrainingJobQueue, "training_job_queue", "queue"),
    (TrainedModel, "trained_models", "models"),
    (AuditLog, "audit_logs", "audit_logs"),
)

# TrainedModel attributes that may hold a file path:
# (attribute name, label used in the "Deleted ... file" log, counted as a model file).
# Attributes are read with getattr because not every one is guaranteed to exist.
_MODEL_FILE_ATTRS = (
    ("model_path", "model", True),
    ("model_file_path", "model", True),
    ("metadata_path", "metadata", False),
)

# (owner record id, file path, log label, counts toward the deleted-files total)
_FileRef = Tuple[object, str, str, bool]


class TrainingTenantDeletionService(BaseTenantDataDeletionService):
    """Service for deleting all training-related data for a tenant"""

    def __init__(self, db: AsyncSession):
        # NOTE(review): the base-class initializer is not invoked here --
        # confirm BaseTenantDataDeletionService needs no setup of its own.
        self.db = db
        self.service_name = "training"

    async def get_tenant_data_preview(self, tenant_id: str) -> Dict[str, int]:
        """
        Get counts of what would be deleted for a tenant (dry-run)

        Args:
            tenant_id: The tenant ID to preview deletion for

        Returns:
            Dictionary with entity names and their counts

        Raises:
            Exception: any error raised while counting is logged and re-raised.
        """
        logger.info("training.tenant_deletion.preview", tenant_id=tenant_id)

        preview: Dict[str, int] = {}

        try:
            for key, model in _PREVIEW_ENTITIES:
                count = await self.db.scalar(
                    select(func.count(model.id)).where(
                        model.tenant_id == tenant_id
                    )
                )
                # scalar() may yield None; report that as zero.
                preview[key] = count or 0

            logger.info(
                "training.tenant_deletion.preview_complete",
                tenant_id=tenant_id,
                preview=preview
            )
        except Exception as e:
            logger.error(
                "training.tenant_deletion.preview_error",
                tenant_id=tenant_id,
                error=str(e),
                exc_info=True
            )
            raise

        return preview

    async def delete_tenant_data(self, tenant_id: str) -> TenantDataDeletionResult:
        """
        Permanently delete all training data for a tenant

        Deletion order (database rows, one transaction):
        1. ModelArtifact
        2. ModelPerformanceMetric
        3. ModelTrainingLog
        4. TrainingJobQueue
        5. TrainedModel
        6. AuditLog

        Physical artifact/model files are removed only AFTER the transaction
        has committed, so a failed (rolled back) deletion never leaves
        database rows pointing at files that no longer exist. File removal is
        best-effort: failures are logged and counted, not raised.

        Args:
            tenant_id: The tenant ID to delete data for

        Returns:
            TenantDataDeletionResult with deletion counts and any errors.
            Database errors are not raised; they are recorded in
            ``result.errors`` with ``result.success`` set to False.
        """
        logger.info("training.tenant_deletion.started", tenant_id=tenant_id)

        result = TenantDataDeletionResult(
            tenant_id=tenant_id, service_name=self.service_name
        )

        try:
            # Capture file paths as plain values before any row is deleted:
            # the rows are gone (and ORM objects expired) once we commit.
            artifact_files = await self._collect_artifact_files(tenant_id)
            model_files = await self._collect_model_files(tenant_id)

            for model, count_key, step in _DELETION_STEPS:
                logger.info(
                    f"training.tenant_deletion.deleting_{step}",
                    tenant_id=tenant_id
                )
                outcome = await self.db.execute(
                    delete(model).where(model.tenant_id == tenant_id)
                )
                result.deleted_counts[count_key] = outcome.rowcount
                logger.info(
                    f"training.tenant_deletion.{step}_deleted",
                    tenant_id=tenant_id,
                    count=outcome.rowcount
                )

            # Commit the transaction
            await self.db.commit()

        except Exception as e:
            await self.db.rollback()
            error_msg = f"Failed to delete training data for tenant {tenant_id}: {str(e)}"
            logger.error(
                "training.tenant_deletion.failed",
                tenant_id=tenant_id,
                error=str(e),
                exc_info=True
            )
            result.errors.append(error_msg)
            result.success = False
            return result

        # Database state is now final; clean up files on a best-effort basis.
        # NOTE(review): os.remove is synchronous and runs on the event loop --
        # consider offloading if tenants can own very many files.
        deleted_files, failed_files = self._remove_files(
            artifact_files, kind="artifact", id_key="artifact_id"
        )
        logger.info(
            "Artifact files deletion complete",
            deleted_files=deleted_files,
            failed_files=failed_files
        )
        result.deleted_counts["artifact_files_deleted"] = deleted_files
        result.deleted_counts["artifact_files_failed"] = failed_files

        deleted_model_files, failed_model_files = self._remove_files(
            model_files, kind="model", id_key="model_id"
        )
        logger.info(
            "Model files deletion complete",
            deleted_files=deleted_model_files,
            failed_files=failed_model_files
        )
        result.deleted_counts["model_files_deleted"] = deleted_model_files
        result.deleted_counts["model_files_failed"] = failed_model_files

        # Failed file removals are not deletions; keep them out of the total.
        total_deleted = sum(
            count
            for key, count in result.deleted_counts.items()
            if not key.endswith("_failed")
        )

        logger.info(
            "training.tenant_deletion.completed",
            tenant_id=tenant_id,
            total_deleted=total_deleted,
            breakdown=result.deleted_counts,
            note="Physical artifact and model files were removed after commit; see *_failed counts for leftovers"
        )

        result.success = True
        return result

    async def _collect_artifact_files(self, tenant_id: str) -> List[_FileRef]:
        """Return file references for every tenant artifact that has a file path."""
        rows = await self.db.execute(
            select(ModelArtifact).where(ModelArtifact.tenant_id == tenant_id)
        )
        refs: List[_FileRef] = []
        for artifact in rows.scalars():
            path = getattr(artifact, "file_path", None)
            if path:
                refs.append((getattr(artifact, "id", "unknown"), path, "artifact", True))
        return refs

    async def _collect_model_files(self, tenant_id: str) -> List[_FileRef]:
        """Return file references (model and metadata files) for every tenant model."""
        rows = await self.db.execute(
            select(TrainedModel).where(TrainedModel.tenant_id == tenant_id)
        )
        refs: List[_FileRef] = []
        for model in rows.scalars():
            model_id = getattr(model, "id", "unknown")
            for attr, label, counted in _MODEL_FILE_ATTRS:
                path = getattr(model, attr, None)
                if path:
                    refs.append((model_id, path, label, counted))
        return refs

    @staticmethod
    def _remove_files(files: List[_FileRef], *, kind: str, id_key: str) -> Tuple[int, int]:
        """
        Remove each referenced file independently and return (deleted, failed).

        A file that is already missing is skipped silently and counted as
        neither deleted nor failed. Any other removal error is logged and
        counted as failed without stopping the remaining removals.
        """
        deleted = 0
        failed = 0
        for owner_id, path, label, counted in files:
            try:
                os.remove(path)
            except FileNotFoundError:
                # Already gone (or listed twice under different attributes).
                continue
            except (OSError, ValueError, TypeError) as e:
                # ValueError/TypeError cover malformed path values.
                failed += 1
                logger.warning(
                    f"Failed to delete {kind} file",
                    path=path,
                    error=str(e),
                    **{id_key: owner_id}
                )
                continue
            if counted:
                deleted += 1
            logger.info(f"Deleted {label} file", path=path, **{id_key: owner_id})
        return deleted, failed


def get_training_tenant_deletion_service(
    db: AsyncSession
) -> TrainingTenantDeletionService:
    """
    Factory function to create TrainingTenantDeletionService instance

    Args:
        db: AsyncSession database session

    Returns:
        TrainingTenantDeletionService instance
    """
    return TrainingTenantDeletionService(db)