diff --git a/services/training/app/services/training_service.py b/services/training/app/services/training_service.py index e5d0b2a0..156aa682 100644 --- a/services/training/app/services/training_service.py +++ b/services/training/app/services/training_service.py @@ -236,6 +236,11 @@ class EnhancedTrainingService: job_id, PROGRESS_ML_TRAINING_START, "ml_training", "running" ) + # ✅ FIX: Commit the session to prevent deadlock with trainer's nested session + # The trainer creates its own session, so we need to ensure this update is committed + await session.commit() + logger.debug("Committed session after ml_training progress update") + training_results = await self.trainer.train_tenant_models( tenant_id=tenant_id, training_dataset=training_dataset,