Improve the UI and training
This commit is contained in:
@@ -158,6 +158,56 @@ async def start_training_job(
|
||||
# Continue with job creation but log the error
|
||||
|
||||
try:
|
||||
# CRITICAL FIX: Check for existing running jobs before starting new one
|
||||
# This prevents duplicate tenant-level training jobs
|
||||
async with enhanced_training_service.database_manager.get_session() as check_session:
|
||||
from app.repositories.training_log_repository import TrainingLogRepository
|
||||
log_repo = TrainingLogRepository(check_session)
|
||||
|
||||
# Check for active jobs (running or pending)
|
||||
active_jobs = await log_repo.get_active_jobs(tenant_id=tenant_id)
|
||||
pending_jobs = await log_repo.get_logs_by_tenant(
|
||||
tenant_id=tenant_id,
|
||||
status="pending",
|
||||
limit=10
|
||||
)
|
||||
|
||||
all_active = active_jobs + pending_jobs
|
||||
|
||||
if all_active:
|
||||
# Training job already in progress, return existing job info
|
||||
existing_job = all_active[0]
|
||||
logger.info("Training job already in progress, returning existing job",
|
||||
existing_job_id=existing_job.job_id,
|
||||
tenant_id=tenant_id,
|
||||
status=existing_job.status)
|
||||
|
||||
return TrainingJobResponse(
|
||||
job_id=existing_job.job_id,
|
||||
tenant_id=tenant_id,
|
||||
status=existing_job.status,
|
||||
message=f"Training job already in progress (started {existing_job.created_at.isoformat() if existing_job.created_at else 'recently'})",
|
||||
created_at=existing_job.created_at or datetime.now(timezone.utc),
|
||||
estimated_duration_minutes=existing_job.config.get("estimated_duration_minutes", 15) if existing_job.config else 15,
|
||||
training_results={
|
||||
"total_products": 0,
|
||||
"successful_trainings": 0,
|
||||
"failed_trainings": 0,
|
||||
"products": [],
|
||||
"overall_training_time_seconds": 0.0
|
||||
},
|
||||
data_summary=None,
|
||||
completed_at=None,
|
||||
error_details=None,
|
||||
processing_metadata={
|
||||
"background_task": True,
|
||||
"async_execution": True,
|
||||
"existing_job": True,
|
||||
"deduplication": True
|
||||
}
|
||||
)
|
||||
|
||||
# No existing job, proceed with creating new one
|
||||
# Generate enhanced job ID
|
||||
job_id = f"enhanced_training_{tenant_id}_{uuid.uuid4().hex[:8]}"
|
||||
|
||||
@@ -407,6 +457,7 @@ async def start_single_product_training(
|
||||
request: SingleProductTrainingRequest,
|
||||
tenant_id: str = Path(..., description="Tenant ID"),
|
||||
inventory_product_id: str = Path(..., description="Inventory product UUID"),
|
||||
background_tasks: BackgroundTasks = BackgroundTasks(),
|
||||
request_obj: Request = None,
|
||||
current_user: Dict[str, Any] = Depends(get_current_user_dep),
|
||||
enhanced_training_service: EnhancedTrainingService = Depends(get_enhanced_training_service)
|
||||
@@ -421,6 +472,7 @@ async def start_single_product_training(
|
||||
- Enhanced error handling and validation
|
||||
- Metrics tracking
|
||||
- Transactional operations
|
||||
- Background execution to prevent blocking
|
||||
"""
|
||||
metrics = get_metrics_collector(request_obj)
|
||||
|
||||
@@ -429,6 +481,53 @@ async def start_single_product_training(
|
||||
inventory_product_id=inventory_product_id,
|
||||
tenant_id=tenant_id)
|
||||
|
||||
# CRITICAL FIX: Check if this product is currently being trained
|
||||
# This prevents duplicate training from rapid-click scenarios
|
||||
async with enhanced_training_service.database_manager.get_session() as check_session:
|
||||
from app.repositories.training_log_repository import TrainingLogRepository
|
||||
log_repo = TrainingLogRepository(check_session)
|
||||
|
||||
# Check for active jobs for this specific product
|
||||
active_jobs = await log_repo.get_active_jobs(tenant_id=tenant_id)
|
||||
pending_jobs = await log_repo.get_logs_by_tenant(
|
||||
tenant_id=tenant_id,
|
||||
status="pending",
|
||||
limit=20
|
||||
)
|
||||
|
||||
all_active = active_jobs + pending_jobs
|
||||
|
||||
# Filter for jobs that include this specific product
|
||||
product_jobs = [
|
||||
job for job in all_active
|
||||
if job.config and (
|
||||
# Single product job for this product
|
||||
job.config.get("product_id") == inventory_product_id or
|
||||
# Tenant-wide job that would include this product
|
||||
job.config.get("job_type") == "tenant_training"
|
||||
)
|
||||
]
|
||||
|
||||
if product_jobs:
|
||||
existing_job = product_jobs[0]
|
||||
logger.warning("Product training already in progress, rejecting duplicate request",
|
||||
existing_job_id=existing_job.job_id,
|
||||
tenant_id=tenant_id,
|
||||
inventory_product_id=inventory_product_id,
|
||||
status=existing_job.status)
|
||||
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_409_CONFLICT,
|
||||
detail={
|
||||
"error": "Product training already in progress",
|
||||
"message": f"Product {inventory_product_id} is currently being trained in job {existing_job.job_id}",
|
||||
"existing_job_id": existing_job.job_id,
|
||||
"status": existing_job.status,
|
||||
"started_at": existing_job.created_at.isoformat() if existing_job.created_at else None
|
||||
}
|
||||
)
|
||||
|
||||
# No existing job, proceed with training
|
||||
# Record metrics
|
||||
if metrics:
|
||||
metrics.increment_counter("enhanced_single_product_training_total")
|
||||
@@ -436,22 +535,60 @@ async def start_single_product_training(
|
||||
# Generate enhanced job ID
|
||||
job_id = f"enhanced_single_{tenant_id}_{inventory_product_id}_{uuid.uuid4().hex[:8]}"
|
||||
|
||||
# Delegate to enhanced training service
|
||||
result = await enhanced_training_service.start_single_product_training(
|
||||
# CRITICAL FIX: Add initial training log entry
|
||||
await enhanced_training_service._update_job_status_repository(
|
||||
job_id=job_id,
|
||||
status="pending",
|
||||
progress=0,
|
||||
current_step="Initializing single product training",
|
||||
tenant_id=tenant_id
|
||||
)
|
||||
|
||||
# Add enhanced background task for single product training
|
||||
background_tasks.add_task(
|
||||
execute_single_product_training_background,
|
||||
tenant_id=tenant_id,
|
||||
inventory_product_id=inventory_product_id,
|
||||
job_id=job_id,
|
||||
bakery_location=request.bakery_location or (40.4168, -3.7038)
|
||||
bakery_location=request.bakery_location or (40.4168, -3.7038),
|
||||
database_manager=enhanced_training_service.database_manager
|
||||
)
|
||||
|
||||
if metrics:
|
||||
metrics.increment_counter("enhanced_single_product_training_success_total")
|
||||
# Return immediate response with job info
|
||||
response_data = {
|
||||
"job_id": job_id,
|
||||
"tenant_id": tenant_id,
|
||||
"status": "pending",
|
||||
"message": "Enhanced single product training started successfully",
|
||||
"created_at": datetime.now(timezone.utc),
|
||||
"estimated_duration_minutes": 15, # Default estimate for single product
|
||||
"training_results": {
|
||||
"total_products": 1,
|
||||
"successful_trainings": 0,
|
||||
"failed_trainings": 0,
|
||||
"products": [],
|
||||
"overall_training_time_seconds": 0.0
|
||||
},
|
||||
"data_summary": None,
|
||||
"completed_at": None,
|
||||
"error_details": None,
|
||||
"processing_metadata": {
|
||||
"background_task": True,
|
||||
"async_execution": True,
|
||||
"enhanced_features": True,
|
||||
"repository_pattern": True,
|
||||
"dependency_injection": True
|
||||
}
|
||||
}
|
||||
|
||||
logger.info("Enhanced single product training completed",
|
||||
logger.info("Enhanced single product training queued successfully",
|
||||
inventory_product_id=inventory_product_id,
|
||||
job_id=job_id)
|
||||
|
||||
return TrainingJobResponse(**result)
|
||||
if metrics:
|
||||
metrics.increment_counter("enhanced_single_product_training_queued_total")
|
||||
|
||||
return TrainingJobResponse(**response_data)
|
||||
|
||||
except ValueError as e:
|
||||
if metrics:
|
||||
@@ -475,6 +612,74 @@ async def start_single_product_training(
|
||||
)
|
||||
|
||||
|
||||
async def execute_single_product_training_background(
    tenant_id: str,
    inventory_product_id: str,
    job_id: str,
    bakery_location: tuple,
    database_manager
):
    """
    Run a single-product training job as a background task.

    A separate EnhancedTrainingService is built from ``database_manager``
    (to avoid session conflicts). The job is first marked "running", then
    the service's ``start_single_product_training`` is awaited. Any
    exception raised along the way is logged and the job is marked
    "failed"; a failure while writing that "failed" status is logged too.
    Exceptions caught here are not re-raised.

    Args:
        tenant_id: Tenant the job belongs to.
        inventory_product_id: Product to train.
        job_id: Identifier of the job whose status is updated.
        bakery_location: Forwarded unchanged to the training service.
        database_manager: Passed to the EnhancedTrainingService constructor.
    """
    logger.info("Enhanced background single product training started",
                job_id=job_id,
                tenant_id=tenant_id,
                inventory_product_id=inventory_product_id)

    # Imported at call time, then used to build a service dedicated to this task.
    from app.services.training_service import EnhancedTrainingService
    service = EnhancedTrainingService(database_manager)

    # Keys shared by the completion, failure and cleanup log entries
    # (insertion order matches the original keyword order).
    job_context = {"job_id": job_id, "inventory_product_id": inventory_product_id}

    async def _record_status(state: str, step: str, **extra) -> None:
        # Every status write in this task reports progress=0 for this job/tenant.
        await service._update_job_status_repository(
            job_id=job_id,
            status=state,
            progress=0,
            current_step=step,
            tenant_id=tenant_id,
            **extra,
        )

    try:
        await _record_status("running", "Starting single product training")

        # The outcome is not inspected here; only success/failure matters.
        await service.start_single_product_training(
            tenant_id=tenant_id,
            inventory_product_id=inventory_product_id,
            job_id=job_id,
            bakery_location=bakery_location,
        )

        logger.info("Enhanced background single product training completed successfully",
                    **job_context)

    except Exception as exc:
        logger.error("Enhanced single product training failed",
                     **job_context,
                     error=str(exc))

        # Best effort: a failure to persist the "failed" state is only logged.
        try:
            await _record_status(
                "failed",
                "Single product training failed",
                error_message=str(exc),
            )
        except Exception as update_exc:
            logger.error("Failed to update job status after training error",
                         job_id=job_id,
                         status_error=str(update_exc))

    finally:
        logger.info("Enhanced background single product training cleanup completed",
                    **job_context)
|
||||
|
||||
|
||||
@router.get("/health")
|
||||
async def health_check():
|
||||
"""Health check endpoint for the training operations"""
|
||||
|
||||
Reference in New Issue
Block a user