# bakery-ia/services/training/app/repositories/job_queue_repository.py

"""
Job Queue Repository
Repository for training job queue operations
"""

from typing import Optional, List, Dict, Any
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select, and_, text, desc
from datetime import datetime, timedelta
import structlog

from .base import TrainingBaseRepository
from app.models.training import TrainingJobQueue
from shared.database.exceptions import DatabaseError, ValidationError

logger = structlog.get_logger()


class JobQueueRepository(TrainingBaseRepository):
    """Repository for training job queue operations"""
    
    def __init__(self, session: AsyncSession, cache_ttl: Optional[int] = 60):
        """Create a repository bound to the ``TrainingJobQueue`` model.

        Args:
            session: Async SQLAlchemy session, forwarded to the base repository.
            cache_ttl: Cache lifetime forwarded to the base repository.
                Defaults to 60 (one minute).
        """
        # Job queue changes frequently, very short cache time (1 minute)
        super().__init__(TrainingJobQueue, session, cache_ttl)
    
    async def enqueue_job(self, job_data: Dict[str, Any]) -> TrainingJobQueue:
        """Add a job to the training queue.

        The data is checked with ``_validate_training_data`` for the fields
        ``job_id``, ``tenant_id`` and ``job_type``. ``priority``, ``status`` and
        ``max_retries`` default to ``1``, ``"queued"`` and ``3`` when absent.

        Args:
            job_data: Field values for the new queue entry. The mapping is
                copied before defaults are applied, so it is not modified.

        Returns:
            The entry returned by ``self.create``.

        Raises:
            ValidationError: If validation reports the data as invalid.
            DatabaseError: If any other error occurs; the original exception
                is attached as ``__cause__``.
        """
        try:
            validation_result = self._validate_training_data(
                job_data,
                ["job_id", "tenant_id", "job_type"]
            )

            if not validation_result["is_valid"]:
                raise ValidationError(f"Invalid job data: {validation_result['errors']}")

            # Work on a copy: the caller's dict must not pick up our defaults.
            entry_data = dict(job_data)
            entry_data.setdefault("priority", 1)
            entry_data.setdefault("status", "queued")
            entry_data.setdefault("max_retries", 3)

            queued_job = await self.create(entry_data)

            logger.info("Job enqueued",
                       job_id=queued_job.job_id,
                       tenant_id=queued_job.tenant_id,
                       job_type=queued_job.job_type,
                       priority=queued_job.priority)

            return queued_job

        except ValidationError:
            # Validation problems are reported to the caller unchanged.
            raise
        except Exception as e:
            # job_data may not be a mapping at all if that is what failed.
            logger.error("Failed to enqueue job",
                        job_id=job_data.get("job_id") if isinstance(job_data, dict) else None,
                        error=str(e))
            raise DatabaseError(f"Failed to enqueue job: {str(e)}") from e
    
    async def get_next_job(self, job_types: List[str] = None) -> Optional[TrainingJobQueue]:
        """Get the next job to process from the queue.

        Args:
            job_types: Optional list of job types to restrict the search to.
                When empty or ``None`` any queued job qualifies.

        Returns:
            The selected queued job, or ``None`` when nothing matches. With
            ``job_types`` the job is picked by priority (highest first), then
            by ``created_at`` (oldest first), among jobs whose ``scheduled_at``
            is unset or already due.

        Raises:
            DatabaseError: If the lookup fails; the original exception is
                attached as ``__cause__``.
        """
        # Local import keeps this method self-contained with respect to the
        # module's import block.
        from sqlalchemy import bindparam

        try:
            if job_types:
                # The type list is sent as an expanding bind parameter instead
                # of being spliced into the SQL text, so a value containing a
                # quote cannot change the statement.
                query = text("""
                    SELECT * FROM training_job_queue
                    WHERE status = 'queued'
                    AND job_type IN :job_types
                    AND (scheduled_at IS NULL OR scheduled_at <= :now)
                    ORDER BY priority DESC, created_at ASC
                    LIMIT 1
                """).bindparams(bindparam("job_types", expanding=True))

                result = await self.session.execute(
                    query,
                    {"job_types": list(job_types), "now": datetime.now()}
                )
                row = result.fetchone()
                if row is None:
                    return None

                # Build a model instance from the raw row's column mapping.
                return self.model(**dict(row._mapping))

            # NOTE(review): unlike the branch above, this path neither checks
            # scheduled_at nor breaks priority ties by created_at - confirm
            # whether that difference is intended.
            jobs = await self.get_multi(
                filters={"status": "queued"},
                limit=1,
                order_by="priority",
                order_desc=True
            )
            return jobs[0] if jobs else None

        except Exception as e:
            logger.error("Failed to get next job from queue",
                        job_types=job_types,
                        error=str(e))
            raise DatabaseError(f"Failed to get next job: {str(e)}") from e
    
    async def start_job(self, job_id: str) -> Optional[TrainingJobQueue]:
        """Move a queued job into the ``running`` state.

        Args:
            job_id: Identifier passed to ``get_by_job_id``.

        Returns:
            ``None`` when no job is found, the unchanged job when it is not
            currently ``queued``, otherwise the result of the update.

        Raises:
            DatabaseError: If the lookup or the update fails.
        """
        try:
            queue_entry = await self.get_by_job_id(job_id)

            if not queue_entry:
                logger.error(f"Job not found in queue: {job_id}")
                return None

            # Only queued jobs may be started; anything else is handed back as-is.
            if queue_entry.status != "queued":
                logger.warning(f"Job {job_id} is not queued (status: {queue_entry.status})")
                return queue_entry

            entry_pk = queue_entry.id
            changes = {
                "status": "running",
                "started_at": datetime.now(),
                "updated_at": datetime.now(),
            }
            running_entry = await self.update(entry_pk, changes)

            logger.info("Job started", job_id=job_id, job_type=queue_entry.job_type)
            return running_entry

        except Exception as exc:
            logger.error("Failed to start job", job_id=job_id, error=str(exc))
            raise DatabaseError(f"Failed to start job: {str(exc)}")
    
    async def complete_job(self, job_id: str) -> Optional[TrainingJobQueue]:
        """Set a job's status to ``completed``.

        Args:
            job_id: Identifier passed to ``get_by_job_id``.

        Returns:
            ``None`` when no job is found, otherwise the result of the update.

        Raises:
            DatabaseError: If the lookup or the update fails.
        """
        try:
            queue_entry = await self.get_by_job_id(job_id)

            if not queue_entry:
                logger.error(f"Job not found in queue: {job_id}")
                return None

            changes = {"status": "completed", "updated_at": datetime.now()}
            finished_entry = await self.update(queue_entry.id, changes)

            # queue_entry is known to be truthy here, so its type is always available.
            logger.info("Job completed", job_id=job_id, job_type=queue_entry.job_type)
            return finished_entry

        except Exception as exc:
            logger.error("Failed to complete job", job_id=job_id, error=str(exc))
            raise DatabaseError(f"Failed to complete job: {str(exc)}")
    
    async def fail_job(self, job_id: str, error_message: str = None) -> Optional[TrainingJobQueue]:
        """Record a failed attempt and either requeue or permanently fail the job.

        The retry counter is incremented. While the new count is below the
        job's ``max_retries`` the job goes back to ``queued`` with
        ``started_at`` cleared; otherwise its status becomes ``failed``.

        Args:
            job_id: Identifier passed to ``get_by_job_id``.
            error_message: Optional failure reason; it is only written to the
                log, for both outcomes.

        Returns:
            ``None`` when no job is found, otherwise the result of the update.

        Raises:
            DatabaseError: If the lookup or the update fails; the original
                exception is attached as ``__cause__``.
        """
        try:
            job = await self.get_by_job_id(job_id)
            if not job:
                logger.error(f"Job not found in queue: {job_id}")
                return None

            # An unset counter is treated as zero previous attempts instead of
            # failing on ``None + 1``.
            new_retry_count = (job.retry_count or 0) + 1
            will_retry = new_retry_count < job.max_retries

            changes = {
                "status": "queued" if will_retry else "failed",
                "retry_count": new_retry_count,
                "updated_at": datetime.now(),
            }
            if will_retry:
                # Clear started_at so the next attempt starts from a clean entry.
                changes["started_at"] = None

            updated_job = await self.update(job.id, changes)

            if will_retry:
                logger.info("Job failed, queued for retry",
                           job_id=job_id,
                           retry_count=new_retry_count,
                           max_retries=job.max_retries,
                           error_message=error_message)
            else:
                logger.error("Job permanently failed",
                           job_id=job_id,
                           retry_count=new_retry_count,
                           error_message=error_message)

            return updated_job

        except Exception as e:
            logger.error("Failed to handle job failure",
                        job_id=job_id,
                        error=str(e))
            raise DatabaseError(f"Failed to handle job failure: {str(e)}") from e
    
    async def cancel_job(self, job_id: str, cancelled_by: str = None) -> Optional[TrainingJobQueue]:
        """Set a job's status to ``cancelled`` unless it is completed or failed.

        Args:
            job_id: Identifier passed to ``get_by_job_id``.
            cancelled_by: Optional value stored in the ``cancelled_by`` field.

        Returns:
            ``None`` when no job is found, the unchanged job when its status is
            ``completed`` or ``failed``, otherwise the result of the update.

        Raises:
            DatabaseError: If the lookup or the update fails.
        """
        try:
            queue_entry = await self.get_by_job_id(job_id)

            if not queue_entry:
                logger.error(f"Job not found in queue: {job_id}")
                return None

            # Completed and failed jobs are left alone.
            if queue_entry.status in ("completed", "failed"):
                logger.warning(f"Cannot cancel job {job_id} with status {queue_entry.status}")
                return queue_entry

            entry_pk = queue_entry.id
            cancellation = {
                "status": "cancelled",
                "cancelled_by": cancelled_by,
                "updated_at": datetime.now(),
            }
            cancelled_entry = await self.update(entry_pk, cancellation)

            logger.info("Job cancelled", job_id=job_id, cancelled_by=cancelled_by)
            return cancelled_entry

        except Exception as exc:
            logger.error("Failed to cancel job", job_id=job_id, error=str(exc))
            raise DatabaseError(f"Failed to cancel job: {str(exc)}")
    
    async def get_queue_status(self, tenant_id: str = None) -> Dict[str, Any]:
        """Get queue status and statistics.

        Args:
            tenant_id: When given (and non-empty), every figure is restricted
                to this tenant.

        Returns:
            A dict with ``tenant_id``, ``queue_counts`` (one count per status
            plus ``total``), ``jobs_by_type`` (job type -> number of jobs),
            ``avg_wait_time_minutes`` (average of ``started_at - created_at``
            over completed jobs) and ``queue_health``. If anything fails the
            error is logged and the same structure filled with zero/empty
            values is returned instead of raising.
        """
        try:
            base_filters = {"tenant_id": tenant_id} if tenant_id else {}

            # Get counts by status, one query per status in a fixed order.
            counts = {}
            for status in ("queued", "running", "completed", "failed", "cancelled"):
                counts[status] = await self.count(filters={**base_filters, "status": status})

            tenant_clause = " AND tenant_id = :tenant_id" if tenant_id else ""
            params = dict(base_filters)

            # The aggregate is aliased job_count rather than count: result rows
            # are tuple-like and already expose a count() method, so reading
            # row.count would return that method instead of the column value.
            type_query = text(f"""
                SELECT job_type, COUNT(*) AS job_count
                FROM training_job_queue
                WHERE 1=1
                {tenant_clause}
                GROUP BY job_type
                ORDER BY job_count DESC
            """)

            result = await self.session.execute(type_query, params)
            jobs_by_type = {row.job_type: row.job_count for row in result.fetchall()}

            # Get average wait time for completed jobs
            wait_time_query = text(f"""
                SELECT
                    AVG(EXTRACT(EPOCH FROM (started_at - created_at))/60) as avg_wait_minutes
                FROM training_job_queue
                WHERE status = 'completed'
                AND started_at IS NOT NULL
                AND created_at IS NOT NULL
                {tenant_clause}
            """)

            wait_result = await self.session.execute(wait_time_query, params)
            wait_row = wait_result.fetchone()
            # AVG over zero rows yields NULL; report that as 0.0.
            avg_wait_time = float(wait_row.avg_wait_minutes) if wait_row and wait_row.avg_wait_minutes else 0.0

            # max(..., 1) avoids dividing by zero when nothing has finished yet.
            finished_jobs = counts["completed"] + counts["failed"]
            failure_rate = round((counts["failed"] / max(finished_jobs, 1)) * 100, 2)

            return {
                "tenant_id": tenant_id,
                "queue_counts": {**counts, "total": sum(counts.values())},
                "jobs_by_type": jobs_by_type,
                "avg_wait_time_minutes": round(avg_wait_time, 2),
                "queue_health": {
                    "has_queued_jobs": counts["queued"] > 0,
                    "has_running_jobs": counts["running"] > 0,
                    "failure_rate": failure_rate
                }
            }

        except Exception as e:
            # Best effort: statistics must not break the caller, so fall back
            # to an all-zero report after logging.
            logger.error("Failed to get queue status",
                        tenant_id=tenant_id,
                        error=str(e))
            return {
                "tenant_id": tenant_id,
                "queue_counts": {
                    "queued": 0, "running": 0, "completed": 0,
                    "failed": 0, "cancelled": 0, "total": 0
                },
                "jobs_by_type": {},
                "avg_wait_time_minutes": 0.0,
                "queue_health": {
                    "has_queued_jobs": False,
                    "has_running_jobs": False,
                    "failure_rate": 0.0
                }
            }
    
    async def get_jobs_by_tenant(
        self,
        tenant_id: str,
        status: str = None,
        job_type: str = None,
        skip: int = 0,
        limit: int = 100
    ) -> List[TrainingJobQueue]:
        """List a tenant's jobs, newest first.

        Args:
            tenant_id: Tenant whose jobs are returned.
            status: Optional status filter; ignored when falsy.
            job_type: Optional job type filter; ignored when falsy.
            skip: Offset forwarded to ``get_multi``.
            limit: Maximum number of rows forwarded to ``get_multi``.

        Returns:
            The jobs returned by ``get_multi``, ordered by ``created_at``
            descending.

        Raises:
            DatabaseError: If the query fails.
        """
        try:
            # Optional criteria only become filters when a truthy value was supplied.
            optional_criteria = {"status": status, "job_type": job_type}
            filters = {
                "tenant_id": tenant_id,
                **{field: value for field, value in optional_criteria.items() if value},
            }

            tenant_jobs = await self.get_multi(
                filters=filters,
                skip=skip,
                limit=limit,
                order_by="created_at",
                order_desc=True,
            )
            return tenant_jobs

        except Exception as exc:
            logger.error("Failed to get jobs by tenant", tenant_id=tenant_id, error=str(exc))
            raise DatabaseError(f"Failed to get tenant jobs: {str(exc)}")
    
    async def cleanup_old_jobs(self, days_old: int = 30, status_filter: str = None) -> int:
        """Delete queue entries created more than ``days_old`` days ago.

        Args:
            days_old: Age threshold in days, measured against ``created_at``.
            status_filter: When given, only entries with exactly this status
                are deleted; otherwise entries that are completed, failed or
                cancelled.

        Returns:
            The ``rowcount`` reported for the DELETE statement.

        Raises:
            DatabaseError: If the statement fails.
        """
        try:
            params = {"cutoff_date": datetime.now() - timedelta(days=days_old)}

            if status_filter:
                params["status"] = status_filter
                status_condition = "status = :status"
            else:
                # Without an explicit filter only finished jobs are removed.
                finished_statuses = ("completed", "failed", "cancelled")
                quoted_statuses = ", ".join(f"'{name}'" for name in finished_statuses)
                status_condition = f"status IN ({quoted_statuses})"

            query_text = f"""
                DELETE FROM training_job_queue 
                WHERE created_at < :cutoff_date
                AND {status_condition}
            """

            outcome = await self.session.execute(text(query_text), params)
            deleted_count = outcome.rowcount

            logger.info("Cleaned up old queue jobs",
                       deleted_count=deleted_count,
                       days_old=days_old,
                       status_filter=status_filter)
            return deleted_count

        except Exception as exc:
            logger.error("Failed to cleanup old queue jobs", error=str(exc))
            raise DatabaseError(f"Queue cleanup failed: {str(exc)}")
    
    async def get_stuck_jobs(self, hours_stuck: int = 2) -> List[TrainingJobQueue]:
        """Find running jobs whose ``started_at`` is older than ``hours_stuck`` hours.

        Args:
            hours_stuck: Age threshold in hours.

        Returns:
            Model instances built from the matching rows, oldest start first.
            On any error the failure is logged and an empty list is returned.
        """
        try:
            cutoff_time = datetime.now() - timedelta(hours=hours_stuck)

            query_text = """
                SELECT * FROM training_job_queue
                WHERE status = 'running'
                AND started_at IS NOT NULL
                AND started_at < :cutoff_time
                ORDER BY started_at ASC
            """

            outcome = await self.session.execute(text(query_text), {"cutoff_time": cutoff_time})

            # Each raw row is turned into a model instance via its column mapping.
            stuck_jobs = [self.model(**dict(row._mapping)) for row in outcome.fetchall()]

            if stuck_jobs:
                logger.warning("Found stuck jobs", count=len(stuck_jobs), hours_stuck=hours_stuck)

            return stuck_jobs

        except Exception as exc:
            # Best effort: callers get an empty list rather than an exception.
            logger.error("Failed to get stuck jobs", hours_stuck=hours_stuck, error=str(exc))
            return []
    
    async def reset_stuck_jobs(self, hours_stuck: int = 2) -> int:
        """Put every stuck job back into the ``queued`` state.

        Jobs are obtained from ``get_stuck_jobs``; each one has its status set
        to ``queued`` and its ``started_at`` cleared.

        Args:
            hours_stuck: Age threshold in hours, forwarded to ``get_stuck_jobs``.

        Returns:
            The number of jobs that were reset.

        Raises:
            DatabaseError: If an update fails.
        """
        try:
            stuck_jobs = await self.get_stuck_jobs(hours_stuck)

            for stuck_job in stuck_jobs:
                requeue_changes = {
                    "status": "queued",
                    "started_at": None,
                    "updated_at": datetime.now(),
                }
                await self.update(stuck_job.id, requeue_changes)

            # Reaching this point means every update above succeeded.
            reset_count = len(stuck_jobs)

            if reset_count > 0:
                logger.info("Reset stuck jobs", reset_count=reset_count, hours_stuck=hours_stuck)

            return reset_count

        except Exception as exc:
            logger.error("Failed to reset stuck jobs", hours_stuck=hours_stuck, error=str(exc))
            raise DatabaseError(f"Failed to reset stuck jobs: {str(exc)}")
REFACTOR - Database logic 2025-08-08 09:08:41 +02:00			`"""`
			`Job Queue Repository`
			`Repository for training job queue operations`
			`"""`

			`from typing import Optional, List, Dict, Any`
			`from sqlalchemy.ext.asyncio import AsyncSession`
			`from sqlalchemy import select, and_, text, desc`
			`from datetime import datetime, timedelta`
			`import structlog`

			`from .base import TrainingBaseRepository`
			`from app.models.training import TrainingJobQueue`
			`from shared.database.exceptions import DatabaseError, ValidationError`

			`logger = structlog.get_logger()`


			`class JobQueueRepository(TrainingBaseRepository):`
			`"""Repository for training job queue operations"""`

			`def __init__(self, session: AsyncSession, cache_ttl: Optional[int] = 60):`
			`# Job queue changes frequently, very short cache time (1 minute)`
			`super().__init__(TrainingJobQueue, session, cache_ttl)`

			`async def enqueue_job(self, job_data: Dict[str, Any]) -> TrainingJobQueue:`
			`"""Add a job to the training queue"""`
			`try:`
			`# Validate job data`
			`validation_result = self._validate_training_data(`
			`job_data,`
			`["job_id", "tenant_id", "job_type"]`
			`)`

			`if not validation_result["is_valid"]:`
			`raise ValidationError(f"Invalid job data: {validation_result['errors']}")`

			`# Set default values`
			`if "priority" not in job_data:`
			`job_data["priority"] = 1`
			`if "status" not in job_data:`
			`job_data["status"] = "queued"`
			`if "max_retries" not in job_data:`
			`job_data["max_retries"] = 3`

			`# Create queue entry`
			`queued_job = await self.create(job_data)`

			`logger.info("Job enqueued",`
			`job_id=queued_job.job_id,`
			`tenant_id=queued_job.tenant_id,`
			`job_type=queued_job.job_type,`
			`priority=queued_job.priority)`

			`return queued_job`

			`except ValidationError:`
			`raise`
			`except Exception as e:`
			`logger.error("Failed to enqueue job",`
			`job_id=job_data.get("job_id"),`
			`error=str(e))`
			`raise DatabaseError(f"Failed to enqueue job: {str(e)}")`

			`async def get_next_job(self, job_types: List[str] = None) -> Optional[TrainingJobQueue]:`
			`"""Get the next job to process from the queue"""`
			`try:`
			`# Build filters for job types if specified`
			`filters = {"status": "queued"}`

			`if job_types:`
			`# For multiple job types, we need to use raw SQL`
			`job_types_str = "', '".join(job_types)`
			`query_text = f"""`
			`SELECT * FROM training_job_queue`
			`WHERE status = 'queued'`
			`AND job_type IN ('{job_types_str}')`
			`AND (scheduled_at IS NULL OR scheduled_at <= :now)`
			`ORDER BY priority DESC, created_at ASC`
			`LIMIT 1`
			`"""`

			`result = await self.session.execute(text(query_text), {"now": datetime.now()})`
			`row = result.fetchone()`

			`if row:`
			`record_dict = dict(row._mapping)`
			`return self.model(**record_dict)`
			`return None`
			`else:`
			`# Simple case - get any queued job`
			`jobs = await self.get_multi(`
			`filters=filters,`
			`limit=1,`
			`order_by="priority",`
			`order_desc=True`
			`)`
			`return jobs[0] if jobs else None`

			`except Exception as e:`
			`logger.error("Failed to get next job from queue",`
			`job_types=job_types,`
			`error=str(e))`
			`raise DatabaseError(f"Failed to get next job: {str(e)}")`

			`async def start_job(self, job_id: str) -> Optional[TrainingJobQueue]:`
			`"""Mark a job as started"""`
			`try:`
			`job = await self.get_by_job_id(job_id)`
			`if not job:`
			`logger.error(f"Job not found in queue: {job_id}")`
			`return None`

			`if job.status != "queued":`
			`logger.warning(f"Job {job_id} is not queued (status: {job.status})")`
			`return job`

			`updated_job = await self.update(job.id, {`
			`"status": "running",`
			`"started_at": datetime.now(),`
			`"updated_at": datetime.now()`
			`})`

			`logger.info("Job started",`
			`job_id=job_id,`
			`job_type=job.job_type)`

			`return updated_job`

			`except Exception as e:`
			`logger.error("Failed to start job",`
			`job_id=job_id,`
			`error=str(e))`
			`raise DatabaseError(f"Failed to start job: {str(e)}")`

			`async def complete_job(self, job_id: str) -> Optional[TrainingJobQueue]:`
			`"""Mark a job as completed"""`
			`try:`
			`job = await self.get_by_job_id(job_id)`
			`if not job:`
			`logger.error(f"Job not found in queue: {job_id}")`
			`return None`

			`updated_job = await self.update(job.id, {`
			`"status": "completed",`
			`"updated_at": datetime.now()`
			`})`

			`logger.info("Job completed",`
			`job_id=job_id,`
			`job_type=job.job_type if job else "unknown")`

			`return updated_job`

			`except Exception as e:`
			`logger.error("Failed to complete job",`
			`job_id=job_id,`
			`error=str(e))`
			`raise DatabaseError(f"Failed to complete job: {str(e)}")`

			`async def fail_job(self, job_id: str, error_message: str = None) -> Optional[TrainingJobQueue]:`
			`"""Mark a job as failed and handle retries"""`
			`try:`
			`job = await self.get_by_job_id(job_id)`
			`if not job:`
			`logger.error(f"Job not found in queue: {job_id}")`
			`return None`

			`# Increment retry count`
			`new_retry_count = job.retry_count + 1`

			`# Check if we should retry`
			`if new_retry_count < job.max_retries:`
			`# Reset to queued for retry`
			`updated_job = await self.update(job.id, {`
			`"status": "queued",`
			`"retry_count": new_retry_count,`
			`"updated_at": datetime.now(),`
			`"started_at": None # Reset started_at for retry`
			`})`

			`logger.info("Job failed, queued for retry",`
			`job_id=job_id,`
			`retry_count=new_retry_count,`
			`max_retries=job.max_retries)`
			`else:`
			`# Mark as permanently failed`
			`updated_job = await self.update(job.id, {`
			`"status": "failed",`
			`"retry_count": new_retry_count,`
			`"updated_at": datetime.now()`
			`})`

			`logger.error("Job permanently failed",`
			`job_id=job_id,`
			`retry_count=new_retry_count,`
			`error_message=error_message)`

			`return updated_job`

			`except Exception as e:`
			`logger.error("Failed to handle job failure",`
			`job_id=job_id,`
			`error=str(e))`
			`raise DatabaseError(f"Failed to handle job failure: {str(e)}")`

			`async def cancel_job(self, job_id: str, cancelled_by: str = None) -> Optional[TrainingJobQueue]:`
			`"""Cancel a job"""`
			`try:`
			`job = await self.get_by_job_id(job_id)`
			`if not job:`
			`logger.error(f"Job not found in queue: {job_id}")`
			`return None`

			`if job.status in ["completed", "failed"]:`
			`logger.warning(f"Cannot cancel job {job_id} with status {job.status}")`
			`return job`

			`updated_job = await self.update(job.id, {`
			`"status": "cancelled",`
			`"cancelled_by": cancelled_by,`
			`"updated_at": datetime.now()`
			`})`

			`logger.info("Job cancelled",`
			`job_id=job_id,`
			`cancelled_by=cancelled_by)`

			`return updated_job`

			`except Exception as e:`
			`logger.error("Failed to cancel job",`
			`job_id=job_id,`
			`error=str(e))`
			`raise DatabaseError(f"Failed to cancel job: {str(e)}")`

			`async def get_queue_status(self, tenant_id: str = None) -> Dict[str, Any]:`
			`"""Get queue status and statistics"""`
			`try:`
			`base_filters = {}`
			`if tenant_id:`
			`base_filters["tenant_id"] = tenant_id`

			`# Get counts by status`
			`queued_jobs = await self.count(filters={**base_filters, "status": "queued"})`
			`running_jobs = await self.count(filters={**base_filters, "status": "running"})`
			`completed_jobs = await self.count(filters={**base_filters, "status": "completed"})`
			`failed_jobs = await self.count(filters={**base_filters, "status": "failed"})`
			`cancelled_jobs = await self.count(filters={**base_filters, "status": "cancelled"})`

			`# Get jobs by type`
			`type_query = text(f"""`
			`SELECT job_type, COUNT(*) as count`
			`FROM training_job_queue`
			`WHERE 1=1`
			`{' AND tenant_id = :tenant_id' if tenant_id else ''}`
			`GROUP BY job_type`
			`ORDER BY count DESC`
			`""")`

			`params = {"tenant_id": tenant_id} if tenant_id else {}`
			`result = await self.session.execute(type_query, params)`
			`jobs_by_type = {row.job_type: row.count for row in result.fetchall()}`

			`# Get average wait time for completed jobs`
			`wait_time_query = text(f"""`
			`SELECT`
			`AVG(EXTRACT(EPOCH FROM (started_at - created_at))/60) as avg_wait_minutes`
			`FROM training_job_queue`
			`WHERE status = 'completed'`
			`AND started_at IS NOT NULL`
			`AND created_at IS NOT NULL`
			`{' AND tenant_id = :tenant_id' if tenant_id else ''}`
			`""")`

			`wait_result = await self.session.execute(wait_time_query, params)`
			`wait_row = wait_result.fetchone()`
			`avg_wait_time = float(wait_row.avg_wait_minutes) if wait_row and wait_row.avg_wait_minutes else 0.0`

			`return {`
			`"tenant_id": tenant_id,`
			`"queue_counts": {`
			`"queued": queued_jobs,`
			`"running": running_jobs,`
			`"completed": completed_jobs,`
			`"failed": failed_jobs,`
			`"cancelled": cancelled_jobs,`
			`"total": queued_jobs + running_jobs + completed_jobs + failed_jobs + cancelled_jobs`
			`},`
			`"jobs_by_type": jobs_by_type,`
			`"avg_wait_time_minutes": round(avg_wait_time, 2),`
			`"queue_health": {`
			`"has_queued_jobs": queued_jobs > 0,`
			`"has_running_jobs": running_jobs > 0,`
			`"failure_rate": round((failed_jobs / max(completed_jobs + failed_jobs, 1)) * 100, 2)`
			`}`
			`}`

			`except Exception as e:`
			`logger.error("Failed to get queue status",`
			`tenant_id=tenant_id,`
			`error=str(e))`
			`return {`
			`"tenant_id": tenant_id,`
			`"queue_counts": {`
			`"queued": 0, "running": 0, "completed": 0,`
			`"failed": 0, "cancelled": 0, "total": 0`
			`},`
			`"jobs_by_type": {},`
			`"avg_wait_time_minutes": 0.0,`
			`"queue_health": {`
			`"has_queued_jobs": False,`
			`"has_running_jobs": False,`
			`"failure_rate": 0.0`
			`}`
			`}`

			`async def get_jobs_by_tenant(`
			`self,`
			`tenant_id: str,`
			`status: str = None,`
			`job_type: str = None,`
			`skip: int = 0,`
			`limit: int = 100`
			`) -> List[TrainingJobQueue]:`
			`"""Get jobs for a tenant with optional filtering"""`
			`try:`
			`filters = {"tenant_id": tenant_id}`
			`if status:`
			`filters["status"] = status`
			`if job_type:`
			`filters["job_type"] = job_type`

			`return await self.get_multi(`
			`filters=filters,`
			`skip=skip,`
			`limit=limit,`
			`order_by="created_at",`
			`order_desc=True`
			`)`

			`except Exception as e:`
			`logger.error("Failed to get jobs by tenant",`
			`tenant_id=tenant_id,`
			`error=str(e))`
			`raise DatabaseError(f"Failed to get tenant jobs: {str(e)}")`

			`async def cleanup_old_jobs(self, days_old: int = 30, status_filter: str = None) -> int:`
			`"""Clean up old completed/failed/cancelled jobs"""`
			`try:`
			`cutoff_date = datetime.now() - timedelta(days=days_old)`

			`# Only clean up finished jobs by default`
			`default_statuses = ["completed", "failed", "cancelled"]`

			`if status_filter:`
			`status_condition = "status = :status"`
			`params = {"cutoff_date": cutoff_date, "status": status_filter}`
			`else:`
			`status_list = "', '".join(default_statuses)`
			`status_condition = f"status IN ('{status_list}')"`
			`params = {"cutoff_date": cutoff_date}`

			`query_text = f"""`
			`DELETE FROM training_job_queue`
			`WHERE created_at < :cutoff_date`
			`AND {status_condition}`
			`"""`

			`result = await self.session.execute(text(query_text), params)`
			`deleted_count = result.rowcount`

			`logger.info("Cleaned up old queue jobs",`
			`deleted_count=deleted_count,`
			`days_old=days_old,`
			`status_filter=status_filter)`

			`return deleted_count`

			`except Exception as e:`
			`logger.error("Failed to cleanup old queue jobs",`
			`error=str(e))`
			`raise DatabaseError(f"Queue cleanup failed: {str(e)}")`

			`async def get_stuck_jobs(self, hours_stuck: int = 2) -> List[TrainingJobQueue]:`
			`"""Get jobs that have been running for too long"""`
			`try:`
			`cutoff_time = datetime.now() - timedelta(hours=hours_stuck)`

			`query_text = """`
			`SELECT * FROM training_job_queue`
			`WHERE status = 'running'`
			`AND started_at IS NOT NULL`
			`AND started_at < :cutoff_time`
			`ORDER BY started_at ASC`
			`"""`

			`result = await self.session.execute(text(query_text), {"cutoff_time": cutoff_time})`

			`stuck_jobs = []`
			`for row in result.fetchall():`
			`record_dict = dict(row._mapping)`
			`job = self.model(**record_dict)`
			`stuck_jobs.append(job)`

			`if stuck_jobs:`
			`logger.warning("Found stuck jobs",`
			`count=len(stuck_jobs),`
			`hours_stuck=hours_stuck)`

			`return stuck_jobs`

			`except Exception as e:`
			`logger.error("Failed to get stuck jobs",`
			`hours_stuck=hours_stuck,`
			`error=str(e))`
			`return []`

			`async def reset_stuck_jobs(self, hours_stuck: int = 2) -> int:`
			`"""Reset stuck jobs back to queued status"""`
			`try:`
			`stuck_jobs = await self.get_stuck_jobs(hours_stuck)`
			`reset_count = 0`

			`for job in stuck_jobs:`
			`# Reset job to queued status`
			`await self.update(job.id, {`
			`"status": "queued",`
			`"started_at": None,`
			`"updated_at": datetime.now()`
			`})`
			`reset_count += 1`

			`if reset_count > 0:`
			`logger.info("Reset stuck jobs",`
			`reset_count=reset_count,`
			`hours_stuck=hours_stuck)`

			`return reset_count`

			`except Exception as e:`
			`logger.error("Failed to reset stuck jobs",`
			`hours_stuck=hours_stuck,`
			`error=str(e))`
			`raise DatabaseError(f"Failed to reset stuck jobs: {str(e)}")`