# bakery-ia/services/alert_processor/app/jobs/priority_recalculation.py
"""
Priority Recalculation Job
Scheduled job that recalculates priority scores for active alerts,
applying time-based escalation boosts.
Runs hourly to ensure stale actions get escalated appropriately.
"""
import structlog
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional
from uuid import UUID

from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession

from app.models.events import Alert, AlertStatus
from app.services.enrichment.priority_scoring import PriorityScoringService
from shared.schemas.alert_types import UrgencyContext

logger = structlog.get_logger()
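
# Flow overview: resolve the tenants that currently have active
# action-needed alerts, then walk each tenant's alerts in fixed-size
# batches (one DB session and one commit per batch), applying a
# time-based escalation boost to each alert's priority score.
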
class PriorityRecalculationJob:
    """Recalculates alert priorities with time-based escalation."""

    def __init__(self, config, db_manager, redis_client):
        self.config = config
        self.db_manager = db_manager
        self.redis = redis_client
        self.priority_service = PriorityScoringService(config)
    async def run(self, tenant_id: Optional[UUID] = None) -> Dict[str, int]:
        """
        Recalculate priorities for all active action-needed alerts.

        Args:
            tenant_id: Optional tenant filter. If None, runs for all tenants.

        Returns:
            Dict with counts: {'processed': int, 'escalated': int, 'errors': int}
        """
        logger.info(
            "Starting priority recalculation job",
            tenant_id=str(tenant_id) if tenant_id else "all"
        )
        counts = {'processed': 0, 'escalated': 0, 'errors': 0}
        try:
            # Process alerts in fixed-size batches to bound memory use and
            # keep each transaction short enough to avoid timeouts.
            batch_size = 50

            # Resolve the tenants to process.
            tenant_ids = [tenant_id] if tenant_id else await self._get_tenant_ids()

            for current_tenant_id in tenant_ids:
                offset = 0
                while True:
                    async with self.db_manager.get_session() as session:
                        # Fetch the next batch of active alerts. Offset paging
                        # is stable here because neither the filter columns nor
                        # the action_created_at ordering are changed by the
                        # score updates this job makes.
                        alerts_batch = await self._get_active_alerts_batch(
                            session, current_tenant_id, offset, batch_size
                        )
                        if not alerts_batch:
                            break  # No more alerts to process

                        logger.info(
                            "Processing alert batch",
                            tenant_id=str(current_tenant_id),
                            batch_size=len(alerts_batch),
                            offset=offset
                        )
                        for alert in alerts_batch:
                            try:
                                result = await self._recalculate_alert_priority(session, alert)
                                counts['processed'] += 1
                                if result['escalated']:
                                    counts['escalated'] += 1
                            except Exception as e:
                                logger.error(
                                    "Error recalculating alert priority",
                                    alert_id=str(alert.id),
                                    error=str(e)
                                )
                                counts['errors'] += 1

                        # Commit this batch before moving on.
                        await session.commit()

                    offset += batch_size

                    # Log progress every 10 batches.
                    if offset % (batch_size * 10) == 0:
                        logger.info(
                            "Priority recalculation progress update",
                            tenant_id=str(current_tenant_id),
                            processed=counts['processed'],
                            escalated=counts['escalated'],
                            errors=counts['errors']
                        )

                logger.info(
                    "Tenant priority recalculation completed",
                    tenant_id=str(current_tenant_id),
                    processed=counts['processed'],
                    escalated=counts['escalated'],
                    errors=counts['errors']
                )

            logger.info("Priority recalculation completed for all tenants", **counts)
        except Exception as e:
            logger.error("Priority recalculation job failed", error=str(e))
            counts['errors'] += 1
        return counts
    async def _get_active_alerts(
        self,
        session: AsyncSession,
        tenant_id: Optional[UUID] = None
    ) -> List[Alert]:
        """
        Get all active alerts that need priority recalculation
        (unbatched variant of _get_active_alerts_batch).

        Filters:
        - Status: active
        - Type class: action_needed (only these can escalate)
        - Has action_created_at set
        """
        stmt = select(Alert).where(
            Alert.status == AlertStatus.ACTIVE,
            Alert.type_class == 'action_needed',
            Alert.action_created_at.isnot(None),
            Alert.hidden_from_ui.is_(False)
        )
        if tenant_id:
            stmt = stmt.where(Alert.tenant_id == tenant_id)
        # Order by oldest first (most likely to need escalation)
        stmt = stmt.order_by(Alert.action_created_at.asc())
        result = await session.execute(stmt)
        return list(result.scalars().all())
    async def _get_tenant_ids(self) -> List[UUID]:
        """Get all unique tenant IDs that have active alerts needing recalculation."""
        async with self.db_manager.get_session() as session:
            stmt = select(Alert.tenant_id).distinct().where(
                Alert.status == AlertStatus.ACTIVE,
                Alert.type_class == 'action_needed',
                Alert.action_created_at.isnot(None),
                Alert.hidden_from_ui.is_(False)
            )
            result = await session.execute(stmt)
            return list(result.scalars().all())
    async def _get_active_alerts_batch(
        self,
        session: AsyncSession,
        tenant_id: UUID,
        offset: int,
        limit: int
    ) -> List[Alert]:
        """
        Get one batch of active alerts that need priority recalculation.

        Filters:
        - Status: active
        - Type class: action_needed (only these can escalate)
        - Has action_created_at set
        """
        stmt = select(Alert).where(
            Alert.status == AlertStatus.ACTIVE,
            Alert.type_class == 'action_needed',
            Alert.action_created_at.isnot(None),
            Alert.hidden_from_ui.is_(False)
        )
        if tenant_id:
            stmt = stmt.where(Alert.tenant_id == tenant_id)
        # Order by oldest first (most likely to need escalation),
        # then page with offset/limit.
        stmt = stmt.order_by(Alert.action_created_at.asc()).offset(offset).limit(limit)
        result = await session.execute(stmt)
        return list(result.scalars().all())
    async def _recalculate_alert_priority(
        self,
        session: AsyncSession,
        alert: Alert
    ) -> Dict[str, Any]:
        """
        Recalculate priority for a single alert with escalation boost.

        Returns:
            Dict with 'old_score', 'new_score', 'escalated' (bool)
        """
        old_score = alert.priority_score

        # Build urgency context from alert metadata
        urgency_context = None
        if alert.urgency_context:
            urgency_context = UrgencyContext(**alert.urgency_context)

        # Calculate the time-based escalation boost
        boost = self.priority_service.calculate_escalation_boost(
            action_created_at=alert.action_created_at,
            urgency_context=urgency_context,
            current_priority=old_score
        )

        # Apply boost, capped at 100 (e.g. old_score=62, boost=15 -> 77)
        new_score = min(100, old_score + boost)

        # Update only if the score actually changed
        if new_score != old_score:
            old_level = self.priority_service.get_priority_level(old_score)
            new_level = self.priority_service.get_priority_level(new_score)
            alert.priority_score = new_score
            alert.priority_level = new_level
            alert.updated_at = datetime.now(timezone.utc)

            # Record escalation metadata. Reassign the whole dict rather than
            # mutating it in place so SQLAlchemy detects the change even if
            # the column is a plain (non-mutable-tracked) JSON type.
            alert.alert_metadata = {
                **(alert.alert_metadata or {}),
                'escalation': {
                    'original_score': old_score,
                    'boost_applied': boost,
                    'escalated_at': datetime.now(timezone.utc).isoformat(),
                    'reason': 'time_based_escalation'
                }
            }

            # Invalidate the cached copy of this alert
            cache_key = f"alert:{alert.tenant_id}:{alert.id}"
            await self.redis.delete(cache_key)

            logger.info(
                "Alert priority escalated",
                alert_id=str(alert.id),
                old_score=old_score,
                new_score=new_score,
                boost=boost,
                old_level=old_level,
                new_level=new_level
            )
            return {
                'old_score': old_score,
                'new_score': new_score,
                'escalated': True
            }

        return {
            'old_score': old_score,
            'new_score': new_score,
            'escalated': False
        }
    async def run_for_all_tenants(self) -> Dict[str, Dict[str, int]]:
        """
        Run recalculation for all tenants.

        Returns:
            Dict mapping tenant_id to counts
        """
        logger.info("Running priority recalculation for all tenants")
        all_results = {}
        try:
            # Resolve the unique tenant IDs that have active alerts
            tenant_ids = await self._get_tenant_ids()
            logger.info("Found tenants with active alerts", tenant_count=len(tenant_ids))

            for tenant_id in tenant_ids:
                try:
                    counts = await self.run(tenant_id)
                    all_results[str(tenant_id)] = counts
                except Exception as e:
                    logger.error(
                        "Error processing tenant",
                        tenant_id=str(tenant_id),
                        error=str(e)
                    )

            total_processed = sum(r['processed'] for r in all_results.values())
            total_escalated = sum(r['escalated'] for r in all_results.values())
            total_errors = sum(r['errors'] for r in all_results.values())
            logger.info(
                "All tenants processed",
                tenants=len(all_results),
                total_processed=total_processed,
                total_escalated=total_escalated,
                total_errors=total_errors
            )
        except Exception as e:
            logger.error("Failed to run for all tenants", error=str(e))
        return all_results

async def run_priority_recalculation_job(config, db_manager, redis_client):
    """
    Main entry point for the scheduled job.

    This is called by the scheduler (cron/celery/etc).
    """
    job = PriorityRecalculationJob(config, db_manager, redis_client)
    return await job.run_for_all_tenants()
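

# ---------------------------------------------------------------------------
# Usage sketch (illustrative only). How `config`, `db_manager`, and
# `redis_client` are constructed is application-specific; the
# `build_app_dependencies` import below is a hypothetical stand-in for this
# service's real bootstrap wiring, as is running the module directly. In
# production, the scheduler (cron/celery/etc.) calls
# run_priority_recalculation_job instead.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    import asyncio

    async def _main():
        # Hypothetical factory -- replace with the service's actual bootstrap.
        from app.core.bootstrap import build_app_dependencies  # hypothetical
        config, db_manager, redis_client = await build_app_dependencies()
        results = await run_priority_recalculation_job(config, db_manager, redis_client)
        logger.info("One-shot priority recalculation finished", results=results)

    asyncio.run(_main())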