""" Priority Recalculation Job Scheduled job that recalculates priority scores for active alerts, applying time-based escalation boosts. Runs hourly to ensure stale actions get escalated appropriately. """ import structlog from datetime import datetime, timedelta, timezone from typing import Dict, List from uuid import UUID from sqlalchemy import select, update from sqlalchemy.ext.asyncio import AsyncSession from app.models.events import Alert, AlertStatus from app.services.enrichment.priority_scoring import PriorityScoringService from shared.schemas.alert_types import UrgencyContext logger = structlog.get_logger() class PriorityRecalculationJob: """Recalculates alert priorities with time-based escalation""" def __init__(self, config, db_manager, redis_client): self.config = config self.db_manager = db_manager self.redis = redis_client self.priority_service = PriorityScoringService(config) async def run(self, tenant_id: UUID = None) -> Dict[str, int]: """ Recalculate priorities for all active action-needed alerts. Args: tenant_id: Optional tenant filter. If None, runs for all tenants. Returns: Dict with counts: { 'processed': int, 'escalated': int, 'errors': int } """ logger.info("Starting priority recalculation job", tenant_id=str(tenant_id) if tenant_id else "all") counts = { 'processed': 0, 'escalated': 0, 'errors': 0 } try: # Process alerts in batches to avoid memory issues and timeouts batch_size = 50 # Process 50 alerts at a time to prevent timeouts # Get tenant IDs to process tenant_ids = [tenant_id] if tenant_id else await self._get_tenant_ids() for current_tenant_id in tenant_ids: offset = 0 while True: async with self.db_manager.get_session() as session: # Get a batch of active alerts alerts_batch = await self._get_active_alerts_batch(session, current_tenant_id, offset, batch_size) if not alerts_batch: break # No more alerts to process logger.info(f"Processing batch of {len(alerts_batch)} alerts for tenant {current_tenant_id}, offset {offset}") for alert in alerts_batch: try: result = await self._recalculate_alert_priority(session, alert) counts['processed'] += 1 if result['escalated']: counts['escalated'] += 1 except Exception as e: logger.error( "Error recalculating alert priority", alert_id=str(alert.id), error=str(e) ) counts['errors'] += 1 # Commit this batch await session.commit() # Update offset for next batch offset += batch_size # Log progress periodically if offset % (batch_size * 10) == 0: # Every 10 batches logger.info( "Priority recalculation progress update", tenant_id=str(current_tenant_id), processed=counts['processed'], escalated=counts['escalated'], errors=counts['errors'] ) logger.info( "Tenant priority recalculation completed", tenant_id=str(current_tenant_id), processed=counts['processed'], escalated=counts['escalated'], errors=counts['errors'] ) logger.info( "Priority recalculation completed for all tenants", **counts ) except Exception as e: logger.error( "Priority recalculation job failed", error=str(e) ) counts['errors'] += 1 return counts async def _get_active_alerts( self, session: AsyncSession, tenant_id: UUID = None ) -> List[Alert]: """ Get all active alerts that need priority recalculation. Filters: - Status: active - Type class: action_needed (only these can escalate) - Has action_created_at set """ stmt = select(Alert).where( Alert.status == AlertStatus.ACTIVE, Alert.type_class == 'action_needed', Alert.action_created_at.isnot(None), Alert.hidden_from_ui == False ) if tenant_id: stmt = stmt.where(Alert.tenant_id == tenant_id) # Order by oldest first (most likely to need escalation) stmt = stmt.order_by(Alert.action_created_at.asc()) result = await session.execute(stmt) return result.scalars().all() async def _get_tenant_ids(self) -> List[UUID]: """ Get all unique tenant IDs that have active alerts that need recalculation. """ async with self.db_manager.get_session() as session: # Get unique tenant IDs with active alerts stmt = select(Alert.tenant_id).distinct().where( Alert.status == AlertStatus.ACTIVE, Alert.type_class == 'action_needed', Alert.action_created_at.isnot(None), Alert.hidden_from_ui == False ) result = await session.execute(stmt) tenant_ids = result.scalars().all() return tenant_ids async def _get_active_alerts_batch( self, session: AsyncSession, tenant_id: UUID, offset: int, limit: int ) -> List[Alert]: """ Get a batch of active alerts that need priority recalculation. Filters: - Status: active - Type class: action_needed (only these can escalate) - Has action_created_at set """ stmt = select(Alert).where( Alert.status == AlertStatus.ACTIVE, Alert.type_class == 'action_needed', Alert.action_created_at.isnot(None), Alert.hidden_from_ui == False ) if tenant_id: stmt = stmt.where(Alert.tenant_id == tenant_id) # Order by oldest first (most likely to need escalation) stmt = stmt.order_by(Alert.action_created_at.asc()) # Apply offset and limit for batching stmt = stmt.offset(offset).limit(limit) result = await session.execute(stmt) return result.scalars().all() async def _recalculate_alert_priority( self, session: AsyncSession, alert: Alert ) -> Dict[str, any]: """ Recalculate priority for a single alert with escalation boost. Returns: Dict with 'old_score', 'new_score', 'escalated' (bool) """ old_score = alert.priority_score # Build urgency context from alert metadata urgency_context = None if alert.urgency_context: urgency_context = UrgencyContext(**alert.urgency_context) # Calculate escalation boost boost = self.priority_service.calculate_escalation_boost( action_created_at=alert.action_created_at, urgency_context=urgency_context, current_priority=old_score ) # Apply boost new_score = min(100, old_score + boost) # Update if score changed if new_score != old_score: # Update priority score and level new_level = self.priority_service.get_priority_level(new_score) alert.priority_score = new_score alert.priority_level = new_level alert.updated_at = datetime.now(timezone.utc) # Add escalation metadata if not alert.alert_metadata: alert.alert_metadata = {} alert.alert_metadata['escalation'] = { 'original_score': old_score, 'boost_applied': boost, 'escalated_at': datetime.now(timezone.utc).isoformat(), 'reason': 'time_based_escalation' } # Invalidate cache cache_key = f"alert:{alert.tenant_id}:{alert.id}" await self.redis.delete(cache_key) logger.info( "Alert priority escalated", alert_id=str(alert.id), old_score=old_score, new_score=new_score, boost=boost, old_level=alert.priority_level if old_score == new_score else self.priority_service.get_priority_level(old_score), new_level=new_level ) return { 'old_score': old_score, 'new_score': new_score, 'escalated': True } return { 'old_score': old_score, 'new_score': new_score, 'escalated': False } async def run_for_all_tenants(self) -> Dict[str, Dict[str, int]]: """ Run recalculation for all tenants. Returns: Dict mapping tenant_id to counts """ logger.info("Running priority recalculation for all tenants") all_results = {} try: # Get unique tenant IDs with active alerts using the new efficient method tenant_ids = await self._get_tenant_ids() logger.info(f"Found {len(tenant_ids)} tenants with active alerts") for tenant_id in tenant_ids: try: counts = await self.run(tenant_id) all_results[str(tenant_id)] = counts except Exception as e: logger.error( "Error processing tenant", tenant_id=str(tenant_id), error=str(e) ) total_processed = sum(r['processed'] for r in all_results.values()) total_escalated = sum(r['escalated'] for r in all_results.values()) total_errors = sum(r['errors'] for r in all_results.values()) logger.info( "All tenants processed", tenants=len(all_results), total_processed=total_processed, total_escalated=total_escalated, total_errors=total_errors ) except Exception as e: logger.error( "Failed to run for all tenants", error=str(e) ) return all_results async def run_priority_recalculation_job(config, db_manager, redis_client): """ Main entry point for scheduled job. This is called by the scheduler (cron/celery/etc). """ job = PriorityRecalculationJob(config, db_manager, redis_client) return await job.run_for_all_tenants()