# ================================================================
# services/forecasting/app/jobs/auto_backfill_job.py
# ================================================================
"""
Automated Backfill Job

Scheduled job to automatically detect and backfill validation gaps.
Can be run daily or weekly to ensure all historical forecasts are
validated.
"""

from typing import Dict, Any, List
from datetime import datetime, timezone
import structlog
import uuid

from app.services.historical_validation_service import HistoricalValidationService
from app.core.database import database_manager
from app.jobs.sales_data_listener import process_pending_validations

logger = structlog.get_logger()


async def auto_backfill_all_tenants(
    tenant_ids: List[uuid.UUID],
    lookback_days: int = 90,
    max_gaps_per_tenant: int = 5
) -> Dict[str, Any]:
    """
    Run auto backfill for multiple tenants.

    Args:
        tenant_ids: List of tenant IDs to process
        lookback_days: How far back to check for gaps
        max_gaps_per_tenant: Maximum number of gaps to process per tenant

    Returns:
        Summary of backfill operations across all tenants
    """
    try:
        logger.info(
            "Starting auto backfill for all tenants",
            tenant_count=len(tenant_ids),
            lookback_days=lookback_days
        )

        results = []
        total_gaps_found = 0
        total_gaps_processed = 0
        total_successful = 0

        for tenant_id in tenant_ids:
            # One tenant failing must not abort the whole batch: record
            # the error and continue with the remaining tenants.
            try:
                async with database_manager.get_session() as db:
                    service = HistoricalValidationService(db)

                    result = await service.auto_backfill_gaps(
                        tenant_id=tenant_id,
                        lookback_days=lookback_days,
                        max_gaps_to_process=max_gaps_per_tenant
                    )

                    results.append({
                        "tenant_id": str(tenant_id),
                        "status": "success",
                        **result
                    })

                    total_gaps_found += result.get("gaps_found", 0)
                    total_gaps_processed += result.get("gaps_processed", 0)
                    total_successful += result.get("validations_completed", 0)

            except Exception as e:
                logger.error(
                    "Failed to auto backfill for tenant",
                    tenant_id=tenant_id,
                    error=str(e)
                )
                results.append({
                    "tenant_id": str(tenant_id),
                    "status": "failed",
                    "error": str(e)
                })

        logger.info(
            "Auto backfill completed for all tenants",
            tenant_count=len(tenant_ids),
            total_gaps_found=total_gaps_found,
            total_gaps_processed=total_gaps_processed,
            total_successful=total_successful
        )

        return {
            "status": "completed",
            "tenants_processed": len(tenant_ids),
            "total_gaps_found": total_gaps_found,
            "total_gaps_processed": total_gaps_processed,
            "total_validations_completed": total_successful,
            "results": results
        }

    except Exception as e:
        logger.error(
            "Auto backfill job failed",
            error=str(e)
        )
        return {
            "status": "failed",
            "error": str(e)
        }


async def process_all_pending_validations(
    tenant_ids: List[uuid.UUID],
    max_per_tenant: int = 10
) -> Dict[str, Any]:
    """
    Process all pending validations for multiple tenants.

    Args:
        tenant_ids: List of tenant IDs to process
        max_per_tenant: Maximum pending validations to process per tenant

    Returns:
        Summary of processing results
    """
    try:
        logger.info(
            "Processing pending validations for all tenants",
            tenant_count=len(tenant_ids)
        )

        results = []
        total_pending = 0
        total_processed = 0
        total_successful = 0

        for tenant_id in tenant_ids:
            # Per-tenant isolation: a failure is recorded and skipped.
            try:
                result = await process_pending_validations(
                    tenant_id=tenant_id,
                    max_to_process=max_per_tenant
                )

                # "status": "success" added for consistency with
                # auto_backfill_all_tenants (backward-compatible key).
                results.append({
                    "tenant_id": str(tenant_id),
                    "status": "success",
                    **result
                })

                total_pending += result.get("pending_count", 0)
                total_processed += result.get("processed", 0)
                total_successful += result.get("successful", 0)

            except Exception as e:
                logger.error(
                    "Failed to process pending validations for tenant",
                    tenant_id=tenant_id,
                    error=str(e)
                )
                results.append({
                    "tenant_id": str(tenant_id),
                    "status": "failed",
                    "error": str(e)
                })

        logger.info(
            "Pending validations processed for all tenants",
            tenant_count=len(tenant_ids),
            total_pending=total_pending,
            total_processed=total_processed,
            total_successful=total_successful
        )

        return {
            "status": "completed",
            "tenants_processed": len(tenant_ids),
            "total_pending": total_pending,
            "total_processed": total_processed,
            "total_successful": total_successful,
            "results": results
        }

    except Exception as e:
        logger.error(
            "Failed to process all pending validations",
            error=str(e)
        )
        return {
            "status": "failed",
            "error": str(e)
        }


async def daily_validation_maintenance_job(
    tenant_ids: List[uuid.UUID]
) -> Dict[str, Any]:
    """
    Daily validation maintenance job.

    Combines gap detection/backfill and pending validation processing.
    Recommended to run once daily (e.g., 6:00 AM after orchestrator
    completes).

    Args:
        tenant_ids: List of tenant IDs to process

    Returns:
        Summary of all maintenance operations
    """
    try:
        logger.info(
            "Starting daily validation maintenance",
            tenant_count=len(tenant_ids),
            timestamp=datetime.now(timezone.utc).isoformat()
        )

        # Step 1: Process pending validations (retry failures)
        pending_result = await process_all_pending_validations(
            tenant_ids=tenant_ids,
            max_per_tenant=10
        )

        # Step 2: Auto backfill detected gaps
        backfill_result = await auto_backfill_all_tenants(
            tenant_ids=tenant_ids,
            lookback_days=90,
            max_gaps_per_tenant=5
        )

        logger.info(
            "Daily validation maintenance completed",
            pending_validations_processed=pending_result.get("total_processed", 0),
            gaps_backfilled=backfill_result.get("total_validations_completed", 0)
        )

        return {
            "status": "completed",
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "tenants_processed": len(tenant_ids),
            "pending_validations": pending_result,
            "gap_backfill": backfill_result,
            "summary": {
                "total_pending_processed": pending_result.get("total_processed", 0),
                "total_gaps_backfilled": backfill_result.get("total_validations_completed", 0),
                "total_validations": (
                    pending_result.get("total_processed", 0)
                    + backfill_result.get("total_validations_completed", 0)
                )
            }
        }

    except Exception as e:
        logger.error(
            "Daily validation maintenance failed",
            error=str(e)
        )
        return {
            "status": "failed",
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "error": str(e)
        }


# Convenience function for single tenant
async def run_validation_maintenance_for_tenant(
    tenant_id: uuid.UUID
) -> Dict[str, Any]:
    """
    Run validation maintenance for a single tenant.

    Args:
        tenant_id: Tenant identifier

    Returns:
        Maintenance results
    """
    return await daily_validation_maintenance_job([tenant_id])