# bakery-ia/services/forecasting/app/jobs/auto_backfill_job.py

# ================================================================
# services/forecasting/app/jobs/auto_backfill_job.py
# ================================================================
"""
Automated Backfill Job

Scheduled job to automatically detect and backfill validation gaps.
Can be run daily or weekly to ensure all historical forecasts are validated.
"""

from typing import Dict, Any, List
from datetime import datetime, timezone
import structlog
import uuid

from app.services.historical_validation_service import HistoricalValidationService
from app.core.database import database_manager
from app.jobs.sales_data_listener import process_pending_validations

logger = structlog.get_logger()


async def auto_backfill_all_tenants(
    tenant_ids: List[uuid.UUID],
    lookback_days: int = 90,
    max_gaps_per_tenant: int = 5
) -> Dict[str, Any]:
    """
    Run auto backfill for multiple tenants

    Tenants are handled one at a time, each inside its own session obtained
    from ``database_manager.get_session()``. An exception raised for one
    tenant is logged and recorded as a "failed" entry; the loop then moves on
    to the next tenant.

    Args:
        tenant_ids: List of tenant IDs to process
        lookback_days: How far back to check for gaps
        max_gaps_per_tenant: Maximum number of gaps to process per tenant

    Returns:
        Summary of backfill operations across all tenants. On completion the
        dict holds "status" ("completed"), "tenants_processed" (every tenant
        attempted, failed ones included), "tenants_failed", the three totals
        and the per-tenant "results". If the job itself raises, only "status"
        ("failed") and "error" are returned.
    """
    try:
        logger.info(
            "Starting auto backfill for all tenants",
            tenant_count=len(tenant_ids),
            lookback_days=lookback_days
        )

        results = []
        total_gaps_found = 0
        total_gaps_processed = 0
        total_successful = 0
        tenants_failed = 0

        for tenant_id in tenant_ids:
            try:
                async with database_manager.get_session() as db:
                    service = HistoricalValidationService(db)

                    result = await service.auto_backfill_gaps(
                        tenant_id=tenant_id,
                        lookback_days=lookback_days,
                        max_gaps_to_process=max_gaps_per_tenant
                    )

                # Build the entry and the new totals in temporaries, and only
                # after the session block has exited. If anything here (or
                # the session exit itself) raises, no "success" entry or
                # half-updated total is left behind for this tenant.
                entry = {
                    "tenant_id": str(tenant_id),
                    "status": "success",
                    **result
                }
                gaps_found = total_gaps_found + result.get("gaps_found", 0)
                gaps_processed = (
                    total_gaps_processed + result.get("gaps_processed", 0)
                )
                successful = (
                    total_successful + result.get("validations_completed", 0)
                )

            except Exception as e:
                tenants_failed += 1
                logger.error(
                    "Failed to auto backfill for tenant",
                    tenant_id=str(tenant_id),
                    error=str(e)
                )
                results.append({
                    "tenant_id": str(tenant_id),
                    "status": "failed",
                    "error": str(e)
                })

            else:
                # Commit point: the tenant is recorded exactly once.
                results.append(entry)
                total_gaps_found = gaps_found
                total_gaps_processed = gaps_processed
                total_successful = successful

        logger.info(
            "Auto backfill completed for all tenants",
            tenant_count=len(tenant_ids),
            tenants_failed=tenants_failed,
            total_gaps_found=total_gaps_found,
            total_gaps_processed=total_gaps_processed,
            total_successful=total_successful
        )

        return {
            "status": "completed",
            "tenants_processed": len(tenant_ids),
            "tenants_failed": tenants_failed,
            "total_gaps_found": total_gaps_found,
            "total_gaps_processed": total_gaps_processed,
            "total_validations_completed": total_successful,
            "results": results
        }

    except Exception as e:
        # Job-level boundary: report the failure as a result dict instead of
        # raising to the caller.
        logger.error(
            "Auto backfill job failed",
            error=str(e)
        )
        return {
            "status": "failed",
            "error": str(e)
        }


async def process_all_pending_validations(
    tenant_ids: List[uuid.UUID],
    max_per_tenant: int = 10
) -> Dict[str, Any]:
    """
    Process all pending validations for multiple tenants

    Tenants are handled one at a time through
    ``process_pending_validations``. An exception raised for one tenant is
    logged and recorded as a "failed" entry; the loop then moves on to the
    next tenant.

    Args:
        tenant_ids: List of tenant IDs to process
        max_per_tenant: Maximum pending validations to process per tenant

    Returns:
        Summary of processing results. On completion the dict holds "status"
        ("completed"), "tenants_processed" (every tenant attempted, failed
        ones included), "tenants_failed", the three totals and the per-tenant
        "results". If the job itself raises, only "status" ("failed") and
        "error" are returned.
    """
    try:
        logger.info(
            "Processing pending validations for all tenants",
            tenant_count=len(tenant_ids)
        )

        results = []
        total_pending = 0
        total_processed = 0
        total_successful = 0
        tenants_failed = 0

        for tenant_id in tenant_ids:
            try:
                result = await process_pending_validations(
                    tenant_id=tenant_id,
                    max_to_process=max_per_tenant
                )

                # "status" defaults to "success" so every entry in "results"
                # carries a status; a status supplied by the helper's own
                # result still wins because **result is applied last.
                entry = {
                    "tenant_id": str(tenant_id),
                    "status": "success",
                    **result
                }
                # New totals go into temporaries so that a malformed result
                # cannot leave this tenant half-counted.
                pending = total_pending + result.get("pending_count", 0)
                processed = total_processed + result.get("processed", 0)
                successful = total_successful + result.get("successful", 0)

            except Exception as e:
                tenants_failed += 1
                logger.error(
                    "Failed to process pending validations for tenant",
                    tenant_id=str(tenant_id),
                    error=str(e)
                )
                results.append({
                    "tenant_id": str(tenant_id),
                    "status": "failed",
                    "error": str(e)
                })

            else:
                # Commit point: the tenant is recorded exactly once.
                results.append(entry)
                total_pending = pending
                total_processed = processed
                total_successful = successful

        logger.info(
            "Pending validations processed for all tenants",
            tenant_count=len(tenant_ids),
            tenants_failed=tenants_failed,
            total_pending=total_pending,
            total_processed=total_processed,
            total_successful=total_successful
        )

        return {
            "status": "completed",
            "tenants_processed": len(tenant_ids),
            "tenants_failed": tenants_failed,
            "total_pending": total_pending,
            "total_processed": total_processed,
            "total_successful": total_successful,
            "results": results
        }

    except Exception as e:
        # Job-level boundary: report the failure as a result dict instead of
        # raising to the caller.
        logger.error(
            "Failed to process all pending validations",
            error=str(e)
        )
        return {
            "status": "failed",
            "error": str(e)
        }


async def daily_validation_maintenance_job(
    tenant_ids: List[uuid.UUID],
    *,
    max_pending_per_tenant: int = 10,
    lookback_days: int = 90,
    max_gaps_per_tenant: int = 5
) -> Dict[str, Any]:
    """
    Daily validation maintenance job

    Combines gap detection/backfill and pending validation processing.
    Recommended to run once daily (e.g., 6:00 AM after orchestrator completes).

    Args:
        tenant_ids: List of tenant IDs to process
        max_pending_per_tenant: Maximum pending validations to process per
            tenant in step 1
        lookback_days: How far back step 2 checks for gaps
        max_gaps_per_tenant: Maximum number of gaps step 2 processes per
            tenant

    Returns:
        Summary of all maintenance operations: "status", "timestamp",
        "tenants_processed", the raw result of each step
        ("pending_validations", "gap_backfill") and a "summary" of counts.
        "status" is "failed" (with an "error" message) when either step
        reports a "failed" status or when this job itself raises; in the
        latter case only "status", "timestamp" and "error" are returned.
    """
    try:
        logger.info(
            "Starting daily validation maintenance",
            tenant_count=len(tenant_ids),
            timestamp=datetime.now(timezone.utc).isoformat()
        )

        # Step 1: Process pending validations (retry failures)
        pending_result = await process_all_pending_validations(
            tenant_ids=tenant_ids,
            max_per_tenant=max_pending_per_tenant
        )

        # Step 2: Auto backfill detected gaps
        backfill_result = await auto_backfill_all_tenants(
            tenant_ids=tenant_ids,
            lookback_days=lookback_days,
            max_gaps_per_tenant=max_gaps_per_tenant
        )

        pending_processed = pending_result.get("total_processed", 0)
        gaps_backfilled = backfill_result.get("total_validations_completed", 0)

        # Each step signals a job-level failure through a "failed" status in
        # its result dict rather than by raising, so it has to be checked
        # explicitly; otherwise the run would be reported as "completed"
        # with zero counts.
        stage_errors = [
            f"{stage}: {outcome.get('error', 'unknown error')}"
            for stage, outcome in (
                ("pending_validations", pending_result),
                ("gap_backfill", backfill_result),
            )
            if outcome.get("status") == "failed"
        ]

        if stage_errors:
            logger.error(
                "Daily validation maintenance failed",
                error="; ".join(stage_errors)
            )
        else:
            logger.info(
                "Daily validation maintenance completed",
                pending_validations_processed=pending_processed,
                gaps_backfilled=gaps_backfilled
            )

        outcome_report = {
            "status": "failed" if stage_errors else "completed",
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "tenants_processed": len(tenant_ids),
            "pending_validations": pending_result,
            "gap_backfill": backfill_result,
            "summary": {
                "total_pending_processed": pending_processed,
                "total_gaps_backfilled": gaps_backfilled,
                "total_validations": pending_processed + gaps_backfilled
            }
        }
        if stage_errors:
            outcome_report["error"] = "; ".join(stage_errors)

        return outcome_report

    except Exception as e:
        # Job-level boundary: report the failure as a result dict instead of
        # raising to the caller.
        logger.error(
            "Daily validation maintenance failed",
            error=str(e)
        )
        return {
            "status": "failed",
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "error": str(e)
        }


# Convenience function for single tenant
async def run_validation_maintenance_for_tenant(
    tenant_id: uuid.UUID
) -> Dict[str, Any]:
    """
    Run the daily validation maintenance job for one tenant

    Wraps the tenant ID in a one-element list and delegates to
    ``daily_validation_maintenance_job``.

    Args:
        tenant_id: Tenant identifier

    Returns:
        The result dict of ``daily_validation_maintenance_job``, unchanged
    """
    single_tenant_batch = [tenant_id]
    maintenance_outcome = await daily_validation_maintenance_job(
        single_tenant_batch
    )
    return maintenance_outcome