Files
bakery-ia/services/forecasting/app/jobs/auto_backfill_job.py
2025-11-18 07:17:17 +01:00

276 lines
8.3 KiB
Python

# ================================================================
# services/forecasting/app/jobs/auto_backfill_job.py
# ================================================================
"""
Automated Backfill Job
Scheduled job to automatically detect and backfill validation gaps.
Can be run daily or weekly to ensure all historical forecasts are validated.
"""
from typing import Dict, Any, List
from datetime import datetime, timezone
import structlog
import uuid
from app.services.historical_validation_service import HistoricalValidationService
from app.core.database import database_manager
from app.jobs.sales_data_listener import process_pending_validations
# Module-level structlog logger used by every job function in this file.
logger = structlog.get_logger()
async def auto_backfill_all_tenants(
    tenant_ids: List[uuid.UUID],
    lookback_days: int = 90,
    max_gaps_per_tenant: int = 5
) -> Dict[str, Any]:
    """
    Run auto backfill for multiple tenants

    Tenants are processed sequentially, each inside its own
    ``database_manager.get_session()`` context. A failure for one tenant is
    logged and recorded in the results; processing then continues with the
    next tenant.

    Args:
        tenant_ids: List of tenant IDs to process
        lookback_days: How far back to check for gaps
        max_gaps_per_tenant: Maximum number of gaps to process per tenant

    Returns:
        Summary of backfill operations across all tenants. When the job runs
        to the end the dict contains "status": "completed", the aggregate
        counters and a per-tenant "results" list (one entry per tenant,
        either a success entry or a failed entry). If the job itself raises,
        the dict contains only "status": "failed" and "error".
    """
    try:
        logger.info(
            "Starting auto backfill for all tenants",
            tenant_count=len(tenant_ids),
            lookback_days=lookback_days
        )

        results = []
        total_gaps_found = 0
        total_gaps_processed = 0
        total_successful = 0

        for tenant_id in tenant_ids:
            try:
                async with database_manager.get_session() as db:
                    service = HistoricalValidationService(db)
                    result = await service.auto_backfill_gaps(
                        tenant_id=tenant_id,
                        lookback_days=lookback_days,
                        max_gaps_to_process=max_gaps_per_tenant
                    )

                # Stage the entry and the new totals without touching shared
                # state yet: if anything here (or the session exit above)
                # raises, the tenant must end up with exactly one "failed"
                # entry and must not contribute partial counts.
                entry = {
                    "tenant_id": str(tenant_id),
                    "status": "success",
                    **result
                }
                new_gaps_found = total_gaps_found + result.get("gaps_found", 0)
                new_gaps_processed = (
                    total_gaps_processed + result.get("gaps_processed", 0)
                )
                new_successful = (
                    total_successful + result.get("validations_completed", 0)
                )
            except Exception as e:
                logger.error(
                    "Failed to auto backfill for tenant",
                    tenant_id=tenant_id,
                    error=str(e),
                    exc_info=True
                )
                results.append({
                    "tenant_id": str(tenant_id),
                    "status": "failed",
                    "error": str(e)
                })
            else:
                # Commit the staged values only after everything succeeded.
                results.append(entry)
                total_gaps_found = new_gaps_found
                total_gaps_processed = new_gaps_processed
                total_successful = new_successful

        logger.info(
            "Auto backfill completed for all tenants",
            tenant_count=len(tenant_ids),
            total_gaps_found=total_gaps_found,
            total_gaps_processed=total_gaps_processed,
            total_successful=total_successful
        )

        return {
            "status": "completed",
            # Number of tenants attempted; failed tenants are included and
            # are identifiable through their "results" entry.
            "tenants_processed": len(tenant_ids),
            "total_gaps_found": total_gaps_found,
            "total_gaps_processed": total_gaps_processed,
            "total_validations_completed": total_successful,
            "results": results
        }
    except Exception as e:
        # Job-level boundary: report the failure to the caller as data
        # instead of letting the exception escape the scheduled job.
        logger.error(
            "Auto backfill job failed",
            error=str(e),
            exc_info=True
        )
        return {
            "status": "failed",
            "error": str(e)
        }
async def process_all_pending_validations(
    tenant_ids: List[uuid.UUID],
    max_per_tenant: int = 10
) -> Dict[str, Any]:
    """
    Process all pending validations for multiple tenants

    Tenants are processed sequentially by delegating to
    ``process_pending_validations``. A failure for one tenant is logged and
    recorded in the results; processing then continues with the next tenant.

    Args:
        tenant_ids: List of tenant IDs to process
        max_per_tenant: Maximum pending validations to process per tenant

    Returns:
        Summary of processing results. When the job runs to the end the dict
        contains "status": "completed", the aggregate counters and a
        per-tenant "results" list (one entry per tenant). If the job itself
        raises, the dict contains only "status": "failed" and "error".
    """
    try:
        logger.info(
            "Processing pending validations for all tenants",
            tenant_count=len(tenant_ids)
        )

        results = []
        total_pending = 0
        total_processed = 0
        total_successful = 0

        for tenant_id in tenant_ids:
            try:
                result = await process_pending_validations(
                    tenant_id=tenant_id,
                    max_to_process=max_per_tenant
                )

                # Stage the entry and the new totals first so that an error
                # while reading the result cannot leave both a normal entry
                # and a "failed" entry for the same tenant, nor partially
                # updated totals.
                entry = {
                    "tenant_id": str(tenant_id),
                    **result
                }
                new_pending = total_pending + result.get("pending_count", 0)
                new_processed = total_processed + result.get("processed", 0)
                new_successful = total_successful + result.get("successful", 0)
            except Exception as e:
                logger.error(
                    "Failed to process pending validations for tenant",
                    tenant_id=tenant_id,
                    error=str(e),
                    exc_info=True
                )
                results.append({
                    "tenant_id": str(tenant_id),
                    "status": "failed",
                    "error": str(e)
                })
            else:
                # Commit the staged values only after everything succeeded.
                results.append(entry)
                total_pending = new_pending
                total_processed = new_processed
                total_successful = new_successful

        logger.info(
            "Pending validations processed for all tenants",
            tenant_count=len(tenant_ids),
            total_pending=total_pending,
            total_processed=total_processed,
            total_successful=total_successful
        )

        return {
            "status": "completed",
            # Number of tenants attempted; failed tenants are included and
            # are identifiable through their "results" entry.
            "tenants_processed": len(tenant_ids),
            "total_pending": total_pending,
            "total_processed": total_processed,
            "total_successful": total_successful,
            "results": results
        }
    except Exception as e:
        # Job-level boundary: report the failure to the caller as data
        # instead of letting the exception escape the scheduled job.
        logger.error(
            "Failed to process all pending validations",
            error=str(e),
            exc_info=True
        )
        return {
            "status": "failed",
            "error": str(e)
        }
async def daily_validation_maintenance_job(
    tenant_ids: List[uuid.UUID],
    *,
    max_pending_per_tenant: int = 10,
    lookback_days: int = 90,
    max_gaps_per_tenant: int = 5
) -> Dict[str, Any]:
    """
    Daily validation maintenance job

    Combines gap detection/backfill and pending validation processing.
    Recommended to run once daily (e.g., 6:00 AM after orchestrator completes).

    Args:
        tenant_ids: List of tenant IDs to process
        max_pending_per_tenant: Maximum pending validations to process per
            tenant in step 1 (default 10)
        lookback_days: How far back step 2 checks for validation gaps
            (default 90)
        max_gaps_per_tenant: Maximum number of gaps step 2 processes per
            tenant (default 5)

    Returns:
        Summary of all maintenance operations: "status", "timestamp",
        "tenants_processed", the full result dict of each step under
        "pending_validations" and "gap_backfill", and a "summary" with the
        combined counters. If this job itself raises, the dict contains
        "status": "failed", "timestamp" and "error".
    """
    try:
        logger.info(
            "Starting daily validation maintenance",
            tenant_count=len(tenant_ids),
            timestamp=datetime.now(timezone.utc).isoformat()
        )

        # Step 1: Process pending validations (retry failures)
        pending_result = await process_all_pending_validations(
            tenant_ids=tenant_ids,
            max_per_tenant=max_pending_per_tenant
        )

        # Step 2: Auto backfill detected gaps
        backfill_result = await auto_backfill_all_tenants(
            tenant_ids=tenant_ids,
            lookback_days=lookback_days,
            max_gaps_per_tenant=max_gaps_per_tenant
        )

        # A step that failed as a whole returns a dict without counters, so
        # the lookups default to 0 in that case.
        pending_processed = pending_result.get("total_processed", 0)
        gaps_backfilled = backfill_result.get("total_validations_completed", 0)

        logger.info(
            "Daily validation maintenance completed",
            pending_validations_processed=pending_processed,
            gaps_backfilled=gaps_backfilled
        )

        return {
            "status": "completed",
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "tenants_processed": len(tenant_ids),
            "pending_validations": pending_result,
            "gap_backfill": backfill_result,
            "summary": {
                "total_pending_processed": pending_processed,
                "total_gaps_backfilled": gaps_backfilled,
                "total_validations": pending_processed + gaps_backfilled
            }
        }
    except Exception as e:
        # Job-level boundary: report the failure to the caller as data
        # instead of letting the exception escape the scheduled job.
        logger.error(
            "Daily validation maintenance failed",
            error=str(e),
            exc_info=True
        )
        return {
            "status": "failed",
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "error": str(e)
        }
# Single-tenant convenience wrapper
async def run_validation_maintenance_for_tenant(
    tenant_id: uuid.UUID
) -> Dict[str, Any]:
    """
    Run the daily validation maintenance job for exactly one tenant

    Args:
        tenant_id: Tenant identifier

    Returns:
        The result dict produced by daily_validation_maintenance_job when it
        is given a one-element tenant list
    """
    single_tenant_batch = [tenant_id]
    maintenance_result = await daily_validation_maintenance_job(single_tenant_batch)
    return maintenance_result