276 lines
8.3 KiB
Python
276 lines
8.3 KiB
Python
# ================================================================
|
|
# services/forecasting/app/jobs/auto_backfill_job.py
|
|
# ================================================================
|
|
"""
|
|
Automated Backfill Job
|
|
|
|
Scheduled job to automatically detect and backfill validation gaps.
|
|
Can be run daily or weekly to ensure all historical forecasts are validated.
|
|
"""
|
|
|
|
from typing import Dict, Any, List
|
|
from datetime import datetime, timezone
|
|
import structlog
|
|
import uuid
|
|
|
|
from app.services.historical_validation_service import HistoricalValidationService
|
|
from app.core.database import database_manager
|
|
from app.jobs.sales_data_listener import process_pending_validations
|
|
|
|
logger = structlog.get_logger()
|
|
|
|
|
|
async def auto_backfill_all_tenants(
    tenant_ids: List[uuid.UUID],
    lookback_days: int = 90,
    max_gaps_per_tenant: int = 5
) -> Dict[str, Any]:
    """
    Run auto backfill for multiple tenants

    Tenants are handled one after another, each inside its own session
    obtained from ``database_manager.get_session()``. An exception raised
    while handling one tenant is logged and recorded as a ``"failed"``
    entry; the loop then continues with the next tenant.

    Args:
        tenant_ids: List of tenant IDs to process
        lookback_days: How far back to check for gaps
        max_gaps_per_tenant: Maximum number of gaps to process per tenant

    Returns:
        Summary of backfill operations across all tenants. On normal
        completion the dict has ``status`` ``"completed"``, the aggregate
        counters and a per-tenant ``results`` list. ``tenants_processed``
        is ``len(tenant_ids)`` and therefore also counts tenants whose
        entry is ``"failed"``. If the job itself raises, a dict with
        ``status`` ``"failed"`` and ``error`` is returned instead; the
        exception is not re-raised.
    """
    try:
        logger.info(
            "Starting auto backfill for all tenants",
            tenant_count=len(tenant_ids),
            lookback_days=lookback_days
        )

        results: List[Dict[str, Any]] = []
        total_gaps_found = 0
        total_gaps_processed = 0
        total_successful = 0

        for tenant_id in tenant_ids:
            try:
                async with database_manager.get_session() as db:
                    service = HistoricalValidationService(db)

                    result = await service.auto_backfill_gaps(
                        tenant_id=tenant_id,
                        lookback_days=lookback_days,
                        max_gaps_to_process=max_gaps_per_tenant
                    )

                # Record the outcome only after the session context has
                # exited cleanly. If leaving the context raises, the tenant
                # must end up with a single "failed" entry, not a "success"
                # entry followed by a "failed" one with its counters
                # already added to the totals.
                results.append({
                    "tenant_id": str(tenant_id),
                    "status": "success",
                    # Keys returned by the service win over the two above.
                    **result
                })

                total_gaps_found += result.get("gaps_found", 0)
                total_gaps_processed += result.get("gaps_processed", 0)
                total_successful += result.get("validations_completed", 0)

            except Exception as e:
                # Per-tenant boundary: log with traceback and keep going so
                # one bad tenant does not stop the whole run.
                logger.error(
                    "Failed to auto backfill for tenant",
                    tenant_id=tenant_id,
                    error=str(e),
                    exc_info=True
                )
                results.append({
                    "tenant_id": str(tenant_id),
                    "status": "failed",
                    "error": str(e)
                })

        logger.info(
            "Auto backfill completed for all tenants",
            tenant_count=len(tenant_ids),
            total_gaps_found=total_gaps_found,
            total_gaps_processed=total_gaps_processed,
            total_successful=total_successful
        )

        return {
            "status": "completed",
            "tenants_processed": len(tenant_ids),
            "total_gaps_found": total_gaps_found,
            "total_gaps_processed": total_gaps_processed,
            "total_validations_completed": total_successful,
            "results": results
        }

    except Exception as e:
        # Job-level boundary: report the failure as a result dict instead
        # of raising.
        logger.error(
            "Auto backfill job failed",
            error=str(e),
            exc_info=True
        )
        return {
            "status": "failed",
            "error": str(e)
        }
|
|
|
|
|
|
async def process_all_pending_validations(
    tenant_ids: List[uuid.UUID],
    max_per_tenant: int = 10
) -> Dict[str, Any]:
    """
    Process all pending validations for multiple tenants

    Calls ``process_pending_validations`` once per tenant, in order. An
    exception raised for one tenant is logged and recorded as a
    ``"failed"`` entry; the loop then continues with the next tenant.

    Args:
        tenant_ids: List of tenant IDs to process
        max_per_tenant: Maximum pending validations to process per tenant

    Returns:
        Summary of processing results. On normal completion the dict has
        ``status`` ``"completed"``, the aggregate counters and a per-tenant
        ``results`` list. ``tenants_processed`` is ``len(tenant_ids)`` and
        therefore also counts tenants whose entry is ``"failed"``. If the
        job itself raises, a dict with ``status`` ``"failed"`` and
        ``error`` is returned instead; the exception is not re-raised.
    """
    try:
        logger.info(
            "Processing pending validations for all tenants",
            tenant_count=len(tenant_ids)
        )

        results: List[Dict[str, Any]] = []
        total_pending = 0
        total_processed = 0
        total_successful = 0

        for tenant_id in tenant_ids:
            try:
                result = await process_pending_validations(
                    tenant_id=tenant_id,
                    max_to_process=max_per_tenant
                )

                results.append({
                    "tenant_id": str(tenant_id),
                    # Default status so every entry in ``results`` carries
                    # one, matching the "failed" entries below. A status
                    # supplied by the callee still takes precedence.
                    "status": "success",
                    **result
                })

                total_pending += result.get("pending_count", 0)
                total_processed += result.get("processed", 0)
                total_successful += result.get("successful", 0)

            except Exception as e:
                # Per-tenant boundary: log with traceback and keep going so
                # one bad tenant does not stop the whole run.
                logger.error(
                    "Failed to process pending validations for tenant",
                    tenant_id=tenant_id,
                    error=str(e),
                    exc_info=True
                )
                results.append({
                    "tenant_id": str(tenant_id),
                    "status": "failed",
                    "error": str(e)
                })

        logger.info(
            "Pending validations processed for all tenants",
            tenant_count=len(tenant_ids),
            total_pending=total_pending,
            total_processed=total_processed,
            total_successful=total_successful
        )

        return {
            "status": "completed",
            "tenants_processed": len(tenant_ids),
            "total_pending": total_pending,
            "total_processed": total_processed,
            "total_successful": total_successful,
            "results": results
        }

    except Exception as e:
        # Job-level boundary: report the failure as a result dict instead
        # of raising.
        logger.error(
            "Failed to process all pending validations",
            error=str(e),
            exc_info=True
        )
        return {
            "status": "failed",
            "error": str(e)
        }
|
|
|
|
|
|
async def daily_validation_maintenance_job(
    tenant_ids: List[uuid.UUID],
    *,
    lookback_days: int = 90,
    max_gaps_per_tenant: int = 5,
    max_pending_per_tenant: int = 10
) -> Dict[str, Any]:
    """
    Daily validation maintenance job

    Combines gap detection/backfill and pending validation processing.
    Recommended to run once daily (e.g., 6:00 AM after orchestrator completes).

    Args:
        tenant_ids: List of tenant IDs to process
        lookback_days: How far back the backfill step checks for gaps
        max_gaps_per_tenant: Maximum number of gaps the backfill step
            processes per tenant
        max_pending_per_tenant: Maximum pending validations processed per
            tenant in the first step

    Returns:
        Summary of all maintenance operations. On normal completion the
        dict has ``status`` ``"completed"``, a UTC ISO-8601 ``timestamp``,
        the full result of each step under ``pending_validations`` and
        ``gap_backfill``, and a ``summary`` of their headline counters.
        If this function itself raises, a dict with ``status``
        ``"failed"``, ``timestamp`` and ``error`` is returned instead.
    """
    try:
        logger.info(
            "Starting daily validation maintenance",
            tenant_count=len(tenant_ids),
            timestamp=datetime.now(timezone.utc).isoformat()
        )

        # Step 1: Process pending validations (retry failures)
        pending_result = await process_all_pending_validations(
            tenant_ids=tenant_ids,
            max_per_tenant=max_pending_per_tenant
        )

        # Step 2: Auto backfill detected gaps
        backfill_result = await auto_backfill_all_tenants(
            tenant_ids=tenant_ids,
            lookback_days=lookback_days,
            max_gaps_per_tenant=max_gaps_per_tenant
        )

        # Both steps return a dict without these counters when they fail,
        # hence the 0 defaults.
        pending_processed = pending_result.get("total_processed", 0)
        gaps_backfilled = backfill_result.get("total_validations_completed", 0)

        logger.info(
            "Daily validation maintenance completed",
            pending_validations_processed=pending_processed,
            gaps_backfilled=gaps_backfilled
        )

        return {
            "status": "completed",
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "tenants_processed": len(tenant_ids),
            "pending_validations": pending_result,
            "gap_backfill": backfill_result,
            "summary": {
                "total_pending_processed": pending_processed,
                "total_gaps_backfilled": gaps_backfilled,
                "total_validations": pending_processed + gaps_backfilled
            }
        }

    except Exception as e:
        # Job-level boundary: report the failure as a result dict instead
        # of raising.
        logger.error(
            "Daily validation maintenance failed",
            error=str(e),
            exc_info=True
        )
        return {
            "status": "failed",
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "error": str(e)
        }
|
|
|
|
|
|
# Convenience function for single tenant
|
|
async def run_validation_maintenance_for_tenant(
    tenant_id: uuid.UUID
) -> Dict[str, Any]:
    """
    Run validation maintenance for a single tenant

    Delegates to ``daily_validation_maintenance_job`` with a one-element
    tenant list.

    Args:
        tenant_id: Tenant identifier

    Returns:
        Maintenance results, exactly as returned by
        ``daily_validation_maintenance_job``
    """
    single_tenant_batch = [tenant_id]
    maintenance_result = await daily_validation_maintenance_job(single_tenant_batch)
    return maintenance_result
|