# ================================================================
# services/forecasting/app/services/historical_validation_service.py
# ================================================================

"""
Historical Validation Service

Handles validation backfill when historical sales data is uploaded late.
Detects gaps in validation coverage and automatically triggers validation
for periods where forecasts exist but haven't been validated yet.
"""
|
|
|
|
from typing import Dict, Any, List, Optional
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
from sqlalchemy import select, and_, func, Date, or_
|
|
from datetime import datetime, timedelta, timezone, date
|
|
import structlog
|
|
import uuid
|
|
|
|
from app.models.forecasts import Forecast
|
|
from app.models.validation_run import ValidationRun
|
|
from app.models.sales_data_update import SalesDataUpdate
|
|
from app.services.validation_service import ValidationService
|
|
from shared.database.exceptions import DatabaseError
|
|
|
|
logger = structlog.get_logger()
|
|
|
|
|
|
class HistoricalValidationService:
    """Service for backfilling historical validation when sales data arrives late.

    Detects date ranges where forecasts exist but were never validated, and
    triggers validation runs for those ranges — either on demand, as a bulk
    auto-backfill, or when a late sales-data upload is registered.
    """

    def __init__(self, db_session: AsyncSession):
        """
        Args:
            db_session: Async SQLAlchemy session, shared with the caller
                (commits/rollbacks here affect the caller's transaction).
        """
        self.db = db_session
        self.validation_service = ValidationService(db_session)

    @staticmethod
    def _group_consecutive_dates(sorted_dates: List[date]) -> List[Dict[str, Any]]:
        """Collapse an ascending list of dates into consecutive-day ranges.

        Args:
            sorted_dates: Calendar dates sorted ascending (may be empty).

        Returns:
            One dict per run of consecutive days, each with keys
            ``start_date``, ``end_date`` and ``days_count`` (inclusive).
        """
        if not sorted_dates:
            return []

        ranges: List[Dict[str, Any]] = []
        range_start = range_end = sorted_dates[0]

        for current in sorted_dates[1:]:
            if (current - range_end).days == 1:
                # Next calendar day: extend the current range.
                range_end = current
            else:
                # Non-consecutive: close the current range, start a new one.
                ranges.append({
                    "start_date": range_start,
                    "end_date": range_end,
                    "days_count": (range_end - range_start).days + 1
                })
                range_start = range_end = current

        # Close out the final (or only) range.
        ranges.append({
            "start_date": range_start,
            "end_date": range_end,
            "days_count": (range_end - range_start).days + 1
        })
        return ranges

    async def detect_validation_gaps(
        self,
        tenant_id: uuid.UUID,
        lookback_days: int = 90
    ) -> List[Dict[str, Any]]:
        """
        Detect date ranges where forecasts exist but haven't been validated

        Args:
            tenant_id: Tenant identifier
            lookback_days: How far back to check (default 90 days)

        Returns:
            List of gap periods (``start_date``, ``end_date``,
            ``days_count``), ordered chronologically.

        Raises:
            DatabaseError: If either coverage query fails.
        """
        try:
            end_date = datetime.now(timezone.utc)
            start_date = end_date - timedelta(days=lookback_days)

            logger.info(
                "Detecting validation gaps",
                tenant_id=tenant_id,
                start_date=start_date.isoformat(),
                end_date=end_date.isoformat()
            )

            # Distinct calendar dates that have at least one forecast.
            forecast_date_expr = func.cast(Forecast.forecast_date, Date)
            forecast_query = select(
                forecast_date_expr.label('forecast_date')
            ).where(
                and_(
                    Forecast.tenant_id == tenant_id,
                    Forecast.forecast_date >= start_date,
                    Forecast.forecast_date <= end_date
                )
            ).group_by(
                forecast_date_expr
            ).order_by(
                forecast_date_expr
            )

            forecast_result = await self.db.execute(forecast_query)
            forecast_dates = {row.forecast_date for row in forecast_result.fetchall()}

            if not forecast_dates:
                logger.info("No forecasts found in lookback period", tenant_id=tenant_id)
                return []

            # Distinct calendar dates covered by a completed validation run.
            # NOTE(review): only each run's *start* date counts as validated,
            # so a multi-day run marks just one day and the rest show up as
            # gaps — confirm this matches how ValidationRun rows are created.
            validated_date_expr = func.cast(ValidationRun.validation_start_date, Date)
            validation_query = select(
                validated_date_expr.label('validated_date')
            ).where(
                and_(
                    ValidationRun.tenant_id == tenant_id,
                    ValidationRun.status == "completed",
                    ValidationRun.validation_start_date >= start_date,
                    ValidationRun.validation_end_date <= end_date
                )
            ).group_by(
                validated_date_expr
            )

            validation_result = await self.db.execute(validation_query)
            validated_dates = {row.validated_date for row in validation_result.fetchall()}

            # Dates with forecasts but no completed validation.
            gap_dates = sorted(forecast_dates - validated_dates)

            if not gap_dates:
                logger.info("No validation gaps found", tenant_id=tenant_id)
                return []

            # Collapse consecutive days into contiguous gap ranges.
            gaps = self._group_consecutive_dates(gap_dates)

            logger.info(
                "Validation gaps detected",
                tenant_id=tenant_id,
                gaps_count=len(gaps),
                total_days=len(gap_dates)
            )

            return gaps

        except Exception as e:
            logger.error(
                "Failed to detect validation gaps",
                tenant_id=tenant_id,
                error=str(e)
            )
            raise DatabaseError(f"Failed to detect validation gaps: {str(e)}") from e

    async def backfill_validation(
        self,
        tenant_id: uuid.UUID,
        start_date: date,
        end_date: date,
        triggered_by: str = "manual",
        sales_data_update_id: Optional[uuid.UUID] = None
    ) -> Dict[str, Any]:
        """
        Backfill validation for a historical date range

        Args:
            tenant_id: Tenant identifier
            start_date: Start date for backfill
            end_date: End date for backfill
            triggered_by: How this backfill was triggered
            sales_data_update_id: Optional link to sales data update record

        Returns:
            Backfill results with validation summary

        Raises:
            DatabaseError: If the underlying validation run fails.
        """
        try:
            logger.info(
                "Starting validation backfill",
                tenant_id=tenant_id,
                start_date=start_date.isoformat(),
                end_date=end_date.isoformat(),
                triggered_by=triggered_by
            )

            # Expand the date range to full UTC days (00:00:00 .. 23:59:59.999999).
            start_datetime = datetime.combine(start_date, datetime.min.time()).replace(tzinfo=timezone.utc)
            end_datetime = datetime.combine(end_date, datetime.max.time()).replace(tzinfo=timezone.utc)

            # Run validation for the date range.
            validation_result = await self.validation_service.validate_date_range(
                tenant_id=tenant_id,
                start_date=start_datetime,
                end_date=end_datetime,
                orchestration_run_id=None,
                triggered_by=triggered_by
            )

            # Link the outcome back to the originating sales-data update, if any.
            if sales_data_update_id:
                await self._update_sales_data_record(
                    sales_data_update_id=sales_data_update_id,
                    validation_run_id=uuid.UUID(validation_result["validation_run_id"]),
                    status="completed" if validation_result["status"] == "completed" else "failed"
                )

            logger.info(
                "Validation backfill completed",
                tenant_id=tenant_id,
                validation_run_id=validation_result.get("validation_run_id"),
                forecasts_evaluated=validation_result.get("forecasts_evaluated")
            )

            return {
                **validation_result,
                "backfill_date_range": {
                    "start": start_date.isoformat(),
                    "end": end_date.isoformat()
                }
            }

        except Exception as e:
            logger.error(
                "Validation backfill failed",
                tenant_id=tenant_id,
                start_date=start_date.isoformat(),
                end_date=end_date.isoformat(),
                error=str(e)
            )

            # Best-effort: record the failure on the linked update row.
            if sales_data_update_id:
                await self._update_sales_data_record(
                    sales_data_update_id=sales_data_update_id,
                    validation_run_id=None,
                    status="failed",
                    error_message=str(e)
                )

            raise DatabaseError(f"Validation backfill failed: {str(e)}") from e

    async def auto_backfill_gaps(
        self,
        tenant_id: uuid.UUID,
        lookback_days: int = 90,
        max_gaps_to_process: int = 10
    ) -> Dict[str, Any]:
        """
        Automatically detect and backfill validation gaps

        Args:
            tenant_id: Tenant identifier
            lookback_days: How far back to check
            max_gaps_to_process: Maximum number of gaps to process in one run

        Returns:
            Summary of backfill operations

        Raises:
            DatabaseError: If gap detection fails (individual gap failures
            are captured per-gap in the returned results, not raised).
        """
        try:
            logger.info(
                "Starting auto backfill",
                tenant_id=tenant_id,
                lookback_days=lookback_days
            )

            # Detect gaps
            gaps = await self.detect_validation_gaps(tenant_id, lookback_days)

            if not gaps:
                return {
                    "gaps_found": 0,
                    "gaps_processed": 0,
                    "validations_completed": 0,
                    "message": "No validation gaps found"
                }

            # Cap the work done in a single run.
            gaps_to_process = gaps[:max_gaps_to_process]

            results = []
            for gap in gaps_to_process:
                try:
                    result = await self.backfill_validation(
                        tenant_id=tenant_id,
                        start_date=gap["start_date"],
                        end_date=gap["end_date"],
                        triggered_by="auto_backfill"
                    )
                    results.append({
                        "gap": gap,
                        "result": result,
                        "status": "success"
                    })
                except Exception as e:
                    # One failed gap must not abort the remaining gaps.
                    logger.error(
                        "Failed to backfill gap",
                        gap=gap,
                        error=str(e)
                    )
                    results.append({
                        "gap": gap,
                        "error": str(e),
                        "status": "failed"
                    })

            successful = sum(1 for r in results if r["status"] == "success")

            logger.info(
                "Auto backfill completed",
                tenant_id=tenant_id,
                gaps_found=len(gaps),
                gaps_processed=len(results),
                successful=successful
            )

            return {
                "gaps_found": len(gaps),
                "gaps_processed": len(results),
                "validations_completed": successful,
                "validations_failed": len(results) - successful,
                "results": results
            }

        except DatabaseError:
            # Already logged and wrapped by the failing sub-operation;
            # don't double-wrap the message.
            raise
        except Exception as e:
            logger.error(
                "Auto backfill failed",
                tenant_id=tenant_id,
                error=str(e)
            )
            raise DatabaseError(f"Auto backfill failed: {str(e)}") from e

    async def register_sales_data_update(
        self,
        tenant_id: uuid.UUID,
        start_date: date,
        end_date: date,
        records_affected: int,
        update_source: str = "import",
        import_job_id: Optional[str] = None,
        auto_trigger_validation: bool = True
    ) -> Dict[str, Any]:
        """
        Register a sales data update and optionally trigger validation

        Args:
            tenant_id: Tenant identifier
            start_date: Start date of updated data
            end_date: End date of updated data
            records_affected: Number of sales records affected
            update_source: Source of update (import, manual, pos_sync)
            import_job_id: Optional import job ID
            auto_trigger_validation: Whether to automatically trigger validation

        Returns:
            Update record and validation result if triggered

        Raises:
            DatabaseError: If the update record cannot be persisted. A
            validation-trigger failure is recorded on the row, not raised.
        """
        try:
            # Create sales data update record
            update_record = SalesDataUpdate(
                tenant_id=tenant_id,
                update_date_start=start_date,
                update_date_end=end_date,
                records_affected=records_affected,
                update_source=update_source,
                import_job_id=import_job_id,
                requires_validation=auto_trigger_validation,
                validation_status="pending" if auto_trigger_validation else "not_required"
            )

            self.db.add(update_record)
            # Flush (not commit) so update_record.id is populated.
            await self.db.flush()

            logger.info(
                "Registered sales data update",
                tenant_id=tenant_id,
                update_id=update_record.id,
                date_range=f"{start_date} to {end_date}",
                records_affected=records_affected
            )

            result = {
                "update_id": str(update_record.id),
                "update_record": update_record.to_dict(),
                "validation_triggered": False
            }

            # Trigger validation if requested
            if auto_trigger_validation:
                try:
                    validation_result = await self.backfill_validation(
                        tenant_id=tenant_id,
                        start_date=start_date,
                        end_date=end_date,
                        triggered_by="sales_data_update",
                        sales_data_update_id=update_record.id
                    )

                    result["validation_triggered"] = True
                    result["validation_result"] = validation_result

                    logger.info(
                        "Validation triggered for sales data update",
                        update_id=update_record.id,
                        validation_run_id=validation_result.get("validation_run_id")
                    )

                except Exception as e:
                    # The update itself is still registered; only the
                    # validation step failed. Record the failure and continue.
                    logger.error(
                        "Failed to trigger validation for sales data update",
                        update_id=update_record.id,
                        error=str(e)
                    )
                    update_record.validation_status = "failed"
                    update_record.validation_error = str(e)[:500]

            await self.db.commit()

            return result

        except Exception as e:
            logger.error(
                "Failed to register sales data update",
                tenant_id=tenant_id,
                error=str(e)
            )
            await self.db.rollback()
            raise DatabaseError(f"Failed to register sales data update: {str(e)}") from e

    async def _update_sales_data_record(
        self,
        sales_data_update_id: uuid.UUID,
        validation_run_id: Optional[uuid.UUID],
        status: str,
        error_message: Optional[str] = None
    ):
        """Update sales data update record with validation results.

        Best-effort: failures are logged and swallowed so that recording the
        outcome never masks the original validation error.

        Args:
            sales_data_update_id: Primary key of the row to update.
            validation_run_id: Run that validated this update (None on failure).
            status: New validation_status value ("completed" / "failed").
            error_message: Optional error text, truncated to 500 chars.
        """
        try:
            query = select(SalesDataUpdate).where(SalesDataUpdate.id == sales_data_update_id)
            result = await self.db.execute(query)
            update_record = result.scalar_one_or_none()

            if update_record:
                update_record.validation_status = status
                update_record.validation_run_id = validation_run_id
                # NOTE(review): validated_at is stamped even when status is
                # "failed" — confirm that is the intended semantics.
                update_record.validated_at = datetime.now(timezone.utc)
                if error_message:
                    update_record.validation_error = error_message[:500]

                await self.db.commit()

        except Exception as e:
            logger.error(
                "Failed to update sales data record",
                sales_data_update_id=sales_data_update_id,
                error=str(e)
            )

    async def get_pending_validations(
        self,
        tenant_id: uuid.UUID,
        limit: int = 50
    ) -> List[SalesDataUpdate]:
        """Get pending sales data updates that need validation.

        Args:
            tenant_id: Tenant identifier.
            limit: Maximum number of rows to return (oldest first).

        Returns:
            Pending SalesDataUpdate rows ordered by creation time.

        Raises:
            DatabaseError: If the query fails.
        """
        try:
            query = (
                select(SalesDataUpdate)
                .where(
                    and_(
                        SalesDataUpdate.tenant_id == tenant_id,
                        SalesDataUpdate.validation_status == "pending",
                        SalesDataUpdate.requires_validation.is_(True)
                    )
                )
                .order_by(SalesDataUpdate.created_at)
                .limit(limit)
            )

            result = await self.db.execute(query)
            return result.scalars().all()

        except Exception as e:
            logger.error(
                "Failed to get pending validations",
                tenant_id=tenant_id,
                error=str(e)
            )
            raise DatabaseError(f"Failed to get pending validations: {str(e)}") from e
|