Improve backend
This commit is contained in:
@@ -0,0 +1,480 @@
|
||||
# ================================================================
|
||||
# services/forecasting/app/services/historical_validation_service.py
|
||||
# ================================================================
|
||||
"""
|
||||
Historical Validation Service
|
||||
|
||||
Handles validation backfill when historical sales data is uploaded late.
|
||||
Detects gaps in validation coverage and automatically triggers validation
|
||||
for periods where forecasts exist but haven't been validated yet.
|
||||
"""
|
||||
|
||||
from typing import Dict, Any, List, Optional
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy import select, and_, func, Date, or_
|
||||
from datetime import datetime, timedelta, timezone, date
|
||||
import structlog
|
||||
import uuid
|
||||
|
||||
from app.models.forecasts import Forecast
|
||||
from app.models.validation_run import ValidationRun
|
||||
from app.models.sales_data_update import SalesDataUpdate
|
||||
from app.services.validation_service import ValidationService
|
||||
from shared.database.exceptions import DatabaseError
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
|
||||
class HistoricalValidationService:
|
||||
"""Service for backfilling historical validation when sales data arrives late"""
|
||||
|
||||
def __init__(self, db_session: AsyncSession):
|
||||
self.db = db_session
|
||||
self.validation_service = ValidationService(db_session)
|
||||
|
||||
async def detect_validation_gaps(
|
||||
self,
|
||||
tenant_id: uuid.UUID,
|
||||
lookback_days: int = 90
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Detect date ranges where forecasts exist but haven't been validated
|
||||
|
||||
Args:
|
||||
tenant_id: Tenant identifier
|
||||
lookback_days: How far back to check (default 90 days)
|
||||
|
||||
Returns:
|
||||
List of gap periods with date ranges
|
||||
"""
|
||||
try:
|
||||
end_date = datetime.now(timezone.utc)
|
||||
start_date = end_date - timedelta(days=lookback_days)
|
||||
|
||||
logger.info(
|
||||
"Detecting validation gaps",
|
||||
tenant_id=tenant_id,
|
||||
start_date=start_date.isoformat(),
|
||||
end_date=end_date.isoformat()
|
||||
)
|
||||
|
||||
# Get all dates with forecasts
|
||||
forecast_query = select(
|
||||
func.cast(Forecast.forecast_date, Date).label('forecast_date')
|
||||
).where(
|
||||
and_(
|
||||
Forecast.tenant_id == tenant_id,
|
||||
Forecast.forecast_date >= start_date,
|
||||
Forecast.forecast_date <= end_date
|
||||
)
|
||||
).group_by(
|
||||
func.cast(Forecast.forecast_date, Date)
|
||||
).order_by(
|
||||
func.cast(Forecast.forecast_date, Date)
|
||||
)
|
||||
|
||||
forecast_result = await self.db.execute(forecast_query)
|
||||
forecast_dates = {row.forecast_date for row in forecast_result.fetchall()}
|
||||
|
||||
if not forecast_dates:
|
||||
logger.info("No forecasts found in lookback period", tenant_id=tenant_id)
|
||||
return []
|
||||
|
||||
# Get all dates that have been validated
|
||||
validation_query = select(
|
||||
func.cast(ValidationRun.validation_start_date, Date).label('validated_date')
|
||||
).where(
|
||||
and_(
|
||||
ValidationRun.tenant_id == tenant_id,
|
||||
ValidationRun.status == "completed",
|
||||
ValidationRun.validation_start_date >= start_date,
|
||||
ValidationRun.validation_end_date <= end_date
|
||||
)
|
||||
).group_by(
|
||||
func.cast(ValidationRun.validation_start_date, Date)
|
||||
)
|
||||
|
||||
validation_result = await self.db.execute(validation_query)
|
||||
validated_dates = {row.validated_date for row in validation_result.fetchall()}
|
||||
|
||||
# Find gaps (dates with forecasts but no validation)
|
||||
gap_dates = sorted(forecast_dates - validated_dates)
|
||||
|
||||
if not gap_dates:
|
||||
logger.info("No validation gaps found", tenant_id=tenant_id)
|
||||
return []
|
||||
|
||||
# Group consecutive dates into ranges
|
||||
gaps = []
|
||||
current_gap_start = gap_dates[0]
|
||||
current_gap_end = gap_dates[0]
|
||||
|
||||
for i in range(1, len(gap_dates)):
|
||||
if (gap_dates[i] - current_gap_end).days == 1:
|
||||
# Consecutive date, extend current gap
|
||||
current_gap_end = gap_dates[i]
|
||||
else:
|
||||
# Gap in dates, save current gap and start new one
|
||||
gaps.append({
|
||||
"start_date": current_gap_start,
|
||||
"end_date": current_gap_end,
|
||||
"days_count": (current_gap_end - current_gap_start).days + 1
|
||||
})
|
||||
current_gap_start = gap_dates[i]
|
||||
current_gap_end = gap_dates[i]
|
||||
|
||||
# Don't forget the last gap
|
||||
gaps.append({
|
||||
"start_date": current_gap_start,
|
||||
"end_date": current_gap_end,
|
||||
"days_count": (current_gap_end - current_gap_start).days + 1
|
||||
})
|
||||
|
||||
logger.info(
|
||||
"Validation gaps detected",
|
||||
tenant_id=tenant_id,
|
||||
gaps_count=len(gaps),
|
||||
total_days=len(gap_dates)
|
||||
)
|
||||
|
||||
return gaps
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"Failed to detect validation gaps",
|
||||
tenant_id=tenant_id,
|
||||
error=str(e)
|
||||
)
|
||||
raise DatabaseError(f"Failed to detect validation gaps: {str(e)}")
|
||||
|
||||
async def backfill_validation(
|
||||
self,
|
||||
tenant_id: uuid.UUID,
|
||||
start_date: date,
|
||||
end_date: date,
|
||||
triggered_by: str = "manual",
|
||||
sales_data_update_id: Optional[uuid.UUID] = None
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Backfill validation for a historical date range
|
||||
|
||||
Args:
|
||||
tenant_id: Tenant identifier
|
||||
start_date: Start date for backfill
|
||||
end_date: End date for backfill
|
||||
triggered_by: How this backfill was triggered
|
||||
sales_data_update_id: Optional link to sales data update record
|
||||
|
||||
Returns:
|
||||
Backfill results with validation summary
|
||||
"""
|
||||
try:
|
||||
logger.info(
|
||||
"Starting validation backfill",
|
||||
tenant_id=tenant_id,
|
||||
start_date=start_date.isoformat(),
|
||||
end_date=end_date.isoformat(),
|
||||
triggered_by=triggered_by
|
||||
)
|
||||
|
||||
# Convert dates to datetime
|
||||
start_datetime = datetime.combine(start_date, datetime.min.time()).replace(tzinfo=timezone.utc)
|
||||
end_datetime = datetime.combine(end_date, datetime.max.time()).replace(tzinfo=timezone.utc)
|
||||
|
||||
# Run validation for the date range
|
||||
validation_result = await self.validation_service.validate_date_range(
|
||||
tenant_id=tenant_id,
|
||||
start_date=start_datetime,
|
||||
end_date=end_datetime,
|
||||
orchestration_run_id=None,
|
||||
triggered_by=triggered_by
|
||||
)
|
||||
|
||||
# Update sales data update record if provided
|
||||
if sales_data_update_id:
|
||||
await self._update_sales_data_record(
|
||||
sales_data_update_id=sales_data_update_id,
|
||||
validation_run_id=uuid.UUID(validation_result["validation_run_id"]),
|
||||
status="completed" if validation_result["status"] == "completed" else "failed"
|
||||
)
|
||||
|
||||
logger.info(
|
||||
"Validation backfill completed",
|
||||
tenant_id=tenant_id,
|
||||
validation_run_id=validation_result.get("validation_run_id"),
|
||||
forecasts_evaluated=validation_result.get("forecasts_evaluated")
|
||||
)
|
||||
|
||||
return {
|
||||
**validation_result,
|
||||
"backfill_date_range": {
|
||||
"start": start_date.isoformat(),
|
||||
"end": end_date.isoformat()
|
||||
}
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"Validation backfill failed",
|
||||
tenant_id=tenant_id,
|
||||
start_date=start_date.isoformat(),
|
||||
end_date=end_date.isoformat(),
|
||||
error=str(e)
|
||||
)
|
||||
|
||||
if sales_data_update_id:
|
||||
await self._update_sales_data_record(
|
||||
sales_data_update_id=sales_data_update_id,
|
||||
validation_run_id=None,
|
||||
status="failed",
|
||||
error_message=str(e)
|
||||
)
|
||||
|
||||
raise DatabaseError(f"Validation backfill failed: {str(e)}")
|
||||
|
||||
async def auto_backfill_gaps(
|
||||
self,
|
||||
tenant_id: uuid.UUID,
|
||||
lookback_days: int = 90,
|
||||
max_gaps_to_process: int = 10
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Automatically detect and backfill validation gaps
|
||||
|
||||
Args:
|
||||
tenant_id: Tenant identifier
|
||||
lookback_days: How far back to check
|
||||
max_gaps_to_process: Maximum number of gaps to process in one run
|
||||
|
||||
Returns:
|
||||
Summary of backfill operations
|
||||
"""
|
||||
try:
|
||||
logger.info(
|
||||
"Starting auto backfill",
|
||||
tenant_id=tenant_id,
|
||||
lookback_days=lookback_days
|
||||
)
|
||||
|
||||
# Detect gaps
|
||||
gaps = await self.detect_validation_gaps(tenant_id, lookback_days)
|
||||
|
||||
if not gaps:
|
||||
return {
|
||||
"gaps_found": 0,
|
||||
"gaps_processed": 0,
|
||||
"validations_completed": 0,
|
||||
"message": "No validation gaps found"
|
||||
}
|
||||
|
||||
# Limit number of gaps to process
|
||||
gaps_to_process = gaps[:max_gaps_to_process]
|
||||
|
||||
results = []
|
||||
for gap in gaps_to_process:
|
||||
try:
|
||||
result = await self.backfill_validation(
|
||||
tenant_id=tenant_id,
|
||||
start_date=gap["start_date"],
|
||||
end_date=gap["end_date"],
|
||||
triggered_by="auto_backfill"
|
||||
)
|
||||
results.append({
|
||||
"gap": gap,
|
||||
"result": result,
|
||||
"status": "success"
|
||||
})
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"Failed to backfill gap",
|
||||
gap=gap,
|
||||
error=str(e)
|
||||
)
|
||||
results.append({
|
||||
"gap": gap,
|
||||
"error": str(e),
|
||||
"status": "failed"
|
||||
})
|
||||
|
||||
successful = sum(1 for r in results if r["status"] == "success")
|
||||
|
||||
logger.info(
|
||||
"Auto backfill completed",
|
||||
tenant_id=tenant_id,
|
||||
gaps_found=len(gaps),
|
||||
gaps_processed=len(results),
|
||||
successful=successful
|
||||
)
|
||||
|
||||
return {
|
||||
"gaps_found": len(gaps),
|
||||
"gaps_processed": len(results),
|
||||
"validations_completed": successful,
|
||||
"validations_failed": len(results) - successful,
|
||||
"results": results
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"Auto backfill failed",
|
||||
tenant_id=tenant_id,
|
||||
error=str(e)
|
||||
)
|
||||
raise DatabaseError(f"Auto backfill failed: {str(e)}")
|
||||
|
||||
async def register_sales_data_update(
|
||||
self,
|
||||
tenant_id: uuid.UUID,
|
||||
start_date: date,
|
||||
end_date: date,
|
||||
records_affected: int,
|
||||
update_source: str = "import",
|
||||
import_job_id: Optional[str] = None,
|
||||
auto_trigger_validation: bool = True
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Register a sales data update and optionally trigger validation
|
||||
|
||||
Args:
|
||||
tenant_id: Tenant identifier
|
||||
start_date: Start date of updated data
|
||||
end_date: End date of updated data
|
||||
records_affected: Number of sales records affected
|
||||
update_source: Source of update (import, manual, pos_sync)
|
||||
import_job_id: Optional import job ID
|
||||
auto_trigger_validation: Whether to automatically trigger validation
|
||||
|
||||
Returns:
|
||||
Update record and validation result if triggered
|
||||
"""
|
||||
try:
|
||||
# Create sales data update record
|
||||
update_record = SalesDataUpdate(
|
||||
tenant_id=tenant_id,
|
||||
update_date_start=start_date,
|
||||
update_date_end=end_date,
|
||||
records_affected=records_affected,
|
||||
update_source=update_source,
|
||||
import_job_id=import_job_id,
|
||||
requires_validation=auto_trigger_validation,
|
||||
validation_status="pending" if auto_trigger_validation else "not_required"
|
||||
)
|
||||
|
||||
self.db.add(update_record)
|
||||
await self.db.flush()
|
||||
|
||||
logger.info(
|
||||
"Registered sales data update",
|
||||
tenant_id=tenant_id,
|
||||
update_id=update_record.id,
|
||||
date_range=f"{start_date} to {end_date}",
|
||||
records_affected=records_affected
|
||||
)
|
||||
|
||||
result = {
|
||||
"update_id": str(update_record.id),
|
||||
"update_record": update_record.to_dict(),
|
||||
"validation_triggered": False
|
||||
}
|
||||
|
||||
# Trigger validation if requested
|
||||
if auto_trigger_validation:
|
||||
try:
|
||||
validation_result = await self.backfill_validation(
|
||||
tenant_id=tenant_id,
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
triggered_by="sales_data_update",
|
||||
sales_data_update_id=update_record.id
|
||||
)
|
||||
|
||||
result["validation_triggered"] = True
|
||||
result["validation_result"] = validation_result
|
||||
|
||||
logger.info(
|
||||
"Validation triggered for sales data update",
|
||||
update_id=update_record.id,
|
||||
validation_run_id=validation_result.get("validation_run_id")
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"Failed to trigger validation for sales data update",
|
||||
update_id=update_record.id,
|
||||
error=str(e)
|
||||
)
|
||||
update_record.validation_status = "failed"
|
||||
update_record.validation_error = str(e)[:500]
|
||||
|
||||
await self.db.commit()
|
||||
|
||||
return result
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"Failed to register sales data update",
|
||||
tenant_id=tenant_id,
|
||||
error=str(e)
|
||||
)
|
||||
await self.db.rollback()
|
||||
raise DatabaseError(f"Failed to register sales data update: {str(e)}")
|
||||
|
||||
async def _update_sales_data_record(
|
||||
self,
|
||||
sales_data_update_id: uuid.UUID,
|
||||
validation_run_id: Optional[uuid.UUID],
|
||||
status: str,
|
||||
error_message: Optional[str] = None
|
||||
):
|
||||
"""Update sales data update record with validation results"""
|
||||
try:
|
||||
query = select(SalesDataUpdate).where(SalesDataUpdate.id == sales_data_update_id)
|
||||
result = await self.db.execute(query)
|
||||
update_record = result.scalar_one_or_none()
|
||||
|
||||
if update_record:
|
||||
update_record.validation_status = status
|
||||
update_record.validation_run_id = validation_run_id
|
||||
update_record.validated_at = datetime.now(timezone.utc)
|
||||
if error_message:
|
||||
update_record.validation_error = error_message[:500]
|
||||
|
||||
await self.db.commit()
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"Failed to update sales data record",
|
||||
sales_data_update_id=sales_data_update_id,
|
||||
error=str(e)
|
||||
)
|
||||
|
||||
async def get_pending_validations(
|
||||
self,
|
||||
tenant_id: uuid.UUID,
|
||||
limit: int = 50
|
||||
) -> List[SalesDataUpdate]:
|
||||
"""Get pending sales data updates that need validation"""
|
||||
try:
|
||||
query = (
|
||||
select(SalesDataUpdate)
|
||||
.where(
|
||||
and_(
|
||||
SalesDataUpdate.tenant_id == tenant_id,
|
||||
SalesDataUpdate.validation_status == "pending",
|
||||
SalesDataUpdate.requires_validation == True
|
||||
)
|
||||
)
|
||||
.order_by(SalesDataUpdate.created_at)
|
||||
.limit(limit)
|
||||
)
|
||||
|
||||
result = await self.db.execute(query)
|
||||
return result.scalars().all()
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"Failed to get pending validations",
|
||||
tenant_id=tenant_id,
|
||||
error=str(e)
|
||||
)
|
||||
raise DatabaseError(f"Failed to get pending validations: {str(e)}")
|
||||
Reference in New Issue
Block a user