Improve backend

2025-11-18 07:17:17 +01:00
parent d36f2ab9af
commit 5c45164c8e
61 changed files with 9846 additions and 495 deletions
--- a/services/forecasting/app/services/historical_validation_service.py
+++ b/services/forecasting/app/services/historical_validation_service.py
@@ -0,0 +1,480 @@
+# ================================================================
+# services/forecasting/app/services/historical_validation_service.py
+# ================================================================
+"""
+Historical Validation Service
+
+Handles validation backfill when historical sales data is uploaded late.
+Detects gaps in validation coverage and automatically triggers validation
+for periods where forecasts exist but haven't been validated yet.
+"""
+
+from typing import Dict, Any, List, Optional
+from sqlalchemy.ext.asyncio import AsyncSession
+from sqlalchemy import select, and_, func, Date, or_
+from datetime import datetime, timedelta, timezone, date
+import structlog
+import uuid
+
+from app.models.forecasts import Forecast
+from app.models.validation_run import ValidationRun
+from app.models.sales_data_update import SalesDataUpdate
+from app.services.validation_service import ValidationService
+from shared.database.exceptions import DatabaseError
+
+logger = structlog.get_logger()
+
+
+class HistoricalValidationService:
+    """Service for backfilling historical validation when sales data arrives late"""
+
+    def __init__(self, db_session: AsyncSession):
+        self.db = db_session
+        self.validation_service = ValidationService(db_session)
+
+    async def detect_validation_gaps(
+        self,
+        tenant_id: uuid.UUID,
+        lookback_days: int = 90
+    ) -> List[Dict[str, Any]]:
+        """
+        Detect date ranges where forecasts exist but haven't been validated
+
+        Args:
+            tenant_id: Tenant identifier
+            lookback_days: How far back to check (default 90 days)
+
+        Returns:
+            List of gap periods with date ranges
+        """
+        try:
+            end_date = datetime.now(timezone.utc)
+            start_date = end_date - timedelta(days=lookback_days)
+
+            logger.info(
+                "Detecting validation gaps",
+                tenant_id=tenant_id,
+                start_date=start_date.isoformat(),
+                end_date=end_date.isoformat()
+            )
+
+            # Get all dates with forecasts
+            forecast_query = select(
+                func.cast(Forecast.forecast_date, Date).label('forecast_date')
+            ).where(
+                and_(
+                    Forecast.tenant_id == tenant_id,
+                    Forecast.forecast_date >= start_date,
+                    Forecast.forecast_date <= end_date
+                )
+            ).group_by(
+                func.cast(Forecast.forecast_date, Date)
+            ).order_by(
+                func.cast(Forecast.forecast_date, Date)
+            )
+
+            forecast_result = await self.db.execute(forecast_query)
+            forecast_dates = {row.forecast_date for row in forecast_result.fetchall()}
+
+            if not forecast_dates:
+                logger.info("No forecasts found in lookback period", tenant_id=tenant_id)
+                return []
+
+            # Get all dates that have been validated
+            validation_query = select(
+                func.cast(ValidationRun.validation_start_date, Date).label('validated_date')
+            ).where(
+                and_(
+                    ValidationRun.tenant_id == tenant_id,
+                    ValidationRun.status == "completed",
+                    ValidationRun.validation_start_date >= start_date,
+                    ValidationRun.validation_end_date <= end_date
+                )
+            ).group_by(
+                func.cast(ValidationRun.validation_start_date, Date)
+            )
+
+            validation_result = await self.db.execute(validation_query)
+            validated_dates = {row.validated_date for row in validation_result.fetchall()}
+
+            # Find gaps (dates with forecasts but no validation)
+            gap_dates = sorted(forecast_dates - validated_dates)
+
+            if not gap_dates:
+                logger.info("No validation gaps found", tenant_id=tenant_id)
+                return []
+
+            # Group consecutive dates into ranges
+            gaps = []
+            current_gap_start = gap_dates[0]
+            current_gap_end = gap_dates[0]
+
+            for i in range(1, len(gap_dates)):
+                if (gap_dates[i] - current_gap_end).days == 1:
+                    # Consecutive date, extend current gap
+                    current_gap_end = gap_dates[i]
+                else:
+                    # Gap in dates, save current gap and start new one
+                    gaps.append({
+                        "start_date": current_gap_start,
+                        "end_date": current_gap_end,
+                        "days_count": (current_gap_end - current_gap_start).days + 1
+                    })
+                    current_gap_start = gap_dates[i]
+                    current_gap_end = gap_dates[i]
+
+            # Don't forget the last gap
+            gaps.append({
+                "start_date": current_gap_start,
+                "end_date": current_gap_end,
+                "days_count": (current_gap_end - current_gap_start).days + 1
+            })
+
+            logger.info(
+                "Validation gaps detected",
+                tenant_id=tenant_id,
+                gaps_count=len(gaps),
+                total_days=len(gap_dates)
+            )
+
+            return gaps
+
+        except Exception as e:
+            logger.error(
+                "Failed to detect validation gaps",
+                tenant_id=tenant_id,
+                error=str(e)
+            )
+            raise DatabaseError(f"Failed to detect validation gaps: {str(e)}")
+
+    async def backfill_validation(
+        self,
+        tenant_id: uuid.UUID,
+        start_date: date,
+        end_date: date,
+        triggered_by: str = "manual",
+        sales_data_update_id: Optional[uuid.UUID] = None
+    ) -> Dict[str, Any]:
+        """
+        Backfill validation for a historical date range
+
+        Args:
+            tenant_id: Tenant identifier
+            start_date: Start date for backfill
+            end_date: End date for backfill
+            triggered_by: How this backfill was triggered
+            sales_data_update_id: Optional link to sales data update record
+
+        Returns:
+            Backfill results with validation summary
+        """
+        try:
+            logger.info(
+                "Starting validation backfill",
+                tenant_id=tenant_id,
+                start_date=start_date.isoformat(),
+                end_date=end_date.isoformat(),
+                triggered_by=triggered_by
+            )
+
+            # Convert dates to datetime
+            start_datetime = datetime.combine(start_date, datetime.min.time()).replace(tzinfo=timezone.utc)
+            end_datetime = datetime.combine(end_date, datetime.max.time()).replace(tzinfo=timezone.utc)
+
+            # Run validation for the date range
+            validation_result = await self.validation_service.validate_date_range(
+                tenant_id=tenant_id,
+                start_date=start_datetime,
+                end_date=end_datetime,
+                orchestration_run_id=None,
+                triggered_by=triggered_by
+            )
+
+            # Update sales data update record if provided
+            if sales_data_update_id:
+                await self._update_sales_data_record(
+                    sales_data_update_id=sales_data_update_id,
+                    validation_run_id=uuid.UUID(validation_result["validation_run_id"]),
+                    status="completed" if validation_result["status"] == "completed" else "failed"
+                )
+
+            logger.info(
+                "Validation backfill completed",
+                tenant_id=tenant_id,
+                validation_run_id=validation_result.get("validation_run_id"),
+                forecasts_evaluated=validation_result.get("forecasts_evaluated")
+            )
+
+            return {
+                **validation_result,
+                "backfill_date_range": {
+                    "start": start_date.isoformat(),
+                    "end": end_date.isoformat()
+                }
+            }
+
+        except Exception as e:
+            logger.error(
+                "Validation backfill failed",
+                tenant_id=tenant_id,
+                start_date=start_date.isoformat(),
+                end_date=end_date.isoformat(),
+                error=str(e)
+            )
+
+            if sales_data_update_id:
+                await self._update_sales_data_record(
+                    sales_data_update_id=sales_data_update_id,
+                    validation_run_id=None,
+                    status="failed",
+                    error_message=str(e)
+                )
+
+            raise DatabaseError(f"Validation backfill failed: {str(e)}")
+
+    async def auto_backfill_gaps(
+        self,
+        tenant_id: uuid.UUID,
+        lookback_days: int = 90,
+        max_gaps_to_process: int = 10
+    ) -> Dict[str, Any]:
+        """
+        Automatically detect and backfill validation gaps
+
+        Args:
+            tenant_id: Tenant identifier
+            lookback_days: How far back to check
+            max_gaps_to_process: Maximum number of gaps to process in one run
+
+        Returns:
+            Summary of backfill operations
+        """
+        try:
+            logger.info(
+                "Starting auto backfill",
+                tenant_id=tenant_id,
+                lookback_days=lookback_days
+            )
+
+            # Detect gaps
+            gaps = await self.detect_validation_gaps(tenant_id, lookback_days)
+
+            if not gaps:
+                return {
+                    "gaps_found": 0,
+                    "gaps_processed": 0,
+                    "validations_completed": 0,
+                    "message": "No validation gaps found"
+                }
+
+            # Limit number of gaps to process
+            gaps_to_process = gaps[:max_gaps_to_process]
+
+            results = []
+            for gap in gaps_to_process:
+                try:
+                    result = await self.backfill_validation(
+                        tenant_id=tenant_id,
+                        start_date=gap["start_date"],
+                        end_date=gap["end_date"],
+                        triggered_by="auto_backfill"
+                    )
+                    results.append({
+                        "gap": gap,
+                        "result": result,
+                        "status": "success"
+                    })
+                except Exception as e:
+                    logger.error(
+                        "Failed to backfill gap",
+                        gap=gap,
+                        error=str(e)
+                    )
+                    results.append({
+                        "gap": gap,
+                        "error": str(e),
+                        "status": "failed"
+                    })
+
+            successful = sum(1 for r in results if r["status"] == "success")
+
+            logger.info(
+                "Auto backfill completed",
+                tenant_id=tenant_id,
+                gaps_found=len(gaps),
+                gaps_processed=len(results),
+                successful=successful
+            )
+
+            return {
+                "gaps_found": len(gaps),
+                "gaps_processed": len(results),
+                "validations_completed": successful,
+                "validations_failed": len(results) - successful,
+                "results": results
+            }
+
+        except Exception as e:
+            logger.error(
+                "Auto backfill failed",
+                tenant_id=tenant_id,
+                error=str(e)
+            )
+            raise DatabaseError(f"Auto backfill failed: {str(e)}")
+
+    async def register_sales_data_update(
+        self,
+        tenant_id: uuid.UUID,
+        start_date: date,
+        end_date: date,
+        records_affected: int,
+        update_source: str = "import",
+        import_job_id: Optional[str] = None,
+        auto_trigger_validation: bool = True
+    ) -> Dict[str, Any]:
+        """
+        Register a sales data update and optionally trigger validation
+
+        Args:
+            tenant_id: Tenant identifier
+            start_date: Start date of updated data
+            end_date: End date of updated data
+            records_affected: Number of sales records affected
+            update_source: Source of update (import, manual, pos_sync)
+            import_job_id: Optional import job ID
+            auto_trigger_validation: Whether to automatically trigger validation
+
+        Returns:
+            Update record and validation result if triggered
+        """
+        try:
+            # Create sales data update record
+            update_record = SalesDataUpdate(
+                tenant_id=tenant_id,
+                update_date_start=start_date,
+                update_date_end=end_date,
+                records_affected=records_affected,
+                update_source=update_source,
+                import_job_id=import_job_id,
+                requires_validation=auto_trigger_validation,
+                validation_status="pending" if auto_trigger_validation else "not_required"
+            )
+
+            self.db.add(update_record)
+            await self.db.flush()
+
+            logger.info(
+                "Registered sales data update",
+                tenant_id=tenant_id,
+                update_id=update_record.id,
+                date_range=f"{start_date} to {end_date}",
+                records_affected=records_affected
+            )
+
+            result = {
+                "update_id": str(update_record.id),
+                "update_record": update_record.to_dict(),
+                "validation_triggered": False
+            }
+
+            # Trigger validation if requested
+            if auto_trigger_validation:
+                try:
+                    validation_result = await self.backfill_validation(
+                        tenant_id=tenant_id,
+                        start_date=start_date,
+                        end_date=end_date,
+                        triggered_by="sales_data_update",
+                        sales_data_update_id=update_record.id
+                    )
+
+                    result["validation_triggered"] = True
+                    result["validation_result"] = validation_result
+
+                    logger.info(
+                        "Validation triggered for sales data update",
+                        update_id=update_record.id,
+                        validation_run_id=validation_result.get("validation_run_id")
+                    )
+
+                except Exception as e:
+                    logger.error(
+                        "Failed to trigger validation for sales data update",
+                        update_id=update_record.id,
+                        error=str(e)
+                    )
+                    update_record.validation_status = "failed"
+                    update_record.validation_error = str(e)[:500]
+
+            await self.db.commit()
+
+            return result
+
+        except Exception as e:
+            logger.error(
+                "Failed to register sales data update",
+                tenant_id=tenant_id,
+                error=str(e)
+            )
+            await self.db.rollback()
+            raise DatabaseError(f"Failed to register sales data update: {str(e)}")
+
+    async def _update_sales_data_record(
+        self,
+        sales_data_update_id: uuid.UUID,
+        validation_run_id: Optional[uuid.UUID],
+        status: str,
+        error_message: Optional[str] = None
+    ):
+        """Update sales data update record with validation results"""
+        try:
+            query = select(SalesDataUpdate).where(SalesDataUpdate.id == sales_data_update_id)
+            result = await self.db.execute(query)
+            update_record = result.scalar_one_or_none()
+
+            if update_record:
+                update_record.validation_status = status
+                update_record.validation_run_id = validation_run_id
+                update_record.validated_at = datetime.now(timezone.utc)
+                if error_message:
+                    update_record.validation_error = error_message[:500]
+
+                await self.db.commit()
+
+        except Exception as e:
+            logger.error(
+                "Failed to update sales data record",
+                sales_data_update_id=sales_data_update_id,
+                error=str(e)
+            )
+
+    async def get_pending_validations(
+        self,
+        tenant_id: uuid.UUID,
+        limit: int = 50
+    ) -> List[SalesDataUpdate]:
+        """Get pending sales data updates that need validation"""
+        try:
+            query = (
+                select(SalesDataUpdate)
+                .where(
+                    and_(
+                        SalesDataUpdate.tenant_id == tenant_id,
+                        SalesDataUpdate.validation_status == "pending",
+                        SalesDataUpdate.requires_validation == True
+                    )
+                )
+                .order_by(SalesDataUpdate.created_at)
+                .limit(limit)
+            )
+
+            result = await self.db.execute(query)
+            return result.scalars().all()
+
+        except Exception as e:
+            logger.error(
+                "Failed to get pending validations",
+                tenant_id=tenant_id,
+                error=str(e)
+            )
+            raise DatabaseError(f"Failed to get pending validations: {str(e)}")