Initial commit - production deployment

commit c23d00dd92
2026-01-21 17:17:16 +01:00
2289 changed files with 638440 additions and 0 deletions


@@ -0,0 +1,29 @@
"""
Forecasting Service Models Package
Import all models to ensure they are registered with SQLAlchemy Base.
"""
# Import AuditLog model for this service
from shared.security import create_audit_log_model
from shared.database.base import Base
# Create audit log model for this service
AuditLog = create_audit_log_model(Base)
# Import all models to register them with the Base metadata
from .forecasts import Forecast, PredictionBatch
from .predictions import ModelPerformanceMetric, PredictionCache
from .validation_run import ValidationRun
from .sales_data_update import SalesDataUpdate
# List all models for easier access
__all__ = [
"Forecast",
"PredictionBatch",
"ModelPerformanceMetric",
"PredictionCache",
"ValidationRun",
"SalesDataUpdate",
"AuditLog",
]
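
The create_audit_log_model factory lives in the shared package, whose source is not part of this excerpt. As a rough illustration of the pattern (a per-service table bound to that service's declarative Base), a hypothetical sketch — the column set here is invented, not the actual shared.security implementation:

from sqlalchemy import Column, String, DateTime, JSON
from sqlalchemy.dialects.postgresql import UUID
from datetime import datetime, timezone
import uuid

def create_audit_log_model(base):
    """Build an AuditLog model bound to the given declarative Base.

    Each service passes its own Base, so the table ends up in that
    service's metadata and database. Columns below are illustrative.
    """
    class AuditLog(base):
        __tablename__ = "audit_logs"

        id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
        tenant_id = Column(UUID(as_uuid=True), nullable=False, index=True)
        action = Column(String(100), nullable=False)
        payload = Column(JSON, nullable=True)
        created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))

    return AuditLog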


@@ -0,0 +1,101 @@
# ================================================================
# services/forecasting/app/models/forecasts.py
# ================================================================
"""
Forecast models for the forecasting service
"""
from sqlalchemy import Column, String, Integer, Float, DateTime, Boolean, Text, JSON, UniqueConstraint, Index
from sqlalchemy.dialects.postgresql import UUID
from datetime import datetime, timezone
import uuid
from shared.database.base import Base
class Forecast(Base):
"""Forecast model for storing prediction results"""
__tablename__ = "forecasts"
__table_args__ = (
# Unique constraint to prevent duplicate forecasts
# Ensures only one forecast per (tenant, product, date, location) combination
UniqueConstraint(
'tenant_id', 'inventory_product_id', 'forecast_date', 'location',
name='uq_forecast_tenant_product_date_location'
),
# Composite index for common query patterns
Index('ix_forecasts_tenant_product_date', 'tenant_id', 'inventory_product_id', 'forecast_date'),
)
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
tenant_id = Column(UUID(as_uuid=True), nullable=False, index=True)
inventory_product_id = Column(UUID(as_uuid=True), nullable=False, index=True) # Reference to inventory service
product_name = Column(String(255), nullable=True, index=True) # Product name (optional - use inventory_product_id as reference)
location = Column(String(255), nullable=False, index=True)
# Forecast period
forecast_date = Column(DateTime(timezone=True), nullable=False, index=True)
created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
# Prediction results
predicted_demand = Column(Float, nullable=False)
confidence_lower = Column(Float, nullable=False)
confidence_upper = Column(Float, nullable=False)
confidence_level = Column(Float, default=0.8)
# Model information
model_id = Column(String(255), nullable=False)
model_version = Column(String(50), nullable=False)
algorithm = Column(String(50), default="prophet")
# Business context
business_type = Column(String(50), default="individual") # individual or central_workshop
day_of_week = Column(Integer, nullable=False)
is_holiday = Column(Boolean, default=False)
is_weekend = Column(Boolean, default=False)
# External factors
weather_temperature = Column(Float)
weather_precipitation = Column(Float)
weather_description = Column(String(100))
traffic_volume = Column(Integer)
# Metadata
processing_time_ms = Column(Integer)
features_used = Column(JSON)
def __repr__(self):
return f"<Forecast(id={self.id}, inventory_product_id={self.inventory_product_id}, date={self.forecast_date})>"
class PredictionBatch(Base):
"""Batch prediction requests"""
__tablename__ = "prediction_batches"
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
tenant_id = Column(UUID(as_uuid=True), nullable=False, index=True)
# Batch information
batch_name = Column(String(255), nullable=False)
requested_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
completed_at = Column(DateTime(timezone=True))
# Status
status = Column(String(50), default="pending") # pending, processing, completed, failed
total_products = Column(Integer, default=0)
completed_products = Column(Integer, default=0)
failed_products = Column(Integer, default=0)
# Configuration
forecast_days = Column(Integer, default=7)
business_type = Column(String(50), default="individual")
# Results
error_message = Column(Text)
processing_time_ms = Column(Integer)
cancelled_by = Column(String, nullable=True)
def __repr__(self):
return f"<PredictionBatch(id={self.id}, status={self.status})>"


@@ -0,0 +1,67 @@
# ================================================================
# services/forecasting/app/models/predictions.py
# ================================================================
"""
Additional prediction models for the forecasting service
"""
from sqlalchemy import Column, String, Integer, Float, DateTime, Boolean, Text, JSON
from sqlalchemy.dialects.postgresql import UUID
from datetime import datetime, timezone
import uuid
from shared.database.base import Base
class ModelPerformanceMetric(Base):
"""Track model performance over time"""
__tablename__ = "model_performance_metrics"
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
model_id = Column(UUID(as_uuid=True), nullable=False, index=True)
tenant_id = Column(UUID(as_uuid=True), nullable=False, index=True)
inventory_product_id = Column(UUID(as_uuid=True), nullable=False) # Reference to inventory service
# Performance metrics
mae = Column(Float) # Mean Absolute Error
mape = Column(Float) # Mean Absolute Percentage Error
rmse = Column(Float) # Root Mean Square Error
accuracy_score = Column(Float)
# Evaluation period
evaluation_date = Column(DateTime(timezone=True), nullable=False)
evaluation_period_start = Column(DateTime(timezone=True))
evaluation_period_end = Column(DateTime(timezone=True))
# Metadata
sample_size = Column(Integer)
created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
def __repr__(self):
return f"<ModelPerformanceMetric(model_id={self.model_id}, mae={self.mae})>"
class PredictionCache(Base):
"""Cache frequently requested predictions"""
__tablename__ = "prediction_cache"
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
cache_key = Column(String(255), unique=True, nullable=False, index=True)
# Cached data
tenant_id = Column(UUID(as_uuid=True), nullable=False, index=True)
inventory_product_id = Column(UUID(as_uuid=True), nullable=False) # Reference to inventory service
location = Column(String(255), nullable=False)
forecast_date = Column(DateTime(timezone=True), nullable=False)
# Cached results
predicted_demand = Column(Float, nullable=False)
confidence_lower = Column(Float, nullable=False)
confidence_upper = Column(Float, nullable=False)
model_id = Column(UUID(as_uuid=True), nullable=False)
# Cache metadata
created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
expires_at = Column(DateTime(timezone=True), nullable=False)
hit_count = Column(Integer, default=0)
def __repr__(self):
return f"<PredictionCache(key={self.cache_key}, inventory_product_id={self.inventory_product_id})>"


@@ -0,0 +1,78 @@
# ================================================================
# services/forecasting/app/models/sales_data_update.py
# ================================================================
"""
Sales Data Update Tracking Model
Tracks when sales data is added or updated for past dates,
enabling automated historical validation backfill.
"""
from sqlalchemy import Column, String, Integer, DateTime, Boolean, Index, Date
from sqlalchemy.dialects.postgresql import UUID
from datetime import datetime, timezone
import uuid
from shared.database.base import Base
class SalesDataUpdate(Base):
"""Track sales data updates for historical validation"""
__tablename__ = "sales_data_updates"
__table_args__ = (
Index('ix_sales_updates_tenant_status', 'tenant_id', 'validation_status', 'created_at'),
Index('ix_sales_updates_date_range', 'tenant_id', 'update_date_start', 'update_date_end'),
Index('ix_sales_updates_validation_status', 'validation_status'),
)
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
tenant_id = Column(UUID(as_uuid=True), nullable=False, index=True)
# Date range of sales data that was added/updated
update_date_start = Column(Date, nullable=False, index=True)
update_date_end = Column(Date, nullable=False, index=True)
# Update metadata
created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
update_source = Column(String(100), nullable=True) # import, manual, pos_sync
records_affected = Column(Integer, default=0)
# Validation tracking
validation_status = Column(String(50), default="pending") # pending, processing, completed, failed
validation_run_id = Column(UUID(as_uuid=True), nullable=True)
validated_at = Column(DateTime(timezone=True), nullable=True)
validation_error = Column(String(500), nullable=True)
# Determines if this update should trigger validation
requires_validation = Column(Boolean, default=True)
# Additional context
import_job_id = Column(String(255), nullable=True) # Link to sales import job if applicable
notes = Column(String(500), nullable=True)
def __repr__(self):
return (
f"<SalesDataUpdate(id={self.id}, tenant_id={self.tenant_id}, "
f"date_range={self.update_date_start} to {self.update_date_end}, "
f"status={self.validation_status})>"
)
def to_dict(self):
"""Convert to dictionary for API responses"""
return {
'id': str(self.id),
'tenant_id': str(self.tenant_id),
'update_date_start': self.update_date_start.isoformat() if self.update_date_start else None,
'update_date_end': self.update_date_end.isoformat() if self.update_date_end else None,
'created_at': self.created_at.isoformat() if self.created_at else None,
'update_source': self.update_source,
'records_affected': self.records_affected,
'validation_status': self.validation_status,
'validation_run_id': str(self.validation_run_id) if self.validation_run_id else None,
'validated_at': self.validated_at.isoformat() if self.validated_at else None,
'validation_error': self.validation_error,
'requires_validation': self.requires_validation,
'import_job_id': self.import_job_id,
'notes': self.notes
}
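
To make the backfill flow the docstring describes concrete: an import job records the touched date range, and a worker later drains rows still marked pending. A sketch under those assumptions (both helper names are illustrative, not part of this commit):

from datetime import date

def record_sales_update(session, tenant_id, start: date, end: date,
                        source: str = "import", records: int = 0):
    """Log an updated date range so the validation backfill can pick it up."""
    update = SalesDataUpdate(
        tenant_id=tenant_id,
        update_date_start=start,
        update_date_end=end,
        update_source=source,
        records_affected=records,
    )
    session.add(update)
    return update

def pending_validations(session, tenant_id):
    """Rows awaiting validation, oldest first (served by ix_sales_updates_tenant_status)."""
    return (
        session.query(SalesDataUpdate)
        .filter(
            SalesDataUpdate.tenant_id == tenant_id,
            SalesDataUpdate.validation_status == "pending",
            SalesDataUpdate.requires_validation.is_(True),
        )
        .order_by(SalesDataUpdate.created_at)
        .all()
    )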


@@ -0,0 +1,110 @@
# ================================================================
# services/forecasting/app/models/validation_run.py
# ================================================================
"""
Validation run models for tracking forecast validation executions
"""
from sqlalchemy import Column, String, Integer, Float, DateTime, Text, JSON, Index
from sqlalchemy.dialects.postgresql import UUID
from datetime import datetime, timezone
import uuid
from shared.database.base import Base
class ValidationRun(Base):
"""Track forecast validation execution runs"""
__tablename__ = "validation_runs"
__table_args__ = (
Index('ix_validation_runs_tenant_created', 'tenant_id', 'started_at'),
Index('ix_validation_runs_status', 'status', 'started_at'),
Index('ix_validation_runs_orchestration', 'orchestration_run_id'),
)
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
tenant_id = Column(UUID(as_uuid=True), nullable=False, index=True)
# Link to orchestration run (if triggered by orchestrator)
orchestration_run_id = Column(UUID(as_uuid=True), nullable=True)
# Validation period
validation_start_date = Column(DateTime(timezone=True), nullable=False)
validation_end_date = Column(DateTime(timezone=True), nullable=False)
# Execution metadata
started_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
completed_at = Column(DateTime(timezone=True), nullable=True)
duration_seconds = Column(Float, nullable=True)
# Status and results
status = Column(String(50), default="pending") # pending, running, completed, failed
# Validation statistics
total_forecasts_evaluated = Column(Integer, default=0)
forecasts_with_actuals = Column(Integer, default=0)
forecasts_without_actuals = Column(Integer, default=0)
# Accuracy metrics summary (across all validated forecasts)
overall_mae = Column(Float, nullable=True)
overall_mape = Column(Float, nullable=True)
overall_rmse = Column(Float, nullable=True)
overall_r2_score = Column(Float, nullable=True)
overall_accuracy_percentage = Column(Float, nullable=True)
# Additional statistics
total_predicted_demand = Column(Float, default=0.0)
total_actual_demand = Column(Float, default=0.0)
# Breakdown by product/location (JSON)
metrics_by_product = Column(JSON, nullable=True) # {product_id: {mae, mape, ...}}
metrics_by_location = Column(JSON, nullable=True) # {location: {mae, mape, ...}}
# Performance metrics created count
metrics_records_created = Column(Integer, default=0)
# Error tracking
error_message = Column(Text, nullable=True)
error_details = Column(JSON, nullable=True)
# Execution context
triggered_by = Column(String(100), default="manual") # manual, orchestrator, scheduled
execution_mode = Column(String(50), default="batch") # batch, single_day, real_time
def __repr__(self):
return (
f"<ValidationRun(id={self.id}, tenant_id={self.tenant_id}, "
f"status={self.status}, forecasts_evaluated={self.total_forecasts_evaluated})>"
)
def to_dict(self):
"""Convert to dictionary for API responses"""
return {
'id': str(self.id),
'tenant_id': str(self.tenant_id),
'orchestration_run_id': str(self.orchestration_run_id) if self.orchestration_run_id else None,
'validation_start_date': self.validation_start_date.isoformat() if self.validation_start_date else None,
'validation_end_date': self.validation_end_date.isoformat() if self.validation_end_date else None,
'started_at': self.started_at.isoformat() if self.started_at else None,
'completed_at': self.completed_at.isoformat() if self.completed_at else None,
'duration_seconds': self.duration_seconds,
'status': self.status,
'total_forecasts_evaluated': self.total_forecasts_evaluated,
'forecasts_with_actuals': self.forecasts_with_actuals,
'forecasts_without_actuals': self.forecasts_without_actuals,
'overall_mae': self.overall_mae,
'overall_mape': self.overall_mape,
'overall_rmse': self.overall_rmse,
'overall_r2_score': self.overall_r2_score,
'overall_accuracy_percentage': self.overall_accuracy_percentage,
'total_predicted_demand': self.total_predicted_demand,
'total_actual_demand': self.total_actual_demand,
'metrics_by_product': self.metrics_by_product,
'metrics_by_location': self.metrics_by_location,
'metrics_records_created': self.metrics_records_created,
'error_message': self.error_message,
'error_details': self.error_details,
'triggered_by': self.triggered_by,
'execution_mode': self.execution_mode,
}
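
For reference, the overall_* summary fields follow the standard error definitions: MAE is the mean of |actual - predicted|, RMSE is the square root of the mean squared error, and MAPE is the mean of |actual - predicted| / |actual|, expressed as a percentage. A self-contained sketch of how a validation job might compute them from matched (predicted, actual) pairs — skipping zero actuals for MAPE is an assumption, not something this commit specifies:

import math

def summarize_accuracy(pairs):
    """Compute MAE / MAPE / RMSE for a list of (predicted, actual) pairs.

    Pairs with actual == 0 are skipped for MAPE to avoid division by zero.
    Returns a dict keyed to the ValidationRun overall_* fields, or None
    if no pairs were supplied.
    """
    if not pairs:
        return None
    errors = [actual - predicted for predicted, actual in pairs]
    mae = sum(abs(e) for e in errors) / len(errors)
    rmse = math.sqrt(sum(e * e for e in errors) / len(errors))
    nonzero = [(p, a) for p, a in pairs if a != 0]
    mape = (
        sum(abs(a - p) / abs(a) for p, a in nonzero) / len(nonzero) * 100
        if nonzero else None
    )
    return {"overall_mae": mae, "overall_mape": mape, "overall_rmse": rmse}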