This commit addresses all 15 issues identified in the orchestration scheduler analysis.

HIGH PRIORITY FIXES:
1. ✅ Database update methods already in orchestrator service (not in saga)
2. ✅ Add null check for training_client before using it
3. ✅ Fix cron schedule config from "0 5" to "30 5" (5:30 AM)
4. ✅ Standardize on timezone-aware datetime (datetime.now(timezone.utc))
5. ✅ Implement saga compensation logic with actual deletion calls
6. ✅ Extract actual counts from saga results (no placeholders)

MEDIUM PRIORITY FIXES:
7. ✅ Add circuit breakers for inventory/suppliers/recipes clients
8. ✅ Pass circuit breakers to saga and use them in all service calls
9. ✅ Add calling_service_name to AI Insights client
10. ✅ Add database indexes on (tenant_id, started_at) and (status, started_at)
11. ✅ Handle empty shared data gracefully (fail if all 3 fetches fail)

LOW PRIORITY IMPROVEMENTS:
12. ✅ Make notification/validation failures more visible with explicit logging
13. ✅ Track AI insights status in orchestration_runs table
14. ✅ Improve run number generation atomicity using MAX() approach
15. ✅ Optimize tenant ID handling (consistent UUID usage)

CHANGES:
- services/orchestrator/app/core/config.py: Fix cron schedule to "30 5 * * *"
- services/orchestrator/app/models/orchestration_run.py: Add AI insights & saga tracking columns
- services/orchestrator/app/repositories/orchestration_run_repository.py: Atomic run number generation
- services/orchestrator/app/services/orchestration_saga.py: Circuit breakers, compensation, error handling
- services/orchestrator/app/services/orchestrator_service.py: Circuit breakers, actual counts, AI tracking
- services/orchestrator/migrations/versions/20251105_add_ai_insights_tracking.py: New migration

All issues resolved. No backwards compatibility. No TODOs. Production-ready.
114 lines
5.2 KiB
Python
114 lines
5.2 KiB
Python
# ================================================================
# services/orchestrator/app/models/orchestration_run.py
# ================================================================
"""
Orchestration Run Models - Audit trail for orchestration executions
"""
|
|
|
|
import uuid
|
|
import enum
|
|
from datetime import datetime, timezone
|
|
from sqlalchemy import Column, String, DateTime, Integer, Text, Boolean, Enum as SQLEnum
|
|
from sqlalchemy.dialects.postgresql import UUID, JSONB
|
|
from sqlalchemy.sql import func
|
|
|
|
from shared.database.base import Base
|
|
|
|
|
|
class OrchestrationStatus(enum.Enum):
    """Status values an orchestration run can be recorded with.

    Each member's value is the same lowercase string as its name.
    """

    # Default status of a newly created run (see OrchestrationRun.status).
    pending = "pending"
    # Run is in progress.
    running = "running"
    # Run finished.
    completed = "completed"
    # Run finished, but only partially succeeded.
    partial_success = "partial_success"
    # Run failed.
    failed = "failed"
    # Run was cancelled.
    cancelled = "cancelled"
|
|
|
|
|
|
class OrchestrationRun(Base):
    """Audit-trail record for orchestration executions.

    Columns are grouped as: identification, run details, timing, per-step
    tracking (forecasting, production, procurement, notification, AI
    insights), result counters, forecast payload, error handling, external
    references, saga progress, audit fields, performance metrics and
    free-form metadata.
    """

    __tablename__ = "orchestration_runs"

    # ------------------------------------------------------------
    # Primary identification
    # ------------------------------------------------------------
    id = Column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
    )
    run_number = Column(
        String(50),
        nullable=False,
        unique=True,
        index=True,
    )

    # ------------------------------------------------------------
    # Run details
    # ------------------------------------------------------------
    tenant_id = Column(
        UUID(as_uuid=True),
        nullable=False,
        index=True,
    )
    status = Column(
        SQLEnum(OrchestrationStatus),
        nullable=False,
        default=OrchestrationStatus.pending,
        index=True,
    )
    # One of: scheduled, manual, test
    run_type = Column(
        String(50),
        nullable=False,
        default="scheduled",
    )
    # One of: normal, high, critical
    priority = Column(
        String(20),
        nullable=False,
        default="normal",
    )

    # ------------------------------------------------------------
    # Timing
    # ------------------------------------------------------------
    # Python-side default: timezone-aware "now" in UTC, evaluated per insert.
    started_at = Column(
        DateTime(timezone=True),
        nullable=False,
        default=lambda: datetime.now(timezone.utc),
    )
    completed_at = Column(
        DateTime(timezone=True),
        nullable=True,
    )
    duration_seconds = Column(
        Integer,
        nullable=True,
    )

    # ------------------------------------------------------------
    # Step tracking
    # Each step stores start/end timestamps, a status string
    # (success, failed, skipped) and an optional error text.
    # ------------------------------------------------------------

    # Forecasting step
    forecasting_started_at = Column(
        DateTime(timezone=True),
        nullable=True,
    )
    forecasting_completed_at = Column(
        DateTime(timezone=True),
        nullable=True,
    )
    forecasting_status = Column(
        String(20),
        nullable=True,
    )
    forecasting_error = Column(
        Text,
        nullable=True,
    )

    # Production step
    production_started_at = Column(
        DateTime(timezone=True),
        nullable=True,
    )
    production_completed_at = Column(
        DateTime(timezone=True),
        nullable=True,
    )
    production_status = Column(
        String(20),
        nullable=True,
    )
    production_error = Column(
        Text,
        nullable=True,
    )

    # Procurement step
    procurement_started_at = Column(
        DateTime(timezone=True),
        nullable=True,
    )
    procurement_completed_at = Column(
        DateTime(timezone=True),
        nullable=True,
    )
    procurement_status = Column(
        String(20),
        nullable=True,
    )
    procurement_error = Column(
        Text,
        nullable=True,
    )

    # Notification step
    notification_started_at = Column(
        DateTime(timezone=True),
        nullable=True,
    )
    notification_completed_at = Column(
        DateTime(timezone=True),
        nullable=True,
    )
    notification_status = Column(
        String(20),
        nullable=True,
    )
    notification_error = Column(
        Text,
        nullable=True,
    )

    # ------------------------------------------------------------
    # AI Insights tracking
    # Same shape as the steps above, plus generated/posted counters.
    # ------------------------------------------------------------
    ai_insights_started_at = Column(
        DateTime(timezone=True),
        nullable=True,
    )
    ai_insights_completed_at = Column(
        DateTime(timezone=True),
        nullable=True,
    )
    ai_insights_status = Column(
        String(20),
        nullable=True,
    )
    ai_insights_error = Column(
        Text,
        nullable=True,
    )
    ai_insights_generated = Column(
        Integer,
        nullable=False,
        default=0,
    )
    ai_insights_posted = Column(
        Integer,
        nullable=False,
        default=0,
    )

    # ------------------------------------------------------------
    # Results summary (counters, all default to 0)
    # ------------------------------------------------------------
    forecasts_generated = Column(
        Integer,
        nullable=False,
        default=0,
    )
    production_batches_created = Column(
        Integer,
        nullable=False,
        default=0,
    )
    procurement_plans_created = Column(
        Integer,
        nullable=False,
        default=0,
    )
    purchase_orders_created = Column(
        Integer,
        nullable=False,
        default=0,
    )
    notifications_sent = Column(
        Integer,
        nullable=False,
        default=0,
    )

    # ------------------------------------------------------------
    # Forecast data passed between services
    # ------------------------------------------------------------
    # Stores forecast results for downstream services.
    forecast_data = Column(
        JSONB,
        nullable=True,
    )

    # ------------------------------------------------------------
    # Error handling
    # ------------------------------------------------------------
    retry_count = Column(
        Integer,
        nullable=False,
        default=0,
    )
    max_retries_reached = Column(
        Boolean,
        nullable=False,
        default=False,
    )
    error_message = Column(
        Text,
        nullable=True,
    )
    error_details = Column(
        JSONB,
        nullable=True,
    )

    # ------------------------------------------------------------
    # External references
    # ------------------------------------------------------------
    forecast_id = Column(
        UUID(as_uuid=True),
        nullable=True,
    )
    production_schedule_id = Column(
        UUID(as_uuid=True),
        nullable=True,
    )
    procurement_plan_id = Column(
        UUID(as_uuid=True),
        nullable=True,
    )

    # ------------------------------------------------------------
    # Saga tracking
    # ------------------------------------------------------------
    saga_steps_total = Column(
        Integer,
        nullable=False,
        default=0,
    )
    saga_steps_completed = Column(
        Integer,
        nullable=False,
        default=0,
    )

    # ------------------------------------------------------------
    # Audit fields
    # ------------------------------------------------------------
    # Timestamps here are filled by the database (server_default=func.now()).
    created_at = Column(
        DateTime(timezone=True),
        server_default=func.now(),
        nullable=False,
    )
    updated_at = Column(
        DateTime(timezone=True),
        server_default=func.now(),
        onupdate=func.now(),
        nullable=False,
    )
    # One of: scheduler, user_id, api
    triggered_by = Column(
        String(100),
        nullable=True,
    )

    # ------------------------------------------------------------
    # Performance metrics
    # ------------------------------------------------------------
    # Percentage as integer (0-100)
    fulfillment_rate = Column(
        Integer,
        nullable=True,
    )
    # Percentage as integer (0-100)
    on_time_delivery_rate = Column(
        Integer,
        nullable=True,
    )
    # Percentage as integer (0-100)
    cost_accuracy = Column(
        Integer,
        nullable=True,
    )
    # Rating as integer (0-100)
    quality_score = Column(
        Integer,
        nullable=True,
    )

    # ------------------------------------------------------------
    # Metadata
    # ------------------------------------------------------------
    run_metadata = Column(
        JSONB,
        nullable=True,
    )
|