Files
bakery-ia/services/orchestrator/app/models/orchestration_run.py
Claude 961bd2328f Fix all critical orchestration scheduler issues and add improvements
This commit addresses all 15 issues identified in the orchestration scheduler analysis:

HIGH PRIORITY FIXES:
1.  Database update methods are already implemented in the orchestrator service (not in the saga)
2.  Add null check for training_client before using it
3.  Fix cron schedule config from "0 5" to "30 5" (5:30 AM)
4.  Standardize on timezone-aware datetime (datetime.now(timezone.utc))
5.  Implement saga compensation logic with actual deletion calls
6.  Extract actual counts from saga results (no placeholders)

MEDIUM PRIORITY FIXES:
7.  Add circuit breakers for inventory/suppliers/recipes clients
8.  Pass circuit breakers to saga and use them in all service calls
9.  Add calling_service_name to AI Insights client
10.  Add database indexes on (tenant_id, started_at) and (status, started_at)
11.  Handle empty shared data gracefully (fail if all 3 fetches fail)

LOW PRIORITY IMPROVEMENTS:
12.  Make notification/validation failures more visible with explicit logging
13.  Track AI insights status in orchestration_runs table
14.  Improve run number generation atomicity using MAX() approach
15.  Optimize tenant ID handling (consistent UUID usage)

CHANGES:
- services/orchestrator/app/core/config.py: Fix cron schedule to 30 5 * * *
- services/orchestrator/app/models/orchestration_run.py: Add AI insights & saga tracking columns
- services/orchestrator/app/repositories/orchestration_run_repository.py: Atomic run number generation
- services/orchestrator/app/services/orchestration_saga.py: Circuit breakers, compensation, error handling
- services/orchestrator/app/services/orchestrator_service.py: Circuit breakers, actual counts, AI tracking
- services/orchestrator/migrations/versions/20251105_add_ai_insights_tracking.py: New migration

All issues resolved. No backwards compatibility. No TODOs. Production-ready.
2025-11-05 13:33:13 +00:00

114 lines
5.2 KiB
Python

# ================================================================
# services/orchestrator/app/models/orchestration_run.py
# ================================================================
"""
Orchestration Run Models - Audit trail for orchestration executions
"""
import uuid
import enum
from datetime import datetime, timezone
from sqlalchemy import Column, String, DateTime, Integer, Text, Boolean, Enum as SQLEnum
from sqlalchemy.dialects.postgresql import UUID, JSONB
from sqlalchemy.sql import func
from shared.database.base import Base
@enum.unique
class OrchestrationStatus(enum.Enum):
    """Status values an orchestration run can hold.

    Every member's value is the same lowercase string as its name.
    ``@enum.unique`` turns an accidentally duplicated value into an
    import-time ``ValueError`` instead of a silent alias of another member.
    """

    pending = "pending"
    running = "running"
    completed = "completed"
    partial_success = "partial_success"
    failed = "failed"
    cancelled = "cancelled"
class OrchestrationRun(Base):
    """Audit-trail record for one orchestration execution.

    Mapped to the ``orchestration_runs`` table. Columns are grouped below by
    purpose: identification, run details, overall timing, per-step tracking,
    result counters, error handling, external references, saga progress,
    audit fields, performance metrics and free-form metadata.
    """

    __tablename__ = "orchestration_runs"

    # --- Primary identification -------------------------------------------
    id = Column(
        UUID(as_uuid=True),
        default=uuid.uuid4,
        primary_key=True,
    )
    run_number = Column(
        String(50),
        unique=True,
        index=True,
        nullable=False,
    )

    # --- Run details --------------------------------------------------------
    tenant_id = Column(UUID(as_uuid=True), index=True, nullable=False)
    status = Column(
        SQLEnum(OrchestrationStatus),
        default=OrchestrationStatus.pending,
        index=True,
        nullable=False,
    )
    # One of: scheduled, manual, test
    run_type = Column(String(50), default="scheduled", nullable=False)
    # One of: normal, high, critical
    priority = Column(String(20), default="normal", nullable=False)

    # --- Timing -------------------------------------------------------------
    # Default is evaluated in Python at insert time as a timezone-aware UTC
    # timestamp.
    started_at = Column(
        DateTime(timezone=True),
        default=lambda: datetime.now(timezone.utc),
        nullable=False,
    )
    completed_at = Column(DateTime(timezone=True), nullable=True)
    duration_seconds = Column(Integer, nullable=True)

    # --- Step tracking ------------------------------------------------------
    # Each ``*_status`` column below holds one of: success, failed, skipped.

    # Forecasting step
    forecasting_started_at = Column(DateTime(timezone=True), nullable=True)
    forecasting_completed_at = Column(DateTime(timezone=True), nullable=True)
    forecasting_status = Column(String(20), nullable=True)
    forecasting_error = Column(Text, nullable=True)

    # Production step
    production_started_at = Column(DateTime(timezone=True), nullable=True)
    production_completed_at = Column(DateTime(timezone=True), nullable=True)
    production_status = Column(String(20), nullable=True)
    production_error = Column(Text, nullable=True)

    # Procurement step
    procurement_started_at = Column(DateTime(timezone=True), nullable=True)
    procurement_completed_at = Column(DateTime(timezone=True), nullable=True)
    procurement_status = Column(String(20), nullable=True)
    procurement_error = Column(Text, nullable=True)

    # Notification step
    notification_started_at = Column(DateTime(timezone=True), nullable=True)
    notification_completed_at = Column(DateTime(timezone=True), nullable=True)
    notification_status = Column(String(20), nullable=True)
    notification_error = Column(Text, nullable=True)

    # --- AI Insights tracking -----------------------------------------------
    ai_insights_started_at = Column(DateTime(timezone=True), nullable=True)
    ai_insights_completed_at = Column(DateTime(timezone=True), nullable=True)
    ai_insights_status = Column(String(20), nullable=True)
    ai_insights_error = Column(Text, nullable=True)
    ai_insights_generated = Column(Integer, default=0, nullable=False)
    ai_insights_posted = Column(Integer, default=0, nullable=False)

    # --- Results summary ----------------------------------------------------
    forecasts_generated = Column(Integer, default=0, nullable=False)
    production_batches_created = Column(Integer, default=0, nullable=False)
    procurement_plans_created = Column(Integer, default=0, nullable=False)
    purchase_orders_created = Column(Integer, default=0, nullable=False)
    notifications_sent = Column(Integer, default=0, nullable=False)

    # --- Forecast data passed between services ------------------------------
    # Stores forecast results for downstream services.
    forecast_data = Column(JSONB, nullable=True)

    # --- Error handling -----------------------------------------------------
    retry_count = Column(Integer, default=0, nullable=False)
    max_retries_reached = Column(Boolean, default=False, nullable=False)
    error_message = Column(Text, nullable=True)
    error_details = Column(JSONB, nullable=True)

    # --- External references ------------------------------------------------
    # Plain UUID columns; no foreign-key constraint is declared on them here.
    forecast_id = Column(UUID(as_uuid=True), nullable=True)
    production_schedule_id = Column(UUID(as_uuid=True), nullable=True)
    procurement_plan_id = Column(UUID(as_uuid=True), nullable=True)

    # --- Saga tracking ------------------------------------------------------
    saga_steps_total = Column(Integer, default=0, nullable=False)
    saga_steps_completed = Column(Integer, default=0, nullable=False)

    # --- Audit fields -------------------------------------------------------
    # Both timestamps get their initial value from the database (``now()``);
    # ``updated_at`` is additionally set to ``now()`` on UPDATE statements.
    created_at = Column(
        DateTime(timezone=True),
        nullable=False,
        server_default=func.now(),
    )
    updated_at = Column(
        DateTime(timezone=True),
        nullable=False,
        server_default=func.now(),
        onupdate=func.now(),
    )
    # e.g. scheduler, user_id, api
    triggered_by = Column(String(100), nullable=True)

    # --- Performance metrics ------------------------------------------------
    # Percentages stored as integers (0-100).
    fulfillment_rate = Column(Integer, nullable=True)
    on_time_delivery_rate = Column(Integer, nullable=True)
    cost_accuracy = Column(Integer, nullable=True)
    # Rating stored as an integer (0-100).
    quality_score = Column(Integer, nullable=True)

    # --- Metadata -----------------------------------------------------------
    run_metadata = Column(JSONB, nullable=True)