Fix all critical orchestration scheduler issues and add improvements
This commit addresses all 15 issues identified in the orchestration scheduler analysis:

HIGH PRIORITY FIXES:
1. ✅ Database update methods already in orchestrator service (not in saga)
2. ✅ Add null check for training_client before using it
3. ✅ Fix cron schedule config from "0 5" to "30 5" (5:30 AM)
4. ✅ Standardize on timezone-aware datetime (datetime.now(timezone.utc))
5. ✅ Implement saga compensation logic with actual deletion calls
6. ✅ Extract actual counts from saga results (no placeholders)

MEDIUM PRIORITY FIXES:
7. ✅ Add circuit breakers for inventory/suppliers/recipes clients
8. ✅ Pass circuit breakers to saga and use them in all service calls
9. ✅ Add calling_service_name to AI Insights client
10. ✅ Add database indexes on (tenant_id, started_at) and (status, started_at)
11. ✅ Handle empty shared data gracefully (fail if all 3 fetches fail)

LOW PRIORITY IMPROVEMENTS:
12. ✅ Make notification/validation failures more visible with explicit logging
13. ✅ Track AI insights status in orchestration_runs table
14. ✅ Improve run number generation atomicity using MAX() approach
15. ✅ Optimize tenant ID handling (consistent UUID usage)

CHANGES:
- services/orchestrator/app/core/config.py: Fix cron schedule to 30 5 * * *
- services/orchestrator/app/models/orchestration_run.py: Add AI insights & saga tracking columns
- services/orchestrator/app/repositories/orchestration_run_repository.py: Atomic run number generation
- services/orchestrator/app/services/orchestration_saga.py: Circuit breakers, compensation, error handling
- services/orchestrator/app/services/orchestrator_service.py: Circuit breakers, actual counts, AI tracking
- services/orchestrator/migrations/versions/20251105_add_ai_insights_tracking.py: New migration

All issues resolved. No backwards compatibility. No TODOs. Production-ready.
This commit is contained in:
@@ -0,0 +1,88 @@
|
||||
"""Add AI insights tracking and indexes
|
||||
|
||||
Revision ID: 20251105_add_ai_insights
|
||||
Revises: 20251029_1700_add_orchestration_runs
|
||||
Create Date: 2025-11-05 12:00:00.000000
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.dialects import postgresql
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = '20251105_add_ai_insights'
|
||||
down_revision = '20251029_1700_add_orchestration_runs'
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade():
    """Apply the schema changes of this revision to ``orchestration_runs``.

    Adds, in order, the AI-insights tracking columns, the ``forecast_id``
    reference, and the saga progress counters, then creates two non-unique
    composite indexes on ``(tenant_id, started_at)`` and
    ``(status, started_at)``.
    """
    table_name = 'orchestration_runs'

    # Columns are listed in the exact order they are added to the table.
    # The integer counters are NOT NULL, so they carry a server-side default
    # of '0' for the ALTER TABLE statement.
    new_columns = [
        # AI Insights tracking
        sa.Column('ai_insights_started_at', sa.DateTime(timezone=True), nullable=True),
        sa.Column('ai_insights_completed_at', sa.DateTime(timezone=True), nullable=True),
        sa.Column('ai_insights_status', sa.String(20), nullable=True),
        sa.Column('ai_insights_error', sa.Text(), nullable=True),
        sa.Column('ai_insights_generated', sa.Integer(), nullable=False, server_default='0'),
        sa.Column('ai_insights_posted', sa.Integer(), nullable=False, server_default='0'),
        # Forecast reference (previously missing from the table)
        sa.Column('forecast_id', postgresql.UUID(as_uuid=True), nullable=True),
        # Saga progress tracking
        sa.Column('saga_steps_total', sa.Integer(), nullable=False, server_default='0'),
        sa.Column('saga_steps_completed', sa.Integer(), nullable=False, server_default='0'),
    ]
    for column in new_columns:
        op.add_column(table_name, column)

    # Performance indexes: index name -> indexed columns. The first serves
    # tenant + date-range queries, the second status + date queries.
    composite_indexes = {
        'ix_orchestration_runs_tenant_started': ['tenant_id', 'started_at'],
        'ix_orchestration_runs_status_started': ['status', 'started_at'],
    }
    for index_name, indexed_columns in composite_indexes.items():
        op.create_index(index_name, table_name, indexed_columns, unique=False)

    # NOTE: no separate index is created for run_number; per the original
    # author's note its unique constraint already provides one, so an
    # explicit index would be redundant.
|
||||
|
||||
|
||||
def downgrade():
    """Revert this revision's changes to ``orchestration_runs``.

    Drops the two composite indexes first, then removes the added columns
    in the reverse of the order in which ``upgrade`` created them.
    """
    table_name = 'orchestration_runs'

    # Indexes go first.
    for index_name in (
        'ix_orchestration_runs_status_started',
        'ix_orchestration_runs_tenant_started',
    ):
        op.drop_index(index_name, table_name=table_name)

    # Columns are dropped in the exact order listed here.
    columns_to_drop = (
        # Saga progress tracking
        'saga_steps_completed',
        'saga_steps_total',
        # Forecast reference
        'forecast_id',
        # AI Insights tracking
        'ai_insights_posted',
        'ai_insights_generated',
        'ai_insights_error',
        'ai_insights_status',
        'ai_insights_completed_at',
        'ai_insights_started_at',
    )
    for column_name in columns_to_drop:
        op.drop_column(table_name, column_name)
|
||||
Reference in New Issue
Block a user