Files

238 lines
9.5 KiB
Python
Raw Permalink Normal View History

2025-10-30 21:08:07 +01:00
# ================================================================
# services/orchestrator/app/main.py
# ================================================================
"""
Orchestrator Service - FastAPI Application
Automated orchestration of forecasting, production, and procurement workflows
"""
from fastapi import FastAPI, Request
from sqlalchemy import text
from app.core.config import settings
from app.core.database import database_manager
from shared.service_base import StandardFastAPIService
class OrchestratorService(StandardFastAPIService):
    """Orchestrator Service with standardized setup.

    Extends ``StandardFastAPIService`` with:

    * a migration-version check that runs at startup,
    * RabbitMQ messaging setup/cleanup (``rabbitmq_client`` / ``event_publisher``),
    * Redis-backed leader election so that only the leader pod runs the
      scheduler, with a standalone fallback when leader election cannot be
      set up.
    """

    # Alembic revision that `verify_migrations` requires in `alembic_version`.
    expected_migration_version = "001_initial_schema"

    def __init__(self):
        # Define expected database tables for health checks
        orchestrator_expected_tables = [
            'orchestration_runs'
        ]

        # Populated by `_setup_messaging`.
        self.rabbitmq_client = None
        self.event_publisher = None

        # Populated by `_setup_leader_election` / the leadership callbacks.
        self.leader_election = None
        self.scheduler_service = None

        super().__init__(
            service_name="orchestrator-service",
            app_name=settings.APP_NAME,
            description=settings.DESCRIPTION,
            version=settings.VERSION,
            api_prefix="",  # Empty because RouteBuilder already includes /api/v1
            database_manager=database_manager,
            expected_tables=orchestrator_expected_tables,
            enable_messaging=True  # Enable RabbitMQ for event publishing
        )

    async def verify_migrations(self):
        """Verify database schema matches the latest migrations.

        Reads ``version_num`` from ``alembic_version`` and compares it with
        ``expected_migration_version``.

        Raises:
            RuntimeError: if the stored version differs from the expected one.
            Exception: any error raised while querying the database is logged
                and re-raised unchanged.
        """
        try:
            async with self.database_manager.get_session() as session:
                result = await session.execute(text("SELECT version_num FROM alembic_version"))
                version = result.scalar()
        except Exception as e:
            self.logger.error(f"Migration verification failed: {e}")
            raise

        # The comparison lives outside the try block so that a mismatch is
        # logged once, not a second time by the generic handler above.
        if version != self.expected_migration_version:
            mismatch = (
                f"Migration version mismatch: expected {self.expected_migration_version}, got {version}"
            )
            self.logger.error(mismatch)
            raise RuntimeError(mismatch)

        self.logger.info(f"Migration verification successful: {version}")

    async def _setup_messaging(self):
        """Setup messaging for orchestrator service.

        Connects a ``RabbitMQClient`` to ``settings.RABBITMQ_URL`` and wraps it
        in a ``UnifiedEventPublisher``. Failures are logged and re-raised.
        """
        from shared.messaging import UnifiedEventPublisher, RabbitMQClient

        try:
            self.rabbitmq_client = RabbitMQClient(settings.RABBITMQ_URL, service_name="orchestrator-service")
            await self.rabbitmq_client.connect()
            # Create event publisher
            self.event_publisher = UnifiedEventPublisher(self.rabbitmq_client, "orchestrator-service")
            self.logger.info("Orchestrator service messaging setup completed")
        except Exception as e:
            self.logger.error("Failed to setup orchestrator messaging", error=str(e))
            raise

    async def _cleanup_messaging(self):
        """Cleanup messaging for orchestrator service.

        Best effort: disconnect errors are logged and not re-raised.
        """
        try:
            if self.rabbitmq_client:
                await self.rabbitmq_client.disconnect()
            self.logger.info("Orchestrator service messaging cleanup completed")
        except Exception as e:
            self.logger.error("Error during orchestrator messaging cleanup", error=str(e))

    async def on_startup(self, app: FastAPI):
        """Custom startup logic for orchestrator service."""
        # Verify migrations first
        # NOTE(review): this runs before the parent startup; it assumes the
        # database manager can already hand out sessions at this point.
        await self.verify_migrations()

        # Call parent startup (includes database, messaging, etc.)
        await super().on_startup(app)

        self.logger.info("Orchestrator Service starting up...")

        # Initialize leader election for horizontal scaling
        # Only the leader pod will run the scheduler
        await self._setup_leader_election(app)

        # REMOVED: Delivery tracking service - moved to procurement service (domain ownership)

    @staticmethod
    def _build_redis_url():
        """Build the Redis URL used for leader election from ``settings``.

        * The scheme is ``rediss`` when ``REDIS_TLS_ENABLED`` is the string
          "true" (any case) or the bool ``True``, otherwise ``redis``.
        * The password is percent-encoded so characters such as ``@``, ``/``
          or ``:`` cannot corrupt the URL.
        * An unset/empty password yields a URL without credentials.
        """
        from urllib.parse import quote

        # str() makes the check work for both string and bool settings.
        tls_enabled = str(settings.REDIS_TLS_ENABLED).lower() == "true"
        scheme = "rediss" if tls_enabled else "redis"

        password = settings.REDIS_PASSWORD
        credentials = f":{quote(str(password), safe='')}@" if password else ""

        return (
            f"{scheme}://{credentials}"
            f"{settings.REDIS_HOST}:{settings.REDIS_PORT}/{settings.REDIS_DB}"
        )

    async def _start_scheduler(self, app: FastAPI):
        """Create and start the scheduler, and publish it on ``app.state``."""
        from app.services.orchestrator_service import OrchestratorSchedulerService

        self.scheduler_service = OrchestratorSchedulerService(self.event_publisher, settings)
        await self.scheduler_service.start()
        app.state.scheduler_service = self.scheduler_service

    async def _setup_leader_election(self, app: FastAPI):
        """
        Setup leader election for scheduler.

        CRITICAL FOR HORIZONTAL SCALING:
        Without leader election, each pod would run the same scheduled jobs,
        causing duplicate forecasts, production schedules, and database contention.

        If anything in the setup fails (Redis unreachable, election start
        error, ...), the error is logged and the scheduler is started directly
        in standalone mode.
        """
        from shared.leader_election import LeaderElectionService
        import redis.asyncio as redis

        try:
            # Create Redis connection for leader election
            redis_client = redis.from_url(self._build_redis_url(), decode_responses=False)
            await redis_client.ping()
            # NOTE(review): this client is never explicitly closed here;
            # confirm whether LeaderElectionService.stop() owns its cleanup.

            # Use shared leader election service
            self.leader_election = LeaderElectionService(
                redis_client,
                service_name="orchestrator"
            )

            # Define callbacks for leader state changes
            async def on_become_leader():
                self.logger.info("This pod became the leader - starting scheduler")
                await self._start_scheduler(app)
                self.logger.info("Orchestrator scheduler service started (leader only)")

            async def on_lose_leader():
                self.logger.warning("This pod lost leadership - stopping scheduler")
                if self.scheduler_service:
                    await self.scheduler_service.stop()
                    self.scheduler_service = None
                    if hasattr(app.state, 'scheduler_service'):
                        app.state.scheduler_service = None
                    self.logger.info("Orchestrator scheduler service stopped (no longer leader)")

            # Start leader election
            await self.leader_election.start(
                on_become_leader=on_become_leader,
                on_lose_leader=on_lose_leader
            )

            # Store leader election in app state for health checks
            app.state.leader_election = self.leader_election

            self.logger.info("Leader election initialized",
                             is_leader=self.leader_election.is_leader,
                             instance_id=self.leader_election.instance_id)

        except Exception as e:
            self.logger.error("Failed to setup leader election, falling back to standalone mode",
                              error=str(e))
            # Fallback: start scheduler anyway (for single-pod deployments)
            await self._start_scheduler(app)
            self.logger.warning("Scheduler started in standalone mode (no leader election)")

    async def on_shutdown(self, app: FastAPI):
        """Custom shutdown logic for orchestrator service."""
        self.logger.info("Orchestrator Service shutting down...")

        # Stop leader election (this will also stop scheduler if we're the leader)
        if self.leader_election:
            await self.leader_election.stop()
            self.logger.info("Leader election stopped")

        # Stop scheduler service if still running
        if self.scheduler_service:
            await self.scheduler_service.stop()
            self.logger.info("Orchestrator scheduler service stopped")

    def get_service_features(self):
        """Return orchestrator-specific features."""
        return [
            "automated_orchestration",
            "forecasting_integration",
            "production_scheduling",
            "procurement_planning",
            "notification_dispatch",
            "leader_election",
            "retry_mechanism",
            "circuit_breaker"
        ]
# ---------------------------------------------------------------
# Module-level wiring (statements kept in their original order)
# ---------------------------------------------------------------

# Service instance and the FastAPI app it builds with the standardized setup.
service = OrchestratorService()
app = service.create_app()

# Standard endpoints (health, readiness, metrics).
service.setup_standard_endpoints()

# BUSINESS: orchestration operations, plus the internal API router.
from app.api.orchestration import router as orchestration_router
from app.api.internal import router as internal_router

service.add_router(orchestration_router)
service.add_router(internal_router)

# INTERNAL: service-to-service endpoints for demo data cloning.
from app.api.internal_demo import router as internal_demo_router

service.add_router(internal_demo_router, tags=["internal-demo"])

@app.middleware("http")
async def logging_middleware(request: Request, call_next):
    """Log method, URL, status code and handling time of each HTTP request.

    Args:
        request: The incoming request.
        call_next: Callable that passes the request on and returns the response.

    Returns:
        The response produced by ``call_next``, unmodified.
    """
    import time

    # perf_counter() is monotonic, so the measured duration cannot go negative
    # or jump when the system wall clock is adjusted (unlike time.time()).
    started_at = time.perf_counter()
    response = await call_next(request)
    process_time = time.perf_counter() - started_at

    service.logger.info("HTTP request processed",
                        method=request.method,
                        url=str(request.url),
                        status_code=response.status_code,
                        process_time=round(process_time, 4))
    return response
if __name__ == "__main__":
    # Direct-execution entry point: serve the app with uvicorn on all
    # interfaces, port 8000; auto-reload follows settings.DEBUG.
    import uvicorn

    server_options = {
        "host": "0.0.0.0",
        "port": 8000,
        "reload": settings.DEBUG,
    }
    uvicorn.run("main:app", **server_options)