Initial commit - production deployment
0
services/orchestrator/app/__init__.py
Normal file
4
services/orchestrator/app/api/__init__.py
Normal file
@@ -0,0 +1,4 @@
from .orchestration import router as orchestration_router
from .internal_demo import router as internal_demo_router

__all__ = ["orchestration_router", "internal_demo_router"]
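Example, a minimal sketch of how a consumer could wire these package-level exports into an app (illustrative only; the actual registration in this commit happens through service.add_router in app/main.py):

# Hypothetical wiring sketch, not part of this commit
from fastapi import FastAPI
from app.api import orchestration_router, internal_demo_router

app = FastAPI()
app.include_router(orchestration_router)
app.include_router(internal_demo_router)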
177
services/orchestrator/app/api/internal.py
Normal file
@@ -0,0 +1,177 @@
"""
Internal API for Alert Intelligence Service
Provides orchestrator context for alert enrichment
"""

from fastapi import APIRouter, Header, HTTPException, Query
from typing import Optional, List, Dict, Any
from datetime import datetime, timedelta
from uuid import UUID
from pydantic import BaseModel

router = APIRouter(prefix="/api/internal", tags=["internal"])


class OrchestrationAction(BaseModel):
    """Recent orchestration action"""
    id: str
    type: str  # purchase_order, production_batch
    status: str  # created, pending_approval, approved, completed
    delivery_date: Optional[datetime]
    reasoning: Optional[Dict[str, Any]]
    estimated_resolution: Optional[datetime]
    created_at: datetime


class RecentActionsResponse(BaseModel):
    """Response with recent orchestrator actions"""
    actions: List[OrchestrationAction]
    count: int


@router.get("/recent-actions", response_model=RecentActionsResponse)
async def get_recent_actions(
    tenant_id: str = Query(..., description="Tenant ID"),
    ingredient_id: Optional[str] = Query(None, description="Filter by ingredient"),
    product_id: Optional[str] = Query(None, description="Filter by product"),
    hours_ago: int = Query(24, description="Look back hours"),
):
    """
    Get recent orchestrator actions for alert context enrichment.
    Only accessible by internal services (alert-intelligence).

    Returns orchestration runs with details about POs created, batches adjusted, etc.
    This helps the alert system understand if AI already addressed similar issues.
    """
    from shared.database.base import create_database_manager
    from ..core.config import get_settings
    from ..models.orchestration_run import OrchestrationRun, OrchestrationStatus
    from sqlalchemy import select, and_, desc
    import structlog

    logger = structlog.get_logger()

    try:
        settings = get_settings()
        db_manager = create_database_manager(settings.DATABASE_URL, "orchestrator")

        async with db_manager.get_session() as session:
            cutoff_time = datetime.utcnow() - timedelta(hours=hours_ago)

            # Query recent orchestration runs
            query = select(OrchestrationRun).where(
                and_(
                    OrchestrationRun.tenant_id == UUID(tenant_id),
                    OrchestrationRun.created_at >= cutoff_time,
                    OrchestrationRun.status.in_([
                        OrchestrationStatus.completed,
                        OrchestrationStatus.partial_success
                    ])
                )
            ).order_by(desc(OrchestrationRun.created_at))

            result = await session.execute(query)
            runs = result.scalars().all()

            actions = []

            for run in runs:
                run_metadata = run.run_metadata or {}

                # Add purchase order actions
                if run.purchase_orders_created > 0:
                    po_details = run_metadata.get('purchase_orders', [])

                    # If metadata has PO details, use them
                    if po_details:
                        for po in po_details:
                            # Filter by ingredient if specified
                            if ingredient_id:
                                po_items = po.get('items', [])
                                has_ingredient = any(
                                    item.get('ingredient_id') == ingredient_id
                                    for item in po_items
                                )
                                if not has_ingredient:
                                    continue

                            actions.append(OrchestrationAction(
                                id=po.get('id', str(run.id)),
                                type="purchase_order",
                                status=po.get('status', 'created'),
                                delivery_date=po.get('delivery_date'),
                                reasoning=run_metadata.get('reasoning'),
                                estimated_resolution=po.get('delivery_date'),
                                created_at=run.created_at
                            ))
                    else:
                        # Fallback: create generic action from run
                        actions.append(OrchestrationAction(
                            id=str(run.id),
                            type="purchase_order",
                            status="created",
                            delivery_date=None,
                            reasoning=run_metadata.get('reasoning'),
                            estimated_resolution=None,
                            created_at=run.created_at
                        ))

                # Add production batch actions
                if run.production_batches_created > 0:
                    batch_details = run_metadata.get('production_batches', [])

                    if batch_details:
                        for batch in batch_details:
                            # Filter by product if specified
                            if product_id and batch.get('product_id') != product_id:
                                continue

                            actions.append(OrchestrationAction(
                                id=batch.get('id', str(run.id)),
                                type="production_batch",
                                status=batch.get('status', 'created'),
                                delivery_date=None,
                                reasoning=run_metadata.get('reasoning'),
                                estimated_resolution=batch.get('scheduled_date'),
                                created_at=run.created_at
                            ))
                    else:
                        # Fallback: create generic action from run
                        if not product_id:  # Only add if no product filter
                            actions.append(OrchestrationAction(
                                id=str(run.id),
                                type="production_batch",
                                status="created",
                                delivery_date=None,
                                reasoning=run_metadata.get('reasoning'),
                                estimated_resolution=None,
                                created_at=run.created_at
                            ))

            logger.info(
                "recent_actions_fetched",
                tenant_id=tenant_id,
                hours_ago=hours_ago,
                action_count=len(actions),
                ingredient_id=ingredient_id,
                product_id=product_id
            )

            return RecentActionsResponse(
                actions=actions,
                count=len(actions)
            )

    except Exception as e:
        logger.error("error_fetching_recent_actions", error=str(e), tenant_id=tenant_id)
        raise HTTPException(
            status_code=500,
            detail=f"Failed to fetch recent actions: {str(e)}"
        )


@router.get("/health")
async def internal_health():
    """Internal health check"""
    return {"status": "healthy", "api": "internal"}
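Example, a minimal sketch of how another internal service (such as alert-intelligence) could call this context endpoint. The in-cluster hostname, tenant UUID, and helper name are assumptions for illustration, not part of this commit:

# Hypothetical caller sketch
import asyncio
from typing import Optional

import httpx


async def fetch_recent_actions(tenant_id: str, ingredient_id: Optional[str] = None) -> dict:
    params = {"tenant_id": tenant_id, "hours_ago": 24}
    if ingredient_id:
        params["ingredient_id"] = ingredient_id
    async with httpx.AsyncClient(base_url="http://orchestrator-service:8000") as client:
        resp = await client.get("/api/internal/recent-actions", params=params)
        resp.raise_for_status()
        return resp.json()  # {"actions": [...], "count": N}

# asyncio.run(fetch_recent_actions("6f1f0000-0000-0000-0000-000000000000"))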
277
services/orchestrator/app/api/internal_demo.py
Normal file
@@ -0,0 +1,277 @@
"""
Internal Demo API Endpoints for Orchestrator Service
Used by demo_session service to clone data for virtual demo tenants
"""

from fastapi import APIRouter, Depends, HTTPException, Header
from typing import Dict, Any
from uuid import UUID
import structlog
import os
import json

from app.core.database import get_db
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select, delete, func
from app.models.orchestration_run import OrchestrationRun, OrchestrationStatus
import uuid
from datetime import datetime, timezone, timedelta
from typing import Optional
import sys
from pathlib import Path

# Add shared utilities to path
sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent))
from shared.utils.demo_dates import adjust_date_for_demo

from app.core.config import settings

router = APIRouter(prefix="/internal/demo", tags=["internal"])
logger = structlog.get_logger()


async def ensure_unique_run_number(db: AsyncSession, base_run_number: str) -> str:
    """Ensure the run number is unique by appending a suffix if needed"""
    proposed_run_number = base_run_number

    # Check if the proposed run number already exists in the database
    while True:
        result = await db.execute(
            select(OrchestrationRun)
            .where(OrchestrationRun.run_number == proposed_run_number)
        )
        existing_run = result.scalar_one_or_none()

        if not existing_run:
            # Run number is unique, return it
            return proposed_run_number

        # Generate a new run number with an additional random suffix
        random_suffix = str(uuid.uuid4())[:4].upper()
        proposed_run_number = f"{base_run_number[:50-len(random_suffix)-1]}-{random_suffix}"


async def load_fixture_data_for_tenant(
    db: AsyncSession,
    tenant_uuid: UUID,
    demo_account_type: str,
    reference_time: datetime,
    base_tenant_id: Optional[str] = None
) -> int:
    """
    Load orchestration run data from JSON fixture directly into the virtual tenant.
    Returns the number of runs created.
    """
    from shared.utils.seed_data_paths import get_seed_data_path
    from shared.utils.demo_dates import resolve_time_marker, adjust_date_for_demo

    # Load fixture data
    if demo_account_type == "enterprise_child" and base_tenant_id:
        json_file = get_seed_data_path("enterprise", "11-orchestrator.json", child_id=base_tenant_id)
    else:
        json_file = get_seed_data_path(demo_account_type, "11-orchestrator.json")

    with open(json_file, 'r', encoding='utf-8') as f:
        fixture_data = json.load(f)

    orchestration_run_data = fixture_data.get("orchestration_run")
    if not orchestration_run_data:
        logger.warning("No orchestration_run data in fixture")
        return 0

    # Parse and adjust dates from fixture to reference_time
    base_started_at = resolve_time_marker(orchestration_run_data.get("started_at"), reference_time)
    base_completed_at = resolve_time_marker(orchestration_run_data.get("completed_at"), reference_time)

    # Adjust dates to make them appear recent relative to session creation
    started_at = adjust_date_for_demo(base_started_at, reference_time) if base_started_at else reference_time - timedelta(hours=2)
    completed_at = adjust_date_for_demo(base_completed_at, reference_time) if base_completed_at else started_at + timedelta(minutes=15)

    # Generate unique run number with session context
    current_year = reference_time.year
    unique_suffix = str(uuid.uuid4())[:8].upper()
    run_number = f"ORCH-DEMO-PROF-{current_year}-001-{unique_suffix}"

    # Create orchestration run for virtual tenant
    new_run = OrchestrationRun(
        id=uuid.uuid4(),  # Generate new UUID
        tenant_id=tenant_uuid,
        run_number=run_number,
        status=OrchestrationStatus[orchestration_run_data["status"]],
        run_type=orchestration_run_data.get("run_type", "daily"),
        priority="normal",
        started_at=started_at,
        completed_at=completed_at,
        duration_seconds=orchestration_run_data.get("duration_seconds", 900),

        # Step statuses from orchestration_results
        forecasting_status="success",
        forecasting_started_at=started_at,
        forecasting_completed_at=started_at + timedelta(minutes=2),

        production_status="success",
        production_started_at=started_at + timedelta(minutes=2),
        production_completed_at=started_at + timedelta(minutes=5),

        procurement_status="success",
        procurement_started_at=started_at + timedelta(minutes=5),
        procurement_completed_at=started_at + timedelta(minutes=8),

        notification_status="success",
        notification_started_at=started_at + timedelta(minutes=8),
        notification_completed_at=completed_at,

        # Results from orchestration_results
        forecasts_generated=fixture_data.get("orchestration_results", {}).get("forecasts_generated", 10),
        production_batches_created=fixture_data.get("orchestration_results", {}).get("production_batches_created", 18),
        procurement_plans_created=0,
        purchase_orders_created=fixture_data.get("orchestration_results", {}).get("purchase_orders_created", 6),
        notifications_sent=fixture_data.get("orchestration_results", {}).get("notifications_sent", 8),

        # Metadata
        triggered_by="system",
        created_at=started_at,
        updated_at=completed_at
    )

    db.add(new_run)
    await db.flush()

    logger.info(
        "Loaded orchestration run from fixture",
        tenant_id=str(tenant_uuid),
        run_number=new_run.run_number,
        started_at=started_at.isoformat()
    )

    return 1


@router.post("/clone")
async def clone_demo_data(
    base_tenant_id: str,
    virtual_tenant_id: str,
    demo_account_type: str,
    session_id: Optional[str] = None,
    session_created_at: Optional[str] = None,
    db: AsyncSession = Depends(get_db)
):
    """
    Clone orchestration run demo data from base tenant to virtual tenant

    This endpoint is called by the demo_session service during session initialization.
    It clones orchestration runs with date adjustments to make them appear recent.

    If the base tenant has no orchestration runs, it will first seed them from the fixture.
    """

    start_time = datetime.now(timezone.utc)

    # Parse session_created_at or use current time; fall back to "now" if the
    # timestamp is not valid ISO 8601 (narrowed from a bare except)
    if session_created_at:
        try:
            reference_time = datetime.fromisoformat(session_created_at.replace('Z', '+00:00'))
        except ValueError:
            reference_time = datetime.now(timezone.utc)
    else:
        reference_time = datetime.now(timezone.utc)

    logger.info(
        "Starting orchestration runs cloning with date adjustment",
        base_tenant_id=base_tenant_id,
        virtual_tenant_id=virtual_tenant_id,
        demo_account_type=demo_account_type,
        session_id=session_id,
        reference_time=reference_time.isoformat()
    )

    try:
        virtual_uuid = uuid.UUID(virtual_tenant_id)

        # Load fixture data directly into virtual tenant (no base tenant cloning)
        runs_created = await load_fixture_data_for_tenant(
            db,
            virtual_uuid,
            demo_account_type,
            reference_time,
            base_tenant_id
        )

        await db.commit()

        duration_ms = int((datetime.now(timezone.utc) - start_time).total_seconds() * 1000)

        logger.info(
            "Orchestration runs loaded from fixture successfully",
            virtual_tenant_id=str(virtual_tenant_id),
            runs_created=runs_created,
            duration_ms=duration_ms
        )

        return {
            "service": "orchestrator",
            "status": "completed",
            "success": True,
            "records_cloned": runs_created,
            "runs_cloned": runs_created,
            "duration_ms": duration_ms
        }

    except Exception as e:
        logger.error("Failed to clone orchestration runs", error=str(e), exc_info=True)
        await db.rollback()
        raise HTTPException(status_code=500, detail=f"Failed to clone orchestration runs: {str(e)}")


@router.delete("/tenant/{virtual_tenant_id}")
async def delete_demo_data(
    virtual_tenant_id: str,
    db: AsyncSession = Depends(get_db)
):
    """Delete all orchestration runs for a virtual demo tenant"""
    logger.info("Deleting orchestration runs for virtual tenant", virtual_tenant_id=virtual_tenant_id)
    start_time = datetime.now(timezone.utc)

    try:
        virtual_uuid = uuid.UUID(virtual_tenant_id)

        # Count records
        run_count = await db.scalar(
            select(func.count(OrchestrationRun.id))
            .where(OrchestrationRun.tenant_id == virtual_uuid)
        )

        # Delete orchestration runs
        await db.execute(
            delete(OrchestrationRun)
            .where(OrchestrationRun.tenant_id == virtual_uuid)
        )
        await db.commit()

        duration_ms = int((datetime.now(timezone.utc) - start_time).total_seconds() * 1000)
        logger.info(
            "Orchestration runs deleted successfully",
            virtual_tenant_id=virtual_tenant_id,
            duration_ms=duration_ms
        )

        return {
            "service": "orchestrator",
            "status": "deleted",
            "virtual_tenant_id": virtual_tenant_id,
            "records_deleted": {
                "orchestration_runs": run_count,
                "total": run_count
            },
            "duration_ms": duration_ms
        }
    except Exception as e:
        logger.error("Failed to delete orchestration runs", error=str(e), exc_info=True)
        await db.rollback()
        raise HTTPException(status_code=500, detail=str(e))


@router.get("/clone/health")
async def health_check():
    """Health check for demo cloning endpoint"""
    return {"status": "healthy", "service": "orchestrator"}
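Example, a minimal sketch of the clone call made by the demo_session service. Because the endpoint declares plain str parameters on a POST, FastAPI treats them as query parameters; the UUIDs, account type value, and host below are placeholders:

# Hypothetical demo_session caller sketch
import httpx

def clone_orchestrator_demo_data() -> dict:
    params = {
        "base_tenant_id": "11111111-1111-1111-1111-111111111111",      # placeholder
        "virtual_tenant_id": "22222222-2222-2222-2222-222222222222",   # placeholder
        "demo_account_type": "professional",                            # illustrative value
        "session_created_at": "2025-01-15T08:00:00Z",
    }
    resp = httpx.post("http://orchestrator-service:8000/internal/demo/clone", params=params)
    resp.raise_for_status()
    return resp.json()  # {"service": "orchestrator", "records_cloned": 1, ...}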
346
services/orchestrator/app/api/orchestration.py
Normal file
@@ -0,0 +1,346 @@
# ================================================================
# services/orchestrator/app/api/orchestration.py
# ================================================================
"""
Orchestration API Endpoints
Testing and manual trigger endpoints for orchestration
"""

import uuid
from typing import Optional
from fastapi import APIRouter, Depends, HTTPException, Request
from pydantic import BaseModel, Field
import structlog

from app.core.database import get_db
from app.repositories.orchestration_run_repository import OrchestrationRunRepository
from sqlalchemy.ext.asyncio import AsyncSession

logger = structlog.get_logger()

router = APIRouter(prefix="/api/v1/tenants/{tenant_id}/orchestrator", tags=["Orchestration"])


# ================================================================
# REQUEST/RESPONSE SCHEMAS
# ================================================================

class OrchestratorTestRequest(BaseModel):
    """Request schema for testing orchestrator"""
    test_scenario: Optional[str] = Field(None, description="Test scenario: full, production_only, procurement_only")
    dry_run: bool = Field(False, description="Dry run mode (no actual changes)")


class OrchestratorTestResponse(BaseModel):
    """Response schema for orchestrator test"""
    success: bool
    message: str
    tenant_id: str
    forecasting_completed: bool = False
    production_completed: bool = False
    procurement_completed: bool = False
    notifications_sent: bool = False
    summary: dict = {}


class OrchestratorWorkflowRequest(BaseModel):
    """Request schema for daily workflow trigger"""
    dry_run: bool = Field(False, description="Dry run mode (no actual changes)")


class OrchestratorWorkflowResponse(BaseModel):
    """Response schema for daily workflow trigger"""
    success: bool
    message: str
    tenant_id: str
    run_id: Optional[str] = None
    forecasting_completed: bool = False
    production_completed: bool = False
    procurement_completed: bool = False
    notifications_sent: bool = False
    summary: dict = {}


# ================================================================
# API ENDPOINTS
# ================================================================

@router.post("/test", response_model=OrchestratorTestResponse)
async def trigger_orchestrator_test(
    tenant_id: str,
    request_data: OrchestratorTestRequest,
    request: Request,
    db: AsyncSession = Depends(get_db)
):
    """
    Trigger orchestrator for testing purposes

    This endpoint allows manual triggering of the orchestration workflow
    for a specific tenant, useful for testing during development.

    Args:
        tenant_id: Tenant ID to orchestrate
        request_data: Test request with scenario and dry_run options
        request: FastAPI request object
        db: Database session

    Returns:
        OrchestratorTestResponse with results
    """
    logger.info("Orchestrator test trigger requested",
                tenant_id=tenant_id,
                test_scenario=request_data.test_scenario,
                dry_run=request_data.dry_run)

    try:
        # Get scheduler service from app state
        if not hasattr(request.app.state, 'scheduler_service'):
            raise HTTPException(
                status_code=503,
                detail="Orchestrator scheduler service not available"
            )

        scheduler_service = request.app.state.scheduler_service

        # Trigger orchestration
        tenant_uuid = uuid.UUID(tenant_id)
        result = await scheduler_service.trigger_orchestration_for_tenant(
            tenant_id=tenant_uuid,
            test_scenario=request_data.test_scenario
        )

        # Get the latest run for this tenant
        repo = OrchestrationRunRepository(db)
        latest_run = await repo.get_latest_run_for_tenant(tenant_uuid)

        # Build response
        response = OrchestratorTestResponse(
            success=result.get('success', False),
            message=result.get('message', 'Orchestration completed'),
            tenant_id=tenant_id,
            forecasting_completed=latest_run.forecasting_status == 'success' if latest_run else False,
            production_completed=latest_run.production_status == 'success' if latest_run else False,
            procurement_completed=latest_run.procurement_status == 'success' if latest_run else False,
            notifications_sent=latest_run.notification_status == 'success' if latest_run else False,
            summary={
                'forecasts_generated': latest_run.forecasts_generated if latest_run else 0,
                'batches_created': latest_run.production_batches_created if latest_run else 0,
                'pos_created': latest_run.purchase_orders_created if latest_run else 0,
                'notifications_sent': latest_run.notifications_sent if latest_run else 0
            }
        )

        logger.info("Orchestrator test completed",
                    tenant_id=tenant_id,
                    success=response.success)

        return response

    except ValueError as e:
        raise HTTPException(status_code=400, detail=f"Invalid tenant ID: {str(e)}")
    except Exception as e:
        logger.error("Orchestrator test failed",
                     tenant_id=tenant_id,
                     error=str(e),
                     exc_info=True)
        raise HTTPException(status_code=500, detail=f"Orchestrator test failed: {str(e)}")


@router.post("/run-daily-workflow", response_model=OrchestratorWorkflowResponse)
async def run_daily_workflow(
    tenant_id: str,
    request_data: Optional[OrchestratorWorkflowRequest] = None,
    request: Request = None,
    db: AsyncSession = Depends(get_db)
):
    """
    Trigger the daily orchestrated workflow for a tenant

    This endpoint runs the complete daily workflow which includes:
    1. Forecasting Service: Generate demand forecasts
    2. Production Service: Create production schedule from forecasts
    3. Procurement Service: Generate procurement plan
    4. Notification Service: Send relevant notifications

    This is the production endpoint used by the dashboard scheduler button.

    Args:
        tenant_id: Tenant ID to orchestrate
        request_data: Optional request data with dry_run flag
        request: FastAPI request object
        db: Database session

    Returns:
        OrchestratorWorkflowResponse with workflow execution results
    """
    logger.info("Daily workflow trigger requested", tenant_id=tenant_id)

    # Handle optional request_data
    if request_data is None:
        request_data = OrchestratorWorkflowRequest()

    try:
        # Get scheduler service from app state
        if not hasattr(request.app.state, 'scheduler_service'):
            raise HTTPException(
                status_code=503,
                detail="Orchestrator scheduler service not available"
            )

        scheduler_service = request.app.state.scheduler_service

        # Trigger orchestration (use full workflow, not test scenario)
        tenant_uuid = uuid.UUID(tenant_id)
        result = await scheduler_service.trigger_orchestration_for_tenant(
            tenant_id=tenant_uuid,
            test_scenario=None  # Full production workflow
        )

        # Get the latest run for this tenant
        repo = OrchestrationRunRepository(db)
        latest_run = await repo.get_latest_run_for_tenant(tenant_uuid)

        # Build response
        response = OrchestratorWorkflowResponse(
            success=result.get('success', False),
            message=result.get('message', 'Daily workflow completed successfully'),
            tenant_id=tenant_id,
            run_id=str(latest_run.id) if latest_run else None,
            forecasting_completed=latest_run.forecasting_status == 'success' if latest_run else False,
            production_completed=latest_run.production_status == 'success' if latest_run else False,
            procurement_completed=latest_run.procurement_status == 'success' if latest_run else False,
            notifications_sent=latest_run.notification_status == 'success' if latest_run else False,
            summary={
                'run_number': latest_run.run_number if latest_run else 0,
                'forecasts_generated': latest_run.forecasts_generated if latest_run else 0,
                'production_batches_created': latest_run.production_batches_created if latest_run else 0,
                'purchase_orders_created': latest_run.purchase_orders_created if latest_run else 0,
                'notifications_sent': latest_run.notifications_sent if latest_run else 0,
                'duration_seconds': latest_run.duration_seconds if latest_run else 0
            }
        )

        logger.info("Daily workflow completed",
                    tenant_id=tenant_id,
                    success=response.success,
                    run_id=response.run_id)

        return response

    except ValueError as e:
        raise HTTPException(status_code=400, detail=f"Invalid tenant ID: {str(e)}")
    except Exception as e:
        logger.error("Daily workflow failed",
                     tenant_id=tenant_id,
                     error=str(e),
                     exc_info=True)
        raise HTTPException(status_code=500, detail=f"Daily workflow failed: {str(e)}")


@router.get("/health")
async def orchestrator_health():
    """Check orchestrator health"""
    return {
        "status": "healthy",
        "service": "orchestrator",
        "message": "Orchestrator service is running"
    }


@router.get("/runs", response_model=dict)
async def list_orchestration_runs(
    tenant_id: str,
    limit: int = 10,
    offset: int = 0,
    db: AsyncSession = Depends(get_db)
):
    """
    List orchestration runs for a tenant

    Args:
        tenant_id: Tenant ID
        limit: Maximum number of runs to return
        offset: Number of runs to skip
        db: Database session

    Returns:
        List of orchestration runs
    """
    try:
        tenant_uuid = uuid.UUID(tenant_id)
        repo = OrchestrationRunRepository(db)

        runs = await repo.list_runs(
            tenant_id=tenant_uuid,
            limit=limit,
            offset=offset
        )

        return {
            "runs": [
                {
                    "id": str(run.id),
                    "run_number": run.run_number,
                    "status": run.status.value,
                    "started_at": run.started_at.isoformat() if run.started_at else None,
                    "completed_at": run.completed_at.isoformat() if run.completed_at else None,
                    "duration_seconds": run.duration_seconds,
                    "forecasts_generated": run.forecasts_generated,
                    "batches_created": run.production_batches_created,
                    "pos_created": run.purchase_orders_created
                }
                for run in runs
            ],
            "total": len(runs),
            "limit": limit,
            "offset": offset
        }

    except ValueError as e:
        raise HTTPException(status_code=400, detail=f"Invalid tenant ID: {str(e)}")
    except Exception as e:
        logger.error("Error listing orchestration runs",
                     tenant_id=tenant_id,
                     error=str(e))
        raise HTTPException(status_code=500, detail=str(e))


@router.get("/last-run")
async def get_last_orchestration_run(
    tenant_id: str,
    db: AsyncSession = Depends(get_db)
):
    """
    Get timestamp of last orchestration run

    Lightweight endpoint for health status frontend migration (Phase 4).
    Returns only timestamp and run number for the most recent completed run.

    Args:
        tenant_id: Tenant ID

    Returns:
        Dict with timestamp and runNumber (or None if no runs)
    """
    try:
        tenant_uuid = uuid.UUID(tenant_id)
        repo = OrchestrationRunRepository(db)

        # Get most recent completed run
        latest_run = await repo.get_latest_run_for_tenant(tenant_uuid)

        if not latest_run:
            return {"timestamp": None, "runNumber": None}

        return {
            "timestamp": latest_run.started_at.isoformat() if latest_run.started_at else None,
            "runNumber": latest_run.run_number
        }

    except ValueError as e:
        raise HTTPException(status_code=400, detail=f"Invalid tenant ID: {str(e)}")
    except Exception as e:
        logger.error("Error getting last orchestration run",
                     tenant_id=tenant_id,
                     error=str(e))
        raise HTTPException(status_code=500, detail=str(e))
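Example, a minimal sketch of triggering the daily workflow (the production endpoint used by the dashboard scheduler button). The host and tenant UUID are placeholders:

# Hypothetical trigger sketch
import httpx

tenant_id = "00000000-0000-0000-0000-000000000000"  # placeholder tenant UUID
resp = httpx.post(
    f"http://orchestrator-service:8000/api/v1/tenants/{tenant_id}/orchestrator/run-daily-workflow",
    json={"dry_run": False},
    timeout=600,  # workflow can take several minutes
)
resp.raise_for_status()
body = resp.json()
print(body["run_id"], body["summary"])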
0
services/orchestrator/app/core/__init__.py
Normal file
133
services/orchestrator/app/core/config.py
Normal file
@@ -0,0 +1,133 @@
# ================================================================
# services/orchestrator/app/core/config.py
# ================================================================
"""
Orchestrator Service Configuration
"""

import os
from pydantic import Field
from shared.config.base import BaseServiceSettings


class OrchestratorSettings(BaseServiceSettings):
    """Orchestrator service specific settings"""

    # Service Identity
    APP_NAME: str = "Orchestrator Service"
    SERVICE_NAME: str = "orchestrator-service"
    VERSION: str = "1.0.0"
    DESCRIPTION: str = "Automated orchestration of forecasting, production, and procurement workflows"

    # Database configuration (minimal - only for audit logs)
    @property
    def DATABASE_URL(self) -> str:
        """Build database URL from secure components"""
        # Try complete URL first (for backward compatibility)
        complete_url = os.getenv("ORCHESTRATOR_DATABASE_URL")
        if complete_url:
            return complete_url

        # Build from components (secure approach)
        user = os.getenv("ORCHESTRATOR_DB_USER", "orchestrator_user")
        password = os.getenv("ORCHESTRATOR_DB_PASSWORD", "orchestrator_pass123")
        host = os.getenv("ORCHESTRATOR_DB_HOST", "localhost")
        port = os.getenv("ORCHESTRATOR_DB_PORT", "5432")
        name = os.getenv("ORCHESTRATOR_DB_NAME", "orchestrator_db")

        return f"postgresql+asyncpg://{user}:{password}@{host}:{port}/{name}"

    # Orchestration Settings
    ORCHESTRATION_ENABLED: bool = os.getenv("ORCHESTRATION_ENABLED", "true").lower() == "true"
    ORCHESTRATION_SCHEDULE: str = os.getenv("ORCHESTRATION_SCHEDULE", "30 5 * * *")  # 5:30 AM daily (cron format)
    ORCHESTRATION_HOUR: int = int(os.getenv("ORCHESTRATION_HOUR", "2"))  # Hour to run daily orchestration (default: 2 AM)
    ORCHESTRATION_MINUTE: int = int(os.getenv("ORCHESTRATION_MINUTE", "0"))  # Minute to run (default: :00)
    ORCHESTRATION_TIMEOUT_SECONDS: int = int(os.getenv("ORCHESTRATION_TIMEOUT_SECONDS", "600"))  # 10 minutes

    # Tenant Processing
    MAX_CONCURRENT_TENANTS: int = int(os.getenv("MAX_CONCURRENT_TENANTS", "5"))
    TENANT_TIMEOUT_SECONDS: int = int(os.getenv("TENANT_TIMEOUT_SECONDS", "180"))  # 3 minutes per tenant

    # Retry Configuration
    MAX_RETRIES: int = int(os.getenv("MAX_RETRIES", "3"))
    RETRY_DELAY_SECONDS: int = int(os.getenv("RETRY_DELAY_SECONDS", "30"))
    ENABLE_EXPONENTIAL_BACKOFF: bool = os.getenv("ENABLE_EXPONENTIAL_BACKOFF", "true").lower() == "true"

    # Circuit Breaker
    CIRCUIT_BREAKER_ENABLED: bool = os.getenv("CIRCUIT_BREAKER_ENABLED", "true").lower() == "true"
    CIRCUIT_BREAKER_FAILURE_THRESHOLD: int = int(os.getenv("CIRCUIT_BREAKER_FAILURE_THRESHOLD", "5"))
    CIRCUIT_BREAKER_RESET_TIMEOUT: int = int(os.getenv("CIRCUIT_BREAKER_RESET_TIMEOUT", "300"))  # 5 minutes

    # ================================================================
    # CIRCUIT BREAKER SETTINGS - Enhanced with Pydantic validation
    # ================================================================

    CIRCUIT_BREAKER_TIMEOUT_DURATION: int = Field(
        default=60,
        description="Seconds to wait before attempting recovery"
    )
    CIRCUIT_BREAKER_SUCCESS_THRESHOLD: int = Field(
        default=2,
        description="Successful calls needed to close circuit"
    )

    # ================================================================
    # SAGA PATTERN SETTINGS
    # ================================================================

    SAGA_TIMEOUT_SECONDS: int = Field(
        default=600,
        description="Timeout for saga execution (10 minutes)"
    )
    SAGA_ENABLE_COMPENSATION: bool = Field(
        default=True,
        description="Enable saga compensation on failure"
    )

    # Service Integration URLs
    FORECASTING_SERVICE_URL: str = os.getenv("FORECASTING_SERVICE_URL", "http://forecasting-service:8000")
    PRODUCTION_SERVICE_URL: str = os.getenv("PRODUCTION_SERVICE_URL", "http://production-service:8000")
    PROCUREMENT_SERVICE_URL: str = os.getenv("PROCUREMENT_SERVICE_URL", "http://procurement-service:8000")
    NOTIFICATION_SERVICE_URL: str = os.getenv("NOTIFICATION_SERVICE_URL", "http://notification-service:8000")
    TENANT_SERVICE_URL: str = os.getenv("TENANT_SERVICE_URL", "http://tenant-service:8000")

    # Notification Settings
    SEND_NOTIFICATIONS: bool = os.getenv("SEND_NOTIFICATIONS", "true").lower() == "true"
    NOTIFY_ON_SUCCESS: bool = os.getenv("NOTIFY_ON_SUCCESS", "true").lower() == "true"
    NOTIFY_ON_FAILURE: bool = os.getenv("NOTIFY_ON_FAILURE", "true").lower() == "true"

    # Audit and Logging
    AUDIT_ORCHESTRATION_RUNS: bool = os.getenv("AUDIT_ORCHESTRATION_RUNS", "true").lower() == "true"
    DETAILED_LOGGING: bool = os.getenv("DETAILED_LOGGING", "true").lower() == "true"

    # AI Enhancement Settings
    ORCHESTRATION_USE_AI_INSIGHTS: bool = os.getenv("ORCHESTRATION_USE_AI_INSIGHTS", "true").lower() == "true"
    AI_INSIGHTS_SERVICE_URL: str = os.getenv("AI_INSIGHTS_SERVICE_URL", "http://ai-insights-service:8000")
    AI_INSIGHTS_MIN_CONFIDENCE: int = int(os.getenv("AI_INSIGHTS_MIN_CONFIDENCE", "70"))

    # Redis Cache Settings (for dashboard performance)
    REDIS_HOST: str = os.getenv("REDIS_HOST", "localhost")
    REDIS_PORT: int = int(os.getenv("REDIS_PORT", "6379"))
    REDIS_DB: int = int(os.getenv("REDIS_DB", "0"))
    REDIS_PASSWORD: str = os.getenv("REDIS_PASSWORD", "")
    REDIS_TLS_ENABLED: str = os.getenv("REDIS_TLS_ENABLED", "false")
    CACHE_ENABLED: bool = os.getenv("CACHE_ENABLED", "true").lower() == "true"
    CACHE_TTL_HEALTH: int = int(os.getenv("CACHE_TTL_HEALTH", "30"))  # 30 seconds
    CACHE_TTL_INSIGHTS: int = int(os.getenv("CACHE_TTL_INSIGHTS", "60"))  # 1 minute (reduced for faster metrics updates)
    CACHE_TTL_SUMMARY: int = int(os.getenv("CACHE_TTL_SUMMARY", "60"))  # 1 minute

    # Enterprise dashboard cache TTLs
    CACHE_TTL_ENTERPRISE_SUMMARY: int = int(os.getenv("CACHE_TTL_ENTERPRISE_SUMMARY", "60"))  # 1 minute
    CACHE_TTL_ENTERPRISE_PERFORMANCE: int = int(os.getenv("CACHE_TTL_ENTERPRISE_PERFORMANCE", "60"))  # 1 minute
    CACHE_TTL_ENTERPRISE_DISTRIBUTION: int = int(os.getenv("CACHE_TTL_ENTERPRISE_DISTRIBUTION", "30"))  # 30 seconds
    CACHE_TTL_ENTERPRISE_FORECAST: int = int(os.getenv("CACHE_TTL_ENTERPRISE_FORECAST", "120"))  # 2 minutes
    CACHE_TTL_ENTERPRISE_NETWORK: int = int(os.getenv("CACHE_TTL_ENTERPRISE_NETWORK", "60"))  # 1 minute


# Global settings instance
settings = OrchestratorSettings()


def get_settings():
    """Get the global settings instance"""
    return settings
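Example, a sketch of how the component-based DATABASE_URL property resolves when no complete ORCHESTRATOR_DATABASE_URL override is set. The environment values are illustrative, and the import assumes the shared BaseServiceSettings can be constructed from the current environment:

# Hypothetical resolution sketch
import os

os.environ["ORCHESTRATOR_DB_USER"] = "orchestrator_user"
os.environ["ORCHESTRATOR_DB_PASSWORD"] = "s3cret"        # placeholder secret
os.environ["ORCHESTRATOR_DB_HOST"] = "postgres"
os.environ["ORCHESTRATOR_DB_NAME"] = "orchestrator_db"

from app.core.config import get_settings

print(get_settings().DATABASE_URL)
# -> postgresql+asyncpg://orchestrator_user:s3cret@postgres:5432/orchestrator_db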
48
services/orchestrator/app/core/database.py
Normal file
@@ -0,0 +1,48 @@
# ================================================================
# services/orchestrator/app/core/database.py
# ================================================================
"""
Database connection and session management for Orchestrator Service
Minimal database - only for audit trail
"""

from shared.database.base import DatabaseManager
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
from .config import settings

# Initialize database manager
database_manager = DatabaseManager(
    database_url=settings.DATABASE_URL,
    echo=settings.DEBUG
)

# Create async session factory
AsyncSessionLocal = async_sessionmaker(
    database_manager.async_engine,
    class_=AsyncSession,
    expire_on_commit=False,
    autocommit=False,
    autoflush=False,
)


async def get_db() -> AsyncSession:
    """
    Dependency to get database session.
    Used in FastAPI endpoints via Depends(get_db).
    """
    async with AsyncSessionLocal() as session:
        try:
            yield session
        finally:
            await session.close()


async def init_db():
    """Initialize database (create tables if needed)"""
    await database_manager.create_all()


async def close_db():
    """Close database connections"""
    await database_manager.close()
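Example, a minimal sketch of consuming the get_db dependency from an endpoint. The router and query are illustrative, not part of this commit:

# Hypothetical consumer sketch
from fastapi import APIRouter, Depends
from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncSession

from app.core.database import get_db

router = APIRouter()


@router.get("/ping-db")
async def ping_db(db: AsyncSession = Depends(get_db)):
    # Runs a trivial statement on the request-scoped session
    result = await db.execute(text("SELECT 1"))
    return {"db_ok": result.scalar() == 1}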
237
services/orchestrator/app/main.py
Normal file
@@ -0,0 +1,237 @@
# ================================================================
# services/orchestrator/app/main.py
# ================================================================
"""
Orchestrator Service - FastAPI Application
Automated orchestration of forecasting, production, and procurement workflows
"""

from fastapi import FastAPI, Request
from sqlalchemy import text
from app.core.config import settings
from app.core.database import database_manager
from shared.service_base import StandardFastAPIService


class OrchestratorService(StandardFastAPIService):
    """Orchestrator Service with standardized setup"""

    expected_migration_version = "001_initial_schema"

    def __init__(self):
        # Define expected database tables for health checks
        orchestrator_expected_tables = [
            'orchestration_runs'
        ]

        self.rabbitmq_client = None
        self.event_publisher = None
        self.leader_election = None
        self.scheduler_service = None

        super().__init__(
            service_name="orchestrator-service",
            app_name=settings.APP_NAME,
            description=settings.DESCRIPTION,
            version=settings.VERSION,
            api_prefix="",  # Empty because RouteBuilder already includes /api/v1
            database_manager=database_manager,
            expected_tables=orchestrator_expected_tables,
            enable_messaging=True  # Enable RabbitMQ for event publishing
        )

    async def verify_migrations(self):
        """Verify database schema matches the latest migrations"""
        try:
            async with self.database_manager.get_session() as session:
                result = await session.execute(text("SELECT version_num FROM alembic_version"))
                version = result.scalar()
                if version != self.expected_migration_version:
                    self.logger.error(f"Migration version mismatch: expected {self.expected_migration_version}, got {version}")
                    raise RuntimeError(f"Migration version mismatch: expected {self.expected_migration_version}, got {version}")
                self.logger.info(f"Migration verification successful: {version}")
        except Exception as e:
            self.logger.error(f"Migration verification failed: {e}")
            raise

    async def _setup_messaging(self):
        """Setup messaging for orchestrator service"""
        from shared.messaging import UnifiedEventPublisher, RabbitMQClient
        try:
            self.rabbitmq_client = RabbitMQClient(settings.RABBITMQ_URL, service_name="orchestrator-service")
            await self.rabbitmq_client.connect()
            # Create event publisher
            self.event_publisher = UnifiedEventPublisher(self.rabbitmq_client, "orchestrator-service")
            self.logger.info("Orchestrator service messaging setup completed")
        except Exception as e:
            self.logger.error("Failed to setup orchestrator messaging", error=str(e))
            raise

    async def _cleanup_messaging(self):
        """Cleanup messaging for orchestrator service"""
        try:
            if self.rabbitmq_client:
                await self.rabbitmq_client.disconnect()
            self.logger.info("Orchestrator service messaging cleanup completed")
        except Exception as e:
            self.logger.error("Error during orchestrator messaging cleanup", error=str(e))

    async def on_startup(self, app: FastAPI):
        """Custom startup logic for orchestrator service"""
        # Verify migrations first
        await self.verify_migrations()

        # Call parent startup (includes database, messaging, etc.)
        await super().on_startup(app)

        self.logger.info("Orchestrator Service starting up...")

        # Initialize leader election for horizontal scaling
        # Only the leader pod will run the scheduler
        await self._setup_leader_election(app)

        # REMOVED: Delivery tracking service - moved to procurement service (domain ownership)

    async def _setup_leader_election(self, app: FastAPI):
        """
        Setup leader election for scheduler.

        CRITICAL FOR HORIZONTAL SCALING:
        Without leader election, each pod would run the same scheduled jobs,
        causing duplicate forecasts, production schedules, and database contention.
        """
        from shared.leader_election import LeaderElectionService
        import redis.asyncio as redis

        try:
            # Create Redis connection for leader election
            redis_url = f"redis://:{settings.REDIS_PASSWORD}@{settings.REDIS_HOST}:{settings.REDIS_PORT}/{settings.REDIS_DB}"
            if settings.REDIS_TLS_ENABLED.lower() == "true":
                redis_url = redis_url.replace("redis://", "rediss://")

            redis_client = redis.from_url(redis_url, decode_responses=False)
            await redis_client.ping()

            # Use shared leader election service
            self.leader_election = LeaderElectionService(
                redis_client,
                service_name="orchestrator"
            )

            # Define callbacks for leader state changes
            async def on_become_leader():
                self.logger.info("This pod became the leader - starting scheduler")
                from app.services.orchestrator_service import OrchestratorSchedulerService
                self.scheduler_service = OrchestratorSchedulerService(self.event_publisher, settings)
                await self.scheduler_service.start()
                app.state.scheduler_service = self.scheduler_service
                self.logger.info("Orchestrator scheduler service started (leader only)")

            async def on_lose_leader():
                self.logger.warning("This pod lost leadership - stopping scheduler")
                if self.scheduler_service:
                    await self.scheduler_service.stop()
                    self.scheduler_service = None
                if hasattr(app.state, 'scheduler_service'):
                    app.state.scheduler_service = None
                self.logger.info("Orchestrator scheduler service stopped (no longer leader)")

            # Start leader election
            await self.leader_election.start(
                on_become_leader=on_become_leader,
                on_lose_leader=on_lose_leader
            )

            # Store leader election in app state for health checks
            app.state.leader_election = self.leader_election

            self.logger.info("Leader election initialized",
                             is_leader=self.leader_election.is_leader,
                             instance_id=self.leader_election.instance_id)

        except Exception as e:
            self.logger.error("Failed to setup leader election, falling back to standalone mode",
                              error=str(e))
            # Fallback: start scheduler anyway (for single-pod deployments)
            from app.services.orchestrator_service import OrchestratorSchedulerService
            self.scheduler_service = OrchestratorSchedulerService(self.event_publisher, settings)
            await self.scheduler_service.start()
            app.state.scheduler_service = self.scheduler_service
            self.logger.warning("Scheduler started in standalone mode (no leader election)")

    async def on_shutdown(self, app: FastAPI):
        """Custom shutdown logic for orchestrator service"""
        self.logger.info("Orchestrator Service shutting down...")

        # Stop leader election (this will also stop scheduler if we're the leader)
        if self.leader_election:
            await self.leader_election.stop()
            self.logger.info("Leader election stopped")

        # Stop scheduler service if still running
        if self.scheduler_service:
            await self.scheduler_service.stop()
            self.logger.info("Orchestrator scheduler service stopped")

    def get_service_features(self):
        """Return orchestrator-specific features"""
        return [
            "automated_orchestration",
            "forecasting_integration",
            "production_scheduling",
            "procurement_planning",
            "notification_dispatch",
            "leader_election",
            "retry_mechanism",
            "circuit_breaker"
        ]


# Create service instance
service = OrchestratorService()

# Create FastAPI app with standardized setup
app = service.create_app()

# Setup standard endpoints (health, readiness, metrics)
service.setup_standard_endpoints()

# Include routers
# BUSINESS: Orchestration operations
from app.api.orchestration import router as orchestration_router
from app.api.internal import router as internal_router
service.add_router(orchestration_router)
service.add_router(internal_router)

# INTERNAL: Service-to-service endpoints for demo data cloning
from app.api.internal_demo import router as internal_demo_router
service.add_router(internal_demo_router, tags=["internal-demo"])


@app.middleware("http")
async def logging_middleware(request: Request, call_next):
    """Add request logging middleware"""
    import time

    start_time = time.time()
    response = await call_next(request)
    process_time = time.time() - start_time

    service.logger.info("HTTP request processed",
                        method=request.method,
                        url=str(request.url),
                        status_code=response.status_code,
                        process_time=round(process_time, 4))

    return response


if __name__ == "__main__":
    import uvicorn
    uvicorn.run(
        "main:app",
        host="0.0.0.0",
        port=8000,
        reload=settings.DEBUG
    )
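Example, a local smoke-test sketch after starting the service (the exact health path registered by setup_standard_endpoints is assumed here; only the leader replica populates app.state.scheduler_service, so the manual trigger endpoints return 503 on non-leader pods):

# Hypothetical smoke test sketch
#   uvicorn app.main:app --host 0.0.0.0 --port 8000
import httpx

print(httpx.get("http://localhost:8000/health").json())  # assumed standard health endpoint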
0
services/orchestrator/app/ml/__init__.py
Normal file
894
services/orchestrator/app/ml/ai_enhanced_orchestrator.py
Normal file
@@ -0,0 +1,894 @@
|
||||
"""
|
||||
AI-Enhanced Orchestration Saga
|
||||
Integrates ML insights into daily workflow orchestration
|
||||
"""
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from typing import Dict, List, Any, Optional, Tuple
|
||||
from datetime import datetime, timedelta
|
||||
from uuid import UUID
|
||||
import structlog
|
||||
|
||||
from shared.clients.ai_insights_client import AIInsightsClient
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
|
||||
class AIEnhancedOrchestrator:
|
||||
"""
|
||||
Enhanced orchestration engine that integrates ML insights into daily workflow.
|
||||
|
||||
Workflow:
|
||||
1. Pre-Orchestration: Gather all relevant insights for target date
|
||||
2. Intelligent Planning: Modify orchestration plan based on insights
|
||||
3. Execution: Apply insights with confidence-based decision making
|
||||
4. Feedback Tracking: Record outcomes for continuous learning
|
||||
|
||||
Replaces hardcoded logic with learned intelligence from:
|
||||
- Demand Forecasting
|
||||
- Supplier Performance
|
||||
- Safety Stock Optimization
|
||||
- Price Forecasting
|
||||
- Production Yield Prediction
|
||||
- Dynamic Business Rules
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
ai_insights_base_url: str = "http://ai-insights-service:8000",
|
||||
min_confidence_threshold: int = 70
|
||||
):
|
||||
self.ai_insights_client = AIInsightsClient(ai_insights_base_url)
|
||||
self.min_confidence_threshold = min_confidence_threshold
|
||||
self.applied_insights = [] # Track applied insights for feedback
|
||||
|
||||
async def orchestrate_with_ai(
|
||||
self,
|
||||
tenant_id: str,
|
||||
target_date: datetime,
|
||||
base_orchestration_plan: Optional[Dict[str, Any]] = None
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Run AI-enhanced orchestration for a target date.
|
||||
|
||||
Args:
|
||||
tenant_id: Tenant identifier
|
||||
target_date: Date to orchestrate for
|
||||
base_orchestration_plan: Optional base plan to enhance (if None, creates new)
|
||||
|
||||
Returns:
|
||||
Enhanced orchestration plan with applied insights and metadata
|
||||
"""
|
||||
logger.info(
|
||||
"Starting AI-enhanced orchestration",
|
||||
tenant_id=tenant_id,
|
||||
target_date=target_date.isoformat()
|
||||
)
|
||||
|
||||
# Step 1: Gather insights for target date
|
||||
insights = await self._gather_insights(tenant_id, target_date)
|
||||
|
||||
logger.info(
|
||||
"Insights gathered",
|
||||
demand_forecasts=len(insights['demand_forecasts']),
|
||||
supplier_alerts=len(insights['supplier_alerts']),
|
||||
inventory_optimizations=len(insights['inventory_optimizations']),
|
||||
price_opportunities=len(insights['price_opportunities']),
|
||||
yield_predictions=len(insights['yield_predictions']),
|
||||
business_rules=len(insights['business_rules'])
|
||||
)
|
||||
|
||||
# Step 2: Initialize or load base plan
|
||||
if base_orchestration_plan is None:
|
||||
orchestration_plan = self._create_base_plan(target_date)
|
||||
else:
|
||||
orchestration_plan = base_orchestration_plan.copy()
|
||||
|
||||
# Step 3: Apply insights to plan
|
||||
enhanced_plan = await self._apply_insights_to_plan(
|
||||
orchestration_plan, insights, tenant_id
|
||||
)
|
||||
|
||||
# Step 4: Generate execution summary
|
||||
execution_summary = self._generate_execution_summary(
|
||||
enhanced_plan, insights
|
||||
)
|
||||
|
||||
logger.info(
|
||||
"AI-enhanced orchestration complete",
|
||||
tenant_id=tenant_id,
|
||||
insights_applied=execution_summary['total_insights_applied'],
|
||||
modifications=execution_summary['total_modifications']
|
||||
)
|
||||
|
||||
return {
|
||||
'tenant_id': tenant_id,
|
||||
'target_date': target_date.isoformat(),
|
||||
'orchestrated_at': datetime.utcnow().isoformat(),
|
||||
'plan': enhanced_plan,
|
||||
'insights_used': insights,
|
||||
'execution_summary': execution_summary,
|
||||
'applied_insights': self.applied_insights
|
||||
}
|
||||
|
||||
async def _gather_insights(
|
||||
self,
|
||||
tenant_id: str,
|
||||
target_date: datetime
|
||||
) -> Dict[str, List[Dict[str, Any]]]:
|
||||
"""
|
||||
Gather all relevant insights for target date from AI Insights Service.
|
||||
|
||||
Returns insights categorized by type:
|
||||
- demand_forecasts
|
||||
- supplier_alerts
|
||||
- inventory_optimizations
|
||||
- price_opportunities
|
||||
- yield_predictions
|
||||
- business_rules
|
||||
"""
|
||||
# Get orchestration-ready insights
|
||||
insights = await self.ai_insights_client.get_orchestration_ready_insights(
|
||||
tenant_id=UUID(tenant_id),
|
||||
target_date=target_date,
|
||||
min_confidence=self.min_confidence_threshold
|
||||
)
|
||||
|
||||
# Categorize insights by source
|
||||
categorized = {
|
||||
'demand_forecasts': [],
|
||||
'supplier_alerts': [],
|
||||
'inventory_optimizations': [],
|
||||
'price_opportunities': [],
|
||||
'yield_predictions': [],
|
||||
'business_rules': [],
|
||||
'other': []
|
||||
}
|
||||
|
||||
for insight in insights:
|
||||
source_model = insight.get('source_model', '')
|
||||
category = insight.get('category', '')
|
||||
|
||||
if source_model == 'hybrid_forecaster' or category == 'demand':
|
||||
categorized['demand_forecasts'].append(insight)
|
||||
elif source_model == 'supplier_performance_predictor':
|
||||
categorized['supplier_alerts'].append(insight)
|
||||
elif source_model == 'safety_stock_optimizer':
|
||||
categorized['inventory_optimizations'].append(insight)
|
||||
elif source_model == 'price_forecaster':
|
||||
categorized['price_opportunities'].append(insight)
|
||||
elif source_model == 'yield_predictor':
|
||||
categorized['yield_predictions'].append(insight)
|
||||
elif source_model == 'business_rules_engine':
|
||||
categorized['business_rules'].append(insight)
|
||||
else:
|
||||
categorized['other'].append(insight)
|
||||
|
||||
return categorized
|
||||
|
||||
def _create_base_plan(self, target_date: datetime) -> Dict[str, Any]:
|
||||
"""Create base orchestration plan with default hardcoded values."""
|
||||
return {
|
||||
'target_date': target_date.isoformat(),
|
||||
'procurement': {
|
||||
'orders': [],
|
||||
'supplier_selections': {},
|
||||
'order_quantities': {}
|
||||
},
|
||||
'inventory': {
|
||||
'safety_stock_levels': {},
|
||||
'reorder_points': {},
|
||||
'transfers': []
|
||||
},
|
||||
'production': {
|
||||
'production_runs': [],
|
||||
'recipe_quantities': {},
|
||||
'worker_assignments': {}
|
||||
},
|
||||
'sales': {
|
||||
'forecasted_demand': {},
|
||||
'pricing_adjustments': {}
|
||||
},
|
||||
'modifications': [],
|
||||
'ai_enhancements': []
|
||||
}
|
||||
|
||||
async def _apply_insights_to_plan(
|
||||
self,
|
||||
plan: Dict[str, Any],
|
||||
insights: Dict[str, List[Dict[str, Any]]],
|
||||
tenant_id: str
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Apply categorized insights to orchestration plan.
|
||||
|
||||
Each insight type modifies specific parts of the plan:
|
||||
- Demand forecasts → sales forecasts, production quantities
|
||||
- Supplier alerts → supplier selection, procurement timing
|
||||
- Inventory optimizations → safety stock levels, reorder points
|
||||
- Price opportunities → procurement timing, order quantities
|
||||
- Yield predictions → production quantities, worker assignments
|
||||
- Business rules → cross-cutting modifications
|
||||
"""
|
||||
enhanced_plan = plan.copy()
|
||||
|
||||
# Apply demand forecasts
|
||||
if insights['demand_forecasts']:
|
||||
enhanced_plan = await self._apply_demand_forecasts(
|
||||
enhanced_plan, insights['demand_forecasts'], tenant_id
|
||||
)
|
||||
|
||||
# Apply supplier alerts
|
||||
if insights['supplier_alerts']:
|
||||
enhanced_plan = await self._apply_supplier_alerts(
|
||||
enhanced_plan, insights['supplier_alerts'], tenant_id
|
||||
)
|
||||
|
||||
# Apply inventory optimizations
|
||||
if insights['inventory_optimizations']:
|
||||
enhanced_plan = await self._apply_inventory_optimizations(
|
||||
enhanced_plan, insights['inventory_optimizations'], tenant_id
|
||||
)
|
||||
|
||||
# Apply price opportunities
|
||||
if insights['price_opportunities']:
|
||||
enhanced_plan = await self._apply_price_opportunities(
|
||||
enhanced_plan, insights['price_opportunities'], tenant_id
|
||||
)
|
||||
|
||||
# Apply yield predictions
|
||||
if insights['yield_predictions']:
|
||||
enhanced_plan = await self._apply_yield_predictions(
|
||||
enhanced_plan, insights['yield_predictions'], tenant_id
|
||||
)
|
||||
|
||||
# Apply business rules (highest priority, can override)
|
||||
if insights['business_rules']:
|
||||
enhanced_plan = await self._apply_business_rules(
|
||||
enhanced_plan, insights['business_rules'], tenant_id
|
||||
)
|
||||
|
||||
return enhanced_plan
|
||||
|
||||
async def _apply_demand_forecasts(
|
||||
self,
|
||||
plan: Dict[str, Any],
|
||||
forecasts: List[Dict[str, Any]],
|
||||
tenant_id: str
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Apply demand forecasts to sales and production planning.
|
||||
|
||||
Modifications:
|
||||
- Update sales forecasted_demand
|
||||
- Adjust production recipe_quantities
|
||||
- Record insight application
|
||||
"""
|
||||
for forecast in forecasts:
|
||||
if forecast['confidence'] < self.min_confidence_threshold:
|
||||
continue
|
||||
|
||||
metrics = forecast.get('metrics_json', {})
|
||||
product_id = metrics.get('product_id')
|
||||
predicted_demand = metrics.get('predicted_demand')
|
||||
forecast_date = metrics.get('forecast_date')
|
||||
|
||||
if not product_id or predicted_demand is None:
|
||||
continue
|
||||
|
||||
# Update sales forecast
|
||||
plan['sales']['forecasted_demand'][product_id] = {
|
||||
'quantity': predicted_demand,
|
||||
'confidence': forecast['confidence'],
|
||||
'source': 'ai_forecast',
|
||||
'insight_id': forecast.get('id')
|
||||
}
|
||||
|
||||
# Adjust production quantities (demand + buffer)
|
||||
buffer_pct = 1.10 # 10% buffer for uncertainty
|
||||
production_quantity = int(predicted_demand * buffer_pct)
|
||||
|
||||
plan['production']['recipe_quantities'][product_id] = {
|
||||
'quantity': production_quantity,
|
||||
'demand_forecast': predicted_demand,
|
||||
'buffer_applied': buffer_pct,
|
||||
'source': 'ai_forecast',
|
||||
'insight_id': forecast.get('id')
|
||||
}
|
||||
|
||||
# Record modification
|
||||
plan['modifications'].append({
|
||||
'type': 'demand_forecast_applied',
|
||||
'insight_id': forecast.get('id'),
|
||||
'product_id': product_id,
|
||||
'predicted_demand': predicted_demand,
|
||||
'production_quantity': production_quantity,
|
||||
'confidence': forecast['confidence']
|
||||
})
|
||||
|
||||
# Track for feedback
|
||||
self.applied_insights.append({
|
||||
'insight_id': forecast.get('id'),
|
||||
'type': 'demand_forecast',
|
||||
'applied_at': datetime.utcnow().isoformat(),
|
||||
'tenant_id': tenant_id,
|
||||
'metrics': {
|
||||
'product_id': product_id,
|
||||
'predicted_demand': predicted_demand,
|
||||
'production_quantity': production_quantity
|
||||
}
|
||||
})
|
||||
|
||||
logger.info(
|
||||
"Applied demand forecast",
|
||||
product_id=product_id,
|
||||
predicted_demand=predicted_demand,
|
||||
production_quantity=production_quantity
|
||||
)
|
||||
|
||||
return plan
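# Worked example (hypothetical numbers): a forecast of 200 units with the 10%
# uncertainty buffer above schedules int(200 * 1.10) = 220 units for production,
# while plan['sales']['forecasted_demand'] keeps the raw 200-unit forecast.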
|
||||
|
||||
async def _apply_supplier_alerts(
|
||||
self,
|
||||
plan: Dict[str, Any],
|
||||
alerts: List[Dict[str, Any]],
|
||||
tenant_id: str
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Apply supplier performance alerts to procurement decisions.
|
||||
|
||||
Modifications:
|
||||
- Switch suppliers for low reliability
|
||||
- Adjust lead times for delays
|
||||
- Increase order quantities for short deliveries
|
||||
"""
|
||||
for alert in alerts:
|
||||
if alert['confidence'] < self.min_confidence_threshold:
|
||||
continue
|
||||
|
||||
metrics = alert.get('metrics_json', {})
|
||||
supplier_id = metrics.get('supplier_id')
|
||||
reliability_score = metrics.get('reliability_score')
|
||||
predicted_delay = metrics.get('predicted_delivery_delay_days')
|
||||
|
||||
if not supplier_id:
|
||||
continue
|
||||
|
||||
# Low reliability: recommend supplier switch
|
||||
if reliability_score and reliability_score < 70:
|
||||
plan['procurement']['supplier_selections'][supplier_id] = {
|
||||
'action': 'avoid',
|
||||
'reason': f'Low reliability score: {reliability_score}',
|
||||
'alternative_required': True,
|
||||
'source': 'supplier_alert',
|
||||
'insight_id': alert.get('id')
|
||||
}
|
||||
|
||||
plan['modifications'].append({
|
||||
'type': 'supplier_switch_recommended',
|
||||
'insight_id': alert.get('id'),
|
||||
'supplier_id': supplier_id,
|
||||
'reliability_score': reliability_score,
|
||||
'confidence': alert['confidence']
|
||||
})
|
||||
|
||||
# Delay predicted: adjust lead time
|
||||
if predicted_delay and predicted_delay > 1:
|
||||
plan['procurement']['supplier_selections'][supplier_id] = {
|
||||
'action': 'adjust_lead_time',
|
||||
'additional_lead_days': int(predicted_delay),
|
||||
'reason': f'Predicted delay: {predicted_delay} days',
|
||||
'source': 'supplier_alert',
|
||||
'insight_id': alert.get('id')
|
||||
}
|
||||
|
||||
plan['modifications'].append({
|
||||
'type': 'lead_time_adjusted',
|
||||
'insight_id': alert.get('id'),
|
||||
'supplier_id': supplier_id,
|
||||
'additional_days': int(predicted_delay),
|
||||
'confidence': alert['confidence']
|
||||
})
|
||||
|
||||
# Track for feedback
|
||||
self.applied_insights.append({
|
||||
'insight_id': alert.get('id'),
|
||||
'type': 'supplier_alert',
|
||||
'applied_at': datetime.utcnow().isoformat(),
|
||||
'tenant_id': tenant_id,
|
||||
'metrics': {
|
||||
'supplier_id': supplier_id,
|
||||
'reliability_score': reliability_score,
|
||||
'predicted_delay': predicted_delay
|
||||
}
|
||||
})
|
||||
|
||||
logger.info(
|
||||
"Applied supplier alert",
|
||||
supplier_id=supplier_id,
|
||||
reliability_score=reliability_score,
|
||||
predicted_delay=predicted_delay
|
||||
)
|
||||
|
||||
return plan
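# Worked example (hypothetical numbers): an alert with reliability_score=62 marks
# the supplier as 'avoid' and requires an alternative; an alert with
# predicted_delivery_delay_days=3.4 instead adds int(3.4) = 3 extra lead days.
# Note that if one alert carries both signals, the lead-time entry overwrites the
# 'avoid' entry, since both write to supplier_selections[supplier_id].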
|
||||
|
||||
async def _apply_inventory_optimizations(
|
||||
self,
|
||||
plan: Dict[str, Any],
|
||||
optimizations: List[Dict[str, Any]],
|
||||
tenant_id: str
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Apply safety stock optimizations to inventory management.
|
||||
|
||||
Modifications:
|
||||
- Update safety stock levels (from hardcoded 95% to learned optimal)
|
||||
- Adjust reorder points accordingly
|
||||
"""
|
||||
for optimization in optimizations:
|
||||
if optimization['confidence'] < self.min_confidence_threshold:
|
||||
continue
|
||||
|
||||
metrics = optimization.get('metrics_json', {})
|
||||
product_id = metrics.get('inventory_product_id')
|
||||
optimal_safety_stock = metrics.get('optimal_safety_stock')
|
||||
optimal_service_level = metrics.get('optimal_service_level')
|
||||
|
||||
if not product_id or optimal_safety_stock is None:
|
||||
continue
|
||||
|
||||
# Update safety stock level
|
||||
plan['inventory']['safety_stock_levels'][product_id] = {
|
||||
'quantity': optimal_safety_stock,
|
||||
'service_level': optimal_service_level,
|
||||
'source': 'ai_optimization',
|
||||
'insight_id': optimization.get('id'),
|
||||
'replaced_hardcoded': True
|
||||
}
|
||||
|
||||
# Adjust reorder point (lead time demand + safety stock)
|
||||
# Falls back to 2x safety stock when metrics lack lead_time_demand; a demand forecast would be the better input here
|
||||
lead_time_demand = metrics.get('lead_time_demand', optimal_safety_stock * 2)
|
||||
reorder_point = lead_time_demand + optimal_safety_stock
|
||||
|
||||
plan['inventory']['reorder_points'][product_id] = {
|
||||
'quantity': reorder_point,
|
||||
'lead_time_demand': lead_time_demand,
|
||||
'safety_stock': optimal_safety_stock,
|
||||
'source': 'ai_optimization',
|
||||
'insight_id': optimization.get('id')
|
||||
}
|
||||
|
||||
plan['modifications'].append({
|
||||
'type': 'safety_stock_optimized',
|
||||
'insight_id': optimization.get('id'),
|
||||
'product_id': product_id,
|
||||
'optimal_safety_stock': optimal_safety_stock,
|
||||
'optimal_service_level': optimal_service_level,
|
||||
'confidence': optimization['confidence']
|
||||
})
|
||||
|
||||
# Track for feedback
|
||||
self.applied_insights.append({
|
||||
'insight_id': optimization.get('id'),
|
||||
'type': 'inventory_optimization',
|
||||
'applied_at': datetime.utcnow().isoformat(),
|
||||
'tenant_id': tenant_id,
|
||||
'metrics': {
|
||||
'product_id': product_id,
|
||||
'optimal_safety_stock': optimal_safety_stock,
|
||||
'reorder_point': reorder_point
|
||||
}
|
||||
})
|
||||
|
||||
logger.info(
|
||||
"Applied safety stock optimization",
|
||||
product_id=product_id,
|
||||
optimal_safety_stock=optimal_safety_stock,
|
||||
reorder_point=reorder_point
|
||||
)
|
||||
|
||||
return plan
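# Worked example (hypothetical numbers): optimal_safety_stock=20 with no
# lead_time_demand in the metrics falls back to 20 * 2 = 40, giving a reorder
# point of 40 + 20 = 60; with lead_time_demand=35 supplied, the reorder point
# is 35 + 20 = 55.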
|
||||
|
||||
async def _apply_price_opportunities(
|
||||
self,
|
||||
plan: Dict[str, Any],
|
||||
opportunities: List[Dict[str, Any]],
|
||||
tenant_id: str
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Apply price forecasting opportunities to procurement timing.
|
||||
|
||||
Modifications:
|
||||
- Advance orders for predicted price increases
|
||||
- Delay orders for predicted price decreases
|
||||
- Increase quantities for bulk opportunities
|
||||
"""
|
||||
for opportunity in opportunities:
|
||||
if opportunity['confidence'] < self.min_confidence_threshold:
|
||||
continue
|
||||
|
||||
metrics = opportunity.get('metrics_json', {})
|
||||
ingredient_id = metrics.get('ingredient_id')
|
||||
recommendation = metrics.get('recommendation')
|
||||
expected_price_change = metrics.get('expected_price_change_pct')
|
||||
|
||||
if not ingredient_id or not recommendation:
|
||||
continue
|
||||
|
||||
# Buy now: price increasing
|
||||
if recommendation == 'buy_now' and expected_price_change and expected_price_change > 5:
|
||||
plan['procurement']['order_quantities'][ingredient_id] = {
|
||||
'action': 'increase',
|
||||
'multiplier': 1.5, # Buy 50% more
|
||||
'reason': f'Price expected to increase {expected_price_change:.1f}%',
|
||||
'source': 'price_forecast',
|
||||
'insight_id': opportunity.get('id')
|
||||
}
|
||||
|
||||
plan['modifications'].append({
|
||||
'type': 'bulk_purchase_opportunity',
|
||||
'insight_id': opportunity.get('id'),
|
||||
'ingredient_id': ingredient_id,
|
||||
'expected_price_change': expected_price_change,
|
||||
'quantity_multiplier': 1.5,
|
||||
'confidence': opportunity['confidence']
|
||||
})
|
||||
|
||||
# Wait: price decreasing
|
||||
elif recommendation == 'wait' and expected_price_change and expected_price_change < -5:
|
||||
plan['procurement']['order_quantities'][ingredient_id] = {
|
||||
'action': 'delay',
|
||||
'delay_days': 7,
|
||||
'reason': f'Price expected to decrease {abs(expected_price_change):.1f}%',
|
||||
'source': 'price_forecast',
|
||||
'insight_id': opportunity.get('id')
|
||||
}
|
||||
|
||||
plan['modifications'].append({
|
||||
'type': 'procurement_delayed',
|
||||
'insight_id': opportunity.get('id'),
|
||||
'ingredient_id': ingredient_id,
|
||||
'expected_price_change': expected_price_change,
|
||||
'delay_days': 7,
|
||||
'confidence': opportunity['confidence']
|
||||
})
|
||||
|
||||
# Track for feedback
|
||||
self.applied_insights.append({
|
||||
'insight_id': opportunity.get('id'),
|
||||
'type': 'price_opportunity',
|
||||
'applied_at': datetime.utcnow().isoformat(),
|
||||
'tenant_id': tenant_id,
|
||||
'metrics': {
|
||||
'ingredient_id': ingredient_id,
|
||||
'recommendation': recommendation,
|
||||
'expected_price_change': expected_price_change
|
||||
}
|
||||
})
|
||||
|
||||
logger.info(
|
||||
"Applied price opportunity",
|
||||
ingredient_id=ingredient_id,
|
||||
recommendation=recommendation,
|
||||
expected_price_change=expected_price_change
|
||||
)
|
||||
|
||||
return plan
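# Worked example (hypothetical numbers): recommendation='buy_now' with
# expected_price_change_pct=7.5 orders 1.5x the planned quantity;
# recommendation='wait' with expected_price_change_pct=-8.0 delays the order by
# 7 days. Predicted changes inside the +/-5% band leave the plan untouched.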
|
||||
|
||||
async def _apply_yield_predictions(
|
||||
self,
|
||||
plan: Dict[str, Any],
|
||||
predictions: List[Dict[str, Any]],
|
||||
tenant_id: str
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Apply production yield predictions to production planning.
|
||||
|
||||
Modifications:
|
||||
- Increase production quantities for low predicted yield
|
||||
- Optimize worker assignments
|
||||
- Adjust production timing
|
||||
"""
|
||||
for prediction in predictions:
|
||||
if prediction['confidence'] < self.min_confidence_threshold:
|
||||
continue
|
||||
|
||||
metrics = prediction.get('metrics_json', {})
|
||||
recipe_id = metrics.get('recipe_id')
|
||||
predicted_yield = metrics.get('predicted_yield')
|
||||
expected_waste = metrics.get('expected_waste')
|
||||
|
||||
if not recipe_id or predicted_yield is None:
|
||||
continue
|
||||
|
||||
# Low yield: increase production quantity to compensate
|
||||
if predicted_yield < 90:
|
||||
current_quantity = plan['production']['recipe_quantities'].get(
|
||||
recipe_id, {}
|
||||
).get('quantity', 100)
|
||||
|
||||
# Adjust quantity to account for predicted waste
|
||||
adjusted_quantity = int(current_quantity * (100 / predicted_yield))
|
||||
|
||||
plan['production']['recipe_quantities'][recipe_id] = {
|
||||
'quantity': adjusted_quantity,
|
||||
'predicted_yield': predicted_yield,
|
||||
'waste_compensation': adjusted_quantity - current_quantity,
|
||||
'source': 'yield_prediction',
|
||||
'insight_id': prediction.get('id')
|
||||
}
|
||||
|
||||
plan['modifications'].append({
|
||||
'type': 'yield_compensation_applied',
|
||||
'insight_id': prediction.get('id'),
|
||||
'recipe_id': recipe_id,
|
||||
'predicted_yield': predicted_yield,
|
||||
'original_quantity': current_quantity,
|
||||
'adjusted_quantity': adjusted_quantity,
|
||||
'confidence': prediction['confidence']
|
||||
})
|
||||
|
||||
# Track for feedback
|
||||
self.applied_insights.append({
|
||||
'insight_id': prediction.get('id'),
|
||||
'type': 'yield_prediction',
|
||||
'applied_at': datetime.utcnow().isoformat(),
|
||||
'tenant_id': tenant_id,
|
||||
'metrics': {
|
||||
'recipe_id': recipe_id,
|
||||
'predicted_yield': predicted_yield,
|
||||
'expected_waste': expected_waste
|
||||
}
|
||||
})
|
||||
|
||||
logger.info(
|
||||
"Applied yield prediction",
|
||||
recipe_id=recipe_id,
|
||||
predicted_yield=predicted_yield
|
||||
)
|
||||
|
||||
return plan
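# Worked example (hypothetical numbers): a planned batch of 100 units with a
# predicted yield of 80% is scaled to int(100 * (100 / 80)) = 125 units, i.e.
# 25 units of waste compensation; yields of 90% or better are left unchanged.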
|
||||
|
||||
async def _apply_business_rules(
|
||||
self,
|
||||
plan: Dict[str, Any],
|
||||
rules: List[Dict[str, Any]],
|
||||
tenant_id: str
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Apply dynamic business rules to orchestration plan.
|
||||
|
||||
Business rules can override other insights based on business logic.
|
||||
"""
|
||||
for rule in rules:
|
||||
if rule['confidence'] < self.min_confidence_threshold:
|
||||
continue
|
||||
|
||||
# Business rules are flexible and defined in JSONB
|
||||
# Parse recommendation_actions to understand what to apply
|
||||
actions = rule.get('recommendation_actions', [])
|
||||
|
||||
for action in actions:
|
||||
action_type = action.get('action')
|
||||
params = action.get('params', {})
|
||||
|
||||
# Example: Force supplier switch
|
||||
if action_type == 'force_supplier_switch':
|
||||
supplier_id = params.get('from_supplier_id')
|
||||
alternate_id = params.get('to_supplier_id')
|
||||
|
||||
if supplier_id and alternate_id:
|
||||
plan['procurement']['supplier_selections'][supplier_id] = {
|
||||
'action': 'replace',
|
||||
'alternate_supplier': alternate_id,
|
||||
'reason': rule.get('description'),
|
||||
'source': 'business_rule',
|
||||
'insight_id': rule.get('id'),
|
||||
'override': True
|
||||
}
|
||||
|
||||
# Example: Halt production
|
||||
elif action_type == 'halt_production':
|
||||
recipe_id = params.get('recipe_id')
|
||||
if recipe_id:
|
||||
plan['production']['recipe_quantities'][recipe_id] = {
|
||||
'quantity': 0,
|
||||
'halted': True,
|
||||
'reason': rule.get('description'),
|
||||
'source': 'business_rule',
|
||||
'insight_id': rule.get('id')
|
||||
}
|
||||
|
||||
plan['modifications'].append({
|
||||
'type': 'business_rule_applied',
|
||||
'insight_id': rule.get('id'),
|
||||
'rule_description': rule.get('description'),
|
||||
'confidence': rule['confidence']
|
||||
})
|
||||
|
||||
# Track for feedback
|
||||
self.applied_insights.append({
|
||||
'insight_id': rule.get('id'),
|
||||
'type': 'business_rule',
|
||||
'applied_at': datetime.utcnow().isoformat(),
|
||||
'tenant_id': tenant_id,
|
||||
'metrics': {'actions': len(actions)}
|
||||
})
|
||||
|
||||
logger.info(
|
||||
"Applied business rule",
|
||||
rule_description=rule.get('title')
|
||||
)
|
||||
|
||||
return plan
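# Example rule payload (hypothetical values) handled by the loop above:
#
#   rule = {
#       'id': 'rule-001',
#       'confidence': 0.9,
#       'description': 'Primary flour supplier under quality review',
#       'recommendation_actions': [
#           {'action': 'force_supplier_switch',
#            'params': {'from_supplier_id': 'sup-A', 'to_supplier_id': 'sup-B'}}
#       ],
#   }
#   -> supplier_selections['sup-A'] becomes a 'replace' entry pointing at 'sup-B'.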
|
||||
|
||||
def _generate_execution_summary(
|
||||
self,
|
||||
plan: Dict[str, Any],
|
||||
insights: Dict[str, List[Dict[str, Any]]]
|
||||
) -> Dict[str, Any]:
|
||||
"""Generate summary of AI-enhanced orchestration execution."""
|
||||
total_insights_available = sum(len(v) for v in insights.values())
|
||||
total_insights_applied = len(self.applied_insights)
|
||||
total_modifications = len(plan.get('modifications', []))
|
||||
|
||||
# Count by type: map each plural category key to the singular type name
# recorded in self.applied_insights (rstrip('s') mangles 'price_opportunities')
singular_type = {c: c[:-1] if c.endswith('s') else c for c in insights}
singular_type['price_opportunities'] = 'price_opportunity'
insights_by_type = {}
for category, category_insights in insights.items():
    insights_by_type[category] = {
        'available': len(category_insights),
        'applied': len([
            i for i in self.applied_insights
            if i['type'] == singular_type[category]
        ])
    }
|
||||
|
||||
return {
|
||||
'total_insights_available': total_insights_available,
|
||||
'total_insights_applied': total_insights_applied,
|
||||
'total_modifications': total_modifications,
|
||||
'application_rate': round(
|
||||
(total_insights_applied / total_insights_available * 100)
|
||||
if total_insights_available > 0 else 0,
|
||||
2
|
||||
),
|
||||
'insights_by_type': insights_by_type,
|
||||
'modifications_summary': self._summarize_modifications(plan)
|
||||
}
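# Worked example (hypothetical numbers): 8 insights available and 3 applied
# gives application_rate = round(3 / 8 * 100, 2) = 37.5.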
|
||||
|
||||
def _summarize_modifications(self, plan: Dict[str, Any]) -> Dict[str, int]:
|
||||
"""Summarize modifications by type."""
|
||||
modifications = plan.get('modifications', [])
|
||||
summary = {}
|
||||
|
||||
for mod in modifications:
|
||||
mod_type = mod.get('type', 'unknown')
|
||||
summary[mod_type] = summary.get(mod_type, 0) + 1
|
||||
|
||||
return summary
|
||||
|
||||
async def record_orchestration_feedback(
|
||||
self,
|
||||
tenant_id: str,
|
||||
target_date: datetime,
|
||||
actual_outcomes: Dict[str, Any]
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Record feedback for applied insights to enable continuous learning.
|
||||
|
||||
Args:
|
||||
tenant_id: Tenant identifier
|
||||
target_date: Orchestration target date
|
||||
actual_outcomes: Actual results:
|
||||
- actual_demand: {product_id: actual_quantity}
|
||||
- actual_yields: {recipe_id: actual_yield_pct}
|
||||
- actual_costs: {ingredient_id: actual_price}
|
||||
- supplier_performance: {supplier_id: on_time_delivery}
|
||||
|
||||
Returns:
|
||||
Feedback recording results
|
||||
"""
|
||||
logger.info(
|
||||
"Recording orchestration feedback",
|
||||
tenant_id=tenant_id,
|
||||
target_date=target_date.isoformat(),
|
||||
applied_insights=len(self.applied_insights)
|
||||
)
|
||||
|
||||
feedback_results = []
|
||||
|
||||
for applied in self.applied_insights:
|
||||
insight_id = applied.get('insight_id')
|
||||
insight_type = applied.get('type')
|
||||
metrics = applied.get('metrics', {})
|
||||
|
||||
# Prepare feedback based on type
|
||||
feedback_data = {
|
||||
'applied': True,
|
||||
'applied_at': applied.get('applied_at'),
|
||||
'outcome_date': target_date.isoformat()
|
||||
}
|
||||
|
||||
# Demand forecast feedback
|
||||
if insight_type == 'demand_forecast':
|
||||
product_id = metrics.get('product_id')
|
||||
predicted_demand = metrics.get('predicted_demand')
|
||||
actual_demand = actual_outcomes.get('actual_demand', {}).get(product_id)
|
||||
|
||||
if actual_demand is not None:
|
||||
error = abs(actual_demand - predicted_demand)
|
||||
error_pct = (error / actual_demand * 100) if actual_demand > 0 else 0
|
||||
|
||||
feedback_data['outcome_metrics'] = {
|
||||
'predicted_demand': predicted_demand,
|
||||
'actual_demand': actual_demand,
|
||||
'error': error,
|
||||
'error_pct': round(error_pct, 2),
|
||||
'accuracy': round(100 - error_pct, 2)
|
||||
}
|
||||
|
||||
# Yield prediction feedback
|
||||
elif insight_type == 'yield_prediction':
|
||||
recipe_id = metrics.get('recipe_id')
|
||||
predicted_yield = metrics.get('predicted_yield')
|
||||
actual_yield = actual_outcomes.get('actual_yields', {}).get(recipe_id)
|
||||
|
||||
if actual_yield is not None:
|
||||
error = abs(actual_yield - predicted_yield)
|
||||
|
||||
feedback_data['outcome_metrics'] = {
|
||||
'predicted_yield': predicted_yield,
|
||||
'actual_yield': actual_yield,
|
||||
'error': round(error, 2),
|
||||
'accuracy': round(100 - (error / actual_yield * 100), 2) if actual_yield > 0 else 0
|
||||
}
|
||||
|
||||
# Record feedback via AI Insights Client
|
||||
try:
|
||||
await self.ai_insights_client.record_feedback(
|
||||
tenant_id=UUID(tenant_id),
|
||||
insight_id=UUID(insight_id) if insight_id else None,
|
||||
feedback_data=feedback_data
|
||||
)
|
||||
|
||||
feedback_results.append({
|
||||
'insight_id': insight_id,
|
||||
'insight_type': insight_type,
|
||||
'status': 'recorded',
|
||||
'feedback': feedback_data
|
||||
})
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"Error recording feedback",
|
||||
insight_id=insight_id,
|
||||
error=str(e)
|
||||
)
|
||||
feedback_results.append({
|
||||
'insight_id': insight_id,
|
||||
'insight_type': insight_type,
|
||||
'status': 'failed',
|
||||
'error': str(e)
|
||||
})
|
||||
|
||||
logger.info(
|
||||
"Feedback recording complete",
|
||||
total=len(feedback_results),
|
||||
successful=len([r for r in feedback_results if r['status'] == 'recorded'])
|
||||
)
|
||||
|
||||
return {
|
||||
'tenant_id': tenant_id,
|
||||
'target_date': target_date.isoformat(),
|
||||
'feedback_recorded_at': datetime.utcnow().isoformat(),
|
||||
'total_insights': len(self.applied_insights),
|
||||
'feedback_results': feedback_results,
|
||||
'successful': len([r for r in feedback_results if r['status'] == 'recorded']),
|
||||
'failed': len([r for r in feedback_results if r['status'] == 'failed'])
|
||||
}
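# Example call (hypothetical values; 'enhancer' is a placeholder instance name)
# showing the expected actual_outcomes shape:
#
#   await enhancer.record_orchestration_feedback(
#       tenant_id="2f6c0a1e-...",                  # placeholder tenant UUID string
#       target_date=datetime(2024, 3, 15),
#       actual_outcomes={
#           'actual_demand': {'prod-123': 180},    # predicted 200 -> error_pct 11.11, accuracy 88.89
#           'actual_yields': {'recipe-7': 84.0},
#       },
#   )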
|
||||
|
||||
async def close(self):
|
||||
"""Close HTTP client connections."""
|
||||
await self.ai_insights_client.close()
|
||||
13
services/orchestrator/app/models/__init__.py
Normal file
13
services/orchestrator/app/models/__init__.py
Normal file
@@ -0,0 +1,13 @@
|
||||
# ================================================================
|
||||
# services/orchestrator/app/models/__init__.py
|
||||
# ================================================================
|
||||
"""
|
||||
Orchestrator Service Models
|
||||
"""
|
||||
|
||||
from .orchestration_run import OrchestrationRun, OrchestrationStatus
|
||||
|
||||
__all__ = [
|
||||
"OrchestrationRun",
|
||||
"OrchestrationStatus",
|
||||
]
|
||||
113
services/orchestrator/app/models/orchestration_run.py
Normal file
113
services/orchestrator/app/models/orchestration_run.py
Normal file
@@ -0,0 +1,113 @@
|
||||
# ================================================================
|
||||
# services/orchestrator/app/models/orchestration_run.py
|
||||
# ================================================================
|
||||
"""
|
||||
Orchestration Run Models - Audit trail for orchestration executions
|
||||
"""
|
||||
|
||||
import uuid
|
||||
import enum
|
||||
from datetime import datetime, timezone
|
||||
from sqlalchemy import Column, String, DateTime, Integer, Text, Boolean, Enum as SQLEnum
|
||||
from sqlalchemy.dialects.postgresql import UUID, JSONB
|
||||
from sqlalchemy.sql import func
|
||||
|
||||
from shared.database.base import Base
|
||||
|
||||
|
||||
class OrchestrationStatus(enum.Enum):
|
||||
"""Orchestration run status"""
|
||||
pending = "pending"
|
||||
running = "running"
|
||||
completed = "completed"
|
||||
partial_success = "partial_success"
|
||||
failed = "failed"
|
||||
cancelled = "cancelled"
|
||||
|
||||
|
||||
class OrchestrationRun(Base):
|
||||
"""Audit trail for orchestration executions"""
|
||||
__tablename__ = "orchestration_runs"
|
||||
|
||||
# Primary identification
|
||||
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
|
||||
run_number = Column(String(50), nullable=False, unique=True, index=True)
|
||||
|
||||
# Run details
|
||||
tenant_id = Column(UUID(as_uuid=True), nullable=False, index=True)
|
||||
status = Column(SQLEnum(OrchestrationStatus), nullable=False, default=OrchestrationStatus.pending, index=True)
|
||||
run_type = Column(String(50), nullable=False, default="scheduled") # scheduled, manual, test
|
||||
priority = Column(String(20), nullable=False, default="normal") # normal, high, critical
|
||||
|
||||
# Timing
|
||||
started_at = Column(DateTime(timezone=True), nullable=False, default=lambda: datetime.now(timezone.utc))
|
||||
completed_at = Column(DateTime(timezone=True), nullable=True)
|
||||
duration_seconds = Column(Integer, nullable=True)
|
||||
|
||||
# Step tracking
|
||||
forecasting_started_at = Column(DateTime(timezone=True), nullable=True)
|
||||
forecasting_completed_at = Column(DateTime(timezone=True), nullable=True)
|
||||
forecasting_status = Column(String(20), nullable=True) # success, failed, skipped
|
||||
forecasting_error = Column(Text, nullable=True)
|
||||
|
||||
production_started_at = Column(DateTime(timezone=True), nullable=True)
|
||||
production_completed_at = Column(DateTime(timezone=True), nullable=True)
|
||||
production_status = Column(String(20), nullable=True) # success, failed, skipped
|
||||
production_error = Column(Text, nullable=True)
|
||||
|
||||
procurement_started_at = Column(DateTime(timezone=True), nullable=True)
|
||||
procurement_completed_at = Column(DateTime(timezone=True), nullable=True)
|
||||
procurement_status = Column(String(20), nullable=True) # success, failed, skipped
|
||||
procurement_error = Column(Text, nullable=True)
|
||||
|
||||
notification_started_at = Column(DateTime(timezone=True), nullable=True)
|
||||
notification_completed_at = Column(DateTime(timezone=True), nullable=True)
|
||||
notification_status = Column(String(20), nullable=True) # success, failed, skipped
|
||||
notification_error = Column(Text, nullable=True)
|
||||
|
||||
# AI Insights tracking
|
||||
ai_insights_started_at = Column(DateTime(timezone=True), nullable=True)
|
||||
ai_insights_completed_at = Column(DateTime(timezone=True), nullable=True)
|
||||
ai_insights_status = Column(String(20), nullable=True) # success, failed, skipped
|
||||
ai_insights_error = Column(Text, nullable=True)
|
||||
ai_insights_generated = Column(Integer, nullable=False, default=0)
|
||||
ai_insights_posted = Column(Integer, nullable=False, default=0)
|
||||
|
||||
# Results summary
|
||||
forecasts_generated = Column(Integer, nullable=False, default=0)
|
||||
production_batches_created = Column(Integer, nullable=False, default=0)
|
||||
procurement_plans_created = Column(Integer, nullable=False, default=0)
|
||||
purchase_orders_created = Column(Integer, nullable=False, default=0)
|
||||
notifications_sent = Column(Integer, nullable=False, default=0)
|
||||
|
||||
# Forecast data passed between services
|
||||
forecast_data = Column(JSONB, nullable=True) # Store forecast results for downstream services
|
||||
|
||||
# Error handling
|
||||
retry_count = Column(Integer, nullable=False, default=0)
|
||||
max_retries_reached = Column(Boolean, nullable=False, default=False)
|
||||
error_message = Column(Text, nullable=True)
|
||||
error_details = Column(JSONB, nullable=True)
|
||||
|
||||
# External references
|
||||
forecast_id = Column(UUID(as_uuid=True), nullable=True)
|
||||
production_schedule_id = Column(UUID(as_uuid=True), nullable=True)
|
||||
procurement_plan_id = Column(UUID(as_uuid=True), nullable=True)
|
||||
|
||||
# Saga tracking
|
||||
saga_steps_total = Column(Integer, nullable=False, default=0)
|
||||
saga_steps_completed = Column(Integer, nullable=False, default=0)
|
||||
|
||||
# Audit fields
|
||||
created_at = Column(DateTime(timezone=True), server_default=func.now(), nullable=False)
|
||||
updated_at = Column(DateTime(timezone=True), server_default=func.now(), onupdate=func.now(), nullable=False)
|
||||
triggered_by = Column(String(100), nullable=True) # scheduler, user_id, api
|
||||
|
||||
# Performance metrics
|
||||
fulfillment_rate = Column(Integer, nullable=True) # Percentage as integer (0-100)
|
||||
on_time_delivery_rate = Column(Integer, nullable=True) # Percentage as integer (0-100)
|
||||
cost_accuracy = Column(Integer, nullable=True) # Percentage as integer (0-100)
|
||||
quality_score = Column(Integer, nullable=True) # Rating as integer (0-100)
|
||||
|
||||
# Metadata
|
||||
run_metadata = Column(JSONB, nullable=True)
|
||||
0
services/orchestrator/app/repositories/__init__.py
Normal file
0
services/orchestrator/app/repositories/__init__.py
Normal file
193
services/orchestrator/app/repositories/orchestration_run_repository.py
Normal file
193
services/orchestrator/app/repositories/orchestration_run_repository.py
Normal file
@@ -0,0 +1,193 @@
|
||||
# ================================================================
|
||||
# services/orchestrator/app/repositories/orchestration_run_repository.py
|
||||
# ================================================================
|
||||
"""
|
||||
Orchestration Run Repository - Database operations for orchestration audit trail
|
||||
"""
|
||||
|
||||
import uuid
|
||||
from datetime import datetime, date, timezone
|
||||
from typing import List, Optional, Dict, Any
|
||||
from sqlalchemy import select, and_, desc, func
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.models.orchestration_run import OrchestrationRun, OrchestrationStatus
|
||||
|
||||
|
||||
class OrchestrationRunRepository:
|
||||
"""Repository for orchestration run operations"""
|
||||
|
||||
def __init__(self, db: AsyncSession):
|
||||
self.db = db
|
||||
|
||||
async def create_run(self, run_data: Dict[str, Any]) -> OrchestrationRun:
|
||||
"""Create a new orchestration run"""
|
||||
run = OrchestrationRun(**run_data)
|
||||
self.db.add(run)
|
||||
await self.db.flush()
|
||||
return run
|
||||
|
||||
async def get_run_by_id(self, run_id: uuid.UUID) -> Optional[OrchestrationRun]:
|
||||
"""Get orchestration run by ID"""
|
||||
stmt = select(OrchestrationRun).where(OrchestrationRun.id == run_id)
|
||||
result = await self.db.execute(stmt)
|
||||
return result.scalar_one_or_none()
|
||||
|
||||
async def update_run(self, run_id: uuid.UUID, updates: Dict[str, Any]) -> Optional[OrchestrationRun]:
|
||||
"""Update orchestration run"""
|
||||
run = await self.get_run_by_id(run_id)
|
||||
if not run:
|
||||
return None
|
||||
|
||||
for key, value in updates.items():
|
||||
if hasattr(run, key):
|
||||
setattr(run, key, value)
|
||||
|
||||
run.updated_at = datetime.now(timezone.utc)
|
||||
await self.db.flush()
|
||||
return run
|
||||
|
||||
async def list_runs(
|
||||
self,
|
||||
tenant_id: Optional[uuid.UUID] = None,
|
||||
status: Optional[OrchestrationStatus] = None,
|
||||
start_date: Optional[date] = None,
|
||||
end_date: Optional[date] = None,
|
||||
limit: int = 50,
|
||||
offset: int = 0
|
||||
) -> List[OrchestrationRun]:
|
||||
"""List orchestration runs with filters"""
|
||||
conditions = []
|
||||
|
||||
if tenant_id:
|
||||
conditions.append(OrchestrationRun.tenant_id == tenant_id)
|
||||
if status:
|
||||
conditions.append(OrchestrationRun.status == status)
|
||||
if start_date:
|
||||
conditions.append(func.date(OrchestrationRun.started_at) >= start_date)
|
||||
if end_date:
|
||||
conditions.append(func.date(OrchestrationRun.started_at) <= end_date)
|
||||
|
||||
stmt = (
|
||||
select(OrchestrationRun)
|
||||
.where(and_(*conditions) if conditions else True)
|
||||
.order_by(desc(OrchestrationRun.started_at))
|
||||
.limit(limit)
|
||||
.offset(offset)
|
||||
)
|
||||
|
||||
result = await self.db.execute(stmt)
|
||||
return result.scalars().all()
|
||||
|
||||
async def get_latest_run_for_tenant(self, tenant_id: uuid.UUID) -> Optional[OrchestrationRun]:
|
||||
"""Get the most recent orchestration run for a tenant"""
|
||||
stmt = (
|
||||
select(OrchestrationRun)
|
||||
.where(OrchestrationRun.tenant_id == tenant_id)
|
||||
.order_by(desc(OrchestrationRun.started_at))
|
||||
.limit(1)
|
||||
)
|
||||
|
||||
result = await self.db.execute(stmt)
|
||||
return result.scalar_one_or_none()
|
||||
|
||||
async def generate_run_number(self) -> str:
|
||||
"""
|
||||
Generate unique run number atomically using database-level counting.
|
||||
|
||||
Uses MAX(run_number) + 1 approach to avoid race conditions
|
||||
between reading count and inserting new record.
|
||||
"""
|
||||
today = date.today()
|
||||
date_str = today.strftime("%Y%m%d")
|
||||
|
||||
# Get the highest run number suffix for today; MAX avoids the duplicate
# numbers a COUNT(*)-based scheme produces after rows are deleted
|
||||
stmt = select(func.max(OrchestrationRun.run_number)).where(
|
||||
OrchestrationRun.run_number.like(f"ORCH-{date_str}-%")
|
||||
)
|
||||
result = await self.db.execute(stmt)
|
||||
max_run_number = result.scalar()
|
||||
|
||||
if max_run_number:
|
||||
# Extract the numeric suffix and increment it
|
||||
try:
|
||||
suffix = int(max_run_number.split('-')[-1])
|
||||
next_number = suffix + 1
|
||||
except (ValueError, IndexError):
|
||||
# Fallback to 1 if parsing fails
|
||||
next_number = 1
|
||||
else:
|
||||
# No runs for today yet
|
||||
next_number = 1
|
||||
|
||||
return f"ORCH-{date_str}-{next_number:04d}"
|
||||
|
||||
async def get_failed_runs(self, limit: int = 10) -> List[OrchestrationRun]:
|
||||
"""Get recent failed orchestration runs"""
|
||||
stmt = (
|
||||
select(OrchestrationRun)
|
||||
.where(OrchestrationRun.status == OrchestrationStatus.failed)
|
||||
.order_by(desc(OrchestrationRun.started_at))
|
||||
.limit(limit)
|
||||
)
|
||||
|
||||
result = await self.db.execute(stmt)
|
||||
return result.scalars().all()
|
||||
|
||||
async def get_run_statistics(
|
||||
self,
|
||||
start_date: Optional[date] = None,
|
||||
end_date: Optional[date] = None
|
||||
) -> Dict[str, Any]:
|
||||
"""Get orchestration run statistics"""
|
||||
conditions = []
|
||||
if start_date:
|
||||
conditions.append(func.date(OrchestrationRun.started_at) >= start_date)
|
||||
if end_date:
|
||||
conditions.append(func.date(OrchestrationRun.started_at) <= end_date)
|
||||
|
||||
where_clause = and_(*conditions) if conditions else True
|
||||
|
||||
# Total runs
|
||||
total_stmt = select(func.count(OrchestrationRun.id)).where(where_clause)
|
||||
total_result = await self.db.execute(total_stmt)
|
||||
total_runs = total_result.scalar() or 0
|
||||
|
||||
# Successful runs
|
||||
success_stmt = select(func.count(OrchestrationRun.id)).where(
|
||||
and_(
|
||||
where_clause,
|
||||
OrchestrationRun.status == OrchestrationStatus.completed
|
||||
)
|
||||
)
|
||||
success_result = await self.db.execute(success_stmt)
|
||||
successful_runs = success_result.scalar() or 0
|
||||
|
||||
# Failed runs
|
||||
failed_stmt = select(func.count(OrchestrationRun.id)).where(
|
||||
and_(
|
||||
where_clause,
|
||||
OrchestrationRun.status == OrchestrationStatus.failed
|
||||
)
|
||||
)
|
||||
failed_result = await self.db.execute(failed_stmt)
|
||||
failed_runs = failed_result.scalar() or 0
|
||||
|
||||
# Average duration
|
||||
avg_duration_stmt = select(func.avg(OrchestrationRun.duration_seconds)).where(
|
||||
and_(
|
||||
where_clause,
|
||||
OrchestrationRun.status == OrchestrationStatus.completed
|
||||
)
|
||||
)
|
||||
avg_duration_result = await self.db.execute(avg_duration_stmt)
|
||||
avg_duration = avg_duration_result.scalar() or 0
|
||||
|
||||
return {
|
||||
'total_runs': total_runs,
|
||||
'successful_runs': successful_runs,
|
||||
'failed_runs': failed_runs,
|
||||
'success_rate': (successful_runs / total_runs * 100) if total_runs > 0 else 0,
|
||||
'average_duration_seconds': float(avg_duration) if avg_duration else 0
|
||||
}
|
||||
0
services/orchestrator/app/schemas/__init__.py
Normal file
0
services/orchestrator/app/schemas/__init__.py
Normal file
0
services/orchestrator/app/services/__init__.py
Normal file
0
services/orchestrator/app/services/__init__.py
Normal file
@@ -0,0 +1,162 @@
|
||||
"""
|
||||
Orchestration Notification Service - Simplified
|
||||
|
||||
Emits minimal events using EventPublisher.
|
||||
All enrichment handled by alert_processor.
|
||||
"""
|
||||
|
||||
from datetime import datetime, timezone
|
||||
from typing import Optional, Dict, Any
|
||||
from uuid import UUID
|
||||
import structlog
|
||||
|
||||
from shared.messaging import UnifiedEventPublisher
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
|
||||
class OrchestrationNotificationService:
|
||||
"""
|
||||
Service for emitting orchestration notifications using EventPublisher.
|
||||
"""
|
||||
|
||||
def __init__(self, event_publisher: UnifiedEventPublisher):
|
||||
self.publisher = event_publisher
|
||||
|
||||
async def emit_orchestration_run_started_notification(
|
||||
self,
|
||||
tenant_id: UUID,
|
||||
run_id: str,
|
||||
run_type: str, # 'scheduled', 'manual', 'triggered'
|
||||
scope: str, # 'full', 'inventory_only', 'production_only'
|
||||
) -> None:
|
||||
"""
|
||||
Emit notification when an orchestration run starts.
|
||||
"""
|
||||
metadata = {
|
||||
"run_id": run_id,
|
||||
"run_type": run_type,
|
||||
"scope": scope,
|
||||
"started_at": datetime.now(timezone.utc).isoformat(),
|
||||
}
|
||||
|
||||
await self.publisher.publish_notification(
|
||||
event_type="operations.orchestration_run_started",
|
||||
tenant_id=tenant_id,
|
||||
data=metadata
|
||||
)
|
||||
|
||||
logger.info(
|
||||
"orchestration_run_started_notification_emitted",
|
||||
tenant_id=str(tenant_id),
|
||||
run_id=run_id
|
||||
)
|
||||
|
||||
async def emit_orchestration_run_completed_notification(
|
||||
self,
|
||||
tenant_id: UUID,
|
||||
run_id: str,
|
||||
duration_seconds: float,
|
||||
actions_created: int,
|
||||
actions_by_type: Dict[str, int], # e.g., {'purchase_order': 2, 'production_batch': 3}
|
||||
status: str = "success",
|
||||
) -> None:
|
||||
"""
|
||||
Emit notification when an orchestration run completes.
|
||||
"""
|
||||
# Build message with action summary
|
||||
if actions_created == 0:
|
||||
action_summary = "No actions needed"
|
||||
else:
|
||||
action_summary = ", ".join([f"{count} {action_type}" for action_type, count in actions_by_type.items()])
|
||||
|
||||
metadata = {
|
||||
"run_id": run_id,
|
||||
"status": status,
|
||||
"duration_seconds": float(duration_seconds),
|
||||
"actions_created": actions_created,
|
||||
"actions_by_type": actions_by_type,
|
||||
"action_summary": action_summary,
|
||||
"completed_at": datetime.now(timezone.utc).isoformat(),
|
||||
}
|
||||
|
||||
await self.publisher.publish_notification(
|
||||
event_type="operations.orchestration_run_completed",
|
||||
tenant_id=tenant_id,
|
||||
data=metadata
|
||||
)
|
||||
|
||||
logger.info(
|
||||
"orchestration_run_completed_notification_emitted",
|
||||
tenant_id=str(tenant_id),
|
||||
run_id=run_id,
|
||||
actions_created=actions_created
|
||||
)
|
||||
|
||||
async def emit_action_created_notification(
|
||||
self,
|
||||
tenant_id: UUID,
|
||||
run_id: str,
|
||||
action_id: str,
|
||||
action_type: str, # 'purchase_order', 'production_batch', 'inventory_adjustment'
|
||||
action_details: Dict[str, Any], # Type-specific details
|
||||
reason: str,
|
||||
estimated_impact: Optional[Dict[str, Any]] = None,
|
||||
) -> None:
|
||||
"""
|
||||
Emit notification when the orchestrator creates an action.
|
||||
"""
|
||||
metadata = {
|
||||
"run_id": run_id,
|
||||
"action_id": action_id,
|
||||
"action_type": action_type,
|
||||
"action_details": action_details,
|
||||
"reason": reason,
|
||||
"estimated_impact": estimated_impact,
|
||||
"created_at": datetime.now(timezone.utc).isoformat(),
|
||||
}
|
||||
|
||||
await self.publisher.publish_notification(
|
||||
event_type="operations.action_created",
|
||||
tenant_id=tenant_id,
|
||||
data=metadata
|
||||
)
|
||||
|
||||
logger.info(
|
||||
"action_created_notification_emitted",
|
||||
tenant_id=str(tenant_id),
|
||||
action_id=action_id,
|
||||
action_type=action_type
|
||||
)
|
||||
|
||||
async def emit_action_completed_notification(
|
||||
self,
|
||||
tenant_id: UUID,
|
||||
action_id: str,
|
||||
action_type: str,
|
||||
action_status: str, # 'approved', 'completed', 'rejected', 'cancelled'
|
||||
completed_by: Optional[str] = None,
|
||||
) -> None:
|
||||
"""
|
||||
Emit notification when an orchestrator action is completed/resolved.
|
||||
"""
|
||||
metadata = {
|
||||
"action_id": action_id,
|
||||
"action_type": action_type,
|
||||
"action_status": action_status,
|
||||
"completed_by": completed_by,
|
||||
"completed_at": datetime.now(timezone.utc).isoformat(),
|
||||
}
|
||||
|
||||
await self.publisher.publish_notification(
|
||||
event_type="operations.action_completed",
|
||||
tenant_id=tenant_id,
|
||||
data=metadata
|
||||
)
|
||||
|
||||
logger.info(
|
||||
"action_completed_notification_emitted",
|
||||
tenant_id=str(tenant_id),
|
||||
action_id=action_id,
|
||||
action_status=action_status
|
||||
)
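# Usage sketch (hypothetical wiring; how the UnifiedEventPublisher instance is
# constructed depends on shared.messaging and is not shown here):
#
#   service = OrchestrationNotificationService(event_publisher=publisher)
#   await service.emit_action_created_notification(
#       tenant_id=tenant_id,
#       run_id="ORCH-20240315-0003",
#       action_id="po-42",                      # placeholder id
#       action_type="purchase_order",
#       action_details={"supplier_id": "sup-B", "total": 1250.0},
#       reason="Projected flour shortfall on 2024-03-18",
#   )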
|
||||
1117
services/orchestrator/app/services/orchestration_saga.py
Normal file
1117
services/orchestrator/app/services/orchestration_saga.py
Normal file
File diff suppressed because it is too large
728
services/orchestrator/app/services/orchestrator_service.py
Normal file
728
services/orchestrator/app/services/orchestrator_service.py
Normal file
@@ -0,0 +1,728 @@
|
||||
"""
|
||||
Orchestrator Scheduler Service - REFACTORED
|
||||
Coordinates daily auto-generation workflow: Forecasting → Production → Procurement
|
||||
|
||||
CHANGES FROM ORIGINAL:
|
||||
- Updated to use new EventPublisher pattern for all messaging
|
||||
- Integrated OrchestrationSaga for error handling and compensation
|
||||
- Added circuit breakers for all service calls
|
||||
- Implemented real Forecasting Service integration
|
||||
- Implemented real Production Service integration
|
||||
- Implemented real Tenant Service integration
|
||||
- Implemented real Notification Service integration
|
||||
- NO backwards compatibility, NO feature flags - complete rewrite
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import uuid
|
||||
from datetime import datetime, date, timezone
|
||||
from decimal import Decimal
|
||||
from typing import List, Dict, Any, Optional
|
||||
import structlog
|
||||
from apscheduler.schedulers.asyncio import AsyncIOScheduler
|
||||
from apscheduler.triggers.cron import CronTrigger
|
||||
|
||||
# Updated imports - removed old alert system
|
||||
from shared.messaging import UnifiedEventPublisher
|
||||
from shared.clients.forecast_client import ForecastServiceClient
|
||||
from shared.clients.production_client import ProductionServiceClient
|
||||
from shared.clients.procurement_client import ProcurementServiceClient
|
||||
from shared.clients.notification_client import NotificationServiceClient
|
||||
from shared.clients.tenant_client import TenantServiceClient
|
||||
from shared.clients.inventory_client import InventoryServiceClient
|
||||
from shared.clients.suppliers_client import SuppliersServiceClient
|
||||
from shared.clients.recipes_client import RecipesServiceClient
|
||||
from shared.clients.training_client import TrainingServiceClient
|
||||
from shared.utils.circuit_breaker import CircuitBreaker, CircuitBreakerOpenError
|
||||
from app.core.config import settings
|
||||
from app.repositories.orchestration_run_repository import OrchestrationRunRepository
|
||||
from app.models.orchestration_run import OrchestrationStatus
|
||||
from app.services.orchestration_saga import OrchestrationSaga
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
|
||||
class OrchestratorSchedulerService:
|
||||
"""
|
||||
Orchestrator Service using EventPublisher for messaging
|
||||
Handles automated daily orchestration of forecasting, production, and procurement
|
||||
"""
|
||||
|
||||
def __init__(self, event_publisher: UnifiedEventPublisher, config):
|
||||
self.publisher = event_publisher
|
||||
self.config = config
|
||||
|
||||
# APScheduler instance for running daily orchestration
|
||||
self.scheduler = None
|
||||
|
||||
# Service clients
|
||||
self.forecast_client = ForecastServiceClient(config, "orchestrator-service")
|
||||
self.production_client = ProductionServiceClient(config, "orchestrator-service")
|
||||
self.procurement_client = ProcurementServiceClient(config, "orchestrator-service")
|
||||
self.notification_client = NotificationServiceClient(config, "orchestrator-service")
|
||||
self.tenant_client = TenantServiceClient(config)
|
||||
self.training_client = TrainingServiceClient(config, "orchestrator-service")
|
||||
# Clients for centralized data fetching
|
||||
self.inventory_client = InventoryServiceClient(config, "orchestrator-service")
|
||||
self.suppliers_client = SuppliersServiceClient(config, "orchestrator-service")
|
||||
self.recipes_client = RecipesServiceClient(config, "orchestrator-service")
|
||||
|
||||
# Circuit breakers for each service
|
||||
self.forecast_breaker = CircuitBreaker(
|
||||
failure_threshold=5,
|
||||
timeout_duration=60,
|
||||
success_threshold=2
|
||||
)
|
||||
self.production_breaker = CircuitBreaker(
|
||||
failure_threshold=5,
|
||||
timeout_duration=60,
|
||||
success_threshold=2
|
||||
)
|
||||
self.procurement_breaker = CircuitBreaker(
|
||||
failure_threshold=5,
|
||||
timeout_duration=60,
|
||||
success_threshold=2
|
||||
)
|
||||
self.tenant_breaker = CircuitBreaker(
|
||||
failure_threshold=3,
|
||||
timeout_duration=30,
|
||||
success_threshold=2
|
||||
)
|
||||
self.inventory_breaker = CircuitBreaker(
|
||||
failure_threshold=5,
|
||||
timeout_duration=60,
|
||||
success_threshold=2
|
||||
)
|
||||
self.suppliers_breaker = CircuitBreaker(
|
||||
failure_threshold=5,
|
||||
timeout_duration=60,
|
||||
success_threshold=2
|
||||
)
|
||||
self.recipes_breaker = CircuitBreaker(
|
||||
failure_threshold=5,
|
||||
timeout_duration=60,
|
||||
success_threshold=2
|
||||
)
|
||||
|
||||
async def emit_orchestration_run_started(
|
||||
self,
|
||||
tenant_id: uuid.UUID,
|
||||
run_id: str,
|
||||
run_type: str, # 'scheduled', 'manual', 'triggered'
|
||||
scope: str, # 'full', 'inventory_only', 'production_only'
|
||||
):
|
||||
"""
|
||||
Emit notification when an orchestration run starts.
|
||||
"""
|
||||
metadata = {
|
||||
"run_id": run_id,
|
||||
"run_type": run_type,
|
||||
"scope": scope,
|
||||
"started_at": datetime.now(timezone.utc).isoformat(),
|
||||
}
|
||||
|
||||
await self.publisher.publish_notification(
|
||||
event_type="operations.orchestration_run_started",
|
||||
tenant_id=tenant_id,
|
||||
data=metadata
|
||||
)
|
||||
|
||||
logger.info(
|
||||
"orchestration_run_started_notification_emitted",
|
||||
tenant_id=str(tenant_id),
|
||||
run_id=run_id
|
||||
)
|
||||
|
||||
async def emit_orchestration_run_completed(
|
||||
self,
|
||||
tenant_id: uuid.UUID,
|
||||
run_id: str,
|
||||
duration_seconds: float,
|
||||
actions_created: int,
|
||||
actions_by_type: Dict[str, int], # e.g., {'purchase_order': 2, 'production_batch': 3}
|
||||
status: str = "success",
|
||||
):
|
||||
"""
|
||||
Emit notification when an orchestration run completes.
|
||||
"""
|
||||
# Build message with action summary
|
||||
if actions_created == 0:
|
||||
action_summary = "No actions needed"
|
||||
else:
|
||||
action_summary = ", ".join([f"{count} {action_type}" for action_type, count in actions_by_type.items()])
|
||||
|
||||
metadata = {
|
||||
"run_id": run_id,
|
||||
"status": status,
|
||||
"duration_seconds": float(duration_seconds),
|
||||
"actions_created": actions_created,
|
||||
"actions_by_type": actions_by_type,
|
||||
"action_summary": action_summary,
|
||||
"completed_at": datetime.now(timezone.utc).isoformat(),
|
||||
}
|
||||
|
||||
await self.publisher.publish_notification(
|
||||
event_type="operations.orchestration_run_completed",
|
||||
tenant_id=tenant_id,
|
||||
data=metadata
|
||||
)
|
||||
|
||||
logger.info(
|
||||
"orchestration_run_completed_notification_emitted",
|
||||
tenant_id=str(tenant_id),
|
||||
run_id=run_id,
|
||||
actions_created=actions_created
|
||||
)
|
||||
|
||||
async def emit_action_created_notification(
|
||||
self,
|
||||
tenant_id: uuid.UUID,
|
||||
run_id: str,
|
||||
action_id: str,
|
||||
action_type: str, # 'purchase_order', 'production_batch', 'inventory_adjustment'
|
||||
action_details: Dict[str, Any], # Type-specific details
|
||||
reason: str,
|
||||
estimated_impact: Optional[Dict[str, Any]] = None,
|
||||
):
|
||||
"""
|
||||
Emit notification when the orchestrator creates an action.
|
||||
"""
|
||||
metadata = {
|
||||
"run_id": run_id,
|
||||
"action_id": action_id,
|
||||
"action_type": action_type,
|
||||
"action_details": action_details,
|
||||
"reason": reason,
|
||||
"estimated_impact": estimated_impact,
|
||||
"created_at": datetime.now(timezone.utc).isoformat(),
|
||||
}
|
||||
|
||||
await self.publisher.publish_notification(
|
||||
event_type="operations.action_created",
|
||||
tenant_id=tenant_id,
|
||||
data=metadata
|
||||
)
|
||||
|
||||
logger.info(
|
||||
"action_created_notification_emitted",
|
||||
tenant_id=str(tenant_id),
|
||||
action_id=action_id,
|
||||
action_type=action_type
|
||||
)
|
||||
|
||||
async def emit_action_completed_notification(
|
||||
self,
|
||||
tenant_id: uuid.UUID,
|
||||
action_id: str,
|
||||
action_type: str,
|
||||
action_status: str, # 'approved', 'completed', 'rejected', 'cancelled'
|
||||
completed_by: Optional[str] = None,
|
||||
):
|
||||
"""
|
||||
Emit notification when an orchestrator action is completed/resolved.
|
||||
"""
|
||||
metadata = {
|
||||
"action_id": action_id,
|
||||
"action_type": action_type,
|
||||
"action_status": action_status,
|
||||
"completed_by": completed_by,
|
||||
"completed_at": datetime.now(timezone.utc).isoformat(),
|
||||
}
|
||||
|
||||
await self.publisher.publish_notification(
|
||||
event_type="operations.action_completed",
|
||||
tenant_id=tenant_id,
|
||||
data=metadata
|
||||
)
|
||||
|
||||
logger.info(
|
||||
"action_completed_notification_emitted",
|
||||
tenant_id=str(tenant_id),
|
||||
action_id=action_id,
|
||||
action_status=action_status
|
||||
)
|
||||
|
||||
async def run_daily_orchestration(self):
|
||||
"""
|
||||
Main orchestration workflow - runs daily
|
||||
Executes for all active tenants in parallel (with limits)
|
||||
"""
|
||||
if not settings.ORCHESTRATION_ENABLED:
|
||||
logger.info("Orchestration disabled via config")
|
||||
return
|
||||
|
||||
logger.info("Starting daily orchestration workflow")
|
||||
|
||||
try:
|
||||
# Get all active tenants
|
||||
active_tenants = await self._get_active_tenants()
|
||||
|
||||
if not active_tenants:
|
||||
logger.warning("No active tenants found for orchestration")
|
||||
return
|
||||
|
||||
logger.info("Processing tenants",
|
||||
total_tenants=len(active_tenants))
|
||||
|
||||
# Process tenants with concurrency limit
|
||||
semaphore = asyncio.Semaphore(settings.MAX_CONCURRENT_TENANTS)
|
||||
|
||||
async def process_with_semaphore(tenant_id):
|
||||
async with semaphore:
|
||||
return await self._orchestrate_tenant(tenant_id)
|
||||
|
||||
# Process all tenants in parallel (but limited by semaphore)
|
||||
tasks = [process_with_semaphore(tenant_id) for tenant_id in active_tenants]
|
||||
results = await asyncio.gather(*tasks, return_exceptions=True)
|
||||
|
||||
# Log summary
|
||||
successful = sum(1 for r in results if r and not isinstance(r, Exception))
|
||||
failed = len(results) - successful
|
||||
|
||||
logger.info("Daily orchestration completed",
|
||||
total_tenants=len(active_tenants),
|
||||
successful=successful,
|
||||
failed=failed)
|
||||
|
||||
except Exception as e:
|
||||
logger.error("Error in daily orchestration",
|
||||
error=str(e), exc_info=True)
|
||||
|
||||
async def _orchestrate_tenant(self, tenant_id: uuid.UUID) -> bool:
|
||||
"""
|
||||
Orchestrate workflow for a single tenant using Saga pattern
|
||||
Returns True if successful, False otherwise
|
||||
"""
|
||||
logger.info("Starting orchestration for tenant", tenant_id=str(tenant_id))
|
||||
|
||||
# Create orchestration run record
|
||||
async with self.config.database_manager.get_session() as session:
|
||||
repo = OrchestrationRunRepository(session)
|
||||
run_number = await repo.generate_run_number()
|
||||
|
||||
run = await repo.create_run({
|
||||
'run_number': run_number,
|
||||
'tenant_id': tenant_id,
|
||||
'status': OrchestrationStatus.running,
|
||||
'run_type': 'scheduled',
|
||||
'started_at': datetime.now(timezone.utc),
|
||||
'triggered_by': 'scheduler'
|
||||
})
|
||||
await session.commit()
|
||||
run_id = run.id
|
||||
|
||||
try:
|
||||
# Emit orchestration started event
|
||||
await self.emit_orchestration_run_started(
|
||||
tenant_id=tenant_id,
|
||||
run_id=str(run_id),
|
||||
run_type='scheduled',
|
||||
scope='full'
|
||||
)
|
||||
|
||||
# Set timeout for entire tenant orchestration
|
||||
async with asyncio.timeout(settings.TENANT_TIMEOUT_SECONDS):
|
||||
# Execute orchestration using Saga pattern
|
||||
# AI enhancement is enabled via ORCHESTRATION_USE_AI_INSIGHTS config
|
||||
saga = OrchestrationSaga(
|
||||
forecast_client=self.forecast_client,
|
||||
production_client=self.production_client,
|
||||
procurement_client=self.procurement_client,
|
||||
notification_client=self.notification_client,
|
||||
inventory_client=self.inventory_client,
|
||||
suppliers_client=self.suppliers_client,
|
||||
recipes_client=self.recipes_client,
|
||||
training_client=self.training_client,
|
||||
use_ai_enhancement=settings.ORCHESTRATION_USE_AI_INSIGHTS,
|
||||
ai_insights_base_url=settings.AI_INSIGHTS_SERVICE_URL,
|
||||
ai_insights_min_confidence=settings.AI_INSIGHTS_MIN_CONFIDENCE,
|
||||
# Pass circuit breakers to saga for fault tolerance
|
||||
forecast_breaker=self.forecast_breaker,
|
||||
production_breaker=self.production_breaker,
|
||||
procurement_breaker=self.procurement_breaker,
|
||||
inventory_breaker=self.inventory_breaker,
|
||||
suppliers_breaker=self.suppliers_breaker,
|
||||
recipes_breaker=self.recipes_breaker
|
||||
)
|
||||
|
||||
result = await saga.execute_orchestration(
|
||||
tenant_id=str(tenant_id),
|
||||
orchestration_run_id=str(run_id)
|
||||
)
|
||||
|
||||
if result['success']:
|
||||
# Update orchestration run with saga results
|
||||
await self._complete_orchestration_run_with_saga(
|
||||
run_id,
|
||||
result
|
||||
)
|
||||
|
||||
# Emit orchestration completed event
|
||||
await self.emit_orchestration_run_completed(
|
||||
tenant_id=tenant_id,
|
||||
run_id=str(run_id),
|
||||
duration_seconds=result.get('duration_seconds', 0),
|
||||
actions_created=result.get('total_actions', 0),
|
||||
actions_by_type=result.get('actions_by_type', {}),
|
||||
status='success'
|
||||
)
|
||||
|
||||
logger.info("Tenant orchestration completed successfully",
|
||||
tenant_id=str(tenant_id), run_id=str(run_id))
|
||||
return True
|
||||
else:
|
||||
# Saga failed (with compensation)
|
||||
await self._mark_orchestration_failed(
|
||||
run_id,
|
||||
result.get('error', 'Saga execution failed')
|
||||
)
|
||||
|
||||
# Emit orchestration failed event
|
||||
await self.emit_orchestration_run_completed(
|
||||
tenant_id=tenant_id,
|
||||
run_id=str(run_id),
|
||||
duration_seconds=result.get('duration_seconds', 0),
|
||||
actions_created=0,
|
||||
actions_by_type={},
|
||||
status='failed'
|
||||
)
|
||||
|
||||
return False
|
||||
|
||||
except asyncio.TimeoutError:
|
||||
logger.error("Tenant orchestration timeout",
|
||||
tenant_id=str(tenant_id),
|
||||
timeout_seconds=settings.TENANT_TIMEOUT_SECONDS)
|
||||
await self._mark_orchestration_failed(run_id, "Timeout exceeded")
|
||||
return False
|
||||
|
||||
except Exception as e:
|
||||
logger.error("Tenant orchestration failed",
|
||||
tenant_id=str(tenant_id),
|
||||
error=str(e), exc_info=True)
|
||||
await self._mark_orchestration_failed(run_id, str(e))
|
||||
return False
|
||||
|
||||
    async def _get_active_tenants(self) -> List[uuid.UUID]:
        """
        Get the list of active tenants for orchestration.

        Real implementation: fetches tenants from the Tenant Service (no stubbed data).
        """
        try:
            logger.info("Fetching active tenants from Tenant Service")

            # Call Tenant Service with circuit breaker
            tenants_data = await self.tenant_breaker.call(
                self.tenant_client.get_active_tenants
            )

            if not tenants_data:
                logger.warning("Tenant Service returned no active tenants")
                return []

            # Extract tenant IDs
            tenant_ids = []
            for tenant in tenants_data:
                tenant_id = tenant.get('id') or tenant.get('tenant_id')
                if tenant_id:
                    # Convert string to UUID if needed
                    if isinstance(tenant_id, str):
                        tenant_id = uuid.UUID(tenant_id)
                    tenant_ids.append(tenant_id)

            logger.info(f"Found {len(tenant_ids)} active tenants for orchestration")

            return tenant_ids

        except CircuitBreakerOpenError:
            logger.error("Circuit breaker open for Tenant Service, skipping orchestration")
            return []

        except Exception as e:
            logger.error("Error getting active tenants", error=str(e), exc_info=True)
            return []

    async def _complete_orchestration_run_with_saga(
        self,
        run_id: uuid.UUID,
        saga_result: Dict[str, Any]
    ):
        """
        Complete orchestration run with saga results

        Args:
            run_id: Orchestration run ID
            saga_result: Result from saga execution
        """
        async with self.config.database_manager.get_session() as session:
            repo = OrchestrationRunRepository(session)
            run = await repo.get_run_by_id(run_id)

            if run:
                started_at = run.started_at
                completed_at = datetime.now(timezone.utc)
                duration = (completed_at - started_at).total_seconds()

                # Extract results from saga
                forecast_id = saga_result.get('forecast_id')
                production_schedule_id = saga_result.get('production_schedule_id')
                procurement_plan_id = saga_result.get('procurement_plan_id')
                notifications_sent = saga_result.get('notifications_sent', 0)

                # Get saga summary
                saga_summary = saga_result.get('saga_summary', {})
                total_steps = saga_summary.get('total_steps', 0)
                completed_steps = saga_summary.get('completed_steps', 0)

                # Extract actual counts from saga result (no placeholders)
                forecast_data = saga_result.get('forecast_data', {})
                production_data = saga_result.get('production_data', {})
                procurement_data = saga_result.get('procurement_data', {})

                forecasts_generated = forecast_data.get('forecasts_created', 0)
                production_batches_created = production_data.get('batches_created', 0)
                purchase_orders_created = procurement_data.get('pos_created', 0)

                # Extract AI insights tracking
                ai_insights_generated = saga_result.get('ai_insights_generated', 0)
                ai_insights_posted = saga_result.get('ai_insights_posted', 0)
                ai_insights_errors = saga_result.get('ai_insights_errors', [])

                # Generate reasoning metadata for the orchestrator context
                reasoning_metadata = self._generate_reasoning_metadata(
                    forecast_data,
                    production_data,
                    procurement_data,
                    ai_insights_generated,
                    ai_insights_posted
                )

                await repo.update_run(run_id, {
                    'status': OrchestrationStatus.completed,
                    'completed_at': completed_at,
                    'duration_seconds': int(duration),
                    'forecast_id': forecast_id,
                    'forecasting_status': 'success',
                    'forecasting_completed_at': completed_at,
                    'forecasts_generated': forecasts_generated,
                    'production_schedule_id': production_schedule_id,
                    'production_status': 'success',
                    'production_completed_at': completed_at,
                    'production_batches_created': production_batches_created,
                    'procurement_plan_id': procurement_plan_id,
                    'procurement_status': 'success',
                    'procurement_completed_at': completed_at,
                    'procurement_plans_created': 1,  # Always 1 plan per orchestration
                    'purchase_orders_created': purchase_orders_created,
                    'notification_status': 'success',
                    'notification_completed_at': completed_at,
                    'notifications_sent': notifications_sent,
                    'ai_insights_status': 'success' if not ai_insights_errors else 'partial',
                    'ai_insights_generated': ai_insights_generated,
                    'ai_insights_posted': ai_insights_posted,
                    'ai_insights_completed_at': completed_at,
                    'saga_steps_total': total_steps,
                    'saga_steps_completed': completed_steps,
                    'run_metadata': reasoning_metadata
                })
                await session.commit()

    def _generate_reasoning_metadata(
        self,
        forecast_data: Dict[str, Any],
        production_data: Dict[str, Any],
        procurement_data: Dict[str, Any],
        ai_insights_generated: int,
        ai_insights_posted: int
    ) -> Dict[str, Any]:
        """
        Generate reasoning metadata for an orchestration run, consumed by the alert processor.

        This creates structured reasoning data that the alert processor can use to provide
        context when showing AI reasoning to users.
        """
        reasoning_metadata = {
            'reasoning': {
                'type': 'daily_orchestration_summary',
                'timestamp': datetime.now(timezone.utc).isoformat(),
                'summary': 'Daily orchestration run completed successfully',
                'details': {}
            },
            'purchase_orders': [],
            'production_batches': [],
            'ai_insights': {
                'generated': ai_insights_generated,
                'posted': ai_insights_posted
            }
        }

        # Add forecast reasoning
        if forecast_data:
            reasoning_metadata['reasoning']['details']['forecasting'] = {
                'forecasts_created': forecast_data.get('forecasts_created', 0),
                'method': 'automated_daily_forecast',
                'reasoning': 'Generated forecasts based on historical patterns and seasonal trends'
            }

        # Add production reasoning
        if production_data:
            reasoning_metadata['reasoning']['details']['production'] = {
                'batches_created': production_data.get('batches_created', 0),
                'method': 'demand_based_scheduling',
                'reasoning': 'Scheduled production batches based on forecasted demand and inventory levels'
            }

        # Add procurement reasoning
        if procurement_data:
            reasoning_metadata['reasoning']['details']['procurement'] = {
                'requirements_created': procurement_data.get('requirements_created', 0),
                'pos_created': procurement_data.get('pos_created', 0),
                'method': 'automated_procurement',
                'reasoning': 'Generated procurement plan based on production needs and inventory optimization'
            }

        # Add purchase order details with reasoning
        if procurement_data and procurement_data.get('purchase_orders'):
            for po in procurement_data['purchase_orders']:
                po_reasoning = {
                    'id': po.get('id'),
                    'status': po.get('status', 'created'),
                    'delivery_date': po.get('delivery_date'),
                    'reasoning': {
                        'type': 'inventory_optimization',
                        'parameters': {
                            'trigger': 'low_stock_prediction',
                            'min_depletion_days': po.get('min_depletion_days', 3),
                            'quantity': po.get('quantity'),
                            'unit': po.get('unit'),
                            'supplier': po.get('supplier_name'),
                            'financial_impact_eur': po.get('estimated_savings_eur', 0)
                        }
                    }
                }
                reasoning_metadata['purchase_orders'].append(po_reasoning)

        # Add production batch details with reasoning
        if production_data and production_data.get('production_batches'):
            for batch in production_data['production_batches']:
                batch_reasoning = {
                    'id': batch.get('id'),
                    'status': batch.get('status', 'scheduled'),
                    'scheduled_date': batch.get('scheduled_date'),
                    'reasoning': {
                        'type': 'demand_forecasting',
                        'parameters': {
                            'trigger': 'forecasted_demand',
                            'forecasted_quantity': batch.get('forecasted_quantity'),
                            'product_name': batch.get('product_name'),
                            'financial_impact_eur': batch.get('estimated_revenue_eur', 0)
                        }
                    }
                }
                reasoning_metadata['production_batches'].append(batch_reasoning)

        return reasoning_metadata

    async def _mark_orchestration_failed(self, run_id: uuid.UUID, error_message: str):
        """Mark orchestration run as failed"""
        async with self.config.database_manager.get_session() as session:
            repo = OrchestrationRunRepository(session)
            run = await repo.get_run_by_id(run_id)

            if run:
                started_at = run.started_at
                completed_at = datetime.now(timezone.utc)
                duration = (completed_at - started_at).total_seconds()

                await repo.update_run(run_id, {
                    'status': OrchestrationStatus.failed,
                    'completed_at': completed_at,
                    'duration_seconds': int(duration),
                    'error_message': error_message
                })
                await session.commit()

    # Manual trigger for testing
    async def trigger_orchestration_for_tenant(
        self,
        tenant_id: uuid.UUID,
        test_scenario: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Manually trigger orchestration for a tenant (for testing)

        Args:
            tenant_id: Tenant ID to orchestrate
            test_scenario: Optional test scenario (full, production_only, procurement_only)

        Returns:
            Dict with orchestration results
        """
        logger.info("Manual orchestration trigger",
                    tenant_id=str(tenant_id),
                    test_scenario=test_scenario)

        success = await self._orchestrate_tenant(tenant_id)

        return {
            'success': success,
            'tenant_id': str(tenant_id),
            'test_scenario': test_scenario,
            'message': 'Orchestration completed' if success else 'Orchestration failed'
        }

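    # A minimal sketch of how a manual-trigger route could call the method above.
    # The route path and the `get_scheduler_service` dependency are assumptions made
    # for illustration; only trigger_orchestration_for_tenant is defined in this file.
    #
    #   from uuid import UUID
    #   from fastapi import APIRouter, Depends
    #
    #   router = APIRouter(prefix="/api/internal", tags=["internal"])
    #
    #   @router.post("/orchestration/trigger/{tenant_id}")
    #   async def trigger_orchestration(
    #       tenant_id: UUID,
    #       scheduler=Depends(get_scheduler_service),  # hypothetical dependency
    #   ):
    #       return await scheduler.trigger_orchestration_for_tenant(tenant_id)
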
    async def start(self):
        """Start the orchestrator scheduler service"""
        if not settings.ORCHESTRATION_ENABLED:
            logger.info("Orchestration disabled via config")
            return

        # Initialize APScheduler
        self.scheduler = AsyncIOScheduler()

        # Add daily orchestration job
        self.scheduler.add_job(
            self.run_daily_orchestration,
            trigger=CronTrigger(
                hour=settings.ORCHESTRATION_HOUR,
                minute=settings.ORCHESTRATION_MINUTE
            ),
            id='daily_orchestration',
            name='Daily Orchestration Workflow',
            replace_existing=True,
            max_instances=1,
            coalesce=True
        )

        # Start the scheduler
        self.scheduler.start()

        # Log next run time
        next_run = self.scheduler.get_job('daily_orchestration').next_run_time
        logger.info(
            "OrchestratorSchedulerService started with daily job",
            orchestration_hour=settings.ORCHESTRATION_HOUR,
            orchestration_minute=settings.ORCHESTRATION_MINUTE,
            next_run=next_run.isoformat() if next_run else None
        )

    async def stop(self):
        """Stop the orchestrator scheduler service"""
        if self.scheduler and self.scheduler.running:
            self.scheduler.shutdown(wait=True)
            logger.info("OrchestratorSchedulerService stopped")
        else:
            logger.info("OrchestratorSchedulerService already stopped")

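    # A minimal sketch of wiring start()/stop() into a FastAPI lifespan handler.
    # The OrchestratorSchedulerService constructor arguments and the app module are
    # assumptions for illustration; only start() and stop() are defined above.
    #
    #   from contextlib import asynccontextmanager
    #   from fastapi import FastAPI
    #
    #   @asynccontextmanager
    #   async def lifespan(app: FastAPI):
    #       scheduler_service = OrchestratorSchedulerService(config)  # assumed constructor
    #       await scheduler_service.start()
    #       app.state.scheduler_service = scheduler_service
    #       try:
    #           yield
    #       finally:
    #           await scheduler_service.stop()
    #
    #   app = FastAPI(lifespan=lifespan)
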
    def get_circuit_breaker_stats(self) -> Dict[str, Any]:
        """Get circuit breaker statistics for monitoring"""
        return {
            'forecast_service': self.forecast_breaker.get_stats(),
            'production_service': self.production_breaker.get_stats(),
            'procurement_service': self.procurement_breaker.get_stats(),
            'tenant_service': self.tenant_breaker.get_stats(),
            'inventory_service': self.inventory_breaker.get_stats(),
            'suppliers_service': self.suppliers_breaker.get_stats(),
            'recipes_service': self.recipes_breaker.get_stats()
        }
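
    # A minimal sketch of exposing these statistics on a monitoring endpoint, assuming the
    # scheduler service is stored on app.state as in the lifespan sketch above; the route
    # path is an assumption for illustration.
    #
    #   from fastapi import APIRouter, Request
    #
    #   monitoring_router = APIRouter(prefix="/api/internal", tags=["monitoring"])
    #
    #   @monitoring_router.get("/circuit-breakers")
    #   async def circuit_breaker_stats(request: Request):
    #       return request.app.state.scheduler_service.get_circuit_breaker_stats()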
265
services/orchestrator/app/utils/cache.py
Normal file
@@ -0,0 +1,265 @@
# services/orchestrator/app/utils/cache.py
"""
Redis caching utilities for dashboard endpoints
"""

import json
import redis.asyncio as redis
from typing import Optional, Any, Callable
from functools import wraps
import structlog
from app.core.config import settings
from pydantic import BaseModel

logger = structlog.get_logger()

# Redis client instance
_redis_client: Optional[redis.Redis] = None

async def get_redis_client() -> Optional[redis.Redis]:
    """Get or create the Redis client (returns None if the connection fails)"""
    global _redis_client

    if _redis_client is None:
        try:
            # Check if TLS is enabled - convert string to boolean properly
            redis_tls_str = str(getattr(settings, 'REDIS_TLS_ENABLED', 'false')).lower()
            redis_tls_enabled = redis_tls_str in ('true', '1', 'yes', 'on')

            connection_kwargs = {
                'host': str(getattr(settings, 'REDIS_HOST', 'localhost')),
                'port': int(getattr(settings, 'REDIS_PORT', 6379)),
                'db': int(getattr(settings, 'REDIS_DB', 0)),
                'decode_responses': True,
                'socket_connect_timeout': 5,
                'socket_timeout': 5
            }

            # Add password if configured
            redis_password = getattr(settings, 'REDIS_PASSWORD', None)
            if redis_password:
                connection_kwargs['password'] = redis_password

            # Add SSL/TLS support if enabled
            if redis_tls_enabled:
                import ssl
                connection_kwargs['ssl'] = True
                connection_kwargs['ssl_cert_reqs'] = ssl.CERT_NONE
                logger.debug(f"Redis TLS enabled - connecting with SSL to {connection_kwargs['host']}:{connection_kwargs['port']}")

            _redis_client = redis.Redis(**connection_kwargs)

            # Test connection
            await _redis_client.ping()
            logger.info(f"Redis client connected successfully (TLS: {redis_tls_enabled})")
        except Exception as e:
            logger.warning(f"Failed to connect to Redis: {e}. Caching will be disabled.")
            _redis_client = None

    return _redis_client

async def close_redis():
    """Close Redis connection"""
    global _redis_client
    if _redis_client:
        await _redis_client.close()
        _redis_client = None
        logger.info("Redis connection closed")

async def get_cached(key: str) -> Optional[Any]:
    """
    Get cached value by key

    Args:
        key: Cache key

    Returns:
        Cached value (deserialized from JSON), or None if the key is missing or an error occurs
    """
    try:
        client = await get_redis_client()
        if not client:
            return None

        cached = await client.get(key)
        if cached:
            logger.debug(f"Cache hit: {key}")
            return json.loads(cached)
        else:
            logger.debug(f"Cache miss: {key}")
            return None
    except Exception as e:
        logger.warning(f"Cache get error for key {key}: {e}")
        return None

def _serialize_value(value: Any) -> Any:
    """
    Recursively serialize values for JSON storage, handling Pydantic models properly.

    Args:
        value: Value to serialize

    Returns:
        JSON-serializable value
    """
    if isinstance(value, BaseModel):
        # Convert Pydantic model to dictionary
        return value.model_dump()
    elif isinstance(value, (list, tuple)):
        # Recursively serialize list/tuple elements
        return [_serialize_value(item) for item in value]
    elif isinstance(value, dict):
        # Recursively serialize dictionary values
        return {key: _serialize_value(val) for key, val in value.items()}
    else:
        # For other types, use default serialization
        return value

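# A small illustration of what _serialize_value produces for a payload that nests a
# Pydantic model inside a plain dict. The HealthStatus model is a hypothetical example,
# not something defined by this service.
#
#   class HealthStatus(BaseModel):
#       status: str
#       pending_runs: int
#
#   payload = {"tenant_id": "t-1", "health": HealthStatus(status="ok", pending_runs=0)}
#   _serialize_value(payload)
#   # -> {"tenant_id": "t-1", "health": {"status": "ok", "pending_runs": 0}}
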
async def set_cached(key: str, value: Any, ttl: int = 60) -> bool:
    """
    Set cached value with TTL

    Args:
        key: Cache key
        value: Value to cache (will be JSON serialized)
        ttl: Time to live in seconds

    Returns:
        True if successful, False otherwise
    """
    try:
        client = await get_redis_client()
        if not client:
            return False

        # Serialize value properly before JSON encoding
        serialized_value = _serialize_value(value)
        serialized = json.dumps(serialized_value)
        await client.setex(key, ttl, serialized)
        logger.debug(f"Cache set: {key} (TTL: {ttl}s)")
        return True
    except Exception as e:
        logger.warning(f"Cache set error for key {key}: {e}")
        return False

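# A minimal cache-aside sketch using get_cached/set_cached directly (rather than the
# cache_response decorator defined further down); load_dashboard_summary is a
# hypothetical loader used only for illustration.
#
#   async def dashboard_summary(tenant_id: str) -> dict:
#       key = f"dashboard:summary:{tenant_id}"
#       cached = await get_cached(key)
#       if cached is not None:
#           return cached
#       summary = await load_dashboard_summary(tenant_id)  # hypothetical
#       await set_cached(key, summary, ttl=30)
#       return summary
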
async def delete_cached(key: str) -> bool:
    """
    Delete cached value

    Args:
        key: Cache key

    Returns:
        True if successful, False otherwise
    """
    try:
        client = await get_redis_client()
        if not client:
            return False

        await client.delete(key)
        logger.debug(f"Cache deleted: {key}")
        return True
    except Exception as e:
        logger.warning(f"Cache delete error for key {key}: {e}")
        return False

async def delete_pattern(pattern: str) -> int:
    """
    Delete all keys matching pattern

    Args:
        pattern: Redis key pattern (e.g., "dashboard:*")

    Returns:
        Number of keys deleted
    """
    try:
        client = await get_redis_client()
        if not client:
            return 0

        keys = []
        async for key in client.scan_iter(match=pattern):
            keys.append(key)

        if keys:
            deleted = await client.delete(*keys)
            logger.info(f"Deleted {deleted} keys matching pattern: {pattern}")
            return deleted
        return 0
    except Exception as e:
        logger.warning(f"Cache delete pattern error for {pattern}: {e}")
        return 0

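# A short sketch of invalidating every cached dashboard entry for one tenant after a
# write, assuming keys are laid out as "<prefix>:<tenant_id>[...]" the way
# cache_response and make_cache_key below build them.
#
#   async def invalidate_dashboard_cache(tenant_id: str) -> int:
#       return await delete_pattern(f"dashboard:*:{tenant_id}*")
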
def cache_response(key_prefix: str, ttl: int = 60):
    """
    Decorator to cache endpoint responses

    Args:
        key_prefix: Prefix for cache key (will be combined with tenant_id)
        ttl: Time to live in seconds

    Usage:
        @cache_response("dashboard:health", ttl=30)
        async def get_health(tenant_id: str):
            ...
    """
    def decorator(func: Callable):
        @wraps(func)
        async def wrapper(*args, **kwargs):
            # Extract tenant_id from kwargs or args
            tenant_id = kwargs.get('tenant_id')
            if not tenant_id and args:
                # Try to find tenant_id in args (assuming it's the first argument)
                tenant_id = args[0] if len(args) > 0 else None

            if not tenant_id:
                # No tenant_id, skip caching
                return await func(*args, **kwargs)

            # Build cache key
            cache_key = f"{key_prefix}:{tenant_id}"

            # Try to get from cache
            cached_value = await get_cached(cache_key)
            if cached_value is not None:
                return cached_value

            # Execute function
            result = await func(*args, **kwargs)

            # Cache result
            await set_cached(cache_key, result, ttl)

            return result

        return wrapper
    return decorator

def make_cache_key(prefix: str, tenant_id: str, **params) -> str:
    """
    Create a cache key with optional parameters

    Args:
        prefix: Key prefix
        tenant_id: Tenant ID
        **params: Additional parameters to include in key

    Returns:
        Cache key string
    """
    key_parts = [prefix, tenant_id]
    for k, v in sorted(params.items()):
        if v is not None:
            key_parts.append(f"{k}:{v}")
    return ":".join(key_parts)
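
# A brief sketch of building a parameterized key with make_cache_key and using it with
# get_cached/set_cached; the "dashboard:trends" prefix and compute_trends loader are
# illustrative assumptions.
#
#   async def trends(tenant_id: str, days: int = 7) -> dict:
#       key = make_cache_key("dashboard:trends", tenant_id, days=days)
#       # e.g. "dashboard:trends:<tenant_id>:days:7"
#       cached = await get_cached(key)
#       if cached is not None:
#           return cached
#       result = await compute_trends(tenant_id, days)  # hypothetical
#       await set_cached(key, result, ttl=120)
#       return result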