Initial commit - production deployment

This commit is contained in:
2026-01-21 17:17:16 +01:00
commit c23d00dd92
2289 changed files with 638440 additions and 0 deletions

View File

@@ -0,0 +1,4 @@
from .orchestration import router as orchestration_router
from .internal_demo import router as internal_demo_router
__all__ = ["orchestration_router", "internal_demo_router"]

View File

@@ -0,0 +1,177 @@
"""
Internal API for Alert Intelligence Service
Provides orchestrator context for alert enrichment
"""
from fastapi import APIRouter, Header, HTTPException, Query
from typing import Optional, List, Dict, Any
from datetime import datetime, timedelta
from uuid import UUID
from pydantic import BaseModel
router = APIRouter(prefix="/api/internal", tags=["internal"])
class OrchestrationAction(BaseModel):
    """A single action produced by a recent orchestration run.

    Instances are built by ``get_recent_actions`` either from per-item
    details stored in a run's metadata or, when those are absent, from the
    run itself.
    """

    # Identifier of the purchase order / batch, or of the run when the
    # metadata carries no per-item details.
    id: str
    type: str  # purchase_order, production_batch
    status: str  # created, pending_approval, approved, completed
    # Nullable fields carry an explicit ``None`` default so they are optional
    # under both Pydantic v1 and v2 (v2 treats a bare ``Optional[X]``
    # annotation without a default as a required field).
    delivery_date: Optional[datetime] = None
    reasoning: Optional[Dict[str, Any]] = None
    estimated_resolution: Optional[datetime] = None
    # Copied from the originating run's ``created_at``.
    created_at: datetime
class RecentActionsResponse(BaseModel):
    """Response with recent orchestrator actions"""
    # Actions collected from the matching orchestration runs.
    actions: List[OrchestrationAction]
    # Populated with ``len(actions)`` by ``get_recent_actions``.
    count: int
@router.get("/recent-actions", response_model=RecentActionsResponse)
async def get_recent_actions(
    tenant_id: str = Query(..., description="Tenant ID"),
    ingredient_id: Optional[str] = Query(None, description="Filter by ingredient"),
    product_id: Optional[str] = Query(None, description="Filter by product"),
    hours_ago: int = Query(24, description="Look back hours"),
):
    """
    Get recent orchestrator actions for alert context enrichment.

    Queries orchestration runs of the tenant created within the last
    ``hours_ago`` hours whose status is ``completed`` or ``partial_success``
    and flattens them into purchase-order and production-batch actions.
    Per-item details are read from ``run.run_metadata`` when present;
    otherwise a generic action is derived from the run itself.

    NOTE(review): the original description says this is only accessible by
    internal services, but no authentication is enforced in this handler;
    presumably access is restricted elsewhere - confirm.

    Args:
        tenant_id: Tenant UUID as a string.
        ingredient_id: When set, only purchase orders whose ``items`` contain
            this ingredient are returned (applies to detailed POs only).
        product_id: When set, only production batches for this product are
            returned; generic batch fallbacks are suppressed.
        hours_ago: Size of the look-back window in hours.

    Returns:
        RecentActionsResponse with the collected actions and their count.

    Raises:
        HTTPException: 400 when ``tenant_id`` is not a valid UUID,
            500 when the lookup fails for any other reason.
    """
    from shared.database.base import create_database_manager
    from ..core.config import get_settings
    from ..models.orchestration_run import OrchestrationRun, OrchestrationStatus
    from sqlalchemy import select, and_, desc
    import structlog

    logger = structlog.get_logger()

    # Validate the tenant ID outside the broad handler below so a malformed
    # value is reported as a client error (400) instead of a server error.
    try:
        tenant_uuid = UUID(tenant_id)
    except ValueError:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid tenant ID: {tenant_id}"
        ) from None

    try:
        settings = get_settings()
        db_manager = create_database_manager(settings.DATABASE_URL, "orchestrator")
        async with db_manager.get_session() as session:
            cutoff_time = datetime.utcnow() - timedelta(hours=hours_ago)

            # Query recent orchestration runs, newest first
            query = select(OrchestrationRun).where(
                and_(
                    OrchestrationRun.tenant_id == tenant_uuid,
                    OrchestrationRun.created_at >= cutoff_time,
                    OrchestrationRun.status.in_([
                        OrchestrationStatus.completed,
                        OrchestrationStatus.partial_success
                    ])
                )
            ).order_by(desc(OrchestrationRun.created_at))
            result = await session.execute(query)
            runs = result.scalars().all()

            actions = []
            for run in runs:
                run_metadata = run.run_metadata or {}
                reasoning = run_metadata.get('reasoning')

                # Add purchase order actions. ``or 0`` guards against a NULL
                # counter, which would otherwise raise on the comparison.
                if (run.purchase_orders_created or 0) > 0:
                    po_details = run_metadata.get('purchase_orders', [])
                    if po_details:
                        # Metadata has PO details: emit one action per PO
                        for po in po_details:
                            # Filter by ingredient if specified
                            if ingredient_id and not any(
                                item.get('ingredient_id') == ingredient_id
                                for item in po.get('items', [])
                            ):
                                continue
                            actions.append(OrchestrationAction(
                                id=po.get('id', str(run.id)),
                                type="purchase_order",
                                status=po.get('status', 'created'),
                                delivery_date=po.get('delivery_date'),
                                reasoning=reasoning,
                                estimated_resolution=po.get('delivery_date'),
                                created_at=run.created_at
                            ))
                    else:
                        # Fallback: create generic action from run.
                        # NOTE(review): unlike the production-batch fallback
                        # below, this is emitted even when an ingredient
                        # filter is set - confirm this is intended.
                        actions.append(OrchestrationAction(
                            id=str(run.id),
                            type="purchase_order",
                            status="created",
                            delivery_date=None,
                            reasoning=reasoning,
                            estimated_resolution=None,
                            created_at=run.created_at
                        ))

                # Add production batch actions
                if (run.production_batches_created or 0) > 0:
                    batch_details = run_metadata.get('production_batches', [])
                    if batch_details:
                        for batch in batch_details:
                            # Filter by product if specified
                            if product_id and batch.get('product_id') != product_id:
                                continue
                            actions.append(OrchestrationAction(
                                id=batch.get('id', str(run.id)),
                                type="production_batch",
                                status=batch.get('status', 'created'),
                                delivery_date=None,
                                reasoning=reasoning,
                                estimated_resolution=batch.get('scheduled_date'),
                                created_at=run.created_at
                            ))
                    elif not product_id:
                        # Fallback: generic action, only without a product
                        # filter since the run cannot be matched to a product
                        actions.append(OrchestrationAction(
                            id=str(run.id),
                            type="production_batch",
                            status="created",
                            delivery_date=None,
                            reasoning=reasoning,
                            estimated_resolution=None,
                            created_at=run.created_at
                        ))

            logger.info(
                "recent_actions_fetched",
                tenant_id=tenant_id,
                hours_ago=hours_ago,
                action_count=len(actions),
                ingredient_id=ingredient_id,
                product_id=product_id
            )
            return RecentActionsResponse(
                actions=actions,
                count=len(actions)
            )
    except Exception as e:
        logger.error("error_fetching_recent_actions", error=str(e), tenant_id=tenant_id)
        raise HTTPException(
            status_code=500,
            detail=f"Failed to fetch recent actions: {str(e)}"
        )
@router.get("/health")
async def internal_health():
    """Return a static health payload for the internal API."""
    payload = {"status": "healthy", "api": "internal"}
    return payload

View File

@@ -0,0 +1,277 @@
"""
Internal Demo API Endpoints for Orchestrator Service
Used by demo_session service to clone data for virtual demo tenants
"""
from fastapi import APIRouter, Depends, HTTPException, Header
from typing import Dict, Any
from uuid import UUID
import structlog
import os
import json
from app.core.database import get_db
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select, delete, func
from app.models.orchestration_run import OrchestrationRun, OrchestrationStatus
import uuid
from datetime import datetime, timezone, timedelta
from typing import Optional
import sys
from pathlib import Path
# Add shared utilities to path
sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent))
from shared.utils.demo_dates import adjust_date_for_demo
from app.core.config import settings
router = APIRouter(prefix="/internal/demo", tags=["internal"])
logger = structlog.get_logger()
async def ensure_unique_run_number(db: AsyncSession, base_run_number: str) -> str:
    """Return a run number that no existing OrchestrationRun uses.

    Starts with ``base_run_number`` and, while a run with the candidate
    number exists, retries with the base truncated so that base plus a
    4-character random suffix stays within 50 characters.
    """
    candidate = base_run_number
    while True:
        lookup = select(OrchestrationRun).where(
            OrchestrationRun.run_number == candidate
        )
        rows = await db.execute(lookup)
        if not rows.scalar_one_or_none():
            # Nothing stored under this number yet
            return candidate

        # Collision: derive a fresh candidate from the base plus random suffix
        suffix = str(uuid.uuid4())[:4].upper()
        keep = 50 - len(suffix) - 1
        candidate = f"{base_run_number[:keep]}-{suffix}"
async def load_fixture_data_for_tenant(
    db: AsyncSession,
    tenant_uuid: UUID,
    demo_account_type: str,
    reference_time: datetime,
    base_tenant_id: Optional[str] = None
) -> int:
    """
    Create one orchestration run for a virtual tenant from the JSON fixture.

    Reads ``11-orchestrator.json`` for the demo account type (or the
    enterprise child fixture when ``demo_account_type`` is
    ``enterprise_child`` and ``base_tenant_id`` is given), shifts the fixture
    timestamps relative to ``reference_time`` and adds the run to ``db``
    (flushed, not committed).

    Returns:
        1 when a run was created, 0 when the fixture has no
        ``orchestration_run`` section.
    """
    from shared.utils.seed_data_paths import get_seed_data_path
    from shared.utils.demo_dates import resolve_time_marker, adjust_date_for_demo

    # Locate and read the fixture
    fixture_name = "11-orchestrator.json"
    if demo_account_type == "enterprise_child" and base_tenant_id:
        fixture_path = get_seed_data_path("enterprise", fixture_name, child_id=base_tenant_id)
    else:
        fixture_path = get_seed_data_path(demo_account_type, fixture_name)

    with open(fixture_path, 'r', encoding='utf-8') as fixture_file:
        fixture = json.load(fixture_file)

    run_spec = fixture.get("orchestration_run")
    if not run_spec:
        logger.warning("No orchestration_run data in fixture")
        return 0

    # Resolve fixture time markers, then shift them relative to the session
    raw_start = resolve_time_marker(run_spec.get("started_at"), reference_time)
    raw_end = resolve_time_marker(run_spec.get("completed_at"), reference_time)

    if raw_start:
        started_at = adjust_date_for_demo(raw_start, reference_time)
    else:
        started_at = reference_time - timedelta(hours=2)

    if raw_end:
        completed_at = adjust_date_for_demo(raw_end, reference_time)
    else:
        completed_at = started_at + timedelta(minutes=15)

    # Run number made unique per call through a random suffix
    suffix = str(uuid.uuid4())[:8].upper()
    run_number = f"ORCH-DEMO-PROF-{reference_time.year}-001-{suffix}"

    # Fixed step boundaries, expressed as offsets from the run start
    forecasting_end = started_at + timedelta(minutes=2)
    production_end = started_at + timedelta(minutes=5)
    procurement_end = started_at + timedelta(minutes=8)

    run_id = uuid.uuid4()
    status = OrchestrationStatus[run_spec["status"]]
    results = fixture.get("orchestration_results", {})

    new_run = OrchestrationRun(
        id=run_id,
        tenant_id=tenant_uuid,
        run_number=run_number,
        status=status,
        run_type=run_spec.get("run_type", "daily"),
        priority="normal",
        started_at=started_at,
        completed_at=completed_at,
        duration_seconds=run_spec.get("duration_seconds", 900),
        # Step statuses are fixed to "success" with back-to-back time slots
        forecasting_status="success",
        forecasting_started_at=started_at,
        forecasting_completed_at=forecasting_end,
        production_status="success",
        production_started_at=forecasting_end,
        production_completed_at=production_end,
        procurement_status="success",
        procurement_started_at=production_end,
        procurement_completed_at=procurement_end,
        notification_status="success",
        notification_started_at=procurement_end,
        notification_completed_at=completed_at,
        # Counters from the fixture's orchestration_results section
        forecasts_generated=results.get("forecasts_generated", 10),
        production_batches_created=results.get("production_batches_created", 18),
        procurement_plans_created=0,
        purchase_orders_created=results.get("purchase_orders_created", 6),
        notifications_sent=results.get("notifications_sent", 8),
        # Metadata
        triggered_by="system",
        created_at=started_at,
        updated_at=completed_at
    )
    db.add(new_run)
    await db.flush()

    logger.info(
        "Loaded orchestration run from fixture",
        tenant_id=str(tenant_uuid),
        run_number=new_run.run_number,
        started_at=started_at.isoformat()
    )
    return 1
@router.post("/clone")
async def clone_demo_data(
    base_tenant_id: str,
    virtual_tenant_id: str,
    demo_account_type: str,
    session_id: Optional[str] = None,
    session_created_at: Optional[str] = None,
    db: AsyncSession = Depends(get_db)
):
    """
    Load orchestration run demo data into a virtual tenant.

    Called by the demo_session service during session initialization. The
    run is loaded from the JSON fixture via ``load_fixture_data_for_tenant``
    (nothing is copied from the base tenant's rows) with dates adjusted
    relative to the session creation time so it appears recent.

    Args:
        base_tenant_id: Passed through to the fixture loader, which uses it
            to select the fixture for ``enterprise_child`` accounts.
        virtual_tenant_id: UUID string of the tenant receiving the data.
        demo_account_type: Selects which fixture set is loaded.
        session_id: Only used for logging.
        session_created_at: ISO-8601 timestamp used as reference time;
            the current UTC time is used when missing or unparseable.
        db: Database session.

    Returns:
        Dict describing the outcome, including the number of runs created
        and the duration in milliseconds.

    Raises:
        HTTPException: 400 when ``virtual_tenant_id`` is not a valid UUID,
            500 when loading or committing fails.
    """
    start_time = datetime.now(timezone.utc)

    # Parse session_created_at or use current time
    reference_time = None
    if session_created_at:
        try:
            reference_time = datetime.fromisoformat(session_created_at.replace('Z', '+00:00'))
        except ValueError:
            # Best effort: an unparseable timestamp falls back to "now"
            logger.warning(
                "Invalid session_created_at, falling back to current time",
                session_created_at=session_created_at
            )
    if reference_time is None:
        reference_time = datetime.now(timezone.utc)
    elif reference_time.tzinfo is None:
        # Treat naive timestamps as UTC so they are never mixed with the
        # timezone-aware datetimes used everywhere else in this handler.
        reference_time = reference_time.replace(tzinfo=timezone.utc)

    logger.info(
        "Starting orchestration runs cloning with date adjustment",
        base_tenant_id=base_tenant_id,
        virtual_tenant_id=virtual_tenant_id,
        demo_account_type=demo_account_type,
        session_id=session_id,
        reference_time=reference_time.isoformat()
    )

    # Validate outside the broad handler so a malformed ID is a 400, not a 500
    try:
        virtual_uuid = uuid.UUID(virtual_tenant_id)
    except ValueError:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid virtual tenant ID: {virtual_tenant_id}"
        ) from None

    try:
        # Load fixture data directly into virtual tenant (no base tenant cloning)
        runs_created = await load_fixture_data_for_tenant(
            db,
            virtual_uuid,
            demo_account_type,
            reference_time,
            base_tenant_id
        )
        await db.commit()

        duration_ms = int((datetime.now(timezone.utc) - start_time).total_seconds() * 1000)
        logger.info(
            "Orchestration runs loaded from fixture successfully",
            virtual_tenant_id=str(virtual_tenant_id),
            runs_created=runs_created,
            duration_ms=duration_ms
        )
        return {
            "service": "orchestrator",
            "status": "completed",
            "success": True,
            "records_cloned": runs_created,
            "runs_cloned": runs_created,
            "duration_ms": duration_ms
        }
    except Exception as e:
        logger.error("Failed to clone orchestration runs", error=str(e), exc_info=True)
        await db.rollback()
        raise HTTPException(status_code=500, detail=f"Failed to clone orchestration runs: {str(e)}")
@router.delete("/tenant/{virtual_tenant_id}")
async def delete_demo_data(
    virtual_tenant_id: str,
    db: AsyncSession = Depends(get_db)
):
    """
    Delete all orchestration runs for a virtual demo tenant.

    Args:
        virtual_tenant_id: UUID string of the tenant whose runs are removed.
        db: Database session.

    Returns:
        Dict with the number of deleted runs and the duration in milliseconds.

    Raises:
        HTTPException: 400 when ``virtual_tenant_id`` is not a valid UUID,
            500 when the delete or commit fails (the session is rolled back).
    """
    logger.info("Deleting orchestration runs for virtual tenant", virtual_tenant_id=virtual_tenant_id)
    start_time = datetime.now(timezone.utc)

    # Validate outside the broad handler so a malformed ID is a 400, not a 500
    try:
        virtual_uuid = uuid.UUID(virtual_tenant_id)
    except ValueError:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid virtual tenant ID: {virtual_tenant_id}"
        ) from None

    try:
        # Count records first so the response can report what was removed
        run_count = await db.scalar(
            select(func.count(OrchestrationRun.id))
            .where(OrchestrationRun.tenant_id == virtual_uuid)
        )

        # Delete orchestration runs
        await db.execute(
            delete(OrchestrationRun)
            .where(OrchestrationRun.tenant_id == virtual_uuid)
        )
        await db.commit()

        duration_ms = int((datetime.now(timezone.utc) - start_time).total_seconds() * 1000)
        logger.info(
            "Orchestration runs deleted successfully",
            virtual_tenant_id=virtual_tenant_id,
            records_deleted=run_count,
            duration_ms=duration_ms
        )
        return {
            "service": "orchestrator",
            "status": "deleted",
            "virtual_tenant_id": virtual_tenant_id,
            "records_deleted": {
                "orchestration_runs": run_count,
                "total": run_count
            },
            "duration_ms": duration_ms
        }
    except Exception as e:
        logger.error("Failed to delete orchestration runs", error=str(e), exc_info=True)
        await db.rollback()
        raise HTTPException(status_code=500, detail=str(e))
@router.get("/clone/health")
async def health_check():
    """Return a static health payload for the demo cloning router."""
    payload = {"status": "healthy", "service": "orchestrator"}
    return payload

View File

@@ -0,0 +1,346 @@
# ================================================================
# services/orchestrator/app/api/orchestration.py
# ================================================================
"""
Orchestration API Endpoints
Testing and manual trigger endpoints for orchestration
"""
import uuid
from typing import Optional
from fastapi import APIRouter, Depends, HTTPException, Request
from pydantic import BaseModel, Field
import structlog
from app.core.database import get_db
from app.repositories.orchestration_run_repository import OrchestrationRunRepository
from sqlalchemy.ext.asyncio import AsyncSession
logger = structlog.get_logger()
router = APIRouter(prefix="/api/v1/tenants/{tenant_id}/orchestrator", tags=["Orchestration"])
# ================================================================
# REQUEST/RESPONSE SCHEMAS
# ================================================================
class OrchestratorTestRequest(BaseModel):
    """Body accepted by the orchestrator test trigger endpoint."""

    test_scenario: Optional[str] = Field(
        default=None,
        description="Test scenario: full, production_only, procurement_only",
    )
    dry_run: bool = Field(
        default=False,
        description="Dry run mode (no actual changes)",
    )
class OrchestratorTestResponse(BaseModel):
    """Response schema for orchestrator test.

    The ``*_completed`` / ``notifications_sent`` flags are filled by the test
    endpoint from the step statuses of the tenant's latest run; ``summary``
    carries that run's counters.
    """

    success: bool
    message: str
    tenant_id: str
    forecasting_completed: bool = False
    production_completed: bool = False
    procurement_completed: bool = False
    notifications_sent: bool = False
    # default_factory instead of a shared mutable ``{}`` literal, so every
    # instance explicitly gets its own dict.
    summary: dict = Field(default_factory=dict)
class OrchestratorWorkflowRequest(BaseModel):
    """Body accepted by the daily workflow trigger endpoint."""

    dry_run: bool = Field(
        default=False,
        description="Dry run mode (no actual changes)",
    )
class OrchestratorWorkflowResponse(BaseModel):
    """Response schema for daily workflow trigger.

    ``run_id`` is the ID of the tenant's latest run when one exists. The
    ``*_completed`` / ``notifications_sent`` flags are filled by the endpoint
    from that run's step statuses; ``summary`` carries its counters.
    """

    success: bool
    message: str
    tenant_id: str
    run_id: Optional[str] = None
    forecasting_completed: bool = False
    production_completed: bool = False
    procurement_completed: bool = False
    notifications_sent: bool = False
    # default_factory instead of a shared mutable ``{}`` literal, so every
    # instance explicitly gets its own dict.
    summary: dict = Field(default_factory=dict)
# ================================================================
# API ENDPOINTS
# ================================================================
@router.post("/test", response_model=OrchestratorTestResponse)
async def trigger_orchestrator_test(
    tenant_id: str,
    request_data: OrchestratorTestRequest,
    request: Request,
    db: AsyncSession = Depends(get_db)
):
    """
    Trigger orchestrator for testing purposes

    This endpoint allows manual triggering of the orchestration workflow
    for a specific tenant, useful for testing during development.

    NOTE(review): ``request_data.dry_run`` is logged but not forwarded to the
    scheduler service by this handler - confirm whether it should be.

    Args:
        tenant_id: Tenant ID to orchestrate
        request_data: Test request with scenario and dry_run options
        request: FastAPI request object
        db: Database session

    Returns:
        OrchestratorTestResponse with results

    Raises:
        HTTPException: 400 for a malformed tenant ID, 503 when the scheduler
            service is not attached to the app state, 500 on any other failure.
    """
    logger.info("Orchestrator test trigger requested",
                tenant_id=tenant_id,
                test_scenario=request_data.test_scenario,
                dry_run=request_data.dry_run)

    # Validate the tenant ID on its own so that only a malformed ID maps to
    # 400; a ValueError raised deeper in the workflow is a server error.
    try:
        tenant_uuid = uuid.UUID(tenant_id)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=f"Invalid tenant ID: {str(e)}")

    # Checked outside the broad handler below: raised inside it, the 503
    # would be caught by ``except Exception`` and re-reported as a 500.
    if not hasattr(request.app.state, 'scheduler_service'):
        raise HTTPException(
            status_code=503,
            detail="Orchestrator scheduler service not available"
        )
    scheduler_service = request.app.state.scheduler_service

    try:
        # Trigger orchestration
        result = await scheduler_service.trigger_orchestration_for_tenant(
            tenant_id=tenant_uuid,
            test_scenario=request_data.test_scenario
        )

        # Get the latest run for this tenant
        repo = OrchestrationRunRepository(db)
        latest_run = await repo.get_latest_run_for_tenant(tenant_uuid)

        # Build response; every run-derived field falls back to a neutral
        # value when the tenant has no run yet
        response = OrchestratorTestResponse(
            success=result.get('success', False),
            message=result.get('message', 'Orchestration completed'),
            tenant_id=tenant_id,
            forecasting_completed=latest_run.forecasting_status == 'success' if latest_run else False,
            production_completed=latest_run.production_status == 'success' if latest_run else False,
            procurement_completed=latest_run.procurement_status == 'success' if latest_run else False,
            notifications_sent=latest_run.notification_status == 'success' if latest_run else False,
            summary={
                'forecasts_generated': latest_run.forecasts_generated if latest_run else 0,
                'batches_created': latest_run.production_batches_created if latest_run else 0,
                'pos_created': latest_run.purchase_orders_created if latest_run else 0,
                'notifications_sent': latest_run.notifications_sent if latest_run else 0
            }
        )
        logger.info("Orchestrator test completed",
                    tenant_id=tenant_id,
                    success=response.success)
        return response
    except HTTPException:
        # Preserve status codes raised deliberately further down the stack
        raise
    except Exception as e:
        logger.error("Orchestrator test failed",
                     tenant_id=tenant_id,
                     error=str(e),
                     exc_info=True)
        raise HTTPException(status_code=500, detail=f"Orchestrator test failed: {str(e)}")
@router.post("/run-daily-workflow", response_model=OrchestratorWorkflowResponse)
async def run_daily_workflow(
    tenant_id: str,
    request_data: Optional[OrchestratorWorkflowRequest] = None,
    request: Request = None,
    db: AsyncSession = Depends(get_db)
):
    """
    Trigger the daily orchestrated workflow for a tenant

    This endpoint runs the complete daily workflow which includes:
    1. Forecasting Service: Generate demand forecasts
    2. Production Service: Create production schedule from forecasts
    3. Procurement Service: Generate procurement plan
    4. Notification Service: Send relevant notifications

    This is the production endpoint used by the dashboard scheduler button.

    NOTE(review): ``request_data.dry_run`` is accepted but not forwarded to
    the scheduler service by this handler - confirm whether it should be.

    Args:
        tenant_id: Tenant ID to orchestrate
        request_data: Optional request data with dry_run flag
        request: FastAPI request object
        db: Database session

    Returns:
        OrchestratorWorkflowResponse with workflow execution results

    Raises:
        HTTPException: 400 for a malformed tenant ID, 503 when the scheduler
            service is not attached to the app state, 500 on any other failure.
    """
    logger.info("Daily workflow trigger requested", tenant_id=tenant_id)

    # Handle optional request_data
    if request_data is None:
        request_data = OrchestratorWorkflowRequest()

    # Validate the tenant ID on its own so that only a malformed ID maps to
    # 400; a ValueError raised deeper in the workflow is a server error.
    try:
        tenant_uuid = uuid.UUID(tenant_id)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=f"Invalid tenant ID: {str(e)}")

    # Checked outside the broad handler below: raised inside it, the 503
    # would be caught by ``except Exception`` and re-reported as a 500.
    if not hasattr(request.app.state, 'scheduler_service'):
        raise HTTPException(
            status_code=503,
            detail="Orchestrator scheduler service not available"
        )
    scheduler_service = request.app.state.scheduler_service

    try:
        # Trigger orchestration (use full workflow, not test scenario)
        result = await scheduler_service.trigger_orchestration_for_tenant(
            tenant_id=tenant_uuid,
            test_scenario=None  # Full production workflow
        )

        # Get the latest run for this tenant
        repo = OrchestrationRunRepository(db)
        latest_run = await repo.get_latest_run_for_tenant(tenant_uuid)

        # Build response; every run-derived field falls back to a neutral
        # value when the tenant has no run yet
        response = OrchestratorWorkflowResponse(
            success=result.get('success', False),
            message=result.get('message', 'Daily workflow completed successfully'),
            tenant_id=tenant_id,
            run_id=str(latest_run.id) if latest_run else None,
            forecasting_completed=latest_run.forecasting_status == 'success' if latest_run else False,
            production_completed=latest_run.production_status == 'success' if latest_run else False,
            procurement_completed=latest_run.procurement_status == 'success' if latest_run else False,
            notifications_sent=latest_run.notification_status == 'success' if latest_run else False,
            summary={
                'run_number': latest_run.run_number if latest_run else 0,
                'forecasts_generated': latest_run.forecasts_generated if latest_run else 0,
                'production_batches_created': latest_run.production_batches_created if latest_run else 0,
                'purchase_orders_created': latest_run.purchase_orders_created if latest_run else 0,
                'notifications_sent': latest_run.notifications_sent if latest_run else 0,
                'duration_seconds': latest_run.duration_seconds if latest_run else 0
            }
        )
        logger.info("Daily workflow completed",
                    tenant_id=tenant_id,
                    success=response.success,
                    run_id=response.run_id)
        return response
    except HTTPException:
        # Preserve status codes raised deliberately further down the stack
        raise
    except Exception as e:
        logger.error("Daily workflow failed",
                     tenant_id=tenant_id,
                     error=str(e),
                     exc_info=True)
        raise HTTPException(status_code=500, detail=f"Daily workflow failed: {str(e)}")
@router.get("/health")
async def orchestrator_health():
    """Return a static health payload for the orchestrator service."""
    payload = dict(
        status="healthy",
        service="orchestrator",
        message="Orchestrator service is running",
    )
    return payload
@router.get("/runs", response_model=dict)
async def list_orchestration_runs(
    tenant_id: str,
    limit: int = 10,
    offset: int = 0,
    db: AsyncSession = Depends(get_db)
):
    """
    List orchestration runs for a tenant

    Args:
        tenant_id: Tenant ID
        limit: Maximum number of runs to return
        offset: Number of runs to skip
        db: Database session

    Returns:
        Dict with the serialized runs plus ``total``, ``limit`` and
        ``offset``. ``total`` is the number of runs in this page
        (``len(runs)``), not a count of all runs for the tenant.

    Raises:
        HTTPException: 400 for a malformed tenant ID, 500 on any other failure.
    """
    # Validate the tenant ID on its own so that only a malformed ID maps to
    # 400; a ValueError raised by the repository is a server error.
    try:
        tenant_uuid = uuid.UUID(tenant_id)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=f"Invalid tenant ID: {str(e)}")

    try:
        repo = OrchestrationRunRepository(db)
        runs = await repo.list_runs(
            tenant_id=tenant_uuid,
            limit=limit,
            offset=offset
        )
        return {
            "runs": [
                {
                    "id": str(run.id),
                    "run_number": run.run_number,
                    "status": run.status.value,
                    "started_at": run.started_at.isoformat() if run.started_at else None,
                    "completed_at": run.completed_at.isoformat() if run.completed_at else None,
                    "duration_seconds": run.duration_seconds,
                    "forecasts_generated": run.forecasts_generated,
                    "batches_created": run.production_batches_created,
                    "pos_created": run.purchase_orders_created
                }
                for run in runs
            ],
            "total": len(runs),
            "limit": limit,
            "offset": offset
        }
    except Exception as e:
        logger.error("Error listing orchestration runs",
                     tenant_id=tenant_id,
                     error=str(e))
        raise HTTPException(status_code=500, detail=str(e))
@router.get("/last-run")
async def get_last_orchestration_run(
    tenant_id: str,
    db: AsyncSession = Depends(get_db)
):
    """
    Get timestamp of last orchestration run

    Lightweight endpoint for health status frontend migration (Phase 4).
    Returns only the start timestamp and run number of the run returned by
    ``get_latest_run_for_tenant``.

    NOTE(review): the original description says "most recent completed run";
    whether the repository filters by status is not visible here - confirm.

    Args:
        tenant_id: Tenant ID
        db: Database session

    Returns:
        Dict with timestamp and runNumber (both None if no runs)

    Raises:
        HTTPException: 400 for a malformed tenant ID, 500 on any other failure.
    """
    # Validate the tenant ID on its own so that only a malformed ID maps to
    # 400; a ValueError raised by the repository is a server error.
    try:
        tenant_uuid = uuid.UUID(tenant_id)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=f"Invalid tenant ID: {str(e)}")

    try:
        repo = OrchestrationRunRepository(db)
        latest_run = await repo.get_latest_run_for_tenant(tenant_uuid)
        if not latest_run:
            return {"timestamp": None, "runNumber": None}
        return {
            "timestamp": latest_run.started_at.isoformat() if latest_run.started_at else None,
            "runNumber": latest_run.run_number
        }
    except Exception as e:
        logger.error("Error getting last orchestration run",
                     tenant_id=tenant_id,
                     error=str(e))
        raise HTTPException(status_code=500, detail=str(e))