New alert system and control panel page

Urtzi Alfaro
2025-11-27 15:52:40 +01:00
parent 1a2f4602f3
commit e902419b6e
178 changed files with 20982 additions and 6944 deletions

View File

@@ -55,23 +55,26 @@ class HeadlineData(BaseModel):
class HealthChecklistItem(BaseModel):
"""Individual item in health checklist"""
icon: str = Field(..., description="Icon name: check, warning, alert")
"""Individual item in tri-state health checklist"""
icon: str = Field(..., description="Icon name: check, warning, alert, ai_handled")
text: Optional[str] = Field(None, description="Deprecated: Use textKey instead")
textKey: Optional[str] = Field(None, description="i18n translation key")
textParams: Optional[Dict[str, Any]] = Field(None, description="Parameters for i18n translation")
actionRequired: bool = Field(..., description="Whether action is required")
status: str = Field(..., description="Tri-state status: good, ai_handled, needs_you")
actionPath: Optional[str] = Field(None, description="Path to navigate for action")
class BakeryHealthStatusResponse(BaseModel):
"""Overall bakery health status"""
"""Overall bakery health status with tri-state checklist"""
status: str = Field(..., description="Health status: green, yellow, red")
headline: HeadlineData = Field(..., description="i18n-ready status headline")
lastOrchestrationRun: Optional[str] = Field(None, description="ISO timestamp of last orchestration")
nextScheduledRun: str = Field(..., description="ISO timestamp of next scheduled run")
checklistItems: List[HealthChecklistItem] = Field(..., description="Status checklist")
checklistItems: List[HealthChecklistItem] = Field(..., description="Tri-state status checklist")
criticalIssues: int = Field(..., description="Count of critical issues")
pendingActions: int = Field(..., description="Count of pending actions")
aiPreventedIssues: int = Field(0, description="Count of issues AI prevented")
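For illustration, a resolved checklist entry using the new tri-state fields might be built like this (a minimal sketch; the i18n key and parameters are made-up placeholders):
item = HealthChecklistItem(
    icon="ai_handled",
    textKey="dashboard.health.low_stock_resolved",   # hypothetical i18n key
    textParams={"ingredient": "Flour T55"},          # hypothetical parameter
    actionRequired=False,
    status="ai_handled",
    actionPath=None,
)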
class ReasoningInputs(BaseModel):
@@ -207,10 +210,10 @@ async def get_bakery_health_status(
db: AsyncSession = Depends(get_db)
) -> BakeryHealthStatusResponse:
"""
Get overall bakery health status
Get overall bakery health status with tri-state checklist
This is the top-level indicator showing if the bakery is running smoothly
or if there are issues requiring attention.
or if there are issues requiring attention. Includes AI-prevented issues.
"""
try:
# Try to get from cache
@@ -227,11 +230,19 @@ async def get_bakery_health_status(
async def fetch_alerts():
try:
alerts_data = await alerts_client.get_alerts_summary(tenant_id) or {}
return alerts_data.get("critical_count", 0)
alerts_data = await alerts_client.get_alerts(tenant_id, limit=100) or {}
alerts_list = alerts_data.get("alerts", [])
# Count critical alerts
critical_count = sum(1 for a in alerts_list if a.get('priority_level') == 'CRITICAL')
# Count AI prevented issues
prevented_count = sum(1 for a in alerts_list if a.get('type_class') == 'prevented_issue')
return critical_count, prevented_count, alerts_list
except Exception as e:
logger.warning(f"Failed to fetch alerts: {e}")
return 0
return 0, 0, []
async def fetch_pending_pos():
try:
@@ -260,24 +271,28 @@ async def get_bakery_health_status(
return 0
# Execute all fetches in parallel
critical_alerts, pending_approvals, production_delays, out_of_stock_count = await asyncio.gather(
alerts_result, pending_approvals, production_delays, out_of_stock_count = await asyncio.gather(
fetch_alerts(),
fetch_pending_pos(),
fetch_production_delays(),
fetch_inventory()
)
critical_alerts, ai_prevented_count, all_alerts = alerts_result
# System errors (would come from monitoring system)
system_errors = 0
# Calculate health status
# Calculate health status with tri-state checklist
health_status = await dashboard_service.get_bakery_health_status(
tenant_id=tenant_id,
critical_alerts=critical_alerts,
pending_approvals=pending_approvals,
production_delays=production_delays,
out_of_stock_count=out_of_stock_count,
system_errors=system_errors
system_errors=system_errors,
ai_prevented_count=ai_prevented_count,
action_needed_alerts=all_alerts
)
# Cache the result
@@ -501,6 +516,116 @@ async def get_production_timeline(
raise HTTPException(status_code=500, detail=str(e))
@router.get("/unified-action-queue")
async def get_unified_action_queue(
tenant_id: str,
db: AsyncSession = Depends(get_db)
) -> Dict[str, Any]:
"""
Get unified action queue with time-based grouping
Combines all alerts (PO approvals, delivery tracking, production, etc.)
into URGENT (<6h), TODAY (<24h), and THIS WEEK (<7d) sections.
"""
try:
dashboard_service = DashboardService(db)
# Fetch all alerts from alert processor
alerts_data = await alerts_client.get_alerts(tenant_id, limit=100) or {}
alerts = alerts_data.get("alerts", [])
# Build unified queue
action_queue = await dashboard_service.get_unified_action_queue(
tenant_id=tenant_id,
alerts=alerts
)
return action_queue
except Exception as e:
logger.error(f"Error getting unified action queue: {e}", exc_info=True)
raise HTTPException(status_code=500, detail=str(e))
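As a rough sketch of the grouping described in the docstring (the real implementation lives in DashboardService.get_unified_action_queue and is not part of this hunk; deadline_hours is an assumed field name):
def bucket_alerts(alerts: list) -> dict:
    """Group alerts into URGENT (<6h), TODAY (<24h), and THIS WEEK (<7d)."""
    queue = {"urgent": [], "today": [], "this_week": []}
    for alert in alerts:
        hours_left = alert.get("deadline_hours")  # assumed field
        if hours_left is None:
            continue
        if hours_left < 6:
            queue["urgent"].append(alert)
        elif hours_left < 24:
            queue["today"].append(alert)
        elif hours_left < 24 * 7:
            queue["this_week"].append(alert)
    return queue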
@router.get("/execution-progress")
async def get_execution_progress(
tenant_id: str,
db: AsyncSession = Depends(get_db)
) -> Dict[str, Any]:
"""
Get execution progress for today's plan
Shows plan vs actual for production batches, deliveries, and approvals
"""
try:
dashboard_service = DashboardService(db)
# Fetch today's data in parallel
async def fetch_todays_batches():
try:
batch_data = await production_client.get_todays_batches(tenant_id)
if batch_data:
return batch_data.get("batches", [])
return []
except Exception as e:
logger.warning(f"Failed to fetch today's batches: {e}")
return []
async def fetch_expected_deliveries():
try:
# Get POs with expected deliveries today
from datetime import datetime, timedelta, timezone
pos_result = await procurement_client.get_pending_purchase_orders(tenant_id, limit=100)
if pos_result and isinstance(pos_result, list):
today_start = datetime.now(timezone.utc).replace(hour=0, minute=0, second=0, microsecond=0)
today_end = today_start.replace(hour=23, minute=59, second=59)
deliveries_today = []
for po in pos_result:
expected_date = po.get("expected_delivery_date")
if expected_date:
if isinstance(expected_date, str):
expected_date = datetime.fromisoformat(expected_date.replace('Z', '+00:00'))
if today_start <= expected_date <= today_end:
deliveries_today.append(po)
return deliveries_today
return []
except Exception as e:
logger.warning(f"Failed to fetch expected deliveries: {e}")
return []
async def fetch_pending_approvals():
try:
po_data = await procurement_client.get_pending_purchase_orders(tenant_id, limit=100) or []
return len(po_data) if isinstance(po_data, list) else 0
except Exception as e:
logger.warning(f"Failed to fetch pending approvals: {e}")
return 0
# Execute in parallel
todays_batches, expected_deliveries, pending_approvals = await asyncio.gather(
fetch_todays_batches(),
fetch_expected_deliveries(),
fetch_pending_approvals()
)
# Calculate progress
progress = await dashboard_service.get_execution_progress(
tenant_id=tenant_id,
todays_batches=todays_batches,
expected_deliveries=expected_deliveries,
pending_approvals=pending_approvals
)
return progress
except Exception as e:
logger.error(f"Error getting execution progress: {e}", exc_info=True)
raise HTTPException(status_code=500, detail=str(e))
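A minimal sketch of the plan-vs-actual summary this endpoint is meant to return (the real shape comes from DashboardService.get_execution_progress; the field and status names below are assumptions):
def summarize_progress(todays_batches, expected_deliveries, pending_approvals):
    completed = sum(1 for b in todays_batches if b.get("status") == "completed")     # assumed status value
    received = sum(1 for d in expected_deliveries if d.get("status") == "received")  # assumed status value
    return {
        "batches": {"planned": len(todays_batches), "completed": completed},
        "deliveries": {"expected": len(expected_deliveries), "received": received},
        "pending_approvals": pending_approvals,
    }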
@router.get("/insights", response_model=InsightsResponse)
async def get_insights(
tenant_id: str,
@@ -575,35 +700,32 @@ async def get_insights(
async def fetch_savings():
try:
# Get recent POs (last 7 days) and sum up optimization savings
seven_days_ago = datetime.now(timezone.utc) - timedelta(days=7)
# Get prevented issue savings from alert analytics
analytics = await alerts_client.get_dashboard_analytics(tenant_id, days=7)
pos_result = await procurement_client.get_pending_purchase_orders(tenant_id, limit=200)
if pos_result and isinstance(pos_result, list):
weekly_savings = 0
# Calculate savings from price optimization
for po in pos_result:
# Check if PO was created in last 7 days
created_at = po.get("created_at")
if created_at:
if isinstance(created_at, str):
created_at = datetime.fromisoformat(created_at.replace('Z', '+00:00'))
if created_at >= seven_days_ago:
# Sum up savings from optimization
optimization_data = po.get("optimization_data", {})
if isinstance(optimization_data, dict):
savings = optimization_data.get("savings", 0) or 0
weekly_savings += float(savings)
if analytics:
weekly_savings = analytics.get('estimated_savings_eur', 0)
prevented_count = analytics.get('prevented_issues_count', 0)
# Calculate trend from period comparison
period_comparison = analytics.get('period_comparison', {})
current_prevented = period_comparison.get('current_prevented', 0)
previous_prevented = period_comparison.get('previous_prevented', 0)
trend_percentage = 0
if previous_prevented > 0:
trend_percentage = ((current_prevented - previous_prevented) / previous_prevented) * 100
# Default trend percentage (would need historical data for real trend)
return {
"weekly_savings": round(weekly_savings, 2),
"trend_percentage": 12 if weekly_savings > 0 else 0
"trend_percentage": round(trend_percentage, 1),
"prevented_count": prevented_count
}
return {"weekly_savings": 0, "trend_percentage": 0}
return {"weekly_savings": 0, "trend_percentage": 0, "prevented_count": 0}
except Exception as e:
logger.warning(f"Failed to calculate savings data: {e}")
return {"weekly_savings": 0, "trend_percentage": 0}
return {"weekly_savings": 0, "trend_percentage": 0, "prevented_count": 0}
# Execute all fetches in parallel
sustainability_data, inventory_data, delivery_data, savings_data = await asyncio.gather(

View File

@@ -0,0 +1,181 @@
"""
Internal API for Alert Intelligence Service
Provides orchestrator context for alert enrichment
"""
from fastapi import APIRouter, Header, HTTPException, Query
from typing import Optional, List, Dict, Any
from datetime import datetime, timedelta
from uuid import UUID
from pydantic import BaseModel
router = APIRouter(prefix="/api/internal", tags=["internal"])
class OrchestrationAction(BaseModel):
"""Recent orchestration action"""
id: str
type: str # purchase_order, production_batch
status: str # created, pending_approval, approved, completed
delivery_date: Optional[datetime]
reasoning: Optional[Dict[str, Any]]
estimated_resolution: Optional[datetime]
created_at: datetime
class RecentActionsResponse(BaseModel):
"""Response with recent orchestrator actions"""
actions: List[OrchestrationAction]
count: int
@router.get("/recent-actions", response_model=RecentActionsResponse)
async def get_recent_actions(
tenant_id: str = Query(..., description="Tenant ID"),
ingredient_id: Optional[str] = Query(None, description="Filter by ingredient"),
product_id: Optional[str] = Query(None, description="Filter by product"),
hours_ago: int = Query(24, description="Number of hours to look back"),
x_internal_service: str = Header(None, description="Internal service authentication")
):
"""
Get recent orchestrator actions for alert context enrichment.
Only accessible by internal services (alert-intelligence).
Returns orchestration runs with details about POs created, batches adjusted, etc.
This helps the alert system understand whether the AI has already addressed similar issues.
"""
from shared.database.base import create_database_manager
from ..core.config import get_settings
from ..models.orchestration_run import OrchestrationRun, OrchestrationStatus
from sqlalchemy import select, and_, desc
import structlog
logger = structlog.get_logger()
# Simple internal service authentication
if x_internal_service != "alert-intelligence":
raise HTTPException(status_code=403, detail="Access denied")
try:
settings = get_settings()
db_manager = create_database_manager(settings.DATABASE_URL, "orchestrator")
async with db_manager.get_session() as session:
cutoff_time = datetime.utcnow() - timedelta(hours=hours_ago)
# Query recent orchestration runs
query = select(OrchestrationRun).where(
and_(
OrchestrationRun.tenant_id == UUID(tenant_id),
OrchestrationRun.created_at >= cutoff_time,
OrchestrationRun.status.in_([
OrchestrationStatus.completed,
OrchestrationStatus.partial_success
])
)
).order_by(desc(OrchestrationRun.created_at))
result = await session.execute(query)
runs = result.scalars().all()
actions = []
for run in runs:
run_metadata = run.run_metadata or {}
# Add purchase order actions
if run.purchase_orders_created > 0:
po_details = run_metadata.get('purchase_orders', [])
# If metadata has PO details, use them
if po_details:
for po in po_details:
# Filter by ingredient if specified
if ingredient_id:
po_items = po.get('items', [])
has_ingredient = any(
item.get('ingredient_id') == ingredient_id
for item in po_items
)
if not has_ingredient:
continue
actions.append(OrchestrationAction(
id=po.get('id', str(run.id)),
type="purchase_order",
status=po.get('status', 'created'),
delivery_date=po.get('delivery_date'),
reasoning=run_metadata.get('reasoning'),
estimated_resolution=po.get('delivery_date'),
created_at=run.created_at
))
else:
# Fallback: create generic action from run
actions.append(OrchestrationAction(
id=str(run.id),
type="purchase_order",
status="created",
delivery_date=None,
reasoning=run_metadata.get('reasoning'),
estimated_resolution=None,
created_at=run.created_at
))
# Add production batch actions
if run.production_batches_created > 0:
batch_details = run_metadata.get('production_batches', [])
if batch_details:
for batch in batch_details:
# Filter by product if specified
if product_id and batch.get('product_id') != product_id:
continue
actions.append(OrchestrationAction(
id=batch.get('id', str(run.id)),
type="production_batch",
status=batch.get('status', 'created'),
delivery_date=None,
reasoning=run_metadata.get('reasoning'),
estimated_resolution=batch.get('scheduled_date'),
created_at=run.created_at
))
else:
# Fallback: create generic action from run
if not product_id: # Only add if no product filter
actions.append(OrchestrationAction(
id=str(run.id),
type="production_batch",
status="created",
delivery_date=None,
reasoning=run_metadata.get('reasoning'),
estimated_resolution=None,
created_at=run.created_at
))
logger.info(
"recent_actions_fetched",
tenant_id=tenant_id,
hours_ago=hours_ago,
action_count=len(actions),
ingredient_id=ingredient_id,
product_id=product_id
)
return RecentActionsResponse(
actions=actions,
count=len(actions)
)
except Exception as e:
logger.error("error_fetching_recent_actions", error=str(e), tenant_id=tenant_id)
raise HTTPException(
status_code=500,
detail=f"Failed to fetch recent actions: {str(e)}"
)
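For reference, a caller on the alert-intelligence side might hit this endpoint roughly as sketched below (the orchestrator base URL is a placeholder; only the path, query parameters, and header come from the route above):
import httpx
from typing import Optional
async def fetch_recent_actions(tenant_id: str, ingredient_id: Optional[str] = None) -> dict:
    params = {"tenant_id": tenant_id, "hours_ago": 24}
    if ingredient_id:
        params["ingredient_id"] = ingredient_id
    async with httpx.AsyncClient(timeout=10.0) as client:
        response = await client.get(
            "http://orchestrator:8000/api/internal/recent-actions",  # placeholder base URL
            params=params,
            headers={"X-Internal-Service": "alert-intelligence"},
        )
        response.raise_for_status()
        return response.json()  # {"actions": [...], "count": N}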
@router.get("/health")
async def internal_health():
"""Internal health check"""
return {"status": "healthy", "api": "internal"}

View File

@@ -16,6 +16,12 @@ from app.models.orchestration_run import OrchestrationRun
import uuid
from datetime import datetime, timezone, timedelta
from typing import Optional
import sys
from pathlib import Path
# Add shared utilities to path
sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent))
from shared.utils.demo_dates import adjust_date_for_demo, BASE_REFERENCE_DATE
router = APIRouter()
logger = structlog.get_logger()
@@ -24,6 +30,27 @@ logger = structlog.get_logger()
INTERNAL_API_KEY = os.getenv("INTERNAL_API_KEY", "dev-internal-key-change-in-production")
async def ensure_unique_run_number(db: AsyncSession, base_run_number: str) -> str:
"""Ensure the run number is unique by appending a suffix if needed"""
proposed_run_number = base_run_number
# Check if the proposed run number already exists in the database
while True:
result = await db.execute(
select(OrchestrationRun)
.where(OrchestrationRun.run_number == proposed_run_number)
)
existing_run = result.scalar_one_or_none()
if not existing_run:
# Run number is unique, return it
return proposed_run_number
# Generate a new run number with an additional random suffix
random_suffix = str(uuid.uuid4())[:4].upper()
proposed_run_number = f"{base_run_number[:50-len(random_suffix)-1]}-{random_suffix}"
def verify_internal_api_key(x_internal_api_key: str = Header(...)):
"""Verify internal API key for service-to-service communication"""
if x_internal_api_key != INTERNAL_API_KEY:
@@ -86,38 +113,60 @@ async def clone_demo_data(
# Clone each orchestration run with date adjustment
for base_run in base_runs:
# Calculate time offset: how old was this run relative to when it was created
# We'll adjust all timestamps to be relative to the session creation time
# Use the shared date adjustment utility to ensure dates are always in the past
# This calculates the offset from BASE_REFERENCE_DATE and applies it to session creation time
if base_run.started_at:
# Calculate how many days ago this run was from a reference point
# Use a fixed reference date for consistency
reference_date = datetime(2025, 1, 15, 12, 0, 0, tzinfo=timezone.utc)
time_offset = base_run.started_at - reference_date
# Apply this offset to the current reference time
new_started_at = reference_time + time_offset
new_started_at = adjust_date_for_demo(
base_run.started_at, reference_time, BASE_REFERENCE_DATE
)
else:
new_started_at = reference_time - timedelta(hours=2)
# Adjust completed_at if it exists
if base_run.completed_at and base_run.started_at:
duration = base_run.completed_at - base_run.started_at
new_completed_at = new_started_at + duration
# Adjust completed_at using the same utility
if base_run.completed_at:
new_completed_at = adjust_date_for_demo(
base_run.completed_at, reference_time, BASE_REFERENCE_DATE
)
# Ensure completion is after start (guards against edge cases)
if new_completed_at and new_started_at and new_completed_at < new_started_at:
# Preserve original duration
duration = base_run.completed_at - base_run.started_at
new_completed_at = new_started_at + duration
else:
new_completed_at = None
# Adjust all step timestamps proportionally
# Adjust all step timestamps using the shared utility
def adjust_timestamp(original_timestamp):
if not original_timestamp or not base_run.started_at:
if not original_timestamp:
return None
step_offset = original_timestamp - base_run.started_at
return new_started_at + step_offset
return adjust_date_for_demo(original_timestamp, reference_time, BASE_REFERENCE_DATE)
# Create new orchestration run for virtual tenant
# Update run_number to have current year instead of original year, and make it unique
current_year = reference_time.year
# Extract type from original run number and create new format
parts = base_run.run_number.split('-')
if len(parts) >= 4:
tenant_prefix = parts[1] if len(parts) > 1 else "DEMO"
type_code = parts[2] if len(parts) > 2 else "TST"
original_index = parts[3] if len(parts) > 3 else "001"
# Generate a more robust unique suffix to avoid collisions
# Use UUID instead of just session_id substring to ensure uniqueness
unique_suffix = str(uuid.uuid4())[:8].upper()
proposed_run_number = f"ORCH-{tenant_prefix}-{type_code}-{current_year}-{original_index}-{unique_suffix}"
else:
unique_suffix = str(uuid.uuid4())[:12].upper()
proposed_run_number = f"{base_run.run_number}-{unique_suffix}"
# Ensure the run number is truly unique by checking against existing entries
# This prevents collisions, especially in high-concurrency scenarios
run_number = await ensure_unique_run_number(db, proposed_run_number)
new_run = OrchestrationRun(
id=uuid.uuid4(),
tenant_id=virtual_uuid,
run_number=f"{base_run.run_number}-DEMO",
run_number=run_number,
status=base_run.status,
run_type=base_run.run_type,
priority=base_run.priority,
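A hedged sketch of what the shared adjust_date_for_demo helper is described as doing in the comments above (the real implementation lives in shared.utils.demo_dates and may differ, e.g. with extra clamping to keep dates in the past):
from datetime import datetime
def adjust_date_for_demo(original: datetime, reference_time: datetime, base_reference: datetime) -> datetime:
    # Keep the timestamp's offset from the base reference date, but anchor it
    # to the demo session's creation time instead.
    return reference_time + (original - base_reference)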

View File

@@ -107,11 +107,18 @@ class OrchestratorSettings(BaseServiceSettings):
REDIS_HOST: str = os.getenv("REDIS_HOST", "localhost")
REDIS_PORT: int = int(os.getenv("REDIS_PORT", "6379"))
REDIS_DB: int = int(os.getenv("REDIS_DB", "0"))
REDIS_PASSWORD: str = os.getenv("REDIS_PASSWORD", "")
REDIS_TLS_ENABLED: str = os.getenv("REDIS_TLS_ENABLED", "false")
CACHE_ENABLED: bool = os.getenv("CACHE_ENABLED", "true").lower() == "true"
CACHE_TTL_HEALTH: int = int(os.getenv("CACHE_TTL_HEALTH", "30")) # 30 seconds
CACHE_TTL_INSIGHTS: int = int(os.getenv("CACHE_TTL_INSIGHTS", "120")) # 2 minutes
CACHE_TTL_INSIGHTS: int = int(os.getenv("CACHE_TTL_INSIGHTS", "60")) # 1 minute (reduced for faster metrics updates)
CACHE_TTL_SUMMARY: int = int(os.getenv("CACHE_TTL_SUMMARY", "60")) # 1 minute
# Global settings instance
settings = OrchestratorSettings()
def get_settings():
"""Get the global settings instance"""
return settings

View File

@@ -95,8 +95,10 @@ service.setup_standard_endpoints()
# BUSINESS: Orchestration operations
from app.api.orchestration import router as orchestration_router
from app.api.dashboard import router as dashboard_router
from app.api.internal import router as internal_router
service.add_router(orchestration_router)
service.add_router(dashboard_router)
service.add_router(internal_router)
# INTERNAL: Service-to-service endpoints
from app.api import internal_demo

View File

@@ -0,0 +1,420 @@
"""
Delivery Tracking Service
Tracks purchase order deliveries and generates appropriate alerts:
- DELIVERY_SCHEDULED: When PO is approved and delivery date is set
- DELIVERY_ARRIVING_SOON: 2 hours before delivery window
- DELIVERY_OVERDUE: 30 minutes after expected delivery time
- STOCK_RECEIPT_INCOMPLETE: If delivery not marked as received
Integrates with procurement service to get PO details and expected delivery windows.
"""
import structlog
from datetime import datetime, timedelta, timezone
from typing import Dict, Any, Optional, List
from uuid import UUID
import httpx
from shared.schemas.alert_types import AlertTypeConstants
from shared.alerts.base_service import BaseAlertService
logger = structlog.get_logger()
class DeliveryTrackingService:
"""Tracks deliveries and generates lifecycle alerts"""
def __init__(self, config, db_manager, redis_client, rabbitmq_client):
self.config = config
self.db_manager = db_manager
self.redis = redis_client
self.rabbitmq = rabbitmq_client
self.alert_service = BaseAlertService(config)
self.http_client = httpx.AsyncClient(
timeout=30.0,
follow_redirects=True
)
async def check_expected_deliveries(self, tenant_id: UUID) -> Dict[str, int]:
"""
Check all expected deliveries for a tenant and generate appropriate alerts.
Called by scheduled job (runs every hour).
Returns:
Dict with counts: {
'arriving_soon': int,
'overdue': int,
'receipt_incomplete': int
}
"""
logger.info("Checking expected deliveries", tenant_id=str(tenant_id))
counts = {
'arriving_soon': 0,
'overdue': 0,
'receipt_incomplete': 0
}
try:
# Get expected deliveries from procurement service
deliveries = await self._get_expected_deliveries(tenant_id)
now = datetime.now(timezone.utc)
for delivery in deliveries:
po_id = delivery.get('po_id')
po_number = delivery.get('po_number')
expected_date = delivery.get('expected_delivery_date')
delivery_window_hours = delivery.get('delivery_window_hours', 4) # Default 4h window
status = delivery.get('status')
if not expected_date:
continue
# Parse expected date
if isinstance(expected_date, str):
expected_date = datetime.fromisoformat(expected_date)
# Make timezone-aware
if expected_date.tzinfo is None:
expected_date = expected_date.replace(tzinfo=timezone.utc)
# Calculate delivery window
window_start = expected_date
window_end = expected_date + timedelta(hours=delivery_window_hours)
# Check if arriving soon (2 hours before window)
arriving_soon_time = window_start - timedelta(hours=2)
if arriving_soon_time <= now < window_start and status == 'approved':
if await self._send_arriving_soon_alert(tenant_id, delivery):
counts['arriving_soon'] += 1
# Check if overdue (30 min after window end)
overdue_time = window_end + timedelta(minutes=30)
if now >= overdue_time and status == 'approved':
if await self._send_overdue_alert(tenant_id, delivery):
counts['overdue'] += 1
# Check if receipt incomplete (delivery window passed, not marked received)
if now > window_end and status == 'approved':
if await self._send_receipt_incomplete_alert(tenant_id, delivery):
counts['receipt_incomplete'] += 1
logger.info(
"Delivery check completed",
tenant_id=str(tenant_id),
**counts
)
except Exception as e:
logger.error(
"Error checking deliveries",
tenant_id=str(tenant_id),
error=str(e)
)
return counts
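To make the thresholds concrete: for a PO expected at 10:00 with the default 4-hour window, "arriving soon" can fire between 08:00 and 10:00, the window closes at 14:00, the overdue alert becomes eligible at 14:30, and the receipt-incomplete check applies any time after 14:00 while the PO is still approved. A simplified, single-result sketch of the same thresholds (the loop above can raise more than one alert for the same PO):
from datetime import datetime, timedelta, timezone
from typing import Optional
def delivery_phase(expected: datetime, window_hours: int = 4, now: Optional[datetime] = None) -> str:
    now = now or datetime.now(timezone.utc)
    window_end = expected + timedelta(hours=window_hours)
    if expected - timedelta(hours=2) <= now < expected:
        return "arriving_soon"
    if now >= window_end + timedelta(minutes=30):
        return "overdue"
    if now > window_end:
        return "receipt_incomplete"
    return "on_track"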
async def _get_expected_deliveries(self, tenant_id: UUID) -> List[Dict[str, Any]]:
"""
Query procurement service for expected deliveries.
Returns:
List of delivery dicts with:
- po_id, po_number, expected_delivery_date
- supplier_id, supplier_name
- line_items (product list)
- status (approved, in_transit, received)
"""
try:
procurement_url = self.config.PROCUREMENT_SERVICE_URL
response = await self.http_client.get(
f"{procurement_url}/api/internal/expected-deliveries",
params={
"tenant_id": str(tenant_id),
"days_ahead": 1, # Check today + tomorrow
"include_overdue": True
},
headers={"X-Internal-Service": "orchestrator"}
)
if response.status_code == 200:
data = response.json()
return data.get('deliveries', [])
else:
logger.warning(
"Failed to get expected deliveries",
status_code=response.status_code,
tenant_id=str(tenant_id)
)
return []
except Exception as e:
logger.error(
"Error fetching expected deliveries",
tenant_id=str(tenant_id),
error=str(e)
)
return []
async def _send_arriving_soon_alert(
self,
tenant_id: UUID,
delivery: Dict[str, Any]
) -> bool:
"""
Send DELIVERY_ARRIVING_SOON alert (2h before delivery window).
This appears in the action queue with "Mark as Received" action.
"""
# Check if already sent
cache_key = f"delivery_alert:arriving:{tenant_id}:{delivery['po_id']}"
if await self.redis.exists(cache_key):
return False
po_number = delivery.get('po_number', 'N/A')
supplier_name = delivery.get('supplier_name', 'Supplier')
expected_date = delivery.get('expected_delivery_date')
line_items = delivery.get('line_items', [])
# Format product list
products = [item['product_name'] for item in line_items[:3]]
product_list = ", ".join(products)
if len(line_items) > 3:
product_list += f" (+{len(line_items) - 3} more)"
# Calculate time until arrival
if isinstance(expected_date, str):
expected_date = datetime.fromisoformat(expected_date)
if expected_date.tzinfo is None:
expected_date = expected_date.replace(tzinfo=timezone.utc)
hours_until = (expected_date - datetime.now(timezone.utc)).total_seconds() / 3600
alert_data = {
"tenant_id": str(tenant_id),
"alert_type": AlertTypeConstants.DELIVERY_ARRIVING_SOON,
"title": f"Delivery arriving soon: {supplier_name}",
"message": f"Purchase order {po_number} expected in ~{hours_until:.1f} hours. Products: {product_list}",
"service": "orchestrator",
"actions": ["mark_delivery_received", "call_supplier"],
"alert_metadata": {
"po_id": delivery['po_id'],
"po_number": po_number,
"supplier_id": delivery.get('supplier_id'),
"supplier_name": supplier_name,
"supplier_phone": delivery.get('supplier_phone'),
"expected_delivery_date": expected_date.isoformat(),
"line_items": line_items,
"hours_until_arrival": hours_until,
"confidence_score": 0.9
}
}
success = await self.alert_service.send_alert(alert_data)
if success:
# Cache for 24 hours to avoid duplicate alerts
await self.redis.set(cache_key, "1", ex=86400)
logger.info(
"Sent arriving soon alert",
po_number=po_number,
supplier=supplier_name
)
return success
async def _send_overdue_alert(
self,
tenant_id: UUID,
delivery: Dict[str, Any]
) -> bool:
"""
Send DELIVERY_OVERDUE alert (30min after expected window).
Critical priority - needs immediate action (call supplier).
"""
# Check if already sent
cache_key = f"delivery_alert:overdue:{tenant_id}:{delivery['po_id']}"
if await self.redis.exists(cache_key):
return False
po_number = delivery.get('po_number', 'N/A')
supplier_name = delivery.get('supplier_name', 'Supplier')
expected_date = delivery.get('expected_delivery_date')
# Calculate how late
if isinstance(expected_date, str):
expected_date = datetime.fromisoformat(expected_date)
if expected_date.tzinfo is None:
expected_date = expected_date.replace(tzinfo=timezone.utc)
hours_late = (datetime.now(timezone.utc) - expected_date).total_seconds() / 3600
alert_data = {
"tenant_id": str(tenant_id),
"alert_type": AlertTypeConstants.DELIVERY_OVERDUE,
"title": f"Delivery overdue: {supplier_name}",
"message": f"Purchase order {po_number} was expected {hours_late:.1f} hours ago. Contact supplier immediately.",
"service": "orchestrator",
"actions": ["call_supplier", "snooze", "report_issue"],
"alert_metadata": {
"po_id": delivery['po_id'],
"po_number": po_number,
"supplier_id": delivery.get('supplier_id'),
"supplier_name": supplier_name,
"supplier_phone": delivery.get('supplier_phone'),
"expected_delivery_date": expected_date.isoformat(),
"hours_late": hours_late,
"financial_impact": delivery.get('total_amount', 0), # Blocked capital
"affected_orders": len(delivery.get('affected_production_batches', [])),
"confidence_score": 1.0
}
}
success = await self.alert_service.send_alert(alert_data)
if success:
# Cache for 48 hours
await self.redis.set(cache_key, "1", ex=172800)
logger.warning(
"Sent overdue delivery alert",
po_number=po_number,
supplier=supplier_name,
hours_late=hours_late
)
return success
async def _send_receipt_incomplete_alert(
self,
tenant_id: UUID,
delivery: Dict[str, Any]
) -> bool:
"""
Send STOCK_RECEIPT_INCOMPLETE alert.
Delivery window has passed but stock not marked as received.
"""
# Check if already sent
cache_key = f"delivery_alert:receipt:{tenant_id}:{delivery['po_id']}"
if await self.redis.exists(cache_key):
return False
po_number = delivery.get('po_number', 'N/A')
supplier_name = delivery.get('supplier_name', 'Supplier')
alert_data = {
"tenant_id": str(tenant_id),
"alert_type": AlertTypeConstants.STOCK_RECEIPT_INCOMPLETE,
"title": f"Confirm stock receipt: {po_number}",
"message": f"Delivery from {supplier_name} should have arrived. Please confirm receipt and log lot details.",
"service": "orchestrator",
"actions": ["complete_stock_receipt", "report_missing"],
"alert_metadata": {
"po_id": delivery['po_id'],
"po_number": po_number,
"supplier_id": delivery.get('supplier_id'),
"supplier_name": supplier_name,
"expected_delivery_date": delivery.get('expected_delivery_date'),
"confidence_score": 0.8
}
}
success = await self.alert_service.send_alert(alert_data)
if success:
# Cache for 7 days
await self.redis.set(cache_key, "1", ex=604800)
logger.info(
"Sent receipt incomplete alert",
po_number=po_number
)
return success
async def mark_delivery_received(
self,
tenant_id: UUID,
po_id: UUID,
received_by_user_id: UUID
) -> Dict[str, Any]:
"""
Mark delivery as received and trigger stock receipt workflow.
This is called when the user clicks the "Mark as Received" action button.
Returns:
Dict with receipt_id and status
"""
try:
# Call inventory service to create draft stock receipt
inventory_url = self.config.INVENTORY_SERVICE_URL
response = await self.http_client.post(
f"{inventory_url}/api/inventory/stock-receipts",
json={
"tenant_id": str(tenant_id),
"po_id": str(po_id),
"received_by_user_id": str(received_by_user_id)
},
headers={"X-Internal-Service": "orchestrator"}
)
if response.status_code in [200, 201]:
receipt_data = response.json()
# Clear delivery alerts
await self._clear_delivery_alerts(tenant_id, po_id)
logger.info(
"Delivery marked as received",
po_id=str(po_id),
receipt_id=receipt_data.get('id')
)
return {
"status": "success",
"receipt_id": receipt_data.get('id'),
"message": "Stock receipt created. Please complete lot details."
}
else:
logger.error(
"Failed to create stock receipt",
status_code=response.status_code,
po_id=str(po_id)
)
return {
"status": "error",
"message": "Failed to create stock receipt"
}
except Exception as e:
logger.error(
"Error marking delivery received",
po_id=str(po_id),
error=str(e)
)
return {
"status": "error",
"message": str(e)
}
async def _clear_delivery_alerts(self, tenant_id: UUID, po_id: UUID):
"""Clear all delivery-related alerts for a PO once received"""
alert_types = [
"arriving",
"overdue",
"receipt"
]
for alert_type in alert_types:
cache_key = f"delivery_alert:{alert_type}:{tenant_id}:{po_id}"
await self.redis.delete(cache_key)
logger.debug("Cleared delivery alerts", po_id=str(po_id))
async def close(self):
"""Close HTTP client on shutdown"""
await self.http_client.aclose()
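For quick reference, the deduplication keys and TTLs used by this service, collected in one place (values taken directly from the methods above; the braces stand for the formatted tenant and PO IDs):
DELIVERY_ALERT_DEDUP_TTLS = {
    "delivery_alert:arriving:{tenant_id}:{po_id}": 86400,   # 24 hours
    "delivery_alert:overdue:{tenant_id}:{po_id}": 172800,   # 48 hours
    "delivery_alert:receipt:{tenant_id}:{po_id}": 604800,   # 7 days
}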

View File

@@ -0,0 +1,275 @@
"""
Orchestration Notification Service
Emits informational notifications for orchestration events:
- orchestration_run_started: When an orchestration run begins
- orchestration_run_completed: When an orchestration run finishes successfully
- action_created: When the orchestrator creates an action (PO, batch, adjustment)
These are NOTIFICATIONS (not alerts) - informational state changes that don't require user action.
"""
import logging
from datetime import datetime, timezone
from typing import Optional, Dict, Any, List
from sqlalchemy.orm import Session
from shared.schemas.event_classification import RawEvent, EventClass, EventDomain
from shared.alerts.base_service import BaseAlertService
logger = logging.getLogger(__name__)
class OrchestrationNotificationService(BaseAlertService):
"""
Service for emitting orchestration notifications (informational state changes).
"""
def __init__(self, rabbitmq_url: str = None):
super().__init__(service_name="orchestrator", rabbitmq_url=rabbitmq_url)
async def emit_orchestration_run_started_notification(
self,
db: Session,
tenant_id: str,
run_id: str,
run_type: str, # 'scheduled', 'manual', 'triggered'
scope: str, # 'full', 'inventory_only', 'production_only'
) -> None:
"""
Emit notification when an orchestration run starts.
Args:
db: Database session
tenant_id: Tenant ID
run_id: Orchestration run ID
run_type: Type of run
scope: Scope of run
"""
try:
event = RawEvent(
tenant_id=tenant_id,
event_class=EventClass.NOTIFICATION,
event_domain=EventDomain.OPERATIONS,
event_type="orchestration_run_started",
title="Orchestration Started",
message=f"AI orchestration run started ({run_type}, scope: {scope})",
service="orchestrator",
event_metadata={
"run_id": run_id,
"run_type": run_type,
"scope": scope,
"started_at": datetime.now(timezone.utc).isoformat(),
},
timestamp=datetime.now(timezone.utc),
)
await self.publish_item(tenant_id, event.dict(), item_type="notification")
logger.info(
f"Orchestration run started notification emitted: {run_id}",
extra={"tenant_id": tenant_id, "run_id": run_id}
)
except Exception as e:
logger.error(
f"Failed to emit orchestration run started notification: {e}",
extra={"tenant_id": tenant_id, "run_id": run_id},
exc_info=True,
)
async def emit_orchestration_run_completed_notification(
self,
db: Session,
tenant_id: str,
run_id: str,
duration_seconds: float,
actions_created: int,
actions_by_type: Dict[str, int], # e.g., {'purchase_order': 2, 'production_batch': 3}
status: str = "success",
) -> None:
"""
Emit notification when an orchestration run completes.
Args:
db: Database session
tenant_id: Tenant ID
run_id: Orchestration run ID
duration_seconds: Run duration
actions_created: Total actions created
actions_by_type: Breakdown of actions by type
status: Run status (success, partial, failed)
"""
try:
# Build message with action summary
if actions_created == 0:
message = "No actions needed"
else:
action_summary = ", ".join([f"{count} {action_type}" for action_type, count in actions_by_type.items()])
message = f"Created {actions_created} actions: {action_summary}"
message += f" ({duration_seconds:.1f}s)"
event = RawEvent(
tenant_id=tenant_id,
event_class=EventClass.NOTIFICATION,
event_domain=EventDomain.OPERATIONS,
event_type="orchestration_run_completed",
title=f"Orchestration Completed: {status.title()}",
message=message,
service="orchestrator",
event_metadata={
"run_id": run_id,
"status": status,
"duration_seconds": duration_seconds,
"actions_created": actions_created,
"actions_by_type": actions_by_type,
"completed_at": datetime.now(timezone.utc).isoformat(),
},
timestamp=datetime.now(timezone.utc),
)
await self.publish_item(tenant_id, event.dict(), item_type="notification")
logger.info(
f"Orchestration run completed notification emitted: {run_id} ({actions_created} actions)",
extra={"tenant_id": tenant_id, "run_id": run_id}
)
except Exception as e:
logger.error(
f"Failed to emit orchestration run completed notification: {e}",
extra={"tenant_id": tenant_id, "run_id": run_id},
exc_info=True,
)
async def emit_action_created_notification(
self,
db: Session,
tenant_id: str,
run_id: str,
action_id: str,
action_type: str, # 'purchase_order', 'production_batch', 'inventory_adjustment'
action_details: Dict[str, Any], # Type-specific details
reason: str,
estimated_impact: Optional[Dict[str, Any]] = None,
) -> None:
"""
Emit notification when the orchestrator creates an action.
Args:
db: Database session
tenant_id: Tenant ID
run_id: Orchestration run ID
action_id: Created action ID
action_type: Type of action
action_details: Action-specific details
reason: Reason for creating action
estimated_impact: Estimated impact (optional)
"""
try:
# Build title and message based on action type
if action_type == "purchase_order":
title = f"Purchase Order Created: {action_details.get('supplier_name', 'Unknown')}"
message = f"Ordered {action_details.get('items_count', 0)} items - {reason}"
elif action_type == "production_batch":
title = f"Production Batch Scheduled: {action_details.get('product_name', 'Unknown')}"
message = f"Scheduled {action_details.get('quantity', 0)} {action_details.get('unit', 'units')} - {reason}"
elif action_type == "inventory_adjustment":
title = f"Inventory Adjustment: {action_details.get('ingredient_name', 'Unknown')}"
message = f"Adjusted by {action_details.get('quantity', 0)} {action_details.get('unit', 'units')} - {reason}"
else:
title = f"Action Created: {action_type}"
message = reason
event = RawEvent(
tenant_id=tenant_id,
event_class=EventClass.NOTIFICATION,
event_domain=EventDomain.OPERATIONS,
event_type="action_created",
title=title,
message=message,
service="orchestrator",
event_metadata={
"run_id": run_id,
"action_id": action_id,
"action_type": action_type,
"action_details": action_details,
"reason": reason,
"estimated_impact": estimated_impact,
"created_at": datetime.now(timezone.utc).isoformat(),
},
timestamp=datetime.now(timezone.utc),
)
await self.publish_item(tenant_id, event.dict(), item_type="notification")
logger.info(
f"Action created notification emitted: {action_type} - {action_id}",
extra={"tenant_id": tenant_id, "action_id": action_id}
)
except Exception as e:
logger.error(
f"Failed to emit action created notification: {e}",
extra={"tenant_id": tenant_id, "action_id": action_id},
exc_info=True,
)
async def emit_action_completed_notification(
self,
db: Session,
tenant_id: str,
action_id: str,
action_type: str,
action_status: str, # 'approved', 'completed', 'rejected', 'cancelled'
completed_by: Optional[str] = None,
) -> None:
"""
Emit notification when an orchestrator action is completed/resolved.
Args:
db: Database session
tenant_id: Tenant ID
action_id: Action ID
action_type: Type of action
action_status: Final status
completed_by: Who completed it (optional)
"""
try:
message = f"{action_type.replace('_', ' ').title()}: {action_status}"
if completed_by:
message += f" by {completed_by}"
event = RawEvent(
tenant_id=tenant_id,
event_class=EventClass.NOTIFICATION,
event_domain=EventDomain.OPERATIONS,
event_type="action_completed",
title=f"Action {action_status.title()}",
message=message,
service="orchestrator",
event_metadata={
"action_id": action_id,
"action_type": action_type,
"action_status": action_status,
"completed_by": completed_by,
"completed_at": datetime.now(timezone.utc).isoformat(),
},
timestamp=datetime.now(timezone.utc),
)
await self.publish_item(tenant_id, event.dict(), item_type="notification")
logger.info(
f"Action completed notification emitted: {action_id} ({action_status})",
extra={"tenant_id": tenant_id, "action_id": action_id}
)
except Exception as e:
logger.error(
f"Failed to emit action completed notification: {e}",
extra={"tenant_id": tenant_id, "action_id": action_id},
exc_info=True,
)
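A hedged usage sketch for the completed-run notification (how the scheduler actually wires this up is not shown in this diff; the RabbitMQ URL, session, and IDs are placeholders):
# inside an async function, with `session` being an open DB session
notifier = OrchestrationNotificationService(rabbitmq_url="amqp://guest:guest@rabbitmq:5672/")  # placeholder URL
await notifier.emit_orchestration_run_completed_notification(
    db=session,
    tenant_id="11111111-1111-1111-1111-111111111111",   # placeholder tenant
    run_id="run-123",                                   # placeholder run ID
    duration_seconds=42.5,
    actions_created=5,
    actions_by_type={"purchase_order": 2, "production_batch": 3},
    status="success",
)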

View File

@@ -352,6 +352,15 @@ class OrchestratorSchedulerService(BaseAlertService):
ai_insights_posted = saga_result.get('ai_insights_posted', 0)
ai_insights_errors = saga_result.get('ai_insights_errors', [])
# Generate reasoning metadata for the orchestrator context
reasoning_metadata = self._generate_reasoning_metadata(
forecast_data,
production_data,
procurement_data,
ai_insights_generated,
ai_insights_posted
)
await repo.update_run(run_id, {
'status': OrchestrationStatus.completed,
'completed_at': completed_at,
@@ -377,10 +386,107 @@ class OrchestratorSchedulerService(BaseAlertService):
'ai_insights_posted': ai_insights_posted,
'ai_insights_completed_at': completed_at,
'saga_steps_total': total_steps,
'saga_steps_completed': completed_steps
'saga_steps_completed': completed_steps,
'run_metadata': reasoning_metadata
})
await session.commit()
def _generate_reasoning_metadata(
self,
forecast_data: Dict[str, Any],
production_data: Dict[str, Any],
procurement_data: Dict[str, Any],
ai_insights_generated: int,
ai_insights_posted: int
) -> Dict[str, Any]:
"""
Generate reasoning metadata for an orchestration run, to be consumed by the alert processor.
This creates structured reasoning data that the alert processor can use to provide
context when showing AI reasoning to users.
"""
reasoning_metadata = {
'reasoning': {
'type': 'daily_orchestration_summary',
'timestamp': datetime.now(timezone.utc).isoformat(),
'summary': 'Daily orchestration run completed successfully',
'details': {}
},
'purchase_orders': [],
'production_batches': [],
'ai_insights': {
'generated': ai_insights_generated,
'posted': ai_insights_posted
}
}
# Add forecast reasoning
if forecast_data:
reasoning_metadata['reasoning']['details']['forecasting'] = {
'forecasts_created': forecast_data.get('forecasts_created', 0),
'method': 'automated_daily_forecast',
'reasoning': 'Generated forecasts based on historical patterns and seasonal trends'
}
# Add production reasoning
if production_data:
reasoning_metadata['reasoning']['details']['production'] = {
'batches_created': production_data.get('batches_created', 0),
'method': 'demand_based_scheduling',
'reasoning': 'Scheduled production batches based on forecasted demand and inventory levels'
}
# Add procurement reasoning
if procurement_data:
reasoning_metadata['reasoning']['details']['procurement'] = {
'requirements_created': procurement_data.get('requirements_created', 0),
'pos_created': procurement_data.get('pos_created', 0),
'method': 'automated_procurement',
'reasoning': 'Generated procurement plan based on production needs and inventory optimization'
}
# Add purchase order details with reasoning
if procurement_data and procurement_data.get('purchase_orders'):
for po in procurement_data['purchase_orders']:
po_reasoning = {
'id': po.get('id'),
'status': po.get('status', 'created'),
'delivery_date': po.get('delivery_date'),
'reasoning': {
'type': 'inventory_optimization',
'parameters': {
'trigger': 'low_stock_prediction',
'min_depletion_days': po.get('min_depletion_days', 3),
'quantity': po.get('quantity'),
'unit': po.get('unit'),
'supplier': po.get('supplier_name'),
'financial_impact_eur': po.get('estimated_savings_eur', 0)
}
}
}
reasoning_metadata['purchase_orders'].append(po_reasoning)
# Add production batch details with reasoning
if production_data and production_data.get('production_batches'):
for batch in production_data['production_batches']:
batch_reasoning = {
'id': batch.get('id'),
'status': batch.get('status', 'scheduled'),
'scheduled_date': batch.get('scheduled_date'),
'reasoning': {
'type': 'demand_forecasting',
'parameters': {
'trigger': 'forecasted_demand',
'forecasted_quantity': batch.get('forecasted_quantity'),
'product_name': batch.get('product_name'),
'financial_impact_eur': batch.get('estimated_revenue_eur', 0)
}
}
}
reasoning_metadata['production_batches'].append(batch_reasoning)
return reasoning_metadata
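Given the builder above, a trimmed example of what run_metadata can look like for a run that created a single purchase order (IDs and dates are illustrative):
example_run_metadata = {
    "reasoning": {
        "type": "daily_orchestration_summary",
        "timestamp": "2025-11-27T06:00:00+00:00",
        "summary": "Daily orchestration run completed successfully",
        "details": {
            "procurement": {
                "requirements_created": 1,
                "pos_created": 1,
                "method": "automated_procurement",
                "reasoning": "Generated procurement plan based on production needs and inventory optimization",
            },
        },
    },
    "purchase_orders": [
        {
            "id": "po-001",
            "status": "created",
            "delivery_date": "2025-11-28",
            "reasoning": {
                "type": "inventory_optimization",
                "parameters": {"trigger": "low_stock_prediction", "min_depletion_days": 3},
            },
        },
    ],
    "production_batches": [],
    "ai_insights": {"generated": 2, "posted": 2},
}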
async def _mark_orchestration_failed(self, run_id: uuid.UUID, error_message: str):
"""Mark orchestration run as failed"""
async with self.db_manager.get_session() as session:

View File

@@ -9,6 +9,7 @@ from typing import Optional, Any, Callable
from functools import wraps
import structlog
from app.core.config import settings
from pydantic import BaseModel
logger = structlog.get_logger()
@@ -22,17 +23,36 @@ async def get_redis_client() -> redis.Redis:
if _redis_client is None:
try:
_redis_client = redis.Redis(
host=getattr(settings, 'REDIS_HOST', 'localhost'),
port=getattr(settings, 'REDIS_PORT', 6379),
db=getattr(settings, 'REDIS_DB', 0),
decode_responses=True,
socket_connect_timeout=5,
socket_timeout=5
)
# Check if TLS is enabled - convert string to boolean properly
redis_tls_str = str(getattr(settings, 'REDIS_TLS_ENABLED', 'false')).lower()
redis_tls_enabled = redis_tls_str in ('true', '1', 'yes', 'on')
connection_kwargs = {
'host': str(getattr(settings, 'REDIS_HOST', 'localhost')),
'port': int(getattr(settings, 'REDIS_PORT', 6379)),
'db': int(getattr(settings, 'REDIS_DB', 0)),
'decode_responses': True,
'socket_connect_timeout': 5,
'socket_timeout': 5
}
# Add password if configured
redis_password = getattr(settings, 'REDIS_PASSWORD', None)
if redis_password:
connection_kwargs['password'] = redis_password
# Add SSL/TLS support if enabled
if redis_tls_enabled:
import ssl
connection_kwargs['ssl'] = True
connection_kwargs['ssl_cert_reqs'] = ssl.CERT_NONE
logger.debug(f"Redis TLS enabled - connecting with SSL to {connection_kwargs['host']}:{connection_kwargs['port']}")
_redis_client = redis.Redis(**connection_kwargs)
# Test connection
await _redis_client.ping()
logger.info("Redis client connected successfully")
logger.info(f"Redis client connected successfully (TLS: {redis_tls_enabled})")
except Exception as e:
logger.warning(f"Failed to connect to Redis: {e}. Caching will be disabled.")
_redis_client = None
@@ -76,6 +96,30 @@ async def get_cached(key: str) -> Optional[Any]:
return None
def _serialize_value(value: Any) -> Any:
"""
Recursively serialize values for JSON storage, handling Pydantic models properly.
Args:
value: Value to serialize
Returns:
JSON-serializable value
"""
if isinstance(value, BaseModel):
# Convert Pydantic model to dictionary
return value.model_dump()
elif isinstance(value, (list, tuple)):
# Recursively serialize list/tuple elements
return [_serialize_value(item) for item in value]
elif isinstance(value, dict):
# Recursively serialize dictionary values
return {key: _serialize_value(val) for key, val in value.items()}
else:
# For other types, use default serialization
return value
async def set_cached(key: str, value: Any, ttl: int = 60) -> bool:
"""
Set cached value with TTL
@@ -93,7 +137,9 @@ async def set_cached(key: str, value: Any, ttl: int = 60) -> bool:
if not client:
return False
serialized = json.dumps(value, default=str)
# Serialize value properly before JSON encoding
serialized_value = _serialize_value(value)
serialized = json.dumps(serialized_value)
await client.setex(key, ttl, serialized)
logger.debug(f"Cache set: {key} (TTL: {ttl}s)")
return True
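With _serialize_value in place, Pydantic models (and containers holding them) can be passed to set_cached directly; a small self-contained sketch (the model and cache key below are illustrative):
from pydantic import BaseModel
class _ExampleStatus(BaseModel):  # stand-in for a real response model
    status: str
    criticalIssues: int
async def cache_example(tenant_id: str) -> None:
    value = {
        "health": _ExampleStatus(status="green", criticalIssues=0),
        "items": [_ExampleStatus(status="yellow", criticalIssues=1)],
    }
    # Models are converted to plain dicts via model_dump() before JSON encoding
    await set_cached(f"dashboard:health:{tenant_id}", value, ttl=30)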