New alert system and control panel page

Urtzi Alfaro
2025-11-27 15:52:40 +01:00
parent 1a2f4602f3
commit e902419b6e
178 changed files with 20982 additions and 6944 deletions

View File

@@ -55,23 +55,26 @@ class HeadlineData(BaseModel):
class HealthChecklistItem(BaseModel):
"""Individual item in health checklist"""
icon: str = Field(..., description="Icon name: check, warning, alert")
"""Individual item in tri-state health checklist"""
icon: str = Field(..., description="Icon name: check, warning, alert, ai_handled")
text: Optional[str] = Field(None, description="Deprecated: Use textKey instead")
textKey: Optional[str] = Field(None, description="i18n translation key")
textParams: Optional[Dict[str, Any]] = Field(None, description="Parameters for i18n translation")
actionRequired: bool = Field(..., description="Whether action is required")
status: str = Field(..., description="Tri-state status: good, ai_handled, needs_you")
actionPath: Optional[str] = Field(None, description="Path to navigate for action")
class BakeryHealthStatusResponse(BaseModel):
"""Overall bakery health status"""
"""Overall bakery health status with tri-state checklist"""
status: str = Field(..., description="Health status: green, yellow, red")
headline: HeadlineData = Field(..., description="i18n-ready status headline")
lastOrchestrationRun: Optional[str] = Field(None, description="ISO timestamp of last orchestration")
nextScheduledRun: str = Field(..., description="ISO timestamp of next scheduled run")
checklistItems: List[HealthChecklistItem] = Field(..., description="Status checklist")
checklistItems: List[HealthChecklistItem] = Field(..., description="Tri-state status checklist")
criticalIssues: int = Field(..., description="Count of critical issues")
pendingActions: int = Field(..., description="Count of pending actions")
aiPreventedIssues: int = Field(0, description="Count of issues AI prevented")
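
A minimal sketch of constructing the new tri-state checklist items. Field names mirror the models above; the textKey values, params, and action path are invented for illustration:

```python
from typing import Any, Dict, Optional
from pydantic import BaseModel, Field

class HealthChecklistItem(BaseModel):
    icon: str = Field(..., description="Icon name: check, warning, alert, ai_handled")
    text: Optional[str] = None                    # deprecated: use textKey
    textKey: Optional[str] = None                 # i18n translation key
    textParams: Optional[Dict[str, Any]] = None   # i18n parameters
    actionRequired: bool
    status: str                                   # good | ai_handled | needs_you
    actionPath: Optional[str] = None

items = [
    # All good: no action needed, nothing for the AI to do.
    HealthChecklistItem(icon="check", textKey="health.stock_ok",
                        actionRequired=False, status="good"),
    # AI already handled it: e.g. a PO was auto-created.
    HealthChecklistItem(icon="ai_handled", textKey="health.po_auto_created",
                        textParams={"count": 2},
                        actionRequired=False, status="ai_handled"),
    # Needs the user: approval pending, with a navigation target.
    HealthChecklistItem(icon="alert", textKey="health.approve_po",
                        actionRequired=True, status="needs_you",
                        actionPath="/procurement/pending"),
]
```
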
class ReasoningInputs(BaseModel):
@@ -207,10 +210,10 @@ async def get_bakery_health_status(
db: AsyncSession = Depends(get_db)
) -> BakeryHealthStatusResponse:
"""
Get overall bakery health status
Get overall bakery health status with tri-state checklist
This is the top-level indicator showing if the bakery is running smoothly
or if there are issues requiring attention.
or if there are issues requiring attention. Includes AI-prevented issues.
"""
try:
# Try to get from cache
@@ -227,11 +230,19 @@ async def get_bakery_health_status(
async def fetch_alerts():
try:
alerts_data = await alerts_client.get_alerts_summary(tenant_id) or {}
return alerts_data.get("critical_count", 0)
alerts_data = await alerts_client.get_alerts(tenant_id, limit=100) or {}
alerts_list = alerts_data.get("alerts", [])
# Count critical alerts
critical_count = sum(1 for a in alerts_list if a.get('priority_level') == 'CRITICAL')
# Count AI prevented issues
prevented_count = sum(1 for a in alerts_list if a.get('type_class') == 'prevented_issue')
return critical_count, prevented_count, alerts_list
except Exception as e:
logger.warning(f"Failed to fetch alerts: {e}")
return 0
return 0, 0, []
async def fetch_pending_pos():
try:
@@ -260,24 +271,28 @@ async def get_bakery_health_status(
return 0
# Execute all fetches in parallel
critical_alerts, pending_approvals, production_delays, out_of_stock_count = await asyncio.gather(
alerts_result, pending_approvals, production_delays, out_of_stock_count = await asyncio.gather(
fetch_alerts(),
fetch_pending_pos(),
fetch_production_delays(),
fetch_inventory()
)
critical_alerts, ai_prevented_count, all_alerts = alerts_result
# System errors (would come from monitoring system)
system_errors = 0
# Calculate health status
# Calculate health status with tri-state checklist
health_status = await dashboard_service.get_bakery_health_status(
tenant_id=tenant_id,
critical_alerts=critical_alerts,
pending_approvals=pending_approvals,
production_delays=production_delays,
out_of_stock_count=out_of_stock_count,
system_errors=system_errors
system_errors=system_errors,
ai_prevented_count=ai_prevented_count,
action_needed_alerts=all_alerts
)
# Cache the result
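
The reworked fetch_alerts derives both counts from one alert list instead of a pre-aggregated summary. A standalone sketch of that classification, assuming alerts are plain dicts with priority_level and type_class keys as in the diff (the sample type_class values other than prevented_issue are invented):

```python
from typing import Any, Dict, List, Tuple

def classify_alerts(alerts: List[Dict[str, Any]]) -> Tuple[int, int]:
    """Return (critical_count, ai_prevented_count), as in fetch_alerts."""
    critical = sum(1 for a in alerts if a.get("priority_level") == "CRITICAL")
    prevented = sum(1 for a in alerts if a.get("type_class") == "prevented_issue")
    return critical, prevented

# Two critical alerts, one issue the AI already prevented.
sample = [
    {"priority_level": "CRITICAL", "type_class": "stockout"},
    {"priority_level": "CRITICAL", "type_class": "delivery_delay"},
    {"priority_level": "LOW", "type_class": "prevented_issue"},
]
assert classify_alerts(sample) == (2, 1)
```
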
@@ -501,6 +516,116 @@ async def get_production_timeline(
raise HTTPException(status_code=500, detail=str(e))
@router.get("/unified-action-queue")
async def get_unified_action_queue(
tenant_id: str,
db: AsyncSession = Depends(get_db)
) -> Dict[str, Any]:
"""
Get unified action queue with time-based grouping
Combines all alerts (PO approvals, delivery tracking, production, etc.)
into URGENT (<6h), TODAY (<24h), and THIS WEEK (<7d) sections.
"""
try:
dashboard_service = DashboardService(db)
# Fetch all alerts from alert processor
alerts_data = await alerts_client.get_alerts(tenant_id, limit=100) or {}
alerts = alerts_data.get("alerts", [])
# Build unified queue
action_queue = await dashboard_service.get_unified_action_queue(
tenant_id=tenant_id,
alerts=alerts
)
return action_queue
except Exception as e:
logger.error(f"Error getting unified action queue: {e}", exc_info=True)
raise HTTPException(status_code=500, detail=str(e))
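
The actual grouping is delegated to DashboardService.get_unified_action_queue, which is not shown in this hunk. A hedged sketch of the time-based bucketing the docstring describes, assuming each alert carries an ISO-8601 deadline field (an assumption; the real field name may differ):

```python
from datetime import datetime, timedelta, timezone
from typing import Any, Dict, List

def group_by_deadline(alerts: List[Dict[str, Any]]) -> Dict[str, List[Dict[str, Any]]]:
    """Bucket alerts into URGENT (<6h), TODAY (<24h), THIS WEEK (<7d).

    Alerts with no deadline, or a deadline beyond 7 days, are dropped.
    """
    now = datetime.now(timezone.utc)
    buckets: Dict[str, List[Dict[str, Any]]] = {"urgent": [], "today": [], "this_week": []}
    for alert in alerts:
        raw = alert.get("deadline")
        if not raw:
            continue
        deadline = datetime.fromisoformat(raw.replace("Z", "+00:00"))
        remaining = deadline - now
        if remaining < timedelta(hours=6):
            buckets["urgent"].append(alert)
        elif remaining < timedelta(hours=24):
            buckets["today"].append(alert)
        elif remaining < timedelta(days=7):
            buckets["this_week"].append(alert)
    return buckets
```
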
@router.get("/execution-progress")
async def get_execution_progress(
tenant_id: str,
db: AsyncSession = Depends(get_db)
) -> Dict[str, Any]:
"""
Get execution progress for today's plan
Shows plan vs actual for production batches, deliveries, and approvals
"""
try:
dashboard_service = DashboardService(db)
# Fetch today's data in parallel
async def fetch_todays_batches():
try:
batch_data = await production_client.get_todays_batches(tenant_id)
if batch_data:
return batch_data.get("batches", [])
return []
except Exception as e:
logger.warning(f"Failed to fetch today's batches: {e}")
return []
async def fetch_expected_deliveries():
try:
# Get POs with expected deliveries today
from datetime import datetime, timedelta, timezone
pos_result = await procurement_client.get_pending_purchase_orders(tenant_id, limit=100)
if pos_result and isinstance(pos_result, list):
today_start = datetime.now(timezone.utc).replace(hour=0, minute=0, second=0, microsecond=0)
today_end = today_start.replace(hour=23, minute=59, second=59)
deliveries_today = []
for po in pos_result:
expected_date = po.get("expected_delivery_date")
if expected_date:
if isinstance(expected_date, str):
expected_date = datetime.fromisoformat(expected_date.replace('Z', '+00:00'))
if today_start <= expected_date <= today_end:
deliveries_today.append(po)
return deliveries_today
return []
except Exception as e:
logger.warning(f"Failed to fetch expected deliveries: {e}")
return []
async def fetch_pending_approvals():
try:
po_data = await procurement_client.get_pending_purchase_orders(tenant_id, limit=100) or []
return len(po_data) if isinstance(po_data, list) else 0
except Exception as e:
logger.warning(f"Failed to fetch pending approvals: {e}")
return 0
# Execute in parallel
todays_batches, expected_deliveries, pending_approvals = await asyncio.gather(
fetch_todays_batches(),
fetch_expected_deliveries(),
fetch_pending_approvals()
)
# Calculate progress
progress = await dashboard_service.get_execution_progress(
tenant_id=tenant_id,
todays_batches=todays_batches,
expected_deliveries=expected_deliveries,
pending_approvals=pending_approvals
)
return progress
except Exception as e:
logger.error(f"Error getting execution progress: {e}", exc_info=True)
raise HTTPException(status_code=500, detail=str(e))
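
DashboardService.get_execution_progress is likewise not shown here; a sketch of the plan-vs-actual roll-up its inputs suggest (the status values "completed" and "delivered" are assumptions, not confirmed by the diff):

```python
from typing import Any, Dict, List

def summarize_progress(todays_batches: List[Dict[str, Any]],
                       expected_deliveries: List[Dict[str, Any]],
                       pending_approvals: int) -> Dict[str, Any]:
    """Assumed shape: counts of done vs. planned per category."""
    batches_done = sum(1 for b in todays_batches if b.get("status") == "completed")
    deliveries_done = sum(1 for d in expected_deliveries if d.get("status") == "delivered")
    return {
        "production": {"completed": batches_done, "planned": len(todays_batches)},
        "deliveries": {"received": deliveries_done, "expected": len(expected_deliveries)},
        "approvals": {"pending": pending_approvals},
    }
```
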
@router.get("/insights", response_model=InsightsResponse)
async def get_insights(
tenant_id: str,
@@ -575,35 +700,32 @@ async def get_insights(
async def fetch_savings():
try:
# Get recent POs (last 7 days) and sum up optimization savings
seven_days_ago = datetime.now(timezone.utc) - timedelta(days=7)
# Get prevented issue savings from alert analytics
analytics = await alerts_client.get_dashboard_analytics(tenant_id, days=7)
pos_result = await procurement_client.get_pending_purchase_orders(tenant_id, limit=200)
if pos_result and isinstance(pos_result, list):
weekly_savings = 0
# Calculate savings from price optimization
for po in pos_result:
# Check if PO was created in last 7 days
created_at = po.get("created_at")
if created_at:
if isinstance(created_at, str):
created_at = datetime.fromisoformat(created_at.replace('Z', '+00:00'))
if created_at >= seven_days_ago:
# Sum up savings from optimization
optimization_data = po.get("optimization_data", {})
if isinstance(optimization_data, dict):
savings = optimization_data.get("savings", 0) or 0
weekly_savings += float(savings)
if analytics:
weekly_savings = analytics.get('estimated_savings_eur', 0)
prevented_count = analytics.get('prevented_issues_count', 0)
# Calculate trend from period comparison
period_comparison = analytics.get('period_comparison', {})
current_prevented = period_comparison.get('current_prevented', 0)
previous_prevented = period_comparison.get('previous_prevented', 0)
trend_percentage = 0
if previous_prevented > 0:
trend_percentage = ((current_prevented - previous_prevented) / previous_prevented) * 100
# Default trend percentage (would need historical data for real trend)
return {
"weekly_savings": round(weekly_savings, 2),
"trend_percentage": 12 if weekly_savings > 0 else 0
"trend_percentage": round(trend_percentage, 1),
"prevented_count": prevented_count
}
return {"weekly_savings": 0, "trend_percentage": 0}
return {"weekly_savings": 0, "trend_percentage": 0, "prevented_count": 0}
except Exception as e:
logger.warning(f"Failed to calculate savings data: {e}")
return {"weekly_savings": 0, "trend_percentage": 0}
return {"weekly_savings": 0, "trend_percentage": 0, "prevented_count": 0}
# Execute all fetches in parallel
sustainability_data, inventory_data, delivery_data, savings_data = await asyncio.gather(
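
The trend now comes from the analytics period comparison rather than the previous hardcoded 12%. The core arithmetic, with the zero-division guard made explicit:

```python
def trend_pct(current: int, previous: int) -> float:
    """Percentage change in prevented issues vs. the previous period.

    Returns 0 when there is no previous baseline, matching the diff's guard.
    """
    if previous <= 0:
        return 0.0
    return round(((current - previous) / previous) * 100, 1)

assert trend_pct(6, 5) == 20.0   # 5 -> 6 prevented issues: +20%
assert trend_pct(3, 0) == 0.0    # no baseline: trend suppressed
```
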

View File

@@ -0,0 +1,181 @@
"""
Internal API for Alert Intelligence Service
Provides orchestrator context for alert enrichment
"""
from fastapi import APIRouter, Header, HTTPException, Query
from typing import Optional, List, Dict, Any
from datetime import datetime, timedelta
from uuid import UUID
from pydantic import BaseModel
router = APIRouter(prefix="/api/internal", tags=["internal"])
class OrchestrationAction(BaseModel):
"""Recent orchestration action"""
id: str
type: str # purchase_order, production_batch
status: str # created, pending_approval, approved, completed
delivery_date: Optional[datetime]
reasoning: Optional[Dict[str, Any]]
estimated_resolution: Optional[datetime]
created_at: datetime
class RecentActionsResponse(BaseModel):
"""Response with recent orchestrator actions"""
actions: List[OrchestrationAction]
count: int
@router.get("/recent-actions", response_model=RecentActionsResponse)
async def get_recent_actions(
tenant_id: str = Query(..., description="Tenant ID"),
ingredient_id: Optional[str] = Query(None, description="Filter by ingredient"),
product_id: Optional[str] = Query(None, description="Filter by product"),
hours_ago: int = Query(24, description="Look-back window in hours"),
x_internal_service: str = Header(None, description="Internal service authentication")
):
"""
Get recent orchestrator actions for alert context enrichment.
Only accessible by internal services (alert-intelligence).
Returns orchestration runs with details about POs created, batches adjusted, etc.
This helps the alert system understand if AI already addressed similar issues.
"""
from shared.database.base import create_database_manager
from ..core.config import get_settings
from ..models.orchestration_run import OrchestrationRun, OrchestrationStatus
from sqlalchemy import select, and_, desc
import structlog
logger = structlog.get_logger()
# Simple internal service authentication
if x_internal_service != "alert-intelligence":
raise HTTPException(status_code=403, detail="Access denied")
try:
settings = get_settings()
db_manager = create_database_manager(settings.DATABASE_URL, "orchestrator")
async with db_manager.get_session() as session:
cutoff_time = datetime.utcnow() - timedelta(hours=hours_ago)
# Query recent orchestration runs
query = select(OrchestrationRun).where(
and_(
OrchestrationRun.tenant_id == UUID(tenant_id),
OrchestrationRun.created_at >= cutoff_time,
OrchestrationRun.status.in_([
OrchestrationStatus.completed,
OrchestrationStatus.partial_success
])
)
).order_by(desc(OrchestrationRun.created_at))
result = await session.execute(query)
runs = result.scalars().all()
actions = []
for run in runs:
run_metadata = run.run_metadata or {}
# Add purchase order actions
if run.purchase_orders_created > 0:
po_details = run_metadata.get('purchase_orders', [])
# If metadata has PO details, use them
if po_details:
for po in po_details:
# Filter by ingredient if specified
if ingredient_id:
po_items = po.get('items', [])
has_ingredient = any(
item.get('ingredient_id') == ingredient_id
for item in po_items
)
if not has_ingredient:
continue
actions.append(OrchestrationAction(
id=po.get('id', str(run.id)),
type="purchase_order",
status=po.get('status', 'created'),
delivery_date=po.get('delivery_date'),
reasoning=run_metadata.get('reasoning'),
estimated_resolution=po.get('delivery_date'),
created_at=run.created_at
))
else:
# Fallback: create generic action from run
actions.append(OrchestrationAction(
id=str(run.id),
type="purchase_order",
status="created",
delivery_date=None,
reasoning=run_metadata.get('reasoning'),
estimated_resolution=None,
created_at=run.created_at
))
# Add production batch actions
if run.production_batches_created > 0:
batch_details = run_metadata.get('production_batches', [])
if batch_details:
for batch in batch_details:
# Filter by product if specified
if product_id and batch.get('product_id') != product_id:
continue
actions.append(OrchestrationAction(
id=batch.get('id', str(run.id)),
type="production_batch",
status=batch.get('status', 'created'),
delivery_date=None,
reasoning=run_metadata.get('reasoning'),
estimated_resolution=batch.get('scheduled_date'),
created_at=run.created_at
))
else:
# Fallback: create generic action from run
if not product_id: # Only add if no product filter
actions.append(OrchestrationAction(
id=str(run.id),
type="production_batch",
status="created",
delivery_date=None,
reasoning=run_metadata.get('reasoning'),
estimated_resolution=None,
created_at=run.created_at
))
logger.info(
"recent_actions_fetched",
tenant_id=tenant_id,
hours_ago=hours_ago,
action_count=len(actions),
ingredient_id=ingredient_id,
product_id=product_id
)
return RecentActionsResponse(
actions=actions,
count=len(actions)
)
except Exception as e:
logger.error("error_fetching_recent_actions", error=str(e), tenant_id=tenant_id)
raise HTTPException(
status_code=500,
detail=f"Failed to fetch recent actions: {str(e)}"
)
@router.get("/health")
async def internal_health():
"""Internal health check"""
return {"status": "healthy", "api": "internal"}

View File

@@ -16,6 +16,12 @@ from app.models.orchestration_run import OrchestrationRun
import uuid
from datetime import datetime, timezone, timedelta
from typing import Optional
import sys
from pathlib import Path
# Add shared utilities to path
sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent))
from shared.utils.demo_dates import adjust_date_for_demo, BASE_REFERENCE_DATE
router = APIRouter()
logger = structlog.get_logger()
@@ -24,6 +30,27 @@ logger = structlog.get_logger()
INTERNAL_API_KEY = os.getenv("INTERNAL_API_KEY", "dev-internal-key-change-in-production")
async def ensure_unique_run_number(db: AsyncSession, base_run_number: str) -> str:
"""Ensure the run number is unique by appending a suffix if needed"""
proposed_run_number = base_run_number
# Check if the proposed run number already exists in the database
while True:
result = await db.execute(
select(OrchestrationRun)
.where(OrchestrationRun.run_number == proposed_run_number)
)
existing_run = result.scalar_one_or_none()
if not existing_run:
# Run number is unique, return it
return proposed_run_number
# Generate a new run number with an additional random suffix
random_suffix = str(uuid.uuid4())[:4].upper()
proposed_run_number = f"{base_run_number[:50-len(random_suffix)-1]}-{random_suffix}"
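
The slice in the fallback keeps the suffixed run number within 50 characters, presumably a column width constraint (the schema is not shown in this diff). A quick check of the arithmetic:

```python
import uuid

base = "ORCH-DEMO-TST-2025-0001-AAAABBBB-EXTRA-LONG-SEGMENT"  # 51 chars, over the cap
suffix = str(uuid.uuid4())[:4].upper()                        # e.g. "9F3C"
candidate = f"{base[:50 - len(suffix) - 1]}-{suffix}"
assert len(candidate) <= 50  # at most 45 chars of base + "-" + 4-char suffix
```
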
def verify_internal_api_key(x_internal_api_key: str = Header(...)):
"""Verify internal API key for service-to-service communication"""
if x_internal_api_key != INTERNAL_API_KEY:
@@ -86,38 +113,60 @@ async def clone_demo_data(
# Clone each orchestration run with date adjustment
for base_run in base_runs:
# Calculate time offset: how old was this run relative to when it was created
# We'll adjust all timestamps to be relative to the session creation time
# Use the shared date adjustment utility to ensure dates are always in the past
# This calculates the offset from BASE_REFERENCE_DATE and applies it to session creation time
if base_run.started_at:
# Calculate how many days ago this run was from a reference point
# Use a fixed reference date for consistency
reference_date = datetime(2025, 1, 15, 12, 0, 0, tzinfo=timezone.utc)
time_offset = base_run.started_at - reference_date
# Apply this offset to the current reference time
new_started_at = reference_time + time_offset
new_started_at = adjust_date_for_demo(
base_run.started_at, reference_time, BASE_REFERENCE_DATE
)
else:
new_started_at = reference_time - timedelta(hours=2)
# Adjust completed_at if it exists
if base_run.completed_at and base_run.started_at:
duration = base_run.completed_at - base_run.started_at
new_completed_at = new_started_at + duration
# Adjust completed_at using the same utility
if base_run.completed_at:
new_completed_at = adjust_date_for_demo(
base_run.completed_at, reference_time, BASE_REFERENCE_DATE
)
# Ensure completion is after start (guards against offset edge cases)
if new_completed_at and new_started_at and new_completed_at < new_started_at:
# Preserve original duration
duration = base_run.completed_at - base_run.started_at
new_completed_at = new_started_at + duration
else:
new_completed_at = None
# Adjust all step timestamps proportionally
# Adjust all step timestamps using the shared utility
def adjust_timestamp(original_timestamp):
if not original_timestamp or not base_run.started_at:
if not original_timestamp:
return None
step_offset = original_timestamp - base_run.started_at
return new_started_at + step_offset
return adjust_date_for_demo(original_timestamp, reference_time, BASE_REFERENCE_DATE)
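
shared.utils.demo_dates itself is not part of this diff. Based on the comments above, a plausible reading of adjust_date_for_demo is an offset-preserving shift; this is an assumption about the shared utility, not its actual source:

```python
from datetime import datetime, timezone

# Assumed behaviour: preserve each timestamp's offset from BASE_REFERENCE_DATE
# and re-apply it relative to the demo session's reference time, so cloned
# dates always land in the past relative to "now".
BASE_REFERENCE_DATE = datetime(2025, 1, 15, 12, 0, 0, tzinfo=timezone.utc)

def adjust_date_for_demo_sketch(original: datetime,
                                reference_time: datetime,
                                base_reference: datetime = BASE_REFERENCE_DATE) -> datetime:
    return reference_time + (original - base_reference)

# A run that started 3 days before the base reference lands 3 days
# before the session's reference time after cloning.
```
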
# Create new orchestration run for virtual tenant
# Update run_number to have current year instead of original year, and make it unique
current_year = reference_time.year
# Extract type from original run number and create new format
parts = base_run.run_number.split('-')
if len(parts) >= 4:
tenant_prefix = parts[1] if len(parts) > 1 else "DEMO"
type_code = parts[2] if len(parts) > 2 else "TST"
original_index = parts[3] if len(parts) > 3 else "001"
# Generate a more robust unique suffix to avoid collisions
# Use UUID instead of just session_id substring to ensure uniqueness
unique_suffix = str(uuid.uuid4())[:8].upper()
proposed_run_number = f"ORCH-{tenant_prefix}-{type_code}-{current_year}-{original_index}-{unique_suffix}"
else:
unique_suffix = str(uuid.uuid4())[:12].upper()
proposed_run_number = f"{base_run.run_number}-{unique_suffix}"
# Ensure the run number is truly unique by checking against existing entries
# This prevents collisions, especially in high-concurrency scenarios
run_number = await ensure_unique_run_number(db, proposed_run_number)
new_run = OrchestrationRun(
id=uuid.uuid4(),
tenant_id=virtual_uuid,
run_number=f"{base_run.run_number}-DEMO",
run_number=run_number,
status=base_run.status,
run_type=base_run.run_type,
priority=base_run.priority,