445 lines
19 KiB
Python
445 lines
19 KiB
Python
"""
|
|
Network Alerts API
|
|
Endpoints for aggregating and managing alerts across enterprise networks
|
|
"""
|
|
|
|
from collections import Counter
from datetime import datetime
from typing import List, Dict, Any, Optional

import structlog
from fastapi import APIRouter, Depends, HTTPException, Query
from pydantic import BaseModel, Field

from app.core.config import settings
from app.services.network_alerts_service import NetworkAlertsService
from shared.auth.tenant_access import verify_tenant_permission_dep
from shared.clients import get_tenant_client, get_alerts_client
|
|
|
|
# Structured logger used by the endpoints in this module to report failures.
logger = structlog.get_logger()

# Router on which the network-alert endpoints in this module are registered.
router = APIRouter()
|
|
|
|
|
|
# Pydantic models for request/response
|
|
class NetworkAlert(BaseModel):
    # One alert, tagged with the tenant it originated from.
    # Documented with comments (not a docstring) so the generated schema
    # description stays exactly as it was.

    # --- identity / origin ---
    alert_id: str = Field(
        default=...,
        description="Unique alert ID",
    )
    tenant_id: str = Field(
        default=...,
        description="Tenant ID where alert originated",
    )
    tenant_name: str = Field(
        default=...,
        description="Tenant name",
    )

    # --- classification ---
    alert_type: str = Field(
        default=...,
        description="Type of alert: inventory, production, delivery, etc.",
    )
    severity: str = Field(
        default=...,
        description="Severity: critical, high, medium, low",
    )

    # --- human-readable content ---
    title: str = Field(
        default=...,
        description="Alert title",
    )
    message: str = Field(
        default=...,
        description="Alert message",
    )

    # --- lifecycle ---
    timestamp: str = Field(
        default=...,
        description="Alert timestamp",
    )
    status: str = Field(
        default=...,
        description="Alert status: active, acknowledged, resolved",
    )
    source_system: str = Field(
        default=...,
        description="System that generated the alert",
    )

    # --- optional link to the entity the alert is about ---
    related_entity_id: Optional[str] = Field(
        default=None,
        description="ID of related entity (product, route, etc.)",
    )
    related_entity_type: Optional[str] = Field(
        default=None,
        description="Type of related entity",
    )
|
|
|
|
|
|
class AlertSeveritySummary(BaseModel):
    # Alert counts bucketed by severity level, plus the overall total.
    critical_count: int = Field(
        default=...,
        description="Number of critical alerts",
    )
    high_count: int = Field(
        default=...,
        description="Number of high severity alerts",
    )
    medium_count: int = Field(
        default=...,
        description="Number of medium severity alerts",
    )
    low_count: int = Field(
        default=...,
        description="Number of low severity alerts",
    )
    total_alerts: int = Field(
        default=...,
        description="Total number of alerts",
    )
|
|
|
|
|
|
class AlertTypeSummary(BaseModel):
    # Alert counts bucketed by alert type; anything outside the five named
    # types is reported under ``other_alerts``.
    inventory_alerts: int = Field(
        default=...,
        description="Inventory-related alerts",
    )
    production_alerts: int = Field(
        default=...,
        description="Production-related alerts",
    )
    delivery_alerts: int = Field(
        default=...,
        description="Delivery-related alerts",
    )
    equipment_alerts: int = Field(
        default=...,
        description="Equipment-related alerts",
    )
    quality_alerts: int = Field(
        default=...,
        description="Quality-related alerts",
    )
    other_alerts: int = Field(
        default=...,
        description="Other types of alerts",
    )
|
|
|
|
|
|
class NetworkAlertsSummary(BaseModel):
    # Response body of the summary endpoint: totals by status, nested
    # breakdowns by severity and type, and the newest alert if there is one.

    # --- counts by lifecycle status ---
    total_alerts: int = Field(
        default=...,
        description="Total alerts across network",
    )
    active_alerts: int = Field(
        default=...,
        description="Currently active alerts",
    )
    acknowledged_alerts: int = Field(
        default=...,
        description="Acknowledged alerts",
    )
    resolved_alerts: int = Field(
        default=...,
        description="Resolved alerts",
    )

    # --- nested breakdowns ---
    severity_summary: AlertSeveritySummary = Field(
        default=...,
        description="Alerts by severity",
    )
    type_summary: AlertTypeSummary = Field(
        default=...,
        description="Alerts by type",
    )

    # Optional: defaults to None when not supplied.
    most_recent_alert: Optional[NetworkAlert] = Field(
        default=None,
        description="Most recent alert",
    )
|
|
|
|
|
|
class AlertCorrelation(BaseModel):
    # A group of alerts judged to be related: one primary alert plus the
    # alerts correlated with it, and metadata describing the relationship.
    correlation_id: str = Field(
        default=...,
        description="Correlation group ID",
    )
    primary_alert: NetworkAlert = Field(
        default=...,
        description="Primary alert in the group",
    )
    related_alerts: List[NetworkAlert] = Field(
        default=...,
        description="Alerts correlated with primary alert",
    )
    correlation_type: str = Field(
        default=...,
        description="Type of correlation: causal, temporal, spatial",
    )
    # The 0-1 range is documented only; no bound is enforced by this field.
    correlation_strength: float = Field(
        default=...,
        description="Correlation strength (0-1)",
    )
    impact_analysis: str = Field(
        default=...,
        description="Analysis of combined impact",
    )
|
|
|
|
|
|
async def get_network_alerts_service() -> NetworkAlertsService:
    """Build the NetworkAlertsService instance injected into the endpoints.

    Creates a tenant client and an alerts client from the shared settings
    (both with the "tenant-service" identifier) and passes them, in that
    order, to the service constructor.
    """
    service_name = "tenant-service"
    return NetworkAlertsService(
        get_tenant_client(settings, service_name),
        get_alerts_client(settings, service_name),
    )
|
|
|
|
|
|
# Sort rank per severity; anything not listed sorts after 'low'.
_SEVERITY_ORDER = {'critical': 1, 'high': 2, 'medium': 3, 'low': 4}
_UNKNOWN_SEVERITY_RANK = 5


def _alert_timestamp_value(raw: Any) -> float:
    """Turn an alert timestamp into a number usable as a sort key.

    Accepts numeric values (or numeric strings) and ISO-8601 strings such as
    the ones produced by ``datetime.isoformat()``. Missing or unparseable
    values map to 0.0 so that a single bad timestamp cannot break sorting.
    """
    if not raw:
        return 0.0
    if isinstance(raw, datetime):
        return raw.timestamp()
    try:
        return float(raw)
    except (TypeError, ValueError):
        pass
    text = str(raw)
    if text.endswith("Z"):
        # datetime.fromisoformat only accepts a trailing "Z" from Python 3.11 on.
        text = text[:-1] + "+00:00"
    try:
        return datetime.fromisoformat(text).timestamp()
    except (ValueError, OverflowError, OSError):
        return 0.0


def _enrich_alert(alert: Dict[str, Any], child_id: Any, child_name: Any) -> Dict[str, Any]:
    """Copy the known alert fields, fill in defaults and attach the tenant identity."""
    return {
        'alert_id': alert.get('alert_id', str(uuid.uuid4())),
        'tenant_id': child_id,
        'tenant_name': child_name,
        'alert_type': alert.get('alert_type', 'unknown'),
        'severity': alert.get('severity', 'medium'),
        'title': alert.get('title', 'No title'),
        'message': alert.get('message', 'No message'),
        'timestamp': alert.get('timestamp', datetime.now().isoformat()),
        'status': alert.get('status', 'active'),
        'source_system': alert.get('source_system', 'unknown'),
        'related_entity_id': alert.get('related_entity_id'),
        'related_entity_type': alert.get('related_entity_type'),
    }


@router.get(
    "/tenants/{parent_id}/network/alerts",
    response_model=List[NetworkAlert],
    summary="Get aggregated alerts across network",
)
async def get_network_alerts(
    parent_id: str,
    severity: Optional[str] = Query(None, description="Filter by severity: critical, high, medium, low"),
    alert_type: Optional[str] = Query(None, description="Filter by alert type"),
    status: Optional[str] = Query(None, description="Filter by status: active, acknowledged, resolved"),
    limit: int = Query(100, description="Maximum number of alerts to return"),
    network_alerts_service: NetworkAlertsService = Depends(get_network_alerts_service),
    verified_tenant: str = Depends(verify_tenant_permission_dep)
):
    """
    Get aggregated alerts across all child tenants in a parent network

    Fetches the alerts of every child tenant of ``parent_id``, tags each alert
    with the child's id and name, applies the optional exact-match filters and
    returns the alerts ordered by severity (critical first) and then by
    timestamp (newest first), truncated to ``limit`` entries.

    Args:
        parent_id: Tenant whose child network is queried; must be a tenant
            whose ``tenant_type`` is ``'parent'``.
        severity: When given, only alerts with exactly this severity are kept.
        alert_type: When given, only alerts with exactly this type are kept.
        status: When given, only alerts with exactly this status are kept.
        limit: Maximum number of alerts returned; negative values are treated
            as 0.
        network_alerts_service: Injected service used for all lookups.
        verified_tenant: Result of the ``verify_tenant_permission_dep``
            dependency; not read here (presumably it enforces access - verify
            against shared.auth.tenant_access).

    Returns:
        A list of alert dicts matching the ``NetworkAlert`` schema; empty when
        the parent has no child tenants.

    Raises:
        HTTPException: 403 when ``parent_id`` is not a parent tenant, 500 for
            any unexpected failure.
    """
    try:
        # Verify this is a parent tenant
        tenant_info = await network_alerts_service.tenant_client.get_tenant(parent_id)
        if tenant_info.get('tenant_type') != 'parent':
            raise HTTPException(
                status_code=403,
                detail="Only parent tenants can access network alerts"
            )

        child_tenants = await network_alerts_service.get_child_tenants(parent_id)
        if not child_tenants:
            return []

        all_alerts = []
        for child in child_tenants:
            child_id = child['id']
            child_name = child['name']

            child_alerts = await network_alerts_service.get_alerts_for_tenant(child_id)

            for alert in child_alerts:
                enriched_alert = _enrich_alert(alert, child_id, child_name)

                # Filters compare against the enriched values, so defaults
                # (e.g. severity 'medium') take part in filtering.
                if severity and enriched_alert['severity'] != severity:
                    continue
                if alert_type and enriched_alert['alert_type'] != alert_type:
                    continue
                if status and enriched_alert['status'] != status:
                    continue

                all_alerts.append(enriched_alert)

        # Critical first, then newest first. Timestamps are ISO strings, so
        # they are parsed rather than passed to int().
        all_alerts.sort(
            key=lambda item: (
                _SEVERITY_ORDER.get(item['severity'], _UNKNOWN_SEVERITY_RANK),
                -_alert_timestamp_value(item['timestamp']),
            )
        )

        # Clamp so a negative limit cannot slice from the end of the list.
        return all_alerts[:max(limit, 0)]

    except HTTPException:
        # Deliberate HTTP errors (the 403 above) must reach the client as-is.
        raise
    except Exception as e:
        logger.error("Failed to get network alerts", parent_id=parent_id, error=str(e))
        raise HTTPException(status_code=500, detail=f"Failed to get network alerts: {str(e)}") from e
|
|
|
|
|
|
@router.get(
    "/tenants/{parent_id}/network/alerts/summary",
    response_model=NetworkAlertsSummary,
    summary="Get network alerts summary",
)
async def get_network_alerts_summary(
    parent_id: str,
    network_alerts_service: NetworkAlertsService = Depends(get_network_alerts_service),
    verified_tenant: str = Depends(verify_tenant_permission_dep)
):
    """
    Get summary of alerts across the network

    Loads the alerts via ``network_alerts_service.get_network_alerts`` and
    counts them by status, severity and type. With no alerts, every count is
    0 and ``most_recent_alert`` is ``None``.

    Args:
        parent_id: Tenant whose network is summarised; must be a tenant whose
            ``tenant_type`` is ``'parent'``.
        network_alerts_service: Injected service used for all lookups.
        verified_tenant: Result of the ``verify_tenant_permission_dep``
            dependency; not read here.

    Returns:
        A ``NetworkAlertsSummary`` instance.

    Raises:
        HTTPException: 403 when ``parent_id`` is not a parent tenant, 500 for
            any unexpected failure (including alerts lacking the ``status``,
            ``severity``, ``alert_type`` or ``timestamp`` keys).
    """
    try:
        # Verify this is a parent tenant
        tenant_info = await network_alerts_service.tenant_client.get_tenant(parent_id)
        if tenant_info.get('tenant_type') != 'parent':
            raise HTTPException(
                status_code=403,
                detail="Only parent tenants can access network alerts summary"
            )

        # A falsy result (None or empty) is summarised as "no alerts".
        all_alerts = (await network_alerts_service.get_network_alerts(parent_id)) or []
        total = len(all_alerts)

        # One pass per dimension instead of one pass per individual bucket.
        status_counts = Counter(a['status'] for a in all_alerts)
        severity_counts = Counter(a['severity'] for a in all_alerts)
        type_counts = Counter(a['alert_type'] for a in all_alerts)

        severity_summary = AlertSeveritySummary(
            critical_count=severity_counts['critical'],
            high_count=severity_counts['high'],
            medium_count=severity_counts['medium'],
            low_count=severity_counts['low'],
            total_alerts=total
        )

        known_types = ('inventory', 'production', 'delivery', 'equipment', 'quality')
        known_total = sum(type_counts[name] for name in known_types)
        type_summary = AlertTypeSummary(
            inventory_alerts=type_counts['inventory'],
            production_alerts=type_counts['production'],
            delivery_alerts=type_counts['delivery'],
            equipment_alerts=type_counts['equipment'],
            quality_alerts=type_counts['quality'],
            # Everything that is not one of the five named types.
            other_alerts=total - known_total
        )

        # Compares the raw timestamp values as stored on the alerts.
        most_recent_alert = max(all_alerts, key=lambda x: x['timestamp'], default=None)

        return NetworkAlertsSummary(
            total_alerts=total,
            active_alerts=status_counts['active'],
            acknowledged_alerts=status_counts['acknowledged'],
            resolved_alerts=status_counts['resolved'],
            severity_summary=severity_summary,
            type_summary=type_summary,
            most_recent_alert=most_recent_alert
        )

    except HTTPException:
        # Deliberate HTTP errors (the 403 above) must reach the client as-is.
        raise
    except Exception as e:
        logger.error("Failed to get network alerts summary", parent_id=parent_id, error=str(e))
        raise HTTPException(status_code=500, detail=f"Failed to get alerts summary: {str(e)}") from e
|
|
|
|
|
|
@router.get(
    "/tenants/{parent_id}/network/alerts/correlations",
    response_model=List[AlertCorrelation],
    summary="Get correlated alert groups",
)
async def get_correlated_alerts(
    parent_id: str,
    min_correlation_strength: float = Query(0.7, ge=0.5, le=1.0, description="Minimum correlation strength"),
    network_alerts_service: NetworkAlertsService = Depends(get_network_alerts_service),
    verified_tenant: str = Depends(verify_tenant_permission_dep)
):
    """
    Get groups of correlated alerts

    Loads the network's alerts and hands them, together with the requested
    minimum strength, to ``network_alerts_service.detect_alert_correlations``.
    The correlation logic itself lives in the service.

    Args:
        parent_id: Tenant whose network is analysed; must be a tenant whose
            ``tenant_type`` is ``'parent'``.
        min_correlation_strength: Threshold forwarded to the service;
            validated to lie in [0.5, 1.0].
        network_alerts_service: Injected service used for all lookups.
        verified_tenant: Result of the ``verify_tenant_permission_dep``
            dependency; not read here.

    Returns:
        Whatever the service returns for the correlation groups, or an empty
        list when the network has no alerts.

    Raises:
        HTTPException: 403 when ``parent_id`` is not a parent tenant, 500 for
            any unexpected failure.
    """
    try:
        # Verify this is a parent tenant
        tenant_info = await network_alerts_service.tenant_client.get_tenant(parent_id)
        if tenant_info.get('tenant_type') != 'parent':
            raise HTTPException(
                status_code=403,
                detail="Only parent tenants can access alert correlations"
            )

        all_alerts = await network_alerts_service.get_network_alerts(parent_id)
        if not all_alerts:
            return []

        return await network_alerts_service.detect_alert_correlations(
            all_alerts, min_correlation_strength
        )

    except HTTPException:
        # Deliberate HTTP errors (the 403 above) must reach the client as-is.
        raise
    except Exception as e:
        logger.error("Failed to get correlated alerts", parent_id=parent_id, error=str(e))
        raise HTTPException(status_code=500, detail=f"Failed to get alert correlations: {str(e)}") from e
|
|
|
|
|
|
@router.post(
    "/tenants/{parent_id}/network/alerts/{alert_id}/acknowledge",
    summary="Acknowledge network alert",
)
async def acknowledge_network_alert(
    parent_id: str,
    alert_id: str,
    network_alerts_service: NetworkAlertsService = Depends(get_network_alerts_service),
    verified_tenant: str = Depends(verify_tenant_permission_dep)
):
    """
    Acknowledge a network alert

    Calls ``network_alerts_service.acknowledge_alert(parent_id, alert_id)``
    and reports success if that call does not raise.

    Args:
        parent_id: Tenant on whose behalf the alert is acknowledged; must be
            a tenant whose ``tenant_type`` is ``'parent'``.
        alert_id: Identifier of the alert to acknowledge.
        network_alerts_service: Injected service used for all lookups.
        verified_tenant: Result of the ``verify_tenant_permission_dep``
            dependency; not read here.

    Returns:
        A dict with ``success``, ``alert_id``, ``status`` and ``message``.

    Raises:
        HTTPException: 403 when ``parent_id`` is not a parent tenant, 500 for
            any unexpected failure.
    """
    try:
        # Verify this is a parent tenant
        tenant_info = await network_alerts_service.tenant_client.get_tenant(parent_id)
        if tenant_info.get('tenant_type') != 'parent':
            raise HTTPException(
                status_code=403,
                detail="Only parent tenants can acknowledge network alerts"
            )

        # NOTE(review): the service's return value is not inspected, so the
        # response reports success whenever the call does not raise - confirm
        # that acknowledge_alert signals failure via exceptions.
        await network_alerts_service.acknowledge_alert(parent_id, alert_id)

        return {
            'success': True,
            'alert_id': alert_id,
            'status': 'acknowledged',
            'message': 'Alert acknowledged successfully'
        }

    except HTTPException:
        # Deliberate HTTP errors (the 403 above) must reach the client as-is.
        raise
    except Exception as e:
        logger.error("Failed to acknowledge alert", parent_id=parent_id, alert_id=alert_id, error=str(e))
        raise HTTPException(status_code=500, detail=f"Failed to acknowledge alert: {str(e)}") from e
|
|
|
|
|
|
@router.post(
    "/tenants/{parent_id}/network/alerts/{alert_id}/resolve",
    summary="Resolve network alert",
)
async def resolve_network_alert(
    parent_id: str,
    alert_id: str,
    resolution_notes: Optional[str] = Query(None, description="Notes about resolution"),
    network_alerts_service: NetworkAlertsService = Depends(get_network_alerts_service),
    verified_tenant: str = Depends(verify_tenant_permission_dep)
):
    """
    Resolve a network alert

    Calls ``network_alerts_service.resolve_alert(parent_id, alert_id,
    resolution_notes)`` and reports success if that call does not raise.

    Args:
        parent_id: Tenant on whose behalf the alert is resolved; must be a
            tenant whose ``tenant_type`` is ``'parent'``.
        alert_id: Identifier of the alert to resolve.
        resolution_notes: Optional free-text notes, read from the query
            string, forwarded to the service and echoed in the response.
        network_alerts_service: Injected service used for all lookups.
        verified_tenant: Result of the ``verify_tenant_permission_dep``
            dependency; not read here.

    Returns:
        A dict with ``success``, ``alert_id``, ``status``,
        ``resolution_notes`` and ``message``.

    Raises:
        HTTPException: 403 when ``parent_id`` is not a parent tenant, 500 for
            any unexpected failure.
    """
    try:
        # Verify this is a parent tenant
        tenant_info = await network_alerts_service.tenant_client.get_tenant(parent_id)
        if tenant_info.get('tenant_type') != 'parent':
            raise HTTPException(
                status_code=403,
                detail="Only parent tenants can resolve network alerts"
            )

        # NOTE(review): the service's return value is not inspected, so the
        # response reports success whenever the call does not raise - confirm
        # that resolve_alert signals failure via exceptions.
        await network_alerts_service.resolve_alert(parent_id, alert_id, resolution_notes)

        return {
            'success': True,
            'alert_id': alert_id,
            'status': 'resolved',
            'resolution_notes': resolution_notes,
            'message': 'Alert resolved successfully'
        }

    except HTTPException:
        # Deliberate HTTP errors (the 403 above) must reach the client as-is.
        raise
    except Exception as e:
        logger.error("Failed to resolve alert", parent_id=parent_id, alert_id=alert_id, error=str(e))
        raise HTTPException(status_code=500, detail=f"Failed to resolve alert: {str(e)}") from e
|
|
|
|
|
|
@router.get(
    "/tenants/{parent_id}/network/alerts/trends",
    summary="Get alert trends over time",
)
async def get_alert_trends(
    parent_id: str,
    days: int = Query(30, ge=7, le=365, description="Number of days to analyze"),
    network_alerts_service: NetworkAlertsService = Depends(get_network_alerts_service),
    verified_tenant: str = Depends(verify_tenant_permission_dep)
):
    """
    Get alert trends over time

    Delegates to ``network_alerts_service.get_alert_trends(parent_id, days)``
    and wraps the result in a small envelope. The trend analysis itself lives
    in the service.

    Args:
        parent_id: Tenant whose network is analysed; must be a tenant whose
            ``tenant_type`` is ``'parent'``.
        days: Size of the analysis window; validated to lie in [7, 365].
        network_alerts_service: Injected service used for all lookups.
        verified_tenant: Result of the ``verify_tenant_permission_dep``
            dependency; not read here.

    Returns:
        A dict with ``success``, ``trends`` (the service result, passed
        through unchanged) and ``period`` (a human-readable window label).

    Raises:
        HTTPException: 403 when ``parent_id`` is not a parent tenant, 500 for
            any unexpected failure.
    """
    try:
        # Verify this is a parent tenant
        tenant_info = await network_alerts_service.tenant_client.get_tenant(parent_id)
        if tenant_info.get('tenant_type') != 'parent':
            raise HTTPException(
                status_code=403,
                detail="Only parent tenants can access alert trends"
            )

        trends = await network_alerts_service.get_alert_trends(parent_id, days)

        return {
            'success': True,
            'trends': trends,
            'period': f'Last {days} days'
        }

    except HTTPException:
        # Deliberate HTTP errors (the 403 above) must reach the client as-is.
        raise
    except Exception as e:
        logger.error("Failed to get alert trends", parent_id=parent_id, error=str(e))
        raise HTTPException(status_code=500, detail=f"Failed to get alert trends: {str(e)}") from e
|
|
|
|
|
|
@router.get(
    "/tenants/{parent_id}/network/alerts/prioritization",
    summary="Get prioritized alerts",
)
async def get_prioritized_alerts(
    parent_id: str,
    limit: int = Query(10, description="Maximum number of alerts to return"),
    network_alerts_service: NetworkAlertsService = Depends(get_network_alerts_service),
    verified_tenant: str = Depends(verify_tenant_permission_dep)
):
    """
    Get prioritized alerts based on impact and urgency

    Delegates to ``network_alerts_service.get_prioritized_alerts(parent_id,
    limit)`` and wraps the result in a small envelope. How alerts are ranked
    is decided entirely by the service.

    Args:
        parent_id: Tenant whose network is queried; must be a tenant whose
            ``tenant_type`` is ``'parent'``.
        limit: Maximum number of alerts, forwarded to the service unchanged.
        network_alerts_service: Injected service used for all lookups.
        verified_tenant: Result of the ``verify_tenant_permission_dep``
            dependency; not read here.

    Returns:
        A dict with ``success``, ``prioritized_alerts`` (the service result)
        and ``message`` (reporting how many alerts were returned).

    Raises:
        HTTPException: 403 when ``parent_id`` is not a parent tenant, 500 for
            any unexpected failure.
    """
    try:
        # Verify this is a parent tenant
        tenant_info = await network_alerts_service.tenant_client.get_tenant(parent_id)
        if tenant_info.get('tenant_type') != 'parent':
            raise HTTPException(
                status_code=403,
                detail="Only parent tenants can access prioritized alerts"
            )

        prioritized_alerts = await network_alerts_service.get_prioritized_alerts(parent_id, limit)

        return {
            'success': True,
            'prioritized_alerts': prioritized_alerts,
            'message': f'Top {len(prioritized_alerts)} prioritized alerts'
        }

    except HTTPException:
        # Deliberate HTTP errors (the 403 above) must reach the client as-is.
        raise
    except Exception as e:
        logger.error("Failed to get prioritized alerts", parent_id=parent_id, error=str(e))
        raise HTTPException(status_code=500, detail=f"Failed to get prioritized alerts: {str(e)}") from e
|
|
|
|
|
|
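# NOTE(review): late standard-library imports. `datetime` is already imported at the top of this file and `uuid` is needed by get_network_alerts; consider moving these into the top-level import block.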
|
|
from datetime import datetime, timedelta
|
|
import uuid |