Improve enterprise
services/tenant/app/api/network_alerts.py (new file, 445 lines)
@@ -0,0 +1,445 @@
"""
Network Alerts API
Endpoints for aggregating and managing alerts across enterprise networks
"""

from fastapi import APIRouter, Depends, HTTPException, Query
from typing import List, Dict, Any, Optional
from datetime import datetime
from pydantic import BaseModel, Field
import structlog
import uuid

from app.services.network_alerts_service import NetworkAlertsService
from shared.auth.tenant_access import verify_tenant_permission_dep
from shared.clients import get_tenant_client, get_alerts_client
from app.core.config import settings

logger = structlog.get_logger()
router = APIRouter()

# Pydantic models for request/response
class NetworkAlert(BaseModel):
    alert_id: str = Field(..., description="Unique alert ID")
    tenant_id: str = Field(..., description="Tenant ID where alert originated")
    tenant_name: str = Field(..., description="Tenant name")
    alert_type: str = Field(..., description="Type of alert: inventory, production, delivery, etc.")
    severity: str = Field(..., description="Severity: critical, high, medium, low")
    title: str = Field(..., description="Alert title")
    message: str = Field(..., description="Alert message")
    timestamp: str = Field(..., description="Alert timestamp")
    status: str = Field(..., description="Alert status: active, acknowledged, resolved")
    source_system: str = Field(..., description="System that generated the alert")
    related_entity_id: Optional[str] = Field(None, description="ID of related entity (product, route, etc.)")
    related_entity_type: Optional[str] = Field(None, description="Type of related entity")


class AlertSeveritySummary(BaseModel):
    critical_count: int = Field(..., description="Number of critical alerts")
    high_count: int = Field(..., description="Number of high severity alerts")
    medium_count: int = Field(..., description="Number of medium severity alerts")
    low_count: int = Field(..., description="Number of low severity alerts")
    total_alerts: int = Field(..., description="Total number of alerts")


class AlertTypeSummary(BaseModel):
    inventory_alerts: int = Field(..., description="Inventory-related alerts")
    production_alerts: int = Field(..., description="Production-related alerts")
    delivery_alerts: int = Field(..., description="Delivery-related alerts")
    equipment_alerts: int = Field(..., description="Equipment-related alerts")
    quality_alerts: int = Field(..., description="Quality-related alerts")
    other_alerts: int = Field(..., description="Other types of alerts")


class NetworkAlertsSummary(BaseModel):
    total_alerts: int = Field(..., description="Total alerts across network")
    active_alerts: int = Field(..., description="Currently active alerts")
    acknowledged_alerts: int = Field(..., description="Acknowledged alerts")
    resolved_alerts: int = Field(..., description="Resolved alerts")
    severity_summary: AlertSeveritySummary = Field(..., description="Alerts by severity")
    type_summary: AlertTypeSummary = Field(..., description="Alerts by type")
    most_recent_alert: Optional[NetworkAlert] = Field(None, description="Most recent alert")


class AlertCorrelation(BaseModel):
    correlation_id: str = Field(..., description="Correlation group ID")
    primary_alert: NetworkAlert = Field(..., description="Primary alert in the group")
    related_alerts: List[NetworkAlert] = Field(..., description="Alerts correlated with primary alert")
    correlation_type: str = Field(..., description="Type of correlation: causal, temporal, spatial")
    correlation_strength: float = Field(..., description="Correlation strength (0-1)")
    impact_analysis: str = Field(..., description="Analysis of combined impact")

async def get_network_alerts_service() -> NetworkAlertsService:
    """Dependency injection for NetworkAlertsService"""
    tenant_client = get_tenant_client(settings, "tenant-service")
    alerts_client = get_alerts_client(settings, "tenant-service")
    return NetworkAlertsService(tenant_client, alerts_client)
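
# A minimal test sketch (not part of this module's runtime path): the dependency above
# can be overridden so the endpoints below run against stubbed clients instead of live
# services. `app.main`, `FakeTenantClient`, and `FakeAlertsClient` are assumed names.
#
#   from fastapi.testclient import TestClient
#   from app.main import app
#
#   app.dependency_overrides[get_network_alerts_service] = lambda: NetworkAlertsService(
#       FakeTenantClient(), FakeAlertsClient()
#   )
#   client = TestClient(app)
#   response = client.get("/tenants/parent-1/network/alerts?severity=critical")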


@router.get("/tenants/{parent_id}/network/alerts",
            response_model=List[NetworkAlert],
            summary="Get aggregated alerts across network")
async def get_network_alerts(
    parent_id: str,
    severity: Optional[str] = Query(None, description="Filter by severity: critical, high, medium, low"),
    alert_type: Optional[str] = Query(None, description="Filter by alert type"),
    status: Optional[str] = Query(None, description="Filter by status: active, acknowledged, resolved"),
    limit: int = Query(100, description="Maximum number of alerts to return"),
    network_alerts_service: NetworkAlertsService = Depends(get_network_alerts_service),
    verified_tenant: str = Depends(verify_tenant_permission_dep)
):
    """
    Get aggregated alerts across all child tenants in a parent network

    This endpoint provides a unified view of alerts across the entire enterprise network,
    enabling network managers to identify and prioritize issues that require attention.
    """
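    # Example request (hypothetical IDs), assuming the router is mounted at the service root:
    #   GET /tenants/parent-123/network/alerts?severity=critical&status=active&limit=20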
    try:
        # Verify this is a parent tenant
        tenant_info = await network_alerts_service.tenant_client.get_tenant(parent_id)
        if tenant_info.get('tenant_type') != 'parent':
            raise HTTPException(
                status_code=403,
                detail="Only parent tenants can access network alerts"
            )

        # Get all child tenants
        child_tenants = await network_alerts_service.get_child_tenants(parent_id)

        if not child_tenants:
            return []

        # Aggregate alerts from all child tenants
        all_alerts = []

        for child in child_tenants:
            child_id = child['id']
            child_name = child['name']

            # Get alerts for this child tenant
            child_alerts = await network_alerts_service.get_alerts_for_tenant(child_id)

            # Enrich with tenant information and apply filters
            for alert in child_alerts:
                enriched_alert = {
                    'alert_id': alert.get('alert_id', str(uuid.uuid4())),
                    'tenant_id': child_id,
                    'tenant_name': child_name,
                    'alert_type': alert.get('alert_type', 'unknown'),
                    'severity': alert.get('severity', 'medium'),
                    'title': alert.get('title', 'No title'),
                    'message': alert.get('message', 'No message'),
                    'timestamp': alert.get('timestamp', datetime.now().isoformat()),
                    'status': alert.get('status', 'active'),
                    'source_system': alert.get('source_system', 'unknown'),
                    'related_entity_id': alert.get('related_entity_id'),
                    'related_entity_type': alert.get('related_entity_type')
                }

                # Apply filters
                if severity and enriched_alert['severity'] != severity:
                    continue
                if alert_type and enriched_alert['alert_type'] != alert_type:
                    continue
                if status and enriched_alert['status'] != status:
                    continue

                all_alerts.append(enriched_alert)

        # Sort by severity (critical first), then by timestamp (newest first).
        # Timestamps are ISO-8601 strings, so two stable sorts on the string values
        # avoid the invalid int() cast on a timestamp.
        severity_order = {'critical': 1, 'high': 2, 'medium': 3, 'low': 4}
        all_alerts.sort(key=lambda x: x['timestamp'], reverse=True)
        all_alerts.sort(key=lambda x: severity_order.get(x['severity'], 5))

        return all_alerts[:limit]

    except HTTPException:
        # Preserve intentional 4xx responses (e.g. the 403 above) instead of converting them to 500s
        raise
    except Exception as e:
        logger.error("Failed to get network alerts", parent_id=parent_id, error=str(e))
        raise HTTPException(status_code=500, detail=f"Failed to get network alerts: {str(e)}")


@router.get("/tenants/{parent_id}/network/alerts/summary",
            response_model=NetworkAlertsSummary,
            summary="Get network alerts summary")
async def get_network_alerts_summary(
    parent_id: str,
    network_alerts_service: NetworkAlertsService = Depends(get_network_alerts_service),
    verified_tenant: str = Depends(verify_tenant_permission_dep)
):
    """
    Get summary of alerts across the network

    Provides aggregated metrics and statistics about alerts across all child tenants.
    """
    try:
        # Verify this is a parent tenant
        tenant_info = await network_alerts_service.tenant_client.get_tenant(parent_id)
        if tenant_info.get('tenant_type') != 'parent':
            raise HTTPException(
                status_code=403,
                detail="Only parent tenants can access network alerts summary"
            )

        # Get all network alerts
        all_alerts = await network_alerts_service.get_network_alerts(parent_id)

        if not all_alerts:
            return NetworkAlertsSummary(
                total_alerts=0,
                active_alerts=0,
                acknowledged_alerts=0,
                resolved_alerts=0,
                severity_summary=AlertSeveritySummary(
                    critical_count=0,
                    high_count=0,
                    medium_count=0,
                    low_count=0,
                    total_alerts=0
                ),
                type_summary=AlertTypeSummary(
                    inventory_alerts=0,
                    production_alerts=0,
                    delivery_alerts=0,
                    equipment_alerts=0,
                    quality_alerts=0,
                    other_alerts=0
                ),
                most_recent_alert=None
            )

        # Calculate summary metrics
        active_alerts = sum(1 for a in all_alerts if a['status'] == 'active')
        acknowledged_alerts = sum(1 for a in all_alerts if a['status'] == 'acknowledged')
        resolved_alerts = sum(1 for a in all_alerts if a['status'] == 'resolved')

        # Calculate severity summary
        severity_summary = AlertSeveritySummary(
            critical_count=sum(1 for a in all_alerts if a['severity'] == 'critical'),
            high_count=sum(1 for a in all_alerts if a['severity'] == 'high'),
            medium_count=sum(1 for a in all_alerts if a['severity'] == 'medium'),
            low_count=sum(1 for a in all_alerts if a['severity'] == 'low'),
            total_alerts=len(all_alerts)
        )

        # Calculate type summary
        type_summary = AlertTypeSummary(
            inventory_alerts=sum(1 for a in all_alerts if a['alert_type'] == 'inventory'),
            production_alerts=sum(1 for a in all_alerts if a['alert_type'] == 'production'),
            delivery_alerts=sum(1 for a in all_alerts if a['alert_type'] == 'delivery'),
            equipment_alerts=sum(1 for a in all_alerts if a['alert_type'] == 'equipment'),
            quality_alerts=sum(1 for a in all_alerts if a['alert_type'] == 'quality'),
            other_alerts=sum(1 for a in all_alerts if a['alert_type'] not in ['inventory', 'production', 'delivery', 'equipment', 'quality'])
        )

        # Get most recent alert
        most_recent_alert = None
        if all_alerts:
            most_recent_alert = max(all_alerts, key=lambda x: x['timestamp'])

        return NetworkAlertsSummary(
            total_alerts=len(all_alerts),
            active_alerts=active_alerts,
            acknowledged_alerts=acknowledged_alerts,
            resolved_alerts=resolved_alerts,
            severity_summary=severity_summary,
            type_summary=type_summary,
            most_recent_alert=most_recent_alert
        )

    except HTTPException:
        raise
    except Exception as e:
        logger.error("Failed to get network alerts summary", parent_id=parent_id, error=str(e))
        raise HTTPException(status_code=500, detail=f"Failed to get alerts summary: {str(e)}")


@router.get("/tenants/{parent_id}/network/alerts/correlations",
            response_model=List[AlertCorrelation],
            summary="Get correlated alert groups")
async def get_correlated_alerts(
    parent_id: str,
    min_correlation_strength: float = Query(0.7, ge=0.5, le=1.0, description="Minimum correlation strength"),
    network_alerts_service: NetworkAlertsService = Depends(get_network_alerts_service),
    verified_tenant: str = Depends(verify_tenant_permission_dep)
):
    """
    Get groups of correlated alerts

    Identifies alerts that are related or have cascading effects across the network.
    """
    try:
        # Verify this is a parent tenant
        tenant_info = await network_alerts_service.tenant_client.get_tenant(parent_id)
        if tenant_info.get('tenant_type') != 'parent':
            raise HTTPException(
                status_code=403,
                detail="Only parent tenants can access alert correlations"
            )

        # Get all network alerts
        all_alerts = await network_alerts_service.get_network_alerts(parent_id)

        if not all_alerts:
            return []

        # Detect correlations (simplified for demo)
        correlations = await network_alerts_service.detect_alert_correlations(
            all_alerts, min_correlation_strength
        )

        return correlations

    except HTTPException:
        raise
    except Exception as e:
        logger.error("Failed to get correlated alerts", parent_id=parent_id, error=str(e))
        raise HTTPException(status_code=500, detail=f"Failed to get alert correlations: {str(e)}")


@router.post("/tenants/{parent_id}/network/alerts/{alert_id}/acknowledge",
             summary="Acknowledge network alert")
async def acknowledge_network_alert(
    parent_id: str,
    alert_id: str,
    network_alerts_service: NetworkAlertsService = Depends(get_network_alerts_service),
    verified_tenant: str = Depends(verify_tenant_permission_dep)
):
    """
    Acknowledge a network alert

    Marks an alert as acknowledged to indicate it's being addressed.
    """
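    # Example request (hypothetical parent ID; alert_id is the UUID returned by the listing endpoint):
    #   POST /tenants/parent-123/network/alerts/<alert_id>/acknowledge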
    try:
        # Verify this is a parent tenant
        tenant_info = await network_alerts_service.tenant_client.get_tenant(parent_id)
        if tenant_info.get('tenant_type') != 'parent':
            raise HTTPException(
                status_code=403,
                detail="Only parent tenants can acknowledge network alerts"
            )

        # Acknowledge the alert
        result = await network_alerts_service.acknowledge_alert(parent_id, alert_id)

        return {
            'success': True,
            'alert_id': alert_id,
            'status': 'acknowledged',
            'message': 'Alert acknowledged successfully'
        }

    except HTTPException:
        raise
    except Exception as e:
        logger.error("Failed to acknowledge alert", parent_id=parent_id, alert_id=alert_id, error=str(e))
        raise HTTPException(status_code=500, detail=f"Failed to acknowledge alert: {str(e)}")


@router.post("/tenants/{parent_id}/network/alerts/{alert_id}/resolve",
             summary="Resolve network alert")
async def resolve_network_alert(
    parent_id: str,
    alert_id: str,
    resolution_notes: Optional[str] = Query(None, description="Notes about resolution"),
    network_alerts_service: NetworkAlertsService = Depends(get_network_alerts_service),
    verified_tenant: str = Depends(verify_tenant_permission_dep)
):
    """
    Resolve a network alert

    Marks an alert as resolved after the issue has been addressed.
    """
    try:
        # Verify this is a parent tenant
        tenant_info = await network_alerts_service.tenant_client.get_tenant(parent_id)
        if tenant_info.get('tenant_type') != 'parent':
            raise HTTPException(
                status_code=403,
                detail="Only parent tenants can resolve network alerts"
            )

        # Resolve the alert
        result = await network_alerts_service.resolve_alert(parent_id, alert_id, resolution_notes)

        return {
            'success': True,
            'alert_id': alert_id,
            'status': 'resolved',
            'resolution_notes': resolution_notes,
            'message': 'Alert resolved successfully'
        }

    except HTTPException:
        raise
    except Exception as e:
        logger.error("Failed to resolve alert", parent_id=parent_id, alert_id=alert_id, error=str(e))
        raise HTTPException(status_code=500, detail=f"Failed to resolve alert: {str(e)}")


@router.get("/tenants/{parent_id}/network/alerts/trends",
            summary="Get alert trends over time")
async def get_alert_trends(
    parent_id: str,
    days: int = Query(30, ge=7, le=365, description="Number of days to analyze"),
    network_alerts_service: NetworkAlertsService = Depends(get_network_alerts_service),
    verified_tenant: str = Depends(verify_tenant_permission_dep)
):
    """
    Get alert trends over time

    Analyzes how alert patterns change over time to identify systemic issues.
    """
    try:
        # Verify this is a parent tenant
        tenant_info = await network_alerts_service.tenant_client.get_tenant(parent_id)
        if tenant_info.get('tenant_type') != 'parent':
            raise HTTPException(
                status_code=403,
                detail="Only parent tenants can access alert trends"
            )

        # Get alert trends
        trends = await network_alerts_service.get_alert_trends(parent_id, days)

        return {
            'success': True,
            'trends': trends,
            'period': f'Last {days} days'
        }

    except HTTPException:
        raise
    except Exception as e:
        logger.error("Failed to get alert trends", parent_id=parent_id, error=str(e))
        raise HTTPException(status_code=500, detail=f"Failed to get alert trends: {str(e)}")


@router.get("/tenants/{parent_id}/network/alerts/prioritization",
            summary="Get prioritized alerts")
async def get_prioritized_alerts(
    parent_id: str,
    limit: int = Query(10, description="Maximum number of alerts to return"),
    network_alerts_service: NetworkAlertsService = Depends(get_network_alerts_service),
    verified_tenant: str = Depends(verify_tenant_permission_dep)
):
    """
    Get prioritized alerts based on impact and urgency

    Ranks alerts with a severity-and-recency heuristic as a proxy for potential
    business impact and urgency.
    """
    try:
        # Verify this is a parent tenant
        tenant_info = await network_alerts_service.tenant_client.get_tenant(parent_id)
        if tenant_info.get('tenant_type') != 'parent':
            raise HTTPException(
                status_code=403,
                detail="Only parent tenants can access prioritized alerts"
            )

        # Get prioritized alerts
        prioritized_alerts = await network_alerts_service.get_prioritized_alerts(parent_id, limit)

        return {
            'success': True,
            'prioritized_alerts': prioritized_alerts,
            'message': f'Top {len(prioritized_alerts)} prioritized alerts'
        }

    except HTTPException:
        raise
    except Exception as e:
        logger.error("Failed to get prioritized alerts", parent_id=parent_id, error=str(e))
        raise HTTPException(status_code=500, detail=f"Failed to get prioritized alerts: {str(e)}")
@@ -7,7 +7,7 @@ from fastapi import FastAPI
from sqlalchemy import text
from app.core.config import settings
from app.core.database import database_manager
from app.api import tenants, tenant_members, tenant_operations, webhooks, plans, subscription, tenant_settings, whatsapp_admin, usage_forecast, enterprise_upgrade, tenant_locations, tenant_hierarchy, internal_demo
from app.api import tenants, tenant_members, tenant_operations, webhooks, plans, subscription, tenant_settings, whatsapp_admin, usage_forecast, enterprise_upgrade, tenant_locations, tenant_hierarchy, internal_demo, network_alerts
from shared.service_base import StandardFastAPIService

@@ -157,6 +157,7 @@ service.add_router(tenant_locations.router, tags=["tenant-locations"]) # Tenant
service.add_router(internal_demo.router, tags=["internal-demo"]) # Internal demo data cloning
service.add_router(tenant_hierarchy.router, tags=["tenant-hierarchy"]) # Tenant hierarchy endpoints
service.add_router(internal_demo.router, tags=["internal-demo"]) # Internal demo data cloning
service.add_router(network_alerts.router, tags=["network-alerts"]) # Network alerts aggregation endpoints

if __name__ == "__main__":
    import uvicorn

services/tenant/app/services/network_alerts_service.py (new file, 365 lines)
@@ -0,0 +1,365 @@
# services/tenant/app/services/network_alerts_service.py
"""
Network Alerts Service
Business logic for aggregating and managing alerts across enterprise networks
"""

from typing import List, Dict, Any, Optional
from datetime import datetime, timedelta
import uuid
import structlog

logger = structlog.get_logger()


class NetworkAlertsService:
    """
    Service for aggregating and managing alerts across enterprise networks
    """

    def __init__(self, tenant_client, alerts_client):
        self.tenant_client = tenant_client
        self.alerts_client = alerts_client

    async def get_child_tenants(self, parent_id: str) -> List[Dict[str, Any]]:
        """
        Get all child tenants for a parent tenant
        """
        try:
            # Get child tenants from tenant service
            children = await self.tenant_client.get_child_tenants(parent_id)

            # Enrich with tenant details
            enriched_children = []
            for child in children:
                child_details = await self.tenant_client.get_tenant(child['id'])
                enriched_children.append({
                    'id': child['id'],
                    'name': child_details.get('name', f"Outlet {child['id']}"),
                    'subdomain': child_details.get('subdomain'),
                    'city': child_details.get('city')
                })

            return enriched_children

        except Exception as e:
            logger.error("Failed to get child tenants", parent_id=parent_id, error=str(e))
            raise Exception(f"Failed to get child tenants: {str(e)}")

    async def get_alerts_for_tenant(self, tenant_id: str) -> List[Dict[str, Any]]:
        """
        Get alerts for a specific tenant
        """
        try:
            # In a real implementation, this would call the alert service
            # For demo purposes, we'll simulate some alert data

            # Simulate different types of alerts based on tenant type
            simulated_alerts = []

            # Generate some sample alerts
            alert_types = ['inventory', 'production', 'delivery', 'equipment', 'quality']
            severities = ['critical', 'high', 'medium', 'low']

            for i in range(3):  # Generate 3 sample alerts per tenant
                alert = {
                    'alert_id': str(uuid.uuid4()),
                    'tenant_id': tenant_id,
                    'alert_type': alert_types[i % len(alert_types)],
                    'severity': severities[i % len(severities)],
                    'title': f"{alert_types[i % len(alert_types)].title()} Alert Detected",
                    'message': f"Sample {alert_types[i % len(alert_types)]} alert for tenant {tenant_id}",
                    'timestamp': (datetime.now() - timedelta(hours=i)).isoformat(),
                    'status': 'active' if i < 2 else 'resolved',
                    'source_system': f"{alert_types[i % len(alert_types)]}-service",
                    'related_entity_id': f"entity-{i+1}",
                    'related_entity_type': alert_types[i % len(alert_types)]
                }
                simulated_alerts.append(alert)

            return simulated_alerts

        except Exception as e:
            logger.error("Failed to get alerts for tenant", tenant_id=tenant_id, error=str(e))
            raise Exception(f"Failed to get alerts: {str(e)}")

    async def get_network_alerts(self, parent_id: str) -> List[Dict[str, Any]]:
        """
        Get all alerts across the network
        """
        try:
            # Get all child tenants
            child_tenants = await self.get_child_tenants(parent_id)

            # Aggregate alerts from all child tenants
            all_alerts = []

            for child in child_tenants:
                child_id = child['id']
                child_alerts = await self.get_alerts_for_tenant(child_id)
                # Carry the tenant name so response models that require it (NetworkAlert) validate
                for alert in child_alerts:
                    alert.setdefault('tenant_name', child.get('name', child_id))
                all_alerts.extend(child_alerts)

            return all_alerts

        except Exception as e:
            logger.error("Failed to get network alerts", parent_id=parent_id, error=str(e))
            raise Exception(f"Failed to get network alerts: {str(e)}")

    async def detect_alert_correlations(
        self,
        alerts: List[Dict[str, Any]],
        min_correlation_strength: float = 0.7
    ) -> List[Dict[str, Any]]:
        """
        Detect correlations between alerts
        """
        try:
            # Simple correlation detection (in real implementation, this would be more sophisticated)
            correlations = []

            # Group alerts by type (time proximity is ignored in this simplified version)
            alert_groups = {}

            for alert in alerts:
                alert_type = alert['alert_type']

                if alert_type not in alert_groups:
                    alert_groups[alert_type] = []

                alert_groups[alert_type].append(alert)

            # Create correlation groups
            for alert_type, group in alert_groups.items():
                if len(group) >= 2:  # Only create correlations for groups with 2+ alerts
                    primary_alert = group[0]
                    related_alerts = group[1:]

                    correlation = {
                        'correlation_id': str(uuid.uuid4()),
                        'primary_alert': primary_alert,
                        'related_alerts': related_alerts,
                        'correlation_type': 'temporal',
                        'correlation_strength': 0.85,
                        'impact_analysis': f"Multiple {alert_type} alerts detected within short timeframe"
                    }

                    if correlation['correlation_strength'] >= min_correlation_strength:
                        correlations.append(correlation)

            return correlations

        except Exception as e:
            logger.error("Failed to detect alert correlations", error=str(e))
            raise Exception(f"Failed to detect correlations: {str(e)}")
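
    # Illustrative shape of one correlation group produced above (values are examples only):
    #   {
    #       'correlation_id': '<uuid4>',
    #       'primary_alert': {...first alert of the group...},
    #       'related_alerts': [...remaining alerts of the same type...],
    #       'correlation_type': 'temporal',
    #       'correlation_strength': 0.85,
    #       'impact_analysis': 'Multiple inventory alerts detected within short timeframe'
    #   }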

    async def acknowledge_alert(self, parent_id: str, alert_id: str) -> Dict[str, Any]:
        """
        Acknowledge an alert
        """
        try:
            # In a real implementation, this would update the alert status
            # For demo purposes, we'll simulate the operation

            logger.info("Alert acknowledged", parent_id=parent_id, alert_id=alert_id)

            return {
                'success': True,
                'alert_id': alert_id,
                'status': 'acknowledged'
            }

        except Exception as e:
            logger.error("Failed to acknowledge alert", parent_id=parent_id, alert_id=alert_id, error=str(e))
            raise Exception(f"Failed to acknowledge alert: {str(e)}")

    async def resolve_alert(self, parent_id: str, alert_id: str, resolution_notes: Optional[str] = None) -> Dict[str, Any]:
        """
        Resolve an alert
        """
        try:
            # In a real implementation, this would update the alert status
            # For demo purposes, we'll simulate the operation

            logger.info("Alert resolved", parent_id=parent_id, alert_id=alert_id, notes=resolution_notes)

            return {
                'success': True,
                'alert_id': alert_id,
                'status': 'resolved',
                'resolution_notes': resolution_notes
            }

        except Exception as e:
            logger.error("Failed to resolve alert", parent_id=parent_id, alert_id=alert_id, error=str(e))
            raise Exception(f"Failed to resolve alert: {str(e)}")

    async def get_alert_trends(self, parent_id: str, days: int = 30) -> List[Dict[str, Any]]:
        """
        Get alert trends over time
        """
        try:
            # Simulate trend data
            trends = []
            end_date = datetime.now()

            # Generate daily trend data
            for i in range(days):
                date = end_date - timedelta(days=i)

                # Simulate varying alert counts with a simple 7-day sawtooth plus small daily noise
                base_count = 5
                weekly_variation = int((i % 7) * 1.5)
                daily_noise = (i % 3 - 1)

                alert_count = max(1, base_count + weekly_variation + daily_noise)

                trends.append({
                    'date': date.strftime('%Y-%m-%d'),
                    'total_alerts': alert_count,
                    'critical_alerts': max(0, int(alert_count * 0.1)),
                    'high_alerts': max(0, int(alert_count * 0.2)),
                    'medium_alerts': max(0, int(alert_count * 0.4)),
                    'low_alerts': max(0, int(alert_count * 0.3))
                })

            # Sort by date (oldest first)
            trends.sort(key=lambda x: x['date'])

            return trends

        except Exception as e:
            logger.error("Failed to get alert trends", parent_id=parent_id, error=str(e))
            raise Exception(f"Failed to get alert trends: {str(e)}")
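
    # Illustrative shape of one trend record produced above (values are examples only):
    #   {'date': '2025-01-01', 'total_alerts': 8, 'critical_alerts': 0,
    #    'high_alerts': 1, 'medium_alerts': 3, 'low_alerts': 2}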

    async def get_prioritized_alerts(self, parent_id: str, limit: int = 10) -> List[Dict[str, Any]]:
        """
        Get prioritized alerts based on impact and urgency
        """
        try:
            # Get all network alerts
            all_alerts = await self.get_network_alerts(parent_id)

            if not all_alerts:
                return []

            # Simple prioritization (in real implementation, this would use ML)
            # Priority based on severity and recency
            severity_scores = {'critical': 4, 'high': 3, 'medium': 2, 'low': 1}

            for alert in all_alerts:
                severity_score = severity_scores.get(alert['severity'], 1)
                # Recency score: newer alerts get a higher multiplier (3 for today, down to 1 after two days)
                timestamp = datetime.fromisoformat(alert['timestamp'])
                age_days = (datetime.now() - timestamp).days
                recency_score = max(1, 3 - age_days)

                alert['priority_score'] = severity_score * recency_score

            # Sort by priority score (highest first)
            all_alerts.sort(key=lambda x: x['priority_score'], reverse=True)

            # Return top N alerts
            prioritized = all_alerts[:limit]

            # Remove priority score from response
            for alert in prioritized:
                alert.pop('priority_score', None)

            return prioritized

        except Exception as e:
            logger.error("Failed to get prioritized alerts", parent_id=parent_id, error=str(e))
            raise Exception(f"Failed to get prioritized alerts: {str(e)}")
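
    # Worked example of the heuristic above: a 'critical' alert from today scores
    # 4 (severity) * 3 (recency) = 12, while a 'low' alert three days old scores
    # 1 * 1 = 1, so the critical alert is returned first.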


# Helper class for alert analysis
class AlertAnalyzer:
    """
    Helper class for analyzing alert patterns
    """

    @staticmethod
    def calculate_alert_severity_score(alert: Dict[str, Any]) -> float:
        """
        Calculate severity score for an alert
        """
        severity_scores = {'critical': 1.0, 'high': 0.75, 'medium': 0.5, 'low': 0.25}
        return severity_scores.get(alert['severity'], 0.25)

    @staticmethod
    def detect_alert_patterns(alerts: List[Dict[str, Any]]) -> Dict[str, Any]:
        """
        Detect patterns in alert data
        """
        if not alerts:
            return {'patterns': [], 'anomalies': []}

        patterns = []
        anomalies = []

        # Simple pattern detection
        alert_types = [a['alert_type'] for a in alerts]
        type_counts = {}

        for alert_type in alert_types:
            type_counts[alert_type] = type_counts.get(alert_type, 0) + 1

        # Detect if one type dominates
        total_alerts = len(alerts)
        for alert_type, count in type_counts.items():
            if count / total_alerts > 0.6:  # More than 60% of one type
                patterns.append({
                    'type': 'dominant_alert_type',
                    'pattern': f'{alert_type} alerts dominate ({count}/{total_alerts})',
                    'confidence': 0.85
                })

        return {'patterns': patterns, 'anomalies': anomalies}
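
    # Illustrative output when 7 of 10 alerts are 'inventory' alerts:
    #   {'patterns': [{'type': 'dominant_alert_type',
    #                  'pattern': 'inventory alerts dominate (7/10)',
    #                  'confidence': 0.85}],
    #    'anomalies': []}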


# Helper class for alert correlation
class AlertCorrelator:
    """
    Helper class for correlating alerts
    """

    @staticmethod
    def calculate_correlation_strength(alert1: Dict[str, Any], alert2: Dict[str, Any]) -> float:
        """
        Calculate correlation strength between two alerts
        """
        # Simple correlation based on type and time proximity
        same_type = 1.0 if alert1['alert_type'] == alert2['alert_type'] else 0.3

        time1 = datetime.fromisoformat(alert1['timestamp'])
        time2 = datetime.fromisoformat(alert2['timestamp'])
        time_diff_hours = abs((time2 - time1).total_seconds() / 3600)

        # Time proximity score (higher for closer times)
        time_proximity = max(0, 1.0 - min(1.0, time_diff_hours / 24))  # Decays over 24 hours

        return same_type * time_proximity
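
    # Worked example: two 'inventory' alerts six hours apart give same_type = 1.0 and
    # time_proximity = 1.0 - 6/24 = 0.75, so the correlation strength is 0.75;
    # alerts of different types six hours apart score 0.3 * 0.75 = 0.225.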


# Helper class for alert prioritization
class AlertPrioritizer:
    """
    Helper class for prioritizing alerts
    """

    @staticmethod
    def calculate_priority_score(alert: Dict[str, Any]) -> float:
        """
        Calculate priority score for an alert
        """
        # Base score from severity
        severity_scores = {'critical': 100, 'high': 75, 'medium': 50, 'low': 25}
        base_score = severity_scores.get(alert['severity'], 25)

        # Add recency bonus (newer alerts get higher priority)
        timestamp = datetime.fromisoformat(alert['timestamp'])
        hours_old = (datetime.now() - timestamp).total_seconds() / 3600
        recency_bonus = max(0, 50 - hours_old)  # Decays over 50 hours

        return base_score + recency_bonus
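
    # Worked example: a 'critical' alert 10 hours old scores 100 + (50 - 10) = 140,
    # while a 'medium' alert 60 hours old scores 50 + 0 = 50.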