New alert system and panel de control page
This commit is contained in:
@@ -3,7 +3,7 @@
|
||||
Alerts API endpoints for dashboard and alert management
|
||||
"""
|
||||
|
||||
from fastapi import APIRouter, HTTPException, Query, Path
|
||||
from fastapi import APIRouter, HTTPException, Query, Path, Depends
|
||||
from typing import List, Optional
|
||||
from pydantic import BaseModel, Field
|
||||
from uuid import UUID
|
||||
@@ -11,7 +11,8 @@ from datetime import datetime
|
||||
import structlog
|
||||
|
||||
from app.repositories.alerts_repository import AlertsRepository
|
||||
from app.models.alerts import AlertSeverity, AlertStatus
|
||||
from app.models.events import AlertStatus
|
||||
from app.dependencies import get_current_user
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
@@ -28,12 +29,14 @@ class AlertResponse(BaseModel):
|
||||
tenant_id: str
|
||||
item_type: str
|
||||
alert_type: str
|
||||
severity: str
|
||||
priority_level: str
|
||||
priority_score: int
|
||||
status: str
|
||||
service: str
|
||||
title: str
|
||||
message: str
|
||||
actions: Optional[dict] = None
|
||||
type_class: str
|
||||
actions: Optional[List[dict]] = None # smart_actions is a list of action objects
|
||||
alert_metadata: Optional[dict] = None
|
||||
created_at: datetime
|
||||
updated_at: datetime
|
||||
@@ -47,10 +50,10 @@ class AlertsSummaryResponse(BaseModel):
|
||||
"""Alerts summary for dashboard"""
|
||||
total_count: int = Field(..., description="Total number of alerts")
|
||||
active_count: int = Field(..., description="Number of active (unresolved) alerts")
|
||||
critical_count: int = Field(..., description="Number of critical/urgent alerts")
|
||||
high_count: int = Field(..., description="Number of high severity alerts")
|
||||
medium_count: int = Field(..., description="Number of medium severity alerts")
|
||||
low_count: int = Field(..., description="Number of low severity alerts")
|
||||
critical_count: int = Field(..., description="Number of critical priority alerts")
|
||||
high_count: int = Field(..., description="Number of high priority alerts")
|
||||
medium_count: int = Field(..., description="Number of medium priority alerts")
|
||||
low_count: int = Field(..., description="Number of low priority alerts")
|
||||
resolved_count: int = Field(..., description="Number of resolved alerts")
|
||||
acknowledged_count: int = Field(..., description="Number of acknowledged alerts")
|
||||
|
||||
@@ -71,7 +74,7 @@ class AlertsListResponse(BaseModel):
|
||||
"/api/v1/tenants/{tenant_id}/alerts/summary",
|
||||
response_model=AlertsSummaryResponse,
|
||||
summary="Get alerts summary",
|
||||
description="Get summary of alerts by severity and status for dashboard health indicator"
|
||||
description="Get summary of alerts by priority level and status for dashboard health indicator"
|
||||
)
|
||||
async def get_alerts_summary(
|
||||
tenant_id: UUID = Path(..., description="Tenant ID")
|
||||
@@ -79,8 +82,8 @@ async def get_alerts_summary(
|
||||
"""
|
||||
Get alerts summary for dashboard
|
||||
|
||||
Returns counts of alerts grouped by severity and status.
|
||||
Critical count maps to URGENT severity for dashboard compatibility.
|
||||
Returns counts of alerts grouped by priority level and status.
|
||||
Critical count maps to URGENT priority level for dashboard compatibility.
|
||||
"""
|
||||
from app.config import AlertProcessorConfig
|
||||
from shared.database.base import create_database_manager
|
||||
@@ -107,7 +110,7 @@ async def get_alerts_summary(
|
||||
)
|
||||
async def get_alerts(
|
||||
tenant_id: UUID = Path(..., description="Tenant ID"),
|
||||
severity: Optional[str] = Query(None, description="Filter by severity: low, medium, high, urgent"),
|
||||
priority_level: Optional[str] = Query(None, description="Filter by priority level: critical, important, standard, info"),
|
||||
status: Optional[str] = Query(None, description="Filter by status: active, resolved, acknowledged, ignored"),
|
||||
resolved: Optional[bool] = Query(None, description="Filter by resolved status: true=resolved only, false=unresolved only"),
|
||||
limit: int = Query(100, ge=1, le=1000, description="Maximum number of results"),
|
||||
@@ -117,7 +120,7 @@ async def get_alerts(
|
||||
Get filtered list of alerts
|
||||
|
||||
Supports filtering by:
|
||||
- severity: low, medium, high, urgent (maps to "critical" in dashboard)
|
||||
- priority_level: critical, important, standard, info
|
||||
- status: active, resolved, acknowledged, ignored
|
||||
- resolved: boolean filter for resolved status
|
||||
- pagination: limit and offset
|
||||
@@ -126,18 +129,20 @@ async def get_alerts(
|
||||
from shared.database.base import create_database_manager
|
||||
|
||||
try:
|
||||
# Validate severity enum
|
||||
if severity and severity not in [s.value for s in AlertSeverity]:
|
||||
# Validate priority_level enum
|
||||
valid_priority_levels = ['critical', 'important', 'standard', 'info']
|
||||
if priority_level and priority_level not in valid_priority_levels:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail=f"Invalid severity. Must be one of: {[s.value for s in AlertSeverity]}"
|
||||
detail=f"Invalid priority level. Must be one of: {valid_priority_levels}"
|
||||
)
|
||||
|
||||
# Validate status enum
|
||||
if status and status not in [s.value for s in AlertStatus]:
|
||||
valid_status_values = ['active', 'resolved', 'acknowledged', 'ignored']
|
||||
if status and status not in valid_status_values:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail=f"Invalid status. Must be one of: {[s.value for s in AlertStatus]}"
|
||||
detail=f"Invalid status. Must be one of: {valid_status_values}"
|
||||
)
|
||||
|
||||
config = AlertProcessorConfig()
|
||||
@@ -147,7 +152,7 @@ async def get_alerts(
|
||||
repo = AlertsRepository(session)
|
||||
alerts = await repo.get_alerts(
|
||||
tenant_id=tenant_id,
|
||||
severity=severity,
|
||||
priority_level=priority_level,
|
||||
status=status,
|
||||
resolved=resolved,
|
||||
limit=limit,
|
||||
@@ -155,25 +160,42 @@ async def get_alerts(
|
||||
)
|
||||
|
||||
# Convert to response models
|
||||
alert_responses = [
|
||||
AlertResponse(
|
||||
alert_responses = []
|
||||
for alert in alerts:
|
||||
# Handle old format actions (strings) by converting to proper dict format
|
||||
actions = alert.smart_actions
|
||||
if actions and isinstance(actions, list) and len(actions) > 0:
|
||||
# Check if actions are strings (old format)
|
||||
if isinstance(actions[0], str):
|
||||
# Convert old format to new format
|
||||
actions = [
|
||||
{
|
||||
'action_type': action,
|
||||
'label': action.replace('_', ' ').title(),
|
||||
'variant': 'default',
|
||||
'disabled': False
|
||||
}
|
||||
for action in actions
|
||||
]
|
||||
|
||||
alert_responses.append(AlertResponse(
|
||||
id=str(alert.id),
|
||||
tenant_id=str(alert.tenant_id),
|
||||
item_type=alert.item_type,
|
||||
alert_type=alert.alert_type,
|
||||
severity=alert.severity,
|
||||
status=alert.status,
|
||||
priority_level=alert.priority_level.value if hasattr(alert.priority_level, 'value') else alert.priority_level,
|
||||
priority_score=alert.priority_score,
|
||||
status=alert.status.value if hasattr(alert.status, 'value') else alert.status,
|
||||
service=alert.service,
|
||||
title=alert.title,
|
||||
message=alert.message,
|
||||
actions=alert.actions,
|
||||
type_class=alert.type_class.value if hasattr(alert.type_class, 'value') else alert.type_class,
|
||||
actions=actions, # Use converted actions
|
||||
alert_metadata=alert.alert_metadata,
|
||||
created_at=alert.created_at,
|
||||
updated_at=alert.updated_at,
|
||||
resolved_at=alert.resolved_at
|
||||
)
|
||||
for alert in alerts
|
||||
]
|
||||
))
|
||||
|
||||
return AlertsListResponse(
|
||||
alerts=alert_responses,
|
||||
@@ -214,17 +236,35 @@ async def get_alert(
|
||||
if not alert:
|
||||
raise HTTPException(status_code=404, detail="Alert not found")
|
||||
|
||||
# Handle old format actions (strings) by converting to proper dict format
|
||||
actions = alert.smart_actions
|
||||
if actions and isinstance(actions, list) and len(actions) > 0:
|
||||
# Check if actions are strings (old format)
|
||||
if isinstance(actions[0], str):
|
||||
# Convert old format to new format
|
||||
actions = [
|
||||
{
|
||||
'action_type': action,
|
||||
'label': action.replace('_', ' ').title(),
|
||||
'variant': 'default',
|
||||
'disabled': False
|
||||
}
|
||||
for action in actions
|
||||
]
|
||||
|
||||
return AlertResponse(
|
||||
id=str(alert.id),
|
||||
tenant_id=str(alert.tenant_id),
|
||||
item_type=alert.item_type,
|
||||
alert_type=alert.alert_type,
|
||||
severity=alert.severity,
|
||||
status=alert.status,
|
||||
priority_level=alert.priority_level.value if hasattr(alert.priority_level, 'value') else alert.priority_level,
|
||||
priority_score=alert.priority_score,
|
||||
status=alert.status.value if hasattr(alert.status, 'value') else alert.status,
|
||||
service=alert.service,
|
||||
title=alert.title,
|
||||
message=alert.message,
|
||||
actions=alert.actions,
|
||||
type_class=alert.type_class.value if hasattr(alert.type_class, 'value') else alert.type_class,
|
||||
actions=actions, # Use converted actions
|
||||
alert_metadata=alert.alert_metadata,
|
||||
created_at=alert.created_at,
|
||||
updated_at=alert.updated_at,
|
||||
@@ -236,3 +276,242 @@ async def get_alert(
|
||||
except Exception as e:
|
||||
logger.error("Error getting alert", error=str(e), alert_id=str(alert_id))
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.post(
    "/api/v1/tenants/{tenant_id}/alerts/{alert_id}/cancel-auto-action",
    summary="Cancel auto-action for escalation alert",
    description="Cancel the pending auto-action for an escalation-type alert"
)
async def cancel_auto_action(
    tenant_id: UUID = Path(..., description="Tenant ID"),
    alert_id: UUID = Path(..., description="Alert ID")
) -> dict:
    """
    Cancel the auto-action scheduled for an escalation alert.

    The alert is looked up by tenant and alert ID, flagged as cancelled in its
    metadata (and in its urgency context when one is present), and its type
    class is switched from ``escalation`` to ``action_needed``.

    Args:
        tenant_id: Tenant that owns the alert.
        alert_id: Alert whose pending auto-action should be cancelled.

    Returns:
        A dict with ``success``, ``alert_id``, ``message`` and
        ``updated_type_class`` keys.

    Raises:
        HTTPException: 404 when the alert does not exist, 400 when the alert
            is not an escalation alert, 500 on any other failure.
    """
    from app.config import AlertProcessorConfig
    from shared.database.base import create_database_manager

    try:
        config = AlertProcessorConfig()
        db_manager = create_database_manager(config.DATABASE_URL, "alert-processor")

        async with db_manager.get_session() as session:
            repo = AlertsRepository(session)
            alert = await repo.get_alert_by_id(tenant_id, alert_id)

            if not alert:
                raise HTTPException(status_code=404, detail="Alert not found")

            # type_class may be an enum member or a plain string (the read
            # endpoints in this module guard for both), so normalise before
            # comparing against the string value.
            current_type_class = getattr(alert.type_class, 'value', alert.type_class)
            if current_type_class != 'escalation':
                raise HTTPException(
                    status_code=400,
                    detail="Alert is not an escalation type, no auto-action to cancel"
                )

            # Assign NEW dict objects instead of mutating in place: presumably
            # these are JSON columns, and SQLAlchemy does not detect in-place
            # mutation of a plain JSON value, so the update could otherwise be
            # silently dropped at commit time.
            alert.alert_metadata = {
                **(alert.alert_metadata or {}),
                'auto_action_cancelled': True,
                'auto_action_cancelled_at': datetime.utcnow().isoformat(),
            }

            # Only touch the urgency context when one already exists.
            if alert.urgency_context:
                alert.urgency_context = {
                    **alert.urgency_context,
                    'auto_action_countdown_seconds': None,
                    'auto_action_cancelled': True,
                }

            # Change type class from escalation to action_needed
            alert.type_class = 'action_needed'

            await session.commit()
            await session.refresh(alert)

            # After refresh the attribute may come back as an enum or a str;
            # calling .value unconditionally would fail on a plain string.
            updated_type_class = getattr(alert.type_class, 'value', alert.type_class)

            logger.info("Auto-action cancelled", alert_id=str(alert_id), tenant_id=str(tenant_id))

            return {
                "success": True,
                "alert_id": str(alert_id),
                "message": "Auto-action cancelled successfully",
                "updated_type_class": updated_type_class
            }

    except HTTPException:
        # Deliberate 4xx responses must not be rewritten into a 500 below.
        raise
    except Exception as e:
        logger.error("Error cancelling auto-action", error=str(e), alert_id=str(alert_id))
        raise HTTPException(status_code=500, detail=str(e)) from e
|
||||
|
||||
|
||||
@router.post(
    "/api/v1/tenants/{tenant_id}/alerts/{alert_id}/acknowledge",
    summary="Acknowledge alert",
    description="Mark alert as acknowledged"
)
async def acknowledge_alert(
    tenant_id: UUID = Path(..., description="Tenant ID"),
    alert_id: UUID = Path(..., description="Alert ID")
) -> dict:
    """
    Mark an alert as acknowledged.

    Args:
        tenant_id: Tenant that owns the alert.
        alert_id: Alert to acknowledge.

    Returns:
        A dict with ``success``, ``alert_id`` and the new ``status`` value.

    Raises:
        HTTPException: 404 when the alert does not exist, 500 on any other
            failure.
    """
    from app.config import AlertProcessorConfig
    from shared.database.base import create_database_manager
    from app.models.events import AlertStatus

    try:
        config = AlertProcessorConfig()
        db_manager = create_database_manager(config.DATABASE_URL, "alert-processor")

        async with db_manager.get_session() as session:
            repo = AlertsRepository(session)
            alert = await repo.get_alert_by_id(tenant_id, alert_id)

            if not alert:
                raise HTTPException(status_code=404, detail="Alert not found")

            new_status = AlertStatus.ACKNOWLEDGED
            alert.status = new_status
            await session.commit()

            logger.info("Alert acknowledged", alert_id=str(alert_id), tenant_id=str(tenant_id))

            # Report the value that was just written rather than reading
            # alert.status back: if the session expires instances on commit,
            # that attribute access would trigger an implicit reload, which
            # an async session cannot perform outside an awaited call.
            return {
                "success": True,
                "alert_id": str(alert_id),
                "status": new_status.value
            }

    except HTTPException:
        # Keep the 404 intact instead of converting it into a 500 below.
        raise
    except Exception as e:
        logger.error("Error acknowledging alert", error=str(e), alert_id=str(alert_id))
        raise HTTPException(status_code=500, detail=str(e)) from e
|
||||
|
||||
|
||||
@router.post(
    "/api/v1/tenants/{tenant_id}/alerts/{alert_id}/resolve",
    summary="Resolve alert",
    description="Mark alert as resolved"
)
async def resolve_alert(
    tenant_id: UUID = Path(..., description="Tenant ID"),
    alert_id: UUID = Path(..., description="Alert ID")
) -> dict:
    """
    Mark an alert as resolved and stamp its resolution time.

    Args:
        tenant_id: Tenant that owns the alert.
        alert_id: Alert to resolve.

    Returns:
        A dict with ``success``, ``alert_id``, the new ``status`` value and
        ``resolved_at`` as an ISO-8601 string.

    Raises:
        HTTPException: 404 when the alert does not exist, 500 on any other
            failure.
    """
    from app.config import AlertProcessorConfig
    from shared.database.base import create_database_manager
    from app.models.events import AlertStatus

    try:
        config = AlertProcessorConfig()
        db_manager = create_database_manager(config.DATABASE_URL, "alert-processor")

        async with db_manager.get_session() as session:
            repo = AlertsRepository(session)
            alert = await repo.get_alert_by_id(tenant_id, alert_id)

            if not alert:
                raise HTTPException(status_code=404, detail="Alert not found")

            # Keep local copies of what is written so the response does not
            # need to read ORM attributes back after the commit (which may
            # have expired them and would force an implicit reload).
            new_status = AlertStatus.RESOLVED
            resolved_at = datetime.utcnow()

            alert.status = new_status
            alert.resolved_at = resolved_at
            await session.commit()

            logger.info("Alert resolved", alert_id=str(alert_id), tenant_id=str(tenant_id))

            return {
                "success": True,
                "alert_id": str(alert_id),
                "status": new_status.value,
                "resolved_at": resolved_at.isoformat()
            }

    except HTTPException:
        # Keep the 404 intact instead of converting it into a 500 below.
        raise
    except Exception as e:
        logger.error("Error resolving alert", error=str(e), alert_id=str(alert_id))
        raise HTTPException(status_code=500, detail=str(e)) from e
|
||||
|
||||
|
||||
@router.post(
    "/api/v1/tenants/{tenant_id}/alerts/digest/send",
    summary="Send email digest for alerts"
)
async def send_alert_digest(
    tenant_id: UUID = Path(..., description="Tenant ID"),
    days: int = Query(1, ge=1, le=7, description="Number of days to include in digest"),
    digest_type: str = Query("daily", description="Type of digest: daily or weekly"),
    user_email: str = Query(..., description="Email address to send digest to"),
    user_name: Optional[str] = Query(None, description="User name for personalization"),
    current_user: dict = Depends(get_current_user)
):
    """
    Send email digest of alerts.

    Digest includes:
    - AI Impact Summary (prevented issues, savings)
    - Prevented Issues List with AI reasoning
    - Action Needed Alerts
    - Trend Warnings

    Args:
        tenant_id: Tenant whose alerts are collected.
        days: Look-back window in days (1-7); alerts created before the
            cutoff are excluded.
        digest_type: ``"weekly"`` sends the weekly digest; any other value
            sends the daily digest.
        user_email: Recipient address.
        user_name: Optional recipient name passed to the digest service.
        current_user: Resolved by the ``get_current_user`` dependency; not
            read in the body.

    Returns:
        A dict describing the outcome. When no alerts fall in the window the
        dict has ``success=False`` and ``alert_count=0`` and nothing is sent.

    Raises:
        HTTPException: 500 when fetching the alerts or sending the digest
            fails.
    """
    from app.config import AlertProcessorConfig
    from shared.database.base import create_database_manager
    from app.models.events import Alert
    from app.services.enrichment.email_digest import EmailDigestService
    from sqlalchemy import select, and_
    from datetime import datetime, timedelta

    # NOTE(review): the recipient comes straight from the query string and is
    # not checked against current_user here - confirm that is intended.
    try:
        config = AlertProcessorConfig()
        db_manager = create_database_manager(config.DATABASE_URL, "alert-processor")

        async with db_manager.get_session() as session:
            cutoff_date = datetime.utcnow() - timedelta(days=days)

            # Fetch alerts from the specified period, newest first
            query = select(Alert).where(
                and_(
                    Alert.tenant_id == tenant_id,
                    Alert.created_at >= cutoff_date
                )
            ).order_by(Alert.created_at.desc())

            result = await session.execute(query)
            alerts = result.scalars().all()

            if not alerts:
                return {
                    "success": False,
                    "message": "No alerts found for the specified period",
                    "alert_count": 0
                }

            # Send digest
            digest_service = EmailDigestService(config)

            # NOTE(review): any digest_type other than "weekly" falls through
            # to the daily digest, yet the response echoes the requested value.
            if digest_type == "weekly":
                success = await digest_service.send_weekly_digest(
                    tenant_id=tenant_id,
                    alerts=alerts,
                    user_email=user_email,
                    user_name=user_name
                )
            else:
                success = await digest_service.send_daily_digest(
                    tenant_id=tenant_id,
                    alerts=alerts,
                    user_email=user_email,
                    user_name=user_name
                )

            return {
                "success": success,
                "message": f"{'Successfully sent' if success else 'Failed to send'} {digest_type} digest",
                "alert_count": len(alerts),
                "digest_type": digest_type,
                "recipient": user_email
            }

    except HTTPException:
        # Matches the other alert endpoints: never rewrite an explicit HTTP
        # error raised further down the stack into a generic 500.
        raise
    except Exception as e:
        logger.error("Error sending email digest", error=str(e), tenant_id=str(tenant_id))
        raise HTTPException(status_code=500, detail=f"Failed to send email digest: {str(e)}") from e
|
||||
|
||||
Reference in New Issue
Block a user