518 lines
19 KiB
Python
518 lines
19 KiB
Python
# services/alert_processor/app/api/alerts.py
|
|
"""
|
|
Alerts API endpoints for dashboard and alert management
|
|
"""
|
|
|
|
from fastapi import APIRouter, HTTPException, Query, Path, Depends
|
|
from typing import List, Optional
|
|
from pydantic import BaseModel, Field
|
|
from uuid import UUID
|
|
from datetime import datetime
|
|
import structlog
|
|
|
|
from app.repositories.alerts_repository import AlertsRepository
|
|
from app.models.events import AlertStatus
|
|
from app.dependencies import get_current_user
|
|
|
|
# Structured logger shared by the endpoints in this module.
logger = structlog.get_logger()

# Router on which the alert endpoints below are registered.
router = APIRouter()
|
|
|
|
|
|
# ============================================================
|
|
# Response Models
|
|
# ============================================================
|
|
|
|
class AlertResponse(BaseModel):
    """Serialized representation of a single alert returned by the API."""

    # --- identity ---
    id: str
    tenant_id: str

    # --- classification / priority ---
    item_type: str
    alert_type: str
    priority_level: str
    priority_score: int
    status: str
    service: str

    # --- content ---
    title: str
    message: str
    type_class: str

    # List of action objects (the alert's smart actions); may be absent.
    actions: Optional[List[dict]] = None
    alert_metadata: Optional[dict] = None

    # --- timestamps ---
    created_at: datetime
    updated_at: datetime
    resolved_at: Optional[datetime] = None

    class Config:
        # pydantic setting: allow populating the model from object attributes.
        from_attributes = True
|
|
|
|
|
|
class AlertsSummaryResponse(BaseModel):
    """Aggregate alert counters consumed by the dashboard."""

    # Overall totals
    total_count: int = Field(
        ...,
        description="Total number of alerts",
    )
    active_count: int = Field(
        ...,
        description="Number of active (unresolved) alerts",
    )

    # Breakdown by priority
    critical_count: int = Field(
        ...,
        description="Number of critical priority alerts",
    )
    high_count: int = Field(
        ...,
        description="Number of high priority alerts",
    )
    medium_count: int = Field(
        ...,
        description="Number of medium priority alerts",
    )
    low_count: int = Field(
        ...,
        description="Number of low priority alerts",
    )

    # Breakdown by lifecycle state
    resolved_count: int = Field(
        ...,
        description="Number of resolved alerts",
    )
    acknowledged_count: int = Field(
        ...,
        description="Number of acknowledged alerts",
    )
|
|
|
|
|
|
class AlertsListResponse(BaseModel):
    """One page of alerts together with the paging parameters used."""

    # Alerts contained in this page.
    alerts: List[AlertResponse]

    # Count reported alongside the page, plus the limit/offset of the request.
    total: int
    limit: int
    offset: int
|
|
|
|
|
|
# ============================================================
|
|
# API Endpoints
|
|
# ============================================================
|
|
|
|
@router.get(
    "/api/v1/tenants/{tenant_id}/alerts/summary",
    response_model=AlertsSummaryResponse,
    summary="Get alerts summary",
    description="Get summary of alerts by priority level and status for dashboard health indicator"
)
async def get_alerts_summary(
    tenant_id: UUID = Path(..., description="Tenant ID")
) -> AlertsSummaryResponse:
    """
    Return the dashboard alert counters for one tenant.

    The counters come from ``AlertsRepository.get_alerts_summary`` and are
    unpacked directly into ``AlertsSummaryResponse``.

    NOTE(review): the original notes say the critical count maps to the
    URGENT priority level for dashboard compatibility; that mapping is done
    in the repository and is not visible here.

    Raises:
        HTTPException: 500 with the error text if anything fails.
    """
    from app.config import AlertProcessorConfig
    from shared.database.base import create_database_manager

    try:
        settings = AlertProcessorConfig()
        manager = create_database_manager(settings.DATABASE_URL, "alert-processor")

        async with manager.get_session() as db:
            counters = await AlertsRepository(db).get_alerts_summary(tenant_id)
            return AlertsSummaryResponse(**counters)

    except Exception as exc:
        logger.error("Error getting alerts summary", error=str(exc), tenant_id=str(tenant_id))
        raise HTTPException(status_code=500, detail=str(exc))
|
|
|
|
|
|
@router.get(
    "/api/v1/tenants/{tenant_id}/alerts",
    response_model=AlertsListResponse,
    summary="Get alerts list",
    description="Get filtered list of alerts with pagination"
)
async def get_alerts(
    tenant_id: UUID = Path(..., description="Tenant ID"),
    priority_level: Optional[str] = Query(None, description="Filter by priority level: critical, important, standard, info"),
    status: Optional[str] = Query(None, description="Filter by status: active, resolved, acknowledged, ignored"),
    resolved: Optional[bool] = Query(None, description="Filter by resolved status: true=resolved only, false=unresolved only"),
    limit: int = Query(100, ge=1, le=1000, description="Maximum number of results"),
    offset: int = Query(0, ge=0, description="Pagination offset")
) -> AlertsListResponse:
    """
    Return one page of a tenant's alerts, optionally filtered.

    Filters:
    - priority_level: critical, important, standard, info
    - status: active, resolved, acknowledged, ignored
    - resolved: boolean filter for resolved status
    - limit / offset: pagination window

    The ``total`` field of the response is the number of alerts in the
    returned page, not the overall number of matching alerts.

    Raises:
        HTTPException: 400 for an unknown priority level or status,
            500 with the error text for any other failure.
    """
    from app.config import AlertProcessorConfig
    from shared.database.base import create_database_manager

    def _plain(raw):
        # Enum-like values expose `.value`; anything else is passed through.
        return raw.value if hasattr(raw, 'value') else raw

    def _as_action_dicts(raw_actions):
        # Old-format rows store actions as bare strings; expand those into
        # the dict shape. The format is decided by the first element only.
        if raw_actions and isinstance(raw_actions, list) and isinstance(raw_actions[0], str):
            return [
                {
                    'action_type': name,
                    'label': name.replace('_', ' ').title(),
                    'variant': 'default',
                    'disabled': False
                }
                for name in raw_actions
            ]
        return raw_actions

    def _serialize(row):
        # Build the API model for one alert row.
        converted = _as_action_dicts(row.smart_actions)
        return AlertResponse(
            id=str(row.id),
            tenant_id=str(row.tenant_id),
            item_type=row.item_type,
            alert_type=row.alert_type,
            priority_level=_plain(row.priority_level),
            priority_score=row.priority_score,
            status=_plain(row.status),
            service=row.service,
            title=row.title,
            message=row.message,
            type_class=_plain(row.type_class),
            actions=converted,
            alert_metadata=row.alert_metadata,
            created_at=row.created_at,
            updated_at=row.updated_at,
            resolved_at=row.resolved_at
        )

    try:
        # Reject unknown priority levels.
        valid_priority_levels = ['critical', 'important', 'standard', 'info']
        if priority_level and priority_level not in valid_priority_levels:
            raise HTTPException(
                status_code=400,
                detail=f"Invalid priority level. Must be one of: {valid_priority_levels}"
            )

        # Reject unknown status values.
        valid_status_values = ['active', 'resolved', 'acknowledged', 'ignored']
        if status and status not in valid_status_values:
            raise HTTPException(
                status_code=400,
                detail=f"Invalid status. Must be one of: {valid_status_values}"
            )

        settings = AlertProcessorConfig()
        manager = create_database_manager(settings.DATABASE_URL, "alert-processor")

        async with manager.get_session() as db:
            rows = await AlertsRepository(db).get_alerts(
                tenant_id=tenant_id,
                priority_level=priority_level,
                status=status,
                resolved=resolved,
                limit=limit,
                offset=offset
            )

            page = [_serialize(row) for row in rows]

            return AlertsListResponse(
                alerts=page,
                total=len(page),  # page size only; no separate count query is made
                limit=limit,
                offset=offset
            )

    except HTTPException:
        # Validation errors raised above keep their own status code.
        raise
    except Exception as exc:
        logger.error("Error getting alerts", error=str(exc), tenant_id=str(tenant_id))
        raise HTTPException(status_code=500, detail=str(exc))
|
|
|
|
|
|
@router.get(
    "/api/v1/tenants/{tenant_id}/alerts/{alert_id}",
    response_model=AlertResponse,
    summary="Get alert by ID",
    description="Get a specific alert by its ID"
)
async def get_alert(
    tenant_id: UUID = Path(..., description="Tenant ID"),
    alert_id: UUID = Path(..., description="Alert ID")
) -> AlertResponse:
    """
    Fetch a single alert of a tenant and return it as ``AlertResponse``.

    Raises:
        HTTPException: 404 when the repository returns no alert,
            500 with the error text for any other failure.
    """
    from app.config import AlertProcessorConfig
    from shared.database.base import create_database_manager

    def _plain(raw):
        # Enum-like values expose `.value`; anything else is passed through.
        return raw.value if hasattr(raw, 'value') else raw

    try:
        settings = AlertProcessorConfig()
        manager = create_database_manager(settings.DATABASE_URL, "alert-processor")

        async with manager.get_session() as db:
            found = await AlertsRepository(db).get_alert_by_id(tenant_id, alert_id)

            if not found:
                raise HTTPException(status_code=404, detail="Alert not found")

            # Old-format rows store actions as bare strings; expand those into
            # the dict shape. The format is decided by the first element only.
            converted = found.smart_actions
            if converted and isinstance(converted, list) and isinstance(converted[0], str):
                converted = [
                    {
                        'action_type': name,
                        'label': name.replace('_', ' ').title(),
                        'variant': 'default',
                        'disabled': False
                    }
                    for name in converted
                ]

            return AlertResponse(
                id=str(found.id),
                tenant_id=str(found.tenant_id),
                item_type=found.item_type,
                alert_type=found.alert_type,
                priority_level=_plain(found.priority_level),
                priority_score=found.priority_score,
                status=_plain(found.status),
                service=found.service,
                title=found.title,
                message=found.message,
                type_class=_plain(found.type_class),
                actions=converted,
                alert_metadata=found.alert_metadata,
                created_at=found.created_at,
                updated_at=found.updated_at,
                resolved_at=found.resolved_at
            )

    except HTTPException:
        # Keep the 404 raised above intact.
        raise
    except Exception as exc:
        logger.error("Error getting alert", error=str(exc), alert_id=str(alert_id))
        raise HTTPException(status_code=500, detail=str(exc))
|
|
|
|
|
|
@router.post(
    "/api/v1/tenants/{tenant_id}/alerts/{alert_id}/cancel-auto-action",
    summary="Cancel auto-action for escalation alert",
    description="Cancel the pending auto-action for an escalation-type alert"
)
async def cancel_auto_action(
    tenant_id: UUID = Path(..., description="Tenant ID"),
    alert_id: UUID = Path(..., description="Alert ID")
) -> dict:
    """
    Cancel the auto-action scheduled for an escalation alert.

    Marks the cancellation in the alert's metadata (and in its urgency
    context when one exists), switches the alert's type class from
    ``escalation`` to ``action_needed`` and commits the change.

    Returns:
        A dict with ``success``, ``alert_id``, ``message`` and
        ``updated_type_class``.

    Raises:
        HTTPException: 404 when the alert does not exist, 400 when it is
            not an escalation alert, 500 with the error text otherwise.
    """
    from app.config import AlertProcessorConfig
    from shared.database.base import create_database_manager

    def _plain(raw):
        # type_class may be an enum member or a plain string (the read
        # endpoints guard for both), so normalise before comparing/returning.
        return raw.value if hasattr(raw, 'value') else raw

    try:
        config = AlertProcessorConfig()
        db_manager = create_database_manager(config.DATABASE_URL, "alert-processor")

        async with db_manager.get_session() as session:
            repo = AlertsRepository(session)
            alert = await repo.get_alert_by_id(tenant_id, alert_id)

            if not alert:
                raise HTTPException(status_code=404, detail="Alert not found")

            # Only escalation alerts carry an auto-action that can be cancelled.
            if _plain(alert.type_class) != 'escalation':
                raise HTTPException(
                    status_code=400,
                    detail="Alert is not an escalation type, no auto-action to cancel"
                )

            # Assign fresh dicts instead of mutating in place.
            # NOTE(review): unless these columns use SQLAlchemy mutation
            # tracking, in-place item assignment on a loaded dict is not
            # detected as a change and would not be persisted on commit.
            alert.alert_metadata = {
                **(alert.alert_metadata or {}),
                'auto_action_cancelled': True,
                'auto_action_cancelled_at': datetime.utcnow().isoformat(),
            }

            # Remove the countdown from the urgency context, if there is one.
            if alert.urgency_context:
                alert.urgency_context = {
                    **alert.urgency_context,
                    'auto_action_countdown_seconds': None,
                    'auto_action_cancelled': True,
                }

            # Change type class from escalation to action_needed
            alert.type_class = 'action_needed'

            await session.commit()
            await session.refresh(alert)

            logger.info("Auto-action cancelled", alert_id=str(alert_id), tenant_id=str(tenant_id))

            return {
                "success": True,
                "alert_id": str(alert_id),
                "message": "Auto-action cancelled successfully",
                # Guarded: the refreshed attribute may be a plain string.
                "updated_type_class": _plain(alert.type_class)
            }

    except HTTPException:
        # Keep the 404/400 raised above intact.
        raise
    except Exception as e:
        logger.error("Error cancelling auto-action", error=str(e), alert_id=str(alert_id))
        raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
|
|
@router.post(
    "/api/v1/tenants/{tenant_id}/alerts/{alert_id}/acknowledge",
    summary="Acknowledge alert",
    description="Mark alert as acknowledged"
)
async def acknowledge_alert(
    tenant_id: UUID = Path(..., description="Tenant ID"),
    alert_id: UUID = Path(..., description="Alert ID")
) -> dict:
    """
    Mark an alert as acknowledged.

    Returns:
        A dict with ``success``, ``alert_id`` and the new ``status`` value.

    Raises:
        HTTPException: 404 when the alert does not exist,
            500 with the error text for any other failure.
    """
    from app.config import AlertProcessorConfig
    from shared.database.base import create_database_manager

    try:
        config = AlertProcessorConfig()
        db_manager = create_database_manager(config.DATABASE_URL, "alert-processor")

        async with db_manager.get_session() as session:
            repo = AlertsRepository(session)
            alert = await repo.get_alert_by_id(tenant_id, alert_id)

            if not alert:
                raise HTTPException(status_code=404, detail="Alert not found")

            new_status = AlertStatus.ACKNOWLEDGED
            alert.status = new_status
            await session.commit()

            logger.info("Alert acknowledged", alert_id=str(alert_id), tenant_id=str(tenant_id))

            return {
                "success": True,
                "alert_id": str(alert_id),
                # Taken from the local constant rather than from `alert`:
                # NOTE(review): if the session expires instances on commit,
                # reading alert.status here without a refresh would trigger
                # implicit IO on an async session.
                "status": new_status.value
            }

    except HTTPException:
        # Keep the 404 raised above intact.
        raise
    except Exception as e:
        logger.error("Error acknowledging alert", error=str(e), alert_id=str(alert_id))
        raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
|
|
@router.post(
    "/api/v1/tenants/{tenant_id}/alerts/{alert_id}/resolve",
    summary="Resolve alert",
    description="Mark alert as resolved"
)
async def resolve_alert(
    tenant_id: UUID = Path(..., description="Tenant ID"),
    alert_id: UUID = Path(..., description="Alert ID")
) -> dict:
    """
    Mark an alert as resolved and stamp its resolution time.

    Returns:
        A dict with ``success``, ``alert_id``, the new ``status`` value and
        ``resolved_at`` as an ISO-8601 string.

    Raises:
        HTTPException: 404 when the alert does not exist,
            500 with the error text for any other failure.
    """
    from app.config import AlertProcessorConfig
    from shared.database.base import create_database_manager

    try:
        config = AlertProcessorConfig()
        db_manager = create_database_manager(config.DATABASE_URL, "alert-processor")

        async with db_manager.get_session() as session:
            repo = AlertsRepository(session)
            alert = await repo.get_alert_by_id(tenant_id, alert_id)

            if not alert:
                raise HTTPException(status_code=404, detail="Alert not found")

            # Keep local copies of what is written so the response does not
            # have to read the instance again after the commit.
            # NOTE(review): if the session expires instances on commit,
            # reading alert.status / alert.resolved_at afterwards without a
            # refresh would trigger implicit IO on an async session.
            new_status = AlertStatus.RESOLVED
            resolved_at = datetime.utcnow()

            alert.status = new_status
            alert.resolved_at = resolved_at
            await session.commit()

            logger.info("Alert resolved", alert_id=str(alert_id), tenant_id=str(tenant_id))

            return {
                "success": True,
                "alert_id": str(alert_id),
                "status": new_status.value,
                "resolved_at": resolved_at.isoformat()
            }

    except HTTPException:
        # Keep the 404 raised above intact.
        raise
    except Exception as e:
        logger.error("Error resolving alert", error=str(e), alert_id=str(alert_id))
        raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
|
|
@router.post(
    "/api/v1/tenants/{tenant_id}/alerts/digest/send",
    summary="Send email digest for alerts"
)
async def send_alert_digest(
    tenant_id: UUID = Path(..., description="Tenant ID"),
    days: int = Query(1, ge=1, le=7, description="Number of days to include in digest"),
    digest_type: str = Query("daily", description="Type of digest: daily or weekly"),
    user_email: str = Query(..., description="Email address to send digest to"),
    user_name: Optional[str] = Query(None, description="User name for personalization"),
    current_user: dict = Depends(get_current_user)
):
    """
    Send an email digest of a tenant's recent alerts.

    Loads the tenant's alerts created within the last ``days`` days (newest
    first) and hands them to ``EmailDigestService``, using the weekly or the
    daily sender depending on ``digest_type``.

    Per the original notes the digest includes:
    - AI Impact Summary (prevented issues, savings)
    - Prevented Issues List with AI reasoning
    - Action Needed Alerts
    - Trend Warnings

    Returns:
        A dict describing the outcome. When no alerts fall in the period the
        digest is not sent and ``success`` is False with ``alert_count`` 0.

    Raises:
        HTTPException: 400 when ``digest_type`` is neither ``daily`` nor
            ``weekly``; 500 for any other failure.
    """
    from app.config import AlertProcessorConfig
    from shared.database.base import create_database_manager
    from app.models.events import Alert
    from app.services.enrichment.email_digest import EmailDigestService
    from sqlalchemy import select, and_
    from datetime import timedelta

    # Reject unknown digest types up front; previously anything other than
    # "weekly" silently sent a daily digest while the response echoed the
    # unrecognised value.
    valid_digest_types = ['daily', 'weekly']
    if digest_type not in valid_digest_types:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid digest type. Must be one of: {valid_digest_types}"
        )

    try:
        config = AlertProcessorConfig()
        db_manager = create_database_manager(config.DATABASE_URL, "alert-processor")

        async with db_manager.get_session() as session:
            cutoff_date = datetime.utcnow() - timedelta(days=days)

            # Fetch alerts from the specified period
            query = select(Alert).where(
                and_(
                    Alert.tenant_id == tenant_id,
                    Alert.created_at >= cutoff_date
                )
            ).order_by(Alert.created_at.desc())

            result = await session.execute(query)
            alerts = result.scalars().all()

            if not alerts:
                return {
                    "success": False,
                    "message": "No alerts found for the specified period",
                    "alert_count": 0
                }

            # Send digest
            digest_service = EmailDigestService(config)

            if digest_type == "weekly":
                success = await digest_service.send_weekly_digest(
                    tenant_id=tenant_id,
                    alerts=alerts,
                    user_email=user_email,
                    user_name=user_name
                )
            else:
                success = await digest_service.send_daily_digest(
                    tenant_id=tenant_id,
                    alerts=alerts,
                    user_email=user_email,
                    user_name=user_name
                )

            return {
                "success": success,
                "message": f"{'Successfully sent' if success else 'Failed to send'} {digest_type} digest",
                "alert_count": len(alerts),
                "digest_type": digest_type,
                "recipient": user_email
            }

    except HTTPException:
        # Same pass-through as the other endpoints: keep explicit HTTP errors.
        raise
    except Exception as e:
        logger.error("Error sending email digest", error=str(e), tenant_id=str(tenant_id))
        raise HTTPException(status_code=500, detail=f"Failed to send email digest: {str(e)}")
|