# ================================================================
# shared/monitoring/health.py
# ================================================================
"""
Health check utilities for microservices
"""
import asyncio
import inspect
import logging
import time
from dataclasses import dataclass
from enum import Enum
from typing import Any, Callable, Dict, List, Optional, Tuple

from fastapi import APIRouter

logger = logging.getLogger(__name__)


class HealthStatus(Enum):
    """Status values reported for a single check and for the whole service."""

    HEALTHY = "healthy"
    DEGRADED = "degraded"
    UNHEALTHY = "unhealthy"


@dataclass
class HealthCheck:
    """A registered health check and the options used when running it."""

    name: str  # key under which the result appears in the report
    check_function: Callable[[], Any]  # zero-argument callable, sync or async
    timeout: float = 5.0  # seconds passed to asyncio.wait_for
    critical: bool = True  # if True, an UNHEALTHY result makes the service UNHEALTHY


@dataclass
class HealthResult:
    """Outcome of running one health check."""

    name: str
    status: HealthStatus
    message: str
    duration: float  # seconds spent running the check
    timestamp: float  # time.time() at which the result was produced


class HealthChecker:
    """Health checker for microservices"""

    def __init__(self, service_name: str):
        self.service_name = service_name
        self.checks: List[HealthCheck] = []
        # Wall-clock start time; used to compute "uptime" in reports.
        self.start_time = time.time()

    def add_check(
        self,
        name: str,
        check_function: Callable,
        timeout: float = 5.0,
        critical: bool = True,
    ) -> None:
        """Add a health check.

        Args:
            name: Key for this check in the report. Results are stored in a
                dict keyed by name, so a later check with the same name
                overwrites the earlier one in the report.
            check_function: Zero-argument callable. It may be synchronous,
                asynchronous, or return an awaitable.
            timeout: Maximum number of seconds to wait for the check.
            critical: Whether an UNHEALTHY result marks the whole service
                UNHEALTHY.
        """
        self.checks.append(HealthCheck(name, check_function, timeout, critical))

    async def run_check(self, check: HealthCheck) -> HealthResult:
        """Run a single health check.

        Timeouts and exceptions raised by the check are converted into an
        UNHEALTHY result instead of propagating.

        Args:
            check: The check to run.

        Returns:
            The result of the check, including how long it took.
        """
        # perf_counter is monotonic, so the duration cannot be distorted by
        # wall-clock adjustments while the check is running.
        started = time.perf_counter()
        try:
            outcome = await asyncio.wait_for(
                self._execute_check(check.check_function),
                timeout=check.timeout,
            )
        except asyncio.TimeoutError:
            status = HealthStatus.UNHEALTHY
            message = f"Timeout after {check.timeout}s"
            logger.warning("Health check %r timed out after %ss", check.name, check.timeout)
        except Exception as exc:
            status = HealthStatus.UNHEALTHY
            message = f"Error: {exc}"
            logger.warning("Health check %r raised %r", check.name, exc)
        else:
            status, message = self._interpret_result(outcome)

        return HealthResult(
            name=check.name,
            status=status,
            message=message,
            duration=time.perf_counter() - started,
            timestamp=time.time(),
        )

    @staticmethod
    def _interpret_result(result: Any) -> Tuple[HealthStatus, str]:
        """Translate a check's return value into a status and message.

        Accepted "healthy" values are ``True`` and a dict with a truthy
        ``'healthy'`` entry. A ``HealthStatus`` member is used as-is, which
        is the only way for a check to report ``DEGRADED``. Anything else is
        UNHEALTHY, with ``str(result)`` as the message when the value is
        truthy.
        """
        if isinstance(result, HealthStatus):
            if result is HealthStatus.HEALTHY:
                return result, "OK"
            return result, f"Check reported {result.value}"

        if result is True or (isinstance(result, dict) and result.get('healthy', False)):
            return HealthStatus.HEALTHY, "OK"

        return HealthStatus.UNHEALTHY, (str(result) if result else "Check failed")

    async def _execute_check(self, check_function: Callable) -> Any:
        """Execute a check function (handles both sync and async).

        The callable is invoked and, if what it returns is awaitable, that
        awaitable is awaited. This also covers partials, lambdas and callable
        objects that return coroutines, which ``iscoroutinefunction`` does
        not recognise.

        NOTE: a synchronous check runs inline in the event loop, so the
        timeout in ``run_check`` cannot interrupt it while it blocks.
        """
        result = check_function()
        if inspect.isawaitable(result):
            result = await result
        return result

    def _build_report(self, status: HealthStatus, checks: Dict[str, Any]) -> Dict[str, Any]:
        """Assemble the report dictionary returned by ``check_health``."""
        return {
            "service": self.service_name,
            "status": status.value,
            "uptime": time.time() - self.start_time,
            "timestamp": time.time(),
            "checks": checks,
        }

    async def check_health(self) -> Dict[str, Any]:
        """Run all health checks and return status.

        Returns:
            A dict with the keys ``service``, ``status``, ``uptime``,
            ``timestamp`` and ``checks``. ``checks`` maps each check name to
            its ``status``, ``message``, ``duration`` and ``timestamp``.
            The overall status is UNHEALTHY if any critical check is
            UNHEALTHY, otherwise DEGRADED if any check is DEGRADED,
            otherwise HEALTHY. With no registered checks it is HEALTHY.
        """
        # Snapshot so results stay paired with their checks even if
        # add_check is called while the checks are being awaited.
        checks = list(self.checks)
        if not checks:
            return self._build_report(HealthStatus.HEALTHY, {})

        # Run all checks concurrently
        results = await asyncio.gather(
            *(self.run_check(check) for check in checks),
            return_exceptions=True,
        )

        check_results: Dict[str, Any] = {}
        overall_status = HealthStatus.HEALTHY

        for check, result in zip(checks, results):
            # gather(return_exceptions=True) can also hand back
            # CancelledError, which is a BaseException, not an Exception.
            if isinstance(result, BaseException):
                logger.warning("Health check %r failed with %r", check.name, result)
                result = HealthResult(
                    name=check.name,
                    status=HealthStatus.UNHEALTHY,
                    message=f"Exception: {result}",
                    duration=0.0,
                    timestamp=time.time(),
                )

            check_results[check.name] = {
                "status": result.status.value,
                "message": result.message,
                "duration": result.duration,
                "timestamp": result.timestamp,
            }

            # Determine overall status
            if check.critical and result.status == HealthStatus.UNHEALTHY:
                overall_status = HealthStatus.UNHEALTHY
            elif (
                result.status == HealthStatus.DEGRADED
                and overall_status == HealthStatus.HEALTHY
            ):
                overall_status = HealthStatus.DEGRADED

        return self._build_report(overall_status, check_results)


# Create FastAPI router for health endpoints
router = APIRouter()


@router.get("/")
async def health_check():
    """Basic health check endpoint.

    Returns a static payload: the service name is the literal "service" and
    the status is always "healthy". No HealthChecker is consulted here.
    """
    return {
        "service": "service",
        "status": "healthy",
        "timestamp": time.time()
    }