Refactor all main.py

This commit is contained in:
Urtzi Alfaro
2025-09-29 13:13:12 +02:00
parent 4777e59e7a
commit befcc126b0
35 changed files with 2537 additions and 1993 deletions

View File

@@ -4,12 +4,20 @@ Shared monitoring package for microservices
from .logging import setup_logging
from .metrics import setup_metrics_early, get_metrics_collector, MetricsCollector
from .health import HealthChecker
from .health_checks import (
HealthCheckManager,
FastAPIHealthChecker,
create_health_manager,
setup_fastapi_health_checks
)
# Public API of the shared monitoring package.
# Every entry ends with a comma: a missing comma between two adjacent string
# literals makes Python silently concatenate them into one bogus name.
__all__ = [
    'setup_logging',
    'setup_metrics_early',
    'get_metrics_collector',
    'MetricsCollector',
    'HealthChecker',
    'HealthCheckManager',
    'FastAPIHealthChecker',
    'create_health_manager',
    'setup_fastapi_health_checks',
]

View File

@@ -0,0 +1,370 @@
"""
Enhanced Health Check System for Microservices
Provides unified health check endpoints and database verification based on
the comprehensive implementation from the training service.
"""
from typing import Dict, Any, List, Optional, Callable
from contextlib import asynccontextmanager
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import text, inspect
from fastapi import HTTPException
from fastapi.responses import JSONResponse
import structlog
import time
import datetime
from ..database.base import DatabaseManager
from ..database.exceptions import DatabaseError, HealthCheckError
logger = structlog.get_logger()
class HealthCheckManager:
    """
    Unified health check manager for microservices.

    Provides the logic behind the standardized health check endpoints:
    - /health - Basic service health
    - /health/ready - Kubernetes readiness probe with comprehensive checks
    - /health/live - Kubernetes liveness probe
    - /health/database - Detailed database health information
    """

    def __init__(
        self,
        service_name: str,
        version: str = "1.0.0",
        database_manager: "Optional[DatabaseManager]" = None,
        expected_tables: Optional[List[str]] = None,
        custom_checks: Optional[Dict[str, Callable]] = None
    ):
        """
        Args:
            service_name: Name reported in health payloads and log events.
            version: Version string reported by the basic health check.
            database_manager: Optional manager used for connectivity and
                table probes; database checks are skipped when it is falsy.
            expected_tables: Table names that must be selectable for the
                database to count as healthy (defaults to no tables).
            custom_checks: Mapping of check name to a zero-argument callable.
                The callable may be synchronous or return an awaitable; its
                (awaited) result is evaluated for truthiness.
        """
        self.service_name = service_name
        self.version = version
        self.database_manager = database_manager
        self.expected_tables = expected_tables or []
        self.custom_checks = custom_checks or {}
        self.ready_state = False

    def set_ready(self, ready: bool = True):
        """Set service ready state"""
        self.ready_state = ready
        logger.info("Service ready state changed",
                    service=self.service_name, ready=ready)

    def _resolve_ready(self, app_state=None):
        """Return `app_state.ready` when that attribute exists, else the manager's own flag."""
        if app_state and hasattr(app_state, 'ready'):
            return app_state.ready
        return self.ready_state

    async def _run_custom_check(self, check_name: str, check_func: Callable) -> Any:
        """Run one custom check; a raised exception is logged and reported as False."""
        # Imported locally under its own name: the module-level `inspect`
        # in this file is SQLAlchemy's, not the standard library module.
        from inspect import isawaitable

        try:
            outcome = check_func()
            # Accept both plain callables and coroutine functions.
            if isawaitable(outcome):
                outcome = await outcome
            return outcome
        except Exception as e:
            logger.error(f"Custom check '{check_name}' failed", error=str(e))
            return False

    async def basic_health_check(self, app_state=None) -> Dict[str, Any]:
        """
        Basic health check endpoint (/health)

        Returns a payload whose status is "healthy" once the service is
        ready and "starting" otherwise, plus service name, version and a
        naive UTC ISO-8601 timestamp.
        """
        # Same naive-UTC string as the deprecated datetime.utcnow() produced.
        now_utc = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
        return {
            "status": "healthy" if self._resolve_ready(app_state) else "starting",
            "service": self.service_name,
            "version": self.version,
            "timestamp": now_utc.isoformat()
        }

    async def readiness_check(self, app_state=None) -> Dict[str, Any]:
        """
        Kubernetes readiness probe endpoint (/health/ready)

        Returns the check summary when every check passes.

        Raises:
            HTTPException: status 503 with the check summary (or an error
                message) as detail when the service is not ready.
        """
        try:
            checks = {
                "application": self._resolve_ready(app_state)
            }
            database_details = {}

            # Database connectivity and table verification
            if self.database_manager:
                db_health = await self._get_comprehensive_db_health()
                checks["database_connectivity"] = db_health["connectivity"]
                checks["database_tables"] = db_health["tables_exist"]
                database_details = {
                    "status": db_health["status"],
                    "tables_verified": db_health["tables_verified"],
                    "missing_tables": db_health["missing_tables"],
                    "errors": db_health["errors"]
                }

            # Execute custom checks (sync or async)
            for check_name, check_func in self.custom_checks.items():
                checks[check_name] = await self._run_custom_check(check_name, check_func)

            # Service is ready only if all checks pass
            all_ready = all(checks.values())
            if self.database_manager:
                all_ready = all_ready and database_details.get("status") == "healthy"

            response_data = {
                "status": "ready" if all_ready else "not ready",
                "checks": checks
            }
            if database_details:
                response_data["database"] = database_details

            if not all_ready:
                raise HTTPException(status_code=503, detail=response_data)
            return response_data
        except HTTPException:
            # Deliberate 503 from above: pass through untouched.
            raise
        except Exception as e:
            logger.error("Readiness check failed", error=str(e))
            raise HTTPException(
                status_code=503,
                detail={
                    "status": "not ready",
                    "error": f"Health check failed: {str(e)}"
                }
            )

    async def liveness_check(self) -> Dict[str, Any]:
        """Kubernetes liveness probe endpoint (/health/live)"""
        return {"status": "alive"}

    async def database_health_check(self) -> Dict[str, Any]:
        """
        Detailed database health endpoint (/health/database)

        Returns the full database health report when it is healthy.

        Raises:
            HTTPException: status 404 when no database manager is configured;
                status 503 with the report (or an error message) as detail
                when the database is unhealthy or the check itself fails.
        """
        if not self.database_manager:
            raise HTTPException(
                status_code=404,
                detail={"error": "Database health check not available"}
            )
        try:
            db_health = await self._get_comprehensive_db_health()
            if db_health["status"] != "healthy":
                raise HTTPException(status_code=503, detail=db_health)
            return db_health
        except HTTPException:
            raise
        except Exception as e:
            logger.error("Database health check failed", error=str(e))
            raise HTTPException(
                status_code=503,
                detail={
                    "status": "unhealthy",
                    "error": f"Health check failed: {str(e)}"
                }
            )

    async def _get_comprehensive_db_health(self) -> Dict[str, Any]:
        """
        Comprehensive database health check with table verification.

        Never raises: any exception is recorded in the report's "errors"
        list and the status is set to "unhealthy".
        """
        health_status = {
            "status": "healthy",
            "connectivity": False,
            "tables_exist": False,
            "tables_verified": [],
            "missing_tables": [],
            "errors": [],
            "connection_info": {},
            "response_time_ms": 0
        }
        if not self.database_manager:
            health_status["status"] = "unhealthy"
            health_status["errors"].append("Database manager not configured")
            return health_status

        try:
            # Test basic connectivity with timing. perf_counter is monotonic,
            # so the measured latency cannot be skewed by wall-clock changes.
            start_time = time.perf_counter()
            health_status["connectivity"] = await self.database_manager.test_connection()
            response_time = (time.perf_counter() - start_time) * 1000
            health_status["response_time_ms"] = round(response_time, 2)

            if not health_status["connectivity"]:
                health_status["status"] = "unhealthy"
                health_status["errors"].append("Database connectivity failed")
                return health_status

            # Get connection pool information
            health_status["connection_info"] = await self.database_manager.get_connection_info()

            # Test table existence if expected tables are configured
            if self.expected_tables:
                tables_verified = await self._verify_tables_exist()
                health_status["tables_exist"] = tables_verified
                if tables_verified:
                    health_status["tables_verified"] = self.expected_tables.copy()
                else:
                    health_status["status"] = "unhealthy"
                    health_status["errors"].append("Required tables missing or inaccessible")
                    # Identify which specific tables are missing
                    await self._identify_missing_tables(health_status)
            else:
                # If no expected tables configured, just mark as verified
                health_status["tables_exist"] = True

            logger.debug("Comprehensive database health check completed",
                         service=self.service_name,
                         status=health_status["status"],
                         connectivity=health_status["connectivity"],
                         tables_exist=health_status["tables_exist"])
        except Exception as e:
            health_status["status"] = "unhealthy"
            health_status["errors"].append(f"Health check failed: {str(e)}")
            logger.error("Comprehensive database health check failed",
                         service=self.service_name, error=str(e))
        return health_status

    async def _verify_tables_exist(self) -> bool:
        """
        Verify that all expected tables exist and are accessible.

        Returns False as soon as one probe query fails, or when the session
        itself cannot be obtained; True when every probe succeeds.
        """
        try:
            async with self.database_manager.get_session() as session:
                for table_name in self.expected_tables:
                    # Table names come from service configuration; identifiers
                    # cannot be bound as SQL parameters, hence the f-string.
                    try:
                        await session.execute(text(f"SELECT 1 FROM {table_name} LIMIT 1"))
                    except Exception:
                        return False
                return True
        except Exception as e:
            logger.error("Table verification failed", error=str(e))
            return False

    async def _identify_missing_tables(self, health_status: Dict[str, Any]):
        """
        Identify which specific tables are missing.

        Appends each selectable table to health_status["tables_verified"] and
        each failing one to health_status["missing_tables"]; session-level
        errors are appended to health_status["errors"].
        """
        try:
            async with self.database_manager.get_session() as session:
                for table_name in self.expected_tables:
                    try:
                        await session.execute(text(f"SELECT 1 FROM {table_name} LIMIT 1"))
                        health_status["tables_verified"].append(table_name)
                    except Exception:
                        health_status["missing_tables"].append(table_name)
                        # A failed statement can leave the transaction aborted
                        # (e.g. on PostgreSQL), which would make every later
                        # probe fail and report existing tables as missing.
                        # NOTE(review): presumes get_session() yields a
                        # SQLAlchemy AsyncSession - confirm in DatabaseManager.
                        await session.rollback()
        except Exception as e:
            health_status["errors"].append(f"Error checking individual tables: {str(e)}")
class FastAPIHealthChecker:
    """
    Glue between a HealthCheckManager and a FastAPI application.

    Registers the GET health routes on the app and, for the readiness and
    database routes, turns an HTTPException raised by the manager into a
    JSON response carrying the same status code and detail.
    """

    def __init__(self, health_manager: HealthCheckManager):
        self.health_manager = health_manager

    @staticmethod
    def _to_json_response(failure):
        """Convert an HTTPException into a JSONResponse with its status and detail."""
        return JSONResponse(
            status_code=failure.status_code,
            content=failure.detail
        )

    def setup_health_routes(self, app):
        """Setup health check routes on FastAPI app"""

        # Handler names and docstrings are kept as-is: the framework uses
        # them when describing the registered routes.
        async def health_check():
            """Basic health check endpoint"""
            return await self.health_manager.basic_health_check(app.state)

        async def readiness_check():
            """Kubernetes readiness probe endpoint"""
            try:
                payload = await self.health_manager.readiness_check(app.state)
            except HTTPException as failure:
                return self._to_json_response(failure)
            return payload

        async def liveness_check():
            """Kubernetes liveness probe endpoint"""
            return await self.health_manager.liveness_check()

        async def database_health_check():
            """Detailed database health endpoint"""
            try:
                report = await self.health_manager.database_health_check()
            except HTTPException as failure:
                return self._to_json_response(failure)
            return report

        # Register in the same order as the endpoint list above.
        route_table = (
            ("/health", health_check),
            ("/health/ready", readiness_check),
            ("/health/live", liveness_check),
            ("/health/database", database_health_check),
        )
        for route_path, handler in route_table:
            app.get(route_path)(handler)
# Convenience functions for easy integration
def create_health_manager(
    service_name: str,
    version: str = "1.0.0",
    database_manager: Optional[DatabaseManager] = None,
    expected_tables: Optional[List[str]] = None,
    custom_checks: Optional[Dict[str, Callable]] = None
) -> HealthCheckManager:
    """
    Build a HealthCheckManager from the given settings.

    All arguments are forwarded unchanged to the HealthCheckManager
    constructor, which shares this function's parameter names and defaults.
    """
    manager_options = {
        "service_name": service_name,
        "version": version,
        "database_manager": database_manager,
        "expected_tables": expected_tables,
        "custom_checks": custom_checks,
    }
    return HealthCheckManager(**manager_options)
def setup_fastapi_health_checks(
    app,
    service_name: str,
    version: str = "1.0.0",
    database_manager: Optional[DatabaseManager] = None,
    expected_tables: Optional[List[str]] = None,
    custom_checks: Optional[Dict[str, Callable]] = None
) -> HealthCheckManager:
    """
    Create a health manager and register its routes on a FastAPI app.

    Args:
        app: FastAPI application instance the routes are added to
        service_name: Name of the service
        version: Service version
        database_manager: Database manager instance
        expected_tables: List of tables that should exist
        custom_checks: Dict of custom check functions

    Returns:
        The HealthCheckManager that backs the registered routes, so the
        caller can configure it further.
    """
    manager = create_health_manager(
        service_name=service_name,
        version=version,
        database_manager=database_manager,
        expected_tables=expected_tables,
        custom_checks=custom_checks
    )
    # Route registration is the only side effect; the checker object itself
    # does not need to be kept around.
    FastAPIHealthChecker(manager).setup_health_routes(app)
    return manager