Refactor all main.py

This commit is contained in:
Urtzi Alfaro
2025-09-29 13:13:12 +02:00
parent 4777e59e7a
commit befcc126b0
35 changed files with 2537 additions and 1993 deletions

View File

@@ -4,12 +4,20 @@ Shared monitoring package for microservices
from .logging import setup_logging
from .metrics import setup_metrics_early, get_metrics_collector, MetricsCollector
from .health import HealthChecker
from .health_checks import (
HealthCheckManager,
FastAPIHealthChecker,
create_health_manager,
setup_fastapi_health_checks
)
__all__ = [
'setup_logging',
'setup_metrics_early',
'setup_metrics_early',
'get_metrics_collector',
'MetricsCollector',
'HealthChecker'
'HealthCheckManager',
'FastAPIHealthChecker',
'create_health_manager',
'setup_fastapi_health_checks'
]

View File

@@ -0,0 +1,370 @@
"""
Enhanced Health Check System for Microservices
Provides unified health check endpoints and database verification based on
the comprehensive implementation from the training service.
"""
from typing import Dict, Any, List, Optional, Callable
from contextlib import asynccontextmanager
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import text, inspect
from fastapi import HTTPException
from fastapi.responses import JSONResponse
import structlog
import time
import datetime
from ..database.base import DatabaseManager
from ..database.exceptions import DatabaseError, HealthCheckError
logger = structlog.get_logger()
class HealthCheckManager:
"""
Unified health check manager for microservices
Provides standardized health check endpoints:
- /health - Basic service health
- /health/ready - Kubernetes readiness probe with comprehensive checks
- /health/live - Kubernetes liveness probe
- /health/database - Detailed database health information
"""
def __init__(
self,
service_name: str,
version: str = "1.0.0",
database_manager: Optional[DatabaseManager] = None,
expected_tables: Optional[List[str]] = None,
custom_checks: Optional[Dict[str, Callable]] = None
):
self.service_name = service_name
self.version = version
self.database_manager = database_manager
self.expected_tables = expected_tables or []
self.custom_checks = custom_checks or {}
self.ready_state = False
def set_ready(self, ready: bool = True):
"""Set service ready state"""
self.ready_state = ready
logger.info(f"Service ready state changed",
service=self.service_name, ready=ready)
async def basic_health_check(self, app_state=None) -> Dict[str, Any]:
"""Basic health check endpoint (/health)"""
# Check app state for ready status if available
ready = self.ready_state
if app_state and hasattr(app_state, 'ready'):
ready = app_state.ready
return {
"status": "healthy" if ready else "starting",
"service": self.service_name,
"version": self.version,
"timestamp": datetime.datetime.utcnow().isoformat()
}
async def readiness_check(self, app_state=None) -> Dict[str, Any]:
"""
Kubernetes readiness probe endpoint (/health/ready)
Returns 200 if ready, 503 if not ready
"""
try:
# Check app state for ready status if available
ready = self.ready_state
if app_state and hasattr(app_state, 'ready'):
ready = app_state.ready
checks = {
"application": ready
}
database_details = {}
# Database connectivity and table verification
if self.database_manager:
db_health = await self._get_comprehensive_db_health()
checks["database_connectivity"] = db_health["connectivity"]
checks["database_tables"] = db_health["tables_exist"]
database_details = {
"status": db_health["status"],
"tables_verified": db_health["tables_verified"],
"missing_tables": db_health["missing_tables"],
"errors": db_health["errors"]
}
# Execute custom checks
for check_name, check_func in self.custom_checks.items():
try:
checks[check_name] = await check_func()
except Exception as e:
checks[check_name] = False
logger.error(f"Custom check '{check_name}' failed", error=str(e))
# Service is ready only if all checks pass
all_ready = all(checks.values())
if self.database_manager:
all_ready = all_ready and database_details.get("status") == "healthy"
response_data = {
"status": "ready" if all_ready else "not ready",
"checks": checks
}
if database_details:
response_data["database"] = database_details
if all_ready:
return response_data
else:
raise HTTPException(status_code=503, detail=response_data)
except HTTPException:
raise
except Exception as e:
logger.error("Readiness check failed", error=str(e))
raise HTTPException(
status_code=503,
detail={
"status": "not ready",
"error": f"Health check failed: {str(e)}"
}
)
async def liveness_check(self) -> Dict[str, Any]:
"""Kubernetes liveness probe endpoint (/health/live)"""
return {"status": "alive"}
async def database_health_check(self) -> Dict[str, Any]:
"""
Detailed database health endpoint (/health/database)
Returns 200 if healthy, 503 if unhealthy
"""
if not self.database_manager:
raise HTTPException(
status_code=404,
detail={"error": "Database health check not available"}
)
try:
db_health = await self._get_comprehensive_db_health()
status_code = 200 if db_health["status"] == "healthy" else 503
if status_code == 503:
raise HTTPException(status_code=503, detail=db_health)
return db_health
except HTTPException:
raise
except Exception as e:
logger.error("Database health check failed", error=str(e))
raise HTTPException(
status_code=503,
detail={
"status": "unhealthy",
"error": f"Health check failed: {str(e)}"
}
)
async def _get_comprehensive_db_health(self) -> Dict[str, Any]:
"""
Comprehensive database health check with table verification
Based on training service implementation
"""
health_status = {
"status": "healthy",
"connectivity": False,
"tables_exist": False,
"tables_verified": [],
"missing_tables": [],
"errors": [],
"connection_info": {},
"response_time_ms": 0
}
if not self.database_manager:
health_status["status"] = "unhealthy"
health_status["errors"].append("Database manager not configured")
return health_status
try:
# Test basic connectivity with timing
start_time = time.time()
health_status["connectivity"] = await self.database_manager.test_connection()
response_time = (time.time() - start_time) * 1000
health_status["response_time_ms"] = round(response_time, 2)
if not health_status["connectivity"]:
health_status["status"] = "unhealthy"
health_status["errors"].append("Database connectivity failed")
return health_status
# Get connection pool information
health_status["connection_info"] = await self.database_manager.get_connection_info()
# Test table existence if expected tables are configured
if self.expected_tables:
tables_verified = await self._verify_tables_exist()
health_status["tables_exist"] = tables_verified
if tables_verified:
health_status["tables_verified"] = self.expected_tables.copy()
else:
health_status["status"] = "unhealthy"
health_status["errors"].append("Required tables missing or inaccessible")
# Identify which specific tables are missing
await self._identify_missing_tables(health_status)
else:
# If no expected tables configured, just mark as verified
health_status["tables_exist"] = True
logger.debug("Comprehensive database health check completed",
service=self.service_name,
status=health_status["status"],
connectivity=health_status["connectivity"],
tables_exist=health_status["tables_exist"])
except Exception as e:
health_status["status"] = "unhealthy"
health_status["errors"].append(f"Health check failed: {str(e)}")
logger.error("Comprehensive database health check failed",
service=self.service_name, error=str(e))
return health_status
async def _verify_tables_exist(self) -> bool:
"""Verify that all expected tables exist and are accessible"""
try:
async with self.database_manager.get_session() as session:
for table_name in self.expected_tables:
try:
await session.execute(text(f"SELECT 1 FROM {table_name} LIMIT 1"))
except Exception:
return False
return True
except Exception as e:
logger.error("Table verification failed", error=str(e))
return False
async def _identify_missing_tables(self, health_status: Dict[str, Any]):
"""Identify which specific tables are missing"""
try:
async with self.database_manager.get_session() as session:
for table_name in self.expected_tables:
try:
await session.execute(text(f"SELECT 1 FROM {table_name} LIMIT 1"))
health_status["tables_verified"].append(table_name)
except Exception:
health_status["missing_tables"].append(table_name)
except Exception as e:
health_status["errors"].append(f"Error checking individual tables: {str(e)}")
class FastAPIHealthChecker:
"""
FastAPI integration for health checks
Provides router setup and endpoint registration
"""
def __init__(self, health_manager: HealthCheckManager):
self.health_manager = health_manager
def setup_health_routes(self, app):
"""Setup health check routes on FastAPI app"""
@app.get("/health")
async def health_check():
"""Basic health check endpoint"""
return await self.health_manager.basic_health_check(app.state)
@app.get("/health/ready")
async def readiness_check():
"""Kubernetes readiness probe endpoint"""
try:
return await self.health_manager.readiness_check(app.state)
except HTTPException as e:
return JSONResponse(
status_code=e.status_code,
content=e.detail
)
@app.get("/health/live")
async def liveness_check():
"""Kubernetes liveness probe endpoint"""
return await self.health_manager.liveness_check()
@app.get("/health/database")
async def database_health_check():
"""Detailed database health endpoint"""
try:
return await self.health_manager.database_health_check()
except HTTPException as e:
return JSONResponse(
status_code=e.status_code,
content=e.detail
)
# Convenience functions for easy integration
def create_health_manager(
service_name: str,
version: str = "1.0.0",
database_manager: Optional[DatabaseManager] = None,
expected_tables: Optional[List[str]] = None,
custom_checks: Optional[Dict[str, Callable]] = None
) -> HealthCheckManager:
"""Factory function to create a HealthCheckManager"""
return HealthCheckManager(
service_name=service_name,
version=version,
database_manager=database_manager,
expected_tables=expected_tables,
custom_checks=custom_checks
)
def setup_fastapi_health_checks(
app,
service_name: str,
version: str = "1.0.0",
database_manager: Optional[DatabaseManager] = None,
expected_tables: Optional[List[str]] = None,
custom_checks: Optional[Dict[str, Callable]] = None
) -> HealthCheckManager:
"""
Convenience function to setup health checks on a FastAPI app
Args:
app: FastAPI application instance
service_name: Name of the service
version: Service version
database_manager: Database manager instance
expected_tables: List of tables that should exist
custom_checks: Dict of custom check functions
Returns:
HealthCheckManager instance for further configuration
"""
health_manager = create_health_manager(
service_name=service_name,
version=version,
database_manager=database_manager,
expected_tables=expected_tables,
custom_checks=custom_checks
)
fastapi_checker = FastAPIHealthChecker(health_manager)
fastapi_checker.setup_health_routes(app)
return health_manager

429
shared/service_base.py Normal file
View File

@@ -0,0 +1,429 @@
"""
Standardized FastAPI Service Base
Provides a unified approach for creating FastAPI microservices with common patterns:
- Logging setup
- Metrics initialization
- Health checks
- Database initialization
- CORS middleware
- Exception handlers
- Lifespan management
"""
import os
import structlog
from typing import Optional, List, Dict, Callable, Any, TYPE_CHECKING
from contextlib import asynccontextmanager
from fastapi import FastAPI, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from fastapi.routing import APIRouter
from shared.monitoring import setup_logging
from shared.monitoring.metrics import setup_metrics_early
from shared.monitoring.health_checks import setup_fastapi_health_checks
from shared.database.base import DatabaseManager
if TYPE_CHECKING:
import uvicorn
class BaseFastAPIService:
"""
Base class for FastAPI microservices with standardized patterns
"""
def __init__(
self,
service_name: str,
app_name: str,
description: str,
version: str = "1.0.0",
log_level: str = "INFO",
cors_origins: Optional[List[str]] = None,
api_prefix: str = "/api/v1",
database_manager: Optional[DatabaseManager] = None,
expected_tables: Optional[List[str]] = None,
custom_health_checks: Optional[Dict[str, Callable[[], Any]]] = None,
enable_metrics: bool = True,
enable_health_checks: bool = True,
enable_cors: bool = True,
enable_exception_handlers: bool = True,
enable_messaging: bool = False,
custom_metrics: Optional[Dict[str, Dict[str, Any]]] = None,
alert_service_class: Optional[type] = None
):
self.service_name = service_name
self.app_name = app_name
self.description = description
self.version = version
self.log_level = log_level
self.cors_origins = cors_origins or ["*"]
self.api_prefix = api_prefix
self.database_manager = database_manager
self.expected_tables = expected_tables
self.custom_health_checks = custom_health_checks or {}
self.enable_metrics = enable_metrics
self.enable_health_checks = enable_health_checks
self.enable_cors = enable_cors
self.enable_exception_handlers = enable_exception_handlers
self.enable_messaging = enable_messaging
self.custom_metrics = custom_metrics or {}
self.alert_service_class = alert_service_class
# Initialize logging
setup_logging(service_name, log_level)
self.logger = structlog.get_logger()
# Will be set during app creation
self.app: Optional[FastAPI] = None
self.metrics_collector = None
self.health_manager = None
self.alert_service = None
def create_app(self, **fastapi_kwargs) -> FastAPI:
"""
Create and configure FastAPI application with standardized setup
"""
# Default FastAPI configuration
default_config = {
"title": self.app_name,
"description": self.description,
"version": self.version,
"openapi_url": f"{self.api_prefix}/openapi.json",
"docs_url": f"{self.api_prefix}/docs",
"redoc_url": f"{self.api_prefix}/redoc",
}
# Merge with user-provided config
config = {**default_config, **fastapi_kwargs}
# Create FastAPI app
self.app = FastAPI(**config)
# Setup metrics BEFORE middleware and lifespan
if self.enable_metrics:
self.metrics_collector = setup_metrics_early(self.app, self.service_name)
# Setup lifespan
self.app.router.lifespan_context = self._create_lifespan()
# Setup middleware
if self.enable_cors:
self._setup_cors()
# Setup exception handlers
if self.enable_exception_handlers:
self._setup_exception_handlers()
# Setup health checks
if self.enable_health_checks:
self._setup_health_checks()
# Setup root endpoint
self._setup_root_endpoint()
return self.app
def _create_lifespan(self):
"""Create lifespan context manager"""
@asynccontextmanager
async def lifespan(app: FastAPI):
# Startup
self.logger.info(f"Starting {self.service_name}", version=self.version)
try:
# Initialize database if provided
if self.database_manager:
await self._initialize_database()
# Setup messaging if enabled
if self.enable_messaging:
await self._setup_messaging()
# Initialize alert service if provided
if self.alert_service_class:
await self._initialize_alert_service(app)
# Register custom metrics if provided
if self.custom_metrics:
self.register_custom_metrics(self.custom_metrics)
# Custom startup logic
await self.on_startup(app)
# Mark service as ready
app.state.ready = True
self.logger.info(f"{self.service_name} started successfully")
yield
except Exception as e:
self.logger.error(f"Startup failed for {self.service_name}", error=str(e))
raise
finally:
# Shutdown
self.logger.info(f"Shutting down {self.service_name}")
try:
await self.on_shutdown(app)
# Cleanup alert service if it exists
if self.alert_service:
await self._cleanup_alert_service()
# Cleanup messaging if enabled
if self.enable_messaging:
await self._cleanup_messaging()
if self.database_manager:
await self._cleanup_database()
except Exception as e:
self.logger.error(f"Shutdown error for {self.service_name}", error=str(e))
return lifespan
async def on_startup(self, app: FastAPI):
"""
Override this method for custom startup logic
Called after database initialization but before marking as ready
"""
pass
async def on_shutdown(self, app: FastAPI):
"""
Override this method for custom shutdown logic
Called before database cleanup
"""
pass
async def _initialize_database(self):
"""Initialize database connection"""
try:
# Test connection
if await self.database_manager.test_connection():
self.logger.info("Database initialized successfully")
else:
raise Exception("Database connection test failed")
except Exception as e:
self.logger.error("Database initialization failed", error=str(e))
raise
async def _cleanup_database(self):
"""Cleanup database connections"""
try:
await self.database_manager.close_connections()
self.logger.info("Database connections closed")
except Exception as e:
self.logger.error("Database cleanup error", error=str(e))
async def _setup_messaging(self):
"""Setup messaging service - to be overridden by services that need it"""
pass
async def _cleanup_messaging(self):
"""Cleanup messaging service - to be overridden by services that need it"""
pass
async def _initialize_alert_service(self, app: FastAPI):
"""Initialize alert service - to be overridden by services that need it"""
pass
async def _cleanup_alert_service(self):
"""Cleanup alert service - to be overridden by services that need it"""
pass
def _setup_cors(self):
"""Setup CORS middleware"""
if self.app is not None:
self.app.add_middleware(
CORSMiddleware,
allow_origins=self.cors_origins,
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
def _setup_exception_handlers(self):
"""Setup standard exception handlers"""
if self.app is not None:
@self.app.exception_handler(ValueError)
async def value_error_handler(request: Request, exc: ValueError):
"""Handle validation errors"""
return JSONResponse(
status_code=400,
content={
"error": "Validation Error",
"detail": str(exc),
"type": "value_error"
}
)
@self.app.exception_handler(Exception)
async def general_exception_handler(request: Request, exc: Exception):
"""Handle general exceptions"""
self.logger.error(
"Unhandled exception",
error=str(exc),
path=request.url.path,
method=request.method
)
# Record error metric if available
if self.metrics_collector:
self.metrics_collector.increment_counter("errors_total", labels={"type": "unhandled"})
return JSONResponse(
status_code=500,
content={
"error": "Internal Server Error",
"detail": "An unexpected error occurred",
"type": "internal_error"
}
)
def _setup_health_checks(self):
"""Setup health check endpoints"""
self.health_manager = setup_fastapi_health_checks(
app=self.app,
service_name=self.service_name,
version=self.version,
database_manager=self.database_manager,
expected_tables=self.expected_tables,
custom_checks=self.custom_health_checks
)
def _setup_root_endpoint(self):
"""Setup root endpoint with service information"""
if self.app is not None:
@self.app.get("/")
async def root():
"""Root endpoint with service information"""
return {
"service": self.service_name,
"version": self.version,
"description": self.description,
"status": "running",
"docs_url": f"{self.api_prefix}/docs",
"health_url": "/health"
}
def add_router(self, router: APIRouter, **kwargs: Any):
"""Convenience method to add routers with default prefix"""
if self.app is not None:
prefix = kwargs.get('prefix', self.api_prefix)
kwargs['prefix'] = prefix
self.app.include_router(router, **kwargs)
def register_custom_metrics(self, metrics_config: Dict[str, Dict[str, Any]]):
"""
Register custom metrics for the service
Args:
metrics_config: Dict with metric name as key and config as value
Example: {
"user_registrations": {
"type": "counter",
"description": "Total user registrations",
"labels": ["status"]
}
}
"""
if not self.metrics_collector:
self.logger.warning("Metrics collector not available")
return
for metric_name, config in metrics_config.items():
metric_type = config.get("type", "counter")
description = config.get("description", f"{metric_name} metric")
labels = config.get("labels", [])
if metric_type == "counter":
self.metrics_collector.register_counter(metric_name, description, labels=labels)
elif metric_type == "histogram":
self.metrics_collector.register_histogram(metric_name, description, labels=labels)
else:
self.logger.warning(f"Unsupported metric type: {metric_type}")
def run_development_server(self, host: str = "0.0.0.0", port: int = 8000, reload: Optional[bool] = None):
"""
Run development server with uvicorn
"""
import uvicorn
if reload is None:
reload = os.getenv("RELOAD", "false").lower() == "true"
uvicorn.run(
f"{self.__module__}:app",
host=host,
port=port,
reload=reload,
log_level="info"
)
class StandardFastAPIService(BaseFastAPIService):
"""
Standard service implementation for most microservices
Provides additional common patterns for services with database and standard endpoints
"""
def __init__(self, **kwargs: Any):
super().__init__(**kwargs)
def setup_standard_endpoints(self):
"""Setup standard service endpoints"""
if self.app is not None:
@self.app.get(f"{self.api_prefix}/info")
async def service_info():
"""Service information endpoint"""
return {
"service": self.service_name,
"version": self.version,
"description": self.description,
"api_version": "v1",
"environment": os.getenv("ENVIRONMENT", "development"),
"features": self.get_service_features()
}
def get_service_features(self) -> List[str]:
"""
Override this method to return service-specific features
"""
return ["health_checks", "metrics", "standardized_api"]
class MessageProcessorService(BaseFastAPIService):
"""
Service implementation for message processing services
Provides patterns for background processing services
"""
def __init__(self, **kwargs: Any):
# Message processors typically don't need CORS or full API setup
kwargs.setdefault('enable_cors', False)
kwargs.setdefault('api_prefix', '/api/v1')
super().__init__(**kwargs)
async def on_startup(self, app: FastAPI):
"""Initialize message processing components"""
await super().on_startup(app)
await self.setup_message_processing()
async def on_shutdown(self, app: FastAPI):
"""Cleanup message processing components"""
await self.cleanup_message_processing()
await super().on_shutdown(app)
async def setup_message_processing(self):
"""Override this method to setup message processing"""
pass
async def cleanup_message_processing(self):
"""Override this method to cleanup message processing"""
pass