Fix shared issues
services/auth/app/api/auth.py
@@ -1,8 +1,8 @@
 # ================================================================
-# services/auth/app/api/auth.py (ENHANCED VERSION)
+# services/auth/app/api/auth.py - Updated with modular monitoring
 # ================================================================
 """
-Authentication API routes - Enhanced with proper error handling and logging
+Authentication API routes - Enhanced with proper metrics access
 """
 
 from fastapi import APIRouter, Depends, HTTPException, status, Request
@@ -16,26 +16,46 @@ from app.schemas.auth import (
 )
 from app.services.auth_service import AuthService
 from app.core.security import security_manager
-from shared.monitoring.metrics import MetricsCollector
+from shared.monitoring.decorators import track_execution_time, count_calls
 
 logger = logging.getLogger(__name__)
 router = APIRouter()
-metrics = MetricsCollector("auth_service")
+
+def get_metrics_collector(request: Request):
+    """Get metrics collector from app state"""
+    return getattr(request.app.state, 'metrics_collector', None)
 
 @router.post("/register", response_model=UserResponse)
+@track_execution_time("registration_duration_seconds", "auth-service")
 async def register(
     user_data: UserRegistration,
+    request: Request,
     db: AsyncSession = Depends(get_db)
 ):
     """Register a new user"""
+    metrics = get_metrics_collector(request)
+
     try:
-        metrics.increment_counter("auth_registration_total")
         result = await AuthService.register_user(user_data, db)
 
+        # Record successful registration
+        if metrics:
+            metrics.increment_counter("registration_total", labels={"status": "success"})
+
         logger.info(f"User registration successful: {user_data.email}")
         return result
-    except HTTPException:
+    except HTTPException as e:
+        # Record failed registration
+        if metrics:
+            metrics.increment_counter("registration_total", labels={"status": "failed"})
+        logger.warning(f"Registration failed for {user_data.email}: {e.detail}")
         raise
 
     except Exception as e:
+        # Record error
+        if metrics:
+            metrics.increment_counter("registration_total", labels={"status": "error"})
         logger.error(f"Registration error for {user_data.email}: {e}")
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
@@ -43,25 +63,39 @@ async def register(
         )
 
 @router.post("/login", response_model=TokenResponse)
+@track_execution_time("login_duration_seconds", "auth-service")
 async def login(
     login_data: UserLogin,
     request: Request,
     db: AsyncSession = Depends(get_db)
 ):
     """User login"""
+    metrics = get_metrics_collector(request)
+
     try:
         ip_address = request.client.host
         user_agent = request.headers.get("user-agent", "")
 
         result = await AuthService.login_user(login_data, db, ip_address, user_agent)
-        metrics.increment_counter("auth_login_success_total")
+
+        # Record successful login
+        if metrics:
+            metrics.increment_counter("login_success_total")
+
+        logger.info(f"Login successful for {login_data.email}")
         return result
 
     except HTTPException as e:
-        metrics.increment_counter("auth_login_failure_total")
+        # Record failed login
+        if metrics:
+            metrics.increment_counter("login_failure_total", labels={"reason": "auth_failed"})
         logger.warning(f"Login failed for {login_data.email}: {e.detail}")
         raise
 
     except Exception as e:
-        metrics.increment_counter("auth_login_failure_total")
+        # Record login error
+        if metrics:
+            metrics.increment_counter("login_failure_total", labels={"reason": "error"})
         logger.error(f"Login error for {login_data.email}: {e}")
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
@@ -69,16 +103,34 @@ async def login(
         )
 
 @router.post("/refresh", response_model=TokenResponse)
+@track_execution_time("token_refresh_duration_seconds", "auth-service")
 async def refresh_token(
     refresh_data: RefreshTokenRequest,
+    request: Request,
     db: AsyncSession = Depends(get_db)
 ):
     """Refresh access token"""
+    metrics = get_metrics_collector(request)
+
     try:
-        return await AuthService.refresh_token(refresh_data.refresh_token, db)
-    except HTTPException:
+        result = await AuthService.refresh_token(refresh_data.refresh_token, db)
+
+        # Record successful refresh
+        if metrics:
+            metrics.increment_counter("token_refresh_total", labels={"status": "success"})
+
+        return result
+
+    except HTTPException as e:
+        # Record failed refresh
+        if metrics:
+            metrics.increment_counter("token_refresh_total", labels={"status": "failed"})
         raise
 
     except Exception as e:
+        # Record refresh error
+        if metrics:
+            metrics.increment_counter("token_refresh_total", labels={"status": "error"})
         logger.error(f"Token refresh error: {e}")
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
@@ -91,27 +143,37 @@ async def verify_token(
     db: AsyncSession = Depends(get_db)
 ):
     """Verify access token"""
+    metrics = get_metrics_collector(request)
+
     try:
         auth_header = request.headers.get("Authorization")
         if not auth_header or not auth_header.startswith("Bearer "):
+            if metrics:
+                metrics.increment_counter("token_verify_total", labels={"status": "no_token"})
             raise HTTPException(
                 status_code=status.HTTP_401_UNAUTHORIZED,
-                detail="Authorization header required"
+                detail="Missing or invalid authorization header"
             )
 
         token = auth_header.split(" ")[1]
-        token_data = await AuthService.verify_token(token)
+        payload = await AuthService.verify_token(token, db)
 
-        return {
-            "valid": True,
-            "user_id": token_data.get("user_id"),
-            "email": token_data.get("email"),
-            "role": token_data.get("role"),
-            "tenant_id": token_data.get("tenant_id")
-        }
-    except HTTPException:
+        # Record successful verification
+        if metrics:
+            metrics.increment_counter("token_verify_total", labels={"status": "success"})
+
+        return {"valid": True, "user_id": payload["sub"]}
+
+    except HTTPException as e:
+        # Record failed verification
+        if metrics:
+            metrics.increment_counter("token_verify_total", labels={"status": "failed"})
         raise
 
     except Exception as e:
+        # Record verification error
+        if metrics:
+            metrics.increment_counter("token_verify_total", labels={"status": "error"})
         logger.error(f"Token verification error: {e}")
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
@@ -120,27 +182,43 @@ async def verify_token(
 
 @router.post("/logout")
 async def logout(
-    refresh_data: RefreshTokenRequest,
     request: Request,
     db: AsyncSession = Depends(get_db)
 ):
-    """Logout user"""
+    """User logout"""
+    metrics = get_metrics_collector(request)
+
     try:
-        # Get user from token
         auth_header = request.headers.get("Authorization")
-        if auth_header and auth_header.startswith("Bearer "):
-            token = auth_header.split(" ")[1]
-            token_data = await AuthService.verify_token(token)
-            user_id = token_data.get("user_id")
-
-            if user_id:
-                success = await AuthService.logout_user(user_id, refresh_data.refresh_token, db)
-                return {"success": success}
+        if not auth_header or not auth_header.startswith("Bearer "):
+            if metrics:
+                metrics.increment_counter("logout_total", labels={"status": "no_token"})
+            raise HTTPException(
+                status_code=status.HTTP_401_UNAUTHORIZED,
+                detail="Missing or invalid authorization header"
+            )
+
+        token = auth_header.split(" ")[1]
+        await AuthService.logout_user(token, db)
+
+        # Record successful logout
+        if metrics:
+            metrics.increment_counter("logout_total", labels={"status": "success"})
+
+        return {"message": "Logged out successfully"}
+
+    except HTTPException as e:
+        # Record failed logout
+        if metrics:
+            metrics.increment_counter("logout_total", labels={"status": "failed"})
+        raise
 
-        return {"success": False, "message": "Invalid token"}
     except Exception as e:
+        # Record logout error
+        if metrics:
+            metrics.increment_counter("logout_total", labels={"status": "error"})
         logger.error(f"Logout error: {e}")
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Logout failed"
         )
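
Note on the pattern above: handlers fetch the collector with getattr(request.app.state, 'metrics_collector', None) and skip recording when it is absent, so a broken metrics setup can no longer take an auth endpoint down with it. The app.state attribute is populated by setup_metrics_early() in services/auth/app/main.py (next file). That function's definition is not fully shown in this commit, so the following is only a minimal sketch, assuming it creates the collector, exposes it on app.state, and installs the request-tracking middleware before the app starts:

    # Hypothetical sketch of setup_metrics_early; the real definition lives in
    # shared/monitoring/metrics.py, of which only part appears below.
    import time
    from fastapi import FastAPI, Request

    def setup_metrics_early(app: FastAPI, service_name: str) -> MetricsCollector:
        collector = MetricsCollector(service_name)
        # This is what get_metrics_collector(request) reads in the routes above.
        app.state.metrics_collector = collector

        @app.middleware("http")
        async def track_requests(request: Request, call_next):
            start = time.time()
            response = await call_next(request)
            # Record into the default metrics defined in shared/monitoring/metrics.py.
            DEFAULT_REQUEST_COUNT.labels(
                method=request.method, endpoint=request.url.path,
                status_code=response.status_code, service=service_name).inc()
            DEFAULT_REQUEST_DURATION.labels(
                method=request.method, endpoint=request.url.path,
                service=service_name).observe(time.time() - start)
            return response

        return collector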
services/auth/app/main.py
@@ -1,9 +1,5 @@
-# ================================================================
-# services/auth/app/main.py (ENHANCED VERSION)
-# ================================================================
 """
-Authentication Service Main Application
-Enhanced version with proper lifecycle management and microservices integration
+Authentication Service Main Application - Fixed middleware issue
 """
 
 import logging
@@ -16,58 +12,108 @@ from app.core.config import settings
 from app.core.database import engine, create_tables
 from app.api import auth, users
 from app.services.messaging import setup_messaging, cleanup_messaging
-from shared.monitoring.logging import setup_logging
-from shared.monitoring.metrics import MetricsCollector
+from shared.monitoring import setup_logging, HealthChecker
+from shared.monitoring.metrics import setup_metrics_early
 
-# Setup logging
+# Setup logging first
 setup_logging("auth-service", settings.LOG_LEVEL)
 logger = logging.getLogger(__name__)
 
-# Initialize metrics
-metrics = MetricsCollector("auth_service")
+# Global variables for lifespan access
+metrics_collector = None
+health_checker = None
 
-@asynccontextmanager
-async def lifespan(app: FastAPI):
-    """Application lifespan events"""
-    # Startup
-    logger.info("Starting Authentication Service...")
-
-    # Create database tables
-    await create_tables()
-    logger.info("Database tables created")
-
-    # Setup messaging
-    await setup_messaging()
-    logger.info("Messaging setup complete")
-
-    # Register metrics
-    metrics.register_counter("auth_requests_total", "Total authentication requests")
-    metrics.register_counter("auth_login_success_total", "Successful logins")
-    metrics.register_counter("auth_login_failure_total", "Failed logins")
-    metrics.register_counter("auth_registration_total", "User registrations")
-    metrics.register_histogram("auth_request_duration_seconds", "Request duration")
-
-    logger.info("Authentication Service started successfully")
-
-    yield
-
-    # Shutdown
-    logger.info("Shutting down Authentication Service...")
-    await cleanup_messaging()
-    await engine.dispose()
-    logger.info("Authentication Service shutdown complete")
-
-# Create FastAPI app
+# Create FastAPI app FIRST
 app = FastAPI(
     title="Authentication Service",
     description="Handles user authentication and authorization for bakery forecasting platform",
     version="1.0.0",
     docs_url="/docs",
-    redoc_url="/redoc",
-    lifespan=lifespan
+    redoc_url="/redoc"
 )
 
-# CORS middleware
+# Setup metrics BEFORE any middleware and BEFORE lifespan
+# This must happen before the app starts
+metrics_collector = setup_metrics_early(app, "auth-service")
+
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    """Application lifespan events - NO MIDDLEWARE ADDED HERE"""
+    global health_checker
+
+    # Startup
+    logger.info("Starting Authentication Service...")
+
+    try:
+        # Create database tables
+        await create_tables()
+        logger.info("Database tables created")
+
+        # Setup messaging
+        await setup_messaging()
+        logger.info("Messaging setup complete")
+
+        # Register custom metrics (metrics_collector already exists)
+        metrics_collector.register_counter("registration_total", "Total user registrations")
+        metrics_collector.register_counter("login_success_total", "Successful logins")
+        metrics_collector.register_counter("login_failure_total", "Failed logins")
+        metrics_collector.register_counter("token_refresh_total", "Token refresh requests")
+        metrics_collector.register_counter("token_verify_total", "Token verification requests")
+        metrics_collector.register_counter("logout_total", "User logout requests")
+        metrics_collector.register_counter("errors_total", "Total errors")
+        metrics_collector.register_histogram("registration_duration_seconds", "Registration request duration")
+        metrics_collector.register_histogram("login_duration_seconds", "Login request duration")
+        metrics_collector.register_histogram("token_refresh_duration_seconds", "Token refresh duration")
+
+        # Setup health checker
+        health_checker = HealthChecker("auth-service")
+
+        # Add database health check
+        async def check_database():
+            try:
+                from app.core.database import get_db
+                async for db in get_db():
+                    await db.execute("SELECT 1")
+                return True
+            except Exception as e:
+                return f"Database error: {e}"
+
+        health_checker.add_check("database", check_database, timeout=5.0, critical=True)
+
+        # Add messaging health check
+        def check_messaging():
+            try:
+                # Add your messaging health check logic here
+                return True
+            except Exception as e:
+                return f"Messaging error: {e}"
+
+        health_checker.add_check("messaging", check_messaging, timeout=3.0, critical=False)
+
+        # Store health checker in app state
+        app.state.health_checker = health_checker
+
+        logger.info("Authentication Service started successfully")
+
+    except Exception as e:
+        logger.error(f"Failed to start Authentication Service: {e}")
+        raise
+
+    yield
+
+    # Shutdown
+    logger.info("Shutting down Authentication Service...")
+    try:
+        await cleanup_messaging()
+        await engine.dispose()
+        logger.info("Authentication Service shutdown complete")
+    except Exception as e:
+        logger.error(f"Error during shutdown: {e}")
+
+# Set lifespan AFTER metrics setup
+app.router.lifespan_context = lifespan
+
+# CORS middleware (added after metrics setup)
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],  # Configure properly for production
@@ -80,44 +126,30 @@ app.add_middleware(
 app.include_router(auth.router, prefix="/api/v1/auth", tags=["authentication"])
 app.include_router(users.router, prefix="/api/v1/users", tags=["users"])
 
-# Health check endpoint
+# Health check endpoint with comprehensive checks
 @app.get("/health")
 async def health_check():
-    """Health check endpoint"""
-    return {
-        "service": "auth-service",
-        "status": "healthy",
-        "version": "1.0.0"
-    }
-
-# Metrics endpoint
-@app.get("/metrics")
-async def get_metrics():
-    """Prometheus metrics endpoint"""
-    return metrics.get_metrics()
+    """Comprehensive health check endpoint"""
+    if health_checker:
+        return await health_checker.check_health()
+    else:
+        return {
+            "service": "auth-service",
+            "status": "healthy",
+            "version": "1.0.0"
+        }
 
 # Exception handlers
 @app.exception_handler(Exception)
 async def global_exception_handler(request: Request, exc: Exception):
-    """Global exception handler"""
+    """Global exception handler with metrics"""
     logger.error(f"Unhandled exception: {exc}", exc_info=True)
+
+    # Record error metric if available
+    if metrics_collector:
+        metrics_collector.increment_counter("errors_total", labels={"type": "unhandled"})
 
     return JSONResponse(
         status_code=500,
         content={"detail": "Internal server error"}
     )
-
-# Request middleware for metrics
-@app.middleware("http")
-async def metrics_middleware(request: Request, call_next):
-    """Middleware to collect metrics"""
-    import time
-    start_time = time.time()
-
-    response = await call_next(request)
-
-    # Record metrics
-    duration = time.time() - start_time
-    metrics.observe_histogram("auth_request_duration_seconds", duration)
-    metrics.increment_counter("auth_requests_total")
-
-    return response
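
Note: the ordering in this file is the actual middleware fix. Starlette builds its middleware stack once, when the application starts, so calling app.add_middleware() from inside a lifespan handler raises RuntimeError("Cannot add middleware after an application has started"). The rewrite therefore registers every middleware at import time and only then attaches the lifespan. Condensed, the required order is:

    app = FastAPI(title="Authentication Service")                  # 1. create the app, no lifespan yet
    metrics_collector = setup_metrics_early(app, "auth-service")   # 2. metrics middleware
    app.router.lifespan_context = lifespan                         # 3. attach the lifespan afterwards
    app.add_middleware(CORSMiddleware, allow_origins=["*"])        # 4. remaining middleware

Assigning app.router.lifespan_context directly relies on a Starlette internal rather than public API; defining lifespan above the FastAPI(...) call and passing lifespan=lifespan would achieve the same ordering without it.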
services/data/app/main.py
@@ -1,6 +1,8 @@
+# ================================================================
+# services/data/app/main.py - FIXED VERSION
+# ================================================================
 """
-Data Service Main Application
-Handles external API integrations (weather, traffic, events)
+Data Service Main Application - Fixed middleware issue
 """
 
 import logging
@@ -8,45 +10,99 @@ from contextlib import asynccontextmanager
 from fastapi import FastAPI, Request
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse
-import structlog
 
 from app.core.config import settings
 from app.core.database import init_db
 from app.api.sales import router as sales_router
 from app.api.weather import router as weather_router
 from app.api.traffic import router as traffic_router
-from shared.monitoring.metrics import setup_metrics
-from shared.monitoring.logging import setup_logging
+from shared.monitoring import setup_logging, HealthChecker
+from shared.monitoring.metrics import setup_metrics_early
 
-# Setup logging
+# Setup logging first
 setup_logging("data-service", settings.LOG_LEVEL)
-logger = structlog.get_logger()
+logger = logging.getLogger(__name__)
 
-@asynccontextmanager
-async def lifespan(app: FastAPI):
-    """Application lifespan events"""
-    # Startup
-    logger.info("Starting Data Service")
-    await init_db()
-    yield
-    # Shutdown
-    logger.info("Shutting down Data Service")
+# Global variables for lifespan access
+metrics_collector = None
+health_checker = None
 
-# Create FastAPI app
+# Create FastAPI app FIRST
 app = FastAPI(
     title="Bakery Data Service",
     description="External data integration service for weather, traffic, and sales data",
-    version="1.0.0",
-    lifespan=lifespan
+    version="1.0.0"
 )
 
-# Setup metrics
-setup_metrics(app)
+# Setup metrics BEFORE any middleware and BEFORE lifespan
+metrics_collector = setup_metrics_early(app, "data-service")
 
-# CORS middleware
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    """Application lifespan events - NO MIDDLEWARE ADDED HERE"""
+    global health_checker
+
+    # Startup
+    logger.info("Starting Data Service...")
+
+    try:
+        # Initialize database
+        await init_db()
+        logger.info("Database initialized")
+
+        # Register custom metrics (metrics_collector already exists)
+        metrics_collector.register_counter("sales_records_created_total", "Total sales records created")
+        metrics_collector.register_counter("sales_queries_total", "Sales record queries")
+        metrics_collector.register_counter("weather_api_calls_total", "Weather API calls")
+        metrics_collector.register_counter("weather_api_success_total", "Successful weather API calls")
+        metrics_collector.register_counter("weather_api_failures_total", "Failed weather API calls")
+        metrics_collector.register_counter("traffic_api_calls_total", "Traffic API calls")
+        metrics_collector.register_counter("import_jobs_total", "Data import jobs")
+        metrics_collector.register_counter("template_downloads_total", "Template downloads")
+        metrics_collector.register_counter("errors_total", "Total errors")
+        metrics_collector.register_histogram("sales_create_duration_seconds", "Sales record creation duration")
+        metrics_collector.register_histogram("sales_list_duration_seconds", "Sales record list duration")
+        metrics_collector.register_histogram("import_duration_seconds", "Data import duration")
+        metrics_collector.register_histogram("weather_current_duration_seconds", "Current weather API duration")
+        metrics_collector.register_histogram("weather_forecast_duration_seconds", "Weather forecast API duration")
+        metrics_collector.register_histogram("external_api_duration_seconds", "External API call duration")
+
+        # Setup health checker
+        health_checker = HealthChecker("data-service")
+
+        # Add database health check
+        async def check_database():
+            try:
+                from app.core.database import get_db
+                async for db in get_db():
+                    await db.execute("SELECT 1")
+                return True
+            except Exception as e:
+                return f"Database error: {e}"
+
+        health_checker.add_check("database", check_database, timeout=5.0, critical=True)
+
+        # Store health checker in app state
+        app.state.health_checker = health_checker
+
+        logger.info("Data Service started successfully")
+
+    except Exception as e:
+        logger.error(f"Failed to start Data Service: {e}")
+        raise
+
+    yield
+
+    # Shutdown
+    logger.info("Shutting down Data Service...")
+
+# Set lifespan AFTER metrics setup
+app.router.lifespan_context = lifespan
+
+# CORS middleware (added after metrics setup)
 app.add_middleware(
     CORSMiddleware,
-    allow_origins=settings.CORS_ORIGINS,
+    allow_origins=getattr(settings, 'CORS_ORIGINS', ["*"]),
     allow_credentials=True,
     allow_methods=["*"],
     allow_headers=["*"],
@@ -57,16 +113,30 @@ app.include_router(sales_router, prefix="/api/v1/sales", tags=["sales"])
 app.include_router(weather_router, prefix="/api/v1/weather", tags=["weather"])
 app.include_router(traffic_router, prefix="/api/v1/traffic", tags=["traffic"])
 
+# Health check endpoint
 @app.get("/health")
 async def health_check():
-    """Health check endpoint"""
-    return {"status": "healthy", "service": "data-service"}
+    """Comprehensive health check endpoint"""
+    if health_checker:
+        return await health_checker.check_health()
+    else:
+        return {
+            "service": "data-service",
+            "status": "healthy",
+            "version": "1.0.0"
+        }
 
+# Exception handlers
 @app.exception_handler(Exception)
 async def global_exception_handler(request: Request, exc: Exception):
-    """Global exception handler"""
-    logger.error("Unhandled exception", exc_info=exc, path=request.url.path)
+    """Global exception handler with metrics"""
+    logger.error(f"Unhandled exception: {exc}", exc_info=True)
+
+    # Record error metric if available
+    if metrics_collector:
+        metrics_collector.increment_counter("errors_total", labels={"type": "unhandled"})
 
     return JSONResponse(
         status_code=500,
         content={"detail": "Internal server error"}
     )
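
Note: with the checker stored on app.state, GET /health now returns the aggregate document built by HealthChecker.check_health() (defined in shared/monitoring/health.py below). An illustrative payload, with hypothetical values:

    {
        "service": "data-service",
        "status": "healthy",
        "uptime": 128.6,
        "timestamp": 1700000000.0,
        "checks": {
            "database": {
                "status": "healthy",
                "message": "OK",
                "duration": 0.012,
                "timestamp": 1700000000.0
            }
        }
    }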
shared/monitoring/__init__.py (new file, 15 lines)
@@ -0,0 +1,15 @@
+"""
+Shared monitoring package for microservices
+"""
+
+from .logging import setup_logging
+from .metrics import setup_metrics_early, get_metrics_collector, MetricsCollector
+from .health import HealthChecker
+
+__all__ = [
+    'setup_logging',
+    'setup_metrics_early',
+    'get_metrics_collector',
+    'MetricsCollector',
+    'HealthChecker'
+]
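
Note: two different lookups now share the name get_metrics_collector. The module-level one re-exported here (and used by the decorators) resolves a collector by service name; services/auth/app/api/auth.py above defines its own request-scoped helper that reads app.state. The module-level function's body falls outside the truncated metrics.py diff; given the _metrics_registry that file maintains, it is presumably just a locked dictionary lookup:

    # Presumed shape of the registry lookup; not shown in this commit.
    def get_metrics_collector(service_name: str) -> Optional['MetricsCollector']:
        with _registry_lock:
            return _metrics_registry.get(service_name)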
shared/monitoring/decorators.py (new file, 89 lines)
@@ -0,0 +1,89 @@
+# ================================================================
+# shared/monitoring/decorators.py
+# ================================================================
+"""
+Decorators for monitoring and metrics
+"""
+
+import time
+import logging
+import functools
+from typing import Callable, Any, Optional
+from .metrics import get_metrics_collector
+
+logger = logging.getLogger(__name__)
+
+def track_execution_time(metric_name: str, service_name: str,
+                         labels: Optional[dict] = None):
+    """Decorator to track function execution time"""
+    def decorator(func: Callable) -> Callable:
+        @functools.wraps(func)
+        async def async_wrapper(*args, **kwargs) -> Any:
+            start_time = time.time()
+            try:
+                result = await func(*args, **kwargs)
+                duration = time.time() - start_time
+
+                metrics_collector = get_metrics_collector(service_name)
+                if metrics_collector:
+                    metrics_collector.observe_histogram(metric_name, duration, labels)
+
+                return result
+            except Exception as e:
+                duration = time.time() - start_time
+                logger.error(f"Function {func.__name__} failed after {duration:.2f}s: {e}")
+                raise
+
+        @functools.wraps(func)
+        def sync_wrapper(*args, **kwargs) -> Any:
+            start_time = time.time()
+            try:
+                result = func(*args, **kwargs)
+                duration = time.time() - start_time
+
+                metrics_collector = get_metrics_collector(service_name)
+                if metrics_collector:
+                    metrics_collector.observe_histogram(metric_name, duration, labels)
+
+                return result
+            except Exception as e:
+                duration = time.time() - start_time
+                logger.error(f"Function {func.__name__} failed after {duration:.2f}s: {e}")
+                raise
+
+        # Return appropriate wrapper based on function type
+        import asyncio
+        if asyncio.iscoroutinefunction(func):
+            return async_wrapper
+        else:
+            return sync_wrapper
+
+    return decorator
+
+
+def count_calls(metric_name: str, service_name: str,
+                labels: Optional[dict] = None):
+    """Decorator to count function calls"""
+    def decorator(func: Callable) -> Callable:
+        @functools.wraps(func)
+        async def async_wrapper(*args, **kwargs) -> Any:
+            metrics_collector = get_metrics_collector(service_name)
+            if metrics_collector:
+                metrics_collector.increment_counter(metric_name, labels=labels)
+            return await func(*args, **kwargs)
+
+        @functools.wraps(func)
+        def sync_wrapper(*args, **kwargs) -> Any:
+            metrics_collector = get_metrics_collector(service_name)
+            if metrics_collector:
+                metrics_collector.increment_counter(metric_name, labels=labels)
+            return func(*args, **kwargs)
+
+        # Return appropriate wrapper based on function type
+        import asyncio
+        if asyncio.iscoroutinefunction(func):
+            return async_wrapper
+        else:
+            return sync_wrapper
+
+    return decorator
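
Note: both decorators fail open. If no collector is registered for service_name, or the metric itself was never registered, recording is skipped (or logged as an error) and the wrapped function still runs. A self-contained usage sketch:

    import asyncio
    from shared.monitoring.metrics import MetricsCollector
    from shared.monitoring.decorators import track_execution_time

    # The collector adds itself to the module-level registry on construction;
    # the histogram must be registered before the first decorated call.
    collector = MetricsCollector("demo-service")
    collector.register_histogram("work_duration_seconds", "Demo work duration")

    @track_execution_time("work_duration_seconds", "demo-service")
    async def do_work():
        await asyncio.sleep(0.1)

    asyncio.run(do_work())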
shared/monitoring/health.py (new file, 162 lines)
@@ -0,0 +1,162 @@
+# ================================================================
+# shared/monitoring/health.py
+# ================================================================
+"""
+Health check utilities for microservices
+"""
+
+import asyncio
+import logging
+import time
+from typing import Dict, List, Callable, Any, Optional
+from dataclasses import dataclass
+from enum import Enum
+
+logger = logging.getLogger(__name__)
+
+class HealthStatus(Enum):
+    HEALTHY = "healthy"
+    DEGRADED = "degraded"
+    UNHEALTHY = "unhealthy"
+
+@dataclass
+class HealthCheck:
+    name: str
+    check_function: Callable[[], Any]
+    timeout: float = 5.0
+    critical: bool = True
+
+@dataclass
+class HealthResult:
+    name: str
+    status: HealthStatus
+    message: str
+    duration: float
+    timestamp: float
+
+class HealthChecker:
+    """Health checker for microservices"""
+
+    def __init__(self, service_name: str):
+        self.service_name = service_name
+        self.checks: List[HealthCheck] = []
+        self.start_time = time.time()
+
+    def add_check(self, name: str, check_function: Callable, timeout: float = 5.0,
+                  critical: bool = True) -> None:
+        """Add a health check"""
+        self.checks.append(HealthCheck(name, check_function, timeout, critical))
+
+    async def run_check(self, check: HealthCheck) -> HealthResult:
+        """Run a single health check"""
+        start_time = time.time()
+
+        try:
+            # Run the check with timeout
+            result = await asyncio.wait_for(
+                asyncio.create_task(self._execute_check(check.check_function)),
+                timeout=check.timeout
+            )
+
+            duration = time.time() - start_time
+
+            if result is True or (isinstance(result, dict) and result.get('healthy', False)):
+                return HealthResult(
+                    name=check.name,
+                    status=HealthStatus.HEALTHY,
+                    message="OK",
+                    duration=duration,
+                    timestamp=time.time()
+                )
+            else:
+                message = str(result) if result else "Check failed"
+                return HealthResult(
+                    name=check.name,
+                    status=HealthStatus.UNHEALTHY,
+                    message=message,
+                    duration=duration,
+                    timestamp=time.time()
+                )
+
+        except asyncio.TimeoutError:
+            duration = time.time() - start_time
+            return HealthResult(
+                name=check.name,
+                status=HealthStatus.UNHEALTHY,
+                message=f"Timeout after {check.timeout}s",
+                duration=duration,
+                timestamp=time.time()
+            )
+        except Exception as e:
+            duration = time.time() - start_time
+            return HealthResult(
+                name=check.name,
+                status=HealthStatus.UNHEALTHY,
+                message=f"Error: {str(e)}",
+                duration=duration,
+                timestamp=time.time()
+            )
+
+    async def _execute_check(self, check_function: Callable) -> Any:
+        """Execute a check function (handles both sync and async)"""
+        if asyncio.iscoroutinefunction(check_function):
+            return await check_function()
+        else:
+            return check_function()
+
+    async def check_health(self) -> Dict[str, Any]:
+        """Run all health checks and return status"""
+        if not self.checks:
+            return {
+                "service": self.service_name,
+                "status": HealthStatus.HEALTHY.value,
+                "uptime": time.time() - self.start_time,
+                "timestamp": time.time(),
+                "checks": {}
+            }
+
+        # Run all checks concurrently
+        results = await asyncio.gather(
+            *[self.run_check(check) for check in self.checks],
+            return_exceptions=True
+        )
+
+        # Process results
+        check_results = {}
+        overall_status = HealthStatus.HEALTHY
+
+        for i, result in enumerate(results):
+            check = self.checks[i]
+
+            if isinstance(result, Exception):
+                check_result = HealthResult(
+                    name=check.name,
+                    status=HealthStatus.UNHEALTHY,
+                    message=f"Exception: {str(result)}",
+                    duration=0.0,
+                    timestamp=time.time()
+                )
+            else:
+                check_result = result
+
+            check_results[check.name] = {
+                "status": check_result.status.value,
+                "message": check_result.message,
+                "duration": check_result.duration,
+                "timestamp": check_result.timestamp
+            }
+
+            # Determine overall status
+            if check.critical and check_result.status == HealthStatus.UNHEALTHY:
+                overall_status = HealthStatus.UNHEALTHY
+            elif check_result.status == HealthStatus.DEGRADED and overall_status == HealthStatus.HEALTHY:
+                overall_status = HealthStatus.DEGRADED
+
+        return {
+            "service": self.service_name,
+            "status": overall_status.value,
+            "uptime": time.time() - self.start_time,
+            "timestamp": time.time(),
+            "checks": check_results
+        }
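
Note: check functions may be sync or async. Returning True (or a dict with "healthy": True) counts as healthy; any other return value is stringified into a failure message; exceptions and timeouts mark the check unhealthy. Only checks registered with critical=True can pull the overall status down to "unhealthy". A usage sketch:

    import asyncio
    from shared.monitoring.health import HealthChecker

    checker = HealthChecker("demo-service")

    async def check_cache():
        return True  # or e.g. "Cache error: connection refused"

    checker.add_check("cache", check_cache, timeout=2.0, critical=False)
    print(asyncio.run(checker.check_health()))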
shared/monitoring/logging.py
@@ -1,3 +1,6 @@
+# ================================================================
+# shared/monitoring/logging.py
+# ================================================================
 """
 Centralized logging configuration for microservices
 """
@@ -5,53 +8,109 @@ Centralized logging configuration for microservices
 import logging
 import logging.config
 import os
+import sys
 from typing import Dict, Any
 
-def setup_logging(service_name: str, log_level: str = "INFO") -> None:
-    """Set up logging configuration for a microservice"""
-
-    config: Dict[str, Any] = {
-        "version": 1,
-        "disable_existing_loggers": False,
-        "formatters": {
-            "standard": {
-                "format": "%(asctime)s [%(levelname)s] %(name)s: %(message)s"
-            },
-            "detailed": {
-                "format": "%(asctime)s [%(levelname)s] %(name)s [%(filename)s:%(lineno)d] %(message)s"
-            },
-            "json": {
-                "()": "pythonjsonlogger.jsonlogger.JsonFormatter",
-                "format": "%(asctime)s %(name)s %(levelname)s %(message)s"
-            }
-        },
-        "handlers": {
-            "console": {
-                "class": "logging.StreamHandler",
-                "level": log_level,
-                "formatter": "standard",
-                "stream": "ext://sys.stdout"
-            },
-            "file": {
-                "class": "logging.FileHandler",
-                "level": log_level,
-                "formatter": "detailed",
-                "filename": f"/var/log/{service_name}.log",
-                "mode": "a"
-            },
-            "logstash": {
+def setup_logging(service_name: str, log_level: str = "INFO",
+                  enable_json: bool = False, enable_file: bool = True) -> None:
+    """
+    Set up logging configuration for a microservice with improved error handling.
+
+    Args:
+        service_name: Name of the service for log identification
+        log_level: Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
+        enable_json: Whether to use JSON formatting
+        enable_file: Whether to enable file logging
+    """
+
+    # Create logs directory if it doesn't exist and file logging is enabled
+    log_dir = "/var/log"
+    if enable_file:
+        try:
+            os.makedirs(log_dir, exist_ok=True)
+        except PermissionError:
+            # Fallback to local directory if can't write to /var/log
+            log_dir = "./logs"
+            os.makedirs(log_dir, exist_ok=True)
+            print(f"Warning: Could not write to /var/log, using {log_dir}")
+
+    # Define formatters
+    formatters = {
+        "standard": {
+            "format": "%(asctime)s [%(levelname)s] %(name)s: %(message)s",
+            "datefmt": "%Y-%m-%d %H:%M:%S"
+        },
+        "detailed": {
+            "format": "%(asctime)s [%(levelname)s] %(name)s [%(filename)s:%(lineno)d] %(funcName)s(): %(message)s",
+            "datefmt": "%Y-%m-%d %H:%M:%S"
+        }
+    }
+
+    # Add JSON formatter if requested and available
+    if enable_json:
+        try:
+            import pythonjsonlogger.jsonlogger
+            formatters["json"] = {
+                "()": "pythonjsonlogger.jsonlogger.JsonFormatter",
+                "format": "%(asctime)s %(name)s %(levelname)s %(message)s %(filename)s %(lineno)d"
+            }
+        except ImportError:
+            print("Warning: pythonjsonlogger not available, falling back to standard formatting")
+            enable_json = False
+
+    # Define handlers
+    handlers = {
+        "console": {
+            "class": "logging.StreamHandler",
+            "level": log_level,
+            "formatter": "json" if enable_json else "standard",
+            "stream": "ext://sys.stdout"
+        }
+    }
+
+    # Add file handler if enabled
+    if enable_file:
+        handlers["file"] = {
+            "class": "logging.FileHandler",
+            "level": log_level,
+            "formatter": "detailed",
+            "filename": f"{log_dir}/{service_name}.log",
+            "mode": "a",
+            "encoding": "utf-8"
+        }
+
+    # Add logstash handler if in production
+    logstash_host = os.getenv("LOGSTASH_HOST")
+    if logstash_host and os.getenv("ENVIRONMENT") == "production":
+        try:
+            handlers["logstash"] = {
                 "class": "logstash.TCPLogstashHandler",
-                "host": os.getenv("LOGSTASH_HOST", "localhost"),
+                "host": logstash_host,
                 "port": int(os.getenv("LOGSTASH_PORT", "5000")),
                 "version": 1,
                 "message_type": "logstash",
                 "fqdn": False,
                 "tags": [service_name]
             }
-        },
+        except Exception as e:
+            print(f"Warning: Could not setup logstash handler: {e}")
+
+    # Define root logger configuration
+    root_handlers = ["console"]
+    if enable_file:
+        root_handlers.append("file")
+    if "logstash" in handlers:
+        root_handlers.append("logstash")
+
+    # Complete logging configuration
+    config: Dict[str, Any] = {
+        "version": 1,
+        "disable_existing_loggers": False,
+        "formatters": formatters,
+        "handlers": handlers,
         "loggers": {
-            "": {
-                "handlers": ["console", "file"],
+            "": {  # Root logger
+                "handlers": root_handlers,
                 "level": log_level,
                 "propagate": False
            },
@@ -64,14 +123,32 @@ def setup_logging(service_name: str, log_level: str = "INFO") -> None:
             "handlers": ["console"],
             "level": log_level,
             "propagate": False
+            },
+            "sqlalchemy": {
+                "handlers": ["console"],
+                "level": "WARNING",  # Reduce SQL logging noise
+                "propagate": False
+            },
+            "httpx": {
+                "handlers": ["console"],
+                "level": "WARNING",  # Reduce HTTP client logging
+                "propagate": False
             }
         }
     }
 
-    # Add logstash handler if in production
-    if os.getenv("ENVIRONMENT") == "production":
-        config["loggers"][""]["handlers"].append("logstash")
-
-    logging.config.dictConfig(config)
-    logger = logging.getLogger(__name__)
-    logger.info(f"Logging configured for {service_name}")
+    try:
+        logging.config.dictConfig(config)
+        logger = logging.getLogger(__name__)
+        logger.info(f"Logging configured for {service_name} at level {log_level}")
+    except Exception as e:
+        # Fallback to basic logging if configuration fails
+        logging.basicConfig(
+            level=getattr(logging, log_level.upper()),
+            format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
+            handlers=[logging.StreamHandler(sys.stdout)]
+        )
+        logger = logging.getLogger(__name__)
+        logger.error(f"Failed to configure advanced logging for {service_name}: {e}")
+        logger.info(f"Using basic logging configuration for {service_name}")
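
Note: setup_logging now degrades stepwise instead of crashing: an unwritable /var/log falls back to ./logs, a missing pythonjsonlogger disables JSON output, and a failing dictConfig drops to basicConfig on stdout, so a service never starts without logging. For local development the file handler can be disabled entirely:

    from shared.monitoring.logging import setup_logging

    # Console-only configuration; nothing is written to /var/log or ./logs.
    setup_logging("demo-service", "DEBUG", enable_json=False, enable_file=False)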
@@ -1,147 +1,150 @@
|
|||||||
# shared/monitoring/metrics.py
|
# ================================================================
|
||||||
|
# shared/monitoring/metrics.py - FIXED VERSION
|
||||||
|
# ================================================================
|
||||||
"""
|
"""
|
||||||
Metrics collection for microservices
|
Centralized metrics collection for microservices - Fixed middleware issue
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import time
|
import time
|
||||||
import logging
|
import logging
|
||||||
from typing import Dict, Any, List # Added List import
|
from typing import Dict, Any, List, Optional
|
||||||
from prometheus_client import Counter, Histogram, Gauge, start_http_server
|
from prometheus_client import Counter, Histogram, Gauge, start_http_server, generate_latest
|
||||||
from functools import wraps
|
from fastapi import Request, Response
|
||||||
from prometheus_client import generate_latest # Moved this import here for consistency
|
from threading import Lock
|
||||||
from fastapi import Request
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
# Prometheus metrics
|
# Global registry for metrics collectors
|
||||||
REQUEST_COUNT = Counter(
|
_metrics_registry: Dict[str, 'MetricsCollector'] = {}
|
||||||
|
_registry_lock = Lock()
|
||||||
|
|
||||||
|
# Default Prometheus metrics
|
||||||
|
DEFAULT_REQUEST_COUNT = Counter(
|
||||||
'http_requests_total',
|
'http_requests_total',
|
||||||
'Total HTTP requests',
|
'Total HTTP requests',
|
||||||
['method', 'endpoint', 'status_code', 'service']
|
['method', 'endpoint', 'status_code', 'service']
|
||||||
)
|
)
|
||||||
|
|
||||||
REQUEST_DURATION = Histogram(
|
DEFAULT_REQUEST_DURATION = Histogram(
|
||||||
'http_request_duration_seconds',
|
'http_request_duration_seconds',
|
||||||
'HTTP request duration in seconds',
|
'HTTP request duration in seconds',
|
||||||
['method', 'endpoint', 'service']
|
['method', 'endpoint', 'service']
|
||||||
)
|
)
|
||||||
|
|
||||||
ACTIVE_CONNECTIONS = Gauge(
|
DEFAULT_ACTIVE_CONNECTIONS = Gauge(
|
||||||
'active_connections',
|
'active_connections',
|
||||||
'Active database connections',
|
'Active database connections',
|
||||||
['service']
|
['service']
|
||||||
)
|
)
|
||||||
|
|
||||||
TRAINING_JOBS = Counter(
|
|
||||||
'training_jobs_total',
|
|
||||||
'Total training jobs',
|
|
||||||
['status', 'service']
|
|
||||||
)
|
|
||||||
|
|
||||||
FORECASTS_GENERATED = Counter(
|
|
||||||
'forecasts_generated_total',
|
|
||||||
'Total forecasts generated',
|
|
||||||
['service']
|
|
||||||
)
|
|
||||||
|
|
||||||
class MetricsCollector:
|
class MetricsCollector:
|
||||||
"""Metrics collector for microservices"""
|
"""Thread-safe metrics collector for microservices"""
|
||||||
|
|
||||||
def __init__(self, service_name: str):
|
def __init__(self, service_name: str):
|
||||||
self.service_name = service_name
|
self.service_name = service_name
|
||||||
self.start_time = time.time()
|
self.start_time = time.time()
|
||||||
# Initialize dictionaries to hold custom counters and histograms
|
|
||||||
self._counters: Dict[str, Counter] = {}
|
self._counters: Dict[str, Counter] = {}
|
||||||
self._histograms: Dict[str, Histogram] = {}
|
self._histograms: Dict[str, Histogram] = {}
|
||||||
|
self._gauges: Dict[str, Gauge] = {}
|
||||||
|
self._lock = Lock()
|
||||||
|
|
||||||
|
# Register in global registry
|
||||||
|
with _registry_lock:
|
||||||
|
_metrics_registry[service_name] = self
|
||||||
|
|
||||||
def start_metrics_server(self, port: int = 8080):
|
def start_metrics_server(self, port: int = 8080):
|
||||||
"""Start Prometheus metrics server"""
|
"""Start Prometheus metrics server"""
|
||||||
try:
|
try:
|
||||||
start_http_server(port)
|
start_http_server(port)
|
||||||
logger.info(f"Metrics server started on port {port}")
|
logger.info(f"Metrics server started on port {port} for {self.service_name}")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Failed to start metrics server: {e}")
|
logger.error(f"Failed to start metrics server for {self.service_name}: {e}")
|
||||||
|
|
||||||
def record_request(self, method: str, endpoint: str, status_code: int, duration: float):
|
def register_counter(self, name: str, documentation: str, labels: List[str] = None) -> Counter:
|
||||||
"""Record HTTP request metrics"""
|
|
||||||
REQUEST_COUNT.labels(
|
|
||||||
method=method,
|
|
||||||
endpoint=endpoint,
|
|
||||||
status_code=status_code,
|
|
||||||
service=self.service_name
|
|
||||||
).inc()
|
|
||||||
|
|
||||||
REQUEST_DURATION.labels(
|
|
||||||
method=method,
|
|
||||||
endpoint=endpoint,
|
|
||||||
service=self.service_name
|
|
||||||
).observe(duration)
|
|
||||||
|
|
||||||
def record_training_job(self, status: str):
|
|
||||||
"""Record training job metrics"""
|
|
||||||
TRAINING_JOBS.labels(
|
|
||||||
status=status,
|
|
||||||
service=self.service_name
|
|
||||||
).inc()
|
|
||||||
|
|
||||||
def record_forecast_generated(self):
|
|
||||||
"""Record forecast generation metrics"""
|
|
||||||
FORECASTS_GENERATED.labels(
|
|
||||||
service=self.service_name
|
|
||||||
).inc()
|
|
||||||
|
|
||||||
def set_active_connections(self, count: int):
|
|
||||||
"""Set active database connections"""
|
|
||||||
ACTIVE_CONNECTIONS.labels(
|
|
||||||
service=self.service_name
|
|
||||||
).set(count)
|
|
||||||
|
|
||||||
def register_counter(self, name: str, documentation: str, labels: List[str] = None):
|
|
||||||
"""Register a custom Counter metric."""
|
"""Register a custom Counter metric."""
|
||||||
if name not in self._counters:
|
with self._lock:
|
||||||
|
if name in self._counters:
|
||||||
|
logger.warning(f"Counter '{name}' already registered for {self.service_name}")
|
||||||
|
return self._counters[name]
|
||||||
|
|
||||||
if labels is None:
|
if labels is None:
|
||||||
labels = ['service']
|
labels = ['service']
|
||||||
elif 'service' not in labels:
|
elif 'service' not in labels:
|
||||||
labels.append('service')
|
labels.append('service')
|
||||||
# Pass labelnames as a keyword argument
|
|
||||||
self._counters[name] = Counter(name, documentation, labelnames=labels)
|
try:
|
||||||
logger.info(f"Registered counter: {name}")
|
counter = Counter(f"{self.service_name.replace('-', '_')}_{name}", documentation, labelnames=labels)
|
||||||
else:
|
self._counters[name] = counter
|
||||||
logger.warning(f"Counter '{name}' already registered.")
|
logger.info(f"Registered counter: {name} for {self.service_name}")
|
||||||
return self._counters[name] # Return the counter for direct use if needed
|
return counter
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Failed to register counter {name} for {self.service_name}: {e}")
|
||||||
|
raise
|
||||||
|
|
||||||
|
def register_histogram(self, name: str, documentation: str, labels: List[str] = None,
|
||||||
|
buckets: tuple = Histogram.DEFAULT_BUCKETS) -> Histogram:
|
||||||
|
"""Register a custom Histogram metric."""
|
||||||
|
with self._lock:
|
||||||
|
if name in self._histograms:
|
||||||
|
logger.warning(f"Histogram '{name}' already registered for {self.service_name}")
|
||||||
|
return self._histograms[name]
|
||||||
|
|
||||||
|
if labels is None:
|
||||||
|
labels = ['service']
|
||||||
|
elif 'service' not in labels:
|
||||||
|
labels.append('service')
|
||||||
|
|
||||||
|
try:
|
||||||
|
histogram = Histogram(f"{self.service_name.replace('-', '_')}_{name}", documentation,
|
||||||
|
labelnames=labels, buckets=buckets)
|
||||||
|
self._histograms[name] = histogram
|
||||||
|
logger.info(f"Registered histogram: {name} for {self.service_name}")
|
||||||
|
return histogram
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Failed to register histogram {name} for {self.service_name}: {e}")
|
||||||
|
raise
|
||||||
|
|
||||||
|
def register_gauge(self, name: str, documentation: str, labels: List[str] = None) -> Gauge:
|
||||||
|
"""Register a custom Gauge metric."""
|
||||||
|
with self._lock:
|
||||||
|
if name in self._gauges:
|
||||||
|
logger.warning(f"Gauge '{name}' already registered for {self.service_name}")
|
||||||
|
return self._gauges[name]
|
||||||
|
|
||||||
|
if labels is None:
|
||||||
|
labels = ['service']
|
||||||
|
elif 'service' not in labels:
|
||||||
|
labels.append('service')
|
||||||
|
|
||||||
|
try:
|
||||||
|
gauge = Gauge(f"{self.service_name.replace('-', '_')}_{name}", documentation, labelnames=labels)
|
||||||
|
self._gauges[name] = gauge
|
||||||
|
logger.info(f"Registered gauge: {name} for {self.service_name}")
|
||||||
|
return gauge
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Failed to register gauge {name} for {self.service_name}: {e}")
|
||||||
|
raise
     def increment_counter(self, name: str, value: int = 1, labels: Dict[str, str] = None):
-        """Increment a custom Counter metric."""
+        """Increment a counter metric."""
         if name not in self._counters:
-            logger.error(f"Counter '{name}' not registered. Cannot increment.")
+            logger.error(f"Counter '{name}' not registered for {self.service_name}. Cannot increment.")
             return
 
-        # Ensure the 'service' label is always present
         if labels is None:
             labels = {'service': self.service_name}
         elif 'service' not in labels:
             labels['service'] = self.service_name
 
-        self._counters[name].labels(**labels).inc(value)
+        try:
+            self._counters[name].labels(**labels).inc(value)
+        except Exception as e:
+            logger.error(f"Failed to increment counter {name} for {self.service_name}: {e}")
 
-    def register_histogram(self, name: str, documentation: str, labels: List[str] = None, buckets: tuple = Histogram.DEFAULT_BUCKETS):
-        """Register a custom Histogram metric."""
-        if name not in self._histograms:
-            if labels is None:
-                labels = ['service']
-            elif 'service' not in labels:
-                labels.append('service')
-            # Pass labelnames and buckets as keyword arguments
-            self._histograms[name] = Histogram(name, documentation, labelnames=labels, buckets=buckets)
-            logger.info(f"Registered histogram: {name}")
-        else:
-            logger.warning(f"Histogram '{name}' already registered.")
-        return self._histograms[name]  # Return the histogram for direct use if needed
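Incrementing is fail-soft by design; a quick sketch (not part of the commit):

# --- illustrative sketch, not part of the diff ---
collector.increment_counter("login_attempts_total", labels={"status": "success"})

# A typo'd or unregistered name is logged as an error and ignored,
# so a metrics bug cannot take down a request path:
collector.increment_counter("login_attemps_total")  # no exception raised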
     def observe_histogram(self, name: str, value: float, labels: Dict[str, str] = None):
-        """Observe a custom Histogram metric."""
+        """Observe a histogram metric."""
         if name not in self._histograms:
-            logger.error(f"Histogram '{name}' not registered. Cannot observe.")
+            logger.error(f"Histogram '{name}' not registered for {self.service_name}. Cannot observe.")
             return
 
         if labels is None:
@@ -149,145 +152,146 @@ class MetricsCollector:
             labels = {'service': self.service_name}
         elif 'service' not in labels:
             labels['service'] = self.service_name
 
-        self._histograms[name].labels(**labels).observe(value)
+        try:
+            self._histograms[name].labels(**labels).observe(value)
+        except Exception as e:
+            logger.error(f"Failed to observe histogram {name} for {self.service_name}: {e}")
+    def set_gauge(self, name: str, value: float, labels: Dict[str, str] = None):
+        """Set a gauge metric."""
+        if name not in self._gauges:
+            logger.error(f"Gauge '{name}' not registered for {self.service_name}. Cannot set.")
+            return
+
+        if labels is None:
+            labels = {'service': self.service_name}
+        elif 'service' not in labels:
+            labels['service'] = self.service_name
+
+        try:
+            self._gauges[name].labels(**labels).set(value)
+        except Exception as e:
+            logger.error(f"Failed to set gauge {name} for {self.service_name}: {e}")
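Observations and gauge updates follow the same pattern; a sketch timing a hypothetical workload (run_query is invented):

# --- illustrative sketch, not part of the diff ---
import time

start = time.time()
run_query()  # hypothetical workload
collector.observe_histogram("db_query_duration_seconds", time.time() - start)
collector.set_gauge("queue_depth", 42)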
+    def record_request(self, method: str, endpoint: str, status_code: int, duration: float):
+        """Record HTTP request metrics using default metrics."""
+        try:
+            DEFAULT_REQUEST_COUNT.labels(
+                method=method,
+                endpoint=endpoint,
+                status_code=status_code,
+                service=self.service_name
+            ).inc()
+
+            DEFAULT_REQUEST_DURATION.labels(
+                method=method,
+                endpoint=endpoint,
+                service=self.service_name
+            ).observe(duration)
+        except Exception as e:
+            logger.error(f"Failed to record request metrics for {self.service_name}: {e}")
+
+    def set_active_connections(self, count: int):
+        """Set active database connections using default gauge."""
+        try:
+            DEFAULT_ACTIVE_CONNECTIONS.labels(service=self.service_name).set(count)
+        except Exception as e:
+            logger.error(f"Failed to set active connections for {self.service_name}: {e}")
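record_request and set_active_connections write to the module-level DEFAULT_* metrics (defined earlier in this file), so all services share one family of HTTP metrics distinguished only by the service label; for example:

# --- illustrative sketch, not part of the diff ---
collector.record_request(method="POST", endpoint="/api/v1/auth/login",
                         status_code=200, duration=0.042)
collector.set_active_connections(12)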
     def get_metrics(self) -> str:
         """Return Prometheus metrics in exposition format."""
-        return generate_latest().decode('utf-8')
+        try:
+            return generate_latest().decode('utf-8')
+        except Exception as e:
+            logger.error(f"Failed to generate metrics for {self.service_name}: {e}")
+            return ""
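Note that generate_latest() with no argument serializes prometheus_client's global default registry, so the returned text includes every metric registered in the process, not just this collector's:

# --- illustrative sketch, not part of the diff ---
text = collector.get_metrics()
print(text)  # Prometheus text exposition: one 'name{labels} value' sample per line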
-def metrics_middleware(metrics_collector: MetricsCollector):
-    """Middleware to collect metrics"""
-    async def middleware(request, call_next):
-        start_time = time.time()
-        response = await call_next(request)
-        duration = time.time() - start_time
-        # Use the specific record_request for HTTP requests
-        metrics_collector.record_request(
-            method=request.method,
-            endpoint=request.url.path,
-            status_code=response.status_code,
-            duration=duration
-        )
-        return response
-    return middleware
+def get_metrics_collector(service_name: str) -> Optional[MetricsCollector]:
+    """Get metrics collector by service name from global registry."""
+    with _registry_lock:
+        return _metrics_registry.get(service_name)
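Routes can then look a collector up by service name instead of holding a module-level global; a sketch (the metric name mirrors the one used by the auth routes in this commit):

# --- illustrative sketch, not part of the diff ---
collector = get_metrics_collector("auth-service")
if collector:
    collector.increment_counter("registration_total", labels={"status": "success"})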
-def setup_metrics(app):
-    """
-    Setup metrics collection for FastAPI app
-
-    Args:
-        app: FastAPI application instance
-
-    Returns:
-        MetricsCollector: Configured metrics collector
-    """
-    # Get service name from app title or default
-    service_name = getattr(app, 'title', 'unknown-service').lower().replace(' ', '-')
+def create_metrics_collector(service_name: str) -> MetricsCollector:
+    """
+    Create metrics collector without adding middleware.
+    This should be called BEFORE app startup, not during lifespan.
+    """
+    # Get existing or create new
+    existing = get_metrics_collector(service_name)
+    if existing:
+        return existing
+
+    return MetricsCollector(service_name)
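If a collector for the service already exists, create_metrics_collector returns it; a sketch, assuming (as the registry lookup above implies) that MetricsCollector.__init__ records itself in _metrics_registry, which happens earlier in the file and is not shown in this hunk:

# --- illustrative sketch, not part of the diff ---
c1 = create_metrics_collector("auth-service")
c2 = create_metrics_collector("auth-service")
assert c1 is c2  # second call hits the registry; no duplicate Prometheus metrics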
-
-    # Create metrics collector for this service
-    metrics_collector = MetricsCollector(service_name)
-
-    # Add metrics middleware to collect HTTP request metrics
-    @app.middleware("http")
-    async def collect_metrics_middleware(request: Request, call_next):
-        start_time = time.time()
-
-        # Process the request
-        response = await call_next(request)
-
-        # Calculate duration
-        duration = time.time() - start_time
-
-        # Record metrics
-        metrics_collector.record_request(
-            method=request.method,
-            endpoint=request.url.path,
-            status_code=response.status_code,
-            duration=duration
-        )
-
-        return response
-
-    # Add metrics endpoint if it doesn't exist
-    @app.get("/metrics")
-    async def prometheus_metrics():
-        """Prometheus metrics endpoint"""
-        from prometheus_client import generate_latest
-        return Response(
-            content=generate_latest(),
-            media_type="text/plain; version=0.0.4; charset=utf-8"
-        )
-
-    # Store metrics collector in app state for later access
-    app.state.metrics_collector = metrics_collector
-
-    logger.info(f"Metrics collection setup completed for service: {service_name}")
-
-    return metrics_collector
+def add_metrics_middleware(app, metrics_collector: MetricsCollector):
+    """
+    Add metrics middleware to app. Must be called BEFORE app startup.
+    """
+    @app.middleware("http")
+    async def metrics_middleware(request: Request, call_next):
+        start_time = time.time()
+        try:
+            response = await call_next(request)
+            duration = time.time() - start_time
+
+            # Record request metrics
+            metrics_collector.record_request(
+                method=request.method,
+                endpoint=request.url.path,
+                status_code=response.status_code,
+                duration=duration
+            )
+
+            return response
+        except Exception as e:
+            duration = time.time() - start_time
+            # Record failed request
+            metrics_collector.record_request(
+                method=request.method,
+                endpoint=request.url.path,
+                status_code=500,
+                duration=duration
+            )
+            raise
+
+    return metrics_collector
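The except branch records the request as a 500 before re-raising, so unhandled exceptions still show up in request metrics. Wiring it up looks roughly like this (sketch, not part of the commit):

# --- illustrative sketch, not part of the diff ---
from fastapi import FastAPI

app = FastAPI(title="Auth Service")
collector = create_metrics_collector("auth-service")
add_metrics_middleware(app, collector)  # must happen before the server starts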
+def add_metrics_endpoint(app, metrics_collector: MetricsCollector):
+    """Add metrics endpoint to app"""
+    @app.get("/metrics")
+    async def prometheus_metrics():
+        """Prometheus metrics endpoint"""
+        return Response(
+            content=metrics_collector.get_metrics(),
+            media_type="text/plain; version=0.0.4; charset=utf-8"
+        )
 
-# Alternative simplified setup function for services that don't need complex metrics
-def setup_basic_metrics(app, service_name: str = None):
-    """
-    Setup basic metrics collection without complex dependencies
-
-    Args:
-        app: FastAPI application instance
-        service_name: Optional service name override
-
-    Returns:
-        Simple metrics dict
-    """
-    if service_name is None:
-        service_name = getattr(app, 'title', 'unknown-service').lower().replace(' ', '-')
-
-    # Simple in-memory metrics
-    metrics_data = {
-        "requests_total": 0,
-        "requests_by_method": {},
-        "requests_by_status": {},
-        "service_name": service_name,
-        "start_time": time.time()
-    }
-
-    @app.middleware("http")
-    async def simple_metrics_middleware(request: Request, call_next):
-        # Increment total requests
-        metrics_data["requests_total"] += 1
-
-        # Track by method
-        method = request.method
-        metrics_data["requests_by_method"][method] = metrics_data["requests_by_method"].get(method, 0) + 1
-
-        # Process request
-        response = await call_next(request)
-
-        # Track by status code
-        status = str(response.status_code)
-        metrics_data["requests_by_status"][status] = metrics_data["requests_by_status"].get(status, 0) + 1
-
-        return response
-
-    @app.get("/metrics")
-    async def simple_metrics():
-        """Simple metrics endpoint"""
-        uptime = time.time() - metrics_data["start_time"]
-        return {
-            **metrics_data,
-            "uptime_seconds": round(uptime, 2)
-        }
-
-    app.state.simple_metrics = metrics_data
-
-    logger.info(f"Basic metrics setup completed for service: {service_name}")
-
-    return metrics_data
+def setup_metrics_early(app, service_name: str = None) -> MetricsCollector:
+    """
+    Setup metrics collection BEFORE app startup.
+    This must be called before adding any middleware or starting the app.
+    """
+    if service_name is None:
+        service_name = getattr(app, 'title', 'unknown-service').lower().replace(' ', '-').replace('.', '_')
+
+    # Create metrics collector
+    metrics_collector = create_metrics_collector(service_name)
+
+    # Add middleware (must be before app starts)
+    add_metrics_middleware(app, metrics_collector)
+
+    # Add metrics endpoint
+    add_metrics_endpoint(app, metrics_collector)
+
+    # Store in app state for access from routes
+    app.state.metrics_collector = metrics_collector
+
+    logger.info(f"Metrics setup completed for service: {service_name}")
+    return metrics_collector
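Putting it all together, a hypothetical service entry point (sketch; the shared.monitoring.metrics module path matches the imports used elsewhere in this commit):

# --- illustrative sketch, not part of the diff: hypothetical app/main.py ---
from fastapi import FastAPI
from shared.monitoring.metrics import setup_metrics_early

app = FastAPI(title="Auth Service")

# Runs at import time, before uvicorn serves the app: installs the
# HTTP middleware, mounts GET /metrics, and stores the collector in
# app.state.metrics_collector for route handlers to retrieve.
metrics_collector = setup_metrics_early(app, service_name="auth-service")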