Fix shared issues

Urtzi Alfaro
2025-07-18 12:34:28 +02:00
parent 592a810762
commit e989e3b362
9 changed files with 913 additions and 386 deletions

shared/monitoring/__init__.py

@@ -0,0 +1,15 @@
"""
Shared monitoring package for microservices
"""
from .logging import setup_logging
from .metrics import setup_metrics_early, get_metrics_collector, MetricsCollector
from .health import HealthChecker
__all__ = [
'setup_logging',
'setup_metrics_early',
'get_metrics_collector',
'MetricsCollector',
'HealthChecker'
]
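
For orientation, a minimal wiring sketch (not part of this commit) showing how a service might consume the package; the service name "forecast-service" and the app title are placeholders:

from fastapi import FastAPI

from shared.monitoring import setup_logging, setup_metrics_early, HealthChecker

app = FastAPI(title="Forecast Service")

# Logging and metrics are wired up before the app starts serving requests
setup_logging("forecast-service", log_level="INFO")
metrics = setup_metrics_early(app, "forecast-service")
health = HealthChecker("forecast-service")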

shared/monitoring/decorators.py

@@ -0,0 +1,89 @@
# ================================================================
# shared/monitoring/decorators.py
# ================================================================
"""
Decorators for monitoring and metrics
"""
import asyncio
import functools
import logging
import time
from typing import Callable, Any, Optional
from .metrics import get_metrics_collector
logger = logging.getLogger(__name__)
def track_execution_time(metric_name: str, service_name: str,
labels: Optional[dict] = None):
"""Decorator to track function execution time"""
def decorator(func: Callable) -> Callable:
@functools.wraps(func)
async def async_wrapper(*args, **kwargs) -> Any:
start_time = time.time()
try:
result = await func(*args, **kwargs)
duration = time.time() - start_time
metrics_collector = get_metrics_collector(service_name)
if metrics_collector:
metrics_collector.observe_histogram(metric_name, duration, labels)
return result
except Exception as e:
duration = time.time() - start_time
logger.error(f"Function {func.__name__} failed after {duration:.2f}s: {e}")
raise
@functools.wraps(func)
def sync_wrapper(*args, **kwargs) -> Any:
start_time = time.time()
try:
result = func(*args, **kwargs)
duration = time.time() - start_time
metrics_collector = get_metrics_collector(service_name)
if metrics_collector:
metrics_collector.observe_histogram(metric_name, duration, labels)
return result
except Exception as e:
duration = time.time() - start_time
logger.error(f"Function {func.__name__} failed after {duration:.2f}s: {e}")
raise
        # Return the wrapper that matches the decorated function's type
        if asyncio.iscoroutinefunction(func):
            return async_wrapper
        return sync_wrapper
return decorator
def count_calls(metric_name: str, service_name: str,
labels: Optional[dict] = None):
"""Decorator to count function calls"""
def decorator(func: Callable) -> Callable:
@functools.wraps(func)
async def async_wrapper(*args, **kwargs) -> Any:
metrics_collector = get_metrics_collector(service_name)
if metrics_collector:
metrics_collector.increment_counter(metric_name, labels=labels)
return await func(*args, **kwargs)
@functools.wraps(func)
def sync_wrapper(*args, **kwargs) -> Any:
metrics_collector = get_metrics_collector(service_name)
if metrics_collector:
metrics_collector.increment_counter(metric_name, labels=labels)
return func(*args, **kwargs)
        # Return the wrapper that matches the decorated function's type
        if asyncio.iscoroutinefunction(func):
            return async_wrapper
        return sync_wrapper
return decorator
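
A usage sketch for these decorators (illustrative, not part of this commit); the metric and service names are placeholders, and the named histogram and counter are assumed to have been registered on the service's MetricsCollector beforehand:

from shared.monitoring.decorators import track_execution_time, count_calls

@track_execution_time("model_training_seconds", service_name="training-service")
@count_calls("training_runs_total", service_name="training-service")
async def train_model(dataset_id: str) -> dict:
    # Placeholder training logic; duration and call count are recorded
    # by the decorators through the registered collector.
    return {"dataset": dataset_id, "status": "trained"}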

shared/monitoring/health.py

@@ -0,0 +1,162 @@
# ================================================================
# shared/monitoring/health.py
# ================================================================
"""
Health check utilities for microservices
"""
import asyncio
import logging
import time
from typing import Dict, List, Callable, Any, Optional
from dataclasses import dataclass
from enum import Enum
logger = logging.getLogger(__name__)
class HealthStatus(Enum):
HEALTHY = "healthy"
DEGRADED = "degraded"
UNHEALTHY = "unhealthy"
@dataclass
class HealthCheck:
name: str
check_function: Callable[[], Any]
timeout: float = 5.0
critical: bool = True
@dataclass
class HealthResult:
name: str
status: HealthStatus
message: str
duration: float
timestamp: float
class HealthChecker:
"""Health checker for microservices"""
def __init__(self, service_name: str):
self.service_name = service_name
self.checks: List[HealthCheck] = []
self.start_time = time.time()
def add_check(self, name: str, check_function: Callable, timeout: float = 5.0,
critical: bool = True) -> None:
"""Add a health check"""
self.checks.append(HealthCheck(name, check_function, timeout, critical))
async def run_check(self, check: HealthCheck) -> HealthResult:
"""Run a single health check"""
start_time = time.time()
try:
# Run the check with timeout
            result = await asyncio.wait_for(
                self._execute_check(check.check_function),
                timeout=check.timeout
            )
duration = time.time() - start_time
if result is True or (isinstance(result, dict) and result.get('healthy', False)):
return HealthResult(
name=check.name,
status=HealthStatus.HEALTHY,
message="OK",
duration=duration,
timestamp=time.time()
)
else:
message = str(result) if result else "Check failed"
return HealthResult(
name=check.name,
status=HealthStatus.UNHEALTHY,
message=message,
duration=duration,
timestamp=time.time()
)
except asyncio.TimeoutError:
duration = time.time() - start_time
return HealthResult(
name=check.name,
status=HealthStatus.UNHEALTHY,
message=f"Timeout after {check.timeout}s",
duration=duration,
timestamp=time.time()
)
except Exception as e:
duration = time.time() - start_time
return HealthResult(
name=check.name,
status=HealthStatus.UNHEALTHY,
message=f"Error: {str(e)}",
duration=duration,
timestamp=time.time()
)
async def _execute_check(self, check_function: Callable) -> Any:
"""Execute a check function (handles both sync and async)"""
if asyncio.iscoroutinefunction(check_function):
return await check_function()
else:
return check_function()
async def check_health(self) -> Dict[str, Any]:
"""Run all health checks and return status"""
if not self.checks:
return {
"service": self.service_name,
"status": HealthStatus.HEALTHY.value,
"uptime": time.time() - self.start_time,
"timestamp": time.time(),
"checks": {}
}
# Run all checks concurrently
results = await asyncio.gather(
*[self.run_check(check) for check in self.checks],
return_exceptions=True
)
# Process results
check_results = {}
overall_status = HealthStatus.HEALTHY
for i, result in enumerate(results):
check = self.checks[i]
if isinstance(result, Exception):
check_result = HealthResult(
name=check.name,
status=HealthStatus.UNHEALTHY,
message=f"Exception: {str(result)}",
duration=0.0,
timestamp=time.time()
)
else:
check_result = result
check_results[check.name] = {
"status": check_result.status.value,
"message": check_result.message,
"duration": check_result.duration,
"timestamp": check_result.timestamp
}
            # Determine overall status: a failing critical check marks the
            # service unhealthy; any other non-healthy result degrades it
            if check.critical and check_result.status == HealthStatus.UNHEALTHY:
                overall_status = HealthStatus.UNHEALTHY
            elif check_result.status != HealthStatus.HEALTHY and overall_status == HealthStatus.HEALTHY:
                overall_status = HealthStatus.DEGRADED
return {
"service": self.service_name,
"status": overall_status.value,
"uptime": time.time() - self.start_time,
"timestamp": time.time(),
"checks": check_results
}
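
A hedged sketch of how a service could expose these checks over HTTP (not part of this commit); check_database is a hypothetical callable and the /health path is a convention, not something HealthChecker requires:

from fastapi import FastAPI

from shared.monitoring import HealthChecker

app = FastAPI()
checker = HealthChecker("forecast-service")

async def check_database() -> bool:
    # Placeholder: ping the database here and return True when reachable
    return True

checker.add_check("database", check_database, timeout=2.0, critical=True)

@app.get("/health")
async def health():
    # Returns service status, uptime and per-check results as a dict
    return await checker.check_health()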

shared/monitoring/logging.py

@@ -1,3 +1,6 @@
# ================================================================
# shared/monitoring/logging.py
# ================================================================
"""
Centralized logging configuration for microservices
"""
@@ -5,53 +8,109 @@ Centralized logging configuration for microservices
import logging
import logging.config
import os
import sys
from typing import Dict, Any


def setup_logging(service_name: str, log_level: str = "INFO",
                  enable_json: bool = False, enable_file: bool = True) -> None:
    """
    Set up logging configuration for a microservice with improved error handling.

    Args:
        service_name: Name of the service for log identification
        log_level: Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
        enable_json: Whether to use JSON formatting
        enable_file: Whether to enable file logging
    """
    # Create the logs directory if it doesn't exist and file logging is enabled
    log_dir = "/var/log"
    if enable_file:
        try:
            os.makedirs(log_dir, exist_ok=True)
        except PermissionError:
            # Fall back to a local directory if /var/log is not writable
            log_dir = "./logs"
            os.makedirs(log_dir, exist_ok=True)
            print(f"Warning: Could not write to /var/log, using {log_dir}")

    # Define formatters
    formatters = {
        "standard": {
            "format": "%(asctime)s [%(levelname)s] %(name)s: %(message)s",
            "datefmt": "%Y-%m-%d %H:%M:%S"
        },
        "detailed": {
            "format": "%(asctime)s [%(levelname)s] %(name)s [%(filename)s:%(lineno)d] %(funcName)s(): %(message)s",
            "datefmt": "%Y-%m-%d %H:%M:%S"
        }
    }

    # Add the JSON formatter if requested and available
    if enable_json:
        try:
            import pythonjsonlogger.jsonlogger
            formatters["json"] = {
                "()": "pythonjsonlogger.jsonlogger.JsonFormatter",
                "format": "%(asctime)s %(name)s %(levelname)s %(message)s %(filename)s %(lineno)d"
            }
        except ImportError:
            print("Warning: pythonjsonlogger not available, falling back to standard formatting")
            enable_json = False

    # Define handlers
    handlers = {
        "console": {
            "class": "logging.StreamHandler",
            "level": log_level,
            "formatter": "json" if enable_json else "standard",
            "stream": "ext://sys.stdout"
        }
    }

    # Add the file handler if enabled
    if enable_file:
        handlers["file"] = {
            "class": "logging.FileHandler",
            "level": log_level,
            "formatter": "detailed",
            "filename": f"{log_dir}/{service_name}.log",
            "mode": "a",
            "encoding": "utf-8"
        }

    # Add the logstash handler if running in production
    logstash_host = os.getenv("LOGSTASH_HOST")
    if logstash_host and os.getenv("ENVIRONMENT") == "production":
        try:
            handlers["logstash"] = {
                "class": "logstash.TCPLogstashHandler",
                "host": logstash_host,
                "port": int(os.getenv("LOGSTASH_PORT", "5000")),
                "version": 1,
                "message_type": "logstash",
                "fqdn": False,
                "tags": [service_name]
            }
        except Exception as e:
            print(f"Warning: Could not setup logstash handler: {e}")

    # Root logger handlers depend on which optional handlers were configured
    root_handlers = ["console"]
    if enable_file:
        root_handlers.append("file")
    if "logstash" in handlers:
        root_handlers.append("logstash")

    # Complete logging configuration
    config: Dict[str, Any] = {
        "version": 1,
        "disable_existing_loggers": False,
        "formatters": formatters,
        "handlers": handlers,
        "loggers": {
            "": {  # Root logger
                "handlers": root_handlers,
                "level": log_level,
                "propagate": False
            },
@@ -64,14 +123,32 @@ def setup_logging(service_name: str, log_level: str = "INFO") -> None:
"handlers": ["console"],
"level": log_level,
"propagate": False
},
"sqlalchemy": {
"handlers": ["console"],
"level": "WARNING", # Reduce SQL logging noise
"propagate": False
},
"httpx": {
"handlers": ["console"],
"level": "WARNING", # Reduce HTTP client logging
"propagate": False
}
}
}
# Add logstash handler if in production
if os.getenv("ENVIRONMENT") == "production":
config["loggers"][""]["handlers"].append("logstash")
logging.config.dictConfig(config)
logger = logging.getLogger(__name__)
logger.info(f"Logging configured for {service_name}")
try:
logging.config.dictConfig(config)
logger = logging.getLogger(__name__)
logger.info(f"Logging configured for {service_name} at level {log_level}")
except Exception as e:
# Fallback to basic logging if configuration fails
logging.basicConfig(
level=getattr(logging, log_level.upper()),
format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
handlers=[logging.StreamHandler(sys.stdout)]
)
logger = logging.getLogger(__name__)
logger.error(f"Failed to configure advanced logging for {service_name}: {e}")
logger.info(f"Using basic logging configuration for {service_name}")

shared/monitoring/metrics.py

@@ -1,147 +1,150 @@
# ================================================================
# shared/monitoring/metrics.py - FIXED VERSION
# ================================================================
"""
Centralized metrics collection for microservices - Fixed middleware issue
"""
import time
import logging
from threading import Lock
from typing import Dict, Any, List, Optional

from prometheus_client import Counter, Histogram, Gauge, start_http_server, generate_latest
from fastapi import Request, Response

logger = logging.getLogger(__name__)

# Global registry of metrics collectors, keyed by service name
_metrics_registry: Dict[str, 'MetricsCollector'] = {}
_registry_lock = Lock()

# Default Prometheus metrics
DEFAULT_REQUEST_COUNT = Counter(
    'http_requests_total',
    'Total HTTP requests',
    ['method', 'endpoint', 'status_code', 'service']
)

DEFAULT_REQUEST_DURATION = Histogram(
    'http_request_duration_seconds',
    'HTTP request duration in seconds',
    ['method', 'endpoint', 'service']
)

DEFAULT_ACTIVE_CONNECTIONS = Gauge(
    'active_connections',
    'Active database connections',
    ['service']
)


class MetricsCollector:
    """Thread-safe metrics collector for microservices"""

    def __init__(self, service_name: str):
        self.service_name = service_name
        self.start_time = time.time()
        # Custom metrics registered by the service
        self._counters: Dict[str, Counter] = {}
        self._histograms: Dict[str, Histogram] = {}
        self._gauges: Dict[str, Gauge] = {}
        self._lock = Lock()
        # Register in the global registry
        with _registry_lock:
            _metrics_registry[service_name] = self

    def start_metrics_server(self, port: int = 8080):
        """Start the Prometheus metrics server"""
        try:
            start_http_server(port)
            logger.info(f"Metrics server started on port {port} for {self.service_name}")
        except Exception as e:
            logger.error(f"Failed to start metrics server for {self.service_name}: {e}")

    def register_counter(self, name: str, documentation: str, labels: List[str] = None) -> Counter:
        """Register a custom Counter metric."""
        with self._lock:
            if name in self._counters:
                logger.warning(f"Counter '{name}' already registered for {self.service_name}")
                return self._counters[name]
            if labels is None:
                labels = ['service']
            elif 'service' not in labels:
                labels.append('service')
            try:
                counter = Counter(f"{self.service_name.replace('-', '_')}_{name}",
                                  documentation, labelnames=labels)
                self._counters[name] = counter
                logger.info(f"Registered counter: {name} for {self.service_name}")
                return counter
            except Exception as e:
                logger.error(f"Failed to register counter {name} for {self.service_name}: {e}")
                raise

    def register_histogram(self, name: str, documentation: str, labels: List[str] = None,
                           buckets: tuple = Histogram.DEFAULT_BUCKETS) -> Histogram:
        """Register a custom Histogram metric."""
        with self._lock:
            if name in self._histograms:
                logger.warning(f"Histogram '{name}' already registered for {self.service_name}")
                return self._histograms[name]
            if labels is None:
                labels = ['service']
            elif 'service' not in labels:
                labels.append('service')
            try:
                histogram = Histogram(f"{self.service_name.replace('-', '_')}_{name}", documentation,
                                      labelnames=labels, buckets=buckets)
                self._histograms[name] = histogram
                logger.info(f"Registered histogram: {name} for {self.service_name}")
                return histogram
            except Exception as e:
                logger.error(f"Failed to register histogram {name} for {self.service_name}: {e}")
                raise

    def register_gauge(self, name: str, documentation: str, labels: List[str] = None) -> Gauge:
        """Register a custom Gauge metric."""
        with self._lock:
            if name in self._gauges:
                logger.warning(f"Gauge '{name}' already registered for {self.service_name}")
                return self._gauges[name]
            if labels is None:
                labels = ['service']
            elif 'service' not in labels:
                labels.append('service')
            try:
                gauge = Gauge(f"{self.service_name.replace('-', '_')}_{name}", documentation, labelnames=labels)
                self._gauges[name] = gauge
                logger.info(f"Registered gauge: {name} for {self.service_name}")
                return gauge
            except Exception as e:
                logger.error(f"Failed to register gauge {name} for {self.service_name}: {e}")
                raise

    def increment_counter(self, name: str, value: int = 1, labels: Dict[str, str] = None):
        """Increment a counter metric."""
        if name not in self._counters:
            logger.error(f"Counter '{name}' not registered for {self.service_name}. Cannot increment.")
            return
        # Ensure the 'service' label is always present
        if labels is None:
            labels = {'service': self.service_name}
        elif 'service' not in labels:
            labels['service'] = self.service_name
        try:
            self._counters[name].labels(**labels).inc(value)
        except Exception as e:
            logger.error(f"Failed to increment counter {name} for {self.service_name}: {e}")

    def observe_histogram(self, name: str, value: float, labels: Dict[str, str] = None):
        """Observe a histogram metric."""
        if name not in self._histograms:
            logger.error(f"Histogram '{name}' not registered for {self.service_name}. Cannot observe.")
            return
        if labels is None:
            labels = {'service': self.service_name}
@@ -149,145 +152,146 @@ class MetricsCollector:
        elif 'service' not in labels:
            labels['service'] = self.service_name
        try:
            self._histograms[name].labels(**labels).observe(value)
        except Exception as e:
            logger.error(f"Failed to observe histogram {name} for {self.service_name}: {e}")

    def set_gauge(self, name: str, value: float, labels: Dict[str, str] = None):
        """Set a gauge metric."""
        if name not in self._gauges:
            logger.error(f"Gauge '{name}' not registered for {self.service_name}. Cannot set.")
            return
        if labels is None:
            labels = {'service': self.service_name}
        elif 'service' not in labels:
            labels['service'] = self.service_name
        try:
            self._gauges[name].labels(**labels).set(value)
        except Exception as e:
            logger.error(f"Failed to set gauge {name} for {self.service_name}: {e}")

    def record_request(self, method: str, endpoint: str, status_code: int, duration: float):
        """Record HTTP request metrics using the default metrics."""
        try:
            DEFAULT_REQUEST_COUNT.labels(
                method=method,
                endpoint=endpoint,
                status_code=status_code,
                service=self.service_name
            ).inc()
            DEFAULT_REQUEST_DURATION.labels(
                method=method,
                endpoint=endpoint,
                service=self.service_name
            ).observe(duration)
        except Exception as e:
            logger.error(f"Failed to record request metrics for {self.service_name}: {e}")

    def set_active_connections(self, count: int):
        """Set active database connections using the default gauge."""
        try:
            DEFAULT_ACTIVE_CONNECTIONS.labels(service=self.service_name).set(count)
        except Exception as e:
            logger.error(f"Failed to set active connections for {self.service_name}: {e}")

    def get_metrics(self) -> str:
        """Return Prometheus metrics in exposition format."""
        try:
            return generate_latest().decode('utf-8')
        except Exception as e:
            logger.error(f"Failed to generate metrics for {self.service_name}: {e}")
            return ""


def get_metrics_collector(service_name: str) -> Optional[MetricsCollector]:
    """Get a metrics collector by service name from the global registry."""
    with _registry_lock:
        return _metrics_registry.get(service_name)


def create_metrics_collector(service_name: str) -> MetricsCollector:
    """
    Create a metrics collector without adding middleware.
    This should be called BEFORE app startup, not during lifespan.
    """
    # Reuse an existing collector if one is already registered
    existing = get_metrics_collector(service_name)
    if existing:
        return existing
    return MetricsCollector(service_name)


def add_metrics_middleware(app, metrics_collector: MetricsCollector):
    """
    Add metrics middleware to the app. Must be called BEFORE app startup.
    """
    @app.middleware("http")
    async def metrics_middleware(request: Request, call_next):
        start_time = time.time()
        try:
            response = await call_next(request)
            duration = time.time() - start_time
            # Record request metrics
            metrics_collector.record_request(
                method=request.method,
                endpoint=request.url.path,
                status_code=response.status_code,
                duration=duration
            )
            return response
        except Exception:
            duration = time.time() - start_time
            # Record the failed request before re-raising
            metrics_collector.record_request(
                method=request.method,
                endpoint=request.url.path,
                status_code=500,
                duration=duration
            )
            raise
    return metrics_collector


def add_metrics_endpoint(app, metrics_collector: MetricsCollector):
    """Add a Prometheus metrics endpoint to the app"""
    @app.get("/metrics")
    async def prometheus_metrics():
        """Prometheus metrics endpoint"""
        return Response(
            content=metrics_collector.get_metrics(),
            media_type="text/plain; version=0.0.4; charset=utf-8"
        )


def setup_metrics_early(app, service_name: str = None) -> MetricsCollector:
    """
    Set up metrics collection BEFORE app startup.
    This must be called before adding any middleware or starting the app.
    """
    if service_name is None:
        service_name = getattr(app, 'title', 'unknown-service').lower().replace(' ', '-').replace('.', '_')

    # Create the metrics collector
    metrics_collector = create_metrics_collector(service_name)

    # Add middleware (must happen before the app starts)
    add_metrics_middleware(app, metrics_collector)

    # Add the metrics endpoint
    add_metrics_endpoint(app, metrics_collector)

    # Store in app state for access from routes
    app.state.metrics_collector = metrics_collector

    logger.info(f"Metrics setup completed for service: {service_name}")
    return metrics_collector
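
To illustrate the ordering constraint this commit fixes, a sketch with assumed names (not part of the commit): the collector and middleware are created at import time, while the lifespan only registers custom metrics on the already-existing collector:

from contextlib import asynccontextmanager

from fastapi import FastAPI

from shared.monitoring import setup_metrics_early, get_metrics_collector

@asynccontextmanager
async def lifespan(app: FastAPI):
    # Safe during lifespan: the collector already exists, so no middleware
    # is added after startup.
    collector = get_metrics_collector("forecast-service")
    if collector:
        collector.register_counter("forecasts_generated", "Total forecasts generated")
    yield

app = FastAPI(title="Forecast Service", lifespan=lifespan)
metrics = setup_metrics_early(app, "forecast-service")  # must run before startup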