Improve metrics
This commit is contained in:
@@ -15,6 +15,7 @@ from app.api import training_jobs, training_operations, models, health, monitori
|
||||
from app.services.training_events import setup_messaging, cleanup_messaging
|
||||
from app.websocket.events import setup_websocket_event_consumer, cleanup_websocket_consumers
|
||||
from shared.service_base import StandardFastAPIService
|
||||
from shared.monitoring.system_metrics import SystemMetricsCollector
|
||||
|
||||
|
||||
class TrainingService(StandardFastAPIService):
|
||||
@@ -77,6 +78,11 @@ class TrainingService(StandardFastAPIService):
|
||||
async def on_startup(self, app: FastAPI):
    """Run custom startup logic: verify migrations, then start system metrics.

    Args:
        app: The FastAPI application being started (not used directly here).
    """
    await self.verify_migrations()

    # Initialize system metrics collection. The collector is stored on the
    # instance instead of a throwaway local so it stays referenced for the
    # lifetime of the service rather than being dropped when this method
    # returns.
    # NOTE(review): presumably the collector starts reporting on
    # construction -- confirm against shared.monitoring.system_metrics.
    self.system_metrics = SystemMetricsCollector("training")
    self.logger.info("System metrics collection started")

    self.logger.info("Training service startup completed")
|
||||
|
||||
async def on_shutdown(self, app: FastAPI):
|
||||
@@ -132,12 +138,14 @@ class TrainingService(StandardFastAPIService):
|
||||
|
||||
def setup_custom_endpoints(self):
|
||||
"""Setup custom endpoints for training service"""
|
||||
@self.app.get("/metrics")
|
||||
async def get_metrics():
|
||||
"""Prometheus metrics endpoint"""
|
||||
if self.metrics_collector:
|
||||
return self.metrics_collector.get_metrics()
|
||||
return {"status": "metrics not available"}
|
||||
# Note: Metrics are exported via OpenTelemetry OTLP to SigNoz
|
||||
# The /metrics endpoint is not needed as metrics are pushed automatically
|
||||
# @self.app.get("/metrics")
|
||||
# async def get_metrics():
|
||||
# """Prometheus metrics endpoint"""
|
||||
# if self.metrics_collector:
|
||||
# return self.metrics_collector.get_metrics()
|
||||
# return {"status": "metrics not available"}
|
||||
|
||||
@self.app.get("/")
|
||||
async def root():
|
||||
|
||||
Reference in New Issue
Block a user