Files
bakery-ia/services/demo_session/app/monitoring/metrics.py

85 lines
2.3 KiB
Python
Raw Normal View History

2025-12-13 23:57:54 +01:00
"""
Prometheus metrics for demo session service
"""
from prometheus_client import Counter, Histogram, Gauge
# Counters
# Counter of created demo sessions, partitioned by the `tier` and `status` labels.
demo_sessions_created_total = Counter(
    name='demo_sessions_created_total',
    documentation='Total number of demo sessions created',
    labelnames=('tier', 'status'),
)
# Counter of deleted demo sessions, partitioned by the `tier` and `status` labels.
demo_sessions_deleted_total = Counter(
    name='demo_sessions_deleted_total',
    documentation='Total number of demo sessions deleted',
    labelnames=('tier', 'status'),
)
# Counter of cloning errors, partitioned by the `tier`, `service` and
# `error_type` labels.
demo_cloning_errors_total = Counter(
    name='demo_cloning_errors_total',
    documentation='Total number of cloning errors',
    labelnames=('tier', 'service', 'error_type'),
)
# Histograms (for latency percentiles)
# Histogram of demo session creation time, labelled by `tier`.
# Explicit bucket bounds run from 1 up to 60 (seconds, per the metric name).
demo_session_creation_duration_seconds = Histogram(
    name='demo_session_creation_duration_seconds',
    documentation='Duration of demo session creation',
    labelnames=('tier',),
    buckets=(1, 2, 5, 7, 10, 12, 15, 18, 20, 25, 30, 40, 50, 60),
)
# Histogram of per-service cloning time, labelled by `tier` and `service`.
# Explicit bucket bounds run from 0.5 up to 50 (seconds, per the metric name).
demo_service_clone_duration_seconds = Histogram(
    name='demo_service_clone_duration_seconds',
    documentation='Duration of individual service cloning',
    labelnames=('tier', 'service'),
    buckets=(0.5, 1, 2, 3, 5, 10, 15, 20, 30, 40, 50),
)
# Histogram of demo session cleanup time, labelled by `tier`.
# Explicit bucket bounds run from 0.5 up to 30 (seconds, per the metric name).
demo_session_cleanup_duration_seconds = Histogram(
    name='demo_session_cleanup_duration_seconds',
    documentation='Duration of demo session cleanup',
    labelnames=('tier',),
    buckets=(0.5, 1, 2, 5, 10, 15, 20, 30),
)
# Gauges
# Gauge holding the number of currently active demo sessions, labelled by `tier`.
demo_sessions_active = Gauge(
    name='demo_sessions_active',
    documentation='Number of currently active demo sessions',
    labelnames=('tier',),
)
# Unlabelled gauge holding the number of demo sessions pending cleanup.
demo_sessions_pending_cleanup = Gauge(
    name='demo_sessions_pending_cleanup',
    documentation='Number of demo sessions pending cleanup',
)
# Alert generation metrics
# Counter of alerts generated post-clone, partitioned by the `tier` and
# `alert_type` labels.
demo_alerts_generated_total = Counter(
    name='demo_alerts_generated_total',
    documentation='Total number of alerts generated post-clone',
    labelnames=('tier', 'alert_type'),
)
# Counter of AI insights generated post-clone, partitioned by the `tier` and
# `insight_type` labels.
demo_ai_insights_generated_total = Counter(
    name='demo_ai_insights_generated_total',
    documentation='Total number of AI insights generated post-clone',
    labelnames=('tier', 'insight_type'),
)
# Cross-service metrics
# Counter of cross-service API calls made during cloning, partitioned by the
# `source_service`, `target_service` and `status` labels.
demo_cross_service_calls_total = Counter(
    name='demo_cross_service_calls_total',
    documentation='Total number of cross-service API calls during cloning',
    labelnames=('source_service', 'target_service', 'status'),
)
# Histogram of cross-service API call time during cloning, labelled by
# `source_service` and `target_service`.
# Explicit bucket bounds run from 0.1 up to 30 (seconds, per the metric name).
demo_cross_service_call_duration_seconds = Histogram(
    name='demo_cross_service_call_duration_seconds',
    documentation='Duration of cross-service API calls during cloning',
    labelnames=('source_service', 'target_service'),
    buckets=(0.1, 0.2, 0.5, 1, 2, 5, 10, 15, 20, 30),
)