Improve monitoring
This commit is contained in:
@@ -360,18 +360,6 @@ class DemoCleanupService:
|
||||
|
||||
logger.info("Demo session cleanup completed", stats=stats)
|
||||
|
||||
# Update Prometheus metrics
|
||||
duration_ms = int((datetime.now(timezone.utc) - start_time).total_seconds() * 1000)
|
||||
demo_session_cleanup_duration_seconds.labels(tier="all").observe(duration_ms / 1000)
|
||||
|
||||
# Update deleted sessions metrics by tier (we need to determine tiers from sessions)
|
||||
for session in all_sessions_to_cleanup:
|
||||
demo_sessions_deleted_total.labels(
|
||||
tier=session.demo_account_type,
|
||||
status="success"
|
||||
).inc()
|
||||
demo_sessions_active.labels(tier=session.demo_account_type).dec()
|
||||
|
||||
return stats
|
||||
|
||||
async def cleanup_old_destroyed_sessions(self, days: int = 7) -> int:
|
||||
|
||||
@@ -284,9 +284,7 @@ class CloneOrchestrator:
|
||||
)
|
||||
|
||||
start_time = datetime.now(timezone.utc)
|
||||
|
||||
# Update active sessions metric
|
||||
demo_sessions_active.labels(tier=demo_account_type).inc()
|
||||
|
||||
|
||||
# Filter services if specified
|
||||
services_to_clone = self.services
|
||||
@@ -383,29 +381,6 @@ class CloneOrchestrator:
|
||||
services_status=all_services,
|
||||
demo_account_type=demo_account_type
|
||||
)
|
||||
|
||||
# Update Prometheus metrics
|
||||
demo_session_creation_duration_seconds.labels(tier=demo_account_type).observe(duration_ms / 1000)
|
||||
demo_sessions_created_total.labels(tier=demo_account_type, status=overall_status).inc()
|
||||
|
||||
# Update alert and insight metrics if available
|
||||
if result.get("alert_generation"):
|
||||
alert_gen = result["alert_generation"]
|
||||
for alert_type, alerts in alert_gen.items():
|
||||
if isinstance(alerts, dict) and alerts.get("alerts_generated"):
|
||||
demo_alerts_generated_total.labels(
|
||||
tier=demo_account_type,
|
||||
alert_type=alert_type
|
||||
).inc(alerts["alerts_generated"])
|
||||
|
||||
if result.get("ai_insights_generation"):
|
||||
insights_gen = result["ai_insights_generation"]
|
||||
for insight_type, insights in insights_gen.items():
|
||||
if isinstance(insights, dict) and insights.get("insights_posted"):
|
||||
demo_ai_insights_generated_total.labels(
|
||||
tier=demo_account_type,
|
||||
insight_type=insight_type
|
||||
).inc(insights["insights_posted"])
|
||||
|
||||
return result
|
||||
|
||||
@@ -549,20 +524,6 @@ class CloneOrchestrator:
|
||||
duration_ms=duration_ms
|
||||
)
|
||||
|
||||
demo_cross_service_calls_total.labels(
|
||||
source_service="demo-session",
|
||||
target_service=service.name,
|
||||
status="success"
|
||||
).inc()
|
||||
demo_cross_service_call_duration_seconds.labels(
|
||||
source_service="demo-session",
|
||||
target_service=service.name
|
||||
).observe(duration_seconds)
|
||||
demo_service_clone_duration_seconds.labels(
|
||||
tier=demo_account_type,
|
||||
service=service.name
|
||||
).observe(duration_seconds)
|
||||
|
||||
if response.status_code == 200:
|
||||
result = response.json()
|
||||
logger.info(
|
||||
@@ -582,17 +543,6 @@ class CloneOrchestrator:
|
||||
response_text=response.text
|
||||
)
|
||||
|
||||
demo_cross_service_calls_total.labels(
|
||||
source_service="demo-session",
|
||||
target_service=service.name,
|
||||
status="failed"
|
||||
).inc()
|
||||
demo_cloning_errors_total.labels(
|
||||
tier=demo_account_type,
|
||||
service=service.name,
|
||||
error_type="http_error"
|
||||
).inc()
|
||||
|
||||
return {
|
||||
"service": service.name,
|
||||
"status": "failed",
|
||||
@@ -614,22 +564,6 @@ class CloneOrchestrator:
|
||||
url=service.url
|
||||
)
|
||||
|
||||
# Update error metrics
|
||||
demo_cross_service_calls_total.labels(
|
||||
source_service="demo-session",
|
||||
target_service=service.name,
|
||||
status="failed"
|
||||
).inc()
|
||||
demo_cloning_errors_total.labels(
|
||||
tier=demo_account_type,
|
||||
service=service.name,
|
||||
error_type="timeout"
|
||||
).inc()
|
||||
demo_service_clone_duration_seconds.labels(
|
||||
tier=demo_account_type,
|
||||
service=service.name
|
||||
).observe(duration_seconds)
|
||||
|
||||
return {
|
||||
"service": service.name,
|
||||
"status": "failed",
|
||||
@@ -650,22 +584,6 @@ class CloneOrchestrator:
|
||||
exc_info=True
|
||||
)
|
||||
|
||||
# Update error metrics
|
||||
demo_cross_service_calls_total.labels(
|
||||
source_service="demo-session",
|
||||
target_service=service.name,
|
||||
status="failed"
|
||||
).inc()
|
||||
demo_cloning_errors_total.labels(
|
||||
tier=demo_account_type,
|
||||
service=service.name,
|
||||
error_type="network_error"
|
||||
).inc()
|
||||
demo_service_clone_duration_seconds.labels(
|
||||
tier=demo_account_type,
|
||||
service=service.name
|
||||
).observe(duration_seconds)
|
||||
|
||||
return {
|
||||
"service": service.name,
|
||||
"status": "failed",
|
||||
@@ -686,22 +604,6 @@ class CloneOrchestrator:
|
||||
exc_info=True
|
||||
)
|
||||
|
||||
# Update error metrics
|
||||
demo_cross_service_calls_total.labels(
|
||||
source_service="demo-session",
|
||||
target_service=service.name,
|
||||
status="failed"
|
||||
).inc()
|
||||
demo_cloning_errors_total.labels(
|
||||
tier=demo_account_type,
|
||||
service=service.name,
|
||||
error_type="exception"
|
||||
).inc()
|
||||
demo_service_clone_duration_seconds.labels(
|
||||
tier=demo_account_type,
|
||||
service=service.name
|
||||
).observe(duration_seconds)
|
||||
|
||||
return {
|
||||
"service": service.name,
|
||||
"status": "failed",
|
||||
|
||||
Reference in New Issue
Block a user