Files
bakery-ia/gateway/app/routes/telemetry.py

303 lines
9.6 KiB
Python
Raw Normal View History

"""
Telemetry routes for API Gateway - Handles frontend telemetry data
This module provides endpoints for:
- Receiving OpenTelemetry traces and metrics from the frontend
- Proxying them to the Signoz OTel collector
- Providing a secure, authenticated endpoint for frontend telemetry
"""
from fastapi import APIRouter, Request, HTTPException, status
from fastapi.responses import JSONResponse, Response
import httpx
import logging
import os
from typing import Optional
from app.core.config import settings
from app.core.header_manager import header_manager
from shared.monitoring.metrics import MetricsCollector, create_metrics_collector
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Router holding the telemetry endpoints; every route declared on it is
# served under the "/telemetry" prefix and tagged "telemetry".
router = APIRouter(prefix="/telemetry", tags=["telemetry"])
# Base URL of the Signoz OTel collector, taken from the environment or
# falling back to the in-cluster default.
#
# The value is normalized because the handlers build their targets as
# f"{SIGNOZ_OTEL_COLLECTOR}/v1/...":
# - an empty variable falls back to the default instead of producing a
#   relative URL,
# - trailing slashes are stripped so the result never contains "//v1/...".
SIGNOZ_OTEL_COLLECTOR = (
    os.getenv("SIGNOZ_OTEL_COLLECTOR_URL")
    or "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318"
).rstrip("/")
@router.post("/v1/traces")
async def receive_frontend_traces(request: Request):
    """
    Receive OpenTelemetry traces from the frontend and proxy them to Signoz.

    The raw request body is forwarded unchanged to
    ``{SIGNOZ_OTEL_COLLECTOR}/v1/traces``. This route is registered for POST
    only, so OPTIONS (CORS preflight) requests are never dispatched here and
    no preflight handling lives in this function.

    Args:
        request: Incoming request whose body is the trace payload.

    Returns:
        A JSONResponse with status:
        - 200 when the collector accepted the payload
        - 400 when the request body is empty
        - 502 when the collector answered with an error status
        - 503 when the collector could not be reached
        - 500 on any other failure
    """
    try:
        # Get the trace data from the request
        body = await request.body()
        if not body:
            logger.warning("Received empty trace data from frontend")
            return JSONResponse(
                status_code=400,
                content={"error": "Empty trace data"},
            )

        # Log the trace reception (sizes and header metadata only, no payload)
        logger.info(
            "Received frontend traces, content_length=%s, content_type=%s, user_agent=%s",
            len(body),
            request.headers.get("content-type"),
            request.headers.get("user-agent"),
        )

        # Forward to Signoz OTel collector
        target_url = f"{SIGNOZ_OTEL_COLLECTOR}/v1/traces"
        forward_headers = {
            "Content-Type": request.headers.get("content-type", "application/json"),
            "User-Agent": "bakery-gateway/1.0",
            "X-Forwarded-For": request.headers.get("x-forwarded-for", "frontend"),
            "X-Tenant-ID": request.headers.get("x-tenant-id", "unknown"),
        }

        # Add authentication if configured
        signoz_auth_token = os.getenv("SIGNOZ_AUTH_TOKEN")
        if signoz_auth_token:
            forward_headers["Authorization"] = f"Bearer {signoz_auth_token}"

        timeout_config = httpx.Timeout(
            connect=5.0,
            read=10.0,
            write=5.0,
            pool=5.0,
        )
        async with httpx.AsyncClient(timeout=timeout_config) as client:
            response = await client.post(
                url=target_url,
                content=body,
                headers=forward_headers,
            )
            # Without this call httpx never raises HTTPStatusError, so a
            # rejected batch would be reported to the frontend as a success.
            response.raise_for_status()

        logger.info(
            "Forwarded traces to Signoz, signoz_status=%s, signoz_response_time=%s",
            response.status_code,
            response.elapsed.total_seconds(),
        )

        # Return success response to frontend
        return JSONResponse(
            status_code=200,
            content={
                "message": "Traces received and forwarded to Signoz",
                "signoz_status": response.status_code,
                "trace_count": 1,  # We don't know exact count without parsing
            },
        )
    except httpx.HTTPStatusError as e:
        # The collector was reachable but answered with an error status.
        logger.error(
            "Signoz collector returned error, status_code=%s, error_message=%s",
            e.response.status_code,
            str(e),
        )
        return JSONResponse(
            status_code=502,
            content={
                "error": "Signoz collector error",
                "details": str(e),
                "signoz_status": e.response.status_code,
            },
        )
    except httpx.RequestError as e:
        # Transport-level failure: no response was obtained from the collector.
        logger.error(
            "Failed to connect to Signoz collector, error=%s, collector_url=%s",
            str(e),
            SIGNOZ_OTEL_COLLECTOR,
        )
        return JSONResponse(
            status_code=503,
            content={
                "error": "Signoz collector unavailable",
                "details": str(e),
            },
        )
    except Exception as e:
        # Top-level boundary: log with traceback and answer with a 500
        # instead of letting the exception propagate.
        logger.exception(
            "Unexpected error processing traces, error=%s, error_type=%s",
            str(e),
            type(e).__name__,
        )
        return JSONResponse(
            status_code=500,
            content={
                "error": "Internal server error",
                "details": str(e),
            },
        )
@router.post("/v1/metrics")
async def receive_frontend_metrics(request: Request):
    """
    Receive OpenTelemetry metrics from the frontend and proxy them to Signoz.

    The raw request body is forwarded unchanged to
    ``{SIGNOZ_OTEL_COLLECTOR}/v1/metrics``. This route is registered for POST
    only, so OPTIONS (CORS preflight) requests are never dispatched here and
    no preflight handling lives in this function.

    Args:
        request: Incoming request whose body is the metrics payload.

    Returns:
        A JSONResponse with status:
        - 200 when the collector accepted the payload
        - 400 when the request body is empty
        - 502 when the collector answered with an error status
        - 503 when the collector could not be reached
        - 500 on any other failure
    """
    try:
        body = await request.body()
        if not body:
            return JSONResponse(
                status_code=400,
                content={"error": "Empty metrics data"},
            )

        logger.info(
            "Received frontend metrics, content_length=%s, content_type=%s",
            len(body),
            request.headers.get("content-type"),
        )

        # Forward to Signoz OTel collector
        target_url = f"{SIGNOZ_OTEL_COLLECTOR}/v1/metrics"
        forward_headers = {
            "Content-Type": request.headers.get("content-type", "application/json"),
            "User-Agent": "bakery-gateway/1.0",
            "X-Forwarded-For": request.headers.get("x-forwarded-for", "frontend"),
            "X-Tenant-ID": request.headers.get("x-tenant-id", "unknown"),
        }

        # Add authentication if configured
        signoz_auth_token = os.getenv("SIGNOZ_AUTH_TOKEN")
        if signoz_auth_token:
            forward_headers["Authorization"] = f"Bearer {signoz_auth_token}"

        timeout_config = httpx.Timeout(
            connect=5.0,
            read=10.0,
            write=5.0,
            pool=5.0,
        )
        async with httpx.AsyncClient(timeout=timeout_config) as client:
            response = await client.post(
                url=target_url,
                content=body,
                headers=forward_headers,
            )
            # Surface collector-side rejections; otherwise a failed batch
            # would be reported to the frontend as a success.
            response.raise_for_status()

        logger.info(
            "Forwarded metrics to Signoz, signoz_status=%s",
            response.status_code,
        )
        return JSONResponse(
            status_code=200,
            content={
                "message": "Metrics received and forwarded to Signoz",
                "signoz_status": response.status_code,
            },
        )
    except httpx.HTTPStatusError as e:
        # The collector was reachable but answered with an error status.
        logger.error(
            "Signoz collector returned error, status_code=%s, error_message=%s",
            e.response.status_code,
            str(e),
        )
        return JSONResponse(
            status_code=502,
            content={
                "error": "Signoz collector error",
                "details": str(e),
                "signoz_status": e.response.status_code,
            },
        )
    except httpx.RequestError as e:
        # Transport-level failure: no response was obtained from the collector.
        logger.error(
            "Failed to connect to Signoz collector, error=%s, collector_url=%s",
            str(e),
            SIGNOZ_OTEL_COLLECTOR,
        )
        return JSONResponse(
            status_code=503,
            content={
                "error": "Signoz collector unavailable",
                "details": str(e),
            },
        )
    except Exception as e:
        # Top-level boundary: log with traceback and answer with a 500
        # instead of letting the exception propagate.
        logger.exception(
            "Error processing metrics, error=%s",
            str(e),
        )
        return JSONResponse(
            status_code=500,
            content={
                "error": "Internal server error",
                "details": str(e),
            },
        )
@router.get("/health")
async def telemetry_health():
    """
    Report a static health status for the telemetry gateway.

    Always answers 200 with the service name and the configured collector
    URL; the collector itself is not contacted.
    """
    health_payload = {
        "status": "healthy",
        "service": "telemetry-gateway",
        "signoz_collector": SIGNOZ_OTEL_COLLECTOR,
    }
    return JSONResponse(content=health_payload, status_code=200)
# Module-wide metrics collector; stays None when it cannot be created so the
# rest of the module can check for it before use.
metrics_collector = None
try:
    metrics_collector = create_metrics_collector("gateway-telemetry")
except Exception as exc:
    logger.error("Failed to create metrics collector, error=%s", str(exc))
@router.on_event("startup")
async def startup_event():
    """
    Register the telemetry counters on startup and log the collector in use.

    Registration is skipped when no metrics collector is available. Any
    failure is logged and swallowed so that startup continues.
    """
    # (counter name, description) pairs, registered in this order.
    counter_specs = (
        (
            "gateway_telemetry_traces_received",
            "Number of trace batches received from frontend",
        ),
        (
            "gateway_telemetry_metrics_received",
            "Number of metric batches received from frontend",
        ),
        (
            "gateway_telemetry_errors",
            "Number of telemetry processing errors",
        ),
    )
    try:
        if metrics_collector:
            for counter_name, description in counter_specs:
                metrics_collector.register_counter(counter_name, description)
        # NOTE(review): emitted whether or not a collector exists; confirm
        # this matches the intended nesting of the original statement.
        logger.info(
            "Telemetry gateway initialized, signoz_collector=%s",
            SIGNOZ_OTEL_COLLECTOR,
        )
    except Exception as exc:
        logger.error(
            "Failed to initialize telemetry metrics, error=%s",
            str(exc),
        )