303 lines
9.6 KiB
Python
303 lines
9.6 KiB
Python
"""
|
|
Telemetry routes for API Gateway - Handles frontend telemetry data
|
|
|
|
This module provides endpoints for:
|
|
- Receiving OpenTelemetry traces from frontend
|
|
- Proxying traces to Signoz OTel collector
|
|
- Providing a secure, authenticated endpoint for frontend telemetry
|
|
"""
|
|
|
|
from fastapi import APIRouter, Request, HTTPException, status
|
|
from fastapi.responses import JSONResponse, Response
|
|
import httpx
|
|
import logging
|
|
import os
|
|
from typing import Optional
|
|
|
|
from app.core.config import settings
|
|
from app.core.header_manager import header_manager
|
|
from shared.monitoring.metrics import MetricsCollector, create_metrics_collector
|
|
|
|
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)

# Every route declared in this module is served under the /telemetry prefix.
router = APIRouter(
    prefix="/telemetry",
    tags=["telemetry"],
)
|
|
|
|
# Base URL of the Signoz OTel collector, read from the
# SIGNOZ_OTEL_COLLECTOR_URL environment variable with an in-cluster default.
# Trailing slashes are stripped because the route handlers build their target
# by appending "/v1/traces" or "/v1/metrics" to this value; a configured
# trailing slash would otherwise yield a "//v1/..." request path.
SIGNOZ_OTEL_COLLECTOR = os.getenv(
    "SIGNOZ_OTEL_COLLECTOR_URL",
    "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318",
).rstrip("/")
|
|
|
|
@router.post("/v1/traces")
async def receive_frontend_traces(request: Request):
    """
    Receive OpenTelemetry traces from frontend and proxy to Signoz

    The raw request body is forwarded unchanged to
    ``{SIGNOZ_OTEL_COLLECTOR}/v1/traces``. When the ``SIGNOZ_AUTH_TOKEN``
    environment variable is set, it is sent to the collector as a Bearer
    token. The only validation performed here is rejecting an empty body.

    Args:
        request: Incoming request whose body is the trace payload.

    Returns:
        JSONResponse with one of:
        - 200: payload forwarded and accepted by the collector
        - 400: request body was empty
        - 502: collector answered with a non-success status
        - 503: collector could not be reached (httpx.RequestError)
        - 500: any other unexpected failure
    """
    # There is deliberately no OPTIONS branch here: this handler is registered
    # with @router.post, so request.method is always "POST" and such a branch
    # could never run. NOTE(review): the removed branch also passed
    # settings.CORS_ORIGINS_LIST as a header value, which looks like a list
    # rather than a string - CORS preflight presumably belongs to middleware.
    try:
        # Get the trace data from the request
        body = await request.body()

        if not body:
            logger.warning("Received empty trace data from frontend")
            return JSONResponse(
                status_code=400,
                content={"error": "Empty trace data"},
            )

        # Log the trace reception (sizes and headers only, never the payload)
        logger.info(
            "Received frontend traces, content_length=%s, content_type=%s, user_agent=%s",
            len(body),
            request.headers.get("content-type"),
            request.headers.get("user-agent"),
        )

        # Forward to Signoz OTel collector
        target_url = f"{SIGNOZ_OTEL_COLLECTOR}/v1/traces"

        # Headers sent to the collector. Placeholder defaults keep the header
        # set stable when the browser omits the optional ones.
        forward_headers = {
            "Content-Type": request.headers.get("content-type", "application/json"),
            "User-Agent": "bakery-gateway/1.0",
            "X-Forwarded-For": request.headers.get("x-forwarded-for", "frontend"),
            "X-Tenant-ID": request.headers.get("x-tenant-id", "unknown"),
        }

        # Add authentication if configured
        signoz_auth_token = os.getenv("SIGNOZ_AUTH_TOKEN")
        if signoz_auth_token:
            forward_headers["Authorization"] = f"Bearer {signoz_auth_token}"

        timeout_config = httpx.Timeout(
            connect=5.0,
            read=10.0,
            write=5.0,
            pool=5.0,
        )

        async with httpx.AsyncClient(timeout=timeout_config) as client:
            response = await client.post(
                url=target_url,
                content=body,
                headers=forward_headers,
            )

        # Log the response from Signoz
        logger.info(
            "Forwarded traces to Signoz, signoz_status=%s, signoz_response_time=%s",
            response.status_code,
            response.elapsed.total_seconds(),
        )

        # httpx does not raise on error statuses by itself. Without this call
        # the HTTPStatusError handler below can never run, and a payload the
        # collector rejected would be reported to the frontend as a success.
        response.raise_for_status()

        # Return success response to frontend
        return JSONResponse(
            status_code=200,
            content={
                "message": "Traces received and forwarded to Signoz",
                "signoz_status": response.status_code,
                "trace_count": 1,  # We don't know exact count without parsing
            },
        )

    except httpx.HTTPStatusError as e:
        # The collector was reachable but refused the payload.
        logger.error(
            "Signoz collector returned error, status_code=%s, error_message=%s",
            e.response.status_code,
            e,
        )
        return JSONResponse(
            status_code=502,
            content={
                "error": "Signoz collector error",
                "details": str(e),
                "signoz_status": e.response.status_code,
            },
        )

    except httpx.RequestError as e:
        # Connection, timeout or other transport-level failure.
        logger.error(
            "Failed to connect to Signoz collector, error=%s, collector_url=%s",
            e,
            SIGNOZ_OTEL_COLLECTOR,
        )
        return JSONResponse(
            status_code=503,
            content={
                "error": "Signoz collector unavailable",
                "details": str(e),
            },
        )

    except Exception as e:
        # Last-resort boundary: logger.exception keeps the traceback so the
        # failure can be diagnosed, while the client still gets a JSON body.
        logger.exception(
            "Unexpected error processing traces, error=%s, error_type=%s",
            e,
            type(e).__name__,
        )
        return JSONResponse(
            status_code=500,
            content={
                "error": "Internal server error",
                "details": str(e),
            },
        )
|
|
|
|
@router.post("/v1/metrics")
async def receive_frontend_metrics(request: Request):
    """
    Receive OpenTelemetry metrics from frontend and proxy to Signoz

    The raw request body is forwarded unchanged to
    ``{SIGNOZ_OTEL_COLLECTOR}/v1/metrics``. When the ``SIGNOZ_AUTH_TOKEN``
    environment variable is set, it is sent to the collector as a Bearer
    token. The only validation performed here is rejecting an empty body.

    Args:
        request: Incoming request whose body is the metrics payload.

    Returns:
        JSONResponse with one of:
        - 200: payload forwarded and accepted by the collector
        - 400: request body was empty
        - 502: collector answered with a non-success status
        - 503: collector could not be reached (httpx.RequestError)
        - 500: any other unexpected failure
    """
    # There is deliberately no OPTIONS branch here: this handler is registered
    # with @router.post, so request.method is always "POST" and such a branch
    # could never run. NOTE(review): the removed branch also passed
    # settings.CORS_ORIGINS_LIST as a header value, which looks like a list
    # rather than a string - CORS preflight presumably belongs to middleware.
    try:
        body = await request.body()

        if not body:
            return JSONResponse(
                status_code=400,
                content={"error": "Empty metrics data"},
            )

        logger.info(
            "Received frontend metrics, content_length=%s, content_type=%s",
            len(body),
            request.headers.get("content-type"),
        )

        # Forward to Signoz OTel collector
        target_url = f"{SIGNOZ_OTEL_COLLECTOR}/v1/metrics"

        # Headers sent to the collector. Placeholder defaults keep the header
        # set stable when the browser omits the optional ones.
        forward_headers = {
            "Content-Type": request.headers.get("content-type", "application/json"),
            "User-Agent": "bakery-gateway/1.0",
            "X-Forwarded-For": request.headers.get("x-forwarded-for", "frontend"),
            "X-Tenant-ID": request.headers.get("x-tenant-id", "unknown"),
        }

        # Add authentication if configured
        signoz_auth_token = os.getenv("SIGNOZ_AUTH_TOKEN")
        if signoz_auth_token:
            forward_headers["Authorization"] = f"Bearer {signoz_auth_token}"

        timeout_config = httpx.Timeout(
            connect=5.0,
            read=10.0,
            write=5.0,
            pool=5.0,
        )

        async with httpx.AsyncClient(timeout=timeout_config) as client:
            response = await client.post(
                url=target_url,
                content=body,
                headers=forward_headers,
            )

        logger.info(
            "Forwarded metrics to Signoz, signoz_status=%s",
            response.status_code,
        )

        # httpx does not raise on error statuses by itself; surface a
        # collector rejection instead of reporting it as a success.
        response.raise_for_status()

        return JSONResponse(
            status_code=200,
            content={
                "message": "Metrics received and forwarded to Signoz",
                "signoz_status": response.status_code,
            },
        )

    except httpx.HTTPStatusError as e:
        # The collector was reachable but refused the payload.
        logger.error(
            "Signoz collector returned error, status_code=%s, error_message=%s",
            e.response.status_code,
            e,
        )
        return JSONResponse(
            status_code=502,
            content={
                "error": "Signoz collector error",
                "details": str(e),
                "signoz_status": e.response.status_code,
            },
        )

    except httpx.RequestError as e:
        # Connection, timeout or other transport-level failure: report it as
        # an unavailable upstream rather than a gateway-internal 500.
        logger.error(
            "Failed to connect to Signoz collector, error=%s, collector_url=%s",
            e,
            SIGNOZ_OTEL_COLLECTOR,
        )
        return JSONResponse(
            status_code=503,
            content={
                "error": "Signoz collector unavailable",
                "details": str(e),
            },
        )

    except Exception as e:
        # Last-resort boundary: logger.exception keeps the traceback so the
        # failure can be diagnosed, while the client still gets a JSON body.
        logger.exception(
            "Error processing metrics, error=%s",
            e,
        )
        return JSONResponse(
            status_code=500,
            content={
                "error": "Internal server error",
                "details": str(e),
            },
        )
|
|
|
|
@router.get("/health")
async def telemetry_health():
    """
    Health check endpoint for telemetry service

    Always answers 200 with a static payload that names this service and
    echoes the configured collector URL. The collector itself is not
    contacted, so this reports only that the gateway route is being served.
    """
    payload = {
        "status": "healthy",
        "service": "telemetry-gateway",
        "signoz_collector": SIGNOZ_OTEL_COLLECTOR,
    }
    return JSONResponse(content=payload, status_code=200)
|
|
|
|
# Module-wide metrics collector. Creation is best-effort: if it fails the
# error is logged and the name stays None, which the rest of the module must
# check before use.
metrics_collector: Optional[MetricsCollector] = None
try:
    metrics_collector = create_metrics_collector("gateway-telemetry")
except Exception as exc:
    logger.error("Failed to create metrics collector, error=%s", str(exc))
|
|
|
|
@router.on_event("startup")
async def startup_event():
    """
    Initialize telemetry metrics on startup

    Registers the three telemetry counters on ``metrics_collector`` when one
    is available, then logs the configured collector URL. Any exception is
    logged and swallowed so that a metrics problem does not abort startup.

    NOTE(review): ``on_event`` is reported as deprecated in newer
    FastAPI/Starlette releases in favour of lifespan handlers - confirm
    against the version this project pins.
    """
    # (counter name, description) pairs, registered in this order.
    counter_specs = (
        (
            "gateway_telemetry_traces_received",
            "Number of trace batches received from frontend",
        ),
        (
            "gateway_telemetry_metrics_received",
            "Number of metric batches received from frontend",
        ),
        (
            "gateway_telemetry_errors",
            "Number of telemetry processing errors",
        ),
    )

    try:
        if metrics_collector:
            for counter_name, description in counter_specs:
                metrics_collector.register_counter(counter_name, description)

        logger.info(
            "Telemetry gateway initialized, signoz_collector=%s",
            SIGNOZ_OTEL_COLLECTOR,
        )
    except Exception as exc:
        logger.error(
            "Failed to initialize telemetry metrics, error=%s",
            str(exc),
        )