""" Telemetry routes for API Gateway - Handles frontend telemetry data This module provides endpoints for: - Receiving OpenTelemetry traces from frontend - Proxying traces to Signoz OTel collector - Providing a secure, authenticated endpoint for frontend telemetry """ from fastapi import APIRouter, Request, HTTPException, status from fastapi.responses import JSONResponse, Response import httpx import logging import os from typing import Optional from app.core.config import settings from app.core.header_manager import header_manager from shared.monitoring.metrics import MetricsCollector, create_metrics_collector logger = logging.getLogger(__name__) router = APIRouter(prefix="/telemetry", tags=["telemetry"]) # Get Signoz OTel collector endpoint from environment or use default SIGNOZ_OTEL_COLLECTOR = os.getenv( "SIGNOZ_OTEL_COLLECTOR_URL", "http://signoz-otel-collector.bakery-ia.svc.cluster.local:4318" ) @router.post("/v1/traces") async def receive_frontend_traces(request: Request): """ Receive OpenTelemetry traces from frontend and proxy to Signoz This endpoint: - Accepts OTLP trace data from frontend - Validates the request - Proxies to Signoz OTel collector - Handles errors gracefully """ # Handle OPTIONS requests for CORS if request.method == "OPTIONS": return Response( status_code=200, headers={ "Access-Control-Allow-Origin": settings.CORS_ORIGINS_LIST, "Access-Control-Allow-Methods": "POST, OPTIONS", "Access-Control-Allow-Headers": "Content-Type, Authorization, X-Tenant-ID", "Access-Control-Allow-Credentials": "true", "Access-Control-Max-Age": "86400" } ) try: # Get the trace data from the request body = await request.body() if not body: logger.warning("Received empty trace data from frontend") return JSONResponse( status_code=400, content={"error": "Empty trace data"} ) # Log the trace reception (without sensitive data) logger.info( "Received frontend traces, content_length=%s, content_type=%s, user_agent=%s", len(body), request.headers.get("content-type"), request.headers.get("user-agent") ) # Forward to Signoz OTel collector target_url = f"{SIGNOZ_OTEL_COLLECTOR}/v1/traces" # Set up headers for the Signoz collector forward_headers = { "Content-Type": request.headers.get("content-type", "application/json"), "User-Agent": "bakery-gateway/1.0", "X-Forwarded-For": request.headers.get("x-forwarded-for", "frontend"), "X-Tenant-ID": request.headers.get("x-tenant-id", "unknown") } # Add authentication if configured signoz_auth_token = os.getenv("SIGNOZ_AUTH_TOKEN") if signoz_auth_token: forward_headers["Authorization"] = f"Bearer {signoz_auth_token}" # Send to Signoz collector timeout_config = httpx.Timeout( connect=5.0, read=10.0, write=5.0, pool=5.0 ) async with httpx.AsyncClient(timeout=timeout_config) as client: response = await client.post( url=target_url, content=body, headers=forward_headers ) # Log the response from Signoz logger.info( "Forwarded traces to Signoz, signoz_status=%s, signoz_response_time=%s", response.status_code, response.elapsed.total_seconds() ) # Return success response to frontend return JSONResponse( status_code=200, content={ "message": "Traces received and forwarded to Signoz", "signoz_status": response.status_code, "trace_count": 1 # We don't know exact count without parsing } ) except httpx.HTTPStatusError as e: logger.error( "Signoz collector returned error, status_code=%s, error_message=%s", e.response.status_code, str(e) ) return JSONResponse( status_code=502, content={ "error": "Signoz collector error", "details": str(e), "signoz_status": e.response.status_code } ) except httpx.RequestError as e: logger.error( "Failed to connect to Signoz collector, error=%s, collector_url=%s", str(e), SIGNOZ_OTEL_COLLECTOR ) return JSONResponse( status_code=503, content={ "error": "Signoz collector unavailable", "details": str(e) } ) except Exception as e: logger.error( "Unexpected error processing traces, error=%s, error_type=%s", str(e), type(e).__name__ ) return JSONResponse( status_code=500, content={ "error": "Internal server error", "details": str(e) } ) @router.post("/v1/metrics") async def receive_frontend_metrics(request: Request): """ Receive OpenTelemetry metrics from frontend and proxy to Signoz """ # Handle OPTIONS requests for CORS if request.method == "OPTIONS": return Response( status_code=200, headers={ "Access-Control-Allow-Origin": settings.CORS_ORIGINS_LIST, "Access-Control-Allow-Methods": "POST, OPTIONS", "Access-Control-Allow-Headers": "Content-Type, Authorization, X-Tenant-ID", "Access-Control-Allow-Credentials": "true", "Access-Control-Max-Age": "86400" } ) try: body = await request.body() if not body: return JSONResponse( status_code=400, content={"error": "Empty metrics data"} ) logger.info( "Received frontend metrics, content_length=%s, content_type=%s", len(body), request.headers.get("content-type") ) # Forward to Signoz OTel collector target_url = f"{SIGNOZ_OTEL_COLLECTOR}/v1/metrics" forward_headers = { "Content-Type": request.headers.get("content-type", "application/json"), "User-Agent": "bakery-gateway/1.0", "X-Forwarded-For": request.headers.get("x-forwarded-for", "frontend"), "X-Tenant-ID": request.headers.get("x-tenant-id", "unknown") } # Add authentication if configured signoz_auth_token = os.getenv("SIGNOZ_AUTH_TOKEN") if signoz_auth_token: forward_headers["Authorization"] = f"Bearer {signoz_auth_token}" timeout_config = httpx.Timeout( connect=5.0, read=10.0, write=5.0, pool=5.0 ) async with httpx.AsyncClient(timeout=timeout_config) as client: response = await client.post( url=target_url, content=body, headers=forward_headers ) logger.info( "Forwarded metrics to Signoz, signoz_status=%s", response.status_code ) return JSONResponse( status_code=200, content={ "message": "Metrics received and forwarded to Signoz", "signoz_status": response.status_code } ) except Exception as e: logger.error( "Error processing metrics, error=%s", str(e) ) return JSONResponse( status_code=500, content={ "error": "Internal server error", "details": str(e) } ) @router.get("/health") async def telemetry_health(): """ Health check endpoint for telemetry service """ return JSONResponse( status_code=200, content={ "status": "healthy", "service": "telemetry-gateway", "signoz_collector": SIGNOZ_OTEL_COLLECTOR } ) # Initialize metrics for this module try: metrics_collector = create_metrics_collector("gateway-telemetry") except Exception as e: logger.error("Failed to create metrics collector, error=%s", str(e)) metrics_collector = None @router.on_event("startup") async def startup_event(): """Initialize telemetry metrics on startup""" try: if metrics_collector: # Register telemetry-specific metrics metrics_collector.register_counter( "gateway_telemetry_traces_received", "Number of trace batches received from frontend" ) metrics_collector.register_counter( "gateway_telemetry_metrics_received", "Number of metric batches received from frontend" ) metrics_collector.register_counter( "gateway_telemetry_errors", "Number of telemetry processing errors" ) logger.info( "Telemetry gateway initialized, signoz_collector=%s", SIGNOZ_OTEL_COLLECTOR ) except Exception as e: logger.error( "Failed to initialize telemetry metrics, error=%s", str(e) )