REFACTOR ALL APIs fix 1
This commit is contained in:
@@ -21,7 +21,7 @@ from app.middleware.logging import LoggingMiddleware
|
||||
from app.middleware.rate_limit import RateLimitMiddleware
|
||||
from app.middleware.subscription import SubscriptionMiddleware
|
||||
from app.middleware.demo_middleware import DemoMiddleware
|
||||
from app.routes import auth, tenant, notification, nominatim, user, subscription, demo
|
||||
from app.routes import auth, tenant, notification, nominatim, user, subscription, demo, pos
|
||||
from shared.monitoring.logging import setup_logging
|
||||
from shared.monitoring.metrics import MetricsCollector
|
||||
|
||||
@@ -71,6 +71,7 @@ app.include_router(tenant.router, prefix="/api/v1/tenants", tags=["tenants"])
|
||||
app.include_router(subscription.router, prefix="/api/v1", tags=["subscriptions"])
|
||||
app.include_router(notification.router, prefix="/api/v1/notifications", tags=["notifications"])
|
||||
app.include_router(nominatim.router, prefix="/api/v1/nominatim", tags=["location"])
|
||||
app.include_router(pos.router, prefix="/api/v1/pos", tags=["pos"])
|
||||
app.include_router(demo.router, prefix="/api/v1", tags=["demo"])
|
||||
|
||||
|
||||
@@ -251,203 +252,106 @@ async def events_stream(request: Request, tenant_id: str):
|
||||
# WEBSOCKET ROUTING FOR TRAINING SERVICE
|
||||
# ================================================================
|
||||
|
||||
@app.websocket("/api/v1/ws/tenants/{tenant_id}/training/jobs/{job_id}/live")
|
||||
@app.websocket("/api/v1/tenants/{tenant_id}/training/jobs/{job_id}/live")
|
||||
async def websocket_training_progress(websocket: WebSocket, tenant_id: str, job_id: str):
|
||||
"""WebSocket proxy that forwards connections directly to training service with enhanced token validation"""
|
||||
await websocket.accept()
|
||||
|
||||
# Get token from query params
|
||||
"""
|
||||
WebSocket proxy that forwards connections directly to training service.
|
||||
Acts as a pure proxy - does NOT handle websocket logic, just forwards to training service.
|
||||
All auth, message handling, and business logic is in the training service.
|
||||
"""
|
||||
# Get token from query params (required for training service authentication)
|
||||
token = websocket.query_params.get("token")
|
||||
if not token:
|
||||
logger.warning(f"WebSocket connection rejected - missing token for job {job_id}")
|
||||
logger.warning(f"WebSocket proxy rejected - missing token for job {job_id}")
|
||||
await websocket.accept()
|
||||
await websocket.close(code=1008, reason="Authentication token required")
|
||||
return
|
||||
|
||||
# Validate token using auth middleware
|
||||
from app.middleware.auth import jwt_handler
|
||||
try:
|
||||
payload = jwt_handler.verify_token(token)
|
||||
if not payload:
|
||||
logger.warning(f"WebSocket connection rejected - invalid token for job {job_id}")
|
||||
await websocket.close(code=1008, reason="Invalid authentication token")
|
||||
return
|
||||
# Accept the connection immediately
|
||||
await websocket.accept()
|
||||
|
||||
# Check token expiration
|
||||
import time
|
||||
if payload.get('exp', 0) < time.time():
|
||||
logger.warning(f"WebSocket connection rejected - expired token for job {job_id}")
|
||||
await websocket.close(code=1008, reason="Token expired")
|
||||
return
|
||||
logger.info(f"Gateway proxying WebSocket to training service for job {job_id}, tenant {tenant_id}")
|
||||
|
||||
logger.info(f"WebSocket token validated for user {payload.get('email', 'unknown')}")
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"WebSocket token validation failed for job {job_id}: {e}")
|
||||
await websocket.close(code=1008, reason="Token validation failed")
|
||||
return
|
||||
|
||||
logger.info(f"Proxying WebSocket connection to training service for job {job_id}, tenant {tenant_id}")
|
||||
|
||||
# Build WebSocket URL to training service
|
||||
# Build WebSocket URL to training service - forward to the exact same path
|
||||
training_service_base = settings.TRAINING_SERVICE_URL.rstrip('/')
|
||||
training_ws_url = training_service_base.replace('http://', 'ws://').replace('https://', 'wss://')
|
||||
training_ws_url = f"{training_ws_url}/api/v1/tenants/{tenant_id}/training/jobs/{job_id}/live?token={token}"
|
||||
|
||||
training_ws = None
|
||||
heartbeat_task = None
|
||||
|
||||
try:
|
||||
# Connect to training service WebSocket with proper timeout configuration
|
||||
# Connect to training service WebSocket
|
||||
import websockets
|
||||
|
||||
# Configure timeouts to coordinate with frontend (30s heartbeat) and training service
|
||||
# DISABLE gateway-level ping to avoid dual-ping conflicts - let frontend handle ping/pong
|
||||
training_ws = await websockets.connect(
|
||||
training_ws_url,
|
||||
ping_interval=None, # DISABLED: Let frontend handle ping/pong via message forwarding
|
||||
ping_timeout=None, # DISABLED: No independent ping mechanism
|
||||
close_timeout=15, # Reasonable close timeout
|
||||
max_size=2**20, # 1MB max message size
|
||||
max_queue=32 # Max queued messages
|
||||
ping_interval=None, # Let training service handle heartbeat
|
||||
ping_timeout=None,
|
||||
close_timeout=10,
|
||||
open_timeout=30, # Allow time for training service to setup
|
||||
max_size=2**20,
|
||||
max_queue=32
|
||||
)
|
||||
|
||||
logger.info(f"Connected to training service WebSocket for job {job_id} with gateway ping DISABLED (frontend handles ping/pong)")
|
||||
|
||||
# Track connection state properly due to FastAPI WebSocket state propagation bug
|
||||
connection_alive = True
|
||||
last_activity = asyncio.get_event_loop().time()
|
||||
|
||||
async def check_connection_health():
|
||||
"""Monitor connection health based on activity timestamps only - no WebSocket interference"""
|
||||
nonlocal connection_alive, last_activity
|
||||
|
||||
while connection_alive:
|
||||
try:
|
||||
await asyncio.sleep(30) # Check every 30 seconds (aligned with frontend heartbeat)
|
||||
current_time = asyncio.get_event_loop().time()
|
||||
|
||||
# Check if we haven't received any activity for too long
|
||||
# Frontend sends ping every 30s, so 90s = 3 missed pings before considering dead
|
||||
if current_time - last_activity > 90:
|
||||
logger.warning(f"No frontend activity for 90s on job {job_id} - connection may be dead")
|
||||
# Don't forcibly close - let the forwarding loops handle actual connection issues
|
||||
# This is just monitoring/logging now
|
||||
else:
|
||||
logger.debug(f"Connection health OK for job {job_id} - last activity {int(current_time - last_activity)}s ago")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Connection health monitoring error for job {job_id}: {e}")
|
||||
break
|
||||
logger.info(f"Gateway connected to training service WebSocket for job {job_id}")
|
||||
|
||||
async def forward_to_training():
|
||||
"""Forward messages from frontend to training service with proper error handling"""
|
||||
nonlocal connection_alive, last_activity
|
||||
|
||||
"""Forward messages from frontend to training service"""
|
||||
try:
|
||||
while connection_alive and training_ws and training_ws.open:
|
||||
try:
|
||||
# Use longer timeout to avoid conflicts with frontend 30s heartbeat
|
||||
# Frontend sends ping every 30s, so we need to allow for some latency
|
||||
data = await asyncio.wait_for(websocket.receive(), timeout=45.0)
|
||||
last_activity = asyncio.get_event_loop().time()
|
||||
while training_ws and training_ws.open:
|
||||
data = await websocket.receive()
|
||||
|
||||
# Handle different message types
|
||||
if data.get("type") == "websocket.receive":
|
||||
if "text" in data:
|
||||
message = data["text"]
|
||||
# Forward text messages to training service
|
||||
await training_ws.send(message)
|
||||
logger.debug(f"Forwarded message to training service for job {job_id}: {message[:100]}...")
|
||||
elif "bytes" in data:
|
||||
# Forward binary messages if needed
|
||||
await training_ws.send(data["bytes"])
|
||||
# Ping/pong frames are automatically handled by Starlette/FastAPI
|
||||
|
||||
except asyncio.TimeoutError:
|
||||
# No message received in 45 seconds, continue loop
|
||||
# This allows for frontend 30s heartbeat + network latency + processing time
|
||||
continue
|
||||
except Exception as e:
|
||||
logger.error(f"Error receiving from frontend for job {job_id}: {e}")
|
||||
connection_alive = False
|
||||
if data.get("type") == "websocket.receive":
|
||||
if "text" in data:
|
||||
await training_ws.send(data["text"])
|
||||
logger.debug(f"Gateway forwarded frontend->training: {data['text'][:100]}")
|
||||
elif "bytes" in data:
|
||||
await training_ws.send(data["bytes"])
|
||||
elif data.get("type") == "websocket.disconnect":
|
||||
logger.info(f"Frontend disconnected for job {job_id}")
|
||||
break
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error in forward_to_training for job {job_id}: {e}")
|
||||
connection_alive = False
|
||||
logger.error(f"Error forwarding frontend->training for job {job_id}: {e}")
|
||||
|
||||
async def forward_to_frontend():
|
||||
"""Forward messages from training service to frontend with proper error handling"""
|
||||
nonlocal connection_alive, last_activity
|
||||
|
||||
"""Forward messages from training service to frontend"""
|
||||
try:
|
||||
while connection_alive and training_ws and training_ws.open:
|
||||
try:
|
||||
# Use coordinated timeout - training service expects messages every 60s
|
||||
# This should be longer than training service timeout to avoid premature closure
|
||||
message = await asyncio.wait_for(training_ws.recv(), timeout=75.0)
|
||||
last_activity = asyncio.get_event_loop().time()
|
||||
|
||||
# Forward the message to frontend
|
||||
await websocket.send_text(message)
|
||||
logger.debug(f"Forwarded message to frontend for job {job_id}: {message[:100]}...")
|
||||
|
||||
except asyncio.TimeoutError:
|
||||
# No message received in 75 seconds, continue loop
|
||||
# Training service sends heartbeats, so this indicates potential issues
|
||||
continue
|
||||
except Exception as e:
|
||||
logger.error(f"Error receiving from training service for job {job_id}: {e}")
|
||||
connection_alive = False
|
||||
break
|
||||
|
||||
while training_ws and training_ws.open:
|
||||
message = await training_ws.recv()
|
||||
await websocket.send_text(message)
|
||||
logger.debug(f"Gateway forwarded training->frontend: {message[:100]}")
|
||||
except Exception as e:
|
||||
logger.error(f"Error in forward_to_frontend for job {job_id}: {e}")
|
||||
connection_alive = False
|
||||
logger.error(f"Error forwarding training->frontend for job {job_id}: {e}")
|
||||
|
||||
# Start connection health monitoring
|
||||
heartbeat_task = asyncio.create_task(check_connection_health())
|
||||
|
||||
# Run both forwarding tasks concurrently with proper error handling
|
||||
try:
|
||||
await asyncio.gather(
|
||||
forward_to_training(),
|
||||
forward_to_frontend(),
|
||||
return_exceptions=True
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Error in WebSocket forwarding tasks for job {job_id}: {e}")
|
||||
finally:
|
||||
connection_alive = False
|
||||
# Run both forwarding tasks concurrently
|
||||
await asyncio.gather(
|
||||
forward_to_training(),
|
||||
forward_to_frontend(),
|
||||
return_exceptions=True
|
||||
)
|
||||
|
||||
except websockets.exceptions.ConnectionClosedError as e:
|
||||
logger.warning(f"Training service WebSocket connection closed for job {job_id}: {e}")
|
||||
logger.warning(f"Training service WebSocket closed for job {job_id}: {e}")
|
||||
except websockets.exceptions.WebSocketException as e:
|
||||
logger.error(f"WebSocket exception for job {job_id}: {e}")
|
||||
except Exception as e:
|
||||
logger.error(f"WebSocket proxy error for job {job_id}: {e}")
|
||||
finally:
|
||||
# Cleanup
|
||||
if heartbeat_task and not heartbeat_task.done():
|
||||
heartbeat_task.cancel()
|
||||
try:
|
||||
await heartbeat_task
|
||||
except asyncio.CancelledError:
|
||||
pass
|
||||
|
||||
if training_ws and not training_ws.closed:
|
||||
try:
|
||||
await training_ws.close()
|
||||
logger.info(f"Closed training service WebSocket for job {job_id}")
|
||||
except Exception as e:
|
||||
logger.warning(f"Error closing training service WebSocket for job {job_id}: {e}")
|
||||
|
||||
try:
|
||||
if not websocket.client_state.name == 'DISCONNECTED':
|
||||
await websocket.close(code=1000, reason="Proxy connection closed")
|
||||
await websocket.close(code=1000, reason="Proxy closed")
|
||||
except Exception as e:
|
||||
logger.warning(f"Error closing frontend WebSocket for job {job_id}: {e}")
|
||||
|
||||
logger.info(f"WebSocket proxy cleanup completed for job {job_id}")
|
||||
logger.info(f"Gateway WebSocket proxy cleanup completed for job {job_id}")
|
||||
|
||||
if __name__ == "__main__":
|
||||
import uvicorn
|
||||
|
||||
@@ -36,7 +36,7 @@ PUBLIC_ROUTES = [
|
||||
"/api/v1/nominatim/search",
|
||||
"/api/v1/plans",
|
||||
"/api/v1/demo/accounts",
|
||||
"/api/v1/demo/session/create"
|
||||
"/api/v1/demo/sessions"
|
||||
]
|
||||
|
||||
class AuthMiddleware(BaseHTTPMiddleware):
|
||||
|
||||
@@ -77,10 +77,9 @@ class DemoMiddleware(BaseHTTPMiddleware):
|
||||
# Skip demo middleware for demo service endpoints
|
||||
demo_service_paths = [
|
||||
"/api/v1/demo/accounts",
|
||||
"/api/v1/demo/session/create",
|
||||
"/api/v1/demo/session/extend",
|
||||
"/api/v1/demo/session/destroy",
|
||||
"/api/v1/demo/sessions",
|
||||
"/api/v1/demo/stats",
|
||||
"/api/v1/demo/operations",
|
||||
]
|
||||
|
||||
if any(request.url.path.startswith(path) or request.url.path == path for path in demo_service_paths):
|
||||
@@ -204,7 +203,7 @@ class DemoMiddleware(BaseHTTPMiddleware):
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=5.0) as client:
|
||||
response = await client.get(
|
||||
f"{self.demo_session_url}/api/demo/session/{session_id}"
|
||||
f"{self.demo_session_url}/api/v1/demo/sessions/{session_id}"
|
||||
)
|
||||
if response.status_code == 200:
|
||||
return response.json()
|
||||
@@ -215,13 +214,9 @@ class DemoMiddleware(BaseHTTPMiddleware):
|
||||
|
||||
async def _update_session_activity(self, session_id: str):
|
||||
"""Update session activity timestamp"""
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=2.0) as client:
|
||||
await client.post(
|
||||
f"{self.demo_session_url}/api/demo/session/{session_id}/activity"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.debug("Failed to update activity", session_id=session_id, error=str(e))
|
||||
# Note: Activity tracking is handled by the demo service internally
|
||||
# No explicit endpoint needed - activity is updated on session access
|
||||
pass
|
||||
|
||||
def _check_blocked_path(self, path: str) -> Optional[dict]:
|
||||
"""Check if path is explicitly blocked for demo accounts"""
|
||||
|
||||
@@ -22,7 +22,7 @@ async def proxy_demo_service(path: str, request: Request):
|
||||
"""
|
||||
# Build the target URL
|
||||
demo_service_url = settings.DEMO_SESSION_SERVICE_URL.rstrip('/')
|
||||
target_url = f"{demo_service_url}/api/demo/{path}"
|
||||
target_url = f"{demo_service_url}/api/v1/demo/{path}"
|
||||
|
||||
# Get request body
|
||||
body = None
|
||||
|
||||
89
gateway/app/routes/pos.py
Normal file
89
gateway/app/routes/pos.py
Normal file
@@ -0,0 +1,89 @@
|
||||
"""
|
||||
POS routes for API Gateway - Global POS endpoints
|
||||
"""
|
||||
|
||||
from fastapi import APIRouter, Request, Response, HTTPException
|
||||
from fastapi.responses import JSONResponse
|
||||
import httpx
|
||||
import logging
|
||||
|
||||
from app.core.config import settings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
router = APIRouter()
|
||||
|
||||
# ================================================================
|
||||
# GLOBAL POS ENDPOINTS (No tenant context required)
|
||||
# ================================================================
|
||||
|
||||
@router.api_route("/supported-systems", methods=["GET", "OPTIONS"])
|
||||
async def proxy_supported_systems(request: Request):
|
||||
"""Proxy supported POS systems request to POS service"""
|
||||
target_path = "/api/v1/pos/supported-systems"
|
||||
return await _proxy_to_pos_service(request, target_path)
|
||||
|
||||
# ================================================================
|
||||
# PROXY HELPER FUNCTIONS
|
||||
# ================================================================
|
||||
|
||||
async def _proxy_to_pos_service(request: Request, target_path: str):
|
||||
"""Proxy request to POS service"""
|
||||
|
||||
# Handle OPTIONS requests directly for CORS
|
||||
if request.method == "OPTIONS":
|
||||
return Response(
|
||||
status_code=200,
|
||||
headers={
|
||||
"Access-Control-Allow-Origin": settings.CORS_ORIGINS_LIST,
|
||||
"Access-Control-Allow-Methods": "GET, POST, PUT, DELETE, OPTIONS",
|
||||
"Access-Control-Allow-Headers": "Content-Type, Authorization, X-Tenant-ID",
|
||||
"Access-Control-Allow-Credentials": "true",
|
||||
"Access-Control-Max-Age": "86400"
|
||||
}
|
||||
)
|
||||
|
||||
try:
|
||||
url = f"{settings.POS_SERVICE_URL}{target_path}"
|
||||
|
||||
# Forward headers
|
||||
headers = dict(request.headers)
|
||||
headers.pop("host", None)
|
||||
|
||||
# Add query parameters
|
||||
params = dict(request.query_params)
|
||||
|
||||
timeout_config = httpx.Timeout(
|
||||
connect=30.0,
|
||||
read=60.0,
|
||||
write=30.0,
|
||||
pool=30.0
|
||||
)
|
||||
|
||||
async with httpx.AsyncClient(timeout=timeout_config) as client:
|
||||
response = await client.request(
|
||||
method=request.method,
|
||||
url=url,
|
||||
headers=headers,
|
||||
params=params
|
||||
)
|
||||
|
||||
# Handle different response types
|
||||
if response.headers.get("content-type", "").startswith("application/json"):
|
||||
try:
|
||||
content = response.json()
|
||||
except:
|
||||
content = {"message": "Invalid JSON response from service"}
|
||||
else:
|
||||
content = response.text
|
||||
|
||||
return JSONResponse(
|
||||
status_code=response.status_code,
|
||||
content=content
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Unexpected error proxying to POS service {target_path}: {e}")
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail="Internal gateway error"
|
||||
)
|
||||
@@ -17,10 +17,10 @@ router = APIRouter()
|
||||
# SUBSCRIPTION ENDPOINTS - Direct routing to tenant service
|
||||
# ================================================================
|
||||
|
||||
@router.api_route("/subscriptions/{tenant_id}/{path:path}", methods=["GET", "POST", "PUT", "DELETE", "OPTIONS"])
|
||||
@router.api_route("/tenants/subscriptions/{tenant_id}/{path:path}", methods=["GET", "POST", "PUT", "DELETE", "OPTIONS"])
|
||||
async def proxy_subscription_endpoints(request: Request, tenant_id: str = Path(...), path: str = ""):
|
||||
"""Proxy subscription requests directly to tenant service"""
|
||||
target_path = f"/api/v1/subscriptions/{tenant_id}/{path}".rstrip("/")
|
||||
target_path = f"/api/v1/tenants/subscriptions/{tenant_id}/{path}".rstrip("/")
|
||||
return await _proxy_to_tenant_service(request, target_path)
|
||||
|
||||
@router.api_route("/subscriptions/plans", methods=["GET", "OPTIONS"])
|
||||
|
||||
@@ -300,6 +300,16 @@ async def proxy_tenant_recipes_with_path(request: Request, tenant_id: str = Path
|
||||
target_path = f"/api/v1/tenants/{tenant_id}/recipes/{path}".rstrip("/")
|
||||
return await _proxy_to_recipes_service(request, target_path, tenant_id=tenant_id)
|
||||
|
||||
# ================================================================
|
||||
# TENANT-SCOPED POS SERVICE ENDPOINTS
|
||||
# ================================================================
|
||||
|
||||
@router.api_route("/{tenant_id}/pos/{path:path}", methods=["GET", "POST", "PUT", "DELETE", "OPTIONS"])
|
||||
async def proxy_tenant_pos(request: Request, tenant_id: str = Path(...), path: str = ""):
|
||||
"""Proxy tenant POS requests to POS service"""
|
||||
target_path = f"/api/v1/tenants/{tenant_id}/pos/{path}".rstrip("/")
|
||||
return await _proxy_to_pos_service(request, target_path, tenant_id=tenant_id)
|
||||
|
||||
# ================================================================
|
||||
# PROXY HELPER FUNCTIONS
|
||||
# ================================================================
|
||||
@@ -348,6 +358,10 @@ async def _proxy_to_recipes_service(request: Request, target_path: str, tenant_i
|
||||
"""Proxy request to recipes service"""
|
||||
return await _proxy_request(request, target_path, settings.RECIPES_SERVICE_URL, tenant_id=tenant_id)
|
||||
|
||||
async def _proxy_to_pos_service(request: Request, target_path: str, tenant_id: str = None):
|
||||
"""Proxy request to POS service"""
|
||||
return await _proxy_request(request, target_path, settings.POS_SERVICE_URL, tenant_id=tenant_id)
|
||||
|
||||
async def _proxy_request(request: Request, target_path: str, service_url: str, tenant_id: str = None):
|
||||
"""Generic proxy function with enhanced error handling"""
|
||||
|
||||
|
||||
Reference in New Issue
Block a user