Fix new services implementation 5
This commit is contained in:
@@ -122,85 +122,60 @@ async def metrics():
|
||||
|
||||
@app.websocket("/api/v1/ws/tenants/{tenant_id}/training/jobs/{job_id}/live")
async def websocket_training_progress(websocket: WebSocket, tenant_id: str, job_id: str):
    """Relay a client WebSocket to the training service's live-progress WebSocket.

    The client connection is accepted first so that a close code and reason
    can be delivered if the ``token`` query parameter is missing. Otherwise an
    upstream WebSocket is opened against ``settings.TRAINING_SERVICE_URL``
    (scheme rewritten from http(s) to ws(s)) and text frames are copied in
    both directions until either side stops.

    Args:
        websocket: The client-facing WebSocket connection.
        tenant_id: Tenant identifier taken from the request path.
        job_id: Training job identifier taken from the request path.

    Close codes sent to the client:
        1008: no ``token`` query parameter was supplied.
        1011: connecting to, or relaying with, the training service failed.
    """
    # Function-scope imports: the module's import block is not visible here,
    # so the names this block needs are brought into scope locally.
    from urllib.parse import quote

    import websockets

    await websocket.accept()

    # Get token from query params
    token = websocket.query_params.get("token")
    if not token:
        logger.warning(f"WebSocket connection rejected - missing token for job {job_id}")
        await websocket.close(code=1008, reason="Authentication token required")
        return

    logger.info(f"Proxying WebSocket connection to training service for job {job_id}, tenant {tenant_id}")

    # Build WebSocket URL to training service
    training_service_base = settings.TRAINING_SERVICE_URL.rstrip('/')
    training_ws_base = training_service_base.replace('http://', 'ws://').replace('https://', 'wss://')
    # Percent-encode every caller-supplied component so that characters such
    # as '/', '?', '&' or '#' cannot alter the upstream path or query string.
    training_ws_url = (
        f"{training_ws_base}/api/v1/ws/tenants/{quote(tenant_id, safe='')}"
        f"/training/jobs/{quote(job_id, safe='')}/live?token={quote(token, safe='')}"
    )

    try:
        # Connect to training service WebSocket
        async with websockets.connect(training_ws_url) as training_ws:
            logger.info(f"Connected to training service WebSocket for job {job_id}")

            async def forward_to_training():
                """Forward messages from frontend to training service"""
                try:
                    async for message in websocket.iter_text():
                        await training_ws.send(message)
                except Exception as e:
                    logger.error(f"Error forwarding to training service: {e}")

            async def forward_to_frontend():
                """Forward messages from training service to frontend"""
                try:
                    async for message in training_ws:
                        await websocket.send_text(message)
                except Exception as e:
                    logger.error(f"Error forwarding to frontend: {e}")

            # Run both forwarding tasks concurrently, but stop as soon as
            # either direction ends. Waiting for both would keep the other
            # direction (and its socket) open until the remaining peer also
            # disconnects.
            relay_tasks = [
                asyncio.create_task(forward_to_training()),
                asyncio.create_task(forward_to_frontend()),
            ]
            try:
                await asyncio.wait(relay_tasks, return_when=asyncio.FIRST_COMPLETED)
            finally:
                for task in relay_tasks:
                    task.cancel()
                # Collect the cancelled/finished tasks so that no exception is
                # left unretrieved.
                await asyncio.gather(*relay_tasks, return_exceptions=True)

    except Exception as e:
        logger.error(f"WebSocket proxy error for job {job_id}: {e}")
        try:
            await websocket.close(code=1011, reason="Training service connection failed")
        except Exception as close_error:
            # Best effort only: the client socket may already be closed.
            logger.debug(f"WebSocket close failed for job {job_id}: {close_error}")
    finally:
        logger.info(f"WebSocket proxy closed for job {job_id}")
|
||||
|
||||
if __name__ == "__main__":
|
||||
import uvicorn
|
||||
|
||||
@@ -12,4 +12,5 @@ email-validator==2.0.0
|
||||
aio-pika==9.3.0
|
||||
pytz==2023.3
|
||||
python-logstash==0.4.8
|
||||
structlog==23.2.0
|
||||
structlog==23.2.0
|
||||
websockets==12.0
|
||||
Reference in New Issue
Block a user