# Training Service Dockerfile with MinIO Support
# Multi-stage build for optimized production image
#
# NOTE(review): the base image reference below carries no tag, so the registry's
# `latest` is what gets pulled. Pin an explicit tag or digest once the intended
# one is confirmed, so rebuilds are reproducible.

# Stage that only holds the shared libraries so they can be copied by name later.
FROM localhost:5000/python_3.11-slim AS shared

WORKDIR /shared

COPY shared/ /shared/

# Main service stage
FROM localhost:5000/python_3.11-slim

WORKDIR /app

# Install system dependencies including cmdstan requirements.
# - build-essential/g++/gcc/make: toolchain used to compile CmdStan.
# - curl: used by the HEALTHCHECK below.
# - ca-certificates: listed explicitly because --no-install-recommends stops it
#   from being pulled in implicitly; needed for HTTPS downloads during the build.
# The apt lists are removed in the same layer so they never reach the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
        g++ \
        gcc \
        make \
    && rm -rf /var/lib/apt/lists/*

# Copy only the requirements manifests first so the dependency layers stay
# cached until one of these files changes.
COPY shared/requirements-tracing.txt /tmp/
COPY services/training/requirements.txt .

# Install Python dependencies
RUN pip install --no-cache-dir -r /tmp/requirements-tracing.txt
RUN pip install --no-cache-dir -r requirements.txt

# Set TMPDIR for cmdstan. The directory is NOT created in this image; it is
# expected to be created at runtime.
ENV TMPDIR=/tmp/cmdstan

# Install cmdstan for Prophet (required for model optimization).
# This step downloads and compiles CmdStan, which is slow, so it runs BEFORE the
# application code is copied: source-only changes then reuse this cached layer.
# NOTE(review): cmdstanpy is unpinned here unless requirements.txt already pins
# it - confirm and pin for reproducible builds.
RUN python -m pip install --no-cache-dir cmdstanpy && \
    python -m cmdstanpy.install_cmdstan

# Copy shared libraries from the shared stage
COPY --from=shared /shared /app/shared

# Copy application code (after the shared libraries, preserving the original
# overwrite order for any paths present in both).
COPY services/training/ .

# Add shared libraries to Python path
ENV PYTHONPATH="/app:/app/shared:${PYTHONPATH:-}"

# NOTE(review): no USER is set, so the service runs as root. CmdStan is
# presumably installed under the build user's home directory - confirm its
# location and make it readable before switching to a non-root user.

# Expose port (documentation only; does not publish the port)
EXPOSE 8000

# Health check: probe the service's /health endpoint from inside the container
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD curl -f http://localhost:8000/health || exit 1

# Run application with increased WebSocket ping timeout to handle long training operations
# Default uvicorn ws-ping-timeout is 20s, increasing to 300s (5 minutes) to prevent
# premature disconnections during CPU-intensive ML training (typically 2-3 minutes)
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000", "--ws-ping-timeout", "300"]