Initial commit - production deployment
This commit is contained in:
70
services/training/Dockerfile
Normal file
70
services/training/Dockerfile
Normal file
@@ -0,0 +1,70 @@
|
||||
# =============================================================================
|
||||
# Training Service Dockerfile - Environment-Configurable Base Images
|
||||
# =============================================================================
|
||||
# Build arguments for registry configuration:
|
||||
# - BASE_REGISTRY: Registry URL (default: docker.io for Docker Hub)
|
||||
# - PYTHON_IMAGE: Python image name and tag (default: python:3.11-slim)
|
||||
# =============================================================================
|
||||
|
||||
ARG BASE_REGISTRY=docker.io
|
||||
ARG PYTHON_IMAGE=python:3.11-slim
|
||||
|
||||
FROM ${BASE_REGISTRY}/${PYTHON_IMAGE} AS shared
|
||||
WORKDIR /shared
|
||||
COPY shared/ /shared/
|
||||
|
||||
ARG BASE_REGISTRY=docker.io
|
||||
ARG PYTHON_IMAGE=python:3.11-slim
|
||||
FROM ${BASE_REGISTRY}/${PYTHON_IMAGE}
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Install system dependencies including cmdstan requirements
|
||||
RUN apt-get update && apt-get install -y \
|
||||
gcc \
|
||||
g++ \
|
||||
make \
|
||||
curl \
|
||||
build-essential \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Copy requirements
|
||||
COPY shared/requirements-tracing.txt /tmp/
|
||||
|
||||
COPY services/training/requirements.txt .
|
||||
|
||||
# Install Python dependencies
|
||||
RUN pip install --no-cache-dir -r /tmp/requirements-tracing.txt
|
||||
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
# Copy shared libraries from the shared stage
|
||||
COPY --from=shared /shared /app/shared
|
||||
|
||||
# Copy application code
|
||||
COPY services/training/ .
|
||||
|
||||
|
||||
|
||||
# Add shared libraries to Python path
|
||||
ENV PYTHONPATH="/app:/app/shared:${PYTHONPATH:-}"
|
||||
|
||||
# Set TMPDIR for cmdstan (directory will be created at runtime)
|
||||
ENV TMPDIR=/tmp/cmdstan
|
||||
|
||||
# Install cmdstan for Prophet (required for model optimization)
|
||||
# Suppress verbose output to reduce log noise
|
||||
RUN python -m pip install --no-cache-dir cmdstanpy && \
|
||||
python -m cmdstanpy.install_cmdstan
|
||||
|
||||
# Expose port
|
||||
EXPOSE 8000
|
||||
|
||||
# Health check
|
||||
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
|
||||
CMD curl -f http://localhost:8000/health || exit 1
|
||||
|
||||
# Run application with increased WebSocket ping timeout to handle long training operations
|
||||
# Default uvicorn ws-ping-timeout is 20s, increasing to 300s (5 minutes) to prevent
|
||||
# premature disconnections during CPU-intensive ML training (typically 2-3 minutes)
|
||||
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000", "--ws-ping-timeout", "300"]
|
||||
Reference in New Issue
Block a user