# Training service Dockerfile
#
# COPY sources below (shared/, services/training/) are relative to the build
# context, so the context must contain both directories.

# Stage "shared": holds a copy of the shared/ directory under /shared so that
# a later stage can pull it in with `COPY --from=shared`.
FROM python:3.11-slim AS shared

WORKDIR /shared

COPY shared/ /shared/

# Final stage: runtime image for the training service.
# NOTE(review): no USER instruction is set, so the container runs as root.
# Switching to a non-root user would also need the cmdstan install location
# and any runtime-writable paths to be accessible to that user — confirm and
# handle in a dedicated change.
FROM python:3.11-slim

WORKDIR /app

# System packages:
#   - build-essential / g++ / gcc / make: compiler toolchain for cmdstan
#   - ca-certificates: listed explicitly so HTTPS keeps working even though
#     recommended packages are no longer pulled in
#   - curl: used by the HEALTHCHECK below
# --no-install-recommends keeps optional packages out of the image; the apt
# lists are removed in the same layer so they never persist in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
        g++ \
        gcc \
        make \
    && rm -rf /var/lib/apt/lists/*

# Copy only the dependency manifests first so the pip layers below are reused
# until one of the requirements files changes.
COPY shared/requirements-tracing.txt /tmp/
COPY services/training/requirements.txt .

# Install Python dependencies (tracing requirements first, then the service's own).
RUN pip install --no-cache-dir -r /tmp/requirements-tracing.txt
RUN pip install --no-cache-dir -r requirements.txt

# PYTHONPATH: import roots for the service code and the shared libraries.
#   The previous "${PYTHONPATH:-}" suffix was expanded at build time; assuming
#   the base image defines no PYTHONPATH, it only appended a trailing ':'
#   (an empty path entry), so it is dropped.
# TMPDIR: temp directory for cmdstan. It is not created in this image and is
#   expected to be created at runtime.
ENV PYTHONPATH="/app:/app/shared" \
    TMPDIR=/tmp/cmdstan

# Install cmdstanpy and cmdstan for Prophet (required for model optimization).
# This step does not use the service sources, so it runs before they are
# copied: editing application code no longer invalidates this layer.
RUN python -m pip install --no-cache-dir cmdstanpy && \
    python -m cmdstanpy.install_cmdstan

# Copy shared libraries from the shared stage.
COPY --from=shared /shared /app/shared

# Copy application code last, since it is the most frequently changing input.
COPY services/training/ .

# Port the uvicorn server listens on (documentation only; does not publish it).
EXPOSE 8000

# Health check: probe the service's /health endpoint with curl.
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD curl -f http://localhost:8000/health || exit 1

# Run the application with an increased WebSocket ping timeout to handle long
# training operations. The default uvicorn ws-ping-timeout is 20s; it is raised
# to 300s (5 minutes) to prevent premature disconnections during CPU-intensive
# ML training (typically 2-3 minutes).
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000", "--ws-ping-timeout", "300"]