Dockerfile 2.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576
  # Base image: CUDA 12.4.1 development image on Ubuntu 22.04.
  FROM nvidia/cuda:12.4.1-devel-ubuntu22.04

  # Build-time only: stops apt/debconf from prompting during RUN steps.
  # Declared with ARG instead of ENV so it is visible to every RUN in this
  # stage but is not baked into the environment of the running container.
  ARG DEBIAN_FRONTEND=noninteractive

  # Runtime environment.
  #   PYTHONUNBUFFERED      - Python writes stdout/stderr unbuffered, so logs
  #                           show up immediately.
  #   GRADIO_SERVER_NAME    - listen address; 0.0.0.0 means all interfaces.
  #   GRADIO_SERVER_PORT    - listen port; same number as the EXPOSE below.
  # NOTE(review): the GRADIO_* variables are presumably consumed by a Gradio
  # app started from app.py - confirm.
  ENV PYTHONUNBUFFERED=1 \
      GRADIO_SERVER_NAME=0.0.0.0 \
      GRADIO_SERVER_PORT=7860
  # System dependencies: Python 3.11 toolchain, pip, git + git-lfs, download
  # tools and a C/C++ compiler. Packages are listed alphabetically.
  # The apt lists are deleted in the same layer so they never persist in the image.
  # `git lfs install --system` writes the LFS filter configuration to the
  # system-wide git config. A plain `git lfs install` run as root only touches
  # root's per-user config, which the non-root runtime user (whose HOME is
  # /home/user) never reads. `--skip-repo` is used because no repository is
  # checked out at this point.
  RUN apt-get update && apt-get install -y --no-install-recommends \
          build-essential \
          curl \
          git \
          git-lfs \
          python3-pip \
          python3.11 \
          python3.11-dev \
          python3.11-venv \
          wget \
      && rm -rf /var/lib/apt/lists/* \
      && git lfs install --system --skip-repo
  # Register /usr/bin/python3.11 (priority 1) as the provider of both the
  # `python3` and the `python` commands through the alternatives system.
  # The build stops on the first registration that fails.
  RUN for name in python3 python; do \
          update-alternatives --install "/usr/bin/${name}" "${name}" /usr/bin/python3.11 1 \
          || exit 1; \
      done
  # Bring the packaging toolchain (pip, setuptools, wheel) up to date first.
  RUN python -m pip install --no-cache-dir --upgrade \
          pip \
          setuptools \
          wheel

  # PyTorch stack, resolved against the PyTorch wheel index for CUDA 12.4 (cu124).
  RUN pip install --no-cache-dir \
          --index-url https://download.pytorch.org/whl/cu124 \
          torch \
          torchvision \
          torchaudio
  # Install transformers straight from its git repository, because the
  # qwen3_next model type is not in any stable release yet.
  # TRANSFORMERS_REF selects the git ref (branch, tag or commit SHA). It
  # defaults to `main`, which floats; pass
  #   --build-arg TRANSFORMERS_REF=<commit-sha>
  # to get a reproducible build.
  ARG TRANSFORMERS_REF=main
  RUN pip install --no-cache-dir "git+https://github.com/huggingface/transformers.git@${TRANSFORMERS_REF}"
  # flash-attn, causal-conv1d and flash-linear-attention are optional
  # performance optimizations. Building them from source OOMs the HF Spaces
  # builder (~16 GB RAM), so only pre-built binary wheels are attempted
  # (--only-binary :all:). A package whose install fails is skipped and the
  # build continues.
  # pip's stderr is intentionally NOT discarded: the build log then shows
  # whether a skip was caused by a missing wheel or by something else
  # (network error, resolver conflict, ...).
  RUN for pkg in flash-attn causal-conv1d flash-linear-attention; do \
          pip install --no-cache-dir --only-binary :all: "${pkg}" \
          || echo "${pkg}: binary-only install failed (see pip output above), skipping"; \
      done
  # Unprivileged account (UID 1000, home directory created) used later by the
  # USER instruction. It is created before any files are handed over to it.
  RUN useradd --create-home --uid 1000 user

  # Application directory; relative paths below resolve inside /app.
  WORKDIR /app

  # Copy only the requirements file and install from it before the rest of the
  # source is copied.
  # NOTE(review): transformers is already installed from git above; pip leaves
  # it alone only if the constraint in requirements.txt accepts that version -
  # confirm.
  COPY requirements.txt ./
  RUN pip install --no-cache-dir --requirement requirements.txt
  # Copy the application source and assign ownership at copy time. A later
  # `chown -R /app` would rewrite every copied file into a second layer.
  COPY --chown=user:user . .

  # Create ALL writable cache/output directories under the user's home (NOT
  # /tmp) and hand them to the runtime user. For /app, only entries that are not
  # already owned by `user` are changed (the /app directory itself, plus
  # anything a root-run step left behind), so the copied files are not
  # duplicated in this layer.
  RUN mkdir -p \
          /home/user/hf_cache \
          /home/user/torch_cache \
          /home/user/output \
          /home/user/merged \
      && chown -R user:user /home/user \
      && find /app ! -user user -exec chown -h user:user {} +
  # Runtime environment, set in a single instruction.
  #   HOME                     - home directory of the non-root user.
  #   HF_HOME,
  #   TRANSFORMERS_CACHE       - both point at /home/user/hf_cache.
  #   TORCH_HOME               - /home/user/torch_cache.
  #   PATH                     - /home/user/.local/bin is searched first.
  #   PYTORCH_CUDA_ALLOC_CONF  - expandable segments, to limit PyTorch CUDA
  #                              memory fragmentation.
  # NOTE(review): newer transformers releases reportedly deprecate
  # TRANSFORMERS_CACHE in favour of HF_HOME - confirm before removing it.
  ENV HOME=/home/user \
      HF_HOME=/home/user/hf_cache \
      TRANSFORMERS_CACHE=/home/user/hf_cache \
      TORCH_HOME=/home/user/torch_cache \
      PATH="/home/user/.local/bin:$PATH" \
      PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
  # Documents the port the app listens on (same number as GRADIO_SERVER_PORT).
  # EXPOSE does not publish the port by itself.
  EXPOSE 7860

  # Everything from here on, including the container's main process, runs as
  # the unprivileged `user` account instead of root.
  USER user

  # Exec-form default command: run app.py from the working directory with the
  # `python` interpreter, without a wrapping shell.
  CMD ["python", "app.py"]