| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576 |
- # Base image: the `devel` variant of the CUDA 12.4.1 image on Ubuntu 22.04.
- FROM nvidia/cuda:12.4.1-devel-ubuntu22.04
- # Image-wide defaults: non-interactive apt, unbuffered Python output, and the
- # Gradio server bound to all interfaces on port 7860.
- ENV DEBIAN_FRONTEND=noninteractive \
-     PYTHONUNBUFFERED=1 \
-     GRADIO_SERVER_NAME=0.0.0.0 \
-     GRADIO_SERVER_PORT=7860
- # System dependencies: Python 3.11 (interpreter, venv, headers), pip, git and
- # git-lfs, download tools, and a C/C++ build toolchain.
- # The apt package lists are deleted in the same layer so they are not stored
- # in the image.
- # `git lfs install --system` registers the LFS filters in the system-wide git
- # config. A plain `git lfs install` here would run as root and only touch
- # root's own global config, which the non-root `user` account that the
- # container runs as (HOME=/home/user) never reads.
- RUN apt-get update && apt-get install -y --no-install-recommends \
-         python3.11 \
-         python3.11-venv \
-         python3.11-dev \
-         python3-pip \
-         git \
-         git-lfs \
-         wget \
-         curl \
-         build-essential \
-     && rm -rf /var/lib/apt/lists/* \
-     && git lfs install --system
- # Register Python 3.11 as the target of both `python3` and `python` through
- # update-alternatives (priority 1). The build stops if either registration fails.
- RUN for name in python3 python; do \
-         update-alternatives --install "/usr/bin/${name}" "${name}" /usr/bin/python3.11 1 || exit 1; \
-     done
- # Bring the packaging toolchain (pip, setuptools, wheel) up to date first.
- RUN python -m pip install --upgrade --no-cache-dir \
-         pip setuptools wheel
- # PyTorch, torchvision and torchaudio from the CUDA 12.4 (cu124) wheel index.
- RUN pip install --no-cache-dir \
-         --index-url https://download.pytorch.org/whl/cu124 \
-         torch torchvision torchaudio
- # transformers straight from the main branch: the qwen3_next model type is not
- # in any stable release yet.
- RUN pip install --no-cache-dir \
-         "git+https://github.com/huggingface/transformers.git@main"
- # Flash-attention, causal-conv1d and flash-linear-attention are optional perf
- # optimizations. Building from source OOMs the HF Spaces builder (~16 GB RAM),
- # so only pre-built binary wheels are attempted (--only-binary :all:).
- # Each install is best-effort: on any failure the build continues without the
- # package. pip's stderr is deliberately NOT discarded, so the build log shows
- # the real reason (no matching wheel, network error, dependency conflict)
- # instead of reporting every failure as a missing wheel.
- RUN pip install --no-cache-dir --only-binary :all: flash-attn \
-     || echo "flash-attn: install failed or no pre-built wheel (see pip output above), skipping"
- RUN pip install --no-cache-dir --only-binary :all: causal-conv1d \
-     || echo "causal-conv1d: install failed or no pre-built wheel (see pip output above), skipping"
- RUN pip install --no-cache-dir --only-binary :all: flash-linear-attention \
-     || echo "flash-linear-attention: install failed or no pre-built wheel (see pip output above), skipping"
- # Add the unprivileged account (uid 1000, with a home directory) before
- # anything needs to be owned by it.
- RUN useradd --create-home --uid 1000 user
- # All application files live under /app.
- WORKDIR /app
- # Install the Python requirements in their own layer, ahead of the full source
- # copy. transformers is already installed from git at this point.
- # NOTE(review): pip only leaves that install alone if requirements.txt does not
- # demand a different transformers version - confirm against requirements.txt.
- COPY requirements.txt ./
- RUN pip install --no-cache-dir --requirement requirements.txt
- # Copy app files, assigning ownership at copy time. A recursive chown of /app
- # in a later RUN would rewrite every copied file into an additional image layer.
- COPY --chown=user:user . .
- # Create ALL cache/output directories under the user's home (NOT /tmp) and
- # hand them to the runtime user. /app itself (created by WORKDIR) and the
- # requirements.txt copied earlier are chowned non-recursively, so the final
- # ownership is the same as a full `chown -R` would give.
- RUN mkdir -p /home/user/hf_cache /home/user/torch_cache /home/user/output /home/user/merged \
-     && chown -R user:user /home/user \
-     && chown user:user /app /app/requirements.txt
- # Runtime environment for the non-root user:
- #   HOME / HF_HOME / TRANSFORMERS_CACHE / TORCH_HOME  keep caches under /home/user
- #   PATH                                              picks up user-level installs in ~/.local/bin
- #   PYTORCH_CUDA_ALLOC_CONF                           enables expandable segments to limit
- #                                                     CUDA memory fragmentation
- # NOTE(review): TRANSFORMERS_CACHE appears to be superseded by HF_HOME in recent
- # transformers releases - confirm whether it is still needed.
- ENV HOME=/home/user \
-     HF_HOME=/home/user/hf_cache \
-     TRANSFORMERS_CACHE=/home/user/hf_cache \
-     TORCH_HOME=/home/user/torch_cache \
-     PATH="/home/user/.local/bin:$PATH" \
-     PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
- # Port the app listens on (matches GRADIO_SERVER_PORT).
- EXPOSE 7860/tcp
- # Drop root: everything from here on, including the app, runs as `user`.
- USER user
- # Launch the app with the default interpreter (exec form, no shell wrapper).
- CMD ["python", "app.py"]
|