# GPU image for a Gradio application (app.py) served on port 7860.
# Build steps run as root; the container itself runs as the unprivileged
# account "user" (UID 1000), with all cache directories under /home/user.
FROM nvidia/cuda:12.4.1-devel-ubuntu22.04

# Build-time only: keeps apt from prompting during package installation
# without leaving the variable in the runtime environment.
ARG DEBIAN_FRONTEND=noninteractive

# Unbuffered Python output, and Gradio bound to all interfaces on port 7860.
ENV PYTHONUNBUFFERED=1 \
    GRADIO_SERVER_NAME=0.0.0.0 \
    GRADIO_SERVER_PORT=7860

# System dependencies.
# `git lfs install --system` writes the LFS filters to the system-wide git
# config so they also apply to the non-root runtime user; without --system
# only root's ~/.gitconfig would be configured at this point in the build.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        curl \
        git \
        git-lfs \
        python3-pip \
        python3.11 \
        python3.11-dev \
        python3.11-venv \
        wget \
    && rm -rf /var/lib/apt/lists/* \
    && git lfs install --system

# Set python3.11 as the default for both `python3` and `python`.
RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.11 1 \
    && update-alternatives --install /usr/bin/python python /usr/bin/python3.11 1

# Upgrade the packaging toolchain.
RUN python -m pip install --no-cache-dir --upgrade pip setuptools wheel

# Install PyTorch built against CUDA 12.4.
RUN pip install --no-cache-dir torch torchvision torchaudio \
        --index-url https://download.pytorch.org/whl/cu124

# Install transformers from git main (qwen3_next model type not in any stable
# release yet).
# NOTE(review): tracking `main` makes the build non-reproducible; pin a commit
# or release once one that includes the model type is known.
RUN pip install --no-cache-dir "git+https://github.com/huggingface/transformers.git@main"

# Flash-attention, causal-conv1d and flash-linear-attention are optional perf
# optimizations. Building from source OOMs the HF Spaces builder (~16 GB RAM),
# so we only attempt pre-built binary wheels (--only-binary :all:).
# If the install fails (e.g. no wheel exists for this CUDA/Python combo) the
# package is skipped and the build continues.
RUN pip install --no-cache-dir --only-binary :all: flash-attn 2>/dev/null \
    || echo "flash-attn: no pre-built wheel, skipping"
RUN pip install --no-cache-dir --only-binary :all: causal-conv1d 2>/dev/null \
    || echo "causal-conv1d: no pre-built wheel, skipping"
RUN pip install --no-cache-dir --only-binary :all: flash-linear-attention 2>/dev/null \
    || echo "flash-linear-attention: no pre-built wheel, skipping"

# Create the non-root user before anything needs to be owned by it.
RUN useradd -m -u 1000 user

# Application directory (created by WORKDIR if missing).
WORKDIR /app

# Create all writable directories under the user's home (NOT /tmp) and hand
# them, plus /app itself, to the runtime user. Done before any COPY so this
# layer stays cached when only the application source changes.
RUN mkdir -p \
        /home/user/hf_cache \
        /home/user/merged \
        /home/user/output \
        /home/user/torch_cache \
    && chown -R user:user /home/user \
    && chown user:user /app

# Copy the requirements first so the dependency layer is reused until
# requirements.txt changes. transformers is already installed from git above;
# pip is expected to leave it alone unless requirements.txt demands a
# different version.
COPY --chown=user:user requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy the application files, owned by the runtime user from the start
# (avoids a separate recursive chown layer over /app).
COPY --chown=user:user . .

# Point HOME and the model/weight caches at the user's home directory.
# NOTE(review): recent transformers releases deprecate TRANSFORMERS_CACHE in
# favor of HF_HOME; it is kept here in case application code still reads it.
ENV HOME=/home/user \
    HF_HOME=/home/user/hf_cache \
    TRANSFORMERS_CACHE=/home/user/hf_cache \
    TORCH_HOME=/home/user/torch_cache \
    PATH="/home/user/.local/bin:$PATH"

# Let PyTorch's CUDA allocator use expandable segments to limit fragmentation.
ENV PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True

# Drop privileges for runtime.
USER user

# Documents the Gradio port configured via GRADIO_SERVER_PORT above.
EXPOSE 7860

# Exec form so the Python process receives signals directly.
CMD ["python", "app.py"]