|
|
@@ -29,19 +29,8 @@ RUN python -m pip install --no-cache-dir --upgrade pip setuptools wheel
|
|
|
# Install PyTorch with CUDA 12.4
|
|
|
RUN pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124
|
|
|
|
|
|
-# Install transformers from git main (qwen3_next model type not in any stable release yet)
|
|
|
-RUN pip install --no-cache-dir "git+https://github.com/huggingface/transformers.git@main"
|
|
|
-
|
|
|
-# Flash-attention, causal-conv1d, flash-linear-attention are optional perf
|
|
|
-# optimizations. Building from source OOMs the HF Spaces builder (~16 GB RAM)
|
|
|
-# so we only attempt pre-built binary wheels (--only-binary :all:).
|
|
|
-# If no wheel exists for this CUDA/Python combo the install is skipped.
|
|
|
-RUN pip install --no-cache-dir --only-binary :all: flash-attn 2>/dev/null \
|
|
|
- || echo "flash-attn: no pre-built wheel, skipping"
|
|
|
-RUN pip install --no-cache-dir --only-binary :all: causal-conv1d 2>/dev/null \
|
|
|
- || echo "causal-conv1d: no pre-built wheel, skipping"
|
|
|
-RUN pip install --no-cache-dir --only-binary :all: flash-linear-attention 2>/dev/null \
|
|
|
- || echo "flash-linear-attention: no pre-built wheel, skipping"
|
|
|
+# Install Unsloth (bundles transformers, peft, trl, bitsandbytes, xformers etc.); no --force-reinstall, which would replace the cu124 torch wheels installed above with default-index builds
|
|
|
+RUN pip install --no-cache-dir --upgrade unsloth unsloth_zoo
|
|
|
|
|
|
# Create non-root user FIRST
|
|
|
RUN useradd -m -u 1000 user
|
|
|
@@ -49,7 +38,7 @@ RUN useradd -m -u 1000 user
|
|
|
# Create app directory
|
|
|
WORKDIR /app
|
|
|
|
|
|
-# Copy requirements and install (transformers already installed from git above, pip will skip it)
|
|
|
+# Copy requirements and install remaining deps
|
|
|
COPY requirements.txt .
|
|
|
RUN pip install --no-cache-dir -r requirements.txt
|
|
|
|