# Base image: CUDA 12.4.1 "devel" flavour on Ubuntu 22.04.
FROM nvidia/cuda:12.4.1-devel-ubuntu22.04

# Build-time only: stops apt/dpkg from prompting during the RUN steps below.
# Declared as ARG (not ENV) so it does not leak into the runtime environment
# of the final container.
ARG DEBIAN_FRONTEND=noninteractive

# Runtime configuration:
#   PYTHONUNBUFFERED           - Python writes stdout/stderr unbuffered, so
#                                logs show up immediately.
#   GRADIO_SERVER_NAME / _PORT - listen on all interfaces, port 7860.
#   HF_HOME / TRANSFORMERS_CACHE / TORCH_HOME
#                              - model/weight caches are placed under /tmp.
ENV PYTHONUNBUFFERED=1 \
    GRADIO_SERVER_NAME=0.0.0.0 \
    GRADIO_SERVER_PORT=7860 \
    HF_HOME=/tmp/hf_cache \
    TRANSFORMERS_CACHE=/tmp/hf_cache \
    TORCH_HOME=/tmp/torch_cache

# System dependencies (package list kept alphabetical for clean diffs).
# - ca-certificates is listed explicitly: it is only a "Recommends" of
#   curl/git, so --no-install-recommends would not pull it in on its own.
# - apt lists are removed in the same layer so they never reach the image.
# - `git lfs install --system` writes the LFS filter config to the system-wide
#   git config. Without --system it would only be written to the home
#   directory of the build user (root) and would not apply to the non-root
#   user the container runs as.
RUN apt-get update && apt-get install -y --no-install-recommends \
    ca-certificates \
    curl \
    git \
    git-lfs \
    python3-pip \
    python3.11 \
    python3.11-venv \
    wget \
    && rm -rf /var/lib/apt/lists/* \
    && git lfs install --system

# Point the generic `python` and `python3` commands at the 3.11 interpreter
RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.11 1 \
    && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.11 1

# Bring the packaging toolchain (pip, setuptools, wheel) up to date
RUN python -m pip install --upgrade --no-cache-dir \
    pip \
    setuptools \
    wheel

# PyTorch stack, resolved from the CUDA 12.4 (cu124) wheel index
RUN pip install --no-cache-dir \
    --index-url https://download.pytorch.org/whl/cu124 \
    torch \
    torchaudio \
    torchvision

# Install flash-attention for faster training (optional, best effort).
# A failed install must not abort the image build, hence the `||` fallback.
# pip's stderr is deliberately NOT discarded, so the reason for a failure is
# visible in the build log instead of being silently swallowed.
RUN pip install --no-cache-dir --no-build-isolation flash-attn \
    || echo "Flash attention build failed, continuing without it" >&2

# Application root; created if missing, and relative paths below resolve here
WORKDIR /app

# Install Python dependencies from the manifest alone, so this layer is
# reused from cache as long as requirements.txt itself is unchanged
COPY requirements.txt ./
RUN pip install --no-cache-dir --requirement requirements.txt

# Create non-root user for HF Spaces (fixed UID 1000)
RUN useradd -m -u 1000 user

# Create the cache/output directories and hand them, together with what is
# already in /app, to the non-root user. This runs BEFORE the application
# source is copied so that:
#   - the layer is not invalidated every time the source changes, and
#   - the recursive chown only touches the few files present so far instead
#     of duplicating the whole application tree in an extra layer.
# NOTE(review): the world-writable mode is kept from the original; it looks
# redundant given the chown to `user` -- tighten if the container is always
# started as UID 1000.
RUN mkdir -p /tmp/hf_cache /tmp/torch_cache /tmp/qwen3-uncensored-lora /tmp/merged_model \
    && chown -R user:user /tmp/hf_cache /tmp/torch_cache /tmp/qwen3-uncensored-lora /tmp/merged_model /app \
    && chmod -R 777 /tmp/hf_cache /tmp/torch_cache /tmp/qwen3-uncensored-lora /tmp/merged_model

# Copy app files, owned by the non-root user from the start (no follow-up
# `chown -R /app` layer needed)
COPY --chown=user:user . .

# Drop root: every following instruction and the container process run as `user`
USER user

# Home directory of the runtime user, with its ~/.local/bin placed first on PATH
ENV HOME=/home/user \
    PATH="/home/user/.local/bin:$PATH"

# Port 7860 (documentation only; does not publish the port)
EXPOSE 7860/tcp

# Exec form: python is started directly, without a wrapping shell
CMD ["python", "app.py"]
