# lainlives/sft-trainer
"""
Gradio UI for Qwen3-Coder-Next Uncensored Fine-Tuning
Provides a web interface to configure, launch, and monitor training.
"""

import os
import json
import time
import threading
from pathlib import Path

import gradio as gr
import yaml

# JSON status document: read by read_status(), and overwritten with an
# "error" record by the background workers when a job raises.
STATUS_FILE = "/home/user/training_status.json"
# Text log shown in the Monitor tab via read_logs().
# NOTE(review): the writer of this file is not in this module — presumably the
# training script; confirm.
LOG_FILE = "/home/user/training.log"

# Track background training thread
# training_thread: the Thread object most recently started by start_training().
training_thread = None
# training_active: True while a training or merge job is running; checked by
# start_training() and start_merge() to refuse a second concurrent job.
training_active = False


def load_config():
    """Load the application settings from ``config.yaml``.

    The path is relative, so the file is resolved against the process's
    current working directory.

    Returns:
        The object produced by ``yaml.safe_load`` for the document.
        ``build_ui`` indexes it with ``"datasets"`` and calls ``.get`` on it,
        so a mapping is expected.

    Raises:
        OSError: If ``config.yaml`` cannot be opened.
        yaml.YAMLError: If the file does not contain valid YAML.
    """
    # Decode explicitly as UTF-8 instead of the locale default so non-ASCII
    # text in the config (e.g. the system prompt) loads the same everywhere.
    with open("config.yaml", encoding="utf-8") as f:
        return yaml.safe_load(f)


def read_status():
    """Return the current job status as a dict.

    Reads and parses ``STATUS_FILE``. The result always contains the keys
    ``status``, ``detail``, ``progress`` and ``metrics`` so callers can index
    them without guarding; any additional keys found in the file are kept.

    Returns:
        dict: The parsed status with missing keys filled in, or the "idle"
        placeholder when the file is missing, unreadable, not valid JSON, or
        does not hold a JSON object.
    """
    idle = {
        "status": "idle",
        "detail": "Ready to start training",
        "progress": 0.0,
        "metrics": {},
    }
    try:
        loaded = json.loads(Path(STATUS_FILE).read_text())
    except (OSError, ValueError):
        # OSError: file absent/unreadable. ValueError covers both malformed
        # JSON (e.g. a write caught half-way) and undecodable bytes.
        return idle

    if not isinstance(loaded, dict):
        return idle

    # Fill gaps with neutral values rather than the idle text, so a partial
    # record is not reported as "Ready to start training".
    filled = {"status": "unknown", "detail": "", "progress": 0.0, "metrics": {}}
    filled.update(loaded)
    return filled


def read_logs(num_lines: int = 100):
    """Return the last ``num_lines`` lines of ``LOG_FILE`` as one string.

    The file is streamed through a bounded deque, so only the requested tail
    is held in memory no matter how large the log has grown. Leading and
    trailing whitespace of the returned tail is stripped.

    Args:
        num_lines: Maximum number of trailing lines to return. Values of zero
            or less yield an empty string.

    Returns:
        str: The log tail, ``""`` when no lines were requested, or
        ``"No logs yet."`` when the log file cannot be opened or read.
    """
    from collections import deque

    # lines[-0:] would be the *whole* list, so handle "no lines" up front.
    if num_lines <= 0:
        return ""

    try:
        # errors="replace" keeps the viewer working if the log contains
        # bytes that are not valid UTF-8.
        with open(LOG_FILE, encoding="utf-8", errors="replace") as log:
            tail = deque(log, maxlen=num_lines)
    except OSError:
        return "No logs yet."

    return "".join(tail).strip()


def get_gpu_info():
    """Describe CUDA device 0 as a short multi-line string.

    ``torch`` is imported lazily inside the function, so a missing or broken
    install is reported in the returned text instead of raising.

    Returns:
        str: Device name, total / allocated / reserved memory in GB, CUDA
        version and compute capability when CUDA is available;
        ``"No GPU detected"`` when it is not; otherwise a
        ``"GPU info unavailable: ..."`` message carrying the error text.
    """
    try:
        import torch

        if not torch.cuda.is_available():
            return "No GPU detected"

        gib = 1024**3  # bytes per displayed "GB"
        device = torch.cuda.get_device_properties(0)
        mem_total = device.total_memory / gib
        mem_used = torch.cuda.memory_allocated(0) / gib
        mem_reserved = torch.cuda.memory_reserved(0) / gib

        report = (
            f"GPU: {device.name}\n"
            f"VRAM: {mem_total:.1f} GB total | {mem_used:.1f} GB used | {mem_reserved:.1f} GB reserved\n"
            f"CUDA: {torch.version.cuda}\n"
            f"Compute Capability: {device.major}.{device.minor}"
        )
        return report
    except Exception as err:
        return f"GPU info unavailable: {err}"


# ---------------------------------------------------------------------------
# Training launch
# ---------------------------------------------------------------------------


def start_training(
    method,
    dataset_choice,
    custom_dataset,
    hub_model_id,
    max_samples,
    num_epochs,
    learning_rate,
    lora_r,
    lora_alpha,
    batch_size,
    grad_accum,
    max_seq_length,
    system_prompt,
):
    """Validate the form inputs and launch the selected job in a daemon thread.

    Args:
        method: ``"QLoRA Fine-Tuning"`` or ``"Abliteration (No Training)"``.
        dataset_choice: Dataset name forwarded to ``train``.
        custom_dataset: Optional uploaded file; its ``.name`` attribute (or
            ``str()`` of the value) is forwarded as the dataset path.
        hub_model_id: Target Hub repository; required.
        max_samples: Sample cap; a falsy or non-positive value means no cap.
        num_epochs, lora_r, lora_alpha, batch_size, grad_accum,
        max_seq_length: Converted with ``int()`` inside the worker.
        learning_rate: Converted with ``float()`` inside the worker.
        system_prompt: Forwarded to ``train`` unchanged.

    Returns:
        str: A message for the UI — either a validation/busy error or a
        confirmation that the job was launched. Failures inside the worker
        are not returned; they are written to ``STATUS_FILE`` as an
        ``"error"`` status.
    """
    global training_thread, training_active

    if training_active:
        return "⚠️ Training is already in progress! Wait for it to finish or restart the Space."

    # Tolerate None as well as blank text from the textbox.
    hub_id = (hub_model_id or "").strip()
    if not hub_id:
        return (
            "❌ Hub Model ID is required (e.g., your-username/qwen3-coder-uncensored)"
        )

    if not os.environ.get("HF_TOKEN"):
        return (
            "❌ HF_TOKEN secret not set! Go to Space Settings → Secrets → Add HF_TOKEN"
        )

    # Handle custom dataset upload
    custom_path = None
    if custom_dataset is not None:
        custom_path = (
            custom_dataset.name
            if hasattr(custom_dataset, "name")
            else str(custom_dataset)
        )

    requested = int(max_samples) if max_samples else 0
    max_samples_int = requested if requested > 0 else None

    def run_training():
        """Worker body: run the job, record any failure, release the flag."""
        global training_active
        try:
            from train import train as run_train, abliterate

            if method == "QLoRA Fine-Tuning":
                run_train(
                    dataset_choice=dataset_choice,
                    hub_model_id=hub_id,
                    max_samples=max_samples_int,
                    custom_dataset_path=custom_path,
                    num_epochs=int(num_epochs),
                    learning_rate=float(learning_rate),
                    lora_r=int(lora_r),
                    lora_alpha=int(lora_alpha),
                    batch_size=int(batch_size),
                    grad_accum=int(grad_accum),
                    max_seq_length=int(max_seq_length),
                    system_prompt=system_prompt,
                )
            elif method == "Abliteration (No Training)":
                abliterate(hub_model_id=hub_id)
            else:
                # Surface an unrecognised choice instead of finishing
                # silently after reporting a successful launch.
                raise ValueError(f"Unknown method: {method!r}")
        except Exception as e:
            status_data = {
                "status": "error",
                "detail": str(e),
                "progress": 0.0,
                "metrics": {},
            }
            Path(STATUS_FILE).write_text(json.dumps(status_data))
        finally:
            training_active = False

    # Claim the busy flag here, before the thread exists. Setting it only
    # inside the worker leaves a gap in which a second click still sees
    # training_active == False and launches a second job.
    training_active = True
    training_thread = threading.Thread(target=run_training, daemon=True)
    try:
        training_thread.start()
    except Exception:
        # The worker's finally-block will never run, so release the flag here.
        training_active = False
        raise

    return "🚀 Training launched! Monitor progress below."


def start_merge(hub_model_id_merge):
    """Launch the LoRA merge-and-push job in a daemon thread.

    Args:
        hub_model_id_merge: Hub repository for the merged model; required.

    Returns:
        str: A message for the UI — a busy/validation error, or confirmation
        that the merge was started. Failures inside the worker are written to
        ``STATUS_FILE`` as an ``"error"`` status rather than returned.
    """
    global training_active

    if training_active:
        return "⚠️ Another process is running."

    # Tolerate None as well as blank text from the textbox.
    merged_id = (hub_model_id_merge or "").strip()
    if not merged_id:
        return "❌ Hub Model ID for merged model is required"

    def run_merge():
        """Worker body: run the merge, record any failure, release the flag."""
        global training_active
        try:
            from scripts.merge_and_push import merge_and_push

            merge_and_push(hub_model_id=merged_id)
        except Exception as e:
            Path(STATUS_FILE).write_text(
                json.dumps(
                    {
                        "status": "error",
                        "detail": str(e),
                        "progress": 0.0,
                        "metrics": {},
                    }
                )
            )
        finally:
            training_active = False

    # Claim the busy flag before the thread starts; setting it only inside
    # the worker lets a quick second click slip past the check above.
    training_active = True
    try:
        threading.Thread(target=run_merge, daemon=True).start()
    except Exception:
        # The worker's finally-block will never run, so release the flag here.
        training_active = False
        raise
    return "🔀 Merge started! This will take a while for an 80B model."


def poll_status():
    """Returns updated status for the UI.

    Collects the status record, a 50-line log tail and the GPU summary.
    Missing or oddly-typed fields in the status record are tolerated so that
    the periodic refresh keeps working on a partial or unexpected record.

    Returns:
        tuple: ``(status_text, progress, metrics_text, logs, gpu)`` in the
        order expected by the Monitor tab's output components.
    """
    s = read_status()
    if not isinstance(s, dict):
        s = {}

    status = str(s.get("status", "unknown"))
    detail = s.get("detail", "")
    progress = s.get("progress", 0.0)

    status_emoji = {
        "idle": "⏸️",
        "initializing": "⚙️",
        "loading_model": "📥",
        "training": "🏋️",
        "saving": "💾",
        "saving_checkpoint": "💾",
        "pushing": "🚀",
        "merging": "🔀",
        "abliterating": "✂️",
        "completed": "✅",
        "error": "❌",
    }.get(status, "❓")

    status_text = f"{status_emoji} **{status.upper()}**: {detail}"

    metrics_text = ""
    m = s.get("metrics")
    if isinstance(m, dict) and m:
        parts = []
        if "step" in m:
            parts.append(f"Step: {m['step']}/{m.get('total_steps', '?')}")
        if "epoch" in m:
            parts.append(f"Epoch: {m['epoch']}")
        if "loss" in m:
            parts.append(f"Loss: {m['loss']}")
        if "learning_rate" in m:
            lr = m["learning_rate"]
            try:
                parts.append(f"LR: {lr:.2e}")
            except (TypeError, ValueError):
                # Non-numeric value (e.g. None or a string): show it as-is
                # instead of failing the whole refresh.
                parts.append(f"LR: {lr}")
        if "grad_norm" in m:
            parts.append(f"Grad Norm: {m['grad_norm']}")
        if "train_loss" in m:
            parts.append(f"Final Loss: {m['train_loss']}")
        if "train_runtime" in m:
            parts.append(f"Runtime: {m['train_runtime']}s")
        metrics_text = " | ".join(parts)

    logs = read_logs(50)
    gpu = get_gpu_info()

    return status_text, progress, metrics_text, logs, gpu


# ---------------------------------------------------------------------------
# Gradio UI
# ---------------------------------------------------------------------------


def build_ui():
    """Construct the Gradio Blocks application.

    Loads ``config.yaml`` through ``load_config`` and uses the keys of its
    ``"datasets"`` mapping as the dataset dropdown choices and its optional
    ``"system_prompt"`` entry as the default prompt. The interface has four
    tabs: Train (form wired to ``start_training``), Monitor (manual refresh
    button plus a 10-second timer, both calling ``poll_status``), Merge LoRA
    (wired to ``start_merge``) and Help (static Markdown).

    Returns:
        The ``gr.Blocks`` app, not yet launched.
    """
    config = load_config()
    # NOTE(review): dataset_names[0] below raises IndexError when the config
    # lists no datasets.
    dataset_names = list(config["datasets"].keys())

    with gr.Blocks(
        title="Qwen3-Coder-Next Uncensored Fine-Tuner",
    ) as app:
        gr.Markdown("# Qwen3-Coder-Next Uncensored Fine-Tuner")
        gr.Markdown(
            "*QLoRA fine-tuning & abliteration for Qwen3-Coder-Next (80B MoE / 3B active)*",
        )

        with gr.Tabs():
            # ==================================================================
            # TAB 1: Training Configuration
            # ==================================================================
            with gr.Tab("🎯 Train"):
                with gr.Row():
                    # Left column: method, Hub target, dataset and prompt.
                    with gr.Column(scale=1):
                        gr.Markdown("### Method")
                        method = gr.Radio(
                            ["QLoRA Fine-Tuning", "Abliteration (No Training)"],
                            value="QLoRA Fine-Tuning",
                            label="Uncensoring Method",
                        )

                        gr.Markdown("### Hub Settings")
                        hub_model_id = gr.Textbox(
                            label="Hub Model ID",
                            placeholder="your-username/qwen3-coder-uncensored",
                            info="Where to push the trained model on Hugging Face",
                        )

                        gr.Markdown("### Dataset")
                        dataset_choice = gr.Dropdown(
                            choices=dataset_names,
                            value=dataset_names[0],
                            label="Pre-built Dataset",
                            info="Choose an uncensored dataset",
                        )
                        custom_dataset = gr.File(
                            label="Or Upload Custom Dataset (JSON/CSV/Parquet)",
                            file_types=[".json", ".jsonl", ".csv", ".parquet"],
                        )
                        max_samples = gr.Number(
                            label="Max Samples (0 = use all)",
                            value=5000,
                            info="Limit dataset size for faster experiments",
                        )

                        gr.Markdown("### System Prompt")
                        system_prompt = gr.Textbox(
                            label="System Prompt",
                            value=config.get("system_prompt", ""),
                            lines=3,
                            info="Embedded in every training sample",
                        )

                    # Right column: optimisation and LoRA hyperparameters.
                    with gr.Column(scale=1):
                        gr.Markdown("### Training Hyperparameters")
                        num_epochs = gr.Slider(1, 10, value=2, step=1, label="Epochs")
                        learning_rate = gr.Number(label="Learning Rate", value=2e-4)
                        batch_size = gr.Slider(
                            1, 8, value=1, step=1, label="Batch Size per Device"
                        )
                        grad_accum = gr.Slider(
                            1, 64, value=8, step=1, label="Gradient Accumulation Steps"
                        )
                        max_seq_length = gr.Slider(
                            256, 4096, value=512, step=256, label="Max Sequence Length"
                        )

                        gr.Markdown("### LoRA Configuration")
                        lora_r = gr.Slider(
                            8, 128, value=16, step=8, label="LoRA Rank (r)"
                        )
                        lora_alpha = gr.Slider(
                            16, 256, value=32, step=16, label="LoRA Alpha"
                        )

                with gr.Row():
                    train_btn = gr.Button(
                        "🚀 Start Training", variant="primary", size="lg"
                    )
                    output_msg = gr.Textbox(label="Status", interactive=False)

                # The inputs list must stay in the same order as the
                # positional parameters of start_training.
                train_btn.click(
                    fn=start_training,
                    inputs=[
                        method,
                        dataset_choice,
                        custom_dataset,
                        hub_model_id,
                        max_samples,
                        num_epochs,
                        learning_rate,
                        lora_r,
                        lora_alpha,
                        batch_size,
                        grad_accum,
                        max_seq_length,
                        system_prompt,
                    ],
                    outputs=output_msg,
                )

            # ==================================================================
            # TAB 2: Monitoring
            # ==================================================================
            with gr.Tab("📊 Monitor"):
                with gr.Row():
                    status_text = gr.Markdown("⏸️ **IDLE**: Ready to start training")
                    refresh_btn = gr.Button("🔄 Refresh", size="sm")

                progress_bar = gr.Slider(
                    0, 1, value=0, label="Progress", interactive=False
                )
                metrics_display = gr.Textbox(
                    label="Training Metrics", interactive=False, lines=2
                )
                gpu_info = gr.Textbox(label="GPU Info", interactive=False, lines=4)
                log_display = gr.Textbox(
                    label="Training Logs (last 50 lines)", interactive=False, lines=20
                )

                # The outputs order matches poll_status's return tuple:
                # (status_text, progress, metrics_text, logs, gpu).
                refresh_btn.click(
                    fn=poll_status,
                    outputs=[
                        status_text,
                        progress_bar,
                        metrics_display,
                        log_display,
                        gpu_info,
                    ],
                )

                # Auto-refresh every 10 seconds using Timer
                timer = gr.Timer(value=10)
                timer.tick(
                    fn=poll_status,
                    outputs=[
                        status_text,
                        progress_bar,
                        metrics_display,
                        log_display,
                        gpu_info,
                    ],
                )

            # ==================================================================
            # TAB 3: Merge & Push
            # ==================================================================
            with gr.Tab("🔀 Merge LoRA"):
                gr.Markdown("""
                ### Merge LoRA Adapter into Base Model

                After training completes, use this to merge the LoRA adapter into the base model
                and push a standalone model to the Hub.

                **⚠️ Warning**: This loads the full 80B model in bfloat16. Requires ~160GB RAM.
                For the A100 80GB, this may require CPU offloading.
                """)

                hub_model_id_merge = gr.Textbox(
                    label="Hub Model ID for Merged Model",
                    placeholder="your-username/qwen3-coder-uncensored-merged",
                    info="This should be DIFFERENT from the adapter model ID",
                )

                merge_btn = gr.Button("🔀 Merge & Push", variant="primary")
                merge_output = gr.Textbox(label="Status", interactive=False)

                merge_btn.click(
                    fn=start_merge,
                    inputs=[hub_model_id_merge],
                    outputs=merge_output,
                )

            # ==================================================================
            # TAB 4: Help
            # ==================================================================
            with gr.Tab("❓ Help"):
                # NOTE(review): the text below suggests decreasing
                # `max_seq_length` to 1024 for faster training, but the
                # slider above defaults to 512 — confirm the intended advice.
                gr.Markdown("""
                ## Quick Start Guide

                ### 1. Set Your HF Token
                Go to **Space Settings → Secrets** and add:
                - `HF_TOKEN`: Your Hugging Face token with **write** access
                - `WANDB_API_KEY` (optional): For training metrics dashboard

                ### 2. Choose Your Method

                | Method | Pros | Cons |
                |--------|------|------|
                | **QLoRA Fine-Tuning** | More control, better results, uses dataset | Takes hours, needs GPU |
                | **Abliteration** | Fast (~30 min), no dataset needed | Less precise, may affect model quality |

                ### 3. QLoRA Fine-Tuning
                1. Select a dataset (or upload your own)
                2. Set your Hub Model ID (e.g., `your-username/qwen3-uncensored`)
                3. Adjust hyperparameters if needed
                4. Click **Start Training**
                5. Monitor progress in the **Monitor** tab
                6. When done, optionally merge LoRA in the **Merge** tab

                ### 4. Custom Dataset Format
                Your JSON/JSONL should have these fields:
                ```json
                {
                    "instruction": "User's question/request",
                    "output": "Uncensored response"
                }
                ```
                Also supported fields: `prompt`/`response`, `input`/`answer`, `user`/`assistant`

                ### 5. Recommended Settings
                - **First run**: Use `max_samples=1000` to test the pipeline quickly
                - **Full training**: Set `max_samples=0` (all data), `epochs=2-3`
                - **Better quality**: Increase `lora_r` to 128, `lora_alpha` to 256
                - **Faster training**: Decrease `max_seq_length` to 1024

                ### 6. After Training
                The LoRA adapter is automatically pushed to your Hub repo.
                You can:
                - **Use the adapter directly** with PEFT (lightweight)
                - **Merge into base model** using the Merge tab (standalone model)

                ### Using the Adapter
                ```python
                from transformers import AutoModelForCausalLM, AutoTokenizer
                from peft import PeftModel

                base = AutoModelForCausalLM.from_pretrained("Qwen/Qwen3-Coder-Next", device_map="auto")
                model = PeftModel.from_pretrained(base, "your-username/qwen3-uncensored")
                tokenizer = AutoTokenizer.from_pretrained("your-username/qwen3-uncensored")
                ```
                """)

    return app


if __name__ == "__main__":
    # Script entry point: build the interface, then serve it.
    app = build_ui()
    launch_options = {
        "server_name": "0.0.0.0",  # listen on all network interfaces
        "server_port": 7860,
        "share": False,
        "theme": gr.themes.Soft(primary_hue="red", secondary_hue="orange"),
    }
    app.launch(**launch_options)