| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179 |
- """
- Merge LoRA adapter into base model and push to Hugging Face Hub.
- Run this AFTER training completes to create a standalone model.
- """
- import os
- import sys
- import json
- import yaml
- import torch
- import logging
- from pathlib import Path
- from transformers import AutoModelForCausalLM, AutoTokenizer
- from peft import PeftModel
- from huggingface_hub import HfApi
# Route INFO-and-above records through the root logger with a timestamped
# "<time> - <level> - <message>" layout.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

# Module-level logger used by every step of the merge pipeline.
logger = logging.getLogger(__name__)

# JSON file that write_status() overwrites with the current stage/progress.
STATUS_FILE = "/home/user/training_status.json"
def write_status(status: str, detail: str = "", progress: float = 0.0) -> None:
    """Overwrite STATUS_FILE with a JSON snapshot of the current stage.

    The file content is a single JSON object with the keys ``status``,
    ``detail``, ``progress`` and an always-empty ``metrics`` dict.

    The snapshot is written to a temporary sibling file and then moved over
    STATUS_FILE with ``os.replace``, so a process reading the status file
    never observes an empty or partially written document.

    Args:
        status: Short machine-readable stage name (e.g. "merging").
        detail: Human-readable description of what is happening.
        progress: Completion value stored as-is (callers in this file pass
            values between 0.0 and 1.0).

    Raises:
        OSError: If the temporary file cannot be written or renamed.
    """
    data = {"status": status, "detail": detail, "progress": progress, "metrics": {}}
    # Serialize first: a serialization error must not touch the existing file.
    payload = json.dumps(data)

    target = Path(STATUS_FILE)
    # Same directory as the target so the rename stays on one filesystem.
    tmp = target.with_name(f"{target.name}.{os.getpid()}.tmp")
    try:
        tmp.write_text(payload)
        os.replace(tmp, target)
    finally:
        # After a successful replace the temp file is gone; this only cleans
        # up when the write or the rename failed part-way.
        if tmp.exists():
            tmp.unlink()
def _render_model_card(hub_model_id: str, model_name: str, config: dict) -> str:
    """Return the README.md text (YAML front matter + Markdown) for the Hub repo."""
    lora = config["lora"]
    training = config["training"]
    repo_name = hub_model_id.split("/")[-1]
    target_modules = ", ".join(lora["target_modules"])
    # The template is kept at column 0 so the front matter and the fenced code
    # block are emitted without leading whitespace.
    return f"""---
license: apache-2.0
base_model: {model_name}
tags:
- qwen3
- uncensored
- fine-tuned
- qlora
- merged
---
# {repo_name}
Fine-tuned and uncensored version of [{model_name}](https://huggingface.co/{model_name}).
## Training Details
- **Method**: QLoRA 4-bit fine-tuning
- **Base Model**: {model_name} (80B MoE / 3B active parameters)
- **LoRA Rank**: {lora["r"]}
- **LoRA Alpha**: {lora["lora_alpha"]}
- **Target Modules**: {target_modules}
- **Epochs**: {training["num_train_epochs"]}
- **Learning Rate**: {training["learning_rate"]}
- **Max Seq Length**: {training["max_seq_length"]}
## Usage
```python
from transformers import AutoModelForCausalLM, AutoTokenizer
model = AutoModelForCausalLM.from_pretrained("{hub_model_id}", torch_dtype="auto", device_map="auto")
tokenizer = AutoTokenizer.from_pretrained("{hub_model_id}")
messages = [{{"role": "user", "content": "Your prompt here"}}]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
inputs = tokenizer([text], return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=4096)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```
"""


def merge_and_push(
    adapter_path: str = "/home/user/output/final_adapter",
    hub_model_id: str = "",
    push_to_hub: bool = True,
    config_path: str = "config.yaml",
    output_path: str = "/home/user/merged",
) -> str:
    """
    Load the base model, merge the LoRA adapter, and optionally push to Hub.

    WARNING: This requires significant RAM/VRAM because the full model must be loaded.
    For the 80B MoE model, you'll need ~160GB RAM or ~80GB VRAM to merge in bf16.

    Steps: read the base model name from the YAML config, load the base model
    in bfloat16, apply and merge the adapter, save the merged weights and the
    tokenizer to ``output_path`` (safetensors, 4GB shards) and, when requested,
    upload that folder plus a generated README.md model card to the Hub.
    Progress is reported through ``write_status`` at each stage.

    Args:
        adapter_path: Directory containing the trained LoRA adapter.
        hub_model_id: Target Hub repository (``user/name``). When empty, the
            push step is skipped even if ``push_to_hub`` is true.
        push_to_hub: Whether to upload the merged model after saving it.
        config_path: YAML file providing ``model.name`` and, for the model
            card, the ``lora`` and ``training`` sections.
        output_path: Local directory the merged model is written to.

    Returns:
        ``output_path``, the directory holding the merged model.

    Raises:
        ValueError: If the ``HF_TOKEN`` environment variable is unset or empty.
        KeyError: If the config lacks ``model.name`` or, when pushing, a key
            needed for the model card.
    """
    hf_token = os.environ.get("HF_TOKEN")
    if not hf_token:
        raise ValueError("HF_TOKEN environment variable is required")

    with open(config_path, encoding="utf-8") as f:
        config = yaml.safe_load(f)
    model_name = config["model"]["name"]

    should_push = bool(push_to_hub and hub_model_id)
    if push_to_hub and not hub_model_id:
        # Previously this combination skipped the upload without any notice.
        logger.warning(
            "push_to_hub is enabled but hub_model_id is empty; "
            "the merged model will only be saved locally"
        )

    # Build the card before the expensive load/merge/upload so that a missing
    # config key fails immediately instead of after the weights were uploaded.
    model_card = (
        _render_model_card(hub_model_id, model_name, config) if should_push else None
    )

    # -----------------------------------------------------------------------
    # 1. Load base model in bf16
    # -----------------------------------------------------------------------
    write_status("merging", "Loading base model in bfloat16...", 0.1)
    logger.info("Loading base model: %s", model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        trust_remote_code=True,
        token=hf_token,
    )
    tokenizer = AutoTokenizer.from_pretrained(
        model_name,
        trust_remote_code=True,
        token=hf_token,
    )

    # -----------------------------------------------------------------------
    # 2. Load and merge LoRA adapter
    # -----------------------------------------------------------------------
    write_status("merging", "Merging LoRA adapter into base model...", 0.4)
    logger.info("Loading adapter from: %s", adapter_path)
    model = PeftModel.from_pretrained(model, adapter_path)
    model = model.merge_and_unload()
    logger.info("LoRA adapter merged successfully")

    # -----------------------------------------------------------------------
    # 3. Save merged model
    # -----------------------------------------------------------------------
    write_status("merging", "Saving merged model...", 0.6)
    logger.info("Saving merged model to: %s", output_path)
    model.save_pretrained(output_path, safe_serialization=True, max_shard_size="4GB")
    tokenizer.save_pretrained(output_path)

    # -----------------------------------------------------------------------
    # 4. Push to Hub
    # -----------------------------------------------------------------------
    if should_push:
        write_status("pushing", f"Pushing merged model to {hub_model_id}...", 0.8)
        logger.info("Pushing to: %s", hub_model_id)
        api = HfApi(token=hf_token)
        api.create_repo(hub_model_id, exist_ok=True)
        api.upload_folder(
            folder_path=output_path,
            repo_id=hub_model_id,
            commit_message="Upload merged Qwen3-Coder-Next uncensored (LoRA merged)",
        )
        logger.info("Model pushed to https://huggingface.co/%s", hub_model_id)

        # Create model card (uploaded as README.md in its own commit)
        api.upload_file(
            path_or_fileobj=model_card.encode(),
            path_in_repo="README.md",
            repo_id=hub_model_id,
            commit_message="Add model card",
        )

    write_status("completed", f"Merge complete! Model at {output_path}", 1.0)
    logger.info("Done!")
    return output_path
if __name__ == "__main__":
    import argparse

    # Command-line entry point: parse the options and run the merge.
    parser = argparse.ArgumentParser()
    parser.add_argument("--adapter-path", default="/home/user/output/final_adapter")
    parser.add_argument(
        "--hub-model-id",
        default="",
        help=(
            "e.g. your-username/qwen3-coder-uncensored-merged "
            "(required unless --no-push is given)"
        ),
    )
    parser.add_argument("--no-push", action="store_true")
    args = parser.parse_args()

    # A repository id is only meaningful when pushing; a local-only merge
    # (--no-push) must not be forced to supply one.
    if not args.no_push and not args.hub_model_id:
        parser.error("--hub-model-id is required unless --no-push is given")

    merge_and_push(
        adapter_path=args.adapter_path,
        hub_model_id=args.hub_model_id,
        push_to_hub=not args.no_push,
    )
|