Переглянути джерело

Update config for unsloth/Qwen3-30B-A3B

Sameric 4 місяців тому
батько
коміт
693efc83be
1 змінений файл: 16 додано та 29 видалено
  1. 16 29
      config.yaml

+ 16 - 29
config.yaml

@@ -1,31 +1,26 @@
 # =============================================================================
-# Qwen3-Coder-Next Uncensored Fine-Tuning Configuration
+# Qwen3 Uncensored Fine-Tuning Configuration (Unsloth)
 # =============================================================================
 
-# Model
+# Model — Unsloth handles quantization internally
 model:
-  name: "Qwen/Qwen3-Coder-Next"
+  name: "unsloth/Qwen3-30B-A3B"
   trust_remote_code: true
-  torch_dtype: "bfloat16"
-
-# Quantization (QLoRA 4-bit)
-quantization:
-  load_in_4bit: true
-  bnb_4bit_quant_type: "nf4"
-  bnb_4bit_compute_dtype: "bfloat16"
-  bnb_4bit_use_double_quant: true
 
 # LoRA Configuration
 lora:
-  r: 64
-  lora_alpha: 128
-  # Use "all-linear" to auto-discover all linear layers in the hybrid
-  # DeltaNet + Gated Attention + MoE architecture (safer than listing
-  # individual module names which may differ from standard Qwen).
-  target_modules: "all-linear"
-  lora_dropout: 0.05
+  r: 16
+  lora_alpha: 32
+  target_modules:
+    - q_proj
+    - k_proj
+    - v_proj
+    - o_proj
+    - gate_proj
+    - up_proj
+    - down_proj
+  lora_dropout: 0
   bias: "none"
-  task_type: "CAUSAL_LM"
 
 # Dataset options (pick one or provide custom)
 datasets:
@@ -70,7 +65,7 @@ training:
   output_dir: "/home/user/output"
   num_train_epochs: 2
   per_device_train_batch_size: 1
-  gradient_accumulation_steps: 16
+  gradient_accumulation_steps: 8
   learning_rate: 0.0002
   lr_scheduler_type: "cosine"
   warmup_ratio: 0.05
@@ -82,16 +77,8 @@ training:
   save_strategy: "steps"
   save_steps: 50
   save_total_limit: 3
-  max_seq_length: 2048
-  gradient_checkpointing: true
-  gradient_checkpointing_kwargs:
-    use_reentrant: false
-  optim: "paged_adamw_8bit"
-  report_to: "wandb"
+  max_seq_length: 512
   seed: 42
-  dataloader_num_workers: 4
-  dataloader_pin_memory: true
-  packing: true
 
 # Hub push settings
 hub: