4 meses atrás · 693efc83be
--- a/config.yaml
+++ b/config.yaml
@@ -1,31 +1,26 @@
 
															 # =============================================================================
														
 
															-# Qwen3-Coder-Next Uncensored Fine-Tuning Configuration
														
 
															+# Qwen3 Uncensored Fine-Tuning Configuration (Unsloth)
														
 
															 # =============================================================================
														
 
															-# Model
														
 
															+# Model — Unsloth handles quantization internally
														
 
															 model:
														
 
															-  name: "Qwen/Qwen3-Coder-Next"
														
 
															+  name: "unsloth/Qwen3-30B-A3B"
														
 
															   trust_remote_code: true
														
 
															-  torch_dtype: "bfloat16"
														
 
															-
														
 
															-# Quantization (QLoRA 4-bit)
														
 
															-quantization:
														
 
															-  load_in_4bit: true
														
 
															-  bnb_4bit_quant_type: "nf4"
														
 
															-  bnb_4bit_compute_dtype: "bfloat16"
														
 
															-  bnb_4bit_use_double_quant: true
														
 
															 # LoRA Configuration
														
 
															 lora:
														
 
															-  r: 64
														
 
															-  lora_alpha: 128
														
 
															-  # Use "all-linear" to auto-discover all linear layers in the hybrid
														
 
															-  # DeltaNet + Gated Attention + MoE architecture (safer than listing
														
 
															-  # individual module names which may differ from standard Qwen).
														
 
															-  target_modules: "all-linear"
														
 
															-  lora_dropout: 0.05
														
 
															+  r: 16
														
 
															+  lora_alpha: 32
														
 
															+  target_modules:
														
 
															+    - q_proj
														
 
															+    - k_proj
														
 
															+    - v_proj
														
 
															+    - o_proj
														
 
															+    - gate_proj
														
 
															+    - up_proj
														
 
															+    - down_proj
														
 
															+  lora_dropout: 0
														
 
															   bias: "none"
														
 
															-  task_type: "CAUSAL_LM"
														
 
															 # Dataset options (pick one or provide custom)
														
 
															 datasets:
														
@@ -70,7 +65,7 @@ training:
 
															   output_dir: "/home/user/output"
														
 
															   num_train_epochs: 2
														
 
															   per_device_train_batch_size: 1
														
 
															-  gradient_accumulation_steps: 16
														
 
															+  gradient_accumulation_steps: 8
														
 
															   learning_rate: 0.0002
														
 
															   lr_scheduler_type: "cosine"
														
 
															   warmup_ratio: 0.05
														
@@ -82,16 +77,8 @@ training:
 
															   save_strategy: "steps"
														
 
															   save_steps: 50
														
 
															   save_total_limit: 3
														
 
															-  max_seq_length: 2048
														
 
															-  gradient_checkpointing: true
														
 
															-  gradient_checkpointing_kwargs:
														
 
															-    use_reentrant: false
														
 
															-  optim: "paged_adamw_8bit"
														
 
															-  report_to: "wandb"
														
 
															+  max_seq_length: 512
														
 
															   seed: 42
														
 
															-  dataloader_num_workers: 4
														
 
															-  dataloader_pin_memory: true
														
 
															-  packing: true
														
 
															 # Hub push settings
														
 
															 hub: