Tuning Qwen3 fine-tuning hyperparameters

戒酒的李白
Commit d2e0cc7daf182767d5789d02d736b32c704e5886 d2e0cc7d 1 parent ad27791e
Showing 3 changed files with 25 additions and 15 deletions
WeiboSentiment_SmallQwen/qwen3_embedding_universal.py
WeiboSentiment_SmallQwen/qwen3_lora_universal.py
WeiboSentiment_SmallQwen/readme.md
--- a/WeiboSentiment_SmallQwen/qwen3_embedding_universal.py
View file @d2e0cc7
+++ b/WeiboSentiment_SmallQwen/qwen3_embedding_universal.py
View file @d2e0cc7
@@ -347,9 +347,9 @@ def main():
         print("Qwen3-Embedding模型训练")
         print("="*40)
         print("可用模型大小:")
-         print("  1. 0.6B - 轻量级，训练快速，显存需求约2GB")
-         print("  2. 4B  - 中等规模，性能均衡，显存需求约8GB") 
-         print("  3. 8B  - 大规模，性能最佳，显存需求约16GB")
+         print("  1. 0.6B - 轻量级，训练快速，显存需求约4GB")
+         print("  2. 4B  - 中等规模，性能均衡，显存需求约16GB") 
+         print("  3. 8B  - 大规模，性能最佳，显存需求约32GB")
         
         while True:
             choice = input("\n请选择模型大小 (1/2/3): ").strip()
--- a/WeiboSentiment_SmallQwen/qwen3_lora_universal.py
View file @d2e0cc7
+++ b/WeiboSentiment_SmallQwen/qwen3_lora_universal.py
View file @d2e0cc7
@@ -155,7 +155,7 @@ class Qwen3LoRAUniversal(BaseQwenModel):
         tokenized = self.tokenizer(
             examples["text"],
             truncation=True,
-             padding=False,
+             padding="max_length",
             max_length=512,
             return_tensors=None
         )
@@ -178,9 +178,15 @@ class Qwen3LoRAUniversal(BaseQwenModel):
         
         self.lora_model = get_peft_model(self.base_model, lora_config)
         
+         # 统计参数
+         total_params = sum(p.numel() for p in self.lora_model.parameters())
+         trainable_params = sum(p.numel() for p in self.lora_model.parameters() if p.requires_grad)
+         
         print(f"LoRA配置完成 (r={lora_r}, alpha={lora_alpha})")
-         print(f"可训练参数: {self.lora_model.num_parameters():,}")
-         print(f"参数比例: {self.lora_model.num_parameters() / self.lora_model.base_model.num_parameters() * 100:.2f}%")
+         print(f"总参数: {total_params:,}")
+         print(f"可训练参数: {trainable_params:,}")
+         print(f"可训练参数比例: {trainable_params / total_params * 100:.2f}%")
+         self.lora_model.print_trainable_parameters()  # PEFT库自带的参数统计
         
         return lora_config
     
@@ -360,7 +366,7 @@ def main():
     parser.add_argument('--batch_size', type=int, help='批大小（可选，使用推荐值）')
     parser.add_argument('--learning_rate', type=float, help='学习率（可选，使用推荐值）')
     parser.add_argument('--lora_r', type=int, help='LoRA秩（可选，使用推荐值）')
-     parser.add_argument('--max_samples', type=int, default=1000, help='最大训练样本数')
+     parser.add_argument('--max_samples', type=int, default=0, help='最大训练样本数（0表示使用全部数据）')
     parser.add_argument('--eval_only', action='store_true', help='仅评估模式')
     
     args = parser.parse_args()
@@ -370,9 +376,9 @@ def main():
         print("Qwen3-LoRA模型训练")
         print("="*40)
         print("可用模型大小:")
-         print("  1. 0.6B - 轻量级，训练快速，显存需求约4GB")
-         print("  2. 4B  - 中等规模，性能均衡，显存需求约16GB") 
-         print("  3. 8B  - 大规模，性能最佳，显存需求约32GB")
+         print("  1. 0.6B - 轻量级，训练快速，显存需求约8GB")
+         print("  2. 4B  - 中等规模，性能均衡，显存需求约32GB") 
+         print("  3. 8B  - 大规模，性能最佳，显存需求约64GB")
         print("\n注意: LoRA微调比Embedding方法需要更多显存")
         
         while True:
@@ -414,9 +420,13 @@ def main():
         # 训练模式
         train_data, test_data = BaseQwenModel.load_data(args.train_path, args.test_path)
         
-         # 由于LoRA训练资源消耗大，使用部分数据
+         # 训练数据处理
+         if args.max_samples > 0:
             train_subset = train_data[:args.max_samples]
             print(f"使用 {len(train_subset)} 条数据进行LoRA训练")
+         else:
+             train_subset = train_data
+             print(f"使用全部 {len(train_subset)} 条数据进行LoRA训练")
         
         # 准备训练参数
         train_kwargs = {'num_epochs': args.epochs}
--- a/WeiboSentiment_SmallQwen/readme.md
View file @d2e0cc7
+++ b/WeiboSentiment_SmallQwen/readme.md
View file @d2e0cc7
@@ -10,7 +10,7 @@ qwen 0.6B模型加线性分类器，做特定领域的文本分类和序列标
 
 在经过了一些相关的调研之后，我觉得将Qwen3的一些小参数模型用在本系统中是一个不错的选择。
 
- 虽然这个参数在LLM时代算小，但作为个人开发者计算资源有限，微调他们还是实属不易。
+ 虽然这个参数量在LLM时代算小，但作为个人开发者计算资源有限，微调它们还是实属不易，在一张A100上训练了整整四天，求求star了。
 
 ## 问题探究
 
@@ -87,9 +87,9 @@ python predict_universal.py --load_all --text "这个电影太棒了"
 ### 注意事项
 
 1. **显存要求**：
-    - 0.6B: 最低2GB显存
-    - 4B: 最低8GB显存  
-    - 8B: 最低16GB显存
+    - 0.6B: 最低4GB显存
+    - 4B: 最低16GB显存  
+    - 8B: 最低32GB显存
 
 2. **数据格式**：每行格式为`文本内容\t标签`，标签为0（负面）或1（正面）