models_config.py
1.39 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# -*- coding: utf-8 -*-
"""
Qwen3模型配置文件
定义不同规模的模型参数和配置
"""
# Qwen3 model configuration.
#
# Maps a size label ("0.6B", "4B", "8B") to that size's settings: the base and
# embedding model identifiers (presumably Hugging Face Hub repo ids -- confirm
# against the loader), the embedding dimension, the maximum sequence length,
# and the recommended batch size, learning rate and LoRA r/alpha values.
#
# Each row of the table below supplies the per-size values in this order:
#   size, base model, embedding model, embedding dim, batch size,
#   learning rate, LoRA r, LoRA alpha
# max_length is the same (32768) for every size, so it is set once in the
# entry template rather than repeated per row.
QWEN3_MODELS = {
    size: {
        "base_model": base_id,
        "embedding_model": embedding_id,
        "embedding_dim": dim,
        "max_length": 32768,
        "recommended_batch_size": batch_size,
        "recommended_lr": learning_rate,
        "lora_r": rank,
        "lora_alpha": alpha,
    }
    for (
        size,
        base_id,
        embedding_id,
        dim,
        batch_size,
        learning_rate,
        rank,
        alpha,
    ) in (
        ("0.6B", "Qwen/Qwen3-0.6B", "Qwen/Qwen3-Embedding-0.6B", 1024, 32, 1e-3, 16, 32),
        ("4B", "Qwen/Qwen3-4B", "Qwen/Qwen3-Embedding-4B", 2560, 16, 5e-4, 32, 64),
        ("8B", "Qwen/Qwen3-8B", "Qwen/Qwen3-Embedding-8B", 4096, 8, 2e-4, 64, 128),
    )
}
# Model file path configuration.
#
# Two-level lookup: artifact kind ("embedding" or "lora") -> size label
# ("0.6B", "4B", "8B") -> path string. All paths are relative and begin with
# "./models/". The "embedding" entries end in ".pth"; the "lora" entries have
# no file extension (presumably directories -- confirm against the code that
# saves and loads them).
MODEL_PATHS = {
    kind: dict(zip(("0.6B", "4B", "8B"), paths))
    for kind, paths in (
        (
            "embedding",
            (
                "./models/qwen3_embedding_0.6b_sentiment.pth",
                "./models/qwen3_embedding_4b_sentiment.pth",
                "./models/qwen3_embedding_8b_sentiment.pth",
            ),
        ),
        (
            "lora",
            (
                "./models/qwen3_lora_0.6b_final",
                "./models/qwen3_lora_4b_final",
                "./models/qwen3_lora_8b_final",
            ),
        ),
    )
}