戒酒的李白

Updated how the fine-tuned BERT model is stored.

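For reference, here is a minimal standalone sketch of the local-caching pattern this change introduces in the two prediction scripts: load the fine-tuned BERT from a `./model` folder when it exists, otherwise download it once from the Hub and save a copy there. The model name and path are taken from the diff below; the helper function name is illustrative and not part of the change.

```python
import os

from transformers import AutoModelForSequenceClassification, AutoTokenizer

MODEL_NAME = "wsqstar/GISchat-weibo-100k-fine-tuned-bert"
LOCAL_MODEL_PATH = "./model"


def load_model_and_tokenizer():
    """Load from ./model if present, otherwise download once and cache it."""
    if os.path.exists(LOCAL_MODEL_PATH):
        # Reuse the copy saved by a previous run (no network needed).
        tokenizer = AutoTokenizer.from_pretrained(LOCAL_MODEL_PATH)
        model = AutoModelForSequenceClassification.from_pretrained(LOCAL_MODEL_PATH)
    else:
        # First run: download from the Hub, then persist a local copy.
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
        tokenizer.save_pretrained(LOCAL_MODEL_PATH)
        model.save_pretrained(LOCAL_MODEL_PATH)
    return model, tokenizer
```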
@@ -181,6 +181,7 @@ WeiboSentiment_Finetuned/GPT2-Lora/models/
 WeiboSentiment_Finetuned/GPT2-AdapterTuning/models/
 WeiboSentiment_Finetuned/BertChinese-Lora/models/
 WeiboSentiment_LLM/models/
+WeiboSentiment_Finetuned/BertChinese-Lora/model/
 
 # LoRA and Adapter weights
 */adapter_model.safetensors
@@ -64,8 +64,15 @@ print("Positive sentiment" if prediction == 1 else "Negative sentiment")
 - `predict_pipeline.py`: prediction script using the pipeline approach
 - `README.md`: usage instructions
 
+## Model storage
+
+- On first run, the model is downloaded automatically into the `model` folder in the current directory
+- Subsequent runs load it directly from this local copy, with no repeated download
+- The model is about 400 MB, and the first download requires a network connection
+
 ## Notes
 
 - The model is downloaded automatically on first run, which requires a network connection
-- The model is about 400 MB, so the download may take some time
-- GPU acceleration is supported; the available device is detected automatically
+- The model is saved to the current directory for easy reuse
+- GPU acceleration is supported; the available device is detected automatically
+- To remove the model files, simply delete the `model` folder
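As a side note on the design choice documented in the new "Model storage" section: an alternative to keeping an explicit `model` folder would be to let transformers manage its own download cache via the `cache_dir` argument. The sketch below shows that alternative under an assumed cache path; it is not what this change does.

```python
# Alternative (not used by this change): rely on transformers' download cache
# instead of calling save_pretrained() explicitly. Repeated from_pretrained()
# calls with the same cache_dir reuse the files downloaded on the first run.
from transformers import AutoModelForSequenceClassification, AutoTokenizer

model_name = "wsqstar/GISchat-weibo-100k-fine-tuned-bert"
tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir="./hf_cache")
model = AutoModelForSequenceClassification.from_pretrained(model_name, cache_dir="./hf_cache")
```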
@@ -16,11 +16,25 @@ def main():
 
     # Use the HuggingFace pre-trained model
    model_name = "wsqstar/GISchat-weibo-100k-fine-tuned-bert"
+    local_model_path = "./model"
 
     try:
-        # Load the model and tokenizer
-        tokenizer = AutoTokenizer.from_pretrained(model_name)
-        model = AutoModelForSequenceClassification.from_pretrained(model_name)
+        # Check whether a local copy of the model already exists
+        import os
+        if os.path.exists(local_model_path):
+            print("Loading model from local directory...")
+            tokenizer = AutoTokenizer.from_pretrained(local_model_path)
+            model = AutoModelForSequenceClassification.from_pretrained(local_model_path)
+        else:
+            print("First run: downloading model to local directory...")
+            # Download the model and tokenizer
+            tokenizer = AutoTokenizer.from_pretrained(model_name)
+            model = AutoModelForSequenceClassification.from_pretrained(model_name)
+
+            # Save them locally for later runs
+            tokenizer.save_pretrained(local_model_path)
+            model.save_pretrained(local_model_path)
+            print(f"Model saved to: {local_model_path}")
 
         # Set the device
         device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
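The diff for this prediction script stops at the device setup, so here is a hedged sketch of the inference step that presumably follows it. The label-to-sentiment mapping (1 = positive) comes from the README snippet quoted in the hunk header above; the input sentence is illustrative.

```python
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Load the copy saved by the code in the hunk above (assumes ./model exists).
local_model_path = "./model"
tokenizer = AutoTokenizer.from_pretrained(local_model_path)
model = AutoModelForSequenceClassification.from_pretrained(local_model_path)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
model.eval()

# Illustrative input; any short Weibo-style sentence works here.
inputs = tokenizer("今天心情很好", return_tensors="pt", truncation=True).to(device)
with torch.no_grad():
    logits = model(**inputs).logits
prediction = logits.argmax(dim=-1).item()
print("Positive sentiment" if prediction == 1 else "Negative sentiment")
```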
@@ -15,13 +15,36 @@ def main():
 
     # Use the pipeline approach - simpler
     model_name = "wsqstar/GISchat-weibo-100k-fine-tuned-bert"
+    local_model_path = "./model"
 
     try:
-        classifier = pipeline(
-            "text-classification",
-            model=model_name,
-            return_all_scores=True
-        )
+        # Check whether a local copy of the model already exists
+        import os
+        if os.path.exists(local_model_path):
+            print("Loading model from local directory...")
+            classifier = pipeline(
+                "text-classification",
+                model=local_model_path,
+                return_all_scores=True
+            )
+        else:
+            print("First run: downloading model to local directory...")
+            # Download the model first
+            from transformers import AutoTokenizer, AutoModelForSequenceClassification
+            tokenizer = AutoTokenizer.from_pretrained(model_name)
+            model = AutoModelForSequenceClassification.from_pretrained(model_name)
+
+            # Save it locally for later runs
+            tokenizer.save_pretrained(local_model_path)
+            model.save_pretrained(local_model_path)
+            print(f"Model saved to: {local_model_path}")
+
+            # Build the pipeline from the local copy
+            classifier = pipeline(
+                "text-classification",
+                model=local_model_path,
+                return_all_scores=True
+            )
         print("Model loaded successfully!")
 
     except Exception as e:
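To round out the pipeline variant, here is a usage sketch of the classifier built above, assuming the model has already been saved to `./model`. With `return_all_scores=True` each input yields one score per label; the `LABEL_0`/`LABEL_1` names are the typical defaults and an assumption here, and newer transformers releases favor `top_k=None` over the deprecated `return_all_scores` flag.

```python
from transformers import pipeline

# Build the classifier from the local copy saved on a previous run.
classifier = pipeline(
    "text-classification",
    model="./model",
    return_all_scores=True
)

results = classifier("这家店的服务态度非常好")
# Example shape: [[{'label': 'LABEL_0', 'score': 0.03}, {'label': 'LABEL_1', 'score': 0.97}]]
for scores in results:
    best = max(scores, key=lambda s: s["score"])
    print(f"{best['label']}: {best['score']:.4f}")
```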