predict.py
11.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
# -*- coding: utf-8 -*-
"""
统一的情感分析预测程序
支持加载所有模型进行情感预测
"""
import argparse
import os
import re
from typing import Dict, Tuple, List
import warnings
warnings.filterwarnings("ignore")
# 导入所有模型类
from bayes_train import BayesModel
from svm_train import SVMModel
from xgboost_train import XGBoostModel
from lstm_train import LSTMModel
from bert_train import BertModel_Custom
from utils import processing
class SentimentPredictor:
    """Load one or more sentiment models and run predictions with them.

    Loaded models live in ``self.models`` keyed by model type name. Labels are
    treated as binary by this class: ``1`` is reported as positive and any
    other value as negative.
    """

    def __init__(self):
        # Models loaded so far, keyed by model type name.
        self.models = {}
        # Supported model type name -> class used to instantiate it.
        self.available_models = {
            'bayes': BayesModel,
            'svm': SVMModel,
            'xgboost': XGBoostModel,
            'lstm': LSTMModel,
            'bert': BertModel_Custom
        }

    def load_model(self, model_type: str, model_path: str, **kwargs) -> None:
        """Load one model and register it under ``model_type``.

        Loading is best-effort: a missing file or an error raised while
        loading is reported on stdout and the model is simply not registered.

        Args:
            model_type: One of the keys of ``self.available_models``.
            model_path: Path of the saved model file.
            **kwargs: Extra options; ``bert_path`` is the pretrained BERT
                directory handed to the BERT model constructor.

        Raises:
            ValueError: If ``model_type`` is not a supported type.
        """
        if model_type not in self.available_models:
            raise ValueError(f"不支持的模型类型: {model_type}")
        if not os.path.exists(model_path):
            print(f"警告: 模型文件不存在: {model_path}")
            return
        print(f"加载 {model_type.upper()} 模型...")
        try:
            if model_type == 'bert':
                # BERT additionally needs the pretrained model directory.
                bert_path = kwargs.get('bert_path', './model/chinese_wwm_pytorch')
                model = BertModel_Custom(bert_path)
            else:
                model = self.available_models[model_type]()
            model.load_model(model_path)
            self.models[model_type] = model
            print(f"{model_type.upper()} 模型加载成功")
        except Exception as e:
            # Deliberate best-effort: one broken model must not prevent the
            # remaining models from loading.
            print(f"加载 {model_type.upper()} 模型失败: {e}")

    def load_all_models(self, model_dir: str = './model', bert_path: str = './model/chinese_wwm_pytorch') -> None:
        """Try to load every supported model from ``model_dir``.

        Args:
            model_dir: Directory containing the saved model files.
            bert_path: Pretrained BERT directory, forwarded to ``load_model``.
        """
        model_files = {
            'bayes': os.path.join(model_dir, 'bayes_model.pkl'),
            'svm': os.path.join(model_dir, 'svm_model.pkl'),
            'xgboost': os.path.join(model_dir, 'xgboost_model.pkl'),
            'lstm': os.path.join(model_dir, 'lstm_model.pth'),
            'bert': os.path.join(model_dir, 'bert_model.pth')
        }
        print("开始加载所有可用模型...")
        for model_type, model_path in model_files.items():
            self.load_model(model_type, model_path, bert_path=bert_path)
        print(f"\n已加载 {len(self.models)} 个模型: {list(self.models.keys())}")

    def _collect_predictions(self, processed_text):
        """Run every loaded model on already-preprocessed text.

        Args:
            processed_text: Text that has already been through ``processing``.

        Returns:
            ``(succeeded, failed)`` where ``succeeded`` maps model name to
            ``(prediction, confidence)`` and ``failed`` lists the names of the
            models whose prediction raised (each failure is also printed).
        """
        succeeded = {}
        failed = []
        for name, model in self.models.items():
            try:
                prediction, confidence = model.predict_single(processed_text)
            except Exception as e:
                print(f"模型 {name} 预测失败: {e}")
                failed.append(name)
            else:
                succeeded[name] = (prediction, confidence)
        return succeeded, failed

    def _with_placeholders(self, succeeded):
        """Return results for all loaded models, in load order.

        Models missing from ``succeeded`` (i.e. that failed) get the
        ``(0, 0.0)`` placeholder that ``predict_single`` has always reported.
        """
        return {name: succeeded.get(name, (0, 0.0)) for name in self.models}

    @staticmethod
    def _weighted_vote(results, weights=None):
        """Combine per-model results into one ``(prediction, confidence)``.

        Each result is converted to a probability of the positive class
        (``conf`` when the label is 1, ``1 - conf`` otherwise) and the
        probabilities are averaged using ``weights``.

        Args:
            results: Mapping of model name to ``(prediction, confidence)``.
            weights: Mapping of model name to weight; models absent from it
                are ignored. ``None`` gives every model weight 1.0.

        Returns:
            ``(prediction, confidence)``; ``(0, 0.5)`` when no model
            contributes any weight.
        """
        if weights is None:
            weights = {name: 1.0 for name in results}
        total_weight = 0
        weighted_prob = 0
        for name, (pred, conf) in results.items():
            if name not in weights:
                continue
            weight = weights[name]
            prob = conf if pred == 1 else 1 - conf
            weighted_prob += prob * weight
            total_weight += weight
        if total_weight == 0:
            return 0, 0.5
        final_prob = weighted_prob / total_weight
        final_pred = int(final_prob > 0.5)
        final_conf = final_prob if final_pred == 1 else 1 - final_prob
        return final_pred, final_conf

    def predict_single(self, text: str, model_type: str = None) -> Dict[str, Tuple[int, float]]:
        """Predict the sentiment of one text.

        Args:
            text: Raw text; it is passed through ``processing`` first.
            model_type: Model to use. When falsy, every loaded model is used.

        Returns:
            Mapping of model name to ``(prediction, confidence)``. When all
            models are used, a model whose prediction raised is reported as
            ``(0, 0.0)`` instead of propagating the error.

        Raises:
            ValueError: If ``model_type`` is given but that model is not loaded.
        """
        processed_text = processing(text)
        if model_type:
            if model_type not in self.models:
                raise ValueError(f"模型 {model_type} 未加载")
            prediction, confidence = self.models[model_type].predict_single(processed_text)
            return {model_type: (prediction, confidence)}
        succeeded, _ = self._collect_predictions(processed_text)
        return self._with_placeholders(succeeded)

    def predict_batch(self, texts: List[str], model_type: str = None) -> Dict[str, List[int]]:
        """Predict the sentiment of several texts.

        Args:
            texts: Raw texts; each is passed through ``processing`` first.
            model_type: Model to use. When falsy, every loaded model is used.

        Returns:
            Mapping of model name to that model's predictions. When all
            models are used, a model whose prediction raised is reported as a
            list of zeros of the same length as ``texts``.

        Raises:
            ValueError: If ``model_type`` is given but that model is not loaded.
        """
        processed_texts = [processing(text) for text in texts]
        if model_type:
            if model_type not in self.models:
                raise ValueError(f"模型 {model_type} 未加载")
            predictions = self.models[model_type].predict(processed_texts)
            return {model_type: predictions}
        results = {}
        for name, model in self.models.items():
            try:
                results[name] = model.predict(processed_texts)
            except Exception as e:
                print(f"模型 {name} 预测失败: {e}")
                results[name] = [0] * len(texts)
        return results

    def ensemble_predict(self, text: str, weights: Dict[str, float] = None) -> Tuple[int, float]:
        """Predict with all loaded models and combine them by weighted average.

        Models whose prediction raised are left out of the vote. Feeding
        their ``(0, 0.0)`` placeholder into the average would count each
        failure as a fully confident positive vote (``1 - 0.0``).

        Args:
            text: Raw text to classify.
            weights: Mapping of model name to weight; models absent from it
                are ignored. ``None`` weights all models equally.

        Returns:
            ``(prediction, confidence)``; ``(0, 0.5)`` when no model
            contributes to the vote.

        Raises:
            ValueError: If no model is loaded.
        """
        if not self.models:
            raise ValueError("没有加载任何模型")
        succeeded, _ = self._collect_predictions(processing(text))
        return self._weighted_vote(succeeded, weights)

    def interactive_predict(self):
        """Run a read-predict-print loop on stdin until the user quits.

        Commands: ``q`` quits, ``models`` lists the loaded models, and
        ``ensemble`` asks for a text and prints only the ensemble result.
        Any other non-empty input is classified by every loaded model.
        """
        if not self.models:
            print("错误: 没有加载任何模型,请先加载模型")
            return
        print("\n" + "="*50)
        print("="*50)
        print(f"已加载模型: {', '.join(self.models.keys())}")
        print("输入 'q' 退出程序")
        print("输入 'models' 查看模型列表")
        print("输入 'ensemble' 使用集成预测")
        print("-"*50)
        while True:
            try:
                text = input("\n请输入要分析的微博内容: ").strip()
                command = text.lower()
                if command == 'q':
                    print("👋 再见!")
                    break
                if command == 'models':
                    print(f"已加载模型: {list(self.models.keys())}")
                    continue
                if command == 'ensemble':
                    if len(self.models) > 1:
                        # Ask for the content to classify; the command word
                        # itself is not the text to analyse.
                        content = input("请输入要进行集成预测的微博内容: ").strip()
                        if not content:
                            print("❌ 请输入有效内容")
                            continue
                        pred, conf = self.ensemble_predict(content)
                        sentiment = "😊 正面" if pred == 1 else "😞 负面"
                        print(f"\n🤖 集成预测结果:")
                        print(f" 情感倾向: {sentiment}")
                        print(f" 置信度: {conf:.4f}")
                    else:
                        print("❌ 集成预测需要至少2个模型")
                    continue
                if not text:
                    print("❌ 请输入有效内容")
                    continue
                # Run each model once and reuse the outcome for both the
                # per-model listing and the ensemble line.
                succeeded, _ = self._collect_predictions(processing(text))
                results = self._with_placeholders(succeeded)
                print(f"\n📝 原文: {text}")
                print("🔍 预测结果:")
                for model_name, (pred, conf) in results.items():
                    sentiment = "😊 正面" if pred == 1 else "😞 负面"
                    print(f" {model_name.upper():8}: {sentiment} (置信度: {conf:.4f})")
                if len(results) > 1:
                    ensemble_pred, ensemble_conf = self._weighted_vote(succeeded)
                    ensemble_sentiment = "😊 正面" if ensemble_pred == 1 else "😞 负面"
                    print(f" {'集成':8}: {ensemble_sentiment} (置信度: {ensemble_conf:.4f})")
            except (KeyboardInterrupt, EOFError):
                # EOFError: stdin was closed. Without this, input() would
                # raise on every iteration and the loop would never end.
                print("\n\n👋 程序被中断,再见!")
                break
            except Exception as e:
                print(f"❌ 预测过程中出现错误: {e}")
def main():
    """Command-line entry point.

    Loads either the single model chosen with ``--model_type`` or every
    model found in ``--model_dir``. With ``--text`` the given text is
    classified once and the result printed; otherwise the interactive loop
    is started.

    Raises:
        SystemExit: With status 1 when ``--text`` is given but no model
            could be loaded.
    """
    parser = argparse.ArgumentParser(description='微博情感分析统一预测程序')
    parser.add_argument('--model_dir', type=str, default='./model',
                        help='模型文件目录')
    parser.add_argument('--bert_path', type=str, default='./model/chinese_wwm_pytorch',
                        help='BERT预训练模型路径')
    parser.add_argument('--model_type', type=str, choices=['bayes', 'svm', 'xgboost', 'lstm', 'bert'],
                        help='指定单个模型类型进行预测')
    parser.add_argument('--text', type=str,
                        help='直接预测指定文本')
    # NOTE: store_true combined with default=True means this flag is always
    # True; interactive mode is effectively "whenever --text is absent".
    parser.add_argument('--interactive', action='store_true', default=True,
                        help='交互式预测模式(默认)')
    parser.add_argument('--ensemble', action='store_true',
                        help='使用集成预测')
    args = parser.parse_args()

    predictor = SentimentPredictor()

    if args.model_type:
        # Load only the requested model.
        model_files = {
            'bayes': 'bayes_model.pkl',
            'svm': 'svm_model.pkl',
            'xgboost': 'xgboost_model.pkl',
            'lstm': 'lstm_model.pth',
            'bert': 'bert_model.pth'
        }
        model_path = os.path.join(args.model_dir, model_files[args.model_type])
        predictor.load_model(args.model_type, model_path, bert_path=args.bert_path)
    else:
        predictor.load_all_models(args.model_dir, args.bert_path)

    if args.text:
        if not predictor.models:
            # load_model only prints on failure, so nothing may be loaded
            # here. Stop with a clear message instead of an uncaught
            # ValueError traceback or an output that lacks any prediction.
            print("错误: 没有加载任何模型,请先加载模型")
            raise SystemExit(1)
        if args.ensemble and len(predictor.models) > 1:
            pred, conf = predictor.ensemble_predict(args.text)
            sentiment = "正面" if pred == 1 else "负面"
            print(f"文本: {args.text}")
            print(f"集成预测: {sentiment} (置信度: {conf:.4f})")
        else:
            # Also reached when --ensemble was requested with a single model.
            results = predictor.predict_single(args.text, args.model_type)
            print(f"文本: {args.text}")
            for model_name, (pred, conf) in results.items():
                sentiment = "正面" if pred == 1 else "负面"
                print(f"{model_name.upper()}: {sentiment} (置信度: {conf:.4f})")
    elif args.interactive:
        predictor.interactive_predict()


if __name__ == "__main__":
    main()