戒酒的李白

The old emotion recognition model has been replaced with the new model_pro, and the results have been integrated into the project.
... ... @@ -6,12 +6,12 @@ from tqdm import tqdm
import os
import sys
import json
import chardet # 导入 chardet
import chardet
# 导入您定义的模型和模块
from MHA import MultiHeadAttentionLayer
from classifier import FinalClassifier
from BERT_CTM import BERT_CTM_Model
# 导入改进版模型的组件
from model_pro.MHA import MultiHeadAttentionLayer
from model_pro.classifier import FinalClassifier
from model_pro.BERT_CTM import BERT_CTM_Model
# 设置设备
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
... ... @@ -30,7 +30,7 @@ def detect_file_encoding(file_path, num_bytes=10000):
result = chardet.detect(rawdata)
encoding = result['encoding']
confidence = result['confidence']
print(f"Detected encoding: {encoding} with confidence {confidence}")
print(f"检测到的编码: {encoding}, 置信度: {confidence}")
return encoding
... ... @@ -42,8 +42,6 @@ def get_bert_ctm_embeddings(texts, bert_model_path, ctm_tokenizer_path, n_compon
n_components=n_components,
num_epochs=num_epochs
)
# 加载已保存的CTM模型
bert_ctm_model.load_model()
# 获取嵌入
embeddings = bert_ctm_model.get_bert_embeddings(texts)
return embeddings
... ... @@ -60,15 +58,11 @@ def predict(model_save_path, input_data_path, output_path, bert_model_path, ctm_
num_classes=2):
try:
# 加载模型
# 修改这里,设置 weights_only=True 以消除 FutureWarning
checkpoint = torch.load(model_save_path, map_location=device, weights_only=False)
classifier_model = FinalClassifier(input_dim=768, num_classes=num_classes)
classifier_model.load_state_dict(checkpoint['classifier_model_state_dict'])
classifier_model.to(device)
print("加载模型...")
classifier_model = torch.load(model_save_path, map_location=device)
classifier_model.eval()
attention_model = MultiHeadAttentionLayer(embed_size=768, num_heads=8)
attention_model.load_state_dict(checkpoint['attention_model_state_dict'])
attention_model.to(device)
attention_model.eval()
... ... @@ -76,11 +70,12 @@ def predict(model_save_path, input_data_path, output_path, bert_model_path, ctm_
encoding = detect_file_encoding(input_data_path)
# 读取输入数据
print("读取输入数据...")
data = pd.read_csv(input_data_path, encoding=encoding)
texts = data['TEXT'].tolist()
# 生成嵌入
print("Generating embeddings...")
print("生成文本嵌入...")
embeddings = get_bert_ctm_embeddings(texts, bert_model_path, ctm_tokenizer_path)
# 准备DataLoader
... ... @@ -88,63 +83,89 @@ def predict(model_save_path, input_data_path, output_path, bert_model_path, ctm_
# 存储预测结果
all_predictions = []
all_probabilities = []
print("开始预测...")
with torch.no_grad():
for batch in tqdm(data_loader, desc="Predicting"):
for batch in tqdm(data_loader, desc="预测进度"):
batch_x = batch[0].to(device)
batch_x = torch.mean(batch_x, dim=1)
# 使用注意力机制
attention_output = attention_model(batch_x, batch_x, batch_x)
# 获取分类结果
outputs = classifier_model(attention_output)
outputs = torch.mean(outputs, dim=1)
# 获取预测概率
probabilities = torch.softmax(outputs, dim=1)
# 获取预测标签
_, predicted = torch.max(outputs, 1)
all_predictions.extend(predicted.cpu().numpy())
all_probabilities.extend(probabilities.cpu().numpy())
# 保存预测结果
# 添加预测结果和概率到数据框
data['Predicted_Label'] = all_predictions
data['Confidence'] = [prob[pred] for prob, pred in zip(all_probabilities, all_predictions)]
# 保存预测结果
data.to_csv(output_path, index=False, encoding='utf-8')
print(f"Predictions saved to {output_path}")
print(f"预测结果已保存到 {output_path}")
# 统计标签的个数和占比
label_counts = data['Predicted_Label'].value_counts()
total_count = len(data)
stats = {}
stats = {
'统计信息': {
'总样本数': total_count,
'各类别统计': {}
}
}
for label, count in label_counts.items():
label_name = "良好" if label == 0 else "不良"
percentage = (count / total_count) * 100
stats[label_name] = {
'count': count,
'percentage': f"{percentage:.2f}%"
confidence_mean = data[data['Predicted_Label'] == label]['Confidence'].mean()
stats['统计信息']['各类别统计'][label_name] = {
'数量': int(count),
'占比': f"{percentage:.2f}%",
'平均置信度': f"{confidence_mean:.2f}"
}
print(f"Label: {label_name}, Count: {count}, Percentage: {percentage:.2f}%")
print(f"标签: {label_name}, 数量: {count}, 占比: {percentage:.2f}%, 平均置信度: {confidence_mean:.2f}")
# 将统计信息保存到 JSON 文件
with open(stats_output_path, 'w', encoding='utf-8') as f:
json.dump(stats, f, ensure_ascii=False)
json.dump(stats, f, ensure_ascii=False, indent=4)
return True # 成功执行
return True
except Exception as e:
print(f"Error during prediction: {e}")
return False # 执行失败
print(f"预测过程中出现错误: {e}")
return False
if __name__ == "__main__":
    # CLI entry point: predict.py <input_data_path> <stats_output_path>
    # (The original block contained both the pre- and post-merge versions of
    # the usage message, the artifact paths, and the exit calls; this keeps
    # only the post-change model_pro variant.)
    if len(sys.argv) != 3:
        print("使用方法: python predict.py <input_data_path> <stats_output_path>")
        sys.exit(1)
    input_data_path = sys.argv[1]
    stats_output_path = sys.argv[2]

    # Fixed artifact/output locations for the improved (model_pro) pipeline.
    model_save_path = 'model_pro/final_model.pt'
    output_path = 'model_pro/predictions.csv'   # per-row prediction CSV
    bert_model_path = 'model_pro/bert_model'
    ctm_tokenizer_path = 'model_pro/sentence_bert_model'

    # Run prediction; exit code 0 on success, 1 on failure.
    success = predict(model_save_path, input_data_path, output_path,
                      bert_model_path, ctm_tokenizer_path, stats_output_path)
    sys.exit(0 if success else 1)
... ...
from utils.getPublicData import * # Import utility functions for data retrieval
from utils.mynlp import SnowNLP # Import SnowNLP for sentiment analysis
from collections import Counter # Import Counter for counting occurrences
import torch
from model_pro.MHA import MultiHeadAttentionLayer
from model_pro.classifier import FinalClassifier
from model_pro.BERT_CTM import BERT_CTM_Model
# Cache the source data once at import time so the chart helpers below reuse it.
articleList = getAllArticleData()  # Retrieve all article data
commentList = getAllCommentsData()  # Retrieve all comment data
# Select GPU when available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Load the improved (model_pro) artifacts once as module-level globals to
# avoid re-loading them on every prediction call.
model_save_path = 'model_pro/final_model.pt'
bert_model_path = 'model_pro/bert_model'
ctm_tokenizer_path = 'model_pro/sentence_bert_model'
try:
    # NOTE(review): torch.load without weights_only=True unpickles arbitrary
    # objects — only safe if final_model.pt is a trusted artifact.
    classifier_model = torch.load(model_save_path, map_location=device)
    classifier_model.eval()
    # NOTE(review): attention_model keeps freshly initialized weights — no
    # state_dict is loaded here; confirm whether trained attention weights
    # should be restored from the saved checkpoint.
    attention_model = MultiHeadAttentionLayer(embed_size=768, num_heads=8)
    attention_model.to(device)
    attention_model.eval()
    bert_ctm_model = BERT_CTM_Model(
        bert_model_path=bert_model_path,
        ctm_tokenizer_path=ctm_tokenizer_path
    )
except Exception as e:
    # Loading failure is reported but not fatal; any later call that touches
    # these globals will raise NameError instead.
    print(f"模型加载失败: {e}")
def predict_sentiment(texts):
    """Classify sentiment for a batch of texts with the improved (model_pro) pipeline.

    Embeds the texts with the BERT+CTM model, pools the embeddings, runs them
    through self-attention and the classifier head, and returns a numpy array
    of predicted class indices — or None if any step fails.
    """
    try:
        # Embed the raw texts, then mean-pool over the second axis.
        features = torch.tensor(
            bert_ctm_model.get_bert_embeddings(texts), dtype=torch.float32
        ).to(device)
        features = torch.mean(features, dim=1)
        with torch.no_grad():
            # Self-attention with query = key = value, then the classifier.
            attended = attention_model(features, features, features)
            logits = torch.mean(classifier_model(attended), dim=1)
            # Arg-max over classes gives the predicted label per sample.
            labels = torch.max(logits, 1)[1]
        return labels.cpu().numpy()
    except Exception as e:
        print(f"预测过程中出现错误: {e}")
        return None
def getTypeList():
    """Return the distinct article types (column 8 of articleList) as a list."""
    unique_types = {row[8] for row in articleList}
    return list(unique_types)
... ... @@ -119,32 +168,32 @@ def getYuQingCharDataOne():
return X, Y, biedata
def getYuQingCharDataTwo():
    """Classify comment and article sentiment with the improved model.

    Returns two ECharts pie-series lists (comments, articles); each entry is
    {'name': label, 'value': count} for the labels '良好' and '不良'.

    The original block contained interleaved pre- and post-merge lines (dead
    SnowNLP loops, duplicate Counter assignments, conflicting label lists);
    this keeps only the post-change model-based version.
    """
    # Gather the raw text columns once.
    comment_texts = [comment[4] for comment in commentList]
    article_texts = [article[5] for article in articleList]

    # Predict comment sentiment; fall back to an empty result on failure.
    comment_predictions = predict_sentiment(comment_texts)
    if comment_predictions is not None:
        comment_sentiments = ['良好' if pred == 0 else '不良' for pred in comment_predictions]
    else:
        comment_sentiments = []

    # Predict article sentiment the same way.
    article_predictions = predict_sentiment(article_texts)
    if article_predictions is not None:
        article_sentiments = ['良好' if pred == 0 else '不良' for pred in article_predictions]
    else:
        article_sentiments = []

    # Count each label and shape the data for the two pie charts.
    comment_counts = Counter(comment_sentiments)
    article_counts = Counter(article_sentiments)
    X = ['良好', '不良']
    biedata1 = [{'name': x, 'value': comment_counts.get(x, 0)} for x in X]
    biedata2 = [{'name': x, 'value': article_counts.get(x, 0)} for x in X]
    return biedata1, biedata2
def getYuQingCharDataThree():
... ...
... ... @@ -8,12 +8,61 @@ from utils.getEchartsData import *
from utils.getTopicPageData import *
from utils.yuqingpredict import *
from utils.logger import app_logger as logging
import torch
from model_pro.MHA import MultiHeadAttentionLayer
from model_pro.classifier import FinalClassifier
from model_pro.BERT_CTM import BERT_CTM_Model
# Flask blueprint for all /page routes.
pb = Blueprint('page',
               __name__,
               url_prefix='/page',
               template_folder='templates')
# Select GPU when available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Load the improved (model_pro) artifacts once at import time as module-level
# globals, so they are not re-loaded on every request.
model_save_path = 'model_pro/final_model.pt'
bert_model_path = 'model_pro/bert_model'
ctm_tokenizer_path = 'model_pro/sentence_bert_model'
try:
    # NOTE(review): torch.load without weights_only=True unpickles arbitrary
    # objects — only safe if final_model.pt is a trusted artifact.
    classifier_model = torch.load(model_save_path, map_location=device)
    classifier_model.eval()
    # NOTE(review): attention_model keeps freshly initialized weights — no
    # state_dict is loaded here; confirm whether trained attention weights
    # should be restored from the saved checkpoint.
    attention_model = MultiHeadAttentionLayer(embed_size=768, num_heads=8)
    attention_model.to(device)
    attention_model.eval()
    bert_ctm_model = BERT_CTM_Model(
        bert_model_path=bert_model_path,
        ctm_tokenizer_path=ctm_tokenizer_path
    )
except Exception as e:
    # Loading failure is reported but not fatal; routes that touch these
    # globals will raise NameError instead.
    print(f"模型加载失败: {e}")
def predict_sentiment(text):
    """Predict the sentiment of a single text with the improved model.

    Returns (label, confidence), where label is the arg-max class index and
    confidence its softmax probability; (None, None) on any failure.
    """
    try:
        # Embed the single text (as a one-element batch) and mean-pool.
        emb = bert_ctm_model.get_bert_embeddings([text])
        x = torch.tensor(emb, dtype=torch.float32).to(device)
        x = torch.mean(x, dim=1)
        with torch.no_grad():
            # Self-attention (query = key = value) then the classifier head.
            ctx = attention_model(x, x, x)
            logits = torch.mean(classifier_model(ctx), dim=1)
            # Softmax gives per-class probabilities; max gives the label.
            probs = torch.softmax(logits, dim=1)
            _, best = torch.max(logits, 1)
        label = best.item()
        return label, probs[0][label].item()
    except Exception as e:
        print(f"预测过程中出现错误: {e}")
        return None, None
@pb.route('/home')
def home():
... ... @@ -172,14 +221,15 @@ def yuqingpredict():
defaultTopic = request.args.get('Topic')
TopicLen = getTopicLen(defaultTopic)
X, Y = getTopicCreatedAtandpredictData(defaultTopic)
sentences = ''
value = SnowNLP(defaultTopic).sentiments
if value == 0.5:
sentences = '中性'
elif value > 0.5:
sentences = '正面'
elif value < 0.5:
sentences = '负面'
# 使用改进版模型进行情感预测
predicted_label, confidence = predict_sentiment(defaultTopic)
if predicted_label is not None:
sentences = '良好' if predicted_label == 0 else '不良'
sentences = f"{sentences} (置信度: {confidence:.2f})"
else:
sentences = '预测失败'
comments = getCommentFilterDataTopic(defaultTopic)
return render_template('yuqingpredict.html',
username=username,
... ...