Showing
1 changed file
with
55 additions
and
48 deletions
| 1 | -import pandas as pd # 用于数据处理 | ||
| 2 | -import numpy as np # 用于科学计算 | ||
| 3 | -import csv # 用于读取CSV文件 | ||
| 4 | -from snownlp import SnowNLP # 用于中文自然语言处理(此处未实际使用) | ||
| 5 | -from sklearn.feature_extraction.text import TfidfVectorizer # 用于文本特征提取 | ||
| 6 | -from sklearn.naive_bayes import MultinomialNB # 用于多项式朴素贝叶斯分类 | ||
| 7 | -from sklearn.model_selection import train_test_split # 用于划分训练集和测试集 | ||
| 8 | -from sklearn.metrics import accuracy_score # 用于计算模型准确度 | ||
| 9 | - | ||
| 10 | - | ||
def getSentiment_data():
    """Load the labelled sentiment rows from ``./target.csv``.

    The path is resolved against the current working directory and the file
    is decoded as UTF-8.

    Returns:
        list[list[str]]: one list of string fields per CSV row, in file order.

    Raises:
        OSError: if ``./target.csv`` cannot be opened.
    """
    # newline='' hands line-ending handling to the csv module, so a quoted
    # field that contains a line break is returned exactly as it was written.
    with open('./target.csv', 'r', encoding='utf8', newline='') as readerFile:
        return list(csv.reader(readerFile))
| 19 | - | ||
| 20 | - | ||
def model_train():
    """Train a TF-IDF + multinomial naive Bayes sentiment classifier.

    Rows returned by ``getSentiment_data()`` are loaded into a DataFrame with
    the columns ``text`` and ``sentiment``. 20% of the rows are held out as a
    test split (``random_state=42`` keeps the split reproducible), the
    vectorizer is fitted on the training texts only, and the classifier is
    scored on the held-out rows.

    Returns:
        float: accuracy of the classifier on the held-out test split.
    """
    # Load the labelled rows and give the two columns names.
    sentiment_data = getSentiment_data()
    df = pd.DataFrame(sentiment_data, columns=['text', 'sentiment'])

    # Hold out 20% of the data for evaluation.
    train_data, test_data = train_test_split(df, test_size=0.2, random_state=42)

    # Fit the vocabulary on the training texts only, then reuse it for the
    # test texts so both matrices share the same feature space.
    # NOTE(review): the default TfidfVectorizer tokenizer is used on raw text;
    # confirm this is adequate for unsegmented Chinese comments.
    vectorize = TfidfVectorizer()
    X_train = vectorize.fit_transform(train_data['text'])
    y_train = train_data['sentiment']
    X_test = vectorize.transform(test_data['text'])
    y_test = test_data['sentiment']

    # Train the classifier and predict labels for the held-out texts.
    classifier = MultinomialNB()
    classifier.fit(X_train, y_train)
    y_pred = classifier.predict(X_test)

    # Return the score so callers can actually use it; the original computed
    # it and then dropped it.
    return accuracy_score(y_test, y_pred)
| 47 | -if __name__ == "__main__": | ||
| 48 | - model_train() # 训练模型并计算准确度 | 1 | +from snownlp import SnowNLP # 引入SnowNLP库,用于中文情感分析 |
| 2 | +import csv # 用于处理CSV文件的读写操作 | ||
| 3 | +import os # 用于操作系统相关功能 | ||
| 4 | +import sys | ||
| 5 | +import os | ||
| 6 | + | ||
| 7 | +# 获取当前文件的绝对路径 | ||
| 8 | +current_file_path = os.path.abspath(__file__) | ||
| 9 | + | ||
| 10 | +# 获取当前文件的父目录路径 | ||
| 11 | +parent_dir = os.path.dirname(current_file_path) | ||
| 12 | + | ||
| 13 | +# 获取父目录的父目录路径,也就是项目根目录 | ||
| 14 | +project_root_dir = os.path.dirname(parent_dir) | ||
| 15 | + | ||
| 16 | +# 将项目根目录添加到 Python 路径中 | ||
| 17 | +sys.path.append(project_root_dir) | ||
| 18 | + | ||
| 19 | +# 现在可以导入 utils 目录中的模块了 | ||
| 20 | +from utils.getPublicData import getAllCommentsData # 自定义函数,用于获取评论数据 | ||
| 21 | + | ||
def _sentiment_label(value):
    """Map a SnowNLP sentiment score to its label string."""
    if value > 0.5:
        return '正面'
    if value < 0.5:
        return '负面'
    # Anything else (in practice a score of exactly 0.5) counts as neutral.
    return '中性'


def targetFile():
    """Label every comment with its sentiment and append the rows to target.csv.

    Comments come from ``getAllCommentsData()``. The text at index 4 of each
    record (the ``content`` column) is scored with ``SnowNLP(...).sentiments``
    and stored as a ``[content, label]`` row. Scores above 0.5 are labelled
    positive, scores below 0.5 negative, and exactly 0.5 neutral.

    Rows are appended to ``target.csv`` in the current working directory
    (UTF-8), so running the function again adds to the existing file.
    Nothing is written when there are no comments.

    Returns:
        None
    """
    target_path = 'target.csv'

    # Record layout: articleId | created_at | likes_counts | region | content
    # | authorName | authorGender | authorAddress | authorAvatar
    rows = []
    for comment in getAllCommentsData():
        content = comment[4]
        rows.append([content, _sentiment_label(SnowNLP(content).sentiments)])

    if not rows:
        return

    # Open the file once and write every row in a single call instead of
    # reopening it for each row.
    with open(target_path, 'a', encoding='utf8', newline='') as f:
        csv.writer(f).writerows(rows)
| 50 | + | ||
def main():
    """Script entry point: run the sentiment-labelling export."""
    targetFile()


# Only run the export when this file is executed directly, not on import.
if __name__ == '__main__':
    main()
-
Please register or login to post a comment