Merge branch 'main' of https://github.com/666ghj/Weibo_PublicOpinion_AnalysisSystem
Showing
1 changed file
with
48 additions
and
0 deletions
| 1 | +import pandas as pd # 用于数据处理 | ||
| 2 | +import numpy as np # 用于科学计算 | ||
| 3 | +import csv # 用于读取CSV文件 | ||
| 4 | +from snownlp import SnowNLP # 用于中文自然语言处理(此处未实际使用) | ||
| 5 | +from sklearn.feature_extraction.text import TfidfVectorizer # 用于文本特征提取 | ||
| 6 | +from sklearn.naive_bayes import MultinomialNB # 用于多项式朴素贝叶斯分类 | ||
| 7 | +from sklearn.model_selection import train_test_split # 用于划分训练集和测试集 | ||
| 8 | +from sklearn.metrics import accuracy_score # 用于计算模型准确度 | ||
| 9 | + | ||
| 10 | + | ||
def getSentiment_data(path: str = './target.csv'):
    """Load every record of the sentiment CSV file.

    Args:
        path: Location of the CSV file to read. Defaults to
            ``'./target.csv'``, resolved against the current working
            directory.

    Returns:
        A list with one entry per CSV record, in file order. Each entry is
        the list of that record's fields as strings. An empty file yields
        an empty list.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
        UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    # newline='' hands line-ending handling to the csv module, so line
    # breaks embedded inside quoted fields are preserved unchanged.
    with open(path, 'r', encoding='utf8', newline='') as reader_file:
        return list(csv.reader(reader_file))
| 19 | + | ||
| 20 | + | ||
def model_train():
    """Train a TF-IDF + multinomial naive Bayes sentiment classifier.

    Loads the labelled rows via ``getSentiment_data()``, holds out 20% of
    them as a test set, fits the vectorizer and classifier on the remaining
    80%, and scores the predictions on the held-out rows.

    Returns:
        The accuracy of the classifier on the held-out test set, as
        computed by ``accuracy_score``.
    """
    # Load the raw rows and label the two columns.
    sentiment_data = getSentiment_data()
    df = pd.DataFrame(sentiment_data, columns=['text', 'sentiment'])

    # Hold out 20% of the rows for testing; the fixed seed makes the split
    # reproducible between runs.
    train_data, test_data = train_test_split(
        df, test_size=0.2, random_state=42
    )

    # Fit the TF-IDF vocabulary on the training text only, then reuse it to
    # transform the test text so no test information leaks into the features.
    # NOTE(review): TfidfVectorizer's default tokenizer splits on word
    # boundaries; if 'text' holds unsegmented Chinese, confirm that this
    # produces useful features (pre-segmentation may be needed).
    vectorize = TfidfVectorizer()
    X_train = vectorize.fit_transform(train_data['text'])
    y_train = train_data['sentiment']
    X_test = vectorize.transform(test_data['text'])
    y_test = test_data['sentiment']

    # Train the multinomial naive Bayes classifier.
    classifier = MultinomialNB()
    classifier.fit(X_train, y_train)

    # Predict on the held-out rows and score the predictions.
    y_pred = classifier.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)

    # Hand the score back to the caller instead of discarding it.
    return accuracy
| 46 | + | ||
# Script entry point: run the training pipeline only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    model_train()
-
Please register or login to post a comment