# redhong-xy
#
# hong

import csv  # reads the CSV file

import numpy as np  # scientific computing
import pandas as pd  # data handling
from sklearn.feature_extraction.text import TfidfVectorizer  # text feature extraction
from sklearn.metrics import accuracy_score  # model accuracy
from sklearn.model_selection import train_test_split  # train/test split
from sklearn.naive_bayes import MultinomialNB  # multinomial naive Bayes classifier
from snownlp import SnowNLP  # Chinese NLP (not actually used here)


def getSentiment_data(path='./target.csv'):
    """Read every record of a CSV file into a list of rows.

    Args:
        path: Location of the CSV file to read. Defaults to
            ``'./target.csv'``, the path this function has always used,
            so existing zero-argument calls behave as before.

    Returns:
        A list with one entry per CSV record, in file order. Each entry is
        the list of field strings produced by ``csv.reader``. An empty file
        yields an empty list.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
        UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    # newline='' hands line-ending handling to the csv module, so line
    # breaks embedded inside quoted fields are preserved verbatim instead of
    # being rewritten by universal-newline translation.
    with open(path, 'r', encoding='utf8', newline='') as reader_file:
        return list(csv.reader(reader_file))


def model_train():
    """Train a TF-IDF + multinomial naive Bayes sentiment classifier.

    Loads the rows returned by ``getSentiment_data()`` into a DataFrame with
    the columns ``text`` and ``sentiment``, holds out 20% of the rows as a
    test set (fixed ``random_state=42`` so the split is reproducible), fits
    the vectorizer and the classifier on the training split only, and scores
    the predictions on the held-out split.

    Returns:
        The accuracy on the test split, as computed by ``accuracy_score``.
        (Previously this value was computed and then discarded.)
    """
    # Load the sentiment data and convert it to a DataFrame.
    # NOTE(review): assumes every CSV row has exactly two fields
    # (text, sentiment) -- confirm against the data file.
    sentiment_data = getSentiment_data()
    df = pd.DataFrame(sentiment_data, columns=['text', 'sentiment'])

    # Split into training and test sets; the test set is 20% of the rows.
    train_data, test_data = train_test_split(df, test_size=0.2, random_state=42)

    # Fit the TF-IDF vocabulary on the training text only, then reuse that
    # fitted vocabulary to transform the test text.
    vectorizer = TfidfVectorizer()
    X_train = vectorizer.fit_transform(train_data['text'])
    y_train = train_data['sentiment']
    X_test = vectorizer.transform(test_data['text'])
    y_test = test_data['sentiment']

    # Train the multinomial naive Bayes classifier.
    classifier = MultinomialNB()
    classifier.fit(X_train, y_train)

    # Predict on the test set and score the predictions.
    y_pred = classifier.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    return accuracy


if __name__ == "__main__":
    # Train the model and compute its accuracy when run as a script.
    model_train()