redhong-xy

hong

  1 +import pandas as pd # 用于数据处理
  2 +import numpy as np # 用于科学计算
  3 +import csv # 用于读取CSV文件
  4 +from snownlp import SnowNLP # 用于中文自然语言处理(此处未实际使用)
  5 +from sklearn.feature_extraction.text import TfidfVectorizer # 用于文本特征提取
  6 +from sklearn.naive_bayes import MultinomialNB # 用于多项式朴素贝叶斯分类
  7 +from sklearn.model_selection import train_test_split # 用于划分训练集和测试集
  8 +from sklearn.metrics import accuracy_score # 用于计算模型准确度
  9 +
def getSentiment_data(path='./target.csv'):
    """Read every record of a CSV file into a list.

    Args:
        path: Location of the CSV file to read. Defaults to ``./target.csv``,
            the path this function has always read.

    Returns:
        A list with one entry per CSV record, in file order. Each entry is
        the list of string fields ``csv.reader`` produced for that record.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
    """
    # newline='' hands newline handling to the csv module, so line breaks
    # embedded inside quoted fields are preserved instead of being rewritten
    # by universal-newline translation.
    with open(path, 'r', encoding='utf8', newline='') as readerFile:
        return list(csv.reader(readerFile))
  18 +
def model_train():
    """Train a TF-IDF + multinomial naive Bayes classifier and score it.

    Loads the rows returned by ``getSentiment_data()`` into a two-column
    DataFrame (``text``, ``sentiment``), holds out 20% of the rows as a test
    set (fixed ``random_state=42`` so the split is reproducible), fits the
    vectorizer and classifier on the remaining rows, and evaluates on the
    held-out rows.

    Returns:
        The accuracy on the held-out test split, as computed by
        ``accuracy_score``.
    """
    sentiment_data = getSentiment_data()
    df = pd.DataFrame(sentiment_data, columns=['text', 'sentiment'])

    # Hold out 20% of the rows for evaluation.
    train_data, test_data = train_test_split(df, test_size=0.2, random_state=42)

    # Fit the TF-IDF vocabulary on the training text only, then reuse that
    # vocabulary for the test text so no test information leaks into training.
    # NOTE(review): the default TfidfVectorizer tokenizer splits on word
    # boundaries; if the text is unsegmented Chinese, each run of characters
    # between punctuation/whitespace becomes a single token - confirm this is
    # the intended feature representation.
    vectorizer = TfidfVectorizer()
    X_train = vectorizer.fit_transform(train_data['text'])
    y_train = train_data['sentiment']
    X_test = vectorizer.transform(test_data['text'])
    y_test = test_data['sentiment']

    classifier = MultinomialNB()
    classifier.fit(X_train, y_train)

    y_pred = classifier.predict(X_test)

    # Previously the score was computed and then discarded; return it so the
    # caller can actually use the evaluation result.
    return accuracy_score(y_test, y_pred)
  43 +
if __name__ == "__main__":
    # Script entry point: train the model and compute its accuracy.
    model_train()