juanboy

舆情预测函数定义

@@ -39,7 +39,7 @@ def topicdefine(): @@ -39,7 +39,7 @@ def topicdefine():
39 for x in articleList: 39 for x in articleList:
40 label_article.append((x[0],predict_topic(x[5]))) 40 label_article.append((x[0],predict_topic(x[5])))
41 for x in commentList: 41 for x in commentList:
42 - label_comments.append((x[5],predict_topic(x[4]))) 42 + label_comments.append((x[8],predict_topic(x[4])))
43 return label_article,label_comments 43 return label_article,label_comments
44 44
45 # 更新数据库 45 # 更新数据库
@@ -53,7 +53,7 @@ def update_data(): @@ -53,7 +53,7 @@ def update_data():
53 params = [str(label),str(id)] 53 params = [str(label),str(id)]
54 query(sql, params) 54 query(sql, params)
55 for row in label_comments: 55 for row in label_comments:
56 - label, id = row 56 + id, label = row
57 sql = "UPDATE comments SET label = %s WHERE authorName = %s" 57 sql = "UPDATE comments SET label = %s WHERE authorName = %s"
58 params = [str(label),str(id)] 58 params = [str(label),str(id)]
59 query(sql, params) 59 query(sql, params)
@@ -128,12 +128,12 @@ def getIPCharByCommentsRegion(): @@ -128,12 +128,12 @@ def getIPCharByCommentsRegion():
128 def getCommentCharDataOne(): 128 def getCommentCharDataOne():
129 xData = [] 129 xData = []
130 rangeNum = 20 130 rangeNum = 20
131 - for item in range(1,100): 131 + for item in range(100):
132 xData.append(str(rangeNum * item) + '-' + str(rangeNum * (item + 1))) 132 xData.append(str(rangeNum * item) + '-' + str(rangeNum * (item + 1)))
133 yData = [0 for x in range(len(xData))] 133 yData = [0 for x in range(len(xData))]
134 for comment in commentList: 134 for comment in commentList:
135 - for item in range(99):  
136 - if int(comment[2]) < rangeNum * (item + 2): 135 + for item in range(100):
  136 + if int(comment[2]) < rangeNum * (item + 1):
137 yData[item] += 1 137 yData[item] += 1
138 break 138 break
139 return xData,yData 139 return xData,yData
  1 +import numpy as np
  2 +import datetime
  3 +import matplotlib.pyplot as plt
  4 +
  5 +
def datetime_to_number(date: str):
    """Convert a ``YYYY-MM-DD`` date string to a day offset from 2024-01-01.

    Args:
        date: Date string in ``%Y-%m-%d`` format.

    Returns:
        Whole days between 2024-01-01 and ``date``; negative for earlier dates.
    """
    epoch = datetime.datetime(2024, 1, 1)
    parsed = datetime.datetime.strptime(date, "%Y-%m-%d")
    elapsed = parsed - epoch
    return elapsed.days
  10 +
  11 +
def predict_future_values(data, days=5):
    """Extrapolate daily values with a least-squares straight line.

    Args:
        data: Mapping of ``"YYYY-MM-DD"`` date strings to numeric values.
        days: How many consecutive days after the latest date in ``data``
            to predict. Defaults to 5, the horizon the code always used.

    Returns:
        Dict mapping each future ``"YYYY-MM-DD"`` string to its predicted
        value, truncated to an int and floored at 0. Empty when ``data``
        is empty.
    """
    if not data:
        # Nothing to fit and no latest date to extrapolate from.
        return {}

    # Order the observations chronologically.
    sorted_dates = sorted(
        data, key=lambda date: datetime.datetime.strptime(date, "%Y-%m-%d")
    )

    # Dates become integer day offsets so they can be used as x values.
    xs = np.array([datetime_to_number(date) for date in sorted_dates])
    ys = np.array([data[date] for date in sorted_dates])

    if len(sorted_dates) == 1:
        # One sample cannot determine a slope (polyfit would be
        # rank-deficient), so predict a flat line through that sample.
        fn = np.poly1d([ys[0]])
    else:
        fn = np.poly1d(np.polyfit(xs, ys, 1))

    latest_date_obj = datetime.datetime.strptime(sorted_dates[-1], "%Y-%m-%d")
    latest_x = int(xs[-1])

    predictions = {}
    for step in range(1, days + 1):
        future = latest_date_obj + datetime.timedelta(days=step)
        # Consecutive days are consecutive offsets, so no re-parsing needed.
        value = int(fn(latest_x + step))
        # A falling trend must not produce negative counts.
        predictions[future.strftime("%Y-%m-%d")] = max(value, 0)

    return predictions
  40 +
  41 +
if __name__ == '__main__':
    # Manual smoke run: fit a small hand-written series and print the forecast.
    data = {
        '2024-06-15': 1,
        '2024-06-18': 1,
        '2024-06-22': 1,
        '2024-06-23': 1,
        '2024-07-01': 3,
        '2024-07-02': 4,
        '2024-07-03': 4,
        '2024-07-04': 14,
    }
    predictions = predict_future_values(data)
    print(predictions)
  1 +from utils.getPublicData import *
  2 +articleList = getAllArticleData()
  3 +commentList = getAllCommentsData()
  4 +import csv
  5 +import os
  6 +import datetime
def getTopicByArticle():
    """Count how many articles carry each topic label.

    Reads the module-level ``articleList`` and uses column 14 of each row as
    the topic label (presumably the label column of the article table;
    confirm against the schema). Rows whose label is ``None`` are skipped.

    Returns:
        List of ``{'name': topic, 'value': count}`` dicts, in order of the
        first appearance of each topic.
    """
    articleTopicDic = {}
    for row in articleList:
        topic = row[14]
        if topic is not None:
            articleTopicDic[topic] = articleTopicDic.get(topic, 0) + 1
    return [
        {'name': name, 'value': count}
        for name, count in articleTopicDic.items()
    ]
  22 +
def getTopicByComments():
    """Count how many comments carry each topic label.

    Reads the module-level ``commentList`` and uses column 9 of each row as
    the topic label (presumably the label column of the comments table;
    confirm against the schema). Rows whose label is ``None`` are skipped.

    Returns:
        List of ``{'name': topic, 'value': count}`` dicts, in order of the
        first appearance of each topic.
    """
    commentsTopicDic = {}
    for row in commentList:
        topic = row[9]
        if topic is not None:
            commentsTopicDic[topic] = commentsTopicDic.get(topic, 0) + 1
    return [
        {'name': name, 'value': count}
        for name, count in commentsTopicDic.items()
    ]
  38 +
def mergeTopics(article_topics, comment_topics):
    """Merge two topic-count lists, summing the values of shared topics.

    Args:
        article_topics: List of ``{'name': ..., 'value': ...}`` dicts.
        comment_topics: List of ``{'name': ..., 'value': ...}`` dicts.

    Returns:
        List of ``{'name': ..., 'value': ...}`` dicts with one entry per
        distinct name, ordered by first appearance (articles first).
    """
    totals = {}
    combined = article_topics + comment_topics
    for entry in combined:
        name = entry['name']
        if name not in totals:
            totals[name] = entry['value']
            continue
        totals[name] += entry['value']

    merged = []
    for name, total in totals.items():
        merged.append({'name': name, 'value': total})
    return merged
def getTopicData():
    """Load the first ten rows of ``./merged_topics.csv`` as chart data.

    The file is expected to have ``name`` and ``value`` columns. Rows are
    taken in file order, so "top ten" relies on the file already being
    sorted by value.

    NOTE(review): ``pd`` is not imported explicitly in this module; it is
    presumably provided by the star import from ``utils.getPublicData``.
    Confirm.

    Returns:
        Tuple ``(xData, yData)``: the topic names and their values as lists.
    """
    frame = pd.read_csv('./merged_topics.csv')
    leading = frame.head(10)
    return leading['name'].tolist(), leading['value'].tolist()
  55 +
def getTopicPageCreatedAtCharData(topic):
    """Tally, per date, the articles and comments labelled with ``topic``.

    Article rows in the module-level ``articleList`` are matched on column
    14 and dated by column 7; comment rows in ``commentList`` are matched on
    column 9 and dated by column 1. Date values must be ``YYYY-MM-DD``
    strings because they are parsed with ``strptime`` for sorting.

    Args:
        topic: Topic label to filter on.

    Returns:
        Tuple ``(topic, counts)`` where ``counts`` maps each date string to
        the combined number of matching articles and comments, in
        chronological order.
    """
    per_day = {}
    for article in articleList:
        if article[14] == topic:
            day = article[7]
            per_day[day] = per_day.get(day, 0) + 1
    for comment in commentList:
        if comment[9] == topic:
            day = comment[1]
            per_day[day] = per_day.get(day, 0) + 1

    def as_datetime(day):
        return datetime.datetime.strptime(day, "%Y-%m-%d")

    ordered_days = sorted(per_day, key=as_datetime)
    return topic, {day: per_day[day] for day in ordered_days}
  74 +
def writeTopicsToCSV(topics, file_name):
    """Write topic counts to a CSV file, highest value first.

    The file is opened in ``'w'`` mode, so any existing content is replaced.
    The ``name,value`` header row is therefore written on every call; tying
    it to whether the file already existed left rewritten files without a
    header.

    Args:
        topics: Iterable of ``{'name': ..., 'value': ...}`` dicts.
        file_name: Path of the CSV file to create or overwrite.
    """
    # Descending by value so the first rows are the most frequent topics.
    sorted_topics = sorted(topics, key=lambda x: x['value'], reverse=True)
    with open(file_name, 'w', newline='', encoding='utf-8') as csvfile:
        fieldnames = ['name', 'value']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(sorted_topics)
if __name__ == '__main__':
    # To regenerate the merged topic CSV, run:
    #   merged_topics = mergeTopics(getTopicByArticle(), getTopicByComments())
    #   writeTopicsToCSV(merged_topics, 'merged_topics.csv')
    # Manual check: per-date counts for one topic.
    print(getTopicPageCreatedAtCharData("生活"))