juanboy

舆情预测函数定义

... ... @@ -39,7 +39,7 @@ def topicdefine():
for x in articleList:
label_article.append((x[0],predict_topic(x[5])))
for x in commentList:
label_comments.append((x[5],predict_topic(x[4])))
label_comments.append((x[8],predict_topic(x[4])))
return label_article,label_comments
# 更新数据库
... ... @@ -53,7 +53,7 @@ def update_data():
params = [str(label),str(id)]
query(sql, params)
for row in label_comments:
label, id = row
id, label = row
sql = "UPDATE comments SET label = %s WHERE authorName = %s"
params = [str(label),str(id)]
query(sql, params)
... ...
... ... @@ -128,12 +128,12 @@ def getIPCharByCommentsRegion():
def getCommentCharDataOne():
    """Build a histogram of comments bucketed by the integer in column 2.

    The x axis consists of 100 consecutive ranges of width 20, labelled
    '0-20', '20-40', ..., '1980-2000'.  Each row of the module-level
    ``commentList`` is counted in the first range whose upper bound is
    strictly greater than ``int(comment[2])``.  Rows whose value is 2000 or
    more fall outside every range and are not counted.

    NOTE(review): column 2 is presumably a like/interaction count -- confirm
    against the comments table schema.

    Returns:
        A ``(xData, yData)`` tuple: the range labels and the number of
        comments per range, index-aligned.
    """
    rangeNum = 20
    bucketCount = 100
    xData = [
        str(rangeNum * item) + '-' + str(rangeNum * (item + 1))
        for item in range(bucketCount)
    ]
    yData = [0] * bucketCount
    for comment in commentList:
        # Floor division picks the same bucket as scanning for the first
        # upper bound above the value; negative values are clamped into the
        # first bucket, exactly as such a "< upper bound" scan would do.
        bucket = max(int(comment[2]) // rangeNum, 0)
        if bucket < bucketCount:
            yData[bucket] += 1
    return xData, yData
... ...
import numpy as np
import datetime
import matplotlib.pyplot as plt
def datetime_to_number(date: str):
    """Convert a ``YYYY-MM-DD`` date string into an integer day offset.

    The offset is the number of whole days between *date* and the fixed
    reference date 2024-01-01; dates before the reference yield negative
    numbers.
    """
    fmt = "%Y-%m-%d"
    parsed = datetime.datetime.strptime(date, fmt)
    reference = datetime.datetime.strptime("2024-1-1", fmt)
    elapsed = parsed - reference
    return elapsed.days
def predict_future_values(data, days=5):
    """Extrapolate per-day values with a straight-line least-squares fit.

    Args:
        data: Mapping of ``YYYY-MM-DD`` date strings to numeric values.
        days: How many consecutive days after the latest date in *data* to
            predict.  Defaults to 5.

    Returns:
        A dict mapping each future date string (``YYYY-MM-DD``) to the
        predicted value, truncated to ``int`` and clamped so it is never
        negative.  An empty dict is returned when *data* is empty, because
        there is neither a latest date nor anything to fit.

    Note:
        With fewer than two distinct dates a degree-1 fit is
        underdetermined; ``numpy.polyfit`` still returns coefficients but
        is expected to warn about poor conditioning.
    """
    if not data:
        return {}

    date_format = "%Y-%m-%d"
    # Order the dates chronologically (parsed, not lexicographic order).
    sorted_dates = sorted(
        data, key=lambda d: datetime.datetime.strptime(d, date_format)
    )

    # Express dates as integer day offsets so they can serve as x values.
    xs = np.array([datetime_to_number(d) for d in sorted_dates])
    ys = np.array([data[d] for d in sorted_dates])

    # Fit the first-degree polynomial and wrap it as a callable.
    model = np.poly1d(np.polyfit(xs, ys, 1))

    latest = datetime.datetime.strptime(sorted_dates[-1], date_format)
    predictions = {}
    for offset in range(1, days + 1):
        future = (latest + datetime.timedelta(days=offset)).strftime(date_format)
        # Evaluate once; truncate toward zero, then clamp negatives to 0.
        predictions[future] = max(int(model(datetime_to_number(future))), 0)
    return predictions
if __name__ == '__main__':
    # Manual smoke run: fit the sample daily counts and print the forecast.
    data = {
        '2024-06-15': 1,
        '2024-06-18': 1,
        '2024-06-22': 1,
        '2024-06-23': 1,
        '2024-07-01': 3,
        '2024-07-02': 4,
        '2024-07-03': 4,
        '2024-07-04': 14,
    }
    predictions = predict_future_values(data)
    print(predictions)
    # Alternative per-date output, currently disabled:
    # for date, value in predictions.items():
    # print(f'{date} PREDICTION: {value}')
... ...
from utils.getPublicData import *
# Module-level row lists, loaded once when this module is imported and read
# by the functions below via positional column indexes.
# NOTE(review): getAllArticleData / getAllCommentsData are presumably
# provided by the wildcard import from utils.getPublicData -- confirm.
articleList = getAllArticleData()
commentList = getAllCommentsData()
import csv
import os
import datetime
def getTopicByArticle():
    """Count how many articles carry each topic label.

    Reads the module-level ``articleList`` and uses column 14 of every row
    as the topic label; rows whose label is ``None`` are ignored.

    Returns:
        A list of ``{'name': topic, 'value': count}`` dicts, one per
        distinct topic, in the order the topics were first encountered.
    """
    # Local import keeps this function self-contained regardless of what the
    # module header imports.
    from collections import Counter

    topic_counts = Counter(
        row[14] for row in articleList if row[14] is not None
    )
    return [
        {'name': name, 'value': count}
        for name, count in topic_counts.items()
    ]
def getTopicByComments():
    """Count how many comments carry each topic label.

    Reads the module-level ``commentList`` and uses column 9 of every row
    as the topic label; rows whose label is ``None`` are ignored.

    Returns:
        A list of ``{'name': topic, 'value': count}`` dicts, one per
        distinct topic, in the order the topics were first encountered.
    """
    # Local import keeps this function self-contained regardless of what the
    # module header imports.
    from collections import Counter

    topic_counts = Counter(
        row[9] for row in commentList if row[9] is not None
    )
    return [
        {'name': name, 'value': count}
        for name, count in topic_counts.items()
    ]
def mergeTopics(article_topics, comment_topics):
    """Merge two topic-count lists, summing the values of equal names.

    Both arguments are lists of ``{'name': ..., 'value': ...}`` dicts.  The
    result has the same shape, with one entry per distinct name, ordered by
    first appearance (article topics before comment topics).
    """
    totals = {}
    for entry in article_topics + comment_topics:
        name = entry['name']
        if name not in totals:
            # First sighting: start the running total at this entry's value.
            totals[name] = entry['value']
            continue
        totals[name] += entry['value']

    merged = []
    for name, total in totals.items():
        merged.append({'name': name, 'value': total})
    return merged
def getTopicData():
    """Return the names and values of the first ten rows of merged_topics.csv.

    The file ``./merged_topics.csv`` is resolved against the current working
    directory and must have ``name`` and ``value`` columns.  Rows are taken
    in file order; no sorting is done here.

    Returns:
        A ``(xData, yData)`` tuple: the topic names and their values as
        index-aligned lists (at most ten entries each).
    """
    # Local import: no explicit pandas import is visible in this module, so
    # do not rely on `pd` leaking in through a wildcard import.
    import pandas as pd

    # Keep only the first ten topics of the merged file.
    top_10_topics = pd.read_csv('./merged_topics.csv').head(10)
    xData = top_10_topics['name'].tolist()
    yData = top_10_topics['value'].tolist()
    return xData, yData
def getTopicPageCreatedAtCharData(topic):
    """Tally, per date, the articles and comments labelled with *topic*.

    Article rows are matched on column 14 and dated by column 7; comment
    rows are matched on column 9 and dated by column 1.  Both come from the
    module-level ``articleList`` / ``commentList``.

    Returns:
        A ``(topic, counts)`` tuple where ``counts`` maps each date string
        to the number of matching rows, ordered chronologically (keys are
        parsed as ``%Y-%m-%d`` for sorting).
    """
    # Collect the date of every matching row, articles first, then comments.
    matching_dates = [row[7] for row in articleList if row[14] == topic]
    matching_dates += [row[1] for row in commentList if row[9] == topic]

    per_day = {}
    for day in matching_dates:
        per_day[day] = per_day.get(day, 0) + 1

    def as_date(day):
        return datetime.datetime.strptime(day, "%Y-%m-%d")

    ordered_days = sorted(per_day, key=as_date)
    return topic, {day: per_day[day] for day in ordered_days}
    # Earlier return shapes, kept for reference:
    # return topic,list(createdAt.keys()),list(createdAt.values())
    # return topic, createdAt.items()
def writeTopicsToCSV(topics, file_name):
    """Write topic counts to a CSV file, highest value first.

    Args:
        topics: Iterable of ``{'name': ..., 'value': ...}`` dicts.
        file_name: Destination path.  Any existing file is overwritten.

    The file always starts with a ``name,value`` header row.  The file is
    opened in ``'w'`` mode, which truncates it, so the header must be
    written on every call; skipping it when the file already existed would
    leave a header-less CSV behind.
    """
    # Sort by value, largest first.
    sorted_topics = sorted(topics, key=lambda x: x['value'], reverse=True)
    with open(file_name, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=['name', 'value'])
        writer.writeheader()
        writer.writerows(sorted_topics)
if __name__ == '__main__':
    # Writing the merged topic data to a CSV file is currently disabled:
    # merged_topics = mergeTopics(getTopicByArticle(), getTopicByComments())
    # writeTopicsToCSV(merged_topics, 'merged_topics.csv')
    topic_timeline = getTopicPageCreatedAtCharData("生活")
    print(topic_timeline)
... ...