Showing 4 changed files with 145 additions and 5 deletions.
| @@ -39,7 +39,7 @@ def topicdefine(): | @@ -39,7 +39,7 @@ def topicdefine(): | ||
| 39 | for x in articleList: | 39 | for x in articleList: |
| 40 | label_article.append((x[0],predict_topic(x[5]))) | 40 | label_article.append((x[0],predict_topic(x[5]))) |
| 41 | for x in commentList: | 41 | for x in commentList: |
| 42 | - label_comments.append((x[5],predict_topic(x[4]))) | 42 | + label_comments.append((x[8],predict_topic(x[4]))) |
| 43 | return label_article,label_comments | 43 | return label_article,label_comments |
| 44 | 44 | ||
| 45 | # 更新数据库 | 45 | # 更新数据库 |
| @@ -53,7 +53,7 @@ def update_data(): | @@ -53,7 +53,7 @@ def update_data(): | ||
| 53 | params = [str(label),str(id)] | 53 | params = [str(label),str(id)] |
| 54 | query(sql, params) | 54 | query(sql, params) |
| 55 | for row in label_comments: | 55 | for row in label_comments: |
| 56 | - label, id = row | 56 | + id, label = row |
| 57 | sql = "UPDATE comments SET label = %s WHERE authorName = %s" | 57 | sql = "UPDATE comments SET label = %s WHERE authorName = %s" |
| 58 | params = [str(label),str(id)] | 58 | params = [str(label),str(id)] |
| 59 | query(sql, params) | 59 | query(sql, params) |
| @@ -128,12 +128,12 @@ def getIPCharByCommentsRegion(): | @@ -128,12 +128,12 @@ def getIPCharByCommentsRegion(): | ||
| 128 | def getCommentCharDataOne(): | 128 | def getCommentCharDataOne(): |
| 129 | xData = [] | 129 | xData = [] |
| 130 | rangeNum = 20 | 130 | rangeNum = 20 |
| 131 | - for item in range(1,100): | 131 | + for item in range(100): |
| 132 | xData.append(str(rangeNum * item) + '-' + str(rangeNum * (item + 1))) | 132 | xData.append(str(rangeNum * item) + '-' + str(rangeNum * (item + 1))) |
| 133 | yData = [0 for x in range(len(xData))] | 133 | yData = [0 for x in range(len(xData))] |
| 134 | for comment in commentList: | 134 | for comment in commentList: |
| 135 | - for item in range(99): | ||
| 136 | - if int(comment[2]) < rangeNum * (item + 2): | 135 | + for item in range(100): |
| 136 | + if int(comment[2]) < rangeNum * (item + 1): | ||
| 137 | yData[item] += 1 | 137 | yData[item] += 1 |
| 138 | break | 138 | break |
| 139 | return xData,yData | 139 | return xData,yData |
utils/predict_demo1.py
0 → 100644
| 1 | +import numpy as np | ||
| 2 | +import datetime | ||
| 3 | +import matplotlib.pyplot as plt | ||
| 4 | + | ||
| 5 | + | ||
def datetime_to_number(date: str):
    """Convert a "YYYY-MM-DD" date string to the number of days since 2024-01-01."""
    target = datetime.datetime.strptime(date, "%Y-%m-%d")
    origin = datetime.datetime.strptime("2024-1-1", "%Y-%m-%d")
    delta = target - origin
    return delta.days
| 10 | + | ||
| 11 | + | ||
| 12 | +def predict_future_values(data): | ||
| 13 | + # 提取并排序日期 | ||
| 14 | + sorted_dates = sorted(data.keys(), key=lambda date: datetime.datetime.strptime(date, "%Y-%m-%d")) | ||
| 15 | + sorted_data = {k: data[k] for k in sorted_dates} | ||
| 16 | + | ||
| 17 | + # 将日期转换为整数并提取相应的值 | ||
| 18 | + xs = np.array([datetime_to_number(date) for date in sorted_data.keys()]) | ||
| 19 | + ys = np.array([data[date] for date in sorted_data.keys()]) | ||
| 20 | + | ||
| 21 | + # 拟合线性回归模型 | ||
| 22 | + fit = np.polyfit(xs, ys, 1) | ||
| 23 | + fn = np.poly1d(fit) | ||
| 24 | + | ||
| 25 | + # 获取最新日期,并生成未来三天的日期 | ||
| 26 | + latest_date = sorted_dates[-1] | ||
| 27 | + latest_date_obj = datetime.datetime.strptime(latest_date, "%Y-%m-%d") | ||
| 28 | + future_dates = [(latest_date_obj + datetime.timedelta(days=i)).strftime("%Y-%m-%d") for i in range(1, 6)] | ||
| 29 | + | ||
| 30 | + # 预测未来日期的值 | ||
| 31 | + predictions = {} | ||
| 32 | + for date in future_dates: | ||
| 33 | + date_num = datetime_to_number(date) | ||
| 34 | + if int(fn(date_num))<=0: | ||
| 35 | + predictions[date] = 0 | ||
| 36 | + else: | ||
| 37 | + predictions[date] = int(fn(date_num)) | ||
| 38 | + | ||
| 39 | + return predictions | ||
| 40 | + | ||
| 41 | + | ||
if __name__ == '__main__':
    # Smoke test: daily counts from a real crawl, predict the following days.
    sample = {'2024-06-15': 1, '2024-06-18': 1, '2024-06-22': 1, '2024-06-23': 1, '2024-07-01': 3, '2024-07-02': 4, '2024-07-03': 4, '2024-07-04': 14}
    print(predict_future_values(sample))
utils/yuqingpredict.py
0 → 100644
| 1 | +from utils.getPublicData import * | ||
| 2 | +articleList = getAllArticleData() | ||
| 3 | +commentList = getAllCommentsData() | ||
| 4 | +import csv | ||
| 5 | +import os | ||
| 6 | +import datetime | ||
def getTopicByArticle():
    """Count topic labels across all articles.

    Reads the module-level ``articleList``; column 14 holds the article's
    topic label (rows with a None label are skipped).

    Fixes vs. original: ``!= None`` replaced with ``is not None`` (PEP 8),
    and the check-then-increment double dict lookup replaced with
    ``dict.get``.

    Returns:
        list of {'name': topic, 'value': count} dicts, one per distinct topic.
    """
    counts = {}
    for article in articleList:
        topic = article[14]
        if topic is not None:  # skip unlabeled articles
            counts[topic] = counts.get(topic, 0) + 1
    return [{'name': name, 'value': value} for name, value in counts.items()]
| 22 | + | ||
def getTopicByComments():
    """Count topic labels across all comments.

    Reads the module-level ``commentList``; column 9 holds the comment's
    topic label (rows with a None label are skipped).

    Fixes vs. original: ``!= None`` replaced with ``is not None`` (PEP 8),
    and the check-then-increment double dict lookup replaced with
    ``dict.get``.

    Returns:
        list of {'name': topic, 'value': count} dicts, one per distinct topic.
    """
    counts = {}
    for comment in commentList:
        topic = comment[9]
        if topic is not None:  # skip unlabeled comments
            counts[topic] = counts.get(topic, 0) + 1
    return [{'name': name, 'value': value} for name, value in counts.items()]
| 38 | + | ||
def mergeTopics(article_topics, comment_topics):
    """Merge two topic lists, summing the values of topics found in both.

    Args:
        article_topics: list of {'name': ..., 'value': ...} dicts.
        comment_topics: list of {'name': ..., 'value': ...} dicts.

    Returns:
        A single combined list of {'name': ..., 'value': ...} dicts.
    """
    totals = {}
    for entry in article_topics + comment_topics:
        key = entry['name']
        totals[key] = totals.get(key, 0) + entry['value']
    return [{'name': name, 'value': total} for name, total in totals.items()]
def getTopicData():
    """Read merged_topics.csv and return the top-10 topics as two lists.

    The CSV is assumed to already be sorted by value descending (as produced
    by writeTopicsToCSV), so the first ten rows are the top ten topics.
    NOTE(review): relies on ``pd`` coming from the star import of
    utils.getPublicData — confirm pandas is exported there.

    Returns:
        (xData, yData): topic names and their corresponding values.
    """
    top10 = pd.read_csv('./merged_topics.csv').head(10)
    return top10['name'].tolist(), top10['value'].tolist()
| 55 | + | ||
def getTopicPageCreatedAtCharData(topic):
    """Count, per date, how many articles and comments carry the given topic.

    Reads the module-level ``articleList`` (topic at column 14, date at
    column 7) and ``commentList`` (topic at column 9, date at column 1).

    Fixes vs. original: ``in dict.keys()`` membership tests and the
    check-then-increment double lookups replaced with ``dict.get``; dead
    commented-out return statements removed.

    Args:
        topic: topic label to filter on.

    Returns:
        (topic, ordered): the topic and a dict mapping "YYYY-MM-DD" date
        strings to counts, with keys in chronological order.
    """
    created_at = {}
    for article in articleList:
        if article[14] == topic:
            created_at[article[7]] = created_at.get(article[7], 0) + 1
    for comment in commentList:
        if comment[9] == topic:
            created_at[comment[1]] = created_at.get(comment[1], 0) + 1
    # Rebuild the dict with keys sorted chronologically (dicts preserve
    # insertion order).
    ordered = {
        k: created_at[k]
        for k in sorted(created_at, key=lambda d: datetime.datetime.strptime(d, "%Y-%m-%d"))
    }
    return topic, ordered
| 74 | + | ||
def writeTopicsToCSV(topics, file_name):
    """Write topics to a CSV file, sorted by value in descending order.

    Bug fix: the original checked whether the file already existed and
    skipped the header row in that case, yet it always opened the file in
    'w' mode (truncating it) — so re-running produced a header-less CSV
    that downstream readers (getTopicData) could not parse. Since the file
    is truncated either way, the header must always be written.

    Args:
        topics: list of {'name': ..., 'value': ...} dicts.
        file_name: destination CSV path (overwritten if present).
    """
    # Sort by value, descending, so the file's first rows are the top topics.
    sorted_topics = sorted(topics, key=lambda t: t['value'], reverse=True)
    with open(file_name, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=['name', 'value'])
        writer.writeheader()
        writer.writerows(sorted_topics)
if __name__ == '__main__':
    # One-off step for (re)building the merged topic CSV, kept for reference:
    # merged = mergeTopics(getTopicByArticle(), getTopicByComments())
    # writeTopicsToCSV(merged, 'merged_topics.csv')
    print(getTopicPageCreatedAtCharData("生活"))
-
Please register or login to post a comment