Showing 1 changed file with 11 additions and 84 deletions
| 1 | from utils.getPublicData import * | 1 | from utils.getPublicData import * |
| 2 | -from utils.predict import * | ||
| 3 | -articleList = getAllArticleData() | ||
| 4 | -commentList = getAllCommentsData() | 2 | +from utils.predict import predict_future_values # Use the new function |
| 5 | import csv | 3 | import csv |
| 6 | import os | 4 | import os |
| 7 | import datetime | 5 | import datetime |
| 8 | -def getTopicByArticle():# 返回文章内容的话题字典 | ||
| 9 | - articleTopicDic = {} | ||
| 10 | - for i in articleList: | ||
| 11 | - if i[14] != None: | ||
| 12 | - if i[14] in articleTopicDic.keys(): | ||
| 13 | - articleTopicDic[i[14]] += 1 | ||
| 14 | - else: | ||
| 15 | - articleTopicDic[i[14]] = 1 | ||
| 16 | - resultData = [] | ||
| 17 | - for key,value in articleTopicDic.items(): | ||
| 18 | - resultData.append({ | ||
| 19 | - 'name':key, | ||
| 20 | - 'value':value | ||
| 21 | - }) | ||
| 22 | - return resultData | ||
| 23 | - | ||
| 24 | -def getTopicByComments():# 返回评论内容的话题字典 | ||
| 25 | - commentsTopicDic = {} | ||
| 26 | - for i in commentList: | ||
| 27 | - if i[9] != None: | ||
| 28 | - if i[9] in commentsTopicDic: | ||
| 29 | - commentsTopicDic[i[9]] += 1 | ||
| 30 | - else: | ||
| 31 | - commentsTopicDic[i[9]] = 1 | ||
| 32 | - resultData = [] | ||
| 33 | - for key,value in commentsTopicDic.items(): | ||
| 34 | - resultData.append({ | ||
| 35 | - 'name':key, | ||
| 36 | - 'value':value | ||
| 37 | - }) | ||
| 38 | - return resultData | ||
| 39 | - | ||
| 40 | -def mergeTopics(article_topics, comment_topics):# 合并话题 | ||
| 41 | - merged_dict = {} | ||
| 42 | - for topic in article_topics + comment_topics: | ||
| 43 | - if topic['name'] in merged_dict: | ||
| 44 | - merged_dict[topic['name']] += topic['value'] | ||
| 45 | - else: | ||
| 46 | - merged_dict[topic['name']] = topic['value'] | ||
| 47 | - merged_dict = sorted(merged_dict.items(), key=lambda item: item[1], reverse=True) | ||
| 48 | - merged_list = [[key, str(value)] for key, value in merged_dict] | ||
| 49 | - return merged_list | ||
| 50 | -def getAllTopicData(): | ||
| 51 | - # 读取合并文件 merge.csv | ||
| 52 | - # data = [] | ||
| 53 | - # df = pd.read_csv('./merged_topics.csv',encoding='utf8') | ||
| 54 | - # for i in df.values: | ||
| 55 | - # try: | ||
| 56 | - # data.append([ | ||
| 57 | - # re.search('[\u4e00-\u9fa5]+',str(i)).group(), | ||
| 58 | - # re.search('\d+',str(i)).group() | ||
| 59 | - # ]) | ||
| 60 | - # except: | ||
| 61 | - # continue | ||
| 62 | - return mergeTopics(getTopicByArticle(), getTopicByComments()) | 6 | +import pandas as pd |
| 63 | 7 | ||
| 64 | -def getTopicCreatedAtandpredictData(topic):# 统计特定话题的评论在每个日期的数量,并返回日期和对应的评论数量 | 8 | +def getTopicCreatedAtandpredictData(topic): |
| 65 | createdAt = {} | 9 | createdAt = {} |
| 66 | for i in articleList: | 10 | for i in articleList: |
| 67 | if i[14]==topic: | 11 | if i[14]==topic: |
| @@ -75,30 +19,13 @@ def getTopicCreatedAtandpredictData(topic):# 统计特定话题的评论在每 | @@ -75,30 +19,13 @@ def getTopicCreatedAtandpredictData(topic):# 统计特定话题的评论在每 | ||
| 75 | createdAt[i[1]] += 1 | 19 | createdAt[i[1]] += 1 |
| 76 | else: | 20 | else: |
| 77 | createdAt[i[1]] = 1 | 21 | createdAt[i[1]] = 1 |
| 78 | - createdAt = {k: createdAt[k] for k in sorted(createdAt, key=lambda date: datetime.datetime.strptime(date, "%Y-%m-%d"))} | ||
| 79 | - createdAt.update(predict_future_values(createdAt)) | ||
| 80 | - sorted_data = {k: createdAt[k] for k in sorted(createdAt, key=lambda date: datetime.datetime.strptime(date, "%Y-%m-%d"))} | ||
| 81 | - # result_list = [0] * (len(sorted_data) - 5) + [1] * 5 | ||
| 82 | - print(list(createdAt.keys()),list(createdAt.values())) | ||
| 83 | - return list(createdAt.keys()),list(createdAt.values()) | ||
| 84 | 22 | ||
| 85 | -def writeTopicsToCSV(topics, file_name): | ||
| 86 | - # 检查文件是否存在,如果存在则附加写入,否则新建一个 | ||
| 87 | - file_exists = os.path.isfile(file_name) | ||
| 88 | - # 按值的降序排序 | ||
| 89 | - sorted_topics = sorted(topics, key=lambda x: x['value'], reverse=True) | ||
| 90 | - with open(file_name, 'w', newline='', encoding='utf-8') as csvfile: | ||
| 91 | - fieldnames = ['name', 'value'] | ||
| 92 | - writer = csv.DictWriter(csvfile, fieldnames=fieldnames) | ||
| 93 | - # 如果文件不存在,则写入表头 | ||
| 94 | - if not file_exists: | ||
| 95 | - writer.writeheader() | ||
| 96 | - # 写入数据 | ||
| 97 | - for topic in sorted_topics: | ||
| 98 | - writer.writerow(topic) | ||
| 99 | -if __name__ == '__main__': | ||
| 100 | - # 将话题数据写入 CSV 文件 | ||
| 101 | - # print(mergeTopics(getTopicByArticle(), getTopicByComments())) | ||
| 102 | - # writeTopicsToCSV(merged_topics, 'merged_topics.csv') | ||
| 103 | - print(getAllTopicData()) | 23 | + # Use the improved time series prediction approach |
| 24 | + predictions = predict_future_values(createdAt, forecast_days=5) | ||
| 25 | + | ||
| 26 | + # Merge historical data and predictions | ||
| 27 | + combined_data = {**createdAt, **predictions} | ||
| 28 | + combined_data = {k: combined_data[k] for k in sorted(combined_data, key=lambda date: datetime.datetime.strptime(date, "%Y-%m-%d"))} | ||
| 104 | 29 | ||
| 30 | + print(list(combined_data.keys()), list(combined_data.values())) | ||
| 31 | + return list(combined_data.keys()), list(combined_data.values()) |
-
Please register or log in to post a comment