juanboy

爬虫结束自动打标注

@@ -2,6 +2,7 @@ import os
 from sqlalchemy import create_engine
 import pandas as pd
 from spiderDataPackage.settings import articleAddr,commentsAddr
+from model.topicDefine import *
 
 engine = create_engine('mysql+pymysql://XiaoXueQi:XiaoXueQi@47.92.235.6/Weibo_PublicOpinion_AnalysisSystem?charset=utf8mb4')
 
@@ -28,6 +29,7 @@ def saveData():
 
     os.remove(articleAddr)
    os.remove(commentsAddr)
+    update_data()
 
 if __name__ == '__main__':
     saveData()
@@ -1,4 +1,5 @@
 from utils.getPublicData import *
+from utils.predict import *
 articleList = getAllArticleData()
 commentList = getAllCommentsData()
 import csv
@@ -53,7 +54,7 @@ def getTopicData():
     yData = top_10_topics['value'].tolist()
     return xData, yData
 
-def getTopicPageCreatedAtCharData(topic):# 统计特定话题的评论在每个日期的数量,并返回日期和对应的评论数量
+def getTopicCreatedAtandpredictData(topic):# 统计特定话题的评论在每个日期的数量,并返回日期和对应的评论数量
     createdAt = {}
     for i in articleList:
         if i[14]==topic:
@@ -67,6 +68,10 @@ def getTopicPageCreatedAtCharData(topic):# 统计特定话题的评论在每个
             createdAt[i[1]] += 1
         else:
             createdAt[i[1]] = 1
+    createdAt = {k: createdAt[k] for k in sorted(createdAt, key=lambda date: datetime.datetime.strptime(date, "%Y-%m-%d"))}
+    print(createdAt)
+    createdAt.update(predict_future_values(createdAt))
+    print(createdAt)
     sorted_data = {k: createdAt[k] for k in sorted(createdAt, key=lambda date: datetime.datetime.strptime(date, "%Y-%m-%d"))}
     return topic,sorted_data
     # return topic,list(createdAt.keys()),list(createdAt.values())
@@ -90,4 +95,4 @@ if __name__ == '__main__':
     # 将话题数据写入 CSV 文件
     # merged_topics = mergeTopics(getTopicByArticle(), getTopicByComments())
     # writeTopicsToCSV(merged_topics, 'merged_topics.csv')
-    print(getTopicPageCreatedAtCharData("生活"))
+    print(getTopicCreatedAtandpredictData("生活"))