juanboy

爬虫结束自动打标注

@@ -13,85 +13,85 @@ def getArticleByType(type): @@ -13,85 +13,85 @@ def getArticleByType(type):
13 articles.append(i) 13 articles.append(i)
14 return articles 14 return articles
15 15
16 -def getArticleCharLikeCount(type): 16 +def getArticleLikeCount(type):
17 articles = getArticleByType(type) 17 articles = getArticleByType(type)
18 - xData = ['0-100','100-1000','1000-5000','5000-15000','15000-30000','30000-50000','50000-~']  
19 - yData = [0 for x in range(len(xData))] 18 + X = ['0-100','100-1000','1000-5000','5000-15000','15000-30000','30000-50000','50000-~']
  19 + Y = [0 for x in range(len(X))]
20 for article in articles: 20 for article in articles:
21 likeCount = int(article[1]) 21 likeCount = int(article[1])
22 if likeCount < 100: 22 if likeCount < 100:
23 - yData[0] += 1 23 + Y[0] += 1
24 elif likeCount < 1000: 24 elif likeCount < 1000:
25 - yData[1] += 1 25 + Y[1] += 1
26 elif likeCount < 5000: 26 elif likeCount < 5000:
27 - yData[2] += 1 27 + Y[2] += 1
28 elif likeCount < 15000: 28 elif likeCount < 15000:
29 - yData[3] += 1 29 + Y[3] += 1
30 elif likeCount < 30000: 30 elif likeCount < 30000:
31 - yData[4] += 1 31 + Y[4] += 1
32 elif likeCount < 50000: 32 elif likeCount < 50000:
33 - yData[5] += 1 33 + Y[5] += 1
34 elif likeCount >= 50000: 34 elif likeCount >= 50000:
35 - yData[6] += 1  
36 - return xData,yData 35 + Y[6] += 1
  36 + return X,Y
37 37
38 -def getArticleCharCommentsLen(type): 38 +def getArticleCommentsLen(type):
39 articles = getArticleByType(type) 39 articles = getArticleByType(type)
40 - xData = ['0-100','100-500','500-1000','1000-1500','1500-3000','3000-5000','5000-10000','10000-15000','15000-~']  
41 - yData = [0 for x in range(len(xData))] 40 + X = ['0-100','100-500','500-1000','1000-1500','1500-3000','3000-5000','5000-10000','10000-15000','15000-~']
  41 + Y = [0 for x in range(len(X))]
42 for article in articles: 42 for article in articles:
43 commentLen = int(article[2]) 43 commentLen = int(article[2])
44 if commentLen < 100: 44 if commentLen < 100:
45 - yData[0] += 1 45 + Y[0] += 1
46 elif commentLen < 500: 46 elif commentLen < 500:
47 - yData[1] += 1 47 + Y[1] += 1
48 elif commentLen < 5000: 48 elif commentLen < 5000:
49 - yData[2] += 1 49 + Y[2] += 1
50 elif commentLen < 1000: 50 elif commentLen < 1000:
51 - yData[3] += 1 51 + Y[3] += 1
52 elif commentLen < 1500: 52 elif commentLen < 1500:
53 - yData[4] += 1 53 + Y[4] += 1
54 elif commentLen < 3000: 54 elif commentLen < 3000:
55 - yData[5] += 1 55 + Y[5] += 1
56 elif commentLen < 5000: 56 elif commentLen < 5000:
57 - yData[6] += 1 57 + Y[6] += 1
58 elif commentLen < 10000: 58 elif commentLen < 10000:
59 - yData[7] += 1 59 + Y[7] += 1
60 elif commentLen >= 15000: 60 elif commentLen >= 15000:
61 - yData[8] += 1  
62 - return xData,yData 61 + Y[8] += 1
  62 + return X,Y
63 63
64 -def getArticleCharRepotsLen(type): 64 +def getArticleRepotsLen(type):
65 articles = getArticleByType(type) 65 articles = getArticleByType(type)
66 - xData = ['0-100','100-300','300-500','500-1000','1000-2000','2000-3000','3000-4000','4000-5000','5000-10000','10000-15000','15000-30000','30000-70000','70000-~']  
67 - yData = [0 for x in range(len(xData))] 66 + X = ['0-100','100-300','300-500','500-1000','1000-2000','2000-3000','3000-4000','4000-5000','5000-10000','10000-15000','15000-30000','30000-70000','70000-~']
  67 + Y = [0 for x in range(len(X))]
68 for article in articles: 68 for article in articles:
69 repostsCount = int(article[3]) 69 repostsCount = int(article[3])
70 if repostsCount < 100: 70 if repostsCount < 100:
71 - yData[0] += 1 71 + Y[0] += 1
72 elif repostsCount < 300: 72 elif repostsCount < 300:
73 - yData[1] += 1 73 + Y[1] += 1
74 elif repostsCount < 500: 74 elif repostsCount < 500:
75 - yData[2] += 1 75 + Y[2] += 1
76 elif repostsCount < 1000: 76 elif repostsCount < 1000:
77 - yData[3] += 1 77 + Y[3] += 1
78 elif repostsCount < 3000: 78 elif repostsCount < 3000:
79 - yData[4] += 1 79 + Y[4] += 1
80 elif repostsCount < 4000: 80 elif repostsCount < 4000:
81 - yData[5] += 1 81 + Y[5] += 1
82 elif repostsCount < 5000: 82 elif repostsCount < 5000:
83 - yData[6] += 1 83 + Y[6] += 1
84 elif repostsCount < 10000: 84 elif repostsCount < 10000:
85 - yData[7] += 1 85 + Y[7] += 1
86 elif repostsCount < 15000: 86 elif repostsCount < 15000:
87 - yData[8] += 1 87 + Y[8] += 1
88 elif repostsCount < 30000: 88 elif repostsCount < 30000:
89 - yData[9] += 1 89 + Y[9] += 1
90 elif repostsCount < 70000: 90 elif repostsCount < 70000:
91 - yData[10] += 1 91 + Y[10] += 1
92 elif repostsCount >= 70000: 92 elif repostsCount >= 70000:
93 - yData[11] += 1  
94 - return xData,yData 93 + Y[11] += 1
  94 + return X,Y
95 95
96 def getIPCharByArticleRegion(): 96 def getIPCharByArticleRegion():
97 articleRegionDic = {} 97 articleRegionDic = {}
@@ -125,26 +125,26 @@ def getIPCharByCommentsRegion(): @@ -125,26 +125,26 @@ def getIPCharByCommentsRegion():
125 }) 125 })
126 return resultData 126 return resultData
127 127
128 -def getCommentCharDataOne():  
129 - xData = [] 128 +def getCommentDataOne():
  129 + X = []
130 rangeNum = 20 130 rangeNum = 20
131 for item in range(100): 131 for item in range(100):
132 - xData.append(str(rangeNum * item) + '-' + str(rangeNum * (item + 1)))  
133 - yData = [0 for x in range(len(xData))] 132 + X.append(str(rangeNum * item) + '-' + str(rangeNum * (item + 1)))
  133 + Y = [0 for x in range(len(X))]
134 for comment in commentList: 134 for comment in commentList:
135 for item in range(100): 135 for item in range(100):
136 if int(comment[2]) < rangeNum * (item + 1): 136 if int(comment[2]) < rangeNum * (item + 1):
137 - yData[item] += 1 137 + Y[item] += 1
138 break 138 break
139 - return xData,yData 139 + return X,Y
140 140
141 -def getCommentCharDataTwo(): 141 +def getCommentDataTwo():
142 genderDic = {} 142 genderDic = {}
143 for i in commentList: 143 for i in commentList:
144 - if genderDic.get(i[6],-1) == -1:  
145 - genderDic[i[6]] = 1  
146 - else: 144 + if i[6] in genderDic.keys():
147 genderDic[i[6]] += 1 145 genderDic[i[6]] += 1
  146 + else:
  147 + genderDic[i[6]] = 1
148 resultData = [{ 148 resultData = [{
149 'name':x[0], 149 'name':x[0],
150 'value':x[1] 150 'value':x[1]
@@ -153,50 +153,50 @@ def getCommentCharDataTwo(): @@ -153,50 +153,50 @@ def getCommentCharDataTwo():
153 153
154 def getYuQingCharDataOne(): 154 def getYuQingCharDataOne():
155 hotWordList = getAllHotWords() 155 hotWordList = getAllHotWords()
156 - xData = ['正面','中性','负面']  
157 - yData = [0,0,0] 156 + X = ['正面','中性','负面']
  157 + Y = [0,0,0]
158 for word in hotWordList: 158 for word in hotWordList:
159 emotionValue = SnowNLP(word[0]).sentiments 159 emotionValue = SnowNLP(word[0]).sentiments
160 if emotionValue > 0.4: 160 if emotionValue > 0.4:
161 - yData[0] += 1 161 + Y[0] += 1
162 elif emotionValue < 0.2: 162 elif emotionValue < 0.2:
163 - yData[2] += 1 163 + Y[2] += 1
164 else: 164 else:
165 - yData[1] += 1  
166 - bieData = [{ 165 + Y[1] += 1
  166 + finaldata = [{
167 'name':x, 167 'name':x,
168 - 'value':yData[index]  
169 - } for index,x in enumerate(xData)]  
170 - return xData,yData,bieData 168 + 'value':Y[index]
  169 + } for index,x in enumerate(X)]
  170 + return X,Y,finaldata
171 171
172 def getYuQingCharDataTwo(): 172 def getYuQingCharDataTwo():
173 - xData = ['正面', '中性', '负面']  
174 - bieData1 = [{ 173 + X = ['正面', '中性', '负面']
  174 + finaldata1 = [{
175 'name':x, 175 'name':x,
176 'value':0 176 'value':0
177 - } for x in xData]  
178 - bieData2 = [{ 177 + } for x in X]
  178 + finaldata2 = [{
179 'name': x, 179 'name': x,
180 'value': 0 180 'value': 0
181 - } for x in xData] 181 + } for x in X]
182 182
183 for comment in commentList: 183 for comment in commentList:
184 emotionValue = SnowNLP(comment[4]).sentiments 184 emotionValue = SnowNLP(comment[4]).sentiments
185 if emotionValue > 0.4: 185 if emotionValue > 0.4:
186 - bieData1[0]['value'] += 1 186 + finaldata1[0]['value'] += 1
187 elif emotionValue < 0.2: 187 elif emotionValue < 0.2:
188 - bieData1[2]['value'] += 1 188 + finaldata1[2]['value'] += 1
189 else: 189 else:
190 - bieData1[1]['value'] += 1 190 + finaldata1[1]['value'] += 1
191 for artile in articleList: 191 for artile in articleList:
192 emotionValue = SnowNLP(artile[5]).sentiments 192 emotionValue = SnowNLP(artile[5]).sentiments
193 if emotionValue > 0.4: 193 if emotionValue > 0.4:
194 - bieData2[0]['value'] += 1 194 + finaldata2[0]['value'] += 1
195 elif emotionValue < 0.2: 195 elif emotionValue < 0.2:
196 - bieData2[2]['value'] += 1 196 + finaldata2[2]['value'] += 1
197 else: 197 else:
198 - bieData2[1]['value'] += 1  
199 - return bieData1,bieData2 198 + finaldata2[1]['value'] += 1
  199 + return finaldata1,finaldata2
200 200
201 def getYuQingCharDataThree(): 201 def getYuQingCharDataThree():
202 hotWordList = getAllHotWords() 202 hotWordList = getAllHotWords()
@@ -27,14 +27,14 @@ def getHomeCommentsLikeCountTopFore():# 获取评论中点赞最高的前四条 @@ -27,14 +27,14 @@ def getHomeCommentsLikeCountTopFore():# 获取评论中点赞最高的前四条
27 return list(sorted(commentsList,key=lambda x:int(x[2]),reverse=True))[:4] 27 return list(sorted(commentsList,key=lambda x:int(x[2]),reverse=True))[:4]
28 28
29 def getHomeArticleCreatedAtChart():# 根据日期分别计算该日期的文章数 29 def getHomeArticleCreatedAtChart():# 根据日期分别计算该日期的文章数
30 - xData = list(set([x[7] for x in articleList]))  
31 - xData = list(sorted(xData,key=lambda x:datetime.strptime(x,'%Y-%m-%d').timestamp(),reverse=True))  
32 - yData = [0 for x in range(len(xData))] 30 + X = list(set([x[7] for x in articleList]))
  31 + X = list(sorted(X,key=lambda x:datetime.strptime(x,'%Y-%m-%d').timestamp(),reverse=True))
  32 + Y = [0 for x in range(len(X))]
33 for article in articleList: 33 for article in articleList:
34 - for index,j in enumerate(xData):# 返回索引和值 34 + for index,j in enumerate(X):# 返回索引和值
35 if article[7] == j: 35 if article[7] == j:
36 - yData[index] += 1  
37 - return xData,yData 36 + Y[index] += 1
  37 + return X,Y
38 38
39 def getHomeTypeChart():# 统计每种类型的文章数量 39 def getHomeTypeChart():# 统计每种类型的文章数量
40 typeDic = {} 40 typeDic = {}
@@ -50,9 +50,9 @@ def getTopicData(): @@ -50,9 +50,9 @@ def getTopicData():
50 # 读取合并文件 merge.csv # 取前十个话题 50 # 读取合并文件 merge.csv # 取前十个话题
51 top_10_topics = pd.read_csv('./merged_topics.csv').head(10) 51 top_10_topics = pd.read_csv('./merged_topics.csv').head(10)
52 # 获取话题名称和对应的值 52 # 获取话题名称和对应的值
53 - xData = top_10_topics['name'].tolist()  
54 - yData = top_10_topics['value'].tolist()  
55 - return xData, yData 53 + X = top_10_topics['name'].tolist()
  54 + Y = top_10_topics['value'].tolist()
  55 + return X, Y
56 56
57 def getTopicCreatedAtandpredictData(topic):# 统计特定话题的评论在每个日期的数量,并返回日期和对应的评论数量 57 def getTopicCreatedAtandpredictData(topic):# 统计特定话题的评论在每个日期的数量,并返回日期和对应的评论数量
58 createdAt = {} 58 createdAt = {}
@@ -18,7 +18,7 @@ def home(): @@ -18,7 +18,7 @@ def home():
18 username = session.get('username') 18 username = session.get('username')
19 articleLenMax, likeCountMaxAuthorName, cityMax = getHomeTagsData() 19 articleLenMax, likeCountMaxAuthorName, cityMax = getHomeTagsData()
20 commentsLikeCountTopFore = getHomeCommentsLikeCountTopFore() 20 commentsLikeCountTopFore = getHomeCommentsLikeCountTopFore()
21 - xData, yData = getHomeArticleCreatedAtChart() 21 + X, Y = getHomeArticleCreatedAtChart()
22 typeChart = getHomeTypeChart() 22 typeChart = getHomeTypeChart()
23 createAtChart = getHomeCommentCreatedChart() 23 createAtChart = getHomeCommentCreatedChart()
24 # getUserNameWordCloud() 24 # getUserNameWordCloud()
@@ -28,8 +28,8 @@ def home(): @@ -28,8 +28,8 @@ def home():
28 likeCountMaxAuthorName=likeCountMaxAuthorName, 28 likeCountMaxAuthorName=likeCountMaxAuthorName,
29 cityMax=cityMax, 29 cityMax=cityMax,
30 commentsLikeCountTopFore=commentsLikeCountTopFore, 30 commentsLikeCountTopFore=commentsLikeCountTopFore,
31 - xData=xData,  
32 - yData=yData, 31 + X=X,
  32 + Y=Y,
33 typeChart=typeChart, 33 typeChart=typeChart,
34 createAtChart=createAtChart) 34 createAtChart=createAtChart)
35 35
@@ -42,7 +42,7 @@ def hotWord(): @@ -42,7 +42,7 @@ def hotWord():
42 if request.args.get('hotWord'): 42 if request.args.get('hotWord'):
43 defaultHotWord = request.args.get('hotWord') 43 defaultHotWord = request.args.get('hotWord')
44 hotWordLen = getHotWordLen(defaultHotWord) 44 hotWordLen = getHotWordLen(defaultHotWord)
45 - xData, yData = getHotWordPageCreatedAtCharData(defaultHotWord) 45 + X, Y = getHotWordPageCreatedAtCharData(defaultHotWord)
46 sentences = '' 46 sentences = ''
47 value = SnowNLP(defaultHotWord).sentiments 47 value = SnowNLP(defaultHotWord).sentiments
48 if value == 0.5: 48 if value == 0.5:
@@ -59,8 +59,8 @@ def hotWord(): @@ -59,8 +59,8 @@ def hotWord():
59 defaultHotWord=defaultHotWord, 59 defaultHotWord=defaultHotWord,
60 hotWordLen=hotWordLen, 60 hotWordLen=hotWordLen,
61 sentences=sentences, 61 sentences=sentences,
62 - xData=xData,  
63 - yData=yData, 62 + X=X,
  63 + Y=Y,
64 comments=comments) 64 comments=comments)
65 65
66 66
@@ -72,7 +72,7 @@ def hotTopic(): @@ -72,7 +72,7 @@ def hotTopic():
72 if request.args.get('topic'): 72 if request.args.get('topic'):
73 defaultTopic = request.args.get('topic') 73 defaultTopic = request.args.get('topic')
74 topicLen = getTopicLen(defaultTopic) 74 topicLen = getTopicLen(defaultTopic)
75 - xData, yData = getTopicPageCreatedAtCharData() 75 + X, Y = getTopicPageCreatedAtCharData()
76 sentences = '' 76 sentences = ''
77 77
78 # ... 这里要嵌入 topic 相关内容(热度?)来填充 sentences 78 # ... 这里要嵌入 topic 相关内容(热度?)来填充 sentences
@@ -84,8 +84,8 @@ def hotTopic(): @@ -84,8 +84,8 @@ def hotTopic():
84 defaultTopic=defaultTopic, 84 defaultTopic=defaultTopic,
85 topicLen=topicLen, 85 topicLen=topicLen,
86 sentences=sentences, 86 sentences=sentences,
87 - xData=xData,  
88 - yData=yData, 87 + X=X,
  88 + Y=Y,
89 comments=comments) 89 comments=comments)
90 90
91 91
@@ -107,15 +107,15 @@ def articleChar(): @@ -107,15 +107,15 @@ def articleChar():
107 typeList = getTypeList() 107 typeList = getTypeList()
108 defaultType = typeList[0] 108 defaultType = typeList[0]
109 if request.args.get('type'): defaultType = request.args.get('type') 109 if request.args.get('type'): defaultType = request.args.get('type')
110 - xData, yData = getArticleCharLikeCount(defaultType)  
111 - x1Data, y1Data = getArticleCharCommentsLen(defaultType)  
112 - x2Data, y2Data = getArticleCharRepotsLen(defaultType) 110 + X, Y = getArticleLikeCount(defaultType)
  111 + x1Data, y1Data = getArticleCommentsLen(defaultType)
  112 + x2Data, y2Data = getArticleRepotsLen(defaultType)
113 return render_template('articleChar.html', 113 return render_template('articleChar.html',
114 username=username, 114 username=username,
115 typeList=typeList, 115 typeList=typeList,
116 defaultType=defaultType, 116 defaultType=defaultType,
117 - xData=xData,  
118 - yData=yData, 117 + X=X,
  118 + Y=Y,
119 x1Data=x1Data, 119 x1Data=x1Data,
120 y1Data=y1Data, 120 y1Data=y1Data,
121 x2Data=x2Data, 121 x2Data=x2Data,
@@ -136,28 +136,28 @@ def ipChar(): @@ -136,28 +136,28 @@ def ipChar():
136 @pb.route('/commentChar') 136 @pb.route('/commentChar')
137 def commentChar(): 137 def commentChar():
138 username = session.get('username') 138 username = session.get('username')
139 - xData, yData = getCommentCharDataOne()  
140 - genderPieData = getCommentCharDataTwo() 139 + X, Y = getCommentDataOne()
  140 + genderPieData = getCommentDataTwo()
141 return render_template('commentChar.html', 141 return render_template('commentChar.html',
142 username=username, 142 username=username,
143 - xData=xData,  
144 - yData=yData, 143 + X=X,
  144 + Y=Y,
145 genderPieData=genderPieData) 145 genderPieData=genderPieData)
146 146
147 147
148 @pb.route('/yuqingChar') 148 @pb.route('/yuqingChar')
149 def yuqingChar(): 149 def yuqingChar():
150 username = session.get('username') 150 username = session.get('username')
151 - xData, yData, bieData = getYuQingCharDataOne()  
152 - bieData1, bieData2 = getYuQingCharDataTwo() 151 + X, Y, finaldata = getYuQingCharDataOne()
  152 + finaldata1, finaldata2 = getYuQingCharDataTwo()
153 x1Data, y1Data = getYuQingCharDataThree() 153 x1Data, y1Data = getYuQingCharDataThree()
154 return render_template('yuqingChar.html', 154 return render_template('yuqingChar.html',
155 username=username, 155 username=username,
156 - xData=xData,  
157 - yData=yData,  
158 - bieData=bieData,  
159 - bieData1=bieData1,  
160 - bieData2=bieData2, 156 + X=X,
  157 + Y=Y,
  158 + finaldata=finaldata,
  159 + finaldata1=finaldata1,
  160 + finaldata2=finaldata2,
161 x1Data=x1Data, 161 x1Data=x1Data,
162 y1Data=y1Data) 162 y1Data=y1Data)
163 163
@@ -234,7 +234,7 @@ @@ -234,7 +234,7 @@
234 series: [ 234 series: [
235 { 235 {
236 type: 'treemap', 236 type: 'treemap',
237 - data: {{ bieData | tojson }} 237 + data: {{ finaldata | tojson }}
238 } 238 }
239 ] 239 ]
240 }; 240 };
@@ -272,7 +272,7 @@ @@ -272,7 +272,7 @@
272 labelLine: { 272 labelLine: {
273 show: false 273 show: false
274 }, 274 },
275 - data: {{ bieData1 | tojson }} 275 + data: {{ finaldata1 | tojson }}
276 }, 276 },
277 { 277 {
278 name: '文章舆情结果', 278 name: '文章舆情结果',
@@ -313,7 +313,7 @@ @@ -313,7 +313,7 @@
313 } 313 }
314 } 314 }
315 }, 315 },
316 - data: {{ bieData2 | tojson }} 316 + data: {{ finaldata2 | tojson }}
317 } 317 }
318 ] 318 ]
319 }; 319 };