juanboy

爬虫结束自动打标注

... ... @@ -13,85 +13,85 @@ def getArticleByType(type):
articles.append(i)
return articles
def getArticleLikeCount(type):
    """Count the articles of one type per like-count range.

    Args:
        type: Article type, passed unchanged to ``getArticleByType``.

    Returns:
        A ``(labels, counts)`` tuple of two lists of equal length, where
        ``counts[i]`` is the number of articles whose value falls in the
        range named by ``labels[i]``. A range includes its lower bound and
        excludes its upper bound; the last range is open-ended.
    """
    labels = ['0-100','100-1000','1000-5000','5000-15000','15000-30000','30000-50000','50000-~']
    # Exclusive upper bound of every range except the open-ended last one.
    upper_bounds = (100, 1000, 5000, 15000, 30000, 50000)
    counts = [0] * len(labels)
    for row in getArticleByType(type):
        # Field 1 is parsed as the like count (presumably stored as text).
        likes = int(row[1])
        # The bounds ascend, so the number of bounds already reached equals
        # the index of the range the value belongs to.
        slot = sum(1 for bound in upper_bounds if likes >= bound)
        counts[slot] += 1
    return labels, counts
def getArticleCommentsLen(type):
    """Count the articles of one type per comment-count range.

    Args:
        type: Article type, passed unchanged to ``getArticleByType``.

    Returns:
        A ``(labels, counts)`` tuple of two lists of equal length, where
        ``counts[i]`` is the number of articles whose value falls in the
        range named by ``labels[i]``. A range includes its lower bound and
        excludes its upper bound; the last range is open-ended, so every
        article is counted exactly once.
    """
    labels = ['0-100','100-500','500-1000','1000-1500','1500-3000','3000-5000','5000-10000','10000-15000','15000-~']
    # Exclusive upper bound of every range except the open-ended last one.
    # Must stay in step with ``labels``: always one bound fewer than labels.
    upper_bounds = (100, 500, 1000, 1500, 3000, 5000, 10000, 15000)
    counts = [0] * len(labels)
    for article in getArticleByType(type):
        # Field 2 is parsed as the comment count (presumably stored as text).
        comment_len = int(article[2])
        # The bounds ascend, so the number of bounds already reached equals
        # the index of the range the value belongs to.
        bucket = sum(comment_len >= bound for bound in upper_bounds)
        counts[bucket] += 1
    return labels, counts
def getArticleRepotsLen(type):
    """Count the articles of one type per repost-count range.

    Args:
        type: Article type, passed unchanged to ``getArticleByType``.

    Returns:
        A ``(labels, counts)`` tuple of two lists of equal length, where
        ``counts[i]`` is the number of articles whose value falls in the
        range named by ``labels[i]``. A range includes its lower bound and
        excludes its upper bound; the last range is open-ended, so every
        article is counted exactly once.
    """
    labels = ['0-100','100-300','300-500','500-1000','1000-2000','2000-3000','3000-4000','4000-5000','5000-10000','10000-15000','15000-30000','30000-70000','70000-~']
    # Exclusive upper bound of every range except the open-ended last one.
    # Must stay in step with ``labels``: always one bound fewer than labels.
    upper_bounds = (100, 300, 500, 1000, 2000, 3000, 4000, 5000, 10000, 15000, 30000, 70000)
    counts = [0] * len(labels)
    for article in getArticleByType(type):
        # Field 3 is parsed as the repost count (presumably stored as text).
        reposts = int(article[3])
        # The bounds ascend, so the number of bounds already reached equals
        # the index of the range the value belongs to.
        bucket = sum(reposts >= bound for bound in upper_bounds)
        counts[bucket] += 1
    return labels, counts
def getIPCharByArticleRegion():
articleRegionDic = {}
... ... @@ -125,26 +125,26 @@ def getIPCharByCommentsRegion():
})
return resultData
def getCommentDataOne():
    """Build a 100-bar histogram of field 2 of every comment.

    Reads the module-level ``commentList``. Field 2 of each entry is parsed
    as an integer (presumably the comment's like count -- confirm against
    the loader) and sorted into ranges that are 20 wide, from ``'0-20'`` up
    to ``'1980-2000'``.

    Returns:
        A ``(labels, counts)`` tuple of two 100-element lists. Values of
        2000 or more match no range and are left uncounted; values below
        zero are counted in the first range.
    """
    bucket_width = 20
    bucket_total = 100
    labels = [
        str(bucket_width * idx) + '-' + str(bucket_width * (idx + 1))
        for idx in range(bucket_total)
    ]
    counts = [0] * bucket_total
    for row in commentList:
        value = int(row[2])
        # First range whose exclusive upper bound exceeds the value.
        slot = value // bucket_width if value > 0 else 0
        if slot < bucket_total:
            counts[slot] += 1
    return labels, counts
def getCommentCharDataTwo():
def getCommentDataTwo():
genderDic = {}
for i in commentList:
if genderDic.get(i[6],-1) == -1:
genderDic[i[6]] = 1
else:
if i[6] in genderDic.keys():
genderDic[i[6]] += 1
else:
genderDic[i[6]] = 1
resultData = [{
'name':x[0],
'value':x[1]
... ... @@ -153,50 +153,50 @@ def getCommentCharDataTwo():
def getYuQingCharDataOne():
    """Tally the hot words by sentiment class.

    Every entry returned by ``getAllHotWords`` has its first field scored
    with ``SnowNLP(...).sentiments``. A score above 0.4 counts as positive,
    a score below 0.2 as negative, and anything else as neutral.

    Returns:
        A ``(labels, tallies, pairs)`` tuple: the three class labels, the
        matching counts, and the same data as a list of
        ``{'name': label, 'value': count}`` dicts.
    """
    # Labels read: positive, neutral, negative.
    labels = ['正面','中性','负面']
    tallies = [0] * len(labels)
    for entry in getAllHotWords():
        score = SnowNLP(entry[0]).sentiments
        if score > 0.4:
            slot = 0
        elif score < 0.2:
            slot = 2
        else:
            slot = 1
        tallies[slot] += 1
    pairs = [
        {'name': label, 'value': total}
        for label, total in zip(labels, tallies)
    ]
    return labels, tallies, pairs
def getYuQingCharDataTwo():
    """Tally comments and articles by sentiment class.

    Field 4 of every entry in the module-level ``commentList`` and field 5
    of every entry in ``articleList`` is scored with
    ``SnowNLP(...).sentiments``. A score above 0.4 counts as positive, a
    score below 0.2 as negative, and anything else as neutral.

    Returns:
        A ``(comment_rows, article_rows)`` tuple. Each element is a list of
        three ``{'name': label, 'value': count}`` dicts in the order
        positive, neutral, negative.
    """
    # Labels read: positive, neutral, negative.
    labels = ['正面', '中性', '负面']

    def tally(texts):
        # One fresh result row per label so the two tallies stay independent.
        rows = [{'name': label, 'value': 0} for label in labels]
        for text in texts:
            score = SnowNLP(text).sentiments
            if score > 0.4:
                rows[0]['value'] += 1
            elif score < 0.2:
                rows[2]['value'] += 1
            else:
                rows[1]['value'] += 1
        return rows

    comment_rows = tally(comment[4] for comment in commentList)
    article_rows = tally(article[5] for article in articleList)
    return comment_rows, article_rows
def getYuQingCharDataThree():
hotWordList = getAllHotWords()
... ...
... ... @@ -27,14 +27,14 @@ def getHomeCommentsLikeCountTopFore():# 获取评论中点赞最高的前四条
return list(sorted(commentsList,key=lambda x:int(x[2]),reverse=True))[:4]
def getHomeArticleCreatedAtChart():
    """Count the articles per creation date, newest date first.

    Reads field 7 of every entry in the module-level ``articleList`` and
    expects it to be a ``'%Y-%m-%d'`` date string.

    Returns:
        A ``(days, counts)`` tuple of two lists of equal length: the
        distinct date strings in descending date order, and the number of
        articles carrying each date.

    Raises:
        ValueError: If a date string does not match ``'%Y-%m-%d'``.
    """
    # Single pass over the articles instead of scanning every date for
    # every article.
    per_day = {}
    for article in articleList:
        day = article[7]
        per_day[day] = per_day.get(day, 0) + 1
    # Parsed datetimes compare directly; going through ``.timestamp()`` adds
    # nothing to the ordering and depends on the platform's mktime range.
    days = sorted(
        per_day,
        key=lambda day: datetime.strptime(day, '%Y-%m-%d'),
        reverse=True,
    )
    return days, [per_day[day] for day in days]
def getHomeTypeChart():# 统计每种类型的文章数量
typeDic = {}
... ...
... ... @@ -50,9 +50,9 @@ def getTopicData():
# 读取合并文件 merge.csv # 取前十个话题
top_10_topics = pd.read_csv('./merged_topics.csv').head(10)
# 获取话题名称和对应的值
xData = top_10_topics['name'].tolist()
yData = top_10_topics['value'].tolist()
return xData, yData
X = top_10_topics['name'].tolist()
Y = top_10_topics['value'].tolist()
return X, Y
def getTopicCreatedAtandpredictData(topic):# 统计特定话题的评论在每个日期的数量,并返回日期和对应的评论数量
createdAt = {}
... ...
... ... @@ -18,7 +18,7 @@ def home():
username = session.get('username')
articleLenMax, likeCountMaxAuthorName, cityMax = getHomeTagsData()
commentsLikeCountTopFore = getHomeCommentsLikeCountTopFore()
xData, yData = getHomeArticleCreatedAtChart()
X, Y = getHomeArticleCreatedAtChart()
typeChart = getHomeTypeChart()
createAtChart = getHomeCommentCreatedChart()
# getUserNameWordCloud()
... ... @@ -28,8 +28,8 @@ def home():
likeCountMaxAuthorName=likeCountMaxAuthorName,
cityMax=cityMax,
commentsLikeCountTopFore=commentsLikeCountTopFore,
xData=xData,
yData=yData,
X=X,
Y=Y,
typeChart=typeChart,
createAtChart=createAtChart)
... ... @@ -42,7 +42,7 @@ def hotWord():
if request.args.get('hotWord'):
defaultHotWord = request.args.get('hotWord')
hotWordLen = getHotWordLen(defaultHotWord)
xData, yData = getHotWordPageCreatedAtCharData(defaultHotWord)
X, Y = getHotWordPageCreatedAtCharData(defaultHotWord)
sentences = ''
value = SnowNLP(defaultHotWord).sentiments
if value == 0.5:
... ... @@ -59,8 +59,8 @@ def hotWord():
defaultHotWord=defaultHotWord,
hotWordLen=hotWordLen,
sentences=sentences,
xData=xData,
yData=yData,
X=X,
Y=Y,
comments=comments)
... ... @@ -72,7 +72,7 @@ def hotTopic():
if request.args.get('topic'):
defaultTopic = request.args.get('topic')
topicLen = getTopicLen(defaultTopic)
xData, yData = getTopicPageCreatedAtCharData()
X, Y = getTopicPageCreatedAtCharData()
sentences = ''
# ... 这里要嵌入 topic 相关内容(热度?)来填充 sentences
... ... @@ -84,8 +84,8 @@ def hotTopic():
defaultTopic=defaultTopic,
topicLen=topicLen,
sentences=sentences,
xData=xData,
yData=yData,
X=X,
Y=Y,
comments=comments)
... ... @@ -107,15 +107,15 @@ def articleChar():
typeList = getTypeList()
defaultType = typeList[0]
if request.args.get('type'): defaultType = request.args.get('type')
xData, yData = getArticleCharLikeCount(defaultType)
x1Data, y1Data = getArticleCharCommentsLen(defaultType)
x2Data, y2Data = getArticleCharRepotsLen(defaultType)
X, Y = getArticleLikeCount(defaultType)
x1Data, y1Data = getArticleCommentsLen(defaultType)
x2Data, y2Data = getArticleRepotsLen(defaultType)
return render_template('articleChar.html',
username=username,
typeList=typeList,
defaultType=defaultType,
xData=xData,
yData=yData,
X=X,
Y=Y,
x1Data=x1Data,
y1Data=y1Data,
x2Data=x2Data,
... ... @@ -136,28 +136,28 @@ def ipChar():
@pb.route('/commentChar')
def commentChar():
    """Render ``commentChar.html`` with the comment statistics.

    The template receives the session's username, the histogram labels and
    counts from ``getCommentDataOne`` as ``X`` and ``Y``, and the result of
    ``getCommentDataTwo`` as ``genderPieData``.
    """
    context = {'username': session.get('username')}
    context['X'], context['Y'] = getCommentDataOne()
    context['genderPieData'] = getCommentDataTwo()
    return render_template('commentChar.html', **context)
@pb.route('/yuqingChar')
def yuqingChar():
    """Render ``yuqingChar.html`` with the sentiment statistics.

    The template receives the session's username plus the results of the
    three ``getYuQingCharData*`` helpers under the names ``X``, ``Y``,
    ``finaldata``, ``finaldata1``, ``finaldata2``, ``x1Data`` and
    ``y1Data``.
    """
    context = {'username': session.get('username')}
    context['X'], context['Y'], context['finaldata'] = getYuQingCharDataOne()
    context['finaldata1'], context['finaldata2'] = getYuQingCharDataTwo()
    context['x1Data'], context['y1Data'] = getYuQingCharDataThree()
    return render_template('yuqingChar.html', **context)
... ...
... ... @@ -234,7 +234,7 @@
series: [
{
type: 'treemap',
data: {{ bieData | tojson }}
data: {{ finaldata | tojson }}
}
]
};
... ... @@ -272,7 +272,7 @@
labelLine: {
show: false
},
data: {{ bieData1 | tojson }}
data: {{ finaldata1 | tojson }}
},
{
name: '文章舆情结果',
... ... @@ -313,7 +313,7 @@
}
}
},
data: {{ bieData2 | tojson }}
data: {{ finaldata2 | tojson }}
}
]
};
... ...