Toggle navigation
Toggle navigation
This project
Loading...
Sign in
万朱浩
/
Venue-Ops
Go to a project
Toggle navigation
Projects
Groups
Snippets
Help
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
juanboy
2024-07-04 15:33:20 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
b3b3cff5050a17051175c4d2df09cbf77bd0e6aa
b3b3cff5
1 parent
c9225962
爬虫结束自动打标注
Hide whitespace changes
Inline
Side-by-side
Showing 3 changed files with 9 additions and 2 deletions
spider/saveData.py
utils/predict_demo1.py → utils/predict.py
utils/yuqingpredict.py
spider/saveData.py
View file @
b3b3cff
...
...
@@ -2,6 +2,7 @@ import os
from
sqlalchemy
import
create_engine
import
pandas
as
pd
from
spiderDataPackage.settings
import
articleAddr
,
commentsAddr
from
model.topicDefine
import
*
engine
=
create_engine
(
'mysql+pymysql://XiaoXueQi:XiaoXueQi@47.92.235.6/Weibo_PublicOpinion_AnalysisSystem?charset=utf8mb4'
)
...
...
@@ -28,6 +29,7 @@ def saveData():
os
.
remove
(
articleAddr
)
os
.
remove
(
commentsAddr
)
update_data
()
if
__name__
==
'__main__'
:
saveData
()
\ No newline at end of file
...
...
utils/predict_demo1.py → utils/predict.py
View file @
b3b3cff
utils/yuqingpredict.py
View file @
b3b3cff
from
utils.getPublicData
import
*
from
utils.predict
import
*
articleList
=
getAllArticleData
()
commentList
=
getAllCommentsData
()
import
csv
...
...
@@ -53,7 +54,7 @@ def getTopicData():
yData
=
top_10_topics
[
'value'
]
.
tolist
()
return
xData
,
yData
def
getTopic
PageCreatedAtChar
Data
(
topic
):
# 统计特定话题的评论在每个日期的数量,并返回日期和对应的评论数量
def
getTopic
CreatedAtandpredict
Data
(
topic
):
# 统计特定话题的评论在每个日期的数量,并返回日期和对应的评论数量
createdAt
=
{}
for
i
in
articleList
:
if
i
[
14
]
==
topic
:
...
...
@@ -67,6 +68,10 @@ def getTopicPageCreatedAtCharData(topic):# 统计特定话题的评论在每个
createdAt
[
i
[
1
]]
+=
1
else
:
createdAt
[
i
[
1
]]
=
1
createdAt
=
{
k
:
createdAt
[
k
]
for
k
in
sorted
(
createdAt
,
key
=
lambda
date
:
datetime
.
datetime
.
strptime
(
date
,
"
%
Y-
%
m-
%
d"
))}
print
(
createdAt
)
createdAt
.
update
(
predict_future_values
(
createdAt
))
print
(
createdAt
)
sorted_data
=
{
k
:
createdAt
[
k
]
for
k
in
sorted
(
createdAt
,
key
=
lambda
date
:
datetime
.
datetime
.
strptime
(
date
,
"
%
Y-
%
m-
%
d"
))}
return
topic
,
sorted_data
# return topic,list(createdAt.keys()),list(createdAt.values())
...
...
@@ -90,4 +95,4 @@ if __name__ == '__main__':
# 将话题数据写入 CSV 文件
# merged_topics = mergeTopics(getTopicByArticle(), getTopicByComments())
# writeTopicsToCSV(merged_topics, 'merged_topics.csv')
print
(
getTopic
PageCreatedAtChar
Data
(
"生活"
))
print
(
getTopic
CreatedAtandpredict
Data
(
"生活"
))
...
...
Please register or login to post a comment