Toggle navigation
Toggle navigation
This project
Loading...
Sign in
万朱浩
/
Venue-Ops
Go to a project
Toggle navigation
Projects
Groups
Snippets
Help
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
YYL469
2024-07-04 13:25:09 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
a491308f89ad996e2044b32e3141efd420453420
a491308f
1 parent
b9558ad9
优化爬虫代码
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
7 additions
and
6 deletions
spider/saveData.py
spider/saveData.py
View file @
a491308
import
os
from
sqlalchemy
import
create_engine
import
pandas
as
pd
from
spiderDataPackage.settings
import
articleAddr
,
commentsAddr
# Shared SQLAlchemy engine used by saveData() for all of its read_sql/to_sql
# calls. The connection URL is read from the WEIBO_DB_URL environment variable
# when it is set, so credentials can be supplied at deploy time instead of
# being edited in source; the original literal remains as the fallback so the
# behaviour is unchanged when the variable is absent.
engine = create_engine(
    os.environ.get(
        'WEIBO_DB_URL',
        'mysql+pymysql://XiaoXueQi:XiaoXueQi@47.92.235.6/Weibo_PublicOpinion_AnalysisSystem?charset=utf8mb4',
    )
)
def
saveData
():
try
:
oldArticle
=
pd
.
read_sql
(
'select * from article'
,
engine
)
newArticle
=
pd
.
read_csv
(
'article.csv'
)
newArticle
=
pd
.
read_csv
(
articleAddr
)
oldComment
=
pd
.
read_sql
(
'select * from comments'
,
engine
)
newComment
=
pd
.
read_csv
(
'comments.csv'
)
newComment
=
pd
.
read_csv
(
commentsAddr
)
mergeArticle
=
pd
.
concat
([
newArticle
,
oldArticle
],
join
=
'inner'
)
mergeComment
=
pd
.
concat
([
newComment
,
oldComment
],
join
=
'inner'
)
...
...
@@ -20,13 +21,13 @@ def saveData():
mergeArticle
.
to_sql
(
'article'
,
con
=
engine
,
if_exists
=
'replace'
,
index
=
False
)
mergeComment
.
to_sql
(
'comments'
,
con
=
engine
,
if_exists
=
'replace'
,
index
=
False
)
except
:
newArticle
=
pd
.
read_csv
(
'article.csv'
)
newComment
=
pd
.
read_csv
(
'comments.csv'
)
newArticle
=
pd
.
read_csv
(
articleAddr
)
newComment
=
pd
.
read_csv
(
commentsAddr
)
newArticle
.
to_sql
(
'article'
,
con
=
engine
,
if_exists
=
'replace'
,
index
=
False
)
newComment
.
to_sql
(
'comments'
,
con
=
engine
,
if_exists
=
'replace'
,
index
=
False
)
os
.
remove
(
'./article.csv'
)
os
.
remove
(
'./comments.csv'
)
os
.
remove
(
articleAddr
)
os
.
remove
(
commentsAddr
)
# Script entry point: perform the CSV-to-database save only when this file is
# executed directly, not when it is imported by another module.
if __name__ == '__main__':
    saveData()
\ No newline at end of file
...
...
Please
register
or
login
to post a comment