Toggle navigation
Toggle navigation
This project
Loading...
Sign in
万朱浩
/
Venue-Ops
Go to a project
Toggle navigation
Projects
Groups
Snippets
Help
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
YYL469
2024-07-04 13:19:34 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
b9558ad9439492f33070e75aa05751398475d884
b9558ad9
1 parent
579cb817
修改爬虫bug
Show whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
9 additions
and
8 deletions
app.py
spider/main.py
spider/saveData.py
spider/spiderData.py
spider/spiderDataPackage/spiderComments.py
spider/spiderDataPackage/spiderContent.py
spider/spiderDataPackage/spiderNav.py
app.py
View file @
b9558ad
...
...
@@ -36,7 +36,7 @@ def run_spider_script():
if
__name__
==
'__main__'
:
scheduler
=
BackgroundScheduler
(
timezone
=
utc
)
scheduler
.
add_job
(
run_spider_script
,
'interval'
,
minutes
=
1
)
scheduler
.
add_job
(
run_spider_script
,
'interval'
,
hours
=
5
)
scheduler
.
start
()
try
:
...
...
spider/main.py
View file @
b9558ad
from
spiderData
import
spiderData
from
saveData
import
save
_to_sql
as
save
Data
from
saveData
import
saveData
def
main
():
print
(
'正在爬取数据'
)
...
...
spider/saveData.py
View file @
b9558ad
...
...
@@ -4,7 +4,7 @@ import pandas as pd
engine
=
create_engine
(
'mysql+pymysql://XiaoXueQi:XiaoXueQi@47.92.235.6/Weibo_PublicOpinion_AnalysisSystem?charset=utf8mb4'
)
def
save
_to_sql
():
def
save
Data
():
try
:
oldArticle
=
pd
.
read_sql
(
'select * from article'
,
engine
)
newArticle
=
pd
.
read_csv
(
'article.csv'
)
...
...
@@ -29,4 +29,4 @@ def save_to_sql():
os
.
remove
(
'./comments.csv'
)
if
__name__
==
'__main__'
:
save_to_sql
()
\ No newline at end of file
saveData
()
\ No newline at end of file
...
...
spider/spiderData.py
View file @
b9558ad
from
spiderDataPackage.spiderNav
import
start
as
spiderNav
from
spiderDataPackage.spiderContent
import
start
as
spiderContent
from
spiderDataPackage.spiderComments
import
start
as
spiderComments
from
spiderDataPackage.settings
import
navAddr
import
os
def
spiderData
():
if
not
os
.
path
.
exists
(
'./nav.csv'
):
if
not
os
.
path
.
exists
(
navAddr
):
print
(
'正在爬取导航栏数据'
)
spiderNav
()
print
(
'正在爬取文章数据'
)
...
...
spider/spiderDataPackage/spiderComments.py
View file @
b9558ad
...
...
@@ -3,7 +3,7 @@ import requests
import
csv
import
os
from
datetime
import
datetime
from
settings
import
articleAddr
,
commentsAddr
from
.
settings
import
articleAddr
,
commentsAddr
def
init
():
if
not
os
.
path
.
exists
(
commentsAddr
):
...
...
spider/spiderDataPackage/spiderContent.py
View file @
b9558ad
...
...
@@ -3,7 +3,7 @@ import requests
import
csv
import
os
from
datetime
import
datetime
from
settings
import
navAddr
,
articleAddr
from
.
settings
import
navAddr
,
articleAddr
def
init
():
if
not
os
.
path
.
exists
(
articleAddr
):
...
...
spider/spiderDataPackage/spiderNav.py
View file @
b9558ad
...
...
@@ -2,7 +2,7 @@ import requests
import
csv
import
numpy
as
np
import
os
from
settings
import
navAddr
from
.
settings
import
navAddr
def
init
():
if
not
os
.
path
.
exists
(
navAddr
):
with
open
(
navAddr
,
'w'
,
encoding
=
'utf-8'
,
newline
=
''
)
as
csvFile
:
...
...
Please
register
or
login
to post a comment