YYL469

修改爬虫bug

... ... @@ -36,7 +36,7 @@ def run_spider_script():
if __name__ == '__main__':
scheduler = BackgroundScheduler(timezone=utc)
scheduler.add_job(run_spider_script, 'interval', minutes=1)
scheduler.add_job(run_spider_script, 'interval', hours=5)
scheduler.start()
try:
... ...
from spiderData import spiderData
from saveData import save_to_sql as saveData
from saveData import saveData
def main():
print('正在爬取数据')
... ...
... ... @@ -4,7 +4,7 @@ import pandas as pd
engine = create_engine('mysql+pymysql://XiaoXueQi:XiaoXueQi@47.92.235.6/Weibo_PublicOpinion_AnalysisSystem?charset=utf8mb4')
def save_to_sql():
def saveData():
try:
oldArticle = pd.read_sql('select * from article',engine)
newArticle = pd.read_csv('article.csv')
... ... @@ -29,4 +29,4 @@ def save_to_sql():
os.remove('./comments.csv')
if __name__ == '__main__':
save_to_sql()
\ No newline at end of file
saveData()
\ No newline at end of file
... ...
from spiderDataPackage.spiderNav import start as spiderNav
from spiderDataPackage.spiderContent import start as spiderContent
from spiderDataPackage.spiderComments import start as spiderComments
from spiderDataPackage.settings import navAddr
import os
def spiderData():
if not os.path.exists('./nav.csv'):
if not os.path.exists(navAddr):
print('正在爬取导航栏数据')
spiderNav()
print('正在爬取文章数据')
... ...
... ... @@ -3,7 +3,7 @@ import requests
import csv
import os
from datetime import datetime
from settings import articleAddr,commentsAddr
from .settings import articleAddr,commentsAddr
def init():
if not os.path.exists(commentsAddr):
... ...
... ... @@ -3,7 +3,7 @@ import requests
import csv
import os
from datetime import datetime
from settings import navAddr,articleAddr
from .settings import navAddr,articleAddr
def init():
if not os.path.exists(articleAddr):
... ...
... ... @@ -2,7 +2,7 @@ import requests
import csv
import numpy as np
import os
from settings import navAddr
from .settings import navAddr
def init():
if not os.path.exists(navAddr):
with open(navAddr,'w',encoding='utf-8',newline='') as csvFile:
... ...