YYL469

修改爬虫bug

@@ -36,7 +36,7 @@ def run_spider_script():
 
 if __name__ == '__main__':
     scheduler = BackgroundScheduler(timezone=utc)
-    scheduler.add_job(run_spider_script, 'interval', minutes=1)
+    scheduler.add_job(run_spider_script, 'interval', hours=5)
     scheduler.start()
 
     try:
@@ -1,5 +1,5 @@
 from spiderData import spiderData
-from saveData import save_to_sql as saveData
+from saveData import saveData
 
 def main():
     print('正在爬取数据')
@@ -4,7 +4,7 @@ import pandas as pd
 
 engine = create_engine('mysql+pymysql://XiaoXueQi:XiaoXueQi@47.92.235.6/Weibo_PublicOpinion_AnalysisSystem?charset=utf8mb4')
 
-def save_to_sql():
+def saveData():
     try:
         oldArticle = pd.read_sql('select * from article',engine)
         newArticle = pd.read_csv('article.csv')
@@ -29,4 +29,4 @@ def save_to_sql():
     os.remove('./comments.csv')
 
 if __name__ == '__main__':
-    save_to_sql()
+    saveData()
@@ -1,10 +1,11 @@
 from spiderDataPackage.spiderNav import start as spiderNav
 from spiderDataPackage.spiderContent import start as spiderContent
 from spiderDataPackage.spiderComments import start as spiderComments
+from spiderDataPackage.settings import navAddr
 import os
 
 def spiderData():
-    if not os.path.exists('./nav.csv'):
+    if not os.path.exists(navAddr):
         print('正在爬取导航栏数据')
         spiderNav()
     print('正在爬取文章数据')
@@ -3,7 +3,7 @@ import requests
 import csv
 import os
 from datetime import datetime
-from settings import articleAddr,commentsAddr
+from .settings import articleAddr,commentsAddr
 
 def init():
     if not os.path.exists(commentsAddr):
@@ -3,7 +3,7 @@ import requests
 import csv
 import os
 from datetime import datetime
-from settings import navAddr,articleAddr
+from .settings import navAddr,articleAddr
 
 def init():
     if not os.path.exists(articleAddr):
@@ -2,7 +2,7 @@ import requests
 import csv
 import numpy as np
 import os
-from settings import navAddr
+from .settings import navAddr
 def init():
     if not os.path.exists(navAddr):
         with open(navAddr,'w',encoding='utf-8',newline='') as csvFile: