Showing 7 changed files with 9 additions and 8 deletions
| @@ -36,7 +36,7 @@ def run_spider_script(): | @@ -36,7 +36,7 @@ def run_spider_script(): | ||
| 36 | 36 | ||
| 37 | if __name__ == '__main__': | 37 | if __name__ == '__main__': |
| 38 | scheduler = BackgroundScheduler(timezone=utc) | 38 | scheduler = BackgroundScheduler(timezone=utc) |
| 39 | - scheduler.add_job(run_spider_script, 'interval', minutes=1) | 39 | + scheduler.add_job(run_spider_script, 'interval', hours=5) |
| 40 | scheduler.start() | 40 | scheduler.start() |
| 41 | 41 | ||
| 42 | try: | 42 | try: |
| @@ -4,7 +4,7 @@ import pandas as pd | @@ -4,7 +4,7 @@ import pandas as pd | ||
| 4 | 4 | ||
| 5 | engine = create_engine('mysql+pymysql://XiaoXueQi:XiaoXueQi@47.92.235.6/Weibo_PublicOpinion_AnalysisSystem?charset=utf8mb4') | 5 | engine = create_engine('mysql+pymysql://XiaoXueQi:XiaoXueQi@47.92.235.6/Weibo_PublicOpinion_AnalysisSystem?charset=utf8mb4') |
| 6 | 6 | ||
| 7 | -def save_to_sql(): | 7 | +def saveData(): |
| 8 | try: | 8 | try: |
| 9 | oldArticle = pd.read_sql('select * from article',engine) | 9 | oldArticle = pd.read_sql('select * from article',engine) |
| 10 | newArticle = pd.read_csv('article.csv') | 10 | newArticle = pd.read_csv('article.csv') |
| @@ -29,4 +29,4 @@ def save_to_sql(): | @@ -29,4 +29,4 @@ def save_to_sql(): | ||
| 29 | os.remove('./comments.csv') | 29 | os.remove('./comments.csv') |
| 30 | 30 | ||
| 31 | if __name__ == '__main__': | 31 | if __name__ == '__main__': |
| 32 | - save_to_sql() | ||
| 32 | + saveData() |
| 1 | from spiderDataPackage.spiderNav import start as spiderNav | 1 | from spiderDataPackage.spiderNav import start as spiderNav |
| 2 | from spiderDataPackage.spiderContent import start as spiderContent | 2 | from spiderDataPackage.spiderContent import start as spiderContent |
| 3 | from spiderDataPackage.spiderComments import start as spiderComments | 3 | from spiderDataPackage.spiderComments import start as spiderComments |
| 4 | +from spiderDataPackage.settings import navAddr | ||
| 4 | import os | 5 | import os |
| 5 | 6 | ||
| 6 | def spiderData(): | 7 | def spiderData(): |
| 7 | - if not os.path.exists('./nav.csv'): | 8 | + if not os.path.exists(navAddr): |
| 8 | print('正在爬取导航栏数据') | 9 | print('正在爬取导航栏数据') |
| 9 | spiderNav() | 10 | spiderNav() |
| 10 | print('正在爬取文章数据') | 11 | print('正在爬取文章数据') |
| @@ -3,7 +3,7 @@ import requests | @@ -3,7 +3,7 @@ import requests | ||
| 3 | import csv | 3 | import csv |
| 4 | import os | 4 | import os |
| 5 | from datetime import datetime | 5 | from datetime import datetime |
| 6 | -from settings import articleAddr,commentsAddr | 6 | +from .settings import articleAddr,commentsAddr |
| 7 | 7 | ||
| 8 | def init(): | 8 | def init(): |
| 9 | if not os.path.exists(commentsAddr): | 9 | if not os.path.exists(commentsAddr): |
| @@ -3,7 +3,7 @@ import requests | @@ -3,7 +3,7 @@ import requests | ||
| 3 | import csv | 3 | import csv |
| 4 | import os | 4 | import os |
| 5 | from datetime import datetime | 5 | from datetime import datetime |
| 6 | -from settings import navAddr,articleAddr | 6 | +from .settings import navAddr,articleAddr |
| 7 | 7 | ||
| 8 | def init(): | 8 | def init(): |
| 9 | if not os.path.exists(articleAddr): | 9 | if not os.path.exists(articleAddr): |
| @@ -2,7 +2,7 @@ import requests | @@ -2,7 +2,7 @@ import requests | ||
| 2 | import csv | 2 | import csv |
| 3 | import numpy as np | 3 | import numpy as np |
| 4 | import os | 4 | import os |
| 5 | -from settings import navAddr | 5 | +from .settings import navAddr |
| 6 | def init(): | 6 | def init(): |
| 7 | if not os.path.exists(navAddr): | 7 | if not os.path.exists(navAddr): |
| 8 | with open(navAddr,'w',encoding='utf-8',newline='') as csvFile: | 8 | with open(navAddr,'w',encoding='utf-8',newline='') as csvFile: |
-
Please register or login to post a comment