Showing 8 changed files with 42 additions and 22 deletions
| @@ -36,7 +36,7 @@ def run_spider_script(): | @@ -36,7 +36,7 @@ def run_spider_script(): | ||
| 36 | 36 | ||
| 37 | if __name__ == '__main__': | 37 | if __name__ == '__main__': |
| 38 | scheduler = BackgroundScheduler(timezone=utc) | 38 | scheduler = BackgroundScheduler(timezone=utc) |
| 39 | - scheduler.add_job(run_spider_script, 'interval', hours=5) | 39 | + scheduler.add_job(run_spider_script, 'interval', minutes=1) |
| 40 | scheduler.start() | 40 | scheduler.start() |
| 41 | 41 | ||
| 42 | try: | 42 | try: |
| 1 | -from spiderContent import start as spiderContent | ||
| 2 | -from spiderComments import start as spiderComments | 1 | +from spiderData import spiderData |
| 3 | from saveData import save_to_sql as saveData | 2 | from saveData import save_to_sql as saveData |
| 4 | 3 | ||
| 5 | def main(): | 4 | def main(): |
| 6 | - print('正在爬取文章数据') | ||
| 7 | - spiderContent(1,1) | ||
| 8 | - print('正在爬取文章评论数据') | ||
| 9 | - spiderComments() | 5 | + print('正在爬取数据') |
| 6 | + spiderData() | ||
| 10 | print('正在存储数据') | 7 | print('正在存储数据') |
| 11 | saveData() | 8 | saveData() |
| 12 | print("爬取数据更新") | 9 | print("爬取数据更新") |
spider/spiderData.py
0 → 100644
| 1 | +from spiderDataPackage.spiderNav import start as spiderNav | ||
| 2 | +from spiderDataPackage.spiderContent import start as spiderContent | ||
| 3 | +from spiderDataPackage.spiderComments import start as spiderComments | ||
| 4 | +import os | ||
| 5 | + | ||
| 6 | +def spiderData(): | ||
| 7 | + if not os.path.exists('./nav.csv'): | ||
| 8 | + print('正在爬取导航栏数据') | ||
| 9 | + spiderNav() | ||
| 10 | + print('正在爬取文章数据') | ||
| 11 | + spiderContent(1,1) | ||
| 12 | + print('正在爬取文章评论数据') | ||
| 13 | + spiderComments() | ||
| 14 | + | ||
| 15 | +if __name__ == '__main__': | ||
| 16 | + spiderData() |
spider/spiderDataPackage/__init__.py
0 → 100644
spider/spiderDataPackage/settings.py
0 → 100644
| @@ -3,10 +3,11 @@ import requests | @@ -3,10 +3,11 @@ import requests | ||
| 3 | import csv | 3 | import csv |
| 4 | import os | 4 | import os |
| 5 | from datetime import datetime | 5 | from datetime import datetime |
| 6 | +from settings import articleAddr,commentsAddr | ||
| 6 | 7 | ||
| 7 | def init(): | 8 | def init(): |
| 8 | - if not os.path.exists('./comments.csv'): | ||
| 9 | - with open('./comments.csv','w',encoding='utf-8',newline='') as csvFile: | 9 | + if not os.path.exists(commentsAddr): |
| 10 | + with open(commentsAddr,'w',encoding='utf-8',newline='') as csvFile: | ||
| 10 | writer = csv.writer(csvFile) | 11 | writer = csv.writer(csvFile) |
| 11 | writer.writerow([ | 12 | writer.writerow([ |
| 12 | 'articleId', | 13 | 'articleId', |
| @@ -21,7 +22,7 @@ def init(): | @@ -21,7 +22,7 @@ def init(): | ||
| 21 | ]) | 22 | ]) |
| 22 | 23 | ||
| 23 | def write(row): | 24 | def write(row): |
| 24 | - with open('./comments.csv', 'a', encoding='utf-8', newline='') as csvFile: | 25 | + with open(commentsAddr, 'a', encoding='utf-8', newline='') as csvFile: |
| 25 | writer = csv.writer(csvFile) | 26 | writer = csv.writer(csvFile) |
| 26 | writer.writerow(row) | 27 | writer.writerow(row) |
| 27 | 28 | ||
| @@ -38,7 +39,7 @@ def fetchData(url,params): | @@ -38,7 +39,7 @@ def fetchData(url,params): | ||
| 38 | 39 | ||
| 39 | def getArticleList(): | 40 | def getArticleList(): |
| 40 | articleList = [] | 41 | articleList = [] |
| 41 | - with open('./article.csv','r',encoding='utf-8') as reader: | 42 | + with open(articleAddr,'r',encoding='utf-8') as reader: |
| 42 | readerCsv = csv.reader(reader) | 43 | readerCsv = csv.reader(reader) |
| 43 | next(reader) | 44 | next(reader) |
| 44 | for nav in readerCsv: | 45 | for nav in readerCsv: |
| @@ -3,10 +3,11 @@ import requests | @@ -3,10 +3,11 @@ import requests | ||
| 3 | import csv | 3 | import csv |
| 4 | import os | 4 | import os |
| 5 | from datetime import datetime | 5 | from datetime import datetime |
| 6 | +from settings import navAddr,articleAddr | ||
| 6 | 7 | ||
| 7 | def init(): | 8 | def init(): |
| 8 | - if not os.path.exists('./article.csv'): | ||
| 9 | - with open('./article.csv','w',encoding='utf-8',newline='') as csvFile: | 9 | + if not os.path.exists(articleAddr): |
| 10 | + with open(articleAddr,'w',encoding='utf-8',newline='') as csvFile: | ||
| 10 | writer = csv.writer(csvFile) | 11 | writer = csv.writer(csvFile) |
| 11 | writer.writerow([ | 12 | writer.writerow([ |
| 12 | 'id', | 13 | 'id', |
| @@ -26,7 +27,7 @@ def init(): | @@ -26,7 +27,7 @@ def init(): | ||
| 26 | ]) | 27 | ]) |
| 27 | 28 | ||
| 28 | def write(row): | 29 | def write(row): |
| 29 | - with open('./article.csv', 'a', encoding='utf-8', newline='') as csvFile: | 30 | + with open(articleAddr, 'a', encoding='utf-8', newline='') as csvFile: |
| 30 | writer = csv.writer(csvFile) | 31 | writer = csv.writer(csvFile) |
| 31 | writer.writerow(row) | 32 | writer.writerow(row) |
| 32 | 33 | ||
| @@ -43,7 +44,7 @@ def fetchData(url,params): | @@ -43,7 +44,7 @@ def fetchData(url,params): | ||
| 43 | 44 | ||
| 44 | def getTypeList(): | 45 | def getTypeList(): |
| 45 | typeList = [] | 46 | typeList = [] |
| 46 | - with open('./nav.csv','r',encoding='utf-8') as reader: | 47 | + with open(navAddr,'r',encoding='utf-8') as reader: |
| 47 | readerCsv = csv.reader(reader) | 48 | readerCsv = csv.reader(reader) |
| 48 | next(reader) | 49 | next(reader) |
| 49 | for nav in readerCsv: | 50 | for nav in readerCsv: |
| @@ -2,10 +2,10 @@ import requests | @@ -2,10 +2,10 @@ import requests | ||
| 2 | import csv | 2 | import csv |
| 3 | import numpy as np | 3 | import numpy as np |
| 4 | import os | 4 | import os |
| 5 | - | 5 | +from settings import navAddr |
| 6 | def init(): | 6 | def init(): |
| 7 | - if not os.path.exists('./nav.csv'): | ||
| 8 | - with open('./nav.csv','w',encoding='utf-8',newline='') as csvFile: | 7 | + if not os.path.exists(navAddr): |
| 8 | + with open(navAddr,'w',encoding='utf-8',newline='') as csvFile: | ||
| 9 | writer = csv.writer(csvFile) | 9 | writer = csv.writer(csvFile) |
| 10 | writer.writerow([ | 10 | writer.writerow([ |
| 11 | 'typeName', | 11 | 'typeName', |
| @@ -14,7 +14,7 @@ def init(): | @@ -14,7 +14,7 @@ def init(): | ||
| 14 | ]) | 14 | ]) |
| 15 | 15 | ||
| 16 | def write(row): | 16 | def write(row): |
| 17 | - with open('./nav.csv', 'a', encoding='utf-8', newline='') as csvFile: | 17 | + with open(navAddr, 'a', encoding='utf-8', newline='') as csvFile: |
| 18 | writer = csv.writer(csvFile) | 18 | writer = csv.writer(csvFile) |
| 19 | writer.writerow(row) | 19 | writer.writerow(row) |
| 20 | 20 | ||
| @@ -45,9 +45,11 @@ def readJson(response): | @@ -45,9 +45,11 @@ def readJson(response): | ||
| 45 | containerid | 45 | containerid |
| 46 | ]) | 46 | ]) |
| 47 | 47 | ||
| 48 | - | ||
| 49 | -if __name__ == '__main__': | 48 | +def start(): |
| 50 | init() | 49 | init() |
| 51 | url = 'https://weibo.com/ajax/feed/allGroups' | 50 | url = 'https://weibo.com/ajax/feed/allGroups' |
| 52 | response = fetchData(url) | 51 | response = fetchData(url) |
| 53 | - readJson(response) | ||
| 52 | + readJson(response) | ||
| 53 | + | ||
| 54 | +if __name__ == '__main__': | ||
| 55 | + start() |
-
Please register or log in to post a comment