戒酒的李白
1 -from spiderContent import start as spiderContentStart  
2 -from spiderComments import start as spiderCommentsStart 1 +from spiderData import spiderData
3 from saveData import save_to_sql as saveData 2 from saveData import save_to_sql as saveData
4 3
def main():
    """Run one crawl-and-store cycle and report the outcome on stdout.

    Calls spiderData() and then saveData(). Prints the success message when
    both return normally, or the failure message when either of them raises.
    """
    try:
        spiderData()
        saveData()
    except Exception:
        # Catch Exception instead of using a bare ``except`` so that
        # KeyboardInterrupt and SystemExit still terminate the program.
        import traceback

        print("爬取数据失败")
        # Keep the cause visible rather than silently discarding it.
        traceback.print_exc()
    else:
        print("爬取数据更新")


if __name__ == '__main__':
    main()
1 -typeName,gid,containerid  
2 -热门,102803,102803  
3 -同城,1028032222,102803_2222  
4 -榜单,102803600169,102803_ctg1_600169_-_ctg1_600169  
5 -男篮,102803600279,102803_ctg1_600279_-_ctg1_600279  
6 -明星,1028034288,102803_ctg1_4288_-_ctg1_4288  
7 -车展,1028035188,102803_ctg1_5188_-_ctg1_5188  
8 -搞笑,1028034388,102803_ctg1_4388_-_ctg1_4388  
9 -情感,1028031988,102803_ctg1_1988_-_ctg1_1988  
10 -周末,102803600195,102803_ctg1_600195_-_ctg1_600195  
11 -电影,1028033288,102803_ctg1_3288_-_ctg1_3288  
12 -社会,1028034188,102803_ctg1_4188_-_ctg1_4188  
13 -电视剧,1028032488,102803_ctg1_2488_-_ctg1_2488  
14 -美食,1028032688,102803_ctg1_2688_-_ctg1_2688  
15 -俄乌局势,102803600267,102803_ctg1_600267_-_ctg1_600267  
16 -国际,1028036288,102803_ctg1_6288_-_ctg1_6288  
17 -深度,102803600155,102803_ctg1_600155_-_ctg1_600155  
18 -财经,1028036388,102803_ctg1_6388_-_ctg1_6388  
19 -读书,1028034588,102803_ctg1_4588_-_ctg1_4588  
20 -摄影,1028034988,102803_ctg1_4988_-_ctg1_4988  
21 -颜值,102803600165,102803_ctg1_600165_-_ctg1_600165  
22 -体育,1028031388,102803_ctg1_1388_-_ctg1_1388  
23 -数码,1028035088,102803_ctg1_5088_-_ctg1_5088  
24 -综艺,1028034688,102803_ctg1_4688_-_ctg1_4688  
25 -时尚,1028034488,102803_ctg1_4488_-_ctg1_4488  
26 -星座,1028031688,102803_ctg1_1688_-_ctg1_1688  
27 -军事,1028036688,102803_ctg1_6688_-_ctg1_6688  
28 -股市,1028031288,102803_ctg1_1288_-_ctg1_1288  
29 -房产,1028035588,102803_ctg1_5588_-_ctg1_5588  
30 -家居,1028035888,102803_ctg1_5888_-_ctg1_5888  
31 -萌宠,1028032788,102803_ctg1_2788_-_ctg1_2788  
32 -科技,1028032088,102803_ctg1_2088_-_ctg1_2088  
33 -科普,1028035988,102803_ctg1_5988_-_ctg1_5988  
34 -动漫,1028032388,102803_ctg1_2388_-_ctg1_2388  
35 -运动健身,1028034788,102803_ctg1_4788_-_ctg1_4788  
36 -旅游,1028032588,102803_ctg1_2588_-_ctg1_2588  
37 -瘦身,1028036488,102803_ctg1_6488_-_ctg1_6488  
38 -好物,102803600094,102803_ctg1_600094_-_ctg1_600094  
39 -历史,1028036788,102803_ctg1_6788_-_ctg1_6788  
40 -艺术,1028035488,102803_ctg1_5488_-_ctg1_5488  
41 -美妆,1028031588,102803_ctg1_1588_-_ctg1_1588  
42 -法律,1028037388,102803_ctg1_7388_-_ctg1_7388  
43 -设计,1028035388,102803_ctg1_5388_-_ctg1_5388  
44 -健康,1028032188,102803_ctg1_2188_-_ctg1_2188  
45 -音乐,1028035288,102803_ctg1_5288_-_ctg1_5288  
46 -游戏,1028034888,102803_ctg1_4888_-_ctg1_4888  
47 -新时代,1028037968,102803_ctg1_7968_-_ctg1_7968  
48 -校园,102803600177,102803_ctg1_600177_-_ctg1_600177  
49 -收藏,1028038189,102803_ctg1_8189_-_ctg1_8189  
50 -政务,1028035788,102803_ctg1_5788_-_ctg1_5788  
51 -养生,1028036588,102803_ctg1_6588_-_ctg1_6588  
52 -育儿,1028033188,102803_ctg1_3188_-_ctg1_3188  
53 -抽奖,102803600037,102803_ctg1_600037_-_ctg1_600037  
54 -教育,102803600080,102803_ctg1_600080_-_ctg1_600080  
55 -婚恋,1028031788,102803_ctg1_1788_-_ctg1_1788  
56 -舞蹈,1028038788,102803_ctg1_8788_-_ctg1_8788  
57 -辟谣,1028036988,102803_ctg1_6988_-_ctg1_6988  
58 -公益,102803600057,102803_ctg1_600057_-_ctg1_600057  
59 -问答,1028037977,102803_ctg1_7977_-_ctg1_7977  
60 -三农,1028037188,102803_ctg1_7188_-_ctg1_7188  
  1 +from spiderDataPack.spiderNav import start as spiderNavStart
  2 +from spiderDataPack.spiderContent import start as spiderContentStart
  3 +from spiderDataPack.spiderComments import start as spiderCommentsStart
  4 +import os
  5 +
def spiderData():
    """Run the crawl steps in order: navigation, article content, comments.

    The navigation step is only run when ./nav.csv does not exist yet.
    """
    # NOTE(review): the nesting is reconstructed from a diff view that lost
    # indentation; this assumes only the nav crawl is conditional - confirm.
    nav_file_present = os.path.exists('./nav.csv')
    if not nav_file_present:
        spiderNavStart()

    spiderContentStart(1, 1)
    spiderCommentsStart()


if __name__ == '__main__':
    spiderData()
@@ -5,8 +5,8 @@ import os @@ -5,8 +5,8 @@ import os
5 from datetime import datetime 5 from datetime import datetime
6 6
7 def init(): 7 def init():
8 - if not os.path.exists('./articleComments.csv'):  
9 - with open('./articleComments.csv','w',encoding='utf-8',newline='') as csvFile: 8 + if not os.path.exists('./comments.csv'):
  9 + with open('./comments.csv','w',encoding='utf-8',newline='') as csvFile:
10 writer = csv.writer(csvFile) 10 writer = csv.writer(csvFile)
11 writer.writerow([ 11 writer.writerow([
12 'articleId', 12 'articleId',
@@ -21,7 +21,7 @@ def init(): @@ -21,7 +21,7 @@ def init():
21 ]) 21 ])
22 22
def writerRow(row):
    """Append ``row`` as one CSV record to ./comments.csv."""
    # newline='' lets the csv module control line endings itself.
    with open('./comments.csv', mode='a', encoding='utf-8', newline='') as out_file:
        csv.writer(out_file).writerow(row)
27 27
@@ -38,7 +38,7 @@ def get_data(url,params): @@ -38,7 +38,7 @@ def get_data(url,params):
38 38
39 def getAllArticleList(): 39 def getAllArticleList():
40 artileList = [] 40 artileList = []
41 - with open('./articleData.csv','r',encoding='utf-8') as reader: 41 + with open('./article.csv','r',encoding='utf-8') as reader:
42 readerCsv = csv.reader(reader) 42 readerCsv = csv.reader(reader)
43 next(reader) 43 next(reader)
44 for nav in readerCsv: 44 for nav in readerCsv:
@@ -5,8 +5,8 @@ import os @@ -5,8 +5,8 @@ import os
5 from datetime import datetime 5 from datetime import datetime
6 6
7 def init(): 7 def init():
8 - if not os.path.exists('./articleData.csv'):  
9 - with open('./articleData.csv','w',encoding='utf-8',newline='') as csvFile: 8 + if not os.path.exists('./article.csv'):
  9 + with open('./article.csv','w',encoding='utf-8',newline='') as csvFile:
10 writer = csv.writer(csvFile) 10 writer = csv.writer(csvFile)
11 writer.writerow([ 11 writer.writerow([
12 'id', 12 'id',
@@ -26,7 +26,7 @@ def init(): @@ -26,7 +26,7 @@ def init():
26 ]) 26 ])
27 27
def writerRow(row):
    """Append ``row`` as one CSV record to ./article.csv."""
    # newline='' lets the csv module control line endings itself.
    with open('./article.csv', mode='a', encoding='utf-8', newline='') as out_file:
        csv.writer(out_file).writerow(row)
32 32
@@ -43,7 +43,7 @@ def get_data(url,params): @@ -43,7 +43,7 @@ def get_data(url,params):
43 43
44 def getAllTypeList(): 44 def getAllTypeList():
45 typeList = [] 45 typeList = []
46 - with open('./navData.csv','r',encoding='utf-8') as reader: 46 + with open('./nav.csv','r',encoding='utf-8') as reader:
47 readerCsv = csv.reader(reader) 47 readerCsv = csv.reader(reader)
48 next(reader) 48 next(reader)
49 for nav in readerCsv: 49 for nav in readerCsv:
@@ -4,8 +4,8 @@ import numpy as np @@ -4,8 +4,8 @@ import numpy as np
4 import os 4 import os
5 5
6 def init(): 6 def init():
7 - if not os.path.exists('./navData.csv'):  
8 - with open('./navData.csv','w',encoding='utf-8',newline='') as csvFile: 7 + if not os.path.exists('./nav.csv'):
  8 + with open('./nav.csv','w',encoding='utf-8',newline='') as csvFile:
9 writer = csv.writer(csvFile) 9 writer = csv.writer(csvFile)
10 writer.writerow([ 10 writer.writerow([
11 'typeName', 11 'typeName',
@@ -14,7 +14,7 @@ def init(): @@ -14,7 +14,7 @@ def init():
14 ]) 14 ])
15 15
def writerRow(row):
    """Append ``row`` as one CSV record to ./nav.csv."""
    # newline='' lets the csv module control line endings itself.
    with open('./nav.csv', mode='a', encoding='utf-8', newline='') as out_file:
        csv.writer(out_file).writerow(row)
20 20
@@ -45,9 +45,11 @@ def parse_json(response): @@ -45,9 +45,11 @@ def parse_json(response):
45 containerid 45 containerid
46 ]) 46 ])
47 47
48 -  
def start():
    """Fetch the allGroups feed and pass the response to parse_json().

    init() is called first, before the request is made.
    """
    init()
    groups_response = get_data('https://weibo.com/ajax/feed/allGroups')
    parse_json(groups_response)


if __name__ == '__main__':
    start()