Merge branch 'main' of https://github.com/666ghj/Weibo_PublicOpinion_AnalysisSystem

戒酒的李白
Commit 510f09af593845178b4f9f14c8d5901337e8609c 510f09af 2 parents 261dfa46 b3f0ea0c
Showing 11 changed files with 36 additions and 82 deletions
spider/main.py
spider/navData.csv
spider/spiderData.py
spider/spiderDataPack/__init__.py
spider/spiderDataPack/__pycache__/__init__.cpython-38.pyc
spider/spiderDataPack/__pycache__/spiderComments.cpython-38.pyc
spider/spiderDataPack/__pycache__/spiderContent.cpython-38.pyc
spider/spiderDataPack/__pycache__/spiderNav.cpython-38.pyc
spider/spiderComments.py → spider/spiderDataPack/spiderComments.py
spider/spiderContent.py → spider/spiderDataPack/spiderContent.py
spider/spiderNav.py → spider/spiderDataPack/spiderNav.py
--- a/spider/main.py
View file @510f09a
+++ b/spider/main.py
View file @510f09a
- from spiderContent import start as spiderContentStart
- from spiderComments import start as spiderCommentsStart
+ from spiderData import spiderData 
 from saveData import save_to_sql as saveData
 
 def main():
-     print('正在爬取文章数据')
-     spiderContentStart(1,1)
-     print('正在爬取文章评论数据')
-     spiderCommentsStart()
-     print('正在存储数据')
-     saveData()
+     try:
+         spiderData()
+         saveData()
+         print("爬取数据更新")
+     except:
+         print("爬取数据失败")
 
 if __name__ == '__main__':
     main()
\ No newline at end of file
--- a/spider/navData.csv deleted 100644 → 0
View file @261dfa4
+++ b/spider/navData.csv deleted 100644 → 0
View file @261dfa4
- typeName,gid,containerid
- 热门,102803,102803
- 同城,1028032222,102803_2222
- 榜单,102803600169,102803_ctg1_600169_-_ctg1_600169
- 男篮,102803600279,102803_ctg1_600279_-_ctg1_600279
- 明星,1028034288,102803_ctg1_4288_-_ctg1_4288
- 车展,1028035188,102803_ctg1_5188_-_ctg1_5188
- 搞笑,1028034388,102803_ctg1_4388_-_ctg1_4388
- 情感,1028031988,102803_ctg1_1988_-_ctg1_1988
- 周末,102803600195,102803_ctg1_600195_-_ctg1_600195
- 电影,1028033288,102803_ctg1_3288_-_ctg1_3288
- 社会,1028034188,102803_ctg1_4188_-_ctg1_4188
- 电视剧,1028032488,102803_ctg1_2488_-_ctg1_2488
- 美食,1028032688,102803_ctg1_2688_-_ctg1_2688
- 俄乌局势,102803600267,102803_ctg1_600267_-_ctg1_600267
- 国际,1028036288,102803_ctg1_6288_-_ctg1_6288
- 深度,102803600155,102803_ctg1_600155_-_ctg1_600155
- 财经,1028036388,102803_ctg1_6388_-_ctg1_6388
- 读书,1028034588,102803_ctg1_4588_-_ctg1_4588
- 摄影,1028034988,102803_ctg1_4988_-_ctg1_4988
- 颜值,102803600165,102803_ctg1_600165_-_ctg1_600165
- 体育,1028031388,102803_ctg1_1388_-_ctg1_1388
- 数码,1028035088,102803_ctg1_5088_-_ctg1_5088
- 综艺,1028034688,102803_ctg1_4688_-_ctg1_4688
- 时尚,1028034488,102803_ctg1_4488_-_ctg1_4488
- 星座,1028031688,102803_ctg1_1688_-_ctg1_1688
- 军事,1028036688,102803_ctg1_6688_-_ctg1_6688
- 股市,1028031288,102803_ctg1_1288_-_ctg1_1288
- 房产,1028035588,102803_ctg1_5588_-_ctg1_5588
- 家居,1028035888,102803_ctg1_5888_-_ctg1_5888
- 萌宠,1028032788,102803_ctg1_2788_-_ctg1_2788
- 科技,1028032088,102803_ctg1_2088_-_ctg1_2088
- 科普,1028035988,102803_ctg1_5988_-_ctg1_5988
- 动漫,1028032388,102803_ctg1_2388_-_ctg1_2388
- 运动健身,1028034788,102803_ctg1_4788_-_ctg1_4788
- 旅游,1028032588,102803_ctg1_2588_-_ctg1_2588
- 瘦身,1028036488,102803_ctg1_6488_-_ctg1_6488
- 好物,102803600094,102803_ctg1_600094_-_ctg1_600094
- 历史,1028036788,102803_ctg1_6788_-_ctg1_6788
- 艺术,1028035488,102803_ctg1_5488_-_ctg1_5488
- 美妆,1028031588,102803_ctg1_1588_-_ctg1_1588
- 法律,1028037388,102803_ctg1_7388_-_ctg1_7388
- 设计,1028035388,102803_ctg1_5388_-_ctg1_5388
- 健康,1028032188,102803_ctg1_2188_-_ctg1_2188
- 音乐,1028035288,102803_ctg1_5288_-_ctg1_5288
- 游戏,1028034888,102803_ctg1_4888_-_ctg1_4888
- 新时代,1028037968,102803_ctg1_7968_-_ctg1_7968
- 校园,102803600177,102803_ctg1_600177_-_ctg1_600177
- 收藏,1028038189,102803_ctg1_8189_-_ctg1_8189
- 政务,1028035788,102803_ctg1_5788_-_ctg1_5788
- 养生,1028036588,102803_ctg1_6588_-_ctg1_6588
- 育儿,1028033188,102803_ctg1_3188_-_ctg1_3188
- 抽奖,102803600037,102803_ctg1_600037_-_ctg1_600037
- 教育,102803600080,102803_ctg1_600080_-_ctg1_600080
- 婚恋,1028031788,102803_ctg1_1788_-_ctg1_1788
- 舞蹈,1028038788,102803_ctg1_8788_-_ctg1_8788
- 辟谣,1028036988,102803_ctg1_6988_-_ctg1_6988
- 公益,102803600057,102803_ctg1_600057_-_ctg1_600057
- 问答,1028037977,102803_ctg1_7977_-_ctg1_7977
- 三农,1028037188,102803_ctg1_7188_-_ctg1_7188
--- a/spider/spiderData.py 0 → 100644
View file @510f09a
+++ b/spider/spiderData.py 0 → 100644
View file @510f09a
+ from spiderDataPack.spiderNav import start as spiderNavStart
+ from spiderDataPack.spiderContent import start as spiderContentStart
+ from spiderDataPack.spiderComments import start as spiderCommentsStart
+ import os
+ 
+ def spiderData():
+     if not os.path.exists('./nav.csv'):
+         spiderNavStart()
+     spiderContentStart(1,1)
+     spiderCommentsStart()
+ 
+ if __name__ == '__main__':
+     spiderData()
\ No newline at end of file
--- a/spider/spiderDataPack/__init__.py 0 → 100644
View file @510f09a
+++ b/spider/spiderDataPack/__init__.py 0 → 100644
View file @510f09a
--- a/spider/spiderDataPack/__pycache__/__init__.cpython-38.pyc 0 → 100644
View file @510f09a
+++ b/spider/spiderDataPack/__pycache__/__init__.cpython-38.pyc 0 → 100644
View file @510f09a
--- a/spider/spiderDataPack/__pycache__/spiderComments.cpython-38.pyc 0 → 100644
View file @510f09a
+++ b/spider/spiderDataPack/__pycache__/spiderComments.cpython-38.pyc 0 → 100644
View file @510f09a
--- a/spider/spiderDataPack/__pycache__/spiderContent.cpython-38.pyc 0 → 100644
View file @510f09a
+++ b/spider/spiderDataPack/__pycache__/spiderContent.cpython-38.pyc 0 → 100644
View file @510f09a
--- a/spider/spiderDataPack/__pycache__/spiderNav.cpython-38.pyc 0 → 100644
View file @510f09a
+++ b/spider/spiderDataPack/__pycache__/spiderNav.cpython-38.pyc 0 → 100644
View file @510f09a
--- a/spider/spiderComments.py → spider/spiderDataPack/spiderComments.py
View file @510f09a
+++ b/spider/spiderComments.py → spider/spiderDataPack/spiderComments.py
View file @510f09a
@@ -5,8 +5,8 @@ import os
 from datetime import datetime
 
 def init():
-     if not os.path.exists('./articleComments.csv'):
-         with open('./articleComments.csv','w',encoding='utf-8',newline='') as csvFile:
+     if not os.path.exists('./comments.csv'):
+         with open('./comments.csv','w',encoding='utf-8',newline='') as csvFile:
             writer = csv.writer(csvFile)
             writer.writerow([
                 'articleId',
@@ -21,7 +21,7 @@ def init():
             ])
 
 def writerRow(row):
-     with open('./articleComments.csv', 'a', encoding='utf-8', newline='') as csvFile:
+     with open('./comments.csv', 'a', encoding='utf-8', newline='') as csvFile:
         writer = csv.writer(csvFile)
         writer.writerow(row)
 
@@ -38,7 +38,7 @@ def get_data(url,params):
 
 def getAllArticleList():
     artileList = []
-     with open('./articleData.csv','r',encoding='utf-8') as reader:
+     with open('./article.csv','r',encoding='utf-8') as reader:
         readerCsv = csv.reader(reader)
         next(reader)
         for nav in readerCsv:
--- a/spider/spiderContent.py → spider/spiderDataPack/spiderContent.py
View file @510f09a
+++ b/spider/spiderContent.py → spider/spiderDataPack/spiderContent.py
View file @510f09a
@@ -5,8 +5,8 @@ import os
 from datetime import datetime
 
 def init():
-     if not os.path.exists('./articleData.csv'):
-         with open('./articleData.csv','w',encoding='utf-8',newline='') as csvFile:
+     if not os.path.exists('./article.csv'):
+         with open('./article.csv','w',encoding='utf-8',newline='') as csvFile:
             writer = csv.writer(csvFile)
             writer.writerow([
                 'id',
@@ -26,7 +26,7 @@ def init():
             ])
 
 def writerRow(row):
-     with open('./articleData.csv', 'a', encoding='utf-8', newline='') as csvFile:
+     with open('./article.csv', 'a', encoding='utf-8', newline='') as csvFile:
         writer = csv.writer(csvFile)
         writer.writerow(row)
 
@@ -43,7 +43,7 @@ def get_data(url,params):
 
 def getAllTypeList():
     typeList = []
-     with open('./navData.csv','r',encoding='utf-8') as reader:
+     with open('./nav.csv','r',encoding='utf-8') as reader:
         readerCsv = csv.reader(reader)
         next(reader)
         for nav in readerCsv:
--- a/spider/spiderNav.py → spider/spiderDataPack/spiderNav.py
View file @510f09a
+++ b/spider/spiderNav.py → spider/spiderDataPack/spiderNav.py
View file @510f09a
@@ -4,8 +4,8 @@ import numpy as np
 import os
 
 def init():
-     if not os.path.exists('./navData.csv'):
-         with open('./navData.csv','w',encoding='utf-8',newline='') as csvFile:
+     if not os.path.exists('./nav.csv'):
+         with open('./nav.csv','w',encoding='utf-8',newline='') as csvFile:
             writer = csv.writer(csvFile)
             writer.writerow([
                 'typeName',
@@ -14,7 +14,7 @@ def init():
             ])
 
 def writerRow(row):
-     with open('./navData.csv', 'a', encoding='utf-8', newline='') as csvFile:
+     with open('./nav.csv', 'a', encoding='utf-8', newline='') as csvFile:
         writer = csv.writer(csvFile)
         writer.writerow(row)
 
@@ -45,9 +45,11 @@ def parse_json(response):
             containerid
         ])
 
- 
- if __name__ == '__main__':
+ def start():
     init()
     url = 'https://weibo.com/ajax/feed/allGroups'
     response = get_data(url)
-     parse_json(response)
\ No newline at end of file
+     parse_json(response)
+ 
+ if __name__ == '__main__':
+     start()
\ No newline at end of file