Showing 7 changed files with 61 additions and 45 deletions
| 1 | from flask import Flask,session,request,redirect,render_template | 1 | from flask import Flask,session,request,redirect,render_template |
| 2 | import re | 2 | import re |
| 3 | +from apscheduler.schedulers.background import BackgroundScheduler | ||
| 4 | +import subprocess | ||
| 5 | +import os | ||
| 6 | +from pytz import utc | ||
| 7 | + | ||
| 3 | app = Flask(__name__) | 8 | app = Flask(__name__) |
| 4 | app.secret_key = 'this is secret_key you know ?' | 9 | app.secret_key = 'this is secret_key you know ?' |
| 5 | 10 | ||
| @@ -24,5 +29,17 @@ def before_reuqest(): | @@ -24,5 +29,17 @@ def before_reuqest(): | ||
| 24 | def catch_all(path): | 29 | def catch_all(path): |
| 25 | return render_template('404.html') | 30 | return render_template('404.html') |
| 26 | 31 | ||
| 32 | +def run_spider_script(): | ||
| 33 | + current_dir = os.path.dirname(os.path.abspath(__file__)) | ||
| 34 | + spider_script = os.path.join(current_dir, 'spider', 'main.py') | ||
| 35 | + subprocess.run(['python', spider_script]) | ||
| 36 | + | ||
| 27 | if __name__ == '__main__': | 37 | if __name__ == '__main__': |
| 28 | - app.run() | 38 | + scheduler = BackgroundScheduler(timezone=utc) |
| 39 | + scheduler.add_job(run_spider_script, 'interval', hours=5) | ||
| 40 | + scheduler.start() | ||
| 41 | + | ||
| 42 | + try: | ||
| 43 | + app.run() | ||
| 44 | + finally: | ||
| 45 | + scheduler.shutdown() |
spider/article.csv (deleted, file mode 100644 → 0)
| 1 | -id,likeNum,commentsLen,reposts_count,region,content,contentLen,created_at,type,detailUrl,authorAvatar,authorName,authorDetail,isVip |
| 1 | -from spiderContent import start as spiderContentStart | ||
| 2 | -from spiderComments import start as spiderCommentsStart | 1 | +from spiderContent import start as spiderContent |
| 2 | +from spiderComments import start as spiderComments | ||
| 3 | from saveData import save_to_sql as saveData | 3 | from saveData import save_to_sql as saveData |
| 4 | 4 | ||
| 5 | def main(): | 5 | def main(): |
| 6 | print('正在爬取文章数据') | 6 | print('正在爬取文章数据') |
| 7 | - spiderContentStart(1,1) | 7 | + spiderContent(1,1) |
| 8 | print('正在爬取文章评论数据') | 8 | print('正在爬取文章评论数据') |
| 9 | - spiderCommentsStart() | 9 | + spiderComments() |
| 10 | print('正在存储数据') | 10 | print('正在存储数据') |
| 11 | saveData() | 11 | saveData() |
| 12 | print("爬取数据更新") | 12 | print("爬取数据更新") |
| @@ -6,24 +6,24 @@ engine = create_engine('mysql+pymysql://XiaoXueQi:XiaoXueQi@10.92.35.13/Weibo_Pu | @@ -6,24 +6,24 @@ engine = create_engine('mysql+pymysql://XiaoXueQi:XiaoXueQi@10.92.35.13/Weibo_Pu | ||
| 6 | 6 | ||
| 7 | def save_to_sql(): | 7 | def save_to_sql(): |
| 8 | try: | 8 | try: |
| 9 | - artileOldPd = pd.read_sql('select * from article',engine) | ||
| 10 | - articleNewPd = pd.read_csv('article.csv') | ||
| 11 | - commentOldPd = pd.read_sql('select * from comments',engine) | ||
| 12 | - commentNewPd = pd.read_csv('comments.csv') | 9 | + oldArticle = pd.read_sql('select * from article',engine) |
| 10 | + newArticle = pd.read_csv('article.csv') | ||
| 11 | + oldComment = pd.read_sql('select * from comments',engine) | ||
| 12 | + newComment = pd.read_csv('comments.csv') | ||
| 13 | 13 | ||
| 14 | - concatArticlePd = pd.concat([articleNewPd,artileOldPd],join='inner') | ||
| 15 | - concatCommentsPd = pd.concat([commentNewPd,commentOldPd],join='inner') | 14 | + mergeArticle = pd.concat([newArticle,oldArticle],join='inner') |
| 15 | + mergeComment = pd.concat([newComment,oldComment],join='inner') | ||
| 16 | 16 | ||
| 17 | - concatArticlePd.drop_duplicates(subset='id',keep='last',inplace=True) | ||
| 18 | - concatCommentsPd.drop_duplicates(subset='content',keep='last',inplace=True) | 17 | + mergeArticle.drop_duplicates(subset='id',keep='last',inplace=True) |
| 18 | + mergeComment.drop_duplicates(subset='content',keep='last',inplace=True) | ||
| 19 | 19 | ||
| 20 | - concatArticlePd.to_sql('article', con=engine, if_exists='replace', index=False) | ||
| 21 | - concatCommentsPd.to_sql('comments', con=engine, if_exists='replace', index=False) | 20 | + mergeArticle.to_sql('article', con=engine, if_exists='replace', index=False) |
| 21 | + mergeComment.to_sql('comments', con=engine, if_exists='replace', index=False) | ||
| 22 | except: | 22 | except: |
| 23 | - articleNewPd = pd.read_csv('article.csv') | ||
| 24 | - commentNewPd = pd.read_csv('comments.csv') | ||
| 25 | - articleNewPd.to_sql('article',con=engine,if_exists='replace',index=False) | ||
| 26 | - commentNewPd.to_sql('comments',con=engine,if_exists='replace',index=False) | 23 | + newArticle = pd.read_csv('article.csv') |
| 24 | + newComment = pd.read_csv('comments.csv') | ||
| 25 | + newArticle.to_sql('article',con=engine,if_exists='replace',index=False) | ||
| 26 | + newComment.to_sql('comments',con=engine,if_exists='replace',index=False) | ||
| 27 | 27 | ||
| 28 | os.remove('./article.csv') | 28 | os.remove('./article.csv') |
| 29 | os.remove('./comments.csv') | 29 | os.remove('./comments.csv') |
| @@ -20,12 +20,12 @@ def init(): | @@ -20,12 +20,12 @@ def init(): | ||
| 20 | 'authorAvatar' | 20 | 'authorAvatar' |
| 21 | ]) | 21 | ]) |
| 22 | 22 | ||
| 23 | -def writerRow(row): | 23 | +def write(row): |
| 24 | with open('./comments.csv', 'a', encoding='utf-8', newline='') as csvFile: | 24 | with open('./comments.csv', 'a', encoding='utf-8', newline='') as csvFile: |
| 25 | writer = csv.writer(csvFile) | 25 | writer = csv.writer(csvFile) |
| 26 | writer.writerow(row) | 26 | writer.writerow(row) |
| 27 | 27 | ||
| 28 | -def get_data(url,params): | 28 | +def fetchData(url,params): |
| 29 | headers = { | 29 | headers = { |
| 30 | 'Cookie':'SINAGLOBAL=2555941826014.1074.1676801766625; ULV=1719829459275:6:1:2:4660996305989.918.1719827559898:1719743122299; UOR=,,www.baidu.com; XSRF-TOKEN=VtLXviYSIs8lor7sz4iGyigL; SUB=_2A25LhvU9DeRhGeFH6FIX-S3MyD2IHXVo-gj1rDV8PUJbkNAGLRXMkW1Ne2nhI3Gle25QJK0Z99J3trq_NZn6YKJ-; SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9WW3Mv8V5EupQbbKh.vaZIwU5JpX5KzhUgL.FoM4e05c1Ke7e022dJLoIp7LxKML1KBLBKnLxKqL1hnLBoM41hz41hqReKqN; WBPSESS=Dt2hbAUaXfkVprjyrAZT_LRaDLsnxG-kIbeYwnBb5OUKZiwfVr_UrcYfWuqG-4ZVDM5HeU3HXkDNK_thfRfdS9Ao6ezT30jDksv-CpaVmlTAqGUHjJ7PYkH5aCK4HLxmRq14ZalmQNwzfWMPa4y0VNRLuYdg7L1s49ymNq_5v5vusoz0r4ki6u-MHGraF0fbUTgX14x0kHayEwOoxfLI-w==; SCF=AqmJWo31oFV5itnRgWNU1-wHQTL6PmkBLf3gDuqpdqAIfaWguDTMre6Oxjf5Uzs74JAh2r0DdV1sJ1g6m-wJ5NQ.; _s_tentry=-; Apache=4660996305989.918.1719827559898; PC_TOKEN=7955a7ab1f; appkey=; geetest_token=602cd4e3a7ed1898808f8adfe1a2048b; ALF=1722421868', | 30 | 'Cookie':'SINAGLOBAL=2555941826014.1074.1676801766625; ULV=1719829459275:6:1:2:4660996305989.918.1719827559898:1719743122299; UOR=,,www.baidu.com; XSRF-TOKEN=VtLXviYSIs8lor7sz4iGyigL; SUB=_2A25LhvU9DeRhGeFH6FIX-S3MyD2IHXVo-gj1rDV8PUJbkNAGLRXMkW1Ne2nhI3Gle25QJK0Z99J3trq_NZn6YKJ-; SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9WW3Mv8V5EupQbbKh.vaZIwU5JpX5KzhUgL.FoM4e05c1Ke7e022dJLoIp7LxKML1KBLBKnLxKqL1hnLBoM41hz41hqReKqN; WBPSESS=Dt2hbAUaXfkVprjyrAZT_LRaDLsnxG-kIbeYwnBb5OUKZiwfVr_UrcYfWuqG-4ZVDM5HeU3HXkDNK_thfRfdS9Ao6ezT30jDksv-CpaVmlTAqGUHjJ7PYkH5aCK4HLxmRq14ZalmQNwzfWMPa4y0VNRLuYdg7L1s49ymNq_5v5vusoz0r4ki6u-MHGraF0fbUTgX14x0kHayEwOoxfLI-w==; SCF=AqmJWo31oFV5itnRgWNU1-wHQTL6PmkBLf3gDuqpdqAIfaWguDTMre6Oxjf5Uzs74JAh2r0DdV1sJ1g6m-wJ5NQ.; _s_tentry=-; Apache=4660996305989.918.1719827559898; PC_TOKEN=7955a7ab1f; appkey=; geetest_token=602cd4e3a7ed1898808f8adfe1a2048b; ALF=1722421868', |
| 31 | 'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:127.0) Gecko/20100101 Firefox/127.0' | 31 | 'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:127.0) Gecko/20100101 Firefox/127.0' |
| @@ -36,16 +36,16 @@ def get_data(url,params): | @@ -36,16 +36,16 @@ def get_data(url,params): | ||
| 36 | else: | 36 | else: |
| 37 | return None | 37 | return None |
| 38 | 38 | ||
| 39 | -def getAllArticleList(): | ||
| 40 | - artileList = [] | 39 | +def getArticleList(): |
| 40 | + articleList = [] | ||
| 41 | with open('./article.csv','r',encoding='utf-8') as reader: | 41 | with open('./article.csv','r',encoding='utf-8') as reader: |
| 42 | readerCsv = csv.reader(reader) | 42 | readerCsv = csv.reader(reader) |
| 43 | next(reader) | 43 | next(reader) |
| 44 | for nav in readerCsv: | 44 | for nav in readerCsv: |
| 45 | - artileList.append(nav) | ||
| 46 | - return artileList | 45 | + articleList.append(nav) |
| 46 | + return articleList | ||
| 47 | 47 | ||
| 48 | -def parse_json(response,artileId): | 48 | +def readJson(response,artileId): |
| 49 | for comment in response: | 49 | for comment in response: |
| 50 | created_at = datetime.strptime(comment['created_at'],'%a %b %d %H:%M:%S %z %Y').strftime('%Y-%m-%d') | 50 | created_at = datetime.strptime(comment['created_at'],'%a %b %d %H:%M:%S %z %Y').strftime('%Y-%m-%d') |
| 51 | likes_counts = comment['like_counts'] | 51 | likes_counts = comment['like_counts'] |
| @@ -58,7 +58,7 @@ def parse_json(response,artileId): | @@ -58,7 +58,7 @@ def parse_json(response,artileId): | ||
| 58 | authorGender = comment['user']['gender'] | 58 | authorGender = comment['user']['gender'] |
| 59 | authorAddress = comment['user']['location'] | 59 | authorAddress = comment['user']['location'] |
| 60 | authorAvatar = comment['user']['avatar_large'] | 60 | authorAvatar = comment['user']['avatar_large'] |
| 61 | - writerRow([ | 61 | + write([ |
| 62 | artileId, | 62 | artileId, |
| 63 | created_at, | 63 | created_at, |
| 64 | likes_counts, | 64 | likes_counts, |
| @@ -73,7 +73,7 @@ def parse_json(response,artileId): | @@ -73,7 +73,7 @@ def parse_json(response,artileId): | ||
| 73 | def start(): | 73 | def start(): |
| 74 | commentUrl = 'https://weibo.com/ajax/statuses/buildComments' | 74 | commentUrl = 'https://weibo.com/ajax/statuses/buildComments' |
| 75 | init() | 75 | init() |
| 76 | - articleList = getAllArticleList() | 76 | + articleList = getArticleList() |
| 77 | for article in articleList: | 77 | for article in articleList: |
| 78 | articleId = article[0] | 78 | articleId = article[0] |
| 79 | print('正在爬取id值为%s的文章评论' % articleId) | 79 | print('正在爬取id值为%s的文章评论' % articleId) |
| @@ -82,8 +82,8 @@ def start(): | @@ -82,8 +82,8 @@ def start(): | ||
| 82 | 'id':int(articleId), | 82 | 'id':int(articleId), |
| 83 | 'is_show_bulletin':2 | 83 | 'is_show_bulletin':2 |
| 84 | } | 84 | } |
| 85 | - response = get_data(commentUrl,params) | ||
| 86 | - parse_json(response,articleId) | 85 | + response = fetchData(commentUrl,params) |
| 86 | + readJson(response,articleId) | ||
| 87 | 87 | ||
| 88 | 88 | ||
| 89 | 89 |
| @@ -25,12 +25,12 @@ def init(): | @@ -25,12 +25,12 @@ def init(): | ||
| 25 | 'isVip' # v_plus | 25 | 'isVip' # v_plus |
| 26 | ]) | 26 | ]) |
| 27 | 27 | ||
| 28 | -def writerRow(row): | 28 | +def write(row): |
| 29 | with open('./article.csv', 'a', encoding='utf-8', newline='') as csvFile: | 29 | with open('./article.csv', 'a', encoding='utf-8', newline='') as csvFile: |
| 30 | writer = csv.writer(csvFile) | 30 | writer = csv.writer(csvFile) |
| 31 | writer.writerow(row) | 31 | writer.writerow(row) |
| 32 | 32 | ||
| 33 | -def get_data(url,params): | 33 | +def fetchData(url,params): |
| 34 | headers = { | 34 | headers = { |
| 35 | 'Cookie':'SINAGLOBAL=2555941826014.1074.1676801766625; ULV=1719829459275:6:1:2:4660996305989.918.1719827559898:1719743122299; UOR=,,www.baidu.com; XSRF-TOKEN=VtLXviYSIs8lor7sz4iGyigL; SUB=_2A25LhvU9DeRhGeFH6FIX-S3MyD2IHXVo-gj1rDV8PUJbkNAGLRXMkW1Ne2nhI3Gle25QJK0Z99J3trq_NZn6YKJ-; SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9WW3Mv8V5EupQbbKh.vaZIwU5JpX5KzhUgL.FoM4e05c1Ke7e022dJLoIp7LxKML1KBLBKnLxKqL1hnLBoM41hz41hqReKqN; WBPSESS=Dt2hbAUaXfkVprjyrAZT_LRaDLsnxG-kIbeYwnBb5OUKZiwfVr_UrcYfWuqG-4ZVDM5HeU3HXkDNK_thfRfdS9Ao6ezT30jDksv-CpaVmlTAqGUHjJ7PYkH5aCK4HLxmRq14ZalmQNwzfWMPa4y0VNRLuYdg7L1s49ymNq_5v5vusoz0r4ki6u-MHGraF0fbUTgX14x0kHayEwOoxfLI-w==; SCF=AqmJWo31oFV5itnRgWNU1-wHQTL6PmkBLf3gDuqpdqAIfaWguDTMre6Oxjf5Uzs74JAh2r0DdV1sJ1g6m-wJ5NQ.; _s_tentry=-; Apache=4660996305989.918.1719827559898; PC_TOKEN=7955a7ab1f; appkey=; geetest_token=602cd4e3a7ed1898808f8adfe1a2048b; ALF=1722421868', | 35 | 'Cookie':'SINAGLOBAL=2555941826014.1074.1676801766625; ULV=1719829459275:6:1:2:4660996305989.918.1719827559898:1719743122299; UOR=,,www.baidu.com; XSRF-TOKEN=VtLXviYSIs8lor7sz4iGyigL; SUB=_2A25LhvU9DeRhGeFH6FIX-S3MyD2IHXVo-gj1rDV8PUJbkNAGLRXMkW1Ne2nhI3Gle25QJK0Z99J3trq_NZn6YKJ-; SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9WW3Mv8V5EupQbbKh.vaZIwU5JpX5KzhUgL.FoM4e05c1Ke7e022dJLoIp7LxKML1KBLBKnLxKqL1hnLBoM41hz41hqReKqN; WBPSESS=Dt2hbAUaXfkVprjyrAZT_LRaDLsnxG-kIbeYwnBb5OUKZiwfVr_UrcYfWuqG-4ZVDM5HeU3HXkDNK_thfRfdS9Ao6ezT30jDksv-CpaVmlTAqGUHjJ7PYkH5aCK4HLxmRq14ZalmQNwzfWMPa4y0VNRLuYdg7L1s49ymNq_5v5vusoz0r4ki6u-MHGraF0fbUTgX14x0kHayEwOoxfLI-w==; SCF=AqmJWo31oFV5itnRgWNU1-wHQTL6PmkBLf3gDuqpdqAIfaWguDTMre6Oxjf5Uzs74JAh2r0DdV1sJ1g6m-wJ5NQ.; _s_tentry=-; Apache=4660996305989.918.1719827559898; PC_TOKEN=7955a7ab1f; appkey=; geetest_token=602cd4e3a7ed1898808f8adfe1a2048b; ALF=1722421868', |
| 36 | 'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:127.0) Gecko/20100101 Firefox/127.0' | 36 | 'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:127.0) Gecko/20100101 Firefox/127.0' |
| @@ -41,7 +41,7 @@ def get_data(url,params): | @@ -41,7 +41,7 @@ def get_data(url,params): | ||
| 41 | else: | 41 | else: |
| 42 | return None | 42 | return None |
| 43 | 43 | ||
| 44 | -def getAllTypeList(): | 44 | +def getTypeList(): |
| 45 | typeList = [] | 45 | typeList = [] |
| 46 | with open('./nav.csv','r',encoding='utf-8') as reader: | 46 | with open('./nav.csv','r',encoding='utf-8') as reader: |
| 47 | readerCsv = csv.reader(reader) | 47 | readerCsv = csv.reader(reader) |
| @@ -50,7 +50,7 @@ def getAllTypeList(): | @@ -50,7 +50,7 @@ def getAllTypeList(): | ||
| 50 | typeList.append(nav) | 50 | typeList.append(nav) |
| 51 | return typeList | 51 | return typeList |
| 52 | 52 | ||
| 53 | -def parse_json(response,type): | 53 | +def readJson(response,type): |
| 54 | for artice in response: | 54 | for artice in response: |
| 55 | id = artice['id'] | 55 | id = artice['id'] |
| 56 | likeNum = artice['attitudes_count'] | 56 | likeNum = artice['attitudes_count'] |
| @@ -72,7 +72,7 @@ def parse_json(response,type): | @@ -72,7 +72,7 @@ def parse_json(response,type): | ||
| 72 | authorName = artice['user']['screen_name'] | 72 | authorName = artice['user']['screen_name'] |
| 73 | authorDetail = 'https://weibo.com/u/' + str(artice['user']['id']) | 73 | authorDetail = 'https://weibo.com/u/' + str(artice['user']['id']) |
| 74 | isVip = artice['user']['v_plus'] | 74 | isVip = artice['user']['v_plus'] |
| 75 | - writerRow([ | 75 | + write([ |
| 76 | id, | 76 | id, |
| 77 | likeNum, | 77 | likeNum, |
| 78 | commentsLen, | 78 | commentsLen, |
| @@ -92,7 +92,7 @@ def parse_json(response,type): | @@ -92,7 +92,7 @@ def parse_json(response,type): | ||
| 92 | def start(typeNum=1,pageNum=1): | 92 | def start(typeNum=1,pageNum=1): |
| 93 | articleUrl = 'https://weibo.com/ajax/feed/hottimeline' | 93 | articleUrl = 'https://weibo.com/ajax/feed/hottimeline' |
| 94 | init() | 94 | init() |
| 95 | - typeList = getAllTypeList() | 95 | + typeList = getTypeList() |
| 96 | typeNumCount = 0 | 96 | typeNumCount = 0 |
| 97 | for type in typeList: | 97 | for type in typeList: |
| 98 | if typeNumCount > typeNum:return | 98 | if typeNumCount > typeNum:return |
| @@ -107,8 +107,8 @@ def start(typeNum=1,pageNum=1): | @@ -107,8 +107,8 @@ def start(typeNum=1,pageNum=1): | ||
| 107 | 'count':10, | 107 | 'count':10, |
| 108 | 'extparam':'discover|new_feed' | 108 | 'extparam':'discover|new_feed' |
| 109 | } | 109 | } |
| 110 | - response = get_data(articleUrl,parmas) | ||
| 111 | - parse_json(response,type[0]) | 110 | + response = fetchData(articleUrl,parmas) |
| 111 | + readJson(response,type[0]) | ||
| 112 | typeNumCount += 1 | 112 | typeNumCount += 1 |
| 113 | 113 | ||
| 114 | if __name__ == '__main__': | 114 | if __name__ == '__main__': |
| @@ -13,12 +13,12 @@ def init(): | @@ -13,12 +13,12 @@ def init(): | ||
| 13 | 'containerid' | 13 | 'containerid' |
| 14 | ]) | 14 | ]) |
| 15 | 15 | ||
| 16 | -def writerRow(row): | 16 | +def write(row): |
| 17 | with open('./nav.csv', 'a', encoding='utf-8', newline='') as csvFile: | 17 | with open('./nav.csv', 'a', encoding='utf-8', newline='') as csvFile: |
| 18 | writer = csv.writer(csvFile) | 18 | writer = csv.writer(csvFile) |
| 19 | writer.writerow(row) | 19 | writer.writerow(row) |
| 20 | 20 | ||
| 21 | -def get_data(url): | 21 | +def fetchData(url): |
| 22 | headers = { | 22 | headers = { |
| 23 | 'Cookie':'SINAGLOBAL=2555941826014.1074.1676801766625; ULV=1719829459275:6:1:2:4660996305989.918.1719827559898:1719743122299; UOR=,,www.baidu.com; XSRF-TOKEN=VtLXviYSIs8lor7sz4iGyigL; SUB=_2A25LhvU9DeRhGeFH6FIX-S3MyD2IHXVo-gj1rDV8PUJbkNAGLRXMkW1Ne2nhI3Gle25QJK0Z99J3trq_NZn6YKJ-; SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9WW3Mv8V5EupQbbKh.vaZIwU5JpX5KzhUgL.FoM4e05c1Ke7e022dJLoIp7LxKML1KBLBKnLxKqL1hnLBoM41hz41hqReKqN; WBPSESS=Dt2hbAUaXfkVprjyrAZT_LRaDLsnxG-kIbeYwnBb5OUKZiwfVr_UrcYfWuqG-4ZVDM5HeU3HXkDNK_thfRfdS9Ao6ezT30jDksv-CpaVmlTAqGUHjJ7PYkH5aCK4HLxmRq14ZalmQNwzfWMPa4y0VNRLuYdg7L1s49ymNq_5v5vusoz0r4ki6u-MHGraF0fbUTgX14x0kHayEwOoxfLI-w==; SCF=AqmJWo31oFV5itnRgWNU1-wHQTL6PmkBLf3gDuqpdqAIfaWguDTMre6Oxjf5Uzs74JAh2r0DdV1sJ1g6m-wJ5NQ.; _s_tentry=-; Apache=4660996305989.918.1719827559898; PC_TOKEN=7955a7ab1f; appkey=; geetest_token=602cd4e3a7ed1898808f8adfe1a2048b; ALF=1722421868', | 23 | 'Cookie':'SINAGLOBAL=2555941826014.1074.1676801766625; ULV=1719829459275:6:1:2:4660996305989.918.1719827559898:1719743122299; UOR=,,www.baidu.com; XSRF-TOKEN=VtLXviYSIs8lor7sz4iGyigL; SUB=_2A25LhvU9DeRhGeFH6FIX-S3MyD2IHXVo-gj1rDV8PUJbkNAGLRXMkW1Ne2nhI3Gle25QJK0Z99J3trq_NZn6YKJ-; SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9WW3Mv8V5EupQbbKh.vaZIwU5JpX5KzhUgL.FoM4e05c1Ke7e022dJLoIp7LxKML1KBLBKnLxKqL1hnLBoM41hz41hqReKqN; WBPSESS=Dt2hbAUaXfkVprjyrAZT_LRaDLsnxG-kIbeYwnBb5OUKZiwfVr_UrcYfWuqG-4ZVDM5HeU3HXkDNK_thfRfdS9Ao6ezT30jDksv-CpaVmlTAqGUHjJ7PYkH5aCK4HLxmRq14ZalmQNwzfWMPa4y0VNRLuYdg7L1s49ymNq_5v5vusoz0r4ki6u-MHGraF0fbUTgX14x0kHayEwOoxfLI-w==; SCF=AqmJWo31oFV5itnRgWNU1-wHQTL6PmkBLf3gDuqpdqAIfaWguDTMre6Oxjf5Uzs74JAh2r0DdV1sJ1g6m-wJ5NQ.; _s_tentry=-; Apache=4660996305989.918.1719827559898; PC_TOKEN=7955a7ab1f; appkey=; geetest_token=602cd4e3a7ed1898808f8adfe1a2048b; ALF=1722421868', |
| 24 | 'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:127.0) Gecko/20100101 Firefox/127.0' | 24 | 'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:127.0) Gecko/20100101 Firefox/127.0' |
| @@ -33,13 +33,13 @@ def get_data(url): | @@ -33,13 +33,13 @@ def get_data(url): | ||
| 33 | else: | 33 | else: |
| 34 | return None | 34 | return None |
| 35 | 35 | ||
| 36 | -def parse_json(response): | 36 | +def readJson(response): |
| 37 | navList = np.append(response['groups'][3]['group'],response['groups'][4]['group']) | 37 | navList = np.append(response['groups'][3]['group'],response['groups'][4]['group']) |
| 38 | for nav in navList: | 38 | for nav in navList: |
| 39 | navName = nav['title'] | 39 | navName = nav['title'] |
| 40 | gid = nav['gid'] | 40 | gid = nav['gid'] |
| 41 | containerid = nav['containerid'] | 41 | containerid = nav['containerid'] |
| 42 | - writerRow([ | 42 | + write([ |
| 43 | navName, | 43 | navName, |
| 44 | gid, | 44 | gid, |
| 45 | containerid | 45 | containerid |
| @@ -49,5 +49,5 @@ def parse_json(response): | @@ -49,5 +49,5 @@ def parse_json(response): | ||
| 49 | if __name__ == '__main__': | 49 | if __name__ == '__main__': |
| 50 | init() | 50 | init() |
| 51 | url = 'https://weibo.com/ajax/feed/allGroups' | 51 | url = 'https://weibo.com/ajax/feed/allGroups' |
| 52 | - response = get_data(url) | ||
| 53 | - parse_json(response) | ||
| 52 | + response = fetchData(url) | ||
| 53 | + readJson(response) |
-
Please register or login to post a comment