juanboy
1 -.conda  
  1 +.conda
  2 +*__pycache__/
No preview for this file type
No preview for this file type
  1 +id,likeNum,commentsLen,reposts_count,region,content,contentLen,created_at,type,detailUrl,authorAvatar,authorName,authorDetail,isVip
1 -from spiderData import spiderData 1 +from spiderContent import start as spiderContentStart
  2 +from spiderComments import start as spiderCommentsStart
2 from saveData import save_to_sql as saveData 3 from saveData import save_to_sql as saveData
3 4
4 def main(): 5 def main():
5 - try:  
6 - spiderData()  
7 - saveData()  
8 - print("爬取数据更新")  
9 - except:  
10 - print("爬取数据失败") 6 + print('正在爬取文章数据')
  7 + spiderContentStart(1,1)
  8 + print('正在爬取文章评论数据')
  9 + spiderCommentsStart()
  10 + print('正在存储数据')
  11 + saveData()
  12 + print("爬取数据更新")
11 13
12 if __name__ == '__main__': 14 if __name__ == '__main__':
13 main() 15 main()
  1 +typeName,gid,containerid
  2 +热门,102803,102803
  3 +同城,1028032222,102803_2222
  4 +榜单,102803600169,102803_ctg1_600169_-_ctg1_600169
  5 +男篮,102803600279,102803_ctg1_600279_-_ctg1_600279
  6 +明星,1028034288,102803_ctg1_4288_-_ctg1_4288
  7 +车展,1028035188,102803_ctg1_5188_-_ctg1_5188
  8 +搞笑,1028034388,102803_ctg1_4388_-_ctg1_4388
  9 +情感,1028031988,102803_ctg1_1988_-_ctg1_1988
  10 +周末,102803600195,102803_ctg1_600195_-_ctg1_600195
  11 +电影,1028033288,102803_ctg1_3288_-_ctg1_3288
  12 +社会,1028034188,102803_ctg1_4188_-_ctg1_4188
  13 +电视剧,1028032488,102803_ctg1_2488_-_ctg1_2488
  14 +美食,1028032688,102803_ctg1_2688_-_ctg1_2688
  15 +俄乌局势,102803600267,102803_ctg1_600267_-_ctg1_600267
  16 +国际,1028036288,102803_ctg1_6288_-_ctg1_6288
  17 +深度,102803600155,102803_ctg1_600155_-_ctg1_600155
  18 +财经,1028036388,102803_ctg1_6388_-_ctg1_6388
  19 +读书,1028034588,102803_ctg1_4588_-_ctg1_4588
  20 +摄影,1028034988,102803_ctg1_4988_-_ctg1_4988
  21 +颜值,102803600165,102803_ctg1_600165_-_ctg1_600165
  22 +体育,1028031388,102803_ctg1_1388_-_ctg1_1388
  23 +数码,1028035088,102803_ctg1_5088_-_ctg1_5088
  24 +综艺,1028034688,102803_ctg1_4688_-_ctg1_4688
  25 +时尚,1028034488,102803_ctg1_4488_-_ctg1_4488
  26 +星座,1028031688,102803_ctg1_1688_-_ctg1_1688
  27 +军事,1028036688,102803_ctg1_6688_-_ctg1_6688
  28 +股市,1028031288,102803_ctg1_1288_-_ctg1_1288
  29 +房产,1028035588,102803_ctg1_5588_-_ctg1_5588
  30 +家居,1028035888,102803_ctg1_5888_-_ctg1_5888
  31 +萌宠,1028032788,102803_ctg1_2788_-_ctg1_2788
  32 +科技,1028032088,102803_ctg1_2088_-_ctg1_2088
  33 +科普,1028035988,102803_ctg1_5988_-_ctg1_5988
  34 +动漫,1028032388,102803_ctg1_2388_-_ctg1_2388
  35 +运动健身,1028034788,102803_ctg1_4788_-_ctg1_4788
  36 +旅游,1028032588,102803_ctg1_2588_-_ctg1_2588
  37 +瘦身,1028036488,102803_ctg1_6488_-_ctg1_6488
  38 +好物,102803600094,102803_ctg1_600094_-_ctg1_600094
  39 +历史,1028036788,102803_ctg1_6788_-_ctg1_6788
  40 +艺术,1028035488,102803_ctg1_5488_-_ctg1_5488
  41 +美妆,1028031588,102803_ctg1_1588_-_ctg1_1588
  42 +法律,1028037388,102803_ctg1_7388_-_ctg1_7388
  43 +设计,1028035388,102803_ctg1_5388_-_ctg1_5388
  44 +健康,1028032188,102803_ctg1_2188_-_ctg1_2188
  45 +音乐,1028035288,102803_ctg1_5288_-_ctg1_5288
  46 +游戏,1028034888,102803_ctg1_4888_-_ctg1_4888
  47 +新时代,1028037968,102803_ctg1_7968_-_ctg1_7968
  48 +校园,102803600177,102803_ctg1_600177_-_ctg1_600177
  49 +收藏,1028038189,102803_ctg1_8189_-_ctg1_8189
  50 +政务,1028035788,102803_ctg1_5788_-_ctg1_5788
  51 +养生,1028036588,102803_ctg1_6588_-_ctg1_6588
  52 +育儿,1028033188,102803_ctg1_3188_-_ctg1_3188
  53 +抽奖,102803600037,102803_ctg1_600037_-_ctg1_600037
  54 +教育,102803600080,102803_ctg1_600080_-_ctg1_600080
  55 +婚恋,1028031788,102803_ctg1_1788_-_ctg1_1788
  56 +舞蹈,1028038788,102803_ctg1_8788_-_ctg1_8788
  57 +辟谣,1028036988,102803_ctg1_6988_-_ctg1_6988
  58 +公益,102803600057,102803_ctg1_600057_-_ctg1_600057
  59 +问答,1028037977,102803_ctg1_7977_-_ctg1_7977
  60 +三农,1028037188,102803_ctg1_7188_-_ctg1_7188
1 -from spiderDataPack.spiderNav import start as spiderNavStart  
2 -from spiderDataPack.spiderContent import start as spiderContentStart  
3 -from spiderDataPack.spiderComments import start as spiderCommentsStart  
4 -import os  
5 -  
6 -def spiderData():  
7 - if not os.path.exists('./nav.csv'):  
8 - spiderNavStart()  
9 - spiderContentStart(1,1)  
10 - spiderCommentsStart()  
11 -  
12 -if __name__ == '__main__':  
13 - spiderData()  
@@ -45,11 +45,9 @@ def parse_json(response): @@ -45,11 +45,9 @@ def parse_json(response):
45 containerid 45 containerid
46 ]) 46 ])
47 47
48 -def start(): 48 +
  49 +if __name__ == '__main__':
49 init() 50 init()
50 url = 'https://weibo.com/ajax/feed/allGroups' 51 url = 'https://weibo.com/ajax/feed/allGroups'
51 response = get_data(url) 52 response = get_data(url)
52 - parse_json(response)  
53 -  
54 -if __name__ == '__main__':  
55 - start()  
  53 + parse_json(response)
1 import time 1 import time
2 -from flask import Blueprint, redirect, render_template, request,Flask, session 2 +import hashlib
  3 +from flask import Blueprint, redirect, render_template, request, Flask, session
3 4
4 from utils.query import query 5 from utils.query import query
5 from utils.errorResponse import errorResponse 6 from utils.errorResponse import errorResponse
6 7
  8 +ub = Blueprint('user',
  9 + __name__,
  10 + url_prefix='/user',
  11 + template_folder='templates')
7 12
8 -ub = Blueprint('user',__name__,url_prefix='/user',template_folder='templates')  
9 13
10 -@ub.route('/login',methods=['GET','POST']) 14 +@ub.route('/login', methods=['GET', 'POST'])
11 def login(): 15 def login():
12 if request.method == 'GET': 16 if request.method == 'GET':
13 return render_template('login.html') 17 return render_template('login.html')
14 else: 18 else:
  19 +
15 def filter_fn(user): 20 def filter_fn(user):
16 - return request.form['username'] in user and request.form['password'] in user 21 + hash_with_salt = hashlib.sha256('XiaoXueQi2024'.encode('utf-8'))
  22 + hash_with_salt.update(request.form['password'].encode('utf-8'))
  23 + return request.form[
  24 + 'username'] in user and hash_with_salt.hexdigest() in user
  25 +
17 users = query('select * from user', [], 'select') 26 users = query('select * from user', [], 'select')
18 - login_success = list(filter(filter_fn,users))  
19 - if not len(login_success):return errorResponse('账号或密码错误') 27 + login_success = list(filter(filter_fn, users))
  28 + if not len(login_success): return errorResponse('账号或密码错误')
20 29
21 session['username'] = request.form['username'] 30 session['username'] = request.form['username']
22 return redirect('/page/home') 31 return redirect('/page/home')
23 -  
24 -@ub.route('/register',methods=['GET','POST']) 32 +
  33 +
  34 +@ub.route('/register', methods=['GET', 'POST'])
25 def register(): 35 def register():
26 if request.method == 'GET': 36 if request.method == 'GET':
27 return render_template('register.html') 37 return render_template('register.html')
28 else: 38 else:
29 - if request.form['password'] != request.form['checkPassword']:return errorResponse('两次密码不符合') 39 + if request.form['password'] != request.form['checkPassword']:
  40 + return errorResponse('两次密码不符合')
  41 +
30 def filter_fn(user): 42 def filter_fn(user):
31 return request.form['username'] in user 43 return request.form['username'] in user
32 44
33 - users = query('select * from user',[],'select')  
34 - filter_list = list(filter(filter_fn,users)) 45 + users = query('select * from user', [], 'select')
  46 + filter_list = list(filter(filter_fn, users))
35 if len(filter_list): 47 if len(filter_list):
36 return errorResponse('该用户名已被注册') 48 return errorResponse('该用户名已被注册')
37 else: 49 else:
38 time_tuple = time.localtime(time.time()) 50 time_tuple = time.localtime(time.time())
39 - query(''' 51 + hash_with_salt = hashlib.sha256('XiaoXueQi2024'.encode('utf-8'))
  52 + hash_with_salt.update(request.form['password'].encode('utf-8'))
  53 + query(
  54 + '''
40 insert into user(username,password,createTime) values(%s,%s,%s) 55 insert into user(username,password,createTime) values(%s,%s,%s)
41 - ''',[request.form['username'],request.form['password'],str(time_tuple[0]) + '-' + str(time_tuple[1]) + '-' + str(time_tuple[2])]) 56 + ''', [
  57 + request.form['username'],
  58 + hash_with_salt.hexdigest(),
  59 + str(time_tuple[0]) + '-' + str(time_tuple[1]) + '-' +
  60 + str(time_tuple[2])
  61 + ])
42 62
43 return redirect('/user/login') 63 return redirect('/user/login')
44 - 64 +
  65 +
45 @ub.route('/logOut') 66 @ub.route('/logOut')
46 def logOut(): 67 def logOut():
47 - session.clear()  
48 - return redirect('/user/login')  
  68 + session.clear()
  69 + return redirect('/user/login')