YYL469

修改【main.py】,增加【saveData.py】,将数据存储模块与调度模块分离

1 from spiderContent import start as spiderContentStart 1 from spiderContent import start as spiderContentStart
2 from spiderComments import start as spiderCommentsStart 2 from spiderComments import start as spiderCommentsStart
3 -import os  
4 -from sqlalchemy import create_engine  
5 -import pandas as pd  
6 -  
7 -engine = create_engine('mysql+pymysql://XiaoXueQi:XiaoXueQi@10.92.35.13/Weibo_PublicOpinion_AnalysisSystem?charset=utf8mb4')  
8 -  
9 -def save_to_sql():  
10 - try:  
11 - artileOldPd = pd.read_sql('select * from article',engine)  
12 - articleNewPd = pd.read_csv('articleData.csv')  
13 - commentOldPd = pd.read_sql('select * from comments',engine)  
14 - commentNewPd = pd.read_csv('articleComments.csv')  
15 -  
16 - concatArticlePd = pd.concat([articleNewPd,artileOldPd],join='inner')  
17 - concatCommentsPd = pd.concat([commentNewPd,commentOldPd],join='inner')  
18 -  
19 - concatArticlePd.drop_duplicates(subset='id',keep='last',inplace=True)  
20 - concatCommentsPd.drop_duplicates(subset='content',keep='last',inplace=True)  
21 -  
22 - concatArticlePd.to_sql('article', con=engine, if_exists='replace', index=False)  
23 - concatCommentsPd.to_sql('comments', con=engine, if_exists='replace', index=False)  
24 - except:  
25 - articleNewPd = pd.read_csv('articleData.csv')  
26 - commentNewPd = pd.read_csv('articleComments.csv')  
27 - articleNewPd.to_sql('article',con=engine,if_exists='replace',index=False)  
28 - commentNewPd.to_sql('comments',con=engine,if_exists='replace',index=False)  
29 -  
30 - os.remove('./articleData.csv')  
31 - os.remove('./articleComments.csv') 3 +from saveData import save_to_sql as saveData
32 4
33 def main(): 5 def main():
34 print('正在爬取文章数据') 6 print('正在爬取文章数据')
@@ -36,8 +8,7 @@ def main(): @@ -36,8 +8,7 @@ def main():
36 print('正在爬取文章评论数据') 8 print('正在爬取文章评论数据')
37 spiderCommentsStart() 9 spiderCommentsStart()
38 print('正在存储数据') 10 print('正在存储数据')
39 - save_to_sql()  
40 - 11 + saveData()
41 12
42 if __name__ == '__main__': 13 if __name__ == '__main__':
43 main() 14 main()
  1 +import os
  2 +from sqlalchemy import create_engine
  3 +import pandas as pd
  4 +
  5 +engine = create_engine('mysql+pymysql://XiaoXueQi:XiaoXueQi@10.92.35.13/Weibo_PublicOpinion_AnalysisSystem?charset=utf8mb4')
  6 +
def _merge_with_existing(table, csv_path, dedup_key):
    """Return the rows of *csv_path* merged with the rows already in *table*.

    The CSV rows are concatenated in front of the stored rows using only the
    columns both frames share (``join='inner'``), then de-duplicated on
    *dedup_key* with ``keep='last'``. Because the stored rows come second,
    the stored row is the one retained when a key appears in both.

    If the table cannot be read, the CSV rows are returned unchanged (no
    de-duplication), which mirrors what the previous fallback branch wrote.
    """
    fresh = pd.read_csv(csv_path)
    try:
        stored = pd.read_sql(f'select * from {table}', engine)
    except Exception as exc:
        # Presumably the first run, before the table has been created --
        # TODO confirm the exact exception type and narrow this handler.
        # Only the read is guarded: a failure while merging or writing must
        # propagate instead of triggering an overwrite with partial data.
        print(f'Could not read table {table!r} ({exc}); storing new rows only')
        return fresh

    merged = pd.concat([fresh, stored], join='inner')
    merged.drop_duplicates(subset=dedup_key, keep='last', inplace=True)
    return merged


def save_to_sql():
    """Merge the scraped CSV files into the database and delete the CSVs.

    ``articleData.csv`` is merged into the ``article`` table (de-duplicated
    on ``id``) and ``articleComments.csv`` into the ``comments`` table
    (de-duplicated on ``content``). Each table is rewritten with
    ``if_exists='replace'``.

    Both merged frames are built before anything is written, so a missing
    CSV file raises before either table is touched. The CSV files are
    removed only after both writes have succeeded; any error during the
    merge or the writes propagates to the caller and leaves the CSVs on disk.
    """
    articles = _merge_with_existing('article', 'articleData.csv', 'id')
    comments = _merge_with_existing('comments', 'articleComments.csv', 'content')

    articles.to_sql('article', con=engine, if_exists='replace', index=False)
    comments.to_sql('comments', con=engine, if_exists='replace', index=False)

    os.remove('./articleData.csv')
    os.remove('./articleComments.csv')
  30 +
  31 +if __name__ == '__main__':
  32 + save_to_sql()