戒酒的李白

Modify the database hardcoding to switch to command-line interactive database connection.

@@ -12,3 +12,4 @@ model2/* @@ -12,3 +12,4 @@ model2/*
12 *.pyz 12 *.pyz
13 *.pywz 13 *.pywz
14 .vscode 14 .vscode
  15 +.VSCodeCounter
1 import os 1 import os
2 -from sqlalchemy import create_engine  
3 import pandas as pd 2 import pandas as pd
4 -from spiderDataPackage.settings import articleAddr,commentsAddr  
5 -# from ..model.topicDefine import * 3 +from sqlalchemy import create_engine
  4 +from getpass import getpass
  5 +import logging
  6 +
  7 +# 配置日志
  8 +logging.basicConfig(
  9 + level=logging.INFO,
  10 + format='%(asctime)s [%(levelname)s] %(message)s',
  11 + handlers=[
  12 + logging.FileHandler("save_data.log"),
  13 + logging.StreamHandler()
  14 + ]
  15 +)
6 16
7 -engine = create_engine('mysql+pymysql://XiaoXueQi:XiaoXueQi@47.92.235.6/Weibo_PublicOpinion_AnalysisSystem?charset=utf8mb4') 17 +# 假设 articleAddr 和 commentsAddr 是绝对路径或相对于脚本的路径
  18 +from spiderDataPackage.settings import articleAddr, commentsAddr
8 19
9 -def saveData(): 20 +def get_db_connection_interactive():
  21 + """
  22 + 通过终端交互获取数据库连接参数,若按回车则使用默认值。
  23 + 返回 SQLAlchemy 的数据库引擎。
  24 + """
  25 + print("请依次输入数据库连接信息(直接按回车使用默认值):")
  26 +
  27 + host = input(" 1. 主机 (默认: localhost): ") or "localhost"
  28 + port_str = input(" 2. 端口 (默认: 3306): ") or "3306"
10 try: 29 try:
11 - oldArticle = pd.read_sql('select * from article',engine) 30 + port = int(port_str)
  31 + except ValueError:
  32 + logging.warning("端口号无效,使用默认端口 3306。")
  33 + port = 3306
  34 +
  35 + user = input(" 3. 用户名 (默认: root): ") or "root"
  36 + password = getpass(" 4. 密码 (默认: 12345678): ") or "12345678"
  37 + db_name = input(" 5. 数据库名 (默认: Weibo_PublicOpinion_AnalysisSystem): ") or "Weibo_PublicOpinion_AnalysisSystem"
  38 +
  39 + # 构建数据库连接字符串
  40 + connection_str = f"mysql+pymysql://{user}:{password}@{host}:{port}/{db_name}?charset=utf8mb4"
  41 +
  42 + try:
  43 + engine = create_engine(connection_str)
  44 + # 测试连接
  45 + with engine.connect() as connection:
  46 + logging.info(f"成功连接到数据库: {user}@{host}:{port}/{db_name}")
  47 + return engine
  48 + except Exception as e:
  49 + logging.error(f"无法连接到数据库: {e}")
  50 + exit(1)
  51 +
  52 +def saveData(engine):
  53 + """
  54 + 从数据库和CSV文件读取数据,合并后去重并保存回数据库。
  55 + 最后删除CSV文件。
  56 + """
  57 + try:
  58 + # 读取旧数据
  59 + oldArticle = pd.read_sql('SELECT * FROM article', engine)
  60 + oldComment = pd.read_sql('SELECT * FROM comments', engine)
  61 + logging.info("成功从数据库读取旧的文章和评论数据。")
  62 +
  63 + # 读取新数据
12 newArticle = pd.read_csv(articleAddr) 64 newArticle = pd.read_csv(articleAddr)
13 - oldComment = pd.read_sql('select * from comments',engine)  
14 newComment = pd.read_csv(commentsAddr) 65 newComment = pd.read_csv(commentsAddr)
  66 + logging.info("成功从CSV文件读取新的文章和评论数据。")
15 67
16 - mergeArticle = pd.concat([newArticle,oldArticle],join='inner')  
17 - mergeComment = pd.concat([newComment,oldComment],join='inner') 68 + # 合并数据
  69 + mergeArticle = pd.concat([newArticle, oldArticle], ignore_index=True, sort=False)
  70 + mergeComment = pd.concat([newComment, oldComment], ignore_index=True, sort=False)
  71 + logging.info("成功合并新旧文章和评论数据。")
18 72
19 - mergeArticle.drop_duplicates(subset='id',keep='last',inplace=True)  
20 - mergeComment.drop_duplicates(subset='content',keep='last',inplace=True) 73 + # 去重
  74 + mergeArticle.drop_duplicates(subset='id', keep='last', inplace=True)
  75 + mergeComment.drop_duplicates(subset='content', keep='last', inplace=True)
  76 + logging.info("成功去除重复的文章和评论数据。")
21 77
  78 + # 保存回数据库
22 mergeArticle.to_sql('article', con=engine, if_exists='replace', index=False) 79 mergeArticle.to_sql('article', con=engine, if_exists='replace', index=False)
23 mergeComment.to_sql('comments', con=engine, if_exists='replace', index=False) 80 mergeComment.to_sql('comments', con=engine, if_exists='replace', index=False)
24 - except:  
25 - newArticle = pd.read_csv(articleAddr)  
26 - newComment = pd.read_csv(commentsAddr)  
27 - newArticle.to_sql('article',con=engine,if_exists='replace',index=False)  
28 - newComment.to_sql('comments',con=engine,if_exists='replace',index=False) 81 + logging.info("成功将合并后的数据保存回数据库。")
29 82
  83 + except pd.errors.EmptyDataError as e:
  84 + logging.error(f"读取CSV文件时出错: {e}")
  85 + except Exception as e:
  86 + logging.error(f"保存数据时出错: {e}")
  87 + else:
  88 + # 删除CSV文件
  89 + try:
30 os.remove(articleAddr) 90 os.remove(articleAddr)
31 os.remove(commentsAddr) 91 os.remove(commentsAddr)
32 - # update_data() 92 + logging.info("成功删除CSV文件。")
  93 + except Exception as e:
  94 + logging.warning(f"删除CSV文件时出错: {e}")
  95 +
  96 +def main():
  97 + # 获取数据库连接
  98 + engine = get_db_connection_interactive()
  99 +
  100 + # 保存数据
  101 + saveData(engine)
  102 +
  103 + # 关闭引擎(可选,因为SQLAlchemy引擎会自动管理连接池)
  104 + engine.dispose()
  105 + logging.info("数据库连接已关闭。")
33 106
34 if __name__ == '__main__': 107 if __name__ == '__main__':
35 - saveData()  
  108 + main()
1 -from pymysql import *  
2 -conn = connect(host='47.92.235.6',port=3306,user='XiaoXueQi',password='XiaoXueQi',database='Weibo_PublicOpinion_AnalysisSystem') 1 +import getpass
  2 +import pymysql
  3 +import logging
  4 +
  5 +# 配置日志
  6 +logging.basicConfig(
  7 + level=logging.INFO,
  8 + format='%(asctime)s [%(levelname)s] %(message)s',
  9 + handlers=[
  10 + logging.FileHandler("database_operations.log"),
  11 + logging.StreamHandler()
  12 + ]
  13 +)
  14 +
  15 +def get_db_connection_interactive():
  16 + """
  17 + 通过终端交互获取数据库连接参数,若按回车则使用默认值。
  18 + 返回一个连接对象。
  19 + """
  20 + print("请依次输入数据库连接信息(直接按回车使用默认值):")
  21 +
  22 + host = input(" 1. 主机 (默认: localhost): ") or "localhost"
  23 + port_str = input(" 2. 端口 (默认: 3306): ") or "3306"
  24 + try:
  25 + port = int(port_str)
  26 + except ValueError:
  27 + logging.warning("端口号无效,使用默认端口 3306。")
  28 + port = 3306
  29 +
  30 + user = input(" 3. 用户名 (默认: root): ") or "root"
  31 + password = getpass.getpass(" 4. 密码 (默认: 312517): ") or "312517"
  32 + db_name = input(" 5. 数据库名 (默认: Weibo_PublicOpinion_AnalysisSystem): ") or "Weibo_PublicOpinion_AnalysisSystem"
  33 +
  34 + logging.info(f"尝试连接到数据库: {user}@{host}:{port}/{db_name}")
  35 +
  36 + try:
  37 + connection = pymysql.connect(
  38 + host=host,
  39 + port=port,
  40 + user=user,
  41 + password=password,
  42 + database=db_name,
  43 + charset='utf8mb4',
  44 + cursorclass=pymysql.cursors.DictCursor # 返回字典格式
  45 + )
  46 + logging.info("数据库连接成功。")
  47 + return connection
  48 + except pymysql.MySQLError as e:
  49 + logging.error(f"数据库连接失败: {e}")
  50 + exit(1)
  51 +
  52 +# 获取数据库连接
  53 +conn = get_db_connection_interactive()
  54 +
  55 +# 获取游标
3 cursor = conn.cursor() 56 cursor = conn.cursor()
4 -def query(sql,params,type="no_select"): 57 +
  58 +def query(sql, params=None, query_type="no_select"):
  59 + """
  60 + 执行SQL查询或操作。
  61 +
  62 + :param sql: SQL语句
  63 + :param params: SQL参数(可选)
  64 + :param query_type: 查询类型,默认为 "no_select"
  65 + 如果不是 "no_select",则执行 fetch 操作
  66 + :return: 如果是查询操作,返回数据列表;否则返回 None
  67 + """
  68 + try:
  69 + if params:
5 params = tuple(params) 70 params = tuple(params)
6 - cursor.execute(sql,params) 71 + cursor.execute(sql, params)
  72 + else:
  73 + cursor.execute(sql)
  74 +
  75 + # 确保连接保持活跃
7 conn.ping(reconnect=True) 76 conn.ping(reconnect=True)
8 - if type != 'no_select': 77 +
  78 + if query_type != "no_select":
9 data_list = cursor.fetchall() 79 data_list = cursor.fetchall()
10 conn.commit() 80 conn.commit()
  81 + logging.info("查询成功,已获取数据。")
11 return data_list 82 return data_list
12 else: 83 else:
13 conn.commit() 84 conn.commit()
  85 + logging.info("操作成功,已提交事务。")
  86 + except pymysql.MySQLError as e:
  87 + logging.error(f"执行SQL时出错: {e}")
  88 + conn.rollback()
  89 + return None
  90 +
  91 +def main():
  92 + # 示例用法
  93 +
  94 + # 执行查询操作
  95 + select_sql = "SELECT * FROM article LIMIT 5"
  96 + articles = query(select_sql, query_type="select")
  97 + if articles:
  98 + for article in articles:
  99 + print(article)
  100 +
  101 + # 执行插入操作(根据实际表结构修改)
  102 + insert_sql = "INSERT INTO article (id, content) VALUES (%s, %s)"
  103 + new_article = (12345, "这是一条新的文章内容。")
  104 + result = query(insert_sql, params=new_article, query_type="no_select")
  105 + if result is None:
  106 + logging.info("插入操作完成。")
  107 +
  108 + # 关闭游标和连接
  109 + cursor.close()
  110 + conn.close()
  111 + logging.info("数据库连接已关闭。")
  112 +
  113 +if __name__ == '__main__':
  114 + main()
  1 +import os
1 import jieba 2 import jieba
2 from wordcloud import WordCloud 3 from wordcloud import WordCloud
3 import matplotlib.pyplot as plt 4 import matplotlib.pyplot as plt
4 -from PIL import Image,ImageDraw  
5 -from pymysql import *  
6 -import json 5 +from PIL import Image
7 import numpy as np 6 import numpy as np
8 -def stopWordList():  
9 - return [line.strip() for line in open('./model/stopWords.txt',encoding='utf8').readlines()]  
10 -  
11 -def get_img(field,tableName,targetImgSrc,resImgSrc):  
12 - con = connect(host='47.92.235.6',user='XiaoXueQi',password='XiaoXueQi',database='Weibo_PublicOpinion_AnalysisSystem',port=3306,charset='utf8mb4')  
13 - cuser = con.cursor()  
14 - sql = f'select {field} from {tableName}'  
15 - cuser.execute(sql)  
16 - data = cuser.fetchall()  
17 - text = ''  
18 - for item in data:  
19 - text += item[0]  
20 - cuser.close()  
21 - con.close() 7 +import pymysql
22 8
23 - cut = jieba.cut(text)  
24 - newCut = []  
25 - for word in cut:  
26 - if word not in stopWordList():newCut.append(word)  
27 - string = ' '.join(newCut) 9 +def stopWordList():
  10 + """
  11 + 如果 stopWords.txt 文件内容较大,或被频繁读取,
  12 + 可以考虑将其缓存起来,避免重复读文件。
  13 + """
  14 + with open('./model/stopWords.txt', encoding='utf8') as f:
  15 + return [line.strip() for line in f.readlines()]
28 16
29 - img = Image.open(targetImgSrc) 17 +def generate_word_cloud(text, mask_path, font_path, output_path):
  18 + """生成词云并保存到 output_path"""
  19 + img = Image.open(mask_path)
30 img_arr = np.array(img) 20 img_arr = np.array(img)
  21 +
31 wc = WordCloud( 22 wc = WordCloud(
32 background_color="#fff", 23 background_color="#fff",
33 mask=img_arr, 24 mask=img_arr,
34 - font_path='STHUPO.TTF' 25 + font_path=font_path
35 ) 26 )
36 - wc.generate_from_text(string)  
37 -  
38 - fig = plt.figure(1)  
39 - plt.imshow(wc) 27 + wc.generate_from_text(text)
40 28
  29 + plt.figure(figsize=(8, 6))
  30 + plt.imshow(wc, interpolation='bilinear')
41 plt.axis('off') 31 plt.axis('off')
  32 + plt.savefig(output_path, dpi=300, bbox_inches='tight')
  33 + plt.close() # 保存后关闭
  34 +
  35 +def get_db_connection_interactive():
  36 + """
  37 + 通过终端交互获取数据库连接参数,若按回车则使用默认值。
  38 + """
  39 + print("请依次输入数据库连接信息(直接按回车使用默认值):")
  40 +
  41 + host = input(" 1. 主机 (默认: localhost): ") or "localhost"
  42 + port_str = input(" 2. 端口 (默认: 3306): ") or "3306"
  43 + port = int(port_str)
  44 +
  45 + user = input(" 3. 用户名 (默认: root): ") or "root"
  46 + password = input(" 4. 密码 (默认: 312517): ") or "12345678"
  47 + db_name = input(" 5. 数据库名 (默认: Weibo_PublicOpinion_AnalysisSystem): ") or "Weibo_PublicOpinion_AnalysisSystem"
  48 +
  49 + print(f"\n即将连接到数据库: {user}@{host}:{port}/{db_name}\n")
42 50
43 - plt.savefig(resImgSrc,dpi=500) 51 + return pymysql.connect(
  52 + host=host,
  53 + user=user,
  54 + password=password,
  55 + database=db_name,
  56 + port=port,
  57 + charset='utf8mb4'
  58 + )
  59 +
  60 +def get_img(field, table_name, target_img_src, res_img_src, connection, font_path='STHUPO.TTF'):
  61 + """
  62 + 从数据库拉取指定字段的文本数据,分词处理后生成词云。
  63 + :param field: 数据库字段名
  64 + :param table_name: 数据表名
  65 + :param target_img_src: 词云形状图
  66 + :param res_img_src: 输出词云文件路径
  67 + :param connection: 已建立的数据库连接
  68 + :param font_path: 字体文件路径
  69 + """
  70 + cursor = connection.cursor()
  71 + sql = f'SELECT {field} FROM {table_name}'
  72 + cursor.execute(sql)
  73 + data = cursor.fetchall()
  74 +
  75 + text = ''
  76 + for item in data:
  77 + text += item[0] # item 是元组 (内容,),取第一个元素即可
44 78
  79 + cursor.close()
  80 +
  81 + # 分词 & 去停用词
  82 + cut_words = jieba.cut(text)
  83 + stop_words = set(stopWordList())
  84 + filtered_words = [word for word in cut_words if word not in stop_words]
  85 + final_text = ' '.join(filtered_words)
  86 +
  87 + # 生成词云
  88 + generate_word_cloud(final_text, target_img_src, font_path, res_img_src)
  89 +
  90 +def main():
  91 + # 1. 获取数据库连接(交互式输入)
  92 + connection = get_db_connection_interactive()
  93 +
  94 + # 2. 根据需求生成词云
  95 + # 例如:从 article 表的 content 字段生成词云
  96 + try:
  97 + get_img(
  98 + field='content',
  99 + table_name='article',
  100 + target_img_src='./static/content.jpg',
  101 + res_img_src='./static/contentCloud.jpg',
  102 + connection=connection
  103 + )
  104 + print("词云生成完毕!")
  105 + finally:
  106 + # 关闭数据库连接
  107 + connection.close()
45 108
46 -# get_img('content','comments','./static/comment.jpg','./static/commentCloud.jpg')  
47 -get_img('content','article','./static/content.jpg','./static/contentCloud.jpg') 109 +if __name__ == '__main__':
  110 + main()