Asy0y0
Committed by GitHub

Optimize word cloud generation script by adding logging and exception handling.

1 -2025-01-09 23:29:06,246 [INFO] 尝试连接到数据库: root@localhost:3306/Weibo_PublicOpinion_AnalysisSystem  
2 -2025-01-09 23:29:06,346 [ERROR] 数据库连接失败: (1045, "Access denied for user 'root'@'localhost' (using password: YES)")  
@@ -5,50 +5,113 @@ import matplotlib.pyplot as plt @@ -5,50 +5,113 @@ import matplotlib.pyplot as plt
5 from PIL import Image 5 from PIL import Image
6 import numpy as np 6 import numpy as np
7 import pymysql 7 import pymysql
8 -  
9 -def stopWordList(): 8 +import logging
  9 +
# Configure logging: every record goes both to a log file in the current
# working directory and to the console.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s [%(levelname)s] %(message)s',
    handlers=[
        # Explicit UTF-8 so records containing non-ASCII text are written
        # the same way regardless of the platform's default encoding.
        logging.FileHandler("wordcloud_generator.log", encoding="utf-8"),
        logging.StreamHandler(),
    ],
)
  19 +
# Global cache for stop words. An empty set means "nothing loaded yet".
STOP_WORDS = set()


def load_stop_words():
    """
    Load and cache stop words.

    The stop words file is read once; after a successful, non-empty load
    the cached set is returned without touching the disk again. If the
    file does not exist or cannot be read, an error is logged and an empty
    set is returned (nothing is cached, so the next call retries).

    :return: set of stop words (possibly empty)
    """
    global STOP_WORDS
    if STOP_WORDS:
        return STOP_WORDS

    stop_words_path = './model/stopWords.txt'
    try:
        # Open directly instead of checking for existence first: one
        # filesystem access, and no window in which the file can vanish
        # between the check and the open.
        # 'utf-8-sig' decodes plain UTF-8 and also drops a leading BOM,
        # which would otherwise stick to the first stop word.
        with open(stop_words_path, encoding='utf-8-sig') as f:
            loaded = {line.strip() for line in f if line.strip()}
    except FileNotFoundError:
        logging.error(f"Stop words file does not exist: {stop_words_path}")
        return set()
    except (OSError, UnicodeError) as e:
        logging.error(f"Failed to load stop words file: {e}")
        return set()

    # Publish to the cache only after the whole file was read successfully.
    STOP_WORDS = loaded
    logging.info(f"Loaded {len(STOP_WORDS)} stop words")
    return STOP_WORDS
16 42
def generate_word_cloud(text, mask_path, font_path, output_path):
    """
    Generate a word cloud and save it to output_path.

    Every failure is logged and reported through the return value; no
    exception is propagated to the caller.

    :param text: Processed text
    :param mask_path: Path to the mask image
    :param font_path: Path to the font file
    :param output_path: Path to save the generated word cloud image
    :return: True if the image was saved, False if any step failed
    """
    # Load the mask. The image is opened directly (no separate existence
    # check) and inside a ``with`` block so the file handle is released as
    # soon as the pixel data has been copied into the array.
    try:
        with Image.open(mask_path) as img:
            img_arr = np.array(img)
    except FileNotFoundError:
        logging.error(f"Mask image file does not exist: {mask_path}")
        return False
    except Exception as e:
        # Deliberately broad: any decoding problem is logged, not raised.
        logging.error(f"Failed to load mask image: {e}")
        return False
    logging.info(f"Successfully loaded mask image: {mask_path}")

    # Build the word cloud from the text.
    try:
        wc = WordCloud(
            background_color="#fff",
            mask=img_arr,
            font_path=font_path,
            max_words=2000,
            max_font_size=100,
            random_state=42,  # fixed seed
            width=800,
            height=600,
        )
        wc.generate_from_text(text)
    except Exception as e:
        logging.error(f"Failed to generate word cloud: {e}")
        return False
    logging.info("Word cloud generated successfully")

    # Render and save. The figure is closed in ``finally`` so it is not
    # left open when rendering or saving fails.
    fig = None
    try:
        fig = plt.figure(figsize=(8, 6))
        plt.imshow(wc, interpolation='bilinear')
        plt.axis('off')
        plt.savefig(output_path, dpi=300, bbox_inches='tight')
    except Exception as e:
        logging.error(f"Failed to save word cloud image: {e}")
        return False
    finally:
        if fig is not None:
            plt.close(fig)

    logging.info(f"Word cloud saved to: {output_path}")
    return True
34 89
35 def get_db_connection_interactive(): 90 def get_db_connection_interactive():
36 """ 91 """
37 - 通过终端交互获取数据库连接参数,若按回车则使用默认值。 92 + Interactively obtain database connection parameters from the terminal.
  93 + Press Enter to use default values.
  94 +
  95 + :return: pymysql.connections.Connection object
38 """ 96 """
39 - print("请依次输入数据库连接信息(直接按回车使用默认值):") 97 + print("Please enter database connection information (press Enter to use default values):")
40 98
41 - host = input(" 1. 主机 (默认: localhost): ") or "localhost"  
42 - port_str = input(" 2. 端口 (默认: 3306): ") or "3306" 99 + host = input(" 1. Host (default: localhost): ") or "localhost"
  100 + port_str = input(" 2. Port (default: 3306): ") or "3306"
  101 + try:
43 port = int(port_str) 102 port = int(port_str)
  103 + except ValueError:
  104 + logging.error(f"Invalid port number: {port_str}")
  105 + port = 3306
44 106
45 - user = input(" 3. 用户名 (默认: root): ") or "root"  
46 - password = input(" 4. 密码 (默认: 312517): ") or "12345678"  
47 - db_name = input(" 5. 数据库名 (默认: Weibo_PublicOpinion_AnalysisSystem): ") or "Weibo_PublicOpinion_AnalysisSystem" 107 + user = input(" 3. Username (default: root): ") or "root"
  108 + password = input(" 4. Password (default: 12345678): ") or "12345678"
  109 + db_name = input(" 5. Database name (default: Weibo_PublicOpinion_AnalysisSystem): ") or "Weibo_PublicOpinion_AnalysisSystem"
48 110
49 - print(f"\n即将连接到数据库: {user}@{host}:{port}/{db_name}\n") 111 + logging.info(f"Attempting to connect to database: {user}@{host}:{port}/{db_name}")
50 112
51 - return pymysql.connect( 113 + try:
  114 + connection = pymysql.connect(
52 host=host, 115 host=host,
53 user=user, 116 user=user,
54 password=password, 117 password=password,
@@ -56,44 +119,66 @@ def get_db_connection_interactive(): @@ -56,44 +119,66 @@ def get_db_connection_interactive():
56 port=port, 119 port=port,
57 charset='utf8mb4' 120 charset='utf8mb4'
58 ) 121 )
  122 + logging.info("Database connection successful")
  123 + return connection
  124 + except pymysql.MySQLError as e:
  125 + logging.error(f"Database connection failed: {e}")
  126 + raise
59 127
def get_img(field, table_name, target_img_src, res_img_src, connection, font_path='STHUPO.TTF'):
    """
    Retrieve text data from a specified field and table in the database,
    perform word segmentation and stop word removal, then generate a word cloud.

    Failures are logged rather than raised.

    :param field: Database field name
    :param table_name: Database table name
    :param target_img_src: Path to the mask image
    :param res_img_src: Path to save the generated word cloud image
    :param connection: Established database connection
    :param font_path: Path to the font file
    :return: False if the query, the text processing or the word cloud
             generation reported a failure, True otherwise
    """
    # NOTE(review): field and table_name are interpolated into the SQL text.
    # Identifiers cannot be sent as bound query parameters, so both values
    # must come from trusted code, never from user input.
    sql = f'SELECT {field} FROM {table_name}'
    try:
        with connection.cursor() as cursor:
            cursor.execute(sql)
            data = cursor.fetchall()
    except pymysql.MySQLError as e:
        logging.error(f"Database query failed: {e}")
        return False
    logging.info(f"Fetched {len(data)} records from '{table_name}' table, field '{field}'")

    # Tokenization & Stop word removal
    try:
        # One record per line: without a separator the last word of one
        # record and the first word of the next could be segmented as a
        # single token. Rows whose value is NULL/empty are skipped.
        # Rows are indexed by position — assumes a tuple-returning cursor.
        text = '\n'.join(item[0] for item in data if item[0])
        if not text:
            logging.warning("No text found, skipping word cloud generation")
            return False

        stop_words = load_stop_words()
        if not stop_words:
            logging.warning("Stop words set is empty, proceeding without stop word removal")

        # Whitespace-only tokens (including the record separators added
        # above) carry no meaning and are dropped with the stop words.
        filtered_words = [
            word for word in jieba.cut(text)
            if word.strip() and word not in stop_words
        ]
        if not filtered_words:
            logging.warning("No words left after stop word removal, skipping word cloud generation")
            return False
        final_text = ' '.join(filtered_words)
        logging.info(f"Completed tokenization and stop word removal, generated {len(filtered_words)} words")
    except Exception as e:
        logging.error(f"Text processing failed: {e}")
        return False

    # Generate word cloud. Only an explicit False from generate_word_cloud
    # is treated as failure; a None return counts as success.
    return generate_word_cloud(final_text, target_img_src, font_path, res_img_src) is not False
89 167
90 def main(): 168 def main():
91 - # 1. 获取数据库连接(交互式输入) 169 + """
  170 + Main function to execute the word cloud generation process.
  171 + """
  172 + try:
  173 + # Obtain database connection interactively
92 connection = get_db_connection_interactive() 174 connection = get_db_connection_interactive()
  175 + except Exception:
  176 + logging.error("Failed to establish database connection, terminating program")
  177 + return
93 178
94 - # 2. 根据需求生成词云  
95 - # 例如:从 article 表的 content 字段生成词云  
96 try: 179 try:
  180 + # Generate word cloud as per requirements
  181 + # Example: Generate word cloud from 'content' field in 'article' table
97 get_img( 182 get_img(
98 field='content', 183 field='content',
99 table_name='article', 184 table_name='article',
@@ -101,10 +186,16 @@ def main(): @@ -101,10 +186,16 @@ def main():
101 res_img_src='./static/contentCloud.jpg', 186 res_img_src='./static/contentCloud.jpg',
102 connection=connection 187 connection=connection
103 ) 188 )
104 - print("词云生成完毕!") 189 + print("Word cloud generation completed!")
  190 + except Exception as e:
  191 + logging.error(f"An error occurred during word cloud generation: {e}")
105 finally: 192 finally:
106 - # 关闭数据库连接 193 + # Close the database connection
  194 + try:
107 connection.close() 195 connection.close()
  196 + logging.info("Database connection closed")
  197 + except Exception as e:
  198 + logging.error(f"Error closing database connection: {e}")
108 199
109 if __name__ == '__main__': 200 if __name__ == '__main__':
110 main() 201 main()