Committed by GitHub
Optimize word cloud generation script by adding logging and exception handling.
Showing 3 changed files with 130 additions and 41 deletions
app.log — deleted (file mode 100644 → 0)
database_operations.log — deleted (file mode 100644 → 0)
| @@ -5,50 +5,113 @@ import matplotlib.pyplot as plt | @@ -5,50 +5,113 @@ import matplotlib.pyplot as plt | ||
| 5 | from PIL import Image | 5 | from PIL import Image |
| 6 | import numpy as np | 6 | import numpy as np |
| 7 | import pymysql | 7 | import pymysql |
| 8 | - | ||
| 9 | -def stopWordList(): | 8 | +import logging |
| 9 | + | ||
| 10 | +# Configure logging | ||
| 11 | +logging.basicConfig( | ||
| 12 | + level=logging.INFO, | ||
| 13 | + format='%(asctime)s [%(levelname)s] %(message)s', | ||
| 14 | + handlers=[ | ||
| 15 | + logging.FileHandler("wordcloud_generator.log"), | ||
| 16 | + logging.StreamHandler() | ||
| 17 | + ] | ||
| 18 | +) | ||
| 19 | + | ||
| 20 | +# Global cache for stop words | ||
| 21 | +STOP_WORDS = set() | ||
| 22 | + | ||
| 23 | +def load_stop_words(): | ||
| 10 | """ | 24 | """ |
| 11 | - 如果 stopWords.txt 文件内容较大,或被频繁读取, | ||
| 12 | - 可以考虑将其缓存起来,避免重复读文件。 | 25 | + Load and cache stop words. |
| 26 | + If the stop words file does not exist or fails to read, log an error and return an empty set. | ||
| 13 | """ | 27 | """ |
| 14 | - with open('./model/stopWords.txt', encoding='utf8') as f: | ||
| 15 | - return [line.strip() for line in f.readlines()] | 28 | + global STOP_WORDS |
| 29 | + if STOP_WORDS: | ||
| 30 | + return STOP_WORDS | ||
| 31 | + stop_words_path = './model/stopWords.txt' | ||
| 32 | + if not os.path.exists(stop_words_path): | ||
| 33 | + logging.error(f"Stop words file does not exist: {stop_words_path}") | ||
| 34 | + return set() | ||
| 35 | + try: | ||
| 36 | + with open(stop_words_path, encoding='utf8') as f: | ||
| 37 | + STOP_WORDS = set(line.strip() for line in f if line.strip()) | ||
| 38 | + logging.info(f"Loaded {len(STOP_WORDS)} stop words") | ||
| 39 | + except Exception as e: | ||
| 40 | + logging.error(f"Failed to load stop words file: {e}") | ||
| 41 | + return STOP_WORDS | ||
| 16 | 42 | ||
| 17 | def generate_word_cloud(text, mask_path, font_path, output_path): | 43 | def generate_word_cloud(text, mask_path, font_path, output_path): |
| 18 | - """生成词云并保存到 output_path""" | 44 | + """ |
| 45 | + Generate a word cloud and save it to output_path. | ||
| 46 | + | ||
| 47 | + :param text: Processed text | ||
| 48 | + :param mask_path: Path to the mask image | ||
| 49 | + :param font_path: Path to the font file | ||
| 50 | + :param output_path: Path to save the generated word cloud image | ||
| 51 | + """ | ||
| 52 | + if not os.path.exists(mask_path): | ||
| 53 | + logging.error(f"Mask image file does not exist: {mask_path}") | ||
| 54 | + return | ||
| 55 | + try: | ||
| 19 | img = Image.open(mask_path) | 56 | img = Image.open(mask_path) |
| 20 | img_arr = np.array(img) | 57 | img_arr = np.array(img) |
| 58 | + logging.info(f"Successfully loaded mask image: {mask_path}") | ||
| 59 | + except Exception as e: | ||
| 60 | + logging.error(f"Failed to load mask image: {e}") | ||
| 61 | + return | ||
| 21 | 62 | ||
| 63 | + try: | ||
| 22 | wc = WordCloud( | 64 | wc = WordCloud( |
| 23 | background_color="#fff", | 65 | background_color="#fff", |
| 24 | mask=img_arr, | 66 | mask=img_arr, |
| 25 | - font_path=font_path | 67 | + font_path=font_path, |
| 68 | + max_words=2000, | ||
| 69 | + max_font_size=100, | ||
| 70 | + random_state=42, | ||
| 71 | + width=800, | ||
| 72 | + height=600 | ||
| 26 | ) | 73 | ) |
| 27 | wc.generate_from_text(text) | 74 | wc.generate_from_text(text) |
| 75 | + logging.info("Word cloud generated successfully") | ||
| 76 | + except Exception as e: | ||
| 77 | + logging.error(f"Failed to generate word cloud: {e}") | ||
| 78 | + return | ||
| 28 | 79 | ||
| 80 | + try: | ||
| 29 | plt.figure(figsize=(8, 6)) | 81 | plt.figure(figsize=(8, 6)) |
| 30 | plt.imshow(wc, interpolation='bilinear') | 82 | plt.imshow(wc, interpolation='bilinear') |
| 31 | plt.axis('off') | 83 | plt.axis('off') |
| 32 | plt.savefig(output_path, dpi=300, bbox_inches='tight') | 84 | plt.savefig(output_path, dpi=300, bbox_inches='tight') |
| 33 | - plt.close() # 保存后关闭 | 85 | + plt.close() |
| 86 | + logging.info(f"Word cloud saved to: {output_path}") | ||
| 87 | + except Exception as e: | ||
| 88 | + logging.error(f"Failed to save word cloud image: {e}") | ||
| 34 | 89 | ||
| 35 | def get_db_connection_interactive(): | 90 | def get_db_connection_interactive(): |
| 36 | """ | 91 | """ |
| 37 | - 通过终端交互获取数据库连接参数,若按回车则使用默认值。 | 92 | + Interactively obtain database connection parameters from the terminal. |
| 93 | + Press Enter to use default values. | ||
| 94 | + | ||
| 95 | + :return: pymysql.connections.Connection object | ||
| 38 | """ | 96 | """ |
| 39 | - print("请依次输入数据库连接信息(直接按回车使用默认值):") | 97 | + print("Please enter database connection information (press Enter to use default values):") |
| 40 | 98 | ||
| 41 | - host = input(" 1. 主机 (默认: localhost): ") or "localhost" | ||
| 42 | - port_str = input(" 2. 端口 (默认: 3306): ") or "3306" | 99 | + host = input(" 1. Host (default: localhost): ") or "localhost" |
| 100 | + port_str = input(" 2. Port (default: 3306): ") or "3306" | ||
| 101 | + try: | ||
| 43 | port = int(port_str) | 102 | port = int(port_str) |
| 103 | + except ValueError: | ||
| 104 | + logging.error(f"Invalid port number: {port_str}") | ||
| 105 | + port = 3306 | ||
| 44 | 106 | ||
| 45 | - user = input(" 3. 用户名 (默认: root): ") or "root" | ||
| 46 | - password = input(" 4. 密码 (默认: 312517): ") or "12345678" | ||
| 47 | - db_name = input(" 5. 数据库名 (默认: Weibo_PublicOpinion_AnalysisSystem): ") or "Weibo_PublicOpinion_AnalysisSystem" | 107 | + user = input(" 3. Username (default: root): ") or "root" |
| 108 | + password = input(" 4. Password (default: 12345678): ") or "12345678" | ||
| 109 | + db_name = input(" 5. Database name (default: Weibo_PublicOpinion_AnalysisSystem): ") or "Weibo_PublicOpinion_AnalysisSystem" | ||
| 48 | 110 | ||
| 49 | - print(f"\n即将连接到数据库: {user}@{host}:{port}/{db_name}\n") | 111 | + logging.info(f"Attempting to connect to database: {user}@{host}:{port}/{db_name}") |
| 50 | 112 | ||
| 51 | - return pymysql.connect( | 113 | + try: |
| 114 | + connection = pymysql.connect( | ||
| 52 | host=host, | 115 | host=host, |
| 53 | user=user, | 116 | user=user, |
| 54 | password=password, | 117 | password=password, |
| @@ -56,44 +119,66 @@ def get_db_connection_interactive(): | @@ -56,44 +119,66 @@ def get_db_connection_interactive(): | ||
| 56 | port=port, | 119 | port=port, |
| 57 | charset='utf8mb4' | 120 | charset='utf8mb4' |
| 58 | ) | 121 | ) |
| 122 | + logging.info("Database connection successful") | ||
| 123 | + return connection | ||
| 124 | + except pymysql.MySQLError as e: | ||
| 125 | + logging.error(f"Database connection failed: {e}") | ||
| 126 | + raise | ||
| 59 | 127 | ||
| 60 | def get_img(field, table_name, target_img_src, res_img_src, connection, font_path='STHUPO.TTF'): | 128 | def get_img(field, table_name, target_img_src, res_img_src, connection, font_path='STHUPO.TTF'): |
| 61 | """ | 129 | """ |
| 62 | - 从数据库拉取指定字段的文本数据,分词处理后生成词云。 | ||
| 63 | - :param field: 数据库字段名 | ||
| 64 | - :param table_name: 数据表名 | ||
| 65 | - :param target_img_src: 词云形状图 | ||
| 66 | - :param res_img_src: 输出词云文件路径 | ||
| 67 | - :param connection: 已建立的数据库连接 | ||
| 68 | - :param font_path: 字体文件路径 | 130 | + Retrieve text data from a specified field and table in the database, |
| 131 | + perform word segmentation and stop word removal, then generate a word cloud. | ||
| 132 | + | ||
| 133 | + :param field: Database field name | ||
| 134 | + :param table_name: Database table name | ||
| 135 | + :param target_img_src: Path to the mask image | ||
| 136 | + :param res_img_src: Path to save the generated word cloud image | ||
| 137 | + :param connection: Established database connection | ||
| 138 | + :param font_path: Path to the font file | ||
| 69 | """ | 139 | """ |
| 70 | - cursor = connection.cursor() | 140 | + try: |
| 141 | + with connection.cursor() as cursor: | ||
| 71 | sql = f'SELECT {field} FROM {table_name}' | 142 | sql = f'SELECT {field} FROM {table_name}' |
| 72 | cursor.execute(sql) | 143 | cursor.execute(sql) |
| 73 | data = cursor.fetchall() | 144 | data = cursor.fetchall() |
| 145 | + logging.info(f"Fetched {len(data)} records from '{table_name}' table, field '{field}'") | ||
| 146 | + except pymysql.MySQLError as e: | ||
| 147 | + logging.error(f"Database query failed: {e}") | ||
| 148 | + return | ||
| 74 | 149 | ||
| 75 | - text = '' | ||
| 76 | - for item in data: | ||
| 77 | - text += item[0] # item 是元组 (内容,),取第一个元素即可 | 150 | + text = ''.join(item[0] for item in data if item[0]) |
| 78 | 151 | ||
| 79 | - cursor.close() | ||
| 80 | - | ||
| 81 | - # 分词 & 去停用词 | 152 | + # Tokenization & Stop word removal |
| 153 | + try: | ||
| 154 | + stop_words = load_stop_words() | ||
| 155 | + if not stop_words: | ||
| 156 | + logging.warning("Stop words set is empty, proceeding without stop word removal") | ||
| 82 | cut_words = jieba.cut(text) | 157 | cut_words = jieba.cut(text) |
| 83 | - stop_words = set(stopWordList()) | ||
| 84 | filtered_words = [word for word in cut_words if word not in stop_words] | 158 | filtered_words = [word for word in cut_words if word not in stop_words] |
| 85 | final_text = ' '.join(filtered_words) | 159 | final_text = ' '.join(filtered_words) |
| 160 | + logging.info(f"Completed tokenization and stop word removal, generated {len(filtered_words)} words") | ||
| 161 | + except Exception as e: | ||
| 162 | + logging.error(f"Text processing failed: {e}") | ||
| 163 | + return | ||
| 86 | 164 | ||
| 87 | - # 生成词云 | 165 | + # Generate word cloud |
| 88 | generate_word_cloud(final_text, target_img_src, font_path, res_img_src) | 166 | generate_word_cloud(final_text, target_img_src, font_path, res_img_src) |
| 89 | 167 | ||
| 90 | def main(): | 168 | def main(): |
| 91 | - # 1. 获取数据库连接(交互式输入) | 169 | + """ |
| 170 | + Main function to execute the word cloud generation process. | ||
| 171 | + """ | ||
| 172 | + try: | ||
| 173 | + # Obtain database connection interactively | ||
| 92 | connection = get_db_connection_interactive() | 174 | connection = get_db_connection_interactive() |
| 175 | + except Exception: | ||
| 176 | + logging.error("Failed to establish database connection, terminating program") | ||
| 177 | + return | ||
| 93 | 178 | ||
| 94 | - # 2. 根据需求生成词云 | ||
| 95 | - # 例如:从 article 表的 content 字段生成词云 | ||
| 96 | try: | 179 | try: |
| 180 | + # Generate word cloud as per requirements | ||
| 181 | + # Example: Generate word cloud from 'content' field in 'article' table | ||
| 97 | get_img( | 182 | get_img( |
| 98 | field='content', | 183 | field='content', |
| 99 | table_name='article', | 184 | table_name='article', |
| @@ -101,10 +186,16 @@ def main(): | @@ -101,10 +186,16 @@ def main(): | ||
| 101 | res_img_src='./static/contentCloud.jpg', | 186 | res_img_src='./static/contentCloud.jpg', |
| 102 | connection=connection | 187 | connection=connection |
| 103 | ) | 188 | ) |
| 104 | - print("词云生成完毕!") | 189 | + print("Word cloud generation completed!") |
| 190 | + except Exception as e: | ||
| 191 | + logging.error(f"An error occurred during word cloud generation: {e}") | ||
| 105 | finally: | 192 | finally: |
| 106 | - # 关闭数据库连接 | 193 | + # Close the database connection |
| 194 | + try: | ||
| 107 | connection.close() | 195 | connection.close() |
| 196 | + logging.info("Database connection closed") | ||
| 197 | + except Exception as e: | ||
| 198 | + logging.error(f"Error closing database connection: {e}") | ||
| 108 | 199 | ||
| 109 | if __name__ == '__main__': | 200 | if __name__ == '__main__': |
| 110 | main() | 201 | main() |
-
Please register or login to post a comment