Showing 1 changed file with 32 additions and 24 deletions
| @@ -377,34 +377,42 @@ def parse_forum_log_line(line): | @@ -377,34 +377,42 @@ def parse_forum_log_line(line): | ||
| 377 | """解析forum.log行内容,提取对话信息""" | 377 | """解析forum.log行内容,提取对话信息""" |
| 378 | import re | 378 | import re |
| 379 | 379 | ||
| 380 | - # 匹配格式: [时间] [来源] 内容 | ||
| 381 | - pattern = r'\[(\d{2}:\d{2}:\d{2})\]\s*\[([A-Z]+)\]\s*(.*)' | 380 | + # 匹配格式: [时间] [来源] 内容(来源允许大小写及空格) |
| 381 | + pattern = r'\[(\d{2}:\d{2}:\d{2})\]\s*\[([^\]]+)\]\s*(.*)' | ||
| 382 | match = re.match(pattern, line) | 382 | match = re.match(pattern, line) |
| 383 | 383 | ||
| 384 | - if match: | ||
| 385 | - timestamp, source, content = match.groups() | ||
| 386 | - | ||
| 387 | - # 过滤掉系统消息和空内容 | ||
| 388 | - if source == 'SYSTEM' or not content.strip(): | ||
| 389 | - return None | ||
| 390 | - | ||
| 391 | - # 只处理三个Engine的消息 | ||
| 392 | - if source not in ['QUERY', 'INSIGHT', 'MEDIA']: | ||
| 393 | - return None | ||
| 394 | - | ||
| 395 | - # 根据来源确定消息类型和发送者 | 384 | + if not match: |
| 385 | + return None | ||
| 386 | + | ||
| 387 | + timestamp, raw_source, content = match.groups() | ||
| 388 | + source = raw_source.strip().upper() | ||
| 389 | + | ||
| 390 | + # 过滤掉系统消息和空内容 | ||
| 391 | + if source == 'SYSTEM' or not content.strip(): | ||
| 392 | + return None | ||
| 393 | + | ||
| 394 | + # 支持三个Agent和主持人 | ||
| 395 | + if source not in ['QUERY', 'INSIGHT', 'MEDIA', 'HOST']: | ||
| 396 | + return None | ||
| 397 | + | ||
| 398 | + # 解码日志中的转义换行,保留多行格式 | ||
| 399 | + cleaned_content = content.replace('\\n', '\n').replace('\\r', '').strip() | ||
| 400 | + | ||
| 401 | + # 根据来源确定消息类型和发送者 | ||
| 402 | + if source == 'HOST': | ||
| 403 | + message_type = 'host' | ||
| 404 | + sender = 'Forum Host' | ||
| 405 | + else: | ||
| 396 | message_type = 'agent' | 406 | message_type = 'agent' |
| 397 | - sender = f'{source} Engine' | ||
| 398 | - | ||
| 399 | - return { | ||
| 400 | - 'type': message_type, | ||
| 401 | - 'sender': sender, | ||
| 402 | - 'content': content.strip(), | ||
| 403 | - 'timestamp': timestamp, | ||
| 404 | - 'source': source | ||
| 405 | - } | 407 | + sender = f'{source.title()} Engine' |
| 406 | 408 | ||
| 407 | - return None | 409 | + return { |
| 410 | + 'type': message_type, | ||
| 411 | + 'sender': sender, | ||
| 412 | + 'content': cleaned_content, | ||
| 413 | + 'timestamp': timestamp, | ||
| 414 | + 'source': source | ||
| 415 | + } | ||
| 408 | 416 | ||
| 409 | # Forum日志监听器 | 417 | # Forum日志监听器 |
| 410 | # 存储每个客户端的历史日志发送位置 | 418 | # 存储每个客户端的历史日志发送位置 |
-
Please register or login to post a comment