Showing 1 changed file with 18 additions and 10 deletions
| @@ -377,35 +377,43 @@ def parse_forum_log_line(line): | @@ -377,35 +377,43 @@ def parse_forum_log_line(line): | ||
| 377 | """解析forum.log行内容,提取对话信息""" | 377 | """解析forum.log行内容,提取对话信息""" |
| 378 | import re | 378 | import re |
| 379 | 379 | ||
| 380 | - # 匹配格式: [时间] [来源] 内容 | ||
| 381 | - pattern = r'\[(\d{2}:\d{2}:\d{2})\]\s*\[([A-Z]+)\]\s*(.*)' | 380 | + # 匹配格式: [时间] [来源] 内容(来源允许大小写及空格) |
| 381 | + pattern = r'\[(\d{2}:\d{2}:\d{2})\]\s*\[([^\]]+)\]\s*(.*)' | ||
| 382 | match = re.match(pattern, line) | 382 | match = re.match(pattern, line) |
| 383 | 383 | ||
| 384 | - if match: | ||
| 385 | - timestamp, source, content = match.groups() | 384 | + if not match: |
| 385 | + return None | ||
| 386 | + | ||
| 387 | + timestamp, raw_source, content = match.groups() | ||
| 388 | + source = raw_source.strip().upper() | ||
| 386 | 389 | ||
| 387 | # 过滤掉系统消息和空内容 | 390 | # 过滤掉系统消息和空内容 |
| 388 | if source == 'SYSTEM' or not content.strip(): | 391 | if source == 'SYSTEM' or not content.strip(): |
| 389 | return None | 392 | return None |
| 390 | 393 | ||
| 391 | - # 只处理三个Engine的消息 | ||
| 392 | - if source not in ['QUERY', 'INSIGHT', 'MEDIA']: | 394 | + # 支持三个Agent和主持人 |
| 395 | + if source not in ['QUERY', 'INSIGHT', 'MEDIA', 'HOST']: | ||
| 393 | return None | 396 | return None |
| 394 | 397 | ||
| 398 | + # 解码日志中的转义换行,保留多行格式 | ||
| 399 | + cleaned_content = content.replace('\\n', '\n').replace('\\r', '').strip() | ||
| 400 | + | ||
| 395 | # 根据来源确定消息类型和发送者 | 401 | # 根据来源确定消息类型和发送者 |
| 402 | + if source == 'HOST': | ||
| 403 | + message_type = 'host' | ||
| 404 | + sender = 'Forum Host' | ||
| 405 | + else: | ||
| 396 | message_type = 'agent' | 406 | message_type = 'agent' |
| 397 | - sender = f'{source} Engine' | 407 | + sender = f'{source.title()} Engine' |
| 398 | 408 | ||
| 399 | return { | 409 | return { |
| 400 | 'type': message_type, | 410 | 'type': message_type, |
| 401 | 'sender': sender, | 411 | 'sender': sender, |
| 402 | - 'content': content.strip(), | 412 | + 'content': cleaned_content, |
| 403 | 'timestamp': timestamp, | 413 | 'timestamp': timestamp, |
| 404 | 'source': source | 414 | 'source': source |
| 405 | } | 415 | } |
| 406 | 416 | ||
| 407 | - return None | ||
| 408 | - | ||
| 409 | # Forum日志监听器 | 417 | # Forum日志监听器 |
| 410 | # 存储每个客户端的历史日志发送位置 | 418 | # 存储每个客户端的历史日志发送位置 |
| 411 | forum_log_positions = {} | 419 | forum_log_positions = {} |
-
Please register or login to post a comment