马一丁

Allows storage of logs generated when querying GraphRAG

Showing 1 changed file with 52 additions and 0 deletions
@@ -13,6 +13,7 @@ os.environ['PYTHONUNBUFFERED'] = '1' # 禁用Python输出缓冲,确保日志 @@ -13,6 +13,7 @@ os.environ['PYTHONUNBUFFERED'] = '1' # 禁用Python输出缓冲,确保日志
13 import subprocess 13 import subprocess
14 import time 14 import time
15 import threading 15 import threading
  16 +import json
16 from datetime import datetime 17 from datetime import datetime
17 from queue import Queue 18 from queue import Queue
18 from flask import Flask, render_template, request, jsonify, Response 19 from flask import Flask, render_template, request, jsonify, Response
@@ -363,6 +364,45 @@ def init_forum_log(): @@ -363,6 +364,45 @@ def init_forum_log():
363 # 初始化forum.log 364 # 初始化forum.log
364 init_forum_log() 365 init_forum_log()
365 366
  367 +# ===== Knowledge-base query log (format similar to the Forum log) =====
  368 +knowledge_log_lock = threading.Lock()  # held while writing to KNOWLEDGE_LOG_FILE
  369 +KNOWLEDGE_LOG_FILE = LOG_DIR / "knowledge_query.log"
  370 +
  371 +
  372 +def _sanitize_log_text(text: str) -> str:
  373 + """移除换行/回车,防止日志污染。"""
  374 + return str(text).replace("\n", " ").replace("\r", " ").strip()
  375 +
  376 +
def init_knowledge_log():
    """Start a fresh knowledge query log that contains only a header line.

    The parent directory of ``KNOWLEDGE_LOG_FILE`` is created when missing.
    The file is opened in ``'w'`` mode, so any previous content is discarded,
    and a timestamped header is written while ``knowledge_log_lock`` is held.
    Failures are reported through ``logger.exception`` and are not re-raised.
    """
    try:
        start_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        log_path = KNOWLEDGE_LOG_FILE
        log_path.parent.mkdir(parents=True, exist_ok=True)
        header_line = f"=== Knowledge Query Log 初始化 - {start_time} ===\n"
        with knowledge_log_lock:
            with open(log_path, 'w', encoding='utf-8') as log_file:
                log_file.write(header_line)
        logger.info("Knowledge Query: knowledge_query.log 已初始化")
    except Exception as exc:  # pragma: no cover - only exercised at runtime
        logger.exception(f"Knowledge Query: 初始化日志失败: {exc}")
  388 +
def append_knowledge_log(source: str, payload: dict, *, max_length: int = 4096):
    """Append one knowledge-base query record to the knowledge query log.

    Each record is a single line of the form
    ``[HH:MM:SS] [KNOWLEDGE] [<source>] <payload as JSON>``.

    Args:
        source: Label identifying where the query came from. A falsy value
            is logged as ``UNKNOWN``.
        payload: Request data to record; it is serialized as JSON.
        max_length: Maximum number of characters of serialized payload to
            keep. Longer payloads are cut and marked as truncated. A value
            of 0 or less disables truncation.

    Logging is best-effort: any failure is reported as a warning through
    ``logger`` and is never propagated to the caller.
    """
    try:
        timestamp = datetime.now().strftime('%H:%M:%S')
        clean_source = _sanitize_log_text(source or "UNKNOWN")
        # default=str is deliberate: a value JSON cannot encode natively is
        # logged in its string form instead of losing the whole record.
        serialized = json.dumps(payload, ensure_ascii=False, default=str)
        # Cap the record size so one oversized request cannot bloat the log.
        if 0 < max_length < len(serialized):
            dropped = len(serialized) - max_length
            serialized = f"{serialized[:max_length]}...[truncated {dropped} chars]"
        sanitized = _sanitize_log_text(serialized)
        with knowledge_log_lock, open(KNOWLEDGE_LOG_FILE, 'a', encoding='utf-8') as f:
            f.write(f"[{timestamp}] [KNOWLEDGE] [{clean_source}] {sanitized}\n")
    except Exception as exc:  # pragma: no cover - a logging failure must not affect the main flow
        logger.warning(f"Knowledge Query: 写日志失败: {exc}")
  402 +
  403 +# Initialize knowledge_query.log
  404 +init_knowledge_log()
  405 +
366 # 启动ForumEngine智能监控 406 # 启动ForumEngine智能监控
367 def start_forum_engine(): 407 def start_forum_engine():
368 """启动ForumEngine论坛""" 408 """启动ForumEngine论坛"""
@@ -1466,6 +1506,18 @@ def query_graph(): @@ -1466,6 +1506,18 @@ def query_graph():
1466 1506
1467 data = request.get_json() or {} 1507 data = request.get_json() or {}
1468 report_id = data.get('report_id') 1508 report_id = data.get('report_id')
  1509 +
  1510 + # 记录查询日志(关键词、过滤条件等)
  1511 + append_knowledge_log(
  1512 + 'GRAPH_QUERY',
  1513 + {
  1514 + 'report_id': report_id,
  1515 + 'keywords': data.get('keywords', []),
  1516 + 'node_types': data.get('node_types'),
  1517 + 'depth': data.get('depth', 1),
  1518 + 'engine_filter': data.get('engine_filter')
  1519 + }
  1520 + )
1469 1521
1470 storage = GraphStorage() 1522 storage = GraphStorage()
1471 1523