马一丁

Update the storage scheme for GraphRAG query logs
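This change moves the knowledge_query.log helpers out of the web entry point into a shared utils/knowledge_logger module and has the GraphRAG query node record each query round plus a final summary through it. Below is a minimal sketch of the intended call pattern, assuming the project root (which contains utils/) is on the import path; the chapter id, title, and record values are illustrative only:

from utils.knowledge_logger import append_knowledge_log, compact_records, init_knowledge_log

# Reset logs/knowledge_query.log once at process start (the web entry point keeps doing this).
init_knowledge_log()

# Any module can then append one sanitized, single-line JSON entry per event.
append_knowledge_log(
    "GRAPH_QUERY_NODE",
    {
        "chapter_id": "ch-01",          # illustrative value
        "chapter_title": "Background",  # illustrative value
        "round": 1,
        "matched_sections": compact_records([{"id": "s1", "content": "x" * 1000}]),
    },
)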

@@ -11,6 +11,7 @@ from dataclasses import dataclass, field
 from typing import Dict, Any, List, Optional
 
 from loguru import logger
+from utils.knowledge_logger import append_knowledge_log, compact_records
 
 from .base_node import BaseNode
 from ..llms.base import LLMClient
@@ -122,6 +123,8 @@ class GraphRAGQueryNode(BaseNode):
             Merged query results.
         """
         self.log_info(f"Starting GraphRAG query, section: {section.get('title', 'unknown')}")
+        chapter_id = section.get("id") or section.get("chapter_id") or section.get("chapterId")
+        chapter_title = section.get("title", "unknown")
 
         query_engine = QueryEngine(graph)
         history = QueryHistory()
@@ -154,11 +157,38 @@ class GraphRAGQueryNode(BaseNode):
                 engine_filter=decision.get('engine_filter'),
                 depth=decision.get('depth', 1)
             )
+            params_dict = {
+                'keywords': params.keywords,
+                'node_types': params.node_types,
+                'engine_filter': params.engine_filter,
+                'depth': params.depth,
+            }
 
             result = query_engine.query(params)
             all_results.append(result)
 
             self.log_info(f"Query returned {result.total_nodes} nodes")
+            try:
+                append_knowledge_log(
+                    "GRAPH_QUERY_NODE",
+                    {
+                        "chapter_id": chapter_id or "",
+                        "chapter_title": chapter_title,
+                        "round": round_idx + 1,
+                        "params": params_dict,
+                        "result_counts": {
+                            "matched_sections": len(result.matched_sections),
+                            "matched_queries": len(result.matched_queries),
+                            "matched_sources": len(result.matched_sources),
+                            "total_nodes": result.total_nodes,
+                        },
+                        "matched_sections": compact_records(result.matched_sections[:5]),
+                        "matched_queries": compact_records(result.matched_queries[:5]),
+                        "matched_sources": compact_records(result.matched_sources[:5]),
+                    },
+                )
+            except Exception as log_exc:  # pragma: no cover - logging failures must not block the flow
+                logger.warning(f"Knowledge Query: GraphRAG node failed to write log entry: {log_exc}")
 
             # 5. Record history
             history.add(decision, result)
@@ -169,6 +199,22 @@ class GraphRAGQueryNode(BaseNode):
 
         self.log_info(f"GraphRAG query finished after {len(all_results)} rounds, "
                       f"retrieved {merged.get('total_nodes', 0)} nodes")
+        try:
+            append_knowledge_log(
+                "GRAPH_QUERY_SUMMARY",
+                {
+                    "chapter_id": chapter_id or "",
+                    "chapter_title": chapter_title,
+                    "rounds": len(all_results),
+                    "total_nodes": merged.get("total_nodes", 0),
+                    "matched_sections": compact_records(merged.get("matched_sections", [])[:10]),
+                    "matched_queries": compact_records(merged.get("matched_queries", [])[:10]),
+                    "matched_sources": compact_records(merged.get("matched_sources", [])[:10]),
+                    "cross_engine_insights": merged.get("cross_engine_insights", []),
+                },
+            )
+        except Exception as log_exc:  # pragma: no cover - logging failures must not block the flow
+            logger.warning(f"Knowledge Query: failed to write summary log entry: {log_exc}")
 
         return merged
 
@@ -24,6 +24,11 @@ from loguru import logger
 import importlib
 from pathlib import Path
 from MindSpider.main import MindSpider
+from utils.knowledge_logger import (
+    append_knowledge_log,
+    compact_records as _compact_records,
+    init_knowledge_log,
+)
 
 # Import ReportEngine
 try:
@@ -364,72 +369,6 @@ def init_forum_log():
 # Initialize forum.log
 init_forum_log()
 
-# ===== Knowledge-base query log (format similar to the Forum log) =====
-knowledge_log_lock = threading.Lock()
-KNOWLEDGE_LOG_FILE = LOG_DIR / "knowledge_query.log"
-
-
-def _sanitize_log_text(text: str) -> str:
-    """Strip newlines/carriage returns so each entry stays on one line."""
-    return str(text).replace("\n", " ").replace("\r", " ").strip()
-
-
-def init_knowledge_log():
-    """Initialize the knowledge-query log file."""
-    try:
-        start_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
-        KNOWLEDGE_LOG_FILE.parent.mkdir(parents=True, exist_ok=True)
-        with knowledge_log_lock, open(KNOWLEDGE_LOG_FILE, 'w', encoding='utf-8') as f:
-            f.write(f"=== Knowledge Query Log initialized - {start_time} ===\n")
-        logger.info("Knowledge Query: knowledge_query.log initialized")
-    except Exception as exc:  # pragma: no cover - runtime only
-        logger.exception(f"Knowledge Query: failed to initialize log: {exc}")
-
-
-def append_knowledge_log(source: str, payload: dict):
-    """Record knowledge-query keywords and the full request payload without polluting the log."""
-    try:
-        timestamp = datetime.now().strftime('%H:%M:%S')
-        clean_source = _sanitize_log_text(source or "UNKNOWN")
-        # Serialize to JSON and sanitize so oversized payloads do not pollute the log
-        serialized = json.dumps(payload, ensure_ascii=False)
-        sanitized = _sanitize_log_text(serialized)
-        with knowledge_log_lock, open(KNOWLEDGE_LOG_FILE, 'a', encoding='utf-8') as f:
-            f.write(f"[{timestamp}] [KNOWLEDGE] [{clean_source}] {sanitized}\n")
-    except Exception as exc:  # pragma: no cover - logging failures must not break the main flow
-        logger.warning(f"Knowledge Query: failed to write log entry: {exc}")
-
-
-def _trim_text(text: str, limit: int = 300) -> str:
-    text = _sanitize_log_text(text)
-    return text if len(text) <= limit else text[:limit] + "..."
-
-
-def _compact_records(items):
-    """Compact nodes/records into a concise log format to avoid pollution."""
-    compacted = []
-    if not items:
-        return compacted
-
-    for item in items:
-        if not isinstance(item, dict):
-            compacted.append(_trim_text(str(item)))
-            continue
-
-        entry = {}
-        for key, value in item.items():
-            # Only keep essential primitive fields; compress the rest to strings
-            if isinstance(value, (str, int, float, bool)):
-                entry[key] = _trim_text(str(value))
-            else:
-                try:
-                    entry[key] = _trim_text(json.dumps(value, ensure_ascii=False))
-                except Exception:
-                    entry[key] = _trim_text(str(value))
-        compacted.append(entry)
-    return compacted
-
-
 # Initialize knowledge_query.log
 init_knowledge_log()
 
@@ -0,0 +1,95 @@
+"""
+Unified logging utility for knowledge-graph queries.
+
+Shares the knowledge_query.log write logic across modules (the Flask API,
+the GraphRAG query node, etc.) to avoid scattered implementations that drop log entries.
+"""
+
+import json
+import threading
+from datetime import datetime
+from pathlib import Path
+
+from loguru import logger
+
+# Log file paths
+ROOT_DIR = Path(__file__).resolve().parent.parent
+LOG_DIR = ROOT_DIR / "logs"
+KNOWLEDGE_LOG_FILE = LOG_DIR / "knowledge_query.log"
+
+_log_lock = threading.Lock()
+
+
+def _sanitize_log_text(text: str) -> str:
+    """Strip newlines/carriage returns so each entry stays on one line."""
+    return str(text).replace("\n", " ").replace("\r", " ").strip()
+
+
+def _trim_text(text: str, limit: int = 300) -> str:
+    """Truncate long text so a single field cannot bloat the log."""
+    text = _sanitize_log_text(text)
+    return text if len(text) <= limit else text[:limit] + "..."
+
+
+def compact_records(items):
+    """
+    Compact nodes/records into a concise log format so large fields do not pollute the log.
+    """
+    compacted = []
+    if not items:
+        return compacted
+
+    for item in items:
+        if not isinstance(item, dict):
+            compacted.append(_trim_text(str(item)))
+            continue
+
+        entry = {}
+        for key, value in item.items():
+            if isinstance(value, (str, int, float, bool)):
+                entry[key] = _trim_text(str(value))
+            else:
+                try:
+                    entry[key] = _trim_text(json.dumps(value, ensure_ascii=False))
+                except Exception:
+                    entry[key] = _trim_text(str(value))
+        compacted.append(entry)
+    return compacted
+
+
+def init_knowledge_log(force_reset: bool = True):
+    """
+    Initialize the knowledge-query log file.
+
+    Args:
+        force_reset: When True, reset the file and write the init marker; when False, an existing file is not truncated.
+    """
+    try:
+        start_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+        LOG_DIR.mkdir(parents=True, exist_ok=True)
+        mode = "w" if force_reset or not KNOWLEDGE_LOG_FILE.exists() else "a"
+        with _log_lock, open(KNOWLEDGE_LOG_FILE, mode, encoding="utf-8") as f:
+            f.write(f"=== Knowledge Query Log initialized - {start_time} ===\n")
+        logger.info("Knowledge Query: knowledge_query.log initialized")
+    except Exception as exc:  # pragma: no cover - runtime only
+        logger.exception(f"Knowledge Query: failed to initialize log: {exc}")
+
+
+def _ensure_log_file():
+    """Make sure the log file exists and is writable without overwriting existing content."""
+    if not KNOWLEDGE_LOG_FILE.exists():
+        init_knowledge_log(force_reset=False)
+
+
+def append_knowledge_log(source: str, payload: dict):
+    """Record knowledge-query keywords and the full request payload."""
+    try:
+        _ensure_log_file()
+        timestamp = datetime.now().strftime("%H:%M:%S")
+        clean_source = _sanitize_log_text(source or "UNKNOWN")
+        serialized = json.dumps(payload, ensure_ascii=False)
+        sanitized = _sanitize_log_text(serialized)
+        with _log_lock, open(KNOWLEDGE_LOG_FILE, "a", encoding="utf-8") as f:
+            f.write(f"[{timestamp}] [KNOWLEDGE] [{clean_source}] {sanitized}\n")
+    except Exception as exc:  # pragma: no cover - logging failures must not break the main flow
+        logger.warning(f"Knowledge Query: failed to write log entry: {exc}")
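
Each append_knowledge_log call produces a single line in logs/knowledge_query.log of the form [HH:MM:SS] [KNOWLEDGE] [SOURCE] {json payload}, with newlines stripped so the file stays one entry per line. A quick sketch of the compaction behavior (values are illustrative, not part of the change):

from utils.knowledge_logger import compact_records

records = [
    {"id": "s1", "score": 0.92, "content": "A" * 500},  # the 500-char value is trimmed to 300 chars plus "..."
    "plain string item",                                 # non-dict items are stringified and trimmed
]
print(compact_records(records))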