Showing 3 changed files with 146 additions and 66 deletions.
File 1 of 3 (the GraphRAG query node); Chinese comments and log strings are translated below:

```diff
@@ -11,6 +11,7 @@ from dataclasses import dataclass, field
 from typing import Dict, Any, List, Optional
 
 from loguru import logger
+from utils.knowledge_logger import append_knowledge_log, compact_records
 
 from .base_node import BaseNode
 from ..llms.base import LLMClient
@@ -122,6 +123,8 @@ class GraphRAGQueryNode(BaseNode):
             Merged query results
         """
         self.log_info(f"Starting GraphRAG query, section: {section.get('title', 'unknown')}")
+        chapter_id = section.get("id") or section.get("chapter_id") or section.get("chapterId")
+        chapter_title = section.get("title", "unknown")
 
         query_engine = QueryEngine(graph)
         history = QueryHistory()
@@ -154,11 +157,38 @@ class GraphRAGQueryNode(BaseNode):
                 engine_filter=decision.get('engine_filter'),
                 depth=decision.get('depth', 1)
             )
+            params_dict = {
+                'keywords': params.keywords,
+                'node_types': params.node_types,
+                'engine_filter': params.engine_filter,
+                'depth': params.depth,
+            }
 
             result = query_engine.query(params)
             all_results.append(result)
 
             self.log_info(f"Query returned {result.total_nodes} nodes")
+            try:
+                append_knowledge_log(
+                    "GRAPH_QUERY_NODE",
+                    {
+                        "chapter_id": chapter_id or "",
+                        "chapter_title": chapter_title,
+                        "round": round_idx + 1,
+                        "params": params_dict,
+                        "result_counts": {
+                            "matched_sections": len(result.matched_sections),
+                            "matched_queries": len(result.matched_queries),
+                            "matched_sources": len(result.matched_sources),
+                            "total_nodes": result.total_nodes,
+                        },
+                        "matched_sections": compact_records(result.matched_sections[:5]),
+                        "matched_queries": compact_records(result.matched_queries[:5]),
+                        "matched_sources": compact_records(result.matched_sources[:5]),
+                    },
+                )
+            except Exception as log_exc:  # pragma: no cover - logging failures must not block the flow
+                logger.warning(f"Knowledge Query: GraphRAG node failed to write log: {log_exc}")
 
             # 5. Record history
             history.add(decision, result)
@@ -169,6 +199,22 @@ class GraphRAGQueryNode(BaseNode):
 
         self.log_info(f"GraphRAG query finished after {len(all_results)} rounds, "
                       f"retrieved {merged.get('total_nodes', 0)} nodes")
+        try:
+            append_knowledge_log(
+                "GRAPH_QUERY_SUMMARY",
+                {
+                    "chapter_id": chapter_id or "",
+                    "chapter_title": chapter_title,
+                    "rounds": len(all_results),
+                    "total_nodes": merged.get("total_nodes", 0),
+                    "matched_sections": compact_records(merged.get("matched_sections", [])[:10]),
+                    "matched_queries": compact_records(merged.get("matched_queries", [])[:10]),
+                    "matched_sources": compact_records(merged.get("matched_sources", [])[:10]),
+                    "cross_engine_insights": merged.get("cross_engine_insights", []),
+                },
+            )
+        except Exception as log_exc:  # pragma: no cover - logging failures must not block the flow
+            logger.warning(f"Knowledge Query: failed to write summary log: {log_exc}")
 
         return merged
```
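With this change, each query round plus a final summary lands in knowledge_query.log as a single-line JSON record. Going by the format string in `append_knowledge_log` (see the new module below), an entry would look roughly like the following; the timestamp and all field values are illustrative, not taken from a real run:

```text
[14:03:27] [KNOWLEDGE] [GRAPH_QUERY_NODE] {"chapter_id": "ch-3", "chapter_title": "Example chapter", "round": 1, "params": {"keywords": ["GraphRAG"], "node_types": null, "engine_filter": null, "depth": 1}, "result_counts": {"matched_sections": 4, "matched_queries": 2, "matched_sources": 5, "total_nodes": 11}, "matched_sections": [...], "matched_queries": [...], "matched_sources": [...]}
```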
File 2 of 3 (the module that previously hosted the logging helpers); the local implementations are removed in favor of the shared ones:

```diff
@@ -24,6 +24,11 @@ from loguru import logger
 import importlib
 from pathlib import Path
 from MindSpider.main import MindSpider
+from utils.knowledge_logger import (
+    append_knowledge_log,
+    compact_records as _compact_records,
+    init_knowledge_log,
+)
 
 # Import ReportEngine
 try:
@@ -364,72 +369,6 @@ def init_forum_log():
 # Initialize forum.log
 init_forum_log()
 
-# ===== Knowledge-base query log (format similar to the forum log) =====
-knowledge_log_lock = threading.Lock()
-KNOWLEDGE_LOG_FILE = LOG_DIR / "knowledge_query.log"
-
-
-def _sanitize_log_text(text: str) -> str:
-    """Strip newlines/carriage returns to prevent log pollution."""
-    return str(text).replace("\n", " ").replace("\r", " ").strip()
-
-
-def init_knowledge_log():
-    """Initialize the knowledge query log file."""
-    try:
-        start_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
-        KNOWLEDGE_LOG_FILE.parent.mkdir(parents=True, exist_ok=True)
-        with knowledge_log_lock, open(KNOWLEDGE_LOG_FILE, 'w', encoding='utf-8') as f:
-            f.write(f"=== Knowledge Query Log initialized - {start_time} ===\n")
-        logger.info("Knowledge Query: knowledge_query.log initialized")
-    except Exception as exc:  # pragma: no cover - runtime only
-        logger.exception(f"Knowledge Query: failed to initialize log: {exc}")
-
-
-def append_knowledge_log(source: str, payload: dict):
-    """Record knowledge-base query keywords and full request data, guarding against log pollution."""
-    try:
-        timestamp = datetime.now().strftime('%H:%M:%S')
-        clean_source = _sanitize_log_text(source or "UNKNOWN")
-        # Serialize to JSON and truncate to keep oversized entries out of the log
-        serialized = json.dumps(payload, ensure_ascii=False)
-        sanitized = _sanitize_log_text(serialized)
-        with knowledge_log_lock, open(KNOWLEDGE_LOG_FILE, 'a', encoding='utf-8') as f:
-            f.write(f"[{timestamp}] [KNOWLEDGE] [{clean_source}] {sanitized}\n")
-    except Exception as exc:  # pragma: no cover - logging failures must not affect the main flow
-        logger.warning(f"Knowledge Query: failed to write log: {exc}")
-
-
-def _trim_text(text: str, limit: int = 300) -> str:
-    text = _sanitize_log_text(text)
-    return text if len(text) <= limit else text[:limit] + "..."
-
-
-def _compact_records(items):
-    """Compact nodes/records into a concise log format to avoid pollution."""
-    compacted = []
-    if not items:
-        return compacted
-
-    for item in items:
-        if not isinstance(item, dict):
-            compacted.append(_trim_text(str(item)))
-            continue
-
-        entry = {}
-        for key, value in item.items():
-            # Keep only scalar fields as-is; compress everything else to strings
-            if isinstance(value, (str, int, float, bool)):
-                entry[key] = _trim_text(str(value))
-            else:
-                try:
-                    entry[key] = _trim_text(json.dumps(value, ensure_ascii=False))
-                except Exception:
-                    entry[key] = _trim_text(str(value))
-        compacted.append(entry)
-    return compacted
-
-
 # Initialize knowledge_query.log
 init_knowledge_log()
```
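One detail worth noting in this hunk: the shared `compact_records` is imported under this module's old private name, so the call sites that remain in the file keep working without edits. A minimal illustration:

```python
# The alias binds the shared helper to the old private name, so existing
# calls like _compact_records(items) in this module need no changes.
from utils.knowledge_logger import compact_records as _compact_records
```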
File 3 of 3, utils/knowledge_logger.py (new file, mode 100644):

```diff
+"""
+Unified logging utility for knowledge-graph queries.
+
+Shares the knowledge_query.log write logic across modules (the Flask
+API, the GraphRAG query node, etc.), so that scattered implementations
+do not drift apart and drop log entries.
+"""
+
+import json
+import threading
+from datetime import datetime
+from pathlib import Path
+
+from loguru import logger
+
+# Log file locations
+ROOT_DIR = Path(__file__).resolve().parent.parent
+LOG_DIR = ROOT_DIR / "logs"
+KNOWLEDGE_LOG_FILE = LOG_DIR / "knowledge_query.log"
+
+_log_lock = threading.Lock()
+
+
+def _sanitize_log_text(text: str) -> str:
+    """Strip newlines/carriage returns to prevent log pollution."""
+    return str(text).replace("\n", " ").replace("\r", " ").strip()
+
+
+def _trim_text(text: str, limit: int = 300) -> str:
+    """Truncate long text so single entries cannot bloat the log."""
+    text = _sanitize_log_text(text)
+    return text if len(text) <= limit else text[:limit] + "..."
+
+
+def compact_records(items):
+    """
+    Compact nodes/records into a concise log format so large fields
+    do not pollute the log.
+    """
+    compacted = []
+    if not items:
+        return compacted
+
+    for item in items:
+        if not isinstance(item, dict):
+            compacted.append(_trim_text(str(item)))
+            continue
+
+        entry = {}
+        for key, value in item.items():
+            if isinstance(value, (str, int, float, bool)):
+                entry[key] = _trim_text(str(value))
+            else:
+                try:
+                    entry[key] = _trim_text(json.dumps(value, ensure_ascii=False))
+                except Exception:
+                    entry[key] = _trim_text(str(value))
+        compacted.append(entry)
+    return compacted
+
+
+def init_knowledge_log(force_reset: bool = True):
+    """
+    Initialize the knowledge query log file.
+
+    Args:
+        force_reset: if True, reset the file and write the init marker;
+            if False, write the marker only when the file does not exist yet.
+    """
+    try:
+        start_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+        LOG_DIR.mkdir(parents=True, exist_ok=True)
+        mode = "w" if force_reset or not KNOWLEDGE_LOG_FILE.exists() else "a"
+        with _log_lock, open(KNOWLEDGE_LOG_FILE, mode, encoding="utf-8") as f:
+            f.write(f"=== Knowledge Query Log initialized - {start_time} ===\n")
+        logger.info("Knowledge Query: knowledge_query.log initialized")
+    except Exception as exc:  # pragma: no cover - runtime only
+        logger.exception(f"Knowledge Query: failed to initialize log: {exc}")
+
+
+def _ensure_log_file():
+    """Ensure the log file exists and is writable without overwriting existing content."""
+    if not KNOWLEDGE_LOG_FILE.exists():
+        init_knowledge_log(force_reset=False)
+
+
+def append_knowledge_log(source: str, payload: dict):
+    """Record knowledge-base query keywords and the full request payload."""
+    try:
+        _ensure_log_file()
+        timestamp = datetime.now().strftime("%H:%M:%S")
+        clean_source = _sanitize_log_text(source or "UNKNOWN")
+        serialized = json.dumps(payload, ensure_ascii=False)
+        sanitized = _sanitize_log_text(serialized)
+        with _log_lock, open(KNOWLEDGE_LOG_FILE, "a", encoding="utf-8") as f:
+            f.write(f"[{timestamp}] [KNOWLEDGE] [{clean_source}] {sanitized}\n")
+    except Exception as exc:  # pragma: no cover - logging failures must not affect the main flow
+        logger.warning(f"Knowledge Query: failed to write log: {exc}")
```
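For reference, a minimal usage sketch of the shared module. The source label and payload fields below are illustrative stand-ins, not values from the codebase:

```python
from utils.knowledge_logger import (
    append_knowledge_log,
    compact_records,
    init_knowledge_log,
)

# Reset the log once at process startup (force_reset=True is the default);
# later calls to append_knowledge_log create the file on demand if missing.
init_knowledge_log()

# Hypothetical matched records standing in for real graph nodes.
records = [{"title": "Example node", "score": 0.92, "meta": {"engine": "demo"}}]

# Appends one sanitized, single-line JSON entry to logs/knowledge_query.log.
append_knowledge_log("EXAMPLE_SOURCE", {
    "rounds": 1,
    "matched_sections": compact_records(records),
})
```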