马一丁

GraphRAG

@@ -76,3 +76,11 @@ ANSPIRE_API_KEY= @@ -76,3 +76,11 @@ ANSPIRE_API_KEY=
76 # Bocha AI Search API(用于Bocha多模态搜索,这里密钥名称虽然是Web Search,但其实是要AI Search的,申请地址:https://open.bochaai.com/) 76 # Bocha AI Search API(用于Bocha多模态搜索,这里密钥名称虽然是Web Search,但其实是要AI Search的,申请地址:https://open.bochaai.com/)
77 BOCHA_BASE_URL=https://api.bocha.cn/v1/ai-search 77 BOCHA_BASE_URL=https://api.bocha.cn/v1/ai-search
78 BOCHA_WEB_SEARCH_API_KEY= 78 BOCHA_WEB_SEARCH_API_KEY=
  79 +# ================== GraphRAG 配置 ====================
  80 +# GraphRAG 功能开关(true/false),默认关闭
  81 +# 开启后会构建知识图谱并在章节生成前进行图谱查询
  82 +GRAPHRAG_ENABLED=false
  83 +
  84 +# GraphRAG 查询次数上限(每个章节生成前LLM可查询知识图谱的最大次数)
  85 +# 仅在 GRAPHRAG_ENABLED=true 时生效
  86 +GRAPHRAG_MAX_QUERIES=3
@@ -39,6 +39,21 @@ from .renderers import HTMLRenderer @@ -39,6 +39,21 @@ from .renderers import HTMLRenderer
39 from .state import ReportState 39 from .state import ReportState
40 from .utils.config import settings, Settings 40 from .utils.config import settings, Settings
41 41
  42 +# GraphRAG 模块导入
  43 +from .graphrag import (
  44 + StateParser,
  45 + ForumParser,
  46 + GraphBuilder,
  47 + GraphStorage,
  48 + Graph,
  49 + QueryEngine,
  50 +)
  51 +from .nodes import GraphRAGQueryNode
  52 +from .graphrag.prompts import (
  53 + SYSTEM_PROMPT_CHAPTER_GRAPH_ENHANCEMENT,
  54 + format_graph_results_for_prompt
  55 +)
  56 +
42 57
43 class StageOutputFormatError(ValueError): 58 class StageOutputFormatError(ValueError):
44 """阶段性输出结构不符合预期时抛出的受控异常。""" 59 """阶段性输出结构不符合预期时抛出的受控异常。"""
@@ -559,6 +574,37 @@ class ReportAgent: @@ -559,6 +574,37 @@ class ReportAgent:
559 self._persist_planning_artifacts(run_dir, layout_design, word_plan, template_overview) 574 self._persist_planning_artifacts(run_dir, layout_design, word_plan, template_overview)
560 emit('stage', {'stage': 'storage_ready', 'run_dir': str(run_dir)}) 575 emit('stage', {'stage': 'storage_ready', 'run_dir': str(run_dir)})
561 576
  577 + # ==================== GraphRAG 初始化 ====================
  578 + graphrag_enabled = getattr(self.config, 'GRAPHRAG_ENABLED', False)
  579 + knowledge_graph = None
  580 + graphrag_query_node = None
  581 +
  582 + if graphrag_enabled:
  583 + logger.info("GraphRAG 已启用,开始构建知识图谱...")
  584 + emit('stage', {'stage': 'graphrag_building', 'message': '正在构建知识图谱'})
  585 +
  586 + try:
  587 + knowledge_graph = self._build_knowledge_graph(
  588 + query, normalized_reports, forum_logs, run_dir
  589 + )
  590 + if knowledge_graph:
  591 + graphrag_query_node = GraphRAGQueryNode(self.llm_client)
  592 + graph_stats = knowledge_graph.get_stats()
  593 + emit('stage', {
  594 + 'stage': 'graphrag_built',
  595 + 'node_count': graph_stats.get('total_nodes', 0),
  596 + 'edge_count': graph_stats.get('total_edges', 0)
  597 + })
  598 + logger.info(f"知识图谱构建完成: {graph_stats}")
  599 + else:
  600 + logger.warning("知识图谱构建失败,将使用原始流程")
  601 + graphrag_enabled = False
  602 + except Exception as graph_error:
  603 + logger.exception(f"GraphRAG 构建异常: {graph_error}")
  604 + graphrag_enabled = False
  605 + emit('stage', {'stage': 'graphrag_error', 'error': str(graph_error)})
  606 + # ==================== GraphRAG 初始化结束 ====================
  607 +
562 chapters = [] 608 chapters = []
563 chapter_max_attempts = max( 609 chapter_max_attempts = max(
564 self._CONTENT_SPARSE_MIN_ATTEMPTS, self.config.CHAPTER_JSON_MAX_ATTEMPTS 610 self._CONTENT_SPARSE_MIN_ATTEMPTS, self.config.CHAPTER_JSON_MAX_ATTEMPTS
@@ -594,11 +640,47 @@ class ReportAgent: @@ -594,11 +640,47 @@ class ReportAgent:
594 best_sparse_candidate: Dict[str, Any] | None = None 640 best_sparse_candidate: Dict[str, Any] | None = None
595 best_sparse_score = -1 641 best_sparse_score = -1
596 fallback_used = False 642 fallback_used = False
  643 +
  644 + # ==================== GraphRAG 查询 ====================
  645 + graph_results = None
  646 + chapter_context = generation_context.copy()
  647 +
  648 + if graphrag_enabled and knowledge_graph and graphrag_query_node:
  649 + try:
  650 + max_queries = getattr(self.config, 'GRAPHRAG_MAX_QUERIES', 3)
  651 + section_info = {
  652 + 'title': section.title,
  653 + 'id': section.chapter_id,
  654 + 'role': section.description,
  655 + 'target_words': chapter_targets.get(section.chapter_id, {}).get('targetWords', 500),
  656 + 'emphasis': chapter_targets.get(section.chapter_id, {}).get('emphasisPoints', '')
  657 + }
  658 +
  659 + graph_results = graphrag_query_node.run(
  660 + section_info,
  661 + {
  662 + 'query': query,
  663 + 'template_name': template_result.get('template_name'),
  664 + 'chapters': word_plan.get('chapters', [])
  665 + },
  666 + knowledge_graph,
  667 + max_queries=max_queries
  668 + )
  669 +
  670 + if graph_results and graph_results.get('total_nodes', 0) > 0:
  671 + # 将图谱结果注入生成上下文
  672 + chapter_context['graph_results'] = graph_results
  673 + chapter_context['graph_enhancement_prompt'] = format_graph_results_for_prompt(graph_results)
  674 + logger.info(f"章节 {section.title} GraphRAG 查询完成: {graph_results.get('total_nodes', 0)} 节点")
  675 + except Exception as graph_query_error:
  676 + logger.warning(f"GraphRAG 查询失败 ({section.title}): {graph_query_error}")
  677 + # ==================== GraphRAG 查询结束 ====================
  678 +
597 while attempt <= chapter_max_attempts: 679 while attempt <= chapter_max_attempts:
598 try: 680 try:
599 chapter_payload = self.chapter_generation_node.run( 681 chapter_payload = self.chapter_generation_node.run(
600 section, 682 section,
601 - generation_context, 683 + chapter_context, # 使用包含图谱结果的上下文
602 run_dir, 684 run_dir,
603 stream_callback=chunk_callback 685 stream_callback=chunk_callback
604 ) 686 )
@@ -796,6 +878,62 @@ class ReportAgent: @@ -796,6 +878,62 @@ class ReportAgent:
796 self.state.metadata.template_used = fallback_template['template_name'] 878 self.state.metadata.template_used = fallback_template['template_name']
797 return fallback_template 879 return fallback_template
798 880
  881 + def _build_knowledge_graph(
  882 + self,
  883 + query: str,
  884 + reports: Dict[str, str],
  885 + forum_logs: str,
  886 + run_dir: Path
  887 + ) -> Optional[Graph]:
  888 + """
  889 + 构建知识图谱。
  890 +
  891 + 从已加载的 State JSON 和论坛日志中提取结构化数据,
  892 + 构建知识图谱供后续章节生成时查询。
  893 +
  894 + 参数:
  895 + query: 用户查询主题。
  896 + reports: 归一化后的报告映射。
  897 + forum_logs: 论坛日志内容。
  898 + run_dir: 运行目录,用于保存图谱。
  899 +
  900 + 返回:
  901 + Graph: 构建好的知识图谱;失败返回 None。
  902 + """
  903 + try:
  904 + # 解析 State JSON(如果在 load_input_files 时已加载)
  905 + states = {}
  906 + state_parser = StateParser()
  907 +
  908 + # 尝试从 reports 目录查找 State JSON
  909 + # 注意:这里假设 reports 字典的键对应引擎目录
  910 + for engine in ['insight', 'media', 'query']:
  911 + # 尝试从全局状态获取(如果之前已加载)
  912 + if hasattr(self, '_loaded_states') and engine in self._loaded_states:
  913 + states[engine] = self._loaded_states[engine]
  914 +
  915 + # 解析论坛日志
  916 + forum_entries = []
  917 + if forum_logs:
  918 + forum_parser = ForumParser()
  919 + forum_entries = forum_parser.parse(forum_logs)
  920 + logger.info(f"解析论坛日志: {len(forum_entries)} 条记录")
  921 +
  922 + # 构建图谱
  923 + builder = GraphBuilder()
  924 + graph = builder.build(query, states, forum_entries)
  925 +
  926 + # 保存图谱
  927 + storage = GraphStorage()
  928 + graph_path = storage.save(graph, self.state.task_id, run_dir)
  929 + logger.info(f"知识图谱已保存: {graph_path}")
  930 +
  931 + return graph
  932 +
  933 + except Exception as e:
  934 + logger.exception(f"构建知识图谱失败: {e}")
  935 + return None
  936 +
799 def _slice_template(self, template_markdown: str) -> List[TemplateSection]: 937 def _slice_template(self, template_markdown: str) -> List[TemplateSection]:
800 """ 938 """
801 将模板切成章节列表,若为空则提供fallback。 939 将模板切成章节列表,若为空则提供fallback。
@@ -1464,15 +1602,18 @@ class ReportAgent: @@ -1464,15 +1602,18 @@ class ReportAgent:
1464 file_paths: 文件路径字典 1602 file_paths: 文件路径字典
1465 1603
1466 Returns: 1604 Returns:
1467 - 加载的内容字典,包含 `reports` 列表与 `forum_logs` 字符串 1605 + 加载的内容字典,包含 `reports` 列表、`forum_logs` 字符串和 `states` 字典
1468 """ 1606 """
1469 content = { 1607 content = {
1470 'reports': [], 1608 'reports': [],
1471 - 'forum_logs': '' 1609 + 'forum_logs': '',
  1610 + 'states': {} # 新增:用于 GraphRAG 的 State JSON
1472 } 1611 }
1473 1612
1474 # 加载报告文件 1613 # 加载报告文件
1475 engines = ['query', 'media', 'insight'] 1614 engines = ['query', 'media', 'insight']
  1615 + state_parser = StateParser()
  1616 +
1476 for engine in engines: 1617 for engine in engines:
1477 if engine in file_paths: 1618 if engine in file_paths:
1478 try: 1619 try:
@@ -1480,6 +1621,20 @@ class ReportAgent: @@ -1480,6 +1621,20 @@ class ReportAgent:
1480 report_content = f.read() 1621 report_content = f.read()
1481 content['reports'].append(report_content) 1622 content['reports'].append(report_content)
1482 logger.info(f"已加载 {engine} 报告: {len(report_content)} 字符") 1623 logger.info(f"已加载 {engine} 报告: {len(report_content)} 字符")
  1624 +
  1625 + # 新增:尝试查找并加载对应的 State JSON(用于 GraphRAG)
  1626 + if self.config.GRAPHRAG_ENABLED:
  1627 + state_path = state_parser.find_state_json(file_paths[engine])
  1628 + if state_path:
  1629 + parsed_state = state_parser.parse_from_file(engine, state_path)
  1630 + if parsed_state:
  1631 + content['states'][engine] = parsed_state
  1632 + # 同时保存到实例属性,供 _build_knowledge_graph 使用
  1633 + if not hasattr(self, '_loaded_states'):
  1634 + self._loaded_states = {}
  1635 + self._loaded_states[engine] = parsed_state
  1636 + logger.info(f"已加载 {engine} State JSON: {len(parsed_state.sections)} 个段落")
  1637 +
1483 except Exception as e: 1638 except Exception as e:
1484 logger.exception(f"加载 {engine} 报告失败: {str(e)}") 1639 logger.exception(f"加载 {engine} 报告失败: {str(e)}")
1485 content['reports'].append("") 1640 content['reports'].append("")
  1 +"""
  2 +GraphRAG 知识图谱模块
  3 +
  4 +提供基于结构化数据的知识图谱构建、存储与查询功能。
  5 +"""
  6 +
  7 +from .state_parser import StateParser, ParsedState, ParsedSection, SearchRecord
  8 +from .forum_parser import ForumParser, ForumEntry
  9 +from .graph_builder import GraphBuilder
  10 +from .graph_storage import GraphStorage, Graph, Node, Edge
  11 +from .query_engine import QueryEngine, QueryParams, QueryResult
  12 +
  13 +__all__ = [
  14 + # 解析器
  15 + 'StateParser',
  16 + 'ParsedState',
  17 + 'ParsedSection',
  18 + 'SearchRecord',
  19 + 'ForumParser',
  20 + 'ForumEntry',
  21 + # 图谱核心
  22 + 'GraphBuilder',
  23 + 'GraphStorage',
  24 + 'Graph',
  25 + 'Node',
  26 + 'Edge',
  27 + # 查询引擎
  28 + 'QueryEngine',
  29 + 'QueryParams',
  30 + 'QueryResult',
  31 +]
  1 +"""
  2 +Forum 日志解析器
  3 +
  4 +解析 forum.log 文件,提取结构化的讨论记录用于构建知识图谱。
  5 +"""
  6 +
  7 +from dataclasses import dataclass
  8 +from typing import List, Optional
  9 +import re
  10 +
  11 +
@dataclass
class ForumEntry:
    """One parsed forum discussion line."""
    # Timestamp text captured from the log line.
    timestamp: str
    # Speaker tag captured from the log line.
    speaker: str
    # Message text of the line.
    content: str

    @property
    def is_host(self) -> bool:
        """True when the speaker tag is HOST (case-insensitive)."""
        return 'HOST' == self.speaker.upper()

    @property
    def is_system(self) -> bool:
        """True when the speaker tag is SYSTEM (case-insensitive)."""
        return 'SYSTEM' == self.speaker.upper()

    @property
    def engine_name(self) -> Optional[str]:
        """Lower-case engine name for INSIGHT/MEDIA/QUERY/HOST speakers, else None."""
        tag = self.speaker.upper()
        known_engines = ('INSIGHT', 'MEDIA', 'QUERY', 'HOST')
        return tag.lower() if tag in known_engines else None
  36 +
  37 +
class ForumParser:
    """
    Parser for forum.log content.

    Each log line is expected to look like ``[HH:MM:SS] [SPEAKER] content``.
    Lines that do not match this shape, or whose speaker tag is not in
    ``VALID_SPEAKERS``, are ignored.
    """

    # One log line: timestamp, speaker tag, then the message text.
    PATTERN = re.compile(r'\[(\d{2}:\d{2}:\d{2})\]\s*\[(\w+)\]\s*(.+)')

    # Upper-case speaker tags accepted by parse().
    VALID_SPEAKERS = {'INSIGHT', 'MEDIA', 'QUERY', 'HOST', 'SYSTEM'}

    def parse(self, forum_logs: str) -> List[ForumEntry]:
        """
        Parse forum.log text into structured entries.

        Args:
            forum_logs: Full text of the forum log.

        Returns:
            Entries in log order; the speaker is normalized to upper case
            and literal ``\\n`` sequences in the content are turned into
            real newlines.
        """
        if not forum_logs:
            return []

        entries = []

        for raw_line in forum_logs.strip().split('\n'):
            # Logs written with CRLF endings would otherwise keep a stray
            # '\r' at the end of every captured content string.
            line = raw_line.rstrip('\r')
            if not line.strip():
                continue

            match = self.PATTERN.match(line)
            if not match:
                continue

            timestamp, speaker, content = match.groups()
            speaker_upper = speaker.upper()
            if speaker_upper not in self.VALID_SPEAKERS:
                continue

            entries.append(ForumEntry(
                timestamp=timestamp,
                speaker=speaker_upper,
                # The log stores embedded newlines as the two characters "\n".
                content=content.replace('\\n', '\n')
            ))

        return entries

    def get_host_insights(self, entries: List[ForumEntry]) -> List[str]:
        """
        Collect the content of every Host entry.

        Args:
            entries: Parsed forum entries.

        Returns:
            Host message contents, in input order.
        """
        return [e.content for e in entries if e.is_host]

    def get_engine_entries(self, entries: List[ForumEntry],
                           engine: str) -> List[ForumEntry]:
        """
        Select the entries spoken by one engine.

        Args:
            entries: Parsed forum entries.
            engine: Engine name (insight/media/query/host), any case.

        Returns:
            Entries whose speaker equals the upper-cased engine name.
        """
        engine_upper = engine.upper()
        return [e for e in entries if e.speaker == engine_upper]

    def get_summary_by_engine(self, entries: List[ForumEntry]) -> dict:
        """
        Group message contents by engine.

        Args:
            entries: Parsed forum entries.

        Returns:
            ``{engine: [contents]}`` with the keys insight, media, query
            and host always present; entries without an engine name
            (for example SYSTEM) are left out.
        """
        result = {
            'insight': [],
            'media': [],
            'query': [],
            'host': []
        }

        for entry in entries:
            engine = entry.engine_name
            if engine and engine in result:
                result[engine].append(entry.content)

        return result

    def extract_key_points(self, entries: List[ForumEntry],
                           max_points: int = 10) -> List[str]:
        """
        Extract key points from Host entries.

        Only Host entries contribute. Each point is the first 200
        characters of the entry content (with ``...`` appended when the
        content was longer), prefixed with the speaker tag.

        Args:
            entries: Parsed forum entries.
            max_points: Maximum number of points to return; values of
                zero or less yield an empty list.

        Returns:
            Key point strings, in input order.
        """
        key_points = []
        # Guard up front: checking the limit only after appending would
        # return one point even when none were requested.
        if max_points <= 0:
            return key_points

        for entry in entries:
            if not entry.is_host:
                continue

            summary = entry.content[:200]
            if len(entry.content) > 200:
                summary += '...'
            key_points.append(f"[{entry.speaker}] {summary}")

            if len(key_points) >= max_points:
                break

        return key_points
  1 +"""
  2 +知识图谱构建器
  3 +
  4 +基于结构化的 State JSON 和 Forum 日志构建知识图谱,无需 LLM 提取实体。
  5 +"""
  6 +
  7 +from typing import Dict, List, Optional
  8 +import hashlib
  9 +
  10 +from .state_parser import ParsedState, ParsedSection
  11 +from .forum_parser import ForumEntry
  12 +from .graph_storage import Graph, Node
  13 +
  14 +
class GraphBuilder:
    """
    Builds the knowledge graph from already-structured data.

    The inputs are parsed State JSON objects and forum log entries, so no
    LLM entity or relation extraction is involved.

    Node types:
    - topic: the user's query topic
    - engine: analysis source (insight/media/query/host)
    - section: a report paragraph/chapter
    - search_query: a search keyword
    - source: an information source URL

    Relation types:
    - analyzed_by: Topic -> Engine
    - contains: Engine -> Section
    - searched: Section -> SearchQuery
    - found: SearchQuery -> Source
    """

    # Maximum number of Host forum entries turned into section nodes.
    MAX_HOST_SECTIONS = 5
    # Number of leading characters of a Host entry kept as its summary.
    HOST_SUMMARY_CHARS = 300

    def build(self, topic: str, states: Dict[str, ParsedState],
              forum_entries: Optional[List[ForumEntry]] = None) -> Graph:
        """
        Build the knowledge graph.

        Args:
            topic: The user's query topic.
            states: Mapping ``{engine_name: ParsedState}``.
            forum_entries: Parsed forum log entries, if any.

        Returns:
            The populated Graph.
        """
        graph = Graph()

        # 1. Topic node; its ID is derived from the topic text.
        topic_node = graph.add_node(
            node_type="topic",
            name=topic,
            node_id=f"T_{self._hash(topic)}"
        )

        # 2. One engine subtree per parsed state.
        for engine_name, state in states.items():
            self._add_engine_nodes(graph, topic_node, engine_name, state)

        # 3. Forum log contributes the Host engine and its sections.
        if forum_entries:
            self._add_forum_nodes(graph, topic_node, forum_entries)

        return graph

    def _add_engine_nodes(self, graph: Graph, topic_node: Node,
                          engine_name: str, state: ParsedState) -> None:
        """Add an engine node, link it to the topic, and add its sections."""
        engine_node = graph.add_node(
            node_type="engine",
            name=engine_name,
            node_id=engine_name,
            report_title=state.report_title,
            original_query=state.query
        )

        # Topic -> Engine
        graph.add_edge(topic_node, engine_node, "analyzed_by")

        for section in state.sections:
            self._add_section_nodes(graph, engine_node, engine_name, section)

    def _add_section_nodes(self, graph: Graph, engine_node: Node,
                           engine_name: str, section: ParsedSection) -> None:
        """
        Add a section node with its search-query and source nodes.

        Search queries are de-duplicated per section (case-insensitive,
        surrounding whitespace ignored). Records that repeat an earlier
        query reuse that query node, so every distinct URL found for the
        query is still attached as a source.
        """
        section_id = f"{engine_name}_S{section.order}"
        section_node = graph.add_node(
            node_type="section",
            name=section.title,
            node_id=section_id,
            title=section.title,
            order=section.order,
            summary=section.summary,
            engine=engine_name
        )

        # Engine -> Section
        graph.add_edge(engine_node, section_node, "contains")

        query_nodes: Dict[str, Node] = {}  # normalized query -> its node
        linked_sources = set()             # (normalized query, url) already linked

        for idx, search in enumerate(section.search_history):
            if not search.query:
                continue

            query_key = search.query.strip().lower()
            query_node = query_nodes.get(query_key)
            if query_node is None:
                # First time this query is seen in the section.
                query_node = graph.add_node(
                    node_type="search_query",
                    name=search.query[:50],  # truncate long queries
                    node_id=f"{section_id}_Q{idx}",
                    query_text=search.query,
                    section_ref=section_id,
                    engine=engine_name
                )
                # Section -> SearchQuery
                graph.add_edge(section_node, query_node, "searched")
                query_nodes[query_key] = query_node

            if not search.url:
                continue

            # Skipping repeated queries outright would drop every record
            # after the first for that query; only the exact same
            # (query, url) pair is skipped, to avoid a duplicate edge.
            link_key = (query_key, search.url)
            if link_key in linked_sources:
                continue
            linked_sources.add(link_key)
            self._add_source_node(graph, query_node, search)

    def _add_source_node(self, graph: Graph, query_node: Node,
                         search) -> None:
        """Add (or reuse) the source node for ``search.url`` and link it to the query."""
        # The ID is a hash of the URL, so one URL maps to one node graph-wide.
        source_id = f"SRC_{self._hash(search.url)}"

        source_node = graph.get_node(source_id)
        if not source_node:
            source_node = graph.add_node(
                node_type="source",
                name=search.title[:50] if search.title else search.url[:50],
                node_id=source_id,
                url=search.url,
                title=search.title,
                preview=search.content[:100] if search.content else '',
                score=search.score
            )

        # SearchQuery -> Source
        graph.add_edge(query_node, source_node, "found")

    def _add_forum_nodes(self, graph: Graph, topic_node: Node,
                         entries: List[ForumEntry]) -> None:
        """Add the Host engine node and up to MAX_HOST_SECTIONS Host entries as sections."""
        # Create the Host engine node unless one already exists.
        host_node = graph.get_node('host')
        if not host_node:
            host_node = graph.add_node(
                node_type="engine",
                name="host",
                node_id="host",
                report_title="论坛主持人总结"
            )
            graph.add_edge(topic_node, host_node, "analyzed_by")

        # A HOST entry can never also be a SYSTEM entry, so is_host suffices.
        host_entries = [e for e in entries if e.is_host]

        for idx, entry in enumerate(host_entries[:self.MAX_HOST_SECTIONS]):
            section_node = graph.add_node(
                node_type="section",
                name=f"主持人总结 {idx + 1}",
                node_id=f"host_S{idx}",
                title=f"[{entry.timestamp}] 主持人总结",
                order=idx,
                summary=entry.content[:self.HOST_SUMMARY_CHARS],
                engine="host",
                timestamp=entry.timestamp
            )

            graph.add_edge(host_node, section_node, "contains")

    @staticmethod
    def _hash(text: str) -> str:
        """Return the first 8 hex characters of the MD5 digest of ``text`` (used for IDs only)."""
        return hashlib.md5(text.encode()).hexdigest()[:8]
  1 +"""
  2 +知识图谱存储模块
  3 +
  4 +定义图谱的核心数据结构(Node、Edge、Graph)及 JSON 存储功能。
  5 +"""
  6 +
  7 +from dataclasses import dataclass, field
  8 +from typing import Dict, Any, List, Optional, Set
  9 +from datetime import datetime
  10 +import json
  11 +from pathlib import Path
  12 +import hashlib
  13 +
  14 +
@dataclass
class Node:
    """A knowledge-graph node."""
    # Unique node identifier.
    id: str
    # Node type: topic, engine, section, search_query, source.
    type: str
    # Display name.
    name: str = ""
    # Free-form extra attributes.
    attributes: Dict[str, Any] = field(default_factory=dict)

    @property
    def label(self) -> str:
        """Display label; alias of ``name`` kept for front-end compatibility."""
        return self.name

    @property
    def properties(self) -> Dict[str, Any]:
        """Alias of ``attributes`` kept for front-end compatibility."""
        return self.attributes

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a dict that carries both the native and the alias keys."""
        return {
            'id': self.id,
            'type': self.type,
            'name': self.name,
            'label': self.name,  # compatibility field
            'attributes': self.attributes,
            'properties': self.attributes  # compatibility field
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'Node':
        """
        Create a node from a dict.

        ``name`` falls back to ``label`` and ``attributes`` falls back to
        ``properties``. The attribute mapping is copied, so later changes
        to the node do not leak into ``data``; a missing or null mapping
        becomes an empty dict.

        Raises:
            KeyError: If ``id`` or ``type`` is missing.
        """
        raw_attributes = data.get('attributes')
        if raw_attributes is None:
            raw_attributes = data.get('properties')

        return cls(
            id=data['id'],
            type=data['type'],
            name=data.get('name', data.get('label', '')),
            # Copy so the node never shares state with the input, and so
            # a null value cannot break get() later.
            attributes=dict(raw_attributes or {})
        )

    def get(self, key: str, default: Any = None) -> Any:
        """Look up ``id``, ``type``, ``name``/``label`` or any extra attribute by key."""
        if key == 'id':
            return self.id
        if key == 'type':
            return self.type
        if key in ('name', 'label'):
            return self.name
        return self.attributes.get(key, default)
  63 +
  64 +
@dataclass
class Edge:
    """A directed, typed knowledge-graph edge."""
    # ID of the start node.
    from_id: str
    # ID of the end node.
    to_id: str
    # Relation type: analyzed_by, contains, searched, found.
    relation: str
    # Edge weight.
    weight: float = 1.0
    # Free-form extra attributes.
    attributes: Dict[str, Any] = field(default_factory=dict)

    @property
    def source(self) -> str:
        """Start node ID; alias kept for front-end compatibility."""
        return self.from_id

    @property
    def target(self) -> str:
        """End node ID; alias kept for front-end compatibility."""
        return self.to_id

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a dict that carries both ``from``/``to`` and ``source``/``target``."""
        return {
            'from': self.from_id,
            'to': self.to_id,
            'source': self.from_id,  # compatibility field
            'target': self.to_id,  # compatibility field
            'relation': self.relation,
            'weight': self.weight,
            'attributes': self.attributes
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'Edge':
        """
        Create an edge from a dict.

        ``from``/``to`` fall back to ``source``/``target`` and then to an
        empty string. The attribute mapping is copied; a missing or null
        mapping becomes an empty dict.

        Raises:
            KeyError: If ``relation`` is missing.
        """
        return cls(
            from_id=data.get('from', data.get('source', '')),
            to_id=data.get('to', data.get('target', '')),
            relation=data['relation'],
            weight=data.get('weight', 1.0),
            # Copy so the edge never shares state with the input dict.
            attributes=dict(data.get('attributes') or {})
        )
  106 +
  107 +
class Graph:
    """Knowledge graph: nodes keyed by ID, an edge list, and an adjacency map."""

    def __init__(self):
        self._nodes: Dict[str, Node] = {}
        self._edges: List[Edge] = []
        # Neighbor IDs per node; filled in both directions for every edge.
        self._adjacency: Dict[str, Set[str]] = {}

    @property
    def nodes(self) -> Dict[str, Node]:
        """All nodes as an ``{id: Node}`` mapping (front-end API compatible)."""
        return self._nodes

    @property
    def node_list(self) -> List[Node]:
        """All nodes as a list."""
        return [*self._nodes.values()]

    @property
    def edges(self) -> List[Edge]:
        """All edges."""
        return self._edges

    @property
    def node_count(self) -> int:
        """Number of nodes."""
        return len(self._nodes)

    @property
    def edge_count(self) -> int:
        """Number of edges."""
        return len(self._edges)

    def _connect(self, first_id: str, second_id: str) -> None:
        """Record the two IDs as neighbors, for whichever side is a known node."""
        if first_id in self._adjacency:
            self._adjacency[first_id].add(second_id)
        if second_id in self._adjacency:
            self._adjacency[second_id].add(first_id)

    def add_node(self, node_type: str, name: str = "",
                 node_id: Optional[str] = None, **attributes) -> Node:
        """
        Add a node, or return the existing one with the same ID.

        Args:
            node_type: Node type.
            name: Node name.
            node_id: Node ID; generated from the type, the name and the
                current node count when omitted.
            **attributes: Extra attributes stored on the node.

        Returns:
            The new node, or the already-registered node for ``node_id``
            (in which case the passed name and attributes are ignored).
        """
        if node_id is None:
            seed = f"{node_type}_{name}_{len(self._nodes)}"
            digest = hashlib.md5(seed.encode()).hexdigest()[:8]
            node_id = f"{node_type[:3].upper()}_{digest}"

        known = self._nodes.get(node_id)
        if known is not None:
            return known

        created = Node(id=node_id, type=node_type, name=name, attributes=attributes)
        self._nodes[node_id] = created
        self._adjacency[node_id] = set()
        return created

    def get_node(self, node_id: str) -> Optional[Node]:
        """Return the node with this ID, or None."""
        return self._nodes.get(node_id)

    def add_edge(self, from_node: Node, to_node: Node,
                 relation: str, weight: float = 1.0, **attributes) -> Edge:
        """
        Append an edge and update the adjacency map.

        Args:
            from_node: Start node.
            to_node: End node.
            relation: Relation type.
            weight: Edge weight.
            **attributes: Extra attributes stored on the edge.

        Returns:
            The created edge.
        """
        created = Edge(
            from_id=from_node.id,
            to_id=to_node.id,
            relation=relation,
            weight=weight,
            attributes=attributes
        )
        self._edges.append(created)
        self._connect(from_node.id, to_node.id)
        return created

    def get_neighbors(self, node_id: str) -> List[Node]:
        """Return the nodes adjacent to ``node_id`` (either edge direction)."""
        found = []
        for neighbor_id in self._adjacency.get(node_id, set()):
            if neighbor_id in self._nodes:
                found.append(self._nodes[neighbor_id])
        return found

    def get_edges_from(self, node_id: str) -> List[Edge]:
        """Return the edges that start at ``node_id``."""
        return [edge for edge in self._edges if edge.from_id == node_id]

    def get_edges_to(self, node_id: str) -> List[Edge]:
        """Return the edges that end at ``node_id``."""
        return [edge for edge in self._edges if edge.to_id == node_id]

    def get_nodes_by_type(self, node_type: str) -> List[Node]:
        """Return the nodes whose type equals ``node_type``."""
        return [node for node in self._nodes.values() if node.type == node_type]

    def get_stats(self) -> Dict[str, int]:
        """Return total node/edge counts plus one count per node type."""
        per_type = {}
        for node in self._nodes.values():
            per_type[node.type] = per_type.get(node.type, 0) + 1

        stats = {
            'total_nodes': self.node_count,
            'total_edges': self.edge_count,
        }
        stats.update(per_type)
        return stats

    def get_summary(self) -> Dict[str, Any]:
        """Return a graph overview (stats plus sample names) for use in prompts."""
        sections = self.get_nodes_by_type('section')
        queries = self.get_nodes_by_type('search_query')
        topics = self.get_nodes_by_type('topic')
        engines = self.get_nodes_by_type('engine')

        # Samples are capped: first 10 section titles, first 20 queries.
        section_titles = [node.name for node in sections][:10]
        sample_queries = [node.get('query_text', node.name) for node in queries][:20]

        return {
            'stats': self.get_stats(),
            'section_titles': section_titles,
            'sample_queries': sample_queries,
            'topic': topics[0].name if topics else '',
            'engines': [node.name for node in engines]
        }

    def to_dict(self) -> Dict[str, Any]:
        """Serialize nodes, edges and stats to a dict."""
        return {
            'nodes': [node.to_dict() for node in self.node_list],
            'edges': [edge.to_dict() for edge in self.edges],
            'stats': self.get_stats()
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'Graph':
        """Rebuild a graph from the dict form produced by ``to_dict``."""
        graph = cls()

        for raw_node in data.get('nodes', []):
            node = Node.from_dict(raw_node)
            graph._nodes[node.id] = node
            graph._adjacency[node.id] = set()

        for raw_edge in data.get('edges', []):
            edge = Edge.from_dict(raw_edge)
            graph._edges.append(edge)
            graph._connect(edge.from_id, edge.to_id)

        return graph
  289 +
  290 +
class GraphStorage:
    """Saves, loads and locates graph JSON files."""

    # File name of the graph inside a run directory.
    FILENAME = "graphrag.json"
    # Directory scanned by the find_*/list_* helpers. It is a relative
    # path, so it resolves against the current working directory.
    DEFAULT_CHAPTERS_DIR = Path("chapters")

    def _iter_graph_files(self):
        """Yield ``(run_dir, graph_path)`` for each run directory that holds a graph file."""
        chapters_dir = self.DEFAULT_CHAPTERS_DIR
        if not chapters_dir.exists():
            return

        for run_dir in chapters_dir.iterdir():
            if not run_dir.is_dir():
                continue
            graph_path = run_dir / self.FILENAME
            if graph_path.exists():
                yield run_dir, graph_path

    def save(self, graph: Graph, task_id: str, run_dir: Path) -> Path:
        """
        Save the graph as JSON inside ``run_dir``.

        The data is written to a temporary sibling file first and then
        moved into place, so a failure during serialization never leaves
        a truncated graph file behind.

        Args:
            graph: Graph to save.
            task_id: Task ID stored in the file.
            run_dir: Run directory; created when missing.

        Returns:
            Path of the saved file.
        """
        run_dir = Path(run_dir)
        run_dir.mkdir(parents=True, exist_ok=True)

        output = {
            'task_id': task_id,
            'created_at': datetime.now().isoformat(),
            **graph.to_dict()
        }

        file_path = run_dir / self.FILENAME
        tmp_path = run_dir / (self.FILENAME + ".tmp")
        try:
            with open(tmp_path, 'w', encoding='utf-8') as f:
                json.dump(output, f, ensure_ascii=False, indent=2)
            tmp_path.replace(file_path)
        finally:
            # Only still present when the write or the move failed.
            if tmp_path.exists():
                tmp_path.unlink()

        return file_path

    def load(self, path: Path) -> Optional[Graph]:
        """
        Load a graph from a JSON file.

        Args:
            path: Graph file path, or a run directory containing one.

        Returns:
            The Graph, or None when the file is missing or cannot be
            read or parsed (loading is deliberately best-effort).
        """
        path = Path(path)

        # A directory means "the graph file inside this directory".
        file_path = path / self.FILENAME if path.is_dir() else path

        if not file_path.exists():
            return None

        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
            return Graph.from_dict(data)
        except Exception:
            return None

    def exists(self, run_dir: Path) -> bool:
        """Return True when ``run_dir`` contains a graph file."""
        return (Path(run_dir) / self.FILENAME).exists()

    def find_graph_by_report_id(self, report_id: str) -> Optional[Path]:
        """
        Find the graph file of the run whose directory name contains ``report_id``.

        Args:
            report_id: Report ID to look for.

        Returns:
            Path of the first matching graph file, or None. An empty
            ``report_id`` never matches.
        """
        # An empty string is a substring of every name and would match
        # whichever directory happens to be listed first.
        if not report_id:
            return None

        for run_dir, graph_path in self._iter_graph_files():
            if report_id in run_dir.name:
                return graph_path

        return None

    def find_latest_graph(self) -> Optional[Path]:
        """
        Find the most recently modified graph file.

        Returns:
            Path of the graph file with the newest modification time, or
            None when there is none.
        """
        latest_path = None
        latest_time = None

        for _, graph_path in self._iter_graph_files():
            mtime = graph_path.stat().st_mtime
            if latest_time is None or mtime > latest_time:
                latest_time = mtime
                latest_path = graph_path

        return latest_path

    def list_all_graphs(self) -> List[Dict[str, Any]]:
        """
        List every available graph.

        Returns:
            One dict per readable graph file with the keys ``path``,
            ``report_id``, ``created_at``, ``stats`` and ``dir_name``,
            sorted newest first by ``created_at``. Unreadable files are
            skipped.
        """
        graphs = []
        for run_dir, graph_path in self._iter_graph_files():
            try:
                with open(graph_path, 'r', encoding='utf-8') as f:
                    data = json.load(f)

                graphs.append({
                    'path': str(graph_path),
                    'report_id': data.get('task_id', run_dir.name),
                    'created_at': data.get('created_at'),
                    'stats': data.get('stats', {}),
                    'dir_name': run_dir.name
                })
            except Exception:
                continue

        # 'created_at' is None for files that lack the field; coalesce to
        # '' so the sort never compares None with str.
        graphs.sort(key=lambda item: item.get('created_at') or '', reverse=True)
        return graphs
  1 +"""
  2 +GraphRAG 提示词模块
  3 +
  4 +包含查询决策和章节增强的完整提示词定义。
  5 +"""
  6 +
  7 +# ================== 查询决策提示词 ==================
  8 +
# System prompt for the query-decision LLM call. It lists the node types held
# in the knowledge graph (Topic, Engine, Section, SearchQuery, Source) and asks
# the model to pick query parameters that fit the chapter being generated.
GRAPHRAG_QUERY_DECISION_SYSTEM = """你是一个智能舆情分析助手,负责决定如何查询知识图谱以获取生成报告章节所需的信息。

知识图谱包含以下节点类型:
- Topic: 用户查询的主题
- Engine: 四个分析引擎(Insight/Media/Query/Host)
- Section: 各引擎报告的段落章节
- SearchQuery: 引擎执行过的搜索关键词
- Source: 搜索发现的信息来源(URL、标题、内容摘要)

你的任务是根据当前章节的需求,决定查询参数以获取最相关的信息。"""
  19 +
# User-prompt template for the query-decision LLM call. It is filled with
# str.format using the named placeholders below, so the literal braces of the
# JSON example are escaped as {{ and }}. The model is asked to answer with a
# JSON object holding should_query, keywords, node_types, engine_filter,
# depth and reasoning.
GRAPHRAG_QUERY_DECISION_USER = """
=== 当前任务 ===
正在生成报告章节: "{chapter_title}"
章节编号: {chapter_id}
章节在模板中的定位: {chapter_role}
目标字数: {target_words}字
章节要点: {chapter_emphasis}

=== 完整报告规划 ===
报告主题: {report_topic}
模板类型: {template_name}
全书章节概览:
{chapters_overview}

=== 知识图谱概览 ===
图谱统计:
- 主题节点: 1个 ({topic_name})
- 引擎节点: {engine_count}个
- 段落节点: {section_count}个
- 搜索词节点: {query_count}个
- 来源节点: {source_count}个

各引擎段落标题:
{section_titles_by_engine}

搜索关键词样例(前20个):
{sample_search_queries}

=== 查询历史记录(本章节已执行的查询) ===
{query_history_detail}

=== 请决定查询参数 ===
请输出JSON格式的查询参数:
```json
{{
 "should_query": true/false,
 "keywords": ["关键词1", "关键词2", ...],
 "node_types": ["section", "search_query", "source"],
 "engine_filter": ["insight", "media", "query", "host"],
 "depth": 1-3,
 "reasoning": "选择这些参数的原因,以及期望获取什么信息"
}}
```

注意事项:
1. 仔细查看查询历史,**避免重复查询相同或相似的关键词**
2. 关键词应与当前章节主题紧密相关
3. 如果查询历史已经覆盖了章节所需的主要信息,设置 should_query=false
4. depth建议:1=精确匹配,2=包含关联,3=扩展探索(信息量大但可能有噪音)
5. 可以通过 engine_filter 聚焦特定引擎的分析视角
"""
  71 +
  72 +# ================== 章节增强提示词(GraphRAG 开启时使用) ==================
  73 +
# Extra system-prompt section used when a chapter is generated with GraphRAG
# results. It tells the model how to use the graphResults payload: combine
# cross-engine findings, cite matched sources, treat matched search queries as
# semantic hints, and merge overlapping analyses rather than repeating them.
SYSTEM_PROMPT_CHAPTER_GRAPH_ENHANCEMENT = """
=== GraphRAG 知识图谱增强 ===
本次章节生成已通过知识图谱查询获取了跨引擎的关联信息。
在生成内容时,请特别注意:

1. **跨引擎关联**: graphResults 中包含了来自不同引擎的相关信息,
 请综合利用这些多视角的分析结果,形成更全面的观点。

2. **信息溯源**: 对于重要观点,可以引用 graphResults.matched_sources
 中的来源信息,增强可信度。

3. **搜索词关联**: graphResults.matched_queries 显示了各引擎为本主题
 执行的相关搜索,这些搜索词本身就是重要的语义线索。

4. **避免重复**: 不同引擎可能有相似的分析,请整合而非重复。
"""
  90 +
# Template for the graph-results section of the user prompt. It is filled with
# str.format using five placeholders: query_rounds, matched_sections,
# matched_queries, matched_sources and cross_engine_insights.
USER_PROMPT_GRAPH_RESULTS_TEMPLATE = """
=== GraphRAG 知识图谱查询结果 ===

**查询轮次**: {query_rounds}次

**匹配的相关段落** (来自其他引擎的相关分析):
{matched_sections}

**相关搜索关键词** (各引擎执行的相关搜索):
{matched_queries}

**相关信息来源** (搜索发现的相关URL和内容):
{matched_sources}

**跨引擎关联洞察**:
{cross_engine_insights}

请在生成本章节时,充分利用以上知识图谱查询结果,
特别是跨引擎的关联信息,以丰富内容的多维度分析。
===

"""
  113 +
  114 +
def format_graph_results_for_prompt(graph_results: dict) -> str:
    """
    Render merged GraphRAG query results as a prompt section.

    Args:
        graph_results: Result dictionary; the keys read are
            ``matched_sections``, ``matched_queries``, ``matched_sources``,
            ``cross_engine_insights`` and ``query_rounds``.

    Returns:
        The filled-in results template, or an empty string when
        ``graph_results`` is empty or None.
    """
    if not graph_results:
        return ""

    # Each category is rendered by its own helper; missing keys fall back to
    # an empty list so the helper emits its "no match" placeholder.
    rendered = {
        'matched_sections': _format_matched_sections(
            graph_results.get('matched_sections', [])
        ),
        'matched_queries': _format_matched_queries(
            graph_results.get('matched_queries', [])
        ),
        'matched_sources': _format_matched_sources(
            graph_results.get('matched_sources', [])
        ),
        'cross_engine_insights': _format_cross_engine_insights(
            graph_results.get('cross_engine_insights', [])
        ),
    }

    return USER_PROMPT_GRAPH_RESULTS_TEMPLATE.format(
        query_rounds=graph_results.get('query_rounds', 0),
        **rendered
    )
  151 +
  152 +
def _format_matched_sections(sections: list) -> str:
    """
    Render matched section nodes as a bullet list for the prompt.

    At most the first 10 sections are listed. Each summary is cut to 100
    characters and an ellipsis is appended only when text was actually cut.
    A missing or None summary is rendered as empty text.

    Args:
        sections: Section dicts; the keys read are ``engine``, ``title``
            and ``summary``.

    Returns:
        One line per section, or a placeholder when ``sections`` is empty.
    """
    if not sections:
        return "(无匹配段落)"

    limit = 100
    lines = []
    for s in sections[:10]:  # cap the number of listed sections
        engine = s.get('engine', 'unknown')
        title = s.get('title', '未知标题')
        # `or ''` guards against an explicit None value, which cannot be sliced.
        summary = str(s.get('summary') or '')
        suffix = "..." if len(summary) > limit else ""
        lines.append(f"- [{engine}] {title}: {summary[:limit]}{suffix}")

    return "\n".join(lines)
  166 +
  167 +
def _format_matched_queries(queries: list) -> str:
    """
    Render matched search-query nodes grouped by engine.

    Query text comes from ``query_text``, falling back to ``name``. Empty
    and duplicate texts are dropped within each engine, and at most five
    texts are shown per engine.

    Args:
        queries: Search-query dicts.

    Returns:
        One line per engine, or a placeholder when ``queries`` is empty.
    """
    if not queries:
        return "(无匹配搜索词)"

    grouped = {}
    for item in queries:
        # setdefault registers the engine even if it contributes no text.
        bucket = grouped.setdefault(item.get('engine', 'unknown'), [])
        text = item.get('query_text', item.get('name', ''))
        if text and text not in bucket:
            bucket.append(text)

    return "\n".join(
        f"- {engine}: {', '.join(texts[:5])}"
        for engine, texts in grouped.items()
    )
  187 +
  188 +
def _format_matched_sources(sources: list) -> str:
    """
    Render matched source nodes as Markdown links with an optional preview.

    At most the first 8 sources are listed. A non-empty ``preview`` adds a
    second line holding its first 80 characters followed by an ellipsis.

    Args:
        sources: Source dicts; the keys read are ``title``, ``url`` and
            ``preview``.

    Returns:
        The rendered lines, or a placeholder when ``sources`` is empty.
    """
    if not sources:
        return "(无匹配来源)"

    rendered = []
    for src in sources[:8]:
        link_text = src.get('title', '未知标题')
        target = src.get('url', '#')
        rendered.append(f"- [{link_text}]({target})")

        snippet = src.get('preview', '')
        if not snippet:
            continue
        rendered.append(f" 摘要: {snippet[:80]}...")

    return "\n".join(rendered)
  204 +
  205 +
def _format_cross_engine_insights(insights: list) -> str:
    """Render up to five cross-engine insights as bullets, or a placeholder when there are none."""
    if insights:
        return "\n".join(f"- {text}" for text in insights[:5])

    return "(无跨引擎关联发现)"
  1 +"""
  2 +图查询引擎
  3 +
  4 +支持基于关键词、节点类型、引擎来源和深度的知识图谱查询。
  5 +"""
  6 +
  7 +from dataclasses import dataclass, field
  8 +from typing import Dict, Any, List, Optional, Set
  9 +
  10 +from .graph_storage import Graph, Node
  11 +
  12 +
@dataclass
class QueryParams:
    """Parameters of a single knowledge-graph query."""
    keywords: List[str] = field(default_factory=list)
    node_types: Optional[List[str]] = None  # None means every node type
    engine_filter: Optional[List[str]] = None  # restrict to these source engines
    depth: int = 1  # expansion depth
  20 +
  21 +
@dataclass
class QueryResult:
    """Nodes returned by one graph query, grouped by node type."""
    matched_sections: List[Dict[str, Any]] = field(default_factory=list)
    matched_queries: List[Dict[str, Any]] = field(default_factory=list)
    matched_sources: List[Dict[str, Any]] = field(default_factory=list)
    total_nodes: int = 0
    query_params: Optional[Dict[str, Any]] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return the result as a plain dictionary keyed by field name."""
        exported = (
            'matched_sections',
            'matched_queries',
            'matched_sources',
            'total_nodes',
            'query_params',
        )
        return {name: getattr(self, name) for name in exported}

    def get_summary(self, max_length: int = 200) -> str:
        """
        Build a one-line digest of the result.

        Lists the count and first three titles of matched sections (30
        characters each), the count and first three matched query texts
        (20 characters each), and the count of matched sources.

        Args:
            max_length: Maximum length of the returned string.

        Returns:
            The digest truncated to ``max_length`` characters, or a
            "no match" message when nothing matched.
        """
        fragments = []

        if self.matched_sections:
            heads = ', '.join(
                item.get('title', '')[:30] for item in self.matched_sections[:3]
            )
            fragments.append(f"段落({len(self.matched_sections)}): {heads}")

        if self.matched_queries:
            texts = ', '.join(
                item.get('query_text', '')[:20] for item in self.matched_queries[:3]
            )
            fragments.append(f"搜索词({len(self.matched_queries)}): {texts}")

        if self.matched_sources:
            fragments.append(f"来源({len(self.matched_sources)})")

        if not fragments:
            return "无匹配结果"[:max_length]
        return "; ".join(fragments)[:max_length]
  58 +
  59 +
class QueryEngine:
    """
    Query engine over the knowledge graph.

    Supported capabilities:
    1. Keyword matching against node name and selected attributes
    2. Node-type filtering (section/search_query/source)
    3. Engine filtering (insight/media/query/host)
    4. Depth expansion from the matched nodes to their neighbours
    """

    def __init__(self, graph: "Graph"):
        """
        Create a query engine bound to one graph.

        Args:
            graph: Knowledge graph object to query.
        """
        self.graph = graph

    def query(self, params: "QueryParams") -> "QueryResult":
        """
        Run one graph query.

        Args:
            params: Query parameters.

        Returns:
            QueryResult holding the matched and expanded nodes.
        """
        # 1. Seed set: nodes that pass the type/engine filters and match a keyword.
        matched_nodes = self._match_keywords(params)

        # 2. Add neighbours up to the requested depth.
        depth = self._normalize_depth(params.depth)
        if depth > 0 and matched_nodes:
            matched_nodes = matched_nodes | self._expand_depth(matched_nodes, depth)

        # 3. Group and sort.
        return self._organize_results(matched_nodes, params)

    @staticmethod
    def _normalize_depth(depth: Any) -> int:
        """Coerce depth to a non-negative int; unparsable values fall back to 1, the QueryParams default."""
        try:
            value = int(depth)
        except (TypeError, ValueError):
            return 1
        return max(value, 0)

    @staticmethod
    def _normalize_keywords(keywords: Any) -> List[str]:
        """Return lower-cased, non-blank keyword strings; a bare string counts as one keyword."""
        if not keywords:
            return []
        if isinstance(keywords, str):
            keywords = [keywords]
        cleaned = []
        for keyword in keywords:
            if keyword is None:
                continue
            text = str(keyword).strip().lower()
            if text:
                cleaned.append(text)
        return cleaned

    @staticmethod
    def _contains_any(node: "Node", needles: List[str]) -> bool:
        """Check whether any normalized keyword occurs in the node's searchable text."""
        if not needles:
            return True  # no keywords: every node matches

        search_text = (
            f"{node.name} {node.get('title', '')} "
            f"{node.get('query_text', '')} {node.get('summary', '')}"
        ).lower()
        return any(needle in search_text for needle in needles)

    def _match_keywords(self, params: "QueryParams") -> Set[str]:
        """Return ids of nodes that pass the type and engine filters and match a keyword."""
        # Normalize once; the keyword list is the same for every node.
        needles = self._normalize_keywords(params.keywords)
        matched_ids = set()

        for node in self.graph.nodes:
            # Type filter
            if params.node_types and node.type not in params.node_types:
                continue

            # Engine filter: nodes without an engine attribute are not excluded.
            if params.engine_filter:
                node_engine = node.get('engine')
                if node_engine and node_engine not in params.engine_filter:
                    continue

            if self._contains_any(node, needles):
                matched_ids.add(node.id)

        return matched_ids

    def _matches_keywords(self, node: "Node", keywords: List[str]) -> bool:
        """
        Check whether a node matches any of the keywords (case-insensitive).

        Blank and None keywords are ignored, so a stray empty string no
        longer matches every node. If no usable keyword remains, every
        node matches.
        """
        return self._contains_any(node, self._normalize_keywords(keywords))

    def _expand_depth(self, node_ids: Set[str], depth: int) -> Set[str]:
        """Return ids reachable from ``node_ids`` within ``depth`` hops, excluding the start set."""
        expanded = set()
        current_layer = node_ids.copy()

        for _ in range(depth):
            next_layer = set()

            for node_id in current_layer:
                for neighbor in self.graph.get_neighbors(node_id):
                    if neighbor.id not in node_ids and neighbor.id not in expanded:
                        next_layer.add(neighbor.id)
                        expanded.add(neighbor.id)

            if not next_layer:
                break  # frontier exhausted before reaching the requested depth

            current_layer = next_layer

        return expanded

    def _organize_results(self, node_ids: Set[str],
                          params: "QueryParams") -> "QueryResult":
        """Group nodes by type into a QueryResult, sorted deterministically."""
        matched_sections = []
        matched_queries = []
        matched_sources = []

        # Walk ids in sorted order so that items with equal sort keys keep a
        # stable relative order instead of depending on set iteration order.
        for node_id in sorted(node_ids, key=str):
            node = self.graph.get_node(node_id)
            if not node:
                continue

            node_dict = {
                'id': node.id,
                'name': node.name,
                'type': node.type,
                **node.attributes
            }

            if node.type == 'section':
                matched_sections.append(node_dict)
            elif node.type == 'search_query':
                matched_queries.append(node_dict)
            elif node.type == 'source':
                matched_sources.append(node_dict)

        def order_key(item: Dict[str, Any]):
            # A missing, None or non-numeric order sorts as 0.
            order = item.get('order')
            return order if isinstance(order, (int, float)) else 0

        # Sections by order, the others by text; None values sort as empty.
        matched_sections.sort(key=order_key)
        matched_queries.sort(key=lambda x: str(x.get('query_text') or ''))
        matched_sources.sort(key=lambda x: str(x.get('title') or ''))

        return QueryResult(
            matched_sections=matched_sections,
            matched_queries=matched_queries,
            matched_sources=matched_sources,
            total_nodes=len(node_ids),
            query_params={
                'keywords': params.keywords,
                'node_types': params.node_types,
                'engine_filter': params.engine_filter,
                'depth': params.depth
            }
        )

    def get_node_summary(self) -> Dict[str, Any]:
        """Return the graph's own summary (used when building prompts)."""
        return self.graph.get_summary()

    def get_section_titles_by_engine(self) -> Dict[str, List[str]]:
        """Return section titles grouped by engine, using the node name when no title is set."""
        result: Dict[str, List[str]] = {}

        for node in self.graph.get_nodes_by_type('section'):
            engine = node.get('engine', 'unknown')
            result.setdefault(engine, []).append(node.get('title', node.name))

        return result

    def get_sample_search_queries(self, limit: int = 20) -> List[str]:
        """Return up to ``limit`` distinct search-query texts in graph order."""
        queries: List[str] = []
        if limit <= 0:
            return queries

        for node in self.graph.get_nodes_by_type('search_query'):
            query_text = node.get('query_text', node.name)
            if query_text and query_text not in queries:
                queries.append(query_text)
                if len(queries) >= limit:
                    break

        return queries
  1 +"""
  2 +State JSON 解析器
  3 +
  4 +解析 Insight/Media/Query 三引擎的 State JSON 文件,
  5 +提取结构化数据用于构建知识图谱。
  6 +"""
  7 +
  8 +from dataclasses import dataclass, field
  9 +from typing import Dict, Any, List, Optional
  10 +import json
  11 +from pathlib import Path
  12 +
  13 +
@dataclass
class SearchRecord:
    """A single search record taken from a paragraph's search history."""
    query: str = ""
    url: str = ""
    title: str = ""
    content: str = ""
    score: Optional[float] = None
    timestamp: str = ""
  23 +
  24 +
@dataclass
class ParsedSection:
    """A parsed paragraph/section of an engine report."""
    title: str = ""
    order: int = 0
    summary: str = ""
    search_history: List[SearchRecord] = field(default_factory=list)
  32 +
  33 +
@dataclass
class ParsedState:
    """The parsed state of one engine."""
    engine: str = ""
    query: str = ""
    report_title: str = ""
    sections: List[ParsedSection] = field(default_factory=list)
  41 +
  42 +
class StateParser:
    """
    Parser for engine State JSON.

    Extracts the structured data needed to build the knowledge graph from
    the State JSON produced by the three engines.
    """

    def parse(self, engine_name: str, state_json: Dict[str, Any]) -> "ParsedState":
        """
        Parse the State JSON of a single engine.

        Args:
            engine_name: Engine name (insight/media/query).
            state_json: State JSON dictionary.

        Returns:
            ParsedState with one section per entry of ``paragraphs``.
        """
        # `or []` also covers an explicit null value for "paragraphs".
        paragraphs = state_json.get('paragraphs') or []
        return ParsedState(
            engine=engine_name,
            query=state_json.get('query', ''),
            report_title=state_json.get('report_title', ''),
            sections=[self._parse_paragraph(p) for p in paragraphs]
        )

    def _parse_paragraph(self, para: Dict[str, Any]) -> "ParsedSection":
        """
        Parse one paragraph entry.

        Search-record content is cut to 200 characters and the summary to
        300. The summary prefers ``research.latest_summary`` and falls back
        to the paragraph's ``content``.
        """
        # Null-valued "research" / "search_history" are treated as absent.
        research = para.get('research') or {}

        search_history = [
            SearchRecord(
                query=search.get('query', ''),
                url=search.get('url', ''),
                title=search.get('title', ''),
                content=(search.get('content') or '')[:200],
                score=search.get('score'),
                timestamp=search.get('timestamp', '')
            )
            for search in (research.get('search_history') or [])
        ]

        summary = research.get('latest_summary') or para.get('content') or ''

        return ParsedSection(
            title=para.get('title', ''),
            order=para.get('order', 0),
            summary=summary[:300],
            search_history=search_history
        )

    def parse_from_file(self, engine_name: str, file_path: str) -> Optional["ParsedState"]:
        """
        Parse a State JSON file.

        Args:
            engine_name: Engine name.
            file_path: Path of the JSON file.

        Returns:
            ParsedState, or None when the file is missing, unreadable, not
            valid JSON, or does not have the expected structure.
        """
        path = Path(file_path)
        try:
            if not path.exists():
                return None

            with open(path, 'r', encoding='utf-8') as f:
                state_json = json.load(f)

            return self.parse(engine_name, state_json)
        except (OSError, ValueError, TypeError, AttributeError):
            # OSError: unreadable file. ValueError: bad JSON or encoding.
            # TypeError/AttributeError: JSON of an unexpected shape.
            return None

    def find_state_json(self, md_path: str) -> Optional[str]:
        """
        Find the State JSON file that belongs to a Markdown report.

        Candidates are the ``state_*.json`` files in the report's directory.
        Selection order:
        1. the file named exactly ``state_<md stem>.json``;
        2. otherwise the first file, in sorted name order, whose stem
           contains the Markdown stem;
        3. otherwise the most recently modified candidate.

        Args:
            md_path: Path of the Markdown file.

        Returns:
            Path of the State JSON as a string, or None when the Markdown
            file does not exist or the directory has no candidates.
        """
        md_file = Path(md_path)
        if not md_file.exists():
            return None

        state_files = list(md_file.parent.glob('state_*.json'))
        if not state_files:
            return None

        md_stem = md_file.stem  # e.g. "武汉大学_20250825_180214"

        # An exact match must win over substring matches: glob order is not
        # specified, so a longer sibling name could otherwise be hit first.
        exact_stem = f"state_{md_stem}"
        for state_file in state_files:
            if state_file.stem == exact_stem:
                return str(state_file)

        # Sorted for a reproducible choice among several loose matches.
        for state_file in sorted(state_files):
            state_stem = state_file.stem  # e.g. "state_武汉大学_20250825_180214"
            if md_stem in state_stem or state_stem.replace('state_', '') == md_stem:
                return str(state_file)

        # No name match: fall back to the newest candidate.
        state_files.sort(key=lambda f: f.stat().st_mtime, reverse=True)
        return str(state_files[0])
@@ -14,6 +14,7 @@ from .chapter_generation_node import ( @@ -14,6 +14,7 @@ from .chapter_generation_node import (
14 ) 14 )
15 from .document_layout_node import DocumentLayoutNode 15 from .document_layout_node import DocumentLayoutNode
16 from .word_budget_node import WordBudgetNode 16 from .word_budget_node import WordBudgetNode
  17 +from .graphrag_query_node import GraphRAGQueryNode, QueryHistory
17 18
18 __all__ = [ 19 __all__ = [
19 "BaseNode", 20 "BaseNode",
@@ -25,4 +26,6 @@ __all__ = [ @@ -25,4 +26,6 @@ __all__ = [
25 "ChapterValidationError", 26 "ChapterValidationError",
26 "DocumentLayoutNode", 27 "DocumentLayoutNode",
27 "WordBudgetNode", 28 "WordBudgetNode",
  29 + "GraphRAGQueryNode",
  30 + "QueryHistory",
28 ] 31 ]
@@ -205,11 +205,15 @@ class ChapterGenerationNode(BaseNode): @@ -205,11 +205,15 @@ class ChapterGenerationNode(BaseNode):
205 llm_payload = self._build_payload(section, context) 205 llm_payload = self._build_payload(section, context)
206 user_message = build_chapter_user_prompt(llm_payload) 206 user_message = build_chapter_user_prompt(llm_payload)
207 207
  208 + # 检查是否有GraphRAG结果,决定是否使用增强提示词
  209 + graph_enhanced = bool(context.get("graph_results"))
  210 +
208 raw_text = self._stream_llm( 211 raw_text = self._stream_llm(
209 user_message, 212 user_message,
210 chapter_dir, 213 chapter_dir,
211 stream_callback=stream_callback, 214 stream_callback=stream_callback,
212 section_meta=chapter_meta, 215 section_meta=chapter_meta,
  216 + graph_enhanced=graph_enhanced,
213 **kwargs, 217 **kwargs,
214 ) 218 )
215 parse_context: List[str] = [] 219 parse_context: List[str] = []
@@ -351,6 +355,22 @@ class ChapterGenerationNode(BaseNode): @@ -351,6 +355,22 @@ class ChapterGenerationNode(BaseNode):
351 "chapterPlan": chapter_plan, 355 "chapterPlan": chapter_plan,
352 "wordPlan": context.get("word_plan"), 356 "wordPlan": context.get("word_plan"),
353 } 357 }
  358 +
  359 + # GraphRAG 增强:如果上下文中包含图谱查询结果,添加到payload
  360 + graph_results = context.get("graph_results")
  361 + if graph_results:
  362 + payload["graphResults"] = {
  363 + "totalNodes": graph_results.get("total_nodes", 0),
  364 + "queryRounds": graph_results.get("query_rounds", 0),
  365 + "matchedSections": graph_results.get("matched_sections", []),
  366 + "matchedQueries": graph_results.get("matched_queries", []),
  367 + "matchedSources": graph_results.get("matched_sources", []),
  368 + }
  369 + # 同时添加增强提示(如果有)
  370 + graph_enhancement = context.get("graph_enhancement_prompt")
  371 + if graph_enhancement:
  372 + payload["graphEnhancementPrompt"] = graph_enhancement
  373 +
354 if chapter_plan: 374 if chapter_plan:
355 constraints = payload["constraints"] 375 constraints = payload["constraints"]
356 if chapter_plan.get("targetWords"): 376 if chapter_plan.get("targetWords"):
@@ -438,6 +458,7 @@ class ChapterGenerationNode(BaseNode): @@ -438,6 +458,7 @@ class ChapterGenerationNode(BaseNode):
438 chapter_dir: Path, 458 chapter_dir: Path,
439 stream_callback: Optional[Callable[[str, Dict[str, Any]], None]] = None, 459 stream_callback: Optional[Callable[[str, Dict[str, Any]], None]] = None,
440 section_meta: Optional[Dict[str, Any]] = None, 460 section_meta: Optional[Dict[str, Any]] = None,
  461 + graph_enhanced: bool = False,
441 **kwargs, 462 **kwargs,
442 ) -> str: 463 ) -> str:
443 """ 464 """
@@ -448,15 +469,23 @@ class ChapterGenerationNode(BaseNode): @@ -448,15 +469,23 @@ class ChapterGenerationNode(BaseNode):
448 chapter_dir: 章节的本地缓存目录,用于存放 stream.raw。 469 chapter_dir: 章节的本地缓存目录,用于存放 stream.raw。
449 stream_callback: SSE流式推送的回调函数。 470 stream_callback: SSE流式推送的回调函数。
450 section_meta: 附带的章节ID/标题,用于回调payload。 471 section_meta: 附带的章节ID/标题,用于回调payload。
  472 + graph_enhanced: 是否启用GraphRAG增强的系统提示词。
451 **kwargs: 透传温度、top_p等参数。 473 **kwargs: 透传温度、top_p等参数。
452 474
453 返回: 475 返回:
454 str: 将所有delta拼接后的原始文本。 476 str: 将所有delta拼接后的原始文本。
455 """ 477 """
  478 + # 根据是否启用GraphRAG选择不同的系统提示词
  479 + if graph_enhanced:
  480 + from ..graphrag.prompts import SYSTEM_PROMPT_CHAPTER_GRAPH_ENHANCEMENT
  481 + system_prompt = SYSTEM_PROMPT_CHAPTER_JSON + "\n\n" + SYSTEM_PROMPT_CHAPTER_GRAPH_ENHANCEMENT
  482 + else:
  483 + system_prompt = SYSTEM_PROMPT_CHAPTER_JSON
  484 +
456 chunks: List[str] = [] 485 chunks: List[str] = []
457 with self.storage.capture_stream(chapter_dir) as stream_fp: 486 with self.storage.capture_stream(chapter_dir) as stream_fp:
458 stream = self.llm_client.stream_invoke( 487 stream = self.llm_client.stream_invoke(
459 - SYSTEM_PROMPT_CHAPTER_JSON, 488 + system_prompt,
460 user_message, 489 user_message,
461 temperature=kwargs.get("temperature", 0.2), 490 temperature=kwargs.get("temperature", 0.2),
462 top_p=kwargs.get("top_p", 0.95), 491 top_p=kwargs.get("top_p", 0.95),
  1 +"""
  2 +GraphRAG 查询节点
  3 +
  4 +负责与知识图谱交互,让 LLM 决定查询参数并执行多轮查询。
  5 +包含查询历史机制以防止重复查询。
  6 +"""
  7 +
  8 +import json
  9 +import re
  10 +from dataclasses import dataclass, field
  11 +from typing import Dict, Any, List, Optional
  12 +
  13 +from loguru import logger
  14 +
  15 +from .base_node import BaseNode
  16 +from ..llms.base import LLMClient
  17 +from ..graphrag.graph_storage import Graph
  18 +from ..graphrag.query_engine import QueryEngine, QueryParams, QueryResult
  19 +from ..graphrag.prompts import (
  20 + GRAPHRAG_QUERY_DECISION_SYSTEM,
  21 + GRAPHRAG_QUERY_DECISION_USER
  22 +)
  23 +
  24 +
@dataclass
class QueryRound:
    """Record of a single query round."""
    round: int
    params: Dict[str, Any]
    result_count: int
    summary: str
  32 +
  33 +
class QueryHistory:
    """
    Recorder of the queries already run for one chapter.

    Stores the parameters and a result digest of every query so the LLM can
    be told what has been asked and avoid repeating it. The stored
    parameters may come straight from LLM-produced JSON, so the read
    methods tolerate None, bare strings and non-string items.
    """

    def __init__(self):
        self.rounds: List["QueryRound"] = []

    def add(self, params: Dict[str, Any], result: "QueryResult") -> None:
        """
        Record one query.

        Args:
            params: Query parameters.
            result: Query result; its node count and summary are stored.
        """
        self.rounds.append(QueryRound(
            round=len(self.rounds) + 1,
            params=params,
            result_count=result.total_nodes,
            summary=result.get_summary()
        ))

    @staticmethod
    def _as_text_list(value: Any) -> List[str]:
        """Coerce a parameter value to a list of non-empty strings."""
        if value is None:
            return []
        if isinstance(value, str):
            # A bare string is one item, not a sequence of characters.
            return [value] if value else []
        if isinstance(value, (list, tuple, set)):
            return [str(item) for item in value if item is not None and str(item) != '']
        return [str(value)]

    def to_prompt(self) -> str:
        """
        Render the history as context for the LLM.

        Returns:
            A formatted multi-line string, or a short notice when no query
            has been recorded yet.
        """
        if not self.rounds:
            return "(这是第1次查询,无历史记录)"

        lines = ["=== 已完成的查询历史 ==="]
        for r in self.rounds:
            keywords = self._as_text_list(r.params.get('keywords', []))
            node_types = self._as_text_list(r.params.get('node_types', ['all']))
            engine_filter = self._as_text_list(r.params.get('engine_filter', ['all']))

            lines.append(f"第{r.round}次查询:")
            lines.append(f" 关键词: {', '.join(keywords) if keywords else '无'}")
            lines.append(f" 节点类型: {', '.join(node_types) if node_types else '全部'}")
            lines.append(f" 引擎筛选: {', '.join(engine_filter) if engine_filter else '全部'}")
            lines.append(f" 返回节点数: {r.result_count}")
            lines.append(f" 结果摘要: {r.summary}")
            lines.append("")

        lines.append("=== 请避免重复上述查询,探索新的角度 ===")
        return "\n".join(lines)

    def get_all_keywords(self) -> List[str]:
        """Return every keyword queried so far, in query order."""
        keywords: List[str] = []
        for r in self.rounds:
            keywords.extend(self._as_text_list(r.params.get('keywords', [])))
        return keywords
  92 +
  93 +
class GraphRAGQueryNode(BaseNode):
    """
    GraphRAG query node.

    Responsibilities:
    1. Receive the full context (report, chapter plan, graph overview)
    2. Keep a query history so the LLM does not repeat queries
    3. Ask the LLM to decide the query parameters
    4. Run the graph query
    5. Allow at most ``max_queries`` queries
    6. Merge the results of all rounds and return them

    The LLM's decision is untrusted JSON, so it is validated and normalized
    before use.
    """

    def __init__(self, llm_client: LLMClient):
        super().__init__(llm_client, "GraphRAGQueryNode")

    def run(self, section: Dict[str, Any], context: Dict[str, Any],
            graph: Graph, max_queries: int = 3) -> Dict[str, Any]:
        """
        Run the GraphRAG query loop for one chapter.

        Args:
            section: Current chapter information.
            context: Generation context (report, plan, ...).
            graph: Knowledge graph.
            max_queries: Maximum number of query rounds.

        Returns:
            The merged query results, including ``query_rounds``.
        """
        self.log_info(f"开始 GraphRAG 查询,章节: {section.get('title', 'unknown')}")

        query_engine = QueryEngine(graph)
        history = QueryHistory()
        all_results: List[QueryResult] = []

        for round_idx in range(max_queries):
            self.log_info(f"查询轮次 {round_idx + 1}/{max_queries}")

            # 1. Build the decision prompt (includes the history so far).
            prompt = self._build_decision_prompt(
                section, context, query_engine, history
            )

            # 2. Ask the LLM; anything but a JSON object comes back as None.
            decision = self._get_query_decision(prompt)
            if decision is None:
                self.log_error("LLM 返回无效决策,终止查询")
                break

            # 3. Stop when the LLM says so.
            if not self._wants_query(decision):
                self.log_info(f"LLM 决定停止查询: {decision.get('reasoning', '无原因')}")
                break

            # 4. Run the query with normalized parameters.
            params = self._build_query_params(decision)
            result = query_engine.query(params)
            all_results.append(result)

            self.log_info(f"查询返回 {result.total_nodes} 个节点")

            # 5. Record the round with the parameters that were actually
            #    used; other decision fields such as reasoning are kept.
            history.add(
                {
                    **decision,
                    'keywords': params.keywords,
                    'node_types': params.node_types,
                    'engine_filter': params.engine_filter,
                    'depth': params.depth,
                },
                result
            )

        # 6. Merge all rounds.
        merged = self._merge_results(all_results)
        merged['query_rounds'] = len(all_results)

        self.log_info(f"GraphRAG 查询完成,共 {len(all_results)} 轮,"
                      f"获取 {merged.get('total_nodes', 0)} 个节点")

        return merged

    @staticmethod
    def _wants_query(decision: Dict[str, Any]) -> bool:
        """Interpret ``should_query``; a string such as "false" is not treated as truthy."""
        flag = decision.get('should_query', False)
        if isinstance(flag, str):
            return flag.strip().lower() in ('true', '1', 'yes')
        return bool(flag)

    @staticmethod
    def _clean_str_list(value: Any, lowercase: bool = False) -> List[str]:
        """Coerce a decision value to a list of stripped, non-empty strings."""
        if value is None:
            return []
        if isinstance(value, str):
            value = [value]
        if not isinstance(value, (list, tuple, set)):
            return []
        cleaned = []
        for item in value:
            if item is None:
                continue
            text = str(item).strip()
            if text:
                cleaned.append(text.lower() if lowercase else text)
        return cleaned

    def _build_query_params(self, decision: Dict[str, Any]) -> QueryParams:
        """
        Turn the LLM decision into validated QueryParams.

        Keywords become a list of non-empty strings. Node types and engine
        names are lower-cased, matching the lower-case values the query
        engine compares against; an empty list becomes None (no filter).
        Depth falls back to 1 when it is not an integer and is never
        negative.
        """
        try:
            depth = max(int(decision.get('depth', 1)), 0)
        except (TypeError, ValueError):
            depth = 1

        return QueryParams(
            keywords=self._clean_str_list(decision.get('keywords')),
            node_types=self._clean_str_list(decision.get('node_types'), lowercase=True) or None,
            engine_filter=self._clean_str_list(decision.get('engine_filter'), lowercase=True) or None,
            depth=depth
        )

    def _build_decision_prompt(self, section: Dict[str, Any],
                               context: Dict[str, Any],
                               query_engine: QueryEngine,
                               history: QueryHistory) -> Dict[str, str]:
        """Build the system/user prompt pair for the query decision."""
        # Graph overview
        summary = query_engine.get_node_summary()
        stats = summary.get('stats', {})

        # Section titles grouped by engine, at most five per engine
        section_titles_text = "".join(
            f"\n{engine}: {', '.join(titles[:5])}"
            for engine, titles in query_engine.get_section_titles_by_engine().items()
        )

        # Sample of search queries
        sample_queries = query_engine.get_sample_search_queries(20)

        # Overview of the first ten chapters
        chapters = context.get('chapters', [])
        chapters_text = "\n".join([
            f"- {c.get('id', '')}: {c.get('title', '')}"
            for c in chapters[:10]
        ])

        user_prompt = GRAPHRAG_QUERY_DECISION_USER.format(
            chapter_title=section.get('title', ''),
            chapter_id=section.get('id', ''),
            chapter_role=section.get('role', ''),
            target_words=section.get('target_words', 500),
            chapter_emphasis=section.get('emphasis', ''),
            report_topic=context.get('query', ''),
            template_name=context.get('template_name', ''),
            chapters_overview=chapters_text,
            topic_name=summary.get('topic', ''),
            engine_count=len(summary.get('engines', [])),
            section_count=stats.get('section', 0),
            query_count=stats.get('search_query', 0),
            source_count=stats.get('source', 0),
            section_titles_by_engine=section_titles_text,
            sample_search_queries=', '.join(sample_queries),
            query_history_detail=history.to_prompt()
        )

        return {
            'system': GRAPHRAG_QUERY_DECISION_SYSTEM,
            'user': user_prompt
        }

    def _get_query_decision(self, prompt: Dict[str, str]) -> Optional[Dict[str, Any]]:
        """Call the LLM and return its decision, or None on failure or unusable output."""
        try:
            response = self.llm_client.invoke(
                system=prompt['system'],
                user=prompt['user']
            )
            return self._parse_json_response(response)
        except Exception as e:
            # Boundary around the external LLM call: log and end the loop
            # rather than fail the whole chapter.
            self.log_error(f"LLM 调用失败: {e}")
            return None

    def _parse_json_response(self, response: str) -> Optional[Dict[str, Any]]:
        """
        Parse the JSON object returned by the LLM.

        Tries, in order: the whole response, the first ```json fenced
        block, and the span from the first "{" to the last "}". Only a JSON
        object is accepted; other JSON values are rejected because callers
        read the decision with ``.get``.
        """
        candidates = [response]

        fenced = re.search(r'```json\s*(.*?)\s*```', response, re.DOTALL)
        if fenced:
            candidates.append(fenced.group(1))

        braces = re.search(r'\{.*\}', response, re.DOTALL)
        if braces:
            candidates.append(braces.group())

        for candidate in candidates:
            try:
                parsed = json.loads(candidate)
            except json.JSONDecodeError:
                continue
            if isinstance(parsed, dict):
                return parsed

        self.log_error(f"无法解析 JSON 响应: {response[:200]}")
        return None

    @staticmethod
    def _collect_unique(results: List[QueryResult], attr: str) -> List[Dict[str, Any]]:
        """Gather the items of ``attr`` from all results, keeping the first item per id."""
        seen = set()
        unique = []
        for result in results:
            for item in getattr(result, attr):
                item_id = item.get('id')
                # Items without an id cannot be deduplicated and are dropped.
                if item_id and item_id not in seen:
                    seen.add(item_id)
                    unique.append(item)
        return unique

    def _merge_results(self, results: List[QueryResult]) -> Dict[str, Any]:
        """Merge the results of several rounds, deduplicating nodes by id."""
        merged = {
            'matched_sections': self._collect_unique(results, 'matched_sections'),
            'matched_queries': self._collect_unique(results, 'matched_queries'),
            'matched_sources': self._collect_unique(results, 'matched_sources'),
            'total_nodes': 0,
            'cross_engine_insights': []
        }

        merged['total_nodes'] = (
            len(merged['matched_sections']) +
            len(merged['matched_queries']) +
            len(merged['matched_sources'])
        )

        merged['cross_engine_insights'] = self._generate_cross_engine_insights(merged)

        return merged

    def _generate_cross_engine_insights(self, merged: Dict[str, Any]) -> List[str]:
        """Derive short notes about how the results spread across engines."""
        insights = []

        # Engines contributing sections, in first-seen order.
        section_engines = list(dict.fromkeys(
            section.get('engine', 'unknown')
            for section in merged['matched_sections']
        ))
        if len(section_engines) > 1:
            insights.append(f"跨引擎信息来源: {', '.join(section_engines)}")

        # Only the number of distinct engines behind the queries is reported.
        query_engines = {
            query.get('engine', 'unknown')
            for query in merged['matched_queries']
        }
        if len(query_engines) > 1:
            insights.append(f"多引擎搜索视角: {len(query_engines)} 个引擎提供了相关搜索")

        return insights
@@ -512,3 +512,129 @@ def build_document_layout_prompt(payload: dict) -> str: @@ -512,3 +512,129 @@ def build_document_layout_prompt(payload: dict) -> str:
def build_word_budget_prompt(payload: dict) -> str:
    """Serialize the word-budget planning input into the text sent to the LLM.

    The payload is dumped as indented JSON with non-ASCII characters left
    unescaped, so field names and values reach the model verbatim.
    """
    serialized = json.dumps(payload, indent=2, ensure_ascii=False)
    return serialized
  515 +
  516 +
  517 +# ==================== GraphRAG 增强提示词 ====================
  518 +
# Prompt fragment that wraps knowledge-graph query results for one chapter and
# tells the model how to use them. The text contains a single
# ``{graph_results}`` placeholder; presumably the caller fills it with
# str.format() -- confirm against the call site.
GRAPHRAG_CHAPTER_ENHANCEMENT_INTRO = """
<知识图谱查询结果>
以下是针对本章节从知识图谱中查询到的相关信息,这些信息来自对Insight/Media/Query三个分析引擎结构化数据的聚合:

{graph_results}

请在生成本章内容时:
1. 充分利用上述图谱查询结果中的具体数据点、关键发现和关联关系
2. 优先引用图谱中标注的来源(搜索关键词、数据来源等)
3. 当图谱结果与三引擎报告有重叠时,以图谱中的结构化数据为准
4. 注意图谱中节点之间的关联关系,体现因果或递进逻辑
5. 如果图谱结果中有明确的数值或时间点,务必准确引用
</知识图谱查询结果>
"""
  533 +
  534 +
def build_graphrag_enhanced_user_prompt(payload: dict) -> str:
    """Build the chapter user prompt, appending GraphRAG results when present.

    The payload is serialized as indented JSON. If it carries a non-empty
    ``graph_enhancement_prompt`` entry, that text is left out of the JSON and
    appended after it, separated by a blank line.

    The caller's dict is not modified, so the same payload can be passed again
    (for example on a retry) and still yields the graph section.

    Args:
        payload: Chapter context dict, optionally containing
            ``graph_enhancement_prompt``.

    Returns:
        The serialized user prompt string.
    """
    graph_key = 'graph_enhancement_prompt'
    graph_prompt = payload.get(graph_key)

    # Serialize a filtered copy instead of pop()-ing from the caller's dict.
    base_payload = {key: value for key, value in payload.items() if key != graph_key}
    base_prompt = json.dumps(base_payload, ensure_ascii=False, indent=2)

    if graph_prompt:
        return f"{base_prompt}\n\n{graph_prompt}"

    return base_prompt
  557 +
  558 +
def format_graph_nodes_for_prompt(nodes: list) -> str:
    """Format graph nodes as prompt-friendly text, grouped by node type.

    Each group starts with a bracketed heading (preceded by a blank line) and
    lists at most 10 nodes. A node line shows its label, falling back to the
    node id when the label is missing or empty, followed by a short excerpt of
    selected properties (each value cut to 100 characters).

    Args:
        nodes: List of node dicts with ``id``, ``type``, ``label`` and
            ``properties`` keys; all are optional.

    Returns:
        The formatted text, or a placeholder string when ``nodes`` is empty.
    """
    if not nodes:
        return "(无相关节点)"

    type_labels = {
        'topic': '主题',
        'engine': '分析引擎',
        'section': '报告段落',
        'search_query': '搜索关键词',
        'source': '数据来源'
    }
    shown_props = ('summary', 'content', 'headline', 'url', 'query', 'source')
    max_per_type = 10

    # Group by type, keeping first-seen order of both types and nodes.
    by_type = {}
    for node in nodes:
        by_type.setdefault(node.get('type') or 'unknown', []).append(node)

    lines = []
    for node_type, type_nodes in by_type.items():
        lines.append(f"\n【{type_labels.get(node_type, node_type)}】")
        for node in type_nodes[:max_per_type]:
            # `or` (not a .get default) so a None/empty label falls back to the id.
            label = node.get('label') or node.get('id', '')
            props = node.get('properties') or {}
            key_props = {k: v for k, v in props.items() if k in shown_props}
            prop_str = ''
            if key_props:
                prop_str = ' | ' + ', '.join(
                    f"{k}:{str(v)[:100]}" for k, v in key_props.items()
                )
            lines.append(f" • {label}{prop_str}")

    return '\n'.join(lines)
  603 +
  604 +
def format_graph_edges_for_prompt(edges: list) -> str:
    """Format graph edges as prompt-friendly relation lines.

    Duplicate (source, relation, target) triples are dropped first, and the
    first 20 *distinct* relations are then emitted, so repeated edges no
    longer use up the output budget.

    Args:
        edges: List of edge dicts with ``source``, ``target`` and ``relation``
            keys; a missing relation is shown as ``related``.

    Returns:
        One line per relation, or a placeholder string when there are none.
    """
    if not edges:
        return "(无关联关系)"

    relation_labels = {
        'analyzed_by': '被分析于',
        'contains': '包含',
        'searched': '搜索了',
        'found': '发现于'
    }
    max_relations = 20

    lines = []
    seen = set()
    for edge in edges:
        source = edge.get('source', '')
        target = edge.get('target', '')
        relation = edge.get('relation', 'related')

        # A tuple key cannot collide the way the joined string "a-b-c" can
        # when an identifier itself contains '-'.
        key = (str(source), str(relation), str(target))
        if key in seen:
            continue
        seen.add(key)

        rel_label = relation_labels.get(relation, relation)
        lines.append(f" • {source} —[{rel_label}]→ {target}")
        if len(lines) >= max_relations:
            break

    return '\n'.join(lines) if lines else "(无关联关系)"
@@ -67,6 +67,14 @@ class Settings(BaseSettings): @@ -67,6 +67,14 @@ class Settings(BaseSettings):
67 "logs/json_repair_failures", description="无法修复的JSON块落盘目录" 67 "logs/json_repair_failures", description="无法修复的JSON块落盘目录"
68 ) 68 )
69 69
  70 + # GraphRAG 配置
  71 + GRAPHRAG_ENABLED: bool = Field(
  72 + default=False, description="是否启用GraphRAG知识图谱功能"
  73 + )
  74 + GRAPHRAG_MAX_QUERIES: int = Field(
  75 + default=3, description="GraphRAG每章节查询次数上限"
  76 + )
  77 +
70 class Config: 78 class Config:
71 """Pydantic配置:允许从.env读取并兼容大小写""" 79 """Pydantic配置:允许从.env读取并兼容大小写"""
72 env_file = ".env" 80 env_file = ".env"
@@ -113,7 +113,9 @@ CONFIG_KEYS = [ @@ -113,7 +113,9 @@ CONFIG_KEYS = [
113 'TAVILY_API_KEY', 113 'TAVILY_API_KEY',
114 'SEARCH_TOOL_TYPE', 114 'SEARCH_TOOL_TYPE',
115 'BOCHA_WEB_SEARCH_API_KEY', 115 'BOCHA_WEB_SEARCH_API_KEY',
116 - 'ANSPIRE_API_KEY' 116 + 'ANSPIRE_API_KEY',
  117 + 'GRAPHRAG_ENABLED',
  118 + 'GRAPHRAG_MAX_QUERIES'
117 ] 119 ]
118 120
119 121
@@ -1295,6 +1297,247 @@ def shutdown_system(): @@ -1295,6 +1297,247 @@ def shutdown_system():
1295 logger.exception("系统关闭过程中出现异常") 1297 logger.exception("系统关闭过程中出现异常")
1296 return jsonify({'success': False, 'message': f'系统关闭异常: {exc}'}), 500 1298 return jsonify({'success': False, 'message': f'系统关闭异常: {exc}'}), 500
1297 1299
  1300 +# ==================== GraphRAG API 端点 ====================
  1301 +
def _graph_to_vis_payload(graph) -> dict:
    """Convert a loaded graph into the nodes/edges/stats dict sent to Vis.js."""
    vis_nodes = [
        {
            'id': node_id,
            'label': node.label or node_id,
            'group': node.type,
            'title': _format_node_tooltip(node),
            'properties': node.properties
        }
        for node_id, node in graph.nodes.items()
    ]
    vis_edges = [
        {
            'from': edge.source,
            'to': edge.target,
            'label': edge.relation,
            'arrows': 'to'
        }
        for edge in graph.edges
    ]
    return {
        'nodes': vis_nodes,
        'edges': vis_edges,
        'stats': graph.get_stats()
    }


@app.route('/api/graph/<report_id>')
def get_graph_data(report_id):
    """
    Return the knowledge graph of one report in a Vis.js-friendly shape.

    Response JSON on success:
    - graph.nodes: [{id, label, group, title, properties}]
    - graph.edges: [{from, to, label, arrows}]
    - graph.stats: whatever ``graph.get_stats()`` returns

    Responds 404 when no graph file is found for ``report_id`` and 500 when
    loading or converting the graph raises.
    """
    try:
        # Imported lazily, as in the other GraphRAG endpoints.
        from ReportEngine.graphrag import GraphStorage

        # Look the graph file up in the default storage location.
        storage = GraphStorage()
        graph_path = storage.find_graph_by_report_id(report_id)

        if not graph_path or not graph_path.exists():
            return jsonify({
                'success': False,
                'message': f'未找到报告 {report_id} 的知识图谱数据'
            }), 404

        graph = storage.load(graph_path)

        return jsonify({
            'success': True,
            'graph': _graph_to_vis_payload(graph)
        })

    except Exception as e:
        logger.exception(f"获取图谱数据失败: {e}")
        return jsonify({
            'success': False,
            'message': f'获取图谱数据失败: {str(e)}'
        }), 500


@app.route('/api/graph/latest')
def get_latest_graph():
    """Return the most recently generated knowledge graph.

    The payload has the same shape as ``get_graph_data`` plus a ``report_id``
    field taken from the name of the directory containing the graph file.
    Responds 404 when no graph exists and 500 on unexpected errors.
    """
    try:
        from ReportEngine.graphrag import GraphStorage

        storage = GraphStorage()
        latest_path = storage.find_latest_graph()

        if not latest_path or not latest_path.exists():
            return jsonify({
                'success': False,
                'message': '暂无可用的知识图谱数据'
            }), 404

        graph = storage.load(latest_path)
        # A Path's parent is always truthy, so test the directory *name*
        # instead; it is empty when the file sits at a filesystem root.
        report_id = latest_path.parent.name or 'unknown'

        return jsonify({
            'success': True,
            'report_id': report_id,
            'graph': _graph_to_vis_payload(graph)
        })

    except Exception as e:
        logger.exception(f"获取最新图谱失败: {e}")
        return jsonify({
            'success': False,
            'message': f'获取最新图谱失败: {str(e)}'
        }), 500
  1419 +
  1420 +
@app.route('/graph-viewer')
@app.route('/graph-viewer/')
@app.route('/graph-viewer/<report_id>')
def graph_viewer(report_id=None):
    """
    Serve the knowledge-graph visualization page.

    Renders ``graph_viewer.html`` and passes ``report_id`` through to the
    template; it is ``None`` when the URL carries no report id.

    The page is intended to offer an interactive graph view with:
    - fullscreen mode
    - zooming and dragging
    - node detail inspection
    - filtering and search
    """
    return render_template('graph_viewer.html', report_id=report_id)
  1435 +
  1436 +
@app.route('/api/graph/query', methods=['POST'])
def query_graph():
    """
    Query a knowledge graph.

    Request body (JSON object, every field optional):
    {
        "report_id": "xxx",            # defaults to the latest graph
        "keywords": ["kw1", "kw2"],
        "node_types": ["section", "source"],
        "engine_filter": ...,          # passed through to QueryParams
        "depth": 2                     # non-negative integer, default 1
    }

    Responds 400 when the body is not a JSON object or ``keywords``/``depth``
    have the wrong type, 404 when no graph is available, and 500 on
    unexpected errors.
    """
    def serialize_node(node):
        # Shared by matched_nodes and expanded_nodes so both stay in sync.
        return {
            'id': node.id,
            'type': node.type,
            'label': node.label,
            'properties': node.properties
        }

    try:
        from ReportEngine.graphrag import GraphStorage, QueryEngine, QueryParams

        # silent=True: a missing or unparsable body yields None instead of
        # raising, which the broad handler below would report as a 500.
        data = request.get_json(silent=True)
        if data is None:
            data = {}

        keywords = data.get('keywords', []) if isinstance(data, dict) else None
        depth = data.get('depth', 1) if isinstance(data, dict) else None
        # bool is a subclass of int, so exclude it explicitly.
        depth_ok = isinstance(depth, int) and not isinstance(depth, bool) and depth >= 0
        if not isinstance(keywords, list) or not depth_ok:
            return jsonify({
                'success': False,
                'message': '请求参数格式错误'
            }), 400

        report_id = data.get('report_id')

        storage = GraphStorage()

        if report_id:
            graph_path = storage.find_graph_by_report_id(report_id)
        else:
            graph_path = storage.find_latest_graph()

        if not graph_path or not graph_path.exists():
            return jsonify({
                'success': False,
                'message': '未找到可用的知识图谱'
            }), 404

        graph = storage.load(graph_path)
        query_engine = QueryEngine(graph)

        params = QueryParams(
            keywords=keywords,
            node_types=data.get('node_types'),
            engine_filter=data.get('engine_filter'),
            depth=depth
        )

        result = query_engine.query(params)

        return jsonify({
            'success': True,
            'result': {
                'matched_nodes': [serialize_node(n) for n in result.matched_nodes],
                'related_edges': [
                    {
                        'source': e.source,
                        'target': e.target,
                        'relation': e.relation
                    }
                    for e in result.related_edges
                ],
                'expanded_nodes': [serialize_node(n) for n in result.expanded_nodes]
            }
        })

    except Exception as e:
        logger.exception(f"图谱查询失败: {e}")
        return jsonify({
            'success': False,
            'message': f'图谱查询失败: {str(e)}'
        }), 500
  1519 +
  1520 +
def _format_node_tooltip(node) -> str:
    """Build the HTML hover-tooltip fragment for a graph node.

    The result is ``<br>``-joined lines: a bold title (the label, or the id
    when the label is empty), the node type, then whichever of the
    ``summary``, ``content``, ``url`` and ``query`` properties are set.

    All interpolated values are HTML-escaped. Property values are converted
    with ``str()`` so non-string values cannot raise, and the ``...`` suffix
    is added only when a value was actually truncated.

    Args:
        node: Object exposing ``id``, ``label``, ``type`` and ``properties``.

    Returns:
        The tooltip as an HTML string.
    """
    from html import escape

    def clip(value, limit):
        text = str(value)
        return text if len(text) <= limit else f"{text[:limit]}..."

    lines = [f"<b>{escape(str(node.label or node.id))}</b>"]
    lines.append(f"类型: {escape(str(node.type))}")

    props = node.properties or {}
    # (property key, caption, max length or None for no truncation)
    fields = (
        ('summary', '摘要', 100),
        ('content', '内容', 80),
        ('url', '链接', None),
        ('query', '查询', None),
    )
    for key, caption, limit in fields:
        value = props.get(key)
        if value is None or value == '':
            continue
        text = str(value) if limit is None else clip(value, limit)
        lines.append(f"{caption}: {escape(text)}")

    return "<br>".join(lines)
  1537 +
  1538 +
  1539 +# ==================== GraphRAG API 端点结束 ====================
  1540 +
1298 @socketio.on('connect') 1541 @socketio.on('connect')
1299 def handle_connect(): 1542 def handle_connect():
1300 """客户端连接""" 1543 """客户端连接"""
  1 +<!DOCTYPE html>
  2 +<html lang="zh-CN">
  3 +<head>
  4 + <meta charset="UTF-8">
  5 + <meta name="viewport" content="width=device-width, initial-scale=1.0">
  6 + <title>知识图谱可视化 - BettaFish</title>
  7 + <!-- Vis.js -->
  8 + <script src="https://unpkg.com/vis-network/standalone/umd/vis-network.min.js"></script>
  9 + <style>
  10 + :root {
  11 + --primary-color: #4F46E5;
  12 + --primary-light: #818CF8;
  13 + --bg-color: #0F172A;
  14 + --card-bg: #1E293B;
  15 + --text-color: #F1F5F9;
  16 + --text-muted: #94A3B8;
  17 + --border-color: #334155;
  18 + --success-color: #10B981;
  19 + --warning-color: #F59E0B;
  20 + --error-color: #EF4444;
  21 + }
  22 +
  23 + * {
  24 + margin: 0;
  25 + padding: 0;
  26 + box-sizing: border-box;
  27 + }
  28 +
  29 + body {
  30 + font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
  31 + background-color: var(--bg-color);
  32 + color: var(--text-color);
  33 + min-height: 100vh;
  34 + }
  35 +
  36 + /* 顶部工具栏 */
  37 + .toolbar {
  38 + position: fixed;
  39 + top: 0;
  40 + left: 0;
  41 + right: 0;
  42 + height: 60px;
  43 + background: var(--card-bg);
  44 + border-bottom: 1px solid var(--border-color);
  45 + display: flex;
  46 + align-items: center;
  47 + padding: 0 20px;
  48 + gap: 16px;
  49 + z-index: 1000;
  50 + }
  51 +
  52 + .toolbar h1 {
  53 + font-size: 1.25rem;
  54 + font-weight: 600;
  55 + display: flex;
  56 + align-items: center;
  57 + gap: 8px;
  58 + }
  59 +
  60 + .toolbar h1 svg {
  61 + width: 24px;
  62 + height: 24px;
  63 + color: var(--primary-color);
  64 + }
  65 +
  66 + .toolbar-divider {
  67 + width: 1px;
  68 + height: 30px;
  69 + background: var(--border-color);
  70 + }
  71 +
  72 + .btn {
  73 + display: flex;
  74 + align-items: center;
  75 + gap: 6px;
  76 + padding: 8px 16px;
  77 + border: 1px solid var(--border-color);
  78 + border-radius: 6px;
  79 + background: transparent;
  80 + color: var(--text-color);
  81 + cursor: pointer;
  82 + font-size: 0.875rem;
  83 + transition: all 0.2s;
  84 + }
  85 +
  86 + .btn:hover {
  87 + background: var(--primary-color);
  88 + border-color: var(--primary-color);
  89 + }
  90 +
  91 + .btn-primary {
  92 + background: var(--primary-color);
  93 + border-color: var(--primary-color);
  94 + }
  95 +
  96 + .btn svg {
  97 + width: 16px;
  98 + height: 16px;
  99 + }
  100 +
  101 + .search-box {
  102 + flex: 1;
  103 + max-width: 400px;
  104 + position: relative;
  105 + }
  106 +
  107 + .search-box input {
  108 + width: 100%;
  109 + padding: 8px 16px 8px 40px;
  110 + border: 1px solid var(--border-color);
  111 + border-radius: 6px;
  112 + background: var(--bg-color);
  113 + color: var(--text-color);
  114 + font-size: 0.875rem;
  115 + }
  116 +
  117 + .search-box input:focus {
  118 + outline: none;
  119 + border-color: var(--primary-color);
  120 + }
  121 +
  122 + .search-box svg {
  123 + position: absolute;
  124 + left: 12px;
  125 + top: 50%;
  126 + transform: translateY(-50%);
  127 + width: 16px;
  128 + height: 16px;
  129 + color: var(--text-muted);
  130 + }
  131 +
  132 + /* 统计信息 */
  133 + .stats {
  134 + display: flex;
  135 + gap: 16px;
  136 + margin-left: auto;
  137 + }
  138 +
  139 + .stat-item {
  140 + display: flex;
  141 + align-items: center;
  142 + gap: 6px;
  143 + font-size: 0.875rem;
  144 + }
  145 +
  146 + .stat-item .label {
  147 + color: var(--text-muted);
  148 + }
  149 +
  150 + .stat-item .value {
  151 + font-weight: 600;
  152 + color: var(--primary-light);
  153 + }
  154 +
  155 + /* 左侧面板 */
  156 + .sidebar {
  157 + position: fixed;
  158 + top: 60px;
  159 + left: 0;
  160 + width: 300px;
  161 + bottom: 0;
  162 + background: var(--card-bg);
  163 + border-right: 1px solid var(--border-color);
  164 + overflow-y: auto;
  165 + padding: 16px;
  166 + transition: transform 0.3s;
  167 + z-index: 100;
  168 + }
  169 +
  170 + .sidebar.collapsed {
  171 + transform: translateX(-100%);
  172 + }
  173 +
  174 + .sidebar h3 {
  175 + font-size: 0.875rem;
  176 + font-weight: 600;
  177 + color: var(--text-muted);
  178 + text-transform: uppercase;
  179 + letter-spacing: 0.05em;
  180 + margin-bottom: 12px;
  181 + }
  182 +
  183 + .filter-group {
  184 + margin-bottom: 20px;
  185 + }
  186 +
  187 + .filter-item {
  188 + display: flex;
  189 + align-items: center;
  190 + gap: 10px;
  191 + padding: 8px 0;
  192 + cursor: pointer;
  193 + }
  194 +
  195 + .filter-item input[type="checkbox"] {
  196 + width: 16px;
  197 + height: 16px;
  198 + accent-color: var(--primary-color);
  199 + }
  200 +
  201 + .filter-item .color-dot {
  202 + width: 12px;
  203 + height: 12px;
  204 + border-radius: 50%;
  205 + }
  206 +
  207 + .filter-item .count {
  208 + margin-left: auto;
  209 + font-size: 0.75rem;
  210 + color: var(--text-muted);
  211 + }
  212 +
  213 + /* 节点详情 */
  214 + .node-detail {
  215 + margin-top: 20px;
  216 + padding-top: 20px;
  217 + border-top: 1px solid var(--border-color);
  218 + }
  219 +
  220 + .node-detail .detail-title {
  221 + font-weight: 600;
  222 + margin-bottom: 8px;
  223 + color: var(--primary-light);
  224 + }
  225 +
  226 + .node-detail .detail-type {
  227 + font-size: 0.75rem;
  228 + color: var(--text-muted);
  229 + margin-bottom: 12px;
  230 + }
  231 +
  232 + .node-detail .detail-props {
  233 + font-size: 0.875rem;
  234 + }
  235 +
  236 + .node-detail .prop-item {
  237 + padding: 6px 0;
  238 + border-bottom: 1px solid var(--border-color);
  239 + }
  240 +
  241 + .node-detail .prop-key {
  242 + color: var(--text-muted);
  243 + font-size: 0.75rem;
  244 + }
  245 +
  246 + .node-detail .prop-value {
  247 + margin-top: 2px;
  248 + word-break: break-all;
  249 + }
  250 +
  251 + /* 图谱容器 */
  252 + .graph-container {
  253 + position: fixed;
  254 + top: 60px;
  255 + left: 300px;
  256 + right: 0;
  257 + bottom: 0;
  258 + transition: left 0.3s;
  259 + }
  260 +
  261 + .graph-container.fullwidth {
  262 + left: 0;
  263 + }
  264 +
  265 + #network {
  266 + width: 100%;
  267 + height: 100%;
  268 + background: var(--bg-color);
  269 + }
  270 +
  271 + /* 加载状态 */
  272 + .loading-overlay {
  273 + position: absolute;
  274 + top: 0;
  275 + left: 0;
  276 + right: 0;
  277 + bottom: 0;
  278 + display: flex;
  279 + flex-direction: column;
  280 + align-items: center;
  281 + justify-content: center;
  282 + background: var(--bg-color);
  283 + z-index: 500;
  284 + }
  285 +
  286 + .loading-spinner {
  287 + width: 48px;
  288 + height: 48px;
  289 + border: 4px solid var(--border-color);
  290 + border-top-color: var(--primary-color);
  291 + border-radius: 50%;
  292 + animation: spin 1s linear infinite;
  293 + }
  294 +
  295 + @keyframes spin {
  296 + to { transform: rotate(360deg); }
  297 + }
  298 +
  299 + .loading-text {
  300 + margin-top: 16px;
  301 + color: var(--text-muted);
  302 + }
  303 +
  304 + /* 空状态 */
  305 + .empty-state {
  306 + position: absolute;
  307 + top: 50%;
  308 + left: 50%;
  309 + transform: translate(-50%, -50%);
  310 + text-align: center;
  311 + color: var(--text-muted);
  312 + }
  313 +
  314 + .empty-state svg {
  315 + width: 64px;
  316 + height: 64px;
  317 + margin-bottom: 16px;
  318 + opacity: 0.5;
  319 + }
  320 +
  321 + /* 提示信息 */
  322 + .toast {
  323 + position: fixed;
  324 + bottom: 20px;
  325 + right: 20px;
  326 + padding: 12px 20px;
  327 + background: var(--card-bg);
  328 + border: 1px solid var(--border-color);
  329 + border-radius: 8px;
  330 + display: none;
  331 + animation: slideIn 0.3s;
  332 + z-index: 2000;
  333 + }
  334 +
  335 + @keyframes slideIn {
  336 + from {
  337 + transform: translateX(100%);
  338 + opacity: 0;
  339 + }
  340 + }
  341 +
  342 + /* 图例 */
  343 + .legend {
  344 + position: fixed;
  345 + bottom: 20px;
  346 + left: 320px;
  347 + background: var(--card-bg);
  348 + border: 1px solid var(--border-color);
  349 + border-radius: 8px;
  350 + padding: 12px 16px;
  351 + display: flex;
  352 + gap: 16px;
  353 + z-index: 100;
  354 + transition: left 0.3s;
  355 + }
  356 +
  357 + .legend.fullwidth {
  358 + left: 20px;
  359 + }
  360 +
  361 + .legend-item {
  362 + display: flex;
  363 + align-items: center;
  364 + gap: 6px;
  365 + font-size: 0.75rem;
  366 + }
  367 +
  368 + .legend-item .dot {
  369 + width: 10px;
  370 + height: 10px;
  371 + border-radius: 50%;
  372 + }
  373 +
  374 + /* 全屏模式 */
  375 + .fullscreen-btn {
  376 + position: fixed;
  377 + bottom: 20px;
  378 + right: 20px;
  379 + z-index: 100;
  380 + }
  381 +
  382 + /* 节点类型颜色 */
  383 + .color-topic { background-color: #EF4444; }
  384 + .color-engine { background-color: #F59E0B; }
  385 + .color-section { background-color: #10B981; }
  386 + .color-search_query { background-color: #3B82F6; }
  387 + .color-source { background-color: #8B5CF6; }
  388 + </style>
  389 +</head>
  390 +<body>
  391 + <!-- 顶部工具栏 -->
  392 + <div class="toolbar">
  393 + <h1>
  394 + <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
  395 + <circle cx="12" cy="5" r="3"/>
  396 + <circle cx="5" cy="19" r="3"/>
  397 + <circle cx="19" cy="19" r="3"/>
  398 + <line x1="12" y1="8" x2="5" y2="16"/>
  399 + <line x1="12" y1="8" x2="19" y2="16"/>
  400 + </svg>
  401 + 知识图谱
  402 + </h1>
  403 +
  404 + <div class="toolbar-divider"></div>
  405 +
  406 + <button class="btn" id="toggleSidebar" title="切换侧边栏">
  407 + <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
  408 + <rect x="3" y="3" width="18" height="18" rx="2"/>
  409 + <line x1="9" y1="3" x2="9" y2="21"/>
  410 + </svg>
  411 + </button>
  412 +
  413 + <button class="btn" id="fitBtn" title="适应视图">
  414 + <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
  415 + <path d="M15 3h6v6M9 21H3v-6M21 3l-7 7M3 21l7-7"/>
  416 + </svg>
  417 + 适应
  418 + </button>
  419 +
  420 + <button class="btn" id="zoomInBtn" title="放大">
  421 + <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
  422 + <circle cx="11" cy="11" r="8"/>
  423 + <line x1="21" y1="21" x2="16.65" y2="16.65"/>
  424 + <line x1="11" y1="8" x2="11" y2="14"/>
  425 + <line x1="8" y1="11" x2="14" y2="11"/>
  426 + </svg>
  427 + </button>
  428 +
  429 + <button class="btn" id="zoomOutBtn" title="缩小">
  430 + <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
  431 + <circle cx="11" cy="11" r="8"/>
  432 + <line x1="21" y1="21" x2="16.65" y2="16.65"/>
  433 + <line x1="8" y1="11" x2="14" y2="11"/>
  434 + </svg>
  435 + </button>
  436 +
  437 + <div class="search-box">
  438 + <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
  439 + <circle cx="11" cy="11" r="8"/>
  440 + <line x1="21" y1="21" x2="16.65" y2="16.65"/>
  441 + </svg>
  442 + <input type="text" id="searchInput" placeholder="搜索节点...">
  443 + </div>
  444 +
  445 + <div class="stats" id="statsContainer">
  446 + <div class="stat-item">
  447 + <span class="label">节点</span>
  448 + <span class="value" id="nodeCount">0</span>
  449 + </div>
  450 + <div class="stat-item">
  451 + <span class="label">关系</span>
  452 + <span class="value" id="edgeCount">0</span>
  453 + </div>
  454 + </div>
  455 + </div>
  456 +
  457 + <!-- 左侧面板 -->
  458 + <div class="sidebar" id="sidebar">
  459 + <div class="filter-group">
  460 + <h3>节点类型</h3>
  461 + <label class="filter-item">
  462 + <input type="checkbox" checked data-type="topic">
  463 + <span class="color-dot color-topic"></span>
  464 + <span>主题</span>
  465 + <span class="count" id="count-topic">0</span>
  466 + </label>
  467 + <label class="filter-item">
  468 + <input type="checkbox" checked data-type="engine">
  469 + <span class="color-dot color-engine"></span>
  470 + <span>分析引擎</span>
  471 + <span class="count" id="count-engine">0</span>
  472 + </label>
  473 + <label class="filter-item">
  474 + <input type="checkbox" checked data-type="section">
  475 + <span class="color-dot color-section"></span>
  476 + <span>报告段落</span>
  477 + <span class="count" id="count-section">0</span>
  478 + </label>
  479 + <label class="filter-item">
  480 + <input type="checkbox" checked data-type="search_query">
  481 + <span class="color-dot color-search_query"></span>
  482 + <span>搜索关键词</span>
  483 + <span class="count" id="count-search_query">0</span>
  484 + </label>
  485 + <label class="filter-item">
  486 + <input type="checkbox" checked data-type="source">
  487 + <span class="color-dot color-source"></span>
  488 + <span>数据来源</span>
  489 + <span class="count" id="count-source">0</span>
  490 + </label>
  491 + </div>
  492 +
  493 + <div class="node-detail" id="nodeDetail" style="display: none;">
  494 + <h3>节点详情</h3>
  495 + <div class="detail-title" id="detailTitle"></div>
  496 + <div class="detail-type" id="detailType"></div>
  497 + <div class="detail-props" id="detailProps"></div>
  498 + </div>
  499 + </div>
  500 +
  501 + <!-- 图谱容器 -->
  502 + <div class="graph-container" id="graphContainer">
  503 + <div id="network"></div>
  504 +
  505 + <!-- 加载状态 -->
  506 + <div class="loading-overlay" id="loadingOverlay">
  507 + <div class="loading-spinner"></div>
  508 + <div class="loading-text">正在加载知识图谱...</div>
  509 + </div>
  510 +
  511 + <!-- 空状态 -->
  512 + <div class="empty-state" id="emptyState" style="display: none;">
  513 + <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1">
  514 + <circle cx="12" cy="12" r="10"/>
  515 + <path d="M8 15h8"/>
  516 + <path d="M9 9h.01"/>
  517 + <path d="M15 9h.01"/>
  518 + </svg>
  519 + <h3>暂无图谱数据</h3>
  520 + <p>请先生成报告以创建知识图谱</p>
  521 + </div>
  522 + </div>
  523 +
  524 + <!-- 图例 -->
  525 + <div class="legend" id="legend">
  526 + <div class="legend-item">
  527 + <span class="dot color-topic"></span>
  528 + <span>主题</span>
  529 + </div>
  530 + <div class="legend-item">
  531 + <span class="dot color-engine"></span>
  532 + <span>引擎</span>
  533 + </div>
  534 + <div class="legend-item">
  535 + <span class="dot color-section"></span>
  536 + <span>段落</span>
  537 + </div>
  538 + <div class="legend-item">
  539 + <span class="dot color-search_query"></span>
  540 + <span>搜索词</span>
  541 + </div>
  542 + <div class="legend-item">
  543 + <span class="dot color-source"></span>
  544 + <span>来源</span>
  545 + </div>
  546 + </div>
  547 +
  548 + <!-- 全屏按钮 -->
  549 + <button class="btn fullscreen-btn" id="fullscreenBtn" title="全屏">
  550 + <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
  551 + <path d="M8 3H5a2 2 0 00-2 2v3m18 0V5a2 2 0 00-2-2h-3m0 18h3a2 2 0 002-2v-3M3 16v3a2 2 0 002 2h3"/>
  552 + </svg>
  553 + </button>
  554 +
  555 + <!-- 提示 -->
  556 + <div class="toast" id="toast"></div>
  557 +
  558 + <script>
// Configuration: fill/border colour per node type (keyed by the node's `group`).
const NODE_COLORS = {
    topic: '#EF4444',
    engine: '#F59E0B',
    section: '#10B981',
    search_query: '#3B82F6',
    source: '#8B5CF6'
};

// Vis.js shape name per node type.
const NODE_SHAPES = {
    topic: 'star',
    engine: 'diamond',
    section: 'dot',
    search_query: 'triangle',
    source: 'square'
};

// Global state
let network = null;   // current vis.Network instance, created by renderGraph()
let allNodes = [];    // unfiltered nodes as returned by the graph API
let allEdges = [];    // unfiltered edges as returned by the graph API
// Rendered server-side by the template: a JSON string when a report id was
// passed to the page, otherwise the literal null.
let reportId = {{ report_id | tojson if report_id else 'null' }};

// Initialisation: load the data and wire up the controls once the DOM is ready.
document.addEventListener('DOMContentLoaded', () => {
    loadGraphData();
    setupEventListeners();
});
  587 +
  588 + // 加载图谱数据
// Fetch the graph for `reportId` (or the latest graph when none is set),
// store it in allNodes/allEdges and render it. Shows the empty state when the
// API reports failure or the request/render throws; the loading overlay is
// always hidden afterwards.
async function loadGraphData() {
    showLoading(true);

    try {
        // The id becomes a URL path segment, so encode it: characters such as
        // '#', '?' or spaces would otherwise change the request target.
        const url = reportId
            ? `/api/graph/${encodeURIComponent(reportId)}`
            : '/api/graph/latest';

        const response = await fetch(url);
        const data = await response.json();

        if (data.success && data.graph) {
            allNodes = data.graph.nodes;
            allEdges = data.graph.edges;

            updateStats(data.graph.stats);
            renderGraph();
        } else {
            showEmpty(true);
        }
    } catch (error) {
        console.error('加载图谱失败:', error);
        showToast('加载图谱失败: ' + error.message);
        showEmpty(true);
    } finally {
        // Single exit point for the overlay instead of one call per branch.
        showLoading(false);
    }
}
  618 +
  619 + // 渲染图谱
// (Re)build the Vis.js network from allNodes/allEdges, honouring the node-type
// checkboxes. Edges are kept only when both endpoints are visible.
function renderGraph() {
    const container = document.getElementById('network');

    // Tear down the previous instance first: creating a new vis.Network on the
    // same container without destroy() leaves the old canvas and its event
    // bindings alive on every re-render.
    if (network) {
        network.destroy();
        network = null;
    }

    // Nodes
    const visibleTypes = getVisibleTypes();
    const filteredNodes = allNodes.filter(n => visibleTypes.includes(n.group));
    const filteredNodeIds = new Set(filteredNodes.map(n => n.id));

    const nodes = new vis.DataSet(filteredNodes.map(node => {
        // Unknown node types fall back to a neutral grey.
        const baseColor = NODE_COLORS[node.group] || '#6B7280';
        return {
            id: node.id,
            label: truncateLabel(node.label, 20),
            title: node.title,
            group: node.group,
            color: {
                background: baseColor,
                border: baseColor,
                highlight: {
                    background: lightenColor(baseColor),
                    border: baseColor
                }
            },
            shape: NODE_SHAPES[node.group] || 'dot',
            size: node.group === 'topic' ? 30 : (node.group === 'engine' ? 25 : 15),
            font: {
                color: '#F1F5F9',
                size: 12
            },
            // Keep the original API record on the Vis.js item.
            _data: node
        };
    }));

    // Edges
    const edges = new vis.DataSet(allEdges
        .filter(e => filteredNodeIds.has(e.from) && filteredNodeIds.has(e.to))
        .map(edge => ({
            from: edge.from,
            to: edge.to,
            label: edge.label,
            arrows: edge.arrows || 'to',
            color: {
                color: '#475569',
                highlight: '#818CF8'
            },
            font: {
                color: '#94A3B8',
                size: 10,
                strokeWidth: 0
            },
            smooth: {
                type: 'continuous'
            }
        }))
    );

    // Network options
    const options = {
        nodes: {
            borderWidth: 2,
            shadow: true
        },
        edges: {
            width: 1,
            shadow: true
        },
        physics: {
            enabled: true,
            solver: 'forceAtlas2Based',
            forceAtlas2Based: {
                gravitationalConstant: -100,
                centralGravity: 0.01,
                springLength: 150,
                springConstant: 0.08,
                damping: 0.5
            },
            stabilization: {
                enabled: true,
                iterations: 200
            }
        },
        interaction: {
            hover: true,
            tooltipDelay: 100,
            zoomView: true,
            dragView: true
        }
    };

    network = new vis.Network(container, { nodes, edges }, options);

    // Clicking a node opens its detail panel; clicking empty space closes it.
    network.on('click', (params) => {
        if (params.nodes.length > 0) {
            const nodeId = params.nodes[0];
            const node = allNodes.find(n => n.id === nodeId);
            if (node) {
                showNodeDetail(node);
            }
        } else {
            hideNodeDetail();
        }
    });

    // Fit the view once the physics layout has stabilised.
    network.once('stabilizationIterationsDone', () => {
        network.fit({ animation: true });
    });
}
  728 +
  729 + // 显示节点详情
// Fill and show the sidebar detail panel for `node` (a record from allNodes).
// Property keys and values come from graph data built out of crawled and
// generated text, so they are inserted with textContent, never as HTML.
function showNodeDetail(node) {
    const detailPanel = document.getElementById('nodeDetail');
    const titleEl = document.getElementById('detailTitle');
    const typeEl = document.getElementById('detailType');
    const propsEl = document.getElementById('detailProps');

    titleEl.textContent = node.label;

    const typeLabels = {
        topic: '主题',
        engine: '分析引擎',
        section: '报告段落',
        search_query: '搜索关键词',
        source: '数据来源'
    };
    typeEl.textContent = typeLabels[node.group] || node.group;

    // Helper: <div class="..."> with plain-text content.
    const makeDiv = (className, text) => {
        const el = document.createElement('div');
        el.className = className;
        if (text !== undefined) {
            el.textContent = text;
        }
        return el;
    };

    // Rebuild the property list from scratch.
    propsEl.textContent = '';
    const props = node.properties || {};
    for (const [key, value] of Object.entries(props)) {
        if (!value) {
            continue;   // skip empty / falsy properties, as before
        }
        const item = makeDiv('prop-item');
        item.appendChild(makeDiv('prop-key', key));
        item.appendChild(makeDiv('prop-value', truncateText(String(value), 200)));
        propsEl.appendChild(item);
    }
    if (!propsEl.hasChildNodes()) {
        propsEl.appendChild(makeDiv('prop-item', '无附加属性'));
    }

    detailPanel.style.display = 'block';
}
  764 +
  765 + // Hide the node detail panel by setting the #nodeDetail element's display to 'none'.
  766 + function hideNodeDetail() {
  767 + document.getElementById('nodeDetail').style.display = 'none';
  768 + }
  769 +
  770 + // Update the statistics display: writes the total node/edge counts and the per-type counts from `stats` into the page.
  771 + function updateStats(stats) {
  772 + document.getElementById('nodeCount').textContent = stats.total_nodes || 0; // a missing or falsy field is displayed as 0
  773 + document.getElementById('edgeCount').textContent = stats.total_edges || 0;
  774 + // NOTE(review): reads fields directly off `stats`, so this assumes a non-null object — confirm against the caller.
  775 + // Update the per-type counters (one element per node type, ids prefixed with "count-")
  776 + document.getElementById('count-topic').textContent = stats.topic || 0;
  777 + document.getElementById('count-engine').textContent = stats.engine || 0;
  778 + document.getElementById('count-section').textContent = stats.section || 0;
  779 + document.getElementById('count-search_query').textContent = stats.search_query || 0;
  780 + document.getElementById('count-source').textContent = stats.source || 0;
  781 + }
  782 +
  783 + // Return the list of currently visible types: the data-type value of every checked filter checkbox.
  784 + function getVisibleTypes() {
  785 + const types = [];
  786 + document.querySelectorAll('.filter-item input[type="checkbox"]').forEach(cb => {
  787 + if (cb.checked) {
  788 + types.push(cb.dataset.type); // value of the checkbox's data-type attribute
  789 + }
  790 + });
  791 + return types;
  792 + }
  793 +
  794 + // 设置事件监听
  795 + function setupEventListeners() {
  796 + // 侧边栏切换
  797 + document.getElementById('toggleSidebar').addEventListener('click', () => {
  798 + const sidebar = document.getElementById('sidebar');
  799 + const container = document.getElementById('graphContainer');
  800 + const legend = document.getElementById('legend');
  801 +
  802 + sidebar.classList.toggle('collapsed');
  803 + container.classList.toggle('fullwidth');
  804 + legend.classList.toggle('fullwidth');
  805 + });
  806 +
  807 + // 适应视图
  808 + document.getElementById('fitBtn').addEventListener('click', () => {
  809 + if (network) network.fit({ animation: true });
  810 + });
  811 +
  812 + // 放大
  813 + document.getElementById('zoomInBtn').addEventListener('click', () => {
  814 + if (network) {
  815 + const scale = network.getScale() * 1.2;
  816 + network.moveTo({ scale, animation: true });
  817 + }
  818 + });
  819 +
  820 + // 缩小
  821 + document.getElementById('zoomOutBtn').addEventListener('click', () => {
  822 + if (network) {
  823 + const scale = network.getScale() / 1.2;
  824 + network.moveTo({ scale, animation: true });
  825 + }
  826 + });
  827 +
  828 + // 全屏
  829 + document.getElementById('fullscreenBtn').addEventListener('click', () => {
  830 + if (!document.fullscreenElement) {
  831 + document.documentElement.requestFullscreen();
  832 + } else {
  833 + document.exitFullscreen();
  834 + }
  835 + });
  836 +
  837 + // 搜索
  838 + document.getElementById('searchInput').addEventListener('input', (e) => {
  839 + const query = e.target.value.toLowerCase();
  840 + if (!query) {
  841 + if (network) network.selectNodes([]);
  842 + return;
  843 + }
  844 +
  845 + const matchedIds = allNodes
  846 + .filter(n => n.label.toLowerCase().includes(query))
  847 + .map(n => n.id);
  848 +
  849 + if (network && matchedIds.length > 0) {
  850 + network.selectNodes(matchedIds);
  851 + network.focus(matchedIds[0], { animation: true, scale: 1.5 });
  852 + }
  853 + });
  854 +
  855 + // 筛选
  856 + document.querySelectorAll('.filter-item input[type="checkbox"]').forEach(cb => {
  857 + cb.addEventListener('change', () => {
  858 + renderGraph();
  859 + });
  860 + });
  861 + }
  862 +
  863 + // Helper functions
  864 + function showLoading(show) { // Show (display: flex) or hide the #loadingOverlay element depending on the truthiness of `show`
  865 + document.getElementById('loadingOverlay').style.display = show ? 'flex' : 'none';
  866 + }
  867 +
  868 + function showEmpty(show) { // Show (display: block) or hide the #emptyState element depending on the truthiness of `show`
  869 + document.getElementById('emptyState').style.display = show ? 'block' : 'none';
  870 + }
  871 +
  872 + function showToast(message) {
  873 + const toast = document.getElementById('toast');
  874 + toast.textContent = message;
  875 + toast.style.display = 'block';
  876 + setTimeout(() => {
  877 + toast.style.display = 'none';
  878 + }, 3000);
  879 + }
  880 +
  881 + function truncateLabel(text, maxLen) {
  882 + if (!text) return '';
  883 + return text.length > maxLen ? text.slice(0, maxLen) + '...' : text;
  884 + }
  885 +
  886 + function truncateText(text, maxLen) { // Keep the first maxLen characters of `text` and append '...' when it is longer than maxLen
  887 + if (!text) return ''; // any falsy input (null, undefined, '') yields an empty string
  888 + return text.length > maxLen ? text.slice(0, maxLen) + '...' : text; // the '...' suffix is added on top of maxLen characters
  889 + }
  890 +
  891 + function lightenColor(color) {
  892 + // 简单的颜色变亮
  893 + const hex = color.replace('#', '');
  894 + const r = Math.min(255, parseInt(hex.slice(0, 2), 16) + 40);
  895 + const g = Math.min(255, parseInt(hex.slice(2, 4), 16) + 40);
  896 + const b = Math.min(255, parseInt(hex.slice(4, 6), 16) + 40);
  897 + return `rgb(${r}, ${g}, ${b})`;
  898 + }
  899 + </script>
  900 +</body>
  901 +</html>