马一丁

Enhance Repair Capabilities

@@ -13,7 +13,7 @@ import os @@ -13,7 +13,7 @@ import os
13 from pathlib import Path 13 from pathlib import Path
14 from uuid import uuid4 14 from uuid import uuid4
15 from datetime import datetime 15 from datetime import datetime
16 -from typing import Optional, Dict, Any, List, Callable 16 +from typing import Optional, Dict, Any, List, Callable, Tuple
17 17
18 from loguru import logger 18 from loguru import logger
19 19
@@ -199,6 +199,7 @@ class ReportAgent: @@ -199,6 +199,7 @@ class ReportAgent:
199 199
200 # 初始化LLM客户端 200 # 初始化LLM客户端
201 self.llm_client = self._initialize_llm() 201 self.llm_client = self._initialize_llm()
  202 + self.json_rescue_clients = self._initialize_rescue_llms()
202 203
203 # 初始化章级存储/校验/渲染组件 204 # 初始化章级存储/校验/渲染组件
204 self.chapter_storage = ChapterStorage(self.config.CHAPTER_OUTPUT_DIR) 205 self.chapter_storage = ChapterStorage(self.config.CHAPTER_OUTPUT_DIR)
@@ -263,6 +264,46 @@ class ReportAgent: @@ -263,6 +264,46 @@ class ReportAgent:
263 model_name=self.config.REPORT_ENGINE_MODEL_NAME, 264 model_name=self.config.REPORT_ENGINE_MODEL_NAME,
264 base_url=self.config.REPORT_ENGINE_BASE_URL, 265 base_url=self.config.REPORT_ENGINE_BASE_URL,
265 ) 266 )
  267 +
  268 + def _initialize_rescue_llms(self) -> List[Tuple[str, LLMClient]]:
  269 + """
  270 + 初始化跨引擎章节修复所需的LLM客户端列表。
  271 +
  272 + 顺序遵循“Report → Forum → Insight → Media”,缺失配置会被自动跳过。
  273 + """
  274 + clients: List[Tuple[str, LLMClient]] = []
  275 + if self.llm_client:
  276 + clients.append(("report_engine", self.llm_client))
  277 + fallback_specs = [
  278 + (
  279 + "forum_engine",
  280 + self.config.FORUM_HOST_API_KEY,
  281 + self.config.FORUM_HOST_MODEL_NAME,
  282 + self.config.FORUM_HOST_BASE_URL,
  283 + ),
  284 + (
  285 + "insight_engine",
  286 + self.config.INSIGHT_ENGINE_API_KEY,
  287 + self.config.INSIGHT_ENGINE_MODEL_NAME,
  288 + self.config.INSIGHT_ENGINE_BASE_URL,
  289 + ),
  290 + (
  291 + "media_engine",
  292 + self.config.MEDIA_ENGINE_API_KEY,
  293 + self.config.MEDIA_ENGINE_MODEL_NAME,
  294 + self.config.MEDIA_ENGINE_BASE_URL,
  295 + ),
  296 + ]
  297 + for label, api_key, model_name, base_url in fallback_specs:
  298 + if not api_key or not model_name:
  299 + continue
  300 + try:
  301 + client = LLMClient(api_key=api_key, model_name=model_name, base_url=base_url)
  302 + except Exception as exc:
  303 + logger.warning(f"{label} LLM初始化失败,跳过该修复通道: {exc}")
  304 + continue
  305 + clients.append((label, client))
  306 + return clients
266 307
267 def _initialize_nodes(self): 308 def _initialize_nodes(self):
268 """ 309 """
@@ -280,7 +321,9 @@ class ReportAgent: @@ -280,7 +321,9 @@ class ReportAgent:
280 self.chapter_generation_node = ChapterGenerationNode( 321 self.chapter_generation_node = ChapterGenerationNode(
281 self.llm_client, 322 self.llm_client,
282 self.validator, 323 self.validator,
283 - self.chapter_storage 324 + self.chapter_storage,
  325 + fallback_llm_clients=self.json_rescue_clients,
  326 + error_log_dir=self.config.JSON_ERROR_LOG_DIR,
284 ) 327 )
285 328
286 def generate_report(self, query: str, reports: List[Any], forum_logs: str = "", 329 def generate_report(self, query: str, reports: List[Any], forum_logs: str = "",
@@ -8,9 +8,10 @@ @@ -8,9 +8,10 @@
8 from __future__ import annotations 8 from __future__ import annotations
9 9
10 import json 10 import json
  11 +from datetime import datetime
11 from pathlib import Path 12 from pathlib import Path
12 import re 13 import re
13 -from typing import Any, Dict, List, Tuple, Callable, Optional 14 +from typing import Any, Dict, List, Tuple, Callable, Optional, Set
14 15
15 from loguru import logger 16 from loguru import logger
16 17
@@ -19,7 +20,9 @@ from ..ir import ALLOWED_BLOCK_TYPES, ALLOWED_INLINE_MARKS, IRValidator @@ -19,7 +20,9 @@ from ..ir import ALLOWED_BLOCK_TYPES, ALLOWED_INLINE_MARKS, IRValidator
19 from ..prompts import ( 20 from ..prompts import (
20 SYSTEM_PROMPT_CHAPTER_JSON, 21 SYSTEM_PROMPT_CHAPTER_JSON,
21 SYSTEM_PROMPT_CHAPTER_JSON_REPAIR, 22 SYSTEM_PROMPT_CHAPTER_JSON_REPAIR,
  23 + SYSTEM_PROMPT_CHAPTER_JSON_RECOVERY,
22 build_chapter_repair_prompt, 24 build_chapter_repair_prompt,
  25 + build_chapter_recovery_payload,
23 build_chapter_user_prompt, 26 build_chapter_user_prompt,
24 ) 27 )
25 from .base_node import BaseNode 28 from .base_node import BaseNode
@@ -96,7 +99,14 @@ class ChapterGenerationNode(BaseNode): @@ -96,7 +99,14 @@ class ChapterGenerationNode(BaseNode):
96 _PARAGRAPH_FRAGMENT_NO_TERMINATOR_MAX_CHARS = 240 99 _PARAGRAPH_FRAGMENT_NO_TERMINATOR_MAX_CHARS = 240
97 _TERMINATION_PUNCTUATION = set("。!?!?;;……") 100 _TERMINATION_PUNCTUATION = set("。!?!?;;……")
98 101
99 - def __init__(self, llm_client, validator: IRValidator, storage: ChapterStorage): 102 + def __init__(
  103 + self,
  104 + llm_client,
  105 + validator: IRValidator,
  106 + storage: ChapterStorage,
  107 + fallback_llm_clients: Optional[List[Tuple[str, Any]]] = None,
  108 + error_log_dir: Optional[str | Path] = None,
  109 + ):
100 """ 110 """
101 记录LLM客户端/校验器/章节存储器,便于run方法调度。 111 记录LLM客户端/校验器/章节存储器,便于run方法调度。
102 112
@@ -108,6 +118,17 @@ class ChapterGenerationNode(BaseNode): @@ -108,6 +118,17 @@ class ChapterGenerationNode(BaseNode):
108 super().__init__(llm_client, "ChapterGenerationNode") 118 super().__init__(llm_client, "ChapterGenerationNode")
109 self.validator = validator 119 self.validator = validator
110 self.storage = storage 120 self.storage = storage
  121 + self.fallback_llm_clients: List[Tuple[str, Any]] = fallback_llm_clients or [
  122 + ("report_engine", llm_client)
  123 + ]
  124 + error_dir = Path(error_log_dir or "logs/json_repair_failures")
  125 + error_dir.mkdir(parents=True, exist_ok=True)
  126 + self.error_log_dir = error_dir
  127 + self._failed_block_counter = 0
  128 + self._active_run_id: Optional[str] = None
  129 + self._rescue_attempted_labels: Dict[str, Set[str]] = {}
  130 + self._skipped_placeholder_chapters: Set[str] = set()
  131 + self._archived_failed_json: Dict[str, str] = {}
111 132
112 def run( 133 def run(
113 self, 134 self,
@@ -141,6 +162,8 @@ class ChapterGenerationNode(BaseNode): @@ -141,6 +162,8 @@ class ChapterGenerationNode(BaseNode):
141 "order": section.order, 162 "order": section.order,
142 } 163 }
143 chapter_dir = self.storage.begin_chapter(run_dir, chapter_meta) 164 chapter_dir = self.storage.begin_chapter(run_dir, chapter_meta)
  165 + run_id = run_dir.name
  166 + self._ensure_run_state(run_id)
144 llm_payload = self._build_payload(section, context) 167 llm_payload = self._build_payload(section, context)
145 user_message = build_chapter_user_prompt(llm_payload) 168 user_message = build_chapter_user_prompt(llm_payload)
146 169
@@ -151,7 +174,30 @@ class ChapterGenerationNode(BaseNode): @@ -151,7 +174,30 @@ class ChapterGenerationNode(BaseNode):
151 section_meta=chapter_meta, 174 section_meta=chapter_meta,
152 **kwargs, 175 **kwargs,
153 ) 176 )
154 - chapter_json = self._parse_chapter(raw_text) 177 + parse_context: List[str] = []
  178 + placeholder_created = False
  179 + try:
  180 + chapter_json = self._parse_chapter(raw_text)
  181 + except ChapterJsonParseError as parse_error:
  182 + logger.warning(f"{section.title} 章节JSON解析失败,尝试跨引擎修复: {parse_error}")
  183 + parse_context.append(str(parse_error))
  184 + self._archive_failed_output(section, raw_text)
  185 + recovered = self._attempt_cross_engine_json_rescue(
  186 + section,
  187 + llm_payload,
  188 + raw_text,
  189 + run_id,
  190 + )
  191 + if recovered:
  192 + chapter_json = recovered
  193 + logger.info(f"{section.title} 章节JSON已通过跨引擎修复")
  194 + else:
  195 + placeholder = self._build_placeholder_chapter(section, raw_text, parse_error)
  196 + if not placeholder:
  197 + raise
  198 + chapter_json, placeholder_notes = placeholder
  199 + parse_context.extend(placeholder_notes)
  200 + placeholder_created = True
155 201
156 # 自动补全关键字段后再校验 202 # 自动补全关键字段后再校验
157 chapter_json.setdefault("chapterId", section.chapter_id) 203 chapter_json.setdefault("chapterId", section.chapter_id)
@@ -176,13 +222,13 @@ class ChapterGenerationNode(BaseNode): @@ -176,13 +222,13 @@ class ChapterGenerationNode(BaseNode):
176 self._sanitize_chapter_blocks(chapter_json) 222 self._sanitize_chapter_blocks(chapter_json)
177 valid, errors = self.validator.validate_chapter(chapter_json) 223 valid, errors = self.validator.validate_chapter(chapter_json)
178 content_error: ChapterContentError | None = None 224 content_error: ChapterContentError | None = None
179 - if valid: 225 + if valid and not placeholder_created:
180 try: 226 try:
181 self._ensure_content_density(chapter_json) 227 self._ensure_content_density(chapter_json)
182 except ChapterContentError as exc: 228 except ChapterContentError as exc:
183 content_error = exc 229 content_error = exc
184 230
185 - error_messages: List[str] = [] 231 + error_messages: List[str] = parse_context.copy()
186 if not valid and errors: 232 if not valid and errors:
187 error_messages.extend(errors) 233 error_messages.extend(errors)
188 if content_error: 234 if content_error:
@@ -314,6 +360,154 @@ class ChapterGenerationNode(BaseNode): @@ -314,6 +360,154 @@ class ChapterGenerationNode(BaseNode):
314 logger.warning(f"章节流式回调失败: {callback_error}") 360 logger.warning(f"章节流式回调失败: {callback_error}")
315 return "".join(chunks) 361 return "".join(chunks)
316 362
  363 + def _attempt_cross_engine_json_rescue(
  364 + self,
  365 + section: TemplateSection,
  366 + generation_payload: Dict[str, Any],
  367 + raw_text: str,
  368 + run_id: str,
  369 + ) -> Optional[Dict[str, Any]]:
  370 + """
  371 + 依次调用Report/Forum/Insight/Media四套API尝试修复无法解析的JSON。
  372 +
  373 + Returns:
  374 + dict | None: 成功修复时返回章节JSON,否则为None。
  375 + """
  376 + if not self.fallback_llm_clients:
  377 + return None
  378 + if self._chapter_already_skipped(section):
  379 + logger.info(f"[{run_id}] {section.title} 已标记为占位,不再触发跨引擎修复")
  380 + return None
  381 + section_payload = {
  382 + "chapterId": section.chapter_id,
  383 + "title": section.title,
  384 + "slug": section.slug,
  385 + "order": section.order,
  386 + "number": section.number,
  387 + "outline": section.outline,
  388 + }
  389 + repair_prompt = build_chapter_recovery_payload(
  390 + section_payload,
  391 + generation_payload,
  392 + raw_text,
  393 + )
  394 + attempted_labels = self._rescue_attempted_labels.setdefault(section.chapter_id, set())
  395 + for label, client in self.fallback_llm_clients:
  396 + if label in attempted_labels:
  397 + continue
  398 + attempt_index = len(attempted_labels) + 1
  399 + attempted_labels.add(label)
  400 + logger.info(
  401 + f"[{run_id}] 章节 {section.title} 触发 {label} API JSON抢修(第{attempt_index}次尝试)"
  402 + )
  403 + try:
  404 + response = client.invoke(
  405 + SYSTEM_PROMPT_CHAPTER_JSON_RECOVERY,
  406 + repair_prompt,
  407 + temperature=0.0,
  408 + top_p=0.05,
  409 + )
  410 + except Exception as exc:
  411 + logger.warning(f"{label} JSON修复调用失败: {exc}")
  412 + continue
  413 + if not response:
  414 + continue
  415 + try:
  416 + repaired = self._parse_chapter(response)
  417 + except Exception as exc:
  418 + logger.warning(f"{label} JSON修复输出仍无法解析: {exc}")
  419 + continue
  420 + logger.warning(f"[{run_id}] {label} API已修复章节JSON")
  421 + self._archived_failed_json.pop(section.chapter_id, None)
  422 + return repaired
  423 + return None
  424 +
  425 + def _ensure_run_state(self, run_id: str):
  426 + """确保每次报告运行时的修复状态隔离,防止上一份任务的记录影响新任务。"""
  427 + if self._active_run_id == run_id:
  428 + return
  429 + self._active_run_id = run_id
  430 + self._rescue_attempted_labels = {}
  431 + self._skipped_placeholder_chapters = set()
  432 + self._archived_failed_json = {}
  433 +
  434 + def _archive_failed_output(self, section: TemplateSection, raw_text: str):
  435 + """缓存当前章节的原始错误JSON,以便后续占位或人工使用。"""
  436 + if not raw_text:
  437 + return
  438 + self._archived_failed_json[section.chapter_id] = raw_text
  439 +
  440 + def _get_archived_failed_output(self, section: TemplateSection) -> Optional[str]:
  441 + """获取章节最近一次失败的原始输出。"""
  442 + return self._archived_failed_json.get(section.chapter_id)
  443 +
  444 + def _mark_chapter_skipped(self, section: TemplateSection):
  445 + """记录该章节已经降级为占位,避免重复触发跨引擎修复。"""
  446 + self._skipped_placeholder_chapters.add(section.chapter_id)
  447 +
  448 + def _chapter_already_skipped(self, section: TemplateSection) -> bool:
  449 + """判断章节是否已经被标记为占位。"""
  450 + return section.chapter_id in self._skipped_placeholder_chapters
  451 +
  452 + def _build_placeholder_chapter(
  453 + self,
  454 + section: TemplateSection,
  455 + raw_text: str,
  456 + parse_error: Exception,
  457 + ) -> Optional[Tuple[Dict[str, Any], List[str]]]:
  458 + """
  459 + 在所有修复失败时构造可渲染的占位章节,并记录日志文件供后续排查。
  460 + """
  461 + snapshot = self._get_archived_failed_output(section) or raw_text
  462 + log_ref = self._persist_error_payload(section, snapshot, parse_error)
  463 + if not log_ref:
  464 + logger.error(f"{section.title} 章节JSON完全损坏且无法写入日志")
  465 + return None
  466 + importance = "critical" if self._is_section_critical(section) else "standard"
  467 + message = (
  468 + f"LLM返回块解析错误,详情请见 {log_ref['relativeFile']} 的 {log_ref['entryId']} 记录。"
  469 + )
  470 + heading_block = {
  471 + "type": "heading",
  472 + "level": 2 if importance == "critical" else 3,
  473 + "text": section.title,
  474 + "anchor": section.slug,
  475 + }
  476 + callout_block = {
  477 + "type": "callout",
  478 + "tone": "danger" if importance == "critical" else "warning",
  479 + "title": "LLM返回块解析错误",
  480 + "blocks": [
  481 + {
  482 + "type": "paragraph",
  483 + "inlines": [
  484 + {
  485 + "text": message,
  486 + }
  487 + ],
  488 + }
  489 + ],
  490 + "meta": {
  491 + "errorLogRef": log_ref,
  492 + "rawJsonPreview": (snapshot or "")[:2000],
  493 + "errorMessage": message,
  494 + "importance": importance,
  495 + },
  496 + }
  497 + placeholder = {
  498 + "chapterId": section.chapter_id,
  499 + "title": section.title,
  500 + "anchor": section.slug,
  501 + "order": section.order,
  502 + "blocks": [heading_block, callout_block],
  503 + "errorPlaceholder": True,
  504 + }
  505 + errors = [
  506 + f"{section.title} 章节JSON解析失败,已降级为占位。参考 {log_ref['relativeFile']}#{log_ref['entryId']}"
  507 + ]
  508 + self._mark_chapter_skipped(section)
  509 + return placeholder, errors
  510 +
317 def _parse_chapter(self, raw_text: str) -> Dict[str, Any]: 511 def _parse_chapter(self, raw_text: str) -> Dict[str, Any]:
318 """ 512 """
319 清洗LLM输出并解析JSON。 513 清洗LLM输出并解析JSON。
@@ -375,6 +569,58 @@ class ChapterGenerationNode(BaseNode): @@ -375,6 +569,58 @@ class ChapterGenerationNode(BaseNode):
375 return item 569 return item
376 raise ValueError("章节JSON缺少chapter字段") 570 raise ValueError("章节JSON缺少chapter字段")
377 571
  572 + def _persist_error_payload(
  573 + self,
  574 + section: TemplateSection,
  575 + raw_text: str,
  576 + parse_error: Exception,
  577 + ) -> Optional[Dict[str, str]]:
  578 + """将无法解析的JSON文本落盘,便于在HTML中指向具体文件。"""
  579 + try:
  580 + self._failed_block_counter += 1
  581 + entry_id = f"E{self._failed_block_counter:04d}"
  582 + timestamp = datetime.utcnow().strftime("%Y%m%d-%H%M%S")
  583 + slug = section.slug or "section"
  584 + filename = f"{timestamp}-{slug}-{entry_id}.json"
  585 + file_path = self.error_log_dir / filename
  586 + payload = {
  587 + "chapterId": section.chapter_id,
  588 + "title": section.title,
  589 + "slug": section.slug,
  590 + "order": section.order,
  591 + "rawOutput": raw_text,
  592 + "error": str(parse_error),
  593 + "loggedAt": timestamp,
  594 + }
  595 + file_path.write_text(
  596 + json.dumps(payload, ensure_ascii=False, indent=2),
  597 + encoding="utf-8",
  598 + )
  599 + try:
  600 + relative_path = str(file_path.relative_to(Path.cwd()))
  601 + except ValueError:
  602 + relative_path = str(file_path)
  603 + return {
  604 + "file": str(file_path),
  605 + "relativeFile": relative_path,
  606 + "entryId": entry_id,
  607 + "timestamp": timestamp,
  608 + }
  609 + except Exception as exc:
  610 + logger.error(f"记录章节JSON错误日志失败: {exc}")
  611 + return None
  612 +
  613 + def _is_section_critical(self, section: TemplateSection) -> bool:
  614 + """基于章节深度/编号判断是否会影响目录,从而决定提示强度。"""
  615 + if not section:
  616 + return False
  617 + if section.depth <= 2:
  618 + return True
  619 + number = section.number or ""
  620 + if number and number.count(".") <= 1:
  621 + return True
  622 + return False
  623 +
378 def _repair_llm_json(self, text: str) -> str: 624 def _repair_llm_json(self, text: str) -> str:
379 """ 625 """
380 处理常见的LLM错误(如":=导致的非法JSON)。 626 处理常见的LLM错误(如":=导致的非法JSON)。
@@ -9,6 +9,7 @@ from .prompts import ( @@ -9,6 +9,7 @@ from .prompts import (
9 SYSTEM_PROMPT_HTML_GENERATION, 9 SYSTEM_PROMPT_HTML_GENERATION,
10 SYSTEM_PROMPT_CHAPTER_JSON, 10 SYSTEM_PROMPT_CHAPTER_JSON,
11 SYSTEM_PROMPT_CHAPTER_JSON_REPAIR, 11 SYSTEM_PROMPT_CHAPTER_JSON_REPAIR,
  12 + SYSTEM_PROMPT_CHAPTER_JSON_RECOVERY,
12 SYSTEM_PROMPT_DOCUMENT_LAYOUT, 13 SYSTEM_PROMPT_DOCUMENT_LAYOUT,
13 SYSTEM_PROMPT_WORD_BUDGET, 14 SYSTEM_PROMPT_WORD_BUDGET,
14 output_schema_template_selection, 15 output_schema_template_selection,
@@ -16,6 +17,7 @@ from .prompts import ( @@ -16,6 +17,7 @@ from .prompts import (
16 chapter_generation_input_schema, 17 chapter_generation_input_schema,
17 build_chapter_user_prompt, 18 build_chapter_user_prompt,
18 build_chapter_repair_prompt, 19 build_chapter_repair_prompt,
  20 + build_chapter_recovery_payload,
19 build_document_layout_prompt, 21 build_document_layout_prompt,
20 build_word_budget_prompt, 22 build_word_budget_prompt,
21 ) 23 )
@@ -27,11 +29,13 @@ __all__ = [ @@ -27,11 +29,13 @@ __all__ = [
27 "SYSTEM_PROMPT_CHAPTER_JSON_REPAIR", 29 "SYSTEM_PROMPT_CHAPTER_JSON_REPAIR",
28 "SYSTEM_PROMPT_DOCUMENT_LAYOUT", 30 "SYSTEM_PROMPT_DOCUMENT_LAYOUT",
29 "SYSTEM_PROMPT_WORD_BUDGET", 31 "SYSTEM_PROMPT_WORD_BUDGET",
  32 + "SYSTEM_PROMPT_CHAPTER_JSON_RECOVERY",
30 "output_schema_template_selection", 33 "output_schema_template_selection",
31 "input_schema_html_generation", 34 "input_schema_html_generation",
32 "chapter_generation_input_schema", 35 "chapter_generation_input_schema",
33 "build_chapter_user_prompt", 36 "build_chapter_user_prompt",
34 "build_chapter_repair_prompt", 37 "build_chapter_repair_prompt",
  38 + "build_chapter_recovery_payload",
35 "build_document_layout_prompt", 39 "build_document_layout_prompt",
36 "build_word_budget_prompt", 40 "build_word_budget_prompt",
37 ] 41 ]
@@ -335,6 +335,24 @@ SYSTEM_PROMPT_CHAPTER_JSON_REPAIR = f""" @@ -335,6 +335,24 @@ SYSTEM_PROMPT_CHAPTER_JSON_REPAIR = f"""
335 只返回JSON,不要添加注释或自然语言。 335 只返回JSON,不要添加注释或自然语言。
336 """ 336 """
337 337
  338 +SYSTEM_PROMPT_CHAPTER_JSON_RECOVERY = f"""
  339 +你是Report/Forum/Insight/Media联合的“JSON抢修官”,会拿到章节生成时的全部约束(generationPayload)以及原始失败输出(rawChapterOutput)。
  340 +
  341 +请遵守:
  342 +1. 章节必须满足IR版本 {IR_VERSION} 规范,block.type 仅能使用:{', '.join(ALLOWED_BLOCK_TYPES)};
  343 +2. paragraph.inlines中的marks仅可出现:{', '.join(ALLOWED_INLINE_MARKS)},并保留原始文字顺序;
  344 +3. 请以 generationPayload 中的 section 信息为主导,heading.text 与 anchor 必须与章节slug保持一致;
  345 +4. 仅对JSON语法/字段/嵌套做最小必要修复,不改写事实与结论;
  346 +5. 输出严格遵循 {{\"chapter\": {{...}}}} 格式,不添加说明。
  347 +
  348 +输入字段:
  349 +- generationPayload:章节原始需求与素材,请完整遵守;
  350 +- rawChapterOutput:无法解析的JSON文本,请尽可能复用其中内容;
  351 +- section:章节元信息,便于保持锚点/标题一致。
  352 +
  353 +请直接返回修复后的JSON。
  354 +"""
  355 +
338 # 文档标题/目录/主题设计提示词 356 # 文档标题/目录/主题设计提示词
339 SYSTEM_PROMPT_DOCUMENT_LAYOUT = f""" 357 SYSTEM_PROMPT_DOCUMENT_LAYOUT = f"""
340 你是报告首席设计官,需要结合模板大纲与三个分析引擎的内容,为整本报告确定最终的标题、导语区、目录样式与美学要素。 358 你是报告首席设计官,需要结合模板大纲与三个分析引擎的内容,为整本报告确定最终的标题、导语区、目录样式与美学要素。
@@ -399,6 +417,22 @@ def build_chapter_repair_prompt(chapter: dict, errors, original_text=None) -> st @@ -399,6 +417,22 @@ def build_chapter_repair_prompt(chapter: dict, errors, original_text=None) -> st
399 return json.dumps(payload, ensure_ascii=False, indent=2) 417 return json.dumps(payload, ensure_ascii=False, indent=2)
400 418
401 419
  420 +def build_chapter_recovery_payload(
  421 + section: dict, generation_payload: dict, raw_output: str
  422 +) -> str:
  423 + """
  424 + 构造跨引擎JSON抢修输入,附带章节元信息、生成指令与原始输出。
  425 +
  426 + 为避免提示词过长,仅保留原始输出的尾部片段以定位问题。
  427 + """
  428 + payload = {
  429 + "section": section,
  430 + "generationPayload": generation_payload,
  431 + "rawChapterOutput": raw_output[-8000:] if isinstance(raw_output, str) else raw_output,
  432 + }
  433 + return json.dumps(payload, ensure_ascii=False, indent=2)
  434 +
  435 +
402 def build_document_layout_prompt(payload: dict) -> str: 436 def build_document_layout_prompt(payload: dict) -> str:
403 """将文档设计所需的上下文序列化为JSON字符串,供布局节点发送给LLM。""" 437 """将文档设计所需的上下文序列化为JSON字符串,供布局节点发送给LLM。"""
404 return json.dumps(payload, ensure_ascii=False, indent=2) 438 return json.dumps(payload, ensure_ascii=False, indent=2)
@@ -15,6 +15,34 @@ class Settings(BaseSettings): @@ -15,6 +15,34 @@ class Settings(BaseSettings):
15 REPORT_ENGINE_BASE_URL: Optional[str] = Field(None, description="Report Engine LLM基础URL") 15 REPORT_ENGINE_BASE_URL: Optional[str] = Field(None, description="Report Engine LLM基础URL")
16 REPORT_ENGINE_MODEL_NAME: Optional[str] = Field(None, description="Report Engine LLM模型名称") 16 REPORT_ENGINE_MODEL_NAME: Optional[str] = Field(None, description="Report Engine LLM模型名称")
17 REPORT_ENGINE_PROVIDER: Optional[str] = Field(None, description="模型服务商,仅兼容保留") 17 REPORT_ENGINE_PROVIDER: Optional[str] = Field(None, description="模型服务商,仅兼容保留")
  18 + # 其他引擎API(用于跨引擎修复)
  19 + FORUM_HOST_API_KEY: Optional[str] = Field(
  20 + None, description="Forum Engine / Forum Host 的LLM API密钥(用于章节修复兜底)"
  21 + )
  22 + FORUM_HOST_BASE_URL: Optional[str] = Field(
  23 + None, description="Forum Engine API Base URL(为空则使用LLM默认配置)"
  24 + )
  25 + FORUM_HOST_MODEL_NAME: Optional[str] = Field(
  26 + None, description="Forum Engine LLM模型名称"
  27 + )
  28 + INSIGHT_ENGINE_API_KEY: Optional[str] = Field(
  29 + None, description="Insight Engine LLM API密钥,用于跨引擎章节修复"
  30 + )
  31 + INSIGHT_ENGINE_BASE_URL: Optional[str] = Field(
  32 + None, description="Insight Engine API Base URL"
  33 + )
  34 + INSIGHT_ENGINE_MODEL_NAME: Optional[str] = Field(
  35 + None, description="Insight Engine LLM模型名称"
  36 + )
  37 + MEDIA_ENGINE_API_KEY: Optional[str] = Field(
  38 + None, description="Media Engine LLM API密钥,用于跨引擎章节修复"
  39 + )
  40 + MEDIA_ENGINE_BASE_URL: Optional[str] = Field(
  41 + None, description="Media Engine API Base URL"
  42 + )
  43 + MEDIA_ENGINE_MODEL_NAME: Optional[str] = Field(
  44 + None, description="Media Engine LLM模型名称"
  45 + )
18 MAX_CONTENT_LENGTH: int = Field(200000, description="最大内容长度") 46 MAX_CONTENT_LENGTH: int = Field(200000, description="最大内容长度")
19 OUTPUT_DIR: str = Field("final_reports", description="主输出目录") 47 OUTPUT_DIR: str = Field("final_reports", description="主输出目录")
20 # 章节分块JSON会存储在该目录,便于溯源与断点续传 48 # 章节分块JSON会存储在该目录,便于溯源与断点续传
@@ -35,6 +63,9 @@ class Settings(BaseSettings): @@ -35,6 +63,9 @@ class Settings(BaseSettings):
35 LOG_FILE: str = Field("logs/report.log", description="日志输出文件") 63 LOG_FILE: str = Field("logs/report.log", description="日志输出文件")
36 ENABLE_PDF_EXPORT: bool = Field(True, description="是否允许导出PDF") 64 ENABLE_PDF_EXPORT: bool = Field(True, description="是否允许导出PDF")
37 CHART_STYLE: str = Field("modern", description="图表样式:modern/classic/") 65 CHART_STYLE: str = Field("modern", description="图表样式:modern/classic/")
  66 + JSON_ERROR_LOG_DIR: str = Field(
  67 + "logs/json_repair_failures", description="无法修复的JSON块落盘目录"
  68 + )
38 69
39 class Config: 70 class Config:
40 """Pydantic配置:允许从.env读取并兼容大小写""" 71 """Pydantic配置:允许从.env读取并兼容大小写"""