Showing 3 changed files with 82 additions and 0 deletions
| @@ -18,6 +18,8 @@ from ..core import TemplateSection, ChapterStorage | @@ -18,6 +18,8 @@ from ..core import TemplateSection, ChapterStorage | ||
| 18 | from ..ir import ALLOWED_BLOCK_TYPES, ALLOWED_INLINE_MARKS, IRValidator | 18 | from ..ir import ALLOWED_BLOCK_TYPES, ALLOWED_INLINE_MARKS, IRValidator |
| 19 | from ..prompts import ( | 19 | from ..prompts import ( |
| 20 | SYSTEM_PROMPT_CHAPTER_JSON, | 20 | SYSTEM_PROMPT_CHAPTER_JSON, |
| 21 | + SYSTEM_PROMPT_CHAPTER_JSON_REPAIR, | ||
| 22 | + build_chapter_repair_prompt, | ||
| 21 | build_chapter_user_prompt, | 23 | build_chapter_user_prompt, |
| 22 | ) | 24 | ) |
| 23 | from .base_node import BaseNode | 25 | from .base_node import BaseNode |
| @@ -151,6 +153,20 @@ class ChapterGenerationNode(BaseNode): | @@ -151,6 +153,20 @@ class ChapterGenerationNode(BaseNode): | ||
| 151 | self._sanitize_chapter_blocks(chapter_json) | 153 | self._sanitize_chapter_blocks(chapter_json) |
| 152 | 154 | ||
| 153 | valid, errors = self.validator.validate_chapter(chapter_json) | 155 | valid, errors = self.validator.validate_chapter(chapter_json) |
| 156 | + if not valid and errors: | ||
| 157 | + repaired = self._attempt_llm_structural_repair( | ||
| 158 | + chapter_json, | ||
| 159 | + errors, | ||
| 160 | + raw_text=raw_text, | ||
| 161 | + ) | ||
| 162 | + if repaired: | ||
| 163 | + chapter_json = repaired | ||
| 164 | + chapter_json.setdefault("chapterId", section.chapter_id) | ||
| 165 | + chapter_json.setdefault("anchor", section.slug) | ||
| 166 | + chapter_json.setdefault("title", section.title) | ||
| 167 | + chapter_json.setdefault("order", section.order) | ||
| 168 | + self._sanitize_chapter_blocks(chapter_json) | ||
| 169 | + valid, errors = self.validator.validate_chapter(chapter_json) | ||
| 154 | content_error: ChapterContentError | None = None | 170 | content_error: ChapterContentError | None = None |
| 155 | if valid: | 171 | if valid: |
| 156 | try: | 172 | try: |
| @@ -537,6 +553,36 @@ class ChapterGenerationNode(BaseNode): | @@ -537,6 +553,36 @@ class ChapterGenerationNode(BaseNode): | ||
| 537 | logger.warning("已使用json_repair自动修复章节JSON语法") | 553 | logger.warning("已使用json_repair自动修复章节JSON语法") |
| 538 | return fixed | 554 | return fixed |
| 539 | 555 | ||
| 556 | + def _attempt_llm_structural_repair( | ||
| 557 | + self, | ||
| 558 | + chapter: Dict[str, Any], | ||
| 559 | + validation_errors: List[str], | ||
| 560 | + raw_text: Optional[str] = None, | ||
| 561 | + ) -> Optional[Dict[str, Any]]: | ||
| 562 | + """将结构性错误的章节交给LLM兜底修复,保持Report Engine相同的API设置。""" | ||
| 563 | + if not validation_errors: | ||
| 564 | + return None | ||
| 565 | + payload = build_chapter_repair_prompt(chapter, validation_errors, raw_text) | ||
| 566 | + try: | ||
| 567 | + response = self.llm_client.invoke( | ||
| 568 | + SYSTEM_PROMPT_CHAPTER_JSON_REPAIR, | ||
| 569 | + payload, | ||
| 570 | + temperature=0.0, | ||
| 571 | + top_p=0.05, | ||
| 572 | + ) | ||
| 573 | + except Exception as exc: # pragma: no cover - 网络或API异常仅记录 | ||
| 574 | + logger.error(f"章节JSON LLM修复调用失败: {exc}") | ||
| 575 | + return None | ||
| 576 | + if not response: | ||
| 577 | + return None | ||
| 578 | + try: | ||
| 579 | + repaired = self._parse_chapter(response) | ||
| 580 | + except Exception as exc: | ||
| 581 | + logger.error(f"LLM修复后的章节JSON解析失败: {exc}") | ||
| 582 | + return None | ||
| 583 | + logger.warning("章节JSON经多次本地修复仍不合规,已成功启用LLM兜底修复") | ||
| 584 | + return repaired | ||
| 585 | + | ||
| 540 | def _sanitize_chapter_blocks(self, chapter: Dict[str, Any]): | 586 | def _sanitize_chapter_blocks(self, chapter: Dict[str, Any]): |
| 541 | """ | 587 | """ |
| 542 | 修正常见的结构性错误(例如list.items嵌套过深)。 | 588 | 修正常见的结构性错误(例如list.items嵌套过深)。 |
| @@ -8,12 +8,14 @@ from .prompts import ( | @@ -8,12 +8,14 @@ from .prompts import ( | ||
| 8 | SYSTEM_PROMPT_TEMPLATE_SELECTION, | 8 | SYSTEM_PROMPT_TEMPLATE_SELECTION, |
| 9 | SYSTEM_PROMPT_HTML_GENERATION, | 9 | SYSTEM_PROMPT_HTML_GENERATION, |
| 10 | SYSTEM_PROMPT_CHAPTER_JSON, | 10 | SYSTEM_PROMPT_CHAPTER_JSON, |
| 11 | + SYSTEM_PROMPT_CHAPTER_JSON_REPAIR, | ||
| 11 | SYSTEM_PROMPT_DOCUMENT_LAYOUT, | 12 | SYSTEM_PROMPT_DOCUMENT_LAYOUT, |
| 12 | SYSTEM_PROMPT_WORD_BUDGET, | 13 | SYSTEM_PROMPT_WORD_BUDGET, |
| 13 | output_schema_template_selection, | 14 | output_schema_template_selection, |
| 14 | input_schema_html_generation, | 15 | input_schema_html_generation, |
| 15 | chapter_generation_input_schema, | 16 | chapter_generation_input_schema, |
| 16 | build_chapter_user_prompt, | 17 | build_chapter_user_prompt, |
| 18 | + build_chapter_repair_prompt, | ||
| 17 | build_document_layout_prompt, | 19 | build_document_layout_prompt, |
| 18 | build_word_budget_prompt, | 20 | build_word_budget_prompt, |
| 19 | ) | 21 | ) |
| @@ -22,12 +24,14 @@ __all__ = [ | @@ -22,12 +24,14 @@ __all__ = [ | ||
| 22 | "SYSTEM_PROMPT_TEMPLATE_SELECTION", | 24 | "SYSTEM_PROMPT_TEMPLATE_SELECTION", |
| 23 | "SYSTEM_PROMPT_HTML_GENERATION", | 25 | "SYSTEM_PROMPT_HTML_GENERATION", |
| 24 | "SYSTEM_PROMPT_CHAPTER_JSON", | 26 | "SYSTEM_PROMPT_CHAPTER_JSON", |
| 27 | + "SYSTEM_PROMPT_CHAPTER_JSON_REPAIR", | ||
| 25 | "SYSTEM_PROMPT_DOCUMENT_LAYOUT", | 28 | "SYSTEM_PROMPT_DOCUMENT_LAYOUT", |
| 26 | "SYSTEM_PROMPT_WORD_BUDGET", | 29 | "SYSTEM_PROMPT_WORD_BUDGET", |
| 27 | "output_schema_template_selection", | 30 | "output_schema_template_selection", |
| 28 | "input_schema_html_generation", | 31 | "input_schema_html_generation", |
| 29 | "chapter_generation_input_schema", | 32 | "chapter_generation_input_schema", |
| 30 | "build_chapter_user_prompt", | 33 | "build_chapter_user_prompt", |
| 34 | + "build_chapter_repair_prompt", | ||
| 31 | "build_document_layout_prompt", | 35 | "build_document_layout_prompt", |
| 32 | "build_word_budget_prompt", | 36 | "build_word_budget_prompt", |
| 33 | ] | 37 | ] |
| @@ -9,6 +9,7 @@ import json | @@ -9,6 +9,7 @@ import json | ||
| 9 | 9 | ||
| 10 | from ..ir import ( | 10 | from ..ir import ( |
| 11 | ALLOWED_BLOCK_TYPES, | 11 | ALLOWED_BLOCK_TYPES, |
| 12 | + ALLOWED_INLINE_MARKS, | ||
| 12 | CHAPTER_JSON_SCHEMA_TEXT, | 13 | CHAPTER_JSON_SCHEMA_TEXT, |
| 13 | IR_VERSION, | 14 | IR_VERSION, |
| 14 | ) | 15 | ) |
| @@ -317,6 +318,23 @@ SYSTEM_PROMPT_CHAPTER_JSON = f""" | @@ -317,6 +318,23 @@ SYSTEM_PROMPT_CHAPTER_JSON = f""" | ||
| 317 | 严禁添加除JSON以外的任何文本或注释。 | 318 | 严禁添加除JSON以外的任何文本或注释。 |
| 318 | """ | 319 | """ |
| 319 | 320 | ||
| 321 | +SYSTEM_PROMPT_CHAPTER_JSON_REPAIR = f""" | ||
| 322 | +你现在扮演Report Engine的“章节JSON修复官”,负责在章节草稿无法通过IR校验时进行兜底修复。 | ||
| 323 | + | ||
| 324 | +请牢记: | ||
| 325 | +1. 所有chapter必须满足IR版本 {IR_VERSION} 约束,仅允许以下block.type:{', '.join(ALLOWED_BLOCK_TYPES)}; | ||
| 326 | +2. paragraph.inlines中的marks必须来自以下集合:{', '.join(ALLOWED_INLINE_MARKS)}; | ||
| 327 | +3. 允许的结构、字段与嵌套规则全部写在《CHAPTER JSON SCHEMA》中,任何缺少字段、数组嵌套错误或list.items不是二维数组的情况都必须修复; | ||
| 328 | +4. 不得更改事实、数值与结论,只能对结构/字段名/嵌套层级做最小修改以通过校验; | ||
| 329 | +5. 最终输出只能包含合法JSON,格式严格为:{{"chapter": {{...修复后的章节JSON...}}}},禁止额外解释或Markdown。 | ||
| 330 | + | ||
| 331 | +<CHAPTER JSON SCHEMA> | ||
| 332 | +{CHAPTER_JSON_SCHEMA_TEXT} | ||
| 333 | +</CHAPTER JSON SCHEMA> | ||
| 334 | + | ||
| 335 | +只返回JSON,不要添加注释或自然语言。 | ||
| 336 | +""" | ||
| 337 | + | ||
| 320 | # 文档标题/目录/主题设计提示词 | 338 | # 文档标题/目录/主题设计提示词 |
| 321 | SYSTEM_PROMPT_DOCUMENT_LAYOUT = f""" | 339 | SYSTEM_PROMPT_DOCUMENT_LAYOUT = f""" |
| 322 | 你是报告首席设计官,需要结合模板大纲与三个分析引擎的内容,为整本报告确定最终的标题、导语区、目录样式与美学要素。 | 340 | 你是报告首席设计官,需要结合模板大纲与三个分析引擎的内容,为整本报告确定最终的标题、导语区、目录样式与美学要素。 |
| @@ -367,6 +385,20 @@ def build_chapter_user_prompt(payload: dict) -> str: | @@ -367,6 +385,20 @@ def build_chapter_user_prompt(payload: dict) -> str: | ||
| 367 | return json.dumps(payload, ensure_ascii=False, indent=2) | 385 | return json.dumps(payload, ensure_ascii=False, indent=2) |
| 368 | 386 | ||
| 369 | 387 | ||
| 388 | +def build_chapter_repair_prompt(chapter: dict, errors, original_text=None) -> str: | ||
| 389 | + """ | ||
| 390 | + 构造章节修复输入payload,包含原始章节与校验错误。 | ||
| 391 | + """ | ||
| 392 | + payload: dict = { | ||
| 393 | + "failedChapter": chapter, | ||
| 394 | + "validatorErrors": errors, | ||
| 395 | + } | ||
| 396 | + if original_text: | ||
| 397 | + snippet = original_text[-2000:] | ||
| 398 | + payload["rawOutputTail"] = snippet | ||
| 399 | + return json.dumps(payload, ensure_ascii=False, indent=2) | ||
| 400 | + | ||
| 401 | + | ||
| 370 | def build_document_layout_prompt(payload: dict) -> str: | 402 | def build_document_layout_prompt(payload: dict) -> str: |
| 371 | """将文档设计所需的上下文序列化为JSON字符串,供布局节点发送给LLM。""" | 403 | """将文档设计所需的上下文序列化为JSON字符串,供布局节点发送给LLM。""" |
| 372 | return json.dumps(payload, ensure_ascii=False, indent=2) | 404 | return json.dumps(payload, ensure_ascii=False, indent=2) |
-
Please register or login to post a comment