马一丁

Allow LLM Repair

... ... @@ -18,6 +18,8 @@ from ..core import TemplateSection, ChapterStorage
from ..ir import ALLOWED_BLOCK_TYPES, ALLOWED_INLINE_MARKS, IRValidator
from ..prompts import (
SYSTEM_PROMPT_CHAPTER_JSON,
SYSTEM_PROMPT_CHAPTER_JSON_REPAIR,
build_chapter_repair_prompt,
build_chapter_user_prompt,
)
from .base_node import BaseNode
... ... @@ -151,6 +153,20 @@ class ChapterGenerationNode(BaseNode):
self._sanitize_chapter_blocks(chapter_json)
valid, errors = self.validator.validate_chapter(chapter_json)
if not valid and errors:
repaired = self._attempt_llm_structural_repair(
chapter_json,
errors,
raw_text=raw_text,
)
if repaired:
chapter_json = repaired
chapter_json.setdefault("chapterId", section.chapter_id)
chapter_json.setdefault("anchor", section.slug)
chapter_json.setdefault("title", section.title)
chapter_json.setdefault("order", section.order)
self._sanitize_chapter_blocks(chapter_json)
valid, errors = self.validator.validate_chapter(chapter_json)
content_error: ChapterContentError | None = None
if valid:
try:
... ... @@ -537,6 +553,36 @@ class ChapterGenerationNode(BaseNode):
logger.warning("已使用json_repair自动修复章节JSON语法")
return fixed
def _attempt_llm_structural_repair(
    self,
    chapter: Dict[str, Any],
    validation_errors: List[str],
    raw_text: Optional[str] = None,
) -> Optional[Dict[str, Any]]:
    """
    Ask the LLM to repair a chapter that still fails structural validation.

    The failed chapter, the validator errors and (optionally) the raw model
    output are packed into a repair prompt and sent through
    ``self.llm_client`` with near-deterministic sampling settings.

    Args:
        chapter: The chapter JSON that failed validation.
        validation_errors: Error messages produced by the validator.
        raw_text: Optional raw LLM output the chapter was parsed from.

    Returns:
        The chapter parsed from the LLM reply, or ``None`` when there are
        no errors to fix, the LLM call raises, the reply is empty, or the
        reply cannot be parsed.
    """
    # Nothing to repair: skip the LLM round-trip entirely.
    if not validation_errors:
        return None

    repair_input = build_chapter_repair_prompt(chapter, validation_errors, raw_text)

    try:
        llm_reply = self.llm_client.invoke(
            SYSTEM_PROMPT_CHAPTER_JSON_REPAIR,
            repair_input,
            temperature=0.0,
            top_p=0.05,
        )
    except Exception as call_error:  # pragma: no cover - network/API failures are only logged
        logger.error(f"章节JSON LLM修复调用失败: {call_error}")
        return None

    if not llm_reply:
        return None

    try:
        fixed_chapter = self._parse_chapter(llm_reply)
    except Exception as parse_error:
        logger.error(f"LLM修复后的章节JSON解析失败: {parse_error}")
        return None
    else:
        # The caller is expected to re-validate the returned chapter.
        logger.warning("章节JSON经多次本地修复仍不合规,已成功启用LLM兜底修复")
        return fixed_chapter
def _sanitize_chapter_blocks(self, chapter: Dict[str, Any]):
"""
修正常见的结构性错误(例如list.items嵌套过深)。
... ...
... ... @@ -8,12 +8,14 @@ from .prompts import (
SYSTEM_PROMPT_TEMPLATE_SELECTION,
SYSTEM_PROMPT_HTML_GENERATION,
SYSTEM_PROMPT_CHAPTER_JSON,
SYSTEM_PROMPT_CHAPTER_JSON_REPAIR,
SYSTEM_PROMPT_DOCUMENT_LAYOUT,
SYSTEM_PROMPT_WORD_BUDGET,
output_schema_template_selection,
input_schema_html_generation,
chapter_generation_input_schema,
build_chapter_user_prompt,
build_chapter_repair_prompt,
build_document_layout_prompt,
build_word_budget_prompt,
)
... ... @@ -22,12 +24,14 @@ __all__ = [
"SYSTEM_PROMPT_TEMPLATE_SELECTION",
"SYSTEM_PROMPT_HTML_GENERATION",
"SYSTEM_PROMPT_CHAPTER_JSON",
"SYSTEM_PROMPT_CHAPTER_JSON_REPAIR",
"SYSTEM_PROMPT_DOCUMENT_LAYOUT",
"SYSTEM_PROMPT_WORD_BUDGET",
"output_schema_template_selection",
"input_schema_html_generation",
"chapter_generation_input_schema",
"build_chapter_user_prompt",
"build_chapter_repair_prompt",
"build_document_layout_prompt",
"build_word_budget_prompt",
]
... ...
... ... @@ -9,6 +9,7 @@ import json
from ..ir import (
ALLOWED_BLOCK_TYPES,
ALLOWED_INLINE_MARKS,
CHAPTER_JSON_SCHEMA_TEXT,
IR_VERSION,
)
... ... @@ -317,6 +318,23 @@ SYSTEM_PROMPT_CHAPTER_JSON = f"""
严禁添加除JSON以外的任何文本或注释。
"""
# System prompt for the chapter-JSON repair pass.
# It tells the model to act as a "chapter JSON repair officer" for drafts that
# failed IR validation: fix only structure / field names / nesting levels,
# never facts, figures or conclusions, and reply with JSON shaped exactly as
# {"chapter": {...}}.
# This is an f-string, so IR_VERSION, ALLOWED_BLOCK_TYPES, ALLOWED_INLINE_MARKS
# and CHAPTER_JSON_SCHEMA_TEXT are interpolated when the module is evaluated;
# literal braces in the output-format example are therefore doubled.
SYSTEM_PROMPT_CHAPTER_JSON_REPAIR = f"""
你现在扮演Report Engine的“章节JSON修复官”,负责在章节草稿无法通过IR校验时进行兜底修复。
请牢记:
1. 所有chapter必须满足IR版本 {IR_VERSION} 约束,仅允许以下block.type:{', '.join(ALLOWED_BLOCK_TYPES)};
2. paragraph.inlines中的marks必须来自以下集合:{', '.join(ALLOWED_INLINE_MARKS)};
3. 允许的结构、字段与嵌套规则全部写在《CHAPTER JSON SCHEMA》中,任何缺少字段、数组嵌套错误或list.items不是二维数组的情况都必须修复;
4. 不得更改事实、数值与结论,只能对结构/字段名/嵌套层级做最小修改以通过校验;
5. 最终输出只能包含合法JSON,格式严格为:{{"chapter": {{...修复后的章节JSON...}}}},禁止额外解释或Markdown。
<CHAPTER JSON SCHEMA>
{CHAPTER_JSON_SCHEMA_TEXT}
</CHAPTER JSON SCHEMA>
只返回JSON,不要添加注释或自然语言。
"""
# 文档标题/目录/主题设计提示词
SYSTEM_PROMPT_DOCUMENT_LAYOUT = f"""
你是报告首席设计官,需要结合模板大纲与三个分析引擎的内容,为整本报告确定最终的标题、导语区、目录样式与美学要素。
... ... @@ -367,6 +385,20 @@ def build_chapter_user_prompt(payload: dict) -> str:
return json.dumps(payload, ensure_ascii=False, indent=2)
def build_chapter_repair_prompt(chapter: dict, errors, original_text=None) -> str:
    """
    Build the JSON input for the chapter-repair LLM call.

    Args:
        chapter: The chapter that failed validation (``failedChapter``).
        errors: Validator error messages (``validatorErrors``).
        original_text: Optional raw model output; when truthy, its last
            2000 characters are attached as ``rawOutputTail``.

    Returns:
        The payload serialized as indented JSON with non-ASCII text kept
        unescaped.
    """
    # Only the tail of the raw output is forwarded, and only when present.
    tail_part = {"rawOutputTail": original_text[-2000:]} if original_text else {}
    return json.dumps(
        {"failedChapter": chapter, "validatorErrors": errors, **tail_part},
        ensure_ascii=False,
        indent=2,
    )
def build_document_layout_prompt(payload: dict) -> str:
    """
    Serialize the document-layout context to a JSON string for the LLM.

    Args:
        payload: Context needed for document design.

    Returns:
        ``payload`` as indented JSON with non-ASCII text kept unescaped.
    """
    serialized = json.dumps(payload, indent=2, ensure_ascii=False)
    return serialized
... ...