Showing 7 changed files with 90 additions and 16 deletions
| @@ -11,6 +11,7 @@ from .schema import ( | @@ -11,6 +11,7 @@ from .schema import ( | ||
| 11 | CHAPTER_JSON_SCHEMA_TEXT, | 11 | CHAPTER_JSON_SCHEMA_TEXT, |
| 12 | ALLOWED_BLOCK_TYPES, | 12 | ALLOWED_BLOCK_TYPES, |
| 13 | ALLOWED_INLINE_MARKS, | 13 | ALLOWED_INLINE_MARKS, |
| 14 | + ENGINE_AGENT_TITLES, | ||
| 14 | ) | 15 | ) |
| 15 | from .validator import IRValidator | 16 | from .validator import IRValidator |
| 16 | 17 | ||
| @@ -20,5 +21,6 @@ __all__ = [ | @@ -20,5 +21,6 @@ __all__ = [ | ||
| 20 | "CHAPTER_JSON_SCHEMA_TEXT", | 21 | "CHAPTER_JSON_SCHEMA_TEXT", |
| 21 | "ALLOWED_BLOCK_TYPES", | 22 | "ALLOWED_BLOCK_TYPES", |
| 22 | "ALLOWED_INLINE_MARKS", | 23 | "ALLOWED_INLINE_MARKS", |
| 24 | + "ENGINE_AGENT_TITLES", | ||
| 23 | "IRValidator", | 25 | "IRValidator", |
| 24 | ] | 26 | ] |
| @@ -45,6 +45,12 @@ ALLOWED_BLOCK_TYPES: List[str] = [ | @@ -45,6 +45,12 @@ ALLOWED_BLOCK_TYPES: List[str] = [ | ||
| 45 | "toc", | 45 | "toc", |
| 46 | ] | 46 | ] |
| 47 | 47 | ||
| 48 | +ENGINE_AGENT_TITLES: Dict[str, str] = { | ||
| 49 | + "insight": "Insight Agent", | ||
| 50 | + "media": "Media Agent", | ||
| 51 | + "query": "Query Agent", | ||
| 52 | +} | ||
| 53 | + | ||
| 48 | # ====== Schema定义 ====== | 54 | # ====== Schema定义 ====== |
| 49 | inline_mark_schema: Dict[str, Any] = { | 55 | inline_mark_schema: Dict[str, Any] = { |
| 50 | "type": "object", | 56 | "type": "object", |
| @@ -190,7 +196,21 @@ engine_quote_block: Dict[str, Any] = { | @@ -190,7 +196,21 @@ engine_quote_block: Dict[str, Any] = { | ||
| 190 | "items": {"$ref": "#/definitions/block"}, | 196 | "items": {"$ref": "#/definitions/block"}, |
| 191 | }, | 197 | }, |
| 192 | }, | 198 | }, |
| 193 | - "required": ["type", "engine", "blocks"], | 199 | + "required": ["type", "engine", "blocks", "title"], |
| 200 | + "allOf": [ | ||
| 201 | + { | ||
| 202 | + "if": {"properties": {"engine": {"const": "insight"}}}, | ||
| 203 | + "then": {"properties": {"title": {"const": ENGINE_AGENT_TITLES["insight"]}}}, | ||
| 204 | + }, | ||
| 205 | + { | ||
| 206 | + "if": {"properties": {"engine": {"const": "media"}}}, | ||
| 207 | + "then": {"properties": {"title": {"const": ENGINE_AGENT_TITLES["media"]}}}, | ||
| 208 | + }, | ||
| 209 | + { | ||
| 210 | + "if": {"properties": {"engine": {"const": "query"}}}, | ||
| 211 | + "then": {"properties": {"title": {"const": ENGINE_AGENT_TITLES["query"]}}}, | ||
| 212 | + }, | ||
| 213 | + ], | ||
| 194 | "additionalProperties": True, | 214 | "additionalProperties": True, |
| 195 | } | 215 | } |
| 196 | 216 | ||
| @@ -384,4 +404,5 @@ __all__ = [ | @@ -384,4 +404,5 @@ __all__ = [ | ||
| 384 | "ALLOWED_BLOCK_TYPES", | 404 | "ALLOWED_BLOCK_TYPES", |
| 385 | "CHAPTER_JSON_SCHEMA", | 405 | "CHAPTER_JSON_SCHEMA", |
| 386 | "CHAPTER_JSON_SCHEMA_TEXT", | 406 | "CHAPTER_JSON_SCHEMA_TEXT", |
| 407 | + "ENGINE_AGENT_TITLES", | ||
| 387 | ] | 408 | ] |
| @@ -10,7 +10,12 @@ from __future__ import annotations | @@ -10,7 +10,12 @@ from __future__ import annotations | ||
| 10 | 10 | ||
| 11 | from typing import Any, Dict, List, Tuple | 11 | from typing import Any, Dict, List, Tuple |
| 12 | 12 | ||
| 13 | -from .schema import ALLOWED_BLOCK_TYPES, ALLOWED_INLINE_MARKS, IR_VERSION | 13 | +from .schema import ( |
| 14 | + ALLOWED_BLOCK_TYPES, | ||
| 15 | + ALLOWED_INLINE_MARKS, | ||
| 16 | + ENGINE_AGENT_TITLES, | ||
| 17 | + IR_VERSION, | ||
| 18 | +) | ||
| 14 | 19 | ||
| 15 | 20 | ||
| 16 | class IRValidator: | 21 | class IRValidator: |
| @@ -142,9 +147,20 @@ class IRValidator: | @@ -142,9 +147,20 @@ class IRValidator: | ||
| 142 | self, block: Dict[str, Any], path: str, errors: List[str] | 147 | self, block: Dict[str, Any], path: str, errors: List[str] |
| 143 | ): | 148 | ): |
| 144 | """单引擎发言块需标注engine并包含子blocks""" | 149 | """单引擎发言块需标注engine并包含子blocks""" |
| 145 | - engine = block.get("engine") | 150 | + engine_raw = block.get("engine") |
| 151 | + engine = engine_raw.lower() if isinstance(engine_raw, str) else None | ||
| 146 | if engine not in {"insight", "media", "query"}: | 152 | if engine not in {"insight", "media", "query"}: |
| 147 | - errors.append(f"{path}.engine 取值非法: {engine}") | 153 | + errors.append(f"{path}.engine 取值非法: {engine_raw}") |
| 154 | + title = block.get("title") | ||
| 155 | + expected_title = ENGINE_AGENT_TITLES.get(engine) if engine else None | ||
| 156 | + if title is None: | ||
| 157 | + errors.append(f"{path}.title 缺失") | ||
| 158 | + elif not isinstance(title, str): | ||
| 159 | + errors.append(f"{path}.title 必须是字符串") | ||
| 160 | + elif expected_title and title != expected_title: | ||
| 161 | + errors.append( | ||
| 162 | + f"{path}.title 必须与engine一致,使用对应Agent名称: {expected_title}" | ||
| 163 | + ) | ||
| 148 | inner = block.get("blocks") | 164 | inner = block.get("blocks") |
| 149 | if not isinstance(inner, list) or not inner: | 165 | if not isinstance(inner, list) or not inner: |
| 150 | errors.append(f"{path}.blocks 必须是非空数组") | 166 | errors.append(f"{path}.blocks 必须是非空数组") |
| @@ -16,7 +16,12 @@ from typing import Any, Dict, List, Tuple, Callable, Optional, Set | @@ -16,7 +16,12 @@ from typing import Any, Dict, List, Tuple, Callable, Optional, Set | ||
| 16 | from loguru import logger | 16 | from loguru import logger |
| 17 | 17 | ||
| 18 | from ..core import TemplateSection, ChapterStorage | 18 | from ..core import TemplateSection, ChapterStorage |
| 19 | -from ..ir import ALLOWED_BLOCK_TYPES, ALLOWED_INLINE_MARKS, IRValidator | 19 | +from ..ir import ( |
| 20 | + ALLOWED_BLOCK_TYPES, | ||
| 21 | + ALLOWED_INLINE_MARKS, | ||
| 22 | + ENGINE_AGENT_TITLES, | ||
| 23 | + IRValidator, | ||
| 24 | +) | ||
| 20 | from ..prompts import ( | 25 | from ..prompts import ( |
| 21 | SYSTEM_PROMPT_CHAPTER_JSON, | 26 | SYSTEM_PROMPT_CHAPTER_JSON, |
| 22 | SYSTEM_PROMPT_CHAPTER_JSON_REPAIR, | 27 | SYSTEM_PROMPT_CHAPTER_JSON_REPAIR, |
| @@ -1081,7 +1086,13 @@ class ChapterGenerationNode(BaseNode): | @@ -1081,7 +1086,13 @@ class ChapterGenerationNode(BaseNode): | ||
| 1081 | block["rows"] = rows | 1086 | block["rows"] = rows |
| 1082 | 1087 | ||
| 1083 | def _sanitize_engine_quote_block(self, block: Dict[str, Any]): | 1088 | def _sanitize_engine_quote_block(self, block: Dict[str, Any]): |
| 1084 | - """engineQuote内部仅允许paragraph,且仅保留bold/italic样式""" | 1089 | + """engineQuote仅用于单Agent发言,内部仅允许paragraph且title需锁定Agent名称""" |
| 1090 | + engine_raw = block.get("engine") | ||
| 1091 | + engine = engine_raw.lower() if isinstance(engine_raw, str) else None | ||
| 1092 | + if engine not in ENGINE_AGENT_TITLES: | ||
| 1093 | + engine = "insight" | ||
| 1094 | + block["engine"] = engine | ||
| 1095 | + block["title"] = ENGINE_AGENT_TITLES[engine] | ||
| 1085 | allowed_marks = {"bold", "italic"} | 1096 | allowed_marks = {"bold", "italic"} |
| 1086 | raw_blocks = block.get("blocks") | 1097 | raw_blocks = block.get("blocks") |
| 1087 | candidates = raw_blocks if isinstance(raw_blocks, list) else ([raw_blocks] if raw_blocks else []) | 1098 | candidates = raw_blocks if isinstance(raw_blocks, list) else ([raw_blocks] if raw_blocks else []) |
| @@ -306,7 +306,7 @@ SYSTEM_PROMPT_CHAPTER_JSON = f""" | @@ -306,7 +306,7 @@ SYSTEM_PROMPT_CHAPTER_JSON = f""" | ||
| 306 | 5. 表格需给出rows/cells/align,KPI卡请使用kpiGrid,分割线用hr。 | 306 | 5. 表格需给出rows/cells/align,KPI卡请使用kpiGrid,分割线用hr。 |
| 307 | 6. 如需引用图表/交互组件,统一用widgetType表示(例如chart.js/line、chart.js/doughnut)。 | 307 | 6. 如需引用图表/交互组件,统一用widgetType表示(例如chart.js/line、chart.js/doughnut)。 |
| 308 | 7. 鼓励结合outline中列出的子标题,生成多层heading与细粒度内容,同时可补充callout、blockquote等。 | 308 | 7. 鼓励结合outline中列出的子标题,生成多层heading与细粒度内容,同时可补充callout、blockquote等。 |
| 309 | -8. 如需标注某个引擎的原话,请用 block.type="engineQuote",engine 取值 insight/media/query(仅限这三种),内部 blocks 只允许 paragraph,paragraph.inlines 的 marks 仅可使用 bold/italic(可留空),禁止在 engineQuote 中放表格/图表/引用/公式等。 | 309 | +8. engineQuote 仅用于呈现单Agent的原话:使用 block.type="engineQuote",engine 取值 insight/media/query,title 必须固定为对应Agent名字(insight->Insight Agent,media->Media Agent,query->Query Agent,不可自定义),内部 blocks 只允许 paragraph,paragraph.inlines 的 marks 仅可使用 bold/italic(可留空),禁止在 engineQuote 中放表格/图表/引用/公式等。 |
| 310 | 9. 如果chapterPlan中包含target/min/max或sections细分预算,请尽量贴合,必要时在notes允许的范围内突破,同时在结构上体现详略; | 310 | 9. 如果chapterPlan中包含target/min/max或sections细分预算,请尽量贴合,必要时在notes允许的范围内突破,同时在结构上体现详略; |
| 311 | 10. 一级标题需使用中文数字(“一、二、三”),二级标题使用阿拉伯数字(“1.1、1.2”),heading.text中直接写好编号,与outline顺序对应; | 311 | 10. 一级标题需使用中文数字(“一、二、三”),二级标题使用阿拉伯数字(“1.1、1.2”),heading.text中直接写好编号,与outline顺序对应; |
| 312 | 11. 严禁输出外部图片/AI生图链接,仅可使用Chart.js图表、表格、色块、callout等HTML原生组件;如需视觉辅助请改为文字描述或数据表; | 312 | 11. 严禁输出外部图片/AI生图链接,仅可使用Chart.js图表、表格、色块、callout等HTML原生组件;如需视觉辅助请改为文字描述或数据表; |
| @@ -20,6 +20,7 @@ from pathlib import Path | @@ -20,6 +20,7 @@ from pathlib import Path | ||
| 20 | from typing import Any, Dict, List | 20 | from typing import Any, Dict, List |
| 21 | from loguru import logger | 21 | from loguru import logger |
| 22 | 22 | ||
| 23 | +from ReportEngine.ir.schema import ENGINE_AGENT_TITLES | ||
| 23 | from ReportEngine.utils.chart_validator import ( | 24 | from ReportEngine.utils.chart_validator import ( |
| 24 | ChartValidator, | 25 | ChartValidator, |
| 25 | ChartRepairer, | 26 | ChartRepairer, |
| @@ -1287,15 +1288,10 @@ class HTMLRenderer: | @@ -1287,15 +1288,10 @@ class HTMLRenderer: | ||
| 1287 | def _render_engine_quote(self, block: Dict[str, Any]) -> str: | 1288 | def _render_engine_quote(self, block: Dict[str, Any]) -> str: |
| 1288 | """渲染单Engine发言块,带独立配色与标题""" | 1289 | """渲染单Engine发言块,带独立配色与标题""" |
| 1289 | engine_raw = (block.get("engine") or "").lower() | 1290 | engine_raw = (block.get("engine") or "").lower() |
| 1290 | - engine = engine_raw if engine_raw in {"insight", "media", "query"} else "insight" | ||
| 1291 | - title = ( | ||
| 1292 | - block.get("title") | ||
| 1293 | - or { | ||
| 1294 | - "insight": "Insight Engine 发言", | ||
| 1295 | - "media": "Media Engine 发言", | ||
| 1296 | - "query": "Query Engine 发言", | ||
| 1297 | - }.get(engine, "Engine 发言") | ||
| 1298 | - ) | 1291 | + engine = engine_raw if engine_raw in ENGINE_AGENT_TITLES else "insight" |
| 1292 | + expected_title = ENGINE_AGENT_TITLES.get(engine, ENGINE_AGENT_TITLES["insight"]) | ||
| 1293 | + title_raw = block.get("title") if isinstance(block.get("title"), str) else "" | ||
| 1294 | + title = title_raw if title_raw == expected_title else expected_title | ||
| 1299 | inner = self._render_blocks(block.get("blocks", [])) | 1295 | inner = self._render_blocks(block.get("blocks", [])) |
| 1300 | return ( | 1296 | return ( |
| 1301 | f'<div class="engine-quote engine-{self._escape_attr(engine)}">' | 1297 | f'<div class="engine-quote engine-{self._escape_attr(engine)}">' |
| @@ -63,6 +63,7 @@ class ChapterSanitizationTestCase(unittest.TestCase): | @@ -63,6 +63,7 @@ class ChapterSanitizationTestCase(unittest.TestCase): | ||
| 63 | { | 63 | { |
| 64 | "type": "engineQuote", | 64 | "type": "engineQuote", |
| 65 | "engine": "insight", | 65 | "engine": "insight", |
| 66 | + "title": "Insight Agent", | ||
| 66 | "blocks": [ | 67 | "blocks": [ |
| 67 | { | 68 | { |
| 68 | "type": "paragraph", | 69 | "type": "paragraph", |
| @@ -87,6 +88,7 @@ class ChapterSanitizationTestCase(unittest.TestCase): | @@ -87,6 +88,7 @@ class ChapterSanitizationTestCase(unittest.TestCase): | ||
| 87 | { | 88 | { |
| 88 | "type": "engineQuote", | 89 | "type": "engineQuote", |
| 89 | "engine": "media", | 90 | "engine": "media", |
| 91 | + "title": "Media Agent", | ||
| 90 | "blocks": [ | 92 | "blocks": [ |
| 91 | {"type": "math", "latex": "x=y"}, | 93 | {"type": "math", "latex": "x=y"}, |
| 92 | { | 94 | { |
| @@ -129,6 +131,7 @@ class ChapterSanitizationTestCase(unittest.TestCase): | @@ -129,6 +131,7 @@ class ChapterSanitizationTestCase(unittest.TestCase): | ||
| 129 | node._sanitize_chapter_blocks(chapter) | 131 | node._sanitize_chapter_blocks(chapter) |
| 130 | eq_block = chapter["blocks"][0] | 132 | eq_block = chapter["blocks"][0] |
| 131 | self.assertEqual(eq_block["type"], "engineQuote") | 133 | self.assertEqual(eq_block["type"], "engineQuote") |
| 134 | + self.assertEqual(eq_block.get("title"), "Query Agent") | ||
| 132 | inner_blocks = eq_block.get("blocks") | 135 | inner_blocks = eq_block.get("blocks") |
| 133 | self.assertTrue(all(b.get("type") == "paragraph" for b in inner_blocks)) | 136 | self.assertTrue(all(b.get("type") == "paragraph" for b in inner_blocks)) |
| 134 | marks = inner_blocks[0]["inlines"][0].get("marks") | 137 | marks = inner_blocks[0]["inlines"][0].get("marks") |
| @@ -136,6 +139,31 @@ class ChapterSanitizationTestCase(unittest.TestCase): | @@ -136,6 +139,31 @@ class ChapterSanitizationTestCase(unittest.TestCase): | ||
| 136 | marks2 = inner_blocks[1]["inlines"][0].get("marks") | 139 | marks2 = inner_blocks[1]["inlines"][0].get("marks") |
| 137 | self.assertEqual(marks2, [{"type": "bold"}]) | 140 | self.assertEqual(marks2, [{"type": "bold"}]) |
| 138 | 141 | ||
| 142 | + def test_engine_quote_title_must_match_engine(self): | ||
| 143 | + validator = IRValidator() | ||
| 144 | + chapter = { | ||
| 145 | + "chapterId": "S1", | ||
| 146 | + "title": "Engine 引用校验", | ||
| 147 | + "anchor": "section-1", | ||
| 148 | + "order": 1, | ||
| 149 | + "blocks": [ | ||
| 150 | + { | ||
| 151 | + "type": "engineQuote", | ||
| 152 | + "engine": "query", | ||
| 153 | + "title": "Media Agent", | ||
| 154 | + "blocks": [ | ||
| 155 | + { | ||
| 156 | + "type": "paragraph", | ||
| 157 | + "inlines": [{"text": "错误标题"}], | ||
| 158 | + } | ||
| 159 | + ], | ||
| 160 | + } | ||
| 161 | + ], | ||
| 162 | + } | ||
| 163 | + valid, errors = validator.validate_chapter(chapter) | ||
| 164 | + self.assertFalse(valid) | ||
| 165 | + self.assertTrue(any("title 必须与engine一致" in err for err in errors)) | ||
| 166 | + | ||
| 139 | 167 | ||
| 140 | if __name__ == "__main__": | 168 | if __name__ == "__main__": |
| 141 | unittest.main() | 169 | unittest.main() |
-
Please register or login to post a comment