stitcher.py
3.22 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
"""
章节装订器:负责把多个章节JSON合并为整本IR。
DocumentComposer 会注入缺失锚点、统一顺序,并补齐 IR 级元数据。
"""
from __future__ import annotations

from datetime import datetime, timezone
from typing import Dict, List, Set

from ..ir import IR_VERSION
class DocumentComposer:
    """
    Minimal binder that stitches chapter payloads into one Document IR.

    Responsibilities:
        - sort chapters by ``order`` and fill in a default ``chapterId``;
        - de-duplicate anchors so each one is unique within the document;
        - stamp the IR version and a generation timestamp.
    """

    def __init__(self):
        """Create a composer with an empty registry of already-used anchors."""
        self._seen_anchors: Set[str] = set()

    def build_document(
        self,
        report_id: str,
        metadata: Dict[str, object],
        chapters: List[Dict[str, object]],
    ) -> Dict[str, object]:
        """
        Sort the chapters, give each a unique anchor and assemble the full IR.

        The chapter dicts are updated in place (``chapterId``, ``anchor``,
        ``order`` and, for error placeholders, a heading block); the returned
        document references those same dict objects.

        Args:
            report_id: Identifier of the report being assembled.
            metadata: Document-level metadata. ``themeTokens`` and ``assets``
                are additionally surfaced as top-level keys, and an existing
                truthy ``generatedAt`` is kept as-is. ``None`` is treated as
                an empty mapping.
            chapters: Chapter payloads. ``None`` is treated as an empty list.

        Returns:
            dict: The Document IR with ``version``, ``reportId``, ``metadata``,
            ``themeTokens``, ``chapters`` and ``assets`` keys.
        """
        # Anchors only have to be unique within one document. Start from a
        # clean registry so that reusing the composer for a second document
        # does not suffix anchors that merely collided with an earlier one.
        self._seen_anchors = set()
        metadata = metadata or {}

        ordered = sorted(chapters or [], key=self._order_key)
        for idx, chapter in enumerate(ordered, start=1):
            chapter.setdefault("chapterId", f"S{idx}")
            anchor = chapter.get("anchor") or f"section-{idx}"
            chapter["anchor"] = self._ensure_unique_anchor(anchor)
            # Fill in a missing order; an explicit None counts as missing.
            if chapter.get("order") is None:
                chapter["order"] = idx * 10
            if chapter.get("errorPlaceholder"):
                self._ensure_heading_block(chapter)

        return {
            "version": IR_VERSION,
            "reportId": report_id,
            "metadata": {
                **metadata,
                "generatedAt": metadata.get("generatedAt")
                or self._utc_timestamp(),
            },
            "themeTokens": metadata.get("themeTokens", {}),
            "chapters": ordered,
            "assets": metadata.get("assets", {}),
        }

    @staticmethod
    def _order_key(chapter: Dict[str, object]) -> float:
        """Return a numeric sort key for a chapter; unusable orders map to 0."""
        order = chapter.get("order", 0)
        if isinstance(order, (int, float)):
            return order
        try:
            # Accept numeric strings such as "30" so that mixed int/str
            # orders still sort numerically instead of raising TypeError.
            return float(order)
        except (TypeError, ValueError):
            return 0

    @staticmethod
    def _utc_timestamp() -> str:
        """Return the current UTC time as a naive ISO-8601 string plus 'Z'."""
        # datetime.utcnow() is deprecated since Python 3.12; dropping tzinfo
        # keeps the output free of a "+00:00" offset before "Z" is appended.
        now = datetime.now(timezone.utc).replace(tzinfo=None)
        return now.isoformat() + "Z"

    def _ensure_unique_anchor(self, anchor: str) -> str:
        """Return ``anchor``, suffixed with ``-2``, ``-3``, ... if already used."""
        candidate = anchor
        suffix = 2
        while candidate in self._seen_anchors:
            candidate = f"{anchor}-{suffix}"
            suffix += 1
        self._seen_anchors.add(candidate)
        return candidate

    def _ensure_heading_block(self, chapter: Dict[str, object]) -> None:
        """Make sure a placeholder chapter has a heading block.

        If ``chapter["blocks"]` is a list that already contains a dict block
        of type ``"heading"``, nothing changes. Otherwise a level-2 heading
        built from the chapter title and anchor is inserted at the front of
        the list, or becomes the sole block when ``blocks`` is not a list.
        """
        blocks = chapter.get("blocks")
        has_block_list = isinstance(blocks, list)
        if has_block_list and any(
            isinstance(block, dict) and block.get("type") == "heading"
            for block in blocks
        ):
            return

        heading = {
            "type": "heading",
            "level": 2,
            "text": chapter.get("title") or "占位章节",
            "anchor": chapter.get("anchor"),
        }
        if has_block_list:
            blocks.insert(0, heading)
        else:
            chapter["blocks"] = [heading]
# Public API: the only name exported by a star-import of this module.
__all__ = ["DocumentComposer"]