stitcher.py
4.29 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
"""
章节装订器:负责把多个章节JSON合并为整本IR。
DocumentComposer 会注入缺失锚点、统一顺序,并补齐 IR 级元数据。
"""
from __future__ import annotations

from datetime import datetime, timezone
from typing import Dict, List, Set

from ..ir import IR_VERSION
class DocumentComposer:
    """
    Minimal binder that stitches chapter payloads into one Document IR.

    Responsibilities:
        - sort chapters by ``order`` and fill in a default ``chapterId``;
        - give every chapter an anchor that is unique within the document;
        - stamp the document with the IR version and a generation timestamp.
    """

    def __init__(self):
        """Create a composer with an empty registry of issued anchors."""
        self._seen_anchors: Set[str] = set()

    def build_document(
        self,
        report_id: str,
        metadata: Dict[str, object],
        chapters: List[Dict[str, object]],
    ) -> Dict[str, object]:
        """
        Assemble the full Document IR from the given chapters.

        Chapters are sorted by ``order`` and normalised (``chapterId``,
        ``anchor``, ``order`` and, for error placeholders, a heading block).
        The chapter dicts are updated in place; the ``chapters`` list itself
        is not reordered.

        Args:
            report_id: Identifier of the report being built.
            metadata: Global metadata. ``toc``, ``themeTokens``, ``assets``
                and ``generatedAt`` are read from it when present.
            chapters: Chapter payloads.

        Returns:
            dict: Document with ``version``, ``reportId``, ``metadata``,
            ``themeTokens``, ``chapters`` and ``assets`` keys.
        """
        toc_anchor_map = self._build_toc_anchor_map(metadata)
        ordered = self._prepare_chapters(chapters, toc_anchor_map)

        # Keep a caller-supplied timestamp. Otherwise emit the current UTC
        # time in the same naive-ISO-plus-"Z" shape as before, without the
        # deprecated ``datetime.utcnow()``.
        generated_at = metadata.get("generatedAt") or (
            datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"
        )

        return {
            "version": IR_VERSION,
            "reportId": report_id,
            "metadata": {**metadata, "generatedAt": generated_at},
            "themeTokens": metadata.get("themeTokens", {}),
            "chapters": ordered,
            "assets": metadata.get("assets", {}),
        }

    def _prepare_chapters(
        self,
        chapters: List[Dict[str, object]],
        toc_anchor_map: Dict[str, str],
    ) -> List[Dict[str, object]]:
        """Sort the chapters of one document and fill in ids, anchors and order."""
        # Anchors only need to be unique inside a single document. Starting
        # from an empty registry keeps a reused composer from suffixing the
        # anchors of later documents because of earlier ones.
        self._seen_anchors = set()

        ordered = sorted(chapters, key=self._order_key)
        for idx, chapter in enumerate(ordered, start=1):
            chapter.setdefault("chapterId", f"S{idx}")
            chapter_id = chapter.get("chapterId")
            # Priority: 1. anchor configured in the TOC, 2. the chapter's
            # own anchor, 3. positional default.
            anchor = (
                toc_anchor_map.get(chapter_id)
                or chapter.get("anchor")
                or f"section-{idx}"
            )
            chapter["anchor"] = self._ensure_unique_anchor(anchor)
            # Covers both a missing key and an explicit ``None``.
            if chapter.get("order") is None:
                chapter["order"] = idx * 10
            if chapter.get("errorPlaceholder"):
                self._ensure_heading_block(chapter)
        return ordered

    @staticmethod
    def _order_key(chapter: Dict[str, object]) -> float:
        """Return the sort key of a chapter; missing or non-numeric order counts as 0."""
        order = chapter.get("order", 0)
        return order if isinstance(order, (int, float)) else 0

    def _ensure_unique_anchor(self, anchor: str) -> str:
        """Return ``anchor``, suffixed with ``-2``, ``-3``, ... if already issued."""
        candidate = anchor
        suffix = 2
        while candidate in self._seen_anchors:
            candidate = f"{anchor}-{suffix}"
            suffix += 1
        self._seen_anchors.add(candidate)
        return candidate

    def _build_toc_anchor_map(self, metadata: Dict[str, object]) -> Dict[str, str]:
        """
        Build the chapterId -> anchor mapping from ``metadata["toc"]["customEntries"]``.

        Args:
            metadata: Document metadata.

        Returns:
            dict: Mapping for every dict entry that has both a truthy
            ``chapterId`` and a truthy ``anchor``. Empty when ``toc`` is not
            a dict or ``customEntries`` is not a list or tuple.
        """
        toc_config = metadata.get("toc")
        if not isinstance(toc_config, dict):
            return {}
        custom_entries = toc_config.get("customEntries")
        if not isinstance(custom_entries, (list, tuple)):
            return {}
        return {
            entry["chapterId"]: entry["anchor"]
            for entry in custom_entries
            if isinstance(entry, dict)
            and entry.get("chapterId")
            and entry.get("anchor")
        }

    def _ensure_heading_block(self, chapter: Dict[str, object]) -> None:
        """Ensure a placeholder chapter has a heading block, adding one if absent."""
        blocks = chapter.get("blocks")
        has_block_list = isinstance(blocks, list)
        if has_block_list and any(
            isinstance(block, dict) and block.get("type") == "heading"
            for block in blocks
        ):
            return

        heading = {
            "type": "heading",
            "level": 2,
            "text": chapter.get("title") or "占位章节",
            "anchor": chapter.get("anchor"),
        }
        if has_block_list:
            blocks.insert(0, heading)
        else:
            # A missing or non-list "blocks" value is replaced outright.
            chapter["blocks"] = [heading]
# Public API of this module: only the composer class is exported.
__all__ = ["DocumentComposer"]