stitcher.py 3.22 KB
"""
章节装订器:负责把多个章节JSON合并为整本IR。

DocumentComposer 会注入缺失锚点、统一顺序,并补齐 IR 级元数据。
"""

from __future__ import annotations

from datetime import datetime
from typing import Dict, List, Set

from ..ir import IR_VERSION


class DocumentComposer:
    """
    将章节拼接成Document IR的简单装订器。

    作用:
        - 按order排序章节,补充默认chapterId;
        - 防止anchor重复,生成全局唯一锚点;
        - 注入 IR 版本与生成时间戳。
    """

    def __init__(self):
        """初始化装订器并记录已使用的锚点,避免重复"""
        self._seen_anchors: Set[str] = set()

    def build_document(
        self,
        report_id: str,
        metadata: Dict[str, object],
        chapters: List[Dict[str, object]],
    ) -> Dict[str, object]:
        """
        把所有章节按order排序并注入唯一锚点,形成整本IR。

        同时合并 metadata/themeTokens/assets,供渲染器直接消费。

        参数:
            report_id: 本次报告ID。
            metadata: 全局元信息(标题、主题、toc等)。
            chapters: 章节payload列表。

        返回:
            dict: 满足渲染器需求的Document IR。
        """
        ordered = sorted(chapters, key=lambda c: c.get("order", 0))
        for idx, chapter in enumerate(ordered, start=1):
            chapter.setdefault("chapterId", f"S{idx}")
            anchor = chapter.get("anchor") or f"section-{idx}"
            chapter["anchor"] = self._ensure_unique_anchor(anchor)
            chapter.setdefault("order", idx * 10)
            if chapter.get("errorPlaceholder"):
                self._ensure_heading_block(chapter)

        document = {
            "version": IR_VERSION,
            "reportId": report_id,
            "metadata": {
                **metadata,
                "generatedAt": metadata.get("generatedAt")
                or datetime.utcnow().isoformat() + "Z",
            },
            "themeTokens": metadata.get("themeTokens", {}),
            "chapters": ordered,
            "assets": metadata.get("assets", {}),
        }
        return document

    def _ensure_unique_anchor(self, anchor: str) -> str:
        """若存在重复锚点则追加序号,确保全局唯一。"""
        base = anchor
        counter = 2
        while anchor in self._seen_anchors:
            anchor = f"{base}-{counter}"
            counter += 1
        self._seen_anchors.add(anchor)
        return anchor

    def _ensure_heading_block(self, chapter: Dict[str, object]) -> None:
        """保证占位章节仍然拥有可用于目录的heading block。"""
        blocks = chapter.get("blocks")
        if isinstance(blocks, list):
            for block in blocks:
                if isinstance(block, dict) and block.get("type") == "heading":
                    return
        heading = {
            "type": "heading",
            "level": 2,
            "text": chapter.get("title") or "占位章节",
            "anchor": chapter.get("anchor"),
        }
        if isinstance(blocks, list):
            blocks.insert(0, heading)
        else:
            chapter["blocks"] = [heading]


__all__ = ["DocumentComposer"]