Fixed an issue where the level-1 chapter heading was rendered twice in Markdown output
Showing 1 changed file with 46 additions and 2 deletions
| @@ -42,10 +42,17 @@ class MarkdownRenderer: | @@ -42,10 +42,17 @@ class MarkdownRenderer: | ||
| 42 | def _render_chapter(self, chapter: Dict[str, Any]) -> str: | 42 | def _render_chapter(self, chapter: Dict[str, Any]) -> str: |
| 43 | lines: List[str] = [] | 43 | lines: List[str] = [] |
| 44 | title = chapter.get("title") or chapter.get("chapterId") | 44 | title = chapter.get("title") or chapter.get("chapterId") |
| 45 | + blocks = chapter.get("blocks", []) if isinstance(chapter.get("blocks"), list) else [] | ||
| 46 | + | ||
| 47 | + # 章节标题使用一级标题格式,并避免与首个heading重复 | ||
| 45 | if title: | 48 | if title: |
| 46 | - lines.append(f"## {self._escape_text(title)}") | 49 | + lines.append(f"# {self._escape_text(title)}") |
| 47 | lines.append("") | 50 | lines.append("") |
| 48 | - body = self._render_blocks(chapter.get("blocks", [])) | 51 | + |
| 52 | + if blocks and self._is_heading_duplicate(blocks[0], title): | ||
| 53 | + blocks = blocks[1:] | ||
| 54 | + | ||
| 55 | + body = self._render_blocks(blocks) | ||
| 49 | if body: | 56 | if body: |
| 50 | lines.append(body) | 57 | lines.append(body) |
| 51 | return "\n".join(lines).strip() | 58 | return "\n".join(lines).strip() |
| @@ -456,6 +463,43 @@ class MarkdownRenderer: | @@ -456,6 +463,43 @@ class MarkdownRenderer: | ||
| 456 | # 颜色/字体等非通用标记直接降级为纯文本 | 463 | # 颜色/字体等非通用标记直接降级为纯文本 |
| 457 | return result | 464 | return result |
| 458 | 465 | ||
| 466 | + def _is_heading_duplicate(self, block: Dict[str, Any], chapter_title: str | None) -> bool: | ||
| 467 | + """判断首个heading是否与章节标题重复""" | ||
| 468 | + if not isinstance(block, dict) or block.get("type") != "heading": | ||
| 469 | + return False | ||
| 470 | + if not chapter_title: | ||
| 471 | + return False | ||
| 472 | + heading_text = block.get("text") or "" | ||
| 473 | + return self._normalize_heading_text(heading_text) == self._normalize_heading_text(chapter_title) | ||
| 474 | + | ||
| 475 | + def _normalize_heading_text(self, text: Any) -> str: | ||
| 476 | + """去除序号前缀并统一空白""" | ||
| 477 | + if not isinstance(text, str): | ||
| 478 | + return "" | ||
| 479 | + stripped = text.strip() | ||
| 480 | + # 去掉类似“1.”、“1.1”、“一、” | ||
| 481 | + for sep in (" ", "、"): | ||
| 482 | + if sep in stripped: | ||
| 483 | + maybe_prefix, rest = stripped.split(sep, 1) | ||
| 484 | + if self._looks_like_prefix(maybe_prefix): | ||
| 485 | + stripped = rest.strip() | ||
| 486 | + break | ||
| 487 | + else: | ||
| 488 | + parts = stripped.split(".", 1) | ||
| 489 | + if len(parts) == 2 and self._looks_like_prefix(parts[0]): | ||
| 490 | + stripped = parts[1].strip() | ||
| 491 | + return stripped | ||
| 492 | + | ||
| 493 | + @staticmethod | ||
| 494 | + def _looks_like_prefix(token: str) -> bool: | ||
| 495 | + """判断token是否像序号前缀""" | ||
| 496 | + if not token: | ||
| 497 | + return False | ||
| 498 | + if token.isdigit(): | ||
| 499 | + return True | ||
| 500 | + chinese_numerals = set("一二三四五六七八九十零〇壹贰叁肆伍陆柒捌玖拾") | ||
| 501 | + return all(ch in chinese_numerals or ch == "." for ch in token) | ||
| 502 | + | ||
| 459 | def _quote_lines(self, text: str) -> str: | 503 | def _quote_lines(self, text: str) -> str: |
| 460 | if not text: | 504 | if not text: |
| 461 | return "" | 505 | return "" |
-
Please register or log in to post a comment