马一丁

Fixed duplicated chapter headings in Markdown output: the chapter title is now rendered as a level-1 heading, and the chapter's first heading block is skipped when it repeats that title

@@ -42,10 +42,17 @@ class MarkdownRenderer: @@ -42,10 +42,17 @@ class MarkdownRenderer:
42 def _render_chapter(self, chapter: Dict[str, Any]) -> str: 42 def _render_chapter(self, chapter: Dict[str, Any]) -> str:
43 lines: List[str] = [] 43 lines: List[str] = []
44 title = chapter.get("title") or chapter.get("chapterId") 44 title = chapter.get("title") or chapter.get("chapterId")
  45 + blocks = chapter.get("blocks", []) if isinstance(chapter.get("blocks"), list) else []
  46 +
  47 + # 章节标题使用一级标题格式,并避免与首个heading重复
45 if title: 48 if title:
46 - lines.append(f"## {self._escape_text(title)}") 49 + lines.append(f"# {self._escape_text(title)}")
47 lines.append("") 50 lines.append("")
48 - body = self._render_blocks(chapter.get("blocks", [])) 51 +
  52 + if blocks and self._is_heading_duplicate(blocks[0], title):
  53 + blocks = blocks[1:]
  54 +
  55 + body = self._render_blocks(blocks)
49 if body: 56 if body:
50 lines.append(body) 57 lines.append(body)
51 return "\n".join(lines).strip() 58 return "\n".join(lines).strip()
@@ -456,6 +463,43 @@ class MarkdownRenderer: @@ -456,6 +463,43 @@ class MarkdownRenderer:
456 # 颜色/字体等非通用标记直接降级为纯文本 463 # 颜色/字体等非通用标记直接降级为纯文本
457 return result 464 return result
458 465
  466 + def _is_heading_duplicate(self, block: Dict[str, Any], chapter_title: str | None) -> bool:
  467 + """判断首个heading是否与章节标题重复"""
  468 + if not isinstance(block, dict) or block.get("type") != "heading":
  469 + return False
  470 + if not chapter_title:
  471 + return False
  472 + heading_text = block.get("text") or ""
  473 + return self._normalize_heading_text(heading_text) == self._normalize_heading_text(chapter_title)
  474 +
  475 + def _normalize_heading_text(self, text: Any) -> str:
  476 + """去除序号前缀并统一空白"""
  477 + if not isinstance(text, str):
  478 + return ""
  479 + stripped = text.strip()
  480 + # 去掉类似“1.”、“1.1”、“一、”
  481 + for sep in (" ", "、"):
  482 + if sep in stripped:
  483 + maybe_prefix, rest = stripped.split(sep, 1)
  484 + if self._looks_like_prefix(maybe_prefix):
  485 + stripped = rest.strip()
  486 + break
  487 + else:
  488 + parts = stripped.split(".", 1)
  489 + if len(parts) == 2 and self._looks_like_prefix(parts[0]):
  490 + stripped = parts[1].strip()
  491 + return stripped
  492 +
  493 + @staticmethod
  494 + def _looks_like_prefix(token: str) -> bool:
  495 + """判断token是否像序号前缀"""
  496 + if not token:
  497 + return False
  498 + if token.isdigit():
  499 + return True
  500 + chinese_numerals = set("一二三四五六七八九十零〇壹贰叁肆伍陆柒捌玖拾")
  501 + return all(ch in chinese_numerals or ch == "." for ch in token)
  502 +
459 def _quote_lines(self, text: str) -> str: 503 def _quote_lines(self, text: str) -> str:
460 if not text: 504 if not text:
461 return "" 505 return ""