马一丁

Fixed the display of charts and lists

@@ -885,7 +885,12 @@ class HTMLRenderer: @@ -885,7 +885,12 @@ class HTMLRenderer:
885 """粗略判断dict是否符合block结构""" 885 """粗略判断dict是否符合block结构"""
886 if not isinstance(payload, dict): 886 if not isinstance(payload, dict):
887 return False 887 return False
888 - if "type" in payload and isinstance(payload["type"], str): 888 + block_type = payload.get("type")
  889 + if block_type and isinstance(block_type, str):
  890 + # 排除内联类型(inlineRun 等),它们不是块级元素
  891 + inline_types = {"inlineRun", "inline", "text"}
  892 + if block_type in inline_types:
  893 + return False
889 return True 894 return True
890 structural_keys = {"blocks", "rows", "items", "widgetId", "widgetType", "data"} 895 structural_keys = {"blocks", "rows", "items", "widgetId", "widgetType", "data"}
891 return any(key in payload for key in structural_keys) 896 return any(key in payload for key in structural_keys)
@@ -896,6 +901,12 @@ class HTMLRenderer: @@ -896,6 +901,12 @@ class HTMLRenderer:
896 if isinstance(payload, dict): 901 if isinstance(payload, dict):
897 block_list = payload.get("blocks") 902 block_list = payload.get("blocks")
898 block_type = payload.get("type") 903 block_type = payload.get("type")
  904 +
  905 + # 排除内联类型,它们不是块级元素
  906 + inline_types = {"inlineRun", "inline", "text"}
  907 + if block_type in inline_types:
  908 + return collected
  909 +
899 if isinstance(block_list, list) and not block_type: 910 if isinstance(block_list, list) and not block_type:
900 for candidate in block_list: 911 for candidate in block_list:
901 collected.extend(self._collect_blocks_from_payload(candidate)) 912 collected.extend(self._collect_blocks_from_payload(candidate))
@@ -2933,6 +2944,19 @@ class HTMLRenderer: @@ -2933,6 +2944,19 @@ class HTMLRenderer:
2933 if not isinstance(run, dict): 2944 if not isinstance(run, dict):
2934 return ("" if run is None else str(run)), [] 2945 return ("" if run is None else str(run)), []
2935 2946
  2947 + # 处理 inlineRun 类型:递归展开其 inlines 数组
  2948 + if run.get("type") == "inlineRun":
  2949 + inner_inlines = run.get("inlines") or []
  2950 + outer_marks = run.get("marks") or []
  2951 + # 递归合并所有内部 inlines 的文本
  2952 + texts = []
  2953 + all_marks = list(outer_marks)
  2954 + for inline in inner_inlines:
  2955 + inner_text, inner_marks = self._normalize_inline_payload(inline)
  2956 + texts.append(inner_text)
  2957 + all_marks.extend(inner_marks)
  2958 + return "".join(texts), all_marks
  2959 +
2936 marks = list(run.get("marks") or []) 2960 marks = list(run.get("marks") or [])
2937 text_value: Any = run.get("text", "") 2961 text_value: Any = run.get("text", "")
2938 seen: set[int] = set() 2962 seen: set[int] = set()
@@ -2980,6 +3004,9 @@ class HTMLRenderer: @@ -2980,6 +3004,9 @@ class HTMLRenderer:
2980 else: 3004 else:
2981 inline_payload = self._coerce_inline_payload(payload) 3005 inline_payload = self._coerce_inline_payload(payload)
2982 if inline_payload: 3006 if inline_payload:
  3007 + # 处理 inlineRun 类型
  3008 + if inline_payload.get("type") == "inlineRun":
  3009 + return self._normalize_inline_payload(inline_payload)
2983 nested_text = inline_payload.get("text") 3010 nested_text = inline_payload.get("text")
2984 if nested_text is not None: 3011 if nested_text is not None:
2985 text_value = nested_text 3012 text_value = nested_text
@@ -3073,9 +3100,12 @@ class HTMLRenderer: @@ -3073,9 +3100,12 @@ class HTMLRenderer:
3073 if not isinstance(payload, dict): 3100 if not isinstance(payload, dict):
3074 return None 3101 return None
3075 inline_type = payload.get("type") 3102 inline_type = payload.get("type")
  3103 + # 支持 inlineRun 类型:包含嵌套的 inlines 数组
  3104 + if inline_type == "inlineRun":
  3105 + return payload
3076 if inline_type and inline_type not in {"inline", "text"}: 3106 if inline_type and inline_type not in {"inline", "text"}:
3077 return None 3107 return None
3078 - if "text" not in payload and "marks" not in payload: 3108 + if "text" not in payload and "marks" not in payload and "inlines" not in payload:
3079 return None 3109 return None
3080 return payload 3110 return payload
3081 3111
@@ -647,11 +647,29 @@ class MarkdownRenderer: @@ -647,11 +647,29 @@ class MarkdownRenderer:
647 647
648 def _render_inline_run(self, run: Any, for_table: bool = False) -> str: 648 def _render_inline_run(self, run: Any, for_table: bool = False) -> str:
649 if isinstance(run, dict): 649 if isinstance(run, dict):
  650 + # 处理 inlineRun 类型:嵌套的 inlines 数组
  651 + if run.get("type") == "inlineRun":
  652 + inner_inlines = run.get("inlines") or []
  653 + outer_marks = run.get("marks") or []
  654 + # 递归渲染内部的 inlines
  655 + inner_text = self._render_inlines(inner_inlines, for_table=for_table)
  656 + # 应用外层的 marks
  657 + result = inner_text
  658 + for mark in outer_marks:
  659 + result = self._apply_mark(result, mark)
  660 + return result
650 text = run.get("text", "") 661 text = run.get("text", "")
651 marks = run.get("marks") or [] 662 marks = run.get("marks") or []
652 else: 663 else:
653 text = run if isinstance(run, str) else "" 664 text = run if isinstance(run, str) else ""
654 marks = [] 665 marks = []
  666 +
  667 + # 尝试检测并解析被错误序列化为字符串的 inlineRun JSON
  668 + if isinstance(text, str) and text.startswith('{"type": "inlineRun"'):
  669 + parsed = self._try_parse_inline_run_string(text)
  670 + if parsed:
  671 + return self._render_inline_run(parsed, for_table=for_table)
  672 +
655 result = self._escape_text(text, for_table=for_table) 673 result = self._escape_text(text, for_table=for_table)
656 for mark in marks: 674 for mark in marks:
657 if not isinstance(mark, dict): 675 if not isinstance(mark, dict):
@@ -683,6 +701,66 @@ class MarkdownRenderer: @@ -683,6 +701,66 @@ class MarkdownRenderer:
683 # 颜色/字体等非通用标记直接降级为纯文本 701 # 颜色/字体等非通用标记直接降级为纯文本
684 return result 702 return result
685 703
  704 + def _apply_mark(self, text: str, mark: Any) -> str:
  705 + """
  706 + 对文本应用单个 mark 格式。
  707 +
  708 + 用于处理 inlineRun 类型的外层 marks。
  709 + """
  710 + if not isinstance(mark, dict):
  711 + return text
  712 + mtype = mark.get("type")
  713 + if mtype == "bold":
  714 + return f"**{text}**"
  715 + elif mtype == "italic":
  716 + return f"*{text}*"
  717 + elif mtype == "underline":
  718 + return f"__{text}__"
  719 + elif mtype == "strike":
  720 + return f"~~{text}~~"
  721 + elif mtype == "code":
  722 + return f"`{text}`"
  723 + elif mtype == "link":
  724 + href = mark.get("href") or mark.get("value")
  725 + href = str(href) if href else ""
  726 + return f"[{text}]({href})" if href else text
  727 + elif mtype == "highlight":
  728 + return f"=={text}=="
  729 + elif mtype == "subscript":
  730 + return f"~{text}~"
  731 + elif mtype == "superscript":
  732 + return f"^{text}^"
  733 + elif mtype == "math":
  734 + latex = self._normalize_math(mark.get("value") or text)
  735 + return f"${latex}$" if latex else text
  736 + return text
  737 +
  738 + def _try_parse_inline_run_string(self, text: str) -> dict | None:
  739 + """
  740 + 尝试解析被错误序列化为字符串的 inlineRun JSON。
  741 +
  742 + 某些 LLM 生成的内容会将 inlineRun 结构意外地作为字符串
  743 + 存入 text 字段,本方法尝试识别并解析这种情况。
  744 +
  745 + 参数:
  746 + text: 可能包含 JSON 的字符串
  747 +
  748 + 返回:
  749 + dict | None: 解析成功返回 inlineRun 字典,否则返回 None
  750 + """
  751 + if not text or not isinstance(text, str):
  752 + return None
  753 + text = text.strip()
  754 + if not text.startswith('{"type": "inlineRun"'):
  755 + return None
  756 + try:
  757 + parsed = json.loads(text)
  758 + if isinstance(parsed, dict) and parsed.get("type") == "inlineRun":
  759 + return parsed
  760 + except json.JSONDecodeError:
  761 + pass
  762 + return None
  763 +
686 def _is_heading_duplicate(self, block: Dict[str, Any], chapter_title: str | None) -> bool: 764 def _is_heading_duplicate(self, block: Dict[str, Any], chapter_title: str | None) -> bool:
687 """判断首个heading是否与章节标题重复""" 765 """判断首个heading是否与章节标题重复"""
688 if not isinstance(block, dict) or block.get("type") != "heading": 766 if not isinstance(block, dict) or block.get("type") != "heading":