马一丁

Optimize the Rendering of Inline Formulas, Subscripts and Superscripts, Bubble Charts, and Horizontal Bars
@@ -160,6 +160,19 @@ class ChartToSVGConverter: @@ -160,6 +160,19 @@ class ChartToSVGConverter:
160 if props.get('type'): 160 if props.get('type'):
161 chart_type = props['type'] 161 chart_type = props['type']
162 162
  163 + # Chart.js v4已移除horizontalBar类型,这里自动降级为bar并设置横向坐标
  164 + horizontal_bar = False
  165 + if chart_type and str(chart_type).lower() == 'horizontalbar':
  166 + chart_type = 'bar'
  167 + horizontal_bar = True
  168 +
  169 + # 支持通过indexAxis: 'y' 强制横向柱状图
  170 + if isinstance(props, dict):
  171 + options = props.get('options') or {}
  172 + index_axis = (options.get('indexAxis') or props.get('indexAxis') or '').lower()
  173 + if index_axis == 'y':
  174 + horizontal_bar = True
  175 +
163 # 提取数据 176 # 提取数据
164 data = widget_data.get('data', {}) 177 data = widget_data.get('data', {})
165 if not data: 178 if not data:
@@ -172,6 +185,12 @@ class ChartToSVGConverter: @@ -172,6 +185,12 @@ class ChartToSVGConverter:
172 logger.debug("检测到词云图表,跳过chart_to_svg转换") 185 logger.debug("检测到词云图表,跳过chart_to_svg转换")
173 return None 186 return None
174 187
  188 + # 分派渲染方法,特殊处理横向柱状图
  189 + if chart_type == 'bar':
  190 + return self._render_bar(data, props, width, height, dpi, horizontal=horizontal_bar)
  191 + elif chart_type == 'bubble':
  192 + return self._render_bubble(data, props, width, height, dpi)
  193 + else:
175 render_method = getattr(self, f'_render_{chart_type}', None) 194 render_method = getattr(self, f'_render_{chart_type}', None)
176 if not render_method: 195 if not render_method:
177 logger.warning(f"不支持的图表类型: {chart_type}") 196 logger.warning(f"不支持的图表类型: {chart_type}")
@@ -687,9 +706,10 @@ class ChartToSVGConverter: @@ -687,9 +706,10 @@ class ChartToSVGConverter:
687 props: Dict[str, Any], 706 props: Dict[str, Any],
688 width: int, 707 width: int,
689 height: int, 708 height: int,
690 - dpi: int 709 + dpi: int,
  710 + horizontal: bool = False
691 ) -> Optional[str]: 711 ) -> Optional[str]:
692 - """渲染柱状图""" 712 + """渲染柱状图(支持横向barh)"""
693 try: 713 try:
694 labels = data.get('labels', []) 714 labels = data.get('labels', [])
695 datasets = data.get('datasets', []) 715 datasets = data.get('datasets', [])
@@ -703,18 +723,31 @@ class ChartToSVGConverter: @@ -703,18 +723,31 @@ class ChartToSVGConverter:
703 colors = self._get_colors(datasets) 723 colors = self._get_colors(datasets)
704 724
705 # 计算柱子位置 725 # 计算柱子位置
706 - x = np.arange(len(labels)) 726 + positions = np.arange(len(labels))
707 width_bar = 0.8 / len(datasets) if len(datasets) > 1 else 0.6 727 width_bar = 0.8 / len(datasets) if len(datasets) > 1 else 0.6
708 728
709 - # 绘制每个数据系列 729 + # 横向/纵向绘制
710 for i, dataset in enumerate(datasets): 730 for i, dataset in enumerate(datasets):
711 dataset_data = dataset.get('data', []) 731 dataset_data = dataset.get('data', [])
712 label = dataset.get('label', f'系列{i+1}') 732 label = dataset.get('label', f'系列{i+1}')
713 color = colors[i] 733 color = colors[i]
714 734
715 offset = (i - len(datasets)/2 + 0.5) * width_bar 735 offset = (i - len(datasets)/2 + 0.5) * width_bar
  736 +
  737 + if horizontal:
  738 + ax.barh(
  739 + positions + offset,
  740 + dataset_data,
  741 + height=width_bar,
  742 + label=label,
  743 + color=color,
  744 + alpha=0.8,
  745 + edgecolor='white',
  746 + linewidth=0.5
  747 + )
  748 + else:
716 ax.bar( 749 ax.bar(
717 - x + offset, 750 + positions + offset,
718 dataset_data, 751 dataset_data,
719 width_bar, 752 width_bar,
720 label=label, 753 label=label,
@@ -724,23 +757,113 @@ class ChartToSVGConverter: @@ -724,23 +757,113 @@ class ChartToSVGConverter:
724 linewidth=0.5 757 linewidth=0.5
725 ) 758 )
726 759
727 - # 设置x轴标签  
728 - ax.set_xticks(x) 760 + # 轴标签/网格
  761 + if horizontal:
  762 + ax.set_yticks(positions)
  763 + ax.set_yticklabels(labels)
  764 + ax.invert_yaxis() # 与Chart.js横向排列保持一致
  765 + ax.grid(True, alpha=0.3, linestyle='--', axis='x')
  766 + else:
  767 + ax.set_xticks(positions)
729 ax.set_xticklabels(labels, rotation=45, ha='right') 768 ax.set_xticklabels(labels, rotation=45, ha='right')
  769 + ax.grid(True, alpha=0.3, linestyle='--', axis='y')
730 770
731 # 显示图例 771 # 显示图例
732 if len(datasets) > 1: 772 if len(datasets) > 1:
733 ax.legend(loc='best', framealpha=0.9) 773 ax.legend(loc='best', framealpha=0.9)
734 774
735 - # 网格  
736 - ax.grid(True, alpha=0.3, linestyle='--', axis='y')  
737 -  
738 return self._figure_to_svg(fig) 775 return self._figure_to_svg(fig)
739 776
740 except Exception as e: 777 except Exception as e:
741 logger.error(f"渲染柱状图失败: {e}") 778 logger.error(f"渲染柱状图失败: {e}")
742 return None 779 return None
743 780
  781 + def _render_bubble(
  782 + self,
  783 + data: Dict[str, Any],
  784 + props: Dict[str, Any],
  785 + width: int,
  786 + height: int,
  787 + dpi: int
  788 + ) -> Optional[str]:
  789 + """渲染气泡图"""
  790 + try:
  791 + datasets = data.get('datasets', [])
  792 + if not datasets:
  793 + return None
  794 +
  795 + title = props.get('title')
  796 + fig, ax = self._create_figure(width, height, dpi, title)
  797 + colors = self._get_colors(datasets)
  798 +
  799 + def _safe_radius(raw) -> float:
  800 + try:
  801 + val = float(raw)
  802 + return max(val, 0.5)
  803 + except Exception:
  804 + return 1.0
  805 +
  806 + all_x: list[float] = []
  807 + all_y: list[float] = []
  808 + max_r: float = 0.0
  809 +
  810 + for i, dataset in enumerate(datasets):
  811 + points = dataset.get('data', [])
  812 + label = dataset.get('label', f'系列{i+1}')
  813 + color = colors[i]
  814 +
  815 + if points and isinstance(points[0], dict):
  816 + xs = [p.get('x', 0) for p in points]
  817 + ys = [p.get('y', 0) for p in points]
  818 + rs = [_safe_radius(p.get('r', 1)) for p in points]
  819 + else:
  820 + xs = list(range(len(points)))
  821 + ys = points
  822 + rs = [1.0 for _ in points]
  823 +
  824 + all_x.extend(xs)
  825 + all_y.extend(ys)
  826 + if rs:
  827 + max_r = max(max_r, max(rs))
  828 +
  829 + # 适度放大半径,近似Chart.js像素尺寸(动态尺度,避免过大遮挡)
  830 + size_scale = 8.0 if max_r <= 20 else 6.5
  831 + sizes = [(r * size_scale) ** 2 for r in rs]
  832 +
  833 + ax.scatter(
  834 + xs,
  835 + ys,
  836 + s=sizes,
  837 + label=label,
  838 + color=color,
  839 + alpha=0.45,
  840 + edgecolors='white',
  841 + linewidth=0.6
  842 + )
  843 +
  844 + if len(datasets) > 1:
  845 + ax.legend(loc='best', framealpha=0.9)
  846 +
  847 + # 适度留白,避免大气泡被裁切
  848 + if all_x and all_y:
  849 + x_min, x_max = min(all_x), max(all_x)
  850 + y_min, y_max = min(all_y), max(all_y)
  851 + x_span = max(x_max - x_min, 1e-6)
  852 + y_span = max(y_max - y_min, 1e-6)
  853 + pad_x = max(x_span * 0.12, max_r * 1.2)
  854 + pad_y = max(y_span * 0.12, max_r * 1.2)
  855 + ax.set_xlim(x_min - pad_x, x_max + pad_x)
  856 + ax.set_ylim(y_min - pad_y, y_max + pad_y)
  857 + # 额外安全边距
  858 + ax.margins(x=0.05, y=0.05)
  859 +
  860 + ax.grid(True, alpha=0.3, linestyle='--')
  861 + return self._figure_to_svg(fig)
  862 +
  863 + except Exception as e:
  864 + logger.error(f"渲染气泡图失败: {e}", exc_info=True)
  865 + return None
  866 +
744 def _render_pie( 867 def _render_pie(
745 self, 868 self,
746 data: Dict[str, Any], 869 data: Dict[str, Any],
@@ -1263,7 +1263,9 @@ class HTMLRenderer: @@ -1263,7 +1263,9 @@ class HTMLRenderer:
1263 def _render_math(self, block: Dict[str, Any]) -> str: 1263 def _render_math(self, block: Dict[str, Any]) -> str:
1264 """渲染数学公式,占位符交给外部MathJax或后处理""" 1264 """渲染数学公式,占位符交给外部MathJax或后处理"""
1265 latex = self._escape_html(block.get("latex", "")) 1265 latex = self._escape_html(block.get("latex", ""))
1266 - return f'<div class="math-block">$$ {latex} $$</div>' 1266 + math_id = self._escape_attr(block.get("mathId", "")) if block.get("mathId") else ""
  1267 + id_attr = f' data-math-id="{math_id}"' if math_id else ""
  1268 + return f'<div class="math-block"{id_attr}>$$ {latex} $$</div>'
1267 1269
1268 def _render_figure(self, block: Dict[str, Any]) -> str: 1270 def _render_figure(self, block: Dict[str, Any]) -> str:
1269 """根据新规范默认不渲染外部图片,改为友好提示""" 1271 """根据新规范默认不渲染外部图片,改为友好提示"""
@@ -2012,7 +2014,9 @@ class HTMLRenderer: @@ -2012,7 +2014,9 @@ class HTMLRenderer:
2012 latex = math_mark.get("value") 2014 latex = math_mark.get("value")
2013 if not isinstance(latex, str) or not latex.strip(): 2015 if not isinstance(latex, str) or not latex.strip():
2014 latex = text_value 2016 latex = text_value
2015 - return f'<span class="math-inline">\\( {self._escape_html(latex)} \\)</span>' 2017 + math_id = self._escape_attr(run.get("mathId", "")) if run.get("mathId") else ""
  2018 + id_attr = f' data-math-id="{math_id}"' if math_id else ""
  2019 + return f'<span class="math-inline"{id_attr}>\\( {self._escape_html(latex)} \\)</span>'
2016 text = self._escape_html(text_value) 2020 text = self._escape_html(text_value)
2017 styles: List[str] = [] 2021 styles: List[str] = []
2018 prefix: List[str] = [] 2022 prefix: List[str] = []
@@ -535,6 +535,33 @@ class PDFRenderer: @@ -535,6 +535,33 @@ class PDFRenderer:
535 if block_counter is None: 535 if block_counter is None:
536 block_counter = [0] 536 block_counter = [0]
537 537
  538 + def _extract_inline_math_from_inlines(inlines: list):
  539 + """从段落内联节点中提取数学公式"""
  540 + if not isinstance(inlines, list):
  541 + return
  542 + for run in inlines:
  543 + if not isinstance(run, dict):
  544 + continue
  545 + marks = run.get('marks') or []
  546 + math_mark = next((m for m in marks if m.get('type') == 'math'), None)
  547 + if not math_mark:
  548 + continue
  549 + latex = (math_mark.get('value') or run.get('text') or '').strip()
  550 + if not latex:
  551 + continue
  552 + block_counter[0] += 1
  553 + math_id = f"math-inline-{block_counter[0]}"
  554 + try:
  555 + svg_content = self.math_converter.convert_inline_to_svg(latex)
  556 + if svg_content:
  557 + svg_map[math_id] = svg_content
  558 + run['mathId'] = math_id
  559 + logger.debug(f"公式 {math_id} 转换为SVG成功")
  560 + else:
  561 + logger.warning(f"公式 {math_id} 转换为SVG失败: {latex[:50]}...")
  562 + except Exception as exc:
  563 + logger.error(f"转换内联公式 {latex[:50]}... 时出错: {exc}")
  564 +
538 for block in blocks: 565 for block in blocks:
539 if not isinstance(block, dict): 566 if not isinstance(block, dict):
540 continue 567 continue
@@ -547,7 +574,6 @@ class PDFRenderer: @@ -547,7 +574,6 @@ class PDFRenderer:
547 if latex: 574 if latex:
548 block_counter[0] += 1 575 block_counter[0] += 1
549 math_id = f"math-block-{block_counter[0]}" 576 math_id = f"math-block-{block_counter[0]}"
550 -  
551 try: 577 try:
552 svg_content = self.math_converter.convert_display_to_svg(latex) 578 svg_content = self.math_converter.convert_display_to_svg(latex)
553 if svg_content: 579 if svg_content:
@@ -559,6 +585,11 @@ class PDFRenderer: @@ -559,6 +585,11 @@ class PDFRenderer:
559 logger.warning(f"公式 {math_id} 转换为SVG失败: {latex[:50]}...") 585 logger.warning(f"公式 {math_id} 转换为SVG失败: {latex[:50]}...")
560 except Exception as e: 586 except Exception as e:
561 logger.error(f"转换公式 {latex[:50]}... 时出错: {e}") 587 logger.error(f"转换公式 {latex[:50]}... 时出错: {e}")
  588 + else:
  589 + # 提取段落、表格等内部的内联公式
  590 + inlines = block.get('inlines')
  591 + if inlines:
  592 + _extract_inline_math_from_inlines(inlines)
562 593
563 # 递归处理嵌套的blocks 594 # 递归处理嵌套的blocks
564 nested_blocks = block.get('blocks') 595 nested_blocks = block.get('blocks')
@@ -614,9 +645,8 @@ class PDFRenderer: @@ -614,9 +645,8 @@ class PDFRenderer:
614 # 创建SVG容器HTML 645 # 创建SVG容器HTML
615 svg_html = f'<div class="chart-svg-container">{svg_content}</div>' 646 svg_html = f'<div class="chart-svg-container">{svg_content}</div>'
616 647
617 - # 查找包含此widgetId的配置脚本  
618 - # 格式: <script type="application/json" id="chart-config-N">{"widgetId":"widget_id",...}</script>  
619 - config_pattern = rf'<script[^>]+id="([^"]+)"[^>]*>\s*\{{[^}}]*"widgetId"\s*:\s*"{re.escape(widget_id)}"[^}}]*\}}' 648 + # 查找包含此widgetId的配置脚本(限制在同一个</script>内,避免跨标签误配)
  649 + config_pattern = rf'<script[^>]+id="([^"]+)"[^>]*>(?:(?!</script>).)*?"widgetId"\s*:\s*"{re.escape(widget_id)}"(?:(?!</script>).)*?</script>'
620 match = re.search(config_pattern, html, re.DOTALL) 650 match = re.search(config_pattern, html, re.DOTALL)
621 651
622 if match: 652 if match:
@@ -627,8 +657,11 @@ class PDFRenderer: @@ -627,8 +657,11 @@ class PDFRenderer:
627 canvas_pattern = rf'<canvas[^>]+data-config-id="{re.escape(config_id)}"[^>]*></canvas>' 657 canvas_pattern = rf'<canvas[^>]+data-config-id="{re.escape(config_id)}"[^>]*></canvas>'
628 658
629 # 【修复】替换canvas为SVG,使用lambda避免反斜杠转义问题 659 # 【修复】替换canvas为SVG,使用lambda避免反斜杠转义问题
630 - html = re.sub(canvas_pattern, lambda m: svg_html, html) 660 + html, replaced = re.subn(canvas_pattern, lambda m: svg_html, html, count=1)
  661 + if replaced:
631 logger.debug(f"已替换图表 {widget_id} 的canvas为SVG") 662 logger.debug(f"已替换图表 {widget_id} 的canvas为SVG")
  663 + else:
  664 + logger.warning(f"未找到图表 {widget_id} 的canvas进行替换")
632 665
633 # 将对应fallback标记为隐藏,避免PDF中出现重复表格 666 # 将对应fallback标记为隐藏,避免PDF中出现重复表格
634 fallback_pattern = rf'<div class="chart-fallback"([^>]*data-widget-id="{re.escape(widget_id)}"[^>]*)>' 667 fallback_pattern = rf'<div class="chart-fallback"([^>]*data-widget-id="{re.escape(widget_id)}"[^>]*)>'
@@ -661,7 +694,7 @@ class PDFRenderer: @@ -661,7 +694,7 @@ class PDFRenderer:
661 f'</div>' 694 f'</div>'
662 ) 695 )
663 696
664 - config_pattern = rf'<script[^>]+id="([^"]+)"[^>]*>\s*\{{[^}}]*"widgetId"\s*:\s*"{re.escape(widget_id)}"[^}}]*\}}' 697 + config_pattern = rf'<script[^>]+id="([^"]+)"[^>]*>(?:(?!</script>).)*?"widgetId"\s*:\s*"{re.escape(widget_id)}"(?:(?!</script>).)*?</script>'
665 match = re.search(config_pattern, html, re.DOTALL) 698 match = re.search(config_pattern, html, re.DOTALL)
666 if not match: 699 if not match:
667 logger.debug(f"未找到词云 {widget_id} 的配置脚本,跳过注入") 700 logger.debug(f"未找到词云 {widget_id} 的配置脚本,跳过注入")
@@ -670,8 +703,11 @@ class PDFRenderer: @@ -670,8 +703,11 @@ class PDFRenderer:
670 config_id = match.group(1) 703 config_id = match.group(1)
671 canvas_pattern = rf'<canvas[^>]+data-config-id="{re.escape(config_id)}"[^>]*></canvas>' 704 canvas_pattern = rf'<canvas[^>]+data-config-id="{re.escape(config_id)}"[^>]*></canvas>'
672 705
673 - html = re.sub(canvas_pattern, lambda m: img_html, html) 706 + html, replaced = re.subn(canvas_pattern, lambda m: img_html, html, count=1)
  707 + if replaced:
674 logger.debug(f"已替换词云 {widget_id} 的canvas为PNG图片") 708 logger.debug(f"已替换词云 {widget_id} 的canvas为PNG图片")
  709 + else:
  710 + logger.warning(f"未找到词云 {widget_id} 的canvas进行替换")
675 711
676 fallback_pattern = rf'<div class="chart-fallback"([^>]*data-widget-id="{re.escape(widget_id)}"[^>]*)>' 712 fallback_pattern = rf'<div class="chart-fallback"([^>]*data-widget-id="{re.escape(widget_id)}"[^>]*)>'
677 713
@@ -701,31 +737,39 @@ class PDFRenderer: @@ -701,31 +737,39 @@ class PDFRenderer:
701 737
702 import re 738 import re
703 739
704 - # 为每个math block查找对应的div并替换为SVG 740 + # 优先替换内联公式,再替换块级公式,保持顺序一致
705 for math_id, svg_content in svg_map.items(): 741 for math_id, svg_content in svg_map.items():
706 # 清理SVG内容(移除XML声明,因为SVG将嵌入HTML) 742 # 清理SVG内容(移除XML声明,因为SVG将嵌入HTML)
707 svg_content = re.sub(r'<\?xml[^>]+\?>', '', svg_content) 743 svg_content = re.sub(r'<\?xml[^>]+\?>', '', svg_content)
708 svg_content = re.sub(r'<!DOCTYPE[^>]+>', '', svg_content) 744 svg_content = re.sub(r'<!DOCTYPE[^>]+>', '', svg_content)
709 svg_content = svg_content.strip() 745 svg_content = svg_content.strip()
710 746
711 - # 创建SVG容器HTML  
712 - svg_html = f'<div class="math-svg-container">{svg_content}</div>'  
713 -  
714 - # 查找对应的math-block div  
715 - # 格式: <div class="math-block">$$ latex $$</div>  
716 - # 我们需要找到包含特定LaTeX内容的div  
717 - # 但由于我们在转换时已经给block添加了mathId,我们可以用另一种方式  
718 -  
719 - # 方案:在HTML渲染器中为math-block添加data-math-id属性  
720 - # 但这需要修改HTMLRenderer,暂时我们使用更简单的方法:  
721 - # 按顺序替换所有math-block  
722 -  
723 - # 暂时使用简单的替换方案  
724 - # 找到第一个math-block div并替换  
725 - math_block_pattern = r'<div class="math-block">\$\$[^$]*\$\$</div>'  
726 - # 【修复】使用lambda函数避免re.sub将SVG内容中的反斜杠解释为转义序列  
727 - # lambda函数中的返回值会被当作字面字符串,不会进行转义处理  
728 - html = re.sub(math_block_pattern, lambda m: svg_html, html, count=1) 747 + svg_block_html = f'<div class="math-svg-container">{svg_content}</div>'
  748 + svg_inline_html = f'<span class="math-svg-inline">{svg_content}</span>'
  749 +
  750 + replaced = False
  751 + # 优先按 data-math-id 精确替换
  752 + inline_pattern = rf'<span class="math-inline"[^>]*data-math-id="{re.escape(math_id)}"[^>]*>.*?</span>'
  753 + if re.search(inline_pattern, html, re.DOTALL):
  754 + html = re.sub(inline_pattern, lambda m: svg_inline_html, html, count=1)
  755 + replaced = True
  756 + else:
  757 + block_pattern = rf'<div class="math-block"[^>]*data-math-id="{re.escape(math_id)}"[^>]*>.*?</div>'
  758 + if re.search(block_pattern, html, re.DOTALL):
  759 + html = re.sub(block_pattern, lambda m: svg_block_html, html, count=1)
  760 + replaced = True
  761 +
  762 + # 如果没有找到特定ID,按出现顺序兜底替换
  763 + if not replaced:
  764 + html, sub_inline = re.subn(r'<span class="math-inline">[^<]*</span>', lambda m: svg_inline_html, html, count=1)
  765 + if sub_inline:
  766 + replaced = True
  767 + else:
  768 + html, sub_block = re.subn(r'<div class="math-block">\$\$[^$]*\$\$</div>', lambda m: svg_block_html, html, count=1)
  769 + if sub_block:
  770 + replaced = True
  771 +
  772 + if replaced:
729 logger.debug(f"已替换公式 {math_id} 为SVG") 773 logger.debug(f"已替换公式 {math_id} 为SVG")
730 774
731 return html 775 return html
@@ -787,10 +831,8 @@ class PDFRenderer: @@ -787,10 +831,8 @@ class PDFRenderer:
787 logger.info("开始转换数学公式为SVG矢量图形...") 831 logger.info("开始转换数学公式为SVG矢量图形...")
788 math_svg_map = self._convert_math_to_svg(preprocessed_ir) 832 math_svg_map = self._convert_math_to_svg(preprocessed_ir)
789 833
790 - # 使用HTML渲染器生成基础HTML(使用原始IR,因为HTMLRenderer会自己修复)  
791 - # 注意:这里仍使用原始document_ir,因为HTMLRenderer内部会进行相同的修复  
792 - # 这确保了HTML和SVG使用相同的修复逻辑  
793 - html = self.html_renderer.render(document_ir) 834 + # 使用HTML渲染器生成基础HTML(使用预处理后的IR,以便复用mathId等标记)
  835 + html = self.html_renderer.render(preprocessed_ir)
794 836
795 # 注入图表SVG 837 # 注入图表SVG
796 if svg_map: 838 if svg_map: