Optimize the Rendering of Inline Formulas, Subscripts and Superscripts, Bubble Charts, and Horizontal Bars
Showing 3 changed files with 226 additions and 57 deletions
| @@ -160,6 +160,19 @@ class ChartToSVGConverter: | @@ -160,6 +160,19 @@ class ChartToSVGConverter: | ||
| 160 | if props.get('type'): | 160 | if props.get('type'): |
| 161 | chart_type = props['type'] | 161 | chart_type = props['type'] |
| 162 | 162 | ||
| 163 | + # Chart.js v4已移除horizontalBar类型,这里自动降级为bar并设置横向坐标 | ||
| 164 | + horizontal_bar = False | ||
| 165 | + if chart_type and str(chart_type).lower() == 'horizontalbar': | ||
| 166 | + chart_type = 'bar' | ||
| 167 | + horizontal_bar = True | ||
| 168 | + | ||
| 169 | + # 支持通过indexAxis: 'y' 强制横向柱状图 | ||
| 170 | + if isinstance(props, dict): | ||
| 171 | + options = props.get('options') or {} | ||
| 172 | + index_axis = (options.get('indexAxis') or props.get('indexAxis') or '').lower() | ||
| 173 | + if index_axis == 'y': | ||
| 174 | + horizontal_bar = True | ||
| 175 | + | ||
| 163 | # 提取数据 | 176 | # 提取数据 |
| 164 | data = widget_data.get('data', {}) | 177 | data = widget_data.get('data', {}) |
| 165 | if not data: | 178 | if not data: |
| @@ -172,10 +185,16 @@ class ChartToSVGConverter: | @@ -172,10 +185,16 @@ class ChartToSVGConverter: | ||
| 172 | logger.debug("检测到词云图表,跳过chart_to_svg转换") | 185 | logger.debug("检测到词云图表,跳过chart_to_svg转换") |
| 173 | return None | 186 | return None |
| 174 | 187 | ||
| 175 | - render_method = getattr(self, f'_render_{chart_type}', None) | ||
| 176 | - if not render_method: | ||
| 177 | - logger.warning(f"不支持的图表类型: {chart_type}") | ||
| 178 | - return None | 188 | + # 分派渲染方法,特殊处理横向柱状图 |
| 189 | + if chart_type == 'bar': | ||
| 190 | + return self._render_bar(data, props, width, height, dpi, horizontal=horizontal_bar) | ||
| 191 | + elif chart_type == 'bubble': | ||
| 192 | + return self._render_bubble(data, props, width, height, dpi) | ||
| 193 | + else: | ||
| 194 | + render_method = getattr(self, f'_render_{chart_type}', None) | ||
| 195 | + if not render_method: | ||
| 196 | + logger.warning(f"不支持的图表类型: {chart_type}") | ||
| 197 | + return None | ||
| 179 | 198 | ||
| 180 | # 创建图表并转换为SVG | 199 | # 创建图表并转换为SVG |
| 181 | return render_method(data, props, width, height, dpi) | 200 | return render_method(data, props, width, height, dpi) |
| @@ -687,9 +706,10 @@ class ChartToSVGConverter: | @@ -687,9 +706,10 @@ class ChartToSVGConverter: | ||
| 687 | props: Dict[str, Any], | 706 | props: Dict[str, Any], |
| 688 | width: int, | 707 | width: int, |
| 689 | height: int, | 708 | height: int, |
| 690 | - dpi: int | 709 | + dpi: int, |
| 710 | + horizontal: bool = False | ||
| 691 | ) -> Optional[str]: | 711 | ) -> Optional[str]: |
| 692 | - """渲染柱状图""" | 712 | + """渲染柱状图(支持横向barh)""" |
| 693 | try: | 713 | try: |
| 694 | labels = data.get('labels', []) | 714 | labels = data.get('labels', []) |
| 695 | datasets = data.get('datasets', []) | 715 | datasets = data.get('datasets', []) |
| @@ -703,42 +723,145 @@ class ChartToSVGConverter: | @@ -703,42 +723,145 @@ class ChartToSVGConverter: | ||
| 703 | colors = self._get_colors(datasets) | 723 | colors = self._get_colors(datasets) |
| 704 | 724 | ||
| 705 | # 计算柱子位置 | 725 | # 计算柱子位置 |
| 706 | - x = np.arange(len(labels)) | 726 | + positions = np.arange(len(labels)) |
| 707 | width_bar = 0.8 / len(datasets) if len(datasets) > 1 else 0.6 | 727 | width_bar = 0.8 / len(datasets) if len(datasets) > 1 else 0.6 |
| 708 | 728 | ||
| 709 | - # 绘制每个数据系列 | 729 | + # 横向/纵向绘制 |
| 710 | for i, dataset in enumerate(datasets): | 730 | for i, dataset in enumerate(datasets): |
| 711 | dataset_data = dataset.get('data', []) | 731 | dataset_data = dataset.get('data', []) |
| 712 | label = dataset.get('label', f'系列{i+1}') | 732 | label = dataset.get('label', f'系列{i+1}') |
| 713 | color = colors[i] | 733 | color = colors[i] |
| 714 | 734 | ||
| 715 | offset = (i - len(datasets)/2 + 0.5) * width_bar | 735 | offset = (i - len(datasets)/2 + 0.5) * width_bar |
| 716 | - ax.bar( | ||
| 717 | - x + offset, | ||
| 718 | - dataset_data, | ||
| 719 | - width_bar, | 736 | + |
| 737 | + if horizontal: | ||
| 738 | + ax.barh( | ||
| 739 | + positions + offset, | ||
| 740 | + dataset_data, | ||
| 741 | + height=width_bar, | ||
| 742 | + label=label, | ||
| 743 | + color=color, | ||
| 744 | + alpha=0.8, | ||
| 745 | + edgecolor='white', | ||
| 746 | + linewidth=0.5 | ||
| 747 | + ) | ||
| 748 | + else: | ||
| 749 | + ax.bar( | ||
| 750 | + positions + offset, | ||
| 751 | + dataset_data, | ||
| 752 | + width_bar, | ||
| 753 | + label=label, | ||
| 754 | + color=color, | ||
| 755 | + alpha=0.8, | ||
| 756 | + edgecolor='white', | ||
| 757 | + linewidth=0.5 | ||
| 758 | + ) | ||
| 759 | + | ||
| 760 | + # 轴标签/网格 | ||
| 761 | + if horizontal: | ||
| 762 | + ax.set_yticks(positions) | ||
| 763 | + ax.set_yticklabels(labels) | ||
| 764 | + ax.invert_yaxis() # 与Chart.js横向排列保持一致 | ||
| 765 | + ax.grid(True, alpha=0.3, linestyle='--', axis='x') | ||
| 766 | + else: | ||
| 767 | + ax.set_xticks(positions) | ||
| 768 | + ax.set_xticklabels(labels, rotation=45, ha='right') | ||
| 769 | + ax.grid(True, alpha=0.3, linestyle='--', axis='y') | ||
| 770 | + | ||
| 771 | + # 显示图例 | ||
| 772 | + if len(datasets) > 1: | ||
| 773 | + ax.legend(loc='best', framealpha=0.9) | ||
| 774 | + | ||
| 775 | + return self._figure_to_svg(fig) | ||
| 776 | + | ||
| 777 | + except Exception as e: | ||
| 778 | + logger.error(f"渲染柱状图失败: {e}") | ||
| 779 | + return None | ||
| 780 | + | ||
| 781 | + def _render_bubble( | ||
| 782 | + self, | ||
| 783 | + data: Dict[str, Any], | ||
| 784 | + props: Dict[str, Any], | ||
| 785 | + width: int, | ||
| 786 | + height: int, | ||
| 787 | + dpi: int | ||
| 788 | + ) -> Optional[str]: | ||
| 789 | + """渲染气泡图""" | ||
| 790 | + try: | ||
| 791 | + datasets = data.get('datasets', []) | ||
| 792 | + if not datasets: | ||
| 793 | + return None | ||
| 794 | + | ||
| 795 | + title = props.get('title') | ||
| 796 | + fig, ax = self._create_figure(width, height, dpi, title) | ||
| 797 | + colors = self._get_colors(datasets) | ||
| 798 | + | ||
| 799 | + def _safe_radius(raw) -> float: | ||
| 800 | + try: | ||
| 801 | + val = float(raw) | ||
| 802 | + return max(val, 0.5) | ||
| 803 | + except Exception: | ||
| 804 | + return 1.0 | ||
| 805 | + | ||
| 806 | + all_x: list[float] = [] | ||
| 807 | + all_y: list[float] = [] | ||
| 808 | + max_r: float = 0.0 | ||
| 809 | + | ||
| 810 | + for i, dataset in enumerate(datasets): | ||
| 811 | + points = dataset.get('data', []) | ||
| 812 | + label = dataset.get('label', f'系列{i+1}') | ||
| 813 | + color = colors[i] | ||
| 814 | + | ||
| 815 | + if points and isinstance(points[0], dict): | ||
| 816 | + xs = [p.get('x', 0) for p in points] | ||
| 817 | + ys = [p.get('y', 0) for p in points] | ||
| 818 | + rs = [_safe_radius(p.get('r', 1)) for p in points] | ||
| 819 | + else: | ||
| 820 | + xs = list(range(len(points))) | ||
| 821 | + ys = points | ||
| 822 | + rs = [1.0 for _ in points] | ||
| 823 | + | ||
| 824 | + all_x.extend(xs) | ||
| 825 | + all_y.extend(ys) | ||
| 826 | + if rs: | ||
| 827 | + max_r = max(max_r, max(rs)) | ||
| 828 | + | ||
| 829 | + # 适度放大半径,近似Chart.js像素尺寸(动态尺度,避免过大遮挡) | ||
| 830 | + size_scale = 8.0 if max_r <= 20 else 6.5 | ||
| 831 | + sizes = [(r * size_scale) ** 2 for r in rs] | ||
| 832 | + | ||
| 833 | + ax.scatter( | ||
| 834 | + xs, | ||
| 835 | + ys, | ||
| 836 | + s=sizes, | ||
| 720 | label=label, | 837 | label=label, |
| 721 | color=color, | 838 | color=color, |
| 722 | - alpha=0.8, | ||
| 723 | - edgecolor='white', | ||
| 724 | - linewidth=0.5 | 839 | + alpha=0.45, |
| 840 | + edgecolors='white', | ||
| 841 | + linewidth=0.6 | ||
| 725 | ) | 842 | ) |
| 726 | 843 | ||
| 727 | - # 设置x轴标签 | ||
| 728 | - ax.set_xticks(x) | ||
| 729 | - ax.set_xticklabels(labels, rotation=45, ha='right') | ||
| 730 | - | ||
| 731 | - # 显示图例 | ||
| 732 | if len(datasets) > 1: | 844 | if len(datasets) > 1: |
| 733 | ax.legend(loc='best', framealpha=0.9) | 845 | ax.legend(loc='best', framealpha=0.9) |
| 734 | 846 | ||
| 735 | - # 网格 | ||
| 736 | - ax.grid(True, alpha=0.3, linestyle='--', axis='y') | 847 | + # 适度留白,避免大气泡被裁切 |
| 848 | + if all_x and all_y: | ||
| 849 | + x_min, x_max = min(all_x), max(all_x) | ||
| 850 | + y_min, y_max = min(all_y), max(all_y) | ||
| 851 | + x_span = max(x_max - x_min, 1e-6) | ||
| 852 | + y_span = max(y_max - y_min, 1e-6) | ||
| 853 | + pad_x = max(x_span * 0.12, max_r * 1.2) | ||
| 854 | + pad_y = max(y_span * 0.12, max_r * 1.2) | ||
| 855 | + ax.set_xlim(x_min - pad_x, x_max + pad_x) | ||
| 856 | + ax.set_ylim(y_min - pad_y, y_max + pad_y) | ||
| 857 | + # 额外安全边距 | ||
| 858 | + ax.margins(x=0.05, y=0.05) | ||
| 737 | 859 | ||
| 860 | + ax.grid(True, alpha=0.3, linestyle='--') | ||
| 738 | return self._figure_to_svg(fig) | 861 | return self._figure_to_svg(fig) |
| 739 | 862 | ||
| 740 | except Exception as e: | 863 | except Exception as e: |
| 741 | - logger.error(f"渲染柱状图失败: {e}") | 864 | + logger.error(f"渲染气泡图失败: {e}", exc_info=True) |
| 742 | return None | 865 | return None |
| 743 | 866 | ||
| 744 | def _render_pie( | 867 | def _render_pie( |
| @@ -1263,7 +1263,9 @@ class HTMLRenderer: | @@ -1263,7 +1263,9 @@ class HTMLRenderer: | ||
| 1263 | def _render_math(self, block: Dict[str, Any]) -> str: | 1263 | def _render_math(self, block: Dict[str, Any]) -> str: |
| 1264 | """渲染数学公式,占位符交给外部MathJax或后处理""" | 1264 | """渲染数学公式,占位符交给外部MathJax或后处理""" |
| 1265 | latex = self._escape_html(block.get("latex", "")) | 1265 | latex = self._escape_html(block.get("latex", "")) |
| 1266 | - return f'<div class="math-block">$$ {latex} $$</div>' | 1266 | + math_id = self._escape_attr(block.get("mathId", "")) if block.get("mathId") else "" |
| 1267 | + id_attr = f' data-math-id="{math_id}"' if math_id else "" | ||
| 1268 | + return f'<div class="math-block"{id_attr}>$$ {latex} $$</div>' | ||
| 1267 | 1269 | ||
| 1268 | def _render_figure(self, block: Dict[str, Any]) -> str: | 1270 | def _render_figure(self, block: Dict[str, Any]) -> str: |
| 1269 | """根据新规范默认不渲染外部图片,改为友好提示""" | 1271 | """根据新规范默认不渲染外部图片,改为友好提示""" |
| @@ -2012,7 +2014,9 @@ class HTMLRenderer: | @@ -2012,7 +2014,9 @@ class HTMLRenderer: | ||
| 2012 | latex = math_mark.get("value") | 2014 | latex = math_mark.get("value") |
| 2013 | if not isinstance(latex, str) or not latex.strip(): | 2015 | if not isinstance(latex, str) or not latex.strip(): |
| 2014 | latex = text_value | 2016 | latex = text_value |
| 2015 | - return f'<span class="math-inline">\\( {self._escape_html(latex)} \\)</span>' | 2017 | + math_id = self._escape_attr(run.get("mathId", "")) if run.get("mathId") else "" |
| 2018 | + id_attr = f' data-math-id="{math_id}"' if math_id else "" | ||
| 2019 | + return f'<span class="math-inline"{id_attr}>\\( {self._escape_html(latex)} \\)</span>' | ||
| 2016 | text = self._escape_html(text_value) | 2020 | text = self._escape_html(text_value) |
| 2017 | styles: List[str] = [] | 2021 | styles: List[str] = [] |
| 2018 | prefix: List[str] = [] | 2022 | prefix: List[str] = [] |
| @@ -535,6 +535,33 @@ class PDFRenderer: | @@ -535,6 +535,33 @@ class PDFRenderer: | ||
| 535 | if block_counter is None: | 535 | if block_counter is None: |
| 536 | block_counter = [0] | 536 | block_counter = [0] |
| 537 | 537 | ||
| 538 | + def _extract_inline_math_from_inlines(inlines: list): | ||
| 539 | + """从段落内联节点中提取数学公式""" | ||
| 540 | + if not isinstance(inlines, list): | ||
| 541 | + return | ||
| 542 | + for run in inlines: | ||
| 543 | + if not isinstance(run, dict): | ||
| 544 | + continue | ||
| 545 | + marks = run.get('marks') or [] | ||
| 546 | + math_mark = next((m for m in marks if m.get('type') == 'math'), None) | ||
| 547 | + if not math_mark: | ||
| 548 | + continue | ||
| 549 | + latex = (math_mark.get('value') or run.get('text') or '').strip() | ||
| 550 | + if not latex: | ||
| 551 | + continue | ||
| 552 | + block_counter[0] += 1 | ||
| 553 | + math_id = f"math-inline-{block_counter[0]}" | ||
| 554 | + try: | ||
| 555 | + svg_content = self.math_converter.convert_inline_to_svg(latex) | ||
| 556 | + if svg_content: | ||
| 557 | + svg_map[math_id] = svg_content | ||
| 558 | + run['mathId'] = math_id | ||
| 559 | + logger.debug(f"公式 {math_id} 转换为SVG成功") | ||
| 560 | + else: | ||
| 561 | + logger.warning(f"公式 {math_id} 转换为SVG失败: {latex[:50]}...") | ||
| 562 | + except Exception as exc: | ||
| 563 | + logger.error(f"转换内联公式 {latex[:50]}... 时出错: {exc}") | ||
| 564 | + | ||
| 538 | for block in blocks: | 565 | for block in blocks: |
| 539 | if not isinstance(block, dict): | 566 | if not isinstance(block, dict): |
| 540 | continue | 567 | continue |
| @@ -547,7 +574,6 @@ class PDFRenderer: | @@ -547,7 +574,6 @@ class PDFRenderer: | ||
| 547 | if latex: | 574 | if latex: |
| 548 | block_counter[0] += 1 | 575 | block_counter[0] += 1 |
| 549 | math_id = f"math-block-{block_counter[0]}" | 576 | math_id = f"math-block-{block_counter[0]}" |
| 550 | - | ||
| 551 | try: | 577 | try: |
| 552 | svg_content = self.math_converter.convert_display_to_svg(latex) | 578 | svg_content = self.math_converter.convert_display_to_svg(latex) |
| 553 | if svg_content: | 579 | if svg_content: |
| @@ -559,6 +585,11 @@ class PDFRenderer: | @@ -559,6 +585,11 @@ class PDFRenderer: | ||
| 559 | logger.warning(f"公式 {math_id} 转换为SVG失败: {latex[:50]}...") | 585 | logger.warning(f"公式 {math_id} 转换为SVG失败: {latex[:50]}...") |
| 560 | except Exception as e: | 586 | except Exception as e: |
| 561 | logger.error(f"转换公式 {latex[:50]}... 时出错: {e}") | 587 | logger.error(f"转换公式 {latex[:50]}... 时出错: {e}") |
| 588 | + else: | ||
| 589 | + # 提取段落、表格等内部的内联公式 | ||
| 590 | + inlines = block.get('inlines') | ||
| 591 | + if inlines: | ||
| 592 | + _extract_inline_math_from_inlines(inlines) | ||
| 562 | 593 | ||
| 563 | # 递归处理嵌套的blocks | 594 | # 递归处理嵌套的blocks |
| 564 | nested_blocks = block.get('blocks') | 595 | nested_blocks = block.get('blocks') |
| @@ -614,9 +645,8 @@ class PDFRenderer: | @@ -614,9 +645,8 @@ class PDFRenderer: | ||
| 614 | # 创建SVG容器HTML | 645 | # 创建SVG容器HTML |
| 615 | svg_html = f'<div class="chart-svg-container">{svg_content}</div>' | 646 | svg_html = f'<div class="chart-svg-container">{svg_content}</div>' |
| 616 | 647 | ||
| 617 | - # 查找包含此widgetId的配置脚本 | ||
| 618 | - # 格式: <script type="application/json" id="chart-config-N">{"widgetId":"widget_id",...}</script> | ||
| 619 | - config_pattern = rf'<script[^>]+id="([^"]+)"[^>]*>\s*\{{[^}}]*"widgetId"\s*:\s*"{re.escape(widget_id)}"[^}}]*\}}' | 648 | + # 查找包含此widgetId的配置脚本(限制在同一个</script>内,避免跨标签误配) |
| 649 | + config_pattern = rf'<script[^>]+id="([^"]+)"[^>]*>(?:(?!</script>).)*?"widgetId"\s*:\s*"{re.escape(widget_id)}"(?:(?!</script>).)*?</script>' | ||
| 620 | match = re.search(config_pattern, html, re.DOTALL) | 650 | match = re.search(config_pattern, html, re.DOTALL) |
| 621 | 651 | ||
| 622 | if match: | 652 | if match: |
| @@ -627,8 +657,11 @@ class PDFRenderer: | @@ -627,8 +657,11 @@ class PDFRenderer: | ||
| 627 | canvas_pattern = rf'<canvas[^>]+data-config-id="{re.escape(config_id)}"[^>]*></canvas>' | 657 | canvas_pattern = rf'<canvas[^>]+data-config-id="{re.escape(config_id)}"[^>]*></canvas>' |
| 628 | 658 | ||
| 629 | # 【修复】替换canvas为SVG,使用lambda避免反斜杠转义问题 | 659 | # 【修复】替换canvas为SVG,使用lambda避免反斜杠转义问题 |
| 630 | - html = re.sub(canvas_pattern, lambda m: svg_html, html) | ||
| 631 | - logger.debug(f"已替换图表 {widget_id} 的canvas为SVG") | 660 | + html, replaced = re.subn(canvas_pattern, lambda m: svg_html, html, count=1) |
| 661 | + if replaced: | ||
| 662 | + logger.debug(f"已替换图表 {widget_id} 的canvas为SVG") | ||
| 663 | + else: | ||
| 664 | + logger.warning(f"未找到图表 {widget_id} 的canvas进行替换") | ||
| 632 | 665 | ||
| 633 | # 将对应fallback标记为隐藏,避免PDF中出现重复表格 | 666 | # 将对应fallback标记为隐藏,避免PDF中出现重复表格 |
| 634 | fallback_pattern = rf'<div class="chart-fallback"([^>]*data-widget-id="{re.escape(widget_id)}"[^>]*)>' | 667 | fallback_pattern = rf'<div class="chart-fallback"([^>]*data-widget-id="{re.escape(widget_id)}"[^>]*)>' |
| @@ -661,7 +694,7 @@ class PDFRenderer: | @@ -661,7 +694,7 @@ class PDFRenderer: | ||
| 661 | f'</div>' | 694 | f'</div>' |
| 662 | ) | 695 | ) |
| 663 | 696 | ||
| 664 | - config_pattern = rf'<script[^>]+id="([^"]+)"[^>]*>\s*\{{[^}}]*"widgetId"\s*:\s*"{re.escape(widget_id)}"[^}}]*\}}' | 697 | + config_pattern = rf'<script[^>]+id="([^"]+)"[^>]*>(?:(?!</script>).)*?"widgetId"\s*:\s*"{re.escape(widget_id)}"(?:(?!</script>).)*?</script>' |
| 665 | match = re.search(config_pattern, html, re.DOTALL) | 698 | match = re.search(config_pattern, html, re.DOTALL) |
| 666 | if not match: | 699 | if not match: |
| 667 | logger.debug(f"未找到词云 {widget_id} 的配置脚本,跳过注入") | 700 | logger.debug(f"未找到词云 {widget_id} 的配置脚本,跳过注入") |
| @@ -670,8 +703,11 @@ class PDFRenderer: | @@ -670,8 +703,11 @@ class PDFRenderer: | ||
| 670 | config_id = match.group(1) | 703 | config_id = match.group(1) |
| 671 | canvas_pattern = rf'<canvas[^>]+data-config-id="{re.escape(config_id)}"[^>]*></canvas>' | 704 | canvas_pattern = rf'<canvas[^>]+data-config-id="{re.escape(config_id)}"[^>]*></canvas>' |
| 672 | 705 | ||
| 673 | - html = re.sub(canvas_pattern, lambda m: img_html, html) | ||
| 674 | - logger.debug(f"已替换词云 {widget_id} 的canvas为PNG图片") | 706 | + html, replaced = re.subn(canvas_pattern, lambda m: img_html, html, count=1) |
| 707 | + if replaced: | ||
| 708 | + logger.debug(f"已替换词云 {widget_id} 的canvas为PNG图片") | ||
| 709 | + else: | ||
| 710 | + logger.warning(f"未找到词云 {widget_id} 的canvas进行替换") | ||
| 675 | 711 | ||
| 676 | fallback_pattern = rf'<div class="chart-fallback"([^>]*data-widget-id="{re.escape(widget_id)}"[^>]*)>' | 712 | fallback_pattern = rf'<div class="chart-fallback"([^>]*data-widget-id="{re.escape(widget_id)}"[^>]*)>' |
| 677 | 713 | ||
| @@ -701,32 +737,40 @@ class PDFRenderer: | @@ -701,32 +737,40 @@ class PDFRenderer: | ||
| 701 | 737 | ||
| 702 | import re | 738 | import re |
| 703 | 739 | ||
| 704 | - # 为每个math block查找对应的div并替换为SVG | 740 | + # 优先替换内联公式,再替换块级公式,保持顺序一致 |
| 705 | for math_id, svg_content in svg_map.items(): | 741 | for math_id, svg_content in svg_map.items(): |
| 706 | # 清理SVG内容(移除XML声明,因为SVG将嵌入HTML) | 742 | # 清理SVG内容(移除XML声明,因为SVG将嵌入HTML) |
| 707 | svg_content = re.sub(r'<\?xml[^>]+\?>', '', svg_content) | 743 | svg_content = re.sub(r'<\?xml[^>]+\?>', '', svg_content) |
| 708 | svg_content = re.sub(r'<!DOCTYPE[^>]+>', '', svg_content) | 744 | svg_content = re.sub(r'<!DOCTYPE[^>]+>', '', svg_content) |
| 709 | svg_content = svg_content.strip() | 745 | svg_content = svg_content.strip() |
| 710 | 746 | ||
| 711 | - # 创建SVG容器HTML | ||
| 712 | - svg_html = f'<div class="math-svg-container">{svg_content}</div>' | ||
| 713 | - | ||
| 714 | - # 查找对应的math-block div | ||
| 715 | - # 格式: <div class="math-block">$$ latex $$</div> | ||
| 716 | - # 我们需要找到包含特定LaTeX内容的div | ||
| 717 | - # 但由于我们在转换时已经给block添加了mathId,我们可以用另一种方式 | 747 | + svg_block_html = f'<div class="math-svg-container">{svg_content}</div>' |
| 748 | + svg_inline_html = f'<span class="math-svg-inline">{svg_content}</span>' | ||
| 718 | 749 | ||
| 719 | - # 方案:在HTML渲染器中为math-block添加data-math-id属性 | ||
| 720 | - # 但这需要修改HTMLRenderer,暂时我们使用更简单的方法: | ||
| 721 | - # 按顺序替换所有math-block | ||
| 722 | - | ||
| 723 | - # 暂时使用简单的替换方案 | ||
| 724 | - # 找到第一个math-block div并替换 | ||
| 725 | - math_block_pattern = r'<div class="math-block">\$\$[^$]*\$\$</div>' | ||
| 726 | - # 【修复】使用lambda函数避免re.sub将SVG内容中的反斜杠解释为转义序列 | ||
| 727 | - # lambda函数中的返回值会被当作字面字符串,不会进行转义处理 | ||
| 728 | - html = re.sub(math_block_pattern, lambda m: svg_html, html, count=1) | ||
| 729 | - logger.debug(f"已替换公式 {math_id} 为SVG") | 750 | + replaced = False |
| 751 | + # 优先按 data-math-id 精确替换 | ||
| 752 | + inline_pattern = rf'<span class="math-inline"[^>]*data-math-id="{re.escape(math_id)}"[^>]*>.*?</span>' | ||
| 753 | + if re.search(inline_pattern, html, re.DOTALL): | ||
| 754 | + html = re.sub(inline_pattern, lambda m: svg_inline_html, html, count=1) | ||
| 755 | + replaced = True | ||
| 756 | + else: | ||
| 757 | + block_pattern = rf'<div class="math-block"[^>]*data-math-id="{re.escape(math_id)}"[^>]*>.*?</div>' | ||
| 758 | + if re.search(block_pattern, html, re.DOTALL): | ||
| 759 | + html = re.sub(block_pattern, lambda m: svg_block_html, html, count=1) | ||
| 760 | + replaced = True | ||
| 761 | + | ||
| 762 | + # 如果没有找到特定ID,按出现顺序兜底替换 | ||
| 763 | + if not replaced: | ||
| 764 | + html, sub_inline = re.subn(r'<span class="math-inline">[^<]*</span>', lambda m: svg_inline_html, html, count=1) | ||
| 765 | + if sub_inline: | ||
| 766 | + replaced = True | ||
| 767 | + else: | ||
| 768 | + html, sub_block = re.subn(r'<div class="math-block">\$\$[^$]*\$\$</div>', lambda m: svg_block_html, html, count=1) | ||
| 769 | + if sub_block: | ||
| 770 | + replaced = True | ||
| 771 | + | ||
| 772 | + if replaced: | ||
| 773 | + logger.debug(f"已替换公式 {math_id} 为SVG") | ||
| 730 | 774 | ||
| 731 | return html | 775 | return html |
| 732 | 776 | ||
| @@ -787,10 +831,8 @@ class PDFRenderer: | @@ -787,10 +831,8 @@ class PDFRenderer: | ||
| 787 | logger.info("开始转换数学公式为SVG矢量图形...") | 831 | logger.info("开始转换数学公式为SVG矢量图形...") |
| 788 | math_svg_map = self._convert_math_to_svg(preprocessed_ir) | 832 | math_svg_map = self._convert_math_to_svg(preprocessed_ir) |
| 789 | 833 | ||
| 790 | - # 使用HTML渲染器生成基础HTML(使用原始IR,因为HTMLRenderer会自己修复) | ||
| 791 | - # 注意:这里仍使用原始document_ir,因为HTMLRenderer内部会进行相同的修复 | ||
| 792 | - # 这确保了HTML和SVG使用相同的修复逻辑 | ||
| 793 | - html = self.html_renderer.render(document_ir) | 834 | + # 使用HTML渲染器生成基础HTML(使用预处理后的IR,以便复用mathId等标记) |
| 835 | + html = self.html_renderer.render(preprocessed_ir) | ||
| 794 | 836 | ||
| 795 | # 注入图表SVG | 837 | # 注入图表SVG |
| 796 | if svg_map: | 838 | if svg_map: |
-
Please register or login to post a comment