Toggle navigation
Toggle navigation
This project
Loading...
Sign in
万朱浩
/
Venue-Ops
Go to a project
Toggle navigation
Projects
Groups
Snippets
Help
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
马一丁
2025-11-27 10:29:27 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
5e9da9cfbf7d5bba68403398084d70b51cf430ae
5e9da9cf
1 parent
4e882560
Add Support for Rendering Various Inline and Block-level Mathematical Formulas
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
185 additions
and
11 deletions
ReportEngine/renderers/html_renderer.py
ReportEngine/renderers/math_to_svg.py
ReportEngine/renderers/pdf_renderer.py
ReportEngine/renderers/html_renderer.py
View file @
5e9da9c
...
...
@@ -1262,7 +1262,8 @@ class HTMLRenderer:
def _render_math(self, block: Dict[str, Any]) -> str:
    """Render a block-level math node as a ``math-block`` div.

    The LaTeX source is emitted between ``$$`` markers as a placeholder
    that external MathJax or a later post-processing step takes over.

    Args:
        block: Node dict. Reads the ``"latex"`` source and the optional
            ``"mathId"`` identifier.

    Returns:
        ``<div class="math-block">`` markup. A ``data-math-id`` attribute
        is included only when the block carries a truthy ``mathId``.
    """
    # Strip delimiters the author may already have put around the formula
    # before wrapping it in "$$ ... $$" below, then escape exactly once.
    latex = self._escape_html(self._normalize_latex_string(block.get("latex", "")))
    raw_id = block.get("mathId")
    math_id = self._escape_attr(raw_id) if raw_id else ""
    id_attr = f' data-math-id="{math_id}"' if math_id else ""
    return f'<div class="math-block"{id_attr}>$$ {latex} $$</div>'
...
...
@@ -1989,6 +1990,66 @@ class HTMLRenderer:
return
text_value
,
marks
@staticmethod
def
_normalize_latex_string
(
raw
:
Any
)
->
str
:
"""去除外层数学定界符,兼容 $...$、$$...$$、
\\
(
\\
)、
\\
[
\\
] 等格式"""
if
not
isinstance
(
raw
,
str
):
return
""
latex
=
raw
.
strip
()
patterns
=
[
r'^
\
$
\
$(.*)
\
$
\
$$'
,
r'^
\
$(.*)
\
$$'
,
r'^
\\
\
[(.*)
\\
\
]$'
,
r'^
\\
\
((.*)
\\
\
)$'
,
]
for
pat
in
patterns
:
m
=
re
.
match
(
pat
,
latex
,
re
.
DOTALL
)
if
m
:
latex
=
m
.
group
(
1
)
.
strip
()
break
return
latex
def
_render_text_with_inline_math
(
self
,
text
:
Any
,
math_id
:
str
|
list
|
None
=
None
)
->
str
|
None
:
"""
识别纯文本中的数学定界符并渲染为math-inline/math-block,提升兼容性。
- 支持 $...$、$$...$$、
\\
(
\\
)、
\\
[
\\
]。
- 若未检测到公式,返回None。
"""
if
not
isinstance
(
text
,
str
)
or
not
text
:
return
None
pattern
=
re
.
compile
(
r'(
\
$
\
$(.+?)
\
$
\
$|
\
$(.+?)
\
$|
\\
\
((.+?)
\\
\
)|
\\
\
[(.+?)
\\
\
])'
,
re
.
S
)
cursor
=
0
parts
:
List
[
str
]
=
[]
idx
=
0
id_iter
=
iter
(
math_id
)
if
isinstance
(
math_id
,
list
)
else
None
for
m
in
pattern
.
finditer
(
text
):
start
,
end
=
m
.
span
()
if
start
>
cursor
:
parts
.
append
(
self
.
_escape_html
(
text
[
cursor
:
start
]))
raw
=
next
(
g
for
g
in
m
.
groups
()[
1
:]
if
g
is
not
None
)
latex
=
self
.
_normalize_latex_string
(
raw
)
idx
+=
1
# 若已有math_id,直接使用,避免与SVG注入ID不一致;否则按局部序号生成
if
id_iter
:
mid
=
next
(
id_iter
,
f
"auto-math-{idx}"
)
else
:
mid
=
math_id
or
f
"auto-math-{idx}"
id_attr
=
f
' data-math-id="{self._escape_attr(mid)}"'
is_display
=
m
.
group
(
1
)
.
startswith
(
'$$'
)
or
m
.
group
(
1
)
.
startswith
(
'
\\
['
)
if
is_display
:
parts
.
append
(
f
'<div class="math-block"{id_attr}>$$ {self._escape_html(latex)} $$</div>'
)
else
:
parts
.
append
(
f
'<span class="math-inline"{id_attr}>
\\
( {self._escape_html(latex)}
\\
)</span>'
)
cursor
=
end
if
cursor
==
0
:
return
None
if
cursor
<
len
(
text
):
parts
.
append
(
self
.
_escape_html
(
text
[
cursor
:]))
return
""
.
join
(
parts
)
@staticmethod
def
_coerce_inline_payload
(
payload
:
Dict
[
str
,
Any
])
->
Dict
[
str
,
Any
]
|
None
:
"""尽力将字符串里的内联节点恢复为dict,修复渲染遗漏"""
if
not
isinstance
(
payload
,
dict
):
...
...
@@ -2013,12 +2074,19 @@ class HTMLRenderer:
text_value
,
marks
=
self
.
_normalize_inline_payload
(
run
)
math_mark
=
next
((
mark
for
mark
in
marks
if
mark
.
get
(
"type"
)
==
"math"
),
None
)
if
math_mark
:
latex
=
math_mark
.
get
(
"value"
)
latex
=
self
.
_normalize_latex_string
(
math_mark
.
get
(
"value"
)
)
if
not
isinstance
(
latex
,
str
)
or
not
latex
.
strip
():
latex
=
text_value
latex
=
self
.
_normalize_latex_string
(
text_value
)
math_id
=
self
.
_escape_attr
(
run
.
get
(
"mathId"
,
""
))
if
run
.
get
(
"mathId"
)
else
""
id_attr
=
f
' data-math-id="{math_id}"'
if
math_id
else
""
return
f
'<span class="math-inline"{id_attr}>
\\
( {self._escape_html(latex)}
\\
)</span>'
# 尝试从纯文本中提取数学公式(即便没有math mark)
math_id_hint
=
run
.
get
(
"mathIds"
)
or
run
.
get
(
"mathId"
)
mathified
=
self
.
_render_text_with_inline_math
(
text_value
,
math_id_hint
)
if
mathified
is
not
None
:
return
mathified
text
=
self
.
_escape_html
(
text_value
)
styles
:
List
[
str
]
=
[]
prefix
:
List
[
str
]
=
[]
...
...
ReportEngine/renderers/math_to_svg.py
View file @
5e9da9c
...
...
@@ -4,6 +4,7 @@ LaTeX 数学公式转 SVG 渲染器
"""
import
io
import
re
from
typing
import
Optional
import
matplotlib
import
matplotlib.pyplot
as
plt
...
...
@@ -40,8 +41,22 @@ class MathToSVG:
SVG 字符串,如果转换失败则返回 None
"""
try
:
# 清理 LaTeX 字符串
latex
=
latex
.
strip
()
# 清理 LaTeX 字符串,去除外层定界符,兼容 $...$ / $$...$$ / \\( \\) / \\[ \\]
latex
=
(
latex
or
""
)
.
strip
()
patterns
=
[
r'^
\
$
\
$(.*)
\
$
\
$$'
,
r'^
\
$(.*)
\
$$'
,
r'^
\\
\
[(.*)
\\
\
]$'
,
r'^
\\
\
((.*)
\\
\
)$'
,
]
for
pat
in
patterns
:
m
=
re
.
match
(
pat
,
latex
,
re
.
DOTALL
)
if
m
:
latex
=
m
.
group
(
1
)
.
strip
()
break
# 清理控制字符并做常见兼容
latex
=
re
.
sub
(
r'[
\
x00-
\
x1f
\
x7f]'
,
''
,
latex
)
latex
=
latex
.
replace
(
r'
\\
tfrac'
,
r'
\\
frac'
)
.
replace
(
r'
\\
dfrac'
,
r'
\\
frac'
)
if
not
latex
:
logger
.
warning
(
"空的 LaTeX 公式"
)
return
None
...
...
ReportEngine/renderers/pdf_renderer.py
View file @
5e9da9c
...
...
@@ -10,6 +10,7 @@ import copy
import
os
import
sys
import
io
import
re
from
pathlib
import
Path
from
typing
import
Any
,
Dict
from
datetime
import
datetime
...
...
@@ -544,23 +545,62 @@ class PDFRenderer:
continue
marks
=
run
.
get
(
'marks'
)
or
[]
math_mark
=
next
((
m
for
m
in
marks
if
m
.
get
(
'type'
)
==
'math'
),
None
)
if
not
math_mark
:
if
math_mark
:
# 仅单个math mark
raw
=
math_mark
.
get
(
'value'
)
or
run
.
get
(
'text'
)
or
''
latex
=
self
.
_normalize_latex
(
raw
)
is_display
=
bool
(
re
.
match
(
r'^
\
s*(
\
$
\
$|
\\
\
[)'
,
str
(
raw
)))
if
not
latex
:
continue
block_counter
[
0
]
+=
1
math_id
=
run
.
get
(
'mathId'
)
or
f
"math-inline-{block_counter[0]}"
run
[
'mathId'
]
=
math_id
try
:
svg_content
=
(
self
.
math_converter
.
convert_display_to_svg
(
latex
)
if
is_display
else
self
.
math_converter
.
convert_inline_to_svg
(
latex
)
)
if
svg_content
:
svg_map
[
math_id
]
=
svg_content
logger
.
debug
(
f
"公式 {math_id} 转换为SVG成功"
)
else
:
logger
.
warning
(
f
"公式 {math_id} 转换为SVG失败: {latex[:50]}..."
)
except
Exception
as
exc
:
logger
.
error
(
f
"转换内联公式 {latex[:50]}... 时出错: {exc}"
)
continue
latex
=
(
math_mark
.
get
(
'value'
)
or
run
.
get
(
'text'
)
or
''
)
.
strip
()
# 无math mark,尝试解析文本中的多个公式
text_val
=
run
.
get
(
'text'
)
if
not
isinstance
(
text_val
,
str
):
continue
segments
=
self
.
_find_all_math_in_text
(
text_val
)
if
not
segments
:
continue
ids_for_html
:
list
[
str
]
=
[]
for
idx
,
(
latex
,
is_display
)
in
enumerate
(
segments
,
start
=
1
):
if
not
latex
:
continue
block_counter
[
0
]
+=
1
math_id
=
f
"math-inline-{block_counter[0]}"
math_id
=
f
"auto-math-{block_counter[0]}"
ids_for_html
.
append
(
math_id
)
try
:
svg_content
=
self
.
math_converter
.
convert_inline_to_svg
(
latex
)
svg_content
=
(
self
.
math_converter
.
convert_display_to_svg
(
latex
)
if
is_display
else
self
.
math_converter
.
convert_inline_to_svg
(
latex
)
)
if
svg_content
:
svg_map
[
math_id
]
=
svg_content
run
[
'mathId'
]
=
math_id
logger
.
debug
(
f
"公式 {math_id} 转换为SVG成功"
)
else
:
logger
.
warning
(
f
"公式 {math_id} 转换为SVG失败: {latex[:50]}..."
)
except
Exception
as
exc
:
logger
.
error
(
f
"转换内联公式 {latex[:50]}... 时出错: {exc}"
)
if
ids_for_html
:
# 将ID列表写回run,便于HTML渲染时使用相同ID(顺序对应segments)
run
[
'mathIds'
]
=
ids_for_html
for
block
in
blocks
:
if
not
isinstance
(
block
,
dict
):
...
...
@@ -570,7 +610,7 @@ class PDFRenderer:
# 处理math类型
if
block_type
==
'math'
:
latex
=
block
.
get
(
'latex'
,
''
)
.
strip
(
)
latex
=
self
.
_normalize_latex
(
block
.
get
(
'latex'
,
''
)
)
if
latex
:
block_counter
[
0
]
+=
1
math_id
=
f
"math-block-{block_counter[0]}"
...
...
@@ -679,6 +719,57 @@ class PDFRenderer:
return
html
@staticmethod
def
_normalize_latex
(
raw
:
Any
)
->
str
:
"""去除外层数学定界符,兼容 $...$、$$...$$、
\\
(
\\
)、
\\
[
\\
] 等格式"""
if
not
isinstance
(
raw
,
str
):
return
""
latex
=
raw
.
strip
()
patterns
=
[
r'^
\
$
\
$(.*)
\
$
\
$$'
,
r'^
\
$(.*)
\
$$'
,
r'^
\\
\
[(.*)
\\
\
]$'
,
r'^
\\
\
((.*)
\\
\
)$'
,
]
for
pat
in
patterns
:
m
=
re
.
match
(
pat
,
latex
,
re
.
DOTALL
)
if
m
:
latex
=
m
.
group
(
1
)
.
strip
()
break
# 清理控制字符、防止mathtext解析失败
latex
=
re
.
sub
(
r'[
\
x00-
\
x1f
\
x7f]'
,
''
,
latex
)
# 常见兼容:\tfrac/\dfrac -> \frac
latex
=
latex
.
replace
(
r'
\
tfrac'
,
r'
\
frac'
)
.
replace
(
r'
\
dfrac'
,
r'
\
frac'
)
return
latex
@staticmethod
def
_find_first_math_in_text
(
text
:
Any
)
->
tuple
[
str
,
bool
]
|
None
:
"""从纯文本中提取首个数学片段,返回(内容, 是否display)"""
if
not
isinstance
(
text
,
str
):
return
None
pattern
=
re
.
compile
(
r'
\
$
\
$(.+?)
\
$
\
$|
\
$(.+?)
\
$|
\\
\
((.+?)
\\
\
)|
\\
\
[(.+?)
\\
\
]'
,
re
.
S
)
m
=
pattern
.
search
(
text
)
if
not
m
:
return
None
raw
=
next
(
g
for
g
in
m
.
groups
()
if
g
is
not
None
)
latex
=
raw
.
strip
()
is_display
=
bool
(
m
.
group
(
1
)
or
m
.
group
(
4
))
# $$ or \[ \]
return
latex
,
is_display
@staticmethod
def
_find_all_math_in_text
(
text
:
Any
)
->
list
[
tuple
[
str
,
bool
]]:
"""从纯文本中提取所有数学片段,返回[(内容, 是否display)]"""
if
not
isinstance
(
text
,
str
):
return
[]
pattern
=
re
.
compile
(
r'
\
$
\
$(.+?)
\
$
\
$|
\
$(.+?)
\
$|
\\
\
((.+?)
\\
\
)|
\\
\
[(.+?)
\\
\
]'
,
re
.
S
)
results
=
[]
for
m
in
pattern
.
finditer
(
text
):
raw
=
next
(
g
for
g
in
m
.
groups
()
if
g
is
not
None
)
latex
=
raw
.
strip
()
is_display
=
bool
(
m
.
group
(
1
)
or
m
.
group
(
4
))
results
.
append
((
latex
,
is_display
))
return
results
def
_inject_wordcloud_images
(
self
,
html
:
str
,
img_map
:
Dict
[
str
,
str
])
->
str
:
"""
将词云PNG data URI注入HTML,替换对应canvas
...
...
Please
register
or
login
to post a comment