Toggle navigation
Toggle navigation
This project
Loading...
Sign in
万朱浩
/
Venue-Ops
Go to a project
Toggle navigation
Projects
Groups
Snippets
Help
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
马一丁
2025-12-15 19:46:48 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
2327b13e93e7c0c178f1eb59832cb05b2381bd38
2327b13e
1 parent
997a3283
Avoid repeated icon repairs
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
242 additions
and
166 deletions
ReportEngine/renderers/html_renderer.py
ReportEngine/renderers/pdf_renderer.py
ReportEngine/renderers/html_renderer.py
View file @
2327b13
...
...
@@ -182,6 +182,18 @@ class HTMLRenderer:
self
.
_pdf_font_base64
=
""
return
self
.
_pdf_font_base64
def
_reset_chart_validation_stats
(
self
)
->
None
:
"""重置图表校验统计并清除失败计数标记"""
self
.
chart_validation_stats
=
{
'total'
:
0
,
'valid'
:
0
,
'repaired_locally'
:
0
,
'repaired_api'
:
0
,
'failed'
:
0
}
# 保留失败原因缓存,但重置本次渲染的计数
self
.
_chart_failure_recorded
=
set
()
def
_build_script_with_fallback
(
self
,
inline_code
:
str
,
...
...
@@ -267,6 +279,8 @@ class HTMLRenderer:
str: 可直接写入磁盘的完整HTML文档。
"""
self
.
document
=
document_ir
or
{}
# 先对图表做统一审查与修复,并将结果回写,供后续PDF/HTML共用
self
.
review_and_patch_document
(
self
.
document
,
reset_stats
=
True
)
self
.
widget_scripts
=
[]
self
.
chart_counter
=
0
self
.
heading_counter
=
0
...
...
@@ -282,17 +296,6 @@ class HTMLRenderer:
self
.
heading_label_map
=
self
.
_compute_heading_labels
(
self
.
chapters
)
self
.
toc_entries
=
self
.
_collect_toc_entries
(
self
.
chapters
)
# 重置图表验证统计
self
.
chart_validation_stats
=
{
'total'
:
0
,
'valid'
:
0
,
'repaired_locally'
:
0
,
'repaired_api'
:
0
,
'failed'
:
0
}
# 每次渲染重新统计失败计数,但保留失败原因,避免重复LLM调用
self
.
_chart_failure_recorded
=
set
()
metadata
=
self
.
metadata
theme_tokens
=
metadata
.
get
(
"themeTokens"
)
or
self
.
document
.
get
(
"themeTokens"
,
{})
title
=
metadata
.
get
(
"title"
)
or
metadata
.
get
(
"query"
)
or
"智能舆情报告"
...
...
@@ -2087,6 +2090,31 @@ class HTMLRenderer:
if
cache_key
:
self
.
_chart_failure_recorded
.
add
(
cache_key
)
def
_apply_cached_review_stats
(
self
,
block
:
Dict
[
str
,
Any
])
->
None
:
"""
在已审查过的图表上重新累计统计信息,避免重复修复。
当渲染流程重置了统计但图表已经审查过(_chart_reviewed=True),
直接根据记录的状态累加各项计数,防止再次触发 ChartRepairer。
"""
if
not
isinstance
(
block
,
dict
):
return
status
=
block
.
get
(
"_chart_review_status"
)
or
"valid"
method
=
(
block
.
get
(
"_chart_review_method"
)
or
"none"
)
.
lower
()
cache_key
=
self
.
_chart_cache_key
(
block
)
self
.
chart_validation_stats
[
'total'
]
+=
1
if
status
==
"failed"
:
self
.
_record_chart_failure_stat
(
cache_key
)
elif
status
==
"repaired"
:
if
method
==
"api"
:
self
.
chart_validation_stats
[
'repaired_api'
]
+=
1
else
:
self
.
chart_validation_stats
[
'repaired_locally'
]
+=
1
else
:
self
.
chart_validation_stats
[
'valid'
]
+=
1
def
_format_chart_error_reason
(
self
,
validation_result
:
ValidationResult
|
None
=
None
,
...
...
@@ -2211,6 +2239,177 @@ class HTMLRenderer:
if
labels_from_data
:
data_ref
[
"labels"
]
=
labels_from_data
def
_ensure_chart_reviewed
(
self
,
block
:
Dict
[
str
,
Any
],
chapter_context
:
Dict
[
str
,
Any
]
|
None
=
None
,
*
,
increment_stats
:
bool
=
True
)
->
tuple
[
bool
,
str
|
None
]:
"""
确保图表已完成审查/修复,并将结果回写到原始block。
返回:
(renderable, fail_reason)
"""
if
not
isinstance
(
block
,
dict
):
return
True
,
None
widget_type
=
block
.
get
(
'widgetType'
,
''
)
is_chart
=
isinstance
(
widget_type
,
str
)
and
widget_type
.
startswith
(
'chart.js'
)
if
not
is_chart
:
return
True
,
None
is_wordcloud
=
'wordcloud'
in
widget_type
.
lower
()
if
isinstance
(
widget_type
,
str
)
else
False
cache_key
=
self
.
_chart_cache_key
(
block
)
# 已有失败记录或显式标记为不可渲染,直接复用结果
if
block
.
get
(
"_chart_renderable"
)
is
False
:
if
increment_stats
:
self
.
chart_validation_stats
[
'total'
]
+=
1
self
.
_record_chart_failure_stat
(
cache_key
)
reason
=
block
.
get
(
"_chart_error_reason"
)
block
[
"_chart_reviewed"
]
=
True
block
[
"_chart_review_status"
]
=
block
.
get
(
"_chart_review_status"
)
or
"failed"
block
[
"_chart_review_method"
]
=
block
.
get
(
"_chart_review_method"
)
or
"none"
if
reason
:
self
.
_note_chart_failure
(
cache_key
,
reason
)
return
False
,
reason
if
block
.
get
(
"_chart_reviewed"
):
if
increment_stats
:
self
.
_apply_cached_review_stats
(
block
)
failed
,
cached_reason
=
self
.
_has_chart_failure
(
block
)
renderable
=
not
failed
and
block
.
get
(
"_chart_renderable"
,
True
)
is
not
False
return
renderable
,
block
.
get
(
"_chart_error_reason"
)
or
cached_reason
# 首次审查:先补全结构,再验证/修复
self
.
_normalize_chart_block
(
block
,
chapter_context
)
if
increment_stats
:
self
.
chart_validation_stats
[
'total'
]
+=
1
if
is_wordcloud
:
if
increment_stats
:
self
.
chart_validation_stats
[
'valid'
]
+=
1
block
[
"_chart_reviewed"
]
=
True
block
[
"_chart_review_status"
]
=
"valid"
block
[
"_chart_review_method"
]
=
"none"
return
True
,
None
validation_result
=
self
.
chart_validator
.
validate
(
block
)
if
not
validation_result
.
is_valid
:
logger
.
warning
(
f
"图表 {block.get('widgetId', 'unknown')} 验证失败: {validation_result.errors}"
)
repair_result
=
self
.
chart_repairer
.
repair
(
block
,
validation_result
)
if
repair_result
.
success
and
repair_result
.
repaired_block
:
# 修复成功,回写修复后的数据
repaired_block
=
repair_result
.
repaired_block
block
.
clear
()
block
.
update
(
repaired_block
)
method
=
repair_result
.
method
or
"local"
logger
.
info
(
f
"图表 {block.get('widgetId', 'unknown')} 修复成功 "
f
"(方法: {method}): {repair_result.changes}"
)
if
increment_stats
:
if
method
==
'local'
:
self
.
chart_validation_stats
[
'repaired_locally'
]
+=
1
elif
method
==
'api'
:
self
.
chart_validation_stats
[
'repaired_api'
]
+=
1
block
[
"_chart_review_status"
]
=
"repaired"
block
[
"_chart_review_method"
]
=
method
block
[
"_chart_reviewed"
]
=
True
return
True
,
None
# 修复失败,记录失败并输出占位提示
fail_reason
=
self
.
_format_chart_error_reason
(
validation_result
)
block
[
"_chart_renderable"
]
=
False
block
[
"_chart_error_reason"
]
=
fail_reason
block
[
"_chart_review_status"
]
=
"failed"
block
[
"_chart_review_method"
]
=
"none"
block
[
"_chart_reviewed"
]
=
True
self
.
_note_chart_failure
(
cache_key
,
fail_reason
)
if
increment_stats
:
self
.
_record_chart_failure_stat
(
cache_key
)
logger
.
warning
(
f
"图表 {block.get('widgetId', 'unknown')} 修复失败,已跳过渲染: {fail_reason}"
)
return
False
,
fail_reason
# 验证通过
if
increment_stats
:
self
.
chart_validation_stats
[
'valid'
]
+=
1
if
validation_result
.
warnings
:
logger
.
info
(
f
"图表 {block.get('widgetId', 'unknown')} 验证通过,"
f
"但有警告: {validation_result.warnings}"
)
block
[
"_chart_review_status"
]
=
"valid"
block
[
"_chart_review_method"
]
=
"none"
block
[
"_chart_reviewed"
]
=
True
return
True
,
None
def
review_and_patch_document
(
self
,
document_ir
:
Dict
[
str
,
Any
],
*
,
reset_stats
:
bool
=
True
,
clone
:
bool
=
False
)
->
Dict
[
str
,
Any
]:
"""
全局审查并修复图表,将修复结果回写到原始 IR,避免多次渲染重复修复。
参数:
document_ir: 原始 Document IR
reset_stats: 是否重置统计数据
clone: 是否返回修复后的深拷贝(原始 IR 仍会被回写修复结果)
返回:
修复后的 IR(可能是原对象或其深拷贝)
"""
if
reset_stats
:
self
.
_reset_chart_validation_stats
()
target_ir
=
document_ir
or
{}
def
_walk_blocks
(
blocks
:
list
,
chapter_ctx
:
Dict
[
str
,
Any
]
|
None
=
None
)
->
None
:
for
blk
in
blocks
or
[]:
if
not
isinstance
(
blk
,
dict
):
continue
if
blk
.
get
(
"type"
)
==
"widget"
:
self
.
_ensure_chart_reviewed
(
blk
,
chapter_ctx
,
increment_stats
=
True
)
nested_blocks
=
blk
.
get
(
"blocks"
)
if
isinstance
(
nested_blocks
,
list
):
_walk_blocks
(
nested_blocks
,
chapter_ctx
)
if
blk
.
get
(
"type"
)
==
"list"
:
for
item
in
blk
.
get
(
"items"
,
[]):
if
isinstance
(
item
,
list
):
_walk_blocks
(
item
,
chapter_ctx
)
if
blk
.
get
(
"type"
)
==
"table"
:
for
row
in
blk
.
get
(
"rows"
,
[]):
cells
=
row
.
get
(
"cells"
,
[])
for
cell
in
cells
:
if
isinstance
(
cell
,
dict
):
cell_blocks
=
cell
.
get
(
"blocks"
,
[])
if
isinstance
(
cell_blocks
,
list
):
_walk_blocks
(
cell_blocks
,
chapter_ctx
)
for
chapter
in
target_ir
.
get
(
"chapters"
,
[])
or
[]:
if
not
isinstance
(
chapter
,
dict
):
continue
_walk_blocks
(
chapter
.
get
(
"blocks"
,
[]),
chapter
)
return
copy
.
deepcopy
(
target_ir
)
if
clone
else
target_ir
def
_render_widget
(
self
,
block
:
Dict
[
str
,
Any
])
->
str
:
"""
渲染Chart.js等交互组件的占位容器,并记录配置JSON。
...
...
@@ -2230,75 +2429,28 @@ class HTMLRenderer:
返回:
str: 含canvas与配置脚本的HTML。
"""
# 先在block层面做一次容错补全(scales、章节级数据等)
self
.
_normalize_chart_block
(
block
,
getattr
(
self
,
"_current_chapter"
,
None
))
# 统计
# 统一的审查/修复入口,避免后续重复修复
widget_type
=
block
.
get
(
'widgetType'
,
''
)
is_chart
=
isinstance
(
widget_type
,
str
)
and
widget_type
.
startswith
(
'chart.js'
)
is_wordcloud
=
isinstance
(
widget_type
,
str
)
and
'wordcloud'
in
widget_type
.
lower
()
widget_id
=
block
.
get
(
'widgetId'
)
cache_key
=
self
.
_chart_cache_key
(
block
)
if
is_chart
else
""
props_snapshot
=
block
.
get
(
"props"
)
if
isinstance
(
block
.
get
(
"props"
),
dict
)
else
{}
display_title
=
props_snapshot
.
get
(
"title"
)
or
block
.
get
(
"title"
)
or
widget_id
or
"图表"
reviewed
=
bool
(
block
.
get
(
"_chart_reviewed"
))
renderable
=
True
fail_reason
=
None
if
is_chart
:
self
.
chart_validation_stats
[
'total'
]
+=
1
# 词云使用专用渲染逻辑,不按Chart.js规则验证,直接跳过防止误判
if
is_wordcloud
:
self
.
chart_validation_stats
[
'valid'
]
+=
1
else
:
# 如果此前已记录失败,直接使用占位提示,避免重复修复
has_failed
,
cached_reason
=
self
.
_has_chart_failure
(
block
)
if
has_failed
:
self
.
_record_chart_failure_stat
(
cache_key
)
reason
=
cached_reason
or
"LLM返回的图表信息格式有误,无法正常显示"
return
self
.
_render_chart_error_placeholder
(
display_title
,
reason
,
widget_id
)
# 验证图表数据
validation_result
=
self
.
chart_validator
.
validate
(
block
)
renderable
,
fail_reason
=
self
.
_ensure_chart_reviewed
(
block
,
getattr
(
self
,
"_current_chapter"
,
None
),
increment_stats
=
not
reviewed
)
if
not
validation_result
.
is_valid
:
logger
.
warning
(
f
"图表 {block.get('widgetId', 'unknown')} 验证失败: {validation_result.errors}"
)
widget_id
=
block
.
get
(
'widgetId'
)
props_snapshot
=
block
.
get
(
"props"
)
if
isinstance
(
block
.
get
(
"props"
),
dict
)
else
{}
display_title
=
props_snapshot
.
get
(
"title"
)
or
block
.
get
(
"title"
)
or
widget_id
or
"图表"
# 尝试修复
repair_result
=
self
.
chart_repairer
.
repair
(
block
,
validation_result
)
if
repair_result
.
success
and
repair_result
.
repaired_block
:
# 修复成功,使用修复后的数据
block
=
repair_result
.
repaired_block
logger
.
info
(
f
"图表 {block.get('widgetId', 'unknown')} 修复成功 "
f
"(方法: {repair_result.method}): {repair_result.changes}"
)
# 更新统计
if
repair_result
.
method
==
'local'
:
self
.
chart_validation_stats
[
'repaired_locally'
]
+=
1
elif
repair_result
.
method
==
'api'
:
self
.
chart_validation_stats
[
'repaired_api'
]
+=
1
else
:
# 修复失败,记录失败并输出占位提示
fail_reason
=
self
.
_format_chart_error_reason
(
validation_result
)
block
[
"_chart_renderable"
]
=
False
block
[
"_chart_error_reason"
]
=
fail_reason
self
.
_note_chart_failure
(
cache_key
,
fail_reason
)
self
.
_record_chart_failure_stat
(
cache_key
)
logger
.
warning
(
f
"图表 {block.get('widgetId', 'unknown')} 修复失败,已跳过渲染: {fail_reason}"
)
return
self
.
_render_chart_error_placeholder
(
display_title
,
fail_reason
,
widget_id
)
else
:
# 验证通过
self
.
chart_validation_stats
[
'valid'
]
+=
1
if
validation_result
.
warnings
:
logger
.
info
(
f
"图表 {block.get('widgetId', 'unknown')} 验证通过,"
f
"但有警告: {validation_result.warnings}"
)
if
is_chart
and
not
renderable
:
reason
=
fail_reason
or
"LLM返回的图表信息格式有误,无法正常显示"
return
self
.
_render_chart_error_placeholder
(
display_title
,
reason
,
widget_id
)
# 渲染图表HTML
self
.
chart_counter
+=
1
...
...
ReportEngine/renderers/pdf_renderer.py
View file @
2327b13
...
...
@@ -157,10 +157,11 @@ class PDFRenderer:
def
_preprocess_charts
(
self
,
document_ir
:
Dict
[
str
,
Any
])
->
Dict
[
str
,
Any
]:
"""
预处理图表:验证
和修复所有图表数据
预处理图表:验证
并修复所有图表数据,结果回写原始IR。
这个方法确保在转换为SVG之前,所有图表数据都是有效的。
使用与HTMLRenderer相同的验证和修复逻辑,保证PDF和HTML的一致性。
先统一审查并修复图表,把修复结果直接写回传入的 IR,
然后返回修复后的深拷贝供后续 SVG/词云转换使用,避免
HTML 和 PDF 分别重复触发 ChartRepairer。
参数:
document_ir: Document IR数据
...
...
@@ -168,101 +169,24 @@ class PDFRenderer:
返回:
Dict[str, Any]: 修复后的Document IR(深拷贝)
"""
# 深拷贝以避免修改原始IR
ir_copy
=
copy
.
deepcopy
(
document_ir
)
repair_stats
=
{
'total'
:
0
,
'repaired'
:
0
,
'failed'
:
0
}
def
repair_widgets_in_blocks
(
blocks
:
list
,
chapter_context
:
Dict
[
str
,
Any
]
|
None
=
None
)
->
None
:
"""递归修复blocks中的所有widget"""
for
block
in
blocks
:
if
not
isinstance
(
block
,
dict
):
continue
# 处理widget类型
if
block
.
get
(
'type'
)
==
'widget'
:
# 先用HTML渲染器的容错逻辑补全字段
try
:
self
.
html_renderer
.
_normalize_chart_block
(
block
,
chapter_context
)
except
Exception
as
exc
:
# 防御性处理,避免单个图表阻断流程
logger
.
debug
(
f
"预处理图表 {block.get('widgetId')} 时出错: {exc}"
)
widget_type
=
block
.
get
(
'widgetType'
,
''
)
if
widget_type
.
startswith
(
'chart.js'
):
repair_stats
[
'total'
]
+=
1
# 使用HTMLRenderer的验证器和修复器
validation
=
self
.
html_renderer
.
chart_validator
.
validate
(
block
)
if
not
validation
.
is_valid
:
logger
.
debug
(
f
"图表 {block.get('widgetId')} 需要修复: {validation.errors}"
)
# 尝试修复
repair_result
=
self
.
html_renderer
.
chart_repairer
.
repair
(
block
,
validation
)
if
repair_result
.
success
and
repair_result
.
repaired_block
:
# 更新block内容(在副本中)
block
.
update
(
repair_result
.
repaired_block
)
repair_stats
[
'repaired'
]
+=
1
logger
.
debug
(
f
"图表 {block.get('widgetId')} 已修复 "
f
"(方法: {repair_result.method})"
)
else
:
repair_stats
[
'failed'
]
+=
1
reason
=
self
.
html_renderer
.
_format_chart_error_reason
(
validation
)
block
[
"_chart_renderable"
]
=
False
block
[
"_chart_error_reason"
]
=
reason
self
.
html_renderer
.
_note_chart_failure
(
self
.
html_renderer
.
_chart_cache_key
(
block
),
reason
)
logger
.
warning
(
f
"图表 {block.get('widgetId')} 修复失败,将使用占位提示: {reason}"
)
# 递归处理嵌套的blocks
nested_blocks
=
block
.
get
(
'blocks'
)
if
isinstance
(
nested_blocks
,
list
):
repair_widgets_in_blocks
(
nested_blocks
,
chapter_context
)
# 处理列表项
if
block
.
get
(
'type'
)
==
'list'
:
items
=
block
.
get
(
'items'
,
[])
for
item
in
items
:
if
isinstance
(
item
,
list
):
repair_widgets_in_blocks
(
item
,
chapter_context
)
# 处理表格单元格
if
block
.
get
(
'type'
)
==
'table'
:
rows
=
block
.
get
(
'rows'
,
[])
for
row
in
rows
:
cells
=
row
.
get
(
'cells'
,
[])
for
cell
in
cells
:
cell_blocks
=
cell
.
get
(
'blocks'
,
[])
if
isinstance
(
cell_blocks
,
list
):
repair_widgets_in_blocks
(
cell_blocks
,
chapter_context
)
# 处理所有章节
chapters
=
ir_copy
.
get
(
'chapters'
,
[])
for
chapter
in
chapters
:
blocks
=
chapter
.
get
(
'blocks'
,
[])
repair_widgets_in_blocks
(
blocks
,
chapter
)
reviewed_ir
=
self
.
html_renderer
.
review_and_patch_document
(
document_ir
,
reset_stats
=
True
,
clone
=
False
)
# 输出统计信息
if
repair_stats
[
'total'
]
>
0
:
stats
=
self
.
html_renderer
.
chart_validation_stats
if
stats
.
get
(
'total'
,
0
)
>
0
:
repaired_count
=
stats
.
get
(
'repaired_locally'
,
0
)
+
stats
.
get
(
'repaired_api'
,
0
)
logger
.
info
(
f
"PDF图表预处理完成: "
f
"总计 {repair_stats['total']} 个图表, "
f
"修复 {repair_stats['repaired']} 个, "
f
"失败 {repair_stats['failed']} 个"
f
"总计 {stats.get('total', 0)} 个图表, "
f
"修复 {repaired_count} 个, "
f
"失败 {stats.get('failed', 0)} 个"
)
return
ir_copy
# 返回深拷贝,避免后续 SVG 转换过程影响回写后的原始 IR
return
copy
.
deepcopy
(
reviewed_ir
)
def
_convert_charts_to_svg
(
self
,
document_ir
:
Dict
[
str
,
Any
])
->
Dict
[
str
,
str
]:
"""
...
...
Please
register
or
login
to post a comment