Toggle navigation
Toggle navigation
This project
Loading...
Sign in
万朱浩
/
Venue-Ops
Go to a project
Toggle navigation
Projects
Groups
Snippets
Help
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
马一丁
2025-12-16 03:14:50 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
a371cdf7c090d2f363c8e6ee15664b0899e70a81
a371cdf7
1 parent
c33efec8
Completely redesign the chart repair logic
Show whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
582 additions
and
48 deletions
ReportEngine/renderers/html_renderer.py
ReportEngine/renderers/markdown_renderer.py
ReportEngine/renderers/pdf_renderer.py
ReportEngine/utils/__init__.py
ReportEngine/utils/chart_repair_api.py
ReportEngine/utils/chart_review_service.py
ReportEngine/utils/chart_validator.py
regenerate_latest_html.py
regenerate_latest_pdf.py
report_engine_only.py
ReportEngine/renderers/html_renderer.py
View file @
a371cdf
...
...
@@ -29,6 +29,7 @@ from ReportEngine.utils.chart_validator import (
create_chart_repairer
)
from
ReportEngine.utils.chart_repair_api
import
create_llm_repair_functions
from
ReportEngine.utils.chart_review_service
import
get_chart_review_service
class
HTMLRenderer
:
...
...
@@ -117,6 +118,12 @@ class HTMLRenderer:
validator
=
self
.
chart_validator
,
llm_repair_fns
=
llm_repair_fns
)
# 打印LLM修复函数状态
self
.
_llm_repair_count
=
len
(
llm_repair_fns
)
if
not
llm_repair_fns
:
logger
.
warning
(
"HTMLRenderer: 未配置任何LLM API,图表API修复功能不可用"
)
else
:
logger
.
info
(
f
"HTMLRenderer: 已配置 {len(llm_repair_fns)} 个LLM修复函数"
)
# 记录修复失败的图表,避免多次触发LLM循环修复
self
.
_chart_failure_notes
:
Dict
[
str
,
str
]
=
{}
self
.
_chart_failure_recorded
:
set
[
str
]
=
set
()
...
...
@@ -268,19 +275,36 @@ class HTMLRenderer:
# ====== 公共入口 ======
def
render
(
self
,
document_ir
:
Dict
[
str
,
Any
])
->
str
:
def
render
(
self
,
document_ir
:
Dict
[
str
,
Any
],
ir_file_path
:
str
|
None
=
None
)
->
str
:
"""
接收Document IR,重置内部状态并输出完整HTML。
参数:
document_ir: 由 DocumentComposer 生成的整本报告数据。
ir_file_path: 可选,IR 文件路径,提供时修复后会自动保存。
返回:
str: 可直接写入磁盘的完整HTML文档。
"""
self
.
document
=
document_ir
or
{}
# 先对图表做统一审查与修复,并将结果回写,供后续PDF/HTML共用
self
.
review_and_patch_document
(
self
.
document
,
reset_stats
=
True
)
# 使用统一的 ChartReviewService 进行图表审查与修复
# 修复结果会直接回写到 document_ir,避免多次渲染重复修复
chart_service
=
get_chart_review_service
()
chart_service
.
review_document
(
self
.
document
,
ir_file_path
=
ir_file_path
,
reset_stats
=
True
,
save_on_repair
=
bool
(
ir_file_path
)
)
# 同步统计信息到本地(用于兼容旧的 _log_chart_validation_stats)
service_stats
=
chart_service
.
stats
self
.
chart_validation_stats
.
update
(
service_stats
)
self
.
widget_scripts
=
[]
self
.
chart_counter
=
0
self
.
heading_counter
=
0
...
...
ReportEngine/renderers/markdown_renderer.py
View file @
a371cdf
...
...
@@ -5,6 +5,8 @@ from typing import Any, Dict, List
from
loguru
import
logger
from
ReportEngine.utils.chart_review_service
import
get_chart_review_service
class
MarkdownRenderer
:
"""
...
...
@@ -19,9 +21,33 @@ class MarkdownRenderer:
self
.
document
:
Dict
[
str
,
Any
]
=
{}
self
.
metadata
:
Dict
[
str
,
Any
]
=
{}
def
render
(
self
,
document_ir
:
Dict
[
str
,
Any
])
->
str
:
"""入口:将IR转换为Markdown字符串"""
def
render
(
self
,
document_ir
:
Dict
[
str
,
Any
],
ir_file_path
:
str
|
None
=
None
)
->
str
:
"""
入口:将IR转换为Markdown字符串。
参数:
document_ir: Document IR 数据
ir_file_path: 可选,IR 文件路径,提供时修复后会自动保存
返回:
str: Markdown 字符串
"""
self
.
document
=
document_ir
or
{}
# 使用统一的 ChartReviewService 进行图表审查与修复
# 虽然 Markdown 渲染时图表会降级为表格,但仍需确保数据有效
chart_service
=
get_chart_review_service
()
chart_service
.
review_document
(
self
.
document
,
ir_file_path
=
ir_file_path
,
reset_stats
=
True
,
save_on_repair
=
bool
(
ir_file_path
)
)
self
.
metadata
=
self
.
document
.
get
(
"metadata"
,
{})
or
{}
parts
:
List
[
str
]
=
[]
...
...
ReportEngine/renderers/pdf_renderer.py
View file @
a371cdf
...
...
@@ -71,6 +71,7 @@ from .html_renderer import HTMLRenderer
from
.pdf_layout_optimizer
import
PDFLayoutOptimizer
,
PDFLayoutConfig
from
.chart_to_svg
import
create_chart_converter
from
.math_to_svg
import
MathToSVG
from
ReportEngine.utils.chart_review_service
import
get_chart_review_service
try
:
from
wordcloud
import
WordCloud
WORDCLOUD_AVAILABLE
=
True
...
...
@@ -153,27 +154,34 @@ class PDFRenderer:
raise
FileNotFoundError
(
f
"未找到字体文件,请检查 {fonts_dir} 目录"
)
def
_preprocess_charts
(
self
,
document_ir
:
Dict
[
str
,
Any
])
->
Dict
[
str
,
Any
]:
def
_preprocess_charts
(
self
,
document_ir
:
Dict
[
str
,
Any
],
ir_file_path
:
str
|
None
=
None
)
->
Dict
[
str
,
Any
]:
"""
预处理图表:
验证并修复所有图表数据,结果回写原始IR
。
预处理图表:
使用 ChartReviewService 验证并修复所有图表数据
。
先统一审查并修复图表,把修复结果直接写回传入的 IR,
然后返回修复后的深拷贝供后续 SVG/词云转换使用,避免
HTML 和 PDF 分别重复触发 ChartRepairer。
使用统一的 ChartReviewService 进行图表审查,修复结果直接写回传入的 IR。
如果提供 ir_file_path,修复后会自动保存到文件。
参数:
document_ir: Document IR数据
ir_file_path: 可选,IR 文件路径,提供时修复后会自动保存
返回:
Dict[str, Any]: 修复后的Document IR(深拷贝)
"""
reviewed_ir
=
self
.
html_renderer
.
review_and_patch_document
(
# 使用统一的 ChartReviewService
chart_service
=
get_chart_review_service
()
chart_service
.
review_document
(
document_ir
,
ir_file_path
=
ir_file_path
,
reset_stats
=
True
,
clone
=
False
save_on_repair
=
bool
(
ir_file_path
)
)
stats
=
self
.
html_renderer
.
chart_validation_
stats
stats
=
chart_service
.
stats
if
stats
.
get
(
'total'
,
0
)
>
0
:
repaired_count
=
stats
.
get
(
'repaired_locally'
,
0
)
+
stats
.
get
(
'repaired_api'
,
0
)
logger
.
info
(
...
...
@@ -184,7 +192,7 @@ class PDFRenderer:
)
# 返回深拷贝,避免后续 SVG 转换过程影响回写后的原始 IR
return
copy
.
deepcopy
(
reviewed
_ir
)
return
copy
.
deepcopy
(
document
_ir
)
def
_convert_charts_to_svg
(
self
,
document_ir
:
Dict
[
str
,
Any
])
->
Dict
[
str
,
str
]:
"""
...
...
@@ -813,7 +821,8 @@ class PDFRenderer:
def
_get_pdf_html
(
self
,
document_ir
:
Dict
[
str
,
Any
],
optimize_layout
:
bool
=
True
optimize_layout
:
bool
=
True
,
ir_file_path
:
str
|
None
=
None
)
->
str
:
"""
生成适用于PDF的HTML内容
...
...
@@ -827,6 +836,7 @@ class PDFRenderer:
参数:
document_ir: Document IR数据
optimize_layout: 是否启用布局优化
ir_file_path: 可选,IR 文件路径,提供时修复后会自动保存
返回:
str: 优化后的HTML内容
...
...
@@ -853,7 +863,7 @@ class PDFRenderer:
# 关键修复:先预处理图表,确保数据有效
logger
.
info
(
"预处理图表数据..."
)
preprocessed_ir
=
self
.
_preprocess_charts
(
document_ir
)
preprocessed_ir
=
self
.
_preprocess_charts
(
document_ir
,
ir_file_path
)
# 转换图表为SVG(使用预处理后的IR)
logger
.
info
(
"开始转换图表为SVG矢量图形..."
)
...
...
@@ -1527,7 +1537,8 @@ button.ghost-btn {{
self
,
document_ir
:
Dict
[
str
,
Any
],
output_path
:
str
|
Path
,
optimize_layout
:
bool
=
True
optimize_layout
:
bool
=
True
,
ir_file_path
:
str
|
None
=
None
)
->
Path
:
"""
将Document IR渲染为PDF文件
...
...
@@ -1536,6 +1547,7 @@ button.ghost-btn {{
document_ir: Document IR数据
output_path: PDF输出路径
optimize_layout: 是否启用布局优化(默认True)
ir_file_path: 可选,IR 文件路径,提供时修复后会自动保存
返回:
Path: 生成的PDF文件路径
...
...
@@ -1545,7 +1557,7 @@ button.ghost-btn {{
logger
.
info
(
f
"开始生成PDF: {output_path}"
)
# 生成HTML内容
html_content
=
self
.
_get_pdf_html
(
document_ir
,
optimize_layout
)
html_content
=
self
.
_get_pdf_html
(
document_ir
,
optimize_layout
,
ir_file_path
)
# 配置字体
font_config
=
FontConfiguration
()
...
...
@@ -1570,7 +1582,8 @@ button.ghost-btn {{
def
render_to_bytes
(
self
,
document_ir
:
Dict
[
str
,
Any
],
optimize_layout
:
bool
=
True
optimize_layout
:
bool
=
True
,
ir_file_path
:
str
|
None
=
None
)
->
bytes
:
"""
将Document IR渲染为PDF字节流
...
...
@@ -1578,11 +1591,12 @@ button.ghost-btn {{
参数:
document_ir: Document IR数据
optimize_layout: 是否启用布局优化(默认True)
ir_file_path: 可选,IR 文件路径,提供时修复后会自动保存
返回:
bytes: PDF文件的字节内容
"""
html_content
=
self
.
_get_pdf_html
(
document_ir
,
optimize_layout
)
html_content
=
self
.
_get_pdf_html
(
document_ir
,
optimize_layout
,
ir_file_path
)
font_config
=
FontConfiguration
()
html_doc
=
HTML
(
string
=
html_content
,
base_url
=
str
(
Path
.
cwd
()))
...
...
ReportEngine/utils/__init__.py
View file @
a371cdf
...
...
@@ -4,6 +4,14 @@ Report Engine工具模块。
当前主要暴露配置读取逻辑,后续可扩展更多通用工具。
"""
from
ReportEngine.utils.chart_review_service
import
(
ChartReviewService
,
get_chart_review_service
,
review_document_charts
,
)
__all__
=
[
"ChartReviewService"
,
"get_chart_review_service"
,
"review_document_charts"
,
]
...
...
ReportEngine/utils/chart_repair_api.py
View file @
a371cdf
...
...
@@ -169,10 +169,11 @@ def create_llm_repair_functions() -> List:
return
repaired
except
Exception
as
e
:
logger
.
e
rror
(
f
"ReportEngine图表修复失败: {e}"
)
logger
.
e
xception
(
f
"ReportEngine图表修复失败: {e}"
)
return
None
repair_functions
.
append
(
repair_with_report_engine
)
logger
.
debug
(
"已添加ReportEngine图表修复函数"
)
# 2. ForumEngine修复函数
if
settings
.
FORUM_HOST_API_KEY
and
settings
.
FORUM_HOST_BASE_URL
:
...
...
@@ -202,10 +203,11 @@ def create_llm_repair_functions() -> List:
return
repaired
except
Exception
as
e
:
logger
.
e
rror
(
f
"ForumEngine图表修复失败: {e}"
)
logger
.
e
xception
(
f
"ForumEngine图表修复失败: {e}"
)
return
None
repair_functions
.
append
(
repair_with_forum_engine
)
logger
.
debug
(
"已添加ForumEngine图表修复函数"
)
# 3. InsightEngine修复函数
if
settings
.
INSIGHT_ENGINE_API_KEY
and
settings
.
INSIGHT_ENGINE_BASE_URL
:
...
...
@@ -235,10 +237,11 @@ def create_llm_repair_functions() -> List:
return
repaired
except
Exception
as
e
:
logger
.
e
rror
(
f
"InsightEngine图表修复失败: {e}"
)
logger
.
e
xception
(
f
"InsightEngine图表修复失败: {e}"
)
return
None
repair_functions
.
append
(
repair_with_insight_engine
)
logger
.
debug
(
"已添加InsightEngine图表修复函数"
)
# 4. MediaEngine修复函数
if
settings
.
MEDIA_ENGINE_API_KEY
and
settings
.
MEDIA_ENGINE_BASE_URL
:
...
...
@@ -268,12 +271,15 @@ def create_llm_repair_functions() -> List:
return
repaired
except
Exception
as
e
:
logger
.
e
rror
(
f
"MediaEngine图表修复失败: {e}"
)
logger
.
e
xception
(
f
"MediaEngine图表修复失败: {e}"
)
return
None
repair_functions
.
append
(
repair_with_media_engine
)
logger
.
debug
(
"已添加MediaEngine图表修复函数"
)
if
not
repair_functions
:
logger
.
warning
(
"未配置任何Engine API,图表API修复功能将不可用"
)
else
:
logger
.
info
(
f
"图表API修复功能已启用,共 {len(repair_functions)} 个Engine可用"
)
return
repair_functions
...
...
ReportEngine/utils/chart_review_service.py
0 → 100644
View file @
a371cdf
"""
图表审查服务 - 统一管理图表验证和修复。
提供单例服务,确保所有渲染器共享修复状态,避免重复修复。
修复成功后可自动持久化到 IR 文件。
"""
from
__future__
import
annotations
import
copy
import
json
import
threading
from
pathlib
import
Path
from
typing
import
Any
,
Dict
,
List
,
Optional
from
loguru
import
logger
from
ReportEngine.utils.chart_validator
import
(
ChartValidator
,
ChartRepairer
,
ValidationResult
,
create_chart_validator
,
create_chart_repairer
)
from
ReportEngine.utils.chart_repair_api
import
create_llm_repair_functions
class ChartReviewService:
    """
    Process-wide singleton that validates and repairs chart widgets in a Document IR.

    Responsibilities:
    1. Run chart validation and repair through one shared validator/repairer pair.
    2. Mark every reviewed block (``_chart_reviewed``) so later passes skip it.
    3. Optionally persist the IR to disk after a successful repair.
    4. Keep counters describing the most recent review pass.
    """

    _instance: Optional["ChartReviewService"] = None
    _lock = threading.Lock()

    def __new__(cls) -> "ChartReviewService":
        """Return the shared instance, creating it under ``_lock`` on first use."""
        if cls._instance is None:
            with cls._lock:
                # Re-check inside the lock: another thread may have created it.
                if cls._instance is None:
                    cls._instance = super().__new__(cls)
                    cls._instance._initialized = False
        return cls._instance

    def __init__(self):
        """Build the validator, LLM repair functions and repairer (first call only)."""
        if self._initialized:
            return

        # Validator and repairer are shared by every renderer using the service.
        self.validator = create_chart_validator()
        self.llm_repair_fns = create_llm_repair_functions()
        self.repairer = create_chart_repairer(
            validator=self.validator,
            llm_repair_fns=self.llm_repair_fns
        )

        # Report whether any LLM repair function is available.
        if not self.llm_repair_fns:
            logger.warning("ChartReviewService: 未配置任何 LLM API,图表 API 修复功能不可用")
        else:
            logger.info(f"ChartReviewService: 已配置 {len(self.llm_repair_fns)} 个 LLM 修复函数")

        self.reset_stats()

        # Flip the flag only after every attribute exists, so a constructor
        # failure is retried on the next call instead of leaving a
        # half-initialised singleton behind.
        self._initialized = True
        logger.info("ChartReviewService 初始化完成")

    def reset_stats(self) -> None:
        """Reset all review counters to zero."""
        self._stats = {
            'total': 0,
            'valid': 0,
            'repaired_locally': 0,
            'repaired_api': 0,
            'failed': 0
        }

    @property
    def stats(self) -> Dict[str, int]:
        """Return a copy of the review counters."""
        return self._stats.copy()

    def review_document(
        self,
        document_ir: Dict[str, Any],
        ir_file_path: Optional[str | Path] = None,
        *,
        reset_stats: bool = True,
        save_on_repair: bool = True
    ) -> Dict[str, Any]:
        """
        Review and repair every chart in the document.

        Walks the ``blocks`` of each chapter, reviewing chart widgets that
        have not been reviewed yet.

        Args:
            document_ir: Document IR data; it is modified in place.
            ir_file_path: IR file path; the IR is written there when at least
                one chart was repaired and ``save_on_repair`` is true.
            reset_stats: Whether to zero the counters before reviewing.
            save_on_repair: Whether to persist the IR after a repair.

        Returns:
            Dict[str, Any]: The same ``document_ir`` object that was passed in.
        """
        if reset_stats:
            self.reset_stats()

        if not document_ir:
            logger.warning("ChartReviewService: document_ir 为空,跳过审查")
            return document_ir

        has_repairs = False

        for chapter in document_ir.get("chapters", []) or []:
            if not isinstance(chapter, dict):
                continue
            blocks = chapter.get("blocks", [])
            if isinstance(blocks, list):
                if self._walk_and_review_blocks(blocks, chapter):
                    has_repairs = True

        self._log_stats()

        # Persist only when something actually changed and a target was given.
        if has_repairs and ir_file_path and save_on_repair:
            self._save_ir_to_file(document_ir, ir_file_path)

        return document_ir

    def _walk_and_review_blocks(
        self,
        blocks: List[Any],
        chapter_context: Dict[str, Any] | None = None
    ) -> bool:
        """
        Recursively walk ``blocks`` and review every chart widget found.

        Descends into nested ``blocks``, the ``items`` of ``list`` blocks and
        the cell ``blocks`` of ``table`` blocks. Keys that are present but
        hold ``None`` are treated as empty.

        Returns:
            bool: True when at least one chart was repaired.
        """
        has_repairs = False

        for block in blocks or []:
            if not isinstance(block, dict):
                continue

            block_type = block.get("type")

            if block_type == "widget":
                if self._review_chart_block(block, chapter_context):
                    has_repairs = True

            # Generic nesting via a "blocks" key.
            nested_blocks = block.get("blocks")
            if isinstance(nested_blocks, list):
                if self._walk_and_review_blocks(nested_blocks, chapter_context):
                    has_repairs = True

            # List blocks: each item that is itself a list is walked as blocks.
            if block_type == "list":
                for item in block.get("items") or []:
                    if isinstance(item, list):
                        if self._walk_and_review_blocks(item, chapter_context):
                            has_repairs = True

            # Table blocks: rows -> cells -> blocks.
            if block_type == "table":
                for row in block.get("rows") or []:
                    if not isinstance(row, dict):
                        continue
                    for cell in row.get("cells") or []:
                        if not isinstance(cell, dict):
                            continue
                        cell_blocks = cell.get("blocks", [])
                        if isinstance(cell_blocks, list):
                            if self._walk_and_review_blocks(cell_blocks, chapter_context):
                                has_repairs = True

        return has_repairs

    def _review_chart_block(
        self,
        block: Dict[str, Any],
        chapter_context: Dict[str, Any] | None = None
    ) -> bool:
        """
        Review a single widget block, repairing it in place when needed.

        Only widgets whose ``widgetType`` starts with ``chart.js`` are
        handled. The outcome is recorded on the block through the
        ``_chart_reviewed``, ``_chart_review_status`` and
        ``_chart_review_method`` keys; failed charts additionally get
        ``_chart_renderable = False`` and ``_chart_error_reason``.

        Returns:
            bool: True only when the block was repaired.
        """
        widget_type = block.get("widgetType", "")
        if not isinstance(widget_type, str):
            return False

        is_chart = widget_type.startswith("chart.js")
        is_wordcloud = "wordcloud" in widget_type.lower()

        if not is_chart:
            return False

        widget_id = block.get("widgetId", "unknown")

        # Blocks reviewed by an earlier pass are skipped.
        if block.get("_chart_reviewed"):
            logger.debug(f"图表 {widget_id} 已审查过,跳过")
            return False

        self._stats['total'] += 1

        # Word clouds are marked valid without validation.
        if is_wordcloud:
            self._stats['valid'] += 1
            block["_chart_reviewed"] = True
            block["_chart_review_status"] = "valid"
            block["_chart_review_method"] = "none"
            return False

        # Fill in missing datasets/labels before validating.
        self._normalize_chart_block(block, chapter_context)

        validation_result = self.validator.validate(block)

        if validation_result.is_valid:
            self._stats['valid'] += 1
            block["_chart_reviewed"] = True
            block["_chart_review_status"] = "valid"
            block["_chart_review_method"] = "none"
            if validation_result.warnings:
                logger.debug(f"图表 {widget_id} 验证通过,但有警告: {validation_result.warnings}")
            return False

        # Validation failed: attempt a repair.
        logger.warning(f"图表 {widget_id} 验证失败: {validation_result.errors}")

        repair_result = self.repairer.repair(block, validation_result)

        if repair_result.success and repair_result.repaired_block:
            repaired_block = repair_result.repaired_block
            original_widget_id = block.get("widgetId")

            # Overwrite the original block in place. If the repairer handed
            # back the very same dict, clearing it first would wipe the
            # repaired data, so the copy is skipped in that case.
            if repaired_block is not block:
                block.clear()
                block.update(repaired_block)

            # Make sure the widgetId survives the overwrite.
            if original_widget_id and not block.get("widgetId"):
                block["widgetId"] = original_widget_id

            method = repair_result.method or "local"
            # Only "local" and "api" have counters; other values are not counted.
            if method == "local":
                self._stats['repaired_locally'] += 1
            elif method == "api":
                self._stats['repaired_api'] += 1

            block["_chart_reviewed"] = True
            block["_chart_review_status"] = "repaired"
            block["_chart_review_method"] = method

            logger.info(f"图表 {widget_id} 修复成功 (方法: {method}): {repair_result.changes}")
            return True

        # Repair failed: flag the chart as not renderable.
        self._stats['failed'] += 1
        block["_chart_reviewed"] = True
        block["_chart_renderable"] = False
        block["_chart_review_status"] = "failed"
        block["_chart_review_method"] = "none"
        block["_chart_error_reason"] = self._format_error_reason(validation_result)

        logger.warning(f"图表 {widget_id} 修复失败,已标记为不可渲染")
        return False

    @staticmethod
    def _point_label(point: Any, idx: int) -> str:
        """Return the label for one data point, or the "点N" placeholder."""
        if isinstance(point, dict):
            for key in ("x", "label"):
                value = point.get(key)
                # A numeric zero is a legitimate coordinate, not a missing
                # value; every other falsy value falls through as before.
                is_number = isinstance(value, (int, float)) and not isinstance(value, bool)
                if value or is_number:
                    return str(value)
        return f"点{idx + 1}"

    def _normalize_chart_block(
        self,
        block: Dict[str, Any],
        chapter_context: Dict[str, Any] | None = None
    ) -> None:
        """
        Normalise chart data in place, borrowing missing data from the chapter.

        When the block has no datasets, a deep copy of the chapter's
        ``data.datasets`` (and ``data.labels`` when the block has none) is
        used. When labels are still missing, they are derived from the
        points of the first dataset.
        """
        if not isinstance(block, dict):
            return

        data = block.get("data")
        if not isinstance(data, dict):
            return

        # Borrow datasets from the chapter context when the block has none.
        if not data.get("datasets") and isinstance(chapter_context, dict):
            chapter_data = chapter_context.get("data")
            if isinstance(chapter_data, dict):
                fallback_ds = chapter_data.get("datasets")
                if isinstance(fallback_ds, list) and len(fallback_ds) > 0:
                    merged_data = copy.deepcopy(data)
                    merged_data["datasets"] = copy.deepcopy(fallback_ds)

                    if not merged_data.get("labels") and isinstance(chapter_data.get("labels"), list):
                        merged_data["labels"] = copy.deepcopy(chapter_data["labels"])

                    block["data"] = merged_data

        # Derive labels from the first dataset's points when none exist.
        data_ref = block.get("data")
        if isinstance(data_ref, dict) and not data_ref.get("labels"):
            datasets_ref = data_ref.get("datasets")
            if isinstance(datasets_ref, list) and datasets_ref:
                first_ds = datasets_ref[0]
                ds_data = first_ds.get("data") if isinstance(first_ds, dict) else None
                if isinstance(ds_data, list):
                    labels_from_data = [
                        self._point_label(point, idx)
                        for idx, point in enumerate(ds_data)
                    ]
                    if labels_from_data:
                        data_ref["labels"] = labels_from_data

    def _format_error_reason(self, validation_result: ValidationResult | None) -> str:
        """Join at most the first three validation errors into one string."""
        if not validation_result:
            return "未知错误"
        errors = validation_result.errors or []
        if not errors:
            return "验证失败但无具体错误信息"
        # Coerce to str so a non-string error entry cannot break the join.
        return "; ".join(str(error) for error in errors[:3])

    def _log_stats(self) -> None:
        """Log a summary of the current counters."""
        if self._stats['total'] == 0:
            logger.debug("ChartReviewService: 没有图表需要审查")
            return

        repaired = self._stats['repaired_locally'] + self._stats['repaired_api']
        logger.info(
            f"ChartReviewService 图表审查完成: "
            f"总计 {self._stats['total']} 个, "
            f"有效 {self._stats['valid']} 个, "
            f"修复 {repaired} 个 (本地 {self._stats['repaired_locally']}, API {self._stats['repaired_api']}), "
            f"失败 {self._stats['failed']} 个"
        )

    def _save_ir_to_file(self, document_ir: Dict[str, Any], file_path: str | Path) -> None:
        """Write the IR as UTF-8 JSON to ``file_path``; failures are logged, not raised."""
        try:
            path = Path(file_path)
            path.parent.mkdir(parents=True, exist_ok=True)
            path.write_text(
                json.dumps(document_ir, ensure_ascii=False, indent=2),
                encoding="utf-8"
            )
            logger.info(f"ChartReviewService: 修复后的 IR 已保存到 {path}")
        except Exception as e:
            # Best effort: a failed save must not abort rendering.
            logger.exception(f"ChartReviewService: 保存 IR 文件失败: {e}")
# Module-level cache of the shared service instance.
_chart_review_service: Optional[ChartReviewService] = None


def get_chart_review_service() -> ChartReviewService:
    """Return the shared ChartReviewService, creating it on first use."""
    global _chart_review_service
    cached = _chart_review_service
    if cached is not None:
        return cached
    # First call: build the service and remember it for later callers.
    cached = ChartReviewService()
    _chart_review_service = cached
    return cached
def review_document_charts(
    document_ir: Dict[str, Any],
    ir_file_path: Optional[str | Path] = None,
    *,
    reset_stats: bool = True,
    save_on_repair: bool = True
) -> Dict[str, Any]:
    """
    Convenience wrapper: review and repair every chart in a document.

    Delegates to ``review_document`` on the shared service instance.

    Args:
        document_ir: Document IR data.
        ir_file_path: IR file path, forwarded to the service.
        reset_stats: Whether to reset the counters first.
        save_on_repair: Whether to save the IR after a repair.

    Returns:
        Dict[str, Any]: The reviewed Document IR as returned by the service.
    """
    review_options = {
        "reset_stats": reset_stats,
        "save_on_repair": save_on_repair,
    }
    return get_chart_review_service().review_document(
        document_ir,
        ir_file_path,
        **review_options
    )
__all__
=
[
"ChartReviewService"
,
"get_chart_review_service"
,
"review_document_charts"
,
]
...
...
ReportEngine/utils/chart_validator.py
View file @
a371cdf
...
...
@@ -444,11 +444,15 @@ class ChartRepairer:
if
validation_result
is
None
:
validation_result
=
self
.
validator
.
validate
(
widget_block
)
# 跟踪当前最新的验证结果和数据
current_validation
=
validation_result
current_block
=
widget_block
# 2. 尝试本地修复(即使验证通过也尝试,因为可能有警告)
logger
.
info
(
f
"尝试本地修复图表"
)
local_result
=
self
.
repair_locally
(
widget_block
,
validation_result
)
# 3. 验证修复结果
# 3. 验证
本地
修复结果
if
local_result
.
has_changes
():
repaired_validation
=
self
.
validator
.
validate
(
local_result
.
repaired_block
)
if
repaired_validation
.
is_valid
:
...
...
@@ -458,22 +462,27 @@ class ChartRepairer:
)
else
:
logger
.
warning
(
f
"本地修复后仍然无效: {repaired_validation.errors}"
)
# 更新当前状态为本地修复后的结果,供API修复使用
current_validation
=
repaired_validation
current_block
=
local_result
.
repaired_block
# 4. 如果本地修复失败且有严重错误,尝试API修复
if
validation_result
.
has_critical_errors
()
and
len
(
self
.
llm_repair_fns
)
>
0
:
logger
.
info
(
"本地修复失败,尝试API修复"
)
api_result
=
self
.
repair_with_api
(
widget_block
,
validation_result
)
# 4. 如果当前仍有严重错误,尝试API修复
# 注意:使用 current_validation 而非原始 validation_result
if
current_validation
.
has_critical_errors
()
and
len
(
self
.
llm_repair_fns
)
>
0
:
logger
.
info
(
"本地修复失败或不足,尝试API修复"
)
# 传入本地已修复的数据(如果有),避免浪费本地修复的工作
api_result
=
self
.
repair_with_api
(
current_block
,
current_validation
)
if
api_result
.
success
:
# 验证修复结果
repaired_validation
=
self
.
validator
.
validate
(
api_result
.
repaired_block
)
if
repaired_validation
.
is_valid
:
api_repaired_validation
=
self
.
validator
.
validate
(
api_result
.
repaired_block
)
if
api_repaired_validation
.
is_valid
:
logger
.
info
(
f
"API修复成功: {api_result.changes}"
)
return
_cache_and_return
(
api_result
)
else
:
logger
.
warning
(
f
"API修复后仍然无效: {repaired_validation.errors}"
)
logger
.
warning
(
f
"API修复后仍然无效: {
api_
repaired_validation.errors}"
)
# 5. 如果验证通过,返回原始或修复后的数据
# 5. 如果
原始
验证通过,返回原始或修复后的数据
if
validation_result
.
is_valid
:
if
local_result
.
has_changes
():
return
_cache_and_return
(
...
...
@@ -482,9 +491,11 @@ class ChartRepairer:
else
:
return
_cache_and_return
(
RepairResult
(
True
,
widget_block
,
'none'
,
[]))
# 6. 所有修复都失败,返回原始数据
# 6. 所有修复都失败,返回原始数据
(或本地部分修复的数据)
logger
.
warning
(
"所有修复尝试失败,保持原始数据"
)
return
_cache_and_return
(
RepairResult
(
False
,
widget_block
,
'none'
,
[]))
# 如果本地有部分修复,返回本地修复后的数据(虽然验证仍失败,但可能比原始数据好)
final_block
=
local_result
.
repaired_block
if
local_result
.
has_changes
()
else
widget_block
return
_cache_and_return
(
RepairResult
(
False
,
final_block
,
'none'
,
[]))
def
repair_locally
(
self
,
...
...
@@ -664,27 +675,41 @@ class ChartRepairer:
策略:按顺序尝试不同的Engine,直到修复成功
"""
if
not
self
.
llm_repair_fns
:
logger
.
debug
(
"没有可用的LLM修复函数,跳过API修复"
)
return
RepairResult
(
False
,
None
,
'api'
,
[])
widget_id
=
widget_block
.
get
(
'widgetId'
,
'unknown'
)
logger
.
info
(
f
"图表 {widget_id} 开始API修复,共 {len(self.llm_repair_fns)} 个Engine可用"
)
for
idx
,
repair_fn
in
enumerate
(
self
.
llm_repair_fns
):
try
:
logger
.
info
(
f
"尝试使用Engine {idx + 1}
修复图表
"
)
logger
.
info
(
f
"尝试使用Engine {idx + 1}
/{len(self.llm_repair_fns)} 修复图表 {widget_id}
"
)
repaired
=
repair_fn
(
widget_block
,
validation_result
.
errors
)
if
repaired
and
isinstance
(
repaired
,
dict
):
# 验证修复结果
repaired_validation
=
self
.
validator
.
validate
(
repaired
)
if
repaired_validation
.
is_valid
:
logger
.
info
(
f
"图表 {widget_id} 使用Engine {idx + 1} 修复成功"
)
return
RepairResult
(
True
,
repaired
,
'api'
,
[
f
"使用Engine {idx + 1}修复成功"
]
)
else
:
logger
.
warning
(
f
"图表 {widget_id} Engine {idx + 1} 返回的数据验证失败: "
f
"{repaired_validation.errors}"
)
else
:
logger
.
warning
(
f
"图表 {widget_id} Engine {idx + 1} 返回空或无效响应"
)
except
Exception
as
e
:
logger
.
error
(
f
"Engine {idx + 1}修复失败: {e}"
)
# 使用 exception 记录完整堆栈
logger
.
exception
(
f
"图表 {widget_id} Engine {idx + 1} 修复过程中发生异常: {e}"
)
continue
logger
.
warning
(
f
"图表 {widget_id} 所有 {len(self.llm_repair_fns)} 个Engine均修复失败"
)
return
RepairResult
(
False
,
None
,
'api'
,
[])
...
...
regenerate_latest_html.py
View file @
a371cdf
...
...
@@ -228,7 +228,7 @@ def save_document_ir(document_ir, base_name, timestamp):
return
ir_path
def
render_html
(
document_ir
,
base_name
,
timestamp
):
def
render_html
(
document_ir
,
base_name
,
timestamp
,
ir_path
=
None
):
"""
使用 HTMLRenderer 将 Document IR 渲染为 HTML 并保存。
...
...
@@ -239,12 +239,14 @@ def render_html(document_ir, base_name, timestamp):
document_ir: 装订完成的整本 IR
base_name: 文件名片段(来源于报告主题/标题)
timestamp: 时间戳字符串
ir_path: 可选,IR 文件路径,提供时修复后会自动保存
返回:
Path: 生成的 HTML 文件路径
"""
renderer
=
HTMLRenderer
()
html_content
=
renderer
.
render
(
document_ir
)
# 传入 ir_file_path,修复后自动保存
html_content
=
renderer
.
render
(
document_ir
,
ir_file_path
=
str
(
ir_path
)
if
ir_path
else
None
)
output_dir
=
Path
(
settings
.
OUTPUT_DIR
)
/
"html"
output_dir
.
mkdir
(
parents
=
True
,
exist_ok
=
True
)
...
...
@@ -322,7 +324,8 @@ def main():
)
ir_path
=
save_document_ir
(
document_ir
,
base_name
,
timestamp
)
html_path
=
render_html
(
document_ir
,
base_name
,
timestamp
)
# 传入 ir_path,修复后的图表会自动保存到 IR 文件
html_path
=
render_html
(
document_ir
,
base_name
,
timestamp
,
ir_path
=
ir_path
)
logger
.
info
(
""
)
logger
.
info
(
"🎉 HTML装订与渲染完成"
)
...
...
regenerate_latest_pdf.py
View file @
a371cdf
...
...
@@ -88,7 +88,7 @@ def load_document_ir(file_path):
logger
.
error
(
f
"加载报告失败: {e}"
)
return
None
def
generate_pdf_with_vector_charts
(
document_ir
,
output_path
):
def
generate_pdf_with_vector_charts
(
document_ir
,
output_path
,
ir_file_path
=
None
):
"""
使用 PDFRenderer 将 Document IR 渲染为包含 SVG 矢量图表的 PDF。
...
...
@@ -97,6 +97,7 @@ def generate_pdf_with_vector_charts(document_ir, output_path):
参数:
document_ir: 完整的 Document IR
output_path: 目标 PDF 路径
ir_file_path: 可选,IR 文件路径,提供时修复后会自动保存
返回:
Path | None: 成功时返回生成的 PDF 路径,失败返回 None。
...
...
@@ -109,11 +110,12 @@ def generate_pdf_with_vector_charts(document_ir, output_path):
# 创建PDF渲染器
renderer
=
PDFRenderer
()
# 渲染PDF
# 渲染PDF
,传入 ir_file_path 用于修复后保存
result_path
=
renderer
.
render_to_pdf
(
document_ir
,
output_path
,
optimize_layout
=
True
optimize_layout
=
True
,
ir_file_path
=
str
(
ir_file_path
)
if
ir_file_path
else
None
)
logger
.
info
(
"="
*
60
)
...
...
@@ -171,8 +173,8 @@ def main():
logger
.
info
(
f
"输出路径: {output_path}"
)
logger
.
info
(
""
)
# 4. 生成PDF
result
=
generate_pdf_with_vector_charts
(
document_ir
,
output_path
)
# 4. 生成PDF,传入 IR 文件路径用于修复后保存
result
=
generate_pdf_with_vector_charts
(
document_ir
,
output_path
,
ir_file_path
=
latest_report
)
if
result
:
logger
.
info
(
""
)
...
...
report_engine_only.py
View file @
a371cdf
...
...
@@ -338,12 +338,13 @@ def save_pdf(document_ir_path: str, query: str) -> Optional[str]:
pdf_filename
=
f
"final_report_{query_safe}_{timestamp}.pdf"
pdf_path
=
pdf_dir
/
pdf_filename
# 使用 render_to_pdf 方法直接生成PDF文件
(与regenerate_latest_pdf.py一致)
# 使用 render_to_pdf 方法直接生成PDF文件
,传入 IR 文件路径用于修复后保存
logger
.
info
(
f
"开始渲染PDF: {pdf_path}"
)
result_path
=
renderer
.
render_to_pdf
(
document_ir
,
pdf_path
,
optimize_layout
=
True
optimize_layout
=
True
,
ir_file_path
=
document_ir_path
)
# 显示文件大小
...
...
@@ -378,7 +379,8 @@ def save_markdown(document_ir_path: str, query: str) -> Optional[str]:
from
ReportEngine.renderers
import
MarkdownRenderer
renderer
=
MarkdownRenderer
()
markdown_content
=
renderer
.
render
(
document_ir
)
# 传入 IR 文件路径用于修复后保存
markdown_content
=
renderer
.
render
(
document_ir
,
ir_file_path
=
document_ir_path
)
timestamp
=
datetime
.
now
()
.
strftime
(
"
%
Y
%
m
%
d_
%
H
%
M
%
S"
)
query_safe
=
""
.
join
(
...
...
Please
register
or
login
to post a comment