马一丁

Completely redesign the chart repair logic

... ... @@ -29,6 +29,7 @@ from ReportEngine.utils.chart_validator import (
create_chart_repairer
)
from ReportEngine.utils.chart_repair_api import create_llm_repair_functions
from ReportEngine.utils.chart_review_service import get_chart_review_service
class HTMLRenderer:
... ... @@ -117,6 +118,12 @@ class HTMLRenderer:
validator=self.chart_validator,
llm_repair_fns=llm_repair_fns
)
# 打印LLM修复函数状态
self._llm_repair_count = len(llm_repair_fns)
if not llm_repair_fns:
logger.warning("HTMLRenderer: 未配置任何LLM API,图表API修复功能不可用")
else:
logger.info(f"HTMLRenderer: 已配置 {len(llm_repair_fns)} 个LLM修复函数")
# 记录修复失败的图表,避免多次触发LLM循环修复
self._chart_failure_notes: Dict[str, str] = {}
self._chart_failure_recorded: set[str] = set()
... ... @@ -268,19 +275,36 @@ class HTMLRenderer:
# ====== 公共入口 ======
def render(self, document_ir: Dict[str, Any]) -> str:
def render(
self,
document_ir: Dict[str, Any],
ir_file_path: str | None = None
) -> str:
"""
接收Document IR,重置内部状态并输出完整HTML。
参数:
document_ir: 由 DocumentComposer 生成的整本报告数据。
ir_file_path: 可选,IR 文件路径,提供时修复后会自动保存。
返回:
str: 可直接写入磁盘的完整HTML文档。
"""
self.document = document_ir or {}
# 先对图表做统一审查与修复,并将结果回写,供后续PDF/HTML共用
self.review_and_patch_document(self.document, reset_stats=True)
# 使用统一的 ChartReviewService 进行图表审查与修复
# 修复结果会直接回写到 document_ir,避免多次渲染重复修复
chart_service = get_chart_review_service()
chart_service.review_document(
self.document,
ir_file_path=ir_file_path,
reset_stats=True,
save_on_repair=bool(ir_file_path)
)
# 同步统计信息到本地(用于兼容旧的 _log_chart_validation_stats)
service_stats = chart_service.stats
self.chart_validation_stats.update(service_stats)
self.widget_scripts = []
self.chart_counter = 0
self.heading_counter = 0
... ...
... ... @@ -5,6 +5,8 @@ from typing import Any, Dict, List
from loguru import logger
from ReportEngine.utils.chart_review_service import get_chart_review_service
class MarkdownRenderer:
"""
... ... @@ -19,9 +21,33 @@ class MarkdownRenderer:
self.document: Dict[str, Any] = {}
self.metadata: Dict[str, Any] = {}
def render(self, document_ir: Dict[str, Any]) -> str:
"""入口:将IR转换为Markdown字符串"""
def render(
self,
document_ir: Dict[str, Any],
ir_file_path: str | None = None
) -> str:
"""
入口:将IR转换为Markdown字符串。
参数:
document_ir: Document IR 数据
ir_file_path: 可选,IR 文件路径,提供时修复后会自动保存
返回:
str: Markdown 字符串
"""
self.document = document_ir or {}
# 使用统一的 ChartReviewService 进行图表审查与修复
# 虽然 Markdown 渲染时图表会降级为表格,但仍需确保数据有效
chart_service = get_chart_review_service()
chart_service.review_document(
self.document,
ir_file_path=ir_file_path,
reset_stats=True,
save_on_repair=bool(ir_file_path)
)
self.metadata = self.document.get("metadata", {}) or {}
parts: List[str] = []
... ...
... ... @@ -71,6 +71,7 @@ from .html_renderer import HTMLRenderer
from .pdf_layout_optimizer import PDFLayoutOptimizer, PDFLayoutConfig
from .chart_to_svg import create_chart_converter
from .math_to_svg import MathToSVG
from ReportEngine.utils.chart_review_service import get_chart_review_service
try:
from wordcloud import WordCloud
WORDCLOUD_AVAILABLE = True
... ... @@ -153,27 +154,34 @@ class PDFRenderer:
raise FileNotFoundError(f"未找到字体文件,请检查 {fonts_dir} 目录")
def _preprocess_charts(self, document_ir: Dict[str, Any]) -> Dict[str, Any]:
def _preprocess_charts(
self,
document_ir: Dict[str, Any],
ir_file_path: str | None = None
) -> Dict[str, Any]:
"""
预处理图表:验证并修复所有图表数据,结果回写原始IR
预处理图表:使用 ChartReviewService 验证并修复所有图表数据
先统一审查并修复图表,把修复结果直接写回传入的 IR,
然后返回修复后的深拷贝供后续 SVG/词云转换使用,避免
HTML 和 PDF 分别重复触发 ChartRepairer。
使用统一的 ChartReviewService 进行图表审查,修复结果直接写回传入的 IR。
如果提供 ir_file_path,修复后会自动保存到文件。
参数:
document_ir: Document IR数据
ir_file_path: 可选,IR 文件路径,提供时修复后会自动保存
返回:
Dict[str, Any]: 修复后的Document IR(深拷贝)
"""
reviewed_ir = self.html_renderer.review_and_patch_document(
# 使用统一的 ChartReviewService
chart_service = get_chart_review_service()
chart_service.review_document(
document_ir,
ir_file_path=ir_file_path,
reset_stats=True,
clone=False
save_on_repair=bool(ir_file_path)
)
stats = self.html_renderer.chart_validation_stats
stats = chart_service.stats
if stats.get('total', 0) > 0:
repaired_count = stats.get('repaired_locally', 0) + stats.get('repaired_api', 0)
logger.info(
... ... @@ -184,7 +192,7 @@ class PDFRenderer:
)
# 返回深拷贝,避免后续 SVG 转换过程影响回写后的原始 IR
return copy.deepcopy(reviewed_ir)
return copy.deepcopy(document_ir)
def _convert_charts_to_svg(self, document_ir: Dict[str, Any]) -> Dict[str, str]:
"""
... ... @@ -813,7 +821,8 @@ class PDFRenderer:
def _get_pdf_html(
self,
document_ir: Dict[str, Any],
optimize_layout: bool = True
optimize_layout: bool = True,
ir_file_path: str | None = None
) -> str:
"""
生成适用于PDF的HTML内容
... ... @@ -827,6 +836,7 @@ class PDFRenderer:
参数:
document_ir: Document IR数据
optimize_layout: 是否启用布局优化
ir_file_path: 可选,IR 文件路径,提供时修复后会自动保存
返回:
str: 优化后的HTML内容
... ... @@ -853,7 +863,7 @@ class PDFRenderer:
# 关键修复:先预处理图表,确保数据有效
logger.info("预处理图表数据...")
preprocessed_ir = self._preprocess_charts(document_ir)
preprocessed_ir = self._preprocess_charts(document_ir, ir_file_path)
# 转换图表为SVG(使用预处理后的IR)
logger.info("开始转换图表为SVG矢量图形...")
... ... @@ -1527,7 +1537,8 @@ button.ghost-btn {{
self,
document_ir: Dict[str, Any],
output_path: str | Path,
optimize_layout: bool = True
optimize_layout: bool = True,
ir_file_path: str | None = None
) -> Path:
"""
将Document IR渲染为PDF文件
... ... @@ -1536,6 +1547,7 @@ button.ghost-btn {{
document_ir: Document IR数据
output_path: PDF输出路径
optimize_layout: 是否启用布局优化(默认True)
ir_file_path: 可选,IR 文件路径,提供时修复后会自动保存
返回:
Path: 生成的PDF文件路径
... ... @@ -1545,7 +1557,7 @@ button.ghost-btn {{
logger.info(f"开始生成PDF: {output_path}")
# 生成HTML内容
html_content = self._get_pdf_html(document_ir, optimize_layout)
html_content = self._get_pdf_html(document_ir, optimize_layout, ir_file_path)
# 配置字体
font_config = FontConfiguration()
... ... @@ -1570,7 +1582,8 @@ button.ghost-btn {{
def render_to_bytes(
self,
document_ir: Dict[str, Any],
optimize_layout: bool = True
optimize_layout: bool = True,
ir_file_path: str | None = None
) -> bytes:
"""
将Document IR渲染为PDF字节流
... ... @@ -1578,11 +1591,12 @@ button.ghost-btn {{
参数:
document_ir: Document IR数据
optimize_layout: 是否启用布局优化(默认True)
ir_file_path: 可选,IR 文件路径,提供时修复后会自动保存
返回:
bytes: PDF文件的字节内容
"""
html_content = self._get_pdf_html(document_ir, optimize_layout)
html_content = self._get_pdf_html(document_ir, optimize_layout, ir_file_path)
font_config = FontConfiguration()
html_doc = HTML(string=html_content, base_url=str(Path.cwd()))
... ...
... ... @@ -4,6 +4,14 @@ Report Engine工具模块。
当前主要暴露配置读取逻辑,后续可扩展更多通用工具。
"""
from ReportEngine.utils.chart_review_service import (
ChartReviewService,
get_chart_review_service,
review_document_charts,
)
__all__ = [
"ChartReviewService",
"get_chart_review_service",
"review_document_charts",
]
... ...
... ... @@ -169,10 +169,11 @@ def create_llm_repair_functions() -> List:
return repaired
except Exception as e:
logger.error(f"ReportEngine图表修复失败: {e}")
logger.exception(f"ReportEngine图表修复失败: {e}")
return None
repair_functions.append(repair_with_report_engine)
logger.debug("已添加ReportEngine图表修复函数")
# 2. ForumEngine修复函数
if settings.FORUM_HOST_API_KEY and settings.FORUM_HOST_BASE_URL:
... ... @@ -202,10 +203,11 @@ def create_llm_repair_functions() -> List:
return repaired
except Exception as e:
logger.error(f"ForumEngine图表修复失败: {e}")
logger.exception(f"ForumEngine图表修复失败: {e}")
return None
repair_functions.append(repair_with_forum_engine)
logger.debug("已添加ForumEngine图表修复函数")
# 3. InsightEngine修复函数
if settings.INSIGHT_ENGINE_API_KEY and settings.INSIGHT_ENGINE_BASE_URL:
... ... @@ -235,10 +237,11 @@ def create_llm_repair_functions() -> List:
return repaired
except Exception as e:
logger.error(f"InsightEngine图表修复失败: {e}")
logger.exception(f"InsightEngine图表修复失败: {e}")
return None
repair_functions.append(repair_with_insight_engine)
logger.debug("已添加InsightEngine图表修复函数")
# 4. MediaEngine修复函数
if settings.MEDIA_ENGINE_API_KEY and settings.MEDIA_ENGINE_BASE_URL:
... ... @@ -268,12 +271,15 @@ def create_llm_repair_functions() -> List:
return repaired
except Exception as e:
logger.error(f"MediaEngine图表修复失败: {e}")
logger.exception(f"MediaEngine图表修复失败: {e}")
return None
repair_functions.append(repair_with_media_engine)
logger.debug("已添加MediaEngine图表修复函数")
if not repair_functions:
logger.warning("未配置任何Engine API,图表API修复功能将不可用")
else:
logger.info(f"图表API修复功能已启用,共 {len(repair_functions)} 个Engine可用")
return repair_functions
... ...
"""
图表审查服务 - 统一管理图表验证和修复。
提供单例服务,确保所有渲染器共享修复状态,避免重复修复。
修复成功后可自动持久化到 IR 文件。
"""
from __future__ import annotations
import copy
import json
import threading
from pathlib import Path
from typing import Any, Dict, List, Optional
from loguru import logger
from ReportEngine.utils.chart_validator import (
ChartValidator,
ChartRepairer,
ValidationResult,
create_chart_validator,
create_chart_repairer
)
from ReportEngine.utils.chart_repair_api import create_llm_repair_functions
class ChartReviewService:
"""
图表审查服务 - 单例模式。
职责:
1. 统一管理图表验证和修复
2. 维护修复缓存,避免重复修复
3. 支持修复后自动持久化到 IR 文件
4. 提供统计信息
"""
_instance: Optional["ChartReviewService"] = None
_lock = threading.Lock()
def __new__(cls) -> "ChartReviewService":
"""单例模式"""
if cls._instance is None:
with cls._lock:
if cls._instance is None:
cls._instance = super().__new__(cls)
cls._instance._initialized = False
return cls._instance
def __init__(self):
"""初始化服务(仅首次调用时执行)"""
if self._initialized:
return
self._initialized = True
# 初始化验证器和修复器
self.validator = create_chart_validator()
self.llm_repair_fns = create_llm_repair_functions()
self.repairer = create_chart_repairer(
validator=self.validator,
llm_repair_fns=self.llm_repair_fns
)
# 打印 LLM 修复函数状态
if not self.llm_repair_fns:
logger.warning("ChartReviewService: 未配置任何 LLM API,图表 API 修复功能不可用")
else:
logger.info(f"ChartReviewService: 已配置 {len(self.llm_repair_fns)} 个 LLM 修复函数")
# 统计信息
self._stats = {
'total': 0,
'valid': 0,
'repaired_locally': 0,
'repaired_api': 0,
'failed': 0
}
logger.info("ChartReviewService 初始化完成")
def reset_stats(self) -> None:
"""重置统计信息"""
self._stats = {
'total': 0,
'valid': 0,
'repaired_locally': 0,
'repaired_api': 0,
'failed': 0
}
@property
def stats(self) -> Dict[str, int]:
"""获取统计信息副本"""
return self._stats.copy()
def review_document(
self,
document_ir: Dict[str, Any],
ir_file_path: Optional[str | Path] = None,
*,
reset_stats: bool = True,
save_on_repair: bool = True
) -> Dict[str, Any]:
"""
审查并修复文档中的所有图表。
遍历所有章节的 blocks,检测图表类型的 widget,
对未审查过的图表进行验证和修复。
参数:
document_ir: Document IR 数据
ir_file_path: IR 文件路径,如果提供且有修复,会自动保存
reset_stats: 是否重置统计信息
save_on_repair: 修复后是否自动保存到文件
返回:
Dict[str, Any]: 审查后的 Document IR(原对象,已修改)
"""
if reset_stats:
self.reset_stats()
if not document_ir:
logger.warning("ChartReviewService: document_ir 为空,跳过审查")
return document_ir
has_repairs = False
# 遍历所有章节
for chapter in document_ir.get("chapters", []) or []:
if not isinstance(chapter, dict):
continue
blocks = chapter.get("blocks", [])
if isinstance(blocks, list):
chapter_repairs = self._walk_and_review_blocks(blocks, chapter)
if chapter_repairs:
has_repairs = True
# 输出统计信息
self._log_stats()
# 如果有修复且提供了文件路径,保存到文件
if has_repairs and ir_file_path and save_on_repair:
self._save_ir_to_file(document_ir, ir_file_path)
return document_ir
def _walk_and_review_blocks(
self,
blocks: List[Any],
chapter_context: Dict[str, Any] | None = None
) -> bool:
"""
递归遍历 blocks 并审查图表。
返回:
bool: 是否有修复发生
"""
has_repairs = False
for block in blocks or []:
if not isinstance(block, dict):
continue
# 检查是否是图表 widget
if block.get("type") == "widget":
repaired = self._review_chart_block(block, chapter_context)
if repaired:
has_repairs = True
# 递归处理嵌套的 blocks
nested_blocks = block.get("blocks")
if isinstance(nested_blocks, list):
if self._walk_and_review_blocks(nested_blocks, chapter_context):
has_repairs = True
# 处理 list 类型的 items
if block.get("type") == "list":
for item in block.get("items", []):
if isinstance(item, list):
if self._walk_and_review_blocks(item, chapter_context):
has_repairs = True
# 处理 table 类型的 cells
if block.get("type") == "table":
for row in block.get("rows", []):
if not isinstance(row, dict):
continue
for cell in row.get("cells", []):
if isinstance(cell, dict):
cell_blocks = cell.get("blocks", [])
if isinstance(cell_blocks, list):
if self._walk_and_review_blocks(cell_blocks, chapter_context):
has_repairs = True
return has_repairs
def _review_chart_block(
self,
block: Dict[str, Any],
chapter_context: Dict[str, Any] | None = None
) -> bool:
"""
审查单个图表 block。
返回:
bool: 是否进行了修复
"""
widget_type = block.get("widgetType", "")
if not isinstance(widget_type, str):
return False
# 只处理 chart.js 类型(词云单独处理,不需要修复)
is_chart = widget_type.startswith("chart.js")
is_wordcloud = "wordcloud" in widget_type.lower()
if not is_chart:
return False
widget_id = block.get("widgetId", "unknown")
# 检查是否已审查过
if block.get("_chart_reviewed"):
logger.debug(f"图表 {widget_id} 已审查过,跳过")
return False
self._stats['total'] += 1
# 词云直接标记为有效
if is_wordcloud:
self._stats['valid'] += 1
block["_chart_reviewed"] = True
block["_chart_review_status"] = "valid"
block["_chart_review_method"] = "none"
return False
# 先进行数据规范化(从章节上下文补充数据)
self._normalize_chart_block(block, chapter_context)
# 验证图表
validation_result = self.validator.validate(block)
if validation_result.is_valid:
# 验证通过
self._stats['valid'] += 1
block["_chart_reviewed"] = True
block["_chart_review_status"] = "valid"
block["_chart_review_method"] = "none"
if validation_result.warnings:
logger.debug(f"图表 {widget_id} 验证通过,但有警告: {validation_result.warnings}")
return False
# 验证失败,尝试修复
logger.warning(f"图表 {widget_id} 验证失败: {validation_result.errors}")
repair_result = self.repairer.repair(block, validation_result)
if repair_result.success and repair_result.repaired_block:
# 修复成功,覆盖原始 block 数据
repaired_block = repair_result.repaired_block
# 保留原始的一些元信息
original_widget_id = block.get("widgetId")
block.clear()
block.update(repaired_block)
# 确保 widgetId 不丢失
if original_widget_id and not block.get("widgetId"):
block["widgetId"] = original_widget_id
method = repair_result.method or "local"
if method == "local":
self._stats['repaired_locally'] += 1
elif method == "api":
self._stats['repaired_api'] += 1
block["_chart_reviewed"] = True
block["_chart_review_status"] = "repaired"
block["_chart_review_method"] = method
logger.info(f"图表 {widget_id} 修复成功 (方法: {method}): {repair_result.changes}")
return True
# 修复失败
self._stats['failed'] += 1
block["_chart_reviewed"] = True
block["_chart_renderable"] = False
block["_chart_review_status"] = "failed"
block["_chart_review_method"] = "none"
block["_chart_error_reason"] = self._format_error_reason(validation_result)
logger.warning(f"图表 {widget_id} 修复失败,已标记为不可渲染")
return False
def _normalize_chart_block(
self,
block: Dict[str, Any],
chapter_context: Dict[str, Any] | None = None
) -> None:
"""
规范化图表数据,从章节上下文补充缺失数据。
"""
if not isinstance(block, dict):
return
data = block.get("data")
if not isinstance(data, dict):
return
# 尝试从章节上下文补充 datasets
datasets = data.get("datasets")
if not datasets or (isinstance(datasets, list) and len(datasets) == 0):
if isinstance(chapter_context, dict):
chapter_data = chapter_context.get("data")
if isinstance(chapter_data, dict):
fallback_ds = chapter_data.get("datasets")
if isinstance(fallback_ds, list) and len(fallback_ds) > 0:
merged_data = copy.deepcopy(data)
merged_data["datasets"] = copy.deepcopy(fallback_ds)
if not merged_data.get("labels") and isinstance(chapter_data.get("labels"), list):
merged_data["labels"] = copy.deepcopy(chapter_data["labels"])
block["data"] = merged_data
# 如果缺少 labels 且数据点包含 x 值,自动生成
data_ref = block.get("data")
if isinstance(data_ref, dict) and not data_ref.get("labels"):
datasets_ref = data_ref.get("datasets")
if isinstance(datasets_ref, list) and datasets_ref:
first_ds = datasets_ref[0]
ds_data = first_ds.get("data") if isinstance(first_ds, dict) else None
if isinstance(ds_data, list):
labels_from_data = []
for idx, point in enumerate(ds_data):
if isinstance(point, dict):
label_text = point.get("x") or point.get("label") or f"点{idx + 1}"
else:
label_text = f"点{idx + 1}"
labels_from_data.append(str(label_text))
if labels_from_data:
data_ref["labels"] = labels_from_data
def _format_error_reason(self, validation_result: ValidationResult | None) -> str:
"""格式化错误原因"""
if not validation_result:
return "未知错误"
errors = validation_result.errors or []
if not errors:
return "验证失败但无具体错误信息"
return "; ".join(errors[:3])
def _log_stats(self) -> None:
"""输出统计信息"""
if self._stats['total'] == 0:
logger.debug("ChartReviewService: 没有图表需要审查")
return
repaired = self._stats['repaired_locally'] + self._stats['repaired_api']
logger.info(
f"ChartReviewService 图表审查完成: "
f"总计 {self._stats['total']} 个, "
f"有效 {self._stats['valid']} 个, "
f"修复 {repaired} 个 (本地 {self._stats['repaired_locally']}, API {self._stats['repaired_api']}), "
f"失败 {self._stats['failed']} 个"
)
def _save_ir_to_file(self, document_ir: Dict[str, Any], file_path: str | Path) -> None:
"""保存 IR 到文件"""
try:
path = Path(file_path)
path.parent.mkdir(parents=True, exist_ok=True)
path.write_text(
json.dumps(document_ir, ensure_ascii=False, indent=2),
encoding="utf-8"
)
logger.info(f"ChartReviewService: 修复后的 IR 已保存到 {path}")
except Exception as e:
logger.exception(f"ChartReviewService: 保存 IR 文件失败: {e}")
# Module-level handle to the shared service instance (created lazily).
_chart_review_service: Optional[ChartReviewService] = None


def get_chart_review_service() -> ChartReviewService:
    """Return the shared ChartReviewService, creating it on first call."""
    global _chart_review_service
    service = _chart_review_service
    if service is None:
        service = ChartReviewService()
        _chart_review_service = service
    return service
def review_document_charts(
    document_ir: Dict[str, Any],
    ir_file_path: Optional[str | Path] = None,
    *,
    reset_stats: bool = True,
    save_on_repair: bool = True
) -> Dict[str, Any]:
    """
    Convenience wrapper: review and repair every chart in a document.

    Delegates to the shared ChartReviewService's ``review_document``.

    Args:
        document_ir: Document IR data.
        ir_file_path: IR file path; forwarded so the service can save the IR
            after a repair.
        reset_stats: Whether to reset the service counters first.
        save_on_repair: Whether to save to the file after a repair.

    Returns:
        The reviewed Document IR, as returned by the service.
    """
    review_options = {
        "reset_stats": reset_stats,
        "save_on_repair": save_on_repair,
    }
    return get_chart_review_service().review_document(
        document_ir,
        ir_file_path,
        **review_options
    )
__all__ = [
"ChartReviewService",
"get_chart_review_service",
"review_document_charts",
]
... ...
... ... @@ -444,11 +444,15 @@ class ChartRepairer:
if validation_result is None:
validation_result = self.validator.validate(widget_block)
# 跟踪当前最新的验证结果和数据
current_validation = validation_result
current_block = widget_block
# 2. 尝试本地修复(即使验证通过也尝试,因为可能有警告)
logger.info(f"尝试本地修复图表")
local_result = self.repair_locally(widget_block, validation_result)
# 3. 验证修复结果
# 3. 验证本地修复结果
if local_result.has_changes():
repaired_validation = self.validator.validate(local_result.repaired_block)
if repaired_validation.is_valid:
... ... @@ -458,22 +462,27 @@ class ChartRepairer:
)
else:
logger.warning(f"本地修复后仍然无效: {repaired_validation.errors}")
# 更新当前状态为本地修复后的结果,供API修复使用
current_validation = repaired_validation
current_block = local_result.repaired_block
# 4. 如果本地修复失败且有严重错误,尝试API修复
if validation_result.has_critical_errors() and len(self.llm_repair_fns) > 0:
logger.info("本地修复失败,尝试API修复")
api_result = self.repair_with_api(widget_block, validation_result)
# 4. 如果当前仍有严重错误,尝试API修复
# 注意:使用 current_validation 而非原始 validation_result
if current_validation.has_critical_errors() and len(self.llm_repair_fns) > 0:
logger.info("本地修复失败或不足,尝试API修复")
# 传入本地已修复的数据(如果有),避免浪费本地修复的工作
api_result = self.repair_with_api(current_block, current_validation)
if api_result.success:
# 验证修复结果
repaired_validation = self.validator.validate(api_result.repaired_block)
if repaired_validation.is_valid:
api_repaired_validation = self.validator.validate(api_result.repaired_block)
if api_repaired_validation.is_valid:
logger.info(f"API修复成功: {api_result.changes}")
return _cache_and_return(api_result)
else:
logger.warning(f"API修复后仍然无效: {repaired_validation.errors}")
logger.warning(f"API修复后仍然无效: {api_repaired_validation.errors}")
# 5. 如果验证通过,返回原始或修复后的数据
# 5. 如果原始验证通过,返回原始或修复后的数据
if validation_result.is_valid:
if local_result.has_changes():
return _cache_and_return(
... ... @@ -482,9 +491,11 @@ class ChartRepairer:
else:
return _cache_and_return(RepairResult(True, widget_block, 'none', []))
# 6. 所有修复都失败,返回原始数据
# 6. 所有修复都失败,返回原始数据(或本地部分修复的数据)
logger.warning("所有修复尝试失败,保持原始数据")
return _cache_and_return(RepairResult(False, widget_block, 'none', []))
# 如果本地有部分修复,返回本地修复后的数据(虽然验证仍失败,但可能比原始数据好)
final_block = local_result.repaired_block if local_result.has_changes() else widget_block
return _cache_and_return(RepairResult(False, final_block, 'none', []))
def repair_locally(
self,
... ... @@ -664,27 +675,41 @@ class ChartRepairer:
策略:按顺序尝试不同的Engine,直到修复成功
"""
if not self.llm_repair_fns:
logger.debug("没有可用的LLM修复函数,跳过API修复")
return RepairResult(False, None, 'api', [])
widget_id = widget_block.get('widgetId', 'unknown')
logger.info(f"图表 {widget_id} 开始API修复,共 {len(self.llm_repair_fns)} 个Engine可用")
for idx, repair_fn in enumerate(self.llm_repair_fns):
try:
logger.info(f"尝试使用Engine {idx + 1}修复图表")
logger.info(f"尝试使用Engine {idx + 1}/{len(self.llm_repair_fns)} 修复图表 {widget_id}")
repaired = repair_fn(widget_block, validation_result.errors)
if repaired and isinstance(repaired, dict):
# 验证修复结果
repaired_validation = self.validator.validate(repaired)
if repaired_validation.is_valid:
logger.info(f"图表 {widget_id} 使用Engine {idx + 1} 修复成功")
return RepairResult(
True,
repaired,
'api',
[f"使用Engine {idx + 1}修复成功"]
)
else:
logger.warning(
f"图表 {widget_id} Engine {idx + 1} 返回的数据验证失败: "
f"{repaired_validation.errors}"
)
else:
logger.warning(f"图表 {widget_id} Engine {idx + 1} 返回空或无效响应")
except Exception as e:
logger.error(f"Engine {idx + 1}修复失败: {e}")
# 使用 exception 记录完整堆栈
logger.exception(f"图表 {widget_id} Engine {idx + 1} 修复过程中发生异常: {e}")
continue
logger.warning(f"图表 {widget_id} 所有 {len(self.llm_repair_fns)} 个Engine均修复失败")
return RepairResult(False, None, 'api', [])
... ...
... ... @@ -228,7 +228,7 @@ def save_document_ir(document_ir, base_name, timestamp):
return ir_path
def render_html(document_ir, base_name, timestamp):
def render_html(document_ir, base_name, timestamp, ir_path=None):
"""
使用 HTMLRenderer 将 Document IR 渲染为 HTML 并保存。
... ... @@ -239,12 +239,14 @@ def render_html(document_ir, base_name, timestamp):
document_ir: 装订完成的整本 IR
base_name: 文件名片段(来源于报告主题/标题)
timestamp: 时间戳字符串
ir_path: 可选,IR 文件路径,提供时修复后会自动保存
返回:
Path: 生成的 HTML 文件路径
"""
renderer = HTMLRenderer()
html_content = renderer.render(document_ir)
# 传入 ir_file_path,修复后自动保存
html_content = renderer.render(document_ir, ir_file_path=str(ir_path) if ir_path else None)
output_dir = Path(settings.OUTPUT_DIR) / "html"
output_dir.mkdir(parents=True, exist_ok=True)
... ... @@ -322,7 +324,8 @@ def main():
)
ir_path = save_document_ir(document_ir, base_name, timestamp)
html_path = render_html(document_ir, base_name, timestamp)
# 传入 ir_path,修复后的图表会自动保存到 IR 文件
html_path = render_html(document_ir, base_name, timestamp, ir_path=ir_path)
logger.info("")
logger.info("🎉 HTML装订与渲染完成")
... ...
... ... @@ -88,7 +88,7 @@ def load_document_ir(file_path):
logger.error(f"加载报告失败: {e}")
return None
def generate_pdf_with_vector_charts(document_ir, output_path):
def generate_pdf_with_vector_charts(document_ir, output_path, ir_file_path=None):
"""
使用 PDFRenderer 将 Document IR 渲染为包含 SVG 矢量图表的 PDF。
... ... @@ -97,6 +97,7 @@ def generate_pdf_with_vector_charts(document_ir, output_path):
参数:
document_ir: 完整的 Document IR
output_path: 目标 PDF 路径
ir_file_path: 可选,IR 文件路径,提供时修复后会自动保存
返回:
Path | None: 成功时返回生成的 PDF 路径,失败返回 None。
... ... @@ -109,11 +110,12 @@ def generate_pdf_with_vector_charts(document_ir, output_path):
# 创建PDF渲染器
renderer = PDFRenderer()
# 渲染PDF
# 渲染PDF,传入 ir_file_path 用于修复后保存
result_path = renderer.render_to_pdf(
document_ir,
output_path,
optimize_layout=True
optimize_layout=True,
ir_file_path=str(ir_file_path) if ir_file_path else None
)
logger.info("=" * 60)
... ... @@ -171,8 +173,8 @@ def main():
logger.info(f"输出路径: {output_path}")
logger.info("")
# 4. 生成PDF
result = generate_pdf_with_vector_charts(document_ir, output_path)
# 4. 生成PDF,传入 IR 文件路径用于修复后保存
result = generate_pdf_with_vector_charts(document_ir, output_path, ir_file_path=latest_report)
if result:
logger.info("")
... ...
... ... @@ -338,12 +338,13 @@ def save_pdf(document_ir_path: str, query: str) -> Optional[str]:
pdf_filename = f"final_report_{query_safe}_{timestamp}.pdf"
pdf_path = pdf_dir / pdf_filename
# 使用 render_to_pdf 方法直接生成PDF文件(与regenerate_latest_pdf.py一致)
# 使用 render_to_pdf 方法直接生成PDF文件,传入 IR 文件路径用于修复后保存
logger.info(f"开始渲染PDF: {pdf_path}")
result_path = renderer.render_to_pdf(
document_ir,
pdf_path,
optimize_layout=True
optimize_layout=True,
ir_file_path=document_ir_path
)
# 显示文件大小
... ... @@ -378,7 +379,8 @@ def save_markdown(document_ir_path: str, query: str) -> Optional[str]:
from ReportEngine.renderers import MarkdownRenderer
renderer = MarkdownRenderer()
markdown_content = renderer.render(document_ir)
# 传入 IR 文件路径用于修复后保存
markdown_content = renderer.render(document_ir, ir_file_path=document_ir_path)
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
query_safe = "".join(
... ...