Showing
10 changed files
with
582 additions
and
48 deletions
| @@ -29,6 +29,7 @@ from ReportEngine.utils.chart_validator import ( | @@ -29,6 +29,7 @@ from ReportEngine.utils.chart_validator import ( | ||
| 29 | create_chart_repairer | 29 | create_chart_repairer |
| 30 | ) | 30 | ) |
| 31 | from ReportEngine.utils.chart_repair_api import create_llm_repair_functions | 31 | from ReportEngine.utils.chart_repair_api import create_llm_repair_functions |
| 32 | +from ReportEngine.utils.chart_review_service import get_chart_review_service | ||
| 32 | 33 | ||
| 33 | 34 | ||
| 34 | class HTMLRenderer: | 35 | class HTMLRenderer: |
| @@ -117,6 +118,12 @@ class HTMLRenderer: | @@ -117,6 +118,12 @@ class HTMLRenderer: | ||
| 117 | validator=self.chart_validator, | 118 | validator=self.chart_validator, |
| 118 | llm_repair_fns=llm_repair_fns | 119 | llm_repair_fns=llm_repair_fns |
| 119 | ) | 120 | ) |
| 121 | + # 打印LLM修复函数状态 | ||
| 122 | + self._llm_repair_count = len(llm_repair_fns) | ||
| 123 | + if not llm_repair_fns: | ||
| 124 | + logger.warning("HTMLRenderer: 未配置任何LLM API,图表API修复功能不可用") | ||
| 125 | + else: | ||
| 126 | + logger.info(f"HTMLRenderer: 已配置 {len(llm_repair_fns)} 个LLM修复函数") | ||
| 120 | # 记录修复失败的图表,避免多次触发LLM循环修复 | 127 | # 记录修复失败的图表,避免多次触发LLM循环修复 |
| 121 | self._chart_failure_notes: Dict[str, str] = {} | 128 | self._chart_failure_notes: Dict[str, str] = {} |
| 122 | self._chart_failure_recorded: set[str] = set() | 129 | self._chart_failure_recorded: set[str] = set() |
| @@ -268,19 +275,36 @@ class HTMLRenderer: | @@ -268,19 +275,36 @@ class HTMLRenderer: | ||
| 268 | 275 | ||
| 269 | # ====== 公共入口 ====== | 276 | # ====== 公共入口 ====== |
| 270 | 277 | ||
| 271 | - def render(self, document_ir: Dict[str, Any]) -> str: | 278 | + def render( |
| 279 | + self, | ||
| 280 | + document_ir: Dict[str, Any], | ||
| 281 | + ir_file_path: str | None = None | ||
| 282 | + ) -> str: | ||
| 272 | """ | 283 | """ |
| 273 | 接收Document IR,重置内部状态并输出完整HTML。 | 284 | 接收Document IR,重置内部状态并输出完整HTML。 |
| 274 | 285 | ||
| 275 | 参数: | 286 | 参数: |
| 276 | document_ir: 由 DocumentComposer 生成的整本报告数据。 | 287 | document_ir: 由 DocumentComposer 生成的整本报告数据。 |
| 288 | + ir_file_path: 可选,IR 文件路径,提供时修复后会自动保存。 | ||
| 277 | 289 | ||
| 278 | 返回: | 290 | 返回: |
| 279 | str: 可直接写入磁盘的完整HTML文档。 | 291 | str: 可直接写入磁盘的完整HTML文档。 |
| 280 | """ | 292 | """ |
| 281 | self.document = document_ir or {} | 293 | self.document = document_ir or {} |
| 282 | - # 先对图表做统一审查与修复,并将结果回写,供后续PDF/HTML共用 | ||
| 283 | - self.review_and_patch_document(self.document, reset_stats=True) | 294 | + |
| 295 | + # 使用统一的 ChartReviewService 进行图表审查与修复 | ||
| 296 | + # 修复结果会直接回写到 document_ir,避免多次渲染重复修复 | ||
| 297 | + chart_service = get_chart_review_service() | ||
| 298 | + chart_service.review_document( | ||
| 299 | + self.document, | ||
| 300 | + ir_file_path=ir_file_path, | ||
| 301 | + reset_stats=True, | ||
| 302 | + save_on_repair=bool(ir_file_path) | ||
| 303 | + ) | ||
| 304 | + # 同步统计信息到本地(用于兼容旧的 _log_chart_validation_stats) | ||
| 305 | + service_stats = chart_service.stats | ||
| 306 | + self.chart_validation_stats.update(service_stats) | ||
| 307 | + | ||
| 284 | self.widget_scripts = [] | 308 | self.widget_scripts = [] |
| 285 | self.chart_counter = 0 | 309 | self.chart_counter = 0 |
| 286 | self.heading_counter = 0 | 310 | self.heading_counter = 0 |
| @@ -5,6 +5,8 @@ from typing import Any, Dict, List | @@ -5,6 +5,8 @@ from typing import Any, Dict, List | ||
| 5 | 5 | ||
| 6 | from loguru import logger | 6 | from loguru import logger |
| 7 | 7 | ||
| 8 | +from ReportEngine.utils.chart_review_service import get_chart_review_service | ||
| 9 | + | ||
| 8 | 10 | ||
| 9 | class MarkdownRenderer: | 11 | class MarkdownRenderer: |
| 10 | """ | 12 | """ |
| @@ -19,9 +21,33 @@ class MarkdownRenderer: | @@ -19,9 +21,33 @@ class MarkdownRenderer: | ||
| 19 | self.document: Dict[str, Any] = {} | 21 | self.document: Dict[str, Any] = {} |
| 20 | self.metadata: Dict[str, Any] = {} | 22 | self.metadata: Dict[str, Any] = {} |
| 21 | 23 | ||
| 22 | - def render(self, document_ir: Dict[str, Any]) -> str: | ||
| 23 | - """入口:将IR转换为Markdown字符串""" | 24 | + def render( |
| 25 | + self, | ||
| 26 | + document_ir: Dict[str, Any], | ||
| 27 | + ir_file_path: str | None = None | ||
| 28 | + ) -> str: | ||
| 29 | + """ | ||
| 30 | + 入口:将IR转换为Markdown字符串。 | ||
| 31 | + | ||
| 32 | + 参数: | ||
| 33 | + document_ir: Document IR 数据 | ||
| 34 | + ir_file_path: 可选,IR 文件路径,提供时修复后会自动保存 | ||
| 35 | + | ||
| 36 | + 返回: | ||
| 37 | + str: Markdown 字符串 | ||
| 38 | + """ | ||
| 24 | self.document = document_ir or {} | 39 | self.document = document_ir or {} |
| 40 | + | ||
| 41 | + # 使用统一的 ChartReviewService 进行图表审查与修复 | ||
| 42 | + # 虽然 Markdown 渲染时图表会降级为表格,但仍需确保数据有效 | ||
| 43 | + chart_service = get_chart_review_service() | ||
| 44 | + chart_service.review_document( | ||
| 45 | + self.document, | ||
| 46 | + ir_file_path=ir_file_path, | ||
| 47 | + reset_stats=True, | ||
| 48 | + save_on_repair=bool(ir_file_path) | ||
| 49 | + ) | ||
| 50 | + | ||
| 25 | self.metadata = self.document.get("metadata", {}) or {} | 51 | self.metadata = self.document.get("metadata", {}) or {} |
| 26 | 52 | ||
| 27 | parts: List[str] = [] | 53 | parts: List[str] = [] |
| @@ -71,6 +71,7 @@ from .html_renderer import HTMLRenderer | @@ -71,6 +71,7 @@ from .html_renderer import HTMLRenderer | ||
| 71 | from .pdf_layout_optimizer import PDFLayoutOptimizer, PDFLayoutConfig | 71 | from .pdf_layout_optimizer import PDFLayoutOptimizer, PDFLayoutConfig |
| 72 | from .chart_to_svg import create_chart_converter | 72 | from .chart_to_svg import create_chart_converter |
| 73 | from .math_to_svg import MathToSVG | 73 | from .math_to_svg import MathToSVG |
| 74 | +from ReportEngine.utils.chart_review_service import get_chart_review_service | ||
| 74 | try: | 75 | try: |
| 75 | from wordcloud import WordCloud | 76 | from wordcloud import WordCloud |
| 76 | WORDCLOUD_AVAILABLE = True | 77 | WORDCLOUD_AVAILABLE = True |
| @@ -153,27 +154,34 @@ class PDFRenderer: | @@ -153,27 +154,34 @@ class PDFRenderer: | ||
| 153 | 154 | ||
| 154 | raise FileNotFoundError(f"未找到字体文件,请检查 {fonts_dir} 目录") | 155 | raise FileNotFoundError(f"未找到字体文件,请检查 {fonts_dir} 目录") |
| 155 | 156 | ||
| 156 | - def _preprocess_charts(self, document_ir: Dict[str, Any]) -> Dict[str, Any]: | 157 | + def _preprocess_charts( |
| 158 | + self, | ||
| 159 | + document_ir: Dict[str, Any], | ||
| 160 | + ir_file_path: str | None = None | ||
| 161 | + ) -> Dict[str, Any]: | ||
| 157 | """ | 162 | """ |
| 158 | - 预处理图表:验证并修复所有图表数据,结果回写原始IR。 | 163 | + 预处理图表:使用 ChartReviewService 验证并修复所有图表数据。 |
| 159 | 164 | ||
| 160 | - 先统一审查并修复图表,把修复结果直接写回传入的 IR, | ||
| 161 | - 然后返回修复后的深拷贝供后续 SVG/词云转换使用,避免 | ||
| 162 | - HTML 和 PDF 分别重复触发 ChartRepairer。 | 165 | + 使用统一的 ChartReviewService 进行图表审查,修复结果直接写回传入的 IR。 |
| 166 | + 如果提供 ir_file_path,修复后会自动保存到文件。 | ||
| 163 | 167 | ||
| 164 | 参数: | 168 | 参数: |
| 165 | document_ir: Document IR数据 | 169 | document_ir: Document IR数据 |
| 170 | + ir_file_path: 可选,IR 文件路径,提供时修复后会自动保存 | ||
| 166 | 171 | ||
| 167 | 返回: | 172 | 返回: |
| 168 | Dict[str, Any]: 修复后的Document IR(深拷贝) | 173 | Dict[str, Any]: 修复后的Document IR(深拷贝) |
| 169 | """ | 174 | """ |
| 170 | - reviewed_ir = self.html_renderer.review_and_patch_document( | 175 | + # 使用统一的 ChartReviewService |
| 176 | + chart_service = get_chart_review_service() | ||
| 177 | + chart_service.review_document( | ||
| 171 | document_ir, | 178 | document_ir, |
| 179 | + ir_file_path=ir_file_path, | ||
| 172 | reset_stats=True, | 180 | reset_stats=True, |
| 173 | - clone=False | 181 | + save_on_repair=bool(ir_file_path) |
| 174 | ) | 182 | ) |
| 175 | 183 | ||
| 176 | - stats = self.html_renderer.chart_validation_stats | 184 | + stats = chart_service.stats |
| 177 | if stats.get('total', 0) > 0: | 185 | if stats.get('total', 0) > 0: |
| 178 | repaired_count = stats.get('repaired_locally', 0) + stats.get('repaired_api', 0) | 186 | repaired_count = stats.get('repaired_locally', 0) + stats.get('repaired_api', 0) |
| 179 | logger.info( | 187 | logger.info( |
| @@ -184,7 +192,7 @@ class PDFRenderer: | @@ -184,7 +192,7 @@ class PDFRenderer: | ||
| 184 | ) | 192 | ) |
| 185 | 193 | ||
| 186 | # 返回深拷贝,避免后续 SVG 转换过程影响回写后的原始 IR | 194 | # 返回深拷贝,避免后续 SVG 转换过程影响回写后的原始 IR |
| 187 | - return copy.deepcopy(reviewed_ir) | 195 | + return copy.deepcopy(document_ir) |
| 188 | 196 | ||
| 189 | def _convert_charts_to_svg(self, document_ir: Dict[str, Any]) -> Dict[str, str]: | 197 | def _convert_charts_to_svg(self, document_ir: Dict[str, Any]) -> Dict[str, str]: |
| 190 | """ | 198 | """ |
| @@ -813,7 +821,8 @@ class PDFRenderer: | @@ -813,7 +821,8 @@ class PDFRenderer: | ||
| 813 | def _get_pdf_html( | 821 | def _get_pdf_html( |
| 814 | self, | 822 | self, |
| 815 | document_ir: Dict[str, Any], | 823 | document_ir: Dict[str, Any], |
| 816 | - optimize_layout: bool = True | 824 | + optimize_layout: bool = True, |
| 825 | + ir_file_path: str | None = None | ||
| 817 | ) -> str: | 826 | ) -> str: |
| 818 | """ | 827 | """ |
| 819 | 生成适用于PDF的HTML内容 | 828 | 生成适用于PDF的HTML内容 |
| @@ -827,6 +836,7 @@ class PDFRenderer: | @@ -827,6 +836,7 @@ class PDFRenderer: | ||
| 827 | 参数: | 836 | 参数: |
| 828 | document_ir: Document IR数据 | 837 | document_ir: Document IR数据 |
| 829 | optimize_layout: 是否启用布局优化 | 838 | optimize_layout: 是否启用布局优化 |
| 839 | + ir_file_path: 可选,IR 文件路径,提供时修复后会自动保存 | ||
| 830 | 840 | ||
| 831 | 返回: | 841 | 返回: |
| 832 | str: 优化后的HTML内容 | 842 | str: 优化后的HTML内容 |
| @@ -853,7 +863,7 @@ class PDFRenderer: | @@ -853,7 +863,7 @@ class PDFRenderer: | ||
| 853 | 863 | ||
| 854 | # 关键修复:先预处理图表,确保数据有效 | 864 | # 关键修复:先预处理图表,确保数据有效 |
| 855 | logger.info("预处理图表数据...") | 865 | logger.info("预处理图表数据...") |
| 856 | - preprocessed_ir = self._preprocess_charts(document_ir) | 866 | + preprocessed_ir = self._preprocess_charts(document_ir, ir_file_path) |
| 857 | 867 | ||
| 858 | # 转换图表为SVG(使用预处理后的IR) | 868 | # 转换图表为SVG(使用预处理后的IR) |
| 859 | logger.info("开始转换图表为SVG矢量图形...") | 869 | logger.info("开始转换图表为SVG矢量图形...") |
| @@ -1527,7 +1537,8 @@ button.ghost-btn {{ | @@ -1527,7 +1537,8 @@ button.ghost-btn {{ | ||
| 1527 | self, | 1537 | self, |
| 1528 | document_ir: Dict[str, Any], | 1538 | document_ir: Dict[str, Any], |
| 1529 | output_path: str | Path, | 1539 | output_path: str | Path, |
| 1530 | - optimize_layout: bool = True | 1540 | + optimize_layout: bool = True, |
| 1541 | + ir_file_path: str | None = None | ||
| 1531 | ) -> Path: | 1542 | ) -> Path: |
| 1532 | """ | 1543 | """ |
| 1533 | 将Document IR渲染为PDF文件 | 1544 | 将Document IR渲染为PDF文件 |
| @@ -1536,6 +1547,7 @@ button.ghost-btn {{ | @@ -1536,6 +1547,7 @@ button.ghost-btn {{ | ||
| 1536 | document_ir: Document IR数据 | 1547 | document_ir: Document IR数据 |
| 1537 | output_path: PDF输出路径 | 1548 | output_path: PDF输出路径 |
| 1538 | optimize_layout: 是否启用布局优化(默认True) | 1549 | optimize_layout: 是否启用布局优化(默认True) |
| 1550 | + ir_file_path: 可选,IR 文件路径,提供时修复后会自动保存 | ||
| 1539 | 1551 | ||
| 1540 | 返回: | 1552 | 返回: |
| 1541 | Path: 生成的PDF文件路径 | 1553 | Path: 生成的PDF文件路径 |
| @@ -1545,7 +1557,7 @@ button.ghost-btn {{ | @@ -1545,7 +1557,7 @@ button.ghost-btn {{ | ||
| 1545 | logger.info(f"开始生成PDF: {output_path}") | 1557 | logger.info(f"开始生成PDF: {output_path}") |
| 1546 | 1558 | ||
| 1547 | # 生成HTML内容 | 1559 | # 生成HTML内容 |
| 1548 | - html_content = self._get_pdf_html(document_ir, optimize_layout) | 1560 | + html_content = self._get_pdf_html(document_ir, optimize_layout, ir_file_path) |
| 1549 | 1561 | ||
| 1550 | # 配置字体 | 1562 | # 配置字体 |
| 1551 | font_config = FontConfiguration() | 1563 | font_config = FontConfiguration() |
| @@ -1570,7 +1582,8 @@ button.ghost-btn {{ | @@ -1570,7 +1582,8 @@ button.ghost-btn {{ | ||
| 1570 | def render_to_bytes( | 1582 | def render_to_bytes( |
| 1571 | self, | 1583 | self, |
| 1572 | document_ir: Dict[str, Any], | 1584 | document_ir: Dict[str, Any], |
| 1573 | - optimize_layout: bool = True | 1585 | + optimize_layout: bool = True, |
| 1586 | + ir_file_path: str | None = None | ||
| 1574 | ) -> bytes: | 1587 | ) -> bytes: |
| 1575 | """ | 1588 | """ |
| 1576 | 将Document IR渲染为PDF字节流 | 1589 | 将Document IR渲染为PDF字节流 |
| @@ -1578,11 +1591,12 @@ button.ghost-btn {{ | @@ -1578,11 +1591,12 @@ button.ghost-btn {{ | ||
| 1578 | 参数: | 1591 | 参数: |
| 1579 | document_ir: Document IR数据 | 1592 | document_ir: Document IR数据 |
| 1580 | optimize_layout: 是否启用布局优化(默认True) | 1593 | optimize_layout: 是否启用布局优化(默认True) |
| 1594 | + ir_file_path: 可选,IR 文件路径,提供时修复后会自动保存 | ||
| 1581 | 1595 | ||
| 1582 | 返回: | 1596 | 返回: |
| 1583 | bytes: PDF文件的字节内容 | 1597 | bytes: PDF文件的字节内容 |
| 1584 | """ | 1598 | """ |
| 1585 | - html_content = self._get_pdf_html(document_ir, optimize_layout) | 1599 | + html_content = self._get_pdf_html(document_ir, optimize_layout, ir_file_path) |
| 1586 | font_config = FontConfiguration() | 1600 | font_config = FontConfiguration() |
| 1587 | html_doc = HTML(string=html_content, base_url=str(Path.cwd())) | 1601 | html_doc = HTML(string=html_content, base_url=str(Path.cwd())) |
| 1588 | 1602 |
| @@ -4,6 +4,14 @@ Report Engine工具模块。 | @@ -4,6 +4,14 @@ Report Engine工具模块。 | ||
| 4 | 当前主要暴露配置读取逻辑,后续可扩展更多通用工具。 | 4 | 当前主要暴露配置读取逻辑,后续可扩展更多通用工具。 |
| 5 | """ | 5 | """ |
| 6 | 6 | ||
| 7 | +from ReportEngine.utils.chart_review_service import ( | ||
| 8 | + ChartReviewService, | ||
| 9 | + get_chart_review_service, | ||
| 10 | + review_document_charts, | ||
| 11 | +) | ||
| 7 | 12 | ||
| 8 | __all__ = [ | 13 | __all__ = [ |
| 14 | + "ChartReviewService", | ||
| 15 | + "get_chart_review_service", | ||
| 16 | + "review_document_charts", | ||
| 9 | ] | 17 | ] |
| @@ -169,10 +169,11 @@ def create_llm_repair_functions() -> List: | @@ -169,10 +169,11 @@ def create_llm_repair_functions() -> List: | ||
| 169 | return repaired | 169 | return repaired |
| 170 | 170 | ||
| 171 | except Exception as e: | 171 | except Exception as e: |
| 172 | - logger.error(f"ReportEngine图表修复失败: {e}") | 172 | + logger.exception(f"ReportEngine图表修复失败: {e}") |
| 173 | return None | 173 | return None |
| 174 | 174 | ||
| 175 | repair_functions.append(repair_with_report_engine) | 175 | repair_functions.append(repair_with_report_engine) |
| 176 | + logger.debug("已添加ReportEngine图表修复函数") | ||
| 176 | 177 | ||
| 177 | # 2. ForumEngine修复函数 | 178 | # 2. ForumEngine修复函数 |
| 178 | if settings.FORUM_HOST_API_KEY and settings.FORUM_HOST_BASE_URL: | 179 | if settings.FORUM_HOST_API_KEY and settings.FORUM_HOST_BASE_URL: |
| @@ -202,10 +203,11 @@ def create_llm_repair_functions() -> List: | @@ -202,10 +203,11 @@ def create_llm_repair_functions() -> List: | ||
| 202 | return repaired | 203 | return repaired |
| 203 | 204 | ||
| 204 | except Exception as e: | 205 | except Exception as e: |
| 205 | - logger.error(f"ForumEngine图表修复失败: {e}") | 206 | + logger.exception(f"ForumEngine图表修复失败: {e}") |
| 206 | return None | 207 | return None |
| 207 | 208 | ||
| 208 | repair_functions.append(repair_with_forum_engine) | 209 | repair_functions.append(repair_with_forum_engine) |
| 210 | + logger.debug("已添加ForumEngine图表修复函数") | ||
| 209 | 211 | ||
| 210 | # 3. InsightEngine修复函数 | 212 | # 3. InsightEngine修复函数 |
| 211 | if settings.INSIGHT_ENGINE_API_KEY and settings.INSIGHT_ENGINE_BASE_URL: | 213 | if settings.INSIGHT_ENGINE_API_KEY and settings.INSIGHT_ENGINE_BASE_URL: |
| @@ -235,10 +237,11 @@ def create_llm_repair_functions() -> List: | @@ -235,10 +237,11 @@ def create_llm_repair_functions() -> List: | ||
| 235 | return repaired | 237 | return repaired |
| 236 | 238 | ||
| 237 | except Exception as e: | 239 | except Exception as e: |
| 238 | - logger.error(f"InsightEngine图表修复失败: {e}") | 240 | + logger.exception(f"InsightEngine图表修复失败: {e}") |
| 239 | return None | 241 | return None |
| 240 | 242 | ||
| 241 | repair_functions.append(repair_with_insight_engine) | 243 | repair_functions.append(repair_with_insight_engine) |
| 244 | + logger.debug("已添加InsightEngine图表修复函数") | ||
| 242 | 245 | ||
| 243 | # 4. MediaEngine修复函数 | 246 | # 4. MediaEngine修复函数 |
| 244 | if settings.MEDIA_ENGINE_API_KEY and settings.MEDIA_ENGINE_BASE_URL: | 247 | if settings.MEDIA_ENGINE_API_KEY and settings.MEDIA_ENGINE_BASE_URL: |
| @@ -268,12 +271,15 @@ def create_llm_repair_functions() -> List: | @@ -268,12 +271,15 @@ def create_llm_repair_functions() -> List: | ||
| 268 | return repaired | 271 | return repaired |
| 269 | 272 | ||
| 270 | except Exception as e: | 273 | except Exception as e: |
| 271 | - logger.error(f"MediaEngine图表修复失败: {e}") | 274 | + logger.exception(f"MediaEngine图表修复失败: {e}") |
| 272 | return None | 275 | return None |
| 273 | 276 | ||
| 274 | repair_functions.append(repair_with_media_engine) | 277 | repair_functions.append(repair_with_media_engine) |
| 278 | + logger.debug("已添加MediaEngine图表修复函数") | ||
| 275 | 279 | ||
| 276 | if not repair_functions: | 280 | if not repair_functions: |
| 277 | logger.warning("未配置任何Engine API,图表API修复功能将不可用") | 281 | logger.warning("未配置任何Engine API,图表API修复功能将不可用") |
| 282 | + else: | ||
| 283 | + logger.info(f"图表API修复功能已启用,共 {len(repair_functions)} 个Engine可用") | ||
| 278 | 284 | ||
| 279 | return repair_functions | 285 | return repair_functions |
ReportEngine/utils/chart_review_service.py
0 → 100644
| 1 | +""" | ||
| 2 | +图表审查服务 - 统一管理图表验证和修复。 | ||
| 3 | + | ||
| 4 | +提供单例服务,确保所有渲染器共享修复状态,避免重复修复。 | ||
| 5 | +修复成功后可自动持久化到 IR 文件。 | ||
| 6 | +""" | ||
| 7 | + | ||
| 8 | +from __future__ import annotations | ||
| 9 | + | ||
| 10 | +import copy | ||
| 11 | +import json | ||
| 12 | +import threading | ||
| 13 | +from pathlib import Path | ||
| 14 | +from typing import Any, Dict, List, Optional | ||
| 15 | + | ||
| 16 | +from loguru import logger | ||
| 17 | + | ||
| 18 | +from ReportEngine.utils.chart_validator import ( | ||
| 19 | + ChartValidator, | ||
| 20 | + ChartRepairer, | ||
| 21 | + ValidationResult, | ||
| 22 | + create_chart_validator, | ||
| 23 | + create_chart_repairer | ||
| 24 | +) | ||
| 25 | +from ReportEngine.utils.chart_repair_api import create_llm_repair_functions | ||
| 26 | + | ||
| 27 | + | ||
class ChartReviewService:
    """
    Process-wide chart review service (singleton).

    Responsibilities:
        1. Validate chart widgets found in a Document IR and repair invalid ones.
        2. Tag every reviewed block with ``_chart_reviewed`` so a later pass
           over the same IR skips it instead of repairing it again.
        3. Optionally persist the IR to disk after at least one repair.
        4. Keep counters describing the most recent review pass.
    """

    _instance: Optional["ChartReviewService"] = None
    _lock = threading.Lock()

    def __new__(cls) -> "ChartReviewService":
        """Return the single shared instance, creating it on first use."""
        if cls._instance is None:
            with cls._lock:
                if cls._instance is None:
                    instance = super().__new__(cls)
                    # Set the flag before publishing the instance so no other
                    # thread can observe an object without ``_initialized``.
                    instance._initialized = False
                    cls._instance = instance
        return cls._instance

    def __init__(self) -> None:
        """
        Build the validator and repairer (only on the first construction).

        The ``_initialized`` flag is set last and under the class lock: if one
        of the factories raises, the next ``ChartReviewService()`` call retries
        instead of returning a half-built singleton.
        """
        if self._initialized:
            return

        with self._lock:
            if self._initialized:
                return

            # Validator and repairer shared by every renderer.
            self.validator = create_chart_validator()
            self.llm_repair_fns = create_llm_repair_functions()
            self.repairer = create_chart_repairer(
                validator=self.validator,
                llm_repair_fns=self.llm_repair_fns
            )

            # Report whether any LLM repair function is available.
            if not self.llm_repair_fns:
                logger.warning("ChartReviewService: 未配置任何 LLM API,图表 API 修复功能不可用")
            else:
                logger.info(f"ChartReviewService: 已配置 {len(self.llm_repair_fns)} 个 LLM 修复函数")

            self._stats = self._empty_stats()
            self._initialized = True

        logger.info("ChartReviewService 初始化完成")

    @staticmethod
    def _empty_stats() -> Dict[str, int]:
        """Return a fresh, zeroed statistics dictionary."""
        return {
            'total': 0,
            'valid': 0,
            'repaired_locally': 0,
            'repaired_api': 0,
            'failed': 0
        }

    def reset_stats(self) -> None:
        """Reset all review counters to zero."""
        self._stats = self._empty_stats()

    @property
    def stats(self) -> Dict[str, int]:
        """Return a copy of the counters so callers cannot mutate internal state."""
        return self._stats.copy()

    def review_document(
        self,
        document_ir: Dict[str, Any],
        ir_file_path: Optional[str | Path] = None,
        *,
        reset_stats: bool = True,
        save_on_repair: bool = True
    ) -> Dict[str, Any]:
        """
        Review and repair every chart in the document.

        Walks the blocks of each chapter, and validates/repairs chart widgets
        that have not been reviewed yet. The IR is modified in place.

        Args:
            document_ir: Document IR data.
            ir_file_path: IR file path; when given, and at least one chart was
                repaired, and ``save_on_repair`` is true, the IR is saved there.
            reset_stats: Whether to zero the counters before the pass.
            save_on_repair: Whether to persist the IR after a repair.

        Returns:
            The same ``document_ir`` object that was passed in.
        """
        if reset_stats:
            self.reset_stats()

        if not document_ir:
            logger.warning("ChartReviewService: document_ir 为空,跳过审查")
            return document_ir

        has_repairs = False

        for chapter in document_ir.get("chapters", []) or []:
            if not isinstance(chapter, dict):
                continue
            blocks = chapter.get("blocks", [])
            if isinstance(blocks, list):
                if self._walk_and_review_blocks(blocks, chapter):
                    has_repairs = True

        self._log_stats()

        # Persist only when something actually changed and a target was given.
        if has_repairs and ir_file_path and save_on_repair:
            self._save_ir_to_file(document_ir, ir_file_path)

        return document_ir

    def _walk_and_review_blocks(
        self,
        blocks: List[Any],
        chapter_context: Dict[str, Any] | None = None
    ) -> bool:
        """
        Recursively walk ``blocks`` and review every chart widget found.

        Descends into nested ``blocks``, list ``items`` and table ``cells``.
        Containers that are missing, ``None`` or not a list are skipped
        instead of raising.

        Returns:
            True if at least one chart was repaired.
        """
        has_repairs = False

        for block in blocks or []:
            if not isinstance(block, dict):
                continue

            if block.get("type") == "widget":
                if self._review_chart_block(block, chapter_context):
                    has_repairs = True

            nested_blocks = block.get("blocks")
            if isinstance(nested_blocks, list):
                if self._walk_and_review_blocks(nested_blocks, chapter_context):
                    has_repairs = True

            # List blocks: each item is itself a list of blocks.
            if block.get("type") == "list":
                items = block.get("items")
                for item in items if isinstance(items, list) else []:
                    if isinstance(item, list):
                        if self._walk_and_review_blocks(item, chapter_context):
                            has_repairs = True

            # Table blocks: rows -> cells -> blocks.
            if block.get("type") == "table":
                rows = block.get("rows")
                for row in rows if isinstance(rows, list) else []:
                    if not isinstance(row, dict):
                        continue
                    cells = row.get("cells")
                    for cell in cells if isinstance(cells, list) else []:
                        if not isinstance(cell, dict):
                            continue
                        cell_blocks = cell.get("blocks", [])
                        if isinstance(cell_blocks, list):
                            if self._walk_and_review_blocks(cell_blocks, chapter_context):
                                has_repairs = True

        return has_repairs

    def _review_chart_block(
        self,
        block: Dict[str, Any],
        chapter_context: Dict[str, Any] | None = None
    ) -> bool:
        """
        Review a single widget block, repairing it in place when needed.

        Only widgets whose ``widgetType`` starts with ``chart.js`` are handled.
        The outcome is recorded on the block itself through the
        ``_chart_reviewed`` / ``_chart_review_status`` / ``_chart_review_method``
        keys (plus ``_chart_renderable`` and ``_chart_error_reason`` on failure).

        Returns:
            True if the block was repaired, False otherwise.
        """
        widget_type = block.get("widgetType", "")
        if not isinstance(widget_type, str):
            return False

        is_chart = widget_type.startswith("chart.js")
        is_wordcloud = "wordcloud" in widget_type.lower()

        if not is_chart:
            return False

        widget_id = block.get("widgetId", "unknown")

        # Already handled by an earlier pass: do not repair twice.
        if block.get("_chart_reviewed"):
            logger.debug(f"图表 {widget_id} 已审查过,跳过")
            return False

        self._stats['total'] += 1

        # Word clouds are marked valid without validation.
        if is_wordcloud:
            self._stats['valid'] += 1
            block["_chart_reviewed"] = True
            block["_chart_review_status"] = "valid"
            block["_chart_review_method"] = "none"
            return False

        # Fill in missing datasets/labels before validating.
        self._normalize_chart_block(block, chapter_context)

        validation_result = self.validator.validate(block)

        if validation_result.is_valid:
            self._stats['valid'] += 1
            block["_chart_reviewed"] = True
            block["_chart_review_status"] = "valid"
            block["_chart_review_method"] = "none"
            if validation_result.warnings:
                logger.debug(f"图表 {widget_id} 验证通过,但有警告: {validation_result.warnings}")
            return False

        logger.warning(f"图表 {widget_id} 验证失败: {validation_result.errors}")

        repair_result = self.repairer.repair(block, validation_result)

        if repair_result.success and repair_result.repaired_block:
            # Snapshot first: if the repairer returned the very same dict it
            # was given, clearing ``block`` would also wipe the repaired data.
            repaired_block = dict(repair_result.repaired_block)
            original_widget_id = block.get("widgetId")
            block.clear()
            block.update(repaired_block)
            # Make sure the widget id survives the overwrite.
            if original_widget_id and not block.get("widgetId"):
                block["widgetId"] = original_widget_id

            method = repair_result.method or "local"
            # NOTE(review): a method other than "local"/"api" is not counted in
            # any bucket — confirm the repairer only reports those two.
            if method == "local":
                self._stats['repaired_locally'] += 1
            elif method == "api":
                self._stats['repaired_api'] += 1

            block["_chart_reviewed"] = True
            block["_chart_review_status"] = "repaired"
            block["_chart_review_method"] = method

            logger.info(f"图表 {widget_id} 修复成功 (方法: {method}): {repair_result.changes}")
            return True

        # Repair failed: flag the block as not renderable and keep the reason.
        self._stats['failed'] += 1
        block["_chart_reviewed"] = True
        block["_chart_renderable"] = False
        block["_chart_review_status"] = "failed"
        block["_chart_review_method"] = "none"
        block["_chart_error_reason"] = self._format_error_reason(validation_result)

        logger.warning(f"图表 {widget_id} 修复失败,已标记为不可渲染")
        return False

    @staticmethod
    def _point_label(point: Any, idx: int) -> str:
        """
        Derive a label for one data point.

        Uses the point's ``x`` value, then its ``label`` value; ``None`` and
        the empty string count as missing, but ``0`` is a valid label. Falls
        back to a positional placeholder.
        """
        if isinstance(point, dict):
            for key in ("x", "label"):
                candidate = point.get(key)
                if candidate is not None and candidate != "":
                    return str(candidate)
        return f"点{idx + 1}"

    def _normalize_chart_block(
        self,
        block: Dict[str, Any],
        chapter_context: Dict[str, Any] | None = None
    ) -> None:
        """
        Normalize chart data in place.

        1. If the block has no datasets, copy ``datasets`` (and ``labels`` when
           the block has none) from the chapter's ``data``.
        2. If labels are still missing, generate them from the first dataset's
           data points.
        """
        if not isinstance(block, dict):
            return

        data = block.get("data")
        if not isinstance(data, dict):
            return

        # Step 1: fall back to chapter-level datasets.
        if not data.get("datasets") and isinstance(chapter_context, dict):
            chapter_data = chapter_context.get("data")
            if isinstance(chapter_data, dict):
                fallback_ds = chapter_data.get("datasets")
                if isinstance(fallback_ds, list) and fallback_ds:
                    # Deep copies keep the block independent from the chapter data.
                    merged_data = copy.deepcopy(data)
                    merged_data["datasets"] = copy.deepcopy(fallback_ds)
                    if not merged_data.get("labels") and isinstance(chapter_data.get("labels"), list):
                        merged_data["labels"] = copy.deepcopy(chapter_data["labels"])
                    block["data"] = merged_data

        # Step 2: derive labels from the first dataset when none are present.
        data_ref = block.get("data")
        if not isinstance(data_ref, dict) or data_ref.get("labels"):
            return
        datasets_ref = data_ref.get("datasets")
        if not isinstance(datasets_ref, list) or not datasets_ref:
            return
        first_ds = datasets_ref[0]
        ds_data = first_ds.get("data") if isinstance(first_ds, dict) else None
        if isinstance(ds_data, list):
            labels_from_data = [
                self._point_label(point, idx) for idx, point in enumerate(ds_data)
            ]
            if labels_from_data:
                data_ref["labels"] = labels_from_data

    def _format_error_reason(self, validation_result: "ValidationResult | None") -> str:
        """Join at most the first three validation errors into one string."""
        if not validation_result:
            return "未知错误"
        errors = validation_result.errors or []
        if not errors:
            return "验证失败但无具体错误信息"
        return "; ".join(errors[:3])

    def _log_stats(self) -> None:
        """Log a summary of the current counters."""
        if self._stats['total'] == 0:
            logger.debug("ChartReviewService: 没有图表需要审查")
            return

        repaired = self._stats['repaired_locally'] + self._stats['repaired_api']
        logger.info(
            f"ChartReviewService 图表审查完成: "
            f"总计 {self._stats['total']} 个, "
            f"有效 {self._stats['valid']} 个, "
            f"修复 {repaired} 个 (本地 {self._stats['repaired_locally']}, API {self._stats['repaired_api']}), "
            f"失败 {self._stats['failed']} 个"
        )

    def _save_ir_to_file(self, document_ir: Dict[str, Any], file_path: str | Path) -> None:
        """
        Save the IR as UTF-8 JSON to ``file_path`` (best effort).

        The JSON is written to a sibling ``<name>.tmp`` file and then moved
        over the target, so an interrupted write does not leave a truncated
        IR file. Failures are logged and never raised.
        """
        tmp_path: Optional[Path] = None
        try:
            path = Path(file_path)
            path.parent.mkdir(parents=True, exist_ok=True)
            # Serialize first so a non-serializable IR never touches the disk.
            payload = json.dumps(document_ir, ensure_ascii=False, indent=2)
            tmp_path = path.with_name(f"{path.name}.tmp")
            tmp_path.write_text(payload, encoding="utf-8")
            tmp_path.replace(path)
            logger.info(f"ChartReviewService: 修复后的 IR 已保存到 {path}")
        except Exception as e:
            logger.exception(f"ChartReviewService: 保存 IR 文件失败: {e}")
            if tmp_path is not None:
                try:
                    tmp_path.unlink(missing_ok=True)
                except OSError:
                    # Cleanup is best effort; the original failure is already logged.
                    pass
| 377 | + | ||
| 378 | + | ||
| 379 | +# 全局单例实例 | ||
| 380 | +_chart_review_service: Optional[ChartReviewService] = None | ||
| 381 | + | ||
| 382 | + | ||
def get_chart_review_service() -> ChartReviewService:
    """Return the shared ChartReviewService, creating it on the first call."""
    global _chart_review_service
    service = _chart_review_service
    if service is not None:
        return service
    # First call: build the service and remember it at module level.
    service = ChartReviewService()
    _chart_review_service = service
    return service
| 389 | + | ||
| 390 | + | ||
def review_document_charts(
    document_ir: Dict[str, Any],
    ir_file_path: Optional[str | Path] = None,
    *,
    reset_stats: bool = True,
    save_on_repair: bool = True
) -> Dict[str, Any]:
    """
    Convenience wrapper: review and repair every chart in a document.

    Delegates to ``review_document`` on the shared service returned by
    ``get_chart_review_service``.

    Args:
        document_ir: Document IR data.
        ir_file_path: IR file path, forwarded to the service.
        reset_stats: Whether the service resets its counters first.
        save_on_repair: Whether the service saves the IR after a repair.

    Returns:
        The reviewed Document IR as returned by the service.
    """
    options = {
        "reset_stats": reset_stats,
        "save_on_repair": save_on_repair,
    }
    return get_chart_review_service().review_document(
        document_ir,
        ir_file_path,
        **options
    )
| 417 | + | ||
| 418 | + | ||
| 419 | +__all__ = [ | ||
| 420 | + "ChartReviewService", | ||
| 421 | + "get_chart_review_service", | ||
| 422 | + "review_document_charts", | ||
| 423 | +] | ||
| 424 | + |
| @@ -444,11 +444,15 @@ class ChartRepairer: | @@ -444,11 +444,15 @@ class ChartRepairer: | ||
| 444 | if validation_result is None: | 444 | if validation_result is None: |
| 445 | validation_result = self.validator.validate(widget_block) | 445 | validation_result = self.validator.validate(widget_block) |
| 446 | 446 | ||
| 447 | + # 跟踪当前最新的验证结果和数据 | ||
| 448 | + current_validation = validation_result | ||
| 449 | + current_block = widget_block | ||
| 450 | + | ||
| 447 | # 2. 尝试本地修复(即使验证通过也尝试,因为可能有警告) | 451 | # 2. 尝试本地修复(即使验证通过也尝试,因为可能有警告) |
| 448 | logger.info(f"尝试本地修复图表") | 452 | logger.info(f"尝试本地修复图表") |
| 449 | local_result = self.repair_locally(widget_block, validation_result) | 453 | local_result = self.repair_locally(widget_block, validation_result) |
| 450 | 454 | ||
| 451 | - # 3. 验证修复结果 | 455 | + # 3. 验证本地修复结果 |
| 452 | if local_result.has_changes(): | 456 | if local_result.has_changes(): |
| 453 | repaired_validation = self.validator.validate(local_result.repaired_block) | 457 | repaired_validation = self.validator.validate(local_result.repaired_block) |
| 454 | if repaired_validation.is_valid: | 458 | if repaired_validation.is_valid: |
| @@ -458,22 +462,27 @@ class ChartRepairer: | @@ -458,22 +462,27 @@ class ChartRepairer: | ||
| 458 | ) | 462 | ) |
| 459 | else: | 463 | else: |
| 460 | logger.warning(f"本地修复后仍然无效: {repaired_validation.errors}") | 464 | logger.warning(f"本地修复后仍然无效: {repaired_validation.errors}") |
| 465 | + # 更新当前状态为本地修复后的结果,供API修复使用 | ||
| 466 | + current_validation = repaired_validation | ||
| 467 | + current_block = local_result.repaired_block | ||
| 461 | 468 | ||
| 462 | - # 4. 如果本地修复失败且有严重错误,尝试API修复 | ||
| 463 | - if validation_result.has_critical_errors() and len(self.llm_repair_fns) > 0: | ||
| 464 | - logger.info("本地修复失败,尝试API修复") | ||
| 465 | - api_result = self.repair_with_api(widget_block, validation_result) | 469 | + # 4. 如果当前仍有严重错误,尝试API修复 |
| 470 | + # 注意:使用 current_validation 而非原始 validation_result | ||
| 471 | + if current_validation.has_critical_errors() and len(self.llm_repair_fns) > 0: | ||
| 472 | + logger.info("本地修复失败或不足,尝试API修复") | ||
| 473 | + # 传入本地已修复的数据(如果有),避免浪费本地修复的工作 | ||
| 474 | + api_result = self.repair_with_api(current_block, current_validation) | ||
| 466 | 475 | ||
| 467 | if api_result.success: | 476 | if api_result.success: |
| 468 | # 验证修复结果 | 477 | # 验证修复结果 |
| 469 | - repaired_validation = self.validator.validate(api_result.repaired_block) | ||
| 470 | - if repaired_validation.is_valid: | 478 | + api_repaired_validation = self.validator.validate(api_result.repaired_block) |
| 479 | + if api_repaired_validation.is_valid: | ||
| 471 | logger.info(f"API修复成功: {api_result.changes}") | 480 | logger.info(f"API修复成功: {api_result.changes}") |
| 472 | return _cache_and_return(api_result) | 481 | return _cache_and_return(api_result) |
| 473 | else: | 482 | else: |
| 474 | - logger.warning(f"API修复后仍然无效: {repaired_validation.errors}") | 483 | + logger.warning(f"API修复后仍然无效: {api_repaired_validation.errors}") |
| 475 | 484 | ||
| 476 | - # 5. 如果验证通过,返回原始或修复后的数据 | 485 | + # 5. 如果原始验证通过,返回原始或修复后的数据 |
| 477 | if validation_result.is_valid: | 486 | if validation_result.is_valid: |
| 478 | if local_result.has_changes(): | 487 | if local_result.has_changes(): |
| 479 | return _cache_and_return( | 488 | return _cache_and_return( |
| @@ -482,9 +491,11 @@ class ChartRepairer: | @@ -482,9 +491,11 @@ class ChartRepairer: | ||
| 482 | else: | 491 | else: |
| 483 | return _cache_and_return(RepairResult(True, widget_block, 'none', [])) | 492 | return _cache_and_return(RepairResult(True, widget_block, 'none', [])) |
| 484 | 493 | ||
| 485 | - # 6. 所有修复都失败,返回原始数据 | 494 | + # 6. 所有修复都失败,返回原始数据(或本地部分修复的数据) |
| 486 | logger.warning("所有修复尝试失败,保持原始数据") | 495 | logger.warning("所有修复尝试失败,保持原始数据") |
| 487 | - return _cache_and_return(RepairResult(False, widget_block, 'none', [])) | 496 | + # 如果本地有部分修复,返回本地修复后的数据(虽然验证仍失败,但可能比原始数据好) |
| 497 | + final_block = local_result.repaired_block if local_result.has_changes() else widget_block | ||
| 498 | + return _cache_and_return(RepairResult(False, final_block, 'none', [])) | ||
| 488 | 499 | ||
| 489 | def repair_locally( | 500 | def repair_locally( |
| 490 | self, | 501 | self, |
| @@ -664,27 +675,41 @@ class ChartRepairer: | @@ -664,27 +675,41 @@ class ChartRepairer: | ||
| 664 | 策略:按顺序尝试不同的Engine,直到修复成功 | 675 | 策略:按顺序尝试不同的Engine,直到修复成功 |
| 665 | """ | 676 | """ |
| 666 | if not self.llm_repair_fns: | 677 | if not self.llm_repair_fns: |
| 678 | + logger.debug("没有可用的LLM修复函数,跳过API修复") | ||
| 667 | return RepairResult(False, None, 'api', []) | 679 | return RepairResult(False, None, 'api', []) |
| 668 | 680 | ||
| 681 | + widget_id = widget_block.get('widgetId', 'unknown') | ||
| 682 | + logger.info(f"图表 {widget_id} 开始API修复,共 {len(self.llm_repair_fns)} 个Engine可用") | ||
| 683 | + | ||
| 669 | for idx, repair_fn in enumerate(self.llm_repair_fns): | 684 | for idx, repair_fn in enumerate(self.llm_repair_fns): |
| 670 | try: | 685 | try: |
| 671 | - logger.info(f"尝试使用Engine {idx + 1}修复图表") | 686 | + logger.info(f"尝试使用Engine {idx + 1}/{len(self.llm_repair_fns)} 修复图表 {widget_id}") |
| 672 | repaired = repair_fn(widget_block, validation_result.errors) | 687 | repaired = repair_fn(widget_block, validation_result.errors) |
| 673 | 688 | ||
| 674 | if repaired and isinstance(repaired, dict): | 689 | if repaired and isinstance(repaired, dict): |
| 675 | # 验证修复结果 | 690 | # 验证修复结果 |
| 676 | repaired_validation = self.validator.validate(repaired) | 691 | repaired_validation = self.validator.validate(repaired) |
| 677 | if repaired_validation.is_valid: | 692 | if repaired_validation.is_valid: |
| 693 | + logger.info(f"图表 {widget_id} 使用Engine {idx + 1} 修复成功") | ||
| 678 | return RepairResult( | 694 | return RepairResult( |
| 679 | True, | 695 | True, |
| 680 | repaired, | 696 | repaired, |
| 681 | 'api', | 697 | 'api', |
| 682 | [f"使用Engine {idx + 1}修复成功"] | 698 | [f"使用Engine {idx + 1}修复成功"] |
| 683 | ) | 699 | ) |
| 700 | + else: | ||
| 701 | + logger.warning( | ||
| 702 | + f"图表 {widget_id} Engine {idx + 1} 返回的数据验证失败: " | ||
| 703 | + f"{repaired_validation.errors}" | ||
| 704 | + ) | ||
| 705 | + else: | ||
| 706 | + logger.warning(f"图表 {widget_id} Engine {idx + 1} 返回空或无效响应") | ||
| 684 | except Exception as e: | 707 | except Exception as e: |
| 685 | - logger.error(f"Engine {idx + 1}修复失败: {e}") | 708 | + # 使用 exception 记录完整堆栈 |
| 709 | + logger.exception(f"图表 {widget_id} Engine {idx + 1} 修复过程中发生异常: {e}") | ||
| 686 | continue | 710 | continue |
| 687 | 711 | ||
| 712 | + logger.warning(f"图表 {widget_id} 所有 {len(self.llm_repair_fns)} 个Engine均修复失败") | ||
| 688 | return RepairResult(False, None, 'api', []) | 713 | return RepairResult(False, None, 'api', []) |
| 689 | 714 | ||
| 690 | 715 |
| @@ -228,7 +228,7 @@ def save_document_ir(document_ir, base_name, timestamp): | @@ -228,7 +228,7 @@ def save_document_ir(document_ir, base_name, timestamp): | ||
| 228 | return ir_path | 228 | return ir_path |
| 229 | 229 | ||
| 230 | 230 | ||
| 231 | -def render_html(document_ir, base_name, timestamp): | 231 | +def render_html(document_ir, base_name, timestamp, ir_path=None): |
| 232 | """ | 232 | """ |
| 233 | 使用 HTMLRenderer 将 Document IR 渲染为 HTML 并保存。 | 233 | 使用 HTMLRenderer 将 Document IR 渲染为 HTML 并保存。 |
| 234 | 234 | ||
| @@ -239,12 +239,14 @@ def render_html(document_ir, base_name, timestamp): | @@ -239,12 +239,14 @@ def render_html(document_ir, base_name, timestamp): | ||
| 239 | document_ir: 装订完成的整本 IR | 239 | document_ir: 装订完成的整本 IR |
| 240 | base_name: 文件名片段(来源于报告主题/标题) | 240 | base_name: 文件名片段(来源于报告主题/标题) |
| 241 | timestamp: 时间戳字符串 | 241 | timestamp: 时间戳字符串 |
| 242 | + ir_path: 可选,IR 文件路径,提供时修复后会自动保存 | ||
| 242 | 243 | ||
| 243 | 返回: | 244 | 返回: |
| 244 | Path: 生成的 HTML 文件路径 | 245 | Path: 生成的 HTML 文件路径 |
| 245 | """ | 246 | """ |
| 246 | renderer = HTMLRenderer() | 247 | renderer = HTMLRenderer() |
| 247 | - html_content = renderer.render(document_ir) | 248 | + # 传入 ir_file_path,修复后自动保存 |
| 249 | + html_content = renderer.render(document_ir, ir_file_path=str(ir_path) if ir_path else None) | ||
| 248 | 250 | ||
| 249 | output_dir = Path(settings.OUTPUT_DIR) / "html" | 251 | output_dir = Path(settings.OUTPUT_DIR) / "html" |
| 250 | output_dir.mkdir(parents=True, exist_ok=True) | 252 | output_dir.mkdir(parents=True, exist_ok=True) |
| @@ -322,7 +324,8 @@ def main(): | @@ -322,7 +324,8 @@ def main(): | ||
| 322 | ) | 324 | ) |
| 323 | 325 | ||
| 324 | ir_path = save_document_ir(document_ir, base_name, timestamp) | 326 | ir_path = save_document_ir(document_ir, base_name, timestamp) |
| 325 | - html_path = render_html(document_ir, base_name, timestamp) | 327 | + # 传入 ir_path,修复后的图表会自动保存到 IR 文件 |
| 328 | + html_path = render_html(document_ir, base_name, timestamp, ir_path=ir_path) | ||
| 326 | 329 | ||
| 327 | logger.info("") | 330 | logger.info("") |
| 328 | logger.info("🎉 HTML装订与渲染完成") | 331 | logger.info("🎉 HTML装订与渲染完成") |
| @@ -88,7 +88,7 @@ def load_document_ir(file_path): | @@ -88,7 +88,7 @@ def load_document_ir(file_path): | ||
| 88 | logger.error(f"加载报告失败: {e}") | 88 | logger.error(f"加载报告失败: {e}") |
| 89 | return None | 89 | return None |
| 90 | 90 | ||
| 91 | -def generate_pdf_with_vector_charts(document_ir, output_path): | 91 | +def generate_pdf_with_vector_charts(document_ir, output_path, ir_file_path=None): |
| 92 | """ | 92 | """ |
| 93 | 使用 PDFRenderer 将 Document IR 渲染为包含 SVG 矢量图表的 PDF。 | 93 | 使用 PDFRenderer 将 Document IR 渲染为包含 SVG 矢量图表的 PDF。 |
| 94 | 94 | ||
| @@ -97,6 +97,7 @@ def generate_pdf_with_vector_charts(document_ir, output_path): | @@ -97,6 +97,7 @@ def generate_pdf_with_vector_charts(document_ir, output_path): | ||
| 97 | 参数: | 97 | 参数: |
| 98 | document_ir: 完整的 Document IR | 98 | document_ir: 完整的 Document IR |
| 99 | output_path: 目标 PDF 路径 | 99 | output_path: 目标 PDF 路径 |
| 100 | + ir_file_path: 可选,IR 文件路径,提供时修复后会自动保存 | ||
| 100 | 101 | ||
| 101 | 返回: | 102 | 返回: |
| 102 | Path | None: 成功时返回生成的 PDF 路径,失败返回 None。 | 103 | Path | None: 成功时返回生成的 PDF 路径,失败返回 None。 |
| @@ -109,11 +110,12 @@ def generate_pdf_with_vector_charts(document_ir, output_path): | @@ -109,11 +110,12 @@ def generate_pdf_with_vector_charts(document_ir, output_path): | ||
| 109 | # 创建PDF渲染器 | 110 | # 创建PDF渲染器 |
| 110 | renderer = PDFRenderer() | 111 | renderer = PDFRenderer() |
| 111 | 112 | ||
| 112 | - # 渲染PDF | 113 | + # 渲染PDF,传入 ir_file_path 用于修复后保存 |
| 113 | result_path = renderer.render_to_pdf( | 114 | result_path = renderer.render_to_pdf( |
| 114 | document_ir, | 115 | document_ir, |
| 115 | output_path, | 116 | output_path, |
| 116 | - optimize_layout=True | 117 | + optimize_layout=True, |
| 118 | + ir_file_path=str(ir_file_path) if ir_file_path else None | ||
| 117 | ) | 119 | ) |
| 118 | 120 | ||
| 119 | logger.info("=" * 60) | 121 | logger.info("=" * 60) |
| @@ -171,8 +173,8 @@ def main(): | @@ -171,8 +173,8 @@ def main(): | ||
| 171 | logger.info(f"输出路径: {output_path}") | 173 | logger.info(f"输出路径: {output_path}") |
| 172 | logger.info("") | 174 | logger.info("") |
| 173 | 175 | ||
| 174 | - # 4. 生成PDF | ||
| 175 | - result = generate_pdf_with_vector_charts(document_ir, output_path) | 176 | + # 4. 生成PDF,传入 IR 文件路径用于修复后保存 |
| 177 | + result = generate_pdf_with_vector_charts(document_ir, output_path, ir_file_path=latest_report) | ||
| 176 | 178 | ||
| 177 | if result: | 179 | if result: |
| 178 | logger.info("") | 180 | logger.info("") |
| @@ -338,12 +338,13 @@ def save_pdf(document_ir_path: str, query: str) -> Optional[str]: | @@ -338,12 +338,13 @@ def save_pdf(document_ir_path: str, query: str) -> Optional[str]: | ||
| 338 | pdf_filename = f"final_report_{query_safe}_{timestamp}.pdf" | 338 | pdf_filename = f"final_report_{query_safe}_{timestamp}.pdf" |
| 339 | pdf_path = pdf_dir / pdf_filename | 339 | pdf_path = pdf_dir / pdf_filename |
| 340 | 340 | ||
| 341 | - # 使用 render_to_pdf 方法直接生成PDF文件(与regenerate_latest_pdf.py一致) | 341 | + # 使用 render_to_pdf 方法直接生成PDF文件,传入 IR 文件路径用于修复后保存 |
| 342 | logger.info(f"开始渲染PDF: {pdf_path}") | 342 | logger.info(f"开始渲染PDF: {pdf_path}") |
| 343 | result_path = renderer.render_to_pdf( | 343 | result_path = renderer.render_to_pdf( |
| 344 | document_ir, | 344 | document_ir, |
| 345 | pdf_path, | 345 | pdf_path, |
| 346 | - optimize_layout=True | 346 | + optimize_layout=True, |
| 347 | + ir_file_path=document_ir_path | ||
| 347 | ) | 348 | ) |
| 348 | 349 | ||
| 349 | # 显示文件大小 | 350 | # 显示文件大小 |
| @@ -378,7 +379,8 @@ def save_markdown(document_ir_path: str, query: str) -> Optional[str]: | @@ -378,7 +379,8 @@ def save_markdown(document_ir_path: str, query: str) -> Optional[str]: | ||
| 378 | 379 | ||
| 379 | from ReportEngine.renderers import MarkdownRenderer | 380 | from ReportEngine.renderers import MarkdownRenderer |
| 380 | renderer = MarkdownRenderer() | 381 | renderer = MarkdownRenderer() |
| 381 | - markdown_content = renderer.render(document_ir) | 382 | + # 传入 IR 文件路径用于修复后保存 |
| 383 | + markdown_content = renderer.render(document_ir, ir_file_path=document_ir_path) | ||
| 382 | 384 | ||
| 383 | timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") | 385 | timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") |
| 384 | query_safe = "".join( | 386 | query_safe = "".join( |
-
Please register or login to post a comment