Showing
2 changed files
with
222 additions
and
146 deletions
| @@ -182,6 +182,18 @@ class HTMLRenderer: | @@ -182,6 +182,18 @@ class HTMLRenderer: | ||
| 182 | self._pdf_font_base64 = "" | 182 | self._pdf_font_base64 = "" |
| 183 | return self._pdf_font_base64 | 183 | return self._pdf_font_base64 |
| 184 | 184 | ||
| 185 | + def _reset_chart_validation_stats(self) -> None: | ||
| 186 | + """重置图表校验统计并清除失败计数标记""" | ||
| 187 | + self.chart_validation_stats = { | ||
| 188 | + 'total': 0, | ||
| 189 | + 'valid': 0, | ||
| 190 | + 'repaired_locally': 0, | ||
| 191 | + 'repaired_api': 0, | ||
| 192 | + 'failed': 0 | ||
| 193 | + } | ||
| 194 | + # 保留失败原因缓存,但重置本次渲染的计数 | ||
| 195 | + self._chart_failure_recorded = set() | ||
| 196 | + | ||
| 185 | def _build_script_with_fallback( | 197 | def _build_script_with_fallback( |
| 186 | self, | 198 | self, |
| 187 | inline_code: str, | 199 | inline_code: str, |
| @@ -267,6 +279,8 @@ class HTMLRenderer: | @@ -267,6 +279,8 @@ class HTMLRenderer: | ||
| 267 | str: 可直接写入磁盘的完整HTML文档。 | 279 | str: 可直接写入磁盘的完整HTML文档。 |
| 268 | """ | 280 | """ |
| 269 | self.document = document_ir or {} | 281 | self.document = document_ir or {} |
| 282 | + # 先对图表做统一审查与修复,并将结果回写,供后续PDF/HTML共用 | ||
| 283 | + self.review_and_patch_document(self.document, reset_stats=True) | ||
| 270 | self.widget_scripts = [] | 284 | self.widget_scripts = [] |
| 271 | self.chart_counter = 0 | 285 | self.chart_counter = 0 |
| 272 | self.heading_counter = 0 | 286 | self.heading_counter = 0 |
| @@ -282,17 +296,6 @@ class HTMLRenderer: | @@ -282,17 +296,6 @@ class HTMLRenderer: | ||
| 282 | self.heading_label_map = self._compute_heading_labels(self.chapters) | 296 | self.heading_label_map = self._compute_heading_labels(self.chapters) |
| 283 | self.toc_entries = self._collect_toc_entries(self.chapters) | 297 | self.toc_entries = self._collect_toc_entries(self.chapters) |
| 284 | 298 | ||
| 285 | - # 重置图表验证统计 | ||
| 286 | - self.chart_validation_stats = { | ||
| 287 | - 'total': 0, | ||
| 288 | - 'valid': 0, | ||
| 289 | - 'repaired_locally': 0, | ||
| 290 | - 'repaired_api': 0, | ||
| 291 | - 'failed': 0 | ||
| 292 | - } | ||
| 293 | - # 每次渲染重新统计失败计数,但保留失败原因,避免重复LLM调用 | ||
| 294 | - self._chart_failure_recorded = set() | ||
| 295 | - | ||
| 296 | metadata = self.metadata | 299 | metadata = self.metadata |
| 297 | theme_tokens = metadata.get("themeTokens") or self.document.get("themeTokens", {}) | 300 | theme_tokens = metadata.get("themeTokens") or self.document.get("themeTokens", {}) |
| 298 | title = metadata.get("title") or metadata.get("query") or "智能舆情报告" | 301 | title = metadata.get("title") or metadata.get("query") or "智能舆情报告" |
| @@ -2087,6 +2090,31 @@ class HTMLRenderer: | @@ -2087,6 +2090,31 @@ class HTMLRenderer: | ||
| 2087 | if cache_key: | 2090 | if cache_key: |
| 2088 | self._chart_failure_recorded.add(cache_key) | 2091 | self._chart_failure_recorded.add(cache_key) |
| 2089 | 2092 | ||
| 2093 | + def _apply_cached_review_stats(self, block: Dict[str, Any]) -> None: | ||
| 2094 | + """ | ||
| 2095 | + 在已审查过的图表上重新累计统计信息,避免重复修复。 | ||
| 2096 | + | ||
| 2097 | + 当渲染流程重置了统计但图表已经审查过(_chart_reviewed=True), | ||
| 2098 | + 直接根据记录的状态累加各项计数,防止再次触发 ChartRepairer。 | ||
| 2099 | + """ | ||
| 2100 | + if not isinstance(block, dict): | ||
| 2101 | + return | ||
| 2102 | + | ||
| 2103 | + status = block.get("_chart_review_status") or "valid" | ||
| 2104 | + method = (block.get("_chart_review_method") or "none").lower() | ||
| 2105 | + cache_key = self._chart_cache_key(block) | ||
| 2106 | + | ||
| 2107 | + self.chart_validation_stats['total'] += 1 | ||
| 2108 | + if status == "failed": | ||
| 2109 | + self._record_chart_failure_stat(cache_key) | ||
| 2110 | + elif status == "repaired": | ||
| 2111 | + if method == "api": | ||
| 2112 | + self.chart_validation_stats['repaired_api'] += 1 | ||
| 2113 | + else: | ||
| 2114 | + self.chart_validation_stats['repaired_locally'] += 1 | ||
| 2115 | + else: | ||
| 2116 | + self.chart_validation_stats['valid'] += 1 | ||
| 2117 | + | ||
| 2090 | def _format_chart_error_reason( | 2118 | def _format_chart_error_reason( |
| 2091 | self, | 2119 | self, |
| 2092 | validation_result: ValidationResult | None = None, | 2120 | validation_result: ValidationResult | None = None, |
| @@ -2211,52 +2239,64 @@ class HTMLRenderer: | @@ -2211,52 +2239,64 @@ class HTMLRenderer: | ||
| 2211 | if labels_from_data: | 2239 | if labels_from_data: |
| 2212 | data_ref["labels"] = labels_from_data | 2240 | data_ref["labels"] = labels_from_data |
| 2213 | 2241 | ||
| 2214 | - def _render_widget(self, block: Dict[str, Any]) -> str: | 2242 | + def _ensure_chart_reviewed( |
| 2243 | + self, | ||
| 2244 | + block: Dict[str, Any], | ||
| 2245 | + chapter_context: Dict[str, Any] | None = None, | ||
| 2246 | + *, | ||
| 2247 | + increment_stats: bool = True | ||
| 2248 | + ) -> tuple[bool, str | None]: | ||
| 2215 | """ | 2249 | """ |
| 2216 | - 渲染Chart.js等交互组件的占位容器,并记录配置JSON。 | ||
| 2217 | - | ||
| 2218 | - 在渲染前进行图表验证和修复: | ||
| 2219 | - 1. validate:ChartValidator 检查 block 的 data/props/options 结构; | ||
| 2220 | - 2. repair:若失败,先本地修补(缺 labels/datasets/scale 时兜底),再调用 LLM API; | ||
| 2221 | - 3. 失败兜底:写入 _chart_renderable=False 及 _chart_error_reason,输出错误占位而非抛异常。 | ||
| 2222 | - | ||
| 2223 | - 参数(对应 IR 层级): | ||
| 2224 | - - block.widgetType: "chart.js/bar"/"chart.js/line"/"wordcloud" 等,决定渲染器与校验策略; | ||
| 2225 | - - block.widgetId: 组件唯一ID,用于canvas/data script绑定; | ||
| 2226 | - - block.props: 透传到前端 Chart.js options,例如 props.title / props.options.legend; | ||
| 2227 | - - block.data: {labels, datasets} 等数据;缺失时会尝试从章节级 chapter.data 补齐; | ||
| 2228 | - - block.dataRef: 外部数据引用,暂作为透传记录。 | 2250 | + 确保图表已完成审查/修复,并将结果回写到原始block。 |
| 2229 | 2251 | ||
| 2230 | 返回: | 2252 | 返回: |
| 2231 | - str: 含canvas与配置脚本的HTML。 | 2253 | + (renderable, fail_reason) |
| 2232 | """ | 2254 | """ |
| 2233 | - # 先在block层面做一次容错补全(scales、章节级数据等) | ||
| 2234 | - self._normalize_chart_block(block, getattr(self, "_current_chapter", None)) | 2255 | + if not isinstance(block, dict): |
| 2256 | + return True, None | ||
| 2235 | 2257 | ||
| 2236 | - # 统计 | ||
| 2237 | widget_type = block.get('widgetType', '') | 2258 | widget_type = block.get('widgetType', '') |
| 2238 | is_chart = isinstance(widget_type, str) and widget_type.startswith('chart.js') | 2259 | is_chart = isinstance(widget_type, str) and widget_type.startswith('chart.js') |
| 2239 | - is_wordcloud = isinstance(widget_type, str) and 'wordcloud' in widget_type.lower() | ||
| 2240 | - widget_id = block.get('widgetId') | ||
| 2241 | - cache_key = self._chart_cache_key(block) if is_chart else "" | ||
| 2242 | - props_snapshot = block.get("props") if isinstance(block.get("props"), dict) else {} | ||
| 2243 | - display_title = props_snapshot.get("title") or block.get("title") or widget_id or "图表" | 2260 | + if not is_chart: |
| 2261 | + return True, None | ||
| 2244 | 2262 | ||
| 2245 | - if is_chart: | 2263 | + is_wordcloud = 'wordcloud' in widget_type.lower() if isinstance(widget_type, str) else False |
| 2264 | + cache_key = self._chart_cache_key(block) | ||
| 2265 | + | ||
| 2266 | + # 已有失败记录或显式标记为不可渲染,直接复用结果 | ||
| 2267 | + if block.get("_chart_renderable") is False: | ||
| 2268 | + if increment_stats: | ||
| 2269 | + self.chart_validation_stats['total'] += 1 | ||
| 2270 | + self._record_chart_failure_stat(cache_key) | ||
| 2271 | + reason = block.get("_chart_error_reason") | ||
| 2272 | + block["_chart_reviewed"] = True | ||
| 2273 | + block["_chart_review_status"] = block.get("_chart_review_status") or "failed" | ||
| 2274 | + block["_chart_review_method"] = block.get("_chart_review_method") or "none" | ||
| 2275 | + if reason: | ||
| 2276 | + self._note_chart_failure(cache_key, reason) | ||
| 2277 | + return False, reason | ||
| 2278 | + | ||
| 2279 | + if block.get("_chart_reviewed"): | ||
| 2280 | + if increment_stats: | ||
| 2281 | + self._apply_cached_review_stats(block) | ||
| 2282 | + failed, cached_reason = self._has_chart_failure(block) | ||
| 2283 | + renderable = not failed and block.get("_chart_renderable", True) is not False | ||
| 2284 | + return renderable, block.get("_chart_error_reason") or cached_reason | ||
| 2285 | + | ||
| 2286 | + # 首次审查:先补全结构,再验证/修复 | ||
| 2287 | + self._normalize_chart_block(block, chapter_context) | ||
| 2288 | + | ||
| 2289 | + if increment_stats: | ||
| 2246 | self.chart_validation_stats['total'] += 1 | 2290 | self.chart_validation_stats['total'] += 1 |
| 2247 | 2291 | ||
| 2248 | - # 词云使用专用渲染逻辑,不按Chart.js规则验证,直接跳过防止误判 | ||
| 2249 | if is_wordcloud: | 2292 | if is_wordcloud: |
| 2293 | + if increment_stats: | ||
| 2250 | self.chart_validation_stats['valid'] += 1 | 2294 | self.chart_validation_stats['valid'] += 1 |
| 2251 | - else: | ||
| 2252 | - # 如果此前已记录失败,直接使用占位提示,避免重复修复 | ||
| 2253 | - has_failed, cached_reason = self._has_chart_failure(block) | ||
| 2254 | - if has_failed: | ||
| 2255 | - self._record_chart_failure_stat(cache_key) | ||
| 2256 | - reason = cached_reason or "LLM返回的图表信息格式有误,无法正常显示" | ||
| 2257 | - return self._render_chart_error_placeholder(display_title, reason, widget_id) | 2295 | + block["_chart_reviewed"] = True |
| 2296 | + block["_chart_review_status"] = "valid" | ||
| 2297 | + block["_chart_review_method"] = "none" | ||
| 2298 | + return True, None | ||
| 2258 | 2299 | ||
| 2259 | - # 验证图表数据 | ||
| 2260 | validation_result = self.chart_validator.validate(block) | 2300 | validation_result = self.chart_validator.validate(block) |
| 2261 | 2301 | ||
| 2262 | if not validation_result.is_valid: | 2302 | if not validation_result.is_valid: |
| @@ -2264,41 +2304,153 @@ class HTMLRenderer: | @@ -2264,41 +2304,153 @@ class HTMLRenderer: | ||
| 2264 | f"图表 {block.get('widgetId', 'unknown')} 验证失败: {validation_result.errors}" | 2304 | f"图表 {block.get('widgetId', 'unknown')} 验证失败: {validation_result.errors}" |
| 2265 | ) | 2305 | ) |
| 2266 | 2306 | ||
| 2267 | - # 尝试修复 | ||
| 2268 | repair_result = self.chart_repairer.repair(block, validation_result) | 2307 | repair_result = self.chart_repairer.repair(block, validation_result) |
| 2269 | 2308 | ||
| 2270 | if repair_result.success and repair_result.repaired_block: | 2309 | if repair_result.success and repair_result.repaired_block: |
| 2271 | - # 修复成功,使用修复后的数据 | ||
| 2272 | - block = repair_result.repaired_block | 2310 | + # 修复成功,回写修复后的数据 |
| 2311 | + repaired_block = repair_result.repaired_block | ||
| 2312 | + block.clear() | ||
| 2313 | + block.update(repaired_block) | ||
| 2314 | + method = repair_result.method or "local" | ||
| 2273 | logger.info( | 2315 | logger.info( |
| 2274 | f"图表 {block.get('widgetId', 'unknown')} 修复成功 " | 2316 | f"图表 {block.get('widgetId', 'unknown')} 修复成功 " |
| 2275 | - f"(方法: {repair_result.method}): {repair_result.changes}" | 2317 | + f"(方法: {method}): {repair_result.changes}" |
| 2276 | ) | 2318 | ) |
| 2277 | 2319 | ||
| 2278 | - # 更新统计 | ||
| 2279 | - if repair_result.method == 'local': | 2320 | + if increment_stats: |
| 2321 | + if method == 'local': | ||
| 2280 | self.chart_validation_stats['repaired_locally'] += 1 | 2322 | self.chart_validation_stats['repaired_locally'] += 1 |
| 2281 | - elif repair_result.method == 'api': | 2323 | + elif method == 'api': |
| 2282 | self.chart_validation_stats['repaired_api'] += 1 | 2324 | self.chart_validation_stats['repaired_api'] += 1 |
| 2283 | - else: | 2325 | + block["_chart_review_status"] = "repaired" |
| 2326 | + block["_chart_review_method"] = method | ||
| 2327 | + block["_chart_reviewed"] = True | ||
| 2328 | + return True, None | ||
| 2329 | + | ||
| 2284 | # 修复失败,记录失败并输出占位提示 | 2330 | # 修复失败,记录失败并输出占位提示 |
| 2285 | fail_reason = self._format_chart_error_reason(validation_result) | 2331 | fail_reason = self._format_chart_error_reason(validation_result) |
| 2286 | block["_chart_renderable"] = False | 2332 | block["_chart_renderable"] = False |
| 2287 | block["_chart_error_reason"] = fail_reason | 2333 | block["_chart_error_reason"] = fail_reason |
| 2334 | + block["_chart_review_status"] = "failed" | ||
| 2335 | + block["_chart_review_method"] = "none" | ||
| 2336 | + block["_chart_reviewed"] = True | ||
| 2288 | self._note_chart_failure(cache_key, fail_reason) | 2337 | self._note_chart_failure(cache_key, fail_reason) |
| 2338 | + if increment_stats: | ||
| 2289 | self._record_chart_failure_stat(cache_key) | 2339 | self._record_chart_failure_stat(cache_key) |
| 2290 | logger.warning( | 2340 | logger.warning( |
| 2291 | f"图表 {block.get('widgetId', 'unknown')} 修复失败,已跳过渲染: {fail_reason}" | 2341 | f"图表 {block.get('widgetId', 'unknown')} 修复失败,已跳过渲染: {fail_reason}" |
| 2292 | ) | 2342 | ) |
| 2293 | - return self._render_chart_error_placeholder(display_title, fail_reason, widget_id) | ||
| 2294 | - else: | 2343 | + return False, fail_reason |
| 2344 | + | ||
| 2295 | # 验证通过 | 2345 | # 验证通过 |
| 2346 | + if increment_stats: | ||
| 2296 | self.chart_validation_stats['valid'] += 1 | 2347 | self.chart_validation_stats['valid'] += 1 |
| 2297 | if validation_result.warnings: | 2348 | if validation_result.warnings: |
| 2298 | logger.info( | 2349 | logger.info( |
| 2299 | f"图表 {block.get('widgetId', 'unknown')} 验证通过," | 2350 | f"图表 {block.get('widgetId', 'unknown')} 验证通过," |
| 2300 | f"但有警告: {validation_result.warnings}" | 2351 | f"但有警告: {validation_result.warnings}" |
| 2301 | ) | 2352 | ) |
| 2353 | + block["_chart_review_status"] = "valid" | ||
| 2354 | + block["_chart_review_method"] = "none" | ||
| 2355 | + block["_chart_reviewed"] = True | ||
| 2356 | + return True, None | ||
| 2357 | + | ||
| 2358 | + def review_and_patch_document( | ||
| 2359 | + self, | ||
| 2360 | + document_ir: Dict[str, Any], | ||
| 2361 | + *, | ||
| 2362 | + reset_stats: bool = True, | ||
| 2363 | + clone: bool = False | ||
| 2364 | + ) -> Dict[str, Any]: | ||
| 2365 | + """ | ||
| 2366 | + 全局审查并修复图表,将修复结果回写到原始 IR,避免多次渲染重复修复。 | ||
| 2367 | + | ||
| 2368 | + 参数: | ||
| 2369 | + document_ir: 原始 Document IR | ||
| 2370 | + reset_stats: 是否重置统计数据 | ||
| 2371 | + clone: 是否返回修复后的深拷贝(原始 IR 仍会被回写修复结果) | ||
| 2372 | + | ||
| 2373 | + 返回: | ||
| 2374 | + 修复后的 IR(可能是原对象或其深拷贝) | ||
| 2375 | + """ | ||
| 2376 | + if reset_stats: | ||
| 2377 | + self._reset_chart_validation_stats() | ||
| 2378 | + | ||
| 2379 | + target_ir = document_ir or {} | ||
| 2380 | + | ||
| 2381 | + def _walk_blocks(blocks: list, chapter_ctx: Dict[str, Any] | None = None) -> None: | ||
| 2382 | + for blk in blocks or []: | ||
| 2383 | + if not isinstance(blk, dict): | ||
| 2384 | + continue | ||
| 2385 | + if blk.get("type") == "widget": | ||
| 2386 | + self._ensure_chart_reviewed(blk, chapter_ctx, increment_stats=True) | ||
| 2387 | + | ||
| 2388 | + nested_blocks = blk.get("blocks") | ||
| 2389 | + if isinstance(nested_blocks, list): | ||
| 2390 | + _walk_blocks(nested_blocks, chapter_ctx) | ||
| 2391 | + | ||
| 2392 | + if blk.get("type") == "list": | ||
| 2393 | + for item in blk.get("items", []): | ||
| 2394 | + if isinstance(item, list): | ||
| 2395 | + _walk_blocks(item, chapter_ctx) | ||
| 2396 | + | ||
| 2397 | + if blk.get("type") == "table": | ||
| 2398 | + for row in blk.get("rows", []): | ||
| 2399 | + cells = row.get("cells", []) | ||
| 2400 | + for cell in cells: | ||
| 2401 | + if isinstance(cell, dict): | ||
| 2402 | + cell_blocks = cell.get("blocks", []) | ||
| 2403 | + if isinstance(cell_blocks, list): | ||
| 2404 | + _walk_blocks(cell_blocks, chapter_ctx) | ||
| 2405 | + | ||
| 2406 | + for chapter in target_ir.get("chapters", []) or []: | ||
| 2407 | + if not isinstance(chapter, dict): | ||
| 2408 | + continue | ||
| 2409 | + _walk_blocks(chapter.get("blocks", []), chapter) | ||
| 2410 | + | ||
| 2411 | + return copy.deepcopy(target_ir) if clone else target_ir | ||
| 2412 | + | ||
| 2413 | + def _render_widget(self, block: Dict[str, Any]) -> str: | ||
| 2414 | + """ | ||
| 2415 | + 渲染Chart.js等交互组件的占位容器,并记录配置JSON。 | ||
| 2416 | + | ||
| 2417 | + 在渲染前进行图表验证和修复: | ||
| 2418 | + 1. validate:ChartValidator 检查 block 的 data/props/options 结构; | ||
| 2419 | + 2. repair:若失败,先本地修补(缺 labels/datasets/scale 时兜底),再调用 LLM API; | ||
| 2420 | + 3. 失败兜底:写入 _chart_renderable=False 及 _chart_error_reason,输出错误占位而非抛异常。 | ||
| 2421 | + | ||
| 2422 | + 参数(对应 IR 层级): | ||
| 2423 | + - block.widgetType: "chart.js/bar"/"chart.js/line"/"wordcloud" 等,决定渲染器与校验策略; | ||
| 2424 | + - block.widgetId: 组件唯一ID,用于canvas/data script绑定; | ||
| 2425 | + - block.props: 透传到前端 Chart.js options,例如 props.title / props.options.legend; | ||
| 2426 | + - block.data: {labels, datasets} 等数据;缺失时会尝试从章节级 chapter.data 补齐; | ||
| 2427 | + - block.dataRef: 外部数据引用,暂作为透传记录。 | ||
| 2428 | + | ||
| 2429 | + 返回: | ||
| 2430 | + str: 含canvas与配置脚本的HTML。 | ||
| 2431 | + """ | ||
| 2432 | + # 统一的审查/修复入口,避免后续重复修复 | ||
| 2433 | + widget_type = block.get('widgetType', '') | ||
| 2434 | + is_chart = isinstance(widget_type, str) and widget_type.startswith('chart.js') | ||
| 2435 | + is_wordcloud = isinstance(widget_type, str) and 'wordcloud' in widget_type.lower() | ||
| 2436 | + reviewed = bool(block.get("_chart_reviewed")) | ||
| 2437 | + renderable = True | ||
| 2438 | + fail_reason = None | ||
| 2439 | + | ||
| 2440 | + if is_chart: | ||
| 2441 | + renderable, fail_reason = self._ensure_chart_reviewed( | ||
| 2442 | + block, | ||
| 2443 | + getattr(self, "_current_chapter", None), | ||
| 2444 | + increment_stats=not reviewed | ||
| 2445 | + ) | ||
| 2446 | + | ||
| 2447 | + widget_id = block.get('widgetId') | ||
| 2448 | + props_snapshot = block.get("props") if isinstance(block.get("props"), dict) else {} | ||
| 2449 | + display_title = props_snapshot.get("title") or block.get("title") or widget_id or "图表" | ||
| 2450 | + | ||
| 2451 | + if is_chart and not renderable: | ||
| 2452 | + reason = fail_reason or "LLM返回的图表信息格式有误,无法正常显示" | ||
| 2453 | + return self._render_chart_error_placeholder(display_title, reason, widget_id) | ||
| 2302 | 2454 | ||
| 2303 | # 渲染图表HTML | 2455 | # 渲染图表HTML |
| 2304 | self.chart_counter += 1 | 2456 | self.chart_counter += 1 |
| @@ -157,10 +157,11 @@ class PDFRenderer: | @@ -157,10 +157,11 @@ class PDFRenderer: | ||
| 157 | 157 | ||
| 158 | def _preprocess_charts(self, document_ir: Dict[str, Any]) -> Dict[str, Any]: | 158 | def _preprocess_charts(self, document_ir: Dict[str, Any]) -> Dict[str, Any]: |
| 159 | """ | 159 | """ |
| 160 | - 预处理图表:验证和修复所有图表数据 | 160 | + 预处理图表:验证并修复所有图表数据,结果回写原始IR。 |
| 161 | 161 | ||
| 162 | - 这个方法确保在转换为SVG之前,所有图表数据都是有效的。 | ||
| 163 | - 使用与HTMLRenderer相同的验证和修复逻辑,保证PDF和HTML的一致性。 | 162 | + 先统一审查并修复图表,把修复结果直接写回传入的 IR, |
| 163 | + 然后返回修复后的深拷贝供后续 SVG/词云转换使用,避免 | ||
| 164 | + HTML 和 PDF 分别重复触发 ChartRepairer。 | ||
| 164 | 165 | ||
| 165 | 参数: | 166 | 参数: |
| 166 | document_ir: Document IR数据 | 167 | document_ir: Document IR数据 |
| @@ -168,101 +169,24 @@ class PDFRenderer: | @@ -168,101 +169,24 @@ class PDFRenderer: | ||
| 168 | 返回: | 169 | 返回: |
| 169 | Dict[str, Any]: 修复后的Document IR(深拷贝) | 170 | Dict[str, Any]: 修复后的Document IR(深拷贝) |
| 170 | """ | 171 | """ |
| 171 | - # 深拷贝以避免修改原始IR | ||
| 172 | - ir_copy = copy.deepcopy(document_ir) | ||
| 173 | - | ||
| 174 | - repair_stats = { | ||
| 175 | - 'total': 0, | ||
| 176 | - 'repaired': 0, | ||
| 177 | - 'failed': 0 | ||
| 178 | - } | ||
| 179 | - | ||
| 180 | - def repair_widgets_in_blocks(blocks: list, chapter_context: Dict[str, Any] | None = None) -> None: | ||
| 181 | - """递归修复blocks中的所有widget""" | ||
| 182 | - for block in blocks: | ||
| 183 | - if not isinstance(block, dict): | ||
| 184 | - continue | ||
| 185 | - | ||
| 186 | - # 处理widget类型 | ||
| 187 | - if block.get('type') == 'widget': | ||
| 188 | - # 先用HTML渲染器的容错逻辑补全字段 | ||
| 189 | - try: | ||
| 190 | - self.html_renderer._normalize_chart_block(block, chapter_context) | ||
| 191 | - except Exception as exc: # 防御性处理,避免单个图表阻断流程 | ||
| 192 | - logger.debug(f"预处理图表 {block.get('widgetId')} 时出错: {exc}") | ||
| 193 | - | ||
| 194 | - widget_type = block.get('widgetType', '') | ||
| 195 | - if widget_type.startswith('chart.js'): | ||
| 196 | - repair_stats['total'] += 1 | ||
| 197 | - | ||
| 198 | - # 使用HTMLRenderer的验证器和修复器 | ||
| 199 | - validation = self.html_renderer.chart_validator.validate(block) | ||
| 200 | - | ||
| 201 | - if not validation.is_valid: | ||
| 202 | - logger.debug(f"图表 {block.get('widgetId')} 需要修复: {validation.errors}") | ||
| 203 | - | ||
| 204 | - # 尝试修复 | ||
| 205 | - repair_result = self.html_renderer.chart_repairer.repair(block, validation) | ||
| 206 | - | ||
| 207 | - if repair_result.success and repair_result.repaired_block: | ||
| 208 | - # 更新block内容(在副本中) | ||
| 209 | - block.update(repair_result.repaired_block) | ||
| 210 | - repair_stats['repaired'] += 1 | ||
| 211 | - logger.debug( | ||
| 212 | - f"图表 {block.get('widgetId')} 已修复 " | ||
| 213 | - f"(方法: {repair_result.method})" | 172 | + reviewed_ir = self.html_renderer.review_and_patch_document( |
| 173 | + document_ir, | ||
| 174 | + reset_stats=True, | ||
| 175 | + clone=False | ||
| 214 | ) | 176 | ) |
| 215 | - else: | ||
| 216 | - repair_stats['failed'] += 1 | ||
| 217 | - reason = self.html_renderer._format_chart_error_reason(validation) | ||
| 218 | - block["_chart_renderable"] = False | ||
| 219 | - block["_chart_error_reason"] = reason | ||
| 220 | - self.html_renderer._note_chart_failure( | ||
| 221 | - self.html_renderer._chart_cache_key(block), | ||
| 222 | - reason | ||
| 223 | - ) | ||
| 224 | - logger.warning( | ||
| 225 | - f"图表 {block.get('widgetId')} 修复失败,将使用占位提示: {reason}" | ||
| 226 | - ) | ||
| 227 | - | ||
| 228 | - # 递归处理嵌套的blocks | ||
| 229 | - nested_blocks = block.get('blocks') | ||
| 230 | - if isinstance(nested_blocks, list): | ||
| 231 | - repair_widgets_in_blocks(nested_blocks, chapter_context) | ||
| 232 | - | ||
| 233 | - # 处理列表项 | ||
| 234 | - if block.get('type') == 'list': | ||
| 235 | - items = block.get('items', []) | ||
| 236 | - for item in items: | ||
| 237 | - if isinstance(item, list): | ||
| 238 | - repair_widgets_in_blocks(item, chapter_context) | ||
| 239 | - | ||
| 240 | - # 处理表格单元格 | ||
| 241 | - if block.get('type') == 'table': | ||
| 242 | - rows = block.get('rows', []) | ||
| 243 | - for row in rows: | ||
| 244 | - cells = row.get('cells', []) | ||
| 245 | - for cell in cells: | ||
| 246 | - cell_blocks = cell.get('blocks', []) | ||
| 247 | - if isinstance(cell_blocks, list): | ||
| 248 | - repair_widgets_in_blocks(cell_blocks, chapter_context) | ||
| 249 | - | ||
| 250 | - # 处理所有章节 | ||
| 251 | - chapters = ir_copy.get('chapters', []) | ||
| 252 | - for chapter in chapters: | ||
| 253 | - blocks = chapter.get('blocks', []) | ||
| 254 | - repair_widgets_in_blocks(blocks, chapter) | ||
| 255 | 177 | ||
| 256 | - # 输出统计信息 | ||
| 257 | - if repair_stats['total'] > 0: | 178 | + stats = self.html_renderer.chart_validation_stats |
| 179 | + if stats.get('total', 0) > 0: | ||
| 180 | + repaired_count = stats.get('repaired_locally', 0) + stats.get('repaired_api', 0) | ||
| 258 | logger.info( | 181 | logger.info( |
| 259 | f"PDF图表预处理完成: " | 182 | f"PDF图表预处理完成: " |
| 260 | - f"总计 {repair_stats['total']} 个图表, " | ||
| 261 | - f"修复 {repair_stats['repaired']} 个, " | ||
| 262 | - f"失败 {repair_stats['failed']} 个" | 183 | + f"总计 {stats.get('total', 0)} 个图表, " |
| 184 | + f"修复 {repaired_count} 个, " | ||
| 185 | + f"失败 {stats.get('failed', 0)} 个" | ||
| 263 | ) | 186 | ) |
| 264 | 187 | ||
| 265 | - return ir_copy | 188 | + # 返回深拷贝,避免后续 SVG 转换过程影响回写后的原始 IR |
| 189 | + return copy.deepcopy(reviewed_ir) | ||
| 266 | 190 | ||
| 267 | def _convert_charts_to_svg(self, document_ir: Dict[str, Any]) -> Dict[str, str]: | 191 | def _convert_charts_to_svg(self, document_ir: Dict[str, Any]) -> Dict[str, str]: |
| 268 | """ | 192 | """ |
-
Please register or login to post a comment