Showing 4 changed files with 460 additions and 12 deletions
| @@ -29,6 +29,7 @@ from .nodes import ( | @@ -29,6 +29,7 @@ from .nodes import ( | ||
| 29 | TemplateSelectionNode, | 29 | TemplateSelectionNode, |
| 30 | ChapterGenerationNode, | 30 | ChapterGenerationNode, |
| 31 | ChapterJsonParseError, | 31 | ChapterJsonParseError, |
| 32 | + ChapterContentError, | ||
| 32 | DocumentLayoutNode, | 33 | DocumentLayoutNode, |
| 33 | WordBudgetNode, | 34 | WordBudgetNode, |
| 34 | ) | 35 | ) |
| @@ -438,20 +439,26 @@ class ReportAgent: | @@ -438,20 +439,26 @@ class ReportAgent: | ||
| 438 | stream_callback=chunk_callback | 439 | stream_callback=chunk_callback |
| 439 | ) | 440 | ) |
| 440 | break | 441 | break |
| 441 | - except ChapterJsonParseError as parse_error: | 442 | + except (ChapterJsonParseError, ChapterContentError) as structured_error: |
| 443 | + error_kind = ( | ||
| 444 | + "content_sparse" if isinstance(structured_error, ChapterContentError) else "json_parse" | ||
| 445 | + ) | ||
| 446 | + readable_label = "内容密度异常" if error_kind == "content_sparse" else "JSON解析失败" | ||
| 442 | logger.warning( | 447 | logger.warning( |
| 443 | - "章节 %s JSON解析失败(第 %s/%s 次尝试): %s", | 448 | + "章节 %s %s(第 %s/%s 次尝试): %s", |
| 444 | section.title, | 449 | section.title, |
| 450 | + readable_label, | ||
| 445 | attempt, | 451 | attempt, |
| 446 | chapter_max_attempts, | 452 | chapter_max_attempts, |
| 447 | - parse_error, | 453 | + structured_error, |
| 448 | ) | 454 | ) |
| 449 | emit('chapter_status', { | 455 | emit('chapter_status', { |
| 450 | 'chapterId': section.chapter_id, | 456 | 'chapterId': section.chapter_id, |
| 451 | 'title': section.title, | 457 | 'title': section.title, |
| 452 | 'status': 'retrying' if attempt < chapter_max_attempts else 'error', | 458 | 'status': 'retrying' if attempt < chapter_max_attempts else 'error', |
| 453 | 'attempt': attempt, | 459 | 'attempt': attempt, |
| 454 | - 'error': str(parse_error), | 460 | + 'error': str(structured_error), |
| 461 | + 'reason': error_kind, | ||
| 455 | }) | 462 | }) |
| 456 | if attempt >= chapter_max_attempts: | 463 | if attempt >= chapter_max_attempts: |
| 457 | raise | 464 | raise |
| @@ -6,7 +6,7 @@ Report Engine节点处理模块。 | @@ -6,7 +6,7 @@ Report Engine节点处理模块。 | ||
| 6 | 6 | ||
| 7 | from .base_node import BaseNode, StateMutationNode | 7 | from .base_node import BaseNode, StateMutationNode |
| 8 | from .template_selection_node import TemplateSelectionNode | 8 | from .template_selection_node import TemplateSelectionNode |
| 9 | -from .chapter_generation_node import ChapterGenerationNode, ChapterJsonParseError | 9 | +from .chapter_generation_node import ChapterGenerationNode, ChapterJsonParseError, ChapterContentError |
| 10 | from .document_layout_node import DocumentLayoutNode | 10 | from .document_layout_node import DocumentLayoutNode |
| 11 | from .word_budget_node import WordBudgetNode | 11 | from .word_budget_node import WordBudgetNode |
| 12 | 12 | ||
| @@ -16,6 +16,7 @@ __all__ = [ | @@ -16,6 +16,7 @@ __all__ = [ | ||
| 16 | "TemplateSelectionNode", | 16 | "TemplateSelectionNode", |
| 17 | "ChapterGenerationNode", | 17 | "ChapterGenerationNode", |
| 18 | "ChapterJsonParseError", | 18 | "ChapterJsonParseError", |
| 19 | + "ChapterContentError", | ||
| 19 | "DocumentLayoutNode", | 20 | "DocumentLayoutNode", |
| 20 | "WordBudgetNode", | 21 | "WordBudgetNode", |
| 21 | ] | 22 | ] |
| @@ -36,6 +36,14 @@ class ChapterJsonParseError(ValueError): | @@ -36,6 +36,14 @@ class ChapterJsonParseError(ValueError): | ||
| 36 | self.raw_text = raw_text | 36 | self.raw_text = raw_text |
| 37 | 37 | ||
| 38 | 38 | ||
| 39 | +class ChapterContentError(ValueError): | ||
| 40 | + """ | ||
| 41 | + 章节内容稀疏异常。 | ||
| 42 | + | ||
| 43 | + 当LLM仅输出标题或正文不足以支撑一章时触发,驱动重试以保证报告质量。 | ||
| 44 | + """ | ||
| 45 | + | ||
| 46 | + | ||
| 39 | class ChapterGenerationNode(BaseNode): | 47 | class ChapterGenerationNode(BaseNode): |
| 40 | """ | 48 | """ |
| 41 | 负责按章节调用LLM并校验JSON结构。 | 49 | 负责按章节调用LLM并校验JSON结构。 |
| @@ -71,6 +79,12 @@ class ChapterGenerationNode(BaseNode): | @@ -71,6 +79,12 @@ class ChapterGenerationNode(BaseNode): | ||
| 71 | "sub": "subscript", | 79 | "sub": "subscript", |
| 72 | "sup": "superscript", | 80 | "sup": "superscript", |
| 73 | } | 81 | } |
| 82 | + # 章节若仅包含标题或字符过少则视为失败,强制LLM重新生成 | ||
| 83 | + _MIN_NON_HEADING_BLOCKS = 2 | ||
| 84 | + _MIN_BODY_CHARACTERS = 400 | ||
| 85 | + _PARAGRAPH_FRAGMENT_MAX_CHARS = 80 | ||
| 86 | + _PARAGRAPH_FRAGMENT_NO_TERMINATOR_MAX_CHARS = 240 | ||
| 87 | + _TERMINATION_PUNCTUATION = set("。!?!?;;……") | ||
| 74 | 88 | ||
| 75 | def __init__(self, llm_client, validator: IRValidator, storage: ChapterStorage): | 89 | def __init__(self, llm_client, validator: IRValidator, storage: ChapterStorage): |
| 76 | """ | 90 | """ |
| @@ -121,17 +135,32 @@ class ChapterGenerationNode(BaseNode): | @@ -121,17 +135,32 @@ class ChapterGenerationNode(BaseNode): | ||
| 121 | self._sanitize_chapter_blocks(chapter_json) | 135 | self._sanitize_chapter_blocks(chapter_json) |
| 122 | 136 | ||
| 123 | valid, errors = self.validator.validate_chapter(chapter_json) | 137 | valid, errors = self.validator.validate_chapter(chapter_json) |
| 138 | + content_error: ChapterContentError | None = None | ||
| 139 | + if valid: | ||
| 140 | + try: | ||
| 141 | + self._ensure_content_density(chapter_json) | ||
| 142 | + except ChapterContentError as exc: | ||
| 143 | + content_error = exc | ||
| 144 | + | ||
| 145 | + error_messages: List[str] = [] | ||
| 146 | + if not valid and errors: | ||
| 147 | + error_messages.extend(errors) | ||
| 148 | + if content_error: | ||
| 149 | + error_messages.append(str(content_error)) | ||
| 150 | + | ||
| 124 | self.storage.persist_chapter( | 151 | self.storage.persist_chapter( |
| 125 | run_dir, | 152 | run_dir, |
| 126 | chapter_meta, | 153 | chapter_meta, |
| 127 | chapter_json, | 154 | chapter_json, |
| 128 | - errors=None if valid else errors, | 155 | + errors=None if not error_messages else error_messages, |
| 129 | ) | 156 | ) |
| 130 | 157 | ||
| 131 | if not valid: | 158 | if not valid: |
| 132 | raise ValueError( | 159 | raise ValueError( |
| 133 | f"{section.title} 章节JSON校验失败: {'; '.join(errors[:5])}" | 160 | f"{section.title} 章节JSON校验失败: {'; '.join(errors[:5])}" |
| 134 | ) | 161 | ) |
| 162 | + if content_error: | ||
| 163 | + raise content_error | ||
| 135 | 164 | ||
| 136 | return chapter_json | 165 | return chapter_json |
| 137 | 166 | ||
| @@ -488,6 +517,97 @@ class ChapterGenerationNode(BaseNode): | @@ -488,6 +517,97 @@ class ChapterGenerationNode(BaseNode): | ||
| 488 | 517 | ||
| 489 | walk(chapter.get("blocks")) | 518 | walk(chapter.get("blocks")) |
| 490 | 519 | ||
| 520 | + blocks = chapter.get("blocks") | ||
| 521 | + if isinstance(blocks, list): | ||
| 522 | + chapter["blocks"] = self._merge_fragment_sequences(blocks) | ||
| 523 | + | ||
| 524 | + def _ensure_content_density(self, chapter: Dict[str, Any]): | ||
| 525 | + """ | ||
| 526 | + 校验章节正文密度。 | ||
| 527 | + | ||
| 528 | + 若blocks缺失、除标题外无有效区块,或正文字符数低于阈值, | ||
| 529 | + 则视为章节内容异常,触发ChapterContentError以便上游重试。 | ||
| 530 | + """ | ||
| 531 | + blocks = chapter.get("blocks") | ||
| 532 | + if not isinstance(blocks, list) or not blocks: | ||
| 533 | + raise ChapterContentError("章节缺少正文区块,无法输出内容") | ||
| 534 | + | ||
| 535 | + non_heading_blocks = [ | ||
| 536 | + block | ||
| 537 | + for block in blocks | ||
| 538 | + if isinstance(block, dict) | ||
| 539 | + and block.get("type") not in {"heading", "divider", "toc"} | ||
| 540 | + ] | ||
| 541 | + body_characters = self._count_body_characters(blocks) | ||
| 542 | + | ||
| 543 | + if len(non_heading_blocks) < self._MIN_NON_HEADING_BLOCKS or body_characters < self._MIN_BODY_CHARACTERS: | ||
| 544 | + raise ChapterContentError( | ||
| 545 | + f"{chapter.get('title') or '该章节'} 正文不足:有效区块 {len(non_heading_blocks)} 个,估算字符数 {body_characters}" | ||
| 546 | + ) | ||
| 547 | + | ||
| 548 | + def _count_body_characters(self, blocks: Any) -> int: | ||
| 549 | + """ | ||
| 550 | + 递归统计正文字符数。 | ||
| 551 | + | ||
| 552 | + - 忽略heading/divider/widget等非正文类型; | ||
| 553 | + - 对paragraph/list/table/callout等结构抽取嵌套文本; | ||
| 554 | + - 仅用于粗粒度判断篇幅是否合理。 | ||
| 555 | + """ | ||
| 556 | + | ||
| 557 | + def walk(node: Any) -> int: | ||
| 558 | + if node is None: | ||
| 559 | + return 0 | ||
| 560 | + if isinstance(node, list): | ||
| 561 | + return sum(walk(item) for item in node) | ||
| 562 | + if isinstance(node, str): | ||
| 563 | + return len(node.strip()) | ||
| 564 | + if not isinstance(node, dict): | ||
| 565 | + return 0 | ||
| 566 | + | ||
| 567 | + block_type = node.get("type") | ||
| 568 | + if block_type in {"heading", "divider", "toc", "widget"}: | ||
| 569 | + return 0 | ||
| 570 | + | ||
| 571 | + if block_type == "paragraph": | ||
| 572 | + inlines = node.get("inlines") | ||
| 573 | + if isinstance(inlines, list): | ||
| 574 | + total = 0 | ||
| 575 | + for run in inlines: | ||
| 576 | + if isinstance(run, dict): | ||
| 577 | + text = run.get("text") | ||
| 578 | + if isinstance(text, str): | ||
| 579 | + total += len(text.strip()) | ||
| 580 | + return total | ||
| 581 | + text_value = node.get("text") | ||
| 582 | + if isinstance(text_value, str): | ||
| 583 | + return len(text_value.strip()) | ||
| 584 | + return len(self._extract_block_text(node).strip()) | ||
| 585 | + | ||
| 586 | + if block_type == "list": | ||
| 587 | + total = 0 | ||
| 588 | + for item in node.get("items", []): | ||
| 589 | + total += walk(item) | ||
| 590 | + return total | ||
| 591 | + | ||
| 592 | + if block_type in {"blockquote", "callout"}: | ||
| 593 | + return walk(node.get("blocks")) | ||
| 594 | + | ||
| 595 | + if block_type == "table": | ||
| 596 | + total = 0 | ||
| 597 | + for row in node.get("rows", []): | ||
| 598 | + cells = row.get("cells") or [] | ||
| 599 | + for cell in cells: | ||
| 600 | + total += walk(cell.get("blocks")) | ||
| 601 | + return total | ||
| 602 | + | ||
| 603 | + nested = node.get("blocks") | ||
| 604 | + if isinstance(nested, list): | ||
| 605 | + return walk(nested) | ||
| 606 | + | ||
| 607 | + return len(self._extract_block_text(node).strip()) | ||
| 608 | + | ||
| 609 | + return walk(blocks) | ||
| 610 | + | ||
| 491 | def _sanitize_block_content(self, block: Dict[str, Any]): | 611 | def _sanitize_block_content(self, block: Dict[str, Any]): |
| 492 | """根据类型做精细化修复,例如清理paragraph内的非法inline mark""" | 612 | """根据类型做精细化修复,例如清理paragraph内的非法inline mark""" |
| 493 | block_type = block.get("type") | 613 | block_type = block.get("type") |
| @@ -505,7 +625,134 @@ class ChapterGenerationNode(BaseNode): | @@ -505,7 +625,134 @@ class ChapterGenerationNode(BaseNode): | ||
| 505 | normalized_runs = [self._as_inline_run(self._extract_block_text(block))] | 625 | normalized_runs = [self._as_inline_run(self._extract_block_text(block))] |
| 506 | if not normalized_runs: | 626 | if not normalized_runs: |
| 507 | normalized_runs = [self._as_inline_run("")] | 627 | normalized_runs = [self._as_inline_run("")] |
| 508 | - block["inlines"] = normalized_runs | 628 | + block["inlines"] = self._strip_inline_artifacts(normalized_runs) |
| 629 | + | ||
| 630 | + def _strip_inline_artifacts(self, inlines: List[Dict[str, Any]]) -> List[Dict[str, Any]]: | ||
| 631 | + """移除被LLM误写入的JSON哨兵文本,防止渲染出`{\"type\": \"\"}`等垃圾字符""" | ||
| 632 | + cleaned: List[Dict[str, Any]] = [] | ||
| 633 | + for run in inlines or []: | ||
| 634 | + if not isinstance(run, dict): | ||
| 635 | + continue | ||
| 636 | + text = run.get("text") | ||
| 637 | + if isinstance(text, str): | ||
| 638 | + stripped = text.strip() | ||
| 639 | + if stripped.startswith("{") and stripped.endswith("}"): | ||
| 640 | + try: | ||
| 641 | + payload = json.loads(stripped) | ||
| 642 | + except json.JSONDecodeError: | ||
| 643 | + payload = None | ||
| 644 | + if isinstance(payload, dict) and set(payload.keys()).issubset({"type", "value"}): | ||
| 645 | + continue | ||
| 646 | + cleaned.append(run) | ||
| 647 | + return cleaned or [self._as_inline_run("")] | ||
| 648 | + | ||
| 649 | + def _merge_fragment_sequences(self, blocks: List[Dict[str, Any]]) -> List[Dict[str, Any]]: | ||
| 650 | + """合并被LLM拆成多段的句子片段,避免HTML出现大量孤立<p>""" | ||
| 651 | + if not isinstance(blocks, list): | ||
| 652 | + return blocks | ||
| 653 | + | ||
| 654 | + merged: List[Dict[str, Any]] = [] | ||
| 655 | + fragment_buffer: List[Dict[str, Any]] = [] | ||
| 656 | + | ||
| 657 | + def flush_buffer(): | ||
| 658 | + nonlocal fragment_buffer | ||
| 659 | + if not fragment_buffer: | ||
| 660 | + return | ||
| 661 | + if len(fragment_buffer) == 1: | ||
| 662 | + merged.append(fragment_buffer[0]) | ||
| 663 | + else: | ||
| 664 | + merged.append(self._combine_paragraph_fragments(fragment_buffer)) | ||
| 665 | + fragment_buffer = [] | ||
| 666 | + | ||
| 667 | + for block in blocks: | ||
| 668 | + if self._is_paragraph_fragment(block): | ||
| 669 | + fragment_buffer.append(block) | ||
| 670 | + continue | ||
| 671 | + flush_buffer() | ||
| 672 | + merged.append(self._merge_nested_fragments(block)) | ||
| 673 | + | ||
| 674 | + flush_buffer() | ||
| 675 | + return merged | ||
| 676 | + | ||
| 677 | + def _merge_nested_fragments(self, block: Dict[str, Any]) -> Dict[str, Any]: | ||
| 678 | + """对嵌套结构(callout/list/table)递归处理片段合并""" | ||
| 679 | + block_type = block.get("type") | ||
| 680 | + if block_type in {"callout", "blockquote"}: | ||
| 681 | + nested = block.get("blocks") | ||
| 682 | + if isinstance(nested, list): | ||
| 683 | + block["blocks"] = self._merge_fragment_sequences(nested) | ||
| 684 | + elif block_type == "list": | ||
| 685 | + items = block.get("items") | ||
| 686 | + if isinstance(items, list): | ||
| 687 | + for entry in items: | ||
| 688 | + if isinstance(entry, list): | ||
| 689 | + merged_entry = self._merge_fragment_sequences(entry) | ||
| 690 | + entry[:] = merged_entry | ||
| 691 | + elif block_type == "table": | ||
| 692 | + for row in block.get("rows", []): | ||
| 693 | + cells = row.get("cells") or [] | ||
| 694 | + for cell in cells: | ||
| 695 | + nested_blocks = cell.get("blocks") | ||
| 696 | + if isinstance(nested_blocks, list): | ||
| 697 | + cell["blocks"] = self._merge_fragment_sequences(nested_blocks) | ||
| 698 | + return block | ||
| 699 | + | ||
| 700 | + def _combine_paragraph_fragments(self, fragments: List[Dict[str, Any]]) -> Dict[str, Any]: | ||
| 701 | + """将多个句子片段合并为单个paragraph block""" | ||
| 702 | + template = dict(fragments[0]) | ||
| 703 | + combined_inlines: List[Dict[str, Any]] = [] | ||
| 704 | + for fragment in fragments: | ||
| 705 | + runs = fragment.get("inlines") | ||
| 706 | + if isinstance(runs, list) and runs: | ||
| 707 | + combined_inlines.extend(runs) | ||
| 708 | + else: | ||
| 709 | + fallback_text = self._extract_block_text(fragment) | ||
| 710 | + combined_inlines.append(self._as_inline_run(fallback_text)) | ||
| 711 | + if not combined_inlines: | ||
| 712 | + combined_inlines.append(self._as_inline_run("")) | ||
| 713 | + template["inlines"] = combined_inlines | ||
| 714 | + return template | ||
| 715 | + | ||
| 716 | + def _is_paragraph_fragment(self, block: Dict[str, Any]) -> bool: | ||
| 717 | + """判断paragraph是否为被错误拆分的短片段""" | ||
| 718 | + if not isinstance(block, dict) or block.get("type") != "paragraph": | ||
| 719 | + return False | ||
| 720 | + inlines = block.get("inlines") | ||
| 721 | + text = "" | ||
| 722 | + has_marks = False | ||
| 723 | + if isinstance(inlines, list) and inlines: | ||
| 724 | + parts: List[str] = [] | ||
| 725 | + for run in inlines: | ||
| 726 | + if not isinstance(run, dict): | ||
| 727 | + continue | ||
| 728 | + parts.append(str(run.get("text") or "")) | ||
| 729 | + marks = run.get("marks") | ||
| 730 | + if isinstance(marks, list) and any(marks): | ||
| 731 | + has_marks = True | ||
| 732 | + text = "".join(parts) | ||
| 733 | + else: | ||
| 734 | + text = self._extract_block_text(block) | ||
| 735 | + stripped = (text or "").strip() | ||
| 736 | + if not stripped: | ||
| 737 | + return True | ||
| 738 | + if has_marks: | ||
| 739 | + return False | ||
| 740 | + if "\n" in stripped: | ||
| 741 | + return False | ||
| 742 | + | ||
| 743 | + short_limit = self._PARAGRAPH_FRAGMENT_MAX_CHARS | ||
| 744 | + long_limit = getattr( | ||
| 745 | + self, | ||
| 746 | + "_PARAGRAPH_FRAGMENT_NO_TERMINATOR_MAX_CHARS", | ||
| 747 | + short_limit * 3, | ||
| 748 | + ) | ||
| 749 | + | ||
| 750 | + if stripped[-1] in self._TERMINATION_PUNCTUATION: | ||
| 751 | + return len(stripped) <= short_limit | ||
| 752 | + | ||
| 753 | + if len(stripped) > long_limit: | ||
| 754 | + return False | ||
| 755 | + return True | ||
| 509 | 756 | ||
| 510 | def _coerce_inline_run(self, run: Any) -> List[Dict[str, Any]]: | 757 | def _coerce_inline_run(self, run: Any) -> List[Dict[str, Any]]: |
| 511 | """将任意inline写法规整为合法run""" | 758 | """将任意inline写法规整为合法run""" |
| @@ -5,6 +5,7 @@ | @@ -5,6 +5,7 @@ | ||
| 5 | from __future__ import annotations | 5 | from __future__ import annotations |
| 6 | 6 | ||
| 7 | import ast | 7 | import ast |
| 8 | +import copy | ||
| 8 | import html | 9 | import html |
| 9 | import json | 10 | import json |
| 10 | from typing import Any, Dict, List | 11 | from typing import Any, Dict, List |
| @@ -19,6 +20,31 @@ class HTMLRenderer: | @@ -19,6 +20,31 @@ class HTMLRenderer: | ||
| 19 | - 提供主题变量、编号映射等辅助功能。 | 20 | - 提供主题变量、编号映射等辅助功能。 |
| 20 | """ | 21 | """ |
| 21 | 22 | ||
| 23 | + CALLOUT_ALLOWED_TYPES = { | ||
| 24 | + "paragraph", | ||
| 25 | + "list", | ||
| 26 | + "table", | ||
| 27 | + "blockquote", | ||
| 28 | + "code", | ||
| 29 | + "math", | ||
| 30 | + "figure", | ||
| 31 | + "kpiGrid", | ||
| 32 | + } | ||
| 33 | + INLINE_ARTIFACT_KEYS = { | ||
| 34 | + "props", | ||
| 35 | + "widgetId", | ||
| 36 | + "widgetType", | ||
| 37 | + "data", | ||
| 38 | + "dataRef", | ||
| 39 | + "datasets", | ||
| 40 | + "labels", | ||
| 41 | + "config", | ||
| 42 | + "options", | ||
| 43 | + } | ||
| 44 | + TABLE_COMPLEX_CHARS = set( | ||
| 45 | + "@%%()(),,。;;::、??!!·…-—_+<>[]{}|\\/\"'`~$^&*#" | ||
| 46 | + ) | ||
| 47 | + | ||
| 22 | def __init__(self, config: Dict[str, Any] | None = None): | 48 | def __init__(self, config: Dict[str, Any] | None = None): |
| 23 | """初始化渲染器缓存并允许注入额外配置(如主题覆盖)""" | 49 | """初始化渲染器缓存并允许注入额外配置(如主题覆盖)""" |
| 24 | self.config = config or {} | 50 | self.config = config or {} |
| @@ -72,6 +98,7 @@ class HTMLRenderer: | @@ -72,6 +98,7 @@ class HTMLRenderer: | ||
| 72 | <meta name="viewport" content="width=device-width, initial-scale=1.0" /> | 98 | <meta name="viewport" content="width=device-width, initial-scale=1.0" /> |
| 73 | <title>{self._escape_html(title)}</title> | 99 | <title>{self._escape_html(title)}</title> |
| 74 | <script src="https://cdn.jsdelivr.net/npm/chart.js"></script> | 100 | <script src="https://cdn.jsdelivr.net/npm/chart.js"></script> |
| 101 | + <script src="https://cdn.jsdelivr.net/npm/chartjs-chart-sankey@4"></script> | ||
| 75 | <script src="https://cdnjs.cloudflare.com/ajax/libs/html2canvas/1.4.1/html2canvas.min.js"></script> | 102 | <script src="https://cdnjs.cloudflare.com/ajax/libs/html2canvas/1.4.1/html2canvas.min.js"></script> |
| 76 | <script src="https://cdnjs.cloudflare.com/ajax/libs/jspdf/2.5.1/jspdf.umd.min.js"></script> | 103 | <script src="https://cdnjs.cloudflare.com/ajax/libs/jspdf/2.5.1/jspdf.umd.min.js"></script> |
| 77 | <script> | 104 | <script> |
| @@ -442,8 +469,9 @@ class HTMLRenderer: | @@ -442,8 +469,9 @@ class HTMLRenderer: | ||
| 442 | 469 | ||
| 443 | def _render_table(self, block: Dict[str, Any]) -> str: | 470 | def _render_table(self, block: Dict[str, Any]) -> str: |
| 444 | """渲染表格,同时保留caption与单元格属性""" | 471 | """渲染表格,同时保留caption与单元格属性""" |
| 472 | + rows = self._normalize_table_rows(block.get("rows") or []) | ||
| 445 | rows_html = "" | 473 | rows_html = "" |
| 446 | - for row in block.get("rows", []): | 474 | + for row in rows: |
| 447 | row_cells = "" | 475 | row_cells = "" |
| 448 | for cell in row.get("cells", []): | 476 | for cell in row.get("cells", []): |
| 449 | cell_tag = "th" if cell.get("header") or cell.get("isHeader") else "td" | 477 | cell_tag = "th" if cell.get("header") or cell.get("isHeader") else "td" |
| @@ -462,6 +490,105 @@ class HTMLRenderer: | @@ -462,6 +490,105 @@ class HTMLRenderer: | ||
| 462 | caption_html = f"<caption>{self._escape_html(caption)}</caption>" if caption else "" | 490 | caption_html = f"<caption>{self._escape_html(caption)}</caption>" if caption else "" |
| 463 | return f'<div class="table-wrap"><table>{caption_html}<tbody>{rows_html}</tbody></table></div>' | 491 | return f'<div class="table-wrap"><table>{caption_html}<tbody>{rows_html}</tbody></table></div>' |
| 464 | 492 | ||
| 493 | + def _normalize_table_rows(self, rows: List[Dict[str, Any]]) -> List[Dict[str, Any]]: | ||
| 494 | + """检测并修正仅有单列的竖排表,转换为标准网格""" | ||
| 495 | + if not rows: | ||
| 496 | + return [] | ||
| 497 | + if not all(len((row.get("cells") or [])) == 1 for row in rows): | ||
| 498 | + return rows | ||
| 499 | + texts = [self._extract_row_text(row) for row in rows] | ||
| 500 | + header_span = self._detect_transposed_header_span(rows, texts) | ||
| 501 | + if not header_span: | ||
| 502 | + return rows | ||
| 503 | + normalized = self._transpose_single_cell_table(rows, header_span) | ||
| 504 | + return normalized or rows | ||
| 505 | + | ||
| 506 | + def _detect_transposed_header_span(self, rows: List[Dict[str, Any]], texts: List[str]) -> int: | ||
| 507 | + """推断竖排表头的行数,用于后续转置""" | ||
| 508 | + max_fields = min(8, len(rows) // 2) | ||
| 509 | + header_span = 0 | ||
| 510 | + for idx, text in enumerate(texts): | ||
| 511 | + if idx >= max_fields: | ||
| 512 | + break | ||
| 513 | + if self._is_potential_table_header(text): | ||
| 514 | + header_span += 1 | ||
| 515 | + else: | ||
| 516 | + break | ||
| 517 | + if header_span < 2: | ||
| 518 | + return 0 | ||
| 519 | + remainder = texts[header_span:] | ||
| 520 | + if not remainder or (len(rows) - header_span) % header_span != 0: | ||
| 521 | + return 0 | ||
| 522 | + if not any(self._looks_like_table_value(txt) for txt in remainder): | ||
| 523 | + return 0 | ||
| 524 | + return header_span | ||
| 525 | + | ||
| 526 | + def _is_potential_table_header(self, text: str) -> bool: | ||
| 527 | + """根据长度与字符特征判断是否像表头字段""" | ||
| 528 | + if not text: | ||
| 529 | + return False | ||
| 530 | + stripped = text.strip() | ||
| 531 | + if not stripped or len(stripped) > 12: | ||
| 532 | + return False | ||
| 533 | + return not any(ch.isdigit() or ch in self.TABLE_COMPLEX_CHARS for ch in stripped) | ||
| 534 | + | ||
| 535 | + def _looks_like_table_value(self, text: str) -> bool: | ||
| 536 | + """判断该文本是否更像数据值,用于辅助判断转置""" | ||
| 537 | + if not text: | ||
| 538 | + return False | ||
| 539 | + stripped = text.strip() | ||
| 540 | + if len(stripped) >= 12: | ||
| 541 | + return True | ||
| 542 | + return any(ch.isdigit() or ch in self.TABLE_COMPLEX_CHARS for ch in stripped) | ||
| 543 | + | ||
| 544 | + def _transpose_single_cell_table(self, rows: List[Dict[str, Any]], span: int) -> List[Dict[str, Any]]: | ||
| 545 | + """将单列多行的表格转换为标准表头 + 若干数据行""" | ||
| 546 | + total = len(rows) | ||
| 547 | + if total <= span or (total - span) % span != 0: | ||
| 548 | + return [] | ||
| 549 | + header_rows = rows[:span] | ||
| 550 | + data_rows = rows[span:] | ||
| 551 | + normalized: List[Dict[str, Any]] = [] | ||
| 552 | + header_cells = [] | ||
| 553 | + for row in header_rows: | ||
| 554 | + cell = copy.deepcopy((row.get("cells") or [{}])[0]) | ||
| 555 | + cell["header"] = True | ||
| 556 | + header_cells.append(cell) | ||
| 557 | + normalized.append({"cells": header_cells}) | ||
| 558 | + for start in range(0, len(data_rows), span): | ||
| 559 | + group = data_rows[start : start + span] | ||
| 560 | + if len(group) < span: | ||
| 561 | + break | ||
| 562 | + normalized.append( | ||
| 563 | + { | ||
| 564 | + "cells": [ | ||
| 565 | + copy.deepcopy((item.get("cells") or [{}])[0]) | ||
| 566 | + for item in group | ||
| 567 | + ] | ||
| 568 | + } | ||
| 569 | + ) | ||
| 570 | + return normalized | ||
| 571 | + | ||
| 572 | + def _extract_row_text(self, row: Dict[str, Any]) -> str: | ||
| 573 | + """提取表格行中的纯文本,方便启发式分析""" | ||
| 574 | + cells = row.get("cells") or [] | ||
| 575 | + if not cells: | ||
| 576 | + return "" | ||
| 577 | + cell = cells[0] | ||
| 578 | + texts: List[str] = [] | ||
| 579 | + for block in cell.get("blocks", []): | ||
| 580 | + if isinstance(block, dict): | ||
| 581 | + if block.get("type") == "paragraph": | ||
| 582 | + for inline in block.get("inlines") or []: | ||
| 583 | + if isinstance(inline, dict): | ||
| 584 | + value = inline.get("text") | ||
| 585 | + else: | ||
| 586 | + value = inline | ||
| 587 | + if value is None: | ||
| 588 | + continue | ||
| 589 | + texts.append(str(value)) | ||
| 590 | + return "".join(texts) | ||
| 591 | + | ||
| 465 | def _render_blockquote(self, block: Dict[str, Any]) -> str: | 592 | def _render_blockquote(self, block: Dict[str, Any]) -> str: |
| 466 | """渲染引用块,可嵌套其他block""" | 593 | """渲染引用块,可嵌套其他block""" |
| 467 | inner = self._render_blocks(block.get("blocks", [])) | 594 | inner = self._render_blocks(block.get("blocks", [])) |
| @@ -487,9 +614,63 @@ class HTMLRenderer: | @@ -487,9 +614,63 @@ class HTMLRenderer: | ||
| 487 | """渲染高亮提示盒,tone决定颜色""" | 614 | """渲染高亮提示盒,tone决定颜色""" |
| 488 | tone = block.get("tone", "info") | 615 | tone = block.get("tone", "info") |
| 489 | title = block.get("title") | 616 | title = block.get("title") |
| 490 | - inner = self._render_blocks(block.get("blocks", [])) | 617 | + safe_blocks, trailing_blocks = self._split_callout_content(block.get("blocks")) |
| 618 | + inner = self._render_blocks(safe_blocks) | ||
| 491 | title_html = f"<strong>{self._escape_html(title)}</strong>" if title else "" | 619 | title_html = f"<strong>{self._escape_html(title)}</strong>" if title else "" |
| 492 | - return f'<div class="callout tone-{tone}">{title_html}{inner}</div>' | 620 | + callout_html = f'<div class="callout tone-{tone}">{title_html}{inner}</div>' |
| 621 | + trailing_html = self._render_blocks(trailing_blocks) if trailing_blocks else "" | ||
| 622 | + return callout_html + trailing_html | ||
| 623 | + | ||
| 624 | + def _split_callout_content( | ||
| 625 | + self, blocks: List[Dict[str, Any]] | None | ||
| 626 | + ) -> tuple[List[Dict[str, Any]], List[Dict[str, Any]]]: | ||
| 627 | + """限定callout内部仅包含轻量内容,其余块剥离到外层""" | ||
| 628 | + if not blocks: | ||
| 629 | + return [], [] | ||
| 630 | + safe: List[Dict[str, Any]] = [] | ||
| 631 | + trailing: List[Dict[str, Any]] = [] | ||
| 632 | + for idx, child in enumerate(blocks): | ||
| 633 | + child_type = child.get("type") | ||
| 634 | + if child_type == "list": | ||
| 635 | + sanitized, overflow = self._sanitize_callout_list(child) | ||
| 636 | + if sanitized: | ||
| 637 | + safe.append(sanitized) | ||
| 638 | + if overflow: | ||
| 639 | + trailing.extend(overflow) | ||
| 640 | + trailing.extend(copy.deepcopy(blocks[idx + 1 :])) | ||
| 641 | + break | ||
| 642 | + elif child_type in self.CALLOUT_ALLOWED_TYPES: | ||
| 643 | + safe.append(child) | ||
| 644 | + else: | ||
| 645 | + trailing.extend(copy.deepcopy(blocks[idx:])) | ||
| 646 | + break | ||
| 647 | + else: | ||
| 648 | + return safe, [] | ||
| 649 | + return safe, trailing | ||
| 650 | + | ||
| 651 | + def _sanitize_callout_list( | ||
| 652 | + self, block: Dict[str, Any] | ||
| 653 | + ) -> tuple[Dict[str, Any] | None, List[Dict[str, Any]]]: | ||
| 654 | + """当列表项包含结构型block时,将其截断移出callout""" | ||
| 655 | + items = block.get("items") or [] | ||
| 656 | + if not items: | ||
| 657 | + return block, [] | ||
| 658 | + sanitized_items: List[List[Dict[str, Any]]] = [] | ||
| 659 | + trailing: List[Dict[str, Any]] = [] | ||
| 660 | + for idx, item in enumerate(items): | ||
| 661 | + safe, overflow = self._split_callout_content(item) | ||
| 662 | + if safe: | ||
| 663 | + sanitized_items.append(safe) | ||
| 664 | + if overflow: | ||
| 665 | + trailing.extend(overflow) | ||
| 666 | + for rest in items[idx + 1 :]: | ||
| 667 | + trailing.extend(copy.deepcopy(rest)) | ||
| 668 | + break | ||
| 669 | + if not sanitized_items: | ||
| 670 | + return None, trailing | ||
| 671 | + new_block = copy.deepcopy(block) | ||
| 672 | + new_block["items"] = sanitized_items | ||
| 673 | + return new_block, trailing | ||
| 493 | 674 | ||
| 494 | def _render_kpi_grid(self, block: Dict[str, Any]) -> str: | 675 | def _render_kpi_grid(self, block: Dict[str, Any]) -> str: |
| 495 | """渲染KPI卡片栅格,包含指标值与涨跌幅""" | 676 | """渲染KPI卡片栅格,包含指标值与涨跌幅""" |
| @@ -631,6 +812,8 @@ class HTMLRenderer: | @@ -631,6 +812,8 @@ class HTMLRenderer: | ||
| 631 | nested_marks = inline_payload.get("marks") | 812 | nested_marks = inline_payload.get("marks") |
| 632 | if isinstance(nested_marks, list): | 813 | if isinstance(nested_marks, list): |
| 633 | marks.extend(nested_marks) | 814 | marks.extend(nested_marks) |
| 815 | + elif any(key in payload for key in self.INLINE_ARTIFACT_KEYS): | ||
| 816 | + text_value = "" | ||
| 634 | 817 | ||
| 635 | return text_value, marks | 818 | return text_value, marks |
| 636 | 819 | ||
| @@ -1281,10 +1464,11 @@ function mergeOptions(base, override) { | @@ -1281,10 +1464,11 @@ function mergeOptions(base, override) { | ||
| 1281 | } | 1464 | } |
| 1282 | 1465 | ||
| 1283 | function resolveChartTypes(payload) { | 1466 | function resolveChartTypes(payload) { |
| 1467 | + const explicit = payload && payload.props && payload.props.type; | ||
| 1284 | const widgetType = payload && payload.widgetType ? payload.widgetType : 'chart.js/bar'; | 1468 | const widgetType = payload && payload.widgetType ? payload.widgetType : 'chart.js/bar'; |
| 1285 | - const primary = widgetType.includes('/') ? widgetType.split('/').pop() : widgetType; | 1469 | + const derived = widgetType && widgetType.includes('/') ? widgetType.split('/').pop() : widgetType; |
| 1286 | const extra = Array.isArray(payload && payload.preferredTypes) ? payload.preferredTypes : []; | 1470 | const extra = Array.isArray(payload && payload.preferredTypes) ? payload.preferredTypes : []; |
| 1287 | - const pipeline = [primary, ...extra, ...STABLE_CHART_TYPES]; | 1471 | + const pipeline = [explicit, derived, ...extra, ...STABLE_CHART_TYPES].filter(Boolean); |
| 1288 | const result = []; | 1472 | const result = []; |
| 1289 | pipeline.forEach(type => { | 1473 | pipeline.forEach(type => { |
| 1290 | if (type && !result.includes(type)) { | 1474 | if (type && !result.includes(type)) { |
| @@ -1456,6 +1640,15 @@ function buildChartOptions(payload) { | @@ -1456,6 +1640,15 @@ function buildChartOptions(payload) { | ||
| 1456 | } | 1640 | } |
| 1457 | 1641 | ||
| 1458 | function instantiateChart(ctx, payload, optionsTemplate, type) { | 1642 | function instantiateChart(ctx, payload, optionsTemplate, type) { |
| 1643 | + if (!ctx) { | ||
| 1644 | + return null; | ||
| 1645 | + } | ||
| 1646 | + if (ctx.canvas && typeof Chart !== 'undefined' && typeof Chart.getChart === 'function') { | ||
| 1647 | + const existing = Chart.getChart(ctx.canvas); | ||
| 1648 | + if (existing) { | ||
| 1649 | + existing.destroy(); | ||
| 1650 | + } | ||
| 1651 | + } | ||
| 1459 | const data = cloneDeep(payload && payload.data ? payload.data : {}); | 1652 | const data = cloneDeep(payload && payload.data ? payload.data : {}); |
| 1460 | const config = { | 1653 | const config = { |
| 1461 | type, | 1654 | type, |
-
Please register or login to post a comment