Fixed retry logic for AttributeError, TypeError, KeyError, IndexError, ValueError, and json.JSONDecodeError
Showing 2 changed files with 149 additions and 4 deletions
| @@ -719,6 +719,40 @@ class ReportAgent: | @@ -719,6 +719,40 @@ class ReportAgent: | ||
| 719 | stream_callback=chunk_callback | 719 | stream_callback=chunk_callback |
| 720 | ) | 720 | ) |
| 721 | break | 721 | break |
| 722 | + except (AttributeError, TypeError, KeyError, IndexError, ValueError, json.JSONDecodeError) as structure_error: | ||
| 723 | + # 捕获因 JSON 结构异常导致的运行时错误,包装为可重试异常 | ||
| 724 | + # 包括: | ||
| 725 | + # - AttributeError: 如 list.get() 调用失败 | ||
| 726 | + # - TypeError: 类型不匹配 | ||
| 727 | + # - KeyError: 字典键缺失 | ||
| 728 | + # - IndexError: 列表索引越界 | ||
| 729 | + # - ValueError: 值错误(如 LLM 返回空内容、缺少必要字段) | ||
| 730 | + # - json.JSONDecodeError: JSON 解析失败(未被内部捕获的情况) | ||
| 731 | + error_type = type(structure_error).__name__ | ||
| 732 | + logger.warning( | ||
| 733 | + "章节 {title} 生成过程中发生 {error_type}(第 {attempt}/{total} 次尝试),将尝试重新生成: {error}", | ||
| 734 | + title=section.title, | ||
| 735 | + error_type=error_type, | ||
| 736 | + attempt=attempt, | ||
| 737 | + total=chapter_max_attempts, | ||
| 738 | + error=structure_error, | ||
| 739 | + ) | ||
| 740 | + emit('chapter_status', { | ||
| 741 | + 'chapterId': section.chapter_id, | ||
| 742 | + 'title': section.title, | ||
| 743 | + 'status': 'retrying' if attempt < chapter_max_attempts else 'error', | ||
| 744 | + 'attempt': attempt, | ||
| 745 | + 'error': str(structure_error), | ||
| 746 | + 'reason': 'structure_error', | ||
| 747 | + 'error_type': error_type | ||
| 748 | + }) | ||
| 749 | + if attempt >= chapter_max_attempts: | ||
| 750 | + # 达到最大重试次数,包装为 ChapterJsonParseError 抛出 | ||
| 751 | + raise ChapterJsonParseError( | ||
| 752 | + f"{section.title} 章节因 {error_type} 在 {chapter_max_attempts} 次尝试后仍无法生成: {structure_error}" | ||
| 753 | + ) from structure_error | ||
| 754 | + attempt += 1 | ||
| 755 | + continue | ||
| 722 | except (ChapterJsonParseError, ChapterContentError, ChapterValidationError) as structured_error: | 756 | except (ChapterJsonParseError, ChapterContentError, ChapterValidationError) as structured_error: |
| 723 | if isinstance(structured_error, ChapterContentError): | 757 | if isinstance(structured_error, ChapterContentError): |
| 724 | error_kind = "content_sparse" | 758 | error_kind = "content_sparse" |
| @@ -671,7 +671,7 @@ class ChapterGenerationNode(BaseNode): | @@ -671,7 +671,7 @@ class ChapterGenerationNode(BaseNode): | ||
| 671 | cleaned = cleaned[:-3] | 671 | cleaned = cleaned[:-3] |
| 672 | cleaned = cleaned.strip() | 672 | cleaned = cleaned.strip() |
| 673 | if not cleaned: | 673 | if not cleaned: |
| 674 | - raise ValueError("LLM返回空内容") | 674 | + raise ChapterJsonParseError("LLM返回空内容", raw_text=raw_text) |
| 675 | 675 | ||
| 676 | candidate_payloads = [cleaned] | 676 | candidate_payloads = [cleaned] |
| 677 | repaired = self._repair_llm_json(cleaned) | 677 | repaired = self._repair_llm_json(cleaned) |
| @@ -714,7 +714,7 @@ class ChapterGenerationNode(BaseNode): | @@ -714,7 +714,7 @@ class ChapterGenerationNode(BaseNode): | ||
| 714 | return item["chapter"] | 714 | return item["chapter"] |
| 715 | if all(key in item for key in ("chapterId", "title", "blocks")): | 715 | if all(key in item for key in ("chapterId", "title", "blocks")): |
| 716 | return item | 716 | return item |
| 717 | - raise ValueError("章节JSON缺少chapter字段") | 717 | + raise ChapterJsonParseError("章节JSON缺少chapter字段或结构不完整", raw_text=cleaned) |
| 718 | 718 | ||
| 719 | def _persist_error_payload( | 719 | def _persist_error_payload( |
| 720 | self, | 720 | self, |
| @@ -996,13 +996,41 @@ class ChapterGenerationNode(BaseNode): | @@ -996,13 +996,41 @@ class ChapterGenerationNode(BaseNode): | ||
| 996 | """递归检查并修复嵌套结构,保证每个block合法""" | 996 | """递归检查并修复嵌套结构,保证每个block合法""" |
| 997 | if not isinstance(blocks, list): | 997 | if not isinstance(blocks, list): |
| 998 | return | 998 | return |
| 999 | - for block in blocks: | 999 | + # 先过滤掉非字典类型的异常 block |
| 1000 | + valid_indices = [] | ||
| 1001 | + for idx, block in enumerate(blocks): | ||
| 1002 | + if not isinstance(block, dict): | ||
| 1003 | + # 尝试将字符串转换为 paragraph | ||
| 1004 | + if isinstance(block, str) and block.strip(): | ||
| 1005 | + blocks[idx] = self._as_paragraph_block(block) | ||
| 1006 | + valid_indices.append(idx) | ||
| 1007 | + logger.warning(f"walk: 将字符串 block 转换为 paragraph") | ||
| 1008 | + elif isinstance(block, list): | ||
| 1009 | + # 尝试提取列表中的有效字典 | ||
| 1010 | + for item in block: | ||
| 1011 | + if isinstance(item, dict): | ||
| 1012 | + self._ensure_block_type(item) | ||
| 1013 | + blocks[idx] = item | ||
| 1014 | + valid_indices.append(idx) | ||
| 1015 | + logger.warning(f"walk: 从列表中提取字典 block") | ||
| 1016 | + break | ||
| 1017 | + else: | ||
| 1018 | + logger.warning(f"walk: 跳过无效的列表 block: {block}") | ||
| 1019 | + else: | ||
| 1020 | + logger.warning(f"walk: 跳过无效的 block(类型: {type(block).__name__})") | ||
| 1021 | + else: | ||
| 1022 | + valid_indices.append(idx) | ||
| 1023 | + | ||
| 1024 | + for idx in valid_indices: | ||
| 1025 | + block = blocks[idx] | ||
| 1000 | if not isinstance(block, dict): | 1026 | if not isinstance(block, dict): |
| 1001 | continue | 1027 | continue |
| 1002 | self._ensure_block_type(block) | 1028 | self._ensure_block_type(block) |
| 1003 | self._sanitize_block_content(block) | 1029 | self._sanitize_block_content(block) |
| 1004 | block_type = block.get("type") | 1030 | block_type = block.get("type") |
| 1005 | if block_type == "list": | 1031 | if block_type == "list": |
| 1032 | + # 自动修复 listType:确保是合法值 | ||
| 1033 | + self._normalize_list_type(block) | ||
| 1006 | items = block.get("items") | 1034 | items = block.get("items") |
| 1007 | normalized = self._normalize_list_items(items) | 1035 | normalized = self._normalize_list_items(items) |
| 1008 | if normalized: | 1036 | if normalized: |
| @@ -1013,8 +1041,12 @@ class ChapterGenerationNode(BaseNode): | @@ -1013,8 +1041,12 @@ class ChapterGenerationNode(BaseNode): | ||
| 1013 | walk(block.get("blocks")) | 1041 | walk(block.get("blocks")) |
| 1014 | elif block_type == "table": | 1042 | elif block_type == "table": |
| 1015 | for row in block.get("rows", []): | 1043 | for row in block.get("rows", []): |
| 1044 | + if not isinstance(row, dict): | ||
| 1045 | + continue | ||
| 1016 | cells = row.get("cells") or [] | 1046 | cells = row.get("cells") or [] |
| 1017 | for cell in cells: | 1047 | for cell in cells: |
| 1048 | + if not isinstance(cell, dict): | ||
| 1049 | + continue | ||
| 1018 | walk(cell.get("blocks")) | 1050 | walk(cell.get("blocks")) |
| 1019 | elif block_type == "widget": | 1051 | elif block_type == "widget": |
| 1020 | self._normalize_widget_block(block) | 1052 | self._normalize_widget_block(block) |
| @@ -1027,7 +1059,9 @@ class ChapterGenerationNode(BaseNode): | @@ -1027,7 +1059,9 @@ class ChapterGenerationNode(BaseNode): | ||
| 1027 | 1059 | ||
| 1028 | blocks = chapter.get("blocks") | 1060 | blocks = chapter.get("blocks") |
| 1029 | if isinstance(blocks, list): | 1061 | if isinstance(blocks, list): |
| 1030 | - chapter["blocks"] = self._merge_fragment_sequences(blocks) | 1062 | + # 在合并前先过滤掉所有非字典类型的 block |
| 1063 | + filtered_blocks = [b for b in blocks if isinstance(b, dict)] | ||
| 1064 | + chapter["blocks"] = self._merge_fragment_sequences(filtered_blocks) | ||
| 1031 | 1065 | ||
| 1032 | def _ensure_content_density(self, chapter: Dict[str, Any]): | 1066 | def _ensure_content_density(self, chapter: Dict[str, Any]): |
| 1033 | """ | 1067 | """ |
| @@ -1686,6 +1720,25 @@ class ChapterGenerationNode(BaseNode): | @@ -1686,6 +1720,25 @@ class ChapterGenerationNode(BaseNode): | ||
| 1686 | fragment_buffer = [] | 1720 | fragment_buffer = [] |
| 1687 | 1721 | ||
| 1688 | for block in blocks: | 1722 | for block in blocks: |
| 1723 | + # 类型检查:跳过非字典类型的异常 block,避免 AttributeError | ||
| 1724 | + if not isinstance(block, dict): | ||
| 1725 | + # 尝试将非字典类型转换为 paragraph | ||
| 1726 | + if isinstance(block, str) and block.strip(): | ||
| 1727 | + converted = self._as_paragraph_block(block) | ||
| 1728 | + logger.warning(f"检测到非字典类型的 block(字符串),已转换为 paragraph: {block[:50]}...") | ||
| 1729 | + merged.append(converted) | ||
| 1730 | + elif isinstance(block, list): | ||
| 1731 | + # 列表类型的 block 可能是 LLM 输出错误,尝试提取有效内容 | ||
| 1732 | + logger.warning(f"检测到列表类型的 block,尝试提取有效内容: {block}") | ||
| 1733 | + for item in block: | ||
| 1734 | + if isinstance(item, dict): | ||
| 1735 | + self._ensure_block_type(item) | ||
| 1736 | + merged.append(self._merge_nested_fragments(item)) | ||
| 1737 | + elif isinstance(item, str) and item.strip(): | ||
| 1738 | + merged.append(self._as_paragraph_block(item)) | ||
| 1739 | + else: | ||
| 1740 | + logger.warning(f"跳过无效的 block(类型: {type(block).__name__}): {block}") | ||
| 1741 | + continue | ||
| 1689 | if self._is_paragraph_fragment(block): | 1742 | if self._is_paragraph_fragment(block): |
| 1690 | fragment_buffer.append(block) | 1743 | fragment_buffer.append(block) |
| 1691 | continue | 1744 | continue |
| @@ -1697,6 +1750,24 @@ class ChapterGenerationNode(BaseNode): | @@ -1697,6 +1750,24 @@ class ChapterGenerationNode(BaseNode): | ||
| 1697 | 1750 | ||
| 1698 | def _merge_nested_fragments(self, block: Dict[str, Any]) -> Dict[str, Any]: | 1751 | def _merge_nested_fragments(self, block: Dict[str, Any]) -> Dict[str, Any]: |
| 1699 | """对嵌套结构(callout/blockquote/engineQuote/list/table)递归处理片段合并""" | 1752 | """对嵌套结构(callout/blockquote/engineQuote/list/table)递归处理片段合并""" |
| 1753 | + # 类型检查:确保 block 是字典类型 | ||
| 1754 | + if not isinstance(block, dict): | ||
| 1755 | + # 尝试将非字典类型转换为 paragraph | ||
| 1756 | + if isinstance(block, str) and block.strip(): | ||
| 1757 | + logger.warning(f"_merge_nested_fragments 收到字符串类型,已转换为 paragraph") | ||
| 1758 | + return self._as_paragraph_block(block) | ||
| 1759 | + elif isinstance(block, list): | ||
| 1760 | + # 尝试提取列表中的第一个有效字典 | ||
| 1761 | + for item in block: | ||
| 1762 | + if isinstance(item, dict): | ||
| 1763 | + self._ensure_block_type(item) | ||
| 1764 | + return self._merge_nested_fragments(item) | ||
| 1765 | + logger.warning(f"_merge_nested_fragments 收到无效列表,返回空 paragraph") | ||
| 1766 | + return self._as_paragraph_block("") | ||
| 1767 | + else: | ||
| 1768 | + logger.warning(f"_merge_nested_fragments 收到无效类型({type(block).__name__}),返回空 paragraph") | ||
| 1769 | + return self._as_paragraph_block("") | ||
| 1770 | + | ||
| 1700 | block_type = block.get("type") | 1771 | block_type = block.get("type") |
| 1701 | if block_type in {"callout", "blockquote", "engineQuote"}: | 1772 | if block_type in {"callout", "blockquote", "engineQuote"}: |
| 1702 | nested = block.get("blocks") | 1773 | nested = block.get("blocks") |
| @@ -1711,8 +1782,12 @@ class ChapterGenerationNode(BaseNode): | @@ -1711,8 +1782,12 @@ class ChapterGenerationNode(BaseNode): | ||
| 1711 | entry[:] = merged_entry | 1782 | entry[:] = merged_entry |
| 1712 | elif block_type == "table": | 1783 | elif block_type == "table": |
| 1713 | for row in block.get("rows", []): | 1784 | for row in block.get("rows", []): |
| 1785 | + if not isinstance(row, dict): | ||
| 1786 | + continue | ||
| 1714 | cells = row.get("cells") or [] | 1787 | cells = row.get("cells") or [] |
| 1715 | for cell in cells: | 1788 | for cell in cells: |
| 1789 | + if not isinstance(cell, dict): | ||
| 1790 | + continue | ||
| 1716 | nested_blocks = cell.get("blocks") | 1791 | nested_blocks = cell.get("blocks") |
| 1717 | if isinstance(nested_blocks, list): | 1792 | if isinstance(nested_blocks, list): |
| 1718 | cell["blocks"] = self._merge_fragment_sequences(nested_blocks) | 1793 | cell["blocks"] = self._merge_fragment_sequences(nested_blocks) |
| @@ -1848,6 +1923,42 @@ class ChapterGenerationNode(BaseNode): | @@ -1848,6 +1923,42 @@ class ChapterGenerationNode(BaseNode): | ||
| 1848 | return str(value) | 1923 | return str(value) |
| 1849 | return "" | 1924 | return "" |
| 1850 | 1925 | ||
| 1926 | + # 合法的 listType 值 | ||
| 1927 | + _ALLOWED_LIST_TYPES = {"ordered", "bullet", "task"} | ||
| 1928 | + # listType 的别名映射 | ||
| 1929 | + _LIST_TYPE_ALIASES = { | ||
| 1930 | + "unordered": "bullet", | ||
| 1931 | + "ul": "bullet", | ||
| 1932 | + "ol": "ordered", | ||
| 1933 | + "numbered": "ordered", | ||
| 1934 | + "checkbox": "task", | ||
| 1935 | + "check": "task", | ||
| 1936 | + "todo": "task", | ||
| 1937 | + } | ||
| 1938 | + | ||
| 1939 | + def _normalize_list_type(self, block: Dict[str, Any]): | ||
| 1940 | + """ | ||
| 1941 | + 确保 list block 的 listType 是合法值。 | ||
| 1942 | + | ||
| 1943 | + 如果 listType 缺失或非法,自动修复为 bullet。 | ||
| 1944 | + """ | ||
| 1945 | + list_type = block.get("listType") | ||
| 1946 | + if list_type in self._ALLOWED_LIST_TYPES: | ||
| 1947 | + return | ||
| 1948 | + # 尝试别名映射 | ||
| 1949 | + if isinstance(list_type, str): | ||
| 1950 | + lowered = list_type.strip().lower() | ||
| 1951 | + if lowered in self._LIST_TYPE_ALIASES: | ||
| 1952 | + block["listType"] = self._LIST_TYPE_ALIASES[lowered] | ||
| 1953 | + logger.warning(f"已将 listType '{list_type}' 映射为 '{block['listType']}'") | ||
| 1954 | + return | ||
| 1955 | + if lowered in self._ALLOWED_LIST_TYPES: | ||
| 1956 | + block["listType"] = lowered | ||
| 1957 | + return | ||
| 1958 | + # 无法识别,默认使用 bullet | ||
| 1959 | + logger.warning(f"检测到非法 listType: {list_type},已修复为 bullet") | ||
| 1960 | + block["listType"] = "bullet" | ||
| 1961 | + | ||
| 1851 | def _normalize_list_items(self, items: Any) -> List[List[Dict[str, Any]]]: | 1962 | def _normalize_list_items(self, items: Any) -> List[List[Dict[str, Any]]]: |
| 1852 | """确保list block的items为[[block, block], ...]结构""" | 1963 | """确保list block的items为[[block, block], ...]结构""" |
| 1853 | if not isinstance(items, list): | 1964 | if not isinstance(items, list): |
-
Please register or login to post a comment