马一丁

Fixed retry logic for AttributeError, TypeError, KeyError, IndexError, ValueError, and json.JSONDecodeError
@@ -719,6 +719,40 @@ class ReportAgent: @@ -719,6 +719,40 @@ class ReportAgent:
719 stream_callback=chunk_callback 719 stream_callback=chunk_callback
720 ) 720 )
721 break 721 break
  722 + except (AttributeError, TypeError, KeyError, IndexError, ValueError, json.JSONDecodeError) as structure_error:
  723 + # 捕获因 JSON 结构异常导致的运行时错误,包装为可重试异常
  724 + # 包括:
  725 + # - AttributeError: 如 list.get() 调用失败
  726 + # - TypeError: 类型不匹配
  727 + # - KeyError: 字典键缺失
  728 + # - IndexError: 列表索引越界
  729 + # - ValueError: 值错误(如 LLM 返回空内容、缺少必要字段)
  730 + # - json.JSONDecodeError: JSON 解析失败(未被内部捕获的情况)
  731 + error_type = type(structure_error).__name__
  732 + logger.warning(
  733 + "章节 {title} 生成过程中发生 {error_type}(第 {attempt}/{total} 次尝试),将尝试重新生成: {error}",
  734 + title=section.title,
  735 + error_type=error_type,
  736 + attempt=attempt,
  737 + total=chapter_max_attempts,
  738 + error=structure_error,
  739 + )
  740 + emit('chapter_status', {
  741 + 'chapterId': section.chapter_id,
  742 + 'title': section.title,
  743 + 'status': 'retrying' if attempt < chapter_max_attempts else 'error',
  744 + 'attempt': attempt,
  745 + 'error': str(structure_error),
  746 + 'reason': 'structure_error',
  747 + 'error_type': error_type
  748 + })
  749 + if attempt >= chapter_max_attempts:
  750 + # 达到最大重试次数,包装为 ChapterJsonParseError 抛出
  751 + raise ChapterJsonParseError(
  752 + f"{section.title} 章节因 {error_type} 在 {chapter_max_attempts} 次尝试后仍无法生成: {structure_error}"
  753 + ) from structure_error
  754 + attempt += 1
  755 + continue
722 except (ChapterJsonParseError, ChapterContentError, ChapterValidationError) as structured_error: 756 except (ChapterJsonParseError, ChapterContentError, ChapterValidationError) as structured_error:
723 if isinstance(structured_error, ChapterContentError): 757 if isinstance(structured_error, ChapterContentError):
724 error_kind = "content_sparse" 758 error_kind = "content_sparse"
@@ -671,7 +671,7 @@ class ChapterGenerationNode(BaseNode): @@ -671,7 +671,7 @@ class ChapterGenerationNode(BaseNode):
671 cleaned = cleaned[:-3] 671 cleaned = cleaned[:-3]
672 cleaned = cleaned.strip() 672 cleaned = cleaned.strip()
673 if not cleaned: 673 if not cleaned:
674 - raise ValueError("LLM返回空内容") 674 + raise ChapterJsonParseError("LLM返回空内容", raw_text=raw_text)
675 675
676 candidate_payloads = [cleaned] 676 candidate_payloads = [cleaned]
677 repaired = self._repair_llm_json(cleaned) 677 repaired = self._repair_llm_json(cleaned)
@@ -714,7 +714,7 @@ class ChapterGenerationNode(BaseNode): @@ -714,7 +714,7 @@ class ChapterGenerationNode(BaseNode):
714 return item["chapter"] 714 return item["chapter"]
715 if all(key in item for key in ("chapterId", "title", "blocks")): 715 if all(key in item for key in ("chapterId", "title", "blocks")):
716 return item 716 return item
717 - raise ValueError("章节JSON缺少chapter字段") 717 + raise ChapterJsonParseError("章节JSON缺少chapter字段或结构不完整", raw_text=cleaned)
718 718
719 def _persist_error_payload( 719 def _persist_error_payload(
720 self, 720 self,
@@ -996,13 +996,41 @@ class ChapterGenerationNode(BaseNode): @@ -996,13 +996,41 @@ class ChapterGenerationNode(BaseNode):
996 """递归检查并修复嵌套结构,保证每个block合法""" 996 """递归检查并修复嵌套结构,保证每个block合法"""
997 if not isinstance(blocks, list): 997 if not isinstance(blocks, list):
998 return 998 return
999 - for block in blocks: 999 + # 先过滤掉非字典类型的异常 block
  1000 + valid_indices = []
  1001 + for idx, block in enumerate(blocks):
  1002 + if not isinstance(block, dict):
  1003 + # 尝试将字符串转换为 paragraph
  1004 + if isinstance(block, str) and block.strip():
  1005 + blocks[idx] = self._as_paragraph_block(block)
  1006 + valid_indices.append(idx)
  1007 + logger.warning(f"walk: 将字符串 block 转换为 paragraph")
  1008 + elif isinstance(block, list):
  1009 + # 尝试提取列表中的有效字典
  1010 + for item in block:
  1011 + if isinstance(item, dict):
  1012 + self._ensure_block_type(item)
  1013 + blocks[idx] = item
  1014 + valid_indices.append(idx)
  1015 + logger.warning(f"walk: 从列表中提取字典 block")
  1016 + break
  1017 + else:
  1018 + logger.warning(f"walk: 跳过无效的列表 block: {block}")
  1019 + else:
  1020 + logger.warning(f"walk: 跳过无效的 block(类型: {type(block).__name__})")
  1021 + else:
  1022 + valid_indices.append(idx)
  1023 +
  1024 + for idx in valid_indices:
  1025 + block = blocks[idx]
1000 if not isinstance(block, dict): 1026 if not isinstance(block, dict):
1001 continue 1027 continue
1002 self._ensure_block_type(block) 1028 self._ensure_block_type(block)
1003 self._sanitize_block_content(block) 1029 self._sanitize_block_content(block)
1004 block_type = block.get("type") 1030 block_type = block.get("type")
1005 if block_type == "list": 1031 if block_type == "list":
  1032 + # 自动修复 listType:确保是合法值
  1033 + self._normalize_list_type(block)
1006 items = block.get("items") 1034 items = block.get("items")
1007 normalized = self._normalize_list_items(items) 1035 normalized = self._normalize_list_items(items)
1008 if normalized: 1036 if normalized:
@@ -1013,8 +1041,12 @@ class ChapterGenerationNode(BaseNode): @@ -1013,8 +1041,12 @@ class ChapterGenerationNode(BaseNode):
1013 walk(block.get("blocks")) 1041 walk(block.get("blocks"))
1014 elif block_type == "table": 1042 elif block_type == "table":
1015 for row in block.get("rows", []): 1043 for row in block.get("rows", []):
  1044 + if not isinstance(row, dict):
  1045 + continue
1016 cells = row.get("cells") or [] 1046 cells = row.get("cells") or []
1017 for cell in cells: 1047 for cell in cells:
  1048 + if not isinstance(cell, dict):
  1049 + continue
1018 walk(cell.get("blocks")) 1050 walk(cell.get("blocks"))
1019 elif block_type == "widget": 1051 elif block_type == "widget":
1020 self._normalize_widget_block(block) 1052 self._normalize_widget_block(block)
@@ -1027,7 +1059,9 @@ class ChapterGenerationNode(BaseNode): @@ -1027,7 +1059,9 @@ class ChapterGenerationNode(BaseNode):
1027 1059
1028 blocks = chapter.get("blocks") 1060 blocks = chapter.get("blocks")
1029 if isinstance(blocks, list): 1061 if isinstance(blocks, list):
1030 - chapter["blocks"] = self._merge_fragment_sequences(blocks) 1062 + # 在合并前先过滤掉所有非字典类型的 block
  1063 + filtered_blocks = [b for b in blocks if isinstance(b, dict)]
  1064 + chapter["blocks"] = self._merge_fragment_sequences(filtered_blocks)
1031 1065
1032 def _ensure_content_density(self, chapter: Dict[str, Any]): 1066 def _ensure_content_density(self, chapter: Dict[str, Any]):
1033 """ 1067 """
@@ -1686,6 +1720,25 @@ class ChapterGenerationNode(BaseNode): @@ -1686,6 +1720,25 @@ class ChapterGenerationNode(BaseNode):
1686 fragment_buffer = [] 1720 fragment_buffer = []
1687 1721
1688 for block in blocks: 1722 for block in blocks:
  1723 + # 类型检查:跳过非字典类型的异常 block,避免 AttributeError
  1724 + if not isinstance(block, dict):
  1725 + # 尝试将非字典类型转换为 paragraph
  1726 + if isinstance(block, str) and block.strip():
  1727 + converted = self._as_paragraph_block(block)
  1728 + logger.warning(f"检测到非字典类型的 block(字符串),已转换为 paragraph: {block[:50]}...")
  1729 + merged.append(converted)
  1730 + elif isinstance(block, list):
  1731 + # 列表类型的 block 可能是 LLM 输出错误,尝试提取有效内容
  1732 + logger.warning(f"检测到列表类型的 block,尝试提取有效内容: {block}")
  1733 + for item in block:
  1734 + if isinstance(item, dict):
  1735 + self._ensure_block_type(item)
  1736 + merged.append(self._merge_nested_fragments(item))
  1737 + elif isinstance(item, str) and item.strip():
  1738 + merged.append(self._as_paragraph_block(item))
  1739 + else:
  1740 + logger.warning(f"跳过无效的 block(类型: {type(block).__name__}): {block}")
  1741 + continue
1689 if self._is_paragraph_fragment(block): 1742 if self._is_paragraph_fragment(block):
1690 fragment_buffer.append(block) 1743 fragment_buffer.append(block)
1691 continue 1744 continue
@@ -1697,6 +1750,24 @@ class ChapterGenerationNode(BaseNode): @@ -1697,6 +1750,24 @@ class ChapterGenerationNode(BaseNode):
1697 1750
1698 def _merge_nested_fragments(self, block: Dict[str, Any]) -> Dict[str, Any]: 1751 def _merge_nested_fragments(self, block: Dict[str, Any]) -> Dict[str, Any]:
1699 """对嵌套结构(callout/blockquote/engineQuote/list/table)递归处理片段合并""" 1752 """对嵌套结构(callout/blockquote/engineQuote/list/table)递归处理片段合并"""
  1753 + # 类型检查:确保 block 是字典类型
  1754 + if not isinstance(block, dict):
  1755 + # 尝试将非字典类型转换为 paragraph
  1756 + if isinstance(block, str) and block.strip():
  1757 + logger.warning(f"_merge_nested_fragments 收到字符串类型,已转换为 paragraph")
  1758 + return self._as_paragraph_block(block)
  1759 + elif isinstance(block, list):
  1760 + # 尝试提取列表中的第一个有效字典
  1761 + for item in block:
  1762 + if isinstance(item, dict):
  1763 + self._ensure_block_type(item)
  1764 + return self._merge_nested_fragments(item)
  1765 + logger.warning(f"_merge_nested_fragments 收到无效列表,返回空 paragraph")
  1766 + return self._as_paragraph_block("")
  1767 + else:
  1768 + logger.warning(f"_merge_nested_fragments 收到无效类型({type(block).__name__}),返回空 paragraph")
  1769 + return self._as_paragraph_block("")
  1770 +
1700 block_type = block.get("type") 1771 block_type = block.get("type")
1701 if block_type in {"callout", "blockquote", "engineQuote"}: 1772 if block_type in {"callout", "blockquote", "engineQuote"}:
1702 nested = block.get("blocks") 1773 nested = block.get("blocks")
@@ -1711,8 +1782,12 @@ class ChapterGenerationNode(BaseNode): @@ -1711,8 +1782,12 @@ class ChapterGenerationNode(BaseNode):
1711 entry[:] = merged_entry 1782 entry[:] = merged_entry
1712 elif block_type == "table": 1783 elif block_type == "table":
1713 for row in block.get("rows", []): 1784 for row in block.get("rows", []):
  1785 + if not isinstance(row, dict):
  1786 + continue
1714 cells = row.get("cells") or [] 1787 cells = row.get("cells") or []
1715 for cell in cells: 1788 for cell in cells:
  1789 + if not isinstance(cell, dict):
  1790 + continue
1716 nested_blocks = cell.get("blocks") 1791 nested_blocks = cell.get("blocks")
1717 if isinstance(nested_blocks, list): 1792 if isinstance(nested_blocks, list):
1718 cell["blocks"] = self._merge_fragment_sequences(nested_blocks) 1793 cell["blocks"] = self._merge_fragment_sequences(nested_blocks)
@@ -1848,6 +1923,42 @@ class ChapterGenerationNode(BaseNode): @@ -1848,6 +1923,42 @@ class ChapterGenerationNode(BaseNode):
1848 return str(value) 1923 return str(value)
1849 return "" 1924 return ""
1850 1925
  1926 + # 合法的 listType
  1927 + _ALLOWED_LIST_TYPES = {"ordered", "bullet", "task"}
  1928 + # listType 的别名映射
  1929 + _LIST_TYPE_ALIASES = {
  1930 + "unordered": "bullet",
  1931 + "ul": "bullet",
  1932 + "ol": "ordered",
  1933 + "numbered": "ordered",
  1934 + "checkbox": "task",
  1935 + "check": "task",
  1936 + "todo": "task",
  1937 + }
  1938 +
  1939 + def _normalize_list_type(self, block: Dict[str, Any]):
  1940 + """
  1941 + 确保 list block listType 是合法值。
  1942 +
  1943 + 如果 listType 缺失或非法,自动修复为 bullet
  1944 + """
  1945 + list_type = block.get("listType")
  1946 + if list_type in self._ALLOWED_LIST_TYPES:
  1947 + return
  1948 + # 尝试别名映射
  1949 + if isinstance(list_type, str):
  1950 + lowered = list_type.strip().lower()
  1951 + if lowered in self._LIST_TYPE_ALIASES:
  1952 + block["listType"] = self._LIST_TYPE_ALIASES[lowered]
  1953 + logger.warning(f"已将 listType '{list_type}' 映射为 '{block['listType']}'")
  1954 + return
  1955 + if lowered in self._ALLOWED_LIST_TYPES:
  1956 + block["listType"] = lowered
  1957 + return
  1958 + # 无法识别,默认使用 bullet
  1959 + logger.warning(f"检测到非法 listType: {list_type},已修复为 bullet")
  1960 + block["listType"] = "bullet"
  1961 +
1851 def _normalize_list_items(self, items: Any) -> List[List[Dict[str, Any]]]: 1962 def _normalize_list_items(self, items: Any) -> List[List[Dict[str, Any]]]:
1852 """确保list blockitems为[[block, block], ...]结构""" 1963 """确保list blockitems为[[block, block], ...]结构"""
1853 if not isinstance(items, list): 1964 if not isinstance(items, list):