Fixed retry logic for AttributeError, TypeError, KeyError, IndexError, ValueError, and json.JSONDecodeError

Fixed retry logic for AttributeError, TypeError, KeyError, IndexError, ValueError, and json.JSONDecodeError
马一丁
Commit 927c41c7df23fe675409b5f3c170aa137cbdaa8d 927c41c7 1 parent 9696eefd
Showing 2 changed files with 149 additions and 4 deletions
ReportEngine/agent.py
ReportEngine/nodes/chapter_generation_node.py
--- a/ReportEngine/agent.py
View file @927c41c
+++ b/ReportEngine/agent.py
View file @927c41c
@@ -719,6 +719,40 @@ class ReportAgent:
                             stream_callback=chunk_callback
                         )
                         break
+                     except (AttributeError, TypeError, KeyError, IndexError, ValueError, json.JSONDecodeError) as structure_error:
+                         # 捕获因 JSON 结构异常导致的运行时错误，包装为可重试异常
+                         # 包括：
+                         # - AttributeError: 如 list.get() 调用失败
+                         # - TypeError: 类型不匹配
+                         # - KeyError: 字典键缺失
+                         # - IndexError: 列表索引越界
+                         # - ValueError: 值错误（如 LLM 返回空内容、缺少必要字段）
+                         # - json.JSONDecodeError: JSON 解析失败（未被内部捕获的情况）
+                         error_type = type(structure_error).__name__
+                         logger.warning(
+                             "章节 {title} 生成过程中发生 {error_type}（第 {attempt}/{total} 次尝试），将尝试重新生成: {error}",
+                             title=section.title,
+                             error_type=error_type,
+                             attempt=attempt,
+                             total=chapter_max_attempts,
+                             error=structure_error,
+                         )
+                         emit('chapter_status', {
+                             'chapterId': section.chapter_id,
+                             'title': section.title,
+                             'status': 'retrying' if attempt < chapter_max_attempts else 'error',
+                             'attempt': attempt,
+                             'error': str(structure_error),
+                             'reason': 'structure_error',
+                             'error_type': error_type
+                         })
+                         if attempt >= chapter_max_attempts:
+                             # 达到最大重试次数，包装为 ChapterJsonParseError 抛出
+                             raise ChapterJsonParseError(
+                                 f"{section.title} 章节因 {error_type} 在 {chapter_max_attempts} 次尝试后仍无法生成: {structure_error}"
+                             ) from structure_error
+                         attempt += 1
+                         continue
                     except (ChapterJsonParseError, ChapterContentError, ChapterValidationError) as structured_error:
                         if isinstance(structured_error, ChapterContentError):
                             error_kind = "content_sparse"
--- a/ReportEngine/nodes/chapter_generation_node.py
View file @927c41c
+++ b/ReportEngine/nodes/chapter_generation_node.py
View file @927c41c
@@ -671,7 +671,7 @@ class ChapterGenerationNode(BaseNode):
             cleaned = cleaned[:-3]
         cleaned = cleaned.strip()
         if not cleaned:
-             raise ValueError("LLM返回空内容")
+             raise ChapterJsonParseError("LLM返回空内容", raw_text=raw_text)
 
         candidate_payloads = [cleaned]
         repaired = self._repair_llm_json(cleaned)
@@ -714,7 +714,7 @@ class ChapterGenerationNode(BaseNode):
                         return item["chapter"]
                     if all(key in item for key in ("chapterId", "title", "blocks")):
                         return item
-         raise ValueError("章节JSON缺少chapter字段")
+         raise ChapterJsonParseError("章节JSON缺少chapter字段或结构不完整", raw_text=cleaned)
 
     def _persist_error_payload(
         self,
@@ -996,13 +996,41 @@ class ChapterGenerationNode(BaseNode):
             """递归检查并修复嵌套结构，保证每个block合法"""
             if not isinstance(blocks, list):
                 return
-             for block in blocks:
+             # 先过滤掉非字典类型的异常 block
+             valid_indices = []
+             for idx, block in enumerate(blocks):
+                 if not isinstance(block, dict):
+                     # 尝试将字符串转换为 paragraph
+                     if isinstance(block, str) and block.strip():
+                         blocks[idx] = self._as_paragraph_block(block)
+                         valid_indices.append(idx)
+                         logger.warning(f"walk: 将字符串 block 转换为 paragraph")
+                     elif isinstance(block, list):
+                         # 尝试提取列表中的有效字典
+                         for item in block:
+                             if isinstance(item, dict):
+                                 self._ensure_block_type(item)
+                                 blocks[idx] = item
+                                 valid_indices.append(idx)
+                                 logger.warning(f"walk: 从列表中提取字典 block")
+                                 break
+                         else:
+                             logger.warning(f"walk: 跳过无效的列表 block: {block}")
+                     else:
+                         logger.warning(f"walk: 跳过无效的 block（类型: {type(block).__name__}）")
+                 else:
+                     valid_indices.append(idx)
+             
+             for idx in valid_indices:
+                 block = blocks[idx]
                 if not isinstance(block, dict):
                     continue
                 self._ensure_block_type(block)
                 self._sanitize_block_content(block)
                 block_type = block.get("type")
                 if block_type == "list":
+                     # 自动修复 listType：确保是合法值
+                     self._normalize_list_type(block)
                     items = block.get("items")
                     normalized = self._normalize_list_items(items)
                     if normalized:
@@ -1013,8 +1041,12 @@ class ChapterGenerationNode(BaseNode):
                     walk(block.get("blocks"))
                 elif block_type == "table":
                     for row in block.get("rows", []):
+                         if not isinstance(row, dict):
+                             continue
                         cells = row.get("cells") or []
                         for cell in cells:
+                             if not isinstance(cell, dict):
+                                 continue
                             walk(cell.get("blocks"))
                 elif block_type == "widget":
                     self._normalize_widget_block(block)
@@ -1027,7 +1059,9 @@ class ChapterGenerationNode(BaseNode):
 
         blocks = chapter.get("blocks")
         if isinstance(blocks, list):
-             chapter["blocks"] = self._merge_fragment_sequences(blocks)
+             # 在合并前先过滤掉所有非字典类型的 block
+             filtered_blocks = [b for b in blocks if isinstance(b, dict)]
+             chapter["blocks"] = self._merge_fragment_sequences(filtered_blocks)
 
     def _ensure_content_density(self, chapter: Dict[str, Any]):
         """
@@ -1686,6 +1720,25 @@ class ChapterGenerationNode(BaseNode):
             fragment_buffer = []
 
         for block in blocks:
+             # 类型检查：跳过非字典类型的异常 block，避免 AttributeError
+             if not isinstance(block, dict):
+                 # 尝试将非字典类型转换为 paragraph
+                 if isinstance(block, str) and block.strip():
+                     converted = self._as_paragraph_block(block)
+                     logger.warning(f"检测到非字典类型的 block（字符串），已转换为 paragraph: {block[:50]}...")
+                     merged.append(converted)
+                 elif isinstance(block, list):
+                     # 列表类型的 block 可能是 LLM 输出错误，尝试提取有效内容
+                     logger.warning(f"检测到列表类型的 block，尝试提取有效内容: {block}")
+                     for item in block:
+                         if isinstance(item, dict):
+                             self._ensure_block_type(item)
+                             merged.append(self._merge_nested_fragments(item))
+                         elif isinstance(item, str) and item.strip():
+                             merged.append(self._as_paragraph_block(item))
+                 else:
+                     logger.warning(f"跳过无效的 block（类型: {type(block).__name__}）: {block}")
+                 continue
             if self._is_paragraph_fragment(block):
                 fragment_buffer.append(block)
                 continue
@@ -1697,6 +1750,24 @@ class ChapterGenerationNode(BaseNode):
 
     def _merge_nested_fragments(self, block: Dict[str, Any]) -> Dict[str, Any]:
         """对嵌套结构（callout/blockquote/engineQuote/list/table）递归处理片段合并"""
+         # 类型检查：确保 block 是字典类型
+         if not isinstance(block, dict):
+             # 尝试将非字典类型转换为 paragraph
+             if isinstance(block, str) and block.strip():
+                 logger.warning(f"_merge_nested_fragments 收到字符串类型，已转换为 paragraph")
+                 return self._as_paragraph_block(block)
+             elif isinstance(block, list):
+                 # 尝试提取列表中的第一个有效字典
+                 for item in block:
+                     if isinstance(item, dict):
+                         self._ensure_block_type(item)
+                         return self._merge_nested_fragments(item)
+                 logger.warning(f"_merge_nested_fragments 收到无效列表，返回空 paragraph")
+                 return self._as_paragraph_block("")
+             else:
+                 logger.warning(f"_merge_nested_fragments 收到无效类型（{type(block).__name__}），返回空 paragraph")
+                 return self._as_paragraph_block("")
+         
         block_type = block.get("type")
         if block_type in {"callout", "blockquote", "engineQuote"}:
             nested = block.get("blocks")
@@ -1711,8 +1782,12 @@ class ChapterGenerationNode(BaseNode):
                         entry[:] = merged_entry
         elif block_type == "table":
             for row in block.get("rows", []):
+                 if not isinstance(row, dict):
+                     continue
                 cells = row.get("cells") or []
                 for cell in cells:
+                     if not isinstance(cell, dict):
+                         continue
                     nested_blocks = cell.get("blocks")
                     if isinstance(nested_blocks, list):
                         cell["blocks"] = self._merge_fragment_sequences(nested_blocks)
@@ -1848,6 +1923,42 @@ class ChapterGenerationNode(BaseNode):
                 return str(value)
         return ""
 
+     # 合法的 listType 值
+     _ALLOWED_LIST_TYPES = {"ordered", "bullet", "task"}
+     # listType 的别名映射
+     _LIST_TYPE_ALIASES = {
+         "unordered": "bullet",
+         "ul": "bullet",
+         "ol": "ordered",
+         "numbered": "ordered",
+         "checkbox": "task",
+         "check": "task",
+         "todo": "task",
+     }
+ 
+     def _normalize_list_type(self, block: Dict[str, Any]):
+         """
+         Ensure a list block's ``listType`` holds one of the allowed values.
+ 
+         String values are matched against ``_ALLOWED_LIST_TYPES`` as-is, then
+         (after stripping whitespace and lower-casing) against
+         ``_LIST_TYPE_ALIASES`` and ``_ALLOWED_LIST_TYPES`` again. A missing,
+         non-string, or unrecognized value is replaced with ``"bullet"``.
+ 
+         The block is modified in place; nothing is returned.
+         """
+         list_type = block.get("listType")
+         if isinstance(list_type, str):
+             # Membership is only tested for strings: an unhashable value
+             # (e.g. a list or dict emitted by the LLM) would raise TypeError
+             # when looked up in a set or dict instead of being repaired.
+             if list_type in self._ALLOWED_LIST_TYPES:
+                 return
+             # Try the alias mapping on the normalized spelling.
+             lowered = list_type.strip().lower()
+             if lowered in self._LIST_TYPE_ALIASES:
+                 block["listType"] = self._LIST_TYPE_ALIASES[lowered]
+                 logger.warning(f"已将 listType '{list_type}' 映射为 '{block['listType']}'")
+                 return
+             # Accept case/whitespace variants of an allowed value.
+             if lowered in self._ALLOWED_LIST_TYPES:
+                 block["listType"] = lowered
+                 return
+         # Unrecognized value: fall back to bullet.
+         logger.warning(f"检测到非法 listType: {list_type}，已修复为 bullet")
+         block["listType"] = "bullet"
+ 
     def _normalize_list_items(self, items: Any) -> List[List[Dict[str, Any]]]:
         """确保list block的items为[[block, block], ...]结构"""
         if not isinstance(items, list):