Showing 5 changed files with 283 additions and 39 deletions
| @@ -13,7 +13,8 @@ from ..prompts import SYSTEM_PROMPT_REPORT_STRUCTURE | @@ -13,7 +13,8 @@ from ..prompts import SYSTEM_PROMPT_REPORT_STRUCTURE | ||
| 13 | from ..utils.text_processing import ( | 13 | from ..utils.text_processing import ( |
| 14 | remove_reasoning_from_output, | 14 | remove_reasoning_from_output, |
| 15 | clean_json_tags, | 15 | clean_json_tags, |
| 16 | - extract_clean_response | 16 | + extract_clean_response, |
| 17 | + fix_incomplete_json | ||
| 17 | ) | 18 | ) |
| 18 | 19 | ||
| 19 | 20 | ||
| @@ -77,48 +78,91 @@ class ReportStructureNode(StateMutationNode): | @@ -77,48 +78,91 @@ class ReportStructureNode(StateMutationNode): | ||
| 77 | cleaned_output = remove_reasoning_from_output(output) | 78 | cleaned_output = remove_reasoning_from_output(output) |
| 78 | cleaned_output = clean_json_tags(cleaned_output) | 79 | cleaned_output = clean_json_tags(cleaned_output) |
| 79 | 80 | ||
| 81 | + # 记录清理后的输出用于调试 | ||
| 82 | + self.log_info(f"清理后的输出: {cleaned_output[:200]}...") | ||
| 83 | + | ||
| 80 | # 解析JSON | 84 | # 解析JSON |
| 81 | try: | 85 | try: |
| 82 | report_structure = json.loads(cleaned_output) | 86 | report_structure = json.loads(cleaned_output) |
| 83 | - except JSONDecodeError: | 87 | + self.log_info("JSON解析成功") |
| 88 | + except JSONDecodeError as e: | ||
| 89 | + self.log_info(f"JSON解析失败: {str(e)}") | ||
| 84 | # 使用更强大的提取方法 | 90 | # 使用更强大的提取方法 |
| 85 | report_structure = extract_clean_response(cleaned_output) | 91 | report_structure = extract_clean_response(cleaned_output) |
| 86 | if "error" in report_structure: | 92 | if "error" in report_structure: |
| 87 | - raise ValueError("JSON解析失败") | 93 | + self.log_error("JSON解析失败,尝试修复...") |
| 94 | + # 尝试修复JSON | ||
| 95 | + fixed_json = fix_incomplete_json(cleaned_output) | ||
| 96 | + if fixed_json: | ||
| 97 | + try: | ||
| 98 | + report_structure = json.loads(fixed_json) | ||
| 99 | + self.log_info("JSON修复成功") | ||
| 100 | + except JSONDecodeError: | ||
| 101 | + self.log_error("JSON修复失败") | ||
| 102 | + # 返回默认结构 | ||
| 103 | + return self._generate_default_structure() | ||
| 104 | + else: | ||
| 105 | + self.log_error("无法修复JSON,使用默认结构") | ||
| 106 | + return self._generate_default_structure() | ||
| 88 | 107 | ||
| 89 | # 验证结构 | 108 | # 验证结构 |
| 90 | if not isinstance(report_structure, list): | 109 | if not isinstance(report_structure, list): |
| 91 | - raise ValueError("报告结构应该是一个列表") | 110 | + self.log_info("报告结构不是列表,尝试转换...") |
| 111 | + if isinstance(report_structure, dict): | ||
| 112 | + # 如果是单个对象,包装成列表 | ||
| 113 | + report_structure = [report_structure] | ||
| 114 | + else: | ||
| 115 | + self.log_error("报告结构格式无效,使用默认结构") | ||
| 116 | + return self._generate_default_structure() | ||
| 92 | 117 | ||
| 93 | # 验证每个段落 | 118 | # 验证每个段落 |
| 94 | validated_structure = [] | 119 | validated_structure = [] |
| 95 | for i, paragraph in enumerate(report_structure): | 120 | for i, paragraph in enumerate(report_structure): |
| 96 | if not isinstance(paragraph, dict): | 121 | if not isinstance(paragraph, dict): |
| 122 | + self.log_warning(f"段落 {i+1} 不是字典格式,跳过") | ||
| 97 | continue | 123 | continue |
| 98 | 124 | ||
| 99 | title = paragraph.get("title", f"段落 {i+1}") | 125 | title = paragraph.get("title", f"段落 {i+1}") |
| 100 | content = paragraph.get("content", "") | 126 | content = paragraph.get("content", "") |
| 101 | 127 | ||
| 128 | + if not title or not content: | ||
| 129 | + self.log_warning(f"段落 {i+1} 缺少标题或内容,跳过") | ||
| 130 | + continue | ||
| 131 | + | ||
| 102 | validated_structure.append({ | 132 | validated_structure.append({ |
| 103 | "title": title, | 133 | "title": title, |
| 104 | "content": content | 134 | "content": content |
| 105 | }) | 135 | }) |
| 106 | 136 | ||
| 137 | + if not validated_structure: | ||
| 138 | + self.log_warning("没有有效的段落结构,使用默认结构") | ||
| 139 | + return self._generate_default_structure() | ||
| 140 | + | ||
| 141 | + self.log_info(f"成功验证 {len(validated_structure)} 个段落结构") | ||
| 107 | return validated_structure | 142 | return validated_structure |
| 108 | 143 | ||
| 109 | except Exception as e: | 144 | except Exception as e: |
| 110 | self.log_error(f"处理输出失败: {str(e)}") | 145 | self.log_error(f"处理输出失败: {str(e)}") |
| 111 | - # 返回默认结构 | ||
| 112 | - return [ | ||
| 113 | - { | ||
| 114 | - "title": "概述", | ||
| 115 | - "content": f"对'{self.query}'的总体概述和背景介绍" | ||
| 116 | - }, | ||
| 117 | - { | ||
| 118 | - "title": "详细分析", | ||
| 119 | - "content": f"深入分析'{self.query}'的相关内容" | ||
| 120 | - } | ||
| 121 | - ] | 146 | + return self._generate_default_structure() |
| 147 | + | ||
| 148 | + def _generate_default_structure(self) -> List[Dict[str, str]]: | ||
| 149 | + """ | ||
| 150 | + 生成默认的报告结构 | ||
| 151 | + | ||
| 152 | + Returns: | ||
| 153 | + 默认的报告结构列表 | ||
| 154 | + """ | ||
| 155 | + self.log_info("生成默认报告结构") | ||
| 156 | + return [ | ||
| 157 | + { | ||
| 158 | + "title": "研究概述", | ||
| 159 | + "content": "对查询主题进行总体概述和分析" | ||
| 160 | + }, | ||
| 161 | + { | ||
| 162 | + "title": "深度分析", | ||
| 163 | + "content": "深入分析查询主题的各个方面" | ||
| 164 | + } | ||
| 165 | + ] | ||
| 122 | 166 | ||
| 123 | def mutate_state(self, input_data: Any = None, state: State = None, **kwargs) -> State: | 167 | def mutate_state(self, input_data: Any = None, state: State = None, **kwargs) -> State: |
| 124 | """ | 168 | """ |
| @@ -12,7 +12,8 @@ from ..prompts import SYSTEM_PROMPT_FIRST_SEARCH, SYSTEM_PROMPT_REFLECTION | @@ -12,7 +12,8 @@ from ..prompts import SYSTEM_PROMPT_FIRST_SEARCH, SYSTEM_PROMPT_REFLECTION | ||
| 12 | from ..utils.text_processing import ( | 12 | from ..utils.text_processing import ( |
| 13 | remove_reasoning_from_output, | 13 | remove_reasoning_from_output, |
| 14 | clean_json_tags, | 14 | clean_json_tags, |
| 15 | - extract_clean_response | 15 | + extract_clean_response, |
| 16 | + fix_incomplete_json | ||
| 16 | ) | 17 | ) |
| 17 | 18 | ||
| 18 | 19 | ||
| @@ -91,21 +92,40 @@ class FirstSearchNode(BaseNode): | @@ -91,21 +92,40 @@ class FirstSearchNode(BaseNode): | ||
| 91 | cleaned_output = remove_reasoning_from_output(output) | 92 | cleaned_output = remove_reasoning_from_output(output) |
| 92 | cleaned_output = clean_json_tags(cleaned_output) | 93 | cleaned_output = clean_json_tags(cleaned_output) |
| 93 | 94 | ||
| 95 | + # 记录清理后的输出用于调试 | ||
| 96 | + self.log_info(f"清理后的输出: {cleaned_output[:200]}...") | ||
| 97 | + | ||
| 94 | # 解析JSON | 98 | # 解析JSON |
| 95 | try: | 99 | try: |
| 96 | result = json.loads(cleaned_output) | 100 | result = json.loads(cleaned_output) |
| 97 | - except JSONDecodeError: | 101 | + self.log_info("JSON解析成功") |
| 102 | + except JSONDecodeError as e: | ||
| 103 | + self.log_info(f"JSON解析失败: {str(e)}") | ||
| 98 | # 使用更强大的提取方法 | 104 | # 使用更强大的提取方法 |
| 99 | result = extract_clean_response(cleaned_output) | 105 | result = extract_clean_response(cleaned_output) |
| 100 | if "error" in result: | 106 | if "error" in result: |
| 101 | - raise ValueError("JSON解析失败") | 107 | + self.log_error("JSON解析失败,尝试修复...") |
| 108 | + # 尝试修复JSON | ||
| 109 | + fixed_json = fix_incomplete_json(cleaned_output) | ||
| 110 | + if fixed_json: | ||
| 111 | + try: | ||
| 112 | + result = json.loads(fixed_json) | ||
| 113 | + self.log_info("JSON修复成功") | ||
| 114 | + except JSONDecodeError: | ||
| 115 | + self.log_error("JSON修复失败") | ||
| 116 | + # 返回默认查询 | ||
| 117 | + return self._get_default_search_query() | ||
| 118 | + else: | ||
| 119 | + self.log_error("无法修复JSON,使用默认查询") | ||
| 120 | + return self._get_default_search_query() | ||
| 102 | 121 | ||
| 103 | # 验证和清理结果 | 122 | # 验证和清理结果 |
| 104 | search_query = result.get("search_query", "") | 123 | search_query = result.get("search_query", "") |
| 105 | reasoning = result.get("reasoning", "") | 124 | reasoning = result.get("reasoning", "") |
| 106 | 125 | ||
| 107 | if not search_query: | 126 | if not search_query: |
| 108 | - raise ValueError("未找到搜索查询") | 127 | + self.log_warning("未找到搜索查询,使用默认查询") |
| 128 | + return self._get_default_search_query() | ||
| 109 | 129 | ||
| 110 | return { | 130 | return { |
| 111 | "search_query": search_query, | 131 | "search_query": search_query, |
| @@ -115,10 +135,19 @@ class FirstSearchNode(BaseNode): | @@ -115,10 +135,19 @@ class FirstSearchNode(BaseNode): | ||
| 115 | except Exception as e: | 135 | except Exception as e: |
| 116 | self.log_error(f"处理输出失败: {str(e)}") | 136 | self.log_error(f"处理输出失败: {str(e)}") |
| 117 | # 返回默认查询 | 137 | # 返回默认查询 |
| 118 | - return { | ||
| 119 | - "search_query": "相关主题研究", | ||
| 120 | - "reasoning": "由于解析失败,使用默认搜索查询" | ||
| 121 | - } | 138 | + return self._get_default_search_query() |
| 139 | + | ||
| 140 | + def _get_default_search_query(self) -> Dict[str, str]: | ||
| 141 | + """ | ||
| 142 | + 获取默认搜索查询 | ||
| 143 | + | ||
| 144 | + Returns: | ||
| 145 | + 默认的搜索查询字典 | ||
| 146 | + """ | ||
| 147 | + return { | ||
| 148 | + "search_query": "相关主题研究", | ||
| 149 | + "reasoning": "由于解析失败,使用默认搜索查询" | ||
| 150 | + } | ||
| 122 | 151 | ||
| 123 | 152 | ||
| 124 | class ReflectionNode(BaseNode): | 153 | class ReflectionNode(BaseNode): |
| @@ -198,21 +227,40 @@ class ReflectionNode(BaseNode): | @@ -198,21 +227,40 @@ class ReflectionNode(BaseNode): | ||
| 198 | cleaned_output = remove_reasoning_from_output(output) | 227 | cleaned_output = remove_reasoning_from_output(output) |
| 199 | cleaned_output = clean_json_tags(cleaned_output) | 228 | cleaned_output = clean_json_tags(cleaned_output) |
| 200 | 229 | ||
| 230 | + # 记录清理后的输出用于调试 | ||
| 231 | + self.log_info(f"清理后的输出: {cleaned_output[:200]}...") | ||
| 232 | + | ||
| 201 | # 解析JSON | 233 | # 解析JSON |
| 202 | try: | 234 | try: |
| 203 | result = json.loads(cleaned_output) | 235 | result = json.loads(cleaned_output) |
| 204 | - except JSONDecodeError: | 236 | + self.log_info("JSON解析成功") |
| 237 | + except JSONDecodeError as e: | ||
| 238 | + self.log_info(f"JSON解析失败: {str(e)}") | ||
| 205 | # 使用更强大的提取方法 | 239 | # 使用更强大的提取方法 |
| 206 | result = extract_clean_response(cleaned_output) | 240 | result = extract_clean_response(cleaned_output) |
| 207 | if "error" in result: | 241 | if "error" in result: |
| 208 | - raise ValueError("JSON解析失败") | 242 | + self.log_error("JSON解析失败,尝试修复...") |
| 243 | + # 尝试修复JSON | ||
| 244 | + fixed_json = fix_incomplete_json(cleaned_output) | ||
| 245 | + if fixed_json: | ||
| 246 | + try: | ||
| 247 | + result = json.loads(fixed_json) | ||
| 248 | + self.log_info("JSON修复成功") | ||
| 249 | + except JSONDecodeError: | ||
| 250 | + self.log_error("JSON修复失败") | ||
| 251 | + # 返回默认查询 | ||
| 252 | + return self._get_default_reflection_query() | ||
| 253 | + else: | ||
| 254 | + self.log_error("无法修复JSON,使用默认查询") | ||
| 255 | + return self._get_default_reflection_query() | ||
| 209 | 256 | ||
| 210 | # 验证和清理结果 | 257 | # 验证和清理结果 |
| 211 | search_query = result.get("search_query", "") | 258 | search_query = result.get("search_query", "") |
| 212 | reasoning = result.get("reasoning", "") | 259 | reasoning = result.get("reasoning", "") |
| 213 | 260 | ||
| 214 | if not search_query: | 261 | if not search_query: |
| 215 | - raise ValueError("未找到搜索查询") | 262 | + self.log_warning("未找到搜索查询,使用默认查询") |
| 263 | + return self._get_default_reflection_query() | ||
| 216 | 264 | ||
| 217 | return { | 265 | return { |
| 218 | "search_query": search_query, | 266 | "search_query": search_query, |
| @@ -222,7 +270,16 @@ class ReflectionNode(BaseNode): | @@ -222,7 +270,16 @@ class ReflectionNode(BaseNode): | ||
| 222 | except Exception as e: | 270 | except Exception as e: |
| 223 | self.log_error(f"处理输出失败: {str(e)}") | 271 | self.log_error(f"处理输出失败: {str(e)}") |
| 224 | # 返回默认查询 | 272 | # 返回默认查询 |
| 225 | - return { | ||
| 226 | - "search_query": "深度研究补充信息", | ||
| 227 | - "reasoning": "由于解析失败,使用默认反思搜索查询" | ||
| 228 | - } | 273 | + return self._get_default_reflection_query() |
| 274 | + | ||
| 275 | + def _get_default_reflection_query(self) -> Dict[str, str]: | ||
| 276 | + """ | ||
| 277 | + 获取默认反思搜索查询 | ||
| 278 | + | ||
| 279 | + Returns: | ||
| 280 | + 默认的反思搜索查询字典 | ||
| 281 | + """ | ||
| 282 | + return { | ||
| 283 | + "search_query": "深度研究补充信息", | ||
| 284 | + "reasoning": "由于解析失败,使用默认反思搜索查询" | ||
| 285 | + } |
| @@ -14,6 +14,7 @@ from ..utils.text_processing import ( | @@ -14,6 +14,7 @@ from ..utils.text_processing import ( | ||
| 14 | remove_reasoning_from_output, | 14 | remove_reasoning_from_output, |
| 15 | clean_json_tags, | 15 | clean_json_tags, |
| 16 | extract_clean_response, | 16 | extract_clean_response, |
| 17 | + fix_incomplete_json, | ||
| 17 | format_search_results_for_prompt | 18 | format_search_results_for_prompt |
| 18 | ) | 19 | ) |
| 19 | 20 | ||
| @@ -82,25 +83,42 @@ class FirstSummaryNode(StateMutationNode): | @@ -82,25 +83,42 @@ class FirstSummaryNode(StateMutationNode): | ||
| 82 | 83 | ||
| 83 | def process_output(self, output: str) -> str: | 84 | def process_output(self, output: str) -> str: |
| 84 | """ | 85 | """ |
| 85 | - 处理LLM输出,提取段落总结 | 86 | + 处理LLM输出,提取段落内容 |
| 86 | 87 | ||
| 87 | Args: | 88 | Args: |
| 88 | output: LLM原始输出 | 89 | output: LLM原始输出 |
| 89 | 90 | ||
| 90 | Returns: | 91 | Returns: |
| 91 | - 段落总结内容 | 92 | + 段落内容 |
| 92 | """ | 93 | """ |
| 93 | try: | 94 | try: |
| 94 | # 清理响应文本 | 95 | # 清理响应文本 |
| 95 | cleaned_output = remove_reasoning_from_output(output) | 96 | cleaned_output = remove_reasoning_from_output(output) |
| 96 | cleaned_output = clean_json_tags(cleaned_output) | 97 | cleaned_output = clean_json_tags(cleaned_output) |
| 97 | 98 | ||
| 99 | + # 记录清理后的输出用于调试 | ||
| 100 | + self.log_info(f"清理后的输出: {cleaned_output[:200]}...") | ||
| 101 | + | ||
| 98 | # 解析JSON | 102 | # 解析JSON |
| 99 | try: | 103 | try: |
| 100 | result = json.loads(cleaned_output) | 104 | result = json.loads(cleaned_output) |
| 101 | - except JSONDecodeError: | ||
| 102 | - # 如果不是JSON格式,直接返回清理后的文本 | ||
| 103 | - return cleaned_output | 105 | + self.log_info("JSON解析成功") |
| 106 | + except JSONDecodeError as e: | ||
| 107 | + self.log_info(f"JSON解析失败: {str(e)}") | ||
| 108 | + # 尝试修复JSON | ||
| 109 | + fixed_json = fix_incomplete_json(cleaned_output) | ||
| 110 | + if fixed_json: | ||
| 111 | + try: | ||
| 112 | + result = json.loads(fixed_json) | ||
| 113 | + self.log_info("JSON修复成功") | ||
| 114 | + except JSONDecodeError: | ||
| 115 | + self.log_info("JSON修复失败,直接使用清理后的文本") | ||
| 116 | + # 如果不是JSON格式,直接返回清理后的文本 | ||
| 117 | + return cleaned_output | ||
| 118 | + else: | ||
| 119 | + self.log_info("无法修复JSON,直接使用清理后的文本") | ||
| 120 | + # 如果不是JSON格式,直接返回清理后的文本 | ||
| 121 | + return cleaned_output | ||
| 104 | 122 | ||
| 105 | # 提取段落内容 | 123 | # 提取段落内容 |
| 106 | if isinstance(result, dict): | 124 | if isinstance(result, dict): |
| @@ -224,12 +242,29 @@ class ReflectionSummaryNode(StateMutationNode): | @@ -224,12 +242,29 @@ class ReflectionSummaryNode(StateMutationNode): | ||
| 224 | cleaned_output = remove_reasoning_from_output(output) | 242 | cleaned_output = remove_reasoning_from_output(output) |
| 225 | cleaned_output = clean_json_tags(cleaned_output) | 243 | cleaned_output = clean_json_tags(cleaned_output) |
| 226 | 244 | ||
| 245 | + # 记录清理后的输出用于调试 | ||
| 246 | + self.log_info(f"清理后的输出: {cleaned_output[:200]}...") | ||
| 247 | + | ||
| 227 | # 解析JSON | 248 | # 解析JSON |
| 228 | try: | 249 | try: |
| 229 | result = json.loads(cleaned_output) | 250 | result = json.loads(cleaned_output) |
| 230 | - except JSONDecodeError: | ||
| 231 | - # 如果不是JSON格式,直接返回清理后的文本 | ||
| 232 | - return cleaned_output | 251 | + self.log_info("JSON解析成功") |
| 252 | + except JSONDecodeError as e: | ||
| 253 | + self.log_info(f"JSON解析失败: {str(e)}") | ||
| 254 | + # 尝试修复JSON | ||
| 255 | + fixed_json = fix_incomplete_json(cleaned_output) | ||
| 256 | + if fixed_json: | ||
| 257 | + try: | ||
| 258 | + result = json.loads(fixed_json) | ||
| 259 | + self.log_info("JSON修复成功") | ||
| 260 | + except JSONDecodeError: | ||
| 261 | + self.log_info("JSON修复失败,直接使用清理后的文本") | ||
| 262 | + # 如果不是JSON格式,直接返回清理后的文本 | ||
| 263 | + return cleaned_output | ||
| 264 | + else: | ||
| 265 | + self.log_info("无法修复JSON,直接使用清理后的文本") | ||
| 266 | + # 如果不是JSON格式,直接返回清理后的文本 | ||
| 267 | + return cleaned_output | ||
| 233 | 268 | ||
| 234 | # 提取更新后的段落内容 | 269 | # 提取更新后的段落内容 |
| 235 | if isinstance(result, dict): | 270 | if isinstance(result, dict): |
| @@ -55,6 +55,20 @@ def remove_reasoning_from_output(text: str) -> str: | @@ -55,6 +55,20 @@ def remove_reasoning_from_output(text: str) -> str: | ||
| 55 | Returns: | 55 | Returns: |
| 56 | 清理后的文本 | 56 | 清理后的文本 |
| 57 | """ | 57 | """ |
| 58 | + # 查找JSON开始位置 | ||
| 59 | + json_start = -1 | ||
| 60 | + | ||
| 61 | + # 尝试找到第一个 { 或 [ | ||
| 62 | + for i, char in enumerate(text): | ||
| 63 | + if char in '{[': | ||
| 64 | + json_start = i | ||
| 65 | + break | ||
| 66 | + | ||
| 67 | + if json_start != -1: | ||
| 68 | + # 从JSON开始位置截取 | ||
| 69 | + return text[json_start:].strip() | ||
| 70 | + | ||
| 71 | + # 如果没有找到JSON标记,尝试其他方法 | ||
| 58 | # 移除常见的推理标识 | 72 | # 移除常见的推理标识 |
| 59 | patterns = [ | 73 | patterns = [ |
| 60 | r'(?:reasoning|推理|思考|分析)[::]\s*.*?(?=\{|\[)', # 移除推理部分 | 74 | r'(?:reasoning|推理|思考|分析)[::]\s*.*?(?=\{|\[)', # 移除推理部分 |
| @@ -88,6 +102,14 @@ def extract_clean_response(text: str) -> Dict[str, Any]: | @@ -88,6 +102,14 @@ def extract_clean_response(text: str) -> Dict[str, Any]: | ||
| 88 | except JSONDecodeError: | 102 | except JSONDecodeError: |
| 89 | pass | 103 | pass |
| 90 | 104 | ||
| 105 | + # 尝试修复不完整的JSON | ||
| 106 | + fixed_text = fix_incomplete_json(cleaned_text) | ||
| 107 | + if fixed_text: | ||
| 108 | + try: | ||
| 109 | + return json.loads(fixed_text) | ||
| 110 | + except JSONDecodeError: | ||
| 111 | + pass | ||
| 112 | + | ||
| 91 | # 尝试查找JSON对象 | 113 | # 尝试查找JSON对象 |
| 92 | json_pattern = r'\{.*\}' | 114 | json_pattern = r'\{.*\}' |
| 93 | match = re.search(json_pattern, cleaned_text, re.DOTALL) | 115 | match = re.search(json_pattern, cleaned_text, re.DOTALL) |
| @@ -111,6 +133,92 @@ def extract_clean_response(text: str) -> Dict[str, Any]: | @@ -111,6 +133,92 @@ def extract_clean_response(text: str) -> Dict[str, Any]: | ||
| 111 | return {"error": "JSON解析失败", "raw_text": cleaned_text} | 133 | return {"error": "JSON解析失败", "raw_text": cleaned_text} |
| 112 | 134 | ||
| 113 | 135 | ||
# Matches optional whitespace followed by a closing brace/bracket; used to
# decide whether a comma is a trailing comma.
_TRAILING_CLOSER_RE = re.compile(r"\s*[}\]]")


def _is_valid_json(text: str) -> bool:
    """Return True when *text* parses as JSON."""
    try:
        json.loads(text)
    except JSONDecodeError:
        return False
    return True


def _strip_trailing_commas(text: str) -> str:
    """Remove commas that directly precede ``}`` or ``]``, outside of strings."""
    pieces = []
    in_string = False
    escaped = False
    for index, char in enumerate(text):
        if in_string:
            # String content is copied verbatim; only track where it ends.
            pieces.append(char)
            if escaped:
                escaped = False
            elif char == "\\":
                escaped = True
            elif char == '"':
                in_string = False
            continue
        if char == '"':
            in_string = True
        elif char == "," and _TRAILING_CLOSER_RE.match(text, index + 1):
            continue  # trailing comma: drop it
        pieces.append(char)
    return "".join(pieces)


def _close_open_structures(text: str) -> str:
    """Terminate an unfinished string and append missing closers in nesting order."""
    pending_closers = []
    in_string = False
    escaped = False
    for char in text:
        if in_string:
            if escaped:
                escaped = False
            elif char == "\\":
                escaped = True
            elif char == '"':
                in_string = False
            continue
        if char == '"':
            in_string = True
        elif char == "{":
            pending_closers.append("}")
        elif char == "[":
            pending_closers.append("]")
        elif char in "}]" and pending_closers and pending_closers[-1] == char:
            pending_closers.pop()

    if in_string:
        if escaped:
            # A lone trailing backslash would escape the quote we add.
            text = text[:-1]
        text += '"'
    else:
        text = text.rstrip()
        if text.endswith(","):
            # The output was cut right after a separator.
            text = text[:-1]
    # Innermost structure must be closed first.
    return text + "".join(reversed(pending_closers))


def fix_incomplete_json(text: str) -> str:
    """
    Repair an incomplete or slightly malformed JSON response.

    Repairs are attempted in order, and the first result that parses wins:

    1. Remove trailing commas before ``}`` / ``]`` (string contents are left
       untouched).
    2. Close an unterminated string and append the missing ``}`` / ``]`` in
       proper nesting order.
    3. If the text starts with an object, wrap it in ``[...]`` (handles a
       comma-separated list of objects that lost its array brackets).
    4. Fall back to ``fix_aggressive_json`` and salvage complete flat objects.

    Args:
        text: Raw text that is expected to contain JSON.

    Returns:
        Repaired JSON text (an object or an array), or an empty string when
        nothing usable could be recovered.
    """
    if not text or not text.strip():
        return ""

    candidate = _strip_trailing_commas(text.strip())
    if _is_valid_json(candidate):
        return candidate

    closed = _close_open_structures(candidate)
    attempts = [closed]
    if closed.startswith("{"):
        attempts.append("[" + closed + "]")
    for attempt in attempts:
        if _is_valid_json(attempt):
            return attempt

    # Last resort: keep whatever complete objects can be found.
    salvaged = fix_aggressive_json(candidate)
    try:
        recovered = json.loads(salvaged)
    except JSONDecodeError:
        return ""
    # An empty salvage result means nothing was recovered; report failure so
    # callers can fall back to their own defaults.
    return salvaged if recovered else ""


def fix_aggressive_json(text: str) -> str:
    """
    Aggressive JSON repair: salvage every complete flat object.

    Finds every ``{...}`` span that contains no nested braces, keeps only the
    spans that parse as JSON, and joins them into an array.

    Args:
        text: Raw text.

    Returns:
        JSON array text containing the salvaged objects; ``'[]'`` when none
        were found.
    """
    if not text:
        return '[]'

    candidates = re.findall(r'\{[^{}]*\}', text)
    cleaned = (_strip_trailing_commas(candidate) for candidate in candidates)
    # Discard spans that only look like objects so the result always parses.
    salvaged = [candidate for candidate in cleaned if _is_valid_json(candidate)]
    return '[' + ','.join(salvaged) + ']'
| 220 | + | ||
| 221 | + | ||
| 114 | def update_state_with_search_results(search_results: List[Dict[str, Any]], | 222 | def update_state_with_search_results(search_results: List[Dict[str, Any]], |
| 115 | paragraph_index: int, state: Any) -> Any: | 223 | paragraph_index: int, state: Any) -> Any: |
| 116 | """ | 224 | """ |
| @@ -13,7 +13,7 @@ import json | @@ -13,7 +13,7 @@ import json | ||
| 13 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), '.')) | 13 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), '.')) |
| 14 | 14 | ||
| 15 | from src import DeepSearchAgent, Config | 15 | from src import DeepSearchAgent, Config |
| 16 | -from config import DEEPSEEK_API_KEY, DEEPSEEK_API_KEY_2, TAVILY_API_KEY | 16 | +from config import DEEPSEEK_API_KEY, TAVILY_API_KEY |
| 17 | 17 | ||
| 18 | 18 | ||
| 19 | def main(): | 19 | def main(): |
-
Please register or login to post a comment