Showing 1 changed file with 19 additions and 14 deletions
| @@ -51,12 +51,12 @@ class RobustJSONParser: | @@ -51,12 +51,12 @@ class RobustJSONParser: | ||
| 51 | 51 | ||
| 52 | # 常见的LLM思考内容模式 | 52 | # 常见的LLM思考内容模式 |
| 53 | _THINKING_PATTERNS = [ | 53 | _THINKING_PATTERNS = [ |
| 54 | - r"<thinking>.*?</thinking>", | ||
| 55 | - r"<thought>.*?</thought>", | ||
| 56 | - r"让我想想.*?(?=\{|\[|$)", | ||
| 57 | - r"首先.*?(?=\{|\[|$)", | ||
| 58 | - r"分析.*?(?=\{|\[|$)", | ||
| 59 | - r"根据.*?(?=\{|\[|$)", | 54 | + r"^\s*<thinking>.*?</thinking>\s*", |
| 55 | + r"^\s*<thought>.*?</thought>\s*", | ||
| 56 | + r"^\s*让我想想.*?(?=\{|\[|$)", | ||
| 57 | + r"^\s*首先.*?(?=\{|\[|$)", | ||
| 58 | + r"^\s*分析.*?(?=\{|\[|$)", | ||
| 59 | + r"^\s*根据.*?(?=\{|\[|$)", | ||
| 60 | ] | 60 | ] |
| 61 | 61 | ||
| 62 | # 冒号等号模式(LLM常见错误) | 62 | # 冒号等号模式(LLM常见错误) |
| @@ -182,16 +182,21 @@ class RobustJSONParser: | @@ -182,16 +182,21 @@ class RobustJSONParser: | ||
| 182 | for pattern in self._THINKING_PATTERNS: | 182 | for pattern in self._THINKING_PATTERNS: |
| 183 | cleaned = re.sub(pattern, "", cleaned, flags=re.DOTALL | re.IGNORECASE) | 183 | cleaned = re.sub(pattern, "", cleaned, flags=re.DOTALL | re.IGNORECASE) |
| 184 | 184 | ||
| 185 | - # 移除markdown代码块标记 | ||
| 186 | - if cleaned.startswith("```json"): | ||
| 187 | - cleaned = cleaned[7:] | ||
| 188 | - elif cleaned.startswith("```"): | ||
| 189 | - cleaned = cleaned[3:] | 185 | + # 优先提取任意位置的```json```包裹内容 |
| 186 | + fenced_match = re.search(r"```(?:json)?\s*([\s\S]*?)\s*```", cleaned) | ||
| 187 | + if fenced_match: | ||
| 188 | + cleaned = fenced_match.group(1).strip() | ||
| 189 | + else: | ||
| 190 | + # 如果没有找到完整代码块,再尝试移除前后缀 | ||
| 191 | + if cleaned.startswith("```json"): | ||
| 192 | + cleaned = cleaned[7:] | ||
| 193 | + elif cleaned.startswith("```"): | ||
| 194 | + cleaned = cleaned[3:] | ||
| 190 | 195 | ||
| 191 | - if cleaned.endswith("```"): | ||
| 192 | - cleaned = cleaned[:-3] | 196 | + if cleaned.endswith("```"): |
| 197 | + cleaned = cleaned[:-3] | ||
| 193 | 198 | ||
| 194 | - cleaned = cleaned.strip() | 199 | + cleaned = cleaned.strip() |
| 195 | 200 | ||
| 196 | # 尝试提取第一个完整的JSON对象或数组 | 201 | # 尝试提取第一个完整的JSON对象或数组 |
| 197 | cleaned = self._extract_first_json_structure(cleaned) | 202 | cleaned = self._extract_first_json_structure(cleaned) |
-
Please register or log in to post a comment