Toggle navigation
Toggle navigation
This project
Loading...
Sign in
万朱浩
/
Venue-Ops
Go to a project
Toggle navigation
Projects
Groups
Snippets
Help
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
马一丁
2025-11-18 00:35:09 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
ad36c03be08c319c8383ac6de052be6f58f5a9aa
ad36c03b
1 parent
bf1e2bfa
Add the Ability to Parse JSON
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
80 additions
and
15 deletions
ReportEngine/utils/json_parser.py
requirements.txt
ReportEngine/utils/json_parser.py
View file @
ad36c03
...
...
@@ -108,18 +108,13 @@ class RobustJSONParser:
if
not
raw_text
or
not
raw_text
.
strip
():
raise
JSONParseError
(
f
"{context_name}返回空内容"
)
# 步骤1: 清理markdown标记和思考内容
cleaned
=
self
.
_clean_response
(
raw_text
)
# 步骤2: 收集候选payload
candidates
=
[
cleaned
]
# 原始文本用于后续日志
original_text
=
raw_text
# 步骤3: 应用本地修复策略
local_repaired
=
self
.
_apply_local_repairs
(
cleaned
)
if
local_repaired
!=
cleaned
:
candidates
.
append
(
local_repaired
)
# 步骤1: 构造候选集,包含不同清理策略
candidates
=
self
.
_build_candidate_payloads
(
raw_text
,
context_name
)
# 步骤
4
: 尝试解析所有候选
# 步骤
2
: 尝试解析所有候选
last_error
:
Optional
[
json
.
JSONDecodeError
]
=
None
for
i
,
candidate
in
enumerate
(
candidates
):
try
:
...
...
@@ -132,7 +127,9 @@ class RobustJSONParser:
last_error
=
exc
logger
.
debug
(
f
"{context_name} 候选{i + 1}解析失败: {exc}"
)
# 步骤5: 使用json_repair库
cleaned
=
candidates
[
0
]
if
candidates
else
original_text
# 步骤3: 使用json_repair库
if
self
.
enable_json_repair
:
repaired
=
self
.
_attempt_json_repair
(
cleaned
,
context_name
)
if
repaired
:
...
...
@@ -146,7 +143,7 @@ class RobustJSONParser:
last_error
=
exc
logger
.
debug
(
f
"{context_name} json_repair修复后仍无法解析: {exc}"
)
# 步骤
6
: 使用LLM修复(如果启用)
# 步骤
4
: 使用LLM修复(如果启用)
if
self
.
enable_llm_repair
and
self
.
llm_repair_fn
:
llm_repaired
=
self
.
_attempt_llm_repair
(
cleaned
,
str
(
last_error
),
context_name
)
if
llm_repaired
:
...
...
@@ -163,8 +160,29 @@ class RobustJSONParser:
# 所有策略都失败了
error_msg
=
f
"{context_name} JSON解析失败: {last_error}"
logger
.
error
(
error_msg
)
logger
.
debug
(
f
"原始文本前500字符: {raw_text[:500]}"
)
raise
JSONParseError
(
error_msg
,
raw_text
=
raw_text
)
from
last_error
logger
.
debug
(
f
"原始文本前500字符: {original_text[:500]}"
)
raise
JSONParseError
(
error_msg
,
raw_text
=
original_text
)
from
last_error
def
_build_candidate_payloads
(
self
,
raw_text
:
str
,
context_name
:
str
)
->
List
[
str
]:
"""
针对原始文本构造多个候选JSON字符串,覆盖不同的清理策略。
返回:
List[str]: 候选JSON文本列表
"""
cleaned
=
self
.
_clean_response
(
raw_text
)
candidates
=
[
cleaned
]
local_repaired
=
self
.
_apply_local_repairs
(
cleaned
)
if
local_repaired
!=
cleaned
:
candidates
.
append
(
local_repaired
)
# 对含有三层列表结构的内容强制拉平一次
flattened
=
self
.
_flatten_nested_arrays
(
local_repaired
)
if
flattened
not
in
candidates
:
candidates
.
append
(
flattened
)
return
candidates
def
_clean_response
(
self
,
raw
:
str
)
->
str
:
"""
...
...
@@ -301,6 +319,12 @@ class RobustJSONParser:
logger
.
warning
(
"检测到对象/数组之间缺少逗号,已自动补齐"
)
mutated
=
True
# 合并多余的方括号(LLM常见把二维列表层级写成三层)
repaired
,
brackets_collapsed
=
self
.
_collapse_redundant_brackets
(
repaired
)
if
brackets_collapsed
:
logger
.
warning
(
"检测到连续的方括号嵌套,已尝试折叠为二维结构"
)
mutated
=
True
# 平衡括号
repaired
,
balanced
=
self
.
_balance_brackets
(
repaired
)
if
balanced
:
...
...
@@ -444,6 +468,46 @@ class RobustJSONParser:
return
""
.
join
(
chars
),
mutated
def
_collapse_redundant_brackets
(
self
,
text
:
str
)
->
Tuple
[
str
,
bool
]:
"""
针对LLM生成的三层或更多层数组(如]]], [[ / [[[)进行折叠,避免表格/列表写出额外维度。
返回:
Tuple[str, bool]: (修复后的文本, 是否有修改)
"""
if
not
text
:
return
text
,
False
mutated
=
False
patterns
=
[
# 典型错误: "]]], [[{...}" -> "]], [{...}"
(
re
.
compile
(
r"
\
]
\
s*
\
]
\
s*
\
]
\
s*,
\
s*
\
[
\
s*
\
["
),
"]],["
),
# 极端情况: 连续三层开头 "[[[" -> "[["
(
re
.
compile
(
r"
\
[
\
s*
\
[
\
s*
\
["
),
"[["
),
# 极端情况: 结尾 "]]]" -> "]]"
(
re
.
compile
(
r"
\
]
\
s*
\
]
\
s*
\
]"
),
"]]"
),
]
repaired
=
text
for
pattern
,
replacement
in
patterns
:
new_text
,
count
=
pattern
.
subn
(
replacement
,
repaired
)
if
count
>
0
:
mutated
=
True
repaired
=
new_text
return
repaired
,
mutated
def
_flatten_nested_arrays
(
self
,
text
:
str
)
->
str
:
"""
对明显多余的一层列表进行折叠,例如 [[[x]]] -> [[x]]。
"""
if
not
text
:
return
text
text
=
re
.
sub
(
r"
\
]
\
s*
\
]
\
s*
\
]"
,
"]]"
,
text
)
text
=
re
.
sub
(
r"
\
[
\
s*
\
[
\
s*
\
["
,
"[["
,
text
)
return
text
def
_balance_brackets
(
self
,
text
:
str
)
->
Tuple
[
str
,
bool
]:
"""
尝试修复因LLM多写/少写括号导致的不平衡结构。
...
...
requirements.txt
View file @
ad36c03
...
...
@@ -69,6 +69,7 @@ tenacity==8.2.2
loguru>=0.7.0
pydantic==2.5.2
pydantic-settings==2.2.1
json-repair==0.53.0
# ===== 开发工具(可选) =====
pytest>=7.4.0
...
...
@@ -77,4 +78,4 @@ flake8>=6.0.0
# ===== Web服务器 =====
fastapi==0.110.2
uvicorn==0.29.0
\ No newline at end of file
uvicorn==0.29.0
...
...
Please
register
or
login
to post a comment