Toggle navigation
Toggle navigation
This project
Loading...
Sign in
万朱浩
/
Venue-Ops
Go to a project
Toggle navigation
Projects
Groups
Snippets
Help
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
马一丁
2025-11-17 22:10:37 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
bf1e2bfa9c1a18a0ddd3b3815991593e42c52835
bf1e2bfa
1 parent
b31be562
Repair the Logic for Cleaning Data Returned by LLM
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
19 additions
and
14 deletions
ReportEngine/utils/json_parser.py
ReportEngine/utils/json_parser.py
View file @
bf1e2bf
...
...
@@ -51,12 +51,12 @@ class RobustJSONParser:
# 常见的LLM思考内容模式
_THINKING_PATTERNS
=
[
r"<thinking>.*?</thinking>"
,
r"<thought>.*?</thought>"
,
r"让我想想.*?(?=
\
{|
\
[|$)"
,
r"首先.*?(?=
\
{|
\
[|$)"
,
r"分析.*?(?=
\
{|
\
[|$)"
,
r"根据.*?(?=
\
{|
\
[|$)"
,
r"^
\
s*<thinking>.*?</thinking>
\
s*"
,
r"^
\
s*<thought>.*?</thought>
\
s*"
,
r"^
\
s*让我想想.*?(?=
\
{|
\
[|$)"
,
r"^
\
s*首先.*?(?=
\
{|
\
[|$)"
,
r"^
\
s*分析.*?(?=
\
{|
\
[|$)"
,
r"^
\
s*根据.*?(?=
\
{|
\
[|$)"
,
]
# 冒号等号模式(LLM常见错误)
...
...
@@ -182,16 +182,21 @@ class RobustJSONParser:
for
pattern
in
self
.
_THINKING_PATTERNS
:
cleaned
=
re
.
sub
(
pattern
,
""
,
cleaned
,
flags
=
re
.
DOTALL
|
re
.
IGNORECASE
)
# 移除markdown代码块标记
if
cleaned
.
startswith
(
"```json"
):
cleaned
=
cleaned
[
7
:]
elif
cleaned
.
startswith
(
"```"
):
cleaned
=
cleaned
[
3
:]
# 优先提取任意位置的```json```包裹内容
fenced_match
=
re
.
search
(
r"```(?:json)?
\
s*([
\
s
\
S]*?)
\
s*```"
,
cleaned
)
if
fenced_match
:
cleaned
=
fenced_match
.
group
(
1
)
.
strip
()
else
:
# 如果没有找到完整代码块,再尝试移除前后缀
if
cleaned
.
startswith
(
"```json"
):
cleaned
=
cleaned
[
7
:]
elif
cleaned
.
startswith
(
"```"
):
cleaned
=
cleaned
[
3
:]
if
cleaned
.
endswith
(
"```"
):
cleaned
=
cleaned
[:
-
3
]
if
cleaned
.
endswith
(
"```"
):
cleaned
=
cleaned
[:
-
3
]
cleaned
=
cleaned
.
strip
()
cleaned
=
cleaned
.
strip
()
# 尝试提取第一个完整的JSON对象或数组
cleaned
=
self
.
_extract_first_json_structure
(
cleaned
)
...
...
Please
register
or
login
to post a comment