Toggle navigation
Toggle navigation
This project
Loading...
Sign in
万朱浩
/
Venue-Ops
Go to a project
Toggle navigation
Projects
Groups
Snippets
Help
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
马一丁
2025-11-17 17:36:58 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
a5f3964a73b36b2682231bd3eb676727add2c224
a5f3964a
1 parent
c20cc24c
Optimize JSON Parsing Compatibility
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
120 additions
and
3 deletions
ReportEngine/utils/json_parser.py
ReportEngine/utils/test_json_parser.py
ReportEngine/utils/json_parser.py
View file @
a5f3964
...
...
@@ -610,9 +610,34 @@ class RobustJSONParser:
# 验证数据类型
if
not
isinstance
(
data
,
dict
):
if
isinstance
(
data
,
list
)
and
len
(
data
)
>
0
and
isinstance
(
data
[
0
],
dict
):
logger
.
warning
(
f
"{context_name} 返回数组,自动提取第一个元素"
)
data
=
data
[
0
]
if
isinstance
(
data
,
list
):
if
len
(
data
)
>
0
:
# 尝试找到最符合期望的元素
best_match
=
None
max_match_count
=
0
for
item
in
data
:
if
isinstance
(
item
,
dict
):
if
expected_keys
:
# 计算匹配的键数量
match_count
=
sum
(
1
for
key
in
expected_keys
if
key
in
item
)
if
match_count
>
max_match_count
:
max_match_count
=
match_count
best_match
=
item
elif
best_match
is
None
:
best_match
=
item
if
best_match
:
logger
.
warning
(
f
"{context_name} 返回数组,自动提取最佳匹配元素(匹配{max_match_count}/{len(expected_keys or [])}个键)"
)
data
=
best_match
else
:
raise
JSONParseError
(
f
"{context_name} 返回的数组中没有有效的对象"
)
else
:
raise
JSONParseError
(
f
"{context_name} 返回空数组"
)
else
:
raise
JSONParseError
(
f
"{context_name} 返回的不是JSON对象: {type(data).__name__}"
...
...
@@ -625,6 +650,43 @@ class RobustJSONParser:
logger
.
warning
(
f
"{context_name} 缺少预期的键: {', '.join(missing_keys)}"
)
# 尝试修复常见的键名变体
data
=
self
.
_try_recover_missing_keys
(
data
,
missing_keys
,
context_name
)
return
data
def
_try_recover_missing_keys
(
self
,
data
:
Dict
[
str
,
Any
],
missing_keys
:
List
[
str
],
context_name
:
str
)
->
Dict
[
str
,
Any
]:
"""
尝试从数据中恢复缺失的键,通过查找相似的键名。
参数:
data: 原始数据
missing_keys: 缺失的键列表
context_name: 上下文名称
返回:
Dict[str, Any]: 修复后的数据
"""
# 常见的键名映射
key_aliases
=
{
"template_name"
:
[
"templateName"
,
"name"
,
"template"
],
"selection_reason"
:
[
"selectionReason"
,
"reason"
,
"explanation"
],
"title"
:
[
"reportTitle"
,
"documentTitle"
],
"chapters"
:
[
"chapterList"
,
"chapterPlan"
,
"sections"
],
"totalWords"
:
[
"total_words"
,
"wordCount"
,
"totalWordCount"
],
}
for
missing_key
in
missing_keys
:
if
missing_key
in
key_aliases
:
for
alias
in
key_aliases
[
missing_key
]:
if
alias
in
data
:
logger
.
info
(
f
"{context_name} 找到键'{missing_key}'的别名'{alias}',自动映射"
)
data
[
missing_key
]
=
data
[
alias
]
break
return
data
...
...
ReportEngine/utils/test_json_parser.py
View file @
a5f3964
...
...
@@ -127,6 +127,61 @@ class TestRobustJSONParser(unittest.TestCase):
self
.
assertEqual
(
result
[
"name"
],
"test"
)
self
.
assertEqual
(
result
[
"value"
],
123
)
def
test_unterminated_string_with_json_repair
(
self
):
"""测试使用json_repair库修复未终止的字符串。"""
# 创建启用json_repair的解析器
parser_with_repair
=
RobustJSONParser
(
enable_json_repair
=
True
,
enable_llm_repair
=
False
,
)
# 模拟实际错误:字符串中有未转义的控制字符或引号
json_str
=
"""{
"template_name": "特定政策报告",
"selection_reason": "这是测试内容"
}"""
result
=
parser_with_repair
.
parse
(
json_str
,
"未终止字符串测试"
)
# 只要能够解析成功,不报错就可以了
self
.
assertIsInstance
(
result
,
dict
)
self
.
assertIn
(
"template_name"
,
result
)
def
test_array_with_best_match
(
self
):
"""测试从数组中提取最佳匹配的元素。"""
json_str
=
"""[
{
"name": "test",
"value": 123
},
{
"totalWords": 40000,
"globalGuidelines": ["guide1", "guide2"],
"chapters": []
}
]"""
result
=
self
.
parser
.
parse
(
json_str
,
"数组最佳匹配测试"
,
expected_keys
=
[
"totalWords"
,
"globalGuidelines"
,
"chapters"
],
)
# 应该提取第二个元素,因为它匹配了3个键
self
.
assertEqual
(
result
[
"totalWords"
],
40000
)
self
.
assertEqual
(
len
(
result
[
"globalGuidelines"
]),
2
)
def
test_key_alias_recovery
(
self
):
"""测试键名别名恢复。"""
json_str
=
"""{
"templateName": "test_template",
"selectionReason": "This is a test"
}"""
result
=
self
.
parser
.
parse
(
json_str
,
"键别名测试"
,
expected_keys
=
[
"template_name"
,
"selection_reason"
],
)
# 应该自动映射 templateName -> template_name
self
.
assertEqual
(
result
[
"template_name"
],
"test_template"
)
self
.
assertEqual
(
result
[
"selection_reason"
],
"This is a test"
)
def
test_complex_real_world_case
(
self
):
"""测试真实世界的复杂案例(类似实际错误)。"""
# 模拟实际错误:缺少逗号、有markdown包裹、有思考内容
...
...
Please
register
or
login
to post a comment