Toggle navigation
Toggle navigation
This project
Loading...
Sign in
万朱浩
/
Venue-Ops
Go to a project
Toggle navigation
Projects
Groups
Snippets
Help
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
戒酒的李白
2025-08-22 19:14:32 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
5d7f41763f7efe624531c46233e5f604f7593cde
5d7f4176
1 parent
862cf7c5
JSON parsing fix.
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
283 additions
and
39 deletions
src/nodes/report_structure_node.py
src/nodes/search_node.py
src/nodes/summary_node.py
src/utils/text_processing.py
streamlit_app.py
src/nodes/report_structure_node.py
View file @
5d7f417
...
...
@@ -13,7 +13,8 @@ from ..prompts import SYSTEM_PROMPT_REPORT_STRUCTURE
from
..utils.text_processing
import
(
remove_reasoning_from_output
,
clean_json_tags
,
extract_clean_response
extract_clean_response
,
fix_incomplete_json
)
...
...
@@ -77,48 +78,91 @@ class ReportStructureNode(StateMutationNode):
cleaned_output
=
remove_reasoning_from_output
(
output
)
cleaned_output
=
clean_json_tags
(
cleaned_output
)
# 记录清理后的输出用于调试
self
.
log_info
(
f
"清理后的输出: {cleaned_output[:200]}..."
)
# 解析JSON
try
:
report_structure
=
json
.
loads
(
cleaned_output
)
except
JSONDecodeError
:
self
.
log_info
(
"JSON解析成功"
)
except
JSONDecodeError
as
e
:
self
.
log_info
(
f
"JSON解析失败: {str(e)}"
)
# 使用更强大的提取方法
report_structure
=
extract_clean_response
(
cleaned_output
)
if
"error"
in
report_structure
:
raise
ValueError
(
"JSON解析失败"
)
self
.
log_error
(
"JSON解析失败,尝试修复..."
)
# 尝试修复JSON
fixed_json
=
fix_incomplete_json
(
cleaned_output
)
if
fixed_json
:
try
:
report_structure
=
json
.
loads
(
fixed_json
)
self
.
log_info
(
"JSON修复成功"
)
except
JSONDecodeError
:
self
.
log_error
(
"JSON修复失败"
)
# 返回默认结构
return
self
.
_generate_default_structure
()
else
:
self
.
log_error
(
"无法修复JSON,使用默认结构"
)
return
self
.
_generate_default_structure
()
# 验证结构
if
not
isinstance
(
report_structure
,
list
):
raise
ValueError
(
"报告结构应该是一个列表"
)
self
.
log_info
(
"报告结构不是列表,尝试转换..."
)
if
isinstance
(
report_structure
,
dict
):
# 如果是单个对象,包装成列表
report_structure
=
[
report_structure
]
else
:
self
.
log_error
(
"报告结构格式无效,使用默认结构"
)
return
self
.
_generate_default_structure
()
# 验证每个段落
validated_structure
=
[]
for
i
,
paragraph
in
enumerate
(
report_structure
):
if
not
isinstance
(
paragraph
,
dict
):
self
.
log_warning
(
f
"段落 {i+1} 不是字典格式,跳过"
)
continue
title
=
paragraph
.
get
(
"title"
,
f
"段落 {i+1}"
)
content
=
paragraph
.
get
(
"content"
,
""
)
if
not
title
or
not
content
:
self
.
log_warning
(
f
"段落 {i+1} 缺少标题或内容,跳过"
)
continue
validated_structure
.
append
({
"title"
:
title
,
"content"
:
content
})
if
not
validated_structure
:
self
.
log_warning
(
"没有有效的段落结构,使用默认结构"
)
return
self
.
_generate_default_structure
()
self
.
log_info
(
f
"成功验证 {len(validated_structure)} 个段落结构"
)
return
validated_structure
except
Exception
as
e
:
self
.
log_error
(
f
"处理输出失败: {str(e)}"
)
# 返回默认结构
return
[
{
"title"
:
"概述"
,
"content"
:
f
"对'{self.query}'的总体概述和背景介绍"
},
{
"title"
:
"详细分析"
,
"content"
:
f
"深入分析'{self.query}'的相关内容"
}
]
return
self
.
_generate_default_structure
()
def
_generate_default_structure
(
self
)
->
List
[
Dict
[
str
,
str
]]:
"""
生成默认的报告结构
Returns:
默认的报告结构列表
"""
self
.
log_info
(
"生成默认报告结构"
)
return
[
{
"title"
:
"研究概述"
,
"content"
:
"对查询主题进行总体概述和分析"
},
{
"title"
:
"深度分析"
,
"content"
:
"深入分析查询主题的各个方面"
}
]
def
mutate_state
(
self
,
input_data
:
Any
=
None
,
state
:
State
=
None
,
**
kwargs
)
->
State
:
"""
...
...
src/nodes/search_node.py
View file @
5d7f417
...
...
@@ -12,7 +12,8 @@ from ..prompts import SYSTEM_PROMPT_FIRST_SEARCH, SYSTEM_PROMPT_REFLECTION
from
..utils.text_processing
import
(
remove_reasoning_from_output
,
clean_json_tags
,
extract_clean_response
extract_clean_response
,
fix_incomplete_json
)
...
...
@@ -91,21 +92,40 @@ class FirstSearchNode(BaseNode):
cleaned_output
=
remove_reasoning_from_output
(
output
)
cleaned_output
=
clean_json_tags
(
cleaned_output
)
# 记录清理后的输出用于调试
self
.
log_info
(
f
"清理后的输出: {cleaned_output[:200]}..."
)
# 解析JSON
try
:
result
=
json
.
loads
(
cleaned_output
)
except
JSONDecodeError
:
self
.
log_info
(
"JSON解析成功"
)
except
JSONDecodeError
as
e
:
self
.
log_info
(
f
"JSON解析失败: {str(e)}"
)
# 使用更强大的提取方法
result
=
extract_clean_response
(
cleaned_output
)
if
"error"
in
result
:
raise
ValueError
(
"JSON解析失败"
)
self
.
log_error
(
"JSON解析失败,尝试修复..."
)
# 尝试修复JSON
fixed_json
=
fix_incomplete_json
(
cleaned_output
)
if
fixed_json
:
try
:
result
=
json
.
loads
(
fixed_json
)
self
.
log_info
(
"JSON修复成功"
)
except
JSONDecodeError
:
self
.
log_error
(
"JSON修复失败"
)
# 返回默认查询
return
self
.
_get_default_search_query
()
else
:
self
.
log_error
(
"无法修复JSON,使用默认查询"
)
return
self
.
_get_default_search_query
()
# 验证和清理结果
search_query
=
result
.
get
(
"search_query"
,
""
)
reasoning
=
result
.
get
(
"reasoning"
,
""
)
if
not
search_query
:
raise
ValueError
(
"未找到搜索查询"
)
self
.
log_warning
(
"未找到搜索查询,使用默认查询"
)
return
self
.
_get_default_search_query
()
return
{
"search_query"
:
search_query
,
...
...
@@ -115,10 +135,19 @@ class FirstSearchNode(BaseNode):
except
Exception
as
e
:
self
.
log_error
(
f
"处理输出失败: {str(e)}"
)
# 返回默认查询
return
{
"search_query"
:
"相关主题研究"
,
"reasoning"
:
"由于解析失败,使用默认搜索查询"
}
return
self
.
_get_default_search_query
()
def
_get_default_search_query
(
self
)
->
Dict
[
str
,
str
]:
"""
获取默认搜索查询
Returns:
默认的搜索查询字典
"""
return
{
"search_query"
:
"相关主题研究"
,
"reasoning"
:
"由于解析失败,使用默认搜索查询"
}
class
ReflectionNode
(
BaseNode
):
...
...
@@ -198,21 +227,40 @@ class ReflectionNode(BaseNode):
cleaned_output
=
remove_reasoning_from_output
(
output
)
cleaned_output
=
clean_json_tags
(
cleaned_output
)
# 记录清理后的输出用于调试
self
.
log_info
(
f
"清理后的输出: {cleaned_output[:200]}..."
)
# 解析JSON
try
:
result
=
json
.
loads
(
cleaned_output
)
except
JSONDecodeError
:
self
.
log_info
(
"JSON解析成功"
)
except
JSONDecodeError
as
e
:
self
.
log_info
(
f
"JSON解析失败: {str(e)}"
)
# 使用更强大的提取方法
result
=
extract_clean_response
(
cleaned_output
)
if
"error"
in
result
:
raise
ValueError
(
"JSON解析失败"
)
self
.
log_error
(
"JSON解析失败,尝试修复..."
)
# 尝试修复JSON
fixed_json
=
fix_incomplete_json
(
cleaned_output
)
if
fixed_json
:
try
:
result
=
json
.
loads
(
fixed_json
)
self
.
log_info
(
"JSON修复成功"
)
except
JSONDecodeError
:
self
.
log_error
(
"JSON修复失败"
)
# 返回默认查询
return
self
.
_get_default_reflection_query
()
else
:
self
.
log_error
(
"无法修复JSON,使用默认查询"
)
return
self
.
_get_default_reflection_query
()
# 验证和清理结果
search_query
=
result
.
get
(
"search_query"
,
""
)
reasoning
=
result
.
get
(
"reasoning"
,
""
)
if
not
search_query
:
raise
ValueError
(
"未找到搜索查询"
)
self
.
log_warning
(
"未找到搜索查询,使用默认查询"
)
return
self
.
_get_default_reflection_query
()
return
{
"search_query"
:
search_query
,
...
...
@@ -222,7 +270,16 @@ class ReflectionNode(BaseNode):
except
Exception
as
e
:
self
.
log_error
(
f
"处理输出失败: {str(e)}"
)
# 返回默认查询
return
{
"search_query"
:
"深度研究补充信息"
,
"reasoning"
:
"由于解析失败,使用默认反思搜索查询"
}
return
self
.
_get_default_reflection_query
()
def
_get_default_reflection_query
(
self
)
->
Dict
[
str
,
str
]:
"""
获取默认反思搜索查询
Returns:
默认的反思搜索查询字典
"""
return
{
"search_query"
:
"深度研究补充信息"
,
"reasoning"
:
"由于解析失败,使用默认反思搜索查询"
}
...
...
src/nodes/summary_node.py
View file @
5d7f417
...
...
@@ -14,6 +14,7 @@ from ..utils.text_processing import (
remove_reasoning_from_output
,
clean_json_tags
,
extract_clean_response
,
fix_incomplete_json
,
format_search_results_for_prompt
)
...
...
@@ -82,25 +83,42 @@ class FirstSummaryNode(StateMutationNode):
def
process_output
(
self
,
output
:
str
)
->
str
:
"""
处理LLM输出,提取段落
总结
处理LLM输出,提取段落
内容
Args:
output: LLM原始输出
Returns:
段落
总结
内容
段落内容
"""
try
:
# 清理响应文本
cleaned_output
=
remove_reasoning_from_output
(
output
)
cleaned_output
=
clean_json_tags
(
cleaned_output
)
# 记录清理后的输出用于调试
self
.
log_info
(
f
"清理后的输出: {cleaned_output[:200]}..."
)
# 解析JSON
try
:
result
=
json
.
loads
(
cleaned_output
)
except
JSONDecodeError
:
# 如果不是JSON格式,直接返回清理后的文本
return
cleaned_output
self
.
log_info
(
"JSON解析成功"
)
except
JSONDecodeError
as
e
:
self
.
log_info
(
f
"JSON解析失败: {str(e)}"
)
# 尝试修复JSON
fixed_json
=
fix_incomplete_json
(
cleaned_output
)
if
fixed_json
:
try
:
result
=
json
.
loads
(
fixed_json
)
self
.
log_info
(
"JSON修复成功"
)
except
JSONDecodeError
:
self
.
log_info
(
"JSON修复失败,直接使用清理后的文本"
)
# 如果不是JSON格式,直接返回清理后的文本
return
cleaned_output
else
:
self
.
log_info
(
"无法修复JSON,直接使用清理后的文本"
)
# 如果不是JSON格式,直接返回清理后的文本
return
cleaned_output
# 提取段落内容
if
isinstance
(
result
,
dict
):
...
...
@@ -224,12 +242,29 @@ class ReflectionSummaryNode(StateMutationNode):
cleaned_output
=
remove_reasoning_from_output
(
output
)
cleaned_output
=
clean_json_tags
(
cleaned_output
)
# 记录清理后的输出用于调试
self
.
log_info
(
f
"清理后的输出: {cleaned_output[:200]}..."
)
# 解析JSON
try
:
result
=
json
.
loads
(
cleaned_output
)
except
JSONDecodeError
:
# 如果不是JSON格式,直接返回清理后的文本
return
cleaned_output
self
.
log_info
(
"JSON解析成功"
)
except
JSONDecodeError
as
e
:
self
.
log_info
(
f
"JSON解析失败: {str(e)}"
)
# 尝试修复JSON
fixed_json
=
fix_incomplete_json
(
cleaned_output
)
if
fixed_json
:
try
:
result
=
json
.
loads
(
fixed_json
)
self
.
log_info
(
"JSON修复成功"
)
except
JSONDecodeError
:
self
.
log_info
(
"JSON修复失败,直接使用清理后的文本"
)
# 如果不是JSON格式,直接返回清理后的文本
return
cleaned_output
else
:
self
.
log_info
(
"无法修复JSON,直接使用清理后的文本"
)
# 如果不是JSON格式,直接返回清理后的文本
return
cleaned_output
# 提取更新后的段落内容
if
isinstance
(
result
,
dict
):
...
...
src/utils/text_processing.py
View file @
5d7f417
...
...
@@ -55,6 +55,20 @@ def remove_reasoning_from_output(text: str) -> str:
Returns:
清理后的文本
"""
# 查找JSON开始位置
json_start
=
-
1
# 尝试找到第一个 { 或 [
for
i
,
char
in
enumerate
(
text
):
if
char
in
'{['
:
json_start
=
i
break
if
json_start
!=
-
1
:
# 从JSON开始位置截取
return
text
[
json_start
:]
.
strip
()
# 如果没有找到JSON标记,尝试其他方法
# 移除常见的推理标识
patterns
=
[
r'(?:reasoning|推理|思考|分析)[::]
\
s*.*?(?=
\
{|
\
[)'
,
# 移除推理部分
...
...
@@ -88,6 +102,14 @@ def extract_clean_response(text: str) -> Dict[str, Any]:
except
JSONDecodeError
:
pass
# 尝试修复不完整的JSON
fixed_text
=
fix_incomplete_json
(
cleaned_text
)
if
fixed_text
:
try
:
return
json
.
loads
(
fixed_text
)
except
JSONDecodeError
:
pass
# 尝试查找JSON对象
json_pattern
=
r'
\
{.*
\
}'
match
=
re
.
search
(
json_pattern
,
cleaned_text
,
re
.
DOTALL
)
...
...
@@ -111,6 +133,92 @@ def extract_clean_response(text: str) -> Dict[str, Any]:
return
{
"error"
:
"JSON解析失败"
,
"raw_text"
:
cleaned_text
}
def _is_valid_json(candidate: str) -> bool:
    """Return True when *candidate* parses as JSON."""
    try:
        json.loads(candidate)
    except JSONDecodeError:
        return False
    return True


def _close_open_json_scopes(text: str) -> str:
    """Append the closers a truncated JSON text is missing, innermost first.

    Braces and brackets inside string literals are ignored. An unterminated
    string literal is closed before the containers are.
    """
    pending = []  # closers still owed, outermost first
    in_string = False
    escaped = False
    for char in text:
        if in_string:
            if escaped:
                escaped = False
            elif char == '\\':
                escaped = True
            elif char == '"':
                in_string = False
        elif char == '"':
            in_string = True
        elif char == '{':
            pending.append('}')
        elif char == '[':
            pending.append(']')
        elif char in '}]' and pending and pending[-1] == char:
            pending.pop()

    if in_string:
        if escaped:
            # A dangling backslash would escape the quote we are about to add.
            text = text[:-1]
        repaired = text + '"'
    else:
        # A comma left dangling by truncation would make the closer invalid.
        repaired = re.sub(r'[\s,]+$', '', text)
    return repaired + ''.join(reversed(pending))


def fix_incomplete_json(text: str) -> str:
    """Repair a truncated or slightly malformed JSON response.

    Repairs are attempted from least to most invasive, and the first
    candidate that parses is returned:

    1. the text unchanged, if it is already valid JSON;
    2. the text with trailing commas before ``}`` / ``]`` removed;
    3. the text with missing closers appended in nesting order, so a
       truncated object stays an object and a truncated array stays flat;
    4. that text wrapped in ``[...]``, for comma-separated top-level objects;
    5. whatever ``fix_aggressive_json`` salvages.

    Args:
        text: Raw text expected to contain JSON.

    Returns:
        The repaired JSON text. When steps 1-4 fail, the return value is
        whatever ``fix_aggressive_json`` produces for the balanced text.
    """
    # Check first: the comma clean-up below is regex based and would also
    # rewrite ", }" sequences that sit inside string values of valid JSON.
    if _is_valid_json(text):
        return text

    # Drop trailing commas before a closing brace or bracket.
    text = re.sub(r',\s*}', '}', text)
    text = re.sub(r',\s*]', ']', text)
    if _is_valid_json(text):
        return text

    # Close whatever the truncation left open, innermost scope first.
    balanced = _close_open_json_scopes(text)
    if _is_valid_json(balanced):
        return balanced

    # Several top-level objects ("{...},{...}") only parse as an array.
    if balanced.lstrip().startswith('{'):
        wrapped = '[' + balanced + ']'
        if _is_valid_json(wrapped):
            return wrapped

    # Still invalid: fall back to the more aggressive extraction.
    return fix_aggressive_json(balanced)
def fix_aggressive_json(text: str) -> str:
    """Salvage flat JSON objects from *text* and return them as a JSON array.

    Every innermost ``{...}`` span (one containing no nested braces) is
    extracted. Spans that do not parse as JSON on their own are discarded,
    so the returned text is always valid JSON.

    Args:
        text: Raw text that may contain JSON object fragments.

    Returns:
        A JSON array literal of the salvaged objects, joined with ``,`` in
        their order of appearance; ``'[]'`` when none can be salvaged.
    """
    salvaged = []
    for fragment in re.findall(r'\{[^{}]*\}', text):
        try:
            json.loads(fragment)
        except JSONDecodeError:
            # A brace-delimited span is not necessarily JSON; keeping it
            # would make the whole array unparseable for the caller.
            continue
        salvaged.append(fragment)
    return '[' + ','.join(salvaged) + ']'
def
update_state_with_search_results
(
search_results
:
List
[
Dict
[
str
,
Any
]],
paragraph_index
:
int
,
state
:
Any
)
->
Any
:
"""
...
...
streamlit_app.py
View file @
5d7f417
...
...
@@ -13,7 +13,7 @@ import json
sys
.
path
.
insert
(
0
,
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
'.'
))
from
src
import
DeepSearchAgent
,
Config
from
config
import
DEEPSEEK_API_KEY
,
DEEPSEEK_API_KEY_2
,
TAVILY_API_KEY
from
config
import
DEEPSEEK_API_KEY
,
TAVILY_API_KEY
def
main
():
...
...
Please
register
or
login
to post a comment