summary_node.py
13.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
"""
总结节点实现
负责根据搜索结果生成和更新段落内容
"""
import json
from typing import Dict, Any, List
from json.decoder import JSONDecodeError
from loguru import logger
from .base_node import StateMutationNode
from ..state.state import State
from ..prompts import SYSTEM_PROMPT_FIRST_SUMMARY, SYSTEM_PROMPT_REFLECTION_SUMMARY
from ..utils.text_processing import (
remove_reasoning_from_output,
clean_json_tags,
extract_clean_response,
fix_incomplete_json,
format_search_results_for_prompt
)
# 导入论坛读取工具
import sys
import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
try:
from utils.forum_reader import get_latest_host_speech, format_host_speech_for_prompt
FORUM_READER_AVAILABLE = True
except ImportError:
FORUM_READER_AVAILABLE = False
logger.warning("无法导入forum_reader模块,将跳过HOST发言读取功能")
class FirstSummaryNode(StateMutationNode):
    """Node that generates the first summary of a paragraph from search results."""

    # Fields every input payload must contain (was duplicated in validate_input).
    REQUIRED_FIELDS = ("title", "content", "search_query", "search_results")

    def __init__(self, llm_client):
        """
        Initialize the first-summary node.

        Args:
            llm_client: LLM client used to generate the summary.
        """
        super().__init__(llm_client, "FirstSummaryNode")

    def validate_input(self, input_data: Any) -> bool:
        """Return True if *input_data* (JSON string or dict) has all required fields."""
        if isinstance(input_data, str):
            try:
                input_data = json.loads(input_data)
            except JSONDecodeError:
                return False
        if isinstance(input_data, dict):
            return all(field in input_data for field in self.REQUIRED_FIELDS)
        return False

    def _build_message(self, data: Dict[str, Any]) -> str:
        """Serialize *data* to JSON, prepending the latest HOST speech when available.

        Mutates *data* by adding a ``host_speech`` key when a speech is found.
        A failed forum read is logged and ignored (best effort, must not block
        summary generation).
        """
        if FORUM_READER_AVAILABLE:
            try:
                host_speech = get_latest_host_speech()
                if host_speech:
                    data['host_speech'] = host_speech
                    logger.info(f"已读取HOST发言,长度: {len(host_speech)}字符")
            except Exception as e:
                logger.exception(f"读取HOST发言失败: {str(e)}")
        message = json.dumps(data, ensure_ascii=False)
        if FORUM_READER_AVAILABLE and data.get('host_speech'):
            # Put the formatted HOST speech in front of the payload as context.
            message = format_host_speech_for_prompt(data['host_speech']) + "\n" + message
        return message

    def _parse_json_lenient(self, text: str) -> Any:
        """Parse *text* as JSON, attempting a repair pass on failure.

        Returns:
            The parsed object, or None when *text* is not recoverable JSON.
        """
        try:
            result = json.loads(text)
            logger.info("JSON解析成功")
            return result
        except JSONDecodeError as e:
            logger.exception(f"JSON解析失败: {str(e)}")
            fixed_json = fix_incomplete_json(text)
            if fixed_json:
                try:
                    result = json.loads(fixed_json)
                    logger.info("JSON修复成功")
                    return result
                except JSONDecodeError:
                    logger.exception("JSON修复失败,直接使用清理后的文本")
                    return None
            logger.exception("无法修复JSON,直接使用清理后的文本")
            return None

    def run(self, input_data: Any, **kwargs) -> str:
        """
        Call the LLM to produce the first summary for a paragraph.

        Args:
            input_data: JSON string or dict containing title, content,
                search_query and search_results.
            **kwargs: Extra parameters, kept for interface compatibility.

        Returns:
            The generated paragraph summary.

        Raises:
            ValueError: If *input_data* is missing required fields.
        """
        try:
            if not self.validate_input(input_data):
                raise ValueError("输入数据格式错误")
            # Copy the dict so _build_message's mutation never leaks to the caller.
            data = json.loads(input_data) if isinstance(input_data, str) else input_data.copy()
            message = self._build_message(data)
            logger.info("正在生成首次段落总结")
            response = self.llm_client.invoke(
                SYSTEM_PROMPT_FIRST_SUMMARY,
                message,
            )
            processed_response = self.process_output(response)
            logger.info("成功生成首次段落总结")
            return processed_response
        except Exception as e:
            logger.exception(f"生成首次总结失败: {str(e)}")
            raise  # bare raise preserves the original traceback

    def process_output(self, output: str) -> str:
        """
        Clean the raw LLM output and extract the paragraph content.

        Falls back to the cleaned text when the output is not valid (or
        repairable) JSON or lacks the expected key, and to a fixed error
        message on any other failure.

        Args:
            output: Raw LLM output.

        Returns:
            The paragraph content.
        """
        try:
            cleaned_output = remove_reasoning_from_output(output)
            cleaned_output = clean_json_tags(cleaned_output)
            # Logged for debugging prompt/format issues.
            logger.info(f"清理后的输出: {cleaned_output}")
            result = self._parse_json_lenient(cleaned_output)
            if isinstance(result, dict):
                paragraph_content = result.get("paragraph_latest_state", "")
                if paragraph_content:
                    return paragraph_content
            return cleaned_output
        except Exception as e:
            logger.exception(f"处理输出失败: {str(e)}")
            return "段落总结生成失败"

    def mutate_state(self, input_data: Any, state: State, paragraph_index: int, **kwargs) -> State:
        """
        Generate the first summary and write it into *state*.

        Args:
            input_data: Input payload forwarded to :meth:`run`.
            state: Current pipeline state.
            paragraph_index: Index of the paragraph to update.
            **kwargs: Extra parameters forwarded to :meth:`run`.

        Returns:
            The updated state.

        Raises:
            ValueError: If *paragraph_index* is out of range.
        """
        try:
            summary = self.run(input_data, **kwargs)
            if not 0 <= paragraph_index < len(state.paragraphs):
                raise ValueError(f"段落索引 {paragraph_index} 超出范围")
            state.paragraphs[paragraph_index].research.latest_summary = summary
            logger.info(f"已更新段落 {paragraph_index} 的首次总结")
            state.update_timestamp()
            return state
        except Exception as e:
            logger.exception(f"状态更新失败: {str(e)}")
            raise
class ReflectionSummaryNode(StateMutationNode):
    """Node that updates a paragraph summary from reflection search results."""

    # Fields every input payload must contain (was duplicated in validate_input).
    REQUIRED_FIELDS = (
        "title", "content", "search_query", "search_results", "paragraph_latest_state",
    )

    def __init__(self, llm_client):
        """
        Initialize the reflection-summary node.

        Args:
            llm_client: LLM client used to generate the updated summary.
        """
        super().__init__(llm_client, "ReflectionSummaryNode")

    def validate_input(self, input_data: Any) -> bool:
        """Return True if *input_data* (JSON string or dict) has all required fields."""
        if isinstance(input_data, str):
            try:
                input_data = json.loads(input_data)
            except JSONDecodeError:
                return False
        if isinstance(input_data, dict):
            return all(field in input_data for field in self.REQUIRED_FIELDS)
        return False

    def _build_message(self, data: Dict[str, Any]) -> str:
        """Serialize *data* to JSON, prepending the latest HOST speech when available.

        Mutates *data* by adding a ``host_speech`` key when a speech is found.
        A failed forum read is logged and ignored (best effort, must not block
        summary generation).
        """
        if FORUM_READER_AVAILABLE:
            try:
                host_speech = get_latest_host_speech()
                if host_speech:
                    data['host_speech'] = host_speech
                    logger.info(f"已读取HOST发言,长度: {len(host_speech)}字符")
            except Exception as e:
                logger.exception(f"读取HOST发言失败: {str(e)}")
        message = json.dumps(data, ensure_ascii=False)
        if FORUM_READER_AVAILABLE and data.get('host_speech'):
            # Put the formatted HOST speech in front of the payload as context.
            message = format_host_speech_for_prompt(data['host_speech']) + "\n" + message
        return message

    def _parse_json_lenient(self, text: str) -> Any:
        """Parse *text* as JSON, attempting a repair pass on failure.

        Returns:
            The parsed object, or None when *text* is not recoverable JSON.
        """
        try:
            result = json.loads(text)
            logger.info("JSON解析成功")
            return result
        except JSONDecodeError as e:
            logger.exception(f"JSON解析失败: {str(e)}")
            fixed_json = fix_incomplete_json(text)
            if fixed_json:
                try:
                    result = json.loads(fixed_json)
                    logger.info("JSON修复成功")
                    return result
                except JSONDecodeError:
                    logger.exception("JSON修复失败,直接使用清理后的文本")
                    return None
            logger.exception("无法修复JSON,直接使用清理后的文本")
            return None

    def run(self, input_data: Any, **kwargs) -> str:
        """
        Call the LLM to produce the updated (reflection) paragraph content.

        Args:
            input_data: JSON string or dict containing title, content,
                search_query, search_results and paragraph_latest_state.
            **kwargs: Extra parameters, kept for interface compatibility.

        Returns:
            The updated paragraph content.

        Raises:
            ValueError: If *input_data* is missing required fields.
        """
        try:
            if not self.validate_input(input_data):
                raise ValueError("输入数据格式错误")
            # Copy the dict so _build_message's mutation never leaks to the caller.
            data = json.loads(input_data) if isinstance(input_data, str) else input_data.copy()
            message = self._build_message(data)
            logger.info("正在生成反思总结")
            response = self.llm_client.invoke(
                SYSTEM_PROMPT_REFLECTION_SUMMARY,
                message,
            )
            processed_response = self.process_output(response)
            logger.info("成功生成反思总结")
            return processed_response
        except Exception as e:
            logger.exception(f"生成反思总结失败: {str(e)}")
            raise  # bare raise preserves the original traceback

    def process_output(self, output: str) -> str:
        """
        Clean the raw LLM output and extract the updated paragraph content.

        Falls back to the cleaned text when the output is not valid (or
        repairable) JSON or lacks the expected key, and to a fixed error
        message on any other failure.

        Args:
            output: Raw LLM output.

        Returns:
            The updated paragraph content.
        """
        try:
            cleaned_output = remove_reasoning_from_output(output)
            cleaned_output = clean_json_tags(cleaned_output)
            # Logged for debugging prompt/format issues.
            logger.info(f"清理后的输出: {cleaned_output}")
            result = self._parse_json_lenient(cleaned_output)
            if isinstance(result, dict):
                updated_content = result.get("updated_paragraph_latest_state", "")
                if updated_content:
                    return updated_content
            return cleaned_output
        except Exception as e:
            logger.exception(f"处理输出失败: {str(e)}")
            return "反思总结生成失败"

    def mutate_state(self, input_data: Any, state: State, paragraph_index: int, **kwargs) -> State:
        """
        Generate the updated summary and write it into *state*.

        Also increments the paragraph's reflection counter.

        Args:
            input_data: Input payload forwarded to :meth:`run`.
            state: Current pipeline state.
            paragraph_index: Index of the paragraph to update.
            **kwargs: Extra parameters forwarded to :meth:`run`.

        Returns:
            The updated state.

        Raises:
            ValueError: If *paragraph_index* is out of range.
        """
        try:
            updated_summary = self.run(input_data, **kwargs)
            if not 0 <= paragraph_index < len(state.paragraphs):
                raise ValueError(f"段落索引 {paragraph_index} 超出范围")
            state.paragraphs[paragraph_index].research.latest_summary = updated_summary
            state.paragraphs[paragraph_index].research.increment_reflection()
            logger.info(f"已更新段落 {paragraph_index} 的反思总结")
            state.update_timestamp()
            return state
        except Exception as e:
            logger.exception(f"状态更新失败: {str(e)}")
            raise