Toggle navigation
Toggle navigation
This project
Loading...
Sign in
万朱浩
/
Venue-Ops
Go to a project
Toggle navigation
Projects
Groups
Snippets
Help
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
马一丁
2025-11-15 17:46:42 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
a12ac4234dcb7a7bafb3feca5a2152d0dc39f031
a12ac423
1 parent
cab812e2
Optimize the Handling of Low Word Counts
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
114 additions
and
9 deletions
ReportEngine/agent.py
ReportEngine/nodes/chapter_generation_node.py
ReportEngine/agent.py
View file @
a12ac42
...
...
@@ -10,6 +10,7 @@ Report Agent主类。
import
json
import
os
from
copy
import
deepcopy
from
pathlib
import
Path
from
uuid
import
uuid4
from
datetime
import
datetime
...
...
@@ -174,6 +175,8 @@ class ReportAgent:
- 章节存储、IR装订、渲染器等产出链路;
- 状态管理、日志、输入输出校验与持久化。
"""
_CONTENT_SPARSE_MIN_ATTEMPTS
=
3
_CONTENT_SPARSE_WARNING_TEXT
=
"本章LLM生成的内容字数可能过低,必要时可以尝试重新运行程序。"
def
__init__
(
self
,
config
:
Optional
[
Settings
]
=
None
):
"""
...
...
@@ -466,7 +469,9 @@ class ReportAgent:
emit
(
'stage'
,
{
'stage'
:
'storage_ready'
,
'run_dir'
:
str
(
run_dir
)})
chapters
=
[]
chapter_max_attempts
=
max
(
1
,
self
.
config
.
CHAPTER_JSON_MAX_ATTEMPTS
)
chapter_max_attempts
=
max
(
self
.
_CONTENT_SPARSE_MIN_ATTEMPTS
,
self
.
config
.
CHAPTER_JSON_MAX_ATTEMPTS
)
for
section
in
sections
:
logger
.
info
(
f
"生成章节: {section.title}"
)
emit
(
'chapter_status'
,
{
...
...
@@ -492,6 +497,9 @@ class ReportAgent:
chapter_payload
:
Dict
[
str
,
Any
]
|
None
=
None
attempt
=
1
best_sparse_candidate
:
Dict
[
str
,
Any
]
|
None
=
None
best_sparse_score
=
-
1
fallback_used
=
False
while
attempt
<=
chapter_max_attempts
:
try
:
chapter_payload
=
self
.
chapter_generation_node
.
run
(
...
...
@@ -506,6 +514,19 @@ class ReportAgent:
"content_sparse"
if
isinstance
(
structured_error
,
ChapterContentError
)
else
"json_parse"
)
readable_label
=
"内容密度异常"
if
error_kind
==
"content_sparse"
else
"JSON解析失败"
if
isinstance
(
structured_error
,
ChapterContentError
):
candidate
=
getattr
(
structured_error
,
"chapter_payload"
,
None
)
candidate_score
=
getattr
(
structured_error
,
"body_characters"
,
0
)
or
0
if
isinstance
(
candidate
,
dict
)
and
candidate_score
>=
0
:
if
candidate_score
>
best_sparse_score
:
best_sparse_candidate
=
deepcopy
(
candidate
)
best_sparse_score
=
candidate_score
will_fallback
=
(
isinstance
(
structured_error
,
ChapterContentError
)
and
attempt
>=
chapter_max_attempts
and
attempt
>=
self
.
_CONTENT_SPARSE_MIN_ATTEMPTS
and
best_sparse_candidate
is
not
None
)
logger
.
warning
(
"章节 {title} {label}(第 {attempt}/{total} 次尝试): {error}"
,
title
=
section
.
title
,
...
...
@@ -514,14 +535,27 @@ class ReportAgent:
total
=
chapter_max_attempts
,
error
=
structured_error
,
)
emit
(
'chapter_status'
,
{
status_value
=
'retrying'
if
attempt
<
chapter_max_attempts
or
will_fallback
else
'error'
status_payload
=
{
'chapterId'
:
section
.
chapter_id
,
'title'
:
section
.
title
,
'status'
:
'retrying'
if
attempt
<
chapter_max_attempts
else
'error'
,
'status'
:
status_value
,
'attempt'
:
attempt
,
'error'
:
str
(
structured_error
),
'reason'
:
error_kind
,
})
}
if
will_fallback
:
status_payload
[
'warning'
]
=
'content_sparse_fallback_pending'
emit
(
'chapter_status'
,
status_payload
)
if
will_fallback
:
logger
.
warning
(
"章节 {title} 达到最大尝试次数,保留字数最多(约 {score} 字)的版本作为兜底输出"
,
title
=
section
.
title
,
score
=
best_sparse_score
,
)
chapter_payload
=
self
.
_finalize_sparse_chapter
(
best_sparse_candidate
)
fallback_used
=
True
break
if
attempt
>=
chapter_max_attempts
:
raise
attempt
+=
1
...
...
@@ -553,12 +587,16 @@ class ReportAgent:
f
"{section.title} 章节JSON在 {chapter_max_attempts} 次尝试后仍无法解析"
)
chapters
.
append
(
chapter_payload
)
emit
(
'chapter_status'
,
{
completion_status
=
{
'chapterId'
:
section
.
chapter_id
,
'title'
:
section
.
title
,
'status'
:
'completed'
,
'attempt'
:
attempt
,
})
}
if
fallback_used
:
completion_status
[
'warning'
]
=
'content_sparse_fallback'
completion_status
[
'warningMessage'
]
=
self
.
_CONTENT_SPARSE_WARNING_TEXT
emit
(
'chapter_status'
,
completion_status
)
document_ir
=
self
.
document_composer
.
build_document
(
report_id
,
...
...
@@ -779,6 +817,48 @@ class ReportAgent:
]
return
any
(
keyword
in
normalized
for
keyword
in
keywords
)
def
_finalize_sparse_chapter
(
self
,
chapter
:
Optional
[
Dict
[
str
,
Any
]])
->
Dict
[
str
,
Any
]:
"""
构造内容稀疏兜底章节:复制原始payload并插入温馨提示段落。
"""
safe_chapter
=
deepcopy
(
chapter
or
{})
if
not
isinstance
(
safe_chapter
,
dict
):
safe_chapter
=
{}
self
.
_ensure_sparse_warning_block
(
safe_chapter
)
return
safe_chapter
def
_ensure_sparse_warning_block
(
self
,
chapter
:
Dict
[
str
,
Any
])
->
None
:
"""
将提示段落插在章节标题后,提醒读者该章字数偏少。
"""
warning_block
=
{
"type"
:
"paragraph"
,
"inlines"
:
[
{
"text"
:
self
.
_CONTENT_SPARSE_WARNING_TEXT
,
"marks"
:
[{
"type"
:
"italic"
}],
}
],
"meta"
:
{
"role"
:
"content-sparse-warning"
},
}
blocks
=
chapter
.
get
(
"blocks"
)
if
isinstance
(
blocks
,
list
)
and
blocks
:
inserted
=
False
for
idx
,
block
in
enumerate
(
blocks
):
if
isinstance
(
block
,
dict
)
and
block
.
get
(
"type"
)
==
"heading"
:
blocks
.
insert
(
idx
+
1
,
warning_block
)
inserted
=
True
break
if
not
inserted
:
blocks
.
insert
(
0
,
warning_block
)
else
:
chapter
[
"blocks"
]
=
[
warning_block
]
meta
=
chapter
.
get
(
"meta"
)
if
isinstance
(
meta
,
dict
):
meta
[
"contentSparseWarning"
]
=
True
else
:
chapter
[
"meta"
]
=
{
"contentSparseWarning"
:
True
}
def
_stringify
(
self
,
value
:
Any
)
->
str
:
"""
安全地将对象转成字符串。
...
...
ReportEngine/nodes/chapter_generation_node.py
View file @
a12ac42
...
...
@@ -55,6 +55,20 @@ class ChapterContentError(ValueError):
当LLM仅输出标题或正文不足以支撑一章时触发,驱动重试以保证报告质量。
"""
def __init__(
    self,
    message: str,
    chapter: Optional[Dict[str, Any]] = None,
    body_characters: int = 0,
    narrative_characters: int = 0,
    non_heading_blocks: int = 0,
):
    """
    Create the error and attach the rejected chapter plus its size metrics.

    Args:
        message: Human-readable description, forwarded to ``ValueError``.
        chapter: The chapter payload that failed the check, stored as
            ``chapter_payload``.
        body_characters: Body character count; falsy values become 0.
        narrative_characters: Narrative character count; falsy values
            become 0.
        non_heading_blocks: Number of non-heading blocks; falsy values
            become 0.
    """
    super().__init__(message)
    self.chapter_payload: Optional[Dict[str, Any]] = chapter
    # Normalise every metric to a plain int; None and other falsy inputs map to 0.
    self.body_characters: int = int(body_characters) if body_characters else 0
    self.narrative_characters: int = (
        int(narrative_characters) if narrative_characters else 0
    )
    self.non_heading_blocks: int = (
        int(non_heading_blocks) if non_heading_blocks else 0
    )
class
ChapterGenerationNode
(
BaseNode
):
"""
...
...
@@ -897,7 +911,13 @@ class ChapterGenerationNode(BaseNode):
"""
blocks
=
chapter.get
("
blocks
")
if
not
isinstance
(
blocks
,
list
)
or
not
blocks
:
raise
ChapterContentError
("章节缺少正文区块,无法输出内容")
raise
ChapterContentError
(
"章节缺少正文区块,无法输出内容",
chapter
=
chapter
,
body_characters
=0,
narrative_characters
=0,
non_heading_blocks
=0,
)
non_heading_blocks
=
[
block
...
...
@@ -905,16 +925,21 @@ class ChapterGenerationNode(BaseNode):
if
isinstance
(
block
,
dict
)
and
block.get
("
type
")
not
in
{"
heading
",
"
divider
",
"
toc
"}
]
valid_block_count
=
len
(
non_heading_blocks
)
body_characters
=
self._count_body_characters
(
blocks
)
narrative_characters
=
self._count_narrative_characters
(
blocks
)
if
(
len
(
non_heading_blocks
)
<
self._MIN_NON_HEADING_BLOCKS
valid_block_count
<
self._MIN_NON_HEADING_BLOCKS
or
body_characters
<
self._MIN_BODY_CHARACTERS
or
narrative_characters
<
self._MIN_NARRATIVE_CHARACTERS
):
raise
ChapterContentError
(
f
"{
chapter.get
('
title
')
or
'该章节'}
正文不足:有效区块
{
len
(
non_heading_blocks
)}
个,估算字符数
{
body_characters
},叙述性字符数
{
narrative_characters
}"
f
"{
chapter.get
('
title
')
or
'该章节'}
正文不足:有效区块
{
valid_block_count
}
个,估算字符数
{
body_characters
},叙述性字符数
{
narrative_characters
}",
chapter
=
chapter
,
body_characters
=
body_characters
,
narrative_characters
=
narrative_characters
,
non_heading_blocks
=
valid_block_count
,
)
def
_count_body_characters
(
self
,
blocks
:
Any
)
->
int
:
...
...
Please
register
or
login
to post a comment