Toggle navigation
Toggle navigation
This project
Loading...
Sign in
万朱浩
/
Venue-Ops
Go to a project
Toggle navigation
Projects
Groups
Snippets
Help
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
马一丁
2025-11-15 14:45:20 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
fa1ebc07ec4810947c5c585ea6d21459641a8081
fa1ebc07
1 parent
bae13bf4
Optimize Re-output Logic
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
61 additions
and
16 deletions
ReportEngine/nodes/chapter_generation_node.py
ReportEngine/nodes/chapter_generation_node.py
View file @
fa1ebc0
...
...
@@ -90,7 +90,8 @@ class ChapterGenerationNode(BaseNode):
}
# 章节若仅包含标题或字符过少则视为失败,强制LLM重新生成
_MIN_NON_HEADING_BLOCKS
=
2
_MIN_BODY_CHARACTERS
=
400
_MIN_BODY_CHARACTERS
=
600
_MIN_NARRATIVE_CHARACTERS
=
300
_PARAGRAPH_FRAGMENT_MAX_CHARS
=
80
_PARAGRAPH_FRAGMENT_NO_TERMINATOR_MAX_CHARS
=
240
_TERMINATION_PUNCTUATION
=
set
(
"。!?!?;;……"
)
...
...
@@ -659,10 +660,15 @@ class ChapterGenerationNode(BaseNode):
and
block.get
("
type
")
not
in
{"
heading
",
"
divider
",
"
toc
"}
]
body_characters
=
self._count_body_characters
(
blocks
)
narrative_characters
=
self._count_narrative_characters
(
blocks
)
if
len
(
non_heading_blocks
)
<
self._MIN_NON_HEADING_BLOCKS
or
body_characters
<
self._MIN_BODY_CHARACTERS
:
if
(
len
(
non_heading_blocks
)
<
self._MIN_NON_HEADING_BLOCKS
or
body_characters
<
self._MIN_BODY_CHARACTERS
or
narrative_characters
<
self._MIN_NARRATIVE_CHARACTERS
):
raise
ChapterContentError
(
f
"{
chapter.get
('
title
')
or
'该章节'}
正文不足:有效区块
{
len
(
non_heading_blocks
)}
个,估算字符数
{
body_characters
}"
f
"{
chapter.get
('
title
')
or
'该章节'}
正文不足:有效区块
{
len
(
non_heading_blocks
)}
个,估算字符数
{
body_characters
}
,叙述性字符数
{
narrative_characters
}
"
)
def
_count_body_characters
(
self
,
blocks
:
Any
)
->
int
:
...
...
@@ -696,19 +702,7 @@ class ChapterGenerationNode(BaseNode):
return
0
if
block_type
==
"
paragraph
":
inlines
=
node.get
("
inlines
")
if
isinstance
(
inlines
,
list
):
total
=
0
for
run
in
inlines
:
if
isinstance
(
run
,
dict
):
text
=
run.get
("
text
")
if
isinstance
(
text
,
str
):
total
+=
len
(
text.strip
())
return
total
text_value
=
node.get
("
text
")
if
isinstance
(
text_value
,
str
):
return
len
(
text_value.strip
())
return
len
(
self._extract_block_text
(
node
)
.strip
())
return
self._estimate_paragraph_characters
(
node
)
if
block_type
==
"
list
":
total
=
0
...
...
@@ -735,6 +729,57 @@ class ChapterGenerationNode(BaseNode):
return
walk
(
blocks
)
def
_count_narrative_characters
(
self
,
blocks
:
Any
)
->
int
:
"""
统计
paragraph
/
callout
/
list
/
blockquote
等叙述性结构的字符数,避免被表格/图表“刷长”。
"""
def
walk
(
node
:
Any
)
->
int
:
if
node
is
None
:
return
0
if
isinstance
(
node
,
list
):
return
sum
(
walk
(
item
)
for
item
in
node
)
if
isinstance
(
node
,
str
):
return
len
(
node.strip
())
if
not
isinstance
(
node
,
dict
):
return
0
block_type
=
node.get
("
type
")
if
block_type
==
"
paragraph
":
return
self._estimate_paragraph_characters
(
node
)
if
block_type
==
"
list
":
total
=
0
for
item
in
node.get
("
items
",
[]):
total
+=
walk
(
item
)
return
total
if
block_type
in
{"
callout
",
"
blockquote
"}:
return
walk
(
node.get
("
blocks
"))
#
list
项可能是匿名
dict
,兼容性遍历
if
block_type
is
None
:
nested
=
node.get
("
blocks
")
if
isinstance
(
nested
,
list
):
return
walk
(
nested
)
return
0
return
walk
(
blocks
)
def
_estimate_paragraph_characters
(
self
,
block
:
Dict
[
str
,
Any
])
->
int
:
"""提取
paragraph
文本长度,复用在多种统计中。"""
inlines
=
block.get
("
inlines
")
if
isinstance
(
inlines
,
list
):
total
=
0
for
run
in
inlines
:
if
isinstance
(
run
,
dict
):
text
=
run.get
("
text
")
if
isinstance
(
text
,
str
):
total
+=
len
(
text.strip
())
return
total
text_value
=
block.get
("
text
")
if
isinstance
(
text_value
,
str
):
return
len
(
text_value.strip
())
return
len
(
self._extract_block_text
(
block
)
.strip
())
def
_sanitize_block_content
(
self
,
block
:
Dict
[
str
,
Any
]):
"""根据类型做精细化修复,例如清理
paragraph
内的非法
inline
mark
"""
block_type
=
block.get
("
type
")
...
...
Please
register
or
login
to post a comment