Toggle navigation
Toggle navigation
This project
Loading...
Sign in
万朱浩
/
Venue-Ops
Go to a project
Toggle navigation
Projects
Groups
Snippets
Help
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
马一丁
2025-11-13 11:37:13 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
3e4aa6366d3510d26056208115f909f90f27cec4
3e4aa636
1 parent
4846b1f7
Add Comments
Hide whitespace changes
Inline
Side-by-side
Showing
12 changed files
with
72 additions
and
3 deletions
ReportEngine/agent.py
ReportEngine/core/chapter_storage.py
ReportEngine/core/stitcher.py
ReportEngine/core/template_parser.py
ReportEngine/flask_interface.py
ReportEngine/ir/validator.py
ReportEngine/llms/base.py
ReportEngine/nodes/chapter_generation_node.py
ReportEngine/nodes/document_layout_node.py
ReportEngine/nodes/word_budget_node.py
ReportEngine/renderers/html_renderer.py
ReportEngine/utils/config.py
ReportEngine/agent.py
View file @
3e4aa63
...
...
@@ -35,6 +35,7 @@ class FileCountBaseline:
"""文件数量基准管理器"""
def
__init__
(
self
):
"""在初始化阶段加载或创建文件数量基准快照"""
self
.
baseline_file
=
'logs/report_baseline.json'
self
.
baseline_data
=
self
.
_load_baseline
()
...
...
ReportEngine/core/chapter_storage.py
View file @
3e4aa63
...
...
@@ -29,6 +29,7 @@ class ChapterRecord:
updated_at
:
str
=
field
(
default_factory
=
lambda
:
datetime
.
utcnow
()
.
isoformat
()
+
"Z"
)
def
to_dict
(
self
)
->
Dict
[
str
,
object
]:
"""将记录转换为便于写入manifest.json的序列化字典"""
return
{
"chapterId"
:
self
.
chapter_id
,
"slug"
:
self
.
slug
,
...
...
@@ -54,6 +55,12 @@ class ChapterStorage:
"""
def
__init__
(
self
,
base_dir
:
str
):
"""
创建章节存储器。
Args:
base_dir: 所有输出run目录的根路径
"""
self
.
base_dir
=
Path
(
base_dir
)
self
.
base_dir
.
mkdir
(
parents
=
True
,
exist_ok
=
True
)
self
.
_manifests
:
Dict
[
str
,
Dict
[
str
,
object
]]
=
{}
...
...
@@ -133,6 +140,7 @@ class ChapterStorage:
return
final_path
def
load_chapters
(
self
,
run_dir
:
Path
)
->
List
[
Dict
[
str
,
object
]]:
"""从指定run目录读取全部chapter.json并按order排序返回"""
payloads
:
List
[
Dict
[
str
,
object
]]
=
[]
for
child
in
sorted
(
run_dir
.
iterdir
()):
if
not
child
.
is_dir
():
...
...
@@ -161,6 +169,7 @@ class ChapterStorage:
# ======== 内部工具 ========
def
_chapter_dir
(
self
,
run_dir
:
Path
,
slug
:
str
,
order
:
int
)
->
Path
:
"""根据slug/order生成稳定的章节目录,确保各章分隔存盘"""
safe_slug
=
self
.
_safe_slug
(
slug
)
folder
=
f
"{order:03d}-{safe_slug}"
path
=
run_dir
/
folder
...
...
@@ -168,25 +177,31 @@ class ChapterStorage:
return
path
def
_safe_slug
(
self
,
slug
:
str
)
->
str
:
"""移除危险字符,避免生成非法文件夹名"""
slug
=
slug
.
replace
(
" "
,
"-"
)
.
replace
(
"/"
,
"-"
)
return
slug
or
"section"
def
_raw_stream_path
(
self
,
chapter_dir
:
Path
)
->
Path
:
"""返回某章节流式输出对应的raw文件路径"""
return
chapter_dir
/
"stream.raw"
def
_key
(
self
,
run_dir
:
Path
)
->
str
:
"""将run目录解析为字典缓存的键,避免重复读取磁盘"""
return
str
(
run_dir
.
resolve
())
def
_manifest_path
(
self
,
run_dir
:
Path
)
->
Path
:
"""获取manifest.json的实际文件路径"""
return
run_dir
/
"manifest.json"
def _write_manifest(self, run_dir: Path, manifest: Dict[str, object]):
    """Overwrite the run's ``manifest.json`` with the given manifest.

    The whole ``manifest`` dict is serialised as UTF-8 JSON with a
    two-space indent; non-ASCII characters are written as-is rather than
    ``\\uXXXX``-escaped.

    Args:
        run_dir: Run directory that holds ``manifest.json``.
        manifest: Complete manifest snapshot to persist.
    """
    target = self._manifest_path(run_dir)
    serialized = json.dumps(manifest, ensure_ascii=False, indent=2)
    target.write_text(serialized, encoding="utf-8")
def
_read_manifest
(
self
,
run_dir
:
Path
)
->
Dict
[
str
,
object
]:
"""从磁盘读取已有manifest,用于进程重启或多实例协作"""
manifest_path
=
self
.
_manifest_path
(
run_dir
)
if
manifest_path
.
exists
():
return
json
.
loads
(
manifest_path
.
read_text
(
encoding
=
"utf-8"
))
...
...
ReportEngine/core/stitcher.py
View file @
3e4aa63
...
...
@@ -16,6 +16,7 @@ class DocumentComposer:
"""
def
__init__
(
self
):
"""初始化装订器并记录已使用的锚点,避免重复"""
self
.
_seen_anchors
:
Set
[
str
]
=
set
()
def
build_document
(
...
...
ReportEngine/core/template_parser.py
View file @
3e4aa63
...
...
@@ -30,6 +30,7 @@ class TemplateSection:
outline
:
List
[
str
]
=
field
(
default_factory
=
list
)
def
to_dict
(
self
)
->
dict
:
"""将章节实体序列化为字典,方便传给LLM或落盘"""
return
{
"title"
:
self
.
title
,
"slug"
:
self
.
slug
,
...
...
@@ -185,6 +186,7 @@ def _build_slug(number: str, title: str) -> str:
def
_slugify_text
(
text
:
str
)
->
str
:
"""对任意文本做降噪与转写,得到URL友好的slug片段"""
text
=
unicodedata
.
normalize
(
"NFKD"
,
text
)
text
=
text
.
replace
(
"·"
,
"-"
)
.
replace
(
" "
,
"-"
)
text
=
re
.
sub
(
r"[^0-9a-zA-Z
\
u4e00-
\
u9fff-]+"
,
"-"
,
text
)
...
...
@@ -193,6 +195,7 @@ def _slugify_text(text: str) -> str:
def
_ensure_unique_slug
(
slug
:
str
,
used
:
set
)
->
str
:
"""若slug重复则自动追加序号,直到在used集合中唯一"""
if
slug
not
in
used
:
used
.
add
(
slug
)
return
slug
...
...
ReportEngine/flask_interface.py
View file @
3e4aa63
...
...
@@ -40,6 +40,14 @@ class ReportTask:
"""报告生成任务"""
def
__init__
(
self
,
query
:
str
,
task_id
:
str
,
custom_template
:
str
=
""
):
"""
初始化任务对象,记录查询词、自定义模板与运行期元数据。
Args:
query: 最终需要生成的报告主题
task_id: 任务唯一ID,通常由时间戳构造
custom_template: 可选的自定义Markdown模板
"""
self
.
task_id
=
task_id
self
.
query
=
query
self
.
custom_template
=
custom_template
...
...
@@ -470,6 +478,7 @@ def get_templates():
# 错误处理
@report_bp.errorhandler
(
404
)
def
not_found
(
error
):
"""404兜底处理:保证接口统一返回JSON结构"""
logger
.
exception
(
f
"API端点不存在: {str(error)}"
)
return
jsonify
({
'success'
:
False
,
...
...
@@ -479,6 +488,7 @@ def not_found(error):
@report_bp.errorhandler
(
500
)
def
internal_error
(
error
):
"""500兜底处理:捕获未被主动捕获的异常"""
logger
.
exception
(
f
"服务器内部错误: {str(error)}"
)
return
jsonify
({
'success'
:
False
,
...
...
ReportEngine/ir/validator.py
View file @
3e4aa63
...
...
@@ -23,6 +23,7 @@ class IRValidator:
"""
def __init__(self, schema_version: str = IR_VERSION):
    """Store the schema version this validator instance is bound to.

    Args:
        schema_version: Schema version identifier; defaults to the
            module-level ``IR_VERSION``.
    """
    self.schema_version = schema_version
# ======== 对外接口 ========
...
...
ReportEngine/llms/base.py
View file @
3e4aa63
"""
Unified OpenAI-compatible LLM client for the Report Engine, with retry support.
Report Engine 默认的OpenAI兼容LLM客户端封装,内置重试/流式能力。
"""
import
os
...
...
@@ -19,7 +19,9 @@ try:
from
retry_helper
import
with_retry
,
LLM_RETRY_CONFIG
except
ImportError
:
def with_retry(config=None):
    """No-op stand-in for the real ``with_retry`` decorator factory.

    Defined only when ``retry_helper`` cannot be imported. It keeps the
    ``@with_retry(config)`` call shape working but adds no retry
    behaviour; ``config`` is accepted and ignored.
    """
    def _passthrough(func):
        """Hand the decorated function back unchanged."""
        return func

    return _passthrough
...
...
@@ -27,9 +29,17 @@ except ImportError:
class
LLMClient
:
"""
Minimal wrapper around the OpenAI-compatible chat completion API.
"""
"""
针对OpenAI Chat Completion API的轻量封装,统一Report Engine调用入口。
"""
def
__init__
(
self
,
api_key
:
str
,
model_name
:
str
,
base_url
:
Optional
[
str
]
=
None
):
"""
初始化LLM客户端并保存基础连接信息。
Args:
api_key: 用于鉴权的API Token
model_name: 具体模型ID,用于定位供应商能力
base_url: 自定义兼容接口地址,默认为OpenAI官方
"""
if
not
api_key
:
raise
ValueError
(
"Report Engine LLM API key is required."
)
if
not
model_name
:
...
...
@@ -55,6 +65,17 @@ class LLMClient:
@with_retry
(
LLM_RETRY_CONFIG
)
def
invoke
(
self
,
system_prompt
:
str
,
user_prompt
:
str
,
**
kwargs
)
->
str
:
"""
以非流式方式调用LLM,并返回一次性完成的完整响应。
Args:
system_prompt: 系统角色提示
user_prompt: 用户高优先级指令
**kwargs: 允许透传temperature/top_p等采样参数
Returns:
去除首尾空白后的LLM响应文本
"""
messages
=
[
{
"role"
:
"system"
,
"content"
:
system_prompt
},
{
"role"
:
"user"
,
"content"
:
user_prompt
},
...
...
@@ -142,11 +163,13 @@ class LLMClient:
@staticmethod
def validate_response(response: Optional[str]) -> str:
    """Normalise an LLM response so callers always receive a string.

    Args:
        response: Raw response text, or ``None`` when nothing came back.

    Returns:
        ``""`` for ``None``; otherwise ``response`` with leading and
        trailing whitespace removed.
    """
    return "" if response is None else response.strip()
def
get_model_info
(
self
)
->
Dict
[
str
,
Any
]:
"""以字典形式返回当前客户端的模型/提供方/基础URL信息"""
return
{
"provider"
:
self
.
provider
,
"model"
:
self
.
model_name
,
...
...
ReportEngine/nodes/chapter_generation_node.py
View file @
3e4aa63
...
...
@@ -34,6 +34,14 @@ class ChapterGenerationNode(BaseNode):
_COLON_EQUALS_PATTERN
=
re
.
compile
(
r'(":
\
s*)='
)
def
__init__
(
self
,
llm_client
,
validator
:
IRValidator
,
storage
:
ChapterStorage
):
"""
记录LLM客户端/校验器/章节存储器,便于run方法调度。
Args:
llm_client: 实际调用大模型的客户端
validator: IR结构校验器
storage: 负责章节流式落盘的存储器
"""
super
()
.
__init__
(
llm_client
,
"ChapterGenerationNode"
)
self
.
validator
=
validator
self
.
storage
=
storage
...
...
@@ -385,6 +393,7 @@ class ChapterGenerationNode(BaseNode):
"""修正常见的结构性错误(例如list.items嵌套过深)"""
def
walk
(
blocks
:
List
[
Dict
[
str
,
Any
]]
|
None
):
"""递归检查并修复嵌套结构,保证每个block合法"""
if
not
isinstance
(
blocks
,
list
):
return
for
block
in
blocks
:
...
...
@@ -485,6 +494,7 @@ class ChapterGenerationNode(BaseNode):
@staticmethod
def
_as_paragraph_block
(
text
:
str
)
->
Dict
[
str
,
Any
]:
"""将字符串快速包装成paragraph block,方便统一处理"""
return
{
"type"
:
"paragraph"
,
"inlines"
:
[{
"text"
:
text
or
""
}],
...
...
ReportEngine/nodes/document_layout_node.py
View file @
3e4aa63
...
...
@@ -21,6 +21,7 @@ class DocumentLayoutNode(BaseNode):
"""负责生成全局标题、目录与Hero设计"""
def __init__(self, llm_client):
    """Initialise the node via the base-class constructor.

    Args:
        llm_client: LLM client handed to the base class together with
            the fixed node name ``"DocumentLayoutNode"``.
    """
    node_name = "DocumentLayoutNode"
    super().__init__(llm_client, node_name)
def
run
(
...
...
ReportEngine/nodes/word_budget_node.py
View file @
3e4aa63
...
...
@@ -21,6 +21,7 @@ class WordBudgetNode(BaseNode):
"""规划各章节字数与重点"""
def __init__(self, llm_client):
    """Initialise the node via the base-class constructor.

    Args:
        llm_client: LLM client handed to the base class together with
            the fixed node name ``"WordBudgetNode"``.
    """
    node_name = "WordBudgetNode"
    super().__init__(llm_client, node_name)
def
run
(
...
...
ReportEngine/renderers/html_renderer.py
View file @
3e4aa63
...
...
@@ -13,6 +13,7 @@ class HTMLRenderer:
"""Document IR → HTML 渲染器"""
def
__init__
(
self
,
config
:
Dict
[
str
,
Any
]
|
None
=
None
):
"""初始化渲染器缓存并允许注入额外配置(如主题覆盖)"""
self
.
config
=
config
or
{}
self
.
document
:
Dict
[
str
,
Any
]
=
{}
self
.
widget_scripts
:
List
[
str
]
=
[]
...
...
ReportEngine/utils/config.py
View file @
3e4aa63
"""
Configuration management module for the Report Engine.
Report Engine 配置模块,统一读取环境变量并提供类型安全的访问方式。
"""
import
os
...
...
@@ -34,6 +34,7 @@ class Settings(BaseSettings):
CHART_STYLE
:
str
=
Field
(
"modern"
,
description
=
"图表样式:modern/classic/"
)
class
Config
:
"""Pydantic配置:允许从.env读取并兼容大小写"""
env_file
=
".env"
env_prefix
=
""
case_sensitive
=
False
...
...
@@ -43,6 +44,7 @@ settings = Settings()
def
print_config
(
config
:
Settings
):
"""将当前配置项按人类可读格式输出到日志,方便排障"""
message
=
""
message
+=
"
\n
=== Report Engine 配置 ===
\n
"
message
+=
f
"LLM 模型: {config.REPORT_ENGINE_MODEL_NAME}
\n
"
...
...
Please
register
or
login
to post a comment