马一丁

Add Comments

... ... @@ -35,6 +35,7 @@ class FileCountBaseline:
"""文件数量基准管理器"""
def __init__(self):
"""在初始化阶段加载或创建文件数量基准快照"""
self.baseline_file = 'logs/report_baseline.json'
self.baseline_data = self._load_baseline()
... ...
... ... @@ -29,6 +29,7 @@ class ChapterRecord:
updated_at: str = field(default_factory=lambda: datetime.utcnow().isoformat() + "Z")
def to_dict(self) -> Dict[str, object]:
"""将记录转换为便于写入manifest.json的序列化字典"""
return {
"chapterId": self.chapter_id,
"slug": self.slug,
... ... @@ -54,6 +55,12 @@ class ChapterStorage:
"""
def __init__(self, base_dir: str):
"""
创建章节存储器。
Args:
base_dir: 所有输出run目录的根路径
"""
self.base_dir = Path(base_dir)
self.base_dir.mkdir(parents=True, exist_ok=True)
self._manifests: Dict[str, Dict[str, object]] = {}
... ... @@ -133,6 +140,7 @@ class ChapterStorage:
return final_path
def load_chapters(self, run_dir: Path) -> List[Dict[str, object]]:
"""从指定run目录读取全部chapter.json并按order排序返回"""
payloads: List[Dict[str, object]] = []
for child in sorted(run_dir.iterdir()):
if not child.is_dir():
... ... @@ -161,6 +169,7 @@ class ChapterStorage:
# ======== 内部工具 ========
def _chapter_dir(self, run_dir: Path, slug: str, order: int) -> Path:
"""根据slug/order生成稳定的章节目录,确保各章分隔存盘"""
safe_slug = self._safe_slug(slug)
folder = f"{order:03d}-{safe_slug}"
path = run_dir / folder
... ... @@ -168,25 +177,31 @@ class ChapterStorage:
return path
def _safe_slug(self, slug: str) -> str:
"""移除危险字符,避免生成非法文件夹名"""
slug = slug.replace(" ", "-").replace("/", "-")
return slug or "section"
def _raw_stream_path(self, chapter_dir: Path) -> Path:
"""返回某章节流式输出对应的raw文件路径"""
return chapter_dir / "stream.raw"
def _key(self, run_dir: Path) -> str:
"""将run目录解析为字典缓存的键,避免重复读取磁盘"""
return str(run_dir.resolve())
def _manifest_path(self, run_dir: Path) -> Path:
"""获取manifest.json的实际文件路径"""
return run_dir / "manifest.json"
def _write_manifest(self, run_dir: Path, manifest: Dict[str, object]):
"""将内存中的manifest快照全量写回磁盘"""
self._manifest_path(run_dir).write_text(
json.dumps(manifest, ensure_ascii=False, indent=2),
encoding="utf-8",
)
def _read_manifest(self, run_dir: Path) -> Dict[str, object]:
"""从磁盘读取已有manifest,用于进程重启或多实例协作"""
manifest_path = self._manifest_path(run_dir)
if manifest_path.exists():
return json.loads(manifest_path.read_text(encoding="utf-8"))
... ...
... ... @@ -16,6 +16,7 @@ class DocumentComposer:
"""
def __init__(self):
    """
    Create the composer with an empty set of already-used anchors.

    The set is kept so that anchors are not duplicated.
    """
    self._seen_anchors: Set[str] = set()
def build_document(
... ...
... ... @@ -30,6 +30,7 @@ class TemplateSection:
outline: List[str] = field(default_factory=list)
def to_dict(self) -> dict:
"""将章节实体序列化为字典,方便传给LLM或落盘"""
return {
"title": self.title,
"slug": self.slug,
... ... @@ -185,6 +186,7 @@ def _build_slug(number: str, title: str) -> str:
def _slugify_text(text: str) -> str:
"""对任意文本做降噪与转写,得到URL友好的slug片段"""
text = unicodedata.normalize("NFKD", text)
text = text.replace("·", "-").replace(" ", "-")
text = re.sub(r"[^0-9a-zA-Z\u4e00-\u9fff-]+", "-", text)
... ... @@ -193,6 +195,7 @@ def _slugify_text(text: str) -> str:
def _ensure_unique_slug(slug: str, used: set) -> str:
"""若slug重复则自动追加序号,直到在used集合中唯一"""
if slug not in used:
used.add(slug)
return slug
... ...
... ... @@ -40,6 +40,14 @@ class ReportTask:
"""报告生成任务"""
def __init__(self, query: str, task_id: str, custom_template: str = ""):
"""
初始化任务对象,记录查询词、自定义模板与运行期元数据。
Args:
query: 最终需要生成的报告主题
task_id: 任务唯一ID,通常由时间戳构造
custom_template: 可选的自定义Markdown模板
"""
self.task_id = task_id
self.query = query
self.custom_template = custom_template
... ... @@ -470,6 +478,7 @@ def get_templates():
# 错误处理
@report_bp.errorhandler(404)
def not_found(error):
"""404兜底处理:保证接口统一返回JSON结构"""
logger.exception(f"API端点不存在: {str(error)}")
return jsonify({
'success': False,
... ... @@ -479,6 +488,7 @@ def not_found(error):
@report_bp.errorhandler(500)
def internal_error(error):
"""500兜底处理:捕获未被主动捕获的异常"""
logger.exception(f"服务器内部错误: {str(error)}")
return jsonify({
'success': False,
... ...
... ... @@ -23,6 +23,7 @@ class IRValidator:
"""
def __init__(self, schema_version: str = IR_VERSION):
    """
    Remember which schema version this validator works with.

    Args:
        schema_version: Schema version identifier; defaults to IR_VERSION.
            Stored so that several versions can coexist later.
    """
    self.schema_version = schema_version
# ======== 对外接口 ========
... ...
"""
Unified OpenAI-compatible LLM client for the Report Engine, with retry support.
Report Engine 默认的OpenAI兼容LLM客户端封装,内置重试/流式能力。
"""
import os
... ... @@ -19,7 +19,9 @@ try:
from retry_helper import with_retry, LLM_RETRY_CONFIG
except ImportError:
def with_retry(config=None):
    """
    Stand-in for with_retry, used when retry_helper cannot be imported.

    It is called the same way as the real decorator factory, but ignores
    config and adds no retry behaviour.

    Args:
        config: Retry configuration; unused here.

    Returns:
        A decorator that hands back the function it receives, unchanged.
    """
    def passthrough(target):
        """Return target as-is so the code runs without the retry helper."""
        return target

    return passthrough
... ... @@ -27,9 +29,17 @@ except ImportError:
class LLMClient:
"""Minimal wrapper around the OpenAI-compatible chat completion API."""
"""针对OpenAI Chat Completion API的轻量封装,统一Report Engine调用入口。"""
def __init__(self, api_key: str, model_name: str, base_url: Optional[str] = None):
"""
初始化LLM客户端并保存基础连接信息。
Args:
api_key: 用于鉴权的API Token
model_name: 具体模型ID,用于定位供应商能力
base_url: 自定义兼容接口地址,默认为OpenAI官方
"""
if not api_key:
raise ValueError("Report Engine LLM API key is required.")
if not model_name:
... ... @@ -55,6 +65,17 @@ class LLMClient:
@with_retry(LLM_RETRY_CONFIG)
def invoke(self, system_prompt: str, user_prompt: str, **kwargs) -> str:
"""
以非流式方式调用LLM,并返回一次性完成的完整响应。
Args:
system_prompt: 系统角色提示
user_prompt: 用户高优先级指令
**kwargs: 允许透传temperature/top_p等采样参数
Returns:
去除首尾空白后的LLM响应文本
"""
messages = [
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_prompt},
... ... @@ -142,11 +163,13 @@ class LLMClient:
@staticmethod
def validate_response(response: Optional[str]) -> str:
"""兜底处理None/空白字符串,防止上层逻辑崩溃"""
if response is None:
return ""
return response.strip()
def get_model_info(self) -> Dict[str, Any]:
"""以字典形式返回当前客户端的模型/提供方/基础URL信息"""
return {
"provider": self.provider,
"model": self.model_name,
... ...
... ... @@ -34,6 +34,14 @@ class ChapterGenerationNode(BaseNode):
_COLON_EQUALS_PATTERN = re.compile(r'(":\s*)=')
def __init__(self, llm_client, validator: IRValidator, storage: ChapterStorage):
"""
记录LLM客户端/校验器/章节存储器,便于run方法调度。
Args:
llm_client: 实际调用大模型的客户端
validator: IR结构校验器
storage: 负责章节流式落盘的存储器
"""
super().__init__(llm_client, "ChapterGenerationNode")
self.validator = validator
self.storage = storage
... ... @@ -385,6 +393,7 @@ class ChapterGenerationNode(BaseNode):
"""修正常见的结构性错误(例如list.items嵌套过深)"""
def walk(blocks: List[Dict[str, Any]] | None):
"""递归检查并修复嵌套结构,保证每个block合法"""
if not isinstance(blocks, list):
return
for block in blocks:
... ... @@ -485,6 +494,7 @@ class ChapterGenerationNode(BaseNode):
@staticmethod
def _as_paragraph_block(text: str) -> Dict[str, Any]:
"""将字符串快速包装成paragraph block,方便统一处理"""
return {
"type": "paragraph",
"inlines": [{"text": text or ""}],
... ...
... ... @@ -21,6 +21,7 @@ class DocumentLayoutNode(BaseNode):
"""负责生成全局标题、目录与Hero设计"""
def __init__(self, llm_client):
    """
    Initialize the node by delegating to the base-class constructor.

    Args:
        llm_client: LLM client, passed to the base class together with the
            node name "DocumentLayoutNode".
    """
    # NOTE(review): the name is presumably used by BaseNode for logging;
    # BaseNode is not visible here — confirm.
    super().__init__(llm_client, "DocumentLayoutNode")
def run(
... ...
... ... @@ -21,6 +21,7 @@ class WordBudgetNode(BaseNode):
"""规划各章节字数与重点"""
def __init__(self, llm_client):
    """
    Initialize the node by delegating to the base-class constructor.

    Args:
        llm_client: LLM client, passed to the base class together with the
            node name "WordBudgetNode".
    """
    # NOTE(review): presumably the client is kept by BaseNode for requests
    # made during run; BaseNode is not visible here — confirm.
    super().__init__(llm_client, "WordBudgetNode")
def run(
... ...
... ... @@ -13,6 +13,7 @@ class HTMLRenderer:
"""Document IR → HTML 渲染器"""
def __init__(self, config: Dict[str, Any] | None = None):
"""初始化渲染器缓存并允许注入额外配置(如主题覆盖)"""
self.config = config or {}
self.document: Dict[str, Any] = {}
self.widget_scripts: List[str] = []
... ...
"""
Configuration management module for the Report Engine.
Report Engine 配置模块,统一读取环境变量并提供类型安全的访问方式。
"""
import os
... ... @@ -34,6 +34,7 @@ class Settings(BaseSettings):
CHART_STYLE: str = Field("modern", description="图表样式:modern/classic/")
class Config:
"""Pydantic配置:允许从.env读取并兼容大小写"""
env_file = ".env"
env_prefix = ""
case_sensitive = False
... ... @@ -43,6 +44,7 @@ settings = Settings()
def print_config(config: Settings):
"""将当前配置项按人类可读格式输出到日志,方便排障"""
message = ""
message += "\n=== Report Engine 配置 ===\n"
message += f"LLM 模型: {config.REPORT_ENGINE_MODEL_NAME}\n"
... ...