马一丁

Add Comments

@@ -35,6 +35,7 @@ class FileCountBaseline: @@ -35,6 +35,7 @@ class FileCountBaseline:
35 """文件数量基准管理器""" 35 """文件数量基准管理器"""
36 36
37 def __init__(self): 37 def __init__(self):
  38 + """在初始化阶段加载或创建文件数量基准快照"""
38 self.baseline_file = 'logs/report_baseline.json' 39 self.baseline_file = 'logs/report_baseline.json'
39 self.baseline_data = self._load_baseline() 40 self.baseline_data = self._load_baseline()
40 41
@@ -29,6 +29,7 @@ class ChapterRecord: @@ -29,6 +29,7 @@ class ChapterRecord:
29 updated_at: str = field(default_factory=lambda: datetime.utcnow().isoformat() + "Z") 29 updated_at: str = field(default_factory=lambda: datetime.utcnow().isoformat() + "Z")
30 30
31 def to_dict(self) -> Dict[str, object]: 31 def to_dict(self) -> Dict[str, object]:
  32 + """将记录转换为便于写入manifest.json的序列化字典"""
32 return { 33 return {
33 "chapterId": self.chapter_id, 34 "chapterId": self.chapter_id,
34 "slug": self.slug, 35 "slug": self.slug,
@@ -54,6 +55,12 @@ class ChapterStorage: @@ -54,6 +55,12 @@ class ChapterStorage:
54 """ 55 """
55 56
56 def __init__(self, base_dir: str): 57 def __init__(self, base_dir: str):
  58 + """
  59 + 创建章节存储器。
  60 +
  61 + Args:
  62 + base_dir: 所有输出run目录的根路径
  63 + """
57 self.base_dir = Path(base_dir) 64 self.base_dir = Path(base_dir)
58 self.base_dir.mkdir(parents=True, exist_ok=True) 65 self.base_dir.mkdir(parents=True, exist_ok=True)
59 self._manifests: Dict[str, Dict[str, object]] = {} 66 self._manifests: Dict[str, Dict[str, object]] = {}
@@ -133,6 +140,7 @@ class ChapterStorage: @@ -133,6 +140,7 @@ class ChapterStorage:
133 return final_path 140 return final_path
134 141
135 def load_chapters(self, run_dir: Path) -> List[Dict[str, object]]: 142 def load_chapters(self, run_dir: Path) -> List[Dict[str, object]]:
  143 + """从指定run目录读取全部chapter.json并按order排序返回"""
136 payloads: List[Dict[str, object]] = [] 144 payloads: List[Dict[str, object]] = []
137 for child in sorted(run_dir.iterdir()): 145 for child in sorted(run_dir.iterdir()):
138 if not child.is_dir(): 146 if not child.is_dir():
@@ -161,6 +169,7 @@ class ChapterStorage: @@ -161,6 +169,7 @@ class ChapterStorage:
161 # ======== 内部工具 ======== 169 # ======== 内部工具 ========
162 170
163 def _chapter_dir(self, run_dir: Path, slug: str, order: int) -> Path: 171 def _chapter_dir(self, run_dir: Path, slug: str, order: int) -> Path:
  172 + """根据slug/order生成稳定的章节目录,确保各章分隔存盘"""
164 safe_slug = self._safe_slug(slug) 173 safe_slug = self._safe_slug(slug)
165 folder = f"{order:03d}-{safe_slug}" 174 folder = f"{order:03d}-{safe_slug}"
166 path = run_dir / folder 175 path = run_dir / folder
@@ -168,25 +177,31 @@ class ChapterStorage: @@ -168,25 +177,31 @@ class ChapterStorage:
168 return path 177 return path
169 178
170 def _safe_slug(self, slug: str) -> str: 179 def _safe_slug(self, slug: str) -> str:
  180 + """将空格与斜杠替换为连字符,避免生成非法文件夹名;结果为空时回退为section"""
171 slug = slug.replace(" ", "-").replace("/", "-") 181 slug = slug.replace(" ", "-").replace("/", "-")
172 return slug or "section" 182 return slug or "section"
173 183
174 def _raw_stream_path(self, chapter_dir: Path) -> Path: 184 def _raw_stream_path(self, chapter_dir: Path) -> Path:
  185 + """返回某章节流式输出对应的raw文件路径"""
175 return chapter_dir / "stream.raw" 186 return chapter_dir / "stream.raw"
176 187
177 def _key(self, run_dir: Path) -> str: 188 def _key(self, run_dir: Path) -> str:
  189 + """将run目录解析为字典缓存的键,避免重复读取磁盘"""
178 return str(run_dir.resolve()) 190 return str(run_dir.resolve())
179 191
180 def _manifest_path(self, run_dir: Path) -> Path: 192 def _manifest_path(self, run_dir: Path) -> Path:
  193 + """获取manifest.json的实际文件路径"""
181 return run_dir / "manifest.json" 194 return run_dir / "manifest.json"
182 195
183 def _write_manifest(self, run_dir: Path, manifest: Dict[str, object]): 196 def _write_manifest(self, run_dir: Path, manifest: Dict[str, object]):
  197 + """将内存中的manifest快照全量写回磁盘"""
184 self._manifest_path(run_dir).write_text( 198 self._manifest_path(run_dir).write_text(
185 json.dumps(manifest, ensure_ascii=False, indent=2), 199 json.dumps(manifest, ensure_ascii=False, indent=2),
186 encoding="utf-8", 200 encoding="utf-8",
187 ) 201 )
188 202
189 def _read_manifest(self, run_dir: Path) -> Dict[str, object]: 203 def _read_manifest(self, run_dir: Path) -> Dict[str, object]:
  204 + """从磁盘读取已有manifest,用于进程重启或多实例协作"""
190 manifest_path = self._manifest_path(run_dir) 205 manifest_path = self._manifest_path(run_dir)
191 if manifest_path.exists(): 206 if manifest_path.exists():
192 return json.loads(manifest_path.read_text(encoding="utf-8")) 207 return json.loads(manifest_path.read_text(encoding="utf-8"))
@@ -16,6 +16,7 @@ class DocumentComposer: @@ -16,6 +16,7 @@ class DocumentComposer:
16 """ 16 """
17 17
18 def __init__(self): 18 def __init__(self):
  19 + """初始化装订器并记录已使用的锚点,避免重复"""
19 self._seen_anchors: Set[str] = set() 20 self._seen_anchors: Set[str] = set()
20 21
21 def build_document( 22 def build_document(
@@ -30,6 +30,7 @@ class TemplateSection: @@ -30,6 +30,7 @@ class TemplateSection:
30 outline: List[str] = field(default_factory=list) 30 outline: List[str] = field(default_factory=list)
31 31
32 def to_dict(self) -> dict: 32 def to_dict(self) -> dict:
  33 + """将章节实体序列化为字典,方便传给LLM或落盘"""
33 return { 34 return {
34 "title": self.title, 35 "title": self.title,
35 "slug": self.slug, 36 "slug": self.slug,
@@ -185,6 +186,7 @@ def _build_slug(number: str, title: str) -> str: @@ -185,6 +186,7 @@ def _build_slug(number: str, title: str) -> str:
185 186
186 187
187 def _slugify_text(text: str) -> str: 188 def _slugify_text(text: str) -> str:
  189 + """对任意文本做降噪与转写,得到URL友好的slug片段"""
188 text = unicodedata.normalize("NFKD", text) 190 text = unicodedata.normalize("NFKD", text)
189 text = text.replace("·", "-").replace(" ", "-") 191 text = text.replace("·", "-").replace(" ", "-")
190 text = re.sub(r"[^0-9a-zA-Z\u4e00-\u9fff-]+", "-", text) 192 text = re.sub(r"[^0-9a-zA-Z\u4e00-\u9fff-]+", "-", text)
@@ -193,6 +195,7 @@ def _slugify_text(text: str) -> str: @@ -193,6 +195,7 @@ def _slugify_text(text: str) -> str:
193 195
194 196
195 def _ensure_unique_slug(slug: str, used: set) -> str: 197 def _ensure_unique_slug(slug: str, used: set) -> str:
  198 + """若slug重复则自动追加序号,直到在used集合中唯一"""
196 if slug not in used: 199 if slug not in used:
197 used.add(slug) 200 used.add(slug)
198 return slug 201 return slug
@@ -40,6 +40,14 @@ class ReportTask: @@ -40,6 +40,14 @@ class ReportTask:
40 """报告生成任务""" 40 """报告生成任务"""
41 41
42 def __init__(self, query: str, task_id: str, custom_template: str = ""): 42 def __init__(self, query: str, task_id: str, custom_template: str = ""):
  43 + """
  44 + 初始化任务对象,记录查询词、自定义模板与运行期元数据。
  45 +
  46 + Args:
  47 + query: 最终需要生成的报告主题
  48 + task_id: 任务唯一ID,通常由时间戳构造
  49 + custom_template: 可选的自定义Markdown模板
  50 + """
43 self.task_id = task_id 51 self.task_id = task_id
44 self.query = query 52 self.query = query
45 self.custom_template = custom_template 53 self.custom_template = custom_template
@@ -470,6 +478,7 @@ def get_templates(): @@ -470,6 +478,7 @@ def get_templates():
470 # 错误处理 478 # 错误处理
471 @report_bp.errorhandler(404) 479 @report_bp.errorhandler(404)
472 def not_found(error): 480 def not_found(error):
  481 + """404兜底处理:保证接口统一返回JSON结构"""
473 logger.exception(f"API端点不存在: {str(error)}") 482 logger.exception(f"API端点不存在: {str(error)}")
474 return jsonify({ 483 return jsonify({
475 'success': False, 484 'success': False,
@@ -479,6 +488,7 @@ def not_found(error): @@ -479,6 +488,7 @@ def not_found(error):
479 488
480 @report_bp.errorhandler(500) 489 @report_bp.errorhandler(500)
481 def internal_error(error): 490 def internal_error(error):
  491 + """500兜底处理:捕获未被主动捕获的异常"""
482 logger.exception(f"服务器内部错误: {str(error)}") 492 logger.exception(f"服务器内部错误: {str(error)}")
483 return jsonify({ 493 return jsonify({
484 'success': False, 494 'success': False,
@@ -23,6 +23,7 @@ class IRValidator: @@ -23,6 +23,7 @@ class IRValidator:
23 """ 23 """
24 24
25 def __init__(self, schema_version: str = IR_VERSION): 25 def __init__(self, schema_version: str = IR_VERSION):
  26 + """记录当前Schema版本,便于未来多版本并存"""
26 self.schema_version = schema_version 27 self.schema_version = schema_version
27 28
28 # ======== 对外接口 ======== 29 # ======== 对外接口 ========
1 """ 1 """
2 -Unified OpenAI-compatible LLM client for the Report Engine, with retry support. 2 +Report Engine 默认的OpenAI兼容LLM客户端封装,内置重试/流式能力。
3 """ 3 """
4 4
5 import os 5 import os
@@ -19,7 +19,9 @@ try: @@ -19,7 +19,9 @@ try:
19 from retry_helper import with_retry, LLM_RETRY_CONFIG 19 from retry_helper import with_retry, LLM_RETRY_CONFIG
20 except ImportError: 20 except ImportError:
21 def with_retry(config=None): 21 def with_retry(config=None):
  22 + """简化版with_retry占位,实现与真实装饰器一致的调用签名"""
22 def decorator(func): 23 def decorator(func):
  24 + """直接返回原函数,确保无retry依赖时代码仍可运行"""
23 return func 25 return func
24 return decorator 26 return decorator
25 27
@@ -27,9 +29,17 @@ except ImportError: @@ -27,9 +29,17 @@ except ImportError:
27 29
28 30
29 class LLMClient: 31 class LLMClient:
30 - """Minimal wrapper around the OpenAI-compatible chat completion API.""" 32 + """针对OpenAI Chat Completion API的轻量封装,统一Report Engine调用入口。"""
31 33
32 def __init__(self, api_key: str, model_name: str, base_url: Optional[str] = None): 34 def __init__(self, api_key: str, model_name: str, base_url: Optional[str] = None):
  35 + """
  36 + 初始化LLM客户端并保存基础连接信息。
  37 +
  38 + Args:
  39 + api_key: 用于鉴权的API Token
  40 + model_name: 具体模型ID,用于定位供应商能力
  41 + base_url: 自定义兼容接口地址,默认为OpenAI官方
  42 + """
33 if not api_key: 43 if not api_key:
34 raise ValueError("Report Engine LLM API key is required.") 44 raise ValueError("Report Engine LLM API key is required.")
35 if not model_name: 45 if not model_name:
@@ -55,6 +65,17 @@ class LLMClient: @@ -55,6 +65,17 @@ class LLMClient:
55 65
56 @with_retry(LLM_RETRY_CONFIG) 66 @with_retry(LLM_RETRY_CONFIG)
57 def invoke(self, system_prompt: str, user_prompt: str, **kwargs) -> str: 67 def invoke(self, system_prompt: str, user_prompt: str, **kwargs) -> str:
  68 + """
  69 + 以非流式方式调用LLM,并返回一次性完成的完整响应。
  70 +
  71 + Args:
  72 + system_prompt: 系统角色提示
  73 + user_prompt: 用户高优先级指令
  74 + **kwargs: 允许透传temperature/top_p等采样参数
  75 +
  76 + Returns:
  77 + 去除首尾空白后的LLM响应文本
  78 + """
58 messages = [ 79 messages = [
59 {"role": "system", "content": system_prompt}, 80 {"role": "system", "content": system_prompt},
60 {"role": "user", "content": user_prompt}, 81 {"role": "user", "content": user_prompt},
@@ -142,11 +163,13 @@ class LLMClient: @@ -142,11 +163,13 @@ class LLMClient:
142 163
143 @staticmethod 164 @staticmethod
144 def validate_response(response: Optional[str]) -> str: 165 def validate_response(response: Optional[str]) -> str:
  166 + """兜底处理None/空白字符串,防止上层逻辑崩溃"""
145 if response is None: 167 if response is None:
146 return "" 168 return ""
147 return response.strip() 169 return response.strip()
148 170
149 def get_model_info(self) -> Dict[str, Any]: 171 def get_model_info(self) -> Dict[str, Any]:
  172 + """以字典形式返回当前客户端的模型/提供方/基础URL信息"""
150 return { 173 return {
151 "provider": self.provider, 174 "provider": self.provider,
152 "model": self.model_name, 175 "model": self.model_name,
@@ -34,6 +34,14 @@ class ChapterGenerationNode(BaseNode): @@ -34,6 +34,14 @@ class ChapterGenerationNode(BaseNode):
34 _COLON_EQUALS_PATTERN = re.compile(r'(":\s*)=') 34 _COLON_EQUALS_PATTERN = re.compile(r'(":\s*)=')
35 35
36 def __init__(self, llm_client, validator: IRValidator, storage: ChapterStorage): 36 def __init__(self, llm_client, validator: IRValidator, storage: ChapterStorage):
  37 + """
  38 + 记录LLM客户端/校验器/章节存储器,便于run方法调度。
  39 +
  40 + Args:
  41 + llm_client: 实际调用大模型的客户端
  42 + validator: IR结构校验器
  43 + storage: 负责章节流式落盘的存储器
  44 + """
37 super().__init__(llm_client, "ChapterGenerationNode") 45 super().__init__(llm_client, "ChapterGenerationNode")
38 self.validator = validator 46 self.validator = validator
39 self.storage = storage 47 self.storage = storage
@@ -385,6 +393,7 @@ class ChapterGenerationNode(BaseNode): @@ -385,6 +393,7 @@ class ChapterGenerationNode(BaseNode):
385 """修正常见的结构性错误(例如list.items嵌套过深)""" 393 """修正常见的结构性错误(例如list.items嵌套过深)"""
386 394
387 def walk(blocks: List[Dict[str, Any]] | None): 395 def walk(blocks: List[Dict[str, Any]] | None):
  396 + """递归检查并修复嵌套结构,保证每个block合法"""
388 if not isinstance(blocks, list): 397 if not isinstance(blocks, list):
389 return 398 return
390 for block in blocks: 399 for block in blocks:
@@ -485,6 +494,7 @@ class ChapterGenerationNode(BaseNode): @@ -485,6 +494,7 @@ class ChapterGenerationNode(BaseNode):
485 494
486 @staticmethod 495 @staticmethod
487 def _as_paragraph_block(text: str) -> Dict[str, Any]: 496 def _as_paragraph_block(text: str) -> Dict[str, Any]:
  497 + """将字符串快速包装成paragraph block,方便统一处理"""
488 return { 498 return {
489 "type": "paragraph", 499 "type": "paragraph",
490 "inlines": [{"text": text or ""}], 500 "inlines": [{"text": text or ""}],
@@ -21,6 +21,7 @@ class DocumentLayoutNode(BaseNode): @@ -21,6 +21,7 @@ class DocumentLayoutNode(BaseNode):
21 """负责生成全局标题、目录与Hero设计""" 21 """负责生成全局标题、目录与Hero设计"""
22 22
23 def __init__(self, llm_client): 23 def __init__(self, llm_client):
  24 + """记录LLM客户端并设置节点名字,供BaseNode日志使用"""
24 super().__init__(llm_client, "DocumentLayoutNode") 25 super().__init__(llm_client, "DocumentLayoutNode")
25 26
26 def run( 27 def run(
@@ -21,6 +21,7 @@ class WordBudgetNode(BaseNode): @@ -21,6 +21,7 @@ class WordBudgetNode(BaseNode):
21 """规划各章节字数与重点""" 21 """规划各章节字数与重点"""
22 22
23 def __init__(self, llm_client): 23 def __init__(self, llm_client):
  24 + """仅记录LLM客户端引用,方便run阶段发起请求"""
24 super().__init__(llm_client, "WordBudgetNode") 25 super().__init__(llm_client, "WordBudgetNode")
25 26
26 def run( 27 def run(
@@ -13,6 +13,7 @@ class HTMLRenderer: @@ -13,6 +13,7 @@ class HTMLRenderer:
13 """Document IR → HTML 渲染器""" 13 """Document IR → HTML 渲染器"""
14 14
15 def __init__(self, config: Dict[str, Any] | None = None): 15 def __init__(self, config: Dict[str, Any] | None = None):
  16 + """初始化渲染器缓存并允许注入额外配置(如主题覆盖)"""
16 self.config = config or {} 17 self.config = config or {}
17 self.document: Dict[str, Any] = {} 18 self.document: Dict[str, Any] = {}
18 self.widget_scripts: List[str] = [] 19 self.widget_scripts: List[str] = []
1 """ 1 """
2 -Configuration management module for the Report Engine. 2 +Report Engine 配置模块,统一读取环境变量并提供类型安全的访问方式。
3 """ 3 """
4 4
5 import os 5 import os
@@ -34,6 +34,7 @@ class Settings(BaseSettings): @@ -34,6 +34,7 @@ class Settings(BaseSettings):
34 CHART_STYLE: str = Field("modern", description="图表样式:modern/classic/") 34 CHART_STYLE: str = Field("modern", description="图表样式:modern/classic/")
35 35
36 class Config: 36 class Config:
  37 + """Pydantic配置:允许从.env读取并兼容大小写"""
37 env_file = ".env" 38 env_file = ".env"
38 env_prefix = "" 39 env_prefix = ""
39 case_sensitive = False 40 case_sensitive = False
@@ -43,6 +44,7 @@ settings = Settings() @@ -43,6 +44,7 @@ settings = Settings()
43 44
44 45
45 def print_config(config: Settings): 46 def print_config(config: Settings):
  47 + """将当前配置项按人类可读格式输出到日志,方便排障"""
46 message = "" 48 message = ""
47 message += "\n=== Report Engine 配置 ===\n" 49 message += "\n=== Report Engine 配置 ===\n"
48 message += f"LLM 模型: {config.REPORT_ENGINE_MODEL_NAME}\n" 50 message += f"LLM 模型: {config.REPORT_ENGINE_MODEL_NAME}\n"