Showing 12 changed files with 72 additions and 3 deletions
| @@ -35,6 +35,7 @@ class FileCountBaseline: | @@ -35,6 +35,7 @@ class FileCountBaseline: | ||
| 35 | """文件数量基准管理器""" | 35 | """文件数量基准管理器""" |
| 36 | 36 | ||
| 37 | def __init__(self): | 37 | def __init__(self): |
| 38 | + """在初始化阶段加载或创建文件数量基准快照""" | ||
| 38 | self.baseline_file = 'logs/report_baseline.json' | 39 | self.baseline_file = 'logs/report_baseline.json' |
| 39 | self.baseline_data = self._load_baseline() | 40 | self.baseline_data = self._load_baseline() |
| 40 | 41 |
| @@ -29,6 +29,7 @@ class ChapterRecord: | @@ -29,6 +29,7 @@ class ChapterRecord: | ||
| 29 | updated_at: str = field(default_factory=lambda: datetime.utcnow().isoformat() + "Z") | 29 | updated_at: str = field(default_factory=lambda: datetime.utcnow().isoformat() + "Z") |
| 30 | 30 | ||
| 31 | def to_dict(self) -> Dict[str, object]: | 31 | def to_dict(self) -> Dict[str, object]: |
| 32 | + """将记录转换为便于写入manifest.json的序列化字典""" | ||
| 32 | return { | 33 | return { |
| 33 | "chapterId": self.chapter_id, | 34 | "chapterId": self.chapter_id, |
| 34 | "slug": self.slug, | 35 | "slug": self.slug, |
| @@ -54,6 +55,12 @@ class ChapterStorage: | @@ -54,6 +55,12 @@ class ChapterStorage: | ||
| 54 | """ | 55 | """ |
| 55 | 56 | ||
| 56 | def __init__(self, base_dir: str): | 57 | def __init__(self, base_dir: str): |
| 58 | + """ | ||
| 59 | + 创建章节存储器。 | ||
| 60 | + | ||
| 61 | + Args: | ||
| 62 | + base_dir: 所有输出run目录的根路径 | ||
| 63 | + """ | ||
| 57 | self.base_dir = Path(base_dir) | 64 | self.base_dir = Path(base_dir) |
| 58 | self.base_dir.mkdir(parents=True, exist_ok=True) | 65 | self.base_dir.mkdir(parents=True, exist_ok=True) |
| 59 | self._manifests: Dict[str, Dict[str, object]] = {} | 66 | self._manifests: Dict[str, Dict[str, object]] = {} |
| @@ -133,6 +140,7 @@ class ChapterStorage: | @@ -133,6 +140,7 @@ class ChapterStorage: | ||
| 133 | return final_path | 140 | return final_path |
| 134 | 141 | ||
| 135 | def load_chapters(self, run_dir: Path) -> List[Dict[str, object]]: | 142 | def load_chapters(self, run_dir: Path) -> List[Dict[str, object]]: |
| 143 | + """从指定run目录读取全部chapter.json并按order排序返回""" | ||
| 136 | payloads: List[Dict[str, object]] = [] | 144 | payloads: List[Dict[str, object]] = [] |
| 137 | for child in sorted(run_dir.iterdir()): | 145 | for child in sorted(run_dir.iterdir()): |
| 138 | if not child.is_dir(): | 146 | if not child.is_dir(): |
| @@ -161,6 +169,7 @@ class ChapterStorage: | @@ -161,6 +169,7 @@ class ChapterStorage: | ||
| 161 | # ======== 内部工具 ======== | 169 | # ======== 内部工具 ======== |
| 162 | 170 | ||
| 163 | def _chapter_dir(self, run_dir: Path, slug: str, order: int) -> Path: | 171 | def _chapter_dir(self, run_dir: Path, slug: str, order: int) -> Path: |
| 172 | + """根据slug/order生成稳定的章节目录,确保各章分隔存盘""" | ||
| 164 | safe_slug = self._safe_slug(slug) | 173 | safe_slug = self._safe_slug(slug) |
| 165 | folder = f"{order:03d}-{safe_slug}" | 174 | folder = f"{order:03d}-{safe_slug}" |
| 166 | path = run_dir / folder | 175 | path = run_dir / folder |
| @@ -168,25 +177,31 @@ class ChapterStorage: | @@ -168,25 +177,31 @@ class ChapterStorage: | ||
| 168 | return path | 177 | return path |
| 169 | 178 | ||
| 170 | def _safe_slug(self, slug: str) -> str: | 179 | def _safe_slug(self, slug: str) -> str: |
| 180 | + """移除危险字符,避免生成非法文件夹名""" | ||
| 171 | slug = slug.replace(" ", "-").replace("/", "-") | 181 | slug = slug.replace(" ", "-").replace("/", "-") |
| 172 | return slug or "section" | 182 | return slug or "section" |
| 173 | 183 | ||
| 174 | def _raw_stream_path(self, chapter_dir: Path) -> Path: | 184 | def _raw_stream_path(self, chapter_dir: Path) -> Path: |
| 185 | + """返回某章节流式输出对应的raw文件路径""" | ||
| 175 | return chapter_dir / "stream.raw" | 186 | return chapter_dir / "stream.raw" |
| 176 | 187 | ||
| 177 | def _key(self, run_dir: Path) -> str: | 188 | def _key(self, run_dir: Path) -> str: |
| 189 | + """将run目录解析为字典缓存的键,避免重复读取磁盘""" | ||
| 178 | return str(run_dir.resolve()) | 190 | return str(run_dir.resolve()) |
| 179 | 191 | ||
| 180 | def _manifest_path(self, run_dir: Path) -> Path: | 192 | def _manifest_path(self, run_dir: Path) -> Path: |
| 193 | + """获取manifest.json的实际文件路径""" | ||
| 181 | return run_dir / "manifest.json" | 194 | return run_dir / "manifest.json" |
| 182 | 195 | ||
| 183 | def _write_manifest(self, run_dir: Path, manifest: Dict[str, object]): | 196 | def _write_manifest(self, run_dir: Path, manifest: Dict[str, object]): |
| 197 | + """将内存中的manifest快照全量写回磁盘""" | ||
| 184 | self._manifest_path(run_dir).write_text( | 198 | self._manifest_path(run_dir).write_text( |
| 185 | json.dumps(manifest, ensure_ascii=False, indent=2), | 199 | json.dumps(manifest, ensure_ascii=False, indent=2), |
| 186 | encoding="utf-8", | 200 | encoding="utf-8", |
| 187 | ) | 201 | ) |
| 188 | 202 | ||
| 189 | def _read_manifest(self, run_dir: Path) -> Dict[str, object]: | 203 | def _read_manifest(self, run_dir: Path) -> Dict[str, object]: |
| 204 | + """从磁盘读取已有manifest,用于进程重启或多实例协作""" | ||
| 190 | manifest_path = self._manifest_path(run_dir) | 205 | manifest_path = self._manifest_path(run_dir) |
| 191 | if manifest_path.exists(): | 206 | if manifest_path.exists(): |
| 192 | return json.loads(manifest_path.read_text(encoding="utf-8")) | 207 | return json.loads(manifest_path.read_text(encoding="utf-8")) |
| @@ -16,6 +16,7 @@ class DocumentComposer: | @@ -16,6 +16,7 @@ class DocumentComposer: | ||
| 16 | """ | 16 | """ |
| 17 | 17 | ||
| 18 | def __init__(self): | 18 | def __init__(self): |
| 19 | + """初始化装订器并记录已使用的锚点,避免重复""" | ||
| 19 | self._seen_anchors: Set[str] = set() | 20 | self._seen_anchors: Set[str] = set() |
| 20 | 21 | ||
| 21 | def build_document( | 22 | def build_document( |
| @@ -30,6 +30,7 @@ class TemplateSection: | @@ -30,6 +30,7 @@ class TemplateSection: | ||
| 30 | outline: List[str] = field(default_factory=list) | 30 | outline: List[str] = field(default_factory=list) |
| 31 | 31 | ||
| 32 | def to_dict(self) -> dict: | 32 | def to_dict(self) -> dict: |
| 33 | + """将章节实体序列化为字典,方便传给LLM或落盘""" | ||
| 33 | return { | 34 | return { |
| 34 | "title": self.title, | 35 | "title": self.title, |
| 35 | "slug": self.slug, | 36 | "slug": self.slug, |
| @@ -185,6 +186,7 @@ def _build_slug(number: str, title: str) -> str: | @@ -185,6 +186,7 @@ def _build_slug(number: str, title: str) -> str: | ||
| 185 | 186 | ||
| 186 | 187 | ||
| 187 | def _slugify_text(text: str) -> str: | 188 | def _slugify_text(text: str) -> str: |
| 189 | + """对任意文本做降噪与转写,得到URL友好的slug片段""" | ||
| 188 | text = unicodedata.normalize("NFKD", text) | 190 | text = unicodedata.normalize("NFKD", text) |
| 189 | text = text.replace("·", "-").replace(" ", "-") | 191 | text = text.replace("·", "-").replace(" ", "-") |
| 190 | text = re.sub(r"[^0-9a-zA-Z\u4e00-\u9fff-]+", "-", text) | 192 | text = re.sub(r"[^0-9a-zA-Z\u4e00-\u9fff-]+", "-", text) |
| @@ -193,6 +195,7 @@ def _slugify_text(text: str) -> str: | @@ -193,6 +195,7 @@ def _slugify_text(text: str) -> str: | ||
| 193 | 195 | ||
| 194 | 196 | ||
| 195 | def _ensure_unique_slug(slug: str, used: set) -> str: | 197 | def _ensure_unique_slug(slug: str, used: set) -> str: |
| 198 | + """若slug重复则自动追加序号,直到在used集合中唯一""" | ||
| 196 | if slug not in used: | 199 | if slug not in used: |
| 197 | used.add(slug) | 200 | used.add(slug) |
| 198 | return slug | 201 | return slug |
| @@ -40,6 +40,14 @@ class ReportTask: | @@ -40,6 +40,14 @@ class ReportTask: | ||
| 40 | """报告生成任务""" | 40 | """报告生成任务""" |
| 41 | 41 | ||
| 42 | def __init__(self, query: str, task_id: str, custom_template: str = ""): | 42 | def __init__(self, query: str, task_id: str, custom_template: str = ""): |
| 43 | + """ | ||
| 44 | + 初始化任务对象,记录查询词、自定义模板与运行期元数据。 | ||
| 45 | + | ||
| 46 | + Args: | ||
| 47 | + query: 最终需要生成的报告主题 | ||
| 48 | + task_id: 任务唯一ID,通常由时间戳构造 | ||
| 49 | + custom_template: 可选的自定义Markdown模板 | ||
| 50 | + """ | ||
| 43 | self.task_id = task_id | 51 | self.task_id = task_id |
| 44 | self.query = query | 52 | self.query = query |
| 45 | self.custom_template = custom_template | 53 | self.custom_template = custom_template |
| @@ -470,6 +478,7 @@ def get_templates(): | @@ -470,6 +478,7 @@ def get_templates(): | ||
| 470 | # 错误处理 | 478 | # 错误处理 |
| 471 | @report_bp.errorhandler(404) | 479 | @report_bp.errorhandler(404) |
| 472 | def not_found(error): | 480 | def not_found(error): |
| 481 | + """404兜底处理:保证接口统一返回JSON结构""" | ||
| 473 | logger.exception(f"API端点不存在: {str(error)}") | 482 | logger.exception(f"API端点不存在: {str(error)}") |
| 474 | return jsonify({ | 483 | return jsonify({ |
| 475 | 'success': False, | 484 | 'success': False, |
| @@ -479,6 +488,7 @@ def not_found(error): | @@ -479,6 +488,7 @@ def not_found(error): | ||
| 479 | 488 | ||
| 480 | @report_bp.errorhandler(500) | 489 | @report_bp.errorhandler(500) |
| 481 | def internal_error(error): | 490 | def internal_error(error): |
| 491 | + """500兜底处理:捕获未被主动捕获的异常""" | ||
| 482 | logger.exception(f"服务器内部错误: {str(error)}") | 492 | logger.exception(f"服务器内部错误: {str(error)}") |
| 483 | return jsonify({ | 493 | return jsonify({ |
| 484 | 'success': False, | 494 | 'success': False, |
| @@ -23,6 +23,7 @@ class IRValidator: | @@ -23,6 +23,7 @@ class IRValidator: | ||
| 23 | """ | 23 | """ |
| 24 | 24 | ||
| 25 | def __init__(self, schema_version: str = IR_VERSION): | 25 | def __init__(self, schema_version: str = IR_VERSION): |
| 26 | + """记录当前Schema版本,便于未来多版本并存""" | ||
| 26 | self.schema_version = schema_version | 27 | self.schema_version = schema_version |
| 27 | 28 | ||
| 28 | # ======== 对外接口 ======== | 29 | # ======== 对外接口 ======== |
| 1 | """ | 1 | """ |
| 2 | -Unified OpenAI-compatible LLM client for the Report Engine, with retry support. | 2 | +Report Engine 默认的OpenAI兼容LLM客户端封装,内置重试/流式能力。 |
| 3 | """ | 3 | """ |
| 4 | 4 | ||
| 5 | import os | 5 | import os |
| @@ -19,7 +19,9 @@ try: | @@ -19,7 +19,9 @@ try: | ||
| 19 | from retry_helper import with_retry, LLM_RETRY_CONFIG | 19 | from retry_helper import with_retry, LLM_RETRY_CONFIG |
| 20 | except ImportError: | 20 | except ImportError: |
| 21 | def with_retry(config=None): | 21 | def with_retry(config=None): |
| 22 | + """简化版with_retry占位,实现与真实装饰器一致的调用签名""" | ||
| 22 | def decorator(func): | 23 | def decorator(func): |
| 24 | + """直接返回原函数,确保无retry依赖时代码仍可运行""" | ||
| 23 | return func | 25 | return func |
| 24 | return decorator | 26 | return decorator |
| 25 | 27 | ||
| @@ -27,9 +29,17 @@ except ImportError: | @@ -27,9 +29,17 @@ except ImportError: | ||
| 27 | 29 | ||
| 28 | 30 | ||
| 29 | class LLMClient: | 31 | class LLMClient: |
| 30 | - """Minimal wrapper around the OpenAI-compatible chat completion API.""" | 32 | + """针对OpenAI Chat Completion API的轻量封装,统一Report Engine调用入口。""" |
| 31 | 33 | ||
| 32 | def __init__(self, api_key: str, model_name: str, base_url: Optional[str] = None): | 34 | def __init__(self, api_key: str, model_name: str, base_url: Optional[str] = None): |
| 35 | + """ | ||
| 36 | + 初始化LLM客户端并保存基础连接信息。 | ||
| 37 | + | ||
| 38 | + Args: | ||
| 39 | + api_key: 用于鉴权的API Token | ||
| 40 | + model_name: 具体模型ID,用于定位供应商能力 | ||
| 41 | + base_url: 自定义兼容接口地址,默认为OpenAI官方 | ||
| 42 | + """ | ||
| 33 | if not api_key: | 43 | if not api_key: |
| 34 | raise ValueError("Report Engine LLM API key is required.") | 44 | raise ValueError("Report Engine LLM API key is required.") |
| 35 | if not model_name: | 45 | if not model_name: |
| @@ -55,6 +65,17 @@ class LLMClient: | @@ -55,6 +65,17 @@ class LLMClient: | ||
| 55 | 65 | ||
| 56 | @with_retry(LLM_RETRY_CONFIG) | 66 | @with_retry(LLM_RETRY_CONFIG) |
| 57 | def invoke(self, system_prompt: str, user_prompt: str, **kwargs) -> str: | 67 | def invoke(self, system_prompt: str, user_prompt: str, **kwargs) -> str: |
| 68 | + """ | ||
| 69 | + 以非流式方式调用LLM,并返回一次性完成的完整响应。 | ||
| 70 | + | ||
| 71 | + Args: | ||
| 72 | + system_prompt: 系统角色提示 | ||
| 73 | + user_prompt: 用户高优先级指令 | ||
| 74 | + **kwargs: 允许透传temperature/top_p等采样参数 | ||
| 75 | + | ||
| 76 | + Returns: | ||
| 77 | + 去除首尾空白后的LLM响应文本 | ||
| 78 | + """ | ||
| 58 | messages = [ | 79 | messages = [ |
| 59 | {"role": "system", "content": system_prompt}, | 80 | {"role": "system", "content": system_prompt}, |
| 60 | {"role": "user", "content": user_prompt}, | 81 | {"role": "user", "content": user_prompt}, |
| @@ -142,11 +163,13 @@ class LLMClient: | @@ -142,11 +163,13 @@ class LLMClient: | ||
| 142 | 163 | ||
| 143 | @staticmethod | 164 | @staticmethod |
| 144 | def validate_response(response: Optional[str]) -> str: | 165 | def validate_response(response: Optional[str]) -> str: |
| 166 | + """兜底处理None/空白字符串,防止上层逻辑崩溃""" | ||
| 145 | if response is None: | 167 | if response is None: |
| 146 | return "" | 168 | return "" |
| 147 | return response.strip() | 169 | return response.strip() |
| 148 | 170 | ||
| 149 | def get_model_info(self) -> Dict[str, Any]: | 171 | def get_model_info(self) -> Dict[str, Any]: |
| 172 | + """以字典形式返回当前客户端的模型/提供方/基础URL信息""" | ||
| 150 | return { | 173 | return { |
| 151 | "provider": self.provider, | 174 | "provider": self.provider, |
| 152 | "model": self.model_name, | 175 | "model": self.model_name, |
| @@ -34,6 +34,14 @@ class ChapterGenerationNode(BaseNode): | @@ -34,6 +34,14 @@ class ChapterGenerationNode(BaseNode): | ||
| 34 | _COLON_EQUALS_PATTERN = re.compile(r'(":\s*)=') | 34 | _COLON_EQUALS_PATTERN = re.compile(r'(":\s*)=') |
| 35 | 35 | ||
| 36 | def __init__(self, llm_client, validator: IRValidator, storage: ChapterStorage): | 36 | def __init__(self, llm_client, validator: IRValidator, storage: ChapterStorage): |
| 37 | + """ | ||
| 38 | + 记录LLM客户端/校验器/章节存储器,便于run方法调度。 | ||
| 39 | + | ||
| 40 | + Args: | ||
| 41 | + llm_client: 实际调用大模型的客户端 | ||
| 42 | + validator: IR结构校验器 | ||
| 43 | + storage: 负责章节流式落盘的存储器 | ||
| 44 | + """ | ||
| 37 | super().__init__(llm_client, "ChapterGenerationNode") | 45 | super().__init__(llm_client, "ChapterGenerationNode") |
| 38 | self.validator = validator | 46 | self.validator = validator |
| 39 | self.storage = storage | 47 | self.storage = storage |
| @@ -385,6 +393,7 @@ class ChapterGenerationNode(BaseNode): | @@ -385,6 +393,7 @@ class ChapterGenerationNode(BaseNode): | ||
| 385 | """修正常见的结构性错误(例如list.items嵌套过深)""" | 393 | """修正常见的结构性错误(例如list.items嵌套过深)""" |
| 386 | 394 | ||
| 387 | def walk(blocks: List[Dict[str, Any]] | None): | 395 | def walk(blocks: List[Dict[str, Any]] | None): |
| 396 | + """递归检查并修复嵌套结构,保证每个block合法""" | ||
| 388 | if not isinstance(blocks, list): | 397 | if not isinstance(blocks, list): |
| 389 | return | 398 | return |
| 390 | for block in blocks: | 399 | for block in blocks: |
| @@ -485,6 +494,7 @@ class ChapterGenerationNode(BaseNode): | @@ -485,6 +494,7 @@ class ChapterGenerationNode(BaseNode): | ||
| 485 | 494 | ||
| 486 | @staticmethod | 495 | @staticmethod |
| 487 | def _as_paragraph_block(text: str) -> Dict[str, Any]: | 496 | def _as_paragraph_block(text: str) -> Dict[str, Any]: |
| 497 | + """将字符串快速包装成paragraph block,方便统一处理""" | ||
| 488 | return { | 498 | return { |
| 489 | "type": "paragraph", | 499 | "type": "paragraph", |
| 490 | "inlines": [{"text": text or ""}], | 500 | "inlines": [{"text": text or ""}], |
| @@ -21,6 +21,7 @@ class DocumentLayoutNode(BaseNode): | @@ -21,6 +21,7 @@ class DocumentLayoutNode(BaseNode): | ||
| 21 | """负责生成全局标题、目录与Hero设计""" | 21 | """负责生成全局标题、目录与Hero设计""" |
| 22 | 22 | ||
| 23 | def __init__(self, llm_client): | 23 | def __init__(self, llm_client): |
| 24 | + """记录LLM客户端并设置节点名字,供BaseNode日志使用""" | ||
| 24 | super().__init__(llm_client, "DocumentLayoutNode") | 25 | super().__init__(llm_client, "DocumentLayoutNode") |
| 25 | 26 | ||
| 26 | def run( | 27 | def run( |
| @@ -21,6 +21,7 @@ class WordBudgetNode(BaseNode): | @@ -21,6 +21,7 @@ class WordBudgetNode(BaseNode): | ||
| 21 | """规划各章节字数与重点""" | 21 | """规划各章节字数与重点""" |
| 22 | 22 | ||
| 23 | def __init__(self, llm_client): | 23 | def __init__(self, llm_client): |
| 24 | + """仅记录LLM客户端引用,方便run阶段发起请求""" | ||
| 24 | super().__init__(llm_client, "WordBudgetNode") | 25 | super().__init__(llm_client, "WordBudgetNode") |
| 25 | 26 | ||
| 26 | def run( | 27 | def run( |
| @@ -13,6 +13,7 @@ class HTMLRenderer: | @@ -13,6 +13,7 @@ class HTMLRenderer: | ||
| 13 | """Document IR → HTML 渲染器""" | 13 | """Document IR → HTML 渲染器""" |
| 14 | 14 | ||
| 15 | def __init__(self, config: Dict[str, Any] | None = None): | 15 | def __init__(self, config: Dict[str, Any] | None = None): |
| 16 | + """初始化渲染器缓存并允许注入额外配置(如主题覆盖)""" | ||
| 16 | self.config = config or {} | 17 | self.config = config or {} |
| 17 | self.document: Dict[str, Any] = {} | 18 | self.document: Dict[str, Any] = {} |
| 18 | self.widget_scripts: List[str] = [] | 19 | self.widget_scripts: List[str] = [] |
| 1 | """ | 1 | """ |
| 2 | -Configuration management module for the Report Engine. | 2 | +Report Engine 配置模块,统一读取环境变量并提供类型安全的访问方式。 |
| 3 | """ | 3 | """ |
| 4 | 4 | ||
| 5 | import os | 5 | import os |
| @@ -34,6 +34,7 @@ class Settings(BaseSettings): | @@ -34,6 +34,7 @@ class Settings(BaseSettings): | ||
| 34 | CHART_STYLE: str = Field("modern", description="图表样式:modern/classic/") | 34 | CHART_STYLE: str = Field("modern", description="图表样式:modern/classic/") |
| 35 | 35 | ||
| 36 | class Config: | 36 | class Config: |
| 37 | + """Pydantic配置:允许从.env读取并兼容大小写""" | ||
| 37 | env_file = ".env" | 38 | env_file = ".env" |
| 38 | env_prefix = "" | 39 | env_prefix = "" |
| 39 | case_sensitive = False | 40 | case_sensitive = False |
| @@ -43,6 +44,7 @@ settings = Settings() | @@ -43,6 +44,7 @@ settings = Settings() | ||
| 43 | 44 | ||
| 44 | 45 | ||
| 45 | def print_config(config: Settings): | 46 | def print_config(config: Settings): |
| 47 | + """将当前配置项按人类可读格式输出到日志,方便排障""" | ||
| 46 | message = "" | 48 | message = "" |
| 47 | message += "\n=== Report Engine 配置 ===\n" | 49 | message += "\n=== Report Engine 配置 ===\n" |
| 48 | message += f"LLM 模型: {config.REPORT_ENGINE_MODEL_NAME}\n" | 50 | message += f"LLM 模型: {config.REPORT_ENGINE_MODEL_NAME}\n" |
-
Please register or login to post a comment