Showing 2 changed files with 114 additions and 9 deletions
| @@ -10,6 +10,7 @@ Report Agent主类。 | @@ -10,6 +10,7 @@ Report Agent主类。 | ||
| 10 | 10 | ||
| 11 | import json | 11 | import json |
| 12 | import os | 12 | import os |
| 13 | +from copy import deepcopy | ||
| 13 | from pathlib import Path | 14 | from pathlib import Path |
| 14 | from uuid import uuid4 | 15 | from uuid import uuid4 |
| 15 | from datetime import datetime | 16 | from datetime import datetime |
| @@ -174,6 +175,8 @@ class ReportAgent: | @@ -174,6 +175,8 @@ class ReportAgent: | ||
| 174 | - 章节存储、IR装订、渲染器等产出链路; | 175 | - 章节存储、IR装订、渲染器等产出链路; |
| 175 | - 状态管理、日志、输入输出校验与持久化。 | 176 | - 状态管理、日志、输入输出校验与持久化。 |
| 176 | """ | 177 | """ |
| 178 | + _CONTENT_SPARSE_MIN_ATTEMPTS = 3 | ||
| 179 | + _CONTENT_SPARSE_WARNING_TEXT = "本章LLM生成的内容字数可能过低,必要时可以尝试重新运行程序。" | ||
| 177 | 180 | ||
| 178 | def __init__(self, config: Optional[Settings] = None): | 181 | def __init__(self, config: Optional[Settings] = None): |
| 179 | """ | 182 | """ |
| @@ -466,7 +469,9 @@ class ReportAgent: | @@ -466,7 +469,9 @@ class ReportAgent: | ||
| 466 | emit('stage', {'stage': 'storage_ready', 'run_dir': str(run_dir)}) | 469 | emit('stage', {'stage': 'storage_ready', 'run_dir': str(run_dir)}) |
| 467 | 470 | ||
| 468 | chapters = [] | 471 | chapters = [] |
| 469 | - chapter_max_attempts = max(1, self.config.CHAPTER_JSON_MAX_ATTEMPTS) | 472 | + chapter_max_attempts = max( |
| 473 | + self._CONTENT_SPARSE_MIN_ATTEMPTS, self.config.CHAPTER_JSON_MAX_ATTEMPTS | ||
| 474 | + ) | ||
| 470 | for section in sections: | 475 | for section in sections: |
| 471 | logger.info(f"生成章节: {section.title}") | 476 | logger.info(f"生成章节: {section.title}") |
| 472 | emit('chapter_status', { | 477 | emit('chapter_status', { |
| @@ -492,6 +497,9 @@ class ReportAgent: | @@ -492,6 +497,9 @@ class ReportAgent: | ||
| 492 | 497 | ||
| 493 | chapter_payload: Dict[str, Any] | None = None | 498 | chapter_payload: Dict[str, Any] | None = None |
| 494 | attempt = 1 | 499 | attempt = 1 |
| 500 | + best_sparse_candidate: Dict[str, Any] | None = None | ||
| 501 | + best_sparse_score = -1 | ||
| 502 | + fallback_used = False | ||
| 495 | while attempt <= chapter_max_attempts: | 503 | while attempt <= chapter_max_attempts: |
| 496 | try: | 504 | try: |
| 497 | chapter_payload = self.chapter_generation_node.run( | 505 | chapter_payload = self.chapter_generation_node.run( |
| @@ -506,6 +514,19 @@ class ReportAgent: | @@ -506,6 +514,19 @@ class ReportAgent: | ||
| 506 | "content_sparse" if isinstance(structured_error, ChapterContentError) else "json_parse" | 514 | "content_sparse" if isinstance(structured_error, ChapterContentError) else "json_parse" |
| 507 | ) | 515 | ) |
| 508 | readable_label = "内容密度异常" if error_kind == "content_sparse" else "JSON解析失败" | 516 | readable_label = "内容密度异常" if error_kind == "content_sparse" else "JSON解析失败" |
| 517 | + if isinstance(structured_error, ChapterContentError): | ||
| 518 | + candidate = getattr(structured_error, "chapter_payload", None) | ||
| 519 | + candidate_score = getattr(structured_error, "body_characters", 0) or 0 | ||
| 520 | + if isinstance(candidate, dict) and candidate_score >= 0: | ||
| 521 | + if candidate_score > best_sparse_score: | ||
| 522 | + best_sparse_candidate = deepcopy(candidate) | ||
| 523 | + best_sparse_score = candidate_score | ||
| 524 | + will_fallback = ( | ||
| 525 | + isinstance(structured_error, ChapterContentError) | ||
| 526 | + and attempt >= chapter_max_attempts | ||
| 527 | + and attempt >= self._CONTENT_SPARSE_MIN_ATTEMPTS | ||
| 528 | + and best_sparse_candidate is not None | ||
| 529 | + ) | ||
| 509 | logger.warning( | 530 | logger.warning( |
| 510 | "章节 {title} {label}(第 {attempt}/{total} 次尝试): {error}", | 531 | "章节 {title} {label}(第 {attempt}/{total} 次尝试): {error}", |
| 511 | title=section.title, | 532 | title=section.title, |
| @@ -514,14 +535,27 @@ class ReportAgent: | @@ -514,14 +535,27 @@ class ReportAgent: | ||
| 514 | total=chapter_max_attempts, | 535 | total=chapter_max_attempts, |
| 515 | error=structured_error, | 536 | error=structured_error, |
| 516 | ) | 537 | ) |
| 517 | - emit('chapter_status', { | 538 | + status_value = 'retrying' if attempt < chapter_max_attempts or will_fallback else 'error' |
| 539 | + status_payload = { | ||
| 518 | 'chapterId': section.chapter_id, | 540 | 'chapterId': section.chapter_id, |
| 519 | 'title': section.title, | 541 | 'title': section.title, |
| 520 | - 'status': 'retrying' if attempt < chapter_max_attempts else 'error', | 542 | + 'status': status_value, |
| 521 | 'attempt': attempt, | 543 | 'attempt': attempt, |
| 522 | 'error': str(structured_error), | 544 | 'error': str(structured_error), |
| 523 | 'reason': error_kind, | 545 | 'reason': error_kind, |
| 524 | - }) | 546 | + } |
| 547 | + if will_fallback: | ||
| 548 | + status_payload['warning'] = 'content_sparse_fallback_pending' | ||
| 549 | + emit('chapter_status', status_payload) | ||
| 550 | + if will_fallback: | ||
| 551 | + logger.warning( | ||
| 552 | + "章节 {title} 达到最大尝试次数,保留字数最多(约 {score} 字)的版本作为兜底输出", | ||
| 553 | + title=section.title, | ||
| 554 | + score=best_sparse_score, | ||
| 555 | + ) | ||
| 556 | + chapter_payload = self._finalize_sparse_chapter(best_sparse_candidate) | ||
| 557 | + fallback_used = True | ||
| 558 | + break | ||
| 525 | if attempt >= chapter_max_attempts: | 559 | if attempt >= chapter_max_attempts: |
| 526 | raise | 560 | raise |
| 527 | attempt += 1 | 561 | attempt += 1 |
| @@ -553,12 +587,16 @@ class ReportAgent: | @@ -553,12 +587,16 @@ class ReportAgent: | ||
| 553 | f"{section.title} 章节JSON在 {chapter_max_attempts} 次尝试后仍无法解析" | 587 | f"{section.title} 章节JSON在 {chapter_max_attempts} 次尝试后仍无法解析" |
| 554 | ) | 588 | ) |
| 555 | chapters.append(chapter_payload) | 589 | chapters.append(chapter_payload) |
| 556 | - emit('chapter_status', { | 590 | + completion_status = { |
| 557 | 'chapterId': section.chapter_id, | 591 | 'chapterId': section.chapter_id, |
| 558 | 'title': section.title, | 592 | 'title': section.title, |
| 559 | 'status': 'completed', | 593 | 'status': 'completed', |
| 560 | 'attempt': attempt, | 594 | 'attempt': attempt, |
| 561 | - }) | 595 | + } |
| 596 | + if fallback_used: | ||
| 597 | + completion_status['warning'] = 'content_sparse_fallback' | ||
| 598 | + completion_status['warningMessage'] = self._CONTENT_SPARSE_WARNING_TEXT | ||
| 599 | + emit('chapter_status', completion_status) | ||
| 562 | 600 | ||
| 563 | document_ir = self.document_composer.build_document( | 601 | document_ir = self.document_composer.build_document( |
| 564 | report_id, | 602 | report_id, |
| @@ -779,6 +817,48 @@ class ReportAgent: | @@ -779,6 +817,48 @@ class ReportAgent: | ||
| 779 | ] | 817 | ] |
| 780 | return any(keyword in normalized for keyword in keywords) | 818 | return any(keyword in normalized for keyword in keywords) |
| 781 | 819 | ||
| 820 | + def _finalize_sparse_chapter(self, chapter: Optional[Dict[str, Any]]) -> Dict[str, Any]: | ||
| 821 | + """ | ||
| 822 | + 构造内容稀疏兜底章节:复制原始payload并插入温馨提示段落。 | ||
| 823 | + """ | ||
| 824 | + safe_chapter = deepcopy(chapter or {}) | ||
| 825 | + if not isinstance(safe_chapter, dict): | ||
| 826 | + safe_chapter = {} | ||
| 827 | + self._ensure_sparse_warning_block(safe_chapter) | ||
| 828 | + return safe_chapter | ||
| 829 | + | ||
| 830 | + def _ensure_sparse_warning_block(self, chapter: Dict[str, Any]) -> None: | ||
| 831 | + """ | ||
| 832 | + 将提示段落插在章节标题后,提醒读者该章字数偏少。 | ||
| 833 | + """ | ||
| 834 | + warning_block = { | ||
| 835 | + "type": "paragraph", | ||
| 836 | + "inlines": [ | ||
| 837 | + { | ||
| 838 | + "text": self._CONTENT_SPARSE_WARNING_TEXT, | ||
| 839 | + "marks": [{"type": "italic"}], | ||
| 840 | + } | ||
| 841 | + ], | ||
| 842 | + "meta": {"role": "content-sparse-warning"}, | ||
| 843 | + } | ||
| 844 | + blocks = chapter.get("blocks") | ||
| 845 | + if isinstance(blocks, list) and blocks: | ||
| 846 | + inserted = False | ||
| 847 | + for idx, block in enumerate(blocks): | ||
| 848 | + if isinstance(block, dict) and block.get("type") == "heading": | ||
| 849 | + blocks.insert(idx + 1, warning_block) | ||
| 850 | + inserted = True | ||
| 851 | + break | ||
| 852 | + if not inserted: | ||
| 853 | + blocks.insert(0, warning_block) | ||
| 854 | + else: | ||
| 855 | + chapter["blocks"] = [warning_block] | ||
| 856 | + meta = chapter.get("meta") | ||
| 857 | + if isinstance(meta, dict): | ||
| 858 | + meta["contentSparseWarning"] = True | ||
| 859 | + else: | ||
| 860 | + chapter["meta"] = {"contentSparseWarning": True} | ||
| 861 | + | ||
| 782 | def _stringify(self, value: Any) -> str: | 862 | def _stringify(self, value: Any) -> str: |
| 783 | """ | 863 | """ |
| 784 | 安全地将对象转成字符串。 | 864 | 安全地将对象转成字符串。 |
| @@ -55,6 +55,20 @@ class ChapterContentError(ValueError): | @@ -55,6 +55,20 @@ class ChapterContentError(ValueError): | ||
| 55 | 当LLM仅输出标题或正文不足以支撑一章时触发,驱动重试以保证报告质量。 | 55 | 当LLM仅输出标题或正文不足以支撑一章时触发,驱动重试以保证报告质量。 |
| 56 | """ | 56 | """ |
| 57 | 57 | ||
| 58 | + def __init__( | ||
| 59 | + self, | ||
| 60 | + message: str, | ||
| 61 | + chapter: Optional[Dict[str, Any]] = None, | ||
| 62 | + body_characters: int = 0, | ||
| 63 | + narrative_characters: int = 0, | ||
| 64 | + non_heading_blocks: int = 0, | ||
| 65 | + ): | ||
| 66 | + super().__init__(message) | ||
| 67 | + self.chapter_payload: Optional[Dict[str, Any]] = chapter | ||
| 68 | + self.body_characters: int = int(body_characters or 0) | ||
| 69 | + self.narrative_characters: int = int(narrative_characters or 0) | ||
| 70 | + self.non_heading_blocks: int = int(non_heading_blocks or 0) | ||
| 71 | + | ||
| 58 | 72 | ||
| 59 | class ChapterGenerationNode(BaseNode): | 73 | class ChapterGenerationNode(BaseNode): |
| 60 | """ | 74 | """ |
| @@ -897,7 +911,13 @@ class ChapterGenerationNode(BaseNode): | @@ -897,7 +911,13 @@ class ChapterGenerationNode(BaseNode): | ||
| 897 | """ | 911 | """ |
| 898 | blocks = chapter.get("blocks") | 912 | blocks = chapter.get("blocks") |
| 899 | if not isinstance(blocks, list) or not blocks: | 913 | if not isinstance(blocks, list) or not blocks: |
| 900 | - raise ChapterContentError("章节缺少正文区块,无法输出内容") | 914 | + raise ChapterContentError( |
| 915 | + "章节缺少正文区块,无法输出内容", | ||
| 916 | + chapter=chapter, | ||
| 917 | + body_characters=0, | ||
| 918 | + narrative_characters=0, | ||
| 919 | + non_heading_blocks=0, | ||
| 920 | + ) | ||
| 901 | 921 | ||
| 902 | non_heading_blocks = [ | 922 | non_heading_blocks = [ |
| 903 | block | 923 | block |
| @@ -905,16 +925,21 @@ class ChapterGenerationNode(BaseNode): | @@ -905,16 +925,21 @@ class ChapterGenerationNode(BaseNode): | ||
| 905 | if isinstance(block, dict) | 925 | if isinstance(block, dict) |
| 906 | and block.get("type") not in {"heading", "divider", "toc"} | 926 | and block.get("type") not in {"heading", "divider", "toc"} |
| 907 | ] | 927 | ] |
| 928 | + valid_block_count = len(non_heading_blocks) | ||
| 908 | body_characters = self._count_body_characters(blocks) | 929 | body_characters = self._count_body_characters(blocks) |
| 909 | narrative_characters = self._count_narrative_characters(blocks) | 930 | narrative_characters = self._count_narrative_characters(blocks) |
| 910 | 931 | ||
| 911 | if ( | 932 | if ( |
| 912 | - len(non_heading_blocks) < self._MIN_NON_HEADING_BLOCKS | 933 | + valid_block_count < self._MIN_NON_HEADING_BLOCKS |
| 913 | or body_characters < self._MIN_BODY_CHARACTERS | 934 | or body_characters < self._MIN_BODY_CHARACTERS |
| 914 | or narrative_characters < self._MIN_NARRATIVE_CHARACTERS | 935 | or narrative_characters < self._MIN_NARRATIVE_CHARACTERS |
| 915 | ): | 936 | ): |
| 916 | raise ChapterContentError( | 937 | raise ChapterContentError( |
| 917 | - f"{chapter.get('title') or '该章节'} 正文不足:有效区块 {len(non_heading_blocks)} 个,估算字符数 {body_characters},叙述性字符数 {narrative_characters}" | 938 | + f"{chapter.get('title') or '该章节'} 正文不足:有效区块 {valid_block_count} 个,估算字符数 {body_characters},叙述性字符数 {narrative_characters}", |
| 939 | + chapter=chapter, | ||
| 940 | + body_characters=body_characters, | ||
| 941 | + narrative_characters=narrative_characters, | ||
| 942 | + non_heading_blocks=valid_block_count, | ||
| 918 | ) | 943 | ) |
| 919 | 944 | ||
| 920 | def _count_body_characters(self, blocks: Any) -> int: | 945 | def _count_body_characters(self, blocks: Any) -> int: |
-
Please register or login to post a comment