马一丁

Optimize the Handling of Low Word Counts

@@ -10,6 +10,7 @@ Report Agent主类。 @@ -10,6 +10,7 @@ Report Agent主类。
10 10
11 import json 11 import json
12 import os 12 import os
  13 +from copy import deepcopy
13 from pathlib import Path 14 from pathlib import Path
14 from uuid import uuid4 15 from uuid import uuid4
15 from datetime import datetime 16 from datetime import datetime
@@ -174,6 +175,8 @@ class ReportAgent: @@ -174,6 +175,8 @@ class ReportAgent:
174 - 章节存储、IR装订、渲染器等产出链路; 175 - 章节存储、IR装订、渲染器等产出链路;
175 - 状态管理、日志、输入输出校验与持久化。 176 - 状态管理、日志、输入输出校验与持久化。
176 """ 177 """
  178 + _CONTENT_SPARSE_MIN_ATTEMPTS = 3
  179 + _CONTENT_SPARSE_WARNING_TEXT = "本章LLM生成的内容字数可能过低,必要时可以尝试重新运行程序。"
177 180
178 def __init__(self, config: Optional[Settings] = None): 181 def __init__(self, config: Optional[Settings] = None):
179 """ 182 """
@@ -466,7 +469,9 @@ class ReportAgent: @@ -466,7 +469,9 @@ class ReportAgent:
466 emit('stage', {'stage': 'storage_ready', 'run_dir': str(run_dir)}) 469 emit('stage', {'stage': 'storage_ready', 'run_dir': str(run_dir)})
467 470
468 chapters = [] 471 chapters = []
469 - chapter_max_attempts = max(1, self.config.CHAPTER_JSON_MAX_ATTEMPTS) 472 + chapter_max_attempts = max(
  473 + self._CONTENT_SPARSE_MIN_ATTEMPTS, self.config.CHAPTER_JSON_MAX_ATTEMPTS
  474 + )
470 for section in sections: 475 for section in sections:
471 logger.info(f"生成章节: {section.title}") 476 logger.info(f"生成章节: {section.title}")
472 emit('chapter_status', { 477 emit('chapter_status', {
@@ -492,6 +497,9 @@ class ReportAgent: @@ -492,6 +497,9 @@ class ReportAgent:
492 497
493 chapter_payload: Dict[str, Any] | None = None 498 chapter_payload: Dict[str, Any] | None = None
494 attempt = 1 499 attempt = 1
  500 + best_sparse_candidate: Dict[str, Any] | None = None
  501 + best_sparse_score = -1
  502 + fallback_used = False
495 while attempt <= chapter_max_attempts: 503 while attempt <= chapter_max_attempts:
496 try: 504 try:
497 chapter_payload = self.chapter_generation_node.run( 505 chapter_payload = self.chapter_generation_node.run(
@@ -506,6 +514,19 @@ class ReportAgent: @@ -506,6 +514,19 @@ class ReportAgent:
506 "content_sparse" if isinstance(structured_error, ChapterContentError) else "json_parse" 514 "content_sparse" if isinstance(structured_error, ChapterContentError) else "json_parse"
507 ) 515 )
508 readable_label = "内容密度异常" if error_kind == "content_sparse" else "JSON解析失败" 516 readable_label = "内容密度异常" if error_kind == "content_sparse" else "JSON解析失败"
  517 + if isinstance(structured_error, ChapterContentError):
  518 + candidate = getattr(structured_error, "chapter_payload", None)
  519 + candidate_score = getattr(structured_error, "body_characters", 0) or 0
  520 + if isinstance(candidate, dict) and candidate_score >= 0:
  521 + if candidate_score > best_sparse_score:
  522 + best_sparse_candidate = deepcopy(candidate)
  523 + best_sparse_score = candidate_score
  524 + will_fallback = (
  525 + isinstance(structured_error, ChapterContentError)
  526 + and attempt >= chapter_max_attempts
  527 + and attempt >= self._CONTENT_SPARSE_MIN_ATTEMPTS
  528 + and best_sparse_candidate is not None
  529 + )
509 logger.warning( 530 logger.warning(
510 "章节 {title} {label}(第 {attempt}/{total} 次尝试): {error}", 531 "章节 {title} {label}(第 {attempt}/{total} 次尝试): {error}",
511 title=section.title, 532 title=section.title,
@@ -514,14 +535,27 @@ class ReportAgent: @@ -514,14 +535,27 @@ class ReportAgent:
514 total=chapter_max_attempts, 535 total=chapter_max_attempts,
515 error=structured_error, 536 error=structured_error,
516 ) 537 )
517 - emit('chapter_status', { 538 + status_value = 'retrying' if attempt < chapter_max_attempts or will_fallback else 'error'
  539 + status_payload = {
518 'chapterId': section.chapter_id, 540 'chapterId': section.chapter_id,
519 'title': section.title, 541 'title': section.title,
520 - 'status': 'retrying' if attempt < chapter_max_attempts else 'error', 542 + 'status': status_value,
521 'attempt': attempt, 543 'attempt': attempt,
522 'error': str(structured_error), 544 'error': str(structured_error),
523 'reason': error_kind, 545 'reason': error_kind,
524 - }) 546 + }
  547 + if will_fallback:
  548 + status_payload['warning'] = 'content_sparse_fallback_pending'
  549 + emit('chapter_status', status_payload)
  550 + if will_fallback:
  551 + logger.warning(
  552 + "章节 {title} 达到最大尝试次数,保留字数最多(约 {score} 字)的版本作为兜底输出",
  553 + title=section.title,
  554 + score=best_sparse_score,
  555 + )
  556 + chapter_payload = self._finalize_sparse_chapter(best_sparse_candidate)
  557 + fallback_used = True
  558 + break
525 if attempt >= chapter_max_attempts: 559 if attempt >= chapter_max_attempts:
526 raise 560 raise
527 attempt += 1 561 attempt += 1
@@ -553,12 +587,16 @@ class ReportAgent: @@ -553,12 +587,16 @@ class ReportAgent:
553 f"{section.title} 章节JSON在 {chapter_max_attempts} 次尝试后仍无法解析" 587 f"{section.title} 章节JSON在 {chapter_max_attempts} 次尝试后仍无法解析"
554 ) 588 )
555 chapters.append(chapter_payload) 589 chapters.append(chapter_payload)
556 - emit('chapter_status', { 590 + completion_status = {
557 'chapterId': section.chapter_id, 591 'chapterId': section.chapter_id,
558 'title': section.title, 592 'title': section.title,
559 'status': 'completed', 593 'status': 'completed',
560 'attempt': attempt, 594 'attempt': attempt,
561 - }) 595 + }
  596 + if fallback_used:
  597 + completion_status['warning'] = 'content_sparse_fallback'
  598 + completion_status['warningMessage'] = self._CONTENT_SPARSE_WARNING_TEXT
  599 + emit('chapter_status', completion_status)
562 600
563 document_ir = self.document_composer.build_document( 601 document_ir = self.document_composer.build_document(
564 report_id, 602 report_id,
@@ -779,6 +817,48 @@ class ReportAgent: @@ -779,6 +817,48 @@ class ReportAgent:
779 ] 817 ]
780 return any(keyword in normalized for keyword in keywords) 818 return any(keyword in normalized for keyword in keywords)
781 819
  820 + def _finalize_sparse_chapter(self, chapter: Optional[Dict[str, Any]]) -> Dict[str, Any]:
  821 + """
  822 + 构造内容稀疏兜底章节:复制原始payload并插入温馨提示段落。
  823 + """
  824 + safe_chapter = deepcopy(chapter or {})
  825 + if not isinstance(safe_chapter, dict):
  826 + safe_chapter = {}
  827 + self._ensure_sparse_warning_block(safe_chapter)
  828 + return safe_chapter
  829 +
  830 + def _ensure_sparse_warning_block(self, chapter: Dict[str, Any]) -> None:
  831 + """
  832 + 将提示段落插在章节标题后,提醒读者该章字数偏少。
  833 + """
  834 + warning_block = {
  835 + "type": "paragraph",
  836 + "inlines": [
  837 + {
  838 + "text": self._CONTENT_SPARSE_WARNING_TEXT,
  839 + "marks": [{"type": "italic"}],
  840 + }
  841 + ],
  842 + "meta": {"role": "content-sparse-warning"},
  843 + }
  844 + blocks = chapter.get("blocks")
  845 + if isinstance(blocks, list) and blocks:
  846 + inserted = False
  847 + for idx, block in enumerate(blocks):
  848 + if isinstance(block, dict) and block.get("type") == "heading":
  849 + blocks.insert(idx + 1, warning_block)
  850 + inserted = True
  851 + break
  852 + if not inserted:
  853 + blocks.insert(0, warning_block)
  854 + else:
  855 + chapter["blocks"] = [warning_block]
  856 + meta = chapter.get("meta")
  857 + if isinstance(meta, dict):
  858 + meta["contentSparseWarning"] = True
  859 + else:
  860 + chapter["meta"] = {"contentSparseWarning": True}
  861 +
782 def _stringify(self, value: Any) -> str: 862 def _stringify(self, value: Any) -> str:
783 """ 863 """
784 安全地将对象转成字符串。 864 安全地将对象转成字符串。
@@ -55,6 +55,20 @@ class ChapterContentError(ValueError): @@ -55,6 +55,20 @@ class ChapterContentError(ValueError):
55 当LLM仅输出标题或正文不足以支撑一章时触发,驱动重试以保证报告质量。 55 当LLM仅输出标题或正文不足以支撑一章时触发,驱动重试以保证报告质量。
56 """ 56 """
57 57
  58 + def __init__(
  59 + self,
  60 + message: str,
  61 + chapter: Optional[Dict[str, Any]] = None,
  62 + body_characters: int = 0,
  63 + narrative_characters: int = 0,
  64 + non_heading_blocks: int = 0,
  65 + ):
  66 + super().__init__(message)
  67 + self.chapter_payload: Optional[Dict[str, Any]] = chapter
  68 + self.body_characters: int = int(body_characters or 0)
  69 + self.narrative_characters: int = int(narrative_characters or 0)
  70 + self.non_heading_blocks: int = int(non_heading_blocks or 0)
  71 +
58 72
59 class ChapterGenerationNode(BaseNode): 73 class ChapterGenerationNode(BaseNode):
60 """ 74 """
@@ -897,7 +911,13 @@ class ChapterGenerationNode(BaseNode): @@ -897,7 +911,13 @@ class ChapterGenerationNode(BaseNode):
897 """ 911 """
898 blocks = chapter.get("blocks") 912 blocks = chapter.get("blocks")
899 if not isinstance(blocks, list) or not blocks: 913 if not isinstance(blocks, list) or not blocks:
900 - raise ChapterContentError("章节缺少正文区块,无法输出内容") 914 + raise ChapterContentError(
  915 + "章节缺少正文区块,无法输出内容",
  916 + chapter=chapter,
  917 + body_characters=0,
  918 + narrative_characters=0,
  919 + non_heading_blocks=0,
  920 + )
901 921
902 non_heading_blocks = [ 922 non_heading_blocks = [
903 block 923 block
@@ -905,16 +925,21 @@ class ChapterGenerationNode(BaseNode): @@ -905,16 +925,21 @@ class ChapterGenerationNode(BaseNode):
905 if isinstance(block, dict) 925 if isinstance(block, dict)
906 and block.get("type") not in {"heading", "divider", "toc"} 926 and block.get("type") not in {"heading", "divider", "toc"}
907 ] 927 ]
  928 + valid_block_count = len(non_heading_blocks)
908 body_characters = self._count_body_characters(blocks) 929 body_characters = self._count_body_characters(blocks)
909 narrative_characters = self._count_narrative_characters(blocks) 930 narrative_characters = self._count_narrative_characters(blocks)
910 931
911 if ( 932 if (
912 - len(non_heading_blocks) < self._MIN_NON_HEADING_BLOCKS 933 + valid_block_count < self._MIN_NON_HEADING_BLOCKS
913 or body_characters < self._MIN_BODY_CHARACTERS 934 or body_characters < self._MIN_BODY_CHARACTERS
914 or narrative_characters < self._MIN_NARRATIVE_CHARACTERS 935 or narrative_characters < self._MIN_NARRATIVE_CHARACTERS
915 ): 936 ):
916 raise ChapterContentError( 937 raise ChapterContentError(
917 - f"{chapter.get('title') or '该章节'} 正文不足:有效区块 {len(non_heading_blocks)} 个,估算字符数 {body_characters},叙述性字符数 {narrative_characters}" 938 + f"{chapter.get('title') or '该章节'} 正文不足:有效区块 {valid_block_count} 个,估算字符数 {body_characters},叙述性字符数 {narrative_characters}",
  939 + chapter=chapter,
  940 + body_characters=body_characters,
  941 + narrative_characters=narrative_characters,
  942 + non_heading_blocks=valid_block_count,
918 ) 943 )
919 944
920 def _count_body_characters(self, blocks: Any) -> int: 945 def _count_body_characters(self, blocks: Any) -> int: