Showing
1 changed file
with
476 additions
and
0 deletions
report_engine_only.py
0 → 100644
| 1 | +#!/usr/bin/env python | ||
| 2 | +""" | ||
| 3 | +Report Engine 命令行版本 | ||
| 4 | + | ||
| 5 | +这是一个不需要前端的命令行报告生成程序。 | ||
| 6 | +主要流程: | ||
| 7 | +1. 检查PDF依赖 | ||
| 8 | +2. 获取最新的log、md文件 | ||
| 9 | +3. 直接调用Report Engine生成报告(跳过文件增加审核) | ||
| 10 | +4. 自动保存HTML和PDF(如果有依赖)到final_reports/ | ||
| 11 | + | ||
| 12 | +使用方法: | ||
| 13 | + python report_engine_only.py [选项] | ||
| 14 | + | ||
| 15 | +选项: | ||
| 16 | + --query QUERY 指定报告主题(可选,默认从文件名提取) | ||
| 17 | + --skip-pdf 跳过PDF生成(即使有依赖) | ||
| 18 | + --verbose 显示详细日志 | ||
| 19 | + --help 显示帮助信息 | ||
| 20 | +""" | ||
| 21 | + | ||
| 22 | +import os | ||
| 23 | +import sys | ||
| 24 | +import json | ||
| 25 | +import argparse | ||
| 26 | +from pathlib import Path | ||
| 27 | +from datetime import datetime | ||
| 28 | +from typing import Dict, Any, Optional | ||
| 29 | + | ||
| 30 | +from loguru import logger | ||
| 31 | + | ||
# Module-wide verbosity flag; setup_logger() overwrites it.
VERBOSE = False


def setup_logger(verbose: bool = False):
    """Route loguru output to stdout at the requested verbosity.

    Args:
        verbose: When True the sink accepts DEBUG records, otherwise the
            threshold is INFO. The value is also stored in the module-level
            ``VERBOSE`` flag.
    """
    global VERBOSE
    VERBOSE = verbose

    line_format = "<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <level>{message}</level>"
    threshold = "DEBUG" if verbose else "INFO"

    # Drop the handlers that are already installed so that only the sink
    # added below remains.
    logger.remove()
    logger.add(sys.stdout, format=line_format, level=threshold)
| 47 | + | ||
| 48 | + | ||
def check_dependencies() -> tuple[bool, Optional[str]]:
    """Check whether the system dependencies needed for PDF output are present.

    The actual probe is delegated to
    ``ReportEngine.utils.dependency_check.check_pango_available``; this
    function only logs the outcome.

    Returns:
        tuple: ``(is_available, message)`` where ``is_available`` tells whether
        PDF generation can be used and ``message`` is the text returned by the
        probe. If the probe cannot be imported or raises, ``(False, str(error))``
        is returned instead.
    """
    banner = "=" * 70
    logger.info(banner)
    logger.info("步骤 1/4: 检查系统依赖")
    logger.info(banner)

    try:
        # Imported lazily so that a missing/broken ReportEngine package is
        # reported as a failed check rather than an import-time crash.
        from ReportEngine.utils.dependency_check import check_pango_available
        pdf_ok, detail = check_pango_available()

        if not pdf_ok:
            logger.warning("⚠ PDF 依赖缺失,仅生成 HTML 文件")
            logger.info("\n" + detail)
        else:
            logger.success("✓ PDF 依赖检测通过,将同时生成 HTML 和 PDF 文件")

        return pdf_ok, detail
    except Exception as exc:
        logger.error(f"依赖检查失败: {exc}")
        return False, str(exc)
| 76 | + | ||
| 77 | + | ||
def get_latest_engine_reports() -> Dict[str, str]:
    """Find the most recently modified ``.md`` report of each analysis engine.

    Each engine has a fixed report directory, resolved relative to the current
    working directory. Engines whose directory is missing (or is not a
    directory) or contains no ``.md`` files are skipped with a warning.

    Returns:
        Dict[str, str]: Mapping of engine name (``insight``/``media``/``query``)
        to the path of its newest ``.md`` file.

    Raises:
        SystemExit: With exit code 1 when no engine has any report.
    """
    logger.info("\n" + "=" * 70)
    logger.info("步骤 2/4: 获取最新的分析引擎报告")
    logger.info("=" * 70)

    # Report directory of each engine.
    directories = {
        'insight': 'insight_engine_streamlit_reports',
        'media': 'media_engine_streamlit_reports',
        'query': 'query_engine_streamlit_reports'
    }

    latest_files = {}

    for engine, directory in directories.items():
        # isdir (not exists): a regular file with this name would make the
        # directory listing below raise NotADirectoryError.
        if not os.path.isdir(directory):
            logger.warning(f"⚠ {engine.capitalize()} Engine 目录不存在: {directory}")
            continue

        # Collect (mtime, path) for regular files only; a sub-directory that
        # happens to be named "*.md" cannot be opened as a report later on.
        with os.scandir(directory) as entries:
            candidates = [
                (entry.stat().st_mtime, entry.path)
                for entry in entries
                if entry.name.endswith('.md') and entry.is_file()
            ]

        if not candidates:
            logger.warning(f"⚠ {engine.capitalize()} Engine 目录中没有找到 .md 文件")
            continue

        # Newest file by modification time.
        _, latest_path = max(candidates, key=lambda item: item[0])
        latest_files[engine] = latest_path

        logger.info(f"✓ 找到 {engine.capitalize()} Engine 最新报告")

    if not latest_files:
        logger.error("❌ 未找到任何引擎报告文件,请先运行分析引擎生成报告")
        sys.exit(1)

    logger.info(f"\n共找到 {len(latest_files)} 个引擎的最新报告")

    return latest_files
| 127 | + | ||
| 128 | + | ||
def confirm_file_selection(latest_files: Dict[str, str]) -> bool:
    """Show the selected report files and ask the user to confirm them.

    Args:
        latest_files: Mapping of engine name to report file path.

    Returns:
        bool: True when the user answers with an empty line, ``y`` or ``yes``
        (case-insensitive); False for any other answer or when the prompt is
        interrupted (Ctrl-C / end of input).
    """
    rule = "=" * 70
    logger.info("\n" + rule)
    logger.info("请确认以下选择的文件:")
    logger.info(rule)

    for engine_name, path in latest_files.items():
        display_name = os.path.basename(path)
        # Last-modified time, rendered in local time.
        modified_at = datetime.fromtimestamp(os.path.getmtime(path))
        modified_text = modified_at.strftime('%Y-%m-%d %H:%M:%S')

        logger.info(f" {engine_name.capitalize()} Engine:")
        logger.info(f" 文件名: {display_name}")
        logger.info(f" 路径: {path}")
        logger.info(f" 修改时间: {modified_text}")
        logger.info("")

    logger.info(rule)

    # Ask for confirmation; the default answer (empty input) is "yes".
    try:
        answer = input("是否使用以上文件生成报告? [Y/n]: ").strip().lower()

        if answer not in ('', 'y', 'yes'):
            logger.warning("✗ 用户取消操作")
            return False

        logger.success("✓ 用户确认,继续生成报告")
        return True
    except (KeyboardInterrupt, EOFError):
        logger.warning("\n✗ 用户取消操作")
        return False
| 171 | + | ||
| 172 | + | ||
def load_engine_reports(latest_files: Dict[str, str]) -> list[str]:
    """Read the text of every selected engine report.

    Files are read as UTF-8. A file that cannot be read is logged as an error
    and left out of the result; it does not abort the remaining files.

    Args:
        latest_files: Mapping of engine name to report file path.

    Returns:
        list[str]: Contents of the reports that were read successfully, in the
        iteration order of ``latest_files``.
    """
    loaded = []

    for engine_name, path in latest_files.items():
        try:
            with open(path, 'r', encoding='utf-8') as handle:
                text = handle.read()
            loaded.append(text)
            logger.debug(f"已加载 {engine_name} 报告,长度: {len(text)} 字符")
        except Exception as exc:
            logger.error(f"加载 {engine_name} 报告失败: {exc}")

    return loaded
| 195 | + | ||
| 196 | + | ||
def extract_query_from_reports(latest_files: Dict[str, str]) -> str:
    """Derive the report topic from the report file names.

    File names are assumed to look like ``<prefix>_<topic>_<timestamp>.md``
    (assumption carried over from the original code; verify against the
    engines that write these files). The first underscore-separated part is
    treated as the prefix and the trailing timestamp is dropped. The timestamp
    may be a single part or the two-part ``YYYYMMDD_HHMMSS`` form. A name with
    exactly two parts yields the second part as the topic.

    Args:
        latest_files: Mapping of engine name to report file path.

    Returns:
        str: The topic taken from the first file name that yields a non-empty
        one, or ``"综合分析报告"`` when none does.
    """
    for file_path in latest_files.values():
        # Strip only a trailing ".md"; a ".md" elsewhere in the name is kept.
        stem = os.path.basename(file_path).removesuffix('.md')
        parts = stem.split('_')
        if len(parts) < 2:
            continue

        if len(parts) == 2:
            topic = parts[1]
        else:
            middle = parts[1:]
            # A "YYYYMMDD_HHMMSS" timestamp occupies two parts; drop both so
            # the date does not end up inside the topic.
            has_split_timestamp = (
                len(middle) >= 2
                and len(middle[-2]) == 8 and middle[-2].isdigit()
                and len(middle[-1]) == 6 and middle[-1].isdigit()
            )
            middle = middle[:-2] if has_split_timestamp else middle[:-1]
            topic = '_'.join(middle)

        if topic:
            return topic

    # Nothing usable in any file name: fall back to the default topic.
    return "综合分析报告"
| 221 | + | ||
| 222 | + | ||
# Maps the 'stage' value of a stage event to a function that builds the
# INFO lines logged for it. Stages not listed here are ignored.
_STAGE_LOG_LINES = {
    'agent_start': lambda p: [f"开始生成报告: {p.get('report_id', '')}"],
    'template_selected': lambda p: [f"✓ 已选择模板: {p.get('template', '')}"],
    'template_sliced': lambda p: [f"✓ 模板解析完成,共 {p.get('section_count', 0)} 个章节"],
    'layout_designed': lambda p: ["✓ 文档布局设计完成", f" 标题: {p.get('title', '')}"],
    'word_plan_ready': lambda p: [f"✓ 篇幅规划完成,目标章节数: {p.get('chapter_targets', 0)}"],
    'chapters_compiled': lambda p: [f"✓ 章节生成完成,共 {p.get('chapter_count', 0)} 个章节"],
    'html_rendered': lambda p: ["✓ HTML 渲染完成"],
    'report_saved': lambda p: ["✓ 报告已保存"],
}


def _log_stream_event(event_type: str, payload: Dict[str, Any]) -> None:
    """Translate one Report Engine stream event into log output.

    Handles ``stage``, ``chapter_status`` and ``error`` events; any other
    event type is ignored.
    """
    if event_type == 'stage':
        build_lines = _STAGE_LOG_LINES.get(payload.get('stage', ''))
        if build_lines is not None:
            for line in build_lines(payload):
                logger.info(line)
    elif event_type == 'chapter_status':
        title = payload.get('title', '')
        status = payload.get('status', '')
        if status == 'generating':
            logger.info(f" 正在生成章节: {title}")
        elif status == 'completed':
            # A truthy 'warning' marks a chapter that completed with a
            # caveat; the text shown comes from 'warningMessage'.
            if payload.get('warning', ''):
                attempt = payload.get('attempt', 1)
                logger.warning(f" ✓ 章节完成: {title} (第 {attempt} 次尝试,{payload.get('warningMessage', '')})")
            else:
                logger.success(f" ✓ 章节完成: {title}")
    elif event_type == 'error':
        logger.error(f"错误: {payload.get('message', '')}")


def generate_report(reports: list[str], query: str, pdf_available: bool) -> Dict[str, Any]:
    """Run the Report Engine over the loaded engine reports.

    Progress events emitted by the engine are written to the log. Forum logs
    and a custom template are not supplied, and the engine is asked to save
    the report itself.

    Args:
        reports: Contents of the engine reports.
        query: Report topic.
        pdf_available: Whether PDF output is possible. Currently unused here;
            kept so existing callers keep working.

    Returns:
        Dict[str, Any]: The result returned by ``ReportAgent.generate_report``.

    Raises:
        SystemExit: With exit code 1 when the engine cannot be imported or
            report generation raises.
    """
    logger.info("\n" + "=" * 70)
    logger.info("步骤 3/4: 生成综合报告")
    logger.info("=" * 70)
    logger.info(f"报告主题: {query}")
    logger.info(f"输入报告数量: {len(reports)}")

    try:
        # Imported lazily so an import failure is logged like any other
        # generation failure.
        from ReportEngine.agent import ReportAgent

        logger.info("正在初始化 Report Engine...")
        agent = ReportAgent()

        logger.info("开始生成报告,这可能需要几分钟时间...")
        result = agent.generate_report(
            query=query,
            reports=reports,
            forum_logs="",  # forum logs are not used
            custom_template="",  # empty: no custom template supplied
            save_report=True,  # ask the engine to save the report
            stream_handler=_log_stream_event
        )

        logger.success("✓ 报告生成成功!")
        return result

    except Exception as e:
        logger.exception(f"❌ 报告生成失败: {e}")
        sys.exit(1)
| 303 | + | ||
| 304 | + | ||
def save_pdf(document_ir_path: str, query: str) -> Optional[str]:
    """Render the Document IR file to a PDF under ``final_reports/pdf``.

    The file is named ``final_report_<topic>_<YYYYMMDD_HHMMSS>.pdf`` where
    ``<topic>`` is ``query`` reduced to alphanumerics, ``-`` and ``_`` (spaces
    become ``_``), cut to 30 characters, or ``report`` if nothing is left.

    Args:
        document_ir_path: Path of the Document IR JSON file.
        query: Report topic, used for the file name.

    Returns:
        Optional[str]: Path of the written PDF, or None if any step failed
        (the error is logged).
    """
    logger.info("\n正在生成 PDF 文件...")

    try:
        # Load the IR document.
        with open(document_ir_path, 'r', encoding='utf-8') as ir_file:
            ir_payload = json.load(ir_file)

        # Render it to PDF bytes.
        from ReportEngine.renderers import PDFRenderer
        pdf_data = PDFRenderer().render_to_bytes(ir_payload, optimize_layout=True)

        # Build a file-system friendly name from the topic and current time.
        stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        extra_allowed = (" ", "-", "_")
        kept_chars = [ch for ch in query if ch.isalnum() or ch in extra_allowed]
        topic_slug = "".join(kept_chars).rstrip().replace(" ", "_")[:30]
        if not topic_slug:
            topic_slug = "report"

        target_dir = Path("final_reports", "pdf")
        target_dir.mkdir(parents=True, exist_ok=True)

        target = target_dir / f"final_report_{topic_slug}_{stamp}.pdf"
        target.write_bytes(pdf_data)

        logger.success(f"✓ PDF 已保存: {target}")
        return str(target)

    except Exception as exc:
        logger.error(f"❌ PDF 生成失败: {exc}")
        return None
| 351 | + | ||
| 352 | + | ||
def parse_arguments(argv: Optional[list[str]] = None) -> argparse.Namespace:
    """Parse the command-line options of the report generator.

    Args:
        argv: Argument list to parse (without the program name). When None,
            the default, argparse reads ``sys.argv[1:]``, which is the
            behavior existing callers rely on.

    Returns:
        argparse.Namespace: With attributes ``query`` (str or None),
        ``skip_pdf`` (bool) and ``verbose`` (bool).
    """
    parser = argparse.ArgumentParser(
        description="Report Engine 命令行版本 - 无需前端的报告生成工具",
        # Keep the line breaks of the epilog exactly as written.
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
示例:
 python report_engine_only.py
 python report_engine_only.py --query "土木工程行业分析"
 python report_engine_only.py --skip-pdf --verbose

注意:
 程序会自动获取三个引擎目录中的最新报告文件,
 不进行文件增加审核,直接生成综合报告。
 """
    )

    parser.add_argument(
        '--query',
        type=str,
        default=None,
        help='指定报告主题(默认从文件名自动提取)'
    )

    parser.add_argument(
        '--skip-pdf',
        action='store_true',
        help='跳过PDF生成(即使系统支持)'
    )

    parser.add_argument(
        '--verbose',
        action='store_true',
        help='显示详细日志信息'
    )

    return parser.parse_args(argv)
| 390 | + | ||
| 391 | + | ||
def main():
    """Run the command-line report workflow end to end.

    Steps: parse options and configure logging, check the PDF dependencies,
    pick the newest report of each engine and ask the user to confirm them,
    load the reports, generate the combined report, optionally render a PDF,
    then log a summary.

    Raises:
        SystemExit: Exit code 0 when the user declines the file selection,
            1 when no report content could be loaded. Helper functions may
            also exit on their own failures.
    """
    args = parse_arguments()

    setup_logger(verbose=args.verbose)

    logger.info("\n")
    logger.info("╔" + "═" * 68 + "╗")
    logger.info("║" + " " * 20 + "Report Engine 命令行版本" + " " * 24 + "║")
    logger.info("╚" + "═" * 68 + "╝")
    logger.info("\n")

    # Step 1: dependency check.
    pdf_available, _ = check_dependencies()

    # An explicit --skip-pdf overrides a successful dependency check.
    if args.skip_pdf:
        logger.info("用户指定 --skip-pdf,将跳过 PDF 生成")
        pdf_available = False

    # Step 2: locate the newest reports and let the user confirm them.
    latest_files = get_latest_engine_reports()

    if not confirm_file_selection(latest_files):
        logger.info("\n程序已退出")
        sys.exit(0)

    reports = load_engine_reports(latest_files)

    if not reports:
        logger.error("❌ 未能加载任何报告内容")
        sys.exit(1)

    # Topic: the --query option wins, otherwise derive it from file names.
    query = args.query if args.query else extract_query_from_reports(latest_files)
    logger.info(f"使用报告主题: {query}")

    # Step 3: generate the report.
    result = generate_report(reports, query, pdf_available)

    # Step 4: report saved files.
    logger.info("\n" + "=" * 70)
    logger.info("步骤 4/4: 保存生成的文件")
    logger.info("=" * 70)

    # generate_report is called with save_report=True, so only the path
    # needs to be reported here.
    html_path = result.get('report_filepath', '')
    if html_path:
        logger.success(f"✓ HTML 已保存: {result.get('report_relative_path', html_path)}")

    # Remember the actual PDF outcome so the summary below does not claim a
    # PDF exists when rendering failed or the IR file was missing.
    pdf_path: Optional[str] = None
    if pdf_available:
        ir_path = result.get('ir_filepath', '')
        if ir_path and os.path.exists(ir_path):
            pdf_path = save_pdf(ir_path, query)
        else:
            logger.warning("⚠ 未找到 IR 文件,无法生成 PDF")
    else:
        logger.info("⚠ 跳过 PDF 生成(缺少系统依赖或用户指定跳过)")

    # Summary.
    logger.info("\n" + "=" * 70)
    logger.success("✓ 报告生成完成!")
    logger.info("=" * 70)
    logger.info(f"报告 ID: {result.get('report_id', 'N/A')}")
    logger.info(f"HTML 文件: {result.get('report_relative_path', 'N/A')}")
    if pdf_path:
        logger.info(f"PDF 文件: {pdf_path}")
    elif pdf_available:
        # PDF was attempted but nothing was written.
        logger.warning("PDF 文件: 未生成")
    logger.info("=" * 70)
    logger.info("\n程序结束")
| 466 | + | ||
| 467 | + | ||
if __name__ == "__main__":
    # Script entry point: Ctrl-C ends the run with exit status 0, any other
    # unhandled exception is logged with its traceback and ends with status 1.
    try:
        main()
    except KeyboardInterrupt:
        logger.warning("\n\n用户中断程序")
        raise SystemExit(0)
    except Exception as exc:
        logger.exception(f"\n程序异常退出: {exc}")
        raise SystemExit(1)
-
Please register or login to post a comment