马一丁

Update Report Engine Log Filtering Strategy

@@ -245,6 +245,31 @@ class ReportAgent: @@ -245,6 +245,31 @@ class ReportAgent:
245 log_dir = os.path.dirname(self.config.LOG_FILE) 245 log_dir = os.path.dirname(self.config.LOG_FILE)
246 os.makedirs(log_dir, exist_ok=True) 246 os.makedirs(log_dir, exist_ok=True)
247 247
  248 + def _exclude_other_engines(record):
  249 + """
  250 + 过滤掉其他引擎(Insight/Media/Query/Forum)产生的日志,其余日志全部保留。
  251 +
  252 + 使用路径匹配为主,无法获取路径时退化到模块名。
  253 + """
  254 + excluded_keywords = ("InsightEngine", "MediaEngine", "QueryEngine", "ForumEngine")
  255 + try:
  256 + file_path = record["file"].path
  257 + if any(keyword in file_path for keyword in excluded_keywords):
  258 + return False
  259 + except Exception:
  260 + pass
  261 +
  262 + try:
  263 + module_name = record.get("module", "")
  264 + if isinstance(module_name, str):
  265 + lowered = module_name.lower()
  266 + if any(keyword.lower() in lowered for keyword in excluded_keywords):
  267 + return False
  268 + except Exception:
  269 + pass
  270 +
  271 + return True
  272 +
248 # 【修复】检查是否已经添加过这个文件的handler,避免重复 273 # 【修复】检查是否已经添加过这个文件的handler,避免重复
249 # loguru会自动去重,但显式检查更安全 274 # loguru会自动去重,但显式检查更安全
250 log_file_path = str(Path(self.config.LOG_FILE).resolve()) 275 log_file_path = str(Path(self.config.LOG_FILE).resolve())
@@ -274,7 +299,8 @@ class ReportAgent: @@ -274,7 +299,8 @@ class ReportAgent:
274 buffering=1, # 行缓冲,每行立即写入 299 buffering=1, # 行缓冲,每行立即写入
275 serialize=False, # 普通文本格式,不序列化为JSON 300 serialize=False, # 普通文本格式,不序列化为JSON
276 encoding="utf-8", # 明确UTF-8编码 301 encoding="utf-8", # 明确UTF-8编码
277 - mode="a" # 追加模式 302 + mode="a", # 追加模式
  303 + filter=_exclude_other_engines # 过滤掉四个 Engine 的日志,保留其余信息
278 ) 304 )
279 logger.debug(f"已添加日志handler (ID: {handler_id}): {self.config.LOG_FILE}") 305 logger.debug(f"已添加日志handler (ID: {handler_id}): {self.config.LOG_FILE}")
280 306
@@ -42,6 +42,33 @@ tasks_registry: Dict[str, 'ReportTask'] = {} @@ -42,6 +42,33 @@ tasks_registry: Dict[str, 'ReportTask'] = {}
42 LOG_STREAM_LEVELS = {"DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"} 42 LOG_STREAM_LEVELS = {"DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"}
43 log_stream_handler_id: Optional[int] = None 43 log_stream_handler_id: Optional[int] = None
44 44
  45 +EXCLUDED_ENGINE_PATH_KEYWORDS = ("ForumEngine", "InsightEngine", "MediaEngine", "QueryEngine")
  46 +
  47 +def _is_excluded_engine_log(record: Dict[str, Any]) -> bool:
  48 + """
  49 + 判断日志是否来自其他引擎(Insight/Media/Query/Forum),用于过滤混入的日志。
  50 +
  51 + 返回:
  52 + bool: True 表示应当过滤(即不写入/不转发)。
  53 + """
  54 + try:
  55 + file_path = record["file"].path
  56 + if any(keyword in file_path for keyword in EXCLUDED_ENGINE_PATH_KEYWORDS):
  57 + return True
  58 + except Exception:
  59 + pass
  60 +
  61 + # 兜底:尝试按模块名过滤,防止file信息缺失时误混入
  62 + try:
  63 + module_name = record.get("module", "")
  64 + if isinstance(module_name, str):
  65 + lowered = module_name.lower()
  66 + return any(keyword.lower() in lowered for keyword in EXCLUDED_ENGINE_PATH_KEYWORDS)
  67 + except Exception:
  68 + pass
  69 +
  70 + return False
  71 +
45 72
46 def _stream_log_to_task(message): 73 def _stream_log_to_task(message):
47 """ 74 """
@@ -54,6 +81,8 @@ def _stream_log_to_task(message): @@ -54,6 +81,8 @@ def _stream_log_to_task(message):
54 level_name = record["level"].name 81 level_name = record["level"].name
55 if level_name not in LOG_STREAM_LEVELS: 82 if level_name not in LOG_STREAM_LEVELS:
56 return 83 return
  84 + if _is_excluded_engine_log(record):
  85 + return
57 86
58 with task_lock: 87 with task_lock:
59 task = current_task 88 task = current_task