Angiin
Committed by GitHub

Merge pull request #3 from Angiin/feat/anti-detection

feat: 增强反检测能力 — JS 伪装、CDP 真实交互、随机延迟
@@ -2,14 +2,17 @@ @@ -2,14 +2,17 @@
2 2
3 from __future__ import annotations 3 from __future__ import annotations
4 4
  5 +import contextlib
5 import json 6 import json
6 import logging 7 import logging
7 import os 8 import os
8 import platform 9 import platform
9 import shutil 10 import shutil
10 -import signal 11 +import socket
11 import subprocess 12 import subprocess
  13 +import sys
12 import time 14 import time
  15 +from pathlib import Path
13 16
14 from xhs.stealth import STEALTH_ARGS 17 from xhs.stealth import STEALTH_ARGS
15 18
@@ -18,6 +21,9 @@ logger = logging.getLogger(__name__) @@ -18,6 +21,9 @@ logger = logging.getLogger(__name__)
18 # 默认远程调试端口 21 # 默认远程调试端口
19 DEFAULT_PORT = 9222 22 DEFAULT_PORT = 9222
20 23
# Module-level handle to the Chrome subprocess most recently started by
# launch_chrome(); stays None until a launch happens. kill_chrome() terminates
# the tracked process if it is still alive and resets this back to None.
_chrome_process: subprocess.Popen | None = None
  26 +
21 # 各平台 Chrome 默认路径 27 # 各平台 Chrome 默认路径
22 _CHROME_PATHS: dict[str, list[str]] = { 28 _CHROME_PATHS: dict[str, list[str]] = {
23 "Darwin": [ 29 "Darwin": [
@@ -38,6 +44,22 @@ _CHROME_PATHS: dict[str, list[str]] = { @@ -38,6 +44,22 @@ _CHROME_PATHS: dict[str, list[str]] = {
38 } 44 }
39 45
40 46
def _get_default_data_dir() -> str:
    """Return the default Chrome profile directory as a string.

    The location is ``<home>/.xhs/chrome-profile``, where ``<home>`` is the
    directory reported by ``Path.home()`` for the current user. The directory
    is only named here, not created.
    """
    profile_dir = Path.home().joinpath(".xhs", "chrome-profile")
    return str(profile_dir)
  50 +
  51 +
  52 +def is_port_open(port: int, host: str = "127.0.0.1") -> bool:
  53 + """TCP socket 级端口检测(秒级响应)。"""
  54 + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
  55 + s.settimeout(1)
  56 + try:
  57 + s.connect((host, port))
  58 + return True
  59 + except (ConnectionRefusedError, TimeoutError, OSError):
  60 + return False
  61 +
  62 +
41 def find_chrome() -> str | None: 63 def find_chrome() -> str | None:
42 """查找 Chrome 可执行文件路径。""" 64 """查找 Chrome 可执行文件路径。"""
43 # 环境变量优先 65 # 环境变量优先
@@ -45,13 +67,28 @@ def find_chrome() -> str | None: @@ -45,13 +67,28 @@ def find_chrome() -> str | None:
45 if env_path and os.path.isfile(env_path): 67 if env_path and os.path.isfile(env_path):
46 return env_path 68 return env_path
47 69
48 - # which/where 查找  
49 - chrome = shutil.which("google-chrome") or shutil.which("chromium") 70 + # which/where 查找(含 Windows chrome.exe)
  71 + chrome = (
  72 + shutil.which("google-chrome")
  73 + or shutil.which("chromium")
  74 + or shutil.which("chrome")
  75 + or shutil.which("chrome.exe")
  76 + )
50 if chrome: 77 if chrome:
51 return chrome 78 return chrome
52 79
53 # 平台默认路径 80 # 平台默认路径
54 system = platform.system() 81 system = platform.system()
  82 +
  83 + # Windows: 额外检查环境变量路径
  84 + if system == "Windows":
  85 + for env_var in ("PROGRAMFILES", "PROGRAMFILES(X86)", "LOCALAPPDATA"):
  86 + base = os.environ.get(env_var, "")
  87 + if base:
  88 + candidate = os.path.join(base, "Google", "Chrome", "Application", "chrome.exe")
  89 + if os.path.isfile(candidate):
  90 + return candidate
  91 +
55 for path in _CHROME_PATHS.get(system, []): 92 for path in _CHROME_PATHS.get(system, []):
56 if os.path.isfile(path): 93 if os.path.isfile(path):
57 return path 94 return path
@@ -59,55 +96,70 @@ def find_chrome() -> str | None: @@ -59,55 +96,70 @@ def find_chrome() -> str | None:
59 return None 96 return None
60 97
61 98
def is_chrome_running(port: int = DEFAULT_PORT) -> bool:
    """Report whether anything is listening on the Chrome debugging port.

    The check is a TCP-level probe delegated to ``is_port_open``; it does not
    talk to the DevTools endpoint, so any listener on the port counts.

    Args:
        port: Remote debugging port to probe.

    Returns:
        True when a TCP connection to the port succeeds, otherwise False.
    """
    listening = is_port_open(port)
    return listening
  102 +
  103 +
62 def launch_chrome( 104 def launch_chrome(
63 port: int = DEFAULT_PORT, 105 port: int = DEFAULT_PORT,
64 headless: bool = False, 106 headless: bool = False,
65 user_data_dir: str | None = None, 107 user_data_dir: str | None = None,
66 chrome_bin: str | None = None, 108 chrome_bin: str | None = None,
67 -) -> subprocess.Popen: 109 +) -> subprocess.Popen | None:
68 """启动 Chrome 进程(带远程调试端口)。 110 """启动 Chrome 进程(带远程调试端口)。
69 111
70 Args: 112 Args:
71 port: 远程调试端口。 113 port: 远程调试端口。
72 headless: 是否无头模式。 114 headless: 是否无头模式。
73 - user_data_dir: 用户数据目录(Profile 隔离)。 115 + user_data_dir: 用户数据目录(Profile 隔离),默认 ~/.xhs/chrome-profile
74 chrome_bin: Chrome 可执行文件路径。 116 chrome_bin: Chrome 可执行文件路径。
75 117
76 Returns: 118 Returns:
77 - Chrome 子进程。 119 + Chrome 子进程,若已在运行则返回 None
78 120
79 Raises: 121 Raises:
80 FileNotFoundError: 未找到 Chrome。 122 FileNotFoundError: 未找到 Chrome。
81 """ 123 """
  124 + global _chrome_process
  125 +
  126 + # 已在运行则跳过
  127 + if is_port_open(port):
  128 + logger.info("Chrome 已在运行 (port=%d),跳过启动", port)
  129 + return None
  130 +
82 if not chrome_bin: 131 if not chrome_bin:
83 chrome_bin = find_chrome() 132 chrome_bin = find_chrome()
84 if not chrome_bin: 133 if not chrome_bin:
85 raise FileNotFoundError("未找到 Chrome,请设置 CHROME_BIN 环境变量或安装 Chrome") 134 raise FileNotFoundError("未找到 Chrome,请设置 CHROME_BIN 环境变量或安装 Chrome")
86 135
  136 + # 默认 user-data-dir
  137 + if not user_data_dir:
  138 + user_data_dir = _get_default_data_dir()
  139 +
87 args = [ 140 args = [
88 chrome_bin, 141 chrome_bin,
89 f"--remote-debugging-port={port}", 142 f"--remote-debugging-port={port}",
  143 + f"--user-data-dir={user_data_dir}",
90 *STEALTH_ARGS, 144 *STEALTH_ARGS,
91 ] 145 ]
92 146
93 if headless: 147 if headless:
94 args.append("--headless=new") 148 args.append("--headless=new")
95 149
96 - if user_data_dir:  
97 - args.append(f"--user-data-dir={user_data_dir}")  
98 -  
99 # 代理 150 # 代理
100 proxy = os.getenv("XHS_PROXY") 151 proxy = os.getenv("XHS_PROXY")
101 if proxy: 152 if proxy:
102 args.append(f"--proxy-server={proxy}") 153 args.append(f"--proxy-server={proxy}")
103 logger.info("使用代理: %s", _mask_proxy(proxy)) 154 logger.info("使用代理: %s", _mask_proxy(proxy))
104 155
105 - logger.info("启动 Chrome: port=%d, headless=%s", port, headless) 156 + logger.info("启动 Chrome: port=%d, headless=%s, profile=%s", port, headless, user_data_dir)
106 process = subprocess.Popen( 157 process = subprocess.Popen(
107 args, 158 args,
108 stdout=subprocess.DEVNULL, 159 stdout=subprocess.DEVNULL,
109 stderr=subprocess.DEVNULL, 160 stderr=subprocess.DEVNULL,
110 ) 161 )
  162 + _chrome_process = process
111 163
112 # 等待 Chrome 准备就绪 164 # 等待 Chrome 准备就绪
113 _wait_for_chrome(port) 165 _wait_for_chrome(port)
@@ -120,7 +172,7 @@ def close_chrome(process: subprocess.Popen) -> None: @@ -120,7 +172,7 @@ def close_chrome(process: subprocess.Popen) -> None:
120 return 172 return
121 173
122 try: 174 try:
123 - process.send_signal(signal.SIGTERM) 175 + process.terminate()
124 process.wait(timeout=5) 176 process.wait(timeout=5)
125 except (subprocess.TimeoutExpired, OSError): 177 except (subprocess.TimeoutExpired, OSError):
126 process.kill() 178 process.kill()
@@ -129,29 +181,20 @@ def close_chrome(process: subprocess.Popen) -> None: @@ -129,29 +181,20 @@ def close_chrome(process: subprocess.Popen) -> None:
129 logger.info("Chrome 进程已关闭") 181 logger.info("Chrome 进程已关闭")
130 182
131 183
132 -def is_chrome_running(port: int = DEFAULT_PORT) -> bool:  
133 - """检查指定端口的 Chrome 是否在运行。"""  
134 - import requests  
135 -  
136 - try:  
137 - resp = requests.get(f"http://127.0.0.1:{port}/json/version", timeout=2)  
138 - return resp.status_code == 200  
139 - except (requests.ConnectionError, requests.Timeout):  
140 - return False  
141 -  
142 -  
143 def kill_chrome(port: int = DEFAULT_PORT) -> None: 184 def kill_chrome(port: int = DEFAULT_PORT) -> None:
144 """关闭指定端口的 Chrome 实例。 185 """关闭指定端口的 Chrome 实例。
145 186
146 - 尝试通过 CDP Browser.close 命令关闭,失败则使用进程信号 187 + 策略: CDP Browser.close → terminate 追踪进程 → 端口查找终止进程
147 188
148 Args: 189 Args:
149 port: Chrome 调试端口。 190 port: Chrome 调试端口。
150 """ 191 """
151 - import requests 192 + global _chrome_process
152 193
153 # 策略1: 通过 CDP 关闭 194 # 策略1: 通过 CDP 关闭
154 try: 195 try:
  196 + import requests
  197 +
155 resp = requests.get(f"http://127.0.0.1:{port}/json/version", timeout=2) 198 resp = requests.get(f"http://127.0.0.1:{port}/json/version", timeout=2)
156 if resp.status_code == 200: 199 if resp.status_code == 200:
157 ws_url = resp.json().get("webSocketDebuggerUrl") 200 ws_url = resp.json().get("webSocketDebuggerUrl")
@@ -163,32 +206,70 @@ def kill_chrome(port: int = DEFAULT_PORT) -> None: @@ -163,32 +206,70 @@ def kill_chrome(port: int = DEFAULT_PORT) -> None:
163 ws.close() 206 ws.close()
164 logger.info("通过 CDP Browser.close 关闭 Chrome (port=%d)", port) 207 logger.info("通过 CDP Browser.close 关闭 Chrome (port=%d)", port)
165 time.sleep(1) 208 time.sleep(1)
166 - return  
167 except Exception: 209 except Exception:
168 pass 210 pass
169 211
170 - # 策略2: 通过 lsof 查找并 kill 进程 212 + # 策略2: terminate 追踪的子进程
  213 + if _chrome_process and _chrome_process.poll() is None:
171 try: 214 try:
172 - result = subprocess.run(  
173 - ["lsof", "-ti", f":{port}"],  
174 - capture_output=True,  
175 - text=True,  
176 - timeout=5,  
177 - )  
178 - if result.returncode == 0 and result.stdout.strip():  
179 - import contextlib  
180 -  
181 - pids = result.stdout.strip().split("\n") 215 + _chrome_process.terminate()
  216 + _chrome_process.wait(timeout=5)
  217 + logger.info("通过 terminate 关闭追踪的 Chrome 进程")
  218 + except Exception:
  219 + with contextlib.suppress(Exception):
  220 + _chrome_process.kill()
  221 + _chrome_process = None
  222 +
  223 + # 策略3: 通过端口查找并终止进程(跨平台)
  224 + if is_port_open(port):
  225 + pids = _find_pids_by_port(port)
  226 + if pids:
182 for pid in pids: 227 for pid in pids:
183 - with contextlib.suppress(OSError, ValueError):  
184 - os.kill(int(pid), signal.SIGTERM)  
185 - logger.info("通过 SIGTERM 关闭 Chrome 进程 (port=%d)", port)  
186 - time.sleep(1) 228 + _kill_pid(pid)
  229 + logger.info("通过进程终止关闭 Chrome (port=%d)", port)
  230 +
  231 + # 等待端口释放(最多 5s)
  232 + deadline = time.monotonic() + 5
  233 + while time.monotonic() < deadline:
  234 + if not is_port_open(port):
187 return 235 return
188 - except Exception:  
189 - pass 236 + time.sleep(0.5)
  237 +
  238 + if is_port_open(port):
  239 + logger.warning("端口 %d 仍被占用,kill 可能未完全生效", port)
  240 +
  241 +
def ensure_chrome(
    port: int = DEFAULT_PORT,
    headless: bool = False,
    user_data_dir: str | None = None,
    chrome_bin: str | None = None,
) -> bool:
    """Make sure a Chrome debugging port is reachable, launching Chrome if needed.

    If the port already accepts TCP connections nothing is started and True is
    returned immediately. Otherwise ``launch_chrome`` is called and the port is
    probed once more after it returns. Both probes use ``is_port_open`` with
    its default host.

    Args:
        port: Remote debugging port.
        headless: Whether to start in headless mode (only used for a new launch).
        user_data_dir: Chrome user data directory passed through to the launch.
        chrome_bin: Path to the Chrome executable passed through to the launch.

    Returns:
        True if the port is reachable, False if Chrome could not be launched
        or the port is still closed after the launch attempt.
    """
    if is_port_open(port):
        return True

    try:
        launch_chrome(
            port=port,
            headless=headless,
            user_data_dir=user_data_dir,
            chrome_bin=chrome_bin,
        )
    except OSError as e:
        # OSError covers FileNotFoundError (no Chrome binary found) as well as
        # other spawn failures such as PermissionError, so every launch failure
        # is reported as False instead of escaping as an exception.
        logger.error("启动 Chrome 失败: %s", e)
        return False

    return is_port_open(port)
192 273
193 274
194 def restart_chrome( 275 def restart_chrome(
@@ -196,7 +277,7 @@ def restart_chrome( @@ -196,7 +277,7 @@ def restart_chrome(
196 headless: bool = False, 277 headless: bool = False,
197 user_data_dir: str | None = None, 278 user_data_dir: str | None = None,
198 chrome_bin: str | None = None, 279 chrome_bin: str | None = None,
199 -) -> subprocess.Popen: 280 +) -> subprocess.Popen | None:
200 """重启 Chrome:关闭当前实例后以新模式重新启动。 281 """重启 Chrome:关闭当前实例后以新模式重新启动。
201 282
202 Args: 283 Args:
@@ -206,7 +287,7 @@ def restart_chrome( @@ -206,7 +287,7 @@ def restart_chrome(
206 chrome_bin: Chrome 可执行文件路径。 287 chrome_bin: Chrome 可执行文件路径。
207 288
208 Returns: 289 Returns:
209 - 新的 Chrome 子进程。 290 + 新的 Chrome 子进程,或 None
210 """ 291 """
211 logger.info("重启 Chrome: port=%d, headless=%s", port, headless) 292 logger.info("重启 Chrome: port=%d, headless=%s", port, headless)
212 kill_chrome(port) 293 kill_chrome(port)
@@ -220,16 +301,70 @@ def restart_chrome( @@ -220,16 +301,70 @@ def restart_chrome(
220 301
221 302
def _wait_for_chrome(port: int, timeout: float = 15.0) -> None:
    """Poll the debugging port until it accepts TCP connections or time runs out.

    The port is probed with ``is_port_open`` roughly every half second. When
    the deadline passes without a successful probe a warning is logged and the
    function returns normally; no exception is raised.

    Args:
        port: Remote debugging port to wait for.
        timeout: Maximum number of seconds to keep polling.
    """
    give_up_at = time.monotonic() + timeout
    while time.monotonic() < give_up_at:
        if not is_port_open(port):
            time.sleep(0.5)
            continue
        logger.info("Chrome 已就绪 (port=%d)", port)
        return
    logger.warning("等待 Chrome 就绪超时 (port=%d)", port)
231 312
232 313
def _find_pids_by_port(port: int) -> list[int]:
    """Return the PIDs of processes listening on the given TCP port.

    On Windows the output of ``netstat -ano -p TCP`` is parsed; elsewhere
    ``lsof`` is asked for TCP listeners only. Restricting the lookup to
    listening sockets matters because callers terminate the returned PIDs:
    a process that merely holds a client connection to the port must not be
    reported.

    Args:
        port: TCP port to look up.

    Returns:
        De-duplicated list of PIDs in first-seen order. Empty when nothing is
        listening, the lookup tool is missing or fails, or it times out.
    """
    try:
        if sys.platform == "win32":
            result = subprocess.run(
                ["netstat", "-ano", "-p", "TCP"],
                capture_output=True,
                text=True,
                timeout=5,
            )
            if result.returncode != 0:
                return []
            return _parse_netstat_listeners(result.stdout, port)

        result = subprocess.run(
            # -nP: skip host/port name lookups; -t: print bare PIDs;
            # -sTCP:LISTEN: only sockets in the LISTEN state.
            ["lsof", "-nP", "-t", f"-iTCP:{port}", "-sTCP:LISTEN"],
            capture_output=True,
            text=True,
            timeout=5,
        )
        if result.returncode != 0:
            return []
        return _parse_pid_lines(result.stdout)
    except (OSError, ValueError, subprocess.SubprocessError):
        # OSError: tool not installed / not executable; SubprocessError:
        # timeout; ValueError: undecodable tool output. The lookup is
        # best-effort, so all of these mean "no PIDs found".
        return []


def _parse_netstat_listeners(output: str, port: int) -> list[int]:
    """Extract PIDs of LISTENING rows whose local port equals ``port`` exactly."""
    wanted = str(port)
    pids: list[int] = []
    for line in output.splitlines():
        parts = line.split()
        # Expected columns: Proto, Local Address, Foreign Address, State, PID.
        if len(parts) < 5 or "LISTENING" not in parts:
            continue
        # Compare the port after the last ':' so that e.g. port 922 does not
        # match a listener on 9222, and IPv6 "[::1]:9222" is handled too.
        if parts[1].rpartition(":")[2] != wanted:
            continue
        with contextlib.suppress(ValueError):
            pids.append(int(parts[-1]))
    return list(dict.fromkeys(pids))


def _parse_pid_lines(output: str) -> list[int]:
    """Parse one-PID-per-line output, ignoring blank or non-numeric lines."""
    pids: list[int] = []
    for raw in output.split("\n"):
        with contextlib.suppress(ValueError):
            pids.append(int(raw))
    return list(dict.fromkeys(pids))
  349 +
  350 +
def _kill_pid(pid: int) -> None:
    """Ask the process with the given PID to terminate (best effort).

    On Windows ``taskkill /PID <pid> /F`` is run; elsewhere ``SIGTERM`` is
    sent with ``os.kill``. Failures are logged at debug level and never
    raised, because callers use this as a last-resort cleanup step.

    Args:
        pid: Process ID to terminate. Non-positive values are ignored.
    """
    if pid <= 0:
        # os.kill() gives pid 0 and negative pids a special meaning (whole
        # process groups / every process the caller may signal), so a bogus
        # value must never reach it.
        return

    try:
        if sys.platform == "win32":
            result = subprocess.run(
                ["taskkill", "/PID", str(pid), "/F"],
                capture_output=True,
                timeout=5,
            )
            if result.returncode != 0:
                # taskkill reports failure through its exit code, not an exception.
                logger.debug("终止进程 %d 失败", pid)
        else:
            import signal

            os.kill(pid, signal.SIGTERM)
    except (OSError, subprocess.SubprocessError):
        # OSError: process already gone, no permission, or taskkill missing;
        # SubprocessError: taskkill timed out.
        logger.debug("终止进程 %d 失败", pid, exc_info=True)
  366 +
  367 +
233 def _mask_proxy(proxy_url: str) -> str: 368 def _mask_proxy(proxy_url: str) -> str:
234 """隐藏代理 URL 中的敏感信息。""" 369 """隐藏代理 URL 中的敏感信息。"""
235 from urllib.parse import urlparse 370 from urllib.parse import urlparse
@@ -12,6 +12,12 @@ import json @@ -12,6 +12,12 @@ import json
12 import logging 12 import logging
13 import sys 13 import sys
14 14
# The default Windows console encoding (e.g. cp1252) cannot represent Chinese
# text, so switch both standard streams to UTF-8 wherever the stream object
# exists and offers reconfigure().
for _stream in (sys.stdout, sys.stderr):
    if _stream and hasattr(_stream, "reconfigure"):
        _stream.reconfigure(encoding="utf-8")
del _stream
  20 +
15 logging.basicConfig( 21 logging.basicConfig(
16 level=logging.INFO, 22 level=logging.INFO,
17 format="%(asctime)s %(levelname)s %(name)s: %(message)s", 23 format="%(asctime)s %(levelname)s %(name)s: %(message)s",
@@ -27,14 +33,43 @@ def _output(data: dict, exit_code: int = 0) -> None: @@ -27,14 +33,43 @@ def _output(data: dict, exit_code: int = 0) -> None:
27 33
def _connect(args: argparse.Namespace):
    """Connect to Chrome and return ``(browser, page)`` with a freshly opened page.

    Chrome is started first via ``ensure_chrome`` if nothing is listening on
    ``args.port``. When that fails, a failure payload is emitted through
    ``_output`` with exit code 2.

    NOTE(review): presumably ``_output`` terminates the process for a non-zero
    exit code; if it does not, execution continues into the connection attempt
    below. Confirm against ``_output``.

    Args:
        args: Parsed CLI arguments; ``host`` and ``port`` are read from it.

    Returns:
        Tuple of the connected ``Browser`` and the new page it opened.
    """
    from chrome_launcher import ensure_chrome
    from xhs.cdp import Browser

    chrome_ready = ensure_chrome(port=args.port)
    if not chrome_ready:
        failure = {"success": False, "error": "无法启动 Chrome,请检查 Chrome 是否已安装"}
        _output(failure, exit_code=2)

    browser = Browser(host=args.host, port=args.port)
    browser.connect()
    return browser, browser.new_page()
36 49
37 50
def _connect_existing(args: argparse.Namespace):
    """Connect to Chrome and return ``(browser, page)`` reusing an open page.

    Intended for the later steps of a multi-step publish flow, where an
    earlier command left its page open. Two failure payloads can be emitted
    through ``_output`` with exit code 2: when ``ensure_chrome`` reports that
    Chrome is unavailable, and when the browser has no existing page to reuse.

    NOTE(review): presumably ``_output`` terminates the process for a non-zero
    exit code; if it does not, this function can return a falsy page. Confirm.
    NOTE(review): ``ensure_chrome`` may launch a brand-new Chrome when none is
    running, which cannot contain the page from a previous step. Verify this
    is the intended behaviour for follow-up commands.

    Args:
        args: Parsed CLI arguments; ``host`` and ``port`` are read from it.

    Returns:
        Tuple of the connected ``Browser`` and the page it reported as existing.
    """
    from chrome_launcher import ensure_chrome
    from xhs.cdp import Browser

    chrome_ready = ensure_chrome(port=args.port)
    if not chrome_ready:
        unreachable = {"success": False, "error": "无法连接到 Chrome"}
        _output(unreachable, exit_code=2)

    browser = Browser(host=args.host, port=args.port)
    browser.connect()

    page = browser.get_existing_page()
    if page:
        return browser, page

    missing_page = {"success": False, "error": "未找到已打开的页面,请先执行前置步骤"}
    _output(missing_page, exit_code=2)
    return browser, page
  71 +
  72 +
38 def _headless_fallback(port: int) -> None: 73 def _headless_fallback(port: int) -> None:
39 """Headless 模式未登录时自动降级到有窗口模式。""" 74 """Headless 模式未登录时自动降级到有窗口模式。"""
40 from chrome_launcher import restart_chrome 75 from chrome_launcher import restart_chrome
@@ -332,7 +367,7 @@ def cmd_fill_publish(args: argparse.Namespace) -> None: @@ -332,7 +367,7 @@ def cmd_fill_publish(args: argparse.Namespace) -> None:
332 } 367 }
333 ) 368 )
334 finally: 369 finally:
335 - browser.close_page(page) 370 + # 不关闭页面,让用户在浏览器中预览
336 browser.close() 371 browser.close()
337 372
338 373
@@ -368,15 +403,15 @@ def cmd_fill_publish_video(args: argparse.Namespace) -> None: @@ -368,15 +403,15 @@ def cmd_fill_publish_video(args: argparse.Namespace) -> None:
368 } 403 }
369 ) 404 )
370 finally: 405 finally:
371 - browser.close_page(page) 406 + # 不关闭页面,让用户在浏览器中预览
372 browser.close() 407 browser.close()
373 408
374 409
375 def cmd_click_publish(args: argparse.Namespace) -> None: 410 def cmd_click_publish(args: argparse.Namespace) -> None:
376 - """点击发布按钮(在用户确认后调用)。""" 411 + """点击发布按钮(在用户确认后调用)。复用已有的发布页 tab。"""
377 from xhs.publish import click_publish_button 412 from xhs.publish import click_publish_button
378 413
379 - browser, page = _connect(args) 414 + browser, page = _connect_existing(args)
380 try: 415 try:
381 click_publish_button(page) 416 click_publish_button(page)
382 _output({"success": True, "status": "发布完成"}) 417 _output({"success": True, "status": "发布完成"})
@@ -410,15 +445,15 @@ def cmd_long_article(args: argparse.Namespace) -> None: @@ -410,15 +445,15 @@ def cmd_long_article(args: argparse.Namespace) -> None:
410 } 445 }
411 ) 446 )
412 finally: 447 finally:
413 - browser.close_page(page) 448 + # 不关闭页面,后续 select-template / next-step 需要复用
414 browser.close() 449 browser.close()
415 450
416 451
417 def cmd_select_template(args: argparse.Namespace) -> None: 452 def cmd_select_template(args: argparse.Namespace) -> None:
418 - """选择排版模板。""" 453 + """选择排版模板。复用已有的长文编辑页 tab。"""
419 from xhs.publish_long_article import select_template 454 from xhs.publish_long_article import select_template
420 455
421 - browser, page = _connect(args) 456 + browser, page = _connect_existing(args)
422 try: 457 try:
423 selected = select_template(page, args.name) 458 selected = select_template(page, args.name)
424 if selected: 459 if selected:
@@ -429,23 +464,23 @@ def cmd_select_template(args: argparse.Namespace) -> None: @@ -429,23 +464,23 @@ def cmd_select_template(args: argparse.Namespace) -> None:
429 exit_code=2, 464 exit_code=2,
430 ) 465 )
431 finally: 466 finally:
432 - browser.close_page(page) 467 + # 不关闭页面,后续 next-step 需要复用
433 browser.close() 468 browser.close()
434 469
435 470
436 def cmd_next_step(args: argparse.Namespace) -> None: 471 def cmd_next_step(args: argparse.Namespace) -> None:
437 - """点击下一步 + 填写发布页描述。""" 472 + """点击下一步 + 填写发布页描述。复用已有的长文编辑页 tab。"""
438 from xhs.publish_long_article import click_next_and_fill_description 473 from xhs.publish_long_article import click_next_and_fill_description
439 474
440 with open(args.content_file, encoding="utf-8") as f: 475 with open(args.content_file, encoding="utf-8") as f:
441 description = f.read().strip() 476 description = f.read().strip()
442 477
443 - browser, page = _connect(args) 478 + browser, page = _connect_existing(args)
444 try: 479 try:
445 click_next_and_fill_description(page, description) 480 click_next_and_fill_description(page, description)
446 _output({"success": True, "status": "已进入发布页,等待确认发布"}) 481 _output({"success": True, "status": "已进入发布页,等待确认发布"})
447 finally: 482 finally:
448 - browser.close_page(page) 483 + # 不关闭页面,等待 click-publish
449 browser.close() 484 browser.close()
450 485
451 486
@@ -71,7 +71,7 @@ class RunLock: @@ -71,7 +71,7 @@ class RunLock:
71 # 检查进程是否存在 71 # 检查进程是否存在
72 os.kill(pid, 0) 72 os.kill(pid, 0)
73 return False 73 return False
74 - except (FileNotFoundError, ValueError, ProcessLookupError, PermissionError): 74 + except (ValueError, OSError):
75 return True 75 return True
76 76
77 def _force_release(self) -> None: 77 def _force_release(self) -> None:
@@ -7,6 +7,7 @@ from __future__ import annotations @@ -7,6 +7,7 @@ from __future__ import annotations
7 7
8 import json 8 import json
9 import logging 9 import logging
  10 +import random
10 import time 11 import time
11 from typing import Any 12 from typing import Any
12 13
@@ -14,7 +15,7 @@ import requests @@ -14,7 +15,7 @@ import requests
14 import websockets.sync.client as ws_client 15 import websockets.sync.client as ws_client
15 16
16 from .errors import CDPError, ElementNotFoundError 17 from .errors import CDPError, ElementNotFoundError
17 -from .stealth import STEALTH_JS 18 +from .stealth import REALISTIC_UA, STEALTH_JS
18 19
19 logger = logging.getLogger(__name__) 20 logger = logging.getLogger(__name__)
20 21
@@ -211,15 +212,25 @@ class Page: @@ -211,15 +212,25 @@ class Page:
211 raise ElementNotFoundError(selector) 212 raise ElementNotFoundError(selector)
212 213
def click_element(self, selector: str) -> None:
    """Click the first element matching ``selector`` using CDP mouse input.

    The element is scrolled to the centre of the viewport and its centre
    point is read in page JavaScript. The pointer is then moved there and
    clicked through ``mouse_move`` / ``mouse_click`` after a short random
    pause, with a small random offset applied to the click position.
    If no element matches, nothing happens.

    Args:
        selector: CSS selector of the element to click.
    """
    box = self.evaluate(
        f"""
        (() => {{
            const el = document.querySelector({json.dumps(selector)});
            if (!el) return null;
            el.scrollIntoView({{block: 'center'}});
            const rect = el.getBoundingClientRect();
            return {{
                x: rect.left + rect.width / 2,
                y: rect.top + rect.height / 2,
                w: rect.width,
                h: rect.height,
            }};
        }})()
        """
    )
    if not box:
        return
    # Up to 3px of jitter around the centre, but never more than a quarter of
    # the element's size, so the click cannot land outside a very small
    # element. Elements of 12px or more keep the full +/-3px range.
    jitter_x = min(3.0, box["w"] / 4)
    jitter_y = min(3.0, box["h"] / 4)
    x = box["x"] + random.uniform(-jitter_x, jitter_x)
    y = box["y"] + random.uniform(-jitter_y, jitter_y)
    self.mouse_move(x, y)
    time.sleep(random.uniform(0.03, 0.08))
    self.mouse_click(x, y)
223 234
224 def input_text(self, selector: str, text: str) -> None: 235 def input_text(self, selector: str, text: str) -> None:
225 """向指定选择器的元素输入文本。""" 236 """向指定选择器的元素输入文本。"""
@@ -237,18 +248,59 @@ class Page: @@ -237,18 +248,59 @@ class Page:
237 ) 248 )
238 249
def input_content_editable(self, selector: str, text: str) -> None:
    """Type ``text`` into a contentEditable element through CDP key events.

    Steps: focus the element, clear its current content with select-all
    followed by Backspace, then send the text one character at a time with a
    random 30-80 ms pause between characters. Newlines are sent as Enter key
    presses via ``press_key``.

    If no element matches ``selector`` nothing is typed. Without this check
    the select-all, delete and keystrokes would be delivered to whichever
    element currently has focus.

    Args:
        selector: CSS selector of the contentEditable element (e.g. div.ql-editor).
        text: Text to enter.
    """
    # 1. Focus the element; the script reports whether it was found.
    focused = self.evaluate(
        f"""
        (() => {{
            const el = document.querySelector({json.dumps(selector)});
            if (!el) return false;
            el.focus();
            return true;
        }})()
        """
    )
    if not focused:
        logger.warning("未找到元素,跳过输入: %s", selector)
        return
    time.sleep(0.1)

    # 2. Select everything and delete it (Ctrl+A, then Backspace). The
    #    virtual key code and the explicit "selectAll" editing command are
    #    sent along with the key event so that the selection does not depend
    #    on the browser mapping a bare key name to a platform shortcut.
    select_all = {
        "key": "a",
        "code": "KeyA",
        "modifiers": 2,
        "windowsVirtualKeyCode": 65,
    }
    self._send_session(
        "Input.dispatchKeyEvent",
        {"type": "keyDown", **select_all, "commands": ["selectAll"]},
    )
    self._send_session(
        "Input.dispatchKeyEvent",
        {"type": "keyUp", **select_all},
    )
    backspace = {
        "key": "Backspace",
        "code": "Backspace",
        "windowsVirtualKeyCode": 8,
    }
    self._send_session(
        "Input.dispatchKeyEvent",
        {"type": "keyDown", **backspace},
    )
    self._send_session(
        "Input.dispatchKeyEvent",
        {"type": "keyUp", **backspace},
    )
    time.sleep(0.1)

    # 3. Type character by character with a random 30-80 ms gap.
    for char in text:
        if char == "\n":
            self.press_key("Enter")
        else:
            self._send_session(
                "Input.dispatchKeyEvent",
                {"type": "keyDown", "text": char},
            )
            self._send_session(
                "Input.dispatchKeyEvent",
                {"type": "keyUp", "text": char},
            )
        time.sleep(random.uniform(0.03, 0.08))
252 304
253 def get_element_text(self, selector: str) -> str | None: 305 def get_element_text(self, selector: str) -> str | None:
254 """获取元素文本内容。""" 306 """获取元素文本内容。"""
@@ -500,14 +552,31 @@ class Browser: @@ -500,14 +552,31 @@ class Browser:
500 552
501 page = Page(self._cdp, target_id, session_id) 553 page = Page(self._cdp, target_id, session_id)
502 554
  555 + # 注入反检测(必须在 enable domains 之前)
  556 + page.inject_stealth()
  557 +
  558 + # UA 覆盖
  559 + page._send_session(
  560 + "Emulation.setUserAgentOverride",
  561 + {"userAgent": REALISTIC_UA},
  562 + )
  563 +
  564 + # 随机 viewport(模拟真实屏幕尺寸)
  565 + page._send_session(
  566 + "Emulation.setDeviceMetricsOverride",
  567 + {
  568 + "width": random.randint(1366, 1920),
  569 + "height": random.randint(768, 1080),
  570 + "deviceScaleFactor": 1,
  571 + "mobile": False,
  572 + },
  573 + )
  574 +
503 # 启用必要的 domain 575 # 启用必要的 domain
504 page._send_session("Page.enable") 576 page._send_session("Page.enable")
505 page._send_session("DOM.enable") 577 page._send_session("DOM.enable")
506 page._send_session("Runtime.enable") 578 page._send_session("Runtime.enable")
507 579
508 - # 注入反检测  
509 - page.inject_stealth()  
510 -  
511 return page 580 return page
512 581
513 def get_existing_page(self) -> Page | None: 582 def get_existing_page(self) -> Page | None:
@@ -3,10 +3,10 @@ @@ -3,10 +3,10 @@
3 from __future__ import annotations 3 from __future__ import annotations
4 4
5 import logging 5 import logging
6 -import time  
7 6
8 from .cdp import Page 7 from .cdp import Page
9 from .feed_detail import _check_end_container, _check_page_accessible, _get_comment_count 8 from .feed_detail import _check_end_container, _check_page_accessible, _get_comment_count
  9 +from .human import sleep_random
10 from .selectors import ( 10 from .selectors import (
11 COMMENT_INPUT_FIELD, 11 COMMENT_INPUT_FIELD,
12 COMMENT_INPUT_TRIGGER, 12 COMMENT_INPUT_TRIGGER,
@@ -37,7 +37,7 @@ def post_comment(page: Page, feed_id: str, xsec_token: str, content: str) -> Non @@ -37,7 +37,7 @@ def post_comment(page: Page, feed_id: str, xsec_token: str, content: str) -> Non
37 page.navigate(url) 37 page.navigate(url)
38 page.wait_for_load() 38 page.wait_for_load()
39 page.wait_dom_stable() 39 page.wait_dom_stable()
40 - time.sleep(1) 40 + sleep_random(800, 1500)
41 41
42 _check_page_accessible(page) 42 _check_page_accessible(page)
43 43
@@ -46,27 +46,16 @@ def post_comment(page: Page, feed_id: str, xsec_token: str, content: str) -> Non @@ -46,27 +46,16 @@ def post_comment(page: Page, feed_id: str, xsec_token: str, content: str) -> Non
46 raise RuntimeError("未找到评论输入框,该帖子可能不支持评论或网页端不可访问") 46 raise RuntimeError("未找到评论输入框,该帖子可能不支持评论或网页端不可访问")
47 47
48 page.click_element(COMMENT_INPUT_TRIGGER) 48 page.click_element(COMMENT_INPUT_TRIGGER)
49 - time.sleep(0.5) 49 + sleep_random(400, 800)
50 50
51 - # 输入评论内容 51 + # 输入评论内容(CDP 逐字输入)
52 page.wait_for_element(COMMENT_INPUT_FIELD, timeout=5) 52 page.wait_for_element(COMMENT_INPUT_FIELD, timeout=5)
53 - page.evaluate(  
54 - f"""  
55 - (() => {{  
56 - const el = document.querySelector({_js_str(COMMENT_INPUT_FIELD)});  
57 - if (el) {{  
58 - el.focus();  
59 - el.textContent = {_js_str(content)};  
60 - el.dispatchEvent(new Event('input', {{bubbles: true}}));  
61 - }}  
62 - }})()  
63 - """  
64 - )  
65 - time.sleep(1) 53 + page.input_content_editable(COMMENT_INPUT_FIELD, content)
  54 + sleep_random(600, 1200)
66 55
67 # 点击提交 56 # 点击提交
68 page.click_element(COMMENT_SUBMIT_BUTTON) 57 page.click_element(COMMENT_SUBMIT_BUTTON)
69 - time.sleep(1) 58 + sleep_random(800, 1500)
70 59
71 logger.info("评论发送成功: feed=%s", feed_id) 60 logger.info("评论发送成功: feed=%s", feed_id)
72 61
@@ -103,42 +92,31 @@ def reply_comment( @@ -103,42 +92,31 @@ def reply_comment(
103 page.navigate(url) 92 page.navigate(url)
104 page.wait_for_load() 93 page.wait_for_load()
105 page.wait_dom_stable() 94 page.wait_dom_stable()
106 - time.sleep(1) 95 + sleep_random(800, 1500)
107 96
108 _check_page_accessible(page) 97 _check_page_accessible(page)
109 - time.sleep(2) 98 + sleep_random(1500, 2500)
110 99
111 # 查找目标评论 100 # 查找目标评论
112 comment_found = _find_and_scroll_to_comment(page, comment_id, user_id) 101 comment_found = _find_and_scroll_to_comment(page, comment_id, user_id)
113 if not comment_found: 102 if not comment_found:
114 raise RuntimeError(f"未找到评论 (commentID: {comment_id}, userID: {user_id})") 103 raise RuntimeError(f"未找到评论 (commentID: {comment_id}, userID: {user_id})")
115 104
116 - time.sleep(1) 105 + sleep_random(800, 1500)
117 106
118 # 点击回复按钮 107 # 点击回复按钮
119 reply_selector = f"#comment-{comment_id} {REPLY_BUTTON}" if comment_id else REPLY_BUTTON 108 reply_selector = f"#comment-{comment_id} {REPLY_BUTTON}" if comment_id else REPLY_BUTTON
120 page.click_element(reply_selector) 109 page.click_element(reply_selector)
121 - time.sleep(1) 110 + sleep_random(800, 1500)
122 111
123 - # 输入回复内容 112 + # 输入回复内容(CDP 逐字输入)
124 page.wait_for_element(COMMENT_INPUT_FIELD, timeout=5) 113 page.wait_for_element(COMMENT_INPUT_FIELD, timeout=5)
125 - page.evaluate(  
126 - f"""  
127 - (() => {{  
128 - const el = document.querySelector({_js_str(COMMENT_INPUT_FIELD)});  
129 - if (el) {{  
130 - el.focus();  
131 - el.textContent = {_js_str(content)};  
132 - el.dispatchEvent(new Event('input', {{bubbles: true}}));  
133 - }}  
134 - }})()  
135 - """  
136 - )  
137 - time.sleep(0.5) 114 + page.input_content_editable(COMMENT_INPUT_FIELD, content)
  115 + sleep_random(600, 1200)
138 116
139 # 点击提交 117 # 点击提交
140 page.click_element(COMMENT_SUBMIT_BUTTON) 118 page.click_element(COMMENT_SUBMIT_BUTTON)
141 - time.sleep(2) 119 + sleep_random(1500, 2500)
142 120
143 logger.info("回复评论成功") 121 logger.info("回复评论成功")
144 122
@@ -154,7 +132,7 @@ def _find_and_scroll_to_comment( @@ -154,7 +132,7 @@ def _find_and_scroll_to_comment(
154 132
155 # 先滚动到评论区 133 # 先滚动到评论区
156 page.scroll_element_into_view(".comments-container") 134 page.scroll_element_into_view(".comments-container")
157 - time.sleep(1) 135 + sleep_random(800, 1500)
158 136
159 last_count = 0 137 last_count = 0
160 stagnant = 0 138 stagnant = 0
@@ -179,11 +157,11 @@ def _find_and_scroll_to_comment( @@ -179,11 +157,11 @@ def _find_and_scroll_to_comment(
179 # 滚动到最后一条评论 157 # 滚动到最后一条评论
180 if current_count > 0: 158 if current_count > 0:
181 page.scroll_nth_element_into_view(PARENT_COMMENT, current_count - 1) 159 page.scroll_nth_element_into_view(PARENT_COMMENT, current_count - 1)
182 - time.sleep(0.3) 160 + sleep_random(200, 500)
183 161
184 # 继续滚动 162 # 继续滚动
185 page.evaluate("window.scrollBy(0, window.innerHeight * 0.8)") 163 page.evaluate("window.scrollBy(0, window.innerHeight * 0.8)")
186 - time.sleep(0.5) 164 + sleep_random(400, 800)
187 165
188 # 通过 commentID 查找 166 # 通过 commentID 查找
189 if comment_id: 167 if comment_id:
@@ -215,7 +193,7 @@ def _find_and_scroll_to_comment( @@ -215,7 +193,7 @@ def _find_and_scroll_to_comment(
215 logger.info("通过 userID 找到评论 (尝试 %d 次)", attempt + 1) 193 logger.info("通过 userID 找到评论 (尝试 %d 次)", attempt + 1)
216 return True 194 return True
217 195
218 - time.sleep(0.8) 196 + sleep_random(600, 1200)
219 197
220 return False 198 return False
221 199
@@ -58,6 +58,15 @@ _INACCESSIBLE_KEYWORDS = [ @@ -58,6 +58,15 @@ _INACCESSIBLE_KEYWORDS = [
58 "仅作者可见", 58 "仅作者可见",
59 "因用户设置,你无法查看", 59 "因用户设置,你无法查看",
60 "因违规无法查看", 60 "因违规无法查看",
  61 + "Isn't Available",
  62 + "isn't available",
  63 +]
  64 +
# Phrases that identify the QR-code scan verification page, which is shown
# when the site's anti-scraping check has been triggered.
_SCAN_QRCODE_KEYWORDS = [
    "扫码查看",
    "打开小红书App扫码",
    "请使用小红书App扫码",
]
62 71
63 _REPLY_COUNT_RE = re.compile(r"展开\s*(\d+)\s*条回复") 72 _REPLY_COUNT_RE = re.compile(r"展开\s*(\d+)\s*条回复")
@@ -110,10 +119,10 @@ def get_feed_detail( @@ -110,10 +119,10 @@ def get_feed_detail(
110 else: 119 else:
111 raise RuntimeError("页面导航失败") 120 raise RuntimeError("页面导航失败")
112 121
113 - sleep_random(1000, 1000) 122 + sleep_random(800, 1500)
114 123
115 - # 检查页面可访问性  
116 - _check_page_accessible(page) 124 + # 检查页面可访问性(扫码验证时自动等待重试)
  125 + _check_page_accessible(page, url)
117 126
118 # 加载全部评论 127 # 加载全部评论
119 if load_all_comments: 128 if load_all_comments:
@@ -128,8 +137,11 @@ def get_feed_detail( @@ -128,8 +137,11 @@ def get_feed_detail(
128 # ========== 页面检查 ========== 137 # ========== 页面检查 ==========
129 138
130 139
131 -def _check_page_accessible(page: Page) -> None:  
132 - """检查页面是否可访问。""" 140 +def _check_page_accessible(page: Page, url: str = "") -> None:
  141 + """检查页面是否可访问。
  142 +
  143 + 扫码验证场景:等待 10 秒后自动重新访问,验证消失则继续,否则报错。
  144 + """
133 time.sleep(0.5) 145 time.sleep(0.5)
134 146
135 text = page.get_element_text(ACCESS_ERROR_WRAPPER) 147 text = page.get_element_text(ACCESS_ERROR_WRAPPER)
@@ -137,6 +149,28 @@ def _check_page_accessible(page: Page) -> None: @@ -137,6 +149,28 @@ def _check_page_accessible(page: Page) -> None:
137 return 149 return
138 150
139 text = text.strip() 151 text = text.strip()
  152 +
  153 + # 检测扫码验证(反爬机制触发)→ 等待后重试
  154 + if _is_scan_qrcode_verification(text) and url:
  155 + logger.warning("触发小红书扫码验证,等待 10 秒后重新访问...")
  156 + time.sleep(10)
  157 + page.navigate(url)
  158 + page.wait_for_load()
  159 + page.wait_dom_stable()
  160 + time.sleep(1)
  161 +
  162 + retry_text = page.get_element_text(ACCESS_ERROR_WRAPPER)
  163 + if retry_text and _is_scan_qrcode_verification(retry_text.strip()):
  164 + raise PageNotAccessibleError(
  165 + "触发了小红书验证,需要在浏览器中扫码完成验证后重试。"
  166 + "这通常是小红书的反爬机制,请稍后再试或在 Chrome 中手动打开该笔记完成验证"
  167 + )
  168 + if not retry_text or not retry_text.strip():
  169 + logger.info("验证已消失,继续加载笔记")
  170 + return
  171 + # 重试后仍有其他错误,继续走下面的关键词检测
  172 + text = retry_text.strip()
  173 +
140 for kw in _INACCESSIBLE_KEYWORDS: 174 for kw in _INACCESSIBLE_KEYWORDS:
141 if kw in text: 175 if kw in text:
142 raise PageNotAccessibleError(kw) 176 raise PageNotAccessibleError(kw)
@@ -145,6 +179,11 @@ def _check_page_accessible(page: Page) -> None: @@ -145,6 +179,11 @@ def _check_page_accessible(page: Page) -> None:
145 raise PageNotAccessibleError(text) 179 raise PageNotAccessibleError(text)
146 180
147 181
  182 +def _is_scan_qrcode_verification(text: str) -> bool:
  183 + """判断页面文本是否为扫码验证。"""
  184 + return any(kw in text for kw in _SCAN_QRCODE_KEYWORDS)
  185 +
  186 +
148 # ========== 数据提取 ========== 187 # ========== 数据提取 ==========
149 188
150 189
@@ -32,6 +32,11 @@ def sleep_random(min_ms: int, max_ms: int) -> None: @@ -32,6 +32,11 @@ def sleep_random(min_ms: int, max_ms: int) -> None:
32 time.sleep(delay) 32 time.sleep(delay)
33 33
34 34
  35 +def navigation_delay() -> None:
  36 + """页面导航后的随机等待,模拟人类阅读。"""
  37 + sleep_random(1000, 2500)
  38 +
  39 +
35 def get_scroll_interval(speed: str) -> float: 40 def get_scroll_interval(speed: str) -> float:
36 """根据速度获取滚动间隔(秒)。""" 41 """根据速度获取滚动间隔(秒)。"""
37 if speed == "slow": 42 if speed == "slow":
@@ -9,6 +9,7 @@ import tempfile @@ -9,6 +9,7 @@ import tempfile
9 import time 9 import time
10 10
11 from .cdp import Page 11 from .cdp import Page
  12 +from .human import sleep_random
12 from .selectors import LOGIN_STATUS, QRCODE_IMG 13 from .selectors import LOGIN_STATUS, QRCODE_IMG
13 from .urls import EXPLORE_URL 14 from .urls import EXPLORE_URL
14 15
@@ -23,7 +24,7 @@ def check_login_status(page: Page) -> bool: @@ -23,7 +24,7 @@ def check_login_status(page: Page) -> bool:
23 """ 24 """
24 page.navigate(EXPLORE_URL) 25 page.navigate(EXPLORE_URL)
25 page.wait_for_load() 26 page.wait_for_load()
26 - time.sleep(1) 27 + sleep_random(800, 1500)
27 28
28 return page.has_element(LOGIN_STATUS) 29 return page.has_element(LOGIN_STATUS)
29 30
@@ -38,7 +39,7 @@ def fetch_qrcode(page: Page) -> tuple[str, bool]: @@ -38,7 +39,7 @@ def fetch_qrcode(page: Page) -> tuple[str, bool]:
38 """ 39 """
39 page.navigate(EXPLORE_URL) 40 page.navigate(EXPLORE_URL)
40 page.wait_for_load() 41 page.wait_for_load()
41 - time.sleep(2) 42 + sleep_random(1500, 2500)
42 43
43 # 检查是否已登录 44 # 检查是否已登录
44 if page.has_element(LOGIN_STATUS): 45 if page.has_element(LOGIN_STATUS):
@@ -5,6 +5,7 @@ from __future__ import annotations @@ -5,6 +5,7 @@ from __future__ import annotations
5 import json 5 import json
6 import logging 6 import logging
7 import random 7 import random
  8 +import re
8 import time 9 import time
9 10
10 from .cdp import Page 11 from .cdp import Page
@@ -127,27 +128,31 @@ def _navigate_to_publish_page(page: Page) -> None: @@ -127,27 +128,31 @@ def _navigate_to_publish_page(page: Page) -> None:
127 """导航到发布页面。""" 128 """导航到发布页面。"""
128 page.navigate(PUBLISH_URL) 129 page.navigate(PUBLISH_URL)
129 page.wait_for_load(timeout=300) 130 page.wait_for_load(timeout=300)
130 - time.sleep(2) 131 + time.sleep(3)
131 page.wait_dom_stable() 132 page.wait_dom_stable()
132 - time.sleep(1) 133 + time.sleep(2)
133 134
134 135
135 def _click_publish_tab(page: Page, tab_name: str) -> None: 136 def _click_publish_tab(page: Page, tab_name: str) -> None:
136 """点击发布页 TAB(上传图文/上传视频)。""" 137 """点击发布页 TAB(上传图文/上传视频)。"""
137 - page.wait_for_element(UPLOAD_CONTENT, timeout=15)  
138 -  
139 deadline = time.monotonic() + 15 138 deadline = time.monotonic() + 15
140 while time.monotonic() < deadline: 139 while time.monotonic() < deadline:
141 - # 查找匹配的 TAB 140 + # 查找匹配的 TAB(支持多种结构)
142 found = page.evaluate( 141 found = page.evaluate(
143 f""" 142 f"""
144 (() => {{ 143 (() => {{
145 - const tabs = document.querySelectorAll({json.dumps(CREATOR_TAB)}); 144 + // 策略1: 查找 div.creator-tab(过滤隐藏元素)
  145 + let tabs = document.querySelectorAll({json.dumps(CREATOR_TAB)});
146 for (const tab of tabs) {{ 146 for (const tab of tabs) {{
147 - if (tab.textContent.trim() === {json.dumps(tab_name)}) {{  
148 - // 检查是否被遮挡 147 + const titleSpan = tab.querySelector('span.title');
  148 + const tabText = titleSpan ? titleSpan.textContent.trim() : tab.textContent.trim();
  149 + if (tabText === {json.dumps(tab_name)}) {{
149 const rect = tab.getBoundingClientRect(); 150 const rect = tab.getBoundingClientRect();
  151 + const style = window.getComputedStyle(tab);
  152 + // 跳过隐藏或被移出视口的元素
150 if (rect.width === 0 || rect.height === 0) continue; 153 if (rect.width === 0 || rect.height === 0) continue;
  154 + if (rect.left < 0 || rect.top < 0) continue;
  155 + if (style.display === 'none' || style.visibility === 'hidden') continue;
151 const x = rect.left + rect.width / 2; 156 const x = rect.left + rect.width / 2;
152 const y = rect.top + rect.height / 2; 157 const y = rect.top + rect.height / 2;
153 const target = document.elementFromPoint(x, y); 158 const target = document.elementFromPoint(x, y);
@@ -158,6 +163,21 @@ def _click_publish_tab(page: Page, tab_name: str) -> None: @@ -158,6 +163,21 @@ def _click_publish_tab(page: Page, tab_name: str) -> None:
158 return 'blocked'; 163 return 'blocked';
159 }} 164 }}
160 }} 165 }}
  166 +
  167 + // 策略2: 查找任意包含目标文本的元素
  168 + const allElements = document.querySelectorAll('*');
  169 + for (const el of allElements) {{
  170 + if (el.children.length === 0 && el.textContent.trim() === {json.dumps(tab_name)}) {{
  171 + const rect = el.getBoundingClientRect();
  172 + const style = window.getComputedStyle(el);
  173 + if (rect.width === 0 || rect.height === 0) continue;
  174 + if (rect.left < 0 || rect.top < 0) continue;
  175 + if (style.display === 'none' || style.visibility === 'hidden') continue;
  176 + el.click();
  177 + return 'clicked';
  178 + }}
  179 + }}
  180 +
161 return 'not_found'; 181 return 'not_found';
162 }})() 182 }})()
163 """ 183 """
@@ -172,6 +192,19 @@ def _click_publish_tab(page: Page, tab_name: str) -> None: @@ -172,6 +192,19 @@ def _click_publish_tab(page: Page, tab_name: str) -> None:
172 192
173 time.sleep(0.2) 193 time.sleep(0.2)
174 194
  195 + # 调试:输出页面信息
  196 + debug_info = page.evaluate("""
  197 + (() => {
  198 + const creatorTabs = document.querySelectorAll('div.creator-tab');
  199 + const tabTexts = Array.from(creatorTabs).map(t => ({
  200 + text: t.textContent.trim(),
  201 + html: t.outerHTML.substring(0, 200)
  202 + }));
  203 + const url = window.location.href;
  204 + return JSON.stringify({url, tabCount: creatorTabs.length, tabs: tabTexts});
  205 + })()
  206 + """)
  207 + logger.error("调试信息: %s", debug_info)
175 raise PublishError(f"没有找到发布 TAB - {tab_name}") 208 raise PublishError(f"没有找到发布 TAB - {tab_name}")
176 209
177 210
@@ -223,6 +256,34 @@ def _wait_for_upload_complete(page: Page, expected_count: int) -> None: @@ -223,6 +256,34 @@ def _wait_for_upload_complete(page: Page, expected_count: int) -> None:
223 # ========== 表单提交 ========== 256 # ========== 表单提交 ==========
224 257
225 258
  259 +def _extract_hashtags_from_content(content: str, tags: list[str]) -> tuple[str, list[str]]:
  260 + """从正文末尾提取 hashtag 行,合并到 tags 列表。
  261 +
  262 + Returns:
  263 + (cleaned_content, merged_tags)
  264 + """
  265 + lines = content.rstrip().split("\n")
  266 + # 检查最后一行是否全是 #tag 格式
  267 + if lines:
  268 + last_line = lines[-1].strip()
  269 + hashtag_pattern = re.compile(r"^(#\S+\s*)+$")
  270 + if hashtag_pattern.match(last_line):
  271 + # 提取 hashtag
  272 + extracted = re.findall(r"#(\S+)", last_line)
  273 + # 合并到 tags(去重)
  274 + existing = {t.lstrip("#") for t in tags}
  275 + merged = list(tags)
  276 + for t in extracted:
  277 + if t not in existing:
  278 + merged.append(t)
  279 + existing.add(t)
  280 + # 去掉最后一行
  281 + cleaned = "\n".join(lines[:-1]).rstrip()
  282 + logger.info("从正文末尾提取 %d 个标签,合并后共 %d 个", len(extracted), len(merged))
  283 + return cleaned, merged
  284 + return content, list(tags)
  285 +
  286 +
226 def _fill_publish_form( 287 def _fill_publish_form(
227 page: Page, 288 page: Page,
228 title: str, 289 title: str,
@@ -233,6 +294,9 @@ def _fill_publish_form( @@ -233,6 +294,9 @@ def _fill_publish_form(
233 visibility: str, 294 visibility: str,
234 ) -> None: 295 ) -> None:
235 """填写表单(不点击发布)。""" 296 """填写表单(不点击发布)。"""
  297 + # 从正文末尾提取 hashtag 并合并到 tags
  298 + content, tags = _extract_hashtags_from_content(content, tags)
  299 +
236 # 标题 300 # 标题
237 page.input_text(TITLE_INPUT, title) 301 page.input_text(TITLE_INPUT, title)
238 time.sleep(0.5) 302 time.sleep(0.5)
@@ -334,6 +398,10 @@ def _input_tags(page: Page, content_selector: str, tags: list[str]) -> None: @@ -334,6 +398,10 @@ def _input_tags(page: Page, content_selector: str, tags: list[str]) -> None:
334 """输入标签。""" 398 """输入标签。"""
335 time.sleep(1) 399 time.sleep(1)
336 400
  401 + # 先点击正文编辑器,确保焦点在正文而非标题
  402 + page.click_element(content_selector)
  403 + time.sleep(0.3)
  404 +
337 # 移动光标到正文末尾(20次 ArrowDown) 405 # 移动光标到正文末尾(20次 ArrowDown)
338 for _ in range(20): 406 for _ in range(20):
339 page.press_key("ArrowDown") 407 page.press_key("ArrowDown")
@@ -353,27 +421,32 @@ def _input_single_tag(page: Page, content_selector: str, tag: str) -> None: @@ -353,27 +421,32 @@ def _input_single_tag(page: Page, content_selector: str, tag: str) -> None:
353 """输入单个标签。""" 421 """输入单个标签。"""
354 # 输入 # 422 # 输入 #
355 page.type_text("#", delay_ms=0) 423 page.type_text("#", delay_ms=0)
356 - time.sleep(0.2) 424 + time.sleep(0.3)
357 425
358 - # 逐字输入标签 426 + # 逐字输入标签(随机间隔模拟真实输入)
359 for char in tag: 427 for char in tag:
360 - page.type_text(char, delay_ms=50)  
361 -  
362 - time.sleep(1) 428 + page.type_text(char, delay_ms=0)
  429 + time.sleep(random.uniform(0.05, 0.12))
363 430
364 - # 尝试点击标签联想 431 + # 等待标签联想出现(最多 3 秒)
  432 + deadline = time.monotonic() + 3.0
  433 + clicked = False
  434 + while time.monotonic() < deadline:
  435 + time.sleep(0.5)
365 if page.has_element(TAG_TOPIC_CONTAINER): 436 if page.has_element(TAG_TOPIC_CONTAINER):
366 item_selector = f"{TAG_TOPIC_CONTAINER} {TAG_FIRST_ITEM}" 437 item_selector = f"{TAG_TOPIC_CONTAINER} {TAG_FIRST_ITEM}"
367 if page.has_element(item_selector): 438 if page.has_element(item_selector):
368 page.click_element(item_selector) 439 page.click_element(item_selector)
369 logger.info("点击标签联想: %s", tag) 440 logger.info("点击标签联想: %s", tag)
370 - time.sleep(0.5)  
371 - return 441 + clicked = True
  442 + break
372 443
  444 + if not clicked:
373 # 没有联想,直接空格 445 # 没有联想,直接空格
374 logger.warning("未找到标签联想,直接输入空格: %s", tag) 446 logger.warning("未找到标签联想,直接输入空格: %s", tag)
375 page.type_text(" ", delay_ms=0) 447 page.type_text(" ", delay_ms=0)
376 - time.sleep(0.5) 448 +
  449 + time.sleep(0.8)
377 450
378 451
379 # ========== 定时发布 ========== 452 # ========== 定时发布 ==========
@@ -5,6 +5,7 @@ from __future__ import annotations @@ -5,6 +5,7 @@ from __future__ import annotations
5 import json 5 import json
6 import logging 6 import logging
7 import time 7 import time
  8 +from pathlib import Path
8 9
9 from .cdp import Page 10 from .cdp import Page
10 from .errors import PublishError 11 from .errors import PublishError
@@ -217,14 +218,14 @@ def _fill_long_content(page: Page, content: str) -> None: @@ -217,14 +218,14 @@ def _fill_long_content(page: Page, content: str) -> None:
217 def _insert_images_to_editor(page: Page, image_paths: list[str]) -> None: 218 def _insert_images_to_editor(page: Page, image_paths: list[str]) -> None:
218 """将图片插入到编辑器中。""" 219 """将图片插入到编辑器中。"""
219 for img_path in image_paths: 220 for img_path in image_paths:
220 - normalized = img_path.replace("\\", "/") 221 + file_uri = Path(img_path).resolve().as_uri()
221 page.evaluate( 222 page.evaluate(
222 f""" 223 f"""
223 (() => {{ 224 (() => {{
224 const editor = document.querySelector({json.dumps(CONTENT_EDITOR)}); 225 const editor = document.querySelector({json.dumps(CONTENT_EDITOR)});
225 if (!editor) return false; 226 if (!editor) return false;
226 const img = document.createElement('img'); 227 const img = document.createElement('img');
227 - img.src = 'file:///' + {json.dumps(normalized)}; 228 + img.src = {json.dumps(file_uri)};
228 editor.appendChild(img); 229 editor.appendChild(img);
229 editor.dispatchEvent(new Event('input', {{ bubbles: true }})); 230 editor.dispatchEvent(new Event('input', {{ bubbles: true }}));
230 return true; 231 return true;
@@ -8,6 +8,7 @@ import time @@ -8,6 +8,7 @@ import time
8 8
9 from .cdp import Page 9 from .cdp import Page
10 from .errors import NoFeedsError 10 from .errors import NoFeedsError
  11 +from .human import sleep_random
11 from .selectors import FILTER_BUTTON, FILTER_PANEL 12 from .selectors import FILTER_BUTTON, FILTER_PANEL
12 from .types import Feed, FilterOption 13 from .types import Feed, FilterOption
13 from .urls import make_search_url 14 from .urls import make_search_url
@@ -139,7 +140,7 @@ def _apply_filters(page: Page, filters: list[tuple[int, int]]) -> None: @@ -139,7 +140,7 @@ def _apply_filters(page: Page, filters: list[tuple[int, int]]) -> None:
139 while time.monotonic() < deadline: 140 while time.monotonic() < deadline:
140 if page.has_element(FILTER_PANEL): 141 if page.has_element(FILTER_PANEL):
141 break 142 break
142 - time.sleep(0.3) 143 + sleep_random(300, 600)
143 144
144 # 点击各筛选项 145 # 点击各筛选项
145 for filters_index, tags_index in filters: 146 for filters_index, tags_index in filters:
@@ -148,7 +149,7 @@ def _apply_filters(page: Page, filters: list[tuple[int, int]]) -> None: @@ -148,7 +149,7 @@ def _apply_filters(page: Page, filters: list[tuple[int, int]]) -> None:
148 f"div.tags:nth-child({tags_index})" 149 f"div.tags:nth-child({tags_index})"
149 ) 150 )
150 page.click_element(selector) 151 page.click_element(selector)
151 - time.sleep(0.3) 152 + sleep_random(300, 600)
152 153
153 # 等待页面更新 154 # 等待页面更新
154 page.wait_dom_stable() 155 page.wait_dom_stable()
1 """反检测 JS 注入 + Chrome 启动参数,对应 go-rod/stealth。""" 1 """反检测 JS 注入 + Chrome 启动参数,对应 go-rod/stealth。"""
2 2
  3 +# 真实 Chrome UA(固定版本,避免每次随机导致指纹不一致)
  4 +REALISTIC_UA = (
  5 + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
  6 + "AppleWebKit/537.36 (KHTML, like Gecko) "
  7 + "Chrome/131.0.0.0 Safari/537.36"
  8 +)
  9 +
3 # 反检测 JS 脚本:在页面加载时注入 10 # 反检测 JS 脚本:在页面加载时注入
4 STEALTH_JS = """ 11 STEALTH_JS = """
5 (() => { 12 (() => {
@@ -72,6 +79,45 @@ STEALTH_JS = """ @@ -72,6 +79,45 @@ STEALTH_JS = """
72 if (parameter === 37446) return 'Intel Iris OpenGL Engine'; 79 if (parameter === 37446) return 'Intel Iris OpenGL Engine';
73 return getParameter.call(this, parameter); 80 return getParameter.call(this, parameter);
74 }; 81 };
  82 +
  83 + // 7. hardwareConcurrency — 随机 4 或 8
  84 + Object.defineProperty(navigator, 'hardwareConcurrency', {
  85 + get: () => [4, 8][Math.floor(Math.random() * 2)],
  86 + configurable: true,
  87 + });
  88 +
  89 + // 8. deviceMemory — 随机 4 或 8
  90 + Object.defineProperty(navigator, 'deviceMemory', {
  91 + get: () => [4, 8][Math.floor(Math.random() * 2)],
  92 + configurable: true,
  93 + });
  94 +
  95 + // 9. navigator.connection — 伪造网络信息
  96 + Object.defineProperty(navigator, 'connection', {
  97 + get: () => ({
  98 + effectiveType: '4g',
  99 + downlink: 10,
  100 + rtt: 50,
  101 + saveData: false,
  102 + }),
  103 + configurable: true,
  104 + });
  105 +
  106 + // 10. chrome.csi / chrome.loadTimes — 空函数伪装
  107 + if (window.chrome) {
  108 + window.chrome.csi = function() { return {}; };
  109 + window.chrome.loadTimes = function() { return {}; };
  110 + }
  111 +
  112 + // 11. outerWidth/outerHeight — 与 innerWidth/innerHeight 对齐
  113 + Object.defineProperty(window, 'outerWidth', {
  114 + get: () => window.innerWidth,
  115 + configurable: true,
  116 + });
  117 + Object.defineProperty(window, 'outerHeight', {
  118 + get: () => window.innerHeight,
  119 + configurable: true,
  120 + });
75 })(); 121 })();
76 """ 122 """
77 123
@@ -85,4 +131,6 @@ STEALTH_ARGS = [ @@ -85,4 +131,6 @@ STEALTH_ARGS = [
85 "--disable-backgrounding-occluded-windows", 131 "--disable-backgrounding-occluded-windows",
86 "--disable-renderer-backgrounding", 132 "--disable-renderer-backgrounding",
87 "--disable-component-update", 133 "--disable-component-update",
  134 + "--disable-extensions",
  135 + "--disable-sync",
88 ] 136 ]
@@ -159,6 +159,9 @@ class Feed: @@ -159,6 +159,9 @@ class Feed:
159 "sharedCount": self.note_card.interact_info.shared_count, 159 "sharedCount": self.note_card.interact_info.shared_count,
160 }, 160 },
161 } 161 }
  162 + cover = self.note_card.cover
  163 + if cover.url or cover.url_default:
  164 + result["cover"] = cover.url or cover.url_default
162 if self.note_card.video: 165 if self.note_card.video:
163 result["video"] = {"duration": self.note_card.video.capa.duration} 166 result["video"] = {"duration": self.note_card.video.capa.duration}
164 return result 167 return result