feat: 增强反检测能力 — JS 伪装、CDP 真实交互、随机延迟
- stealth.py: 新增 5 项 JS 覆盖(hardwareConcurrency/deviceMemory/connection/chrome.csi+loadTimes/outerWidth+Height)、REALISTIC_UA 常量、--disable-extensions/--disable-sync 启动参数
- cdp.py: click_element 改用 CDP Input 事件(isTrusted=true)、input_content_editable 改用逐字 CDP 键入、new_page 注入 UA 覆盖和随机 viewport
- human.py: 新增 navigation_delay() 函数
- comment.py: 所有 time.sleep 替换为 sleep_random,评论输入改用 CDP 逐字输入
- search.py/feed_detail.py/login.py: 固定延迟替换为随机区间延迟
- feed_detail.py: 另新增扫码验证检测(等待 10 秒后自动重新访问一次)及英文不可访问关键词("Isn't Available" / "isn't available")
Showing 7 changed files with 200 additions and 62 deletions
| @@ -7,6 +7,7 @@ from __future__ import annotations | @@ -7,6 +7,7 @@ from __future__ import annotations | ||
| 7 | 7 | ||
| 8 | import json | 8 | import json |
| 9 | import logging | 9 | import logging |
| 10 | +import random | ||
| 10 | import time | 11 | import time |
| 11 | from typing import Any | 12 | from typing import Any |
| 12 | 13 | ||
| @@ -14,7 +15,7 @@ import requests | @@ -14,7 +15,7 @@ import requests | ||
| 14 | import websockets.sync.client as ws_client | 15 | import websockets.sync.client as ws_client |
| 15 | 16 | ||
| 16 | from .errors import CDPError, ElementNotFoundError | 17 | from .errors import CDPError, ElementNotFoundError |
| 17 | -from .stealth import STEALTH_JS | 18 | +from .stealth import REALISTIC_UA, STEALTH_JS |
| 18 | 19 | ||
| 19 | logger = logging.getLogger(__name__) | 20 | logger = logging.getLogger(__name__) |
| 20 | 21 | ||
| @@ -211,15 +212,25 @@ class Page: | @@ -211,15 +212,25 @@ class Page: | ||
| 211 | raise ElementNotFoundError(selector) | 212 | raise ElementNotFoundError(selector) |
| 212 | 213 | ||
| 213 | def click_element(self, selector: str) -> None: | 214 | def click_element(self, selector: str) -> None: |
| 214 | - """点击指定选择器的元素。""" | ||
| 215 | - self.evaluate( | 215 | + """点击指定选择器的元素(通过 CDP Input 事件,isTrusted=true)。""" |
| 216 | + box = self.evaluate( | ||
| 216 | f""" | 217 | f""" |
| 217 | (() => {{ | 218 | (() => {{ |
| 218 | const el = document.querySelector({json.dumps(selector)}); | 219 | const el = document.querySelector({json.dumps(selector)}); |
| 219 | - if (el) el.click(); | 220 | + if (!el) return null; |
| 221 | + el.scrollIntoView({{block: 'center'}}); | ||
| 222 | + const rect = el.getBoundingClientRect(); | ||
| 223 | + return {{x: rect.left + rect.width / 2, y: rect.top + rect.height / 2}}; | ||
| 220 | }})() | 224 | }})() |
| 221 | """ | 225 | """ |
| 222 | ) | 226 | ) |
| 227 | + if not box: | ||
| 228 | + return | ||
| 229 | + x = box["x"] + random.uniform(-3, 3) | ||
| 230 | + y = box["y"] + random.uniform(-3, 3) | ||
| 231 | + self.mouse_move(x, y) | ||
| 232 | + time.sleep(random.uniform(0.03, 0.08)) | ||
| 233 | + self.mouse_click(x, y) | ||
| 223 | 234 | ||
| 224 | def input_text(self, selector: str, text: str) -> None: | 235 | def input_text(self, selector: str, text: str) -> None: |
| 225 | """向指定选择器的元素输入文本。""" | 236 | """向指定选择器的元素输入文本。""" |
| @@ -237,18 +248,56 @@ class Page: | @@ -237,18 +248,56 @@ class Page: | ||
| 237 | ) | 248 | ) |
| 238 | 249 | ||
| 239 | def input_content_editable(self, selector: str, text: str) -> None: | 250 | def input_content_editable(self, selector: str, text: str) -> None: |
| 240 | - """向 contentEditable 元素输入文本(如 div.ql-editor)。""" | 251 | + """向 contentEditable 元素输入文本(CDP 逐字输入,模拟真实打字)。""" |
| 252 | + # 1. focus 元素 | ||
| 241 | self.evaluate( | 253 | self.evaluate( |
| 242 | f""" | 254 | f""" |
| 243 | (() => {{ | 255 | (() => {{ |
| 244 | const el = document.querySelector({json.dumps(selector)}); | 256 | const el = document.querySelector({json.dumps(selector)}); |
| 245 | - if (!el) return; | ||
| 246 | - el.focus(); | ||
| 247 | - el.textContent = {json.dumps(text)}; | ||
| 248 | - el.dispatchEvent(new Event('input', {{bubbles: true}})); | 257 | + if (el) el.focus(); |
| 249 | }})() | 258 | }})() |
| 250 | """ | 259 | """ |
| 251 | ) | 260 | ) |
| 261 | + time.sleep(0.1) | ||
| 262 | + # 2. 全选清空(Ctrl+A + Backspace) | ||
| 263 | + self._send_session( | ||
| 264 | + "Input.dispatchKeyEvent", | ||
| 265 | + {"type": "keyDown", "key": "a", "code": "KeyA", "modifiers": 2}, | ||
| 266 | + ) | ||
| 267 | + self._send_session( | ||
| 268 | + "Input.dispatchKeyEvent", | ||
| 269 | + {"type": "keyUp", "key": "a", "code": "KeyA", "modifiers": 2}, | ||
| 270 | + ) | ||
| 271 | + self._send_session( | ||
| 272 | + "Input.dispatchKeyEvent", | ||
| 273 | + { | ||
| 274 | + "type": "keyDown", | ||
| 275 | + "key": "Backspace", | ||
| 276 | + "code": "Backspace", | ||
| 277 | + "windowsVirtualKeyCode": 8, | ||
| 278 | + }, | ||
| 279 | + ) | ||
| 280 | + self._send_session( | ||
| 281 | + "Input.dispatchKeyEvent", | ||
| 282 | + { | ||
| 283 | + "type": "keyUp", | ||
| 284 | + "key": "Backspace", | ||
| 285 | + "code": "Backspace", | ||
| 286 | + "windowsVirtualKeyCode": 8, | ||
| 287 | + }, | ||
| 288 | + ) | ||
| 289 | + time.sleep(0.1) | ||
| 290 | + # 3. 逐字输入(随机 30-80ms 间隔) | ||
| 291 | + for char in text: | ||
| 292 | + self._send_session( | ||
| 293 | + "Input.dispatchKeyEvent", | ||
| 294 | + {"type": "keyDown", "text": char}, | ||
| 295 | + ) | ||
| 296 | + self._send_session( | ||
| 297 | + "Input.dispatchKeyEvent", | ||
| 298 | + {"type": "keyUp", "text": char}, | ||
| 299 | + ) | ||
| 300 | + time.sleep(random.uniform(0.03, 0.08)) | ||
| 252 | 301 | ||
| 253 | def get_element_text(self, selector: str) -> str | None: | 302 | def get_element_text(self, selector: str) -> str | None: |
| 254 | """获取元素文本内容。""" | 303 | """获取元素文本内容。""" |
| @@ -500,14 +549,31 @@ class Browser: | @@ -500,14 +549,31 @@ class Browser: | ||
| 500 | 549 | ||
| 501 | page = Page(self._cdp, target_id, session_id) | 550 | page = Page(self._cdp, target_id, session_id) |
| 502 | 551 | ||
| 552 | + # 注入反检测(必须在 enable domains 之前) | ||
| 553 | + page.inject_stealth() | ||
| 554 | + | ||
| 555 | + # UA 覆盖 | ||
| 556 | + page._send_session( | ||
| 557 | + "Emulation.setUserAgentOverride", | ||
| 558 | + {"userAgent": REALISTIC_UA}, | ||
| 559 | + ) | ||
| 560 | + | ||
| 561 | + # 随机 viewport(模拟真实屏幕尺寸) | ||
| 562 | + page._send_session( | ||
| 563 | + "Emulation.setDeviceMetricsOverride", | ||
| 564 | + { | ||
| 565 | + "width": random.randint(1366, 1920), | ||
| 566 | + "height": random.randint(768, 1080), | ||
| 567 | + "deviceScaleFactor": 1, | ||
| 568 | + "mobile": False, | ||
| 569 | + }, | ||
| 570 | + ) | ||
| 571 | + | ||
| 503 | # 启用必要的 domain | 572 | # 启用必要的 domain |
| 504 | page._send_session("Page.enable") | 573 | page._send_session("Page.enable") |
| 505 | page._send_session("DOM.enable") | 574 | page._send_session("DOM.enable") |
| 506 | page._send_session("Runtime.enable") | 575 | page._send_session("Runtime.enable") |
| 507 | 576 | ||
| 508 | - # 注入反检测 | ||
| 509 | - page.inject_stealth() | ||
| 510 | - | ||
| 511 | return page | 577 | return page |
| 512 | 578 | ||
| 513 | def get_existing_page(self) -> Page | None: | 579 | def get_existing_page(self) -> Page | None: |
| @@ -3,10 +3,10 @@ | @@ -3,10 +3,10 @@ | ||
| 3 | from __future__ import annotations | 3 | from __future__ import annotations |
| 4 | 4 | ||
| 5 | import logging | 5 | import logging |
| 6 | -import time | ||
| 7 | 6 | ||
| 8 | from .cdp import Page | 7 | from .cdp import Page |
| 9 | from .feed_detail import _check_end_container, _check_page_accessible, _get_comment_count | 8 | from .feed_detail import _check_end_container, _check_page_accessible, _get_comment_count |
| 9 | +from .human import sleep_random | ||
| 10 | from .selectors import ( | 10 | from .selectors import ( |
| 11 | COMMENT_INPUT_FIELD, | 11 | COMMENT_INPUT_FIELD, |
| 12 | COMMENT_INPUT_TRIGGER, | 12 | COMMENT_INPUT_TRIGGER, |
| @@ -37,7 +37,7 @@ def post_comment(page: Page, feed_id: str, xsec_token: str, content: str) -> Non | @@ -37,7 +37,7 @@ def post_comment(page: Page, feed_id: str, xsec_token: str, content: str) -> Non | ||
| 37 | page.navigate(url) | 37 | page.navigate(url) |
| 38 | page.wait_for_load() | 38 | page.wait_for_load() |
| 39 | page.wait_dom_stable() | 39 | page.wait_dom_stable() |
| 40 | - time.sleep(1) | 40 | + sleep_random(800, 1500) |
| 41 | 41 | ||
| 42 | _check_page_accessible(page) | 42 | _check_page_accessible(page) |
| 43 | 43 | ||
| @@ -46,27 +46,16 @@ def post_comment(page: Page, feed_id: str, xsec_token: str, content: str) -> Non | @@ -46,27 +46,16 @@ def post_comment(page: Page, feed_id: str, xsec_token: str, content: str) -> Non | ||
| 46 | raise RuntimeError("未找到评论输入框,该帖子可能不支持评论或网页端不可访问") | 46 | raise RuntimeError("未找到评论输入框,该帖子可能不支持评论或网页端不可访问") |
| 47 | 47 | ||
| 48 | page.click_element(COMMENT_INPUT_TRIGGER) | 48 | page.click_element(COMMENT_INPUT_TRIGGER) |
| 49 | - time.sleep(0.5) | 49 | + sleep_random(400, 800) |
| 50 | 50 | ||
| 51 | - # 输入评论内容 | 51 | + # 输入评论内容(CDP 逐字输入) |
| 52 | page.wait_for_element(COMMENT_INPUT_FIELD, timeout=5) | 52 | page.wait_for_element(COMMENT_INPUT_FIELD, timeout=5) |
| 53 | - page.evaluate( | ||
| 54 | - f""" | ||
| 55 | - (() => {{ | ||
| 56 | - const el = document.querySelector({_js_str(COMMENT_INPUT_FIELD)}); | ||
| 57 | - if (el) {{ | ||
| 58 | - el.focus(); | ||
| 59 | - el.textContent = {_js_str(content)}; | ||
| 60 | - el.dispatchEvent(new Event('input', {{bubbles: true}})); | ||
| 61 | - }} | ||
| 62 | - }})() | ||
| 63 | - """ | ||
| 64 | - ) | ||
| 65 | - time.sleep(1) | 53 | + page.input_content_editable(COMMENT_INPUT_FIELD, content) |
| 54 | + sleep_random(600, 1200) | ||
| 66 | 55 | ||
| 67 | # 点击提交 | 56 | # 点击提交 |
| 68 | page.click_element(COMMENT_SUBMIT_BUTTON) | 57 | page.click_element(COMMENT_SUBMIT_BUTTON) |
| 69 | - time.sleep(1) | 58 | + sleep_random(800, 1500) |
| 70 | 59 | ||
| 71 | logger.info("评论发送成功: feed=%s", feed_id) | 60 | logger.info("评论发送成功: feed=%s", feed_id) |
| 72 | 61 | ||
| @@ -103,42 +92,31 @@ def reply_comment( | @@ -103,42 +92,31 @@ def reply_comment( | ||
| 103 | page.navigate(url) | 92 | page.navigate(url) |
| 104 | page.wait_for_load() | 93 | page.wait_for_load() |
| 105 | page.wait_dom_stable() | 94 | page.wait_dom_stable() |
| 106 | - time.sleep(1) | 95 | + sleep_random(800, 1500) |
| 107 | 96 | ||
| 108 | _check_page_accessible(page) | 97 | _check_page_accessible(page) |
| 109 | - time.sleep(2) | 98 | + sleep_random(1500, 2500) |
| 110 | 99 | ||
| 111 | # 查找目标评论 | 100 | # 查找目标评论 |
| 112 | comment_found = _find_and_scroll_to_comment(page, comment_id, user_id) | 101 | comment_found = _find_and_scroll_to_comment(page, comment_id, user_id) |
| 113 | if not comment_found: | 102 | if not comment_found: |
| 114 | raise RuntimeError(f"未找到评论 (commentID: {comment_id}, userID: {user_id})") | 103 | raise RuntimeError(f"未找到评论 (commentID: {comment_id}, userID: {user_id})") |
| 115 | 104 | ||
| 116 | - time.sleep(1) | 105 | + sleep_random(800, 1500) |
| 117 | 106 | ||
| 118 | # 点击回复按钮 | 107 | # 点击回复按钮 |
| 119 | reply_selector = f"#comment-{comment_id} {REPLY_BUTTON}" if comment_id else REPLY_BUTTON | 108 | reply_selector = f"#comment-{comment_id} {REPLY_BUTTON}" if comment_id else REPLY_BUTTON |
| 120 | page.click_element(reply_selector) | 109 | page.click_element(reply_selector) |
| 121 | - time.sleep(1) | 110 | + sleep_random(800, 1500) |
| 122 | 111 | ||
| 123 | - # 输入回复内容 | 112 | + # 输入回复内容(CDP 逐字输入) |
| 124 | page.wait_for_element(COMMENT_INPUT_FIELD, timeout=5) | 113 | page.wait_for_element(COMMENT_INPUT_FIELD, timeout=5) |
| 125 | - page.evaluate( | ||
| 126 | - f""" | ||
| 127 | - (() => {{ | ||
| 128 | - const el = document.querySelector({_js_str(COMMENT_INPUT_FIELD)}); | ||
| 129 | - if (el) {{ | ||
| 130 | - el.focus(); | ||
| 131 | - el.textContent = {_js_str(content)}; | ||
| 132 | - el.dispatchEvent(new Event('input', {{bubbles: true}})); | ||
| 133 | - }} | ||
| 134 | - }})() | ||
| 135 | - """ | ||
| 136 | - ) | ||
| 137 | - time.sleep(0.5) | 114 | + page.input_content_editable(COMMENT_INPUT_FIELD, content) |
| 115 | + sleep_random(600, 1200) | ||
| 138 | 116 | ||
| 139 | # 点击提交 | 117 | # 点击提交 |
| 140 | page.click_element(COMMENT_SUBMIT_BUTTON) | 118 | page.click_element(COMMENT_SUBMIT_BUTTON) |
| 141 | - time.sleep(2) | 119 | + sleep_random(1500, 2500) |
| 142 | 120 | ||
| 143 | logger.info("回复评论成功") | 121 | logger.info("回复评论成功") |
| 144 | 122 | ||
| @@ -154,7 +132,7 @@ def _find_and_scroll_to_comment( | @@ -154,7 +132,7 @@ def _find_and_scroll_to_comment( | ||
| 154 | 132 | ||
| 155 | # 先滚动到评论区 | 133 | # 先滚动到评论区 |
| 156 | page.scroll_element_into_view(".comments-container") | 134 | page.scroll_element_into_view(".comments-container") |
| 157 | - time.sleep(1) | 135 | + sleep_random(800, 1500) |
| 158 | 136 | ||
| 159 | last_count = 0 | 137 | last_count = 0 |
| 160 | stagnant = 0 | 138 | stagnant = 0 |
| @@ -179,11 +157,11 @@ def _find_and_scroll_to_comment( | @@ -179,11 +157,11 @@ def _find_and_scroll_to_comment( | ||
| 179 | # 滚动到最后一条评论 | 157 | # 滚动到最后一条评论 |
| 180 | if current_count > 0: | 158 | if current_count > 0: |
| 181 | page.scroll_nth_element_into_view(PARENT_COMMENT, current_count - 1) | 159 | page.scroll_nth_element_into_view(PARENT_COMMENT, current_count - 1) |
| 182 | - time.sleep(0.3) | 160 | + sleep_random(200, 500) |
| 183 | 161 | ||
| 184 | # 继续滚动 | 162 | # 继续滚动 |
| 185 | page.evaluate("window.scrollBy(0, window.innerHeight * 0.8)") | 163 | page.evaluate("window.scrollBy(0, window.innerHeight * 0.8)") |
| 186 | - time.sleep(0.5) | 164 | + sleep_random(400, 800) |
| 187 | 165 | ||
| 188 | # 通过 commentID 查找 | 166 | # 通过 commentID 查找 |
| 189 | if comment_id: | 167 | if comment_id: |
| @@ -215,7 +193,7 @@ def _find_and_scroll_to_comment( | @@ -215,7 +193,7 @@ def _find_and_scroll_to_comment( | ||
| 215 | logger.info("通过 userID 找到评论 (尝试 %d 次)", attempt + 1) | 193 | logger.info("通过 userID 找到评论 (尝试 %d 次)", attempt + 1) |
| 216 | return True | 194 | return True |
| 217 | 195 | ||
| 218 | - time.sleep(0.8) | 196 | + sleep_random(600, 1200) |
| 219 | 197 | ||
| 220 | return False | 198 | return False |
| 221 | 199 |
| @@ -58,6 +58,15 @@ _INACCESSIBLE_KEYWORDS = [ | @@ -58,6 +58,15 @@ _INACCESSIBLE_KEYWORDS = [ | ||
| 58 | "仅作者可见", | 58 | "仅作者可见", |
| 59 | "因用户设置,你无法查看", | 59 | "因用户设置,你无法查看", |
| 60 | "因违规无法查看", | 60 | "因违规无法查看", |
| 61 | + "Isn't Available", | ||
| 62 | + "isn't available", | ||
| 63 | +] | ||
| 64 | + | ||
| 65 | +# 扫码验证关键词(触发反爬机制) | ||
| 66 | +_SCAN_QRCODE_KEYWORDS = [ | ||
| 67 | + "扫码查看", | ||
| 68 | + "打开小红书App扫码", | ||
| 69 | + "请使用小红书App扫码", | ||
| 61 | ] | 70 | ] |
| 62 | 71 | ||
| 63 | _REPLY_COUNT_RE = re.compile(r"展开\s*(\d+)\s*条回复") | 72 | _REPLY_COUNT_RE = re.compile(r"展开\s*(\d+)\s*条回复") |
| @@ -110,10 +119,10 @@ def get_feed_detail( | @@ -110,10 +119,10 @@ def get_feed_detail( | ||
| 110 | else: | 119 | else: |
| 111 | raise RuntimeError("页面导航失败") | 120 | raise RuntimeError("页面导航失败") |
| 112 | 121 | ||
| 113 | - sleep_random(1000, 1000) | 122 | + sleep_random(800, 1500) |
| 114 | 123 | ||
| 115 | - # 检查页面可访问性 | ||
| 116 | - _check_page_accessible(page) | 124 | + # 检查页面可访问性(扫码验证时自动等待重试) |
| 125 | + _check_page_accessible(page, url) | ||
| 117 | 126 | ||
| 118 | # 加载全部评论 | 127 | # 加载全部评论 |
| 119 | if load_all_comments: | 128 | if load_all_comments: |
| @@ -128,8 +137,11 @@ def get_feed_detail( | @@ -128,8 +137,11 @@ def get_feed_detail( | ||
| 128 | # ========== 页面检查 ========== | 137 | # ========== 页面检查 ========== |
| 129 | 138 | ||
| 130 | 139 | ||
| 131 | -def _check_page_accessible(page: Page) -> None: | ||
| 132 | - """检查页面是否可访问。""" | 140 | +def _check_page_accessible(page: Page, url: str = "") -> None: |
| 141 | + """检查页面是否可访问。 | ||
| 142 | + | ||
| 143 | + 扫码验证场景:等待 10 秒后自动重新访问,验证消失则继续,否则报错。 | ||
| 144 | + """ | ||
| 133 | time.sleep(0.5) | 145 | time.sleep(0.5) |
| 134 | 146 | ||
| 135 | text = page.get_element_text(ACCESS_ERROR_WRAPPER) | 147 | text = page.get_element_text(ACCESS_ERROR_WRAPPER) |
| @@ -137,6 +149,28 @@ def _check_page_accessible(page: Page) -> None: | @@ -137,6 +149,28 @@ def _check_page_accessible(page: Page) -> None: | ||
| 137 | return | 149 | return |
| 138 | 150 | ||
| 139 | text = text.strip() | 151 | text = text.strip() |
| 152 | + | ||
| 153 | + # 检测扫码验证(反爬机制触发)→ 等待后重试 | ||
| 154 | + if _is_scan_qrcode_verification(text) and url: | ||
| 155 | + logger.warning("触发小红书扫码验证,等待 10 秒后重新访问...") | ||
| 156 | + time.sleep(10) | ||
| 157 | + page.navigate(url) | ||
| 158 | + page.wait_for_load() | ||
| 159 | + page.wait_dom_stable() | ||
| 160 | + time.sleep(1) | ||
| 161 | + | ||
| 162 | + retry_text = page.get_element_text(ACCESS_ERROR_WRAPPER) | ||
| 163 | + if retry_text and _is_scan_qrcode_verification(retry_text.strip()): | ||
| 164 | + raise PageNotAccessibleError( | ||
| 165 | + "触发了小红书验证,需要在浏览器中扫码完成验证后重试。" | ||
| 166 | + "这通常是小红书的反爬机制,请稍后再试或在 Chrome 中手动打开该笔记完成验证" | ||
| 167 | + ) | ||
| 168 | + if not retry_text or not retry_text.strip(): | ||
| 169 | + logger.info("验证已消失,继续加载笔记") | ||
| 170 | + return | ||
| 171 | + # 重试后仍有其他错误,继续走下面的关键词检测 | ||
| 172 | + text = retry_text.strip() | ||
| 173 | + | ||
| 140 | for kw in _INACCESSIBLE_KEYWORDS: | 174 | for kw in _INACCESSIBLE_KEYWORDS: |
| 141 | if kw in text: | 175 | if kw in text: |
| 142 | raise PageNotAccessibleError(kw) | 176 | raise PageNotAccessibleError(kw) |
| @@ -145,6 +179,11 @@ def _check_page_accessible(page: Page) -> None: | @@ -145,6 +179,11 @@ def _check_page_accessible(page: Page) -> None: | ||
| 145 | raise PageNotAccessibleError(text) | 179 | raise PageNotAccessibleError(text) |
| 146 | 180 | ||
| 147 | 181 | ||
| 182 | +def _is_scan_qrcode_verification(text: str) -> bool: | ||
| 183 | + """判断页面文本是否为扫码验证。""" | ||
| 184 | + return any(kw in text for kw in _SCAN_QRCODE_KEYWORDS) | ||
| 185 | + | ||
| 186 | + | ||
| 148 | # ========== 数据提取 ========== | 187 | # ========== 数据提取 ========== |
| 149 | 188 | ||
| 150 | 189 |
| @@ -32,6 +32,11 @@ def sleep_random(min_ms: int, max_ms: int) -> None: | @@ -32,6 +32,11 @@ def sleep_random(min_ms: int, max_ms: int) -> None: | ||
| 32 | time.sleep(delay) | 32 | time.sleep(delay) |
| 33 | 33 | ||
| 34 | 34 | ||
| 35 | +def navigation_delay() -> None: | ||
| 36 | + """页面导航后的随机等待,模拟人类阅读。""" | ||
| 37 | + sleep_random(1000, 2500) | ||
| 38 | + | ||
| 39 | + | ||
| 35 | def get_scroll_interval(speed: str) -> float: | 40 | def get_scroll_interval(speed: str) -> float: |
| 36 | """根据速度获取滚动间隔(秒)。""" | 41 | """根据速度获取滚动间隔(秒)。""" |
| 37 | if speed == "slow": | 42 | if speed == "slow": |
| @@ -9,6 +9,7 @@ import tempfile | @@ -9,6 +9,7 @@ import tempfile | ||
| 9 | import time | 9 | import time |
| 10 | 10 | ||
| 11 | from .cdp import Page | 11 | from .cdp import Page |
| 12 | +from .human import sleep_random | ||
| 12 | from .selectors import LOGIN_STATUS, QRCODE_IMG | 13 | from .selectors import LOGIN_STATUS, QRCODE_IMG |
| 13 | from .urls import EXPLORE_URL | 14 | from .urls import EXPLORE_URL |
| 14 | 15 | ||
| @@ -23,7 +24,7 @@ def check_login_status(page: Page) -> bool: | @@ -23,7 +24,7 @@ def check_login_status(page: Page) -> bool: | ||
| 23 | """ | 24 | """ |
| 24 | page.navigate(EXPLORE_URL) | 25 | page.navigate(EXPLORE_URL) |
| 25 | page.wait_for_load() | 26 | page.wait_for_load() |
| 26 | - time.sleep(1) | 27 | + sleep_random(800, 1500) |
| 27 | 28 | ||
| 28 | return page.has_element(LOGIN_STATUS) | 29 | return page.has_element(LOGIN_STATUS) |
| 29 | 30 | ||
| @@ -38,7 +39,7 @@ def fetch_qrcode(page: Page) -> tuple[str, bool]: | @@ -38,7 +39,7 @@ def fetch_qrcode(page: Page) -> tuple[str, bool]: | ||
| 38 | """ | 39 | """ |
| 39 | page.navigate(EXPLORE_URL) | 40 | page.navigate(EXPLORE_URL) |
| 40 | page.wait_for_load() | 41 | page.wait_for_load() |
| 41 | - time.sleep(2) | 42 | + sleep_random(1500, 2500) |
| 42 | 43 | ||
| 43 | # 检查是否已登录 | 44 | # 检查是否已登录 |
| 44 | if page.has_element(LOGIN_STATUS): | 45 | if page.has_element(LOGIN_STATUS): |
| @@ -8,6 +8,7 @@ import time | @@ -8,6 +8,7 @@ import time | ||
| 8 | 8 | ||
| 9 | from .cdp import Page | 9 | from .cdp import Page |
| 10 | from .errors import NoFeedsError | 10 | from .errors import NoFeedsError |
| 11 | +from .human import sleep_random | ||
| 11 | from .selectors import FILTER_BUTTON, FILTER_PANEL | 12 | from .selectors import FILTER_BUTTON, FILTER_PANEL |
| 12 | from .types import Feed, FilterOption | 13 | from .types import Feed, FilterOption |
| 13 | from .urls import make_search_url | 14 | from .urls import make_search_url |
| @@ -139,7 +140,7 @@ def _apply_filters(page: Page, filters: list[tuple[int, int]]) -> None: | @@ -139,7 +140,7 @@ def _apply_filters(page: Page, filters: list[tuple[int, int]]) -> None: | ||
| 139 | while time.monotonic() < deadline: | 140 | while time.monotonic() < deadline: |
| 140 | if page.has_element(FILTER_PANEL): | 141 | if page.has_element(FILTER_PANEL): |
| 141 | break | 142 | break |
| 142 | - time.sleep(0.3) | 143 | + sleep_random(300, 600) |
| 143 | 144 | ||
| 144 | # 点击各筛选项 | 145 | # 点击各筛选项 |
| 145 | for filters_index, tags_index in filters: | 146 | for filters_index, tags_index in filters: |
| @@ -148,7 +149,7 @@ def _apply_filters(page: Page, filters: list[tuple[int, int]]) -> None: | @@ -148,7 +149,7 @@ def _apply_filters(page: Page, filters: list[tuple[int, int]]) -> None: | ||
| 148 | f"div.tags:nth-child({tags_index})" | 149 | f"div.tags:nth-child({tags_index})" |
| 149 | ) | 150 | ) |
| 150 | page.click_element(selector) | 151 | page.click_element(selector) |
| 151 | - time.sleep(0.3) | 152 | + sleep_random(300, 600) |
| 152 | 153 | ||
| 153 | # 等待页面更新 | 154 | # 等待页面更新 |
| 154 | page.wait_dom_stable() | 155 | page.wait_dom_stable() |
| 1 | """反检测 JS 注入 + Chrome 启动参数,对应 go-rod/stealth。""" | 1 | """反检测 JS 注入 + Chrome 启动参数,对应 go-rod/stealth。""" |
| 2 | 2 | ||
| 3 | +# 真实 Chrome UA(固定版本,避免每次随机导致指纹不一致) | ||
| 4 | +REALISTIC_UA = ( | ||
| 5 | + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " | ||
| 6 | + "AppleWebKit/537.36 (KHTML, like Gecko) " | ||
| 7 | + "Chrome/131.0.0.0 Safari/537.36" | ||
| 8 | +) | ||
| 9 | + | ||
| 3 | # 反检测 JS 脚本:在页面加载时注入 | 10 | # 反检测 JS 脚本:在页面加载时注入 |
| 4 | STEALTH_JS = """ | 11 | STEALTH_JS = """ |
| 5 | (() => { | 12 | (() => { |
| @@ -72,6 +79,45 @@ STEALTH_JS = """ | @@ -72,6 +79,45 @@ STEALTH_JS = """ | ||
| 72 | if (parameter === 37446) return 'Intel Iris OpenGL Engine'; | 79 | if (parameter === 37446) return 'Intel Iris OpenGL Engine'; |
| 73 | return getParameter.call(this, parameter); | 80 | return getParameter.call(this, parameter); |
| 74 | }; | 81 | }; |
| 82 | + | ||
| 83 | + // 7. hardwareConcurrency — 随机 4 或 8 | ||
| 84 | + Object.defineProperty(navigator, 'hardwareConcurrency', { | ||
| 85 | + get: () => [4, 8][Math.floor(Math.random() * 2)], | ||
| 86 | + configurable: true, | ||
| 87 | + }); | ||
| 88 | + | ||
| 89 | + // 8. deviceMemory — 随机 4 或 8 | ||
| 90 | + Object.defineProperty(navigator, 'deviceMemory', { | ||
| 91 | + get: () => [4, 8][Math.floor(Math.random() * 2)], | ||
| 92 | + configurable: true, | ||
| 93 | + }); | ||
| 94 | + | ||
| 95 | + // 9. navigator.connection — 伪造网络信息 | ||
| 96 | + Object.defineProperty(navigator, 'connection', { | ||
| 97 | + get: () => ({ | ||
| 98 | + effectiveType: '4g', | ||
| 99 | + downlink: 10, | ||
| 100 | + rtt: 50, | ||
| 101 | + saveData: false, | ||
| 102 | + }), | ||
| 103 | + configurable: true, | ||
| 104 | + }); | ||
| 105 | + | ||
| 106 | + // 10. chrome.csi / chrome.loadTimes — 空函数伪装 | ||
| 107 | + if (window.chrome) { | ||
| 108 | + window.chrome.csi = function() { return {}; }; | ||
| 109 | + window.chrome.loadTimes = function() { return {}; }; | ||
| 110 | + } | ||
| 111 | + | ||
| 112 | + // 11. outerWidth/outerHeight — 与 innerWidth/innerHeight 对齐 | ||
| 113 | + Object.defineProperty(window, 'outerWidth', { | ||
| 114 | + get: () => window.innerWidth, | ||
| 115 | + configurable: true, | ||
| 116 | + }); | ||
| 117 | + Object.defineProperty(window, 'outerHeight', { | ||
| 118 | + get: () => window.innerHeight, | ||
| 119 | + configurable: true, | ||
| 120 | + }); | ||
| 75 | })(); | 121 | })(); |
| 76 | """ | 122 | """ |
| 77 | 123 | ||
| @@ -85,4 +131,6 @@ STEALTH_ARGS = [ | @@ -85,4 +131,6 @@ STEALTH_ARGS = [ | ||
| 85 | "--disable-backgrounding-occluded-windows", | 131 | "--disable-backgrounding-occluded-windows", |
| 86 | "--disable-renderer-backgrounding", | 132 | "--disable-renderer-backgrounding", |
| 87 | "--disable-component-update", | 133 | "--disable-component-update", |
| 134 | + "--disable-extensions", | ||
| 135 | + "--disable-sync", | ||
| 88 | ] | 136 | ] |
-
Please register or login to post a comment