fix: 二维码获取改用 Canvas 提取,修复遮罩层截图偏移问题
- 选择器从 .qrcode.force-light 改为 img.qrcode-img(实际二维码元素)
- 不再使用 CDP 截图,直接读取 img.src 的 base64 数据,避免渲染时机问题
- 在浏览器端用 Canvas 添加 16px 白边,零依赖
- screenshot_element 改用 DOM.getBoxModel 获取坐标,修复 position:fixed 遮罩层内元素用 getBoundingClientRect + pageOffset 截到背景的问题
Showing 5 changed files with 111 additions and 173 deletions
| @@ -231,19 +231,15 @@ def cmd_login(args: argparse.Namespace) -> None: | @@ -231,19 +231,15 @@ def cmd_login(args: argparse.Namespace) -> None: | ||
| 231 | 231 | ||
| 232 | browser, page = _connect(args) | 232 | browser, page = _connect(args) |
| 233 | try: | 233 | try: |
| 234 | - src, already = fetch_qrcode(page) | 234 | + png_bytes, already = fetch_qrcode(page) |
| 235 | if already: | 235 | if already: |
| 236 | _output({"logged_in": True, "message": "已登录"}) | 236 | _output({"logged_in": True, "message": "已登录"}) |
| 237 | return | 237 | return |
| 238 | 238 | ||
| 239 | - qrcode_path, qrcode_data_url = save_qrcode_to_file(src) | 239 | + qrcode_path = save_qrcode_to_file(png_bytes) |
| 240 | print( | 240 | print( |
| 241 | json.dumps( | 241 | json.dumps( |
| 242 | - { | ||
| 243 | - "qrcode_path": qrcode_path, | ||
| 244 | - "qrcode_data_url": qrcode_data_url, | ||
| 245 | - "message": "请扫码登录,二维码已保存到文件", | ||
| 246 | - }, | 242 | + {"qrcode_path": qrcode_path, "message": "请扫码登录,二维码已保存到文件"}, |
| 247 | ensure_ascii=False, | 243 | ensure_ascii=False, |
| 248 | ) | 244 | ) |
| 249 | ) | 245 | ) |
| @@ -313,14 +309,14 @@ def cmd_get_qrcode(args: argparse.Namespace) -> None: | @@ -313,14 +309,14 @@ def cmd_get_qrcode(args: argparse.Namespace) -> None: | ||
| 313 | 309 | ||
| 314 | browser, page = _connect(args) | 310 | browser, page = _connect(args) |
| 315 | 311 | ||
| 316 | - src, already = fetch_qrcode(page) | 312 | + png_bytes, already = fetch_qrcode(page) |
| 317 | if already: | 313 | if already: |
| 318 | browser.close_page(page) | 314 | browser.close_page(page) |
| 319 | browser.close() | 315 | browser.close() |
| 320 | _output({"logged_in": True, "message": "已登录"}) | 316 | _output({"logged_in": True, "message": "已登录"}) |
| 321 | return | 317 | return |
| 322 | 318 | ||
| 323 | - qrcode_path, qrcode_data_url = save_qrcode_to_file(src) | 319 | + qrcode_path = save_qrcode_to_file(png_bytes) |
| 324 | 320 | ||
| 325 | # 记录 tab,供 wait-login 精确reconnect | 321 | # 记录 tab,供 wait-login 精确reconnect |
| 326 | _save_login_tab(page.target_id) | 322 | _save_login_tab(page.target_id) |
| @@ -329,7 +325,6 @@ def cmd_get_qrcode(args: argparse.Namespace) -> None: | @@ -329,7 +325,6 @@ def cmd_get_qrcode(args: argparse.Namespace) -> None: | ||
| 329 | browser.close() | 325 | browser.close() |
| 330 | _output({ | 326 | _output({ |
| 331 | "qrcode_path": qrcode_path, | 327 | "qrcode_path": qrcode_path, |
| 332 | - "qrcode_data_url": qrcode_data_url, | ||
| 333 | "message": "二维码已生成,请扫码登录。扫码后运行 check-login 确认登录状态。", | 328 | "message": "二维码已生成,请扫码登录。扫码后运行 check-login 确认登录状态。", |
| 334 | }) | 329 | }) |
| 335 | 330 |
| @@ -514,6 +514,53 @@ class Page: | @@ -514,6 +514,53 @@ class Page: | ||
| 514 | """ | 514 | """ |
| 515 | ) | 515 | ) |
| 516 | 516 | ||
| 517 | + def screenshot_element(self, selector: str, padding: int = 0) -> bytes: | ||
| 518 | + """对指定 CSS 选择器的元素截图,返回 PNG 字节。 | ||
| 519 | + | ||
| 520 | + 通过 CDP Page.captureScreenshot 截取元素所在区域,比 Python 层 PNG | ||
| 521 | + 解码/重编码快很多,且图片直接来自浏览器渲染结果。 | ||
| 522 | + | ||
| 523 | + Args: | ||
| 524 | + selector: CSS 选择器。 | ||
| 525 | + padding: 在元素四周额外保留的像素数(背景色填充,相当于白边)。 | ||
| 526 | + | ||
| 527 | + Returns: | ||
| 528 | + PNG 字节;元素不存在时返回 b""。 | ||
| 529 | + """ | ||
| 530 | + import base64 as _b64 | ||
| 531 | + | ||
| 532 | + # 用 DOM.getBoxModel 获取元素坐标,返回的是 page 坐标系(CSS px,相对于文档左上角)。 | ||
| 533 | + # getBoundingClientRect 返回的是 viewport 坐标系,对 position:fixed 遮罩层内的元素 | ||
| 534 | + # 加 pageXOffset 后依然会截到遮罩背后的内容。DOM.getBoxModel 则始终正确。 | ||
| 535 | + try: | ||
| 536 | + doc = self._send_session("DOM.getDocument", {"depth": 0}) | ||
| 537 | + root_id = doc["root"]["nodeId"] | ||
| 538 | + query = self._send_session("DOM.querySelector", {"nodeId": root_id, "selector": selector}) | ||
| 539 | + node_id = query.get("nodeId", 0) | ||
| 540 | + if not node_id: | ||
| 541 | + return b"" | ||
| 542 | + box_model = self._send_session("DOM.getBoxModel", {"nodeId": node_id}) | ||
| 543 | + model = box_model["model"] | ||
| 544 | + content = model["content"] # [x1,y1, x2,y2, x3,y3, x4,y4] 顺时针四角 | ||
| 545 | + x, y = content[0], content[1] | ||
| 546 | + width, height = float(model["width"]), float(model["height"]) | ||
| 547 | + except Exception: | ||
| 548 | + return b"" | ||
| 549 | + | ||
| 550 | + result = self._send_session( | ||
| 551 | + "Page.captureScreenshot", | ||
| 552 | + { | ||
| 553 | + "format": "png", | ||
| 554 | + "clip": { | ||
| 555 | + "x": max(0.0, x - padding), | ||
| 556 | + "y": max(0.0, y - padding), | ||
| 557 | + "width": width + padding * 2, | ||
| 558 | + "height": height + padding * 2, | ||
| 559 | + "scale": 1.0, | ||
| 560 | + }, | ||
| 561 | + }, | ||
| 562 | + ) | ||
| 563 | + return _b64.b64decode(result.get("data", "")) | ||
| 517 | 564 | ||
| 518 | 565 | ||
| 519 | class Browser: | 566 | class Browser: |
| @@ -2,129 +2,15 @@ | @@ -2,129 +2,15 @@ | ||
| 2 | 2 | ||
| 3 | from __future__ import annotations | 3 | from __future__ import annotations |
| 4 | 4 | ||
| 5 | -import base64 | 5 | +import json |
| 6 | import logging | 6 | import logging |
| 7 | import os | 7 | import os |
| 8 | -import struct | ||
| 9 | import tempfile | 8 | import tempfile |
| 10 | import time | 9 | import time |
| 11 | -import zlib | ||
| 12 | 10 | ||
| 13 | _QR_DIR = os.path.join(tempfile.gettempdir(), "xhs") | 11 | _QR_DIR = os.path.join(tempfile.gettempdir(), "xhs") |
| 14 | _QR_FILE = os.path.join(_QR_DIR, "login_qrcode.png") | 12 | _QR_FILE = os.path.join(_QR_DIR, "login_qrcode.png") |
| 15 | -_QR_BORDER = 16 # 白边宽度(像素) | ||
| 16 | - | ||
| 17 | -_PNG_SIG = b"\x89PNG\r\n\x1a\n" | ||
| 18 | - | ||
| 19 | - | ||
| 20 | -def _add_png_border(data: bytes, padding: int = _QR_BORDER) -> bytes: | ||
| 21 | - """给 PNG 图片添加白色边框(纯 Python stdlib,不依赖 Pillow)。 | ||
| 22 | - | ||
| 23 | - 支持 8-bit 深度的 Grayscale / RGB / Grayscale+Alpha / RGBA 四种色彩类型。 | ||
| 24 | - Indexed-color(color_type=3)暂不处理,原样返回。 | ||
| 25 | - | ||
| 26 | - Args: | ||
| 27 | - data: 原始 PNG 字节。 | ||
| 28 | - padding: 边框宽度(像素)。 | ||
| 29 | - | ||
| 30 | - Returns: | ||
| 31 | - 带白色边框的 PNG 字节。 | ||
| 32 | - """ | ||
| 33 | - if not data.startswith(_PNG_SIG): | ||
| 34 | - return data | ||
| 35 | - | ||
| 36 | - # ── 解析 chunks ────────────────────────────────────────────── | ||
| 37 | - def _read_chunks(buf: bytes) -> list[tuple[bytes, bytes]]: | ||
| 38 | - result, pos = [], 8 | ||
| 39 | - while pos < len(buf): | ||
| 40 | - (length,) = struct.unpack_from(">I", buf, pos) | ||
| 41 | - ctype = buf[pos + 4 : pos + 8] | ||
| 42 | - cdata = buf[pos + 8 : pos + 8 + length] | ||
| 43 | - result.append((ctype, cdata)) | ||
| 44 | - pos += 12 + length | ||
| 45 | - return result | ||
| 46 | - | ||
| 47 | - def _make_chunk(ctype: bytes, cdata: bytes) -> bytes: | ||
| 48 | - crc = zlib.crc32(ctype + cdata) & 0xFFFFFFFF | ||
| 49 | - return struct.pack(">I", len(cdata)) + ctype + cdata + struct.pack(">I", crc) | ||
| 50 | - | ||
| 51 | - chunks = _read_chunks(data) | ||
| 52 | - | ||
| 53 | - # ── IHDR ───────────────────────────────────────────────────── | ||
| 54 | - ihdr = next(d for t, d in chunks if t == b"IHDR") | ||
| 55 | - w, h = struct.unpack_from(">II", ihdr) | ||
| 56 | - bit_depth, color_type = ihdr[8], ihdr[9] | ||
| 57 | - | ||
| 58 | - if bit_depth != 8 or color_type == 3: | ||
| 59 | - return data # 不支持的格式,原样返回 | ||
| 60 | - | ||
| 61 | - bpp = {0: 1, 2: 3, 4: 2, 6: 4}[color_type] | ||
| 62 | - white = bytes([255] * bpp) | ||
| 63 | - | ||
| 64 | - # ── 解压 IDAT ──────────────────────────────────────────────── | ||
| 65 | - raw = zlib.decompress(b"".join(d for t, d in chunks if t == b"IDAT")) | ||
| 66 | - | ||
| 67 | - # ── 逐行解码 PNG filter,还原像素数据 ──────────────────────── | ||
| 68 | - stride = w * bpp | ||
| 69 | - | ||
| 70 | - def _paeth(a: int, b: int, c: int) -> int: | ||
| 71 | - p = a + b - c | ||
| 72 | - pa, pb, pc = abs(p - a), abs(p - b), abs(p - c) | ||
| 73 | - if pa <= pb and pa <= pc: | ||
| 74 | - return a | ||
| 75 | - return b if pb <= pc else c | ||
| 76 | - | ||
| 77 | - pixel_rows: list[bytes] = [] | ||
| 78 | - prior = bytearray(stride) | ||
| 79 | - pos = 0 | ||
| 80 | - for _ in range(h): | ||
| 81 | - f = raw[pos] | ||
| 82 | - row = bytearray(raw[pos + 1 : pos + 1 + stride]) | ||
| 83 | - pos += 1 + stride | ||
| 84 | - if f == 1: # Sub | ||
| 85 | - for i in range(bpp, stride): | ||
| 86 | - row[i] = (row[i] + row[i - bpp]) & 0xFF | ||
| 87 | - elif f == 2: # Up | ||
| 88 | - for i in range(stride): | ||
| 89 | - row[i] = (row[i] + prior[i]) & 0xFF | ||
| 90 | - elif f == 3: # Average | ||
| 91 | - for i in range(stride): | ||
| 92 | - a = row[i - bpp] if i >= bpp else 0 | ||
| 93 | - row[i] = (row[i] + (a + prior[i]) // 2) & 0xFF | ||
| 94 | - elif f == 4: # Paeth | ||
| 95 | - for i in range(stride): | ||
| 96 | - a = row[i - bpp] if i >= bpp else 0 | ||
| 97 | - b = prior[i] | ||
| 98 | - c = prior[i - bpp] if i >= bpp else 0 | ||
| 99 | - row[i] = (row[i] + _paeth(a, b, c)) & 0xFF | ||
| 100 | - pixel_rows.append(bytes(row)) | ||
| 101 | - prior = row | ||
| 102 | - | ||
| 103 | - # ── 构建带边框的新图像(filter 0 = None,最简单)──────────── | ||
| 104 | - new_w, new_h = w + padding * 2, h + padding * 2 | ||
| 105 | - white_row = b"\x00" + white * new_w | ||
| 106 | - pad_cols = white * padding | ||
| 107 | - | ||
| 108 | - new_raw = bytearray() | ||
| 109 | - for _ in range(padding): | ||
| 110 | - new_raw += white_row | ||
| 111 | - for row in pixel_rows: | ||
| 112 | - new_raw += b"\x00" + pad_cols + row + pad_cols | ||
| 113 | - for _ in range(padding): | ||
| 114 | - new_raw += white_row | ||
| 115 | - | ||
| 116 | - new_idat = zlib.compress(bytes(new_raw), 6) | ||
| 117 | - new_ihdr = struct.pack(">II", new_w, new_h) + ihdr[8:] | ||
| 118 | - | ||
| 119 | - # ── 重建 PNG ───────────────────────────────────────────────── | ||
| 120 | - out = bytearray(_PNG_SIG) | ||
| 121 | - out += _make_chunk(b"IHDR", new_ihdr) | ||
| 122 | - for ctype, cdata in chunks: | ||
| 123 | - if ctype not in (b"IHDR", b"IDAT", b"IEND"): | ||
| 124 | - out += _make_chunk(ctype, cdata) | ||
| 125 | - out += _make_chunk(b"IDAT", new_idat) | ||
| 126 | - out += _make_chunk(b"IEND", b"") | ||
| 127 | - return bytes(out) | 13 | +_QR_BORDER = 16 # 截图时在元素四周留白的像素数 |
| 128 | 14 | ||
| 129 | from .cdp import Page | 15 | from .cdp import Page |
| 130 | from .errors import RateLimitError | 16 | from .errors import RateLimitError |
| @@ -148,6 +34,19 @@ from .urls import EXPLORE_URL | @@ -148,6 +34,19 @@ from .urls import EXPLORE_URL | ||
| 148 | logger = logging.getLogger(__name__) | 34 | logger = logging.getLogger(__name__) |
| 149 | 35 | ||
| 150 | 36 | ||
| 37 | +def _wait_for_auth_ui(page: Page, timeout: float = 8.0) -> None: | ||
| 38 | + """等待认证 UI 出现,替代固定延迟。 | ||
| 39 | + | ||
| 40 | + 轮询直到登录状态指示器或登录容器出现为止,避免无谓等待。 | ||
| 41 | + 超时后静默返回,由调用方自行处理元素不存在的情况。 | ||
| 42 | + """ | ||
| 43 | + deadline = time.monotonic() + timeout | ||
| 44 | + while time.monotonic() < deadline: | ||
| 45 | + if page.has_element(LOGIN_STATUS) or page.has_element(LOGIN_CONTAINER): | ||
| 46 | + return | ||
| 47 | + time.sleep(0.2) | ||
| 48 | + | ||
| 49 | + | ||
| 151 | def check_login_status(page: Page) -> bool: | 50 | def check_login_status(page: Page) -> bool: |
| 152 | """检查登录状态。 | 51 | """检查登录状态。 |
| 153 | 52 | ||
| @@ -156,70 +55,67 @@ def check_login_status(page: Page) -> bool: | @@ -156,70 +55,67 @@ def check_login_status(page: Page) -> bool: | ||
| 156 | """ | 55 | """ |
| 157 | page.navigate(EXPLORE_URL) | 56 | page.navigate(EXPLORE_URL) |
| 158 | page.wait_for_load() | 57 | page.wait_for_load() |
| 159 | - sleep_random(800, 1500) | 58 | + _wait_for_auth_ui(page) |
| 160 | 59 | ||
| 161 | return page.has_element(LOGIN_STATUS) | 60 | return page.has_element(LOGIN_STATUS) |
| 162 | 61 | ||
| 163 | 62 | ||
| 164 | -def fetch_qrcode(page: Page) -> tuple[str, bool]: | ||
| 165 | - """获取登录二维码。 | 63 | +def fetch_qrcode(page: Page) -> tuple[bytes, bool]: |
| 64 | + """截取登录二维码图片(CDP 元素截图)。 | ||
| 166 | 65 | ||
| 167 | Returns: | 66 | Returns: |
| 168 | - (qrcode_src, already_logged_in) | ||
| 169 | - - 如果已登录,返回 ("", True) | ||
| 170 | - - 如果未登录,返回 (qrcode_base64_or_url, False) | 67 | + (png_bytes, already_logged_in) |
| 68 | + - 如果已登录,返回 (b"", True) | ||
| 69 | + - 如果未登录,返回 (png_bytes, False) | ||
| 171 | """ | 70 | """ |
| 172 | page.navigate(EXPLORE_URL) | 71 | page.navigate(EXPLORE_URL) |
| 173 | page.wait_for_load() | 72 | page.wait_for_load() |
| 174 | - sleep_random(1500, 2500) | 73 | + _wait_for_auth_ui(page) |
| 175 | 74 | ||
| 176 | - # 检查是否已登录 | ||
| 177 | if page.has_element(LOGIN_STATUS): | 75 | if page.has_element(LOGIN_STATUS): |
| 178 | - return "", True | ||
| 179 | - | ||
| 180 | - # 获取二维码图片 src | ||
| 181 | - src = page.get_element_attribute(QRCODE_IMG, "src") | ||
| 182 | - if not src: | ||
| 183 | - raise RuntimeError("二维码图片 src 为空") | ||
| 184 | - | ||
| 185 | - return src, False | ||
| 186 | - | ||
| 187 | - | ||
| 188 | -def save_qrcode_to_file(src: str) -> tuple[str, str]: | ||
| 189 | - """将二维码图片保存为临时 PNG 文件,同时返回 data URL。 | ||
| 190 | - | ||
| 191 | - 相当于浏览器"右键 → 另存为图片":从 img.src 取得图片字节后落盘。 | 76 | + return b"", True |
| 77 | + | ||
| 78 | + # 等待 img.qrcode-img 出现,用浏览器 Canvas 加白边后导出 PNG base64 | ||
| 79 | + page.wait_for_element(QRCODE_IMG, timeout=10.0) | ||
| 80 | + b64 = page.evaluate( | ||
| 81 | + f""" | ||
| 82 | + (() => {{ | ||
| 83 | + const img = document.querySelector({json.dumps(QRCODE_IMG)}); | ||
| 84 | + if (!img) return null; | ||
| 85 | + const p = {_QR_BORDER}; | ||
| 86 | + const c = document.createElement('canvas'); | ||
| 87 | + c.width = img.naturalWidth + p * 2; | ||
| 88 | + c.height = img.naturalHeight + p * 2; | ||
| 89 | + const ctx = c.getContext('2d'); | ||
| 90 | + ctx.fillStyle = '#ffffff'; | ||
| 91 | + ctx.fillRect(0, 0, c.width, c.height); | ||
| 92 | + ctx.drawImage(img, p, p); | ||
| 93 | + return c.toDataURL('image/png').split(',')[1]; | ||
| 94 | + }})() | ||
| 95 | + """ | ||
| 96 | + ) | ||
| 97 | + if not b64: | ||
| 98 | + raise RuntimeError("二维码 Canvas 导出失败") | ||
| 99 | + import base64 | ||
| 100 | + png_bytes = base64.b64decode(b64) | ||
| 101 | + | ||
| 102 | + return png_bytes, False | ||
| 103 | + | ||
| 104 | + | ||
| 105 | +def save_qrcode_to_file(png_bytes: bytes) -> str: | ||
| 106 | + """将二维码 PNG 字节保存到临时文件,返回文件路径。 | ||
| 192 | 107 | ||
| 193 | Args: | 108 | Args: |
| 194 | - src: 二维码 img 元素的 src——data URL(data:image/...;base64,...)或网络 URL。 | 109 | + png_bytes: CDP 截图返回的 PNG 字节。 |
| 195 | 110 | ||
| 196 | Returns: | 111 | Returns: |
| 197 | - (file_path, data_url) | ||
| 198 | - - file_path: 保存的 PNG 文件绝对路径 | ||
| 199 | - - data_url: data:image/png;base64,... 格式,可直接嵌入 Markdown | 112 | + file_path: 保存的 PNG 文件绝对路径。 |
| 200 | """ | 113 | """ |
| 201 | - if src.startswith("data:image/"): | ||
| 202 | - # data URL:直接解码 | ||
| 203 | - _, encoded = src.split(",", 1) | ||
| 204 | - img_data = base64.b64decode(encoded) | ||
| 205 | - elif src.startswith("http://") or src.startswith("https://"): | ||
| 206 | - # 网络 URL:下载(等同浏览器右键另存为) | ||
| 207 | - import requests as _req | ||
| 208 | - resp = _req.get(src, timeout=10) | ||
| 209 | - resp.raise_for_status() | ||
| 210 | - img_data = resp.content | ||
| 211 | - else: | ||
| 212 | - raise ValueError(f"不支持的二维码格式: {src[:80]}") | ||
| 213 | - | ||
| 214 | - img_data = _add_png_border(img_data) | ||
| 215 | - | ||
| 216 | os.makedirs(_QR_DIR, exist_ok=True) | 114 | os.makedirs(_QR_DIR, exist_ok=True) |
| 217 | with open(_QR_FILE, "wb") as f: | 115 | with open(_QR_FILE, "wb") as f: |
| 218 | - f.write(img_data) | ||
| 219 | - | ||
| 220 | - data_url = "data:image/png;base64," + base64.b64encode(img_data).decode() | 116 | + f.write(png_bytes) |
| 221 | logger.info("二维码已保存: %s", _QR_FILE) | 117 | logger.info("二维码已保存: %s", _QR_FILE) |
| 222 | - return _QR_FILE, data_url | 118 | + return _QR_FILE |
| 223 | 119 | ||
| 224 | 120 | ||
| 225 | def send_phone_code(page: Page, phone: str) -> bool: | 121 | def send_phone_code(page: Page, phone: str) -> bool: |
| @@ -2,7 +2,7 @@ | @@ -2,7 +2,7 @@ | ||
| 2 | 2 | ||
| 3 | # ========== 登录 ========== | 3 | # ========== 登录 ========== |
| 4 | LOGIN_STATUS = ".main-container .user .link-wrapper .channel" | 4 | LOGIN_STATUS = ".main-container .user .link-wrapper .channel" |
| 5 | -QRCODE_IMG = ".login-container .qrcode-img" | 5 | +QRCODE_IMG = ".qrcode-img" |
| 6 | 6 | ||
| 7 | # ========== 手机号登录 ========== | 7 | # ========== 手机号登录 ========== |
| 8 | LOGIN_CONTAINER = ".login-container" | 8 | LOGIN_CONTAINER = ".login-container" |
| @@ -89,7 +89,7 @@ python scripts/cli.py get-qrcode | @@ -89,7 +89,7 @@ python scripts/cli.py get-qrcode | ||
| 89 |  | 89 |  |
| 90 | ``` | 90 | ``` |
| 91 | 91 | ||
| 92 | -图片含 16px 白色边框,内嵌在对话窗口,用户用小红书 App 扫对话里的二维码。 | 92 | +图片内嵌在对话窗口,用户用小红书 App 扫对话里的二维码。 |
| 93 | 93 | ||
| 94 | **第三步** — 等待登录完成(**单次调用,无需轮询**): | 94 | **第三步** — 等待登录完成(**单次调用,无需轮询**): |
| 95 | 95 |
-
Please register or login to post a comment