fix: 修复发布流程 4 个 bug — 页面关闭、标签错位、换行丢失、标签重复
- cli.py: 分步发布命令(fill-publish/fill-publish-video/long-article/select-template/next-step)不再关闭页面,新增 _connect_existing 复用已有 tab
- cdp.py: input_content_editable 遇到 \n 时按 Enter 键,正确产生段落换行
- publish.py: _input_tags 先 focus 正文编辑器再输入标签;新增 _extract_hashtags_from_content 从正文末尾提取 hashtag 合并到 tags 去重;标签输入增加重试等待联想下拉
Showing 3 changed files with 155 additions and 44 deletions
| @@ -12,6 +12,12 @@ import json | @@ -12,6 +12,12 @@ import json | ||
| 12 | import logging | 12 | import logging |
| 13 | import sys | 13 | import sys |
| 14 | 14 | ||
| 15 | +# Windows 控制台默认编码(如 cp1252)不支持中文,强制 UTF-8 | ||
| 16 | +if sys.stdout and hasattr(sys.stdout, "reconfigure"): | ||
| 17 | + sys.stdout.reconfigure(encoding="utf-8") | ||
| 18 | +if sys.stderr and hasattr(sys.stderr, "reconfigure"): | ||
| 19 | + sys.stderr.reconfigure(encoding="utf-8") | ||
| 20 | + | ||
| 15 | logging.basicConfig( | 21 | logging.basicConfig( |
| 16 | level=logging.INFO, | 22 | level=logging.INFO, |
| 17 | format="%(asctime)s %(levelname)s %(name)s: %(message)s", | 23 | format="%(asctime)s %(levelname)s %(name)s: %(message)s", |
| @@ -27,14 +33,43 @@ def _output(data: dict, exit_code: int = 0) -> None: | @@ -27,14 +33,43 @@ def _output(data: dict, exit_code: int = 0) -> None: | ||
| 27 | 33 | ||
| 28 | def _connect(args: argparse.Namespace): | 34 | def _connect(args: argparse.Namespace): |
| 29 | """连接到 Chrome 并返回 (browser, page)。""" | 35 | """连接到 Chrome 并返回 (browser, page)。""" |
| 36 | + from chrome_launcher import ensure_chrome | ||
| 30 | from xhs.cdp import Browser | 37 | from xhs.cdp import Browser |
| 31 | 38 | ||
| 39 | + if not ensure_chrome(port=args.port): | ||
| 40 | + _output( | ||
| 41 | + {"success": False, "error": "无法启动 Chrome,请检查 Chrome 是否已安装"}, | ||
| 42 | + exit_code=2, | ||
| 43 | + ) | ||
| 44 | + | ||
| 32 | browser = Browser(host=args.host, port=args.port) | 45 | browser = Browser(host=args.host, port=args.port) |
| 33 | browser.connect() | 46 | browser.connect() |
| 34 | page = browser.new_page() | 47 | page = browser.new_page() |
| 35 | return browser, page | 48 | return browser, page |
| 36 | 49 | ||
| 37 | 50 | ||
| 51 | +def _connect_existing(args: argparse.Namespace): | ||
| 52 | + """连接到 Chrome 并复用已有页面(用于分步发布的后续步骤)。""" | ||
| 53 | + from chrome_launcher import ensure_chrome | ||
| 54 | + from xhs.cdp import Browser | ||
| 55 | + | ||
| 56 | + if not ensure_chrome(port=args.port): | ||
| 57 | + _output( | ||
| 58 | + {"success": False, "error": "无法连接到 Chrome"}, | ||
| 59 | + exit_code=2, | ||
| 60 | + ) | ||
| 61 | + | ||
| 62 | + browser = Browser(host=args.host, port=args.port) | ||
| 63 | + browser.connect() | ||
| 64 | + page = browser.get_existing_page() | ||
| 65 | + if not page: | ||
| 66 | + _output( | ||
| 67 | + {"success": False, "error": "未找到已打开的页面,请先执行前置步骤"}, | ||
| 68 | + exit_code=2, | ||
| 69 | + ) | ||
| 70 | + return browser, page | ||
| 71 | + | ||
| 72 | + | ||
| 38 | def _headless_fallback(port: int) -> None: | 73 | def _headless_fallback(port: int) -> None: |
| 39 | """Headless 模式未登录时自动降级到有窗口模式。""" | 74 | """Headless 模式未登录时自动降级到有窗口模式。""" |
| 40 | from chrome_launcher import restart_chrome | 75 | from chrome_launcher import restart_chrome |
| @@ -332,7 +367,7 @@ def cmd_fill_publish(args: argparse.Namespace) -> None: | @@ -332,7 +367,7 @@ def cmd_fill_publish(args: argparse.Namespace) -> None: | ||
| 332 | } | 367 | } |
| 333 | ) | 368 | ) |
| 334 | finally: | 369 | finally: |
| 335 | - browser.close_page(page) | 370 | + # 不关闭页面,让用户在浏览器中预览 |
| 336 | browser.close() | 371 | browser.close() |
| 337 | 372 | ||
| 338 | 373 | ||
| @@ -368,15 +403,15 @@ def cmd_fill_publish_video(args: argparse.Namespace) -> None: | @@ -368,15 +403,15 @@ def cmd_fill_publish_video(args: argparse.Namespace) -> None: | ||
| 368 | } | 403 | } |
| 369 | ) | 404 | ) |
| 370 | finally: | 405 | finally: |
| 371 | - browser.close_page(page) | 406 | + # 不关闭页面,让用户在浏览器中预览 |
| 372 | browser.close() | 407 | browser.close() |
| 373 | 408 | ||
| 374 | 409 | ||
| 375 | def cmd_click_publish(args: argparse.Namespace) -> None: | 410 | def cmd_click_publish(args: argparse.Namespace) -> None: |
| 376 | - """点击发布按钮(在用户确认后调用)。""" | 411 | + """点击发布按钮(在用户确认后调用)。复用已有的发布页 tab。""" |
| 377 | from xhs.publish import click_publish_button | 412 | from xhs.publish import click_publish_button |
| 378 | 413 | ||
| 379 | - browser, page = _connect(args) | 414 | + browser, page = _connect_existing(args) |
| 380 | try: | 415 | try: |
| 381 | click_publish_button(page) | 416 | click_publish_button(page) |
| 382 | _output({"success": True, "status": "发布完成"}) | 417 | _output({"success": True, "status": "发布完成"}) |
| @@ -410,15 +445,15 @@ def cmd_long_article(args: argparse.Namespace) -> None: | @@ -410,15 +445,15 @@ def cmd_long_article(args: argparse.Namespace) -> None: | ||
| 410 | } | 445 | } |
| 411 | ) | 446 | ) |
| 412 | finally: | 447 | finally: |
| 413 | - browser.close_page(page) | 448 | + # 不关闭页面,后续 select-template / next-step 需要复用 |
| 414 | browser.close() | 449 | browser.close() |
| 415 | 450 | ||
| 416 | 451 | ||
| 417 | def cmd_select_template(args: argparse.Namespace) -> None: | 452 | def cmd_select_template(args: argparse.Namespace) -> None: |
| 418 | - """选择排版模板。""" | 453 | + """选择排版模板。复用已有的长文编辑页 tab。""" |
| 419 | from xhs.publish_long_article import select_template | 454 | from xhs.publish_long_article import select_template |
| 420 | 455 | ||
| 421 | - browser, page = _connect(args) | 456 | + browser, page = _connect_existing(args) |
| 422 | try: | 457 | try: |
| 423 | selected = select_template(page, args.name) | 458 | selected = select_template(page, args.name) |
| 424 | if selected: | 459 | if selected: |
| @@ -429,23 +464,23 @@ def cmd_select_template(args: argparse.Namespace) -> None: | @@ -429,23 +464,23 @@ def cmd_select_template(args: argparse.Namespace) -> None: | ||
| 429 | exit_code=2, | 464 | exit_code=2, |
| 430 | ) | 465 | ) |
| 431 | finally: | 466 | finally: |
| 432 | - browser.close_page(page) | 467 | + # 不关闭页面,后续 next-step 需要复用 |
| 433 | browser.close() | 468 | browser.close() |
| 434 | 469 | ||
| 435 | 470 | ||
| 436 | def cmd_next_step(args: argparse.Namespace) -> None: | 471 | def cmd_next_step(args: argparse.Namespace) -> None: |
| 437 | - """点击下一步 + 填写发布页描述。""" | 472 | + """点击下一步 + 填写发布页描述。复用已有的长文编辑页 tab。""" |
| 438 | from xhs.publish_long_article import click_next_and_fill_description | 473 | from xhs.publish_long_article import click_next_and_fill_description |
| 439 | 474 | ||
| 440 | with open(args.content_file, encoding="utf-8") as f: | 475 | with open(args.content_file, encoding="utf-8") as f: |
| 441 | description = f.read().strip() | 476 | description = f.read().strip() |
| 442 | 477 | ||
| 443 | - browser, page = _connect(args) | 478 | + browser, page = _connect_existing(args) |
| 444 | try: | 479 | try: |
| 445 | click_next_and_fill_description(page, description) | 480 | click_next_and_fill_description(page, description) |
| 446 | _output({"success": True, "status": "已进入发布页,等待确认发布"}) | 481 | _output({"success": True, "status": "已进入发布页,等待确认发布"}) |
| 447 | finally: | 482 | finally: |
| 448 | - browser.close_page(page) | 483 | + # 不关闭页面,等待 click-publish |
| 449 | browser.close() | 484 | browser.close() |
| 450 | 485 | ||
| 451 | 486 |
| @@ -287,16 +287,19 @@ class Page: | @@ -287,16 +287,19 @@ class Page: | ||
| 287 | }, | 287 | }, |
| 288 | ) | 288 | ) |
| 289 | time.sleep(0.1) | 289 | time.sleep(0.1) |
| 290 | - # 3. 逐字输入(随机 30-80ms 间隔) | 290 | + # 3. 逐字输入(随机 30-80ms 间隔,换行符转为 Enter 键) |
| 291 | for char in text: | 291 | for char in text: |
| 292 | - self._send_session( | ||
| 293 | - "Input.dispatchKeyEvent", | ||
| 294 | - {"type": "keyDown", "text": char}, | ||
| 295 | - ) | ||
| 296 | - self._send_session( | ||
| 297 | - "Input.dispatchKeyEvent", | ||
| 298 | - {"type": "keyUp", "text": char}, | ||
| 299 | - ) | 292 | + if char == "\n": |
| 293 | + self.press_key("Enter") | ||
| 294 | + else: | ||
| 295 | + self._send_session( | ||
| 296 | + "Input.dispatchKeyEvent", | ||
| 297 | + {"type": "keyDown", "text": char}, | ||
| 298 | + ) | ||
| 299 | + self._send_session( | ||
| 300 | + "Input.dispatchKeyEvent", | ||
| 301 | + {"type": "keyUp", "text": char}, | ||
| 302 | + ) | ||
| 300 | time.sleep(random.uniform(0.03, 0.08)) | 303 | time.sleep(random.uniform(0.03, 0.08)) |
| 301 | 304 | ||
| 302 | def get_element_text(self, selector: str) -> str | None: | 305 | def get_element_text(self, selector: str) -> str | None: |
| @@ -5,6 +5,7 @@ from __future__ import annotations | @@ -5,6 +5,7 @@ from __future__ import annotations | ||
| 5 | import json | 5 | import json |
| 6 | import logging | 6 | import logging |
| 7 | import random | 7 | import random |
| 8 | +import re | ||
| 8 | import time | 9 | import time |
| 9 | 10 | ||
| 10 | from .cdp import Page | 11 | from .cdp import Page |
| @@ -127,27 +128,31 @@ def _navigate_to_publish_page(page: Page) -> None: | @@ -127,27 +128,31 @@ def _navigate_to_publish_page(page: Page) -> None: | ||
| 127 | """导航到发布页面。""" | 128 | """导航到发布页面。""" |
| 128 | page.navigate(PUBLISH_URL) | 129 | page.navigate(PUBLISH_URL) |
| 129 | page.wait_for_load(timeout=300) | 130 | page.wait_for_load(timeout=300) |
| 130 | - time.sleep(2) | 131 | + time.sleep(3) |
| 131 | page.wait_dom_stable() | 132 | page.wait_dom_stable() |
| 132 | - time.sleep(1) | 133 | + time.sleep(2) |
| 133 | 134 | ||
| 134 | 135 | ||
| 135 | def _click_publish_tab(page: Page, tab_name: str) -> None: | 136 | def _click_publish_tab(page: Page, tab_name: str) -> None: |
| 136 | """点击发布页 TAB(上传图文/上传视频)。""" | 137 | """点击发布页 TAB(上传图文/上传视频)。""" |
| 137 | - page.wait_for_element(UPLOAD_CONTENT, timeout=15) | ||
| 138 | - | ||
| 139 | deadline = time.monotonic() + 15 | 138 | deadline = time.monotonic() + 15 |
| 140 | while time.monotonic() < deadline: | 139 | while time.monotonic() < deadline: |
| 141 | - # 查找匹配的 TAB | 140 | + # 查找匹配的 TAB(支持多种结构) |
| 142 | found = page.evaluate( | 141 | found = page.evaluate( |
| 143 | f""" | 142 | f""" |
| 144 | (() => {{ | 143 | (() => {{ |
| 145 | - const tabs = document.querySelectorAll({json.dumps(CREATOR_TAB)}); | 144 | + // 策略1: 查找 div.creator-tab(过滤隐藏元素) |
| 145 | + let tabs = document.querySelectorAll({json.dumps(CREATOR_TAB)}); | ||
| 146 | for (const tab of tabs) {{ | 146 | for (const tab of tabs) {{ |
| 147 | - if (tab.textContent.trim() === {json.dumps(tab_name)}) {{ | ||
| 148 | - // 检查是否被遮挡 | 147 | + const titleSpan = tab.querySelector('span.title'); |
| 148 | + const tabText = titleSpan ? titleSpan.textContent.trim() : tab.textContent.trim(); | ||
| 149 | + if (tabText === {json.dumps(tab_name)}) {{ | ||
| 149 | const rect = tab.getBoundingClientRect(); | 150 | const rect = tab.getBoundingClientRect(); |
| 151 | + const style = window.getComputedStyle(tab); | ||
| 152 | + // 跳过隐藏或被移出视口的元素 | ||
| 150 | if (rect.width === 0 || rect.height === 0) continue; | 153 | if (rect.width === 0 || rect.height === 0) continue; |
| 154 | + if (rect.left < 0 || rect.top < 0) continue; | ||
| 155 | + if (style.display === 'none' || style.visibility === 'hidden') continue; | ||
| 151 | const x = rect.left + rect.width / 2; | 156 | const x = rect.left + rect.width / 2; |
| 152 | const y = rect.top + rect.height / 2; | 157 | const y = rect.top + rect.height / 2; |
| 153 | const target = document.elementFromPoint(x, y); | 158 | const target = document.elementFromPoint(x, y); |
| @@ -158,6 +163,21 @@ def _click_publish_tab(page: Page, tab_name: str) -> None: | @@ -158,6 +163,21 @@ def _click_publish_tab(page: Page, tab_name: str) -> None: | ||
| 158 | return 'blocked'; | 163 | return 'blocked'; |
| 159 | }} | 164 | }} |
| 160 | }} | 165 | }} |
| 166 | + | ||
| 167 | + // 策略2: 查找任意包含目标文本的元素 | ||
| 168 | + const allElements = document.querySelectorAll('*'); | ||
| 169 | + for (const el of allElements) {{ | ||
| 170 | + if (el.children.length === 0 && el.textContent.trim() === {json.dumps(tab_name)}) {{ | ||
| 171 | + const rect = el.getBoundingClientRect(); | ||
| 172 | + const style = window.getComputedStyle(el); | ||
| 173 | + if (rect.width === 0 || rect.height === 0) continue; | ||
| 174 | + if (rect.left < 0 || rect.top < 0) continue; | ||
| 175 | + if (style.display === 'none' || style.visibility === 'hidden') continue; | ||
| 176 | + el.click(); | ||
| 177 | + return 'clicked'; | ||
| 178 | + }} | ||
| 179 | + }} | ||
| 180 | + | ||
| 161 | return 'not_found'; | 181 | return 'not_found'; |
| 162 | }})() | 182 | }})() |
| 163 | """ | 183 | """ |
| @@ -172,6 +192,19 @@ def _click_publish_tab(page: Page, tab_name: str) -> None: | @@ -172,6 +192,19 @@ def _click_publish_tab(page: Page, tab_name: str) -> None: | ||
| 172 | 192 | ||
| 173 | time.sleep(0.2) | 193 | time.sleep(0.2) |
| 174 | 194 | ||
| 195 | + # 调试:输出页面信息 | ||
| 196 | + debug_info = page.evaluate(""" | ||
| 197 | + (() => { | ||
| 198 | + const creatorTabs = document.querySelectorAll('div.creator-tab'); | ||
| 199 | + const tabTexts = Array.from(creatorTabs).map(t => ({ | ||
| 200 | + text: t.textContent.trim(), | ||
| 201 | + html: t.outerHTML.substring(0, 200) | ||
| 202 | + })); | ||
| 203 | + const url = window.location.href; | ||
| 204 | + return JSON.stringify({url, tabCount: creatorTabs.length, tabs: tabTexts}); | ||
| 205 | + })() | ||
| 206 | + """) | ||
| 207 | + logger.error("调试信息: %s", debug_info) | ||
| 175 | raise PublishError(f"没有找到发布 TAB - {tab_name}") | 208 | raise PublishError(f"没有找到发布 TAB - {tab_name}") |
| 176 | 209 | ||
| 177 | 210 | ||
| @@ -223,6 +256,34 @@ def _wait_for_upload_complete(page: Page, expected_count: int) -> None: | @@ -223,6 +256,34 @@ def _wait_for_upload_complete(page: Page, expected_count: int) -> None: | ||
| 223 | # ========== 表单提交 ========== | 256 | # ========== 表单提交 ========== |
| 224 | 257 | ||
| 225 | 258 | ||
| 259 | +def _extract_hashtags_from_content(content: str, tags: list[str]) -> tuple[str, list[str]]: | ||
| 260 | + """从正文末尾提取 hashtag 行,合并到 tags 列表。 | ||
| 261 | + | ||
| 262 | + Returns: | ||
| 263 | + (cleaned_content, merged_tags) | ||
| 264 | + """ | ||
| 265 | + lines = content.rstrip().split("\n") | ||
| 266 | + # 检查最后一行是否全是 #tag 格式 | ||
| 267 | + if lines: | ||
| 268 | + last_line = lines[-1].strip() | ||
| 269 | + hashtag_pattern = re.compile(r"^(#\S+\s*)+$") | ||
| 270 | + if hashtag_pattern.match(last_line): | ||
| 271 | + # 提取 hashtag | ||
| 272 | + extracted = re.findall(r"#(\S+)", last_line) | ||
| 273 | + # 合并到 tags(去重) | ||
| 274 | + existing = {t.lstrip("#") for t in tags} | ||
| 275 | + merged = list(tags) | ||
| 276 | + for t in extracted: | ||
| 277 | + if t not in existing: | ||
| 278 | + merged.append(t) | ||
| 279 | + existing.add(t) | ||
| 280 | + # 去掉最后一行 | ||
| 281 | + cleaned = "\n".join(lines[:-1]).rstrip() | ||
| 282 | + logger.info("从正文末尾提取 %d 个标签,合并后共 %d 个", len(extracted), len(merged)) | ||
| 283 | + return cleaned, merged | ||
| 284 | + return content, list(tags) | ||
| 285 | + | ||
| 286 | + | ||
| 226 | def _fill_publish_form( | 287 | def _fill_publish_form( |
| 227 | page: Page, | 288 | page: Page, |
| 228 | title: str, | 289 | title: str, |
| @@ -233,6 +294,9 @@ def _fill_publish_form( | @@ -233,6 +294,9 @@ def _fill_publish_form( | ||
| 233 | visibility: str, | 294 | visibility: str, |
| 234 | ) -> None: | 295 | ) -> None: |
| 235 | """填写表单(不点击发布)。""" | 296 | """填写表单(不点击发布)。""" |
| 297 | + # 从正文末尾提取 hashtag 并合并到 tags | ||
| 298 | + content, tags = _extract_hashtags_from_content(content, tags) | ||
| 299 | + | ||
| 236 | # 标题 | 300 | # 标题 |
| 237 | page.input_text(TITLE_INPUT, title) | 301 | page.input_text(TITLE_INPUT, title) |
| 238 | time.sleep(0.5) | 302 | time.sleep(0.5) |
| @@ -334,6 +398,10 @@ def _input_tags(page: Page, content_selector: str, tags: list[str]) -> None: | @@ -334,6 +398,10 @@ def _input_tags(page: Page, content_selector: str, tags: list[str]) -> None: | ||
| 334 | """输入标签。""" | 398 | """输入标签。""" |
| 335 | time.sleep(1) | 399 | time.sleep(1) |
| 336 | 400 | ||
| 401 | + # 先点击正文编辑器,确保焦点在正文而非标题 | ||
| 402 | + page.click_element(content_selector) | ||
| 403 | + time.sleep(0.3) | ||
| 404 | + | ||
| 337 | # 移动光标到正文末尾(20次 ArrowDown) | 405 | # 移动光标到正文末尾(20次 ArrowDown) |
| 338 | for _ in range(20): | 406 | for _ in range(20): |
| 339 | page.press_key("ArrowDown") | 407 | page.press_key("ArrowDown") |
| @@ -353,27 +421,32 @@ def _input_single_tag(page: Page, content_selector: str, tag: str) -> None: | @@ -353,27 +421,32 @@ def _input_single_tag(page: Page, content_selector: str, tag: str) -> None: | ||
| 353 | """输入单个标签。""" | 421 | """输入单个标签。""" |
| 354 | # 输入 # | 422 | # 输入 # |
| 355 | page.type_text("#", delay_ms=0) | 423 | page.type_text("#", delay_ms=0) |
| 356 | - time.sleep(0.2) | 424 | + time.sleep(0.3) |
| 357 | 425 | ||
| 358 | - # 逐字输入标签 | 426 | + # 逐字输入标签(随机间隔模拟真实输入) |
| 359 | for char in tag: | 427 | for char in tag: |
| 360 | - page.type_text(char, delay_ms=50) | 428 | + page.type_text(char, delay_ms=0) |
| 429 | + time.sleep(random.uniform(0.05, 0.12)) | ||
| 361 | 430 | ||
| 362 | - time.sleep(1) | 431 | + # 等待标签联想出现(最多 3 秒) |
| 432 | + deadline = time.monotonic() + 3.0 | ||
| 433 | + clicked = False | ||
| 434 | + while time.monotonic() < deadline: | ||
| 435 | + time.sleep(0.5) | ||
| 436 | + if page.has_element(TAG_TOPIC_CONTAINER): | ||
| 437 | + item_selector = f"{TAG_TOPIC_CONTAINER} {TAG_FIRST_ITEM}" | ||
| 438 | + if page.has_element(item_selector): | ||
| 439 | + page.click_element(item_selector) | ||
| 440 | + logger.info("点击标签联想: %s", tag) | ||
| 441 | + clicked = True | ||
| 442 | + break | ||
| 363 | 443 | ||
| 364 | - # 尝试点击标签联想 | ||
| 365 | - if page.has_element(TAG_TOPIC_CONTAINER): | ||
| 366 | - item_selector = f"{TAG_TOPIC_CONTAINER} {TAG_FIRST_ITEM}" | ||
| 367 | - if page.has_element(item_selector): | ||
| 368 | - page.click_element(item_selector) | ||
| 369 | - logger.info("点击标签联想: %s", tag) | ||
| 370 | - time.sleep(0.5) | ||
| 371 | - return | 444 | + if not clicked: |
| 445 | + # 没有联想,直接空格 | ||
| 446 | + logger.warning("未找到标签联想,直接输入空格: %s", tag) | ||
| 447 | + page.type_text(" ", delay_ms=0) | ||
| 372 | 448 | ||
| 373 | - # 没有联想,直接空格 | ||
| 374 | - logger.warning("未找到标签联想,直接输入空格: %s", tag) | ||
| 375 | - page.type_text(" ", delay_ms=0) | ||
| 376 | - time.sleep(0.5) | 449 | + time.sleep(0.8) |
| 377 | 450 | ||
| 378 | 451 | ||
| 379 | # ========== 定时发布 ========== | 452 | # ========== 定时发布 ========== |
-
Please register or login to post a comment