Angiin

fix: 修复发布流程 4 个 bug — 页面关闭、标签错位、换行丢失、标签重复

- cli.py: 分步发布命令(fill-publish/fill-publish-video/long-article/select-template/next-step)不再关闭页面,新增 _connect_existing 复用已有 tab
- cdp.py: input_content_editable 遇到 \n 时按 Enter 键,正确产生段落换行
- publish.py: _input_tags 先 focus 正文编辑器再输入标签;新增 _extract_hashtags_from_content 从正文末尾提取 hashtag 合并到 tags 去重;标签输入增加重试等待联想下拉
@@ -12,6 +12,12 @@ import json @@ -12,6 +12,12 @@ import json
12 import logging 12 import logging
13 import sys 13 import sys
14 14
  15 +# Windows 控制台默认编码(如 cp1252)不支持中文,强制 UTF-8
  16 +if sys.stdout and hasattr(sys.stdout, "reconfigure"):
  17 + sys.stdout.reconfigure(encoding="utf-8")
  18 +if sys.stderr and hasattr(sys.stderr, "reconfigure"):
  19 + sys.stderr.reconfigure(encoding="utf-8")
  20 +
15 logging.basicConfig( 21 logging.basicConfig(
16 level=logging.INFO, 22 level=logging.INFO,
17 format="%(asctime)s %(levelname)s %(name)s: %(message)s", 23 format="%(asctime)s %(levelname)s %(name)s: %(message)s",
@@ -27,14 +33,43 @@ def _output(data: dict, exit_code: int = 0) -> None: @@ -27,14 +33,43 @@ def _output(data: dict, exit_code: int = 0) -> None:
27 33
def _connect(args: argparse.Namespace):
    """Make sure Chrome is available, attach to it and open a new page.

    Args:
        args: Parsed CLI arguments; ``args.host`` and ``args.port`` locate
            the Chrome debugging endpoint.

    Returns:
        A ``(browser, page)`` tuple where ``page`` comes from
        ``browser.new_page()``.
    """
    from chrome_launcher import ensure_chrome
    from xhs.cdp import Browser

    chrome_ready = ensure_chrome(port=args.port)
    if not chrome_ready:
        # NOTE(review): presumably _output() ends the process with the given
        # exit code, so nothing below runs on this path -- confirm.
        failure = {"success": False, "error": "无法启动 Chrome,请检查 Chrome 是否已安装"}
        _output(failure, exit_code=2)

    browser = Browser(host=args.host, port=args.port)
    browser.connect()
    return browser, browser.new_page()
36 49
37 50
def _connect_existing(args: argparse.Namespace):
    """Attach to Chrome and reuse a page that is already open.

    Intended for the later steps of a multi-step publish flow, where an
    earlier command left its tab open on purpose.

    Args:
        args: Parsed CLI arguments; ``args.host`` and ``args.port`` locate
            the Chrome debugging endpoint.

    Returns:
        A ``(browser, page)`` tuple where ``page`` comes from
        ``browser.get_existing_page()``.
    """
    from chrome_launcher import ensure_chrome
    from xhs.cdp import Browser

    if not ensure_chrome(port=args.port):
        # NOTE(review): presumably _output() ends the process with the given
        # exit code, so nothing below runs on this path -- confirm.
        _output(
            {"success": False, "error": "无法连接到 Chrome"},
            exit_code=2,
        )

    browser = Browser(host=args.host, port=args.port)
    browser.connect()
    page = browser.get_existing_page()
    if not page:
        # No browser is handed back on this path, so the caller's
        # ``finally: browser.close()`` never runs; release the connection
        # here before reporting the failure.
        browser.close()
        _output(
            {"success": False, "error": "未找到已打开的页面,请先执行前置步骤"},
            exit_code=2,
        )
    return browser, page
  71 +
  72 +
38 def _headless_fallback(port: int) -> None: 73 def _headless_fallback(port: int) -> None:
39 """Headless 模式未登录时自动降级到有窗口模式。""" 74 """Headless 模式未登录时自动降级到有窗口模式。"""
40 from chrome_launcher import restart_chrome 75 from chrome_launcher import restart_chrome
@@ -332,7 +367,7 @@ def cmd_fill_publish(args: argparse.Namespace) -> None: @@ -332,7 +367,7 @@ def cmd_fill_publish(args: argparse.Namespace) -> None:
332 } 367 }
333 ) 368 )
334 finally: 369 finally:
335 - browser.close_page(page) 370 + # 不关闭页面,让用户在浏览器中预览
336 browser.close() 371 browser.close()
337 372
338 373
@@ -368,15 +403,15 @@ def cmd_fill_publish_video(args: argparse.Namespace) -> None: @@ -368,15 +403,15 @@ def cmd_fill_publish_video(args: argparse.Namespace) -> None:
368 } 403 }
369 ) 404 )
370 finally: 405 finally:
371 - browser.close_page(page) 406 + # 不关闭页面,让用户在浏览器中预览
372 browser.close() 407 browser.close()
373 408
374 409
375 def cmd_click_publish(args: argparse.Namespace) -> None: 410 def cmd_click_publish(args: argparse.Namespace) -> None:
376 - """点击发布按钮(在用户确认后调用)。""" 411 + """点击发布按钮(在用户确认后调用)。复用已有的发布页 tab。"""
377 from xhs.publish import click_publish_button 412 from xhs.publish import click_publish_button
378 413
379 - browser, page = _connect(args) 414 + browser, page = _connect_existing(args)
380 try: 415 try:
381 click_publish_button(page) 416 click_publish_button(page)
382 _output({"success": True, "status": "发布完成"}) 417 _output({"success": True, "status": "发布完成"})
@@ -410,15 +445,15 @@ def cmd_long_article(args: argparse.Namespace) -> None: @@ -410,15 +445,15 @@ def cmd_long_article(args: argparse.Namespace) -> None:
410 } 445 }
411 ) 446 )
412 finally: 447 finally:
413 - browser.close_page(page) 448 + # 不关闭页面,后续 select-template / next-step 需要复用
414 browser.close() 449 browser.close()
415 450
416 451
417 def cmd_select_template(args: argparse.Namespace) -> None: 452 def cmd_select_template(args: argparse.Namespace) -> None:
418 - """选择排版模板。""" 453 + """选择排版模板。复用已有的长文编辑页 tab。"""
419 from xhs.publish_long_article import select_template 454 from xhs.publish_long_article import select_template
420 455
421 - browser, page = _connect(args) 456 + browser, page = _connect_existing(args)
422 try: 457 try:
423 selected = select_template(page, args.name) 458 selected = select_template(page, args.name)
424 if selected: 459 if selected:
@@ -429,23 +464,23 @@ def cmd_select_template(args: argparse.Namespace) -> None: @@ -429,23 +464,23 @@ def cmd_select_template(args: argparse.Namespace) -> None:
429 exit_code=2, 464 exit_code=2,
430 ) 465 )
431 finally: 466 finally:
432 - browser.close_page(page) 467 + # 不关闭页面,后续 next-step 需要复用
433 browser.close() 468 browser.close()
434 469
435 470
436 def cmd_next_step(args: argparse.Namespace) -> None: 471 def cmd_next_step(args: argparse.Namespace) -> None:
437 - """点击下一步 + 填写发布页描述。""" 472 + """点击下一步 + 填写发布页描述。复用已有的长文编辑页 tab。"""
438 from xhs.publish_long_article import click_next_and_fill_description 473 from xhs.publish_long_article import click_next_and_fill_description
439 474
440 with open(args.content_file, encoding="utf-8") as f: 475 with open(args.content_file, encoding="utf-8") as f:
441 description = f.read().strip() 476 description = f.read().strip()
442 477
443 - browser, page = _connect(args) 478 + browser, page = _connect_existing(args)
444 try: 479 try:
445 click_next_and_fill_description(page, description) 480 click_next_and_fill_description(page, description)
446 _output({"success": True, "status": "已进入发布页,等待确认发布"}) 481 _output({"success": True, "status": "已进入发布页,等待确认发布"})
447 finally: 482 finally:
448 - browser.close_page(page) 483 + # 不关闭页面,等待 click-publish
449 browser.close() 484 browser.close()
450 485
451 486
@@ -287,8 +287,11 @@ class Page: @@ -287,8 +287,11 @@ class Page:
287 }, 287 },
288 ) 288 )
289 time.sleep(0.1) 289 time.sleep(0.1)
290 - # 3. 逐字输入(随机 30-80ms 间隔) 290 + # 3. 逐字输入(随机 30-80ms 间隔,换行符转为 Enter 键)
291 for char in text: 291 for char in text:
  292 + if char == "\n":
  293 + self.press_key("Enter")
  294 + else:
292 self._send_session( 295 self._send_session(
293 "Input.dispatchKeyEvent", 296 "Input.dispatchKeyEvent",
294 {"type": "keyDown", "text": char}, 297 {"type": "keyDown", "text": char},
@@ -5,6 +5,7 @@ from __future__ import annotations @@ -5,6 +5,7 @@ from __future__ import annotations
5 import json 5 import json
6 import logging 6 import logging
7 import random 7 import random
  8 +import re
8 import time 9 import time
9 10
10 from .cdp import Page 11 from .cdp import Page
@@ -127,27 +128,31 @@ def _navigate_to_publish_page(page: Page) -> None: @@ -127,27 +128,31 @@ def _navigate_to_publish_page(page: Page) -> None:
127 """导航到发布页面。""" 128 """导航到发布页面。"""
128 page.navigate(PUBLISH_URL) 129 page.navigate(PUBLISH_URL)
129 page.wait_for_load(timeout=300) 130 page.wait_for_load(timeout=300)
130 - time.sleep(2) 131 + time.sleep(3)
131 page.wait_dom_stable() 132 page.wait_dom_stable()
132 - time.sleep(1) 133 + time.sleep(2)
133 134
134 135
135 def _click_publish_tab(page: Page, tab_name: str) -> None: 136 def _click_publish_tab(page: Page, tab_name: str) -> None:
136 """点击发布页 TAB(上传图文/上传视频)。""" 137 """点击发布页 TAB(上传图文/上传视频)。"""
137 - page.wait_for_element(UPLOAD_CONTENT, timeout=15)  
138 -  
139 deadline = time.monotonic() + 15 138 deadline = time.monotonic() + 15
140 while time.monotonic() < deadline: 139 while time.monotonic() < deadline:
141 - # 查找匹配的 TAB 140 + # 查找匹配的 TAB(支持多种结构)
142 found = page.evaluate( 141 found = page.evaluate(
143 f""" 142 f"""
144 (() => {{ 143 (() => {{
145 - const tabs = document.querySelectorAll({json.dumps(CREATOR_TAB)}); 144 + // 策略1: 查找 div.creator-tab(过滤隐藏元素)
  145 + let tabs = document.querySelectorAll({json.dumps(CREATOR_TAB)});
146 for (const tab of tabs) {{ 146 for (const tab of tabs) {{
147 - if (tab.textContent.trim() === {json.dumps(tab_name)}) {{  
148 - // 检查是否被遮挡 147 + const titleSpan = tab.querySelector('span.title');
  148 + const tabText = titleSpan ? titleSpan.textContent.trim() : tab.textContent.trim();
  149 + if (tabText === {json.dumps(tab_name)}) {{
149 const rect = tab.getBoundingClientRect(); 150 const rect = tab.getBoundingClientRect();
  151 + const style = window.getComputedStyle(tab);
  152 + // 跳过隐藏或被移出视口的元素
150 if (rect.width === 0 || rect.height === 0) continue; 153 if (rect.width === 0 || rect.height === 0) continue;
  154 + if (rect.left < 0 || rect.top < 0) continue;
  155 + if (style.display === 'none' || style.visibility === 'hidden') continue;
151 const x = rect.left + rect.width / 2; 156 const x = rect.left + rect.width / 2;
152 const y = rect.top + rect.height / 2; 157 const y = rect.top + rect.height / 2;
153 const target = document.elementFromPoint(x, y); 158 const target = document.elementFromPoint(x, y);
@@ -158,6 +163,21 @@ def _click_publish_tab(page: Page, tab_name: str) -> None: @@ -158,6 +163,21 @@ def _click_publish_tab(page: Page, tab_name: str) -> None:
158 return 'blocked'; 163 return 'blocked';
159 }} 164 }}
160 }} 165 }}
  166 +
  167 + // 策略2: 查找任意包含目标文本的元素
  168 + const allElements = document.querySelectorAll('*');
  169 + for (const el of allElements) {{
  170 + if (el.children.length === 0 && el.textContent.trim() === {json.dumps(tab_name)}) {{
  171 + const rect = el.getBoundingClientRect();
  172 + const style = window.getComputedStyle(el);
  173 + if (rect.width === 0 || rect.height === 0) continue;
  174 + if (rect.left < 0 || rect.top < 0) continue;
  175 + if (style.display === 'none' || style.visibility === 'hidden') continue;
  176 + el.click();
  177 + return 'clicked';
  178 + }}
  179 + }}
  180 +
161 return 'not_found'; 181 return 'not_found';
162 }})() 182 }})()
163 """ 183 """
@@ -172,6 +192,19 @@ def _click_publish_tab(page: Page, tab_name: str) -> None: @@ -172,6 +192,19 @@ def _click_publish_tab(page: Page, tab_name: str) -> None:
172 192
173 time.sleep(0.2) 193 time.sleep(0.2)
174 194
  195 + # 调试:输出页面信息
  196 + debug_info = page.evaluate("""
  197 + (() => {
  198 + const creatorTabs = document.querySelectorAll('div.creator-tab');
  199 + const tabTexts = Array.from(creatorTabs).map(t => ({
  200 + text: t.textContent.trim(),
  201 + html: t.outerHTML.substring(0, 200)
  202 + }));
  203 + const url = window.location.href;
  204 + return JSON.stringify({url, tabCount: creatorTabs.length, tabs: tabTexts});
  205 + })()
  206 + """)
  207 + logger.error("调试信息: %s", debug_info)
175 raise PublishError(f"没有找到发布 TAB - {tab_name}") 208 raise PublishError(f"没有找到发布 TAB - {tab_name}")
176 209
177 210
@@ -223,6 +256,34 @@ def _wait_for_upload_complete(page: Page, expected_count: int) -> None: @@ -223,6 +256,34 @@ def _wait_for_upload_complete(page: Page, expected_count: int) -> None:
223 # ========== 表单提交 ========== 256 # ========== 表单提交 ==========
224 257
225 258
  259 +def _extract_hashtags_from_content(content: str, tags: list[str]) -> tuple[str, list[str]]:
  260 + """从正文末尾提取 hashtag 行,合并到 tags 列表。
  261 +
  262 + Returns:
  263 + (cleaned_content, merged_tags)
  264 + """
  265 + lines = content.rstrip().split("\n")
  266 + # 检查最后一行是否全是 #tag 格式
  267 + if lines:
  268 + last_line = lines[-1].strip()
  269 + hashtag_pattern = re.compile(r"^(#\S+\s*)+$")
  270 + if hashtag_pattern.match(last_line):
  271 + # 提取 hashtag
  272 + extracted = re.findall(r"#(\S+)", last_line)
  273 + # 合并到 tags(去重)
  274 + existing = {t.lstrip("#") for t in tags}
  275 + merged = list(tags)
  276 + for t in extracted:
  277 + if t not in existing:
  278 + merged.append(t)
  279 + existing.add(t)
  280 + # 去掉最后一行
  281 + cleaned = "\n".join(lines[:-1]).rstrip()
  282 + logger.info("从正文末尾提取 %d 个标签,合并后共 %d 个", len(extracted), len(merged))
  283 + return cleaned, merged
  284 + return content, list(tags)
  285 +
  286 +
226 def _fill_publish_form( 287 def _fill_publish_form(
227 page: Page, 288 page: Page,
228 title: str, 289 title: str,
@@ -233,6 +294,9 @@ def _fill_publish_form( @@ -233,6 +294,9 @@ def _fill_publish_form(
233 visibility: str, 294 visibility: str,
234 ) -> None: 295 ) -> None:
235 """填写表单(不点击发布)。""" 296 """填写表单(不点击发布)。"""
  297 + # 从正文末尾提取 hashtag 并合并到 tags
  298 + content, tags = _extract_hashtags_from_content(content, tags)
  299 +
236 # 标题 300 # 标题
237 page.input_text(TITLE_INPUT, title) 301 page.input_text(TITLE_INPUT, title)
238 time.sleep(0.5) 302 time.sleep(0.5)
@@ -334,6 +398,10 @@ def _input_tags(page: Page, content_selector: str, tags: list[str]) -> None: @@ -334,6 +398,10 @@ def _input_tags(page: Page, content_selector: str, tags: list[str]) -> None:
334 """输入标签。""" 398 """输入标签。"""
335 time.sleep(1) 399 time.sleep(1)
336 400
  401 + # 先点击正文编辑器,确保焦点在正文而非标题
  402 + page.click_element(content_selector)
  403 + time.sleep(0.3)
  404 +
337 # 移动光标到正文末尾(20次 ArrowDown) 405 # 移动光标到正文末尾(20次 ArrowDown)
338 for _ in range(20): 406 for _ in range(20):
339 page.press_key("ArrowDown") 407 page.press_key("ArrowDown")
@@ -353,27 +421,32 @@ def _input_single_tag(page: Page, content_selector: str, tag: str) -> None: @@ -353,27 +421,32 @@ def _input_single_tag(page: Page, content_selector: str, tag: str) -> None:
353 """输入单个标签。""" 421 """输入单个标签。"""
354 # 输入 # 422 # 输入 #
355 page.type_text("#", delay_ms=0) 423 page.type_text("#", delay_ms=0)
356 - time.sleep(0.2) 424 + time.sleep(0.3)
357 425
358 - # 逐字输入标签 426 + # 逐字输入标签(随机间隔模拟真实输入)
359 for char in tag: 427 for char in tag:
360 - page.type_text(char, delay_ms=50)  
361 -  
362 - time.sleep(1) 428 + page.type_text(char, delay_ms=0)
  429 + time.sleep(random.uniform(0.05, 0.12))
363 430
364 - # 尝试点击标签联想 431 + # 等待标签联想出现(最多 3 秒)
  432 + deadline = time.monotonic() + 3.0
  433 + clicked = False
  434 + while time.monotonic() < deadline:
  435 + time.sleep(0.5)
365 if page.has_element(TAG_TOPIC_CONTAINER): 436 if page.has_element(TAG_TOPIC_CONTAINER):
366 item_selector = f"{TAG_TOPIC_CONTAINER} {TAG_FIRST_ITEM}" 437 item_selector = f"{TAG_TOPIC_CONTAINER} {TAG_FIRST_ITEM}"
367 if page.has_element(item_selector): 438 if page.has_element(item_selector):
368 page.click_element(item_selector) 439 page.click_element(item_selector)
369 logger.info("点击标签联想: %s", tag) 440 logger.info("点击标签联想: %s", tag)
370 - time.sleep(0.5)  
371 - return 441 + clicked = True
  442 + break
372 443
  444 + if not clicked:
373 # 没有联想,直接空格 445 # 没有联想,直接空格
374 logger.warning("未找到标签联想,直接输入空格: %s", tag) 446 logger.warning("未找到标签联想,直接输入空格: %s", tag)
375 page.type_text(" ", delay_ms=0) 447 page.type_text(" ", delay_ms=0)
376 - time.sleep(0.5) 448 +
  449 + time.sleep(0.8)
377 450
378 451
379 # ========== 定时发布 ========== 452 # ========== 定时发布 ==========