# test_crawler_managers.py
from __future__ import annotations

from types import SimpleNamespace

import backend.crawler.managers as crawler_managers


class _FakePopen:
    def __init__(self, command, **kwargs) -> None:
        self.command = list(command)
        self.kwargs = kwargs
        self.stdout = None

    def poll(self):
        return None


class _FakeThread:
    def __init__(self, target=None, args=(), daemon=None) -> None:
        self.target = target
        self.args = args
        self.daemon = daemon
        self.started = False

    def start(self) -> None:
        self.started = True


def test_crawl_task_manager_uses_canonical_crawl_command_spec(monkeypatch):
    """``CrawlTaskManager.start`` builds its command via ``build_crawl_command_spec``.

    Every collaborator on ``crawler_managers`` is replaced with a recording
    fake, then the test checks the arguments handed to the spec builder, the
    config written to history / last-crawl state, and the command and
    process kwargs the fake ``Popen`` received.
    """
    manager = crawler_managers.CrawlTaskManager(crawler_managers.LoginTaskManager())
    history_calls: list[dict[str, object]] = []
    persisted_configs: list[tuple[str, dict[str, object]]] = []
    spec_requests: list[dict[str, object]] = []
    streaming_kwargs_calls: list[dict[str, object]] = []

    def fake_build_crawl_command_spec(**kwargs):
        spec_requests.append(dict(kwargs))
        return SimpleNamespace(
            command=["crawl", "--platform", kwargs["platform"]],
            resolved_save_data_option="db",
        )

    def fake_create_history_entry(**kwargs):
        history_calls.append(kwargs)
        return "history-1"

    def fake_save_last_crawl_config(platform, config):
        # Store a copy of the config as it was at save time.
        persisted_configs.append((platform, dict(config)))

    def fake_build_streaming_process_kwargs():
        streaming_kwargs_calls.append({"called": True})
        return {
            "cwd": "runtime-root",
            "env": {"ENV": "1"},
        }

    def fake_now_iso():
        return "2026-04-20T00:00:00"

    monkeypatch.setattr(crawler_managers, "build_crawl_command_spec", fake_build_crawl_command_spec)
    monkeypatch.setattr(crawler_managers.ui_state_store, "create_history_entry", fake_create_history_entry)
    monkeypatch.setattr(crawler_managers.ui_state_store, "save_last_crawl_config", fake_save_last_crawl_config)
    monkeypatch.setattr(crawler_managers, "build_streaming_process_kwargs", fake_build_streaming_process_kwargs)
    monkeypatch.setattr(crawler_managers, "now_iso", fake_now_iso)
    # Swap in the inert fakes for process and thread creation.
    monkeypatch.setattr(crawler_managers.subprocess, "Popen", _FakePopen)
    monkeypatch.setattr(crawler_managers.threading, "Thread", _FakeThread)

    ok, message = manager.start({"platform": "xiaohongshu", "keywords": "museum"})

    expected_spec = {
        "platform": "xiaohongshu",
        "login_type": "qrcode",
        "crawler_type": "search",
        "keywords": "museum",
        "specified_ids": "",
        "creator_ids": "",
        "start_page": 1,
        "max_notes": 20,
        "max_comments": 20,
        "enable_comments": True,
        "enable_sub_comments": False,
        "save_data_option": "",
        "headless": True,
        "cookies": "",
        "phone": "",
    }

    assert ok is True
    assert spec_requests == [expected_spec]
    assert history_calls[0]["config"]["save_option"] == "db"
    assert persisted_configs == [("xiaohongshu", history_calls[0]["config"])]
    assert manager.snapshot()["current_config"]["save_option"] == "db"
    assert streaming_kwargs_calls == [{"called": True}]

    process = manager._process
    assert process.command == ["crawl", "--platform", "xiaohongshu"]
    assert process.kwargs["cwd"] == "runtime-root"
    assert process.kwargs["env"] == {"ENV": "1"}
    assert isinstance(message, str)


def test_login_task_manager_uses_canonical_status_command_spec_and_completed_process_kwargs(monkeypatch):
    """``LoginTaskManager.check_login`` uses the status spec and completed-process kwargs.

    The spec builder, the kwargs builder and ``subprocess.run`` are replaced
    with recording fakes; the test then checks what each one was called with
    and that the fake builder outputs reached ``subprocess.run``.
    """
    manager = crawler_managers.LoginTaskManager()
    recorded_runs: list[dict[str, object]] = []
    spec_requests: list[dict[str, object]] = []
    process_kwargs_requests: list[dict[str, object]] = []

    def fake_build_login_status_command_spec(**kwargs):
        spec_requests.append(dict(kwargs))
        return SimpleNamespace(command=["status", "--platform", kwargs["platform"]])

    def fake_build_completed_process_kwargs(**kwargs):
        process_kwargs_requests.append(dict(kwargs))
        return {
            "cwd": "runtime-root",
            "env": {"ENV": "1"},
        }

    def fake_run(command, **kwargs):
        # Record only the pieces the assertions below care about.
        recorded_runs.append(
            dict(command=list(command), cwd=kwargs["cwd"], env=kwargs["env"])
        )
        return SimpleNamespace(returncode=0, stdout="", stderr="")

    monkeypatch.setattr(
        crawler_managers,
        "build_login_status_command_spec",
        fake_build_login_status_command_spec,
    )
    monkeypatch.setattr(
        crawler_managers,
        "build_completed_process_kwargs",
        fake_build_completed_process_kwargs,
    )
    monkeypatch.setattr(crawler_managers.subprocess, "run", fake_run)

    ok, payload = manager.check_login("xhs", headless=True)

    expected_run = {
        "command": ["status", "--platform", "xhs"],
        "cwd": "runtime-root",
        "env": {"ENV": "1"},
    }

    assert ok is True
    assert payload["platform"] == "xhs"
    assert spec_requests == [{"platform": "xhs", "headless": True}]
    assert process_kwargs_requests == [{"timeout": 120}]
    assert recorded_runs == [expected_run]


def test_login_task_manager_uses_canonical_login_command_spec(monkeypatch):
    """``LoginTaskManager.start_login`` builds its command via ``build_login_command_spec``.

    Collaborators on ``crawler_managers`` are replaced with recording fakes;
    the test then checks the arguments given to the spec builder, the config
    written to history / last-login state, and the command and process
    kwargs the fake ``Popen`` received.
    """
    manager = crawler_managers.LoginTaskManager()
    history_calls: list[dict[str, object]] = []
    persisted_configs: list[tuple[str, dict[str, object]]] = []
    spec_requests: list[dict[str, object]] = []
    streaming_kwargs_calls: list[dict[str, object]] = []

    def fake_build_login_command_spec(**kwargs):
        spec_requests.append(dict(kwargs))
        return SimpleNamespace(command=["login", "--platform", kwargs["platform"]])

    def fake_create_history_entry(**kwargs):
        history_calls.append(kwargs)
        return "history-1"

    def fake_save_last_login_config(platform, config):
        # Store a copy of the config as it was at save time.
        persisted_configs.append((platform, dict(config)))

    def fake_build_streaming_process_kwargs():
        streaming_kwargs_calls.append({"called": True})
        return {
            "cwd": "runtime-root",
            "env": {"ENV": "1"},
        }

    def fake_now_iso():
        return "2026-04-20T00:00:00"

    monkeypatch.setattr(crawler_managers, "build_login_command_spec", fake_build_login_command_spec)
    monkeypatch.setattr(crawler_managers.ui_state_store, "create_history_entry", fake_create_history_entry)
    monkeypatch.setattr(crawler_managers.ui_state_store, "save_last_login_config", fake_save_last_login_config)
    monkeypatch.setattr(crawler_managers, "build_streaming_process_kwargs", fake_build_streaming_process_kwargs)
    monkeypatch.setattr(crawler_managers, "now_iso", fake_now_iso)
    # Swap in the inert fakes for process and thread creation.
    monkeypatch.setattr(crawler_managers.subprocess, "Popen", _FakePopen)
    monkeypatch.setattr(crawler_managers.threading, "Thread", _FakeThread)

    ok, message = manager.start_login(
        platform="xhs",
        login_type="phone",
        cookies="cookie=value",
        phone="13800138000",
        headless=False,
    )

    expected_spec = {
        "platform": "xhs",
        "login_type": "phone",
        "headless": False,
        "cookies": "cookie=value",
        "phone": "13800138000",
    }

    assert ok is True
    assert spec_requests == [expected_spec]
    assert history_calls[0]["config"]["login_type"] == "phone"
    assert persisted_configs == [("xhs", history_calls[0]["config"])]
    assert streaming_kwargs_calls == [{"called": True}]

    process = manager._process
    assert process.command == ["login", "--platform", "xhs"]
    assert process.kwargs["cwd"] == "runtime-root"
    assert process.kwargs["env"] == {"ENV": "1"}
    assert isinstance(message, str)