# test_crawler_runtime.py (4.42 KB)
from __future__ import annotations

from types import SimpleNamespace

import backend.crawler.runtime as crawler_runtime


def test_runtime_module_only_exports_manager_facing_helpers():
    """Assert that none of the listed names exist as attributes of the runtime module."""
    names_expected_absent = (
        "PROJECT_ROOT",
        "MEDIACRAWLER_PATHS",
        "RUNTIME_PATH",
        "RUNTIME_CWD",
        "build_runtime_env",
        "build_runtime_command",
        "resolve_save_data_option",
        "build_execution_context",
    )

    # Checked in declaration order; the first name that is present fails the test.
    for attribute_name in names_expected_absent:
        assert not hasattr(crawler_runtime, attribute_name)


def test_build_completed_process_kwargs_delegates_to_runtime_service(monkeypatch):
    """Check that ``build_completed_process_kwargs`` hands ``timeout`` to the service.

    The service method is replaced with a stub that records the keyword
    arguments it receives; the module-level helper must return a value equal
    to the stub's result.
    """
    recorded_kwargs: list[dict[str, object]] = []

    def fake_build_completed_process_kwargs(**kwargs):
        # Snapshot the kwargs so the assertion sees exactly what was passed.
        recorded_kwargs.append(dict(kwargs))
        return {"cwd": "runtime-root"}

    monkeypatch.setattr(
        crawler_runtime.MEDIACRAWLER_RUNTIME_SERVICE,
        "build_completed_process_kwargs",
        fake_build_completed_process_kwargs,
    )

    result = crawler_runtime.build_completed_process_kwargs(timeout=120)

    assert result == {"cwd": "runtime-root"}
    assert recorded_kwargs == [{"timeout": 120}]


def test_build_streaming_process_kwargs_delegates_to_runtime_service(monkeypatch):
    """Check that ``build_streaming_process_kwargs`` calls the service with no arguments.

    The stub accepts no parameters, so any argument forwarded by the
    module-level helper would raise ``TypeError`` and fail the test.
    """
    invocations: list[dict[str, object]] = []

    def fake_build_streaming_process_kwargs():
        invocations.append({"called": True})
        return {"cwd": "runtime-root"}

    monkeypatch.setattr(
        crawler_runtime.MEDIACRAWLER_RUNTIME_SERVICE,
        "build_streaming_process_kwargs",
        fake_build_streaming_process_kwargs,
    )

    result = crawler_runtime.build_streaming_process_kwargs()

    assert result == {"cwd": "runtime-root"}
    assert invocations == [{"called": True}]


def test_build_login_status_command_spec_delegates_to_runtime_service(monkeypatch):
    """Check that ``build_login_status_command_spec`` forwards its kwargs to the service.

    The helper must return the very object produced by the (stubbed) service
    method, and the stub must have been called exactly once with the same
    keyword arguments.
    """
    recorded_kwargs: list[dict[str, object]] = []
    sentinel_spec = SimpleNamespace(command=["status", "--platform", "xhs"])

    def fake_build_login_status_command_spec(**kwargs):
        recorded_kwargs.append(dict(kwargs))
        return sentinel_spec

    monkeypatch.setattr(
        crawler_runtime.MEDIACRAWLER_RUNTIME_SERVICE,
        "build_login_status_command_spec",
        fake_build_login_status_command_spec,
    )

    result = crawler_runtime.build_login_status_command_spec(
        platform="xhs",
        headless=True,
    )

    # Identity, not equality: the helper must not copy or wrap the spec.
    assert result is sentinel_spec
    assert recorded_kwargs == [{"platform": "xhs", "headless": True}]


def test_build_login_command_spec_delegates_to_runtime_service(monkeypatch):
    """Check that ``build_login_command_spec`` forwards its kwargs to the service.

    The helper must return the very object produced by the (stubbed) service
    method, and the stub must have been called exactly once with the same
    keyword arguments.
    """
    recorded_kwargs: list[dict[str, object]] = []
    sentinel_spec = SimpleNamespace(command=["login", "--platform", "xhs"])

    def fake_build_login_command_spec(**kwargs):
        recorded_kwargs.append(dict(kwargs))
        return sentinel_spec

    monkeypatch.setattr(
        crawler_runtime.MEDIACRAWLER_RUNTIME_SERVICE,
        "build_login_command_spec",
        fake_build_login_command_spec,
    )

    # Declared once and unpacked into the call, so the expected kwargs and
    # the kwargs actually passed cannot drift apart.
    login_kwargs = {
        "platform": "xhs",
        "login_type": "phone",
        "headless": False,
        "cookies": "cookie=value",
        "phone": "13800138000",
    }

    result = crawler_runtime.build_login_command_spec(**login_kwargs)

    # Identity, not equality: the helper must not copy or wrap the spec.
    assert result is sentinel_spec
    assert recorded_kwargs == [login_kwargs]


def test_build_crawl_command_spec_delegates_to_runtime_service(monkeypatch):
    """Check that ``build_crawl_command_spec`` forwards its kwargs to the service.

    The helper must return the very object produced by the (stubbed) service
    method, and the stub must have been called exactly once with the same
    keyword arguments.
    """
    recorded_kwargs: list[dict[str, object]] = []
    sentinel_spec = SimpleNamespace(command=["crawl", "--platform", "xhs"])

    def fake_build_crawl_command_spec(**kwargs):
        recorded_kwargs.append(dict(kwargs))
        return sentinel_spec

    monkeypatch.setattr(
        crawler_runtime.MEDIACRAWLER_RUNTIME_SERVICE,
        "build_crawl_command_spec",
        fake_build_crawl_command_spec,
    )

    # Declared once and unpacked into the call, so the expected kwargs and
    # the kwargs actually passed cannot drift apart.
    crawl_kwargs = {
        "platform": "xhs",
        "login_type": "qrcode",
        "crawler_type": "search",
        "keywords": "museum",
        "specified_ids": "",
        "creator_ids": "author-1",
        "start_page": 2,
        "max_notes": 30,
        "max_comments": 15,
        "enable_comments": True,
        "enable_sub_comments": False,
        "save_data_option": "postgres",
        "headless": True,
        "cookies": "cookie=value",
        "phone": "13800138000",
    }

    result = crawler_runtime.build_crawl_command_spec(**crawl_kwargs)

    # Identity, not equality: the helper must not copy or wrap the spec.
    assert result is sentinel_spec
    assert recorded_kwargs == [crawl_kwargs]