test_crawler_runtime.py
4.42 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
from __future__ import annotations
from types import SimpleNamespace
import backend.crawler.runtime as crawler_runtime
def test_runtime_module_only_exports_manager_facing_helpers():
    """Check that none of the listed names is an attribute of the runtime module."""
    hidden_names = (
        "PROJECT_ROOT",
        "MEDIACRAWLER_PATHS",
        "RUNTIME_PATH",
        "RUNTIME_CWD",
        "build_runtime_env",
        "build_runtime_command",
        "resolve_save_data_option",
        "build_execution_context",
    )
    # Names are checked in a fixed order; the first one found on the module
    # fails the test.
    for hidden_name in hidden_names:
        assert not hasattr(crawler_runtime, hidden_name)
def test_build_completed_process_kwargs_delegates_to_runtime_service(monkeypatch):
    """Check that ``build_completed_process_kwargs`` forwards to the runtime service.

    The service method is swapped for a recording stub; the module-level
    function must pass its keyword arguments through and hand back the
    stub's return value.
    """
    recorded: list[dict[str, object]] = []

    def fake_build(**kwargs):
        # Store a copy of the keyword arguments received by the service.
        recorded.append(dict(kwargs))
        return {"cwd": "runtime-root"}

    monkeypatch.setattr(
        crawler_runtime.MEDIACRAWLER_RUNTIME_SERVICE,
        "build_completed_process_kwargs",
        fake_build,
    )

    result = crawler_runtime.build_completed_process_kwargs(timeout=120)

    assert result == {"cwd": "runtime-root"}
    assert recorded == [{"timeout": 120}]
def test_build_streaming_process_kwargs_delegates_to_runtime_service(monkeypatch):
    """Check that ``build_streaming_process_kwargs`` forwards to the runtime service.

    The service method is swapped for a zero-argument recording stub; the
    module-level function must call it once and hand back its return value.
    """
    recorded: list[dict[str, object]] = []

    def fake_build():
        # The stub takes no arguments, so only the fact of the call is logged.
        recorded.append({"called": True})
        return {"cwd": "runtime-root"}

    monkeypatch.setattr(
        crawler_runtime.MEDIACRAWLER_RUNTIME_SERVICE,
        "build_streaming_process_kwargs",
        fake_build,
    )

    result = crawler_runtime.build_streaming_process_kwargs()

    assert result == {"cwd": "runtime-root"}
    assert recorded == [{"called": True}]
def test_build_login_status_command_spec_delegates_to_runtime_service(monkeypatch):
    """Check that ``build_login_status_command_spec`` forwards to the runtime service.

    The service method is swapped for a recording stub that returns a
    sentinel object; the module-level function must pass the keyword
    arguments through unchanged and return that very object.
    """
    recorded: list[dict[str, object]] = []
    sentinel = SimpleNamespace(command=["status", "--platform", "xhs"])

    def fake_build(**kwargs):
        # Store a copy of the keyword arguments received by the service.
        recorded.append(dict(kwargs))
        return sentinel

    monkeypatch.setattr(
        crawler_runtime.MEDIACRAWLER_RUNTIME_SERVICE,
        "build_login_status_command_spec",
        fake_build,
    )

    # The same mapping serves as both the call arguments and the expectation.
    arguments = {"platform": "xhs", "headless": True}
    result = crawler_runtime.build_login_status_command_spec(**arguments)

    assert result is sentinel
    assert recorded == [arguments]
def test_build_login_command_spec_delegates_to_runtime_service(monkeypatch):
    """Check that ``build_login_command_spec`` forwards to the runtime service.

    The service method is swapped for a recording stub that returns a
    sentinel object; the module-level function must pass the keyword
    arguments through unchanged and return that very object.
    """
    recorded: list[dict[str, object]] = []
    sentinel = SimpleNamespace(command=["login", "--platform", "xhs"])

    def fake_build(**kwargs):
        # Store a copy of the keyword arguments received by the service.
        recorded.append(dict(kwargs))
        return sentinel

    monkeypatch.setattr(
        crawler_runtime.MEDIACRAWLER_RUNTIME_SERVICE,
        "build_login_command_spec",
        fake_build,
    )

    # The same mapping serves as both the call arguments and the expectation.
    arguments = {
        "platform": "xhs",
        "login_type": "phone",
        "headless": False,
        "cookies": "cookie=value",
        "phone": "13800138000",
    }
    result = crawler_runtime.build_login_command_spec(**arguments)

    assert result is sentinel
    assert recorded == [arguments]
def test_build_crawl_command_spec_delegates_to_runtime_service(monkeypatch):
    """Check that ``build_crawl_command_spec`` forwards to the runtime service.

    The service method is swapped for a recording stub that returns a
    sentinel object; the module-level function must pass every keyword
    argument through unchanged and return that very object.
    """
    recorded: list[dict[str, object]] = []
    sentinel = SimpleNamespace(command=["crawl", "--platform", "xhs"])

    def fake_build(**kwargs):
        # Store a copy of the keyword arguments received by the service.
        recorded.append(dict(kwargs))
        return sentinel

    monkeypatch.setattr(
        crawler_runtime.MEDIACRAWLER_RUNTIME_SERVICE,
        "build_crawl_command_spec",
        fake_build,
    )

    # The same mapping serves as both the call arguments and the expectation.
    arguments = {
        "platform": "xhs",
        "login_type": "qrcode",
        "crawler_type": "search",
        "keywords": "museum",
        "specified_ids": "",
        "creator_ids": "author-1",
        "start_page": 2,
        "max_notes": 30,
        "max_comments": 15,
        "enable_comments": True,
        "enable_sub_comments": False,
        "save_data_option": "postgres",
        "headless": True,
        "cookies": "cookie=value",
        "phone": "13800138000",
    }
    result = crawler_runtime.build_crawl_command_spec(**arguments)

    assert result is sentinel
    assert recorded == [arguments]