catalog.py
2.79 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
from __future__ import annotations
from typing import Any, Dict
# Supported platforms as a list of {"value", "label"} dicts: "value" is the
# short platform code and "label" is its Chinese display name. The list order
# is the order in which the pairs are written below.
PLATFORM_OPTIONS = [
    {"value": code, "label": title}
    for code, title in (
        ("xhs", "小红书"),
        ("dy", "抖音"),
        ("ks", "快手"),
        ("bili", "B站"),
        ("wb", "微博"),
        ("tieba", "贴吧"),
        ("zhihu", "知乎"),
    )
]
# Login methods as a list of {"value", "label"} dicts: "value" is the method
# identifier and "label" is its Chinese display text (QR code, phone number,
# cookie).
LOGIN_TYPE_OPTIONS = [
    {"value": method, "label": title}
    for method, title in (
        ("qrcode", "二维码登录"),
        ("phone", "手机号登录"),
        ("cookie", "Cookie 登录"),
    )
]
# Crawl modes as a list of {"value", "label"} dicts: keyword search, detail of
# specified content, and creator home page (labels are Chinese display text).
CRAWLER_TYPE_OPTIONS = [
    {"value": mode, "label": title}
    for mode, title in (
        ("search", "关键词搜索"),
        ("detail", "指定内容详情"),
        ("creator", "创作者主页"),
    )
]
# Storage backends as a list of {"value", "label"} dicts.
# NOTE: the generic-looking value "db" is the one labelled "MySQL".
SAVE_OPTION_OPTIONS = [
    {"value": backend, "label": title}
    for backend, title in (
        ("postgres", "PostgreSQL"),
        ("db", "MySQL"),
        ("json", "JSON 文件"),
        ("csv", "CSV 文件"),
        ("excel", "Excel 文件"),
        ("sqlite", "SQLite"),
    )
]
# Lookup table from platform code ("value") to display label, derived from
# PLATFORM_OPTIONS so the two cannot drift apart.
PLATFORM_LABELS = {
    option["value"]: option["label"]
    for option in PLATFORM_OPTIONS
}
# Per-platform capability table, keyed by platform code. Each entry holds:
#   "login_types"   - login method identifiers available for the platform
#   "crawler_types" - crawl mode identifiers available for the platform
#   "note"          - a Chinese-language usage hint for the platform
# Every platform currently lists the same login and crawl modes; only the note
# differs, so the table is generated from (code, note) pairs. The list literals
# sit inside the comprehension body, so each platform gets its own list objects
# rather than sharing one instance.
PLATFORM_CAPABILITIES: Dict[str, Dict[str, Any]] = {
    platform: {
        "login_types": ["qrcode", "phone", "cookie"],
        "crawler_types": ["search", "detail", "creator"],
        "note": hint,
    }
    for platform, hint in (
        ("xhs", "推荐优先使用二维码登录;手机号登录需要可用的短信验证码来源。"),
        ("dy", "抖音登录更容易触发滑块验证,长期运行建议优先使用 Cookie 登录。"),
        ("ks", "快手支持二维码、手机号和 Cookie 登录,二维码链路相对直接。"),
        ("bili", "B站二维码登录体验较稳定,适合首次建立浏览器登录态。"),
        ("wb", "微博二维码和 Cookie 都可用;若有频繁失效,建议改为 Cookie 方案。"),
        ("tieba", "贴吧二维码登录可用,适合先验证浏览器登录态是否持久化成功。"),
        ("zhihu", "知乎支持三种登录方式;若页面频繁风控,建议优先复用已有 Cookie。"),
    )
}