# catalog.py 2.79 KB
from __future__ import annotations

from typing import Any, Dict

# Selectable platforms as {"value", "label"} records, in listing order.
# "value" is the short platform code; "label" is its Chinese display name.
PLATFORM_OPTIONS = [
    {"value": code, "label": display_name}
    for code, display_name in (
        ("xhs", "小红书"),
        ("dy", "抖音"),
        ("ks", "快手"),
        ("bili", "B站"),
        ("wb", "微博"),
        ("tieba", "贴吧"),
        ("zhihu", "知乎"),
    )
]

# Selectable login methods as {"value", "label"} records:
# QR-code login, phone-number login, and Cookie login.
LOGIN_TYPE_OPTIONS = [
    {"value": method, "label": display_name}
    for method, display_name in (
        ("qrcode", "二维码登录"),
        ("phone", "手机号登录"),
        ("cookie", "Cookie 登录"),
    )
]

# Selectable crawl modes as {"value", "label"} records:
# keyword search, detail of specified content, and creator home page.
CRAWLER_TYPE_OPTIONS = [
    {"value": mode, "label": display_name}
    for mode, display_name in (
        ("search", "关键词搜索"),
        ("detail", "指定内容详情"),
        ("creator", "创作者主页"),
    )
]

# Selectable storage back ends as {"value", "label"} records.
# NOTE: the value "db" is the one labelled "MySQL"; the other values match
# their labels more directly.
SAVE_OPTION_OPTIONS = [
    {"value": backend, "label": display_name}
    for backend, display_name in (
        ("postgres", "PostgreSQL"),
        ("db", "MySQL"),
        ("json", "JSON 文件"),
        ("csv", "CSV 文件"),
        ("excel", "Excel 文件"),
        ("sqlite", "SQLite"),
    )
]

# Lookup table from platform code (e.g. "xhs") to its display label,
# derived from PLATFORM_OPTIONS rather than maintained by hand.
PLATFORM_LABELS = {
    option["value"]: option["label"]
    for option in PLATFORM_OPTIONS
}

# Per-platform capability table keyed by platform code. Each entry carries
# "login_types", "crawler_types" and a short Chinese usage "note".
# Every platform currently lists the same three login types and the same
# three crawler types; only the note differs, so the table is generated from
# (code, note) pairs. The list literals live inside the comprehension body,
# which gives each platform its own independent list objects.
PLATFORM_CAPABILITIES: Dict[str, Dict[str, Any]] = {
    platform_code: {
        "login_types": ["qrcode", "phone", "cookie"],
        "crawler_types": ["search", "detail", "creator"],
        "note": usage_note,
    }
    for platform_code, usage_note in (
        # QR-code login recommended first; phone login needs a usable
        # SMS verification code source.
        ("xhs", "推荐优先使用二维码登录;手机号登录需要可用的短信验证码来源。"),
        # Login triggers slider verification more easily; Cookie login is
        # advised for long-running use.
        ("dy", "抖音登录更容易触发滑块验证,长期运行建议优先使用 Cookie 登录。"),
        # Supports QR code, phone and Cookie login; QR flow is fairly direct.
        ("ks", "快手支持二维码、手机号和 Cookie 登录,二维码链路相对直接。"),
        # QR-code login is fairly stable; good for first establishing a
        # browser login state.
        ("bili", "B站二维码登录体验较稳定,适合首次建立浏览器登录态。"),
        # QR code and Cookie both work; switch to Cookie if sessions
        # expire frequently.
        ("wb", "微博二维码和 Cookie 都可用;若有频繁失效,建议改为 Cookie 方案。"),
        # QR-code login works; good for first checking that the browser
        # login state persists.
        ("tieba", "贴吧二维码登录可用,适合先验证浏览器登录态是否持久化成功。"),
        # Supports all three login methods; reuse an existing Cookie if
        # risk control triggers often.
        ("zhihu", "知乎支持三种登录方式;若页面频繁风控,建议优先复用已有 Cookie。"),
    )
}