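"""Path-resolution tests for the vendored MediaCrawler ``tools.utils`` helpers.

Exercises ``utils.resolve_browser_user_data_dir`` and
``utils.resolve_cdp_browser_user_data_dir``. The ``tools`` package is imported
after ``<PROJECT_ROOT>/vendor/mediacrawler`` has been put on ``sys.path``.
"""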
from __future__ import annotations

import sys
import types
from pathlib import Path


# Repository root: the fourth ancestor of this file (``parents[0]`` is the
# directory that contains it), i.e. ``<root>/<a>/<b>/<c>/<this file>``.
PROJECT_ROOT = Path(__file__).resolve().parents[3]
# Root of the vendored MediaCrawler checkout.
MEDIACRAWLER_ROOT = PROJECT_ROOT.joinpath("vendor", "mediacrawler")

# Put the vendored root at the front of ``sys.path`` (once) so that its
# top-level ``tools`` package can be imported below.
_vendor_path_entry = str(MEDIACRAWLER_ROOT)
if _vendor_path_entry not in sys.path:
    sys.path.insert(0, _vendor_path_entry)

# Register an empty placeholder under the name ``cv2`` unless something is
# already registered there; a later ``import cv2`` then returns the placeholder.
# NOTE(review): presumably ``tools.utils`` imports cv2 (directly or through a
# sibling module) and these tests never touch it -- confirm against the vendor
# code. The placeholder is never removed, so it stays in ``sys.modules`` for
# the rest of the process; verify no other test in the session needs real cv2.
if "cv2" not in sys.modules:
    sys.modules["cv2"] = types.SimpleNamespace()

# Must run after the two steps above, hence not at the top of the file.
# NOTE(review): ``tools`` is a generic name; if another ``tools`` package was
# imported earlier in the process this resolves to that one -- confirm.
from tools import utils  # noqa: E402


def _normalize(path: str) -> str:
    """Return *path* with every backslash replaced by a forward slash.

    Lets the tests compare a resolver result against a ``/``-separated
    literal regardless of which separator the result uses. Nothing else is
    changed: no drive-letter handling, no collapsing of repeated separators.
    """
    backslash, forward_slash = "\\", "/"
    return forward_slash.join(path.split(backslash))


def test_resolve_browser_user_data_dir_keeps_absolute_runtime_paths() -> None:
    """An absolute runtime directory comes back unchanged.

    The result is compared after ``_normalize``, so only the separator style
    may differ from the input.
    """
    runtime_path = "/runtime/crawler/browser_data/xhs_user_data_dir"

    resolved = utils.resolve_browser_user_data_dir(runtime_path)

    # NOTE(review): the literal has no drive letter, so it is absolute only by
    # POSIX rules -- confirm how the resolver classifies it on Windows.
    assert _normalize(resolved) == runtime_path


def test_resolve_browser_user_data_dir_preserves_vendor_relative_fallback() -> None:
    """A bare directory name resolves to ``<MEDIACRAWLER_ROOT>/browser_data/<name>``."""
    expected = str(MEDIACRAWLER_ROOT.joinpath("browser_data", "xhs_user_data_dir"))

    # NOTE(review): unlike the sibling tests this comparison is
    # separator-sensitive (no ``_normalize``) -- confirm the resolver returns
    # the platform-native ``str(Path)`` form.
    assert utils.resolve_browser_user_data_dir("xhs_user_data_dir") == expected


def test_resolve_cdp_browser_user_data_dir_uses_same_runtime_parent_directory() -> None:
    """The CDP directory is a ``cdp_``-prefixed sibling of the runtime directory.

    The result is compared after ``_normalize``, so only the separator style
    may differ from the expected literal.
    """
    runtime_path = "/runtime/crawler/browser_data/xhs_user_data_dir"
    expected = "/runtime/crawler/browser_data/cdp_xhs_user_data_dir"

    resolved = utils.resolve_cdp_browser_user_data_dir(runtime_path)

    assert _normalize(resolved) == expected