# test_crawler_routes.py
from __future__ import annotations

import pytest
from flask import Flask

from backend.crawler.routes import crawler_bp
class _FakeCrawlerAppService:
    def __init__(self) -> None:
        self.state_calls = 0
        self.jobs_calls = 0
        self.job_calls: list[str] = []
        self.check_login_calls: list[dict[str, object]] = []
        self.start_crawler_calls: list[dict[str, object]] = []
        self.stop_crawler_calls = 0

        self.state_payload = {
            "crawler": {"status": "running"},
            "login": {"active_task": {"running": False}},
            "storage": {"history": []},
            "crawler_jobs": {
                "current_job": {"id": "crawl-2"},
                "jobs": [{"id": "crawl-2"}, {"id": "crawl-1"}],
            },
        }
        self.jobs_payload = {
            "current_job": {"id": "crawl-2"},
            "jobs": [{"id": "crawl-2"}, {"id": "crawl-1"}],
        }
        self.job_payloads = {
            "crawl-2": {
                "job_id": "crawl-2",
                "crawler_job": {"id": "crawl-2", "status": "running"},
            },
            "missing": {
                "job_id": "missing",
                "crawler_job": None,
            },
        }
        self.check_login_result = {"platform_state": {"platform": "xiaohongshu", "logged_in": True}}
        self.start_crawler_result = {"message": "crawler started", "job_id": "crawl-2"}
        self.stop_crawler_result = {"message": "crawler stopped"}
        self.default_save_option = "postgres"

    def build_crawler_state_payload(self) -> dict[str, object]:
        self.state_calls += 1
        return dict(self.state_payload)

    def build_crawler_jobs_payload(self) -> dict[str, object]:
        self.jobs_calls += 1
        return dict(self.jobs_payload)

    def build_crawler_job_payload(self, job_id: str) -> dict[str, object]:
        self.job_calls.append(job_id)
        return dict(self.job_payloads.get(job_id, {"job_id": job_id, "crawler_job": None}))

    def check_login(self, **payload) -> dict[str, object]:
        self.check_login_calls.append(payload)
        result = self.check_login_result
        if isinstance(result, Exception):
            raise result
        return dict(result)

    def start_crawler(self, payload: dict[str, object]) -> dict[str, object]:
        self.start_crawler_calls.append(payload)
        result = self.start_crawler_result
        if isinstance(result, Exception):
            raise result
        return dict(result)

    def stop_crawler(self) -> dict[str, object]:
        self.stop_crawler_calls += 1
        result = self.stop_crawler_result
        if isinstance(result, Exception):
            raise result
        return dict(result)

    def get_default_save_option(self) -> str:
        return self.default_save_option


def _require_crawler_app_service():
    """Import the crawler routes module, skipping the test when the
    CRAWLER_APP_SERVICE hook has not been wired up yet."""
    import backend.crawler.routes as crawler_routes

    if hasattr(crawler_routes, "CRAWLER_APP_SERVICE"):
        return crawler_routes
    # pytest.skip raises, so no fall-through return is needed.
    pytest.skip("CRAWLER_APP_SERVICE route wiring has not landed in this workspace yet.")


def _build_client():
    """Return a Flask test client with the crawler blueprint mounted."""
    application = Flask(__name__)
    application.register_blueprint(crawler_bp)
    return application.test_client()


def test_crawler_jobs_route_returns_query_service_payload(monkeypatch):
    """GET /api/crawler/jobs should surface the app service's jobs payload."""
    routes_module = _require_crawler_app_service()
    service = _FakeCrawlerAppService()
    monkeypatch.setattr(routes_module, "CRAWLER_APP_SERVICE", service, raising=False)

    response = _build_client().get("/api/crawler/jobs")

    assert response.status_code == 200
    body = response.get_json()
    assert body == {
        "success": True,
        "current_job": {"id": "crawl-2"},
        "jobs": [{"id": "crawl-2"}, {"id": "crawl-1"}],
    }
    assert service.jobs_calls == 1


def test_crawler_job_route_returns_not_found_when_job_is_missing(monkeypatch):
    """GET /api/crawler/jobs/<id> yields a 404 envelope for an unknown job."""
    routes_module = _require_crawler_app_service()
    service = _FakeCrawlerAppService()
    monkeypatch.setattr(routes_module, "CRAWLER_APP_SERVICE", service, raising=False)

    response = _build_client().get("/api/crawler/jobs/missing")

    assert response.status_code == 404
    assert response.get_json() == {"success": False, "message": "Crawler job not found"}
    assert service.job_calls == ["missing"]


def test_crawler_job_route_returns_query_service_payload_when_job_exists(monkeypatch):
    """GET /api/crawler/jobs/<id> returns the app-service payload for a known job."""
    routes_module = _require_crawler_app_service()
    service = _FakeCrawlerAppService()
    monkeypatch.setattr(routes_module, "CRAWLER_APP_SERVICE", service, raising=False)

    response = _build_client().get("/api/crawler/jobs/crawl-2")

    assert response.status_code == 200
    body = response.get_json()
    assert body == {
        "success": True,
        "job_id": "crawl-2",
        "crawler_job": {"id": "crawl-2", "status": "running"},
    }
    assert service.job_calls == ["crawl-2"]


def test_crawler_state_route_delegates_to_app_service(monkeypatch):
    """GET /api/crawler/state exposes each section of the composite payload."""
    routes_module = _require_crawler_app_service()
    service = _FakeCrawlerAppService()
    monkeypatch.setattr(routes_module, "CRAWLER_APP_SERVICE", service, raising=False)

    response = _build_client().get("/api/crawler/state")

    assert response.status_code == 200
    body = response.get_json()
    assert body["success"] is True
    expected_sections = {
        "crawler": {"status": "running"},
        "login": {"active_task": {"running": False}},
        "storage": {"history": []},
        "crawler_jobs": {
            "current_job": {"id": "crawl-2"},
            "jobs": [{"id": "crawl-2"}, {"id": "crawl-1"}],
        },
    }
    for section, expected in expected_sections.items():
        assert body[section] == expected
    assert service.state_calls == 1


def test_crawler_options_route_uses_app_service_default_save_option(monkeypatch):
    """GET /api/crawler/options reflects the service's default save option."""
    routes_module = _require_crawler_app_service()
    service = _FakeCrawlerAppService()
    service.default_save_option = "postgres"
    monkeypatch.setattr(routes_module, "CRAWLER_APP_SERVICE", service, raising=False)

    response = _build_client().get("/api/crawler/options")

    assert response.status_code == 200
    body = response.get_json()
    assert body["success"] is True
    assert body["defaults"]["save_option"] == "postgres"


def test_crawler_login_check_route_returns_app_service_payload(monkeypatch):
    """POST /api/crawler/login/check forwards the platform and defaults headless."""
    routes_module = _require_crawler_app_service()
    service = _FakeCrawlerAppService()
    monkeypatch.setattr(routes_module, "CRAWLER_APP_SERVICE", service, raising=False)

    response = _build_client().post("/api/crawler/login/check", json={"platform": "xiaohongshu"})

    assert response.status_code == 200
    assert response.get_json() == {
        "success": True,
        "platform_state": {"platform": "xiaohongshu", "logged_in": True},
    }
    assert service.check_login_calls == [{"platform": "xiaohongshu", "headless": True}]


def test_crawler_start_route_returns_validation_error_from_app_service(monkeypatch):
    """POST /api/crawler/start maps service validation errors to HTTP 400."""
    routes_module = _require_crawler_app_service()
    service = _FakeCrawlerAppService()
    # Fall back to ValueError when the dedicated exception type is not exported.
    error_type = getattr(routes_module, "CrawlerValidationError", ValueError)
    service.start_crawler_result = error_type("platform is required")
    monkeypatch.setattr(routes_module, "CRAWLER_APP_SERVICE", service, raising=False)

    response = _build_client().post("/api/crawler/start", json={"platform": ""})

    assert response.status_code == 400
    assert response.get_json() == {"success": False, "message": "platform is required"}
    assert service.start_crawler_calls == [{"platform": ""}]


def test_crawler_stop_route_returns_conflict_error_from_app_service(monkeypatch):
    """POST /api/crawler/stop maps service conflict errors to HTTP 409."""
    routes_module = _require_crawler_app_service()
    service = _FakeCrawlerAppService()
    # Fall back to RuntimeError when the dedicated exception type is not exported.
    error_type = getattr(routes_module, "CrawlerOperationConflictError", RuntimeError)
    service.stop_crawler_result = error_type("crawler is not running")
    monkeypatch.setattr(routes_module, "CRAWLER_APP_SERVICE", service, raising=False)

    response = _build_client().post("/api/crawler/stop")

    assert response.status_code == 409
    assert response.get_json() == {"success": False, "message": "crawler is not running"}
    assert service.stop_crawler_calls == 1