# test_crawler_job_dto.py
from __future__ import annotations

from datetime import datetime

from services.shared.dto import CrawlerJobDTO, CrawlerJobListDTO
from services.shared.models import CrawlerJob, CrawlerJobStatus, ErrorInfo, ProgressInfo


def test_crawler_job_dto_from_model_and_response_item():
    """Convert a fully populated CrawlerJob to a DTO and check its response item.

    The model is built with every field set, passed through
    ``CrawlerJobDTO.from_model`` and then ``to_response_item``; the
    assertions pin the DTO attributes and the keys of the resulting payload.
    """
    # All timestamps fall within the same hour, so derive them from one base.
    created = datetime(2026, 4, 16, 10, 0, 0)

    collecting_progress = ProgressInfo(
        current=3,
        total=10,
        percent=30.0,
        stage="collecting",
        message="collecting notes",
    )
    rate_limit_error = ErrorInfo(
        code="RATE_LIMIT",
        message="slow down",
        retryable=True,
    )

    model = CrawlerJob(
        id="crawler-123",
        research_task_id="task-123",
        platform="xiaohongshu",
        keywords=["museum", "coffee"],
        status=CrawlerJobStatus.RUNNING,
        progress=collecting_progress,
        config={"region": "shanghai"},
        result_summary={"items_collected": 12},
        last_action="collecting notes",
        error=rate_limit_error,
        created_at=created,
        updated_at=created.replace(minute=5),
        started_at=created.replace(minute=1),
        finished_at=created.replace(minute=6),
        legacy_payload={"platform_label": "Xiaohongshu"},
    )

    dto = CrawlerJobDTO.from_model(model)
    item = dto.to_response_item()

    # The DTO itself: identifier copied over, status expected as a plain string.
    assert dto.id == "crawler-123"
    assert dto.status == "running"

    # Top-level fields of the response item; the id is expected under "job_id".
    assert item["job_id"] == "crawler-123"
    assert item["platform"] == "xiaohongshu"
    assert item["keywords"] == ["museum", "coffee"]

    # Nested dictionaries are expected to come through with their contents.
    region = item["config"]["region"]
    assert region == "shanghai"
    collected = item["result_summary"]["items_collected"]
    assert collected == 12

    # "platform_label" must be reachable both inside "legacy_payload" and at
    # the top level (presumably flattened from legacy_payload — confirm in DTO).
    assert item["legacy_payload"]["platform_label"] == "Xiaohongshu"
    assert item["platform_label"] == "Xiaohongshu"

    # The item is also expected to embed a "unified_job" view of the job.
    unified = item["unified_job"]
    assert unified["research_task_id"] == "task-123"


def test_crawler_job_list_dto_builds_history_payload():
    """Check the payload built by CrawlerJobListDTO for a current job plus history.

    Two DTOs for the same research task are wrapped in a ``CrawlerJobListDTO``;
    the response payload must expose the current job and list the jobs in the
    order they were supplied.
    """
    running_job = CrawlerJobDTO(
        id="crawler-2",
        research_task_id="task-1",
        platform="xiaohongshu",
        status="running",
    )
    finished_job = CrawlerJobDTO(
        id="crawler-1",
        research_task_id="task-1",
        platform="dianping",
        status="completed",
    )

    job_list = CrawlerJobListDTO(
        current_job=running_job,
        jobs=[running_job, finished_job],
    )
    response = job_list.to_response_payload()

    assert response["current_job"]["id"] == "crawler-2"

    # Input order (current first, then previous) must be preserved.
    listed_ids = [entry["id"] for entry in response["jobs"]]
    assert listed_ids == ["crawler-2", "crawler-1"]