# routes.py 5.07 KB
from __future__ import annotations

from typing import Any

from flask import Blueprint, jsonify, request

from backend import research_tasks as research_backend
from services.application.crawler import (
    CrawlerJobQueryService,
    CrawlerOperationConflictError,
    CrawlerService,
    CrawlerValidationError,
)
from services.application.research import ResearchTaskService
from services.shared.config.access import get_database_runtime_settings
from services.shared.config.app_settings import reload_settings

from .catalog import (
    CRAWLER_TYPE_OPTIONS,
    LOGIN_TYPE_OPTIONS,
    PLATFORM_CAPABILITIES,
    PLATFORM_LABELS,
    PLATFORM_OPTIONS,
    SAVE_OPTION_OPTIONS,
)
from .managers import CrawlTaskManager, LoginTaskManager
from .state_store import ui_state_store

# Blueprint that groups every crawler endpoint under the /api/crawler prefix.
crawler_bp = Blueprint("crawler", __name__, url_prefix="/api/crawler")

# Module-level singletons shared by all routes below. The crawl manager is
# constructed with the login manager, so the login manager must exist first.
login_manager = LoginTaskManager()
crawl_manager = CrawlTaskManager(login_manager)
# Application-level wrapper around the backend research task service.
RESEARCH_TASK_APP_SERVICE = ResearchTaskService(research_backend.research_task_service)
# Query service that reads crawl state and stored UI state through the
# snapshot callables of the crawl manager and the UI state store.
CRAWLER_QUERY_SERVICE = CrawlerJobQueryService(
    crawl_state_getter=crawl_manager.snapshot,
    storage_state_getter=ui_state_store.snapshot,
)


def _get_crawler_database_runtime_settings():
    """Return the database runtime settings, requesting a reload on every call."""
    runtime_settings = get_database_runtime_settings(reload=True)
    return runtime_settings


# Application service used by every route in this module; it is wired with the
# shared managers, the UI state store, and the settings callables defined above.
CRAWLER_APP_SERVICE = CrawlerService(
    crawl_manager=crawl_manager,
    login_manager=login_manager,
    ui_state_store=ui_state_store,
    crawler_query_service=CRAWLER_QUERY_SERVICE,
    # Valid platforms are the keys of the platform label catalog.
    valid_platforms=PLATFORM_LABELS.keys(),
    # "phone" is accepted in addition to the values listed in LOGIN_TYPE_OPTIONS.
    valid_login_types=[option["value"] for option in LOGIN_TYPE_OPTIONS] + ["phone"],
    reload_settings_callable=reload_settings,
    database_runtime_settings_getter=_get_crawler_database_runtime_settings,
    research_task_service=RESEARCH_TASK_APP_SERVICE,
)


def response_ok(**kwargs: Any):
    """Build a JSON success response.

    The body always starts with ``"success": True``; every keyword argument is
    merged on top of it as an additional field.
    """
    body = {"success": True}
    body.update(kwargs)
    return jsonify(body)


def response_error(message: str, status_code: int = 400):
    """Build a JSON failure response.

    Returns a ``(response, status_code)`` tuple whose body carries
    ``"success": False`` and the given ``message``.
    """
    error_body = jsonify({"success": False, "message": message})
    return error_body, status_code


def handle_service_error(exc: Exception):
    """Translate a crawler service exception into a JSON error response.

    ``CrawlerValidationError`` maps to HTTP 400 and
    ``CrawlerOperationConflictError`` maps to HTTP 409. Any other exception is
    re-raised unchanged.
    """
    # Checked in order, so the validation error takes precedence.
    status_by_error_type = (
        (CrawlerValidationError, 400),
        (CrawlerOperationConflictError, 409),
    )
    for error_type, http_status in status_by_error_type:
        if isinstance(exc, error_type):
            return response_error(str(exc), http_status)
    raise exc


@crawler_bp.get("/options")
def get_crawler_options():
    """Return the option catalogs and the default values for a crawler run."""
    default_values = {
        "headless": True,
        "enable_comments": True,
        "enable_sub_comments": False,
        "start_page": 1,
        "max_notes": 20,
        "max_comments": 20,
        "login_type": "qrcode",
        "crawler_type": "search",
        # The default save option is asked from the service on every request.
        "save_option": CRAWLER_APP_SERVICE.get_default_save_option(),
    }
    catalog_fields = {
        "platforms": PLATFORM_OPTIONS,
        "login_types": LOGIN_TYPE_OPTIONS,
        "crawler_types": CRAWLER_TYPE_OPTIONS,
        "save_options": SAVE_OPTION_OPTIONS,
        "platform_capabilities": PLATFORM_CAPABILITIES,
    }
    return response_ok(**catalog_fields, defaults=default_values)


@crawler_bp.get("/state")
def get_crawler_state():
    """Return the crawler state payload built by the application service."""
    state_payload = CRAWLER_APP_SERVICE.build_crawler_state_payload()
    return response_ok(**state_payload)


@crawler_bp.get("/jobs")
def list_crawler_jobs():
    """Return the crawler jobs payload built by the application service."""
    jobs_payload = CRAWLER_APP_SERVICE.build_crawler_jobs_payload()
    return response_ok(**jobs_payload)


@crawler_bp.get("/jobs/<job_id>")
def get_crawler_job(job_id: str):
    """Return the payload for a single crawler job.

    Responds with HTTP 404 when the service payload's ``"crawler_job"`` entry
    is ``None``.
    """
    job_payload = CRAWLER_APP_SERVICE.build_crawler_job_payload(job_id)
    if job_payload["crawler_job"] is not None:
        return response_ok(**job_payload)
    return response_error("Crawler job not found", 404)


@crawler_bp.post("/login/check")
def check_platform_login():
    """Check the login status for the platform named in the request body.

    Reads ``platform`` from the JSON body; a missing or unparseable body is
    treated as an empty object. The check is always requested with
    ``headless=True``.

    Returns:
        A JSON success response with the service result, HTTP 400 when the
        body is not a JSON object or the service rejects the input, or
        HTTP 409 on an operation conflict.
    """
    payload = request.get_json(silent=True) or {}
    # A truthy non-object body (e.g. a JSON array or string) has no ``.get``;
    # reject it as a client error instead of failing with an AttributeError.
    if not isinstance(payload, dict):
        return response_error("Request body must be a JSON object", 400)
    try:
        return response_ok(
            **CRAWLER_APP_SERVICE.check_login(
                platform=payload.get("platform"),
                headless=True,
            )
        )
    except Exception as exc:
        # Known service errors become JSON responses; anything else is re-raised.
        return handle_service_error(exc)


@crawler_bp.post("/login/start")
def start_platform_login():
    """Start a platform login using the fields of the JSON request body.

    Reads ``platform``, ``login_type``, ``cookies`` and ``phone`` from the
    body; a missing or unparseable body is treated as an empty object. A
    missing or empty ``login_type`` falls back to ``"qrcode"``. The login is
    always requested with ``headless=True``.

    Returns:
        A JSON success response with the service result, HTTP 400 when the
        body is not a JSON object or the service rejects the input, or
        HTTP 409 on an operation conflict.
    """
    payload = request.get_json(silent=True) or {}
    # A truthy non-object body (e.g. a JSON array or string) has no ``.get``;
    # reject it as a client error instead of failing with an AttributeError.
    if not isinstance(payload, dict):
        return response_error("Request body must be a JSON object", 400)
    try:
        return response_ok(
            **CRAWLER_APP_SERVICE.start_login(
                platform=payload.get("platform"),
                login_type=payload.get("login_type") or "qrcode",
                cookies=payload.get("cookies"),
                phone=payload.get("phone"),
                headless=True,
            )
        )
    except Exception as exc:
        # Known service errors become JSON responses; anything else is re-raised.
        return handle_service_error(exc)


@crawler_bp.post("/login/cancel")
def cancel_platform_login():
    """Ask the service to cancel the login and return its result as JSON."""
    try:
        cancel_result = CRAWLER_APP_SERVICE.cancel_login()
        return response_ok(**cancel_result)
    except Exception as error:
        # Known service errors become JSON responses; anything else is re-raised.
        return handle_service_error(error)


@crawler_bp.post("/start")
def start_crawler_task():
    """Start a crawler run, passing the JSON request body to the service.

    A missing or unparseable body is replaced by an empty dict.
    """
    request_body = request.get_json(silent=True) or {}
    try:
        start_result = CRAWLER_APP_SERVICE.start_crawler(request_body)
        return response_ok(**start_result)
    except Exception as error:
        # Known service errors become JSON responses; anything else is re-raised.
        return handle_service_error(error)


@crawler_bp.post("/stop")
def stop_crawler_task():
    """Ask the service to stop the crawler and return its result as JSON."""
    try:
        stop_result = CRAWLER_APP_SERVICE.stop_crawler()
        return response_ok(**stop_result)
    except Exception as error:
        # Known service errors become JSON responses; anything else is re-raised.
        return handle_service_error(error)


# Names exported by ``from <this module> import *``: the blueprint, the shared
# manager instances and their classes, and the two service singletons.
__all__ = [
    "crawler_bp",
    "crawl_manager",
    "login_manager",
    "CrawlTaskManager",
    "LoginTaskManager",
    "CRAWLER_APP_SERVICE",
    "CRAWLER_QUERY_SERVICE",
]