routes.py
5.07 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
from __future__ import annotations
from typing import Any
from flask import Blueprint, jsonify, request
from backend import research_tasks as research_backend
from services.application.crawler import (
CrawlerJobQueryService,
CrawlerOperationConflictError,
CrawlerService,
CrawlerValidationError,
)
from services.application.research import ResearchTaskService
from services.shared.config.access import get_database_runtime_settings
from services.shared.config.app_settings import reload_settings
from .catalog import (
CRAWLER_TYPE_OPTIONS,
LOGIN_TYPE_OPTIONS,
PLATFORM_CAPABILITIES,
PLATFORM_LABELS,
PLATFORM_OPTIONS,
SAVE_OPTION_OPTIONS,
)
from .managers import CrawlTaskManager, LoginTaskManager
from .state_store import ui_state_store
# Blueprint for all crawler-related endpoints, mounted under /api/crawler.
crawler_bp = Blueprint("crawler", __name__, url_prefix="/api/crawler")
# Module-level singletons: the crawl manager wraps the login manager so
# crawl jobs can reuse/verify platform login sessions.
login_manager = LoginTaskManager()
crawl_manager = CrawlTaskManager(login_manager)
# Application-layer facade over the backend research-task service.
RESEARCH_TASK_APP_SERVICE = ResearchTaskService(research_backend.research_task_service)
# Read-only query service; snapshots are passed as callables so each query
# sees current state rather than a value captured at import time.
CRAWLER_QUERY_SERVICE = CrawlerJobQueryService(
crawl_state_getter=crawl_manager.snapshot,
storage_state_getter=ui_state_store.snapshot,
)
def _get_crawler_database_runtime_settings():
    """Return the database runtime settings, forcing a fresh reload."""
    settings = get_database_runtime_settings(reload=True)
    return settings
# Central application service orchestrating login, crawl, and job queries.
# All route handlers below delegate to this object.
CRAWLER_APP_SERVICE = CrawlerService(
crawl_manager=crawl_manager,
login_manager=login_manager,
ui_state_store=ui_state_store,
crawler_query_service=CRAWLER_QUERY_SERVICE,
# Valid platform identifiers come from the label catalog's keys.
valid_platforms=PLATFORM_LABELS.keys(),
# "phone" is accepted as a login type even though it is not offered in
# the UI option catalog — presumably a legacy/API-only value; confirm.
valid_login_types=[option["value"] for option in LOGIN_TYPE_OPTIONS] + ["phone"],
reload_settings_callable=reload_settings,
database_runtime_settings_getter=_get_crawler_database_runtime_settings,
research_task_service=RESEARCH_TASK_APP_SERVICE,
)
def response_ok(**kwargs: Any):
    """Build a JSON success envelope, merging any extra payload keys."""
    body: dict[str, Any] = {"success": True}
    body.update(kwargs)
    return jsonify(body)
def response_error(message: str, status_code: int = 400):
    """Build a JSON failure envelope paired with an HTTP status code."""
    payload = {"success": False, "message": message}
    return jsonify(payload), status_code
def handle_service_error(exc: Exception):
    """Translate known service-layer exceptions into HTTP error responses.

    Validation problems map to 400, operation conflicts to 409; anything
    else is re-raised so Flask's normal error handling applies.
    """
    status_by_type = (
        (CrawlerValidationError, 400),
        (CrawlerOperationConflictError, 409),
    )
    for exc_type, status in status_by_type:
        if isinstance(exc, exc_type):
            return response_error(str(exc), status)
    raise exc
@crawler_bp.get("/options")
def get_crawler_options():
    """Return the static option catalogs plus form default values."""
    form_defaults = {
        "headless": True,
        "enable_comments": True,
        "enable_sub_comments": False,
        "start_page": 1,
        "max_notes": 20,
        "max_comments": 20,
        "login_type": "qrcode",
        "crawler_type": "search",
        "save_option": CRAWLER_APP_SERVICE.get_default_save_option(),
    }
    return response_ok(
        platforms=PLATFORM_OPTIONS,
        login_types=LOGIN_TYPE_OPTIONS,
        crawler_types=CRAWLER_TYPE_OPTIONS,
        save_options=SAVE_OPTION_OPTIONS,
        platform_capabilities=PLATFORM_CAPABILITIES,
        defaults=form_defaults,
    )
@crawler_bp.get("/state")
def get_crawler_state():
    """Return the current crawler state snapshot."""
    state_payload = CRAWLER_APP_SERVICE.build_crawler_state_payload()
    return response_ok(**state_payload)
@crawler_bp.get("/jobs")
def list_crawler_jobs():
    """Return the list of known crawler jobs."""
    jobs_payload = CRAWLER_APP_SERVICE.build_crawler_jobs_payload()
    return response_ok(**jobs_payload)
@crawler_bp.get("/jobs/<job_id>")
def get_crawler_job(job_id: str):
    """Return a single crawler job by id, or a 404 error if unknown."""
    payload = CRAWLER_APP_SERVICE.build_crawler_job_payload(job_id)
    job = payload["crawler_job"]
    if job is None:
        return response_error("Crawler job not found", 404)
    return response_ok(**payload)
@crawler_bp.post("/login/check")
def check_platform_login():
    """Check the login status for the platform named in the JSON body."""
    body = request.get_json(silent=True) or {}
    try:
        result = CRAWLER_APP_SERVICE.check_login(
            platform=body.get("platform"),
            headless=True,
        )
    except Exception as exc:
        return handle_service_error(exc)
    return response_ok(**result)
@crawler_bp.post("/login/start")
def start_platform_login():
    """Start a login flow for a platform using the requested login type."""
    body = request.get_json(silent=True) or {}
    try:
        result = CRAWLER_APP_SERVICE.start_login(
            platform=body.get("platform"),
            login_type=body.get("login_type") or "qrcode",
            cookies=body.get("cookies"),
            phone=body.get("phone"),
            headless=True,
        )
    except Exception as exc:
        return handle_service_error(exc)
    return response_ok(**result)
@crawler_bp.post("/login/cancel")
def cancel_platform_login():
    """Cancel any in-progress login flow."""
    try:
        result = CRAWLER_APP_SERVICE.cancel_login()
    except Exception as exc:
        return handle_service_error(exc)
    return response_ok(**result)
@crawler_bp.post("/start")
def start_crawler_task():
    """Start a crawl job configured by the request's JSON body."""
    body = request.get_json(silent=True) or {}
    try:
        result = CRAWLER_APP_SERVICE.start_crawler(body)
    except Exception as exc:
        return handle_service_error(exc)
    return response_ok(**result)
@crawler_bp.post("/stop")
def stop_crawler_task():
    """Stop the currently running crawl job."""
    try:
        result = CRAWLER_APP_SERVICE.stop_crawler()
    except Exception as exc:
        return handle_service_error(exc)
    return response_ok(**result)
# Public API of this module: the blueprint for app registration, the
# shared manager/service singletons, and the manager classes themselves.
__all__ = [
"crawler_bp",
"crawl_manager",
"login_manager",
"CrawlTaskManager",
"LoginTaskManager",
"CRAWLER_APP_SERVICE",
"CRAWLER_QUERY_SERVICE",
]