Showing
2 changed files
with
220 additions
and
0 deletions
| @@ -103,6 +103,39 @@ | @@ -103,6 +103,39 @@ | ||
| 103 | </div> | 103 | </div> |
| 104 | </div> | 104 | </div> |
| 105 | 105 | ||
| 106 | + <!-- AI configuration assistant card: free-text prompt (#aiPrompt), a button that calls generateConfig(), an "auto-apply" checkbox (#autoApply, checked by default), and a result panel (#aiResponse) that starts hidden --> | ||
| 107 | + <div class="card mb-4"> | ||
| 108 | + <div class="card-header"> | ||
| 109 | + <h5 class="mb-0"> | ||
| 110 | + <i class="fas fa-robot"></i> AI配置助手 | ||
| 111 | + </h5> | ||
| 112 | + </div> | ||
| 113 | + <div class="card-body"> | ||
| 114 | + <div class="mb-3"> | ||
| 115 | + <label for="aiPrompt" class="form-label">用自然语言描述您的爬虫需求</label> | ||
| 116 | + <textarea class="form-control" id="aiPrompt" rows="3" | ||
| 117 | + placeholder="例如:我想爬取最近一周关于人工智能的热门微博,重点关注转发量超过1000的内容,每个话题爬取前5页内容。"></textarea> | ||
| 118 | + </div> | ||
| 119 | + <div class="d-flex justify-content-between align-items-center"> | ||
| 120 | + <button class="btn btn-primary" onclick="generateConfig()"> | ||
| 121 | + <i class="fas fa-magic"></i> 生成配置 | ||
| 122 | + </button> | ||
| 123 | + <div class="form-check"> | ||
| 124 | + <input class="form-check-input" type="checkbox" id="autoApply" checked> | ||
| 125 | + <label class="form-check-label" for="autoApply"> | ||
| 126 | + 自动应用生成的配置 | ||
| 127 | + </label> | ||
| 128 | + </div> | ||
| 129 | + </div> | ||
| 130 | + <div id="aiResponse" class="mt-3" style="display: none;"> | ||
| 131 | + <div class="alert alert-info"> | ||
| 132 | + <h6 class="alert-heading">AI助手建议:</h6> | ||
| 133 | + <p id="aiSuggestion" class="mb-0"></p> | ||
| 134 | + </div> | ||
| 135 | + </div> | ||
| 136 | + </div> | ||
| 137 | + </div> | ||
| 138 | + | ||
| 106 | <!-- 操作按钮 --> | 139 | <!-- 操作按钮 --> |
| 107 | <div class="d-flex justify-content-between mb-5"> | 140 | <div class="d-flex justify-content-between mb-5"> |
| 108 | <button class="btn btn-primary" onclick="startCrawling()"> | 141 | <button class="btn btn-primary" onclick="startCrawling()"> |
| @@ -286,6 +319,63 @@ | @@ -286,6 +319,63 @@ | ||
| 286 | updateCrawlLog(data.message); | 319 | updateCrawlLog(data.message); |
| 287 | } | 320 | } |
| 288 | }; | 321 | }; |
| 322 | + | ||
/**
 * Ask the backend AI assistant for a crawler configuration and, when the
 * "auto apply" checkbox is ticked, apply it to the page.
 *
 * Flow:
 *  1. Read the prompt from #aiPrompt; alert and bail out when it is empty.
 *  2. POST `{ prompt }` as JSON to /api/spider/ai-config.
 *  3. Show the returned suggestion text in #aiSuggestion / #aiResponse.
 *  4. If #autoApply is checked, replace `selectedTopics` with the returned
 *     topics and copy the four crawl parameters into their inputs.
 *
 * The returned config is validated in full BEFORE any page state is touched,
 * so a malformed payload can no longer wipe the user's current topic
 * selection or leave the form half-updated.
 *
 * Any failure (network, non-JSON body, `success: false`, incomplete config)
 * is rendered into the same suggestion panel; nothing is thrown to the caller.
 *
 * @returns {Promise<void>}
 */
async function generateConfig() {
    const prompt = document.getElementById('aiPrompt').value.trim();
    if (!prompt) {
        alert('请输入您的爬虫需求描述!');
        return;
    }

    const aiResponse = document.getElementById('aiResponse');
    const aiSuggestion = document.getElementById('aiSuggestion');
    // Input element ids double as the keys of `config.parameters`.
    const parameterIds = ['crawlDepth', 'interval', 'maxRetries', 'timeout'];

    try {
        const response = await fetch('/api/spider/ai-config', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
            },
            body: JSON.stringify({ prompt }),
        });

        // A proxy or framework error page is not JSON; report the HTTP status
        // instead of a bare parser error.
        let data;
        try {
            data = await response.json();
        } catch (parseError) {
            throw new Error(`服务器响应无法解析 (HTTP ${response.status}): ${parseError.message}`);
        }

        if (!data || !data.success) {
            throw new Error((data && data.message) || `HTTP ${response.status}`);
        }

        // Show the assistant's explanation.
        aiSuggestion.textContent = data.suggestion;
        aiResponse.style.display = 'block';

        if (!document.getElementById('autoApply').checked) {
            return;
        }

        // Validate everything up front so the mutation below is all-or-nothing.
        const config = data.config || {};
        const topics = config.topics;
        const parameters = config.parameters;
        const isComplete = Array.isArray(topics)
            && parameters !== null
            && typeof parameters === 'object'
            && parameterIds.every((id) => id in parameters);
        if (!isComplete) {
            throw new Error('AI返回的配置不完整');
        }

        // Replace the current topic selection.
        selectedTopics.clear();
        topics.forEach((topic) => selectedTopics.add(topic));

        // Copy crawl parameters into the form.
        parameterIds.forEach((id) => {
            document.getElementById(id).value = parameters[id];
        });

        updateSelectedTopicsList();
        updateCrawlLog('AI配置已自动应用');
    } catch (error) {
        aiSuggestion.textContent = `生成配置时出错:${error.message}`;
        aiResponse.style.display = 'block';
    }
}
| 289 | </script> | 379 | </script> |
| 290 | </body> | 380 | </body> |
| 291 | </html> | 381 | </html> |
| @@ -8,6 +8,8 @@ import asyncio | @@ -8,6 +8,8 @@ import asyncio | ||
| 8 | import websockets | 8 | import websockets |
| 9 | import logging | 9 | import logging |
| 10 | from spider.spiderData import SpiderData | 10 | from spider.spiderData import SpiderData |
| 11 | +from openai import OpenAI | ||
| 12 | +from anthropic import Anthropic | ||
| 11 | 13 | ||
| 12 | # 创建蓝图 | 14 | # 创建蓝图 |
| 13 | spider_bp = Blueprint('spider', __name__) | 15 | spider_bp = Blueprint('spider', __name__) |
| @@ -211,3 +213,131 @@ async def spider_status_socket(): | @@ -211,3 +213,131 @@ async def spider_status_socket(): | ||
| 211 | websocket_connections.remove(websocket) | 213 | websocket_connections.remove(websocket) |
| 212 | except Exception as e: | 214 | except Exception as e: |
| 213 | logger.error(f"WebSocket连接处理失败: {e}") | 215 | logger.error(f"WebSocket连接处理失败: {e}") |
| 216 | + | ||
def get_ai_client():
    """Return a client for the first AI provider that has an API key configured.

    Providers are tried in priority order: Anthropic (``ANTHROPIC_API_KEY``),
    then OpenAI (``OPENAI_API_KEY``).

    Returns:
        dict: ``{'type': 'anthropic' | 'openai', 'client': <SDK client>}``.

    Raises:
        ValueError: if neither environment variable is set to a non-empty value.
    """
    anthropic_key = os.getenv('ANTHROPIC_API_KEY')
    if anthropic_key:
        return {
            'type': 'anthropic',
            'client': Anthropic(api_key=anthropic_key),
        }

    openai_key = os.getenv('OPENAI_API_KEY')
    if openai_key:
        return {
            'type': 'openai',
            'client': OpenAI(api_key=openai_key),
        }

    raise ValueError("未找到可用的AI API密钥")
| 232 | + | ||
def parse_ai_response(response_text):
    """Split an AI reply into its JSON configuration and the explanation before it.

    The JSON is taken as the span from the first ``{`` to the last ``}`` in
    ``response_text``; everything before that span is the suggestion text.

    Args:
        response_text: Raw text returned by the AI model.

    Returns:
        tuple: ``(config, suggestion)`` where ``config`` is the decoded dict
        (guaranteed to have a list under ``'topics'`` and a dict under
        ``'parameters'`` containing ``crawlDepth``, ``interval``,
        ``maxRetries`` and ``timeout``) and ``suggestion`` is the stripped
        explanatory text.

    Raises:
        ValueError: on any failure (no JSON found, invalid JSON, missing or
            malformed fields). The message is prefixed with
            ``解析AI响应失败: `` and the underlying error is chained.
    """
    required_keys = ('crawlDepth', 'interval', 'maxRetries', 'timeout')
    try:
        # Locate the JSON payload.
        start = response_text.find('{')
        end = response_text.rfind('}') + 1
        if start == -1 or end == 0:
            raise ValueError("未找到有效的JSON配置")

        config = json.loads(response_text[start:end])

        # Validate the configuration shape.
        if not isinstance(config.get('topics'), list):
            raise ValueError("配置必须包含话题列表")

        # Require a real mapping: a list of the key names would otherwise
        # satisfy the membership test below.
        parameters = config.get('parameters')
        if not isinstance(parameters, dict) or not all(key in parameters for key in required_keys):
            raise ValueError("配置缺少必要的参数")

        # Suggestion text is whatever precedes the JSON. Models often wrap the
        # JSON in a Markdown code fence; drop a trailing fence opener
        # (e.g. "```json") so it does not leak into the text shown to the user.
        suggestion = response_text[:start].rstrip()
        last_line_start = suggestion.rfind('\n') + 1
        if suggestion[last_line_start:].lstrip().startswith('```'):
            suggestion = suggestion[:last_line_start]

        return config, suggestion.strip()
    except Exception as e:
        raise ValueError(f"解析AI响应失败: {str(e)}") from e
| 259 | + | ||
@spider_bp.route('/api/spider/ai-config', methods=['POST'])
def generate_ai_config():
    """Generate a crawler configuration from a natural-language description.

    Expects a JSON body ``{"prompt": "<description>"}``. The prompt is sent,
    together with a fixed system prompt, to whichever provider
    ``get_ai_client()`` returns; the reply is parsed by ``parse_ai_response()``.

    Returns:
        A JSON response. On success:
        ``{'success': True, 'config': <dict>, 'suggestion': <str>}``.
        On any failure: ``{'success': False, 'message': <str>}``.
        Failures are reported in the body only; the HTTP status is left at
        its default because the page script branches on ``success``.
    """
    try:
        # silent=True: a missing or non-JSON body yields None instead of
        # raising, so the caller gets the "please provide a description"
        # message rather than a framework error string.
        payload = request.get_json(silent=True)
        prompt = payload.get('prompt', '') if isinstance(payload, dict) else ''
        if not prompt:
            return jsonify({
                'success': False,
                'message': '请提供爬虫需求描述'
            })

        # System prompt: describes the expected output format and value ranges.
        system_prompt = """你是一个专业的爬虫配置助手。请根据用户的自然语言描述,生成合适的微博爬虫配置。
配置应包含以下内容:
1. 要爬取的话题列表
2. 爬虫参数(爬取深度、间隔时间、重试次数、超时时间)

请先用通俗易懂的语言解释你的配置建议,然后在最后提供一个JSON格式的具体配置。
注意:
- 爬取深度(crawlDepth)范围:1-10页
- 间隔时间(interval)范围:3-30秒
- 重试次数(maxRetries)范围:1-5次
- 超时时间(timeout)范围:10-60秒
- 所有参数都必须是整数

示例输出格式:
根据您的需求,我建议...

{
    "topics": ["话题1", "话题2"],
    "parameters": {
        "crawlDepth": 5,
        "interval": 5,
        "maxRetries": 3,
        "timeout": 30
    }
}"""

        # Pick the AI provider; raises ValueError when no API key is configured
        # (handled by the outer except).
        ai = get_ai_client()

        try:
            if ai['type'] == 'anthropic':
                # The Anthropic Messages API takes the system prompt as the
                # top-level `system` argument; a {"role": "system"} entry in
                # `messages` is rejected by the API.
                response = ai['client'].messages.create(
                    model="claude-3-sonnet-20240229",
                    max_tokens=1000,
                    system=system_prompt,
                    messages=[
                        {"role": "user", "content": prompt}
                    ]
                )
                response_text = response.content[0].text
            else:  # OpenAI
                response = ai['client'].chat.completions.create(
                    model="gpt-3.5-turbo",
                    messages=[
                        {"role": "system", "content": system_prompt},
                        {"role": "user", "content": prompt}
                    ]
                )
                response_text = response.choices[0].message.content

            # Extract the JSON config and the explanatory text from the reply.
            config, suggestion = parse_ai_response(response_text)

            return jsonify({
                'success': True,
                'config': config,
                'suggestion': suggestion
            })

        except Exception as e:
            logger.error(f"AI服务调用失败: {e}")
            return jsonify({
                'success': False,
                'message': f"AI配置生成失败: {str(e)}"
            })

    except Exception as e:
        logger.error(f"生成配置失败: {e}")
        return jsonify({
            'success': False,
            'message': str(e)
        })
-
Please register or login to post a comment