Toggle navigation
Toggle navigation
This project
Loading...
Sign in
万朱浩
/
Venue-Ops
Go to a project
Toggle navigation
Projects
Groups
Snippets
Help
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
戒酒的李白
2025-02-24 17:10:23 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
930046fd5ce07b5e45609360a28e6be2f139b850
930046fd
1 parent
1180f285
Add AI-powered Spider Configuration Assistant.
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
220 additions
and
0 deletions
templates/spider_control.html
views/spider_control.py
templates/spider_control.html
View file @
930046f
...
...
@@ -103,6 +103,39 @@
</div>
</div>
<!-- AI配置助手 -->
<div
class=
"card mb-4"
>
<div
class=
"card-header"
>
<h5
class=
"mb-0"
>
<i
class=
"fas fa-robot"
></i>
AI配置助手
</h5>
</div>
<div
class=
"card-body"
>
<div
class=
"mb-3"
>
<label
for=
"aiPrompt"
class=
"form-label"
>
用自然语言描述您的爬虫需求
</label>
<textarea
class=
"form-control"
id=
"aiPrompt"
rows=
"3"
placeholder=
"例如:我想爬取最近一周关于人工智能的热门微博,重点关注转发量超过1000的内容,每个话题爬取前5页内容。"
></textarea>
</div>
<div
class=
"d-flex justify-content-between align-items-center"
>
<button
class=
"btn btn-primary"
onclick=
"generateConfig()"
>
<i
class=
"fas fa-magic"
></i>
生成配置
</button>
<div
class=
"form-check"
>
<input
class=
"form-check-input"
type=
"checkbox"
id=
"autoApply"
checked
>
<label
class=
"form-check-label"
for=
"autoApply"
>
自动应用生成的配置
</label>
</div>
</div>
<div
id=
"aiResponse"
class=
"mt-3"
style=
"display: none;"
>
<div
class=
"alert alert-info"
>
<h6
class=
"alert-heading"
>
AI助手建议:
</h6>
<p
id=
"aiSuggestion"
class=
"mb-0"
></p>
</div>
</div>
</div>
</div>
<!-- 操作按钮 -->
<div
class=
"d-flex justify-content-between mb-5"
>
<button
class=
"btn btn-primary"
onclick=
"startCrawling()"
>
...
...
@@ -286,6 +319,63 @@
updateCrawlLog
(
data
.
message
);
}
};
// AI配置生成
/**
 * Ask the backend AI assistant for a crawler configuration.
 *
 * Reads the natural-language request from #aiPrompt, POSTs it to
 * /api/spider/ai-config, shows the returned advice in #aiSuggestion and,
 * when #autoApply is checked, copies the returned topics and parameters
 * into the page's form state. Any failure is shown in the same advice box.
 */
async function generateConfig() {
    const prompt = document.getElementById('aiPrompt').value.trim();
    if (!prompt) {
        alert('请输入您的爬虫需求描述!');
        return;
    }

    const aiResponse = document.getElementById('aiResponse');
    const aiSuggestion = document.getElementById('aiSuggestion');

    try {
        const response = await fetch('/api/spider/ai-config', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json'
            },
            body: JSON.stringify({ prompt })
        });

        // A non-2xx reply may not carry JSON at all; report the status
        // instead of letting response.json() fail with a parse error.
        if (!response.ok) {
            throw new Error('HTTP ' + response.status);
        }

        const data = await response.json();
        if (!data.success) {
            throw new Error(data.message || '未知错误');
        }

        // Show the AI advice.
        aiSuggestion.textContent = data.suggestion;
        aiResponse.style.display = 'block';

        // Only touch the form when the user opted in to auto-apply.
        if (!document.getElementById('autoApply').checked) {
            return;
        }

        // Validate the payload BEFORE clearing the current selection, so a
        // malformed reply cannot wipe the user's topics and then throw.
        const config = data.config || {};
        const topics = config.topics;
        const parameters = config.parameters;
        if (!Array.isArray(topics) || !parameters) {
            throw new Error('配置格式无效');
        }

        // Replace the existing selection with the suggested topics.
        selectedTopics.clear();
        topics.forEach(topic => selectedTopics.add(topic));

        // Each input's id matches the parameter key it displays.
        ['crawlDepth', 'interval', 'maxRetries', 'timeout'].forEach(key => {
            document.getElementById(key).value = parameters[key];
        });

        // Refresh the topic list and leave a note in the crawl log.
        updateSelectedTopicsList();
        updateCrawlLog('AI配置已自动应用');
    } catch (error) {
        aiSuggestion.textContent = '生成配置时出错:' + error.message;
        aiResponse.style.display = 'block';
    }
}
</script>
</body>
</html>
\ No newline at end of file
...
...
views/spider_control.py
View file @
930046f
...
...
@@ -8,6 +8,8 @@ import asyncio
import
websockets
import
logging
from
spider.spiderData
import
SpiderData
from
openai
import
OpenAI
from
anthropic
import
Anthropic
# 创建蓝图
spider_bp
=
Blueprint
(
'spider'
,
__name__
)
...
...
@@ -211,3 +213,131 @@ async def spider_status_socket():
websocket_connections
.
remove
(
websocket
)
except
Exception
as
e
:
logger
.
error
(
f
"WebSocket连接处理失败: {e}"
)
def get_ai_client():
    """Return a client for the first AI service that has an API key configured.

    Anthropic is preferred over OpenAI; the choice is driven by the
    ``ANTHROPIC_API_KEY`` and ``OPENAI_API_KEY`` environment variables.

    Returns:
        A dict with ``'type'`` (``'anthropic'`` or ``'openai'``) and
        ``'client'`` (the constructed SDK client).

    Raises:
        ValueError: If neither environment variable is set to a non-empty value.
    """
    anthropic_key = os.getenv('ANTHROPIC_API_KEY')
    if anthropic_key:
        return {'type': 'anthropic', 'client': Anthropic(api_key=anthropic_key)}

    openai_key = os.getenv('OPENAI_API_KEY')
    if openai_key:
        return {'type': 'openai', 'client': OpenAI(api_key=openai_key)}

    raise ValueError("未找到可用的AI API密钥")
def parse_ai_response(response_text):
    """Split an AI reply into its JSON crawler configuration and the advice text.

    The reply is scanned for the first JSON object that carries a ``topics``
    list. Scanning with a JSON decoder (instead of slicing from the first
    ``{`` to the last ``}``) tolerates braces inside the advice text and any
    trailing text after the configuration.

    Args:
        response_text: Raw text returned by the AI service.

    Returns:
        A ``(config, suggestion)`` tuple: the decoded configuration dict and
        the text that preceded it, with a trailing Markdown code-fence opener
        (for example three backticks followed by ``json``) removed.

    Raises:
        ValueError: If no JSON object can be decoded, the object has no
            ``topics`` list, or ``parameters`` lacks a required key.
    """
    required_keys = ('crawlDepth', 'interval', 'maxRetries', 'timeout')
    try:
        if not isinstance(response_text, str):
            raise ValueError("未找到有效的JSON配置")

        decoder = json.JSONDecoder()
        config = None
        saw_object = False
        start = response_text.find('{')
        while start != -1:
            try:
                candidate, _ = decoder.raw_decode(response_text, start)
            except ValueError:
                # Not valid JSON from this brace; try the next one.
                candidate = None
            if isinstance(candidate, dict):
                saw_object = True
                if isinstance(candidate.get('topics'), list):
                    config = candidate
                    break
            start = response_text.find('{', start + 1)

        if config is None:
            if saw_object:
                raise ValueError("配置必须包含话题列表")
            raise ValueError("未找到有效的JSON配置")

        # ``parameters`` may be missing or not an object (e.g. JSON null).
        parameters = config.get('parameters')
        if not isinstance(parameters, dict) or any(
            key not in parameters for key in required_keys
        ):
            raise ValueError("配置缺少必要的参数")

        # The advice is everything before the JSON object. Drop a dangling
        # code-fence opener so it is not shown to the user.
        prefix = response_text[:start].rstrip()
        fence = prefix.rfind('```')
        if fence != -1:
            tag = prefix[fence + 3:]
            at_line_start = fence == 0 or prefix[fence - 1] == '\n'
            if at_line_start and (not tag or tag.isalnum()):
                prefix = prefix[:fence]
        suggestion = prefix.strip()

        return config, suggestion
    except Exception as e:
        raise ValueError(f"解析AI响应失败: {str(e)}") from e
@spider_bp.route('/api/spider/ai-config', methods=['POST'])
def generate_ai_config():
    """Generate a crawler configuration from a natural-language request.

    Expects a JSON body with a ``prompt`` string. The prompt is sent, together
    with a fixed system prompt, to whichever AI client ``get_ai_client``
    returns, and the reply is split by ``parse_ai_response``.

    Returns:
        A JSON response. On success: ``success=True`` with ``config`` and
        ``suggestion``. On any failure: ``success=False`` with ``message``.
    """
    try:
        # silent=True yields None for a missing or malformed body instead of
        # raising, so the caller gets the "prompt required" message.
        payload = request.get_json(silent=True)
        prompt = payload.get('prompt', '') if isinstance(payload, dict) else ''
        if not isinstance(prompt, str) or not prompt.strip():
            return jsonify({
                'success': False,
                'message': '请提供爬虫需求描述'
            })

        # Build the AI system prompt.
        system_prompt = """你是一个专业的爬虫配置助手。请根据用户的自然语言描述,生成合适的微博爬虫配置。
配置应包含以下内容:
1. 要爬取的话题列表
2. 爬虫参数(爬取深度、间隔时间、重试次数、超时时间)
请先用通俗易懂的语言解释你的配置建议,然后在最后提供一个JSON格式的具体配置。
注意:
- 爬取深度(crawlDepth)范围:1-10页
- 间隔时间(interval)范围:3-30秒
- 重试次数(maxRetries)范围:1-5次
- 超时时间(timeout)范围:10-60秒
- 所有参数都必须是整数
示例输出格式:
根据您的需求,我建议...
{
"topics": ["话题1", "话题2"],
"parameters": {
"crawlDepth": 5,
"interval": 5,
"maxRetries": 3,
"timeout": 30
}
}"""

        # Pick the AI client.
        ai = get_ai_client()

        try:
            if ai['type'] == 'anthropic':
                # The Anthropic Messages API takes the system prompt as a
                # top-level argument; "system" is not a valid message role.
                response = ai['client'].messages.create(
                    model="claude-3-sonnet-20240229",
                    max_tokens=1000,
                    system=system_prompt,
                    messages=[
                        {"role": "user", "content": prompt}
                    ]
                )
                response_text = response.content[0].text
            else:
                # OpenAI chat completions accept the system prompt as a message.
                response = ai['client'].chat.completions.create(
                    model="gpt-3.5-turbo",
                    messages=[
                        {"role": "system", "content": system_prompt},
                        {"role": "user", "content": prompt}
                    ]
                )
                response_text = response.choices[0].message.content

            # Parse the AI reply.
            config, suggestion = parse_ai_response(response_text)

            return jsonify({
                'success': True,
                'config': config,
                'suggestion': suggestion
            })

        except Exception as e:
            logger.error(f"AI服务调用失败: {e}")
            return jsonify({
                'success': False,
                'message': f"AI配置生成失败: {str(e)}"
            })

    except Exception as e:
        logger.error(f"生成配置失败: {e}")
        return jsonify({
            'success': False,
            'message': str(e)
        })
\ No newline at end of file
...
...
Please
register
or
login
to post a comment