Toggle navigation
Toggle navigation
This project
Loading...
Sign in
万朱浩
/
Venue-Ops
Go to a project
Toggle navigation
Projects
Groups
Snippets
Help
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
666ghj
2025-10-08 01:40:19 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
e70fceb3f33449756aa81cdcacde5f08f8608464
e70fceb3
1 parent
52eca47c
Optimize sentiment analysis module fault tolerance and keyword optimizer prompts.
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
173 additions
and
65 deletions
InsightEngine/agent.py
InsightEngine/tools/keyword_optimizer.py
InsightEngine/tools/sentiment_analyzer.py
InsightEngine/agent.py
View file @
e70fceb
...
...
@@ -292,13 +292,14 @@ class DeepSearchAgent:
情感分析结果字典,如果失败则返回None
"""
try
:
# 初始化情感分析器(如果尚未初始化)
if
not
self
.
sentiment_analyzer
.
is_initialized
:
# 初始化情感分析器(如果尚未初始化且未被禁用)
if
not
self
.
sentiment_analyzer
.
is_initialized
and
not
self
.
sentiment_analyzer
.
is_disabled
:
print
(
" 初始化情感分析模型..."
)
if
not
self
.
sentiment_analyzer
.
initialize
():
print
(
" ❌ 情感分析模型初始化失败"
)
return
None
print
(
" 情感分析模型初始化失败,将直接透传原始文本"
)
elif
self
.
sentiment_analyzer
.
is_disabled
:
print
(
" 情感分析功能已禁用,直接透传原始文本"
)
# 将查询结果转换为字典格式
results_dict
=
[]
for
result
in
results
:
...
...
@@ -337,34 +338,46 @@ class DeepSearchAgent:
print
(
f
" → 执行独立情感分析"
)
try
:
# 初始化情感分析器(如果尚未初始化)
if
not
self
.
sentiment_analyzer
.
is_initialized
:
# 初始化情感分析器(如果尚未初始化且未被禁用)
if
not
self
.
sentiment_analyzer
.
is_initialized
and
not
self
.
sentiment_analyzer
.
is_disabled
:
print
(
" 初始化情感分析模型..."
)
if
not
self
.
sentiment_analyzer
.
initialize
():
return
{
"success"
:
False
,
"error"
:
"情感分析模型初始化失败"
,
"results"
:
[]
}
print
(
" 情感分析模型初始化失败,将直接透传原始文本"
)
elif
self
.
sentiment_analyzer
.
is_disabled
:
print
(
" 情感分析功能已禁用,直接透传原始文本"
)
# 执行分析
if
isinstance
(
texts
,
str
):
result
=
self
.
sentiment_analyzer
.
analyze_single_text
(
texts
)
return
{
"success"
:
True
,
"total_analyzed"
:
1
,
"results"
:
[
result
.
__dict__
]
result_dict
=
result
.
__dict__
response
=
{
"success"
:
result
.
success
and
result
.
analysis_performed
,
"total_analyzed"
:
1
if
result
.
analysis_performed
and
result
.
success
else
0
,
"results"
:
[
result_dict
]
}
if
not
result
.
analysis_performed
:
response
[
"success"
]
=
False
response
[
"warning"
]
=
result
.
error_message
or
"情感分析功能不可用,已直接返回原始文本"
return
response
else
:
batch_result
=
self
.
sentiment_analyzer
.
analyze_batch
(
texts
,
show_progress
=
True
)
return
{
"success"
:
True
,
"total_analyzed"
:
batch_result
.
total_processed
,
texts_list
=
list
(
texts
)
batch_result
=
self
.
sentiment_analyzer
.
analyze_batch
(
texts_list
,
show_progress
=
True
)
response
=
{
"success"
:
batch_result
.
analysis_performed
and
batch_result
.
success_count
>
0
,
"total_analyzed"
:
batch_result
.
total_processed
if
batch_result
.
analysis_performed
else
0
,
"success_count"
:
batch_result
.
success_count
,
"failed_count"
:
batch_result
.
failed_count
,
"average_confidence"
:
batch_result
.
average_confidence
,
"average_confidence"
:
batch_result
.
average_confidence
if
batch_result
.
analysis_performed
else
0.0
,
"results"
:
[
result
.
__dict__
for
result
in
batch_result
.
results
]
}
if
not
batch_result
.
analysis_performed
:
warning
=
next
(
(
r
.
error_message
for
r
in
batch_result
.
results
if
r
.
error_message
),
"情感分析功能不可用,已直接返回原始文本"
)
response
[
"success"
]
=
False
response
[
"warning"
]
=
warning
return
response
except
Exception
as
e
:
print
(
f
" ❌ 情感分析过程中发生错误: {str(e)}"
)
...
...
@@ -486,11 +499,11 @@ class DeepSearchAgent:
search_kwargs
[
"end_date"
]
=
end_date
print
(
f
" - 时间范围: {start_date} 到 {end_date}"
)
else
:
print
(
f
"
⚠️
日期格式错误(应为YYYY-MM-DD),改用全局搜索"
)
print
(
f
" 日期格式错误(应为YYYY-MM-DD),改用全局搜索"
)
print
(
f
" 提供的日期: start_date={start_date}, end_date={end_date}"
)
search_tool
=
"search_topic_globally"
elif
search_tool
==
"search_topic_by_date"
:
print
(
f
"
⚠️
search_topic_by_date工具缺少时间参数,改用全局搜索"
)
print
(
f
" search_topic_by_date工具缺少时间参数,改用全局搜索"
)
search_tool
=
"search_topic_globally"
# 处理需要平台参数的工具
...
...
@@ -500,7 +513,7 @@ class DeepSearchAgent:
search_kwargs
[
"platform"
]
=
platform
print
(
f
" - 指定平台: {platform}"
)
else
:
print
(
f
"
⚠️
search_topic_on_platform工具缺少平台参数,改用全局搜索"
)
print
(
f
" search_topic_on_platform工具缺少平台参数,改用全局搜索"
)
search_tool
=
"search_topic_globally"
# 处理限制参数,使用配置文件中的默认值而不是agent提供的参数
...
...
@@ -615,11 +628,11 @@ class DeepSearchAgent:
search_kwargs
[
"end_date"
]
=
end_date
print
(
f
" 时间范围: {start_date} 到 {end_date}"
)
else
:
print
(
f
"
⚠️
日期格式错误(应为YYYY-MM-DD),改用全局搜索"
)
print
(
f
" 日期格式错误(应为YYYY-MM-DD),改用全局搜索"
)
print
(
f
" 提供的日期: start_date={start_date}, end_date={end_date}"
)
search_tool
=
"search_topic_globally"
elif
search_tool
==
"search_topic_by_date"
:
print
(
f
"
⚠️
search_topic_by_date工具缺少时间参数,改用全局搜索"
)
print
(
f
" search_topic_by_date工具缺少时间参数,改用全局搜索"
)
search_tool
=
"search_topic_globally"
# 处理需要平台参数的工具
...
...
@@ -629,7 +642,7 @@ class DeepSearchAgent:
search_kwargs
[
"platform"
]
=
platform
print
(
f
" 指定平台: {platform}"
)
else
:
print
(
f
"
⚠️
search_topic_on_platform工具缺少平台参数,改用全局搜索"
)
print
(
f
" search_topic_on_platform工具缺少平台参数,改用全局搜索"
)
search_tool
=
"search_topic_globally"
# 处理限制参数
...
...
InsightEngine/tools/keyword_optimizer.py
View file @
e70fceb
...
...
@@ -147,6 +147,8 @@ class KeywordOptimizer:
5. **数量控制**:最少提供10个关键词,最多提供20个关键词
6. **避免重复**:不要脱离初始查询的主题
**重要提醒**:每个关键词都必须是一个不可分割的独立词条,严禁在词条内部包含空格。例如,应使用 "雷军班争议" 而不是错误的 "雷军班 争议"。
**输出格式**:
请以JSON格式返回结果:
{
...
...
InsightEngine/tools/sentiment_analyzer.py
View file @
e70fceb
...
...
@@ -16,7 +16,6 @@ project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(_
weibo_sentiment_path
=
os
.
path
.
join
(
project_root
,
"SentimentAnalysisModel"
,
"WeiboMultilingualSentiment"
)
sys
.
path
.
append
(
weibo_sentiment_path
)
@dataclass
class
SentimentResult
:
"""情感分析结果数据类"""
...
...
@@ -26,6 +25,7 @@ class SentimentResult:
probability_distribution
:
Dict
[
str
,
float
]
success
:
bool
=
True
error_message
:
Optional
[
str
]
=
None
analysis_performed
:
bool
=
True
@dataclass
...
...
@@ -36,6 +36,7 @@ class BatchSentimentResult:
success_count
:
int
failed_count
:
int
average_confidence
:
float
analysis_performed
:
bool
=
True
class
WeiboMultilingualSentimentAnalyzer
:
...
...
@@ -50,6 +51,7 @@ class WeiboMultilingualSentimentAnalyzer:
self
.
tokenizer
=
None
self
.
device
=
None
self
.
is_initialized
=
False
self
.
is_disabled
=
False
# 情感标签映射(5级分类)
self
.
sentiment_map
=
{
...
...
@@ -69,6 +71,10 @@ class WeiboMultilingualSentimentAnalyzer:
Returns:
是否初始化成功
"""
if
self
.
is_disabled
:
print
(
"情感分析功能已禁用,跳过模型加载"
)
return
False
if
self
.
is_initialized
:
print
(
"模型已经初始化,无需重复加载"
)
return
True
...
...
@@ -102,6 +108,7 @@ class WeiboMultilingualSentimentAnalyzer:
self
.
model
.
to
(
self
.
device
)
self
.
model
.
eval
()
self
.
is_initialized
=
True
self
.
is_disabled
=
False
print
(
f
"模型加载成功! 使用设备: {self.device}"
)
print
(
"支持语言: 中文、英文、西班牙文、阿拉伯文、日文、韩文等22种语言"
)
...
...
@@ -113,6 +120,11 @@ class WeiboMultilingualSentimentAnalyzer:
print
(
f
"模型加载失败: {e}"
)
print
(
"请检查网络连接或模型文件"
)
self
.
is_initialized
=
False
self
.
is_disabled
=
True
self
.
model
=
None
self
.
tokenizer
=
None
self
.
device
=
None
print
(
"情感分析功能已禁用,将直接返回原始文本内容"
)
return
False
def
_preprocess_text
(
self
,
text
:
str
)
->
str
:
...
...
@@ -144,6 +156,17 @@ class WeiboMultilingualSentimentAnalyzer:
Returns:
SentimentResult对象
"""
if
self
.
is_disabled
:
return
SentimentResult
(
text
=
text
,
sentiment_label
=
"情感分析未执行"
,
confidence
=
0.0
,
probability_distribution
=
{},
success
=
False
,
error_message
=
"情感分析功能已禁用"
,
analysis_performed
=
False
)
if
not
self
.
is_initialized
:
return
SentimentResult
(
text
=
text
,
...
...
@@ -151,13 +174,14 @@ class WeiboMultilingualSentimentAnalyzer:
confidence
=
0.0
,
probability_distribution
=
{},
success
=
False
,
error_message
=
"模型未初始化,请先调用 initialize() 方法"
error_message
=
"模型未初始化,请先调用initialize() 方法"
,
analysis_performed
=
False
)
try
:
# 预处理文本
processed_text
=
self
.
_preprocess_text
(
text
)
if
not
processed_text
:
return
SentimentResult
(
text
=
text
,
...
...
@@ -165,9 +189,10 @@ class WeiboMultilingualSentimentAnalyzer:
confidence
=
0.0
,
probability_distribution
=
{},
success
=
False
,
error_message
=
"输入文本为空或无效"
error_message
=
"输入文本为空或无效内容"
,
analysis_performed
=
False
)
# 分词编码
inputs
=
self
.
tokenizer
(
processed_text
,
...
...
@@ -176,26 +201,26 @@ class WeiboMultilingualSentimentAnalyzer:
truncation
=
True
,
return_tensors
=
'pt'
)
# 转移到设备
inputs
=
{
k
:
v
.
to
(
self
.
device
)
for
k
,
v
in
inputs
.
items
()}
# 预测
with
torch
.
no_grad
():
outputs
=
self
.
model
(
**
inputs
)
logits
=
outputs
.
logits
probabilities
=
torch
.
softmax
(
logits
,
dim
=
1
)
prediction
=
torch
.
argmax
(
probabilities
,
dim
=
1
)
.
item
()
# 构建结果
confidence
=
probabilities
[
0
][
prediction
]
.
item
()
label
=
self
.
sentiment_map
[
prediction
]
# 构建概率分布字典
prob_dist
=
{}
for
i
,
(
label_name
,
prob
)
in
enumerate
(
zip
(
self
.
sentiment_map
.
values
(),
probabilities
[
0
])
):
for
label_name
,
prob
in
zip
(
self
.
sentiment_map
.
values
(),
probabilities
[
0
]
):
prob_dist
[
label_name
]
=
prob
.
item
()
return
SentimentResult
(
text
=
text
,
sentiment_label
=
label
,
...
...
@@ -203,7 +228,7 @@ class WeiboMultilingualSentimentAnalyzer:
probability_distribution
=
prob_dist
,
success
=
True
)
except
Exception
as
e
:
return
SentimentResult
(
text
=
text
,
...
...
@@ -211,9 +236,10 @@ class WeiboMultilingualSentimentAnalyzer:
confidence
=
0.0
,
probability_distribution
=
{},
success
=
False
,
error_message
=
f
"预测时发生错误: {str(e)}"
error_message
=
f
"预测时发生错误: {str(e)}"
,
analysis_performed
=
False
)
def
analyze_batch
(
self
,
texts
:
List
[
str
],
show_progress
:
bool
=
True
)
->
BatchSentimentResult
:
"""
批量情感分析
...
...
@@ -231,7 +257,30 @@ class WeiboMultilingualSentimentAnalyzer:
total_processed
=
0
,
success_count
=
0
,
failed_count
=
0
,
average_confidence
=
0.0
average_confidence
=
0.0
,
analysis_performed
=
not
self
.
is_disabled
and
self
.
is_initialized
)
if
self
.
is_disabled
or
not
self
.
is_initialized
:
passthrough_results
=
[
SentimentResult
(
text
=
text
,
sentiment_label
=
"情感分析未执行"
,
confidence
=
0.0
,
probability_distribution
=
{},
success
=
False
,
error_message
=
"情感分析功能不可用"
,
analysis_performed
=
False
)
for
text
in
texts
]
return
BatchSentimentResult
(
results
=
passthrough_results
,
total_processed
=
len
(
texts
),
success_count
=
0
,
failed_count
=
len
(
texts
),
average_confidence
=
0.0
,
analysis_performed
=
False
)
results
=
[]
...
...
@@ -257,9 +306,46 @@ class WeiboMultilingualSentimentAnalyzer:
total_processed
=
len
(
texts
),
success_count
=
success_count
,
failed_count
=
failed_count
,
average_confidence
=
average_confidence
average_confidence
=
average_confidence
,
analysis_performed
=
True
)
def
_build_passthrough_analysis
(
self
,
original_data
:
List
[
Dict
[
str
,
Any
]],
reason
:
str
,
texts
:
Optional
[
List
[
str
]]
=
None
,
results
:
Optional
[
List
[
SentimentResult
]]
=
None
)
->
Dict
[
str
,
Any
]:
"""
构建在情感分析不可用时的透传结�?
"""
total_items
=
len
(
texts
)
if
texts
is
not
None
else
len
(
original_data
)
response
:
Dict
[
str
,
Any
]
=
{
"sentiment_analysis"
:
{
"available"
:
False
,
"reason"
:
reason
,
"total_analyzed"
:
0
,
"success_rate"
:
f
"0/{total_items}"
,
"average_confidence"
:
0.0
,
"sentiment_distribution"
:
{},
"high_confidence_results"
:
[],
"summary"
:
f
"情感分析未执行:{reason}"
,
"original_texts"
:
original_data
}
}
if
texts
is
not
None
:
response
[
"sentiment_analysis"
][
"passthrough_texts"
]
=
texts
if
results
is
not
None
:
response
[
"sentiment_analysis"
][
"results"
]
=
[
result
.
__dict__
if
isinstance
(
result
,
SentimentResult
)
else
result
for
result
in
results
]
return
response
def
analyze_query_results
(
self
,
query_results
:
List
[
Dict
[
str
,
Any
]],
text_field
:
str
=
"content"
,
min_confidence
:
float
=
0.5
)
->
Dict
[
str
,
Any
]:
...
...
@@ -311,10 +397,30 @@ class WeiboMultilingualSentimentAnalyzer:
}
}
if
self
.
is_disabled
:
return
self
.
_build_passthrough_analysis
(
original_data
=
original_data
,
reason
=
"情感分析模型不可用"
,
texts
=
texts_to_analyze
)
# 执行批量情感分析
print
(
f
"正在对{len(texts_to_analyze)}条内容进行情感分析..."
)
batch_result
=
self
.
analyze_batch
(
texts_to_analyze
,
show_progress
=
True
)
if
not
batch_result
.
analysis_performed
:
reason
=
"情感分析功能不可用"
if
batch_result
.
results
:
candidate_error
=
next
((
r
.
error_message
for
r
in
batch_result
.
results
if
r
.
error_message
),
None
)
if
candidate_error
:
reason
=
candidate_error
return
self
.
_build_passthrough_analysis
(
original_data
=
original_data
,
reason
=
reason
,
texts
=
texts_to_analyze
,
results
=
batch_result
.
results
)
# 统计情感分布
sentiment_distribution
=
{}
high_confidence_results
=
[]
...
...
@@ -392,31 +498,18 @@ def analyze_sentiment(text_or_texts: Union[str, List[str]],
Returns:
SentimentResult或BatchSentimentResult
"""
if
initialize_if_needed
and
not
multilingual_sentiment_analyzer
.
is_initialized
:
if
not
multilingual_sentiment_analyzer
.
initialize
():
# 如果初始化失败,返回失败结果
if
isinstance
(
text_or_texts
,
str
):
return
SentimentResult
(
text
=
text_or_texts
,
sentiment_label
=
"初始化失败"
,
confidence
=
0.0
,
probability_distribution
=
{},
success
=
False
,
error_message
=
"模型初始化失败"
)
else
:
return
BatchSentimentResult
(
results
=
[],
total_processed
=
0
,
success_count
=
0
,
failed_count
=
len
(
text_or_texts
),
average_confidence
=
0.0
)
if
(
initialize_if_needed
and
not
multilingual_sentiment_analyzer
.
is_initialized
and
not
multilingual_sentiment_analyzer
.
is_disabled
):
multilingual_sentiment_analyzer
.
initialize
()
if
isinstance
(
text_or_texts
,
str
):
return
multilingual_sentiment_analyzer
.
analyze_single_text
(
text_or_texts
)
else
:
return
multilingual_sentiment_analyzer
.
analyze_batch
(
text_or_texts
)
texts_list
=
list
(
text_or_texts
)
return
multilingual_sentiment_analyzer
.
analyze_batch
(
texts_list
)
if
__name__
==
"__main__"
:
...
...
Please
register
or
login
to post a comment