Toggle navigation
Toggle navigation
This project
Loading...
Sign in
万朱浩
/
Venue-Ops
Go to a project
Toggle navigation
Projects
Groups
Snippets
Help
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
ghmark675
2025-11-10 19:00:06 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Committed by
666ghj
2025-11-11 01:11:32 +0800
Commit
aa11c529c8707615a2f40d8a301cd740f345dbad
aa11c529
1 parent
71f4b3ad
style(sentiment_analyzer): format file
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
112 additions
and
48 deletions
InsightEngine/tools/sentiment_analyzer.py
InsightEngine/tools/sentiment_analyzer.py
View file @
aa11c52
...
...
@@ -11,6 +11,7 @@ import re
try
:
import
torch
TORCH_AVAILABLE
=
True
except
ImportError
:
torch
=
None
# type: ignore
...
...
@@ -18,6 +19,7 @@ except ImportError:
try
:
from
transformers
import
AutoTokenizer
,
AutoModelForSequenceClassification
TRANSFORMERS_AVAILABLE
=
True
except
ImportError
:
AutoTokenizer
=
None
# type: ignore
...
...
@@ -28,6 +30,7 @@ except ImportError:
# INFO:若想跳过情感分析,可手动切换此开关为False
SENTIMENT_ANALYSIS_ENABLED
=
True
def
_describe_missing_dependencies
()
->
str
:
missing
=
[]
if
not
TORCH_AVAILABLE
:
...
...
@@ -36,14 +39,21 @@ def _describe_missing_dependencies() -> str:
missing
.
append
(
"Transformers"
)
return
" / "
.
join
(
missing
)
# 添加项目根目录到路径,以便导入WeiboMultilingualSentiment
project_root
=
os
.
path
.
dirname
(
os
.
path
.
dirname
(
os
.
path
.
dirname
(
os
.
path
.
abspath
(
__file__
))))
weibo_sentiment_path
=
os
.
path
.
join
(
project_root
,
"SentimentAnalysisModel"
,
"WeiboMultilingualSentiment"
)
project_root
=
os
.
path
.
dirname
(
os
.
path
.
dirname
(
os
.
path
.
dirname
(
os
.
path
.
abspath
(
__file__
)))
)
weibo_sentiment_path
=
os
.
path
.
join
(
project_root
,
"SentimentAnalysisModel"
,
"WeiboMultilingualSentiment"
)
sys
.
path
.
append
(
weibo_sentiment_path
)
@dataclass
class
SentimentResult
:
"""情感分析结果数据类"""
text
:
str
sentiment_label
:
str
confidence
:
float
...
...
@@ -56,6 +66,7 @@ class SentimentResult:
@dataclass
class
BatchSentimentResult
:
"""批量情感分析结果数据类"""
results
:
List
[
SentimentResult
]
total_processed
:
int
success_count
:
int
...
...
@@ -85,7 +96,7 @@ class WeiboMultilingualSentimentAnalyzer:
1
:
"负面"
,
2
:
"中性"
,
3
:
"正面"
,
4
:
"非常正面"
4
:
"非常正面"
,
}
if
not
SENTIMENT_ANALYSIS_ENABLED
:
...
...
@@ -96,9 +107,13 @@ class WeiboMultilingualSentimentAnalyzer:
if
self
.
is_disabled
:
reason
=
self
.
disable_reason
or
"Sentiment analysis disabled."
print
(
f
"WeiboMultilingualSentimentAnalyzer initialized but disabled: {reason}"
)
print
(
f
"WeiboMultilingualSentimentAnalyzer initialized but disabled: {reason}"
)
else
:
print
(
"WeiboMultilingualSentimentAnalyzer 已创建,调用 initialize() 来加载模型"
)
print
(
"WeiboMultilingualSentimentAnalyzer 已创建,调用 initialize() 来加载模型"
)
def
disable
(
self
,
reason
:
Optional
[
str
]
=
None
,
drop_state
:
bool
=
False
)
->
None
:
"""Disable sentiment analysis, optionally clearing loaded resources."""
...
...
@@ -130,7 +145,11 @@ class WeiboMultilingualSentimentAnalyzer:
if
torch
.
cuda
.
is_available
():
return
torch
.
device
(
"cuda"
)
mps_backend
=
getattr
(
torch
.
backends
,
"mps"
,
None
)
if
mps_backend
and
getattr
(
mps_backend
,
"is_available"
,
lambda
:
False
)()
and
getattr
(
mps_backend
,
"is_built"
,
lambda
:
False
)():
if
(
mps_backend
and
getattr
(
mps_backend
,
"is_available"
,
lambda
:
False
)()
and
getattr
(
mps_backend
,
"is_built"
,
lambda
:
False
)()
):
return
torch
.
device
(
"mps"
)
return
torch
.
device
(
"cpu"
)
...
...
@@ -167,12 +186,16 @@ class WeiboMultilingualSentimentAnalyzer:
if
os
.
path
.
exists
(
local_model_path
):
print
(
"从本地加载模型..."
)
self
.
tokenizer
=
AutoTokenizer
.
from_pretrained
(
local_model_path
)
self
.
model
=
AutoModelForSequenceClassification
.
from_pretrained
(
local_model_path
)
self
.
model
=
AutoModelForSequenceClassification
.
from_pretrained
(
local_model_path
)
else
:
print
(
"首次使用,正在下载模型到本地..."
)
# 下载并保存到本地
self
.
tokenizer
=
AutoTokenizer
.
from_pretrained
(
model_name
)
self
.
model
=
AutoModelForSequenceClassification
.
from_pretrained
(
model_name
)
self
.
model
=
AutoModelForSequenceClassification
.
from_pretrained
(
model_name
)
# 保存到本地
os
.
makedirs
(
local_model_path
,
exist_ok
=
True
)
...
...
@@ -227,7 +250,7 @@ class WeiboMultilingualSentimentAnalyzer:
return
""
# 去除多余空格
text
=
re
.
sub
(
r
'
\
s+'
,
' '
,
text
.
strip
())
text
=
re
.
sub
(
r
"
\
s+"
,
" "
,
text
.
strip
())
return
text
...
...
@@ -249,7 +272,7 @@ class WeiboMultilingualSentimentAnalyzer:
probability_distribution
=
{},
success
=
False
,
error_message
=
self
.
disable_reason
or
"情感分析功能已禁用"
,
analysis_performed
=
False
analysis_performed
=
False
,
)
if
not
self
.
is_initialized
:
...
...
@@ -260,7 +283,7 @@ class WeiboMultilingualSentimentAnalyzer:
probability_distribution
=
{},
success
=
False
,
error_message
=
"模型未初始化,请先调用initialize() 方法"
,
analysis_performed
=
False
analysis_performed
=
False
,
)
try
:
...
...
@@ -275,7 +298,7 @@ class WeiboMultilingualSentimentAnalyzer:
probability_distribution
=
{},
success
=
False
,
error_message
=
"输入文本为空或无效内容"
,
analysis_performed
=
False
analysis_performed
=
False
,
)
# 分词编码
...
...
@@ -284,7 +307,7 @@ class WeiboMultilingualSentimentAnalyzer:
max_length
=
512
,
padding
=
True
,
truncation
=
True
,
return_tensors
=
'pt'
return_tensors
=
"pt"
,
)
# 转移到设备
...
...
@@ -311,7 +334,7 @@ class WeiboMultilingualSentimentAnalyzer:
sentiment_label
=
label
,
confidence
=
confidence
,
probability_distribution
=
prob_dist
,
success
=
True
success
=
True
,
)
except
Exception
as
e
:
...
...
@@ -322,10 +345,12 @@ class WeiboMultilingualSentimentAnalyzer:
probability_distribution
=
{},
success
=
False
,
error_message
=
f
"预测时发生错误: {str(e)}"
,
analysis_performed
=
False
analysis_performed
=
False
,
)
def
analyze_batch
(
self
,
texts
:
List
[
str
],
show_progress
:
bool
=
True
)
->
BatchSentimentResult
:
def
analyze_batch
(
self
,
texts
:
List
[
str
],
show_progress
:
bool
=
True
)
->
BatchSentimentResult
:
"""
批量情感分析
...
...
@@ -343,7 +368,7 @@ class WeiboMultilingualSentimentAnalyzer:
success_count
=
0
,
failed_count
=
0
,
average_confidence
=
0.0
,
analysis_performed
=
not
self
.
is_disabled
and
self
.
is_initialized
analysis_performed
=
not
self
.
is_disabled
and
self
.
is_initialized
,
)
if
self
.
is_disabled
or
not
self
.
is_initialized
:
...
...
@@ -355,7 +380,7 @@ class WeiboMultilingualSentimentAnalyzer:
probability_distribution
=
{},
success
=
False
,
error_message
=
self
.
disable_reason
or
"情感分析功能不可用"
,
analysis_performed
=
False
analysis_performed
=
False
,
)
for
text
in
texts
]
...
...
@@ -365,7 +390,7 @@ class WeiboMultilingualSentimentAnalyzer:
success_count
=
0
,
failed_count
=
len
(
texts
),
average_confidence
=
0.0
,
analysis_performed
=
False
analysis_performed
=
False
,
)
results
=
[]
...
...
@@ -374,7 +399,7 @@ class WeiboMultilingualSentimentAnalyzer:
for
i
,
text
in
enumerate
(
texts
):
if
show_progress
and
len
(
texts
)
>
1
:
print
(
f
"处理进度: {i
+
1}/{len(texts)}"
)
print
(
f
"处理进度: {i
+
1}/{len(texts)}"
)
result
=
self
.
analyze_single_text
(
text
)
results
.
append
(
result
)
...
...
@@ -383,7 +408,9 @@ class WeiboMultilingualSentimentAnalyzer:
success_count
+=
1
total_confidence
+=
result
.
confidence
average_confidence
=
total_confidence
/
success_count
if
success_count
>
0
else
0.0
average_confidence
=
(
total_confidence
/
success_count
if
success_count
>
0
else
0.0
)
failed_count
=
len
(
texts
)
-
success_count
return
BatchSentimentResult
(
...
...
@@ -392,7 +419,7 @@ class WeiboMultilingualSentimentAnalyzer:
success_count
=
success_count
,
failed_count
=
failed_count
,
average_confidence
=
average_confidence
,
analysis_performed
=
True
analysis_performed
=
True
,
)
def
_build_passthrough_analysis
(
...
...
@@ -400,7 +427,7 @@ class WeiboMultilingualSentimentAnalyzer:
original_data
:
List
[
Dict
[
str
,
Any
]],
reason
:
str
,
texts
:
Optional
[
List
[
str
]]
=
None
,
results
:
Optional
[
List
[
SentimentResult
]]
=
None
results
:
Optional
[
List
[
SentimentResult
]]
=
None
,
)
->
Dict
[
str
,
Any
]:
"""
构建在情感分析不可用时的透传结果
...
...
@@ -416,7 +443,7 @@ class WeiboMultilingualSentimentAnalyzer:
"sentiment_distribution"
:
{},
"high_confidence_results"
:
[],
"summary"
:
f
"情感分析未执行:{reason}"
,
"original_texts"
:
original_data
"original_texts"
:
original_data
,
}
}
...
...
@@ -431,9 +458,12 @@ class WeiboMultilingualSentimentAnalyzer:
return
response
def
analyze_query_results
(
self
,
query_results
:
List
[
Dict
[
str
,
Any
]],
def
analyze_query_results
(
self
,
query_results
:
List
[
Dict
[
str
,
Any
]],
text_field
:
str
=
"content"
,
min_confidence
:
float
=
0.5
)
->
Dict
[
str
,
Any
]:
min_confidence
:
float
=
0.5
,
)
->
Dict
[
str
,
Any
]:
"""
对查询结果进行情感分析
专门用于分析从MediaCrawlerDB返回的查询结果
...
...
@@ -452,7 +482,7 @@ class WeiboMultilingualSentimentAnalyzer:
"total_analyzed"
:
0
,
"sentiment_distribution"
:
{},
"high_confidence_results"
:
[],
"summary"
:
"没有内容需要分析"
"summary"
:
"没有内容需要分析"
,
}
}
...
...
@@ -478,7 +508,7 @@ class WeiboMultilingualSentimentAnalyzer:
"total_analyzed"
:
0
,
"sentiment_distribution"
:
{},
"high_confidence_results"
:
[],
"summary"
:
"查询结果中没有找到可分析的文本内容"
"summary"
:
"查询结果中没有找到可分析的文本内容"
,
}
}
...
...
@@ -486,7 +516,7 @@ class WeiboMultilingualSentimentAnalyzer:
return
self
.
_build_passthrough_analysis
(
original_data
=
original_data
,
reason
=
self
.
disable_reason
or
"情感分析模型不可用"
,
texts
=
texts_to_analyze
texts
=
texts_to_analyze
,
)
# 执行批量情感分析
...
...
@@ -496,14 +526,17 @@ class WeiboMultilingualSentimentAnalyzer:
if
not
batch_result
.
analysis_performed
:
reason
=
self
.
disable_reason
or
"情感分析功能不可用"
if
batch_result
.
results
:
candidate_error
=
next
((
r
.
error_message
for
r
in
batch_result
.
results
if
r
.
error_message
),
None
)
candidate_error
=
next
(
(
r
.
error_message
for
r
in
batch_result
.
results
if
r
.
error_message
),
None
,
)
if
candidate_error
:
reason
=
candidate_error
return
self
.
_build_passthrough_analysis
(
original_data
=
original_data
,
reason
=
reason
,
texts
=
texts_to_analyze
,
results
=
batch_result
.
results
results
=
batch_result
.
results
,
)
# 统计情感分布
...
...
@@ -520,18 +553,22 @@ class WeiboMultilingualSentimentAnalyzer:
# 收集高置信度结果
if
result
.
confidence
>=
min_confidence
:
high_confidence_results
.
append
({
high_confidence_results
.
append
(
{
"original_data"
:
original_item
,
"sentiment"
:
result
.
sentiment_label
,
"confidence"
:
result
.
confidence
,
"text_preview"
:
result
.
text
[:
100
]
+
"..."
if
len
(
result
.
text
)
>
100
else
result
.
text
})
"text_preview"
:
result
.
text
[:
100
]
+
"..."
if
len
(
result
.
text
)
>
100
else
result
.
text
,
}
)
# 生成情感分析摘要
total_analyzed
=
batch_result
.
success_count
if
total_analyzed
>
0
:
dominant_sentiment
=
max
(
sentiment_distribution
.
items
(),
key
=
lambda
x
:
x
[
1
])
sentiment_summary
=
f
"共分析{total_analyzed}条内容,主要情感倾向为'{dominant_sentiment[0]}'({dominant_sentiment[1]}条,占{dominant_sentiment[1]
/total_analyzed*
100:.1f}
%
)"
sentiment_summary
=
f
"共分析{total_analyzed}条内容,主要情感倾向为'{dominant_sentiment[0]}'({dominant_sentiment[1]}条,占{dominant_sentiment[1]
/ total_analyzed *
100:.1f}
%
)"
else
:
sentiment_summary
=
"情感分析失败"
...
...
@@ -542,7 +579,7 @@ class WeiboMultilingualSentimentAnalyzer:
"average_confidence"
:
round
(
batch_result
.
average_confidence
,
4
),
"sentiment_distribution"
:
sentiment_distribution
,
"high_confidence_results"
:
high_confidence_results
,
# 返回所有高置信度结果,不做限制
"summary"
:
sentiment_summary
"summary"
:
sentiment_summary
,
}
}
...
...
@@ -556,14 +593,32 @@ class WeiboMultilingualSentimentAnalyzer:
return
{
"model_name"
:
"tabularisai/multilingual-sentiment-analysis"
,
"supported_languages"
:
[
"中文"
,
"英文"
,
"西班牙文"
,
"阿拉伯文"
,
"日文"
,
"韩文"
,
"德文"
,
"法文"
,
"意大利文"
,
"葡萄牙文"
,
"俄文"
,
"荷兰文"
,
"波兰文"
,
"土耳其文"
,
"丹麦文"
,
"希腊文"
,
"芬兰文"
,
"瑞典文"
,
"挪威文"
,
"匈牙利文"
,
"捷克文"
,
"保加利亚文"
"中文"
,
"英文"
,
"西班牙文"
,
"阿拉伯文"
,
"日文"
,
"韩文"
,
"德文"
,
"法文"
,
"意大利文"
,
"葡萄牙文"
,
"俄文"
,
"荷兰文"
,
"波兰文"
,
"土耳其文"
,
"丹麦文"
,
"希腊文"
,
"芬兰文"
,
"瑞典文"
,
"挪威文"
,
"匈牙利文"
,
"捷克文"
,
"保加利亚文"
,
],
"sentiment_levels"
:
list
(
self
.
sentiment_map
.
values
()),
"is_initialized"
:
self
.
is_initialized
,
"device"
:
str
(
self
.
device
)
if
self
.
device
else
"未设置"
"device"
:
str
(
self
.
device
)
if
self
.
device
else
"未设置"
,
}
...
...
@@ -576,13 +631,16 @@ def enable_sentiment_analysis() -> bool:
return
multilingual_sentiment_analyzer
.
enable
()
def
disable_sentiment_analysis
(
reason
:
Optional
[
str
]
=
None
,
drop_state
:
bool
=
False
)
->
None
:
def
disable_sentiment_analysis
(
reason
:
Optional
[
str
]
=
None
,
drop_state
:
bool
=
False
)
->
None
:
"""Public helper to disable sentiment analysis at runtime."""
multilingual_sentiment_analyzer
.
disable
(
reason
=
reason
,
drop_state
=
drop_state
)
def
analyze_sentiment
(
text_or_texts
:
Union
[
str
,
List
[
str
]],
initialize_if_needed
:
bool
=
True
)
->
Union
[
SentimentResult
,
BatchSentimentResult
]:
def
analyze_sentiment
(
text_or_texts
:
Union
[
str
,
List
[
str
]],
initialize_if_needed
:
bool
=
True
)
->
Union
[
SentimentResult
,
BatchSentimentResult
]:
"""
便捷的情感分析函数
...
...
@@ -614,20 +672,26 @@ if __name__ == "__main__":
if
analyzer
.
initialize
():
# 测试单个文本
result
=
analyzer
.
analyze_single_text
(
"今天天气真好,心情特别棒!"
)
print
(
f
"单个文本分析: {result.sentiment_label} (置信度: {result.confidence:.4f})"
)
print
(
f
"单个文本分析: {result.sentiment_label} (置信度: {result.confidence:.4f})"
)
# 测试批量文本
test_texts
=
[
"这家餐厅的菜味道非常棒!"
,
"服务态度太差了,很失望"
,
"I absolutely love this product!"
,
"The customer service was disappointing."
"The customer service was disappointing."
,
]
batch_result
=
analyzer
.
analyze_batch
(
test_texts
)
print
(
f
"
\n
批量分析: 成功 {batch_result.success_count}/{batch_result.total_processed}"
)
print
(
f
"
\n
批量分析: 成功 {batch_result.success_count}/{batch_result.total_processed}"
)
for
result
in
batch_result
.
results
:
print
(
f
"'{result.text[:30]}...' -> {result.sentiment_label} ({result.confidence:.4f})"
)
print
(
f
"'{result.text[:30]}...' -> {result.sentiment_label} ({result.confidence:.4f})"
)
else
:
print
(
"模型初始化失败,无法进行测试"
)
...
...
Please
register
or
login
to post a comment