666ghj

Optimize sentiment analysis module fault tolerance and keyword optimizer prompts.

... ... @@ -292,13 +292,14 @@ class DeepSearchAgent:
情感分析结果字典,如果失败则返回None
"""
try:
# 初始化情感分析器(如果尚未初始化)
if not self.sentiment_analyzer.is_initialized:
# 初始化情感分析器(如果尚未初始化且未被禁用)
if not self.sentiment_analyzer.is_initialized and not self.sentiment_analyzer.is_disabled:
print(" 初始化情感分析模型...")
if not self.sentiment_analyzer.initialize():
print(" ❌ 情感分析模型初始化失败")
return None
print(" 情感分析模型初始化失败,将直接透传原始文本")
elif self.sentiment_analyzer.is_disabled:
print(" 情感分析功能已禁用,直接透传原始文本")
# 将查询结果转换为字典格式
results_dict = []
for result in results:
... ... @@ -337,34 +338,46 @@ class DeepSearchAgent:
print(f" → 执行独立情感分析")
try:
# 初始化情感分析器(如果尚未初始化)
if not self.sentiment_analyzer.is_initialized:
# 初始化情感分析器(如果尚未初始化且未被禁用)
if not self.sentiment_analyzer.is_initialized and not self.sentiment_analyzer.is_disabled:
print(" 初始化情感分析模型...")
if not self.sentiment_analyzer.initialize():
return {
"success": False,
"error": "情感分析模型初始化失败",
"results": []
}
print(" 情感分析模型初始化失败,将直接透传原始文本")
elif self.sentiment_analyzer.is_disabled:
print(" 情感分析功能已禁用,直接透传原始文本")
# 执行分析
if isinstance(texts, str):
result = self.sentiment_analyzer.analyze_single_text(texts)
return {
"success": True,
"total_analyzed": 1,
"results": [result.__dict__]
result_dict = result.__dict__
response = {
"success": result.success and result.analysis_performed,
"total_analyzed": 1 if result.analysis_performed and result.success else 0,
"results": [result_dict]
}
if not result.analysis_performed:
response["success"] = False
response["warning"] = result.error_message or "情感分析功能不可用,已直接返回原始文本"
return response
else:
batch_result = self.sentiment_analyzer.analyze_batch(texts, show_progress=True)
return {
"success": True,
"total_analyzed": batch_result.total_processed,
texts_list = list(texts)
batch_result = self.sentiment_analyzer.analyze_batch(texts_list, show_progress=True)
response = {
"success": batch_result.analysis_performed and batch_result.success_count > 0,
"total_analyzed": batch_result.total_processed if batch_result.analysis_performed else 0,
"success_count": batch_result.success_count,
"failed_count": batch_result.failed_count,
"average_confidence": batch_result.average_confidence,
"average_confidence": batch_result.average_confidence if batch_result.analysis_performed else 0.0,
"results": [result.__dict__ for result in batch_result.results]
}
if not batch_result.analysis_performed:
warning = next(
(r.error_message for r in batch_result.results if r.error_message),
"情感分析功能不可用,已直接返回原始文本"
)
response["success"] = False
response["warning"] = warning
return response
except Exception as e:
print(f" ❌ 情感分析过程中发生错误: {str(e)}")
... ... @@ -486,11 +499,11 @@ class DeepSearchAgent:
search_kwargs["end_date"] = end_date
print(f" - 时间范围: {start_date} 到 {end_date}")
else:
print(f" ⚠️ 日期格式错误(应为YYYY-MM-DD),改用全局搜索")
print(f" 日期格式错误(应为YYYY-MM-DD),改用全局搜索")
print(f" 提供的日期: start_date={start_date}, end_date={end_date}")
search_tool = "search_topic_globally"
elif search_tool == "search_topic_by_date":
print(f" ⚠️ search_topic_by_date工具缺少时间参数,改用全局搜索")
print(f" search_topic_by_date工具缺少时间参数,改用全局搜索")
search_tool = "search_topic_globally"
# 处理需要平台参数的工具
... ... @@ -500,7 +513,7 @@ class DeepSearchAgent:
search_kwargs["platform"] = platform
print(f" - 指定平台: {platform}")
else:
print(f" ⚠️ search_topic_on_platform工具缺少平台参数,改用全局搜索")
print(f" search_topic_on_platform工具缺少平台参数,改用全局搜索")
search_tool = "search_topic_globally"
# 处理限制参数,使用配置文件中的默认值而不是agent提供的参数
... ... @@ -615,11 +628,11 @@ class DeepSearchAgent:
search_kwargs["end_date"] = end_date
print(f" 时间范围: {start_date} 到 {end_date}")
else:
print(f" ⚠️ 日期格式错误(应为YYYY-MM-DD),改用全局搜索")
print(f" 日期格式错误(应为YYYY-MM-DD),改用全局搜索")
print(f" 提供的日期: start_date={start_date}, end_date={end_date}")
search_tool = "search_topic_globally"
elif search_tool == "search_topic_by_date":
print(f" ⚠️ search_topic_by_date工具缺少时间参数,改用全局搜索")
print(f" search_topic_by_date工具缺少时间参数,改用全局搜索")
search_tool = "search_topic_globally"
# 处理需要平台参数的工具
... ... @@ -629,7 +642,7 @@ class DeepSearchAgent:
search_kwargs["platform"] = platform
print(f" 指定平台: {platform}")
else:
print(f" ⚠️ search_topic_on_platform工具缺少平台参数,改用全局搜索")
print(f" search_topic_on_platform工具缺少平台参数,改用全局搜索")
search_tool = "search_topic_globally"
# 处理限制参数
... ...
... ... @@ -147,6 +147,8 @@ class KeywordOptimizer:
5. **数量控制**:最少提供10个关键词,最多提供20个关键词
6. **避免重复**:不要脱离初始查询的主题
**重要提醒**:每个关键词都必须是一个不可分割的独立词条,严禁在词条内部包含空格。例如,应使用 "雷军班争议" 而不是错误的 "雷军班 争议"。
**输出格式**:
请以JSON格式返回结果:
{
... ...
... ... @@ -16,7 +16,6 @@ project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(_
weibo_sentiment_path = os.path.join(project_root, "SentimentAnalysisModel", "WeiboMultilingualSentiment")
sys.path.append(weibo_sentiment_path)
@dataclass
class SentimentResult:
"""情感分析结果数据类"""
... ... @@ -26,6 +25,7 @@ class SentimentResult:
probability_distribution: Dict[str, float]
success: bool = True
error_message: Optional[str] = None
analysis_performed: bool = True
@dataclass
... ... @@ -36,6 +36,7 @@ class BatchSentimentResult:
success_count: int
failed_count: int
average_confidence: float
analysis_performed: bool = True
class WeiboMultilingualSentimentAnalyzer:
... ... @@ -50,6 +51,7 @@ class WeiboMultilingualSentimentAnalyzer:
self.tokenizer = None
self.device = None
self.is_initialized = False
self.is_disabled = False
# 情感标签映射(5级分类)
self.sentiment_map = {
... ... @@ -69,6 +71,10 @@ class WeiboMultilingualSentimentAnalyzer:
Returns:
是否初始化成功
"""
if self.is_disabled:
print("情感分析功能已禁用,跳过模型加载")
return False
if self.is_initialized:
print("模型已经初始化,无需重复加载")
return True
... ... @@ -102,6 +108,7 @@ class WeiboMultilingualSentimentAnalyzer:
self.model.to(self.device)
self.model.eval()
self.is_initialized = True
self.is_disabled = False
print(f"模型加载成功! 使用设备: {self.device}")
print("支持语言: 中文、英文、西班牙文、阿拉伯文、日文、韩文等22种语言")
... ... @@ -113,6 +120,11 @@ class WeiboMultilingualSentimentAnalyzer:
print(f"模型加载失败: {e}")
print("请检查网络连接或模型文件")
self.is_initialized = False
self.is_disabled = True
self.model = None
self.tokenizer = None
self.device = None
print("情感分析功能已禁用,将直接返回原始文本内容")
return False
def _preprocess_text(self, text: str) -> str:
... ... @@ -144,6 +156,17 @@ class WeiboMultilingualSentimentAnalyzer:
Returns:
SentimentResult对象
"""
if self.is_disabled:
return SentimentResult(
text=text,
sentiment_label="情感分析未执行",
confidence=0.0,
probability_distribution={},
success=False,
error_message="情感分析功能已禁用",
analysis_performed=False
)
if not self.is_initialized:
return SentimentResult(
text=text,
... ... @@ -151,13 +174,14 @@ class WeiboMultilingualSentimentAnalyzer:
confidence=0.0,
probability_distribution={},
success=False,
error_message="模型未初始化,请先调用 initialize() 方法"
error_message="模型未初始化,请先调用initialize() 方法",
analysis_performed=False
)
try:
# 预处理文本
processed_text = self._preprocess_text(text)
if not processed_text:
return SentimentResult(
text=text,
... ... @@ -165,9 +189,10 @@ class WeiboMultilingualSentimentAnalyzer:
confidence=0.0,
probability_distribution={},
success=False,
error_message="输入文本为空或无效"
error_message="输入文本为空或无效内容",
analysis_performed=False
)
# 分词编码
inputs = self.tokenizer(
processed_text,
... ... @@ -176,26 +201,26 @@ class WeiboMultilingualSentimentAnalyzer:
truncation=True,
return_tensors='pt'
)
# 转移到设备
inputs = {k: v.to(self.device) for k, v in inputs.items()}
# 预测
with torch.no_grad():
outputs = self.model(**inputs)
logits = outputs.logits
probabilities = torch.softmax(logits, dim=1)
prediction = torch.argmax(probabilities, dim=1).item()
# 构建结果
confidence = probabilities[0][prediction].item()
label = self.sentiment_map[prediction]
# 构建概率分布字典
prob_dist = {}
for i, (label_name, prob) in enumerate(zip(self.sentiment_map.values(), probabilities[0])):
for label_name, prob in zip(self.sentiment_map.values(), probabilities[0]):
prob_dist[label_name] = prob.item()
return SentimentResult(
text=text,
sentiment_label=label,
... ... @@ -203,7 +228,7 @@ class WeiboMultilingualSentimentAnalyzer:
probability_distribution=prob_dist,
success=True
)
except Exception as e:
return SentimentResult(
text=text,
... ... @@ -211,9 +236,10 @@ class WeiboMultilingualSentimentAnalyzer:
confidence=0.0,
probability_distribution={},
success=False,
error_message=f"预测时发生错误: {str(e)}"
error_message=f"预测时发生错误: {str(e)}",
analysis_performed=False
)
def analyze_batch(self, texts: List[str], show_progress: bool = True) -> BatchSentimentResult:
"""
批量情感分析
... ... @@ -231,7 +257,30 @@ class WeiboMultilingualSentimentAnalyzer:
total_processed=0,
success_count=0,
failed_count=0,
average_confidence=0.0
average_confidence=0.0,
analysis_performed=not self.is_disabled and self.is_initialized
)
if self.is_disabled or not self.is_initialized:
passthrough_results = [
SentimentResult(
text=text,
sentiment_label="情感分析未执行",
confidence=0.0,
probability_distribution={},
success=False,
error_message="情感分析功能不可用",
analysis_performed=False
)
for text in texts
]
return BatchSentimentResult(
results=passthrough_results,
total_processed=len(texts),
success_count=0,
failed_count=len(texts),
average_confidence=0.0,
analysis_performed=False
)
results = []
... ... @@ -257,9 +306,46 @@ class WeiboMultilingualSentimentAnalyzer:
total_processed=len(texts),
success_count=success_count,
failed_count=failed_count,
average_confidence=average_confidence
average_confidence=average_confidence,
analysis_performed=True
)
def _build_passthrough_analysis(
    self,
    original_data: List[Dict[str, Any]],
    reason: str,
    texts: Optional[List[str]] = None,
    results: Optional[List[SentimentResult]] = None
) -> Dict[str, Any]:
    """
    Build the pass-through payload used when sentiment analysis is unavailable.

    The returned dictionary has a single "sentiment_analysis" key whose value
    marks the analysis as unavailable, zeroes every statistic, and carries the
    caller's data through unchanged.

    Args:
        original_data: Records that were meant to be analysed; stored as-is
            under "original_texts".
        reason: Explanation of why no analysis ran; stored under "reason" and
            embedded in "summary".
        texts: Optional extracted texts. When given, their count is used as
            the denominator of "success_rate" and they are stored under
            "passthrough_texts"; otherwise the length of original_data is used.
        results: Optional per-text results. SentimentResult instances are
            exposed through their __dict__; any other item is stored unchanged.

    Returns:
        A dictionary of the form {"sentiment_analysis": {...}}.
    """
    # Prefer the extracted texts for the count; fall back to the raw records.
    item_count = len(original_data) if texts is None else len(texts)

    analysis: Dict[str, Any] = {
        "available": False,
        "reason": reason,
        "total_analyzed": 0,
        "success_rate": f"0/{item_count}",
        "average_confidence": 0.0,
        "sentiment_distribution": {},
        "high_confidence_results": [],
        "summary": f"情感分析未执行:{reason}",
        "original_texts": original_data,
    }

    if texts is not None:
        analysis["passthrough_texts"] = texts

    if results is not None:
        serialized = []
        for item in results:
            if isinstance(item, SentimentResult):
                # Expose the instance's attribute dict (not a copy).
                serialized.append(item.__dict__)
            else:
                serialized.append(item)
        analysis["results"] = serialized

    return {"sentiment_analysis": analysis}
def analyze_query_results(self, query_results: List[Dict[str, Any]],
text_field: str = "content",
min_confidence: float = 0.5) -> Dict[str, Any]:
... ... @@ -311,10 +397,30 @@ class WeiboMultilingualSentimentAnalyzer:
}
}
if self.is_disabled:
return self._build_passthrough_analysis(
original_data=original_data,
reason="情感分析模型不可用",
texts=texts_to_analyze
)
# 执行批量情感分析
print(f"正在对{len(texts_to_analyze)}条内容进行情感分析...")
batch_result = self.analyze_batch(texts_to_analyze, show_progress=True)
if not batch_result.analysis_performed:
reason = "情感分析功能不可用"
if batch_result.results:
candidate_error = next((r.error_message for r in batch_result.results if r.error_message), None)
if candidate_error:
reason = candidate_error
return self._build_passthrough_analysis(
original_data=original_data,
reason=reason,
texts=texts_to_analyze,
results=batch_result.results
)
# 统计情感分布
sentiment_distribution = {}
high_confidence_results = []
... ... @@ -392,31 +498,18 @@ def analyze_sentiment(text_or_texts: Union[str, List[str]],
Returns:
SentimentResult或BatchSentimentResult
"""
if initialize_if_needed and not multilingual_sentiment_analyzer.is_initialized:
if not multilingual_sentiment_analyzer.initialize():
# 如果初始化失败,返回失败结果
if isinstance(text_or_texts, str):
return SentimentResult(
text=text_or_texts,
sentiment_label="初始化失败",
confidence=0.0,
probability_distribution={},
success=False,
error_message="模型初始化失败"
)
else:
return BatchSentimentResult(
results=[],
total_processed=0,
success_count=0,
failed_count=len(text_or_texts),
average_confidence=0.0
)
if (
initialize_if_needed
and not multilingual_sentiment_analyzer.is_initialized
and not multilingual_sentiment_analyzer.is_disabled
):
multilingual_sentiment_analyzer.initialize()
if isinstance(text_or_texts, str):
return multilingual_sentiment_analyzer.analyze_single_text(text_or_texts)
else:
return multilingual_sentiment_analyzer.analyze_batch(text_or_texts)
texts_list = list(text_or_texts)
return multilingual_sentiment_analyzer.analyze_batch(texts_list)
if __name__ == "__main__":
... ...