Optimize sentiment analysis module fault tolerance and keyword optimizer prompts.

666ghj
Commit e70fceb3f33449756aa81cdcacde5f08f8608464 e70fceb3 1 parent 52eca47c
Showing 3 changed files with 173 additions and 65 deletions
InsightEngine/agent.py
InsightEngine/tools/keyword_optimizer.py
InsightEngine/tools/sentiment_analyzer.py
--- a/InsightEngine/agent.py
View file @e70fceb
+++ b/InsightEngine/agent.py
View file @e70fceb
@@ -292,13 +292,14 @@ class DeepSearchAgent:
             情感分析结果字典，如果失败则返回None
         """
         try:
-             # 初始化情感分析器（如果尚未初始化）
-             if not self.sentiment_analyzer.is_initialized:
+             # 初始化情感分析器（如果尚未初始化且未被禁用）
+             if not self.sentiment_analyzer.is_initialized and not self.sentiment_analyzer.is_disabled:
                 print("    初始化情感分析模型...")
                 if not self.sentiment_analyzer.initialize():
-                     print("    ❌ 情感分析模型初始化失败")
-                     return None
-             
+                     print("     情感分析模型初始化失败，将直接透传原始文本")
+             elif self.sentiment_analyzer.is_disabled:
+                 print("     情感分析功能已禁用，直接透传原始文本")
+ 
             # 将查询结果转换为字典格式
             results_dict = []
             for result in results:
@@ -337,34 +338,46 @@ class DeepSearchAgent:
         print(f"  → 执行独立情感分析")
         
         try:
-             # 初始化情感分析器（如果尚未初始化）
-             if not self.sentiment_analyzer.is_initialized:
+             # 初始化情感分析器（如果尚未初始化且未被禁用）
+             if not self.sentiment_analyzer.is_initialized and not self.sentiment_analyzer.is_disabled:
                 print("    初始化情感分析模型...")
                 if not self.sentiment_analyzer.initialize():
-                     return {
-                         "success": False,
-                         "error": "情感分析模型初始化失败",
-                         "results": []
-                     }
+                     print("     情感分析模型初始化失败，将直接透传原始文本")
+             elif self.sentiment_analyzer.is_disabled:
+                 print("     情感分析功能已禁用，直接透传原始文本")
             
             # 执行分析
             if isinstance(texts, str):
                 result = self.sentiment_analyzer.analyze_single_text(texts)
-                 return {
-                     "success": True,
-                     "total_analyzed": 1,
-                     "results": [result.__dict__]
+                 result_dict = result.__dict__
+                 response = {
+                     "success": result.success and result.analysis_performed,
+                     "total_analyzed": 1 if result.analysis_performed and result.success else 0,
+                     "results": [result_dict]
                 }
+                 if not result.analysis_performed:
+                     response["success"] = False
+                     response["warning"] = result.error_message or "情感分析功能不可用，已直接返回原始文本"
+                 return response
             else:
-                 batch_result = self.sentiment_analyzer.analyze_batch(texts, show_progress=True)
-                 return {
-                     "success": True,
-                     "total_analyzed": batch_result.total_processed,
+                 texts_list = list(texts)
+                 batch_result = self.sentiment_analyzer.analyze_batch(texts_list, show_progress=True)
+                 response = {
+                     "success": batch_result.analysis_performed and batch_result.success_count > 0,
+                     "total_analyzed": batch_result.total_processed if batch_result.analysis_performed else 0,
                     "success_count": batch_result.success_count,
                     "failed_count": batch_result.failed_count,
-                     "average_confidence": batch_result.average_confidence,
+                     "average_confidence": batch_result.average_confidence if batch_result.analysis_performed else 0.0,
                     "results": [result.__dict__ for result in batch_result.results]
                 }
+                 if not batch_result.analysis_performed:
+                     warning = next(
+                         (r.error_message for r in batch_result.results if r.error_message),
+                         "情感分析功能不可用，已直接返回原始文本"
+                     )
+                     response["success"] = False
+                     response["warning"] = warning
+                 return response
                 
         except Exception as e:
             print(f"    ❌ 情感分析过程中发生错误: {str(e)}")
@@ -486,11 +499,11 @@ class DeepSearchAgent:
                     search_kwargs["end_date"] = end_date
                     print(f"  - 时间范围: {start_date} 到 {end_date}")
                 else:
-                     print(f"  ⚠️  日期格式错误（应为YYYY-MM-DD），改用全局搜索")
+                     print(f"    日期格式错误（应为YYYY-MM-DD），改用全局搜索")
                     print(f"      提供的日期: start_date={start_date}, end_date={end_date}")
                     search_tool = "search_topic_globally"
             elif search_tool == "search_topic_by_date":
-                 print(f"  ⚠️  search_topic_by_date工具缺少时间参数，改用全局搜索")
+                 print(f"    search_topic_by_date工具缺少时间参数，改用全局搜索")
                 search_tool = "search_topic_globally"
         
         # 处理需要平台参数的工具
@@ -500,7 +513,7 @@ class DeepSearchAgent:
                 search_kwargs["platform"] = platform
                 print(f"  - 指定平台: {platform}")
             else:
-                 print(f"  ⚠️  search_topic_on_platform工具缺少平台参数，改用全局搜索")
+                 print(f"    search_topic_on_platform工具缺少平台参数，改用全局搜索")
                 search_tool = "search_topic_globally"
         
         # 处理限制参数，使用配置文件中的默认值而不是agent提供的参数
@@ -615,11 +628,11 @@ class DeepSearchAgent:
                         search_kwargs["end_date"] = end_date
                         print(f"    时间范围: {start_date} 到 {end_date}")
                     else:
-                         print(f"    ⚠️  日期格式错误（应为YYYY-MM-DD），改用全局搜索")
+                         print(f"      日期格式错误（应为YYYY-MM-DD），改用全局搜索")
                         print(f"        提供的日期: start_date={start_date}, end_date={end_date}")
                         search_tool = "search_topic_globally"
                 elif search_tool == "search_topic_by_date":
-                     print(f"    ⚠️  search_topic_by_date工具缺少时间参数，改用全局搜索")
+                     print(f"      search_topic_by_date工具缺少时间参数，改用全局搜索")
                     search_tool = "search_topic_globally"
             
             # 处理需要平台参数的工具
@@ -629,7 +642,7 @@ class DeepSearchAgent:
                     search_kwargs["platform"] = platform
                     print(f"    指定平台: {platform}")
                 else:
-                     print(f"    ⚠️  search_topic_on_platform工具缺少平台参数，改用全局搜索")
+                     print(f"      search_topic_on_platform工具缺少平台参数，改用全局搜索")
                     search_tool = "search_topic_globally"
             
             # 处理限制参数
--- a/InsightEngine/tools/keyword_optimizer.py
View file @e70fceb
+++ b/InsightEngine/tools/keyword_optimizer.py
View file @e70fceb
@@ -147,6 +147,8 @@ class KeywordOptimizer:
 5. **数量控制**：最少提供10个关键词，最多提供20个关键词
 6. **避免重复**：不要脱离初始查询的主题
 
+ **重要提醒**：每个关键词都必须是一个不可分割的独立词条，严禁在词条内部包含空格。例如，应使用 "雷军班争议" 而不是错误的 "雷军班 争议"。
+ 
 **输出格式**：
 请以JSON格式返回结果：
 {
--- a/InsightEngine/tools/sentiment_analyzer.py
View file @e70fceb
+++ b/InsightEngine/tools/sentiment_analyzer.py
View file @e70fceb
@@ -16,7 +16,6 @@ project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(_
 weibo_sentiment_path = os.path.join(project_root, "SentimentAnalysisModel", "WeiboMultilingualSentiment")
 sys.path.append(weibo_sentiment_path)
 
- 
 @dataclass
 class SentimentResult:
     """情感分析结果数据类"""
@@ -26,6 +25,7 @@ class SentimentResult:
     probability_distribution: Dict[str, float]
     success: bool = True
     error_message: Optional[str] = None
+     analysis_performed: bool = True
 
 
 @dataclass 
@@ -36,6 +36,7 @@ class BatchSentimentResult:
     success_count: int
     failed_count: int
     average_confidence: float
+     analysis_performed: bool = True
 
 
 class WeiboMultilingualSentimentAnalyzer:
@@ -50,6 +51,7 @@ class WeiboMultilingualSentimentAnalyzer:
         self.tokenizer = None
         self.device = None
         self.is_initialized = False
+         self.is_disabled = False
         
         # 情感标签映射（5级分类）
         self.sentiment_map = {
@@ -69,6 +71,10 @@ class WeiboMultilingualSentimentAnalyzer:
         Returns:
             是否初始化成功
         """
+         if self.is_disabled:
+             print("情感分析功能已禁用，跳过模型加载")
+             return False
+ 
         if self.is_initialized:
             print("模型已经初始化，无需重复加载")
             return True
@@ -102,6 +108,7 @@ class WeiboMultilingualSentimentAnalyzer:
             self.model.to(self.device)
             self.model.eval()
             self.is_initialized = True
+             self.is_disabled = False
             
             print(f"模型加载成功! 使用设备: {self.device}")
             print("支持语言: 中文、英文、西班牙文、阿拉伯文、日文、韩文等22种语言")
@@ -113,6 +120,11 @@ class WeiboMultilingualSentimentAnalyzer:
             print(f"模型加载失败: {e}")
             print("请检查网络连接或模型文件")
             self.is_initialized = False
+             self.is_disabled = True
+             self.model = None
+             self.tokenizer = None
+             self.device = None
+             print("情感分析功能已禁用，将直接返回原始文本内容")
             return False
     
     def _preprocess_text(self, text: str) -> str:
@@ -144,6 +156,17 @@ class WeiboMultilingualSentimentAnalyzer:
         Returns:
             SentimentResult对象
         """
+         if self.is_disabled:
+             return SentimentResult(
+                 text=text,
+                 sentiment_label="情感分析未执行",
+                 confidence=0.0,
+                 probability_distribution={},
+                 success=False,
+                 error_message="情感分析功能已禁用",
+                 analysis_performed=False
+             )
+ 
         if not self.is_initialized:
             return SentimentResult(
                 text=text,
@@ -151,13 +174,14 @@ class WeiboMultilingualSentimentAnalyzer:
                 confidence=0.0,
                 probability_distribution={},
                 success=False,
-                 error_message="模型未初始化，请先调用 initialize() 方法"
+                 error_message="模型未初始化，请先调用initialize() 方法",
+                 analysis_performed=False
             )
-         
+ 
         try:
             # 预处理文本
             processed_text = self._preprocess_text(text)
-             
+ 
             if not processed_text:
                 return SentimentResult(
                     text=text,
@@ -165,9 +189,10 @@ class WeiboMultilingualSentimentAnalyzer:
                     confidence=0.0,
                     probability_distribution={},
                     success=False,
-                     error_message="输入文本为空或无效"
+                     error_message="输入文本为空或无效内容",
+                     analysis_performed=False
                 )
-             
+ 
             # 分词编码
             inputs = self.tokenizer(
                 processed_text,
@@ -176,26 +201,26 @@ class WeiboMultilingualSentimentAnalyzer:
                 truncation=True,
                 return_tensors='pt'
             )
-             
+ 
             # 转移到设备
             inputs = {k: v.to(self.device) for k, v in inputs.items()}
-             
+ 
             # 预测
             with torch.no_grad():
                 outputs = self.model(**inputs)
                 logits = outputs.logits
                 probabilities = torch.softmax(logits, dim=1)
                 prediction = torch.argmax(probabilities, dim=1).item()
-             
+ 
             # 构建结果
             confidence = probabilities[0][prediction].item()
             label = self.sentiment_map[prediction]
-             
+ 
             # 构建概率分布字典
             prob_dist = {}
-             for i, (label_name, prob) in enumerate(zip(self.sentiment_map.values(), probabilities[0])):
+             for label_name, prob in zip(self.sentiment_map.values(), probabilities[0]):
                 prob_dist[label_name] = prob.item()
-             
+ 
             return SentimentResult(
                 text=text,
                 sentiment_label=label,
@@ -203,7 +228,7 @@ class WeiboMultilingualSentimentAnalyzer:
                 probability_distribution=prob_dist,
                 success=True
             )
-             
+ 
         except Exception as e:
             return SentimentResult(
                 text=text,
@@ -211,9 +236,10 @@ class WeiboMultilingualSentimentAnalyzer:
                 confidence=0.0,
                 probability_distribution={},
                 success=False,
-                 error_message=f"预测时发生错误: {str(e)}"
+                 error_message=f"预测时发生错误: {str(e)}",
+                 analysis_performed=False
             )
-     
+ 
     def analyze_batch(self, texts: List[str], show_progress: bool = True) -> BatchSentimentResult:
         """
         批量情感分析
@@ -231,7 +257,30 @@ class WeiboMultilingualSentimentAnalyzer:
                 total_processed=0,
                 success_count=0,
                 failed_count=0,
-                 average_confidence=0.0
+                 average_confidence=0.0,
+                 analysis_performed=not self.is_disabled and self.is_initialized
+             )
+         
+         if self.is_disabled or not self.is_initialized:
+             passthrough_results = [
+                 SentimentResult(
+                     text=text,
+                     sentiment_label="情感分析未执行",
+                     confidence=0.0,
+                     probability_distribution={},
+                     success=False,
+                     error_message="情感分析功能不可用",
+                     analysis_performed=False
+                 )
+                 for text in texts
+             ]
+             return BatchSentimentResult(
+                 results=passthrough_results,
+                 total_processed=len(texts),
+                 success_count=0,
+                 failed_count=len(texts),
+                 average_confidence=0.0,
+                 analysis_performed=False
             )
         
         results = []
@@ -257,9 +306,46 @@ class WeiboMultilingualSentimentAnalyzer:
             total_processed=len(texts),
             success_count=success_count,
             failed_count=failed_count,
-             average_confidence=average_confidence
+             average_confidence=average_confidence,
+             analysis_performed=True
         )
     
+     def _build_passthrough_analysis(
+         self,
+         original_data: List[Dict[str, Any]],
+         reason: str,
+         texts: Optional[List[str]] = None,
+         results: Optional[List[SentimentResult]] = None
+     ) -> Dict[str, Any]:
+         """
+         构建在情感分析不可用时的透传结�?
+         """
+         total_items = len(texts) if texts is not None else len(original_data)
+         response: Dict[str, Any] = {
+             "sentiment_analysis": {
+                 "available": False,
+                 "reason": reason,
+                 "total_analyzed": 0,
+                 "success_rate": f"0/{total_items}",
+                 "average_confidence": 0.0,
+                 "sentiment_distribution": {},
+                 "high_confidence_results": [],
+                 "summary": f"情感分析未执行：{reason}",
+                 "original_texts": original_data
+             }
+         }
+         
+         if texts is not None:
+             response["sentiment_analysis"]["passthrough_texts"] = texts
+         
+         if results is not None:
+             response["sentiment_analysis"]["results"] = [
+                 result.__dict__ if isinstance(result, SentimentResult) else result
+                 for result in results
+             ]
+         
+         return response
+     
     def analyze_query_results(self, query_results: List[Dict[str, Any]], 
                             text_field: str = "content", 
                             min_confidence: float = 0.5) -> Dict[str, Any]:
@@ -311,10 +397,30 @@ class WeiboMultilingualSentimentAnalyzer:
                 }
             }
         
+         if self.is_disabled:
+             return self._build_passthrough_analysis(
+                 original_data=original_data,
+                 reason="情感分析模型不可用",
+                 texts=texts_to_analyze
+             )
+         
         # 执行批量情感分析
         print(f"正在对{len(texts_to_analyze)}条内容进行情感分析...")
         batch_result = self.analyze_batch(texts_to_analyze, show_progress=True)
         
+         if not batch_result.analysis_performed:
+             reason = "情感分析功能不可用"
+             if batch_result.results:
+                 candidate_error = next((r.error_message for r in batch_result.results if r.error_message), None)
+                 if candidate_error:
+                     reason = candidate_error
+             return self._build_passthrough_analysis(
+                 original_data=original_data,
+                 reason=reason,
+                 texts=texts_to_analyze,
+                 results=batch_result.results
+             )
+         
         # 统计情感分布
         sentiment_distribution = {}
         high_confidence_results = []
@@ -392,31 +498,18 @@ def analyze_sentiment(text_or_texts: Union[str, List[str]],
     Returns:
         SentimentResult或BatchSentimentResult
     """
-     if initialize_if_needed and not multilingual_sentiment_analyzer.is_initialized:
-         if not multilingual_sentiment_analyzer.initialize():
-             # 如果初始化失败，返回失败结果
-             if isinstance(text_or_texts, str):
-                 return SentimentResult(
-                     text=text_or_texts,
-                     sentiment_label="初始化失败",
-                     confidence=0.0,
-                     probability_distribution={},
-                     success=False,
-                     error_message="模型初始化失败"
-                 )
-             else:
-                 return BatchSentimentResult(
-                     results=[],
-                     total_processed=0,
-                     success_count=0,
-                     failed_count=len(text_or_texts),
-                     average_confidence=0.0
-                 )
+     if (
+         initialize_if_needed
+         and not multilingual_sentiment_analyzer.is_initialized
+         and not multilingual_sentiment_analyzer.is_disabled
+     ):
+         multilingual_sentiment_analyzer.initialize()
     
     if isinstance(text_or_texts, str):
         return multilingual_sentiment_analyzer.analyze_single_text(text_or_texts)
     else:
-         return multilingual_sentiment_analyzer.analyze_batch(text_or_texts)
+         texts_list = list(text_or_texts)
+         return multilingual_sentiment_analyzer.analyze_batch(texts_list)
 
 
 if __name__ == "__main__":