Optimize sentiment analysis module fault tolerance and keyword optimizer prompts.
Showing 3 changed files with 173 additions and 65 deletions.
@@ -292,13 +292,14 @@ class DeepSearchAgent:
         Sentiment analysis result dict, or None on failure
         """
         try:
-            # Initialize the sentiment analyzer (if not yet initialized)
-            if not self.sentiment_analyzer.is_initialized:
+            # Initialize the sentiment analyzer (if not yet initialized and not disabled)
+            if not self.sentiment_analyzer.is_initialized and not self.sentiment_analyzer.is_disabled:
                 print("  Initializing sentiment analysis model...")
                 if not self.sentiment_analyzer.initialize():
-                    print("  ❌ Sentiment analysis model initialization failed")
-                    return None
-
+                    print("  Sentiment analysis model initialization failed; raw text will be passed through")
+            elif self.sentiment_analyzer.is_disabled:
+                print("  Sentiment analysis is disabled; passing raw text through")
+
             # Convert query results into dict format
             results_dict = []
             for result in results:
@@ -337,34 +338,46 @@ class DeepSearchAgent:
         print(f"  → Running standalone sentiment analysis")

         try:
-            # Initialize the sentiment analyzer (if not yet initialized)
-            if not self.sentiment_analyzer.is_initialized:
+            # Initialize the sentiment analyzer (if not yet initialized and not disabled)
+            if not self.sentiment_analyzer.is_initialized and not self.sentiment_analyzer.is_disabled:
                 print("  Initializing sentiment analysis model...")
                 if not self.sentiment_analyzer.initialize():
-                    return {
-                        "success": False,
-                        "error": "Sentiment analysis model initialization failed",
-                        "results": []
-                    }
+                    print("  Sentiment analysis model initialization failed; raw text will be passed through")
+            elif self.sentiment_analyzer.is_disabled:
+                print("  Sentiment analysis is disabled; passing raw text through")

             # Run the analysis
             if isinstance(texts, str):
                 result = self.sentiment_analyzer.analyze_single_text(texts)
-                return {
-                    "success": True,
-                    "total_analyzed": 1,
-                    "results": [result.__dict__]
+                result_dict = result.__dict__
+                response = {
+                    "success": result.success and result.analysis_performed,
+                    "total_analyzed": 1 if result.analysis_performed and result.success else 0,
+                    "results": [result_dict]
                 }
+                if not result.analysis_performed:
+                    response["success"] = False
+                    response["warning"] = result.error_message or "Sentiment analysis unavailable; raw text returned as-is"
+                return response
             else:
-                batch_result = self.sentiment_analyzer.analyze_batch(texts, show_progress=True)
-                return {
-                    "success": True,
-                    "total_analyzed": batch_result.total_processed,
+                texts_list = list(texts)
+                batch_result = self.sentiment_analyzer.analyze_batch(texts_list, show_progress=True)
+                response = {
+                    "success": batch_result.analysis_performed and batch_result.success_count > 0,
+                    "total_analyzed": batch_result.total_processed if batch_result.analysis_performed else 0,
                     "success_count": batch_result.success_count,
                     "failed_count": batch_result.failed_count,
-                    "average_confidence": batch_result.average_confidence,
+                    "average_confidence": batch_result.average_confidence if batch_result.analysis_performed else 0.0,
                     "results": [result.__dict__ for result in batch_result.results]
                 }
+                if not batch_result.analysis_performed:
+                    warning = next(
+                        (r.error_message for r in batch_result.results if r.error_message),
+                        "Sentiment analysis unavailable; raw text returned as-is"
+                    )
+                    response["success"] = False
+                    response["warning"] = warning
+                return response

         except Exception as e:
             print(f"  ❌ Error during sentiment analysis: {str(e)}")
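With this change, callers get a structured response instead of an early `None` or hard error when the model cannot load. Below is a minimal sketch of how downstream code might branch on the two shapes; the field names (`success`, `warning`, `results`) come from the hunk above, while `handle_sentiment_response` itself is a hypothetical helper, not part of this diff:

```python
# Hypothetical consumer of the response dict built above.
def handle_sentiment_response(response: dict) -> list:
    if response.get("success"):
        # Normal mode: each entry carries a predicted label.
        return [r["sentiment_label"] for r in response["results"]]
    # Degraded mode: analysis was skipped and the raw text is passed through.
    print(response.get("warning", "sentiment analysis unavailable"))
    return [r["text"] for r in response.get("results", [])]

demo = {"success": False, "warning": "model disabled",
        "results": [{"text": "raw post", "sentiment_label": "analysis not performed"}]}
print(handle_sentiment_response(demo))  # ['raw post']
```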
@@ -486,11 +499,11 @@ class DeepSearchAgent:
                 search_kwargs["end_date"] = end_date
                 print(f"  - Time range: {start_date} to {end_date}")
             else:
-                print(f"  ⚠️ Invalid date format (expected YYYY-MM-DD); falling back to global search")
+                print(f"  Invalid date format (expected YYYY-MM-DD); falling back to global search")
                 print(f"  Dates provided: start_date={start_date}, end_date={end_date}")
                 search_tool = "search_topic_globally"
         elif search_tool == "search_topic_by_date":
-            print(f"  ⚠️ The search_topic_by_date tool is missing its time parameters; falling back to global search")
+            print(f"  The search_topic_by_date tool is missing its time parameters; falling back to global search")
             search_tool = "search_topic_globally"

         # Handle tools that require a platform parameter
@@ -500,7 +513,7 @@ class DeepSearchAgent:
                 search_kwargs["platform"] = platform
                 print(f"  - Target platform: {platform}")
             else:
-                print(f"  ⚠️ The search_topic_on_platform tool is missing its platform parameter; falling back to global search")
+                print(f"  The search_topic_on_platform tool is missing its platform parameter; falling back to global search")
                 search_tool = "search_topic_globally"

         # Handle the limit parameter, using the default from the config file rather than the agent-supplied value
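The format check that guards the `else` branch is not shown in these hunks. A plausible strict YYYY-MM-DD validator, assuming `datetime.strptime`-based parsing (`is_valid_date` is an illustrative name, not the project's actual helper):

```python
from datetime import datetime

def is_valid_date(value) -> bool:
    """True only if value parses strictly as YYYY-MM-DD (illustrative helper)."""
    try:
        datetime.strptime(value, "%Y-%m-%d")
        return True
    except (TypeError, ValueError):
        return False

# Mirrors the fallback above: a malformed date demotes the tool choice.
start_date, end_date = "2024-13-01", "2024-12-31"
search_tool = "search_topic_by_date"
if not (is_valid_date(start_date) and is_valid_date(end_date)):
    search_tool = "search_topic_globally"
print(search_tool)  # search_topic_globally (month 13 is rejected)
```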
@@ -615,11 +628,11 @@ class DeepSearchAgent:
                 search_kwargs["end_date"] = end_date
                 print(f"  Time range: {start_date} to {end_date}")
             else:
-                print(f"  ⚠️ Invalid date format (expected YYYY-MM-DD); falling back to global search")
+                print(f"  Invalid date format (expected YYYY-MM-DD); falling back to global search")
                 print(f"  Dates provided: start_date={start_date}, end_date={end_date}")
                 search_tool = "search_topic_globally"
         elif search_tool == "search_topic_by_date":
-            print(f"  ⚠️ The search_topic_by_date tool is missing its time parameters; falling back to global search")
+            print(f"  The search_topic_by_date tool is missing its time parameters; falling back to global search")
             search_tool = "search_topic_globally"

         # Handle tools that require a platform parameter
@@ -629,7 +642,7 @@ class DeepSearchAgent:
                 search_kwargs["platform"] = platform
                 print(f"  Target platform: {platform}")
             else:
-                print(f"  ⚠️ The search_topic_on_platform tool is missing its platform parameter; falling back to global search")
+                print(f"  The search_topic_on_platform tool is missing its platform parameter; falling back to global search")
                 search_tool = "search_topic_globally"

         # Handle the limit parameter
@@ -147,6 +147,8 @@ class KeywordOptimizer:
 5. **Quantity control**: provide at least 10 and at most 20 keywords
 6. **Avoid repetition**: do not stray from the topic of the original query

+**Important reminder**: every keyword must be a single, indivisible term; spaces inside a term are strictly forbidden. For example, use "雷军班争议", never the incorrect "雷军班 争议".
+
 **Output format**:
 Return the result in JSON format:
 {
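Prompt instructions alone do not guarantee compliance, so a defensive post-processing pass over the model's keyword list may still be worthwhile. A sketch under that assumption; `sanitize_keywords` is hypothetical and not part of this diff:

```python
def sanitize_keywords(keywords, max_n=20):
    """Strip internal whitespace and de-duplicate (hypothetical post-processing)."""
    seen, cleaned = set(), []
    for kw in keywords:
        term = "".join(kw.split())  # "雷军班 争议" -> "雷军班争议"
        if term and term not in seen:
            seen.add(term)
            cleaned.append(term)
    return cleaned[:max_n]  # caller can re-prompt if fewer than 10 remain

print(sanitize_keywords(["雷军班 争议", "雷军班争议", "高考 录取"]))
# ['雷军班争议', '高考录取']
```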
@@ -16,7 +16,6 @@ project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(_
 weibo_sentiment_path = os.path.join(project_root, "SentimentAnalysisModel", "WeiboMultilingualSentiment")
 sys.path.append(weibo_sentiment_path)

-
 @dataclass
 class SentimentResult:
     """Sentiment analysis result dataclass"""
@@ -26,6 +25,7 @@ class SentimentResult:
     probability_distribution: Dict[str, float]
     success: bool = True
     error_message: Optional[str] = None
+    analysis_performed: bool = True


 @dataclass
@@ -36,6 +36,7 @@ class BatchSentimentResult:
     success_count: int
     failed_count: int
     average_confidence: float
+    analysis_performed: bool = True


 class WeiboMultilingualSentimentAnalyzer:
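Because `analysis_performed` defaults to `True` on both dataclasses, every existing construction site keeps working unchanged, and only the new passthrough paths set it to `False`. A self-contained sketch of the distinction; the dataclass here is an abbreviated copy built from the fields shown in this file:

```python
from dataclasses import dataclass
from typing import Dict, Optional

@dataclass
class SentimentResult:  # abbreviated copy of the dataclass above
    text: str
    sentiment_label: str
    confidence: float
    probability_distribution: Dict[str, float]
    success: bool = True
    error_message: Optional[str] = None
    analysis_performed: bool = True

ok = SentimentResult("great!", "positive", 0.97, {"positive": 0.97})
skipped = SentimentResult("great!", "analysis not performed", 0.0, {},
                          success=False, error_message="analyzer disabled",
                          analysis_performed=False)
# Both serialize through the same `result.__dict__` path used by the agent.
print(ok.analysis_performed, skipped.analysis_performed)  # True False
```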
@@ -50,6 +51,7 @@ class WeiboMultilingualSentimentAnalyzer:
         self.tokenizer = None
         self.device = None
         self.is_initialized = False
+        self.is_disabled = False

         # Sentiment label mapping (5-level classification)
         self.sentiment_map = {
@@ -69,6 +71,10 @@ class WeiboMultilingualSentimentAnalyzer:
         Returns:
             Whether initialization succeeded
         """
+        if self.is_disabled:
+            print("Sentiment analysis is disabled; skipping model load")
+            return False
+
         if self.is_initialized:
             print("Model already initialized; no need to load it again")
             return True
@@ -102,6 +108,7 @@ class WeiboMultilingualSentimentAnalyzer:
             self.model.to(self.device)
             self.model.eval()
             self.is_initialized = True
+            self.is_disabled = False

             print(f"Model loaded successfully! Device: {self.device}")
             print("Supported languages: 22 languages, including Chinese, English, Spanish, Arabic, Japanese, and Korean")
@@ -113,6 +120,11 @@ class WeiboMultilingualSentimentAnalyzer:
             print(f"Model loading failed: {e}")
             print("Check the network connection or the model files")
             self.is_initialized = False
+            self.is_disabled = True
+            self.model = None
+            self.tokenizer = None
+            self.device = None
+            print("Sentiment analysis has been disabled; raw text will be returned as-is")
             return False

     def _preprocess_text(self, text: str) -> str:
@@ -144,6 +156,17 @@ class WeiboMultilingualSentimentAnalyzer:
         Returns:
             A SentimentResult object
         """
+        if self.is_disabled:
+            return SentimentResult(
+                text=text,
+                sentiment_label="analysis not performed",
+                confidence=0.0,
+                probability_distribution={},
+                success=False,
+                error_message="Sentiment analysis is disabled",
+                analysis_performed=False
+            )
+
         if not self.is_initialized:
             return SentimentResult(
                 text=text,
@@ -151,13 +174,14 @@ class WeiboMultilingualSentimentAnalyzer:
                 confidence=0.0,
                 probability_distribution={},
                 success=False,
-                error_message="Model not initialized; call initialize() first"
+                error_message="Model not initialized; call initialize() first",
+                analysis_performed=False
             )
-
+
         try:
             # Preprocess the text
             processed_text = self._preprocess_text(text)
-
+
             if not processed_text:
                 return SentimentResult(
                     text=text,
@@ -165,9 +189,10 @@ class WeiboMultilingualSentimentAnalyzer:
                     confidence=0.0,
                     probability_distribution={},
                     success=False,
-                    error_message="Input text is empty or invalid"
+                    error_message="Input text is empty or contains nothing valid",
+                    analysis_performed=False
                 )
-
+
             # Tokenize and encode
             inputs = self.tokenizer(
                 processed_text,
@@ -176,26 +201,26 @@ class WeiboMultilingualSentimentAnalyzer:
                 truncation=True,
                 return_tensors='pt'
             )
-
+
             # Move to the device
             inputs = {k: v.to(self.device) for k, v in inputs.items()}
-
+
             # Predict
             with torch.no_grad():
                 outputs = self.model(**inputs)
                 logits = outputs.logits
                 probabilities = torch.softmax(logits, dim=1)
                 prediction = torch.argmax(probabilities, dim=1).item()
-
+
             # Build the result
             confidence = probabilities[0][prediction].item()
             label = self.sentiment_map[prediction]
-
+
             # Build the probability distribution dict
             prob_dist = {}
-            for i, (label_name, prob) in enumerate(zip(self.sentiment_map.values(), probabilities[0])):
+            for label_name, prob in zip(self.sentiment_map.values(), probabilities[0]):
                 prob_dist[label_name] = prob.item()
-
+
             return SentimentResult(
                 text=text,
                 sentiment_label=label,
@@ -203,7 +228,7 @@ class WeiboMultilingualSentimentAnalyzer:
                 probability_distribution=prob_dist,
                 success=True
             )
-
+
         except Exception as e:
             return SentimentResult(
                 text=text,
@@ -211,9 +236,10 @@ class WeiboMultilingualSentimentAnalyzer:
                 confidence=0.0,
                 probability_distribution={},
                 success=False,
-                error_message=f"Error during prediction: {str(e)}"
+                error_message=f"Error during prediction: {str(e)}",
+                analysis_performed=False
             )
-
+
     def analyze_batch(self, texts: List[str], show_progress: bool = True) -> BatchSentimentResult:
         """
         Batch sentiment analysis
@@ -231,7 +257,30 @@ class WeiboMultilingualSentimentAnalyzer:
                 total_processed=0,
                 success_count=0,
                 failed_count=0,
-                average_confidence=0.0
+                average_confidence=0.0,
+                analysis_performed=not self.is_disabled and self.is_initialized
+            )
+
+        if self.is_disabled or not self.is_initialized:
+            passthrough_results = [
+                SentimentResult(
+                    text=text,
+                    sentiment_label="analysis not performed",
+                    confidence=0.0,
+                    probability_distribution={},
+                    success=False,
+                    error_message="Sentiment analysis unavailable",
+                    analysis_performed=False
+                )
+                for text in texts
+            ]
+            return BatchSentimentResult(
+                results=passthrough_results,
+                total_processed=len(texts),
+                success_count=0,
+                failed_count=len(texts),
+                average_confidence=0.0,
+                analysis_performed=False
             )

         results = []
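Note the invariant in the passthrough branch: `total_processed` still reports the input size while `success_count` stays 0, so downstream ratio calculations remain well defined. Illustrative expected values for a three-item batch on a disabled analyzer, inferred from the branch above rather than captured from a run:

```python
batch = {  # shape of the BatchSentimentResult fields in passthrough mode
    "total_processed": 3,
    "success_count": 0,
    "failed_count": 3,
    "average_confidence": 0.0,
    "analysis_performed": False,
}
assert batch["success_count"] + batch["failed_count"] == batch["total_processed"]
```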
@@ -257,9 +306,46 @@ class WeiboMultilingualSentimentAnalyzer:
             total_processed=len(texts),
             success_count=success_count,
             failed_count=failed_count,
-            average_confidence=average_confidence
+            average_confidence=average_confidence,
+            analysis_performed=True
         )

+    def _build_passthrough_analysis(
+        self,
+        original_data: List[Dict[str, Any]],
+        reason: str,
+        texts: Optional[List[str]] = None,
+        results: Optional[List[SentimentResult]] = None
+    ) -> Dict[str, Any]:
+        """
+        Build the passthrough result used when sentiment analysis is unavailable
+        """
+        total_items = len(texts) if texts is not None else len(original_data)
+        response: Dict[str, Any] = {
+            "sentiment_analysis": {
+                "available": False,
+                "reason": reason,
+                "total_analyzed": 0,
+                "success_rate": f"0/{total_items}",
+                "average_confidence": 0.0,
+                "sentiment_distribution": {},
+                "high_confidence_results": [],
+                "summary": f"Sentiment analysis not performed: {reason}",
+                "original_texts": original_data
+            }
+        }
+
+        if texts is not None:
+            response["sentiment_analysis"]["passthrough_texts"] = texts
+
+        if results is not None:
+            response["sentiment_analysis"]["results"] = [
+                result.__dict__ if isinstance(result, SentimentResult) else result
+                for result in results
+            ]
+
+        return response
+
     def analyze_query_results(self, query_results: List[Dict[str, Any]],
                               text_field: str = "content",
                               min_confidence: float = 0.5) -> Dict[str, Any]:
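The helper mirrors the top-level keys of a successful analysis, so report-building code can consume either shape without branching on missing keys. An illustrative return value for a two-item query, inferred from the helper's body:

```python
example = {
    "sentiment_analysis": {
        "available": False,
        "reason": "Sentiment analysis model unavailable",
        "total_analyzed": 0,
        "success_rate": "0/2",
        "average_confidence": 0.0,
        "sentiment_distribution": {},
        "high_confidence_results": [],
        "summary": "Sentiment analysis not performed: Sentiment analysis model unavailable",
        "original_texts": [{"content": "post A"}, {"content": "post B"}],
        "passthrough_texts": ["post A", "post B"],
    }
}
```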
@@ -311,10 +397,30 @@ class WeiboMultilingualSentimentAnalyzer:
             }
         }

+        if self.is_disabled:
+            return self._build_passthrough_analysis(
+                original_data=original_data,
+                reason="Sentiment analysis model unavailable",
+                texts=texts_to_analyze
+            )
+
         # Run batch sentiment analysis
         print(f"Running sentiment analysis on {len(texts_to_analyze)} items...")
         batch_result = self.analyze_batch(texts_to_analyze, show_progress=True)

+        if not batch_result.analysis_performed:
+            reason = "Sentiment analysis unavailable"
+            if batch_result.results:
+                candidate_error = next((r.error_message for r in batch_result.results if r.error_message), None)
+                if candidate_error:
+                    reason = candidate_error
+            return self._build_passthrough_analysis(
+                original_data=original_data,
+                reason=reason,
+                texts=texts_to_analyze,
+                results=batch_result.results
+            )
+
         # Tally the sentiment distribution
         sentiment_distribution = {}
         high_confidence_results = []
@@ -392,31 +498,18 @@ def analyze_sentiment(text_or_texts: Union[str, List[str]],
     Returns:
         SentimentResult or BatchSentimentResult
     """
-    if initialize_if_needed and not multilingual_sentiment_analyzer.is_initialized:
-        if not multilingual_sentiment_analyzer.initialize():
-            # If initialization fails, return a failure result
-            if isinstance(text_or_texts, str):
-                return SentimentResult(
-                    text=text_or_texts,
-                    sentiment_label="initialization failed",
-                    confidence=0.0,
-                    probability_distribution={},
-                    success=False,
-                    error_message="Model initialization failed"
-                )
-            else:
-                return BatchSentimentResult(
-                    results=[],
-                    total_processed=0,
-                    success_count=0,
-                    failed_count=len(text_or_texts),
-                    average_confidence=0.0
-                )
+    if (
+        initialize_if_needed
+        and not multilingual_sentiment_analyzer.is_initialized
+        and not multilingual_sentiment_analyzer.is_disabled
+    ):
+        multilingual_sentiment_analyzer.initialize()

     if isinstance(text_or_texts, str):
         return multilingual_sentiment_analyzer.analyze_single_text(text_or_texts)
     else:
-        return multilingual_sentiment_analyzer.analyze_batch(text_or_texts)
+        texts_list = list(text_or_texts)
+        return multilingual_sentiment_analyzer.analyze_batch(texts_list)


 if __name__ == "__main__":
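After this simplification, callers of the module-level helper no longer need to special-case initialization failure; checking `analysis_performed` covers both the disabled and the not-yet-initialized cases. A usage sketch (the import path is hypothetical, since the module's file name is not shown in this diff):

```python
from weibo_multilingual_sentiment import analyze_sentiment  # hypothetical path

result = analyze_sentiment("这部电影太精彩了!")
if result.analysis_performed:
    print(result.sentiment_label, f"{result.confidence:.2f}")
else:
    # Passthrough mode: the original text is still usable downstream.
    print("passthrough:", result.text, "-", result.error_message)
```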