Showing 1 changed file with 112 additions and 48 deletions
| @@ -11,6 +11,7 @@ import re | @@ -11,6 +11,7 @@ import re | ||
| 11 | 11 | ||
| 12 | try: | 12 | try: |
| 13 | import torch | 13 | import torch |
| 14 | + | ||
| 14 | TORCH_AVAILABLE = True | 15 | TORCH_AVAILABLE = True |
| 15 | except ImportError: | 16 | except ImportError: |
| 16 | torch = None # type: ignore | 17 | torch = None # type: ignore |
| @@ -18,6 +19,7 @@ except ImportError: | @@ -18,6 +19,7 @@ except ImportError: | ||
| 18 | 19 | ||
| 19 | try: | 20 | try: |
| 20 | from transformers import AutoTokenizer, AutoModelForSequenceClassification | 21 | from transformers import AutoTokenizer, AutoModelForSequenceClassification |
| 22 | + | ||
| 21 | TRANSFORMERS_AVAILABLE = True | 23 | TRANSFORMERS_AVAILABLE = True |
| 22 | except ImportError: | 24 | except ImportError: |
| 23 | AutoTokenizer = None # type: ignore | 25 | AutoTokenizer = None # type: ignore |
| @@ -28,6 +30,7 @@ except ImportError: | @@ -28,6 +30,7 @@ except ImportError: | ||
| 28 | # INFO:若想跳过情感分析,可手动切换此开关为False | 30 | # INFO:若想跳过情感分析,可手动切换此开关为False |
| 29 | SENTIMENT_ANALYSIS_ENABLED = True | 31 | SENTIMENT_ANALYSIS_ENABLED = True |
| 30 | 32 | ||
| 33 | + | ||
| 31 | def _describe_missing_dependencies() -> str: | 34 | def _describe_missing_dependencies() -> str: |
| 32 | missing = [] | 35 | missing = [] |
| 33 | if not TORCH_AVAILABLE: | 36 | if not TORCH_AVAILABLE: |
| @@ -36,14 +39,21 @@ def _describe_missing_dependencies() -> str: | @@ -36,14 +39,21 @@ def _describe_missing_dependencies() -> str: | ||
| 36 | missing.append("Transformers") | 39 | missing.append("Transformers") |
| 37 | return " / ".join(missing) | 40 | return " / ".join(missing) |
| 38 | 41 | ||
| 42 | + | ||
| 39 | # 添加项目根目录到路径,以便导入WeiboMultilingualSentiment | 43 | # 添加项目根目录到路径,以便导入WeiboMultilingualSentiment |
| 40 | -project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||
| 41 | -weibo_sentiment_path = os.path.join(project_root, "SentimentAnalysisModel", "WeiboMultilingualSentiment") | 44 | +project_root = os.path.dirname( |
| 45 | + os.path.dirname(os.path.dirname(os.path.abspath(__file__))) | ||
| 46 | +) | ||
| 47 | +weibo_sentiment_path = os.path.join( | ||
| 48 | + project_root, "SentimentAnalysisModel", "WeiboMultilingualSentiment" | ||
| 49 | +) | ||
| 42 | sys.path.append(weibo_sentiment_path) | 50 | sys.path.append(weibo_sentiment_path) |
| 43 | 51 | ||
| 52 | + | ||
| 44 | @dataclass | 53 | @dataclass |
| 45 | class SentimentResult: | 54 | class SentimentResult: |
| 46 | """情感分析结果数据类""" | 55 | """情感分析结果数据类""" |
| 56 | + | ||
| 47 | text: str | 57 | text: str |
| 48 | sentiment_label: str | 58 | sentiment_label: str |
| 49 | confidence: float | 59 | confidence: float |
| @@ -56,6 +66,7 @@ class SentimentResult: | @@ -56,6 +66,7 @@ class SentimentResult: | ||
| 56 | @dataclass | 66 | @dataclass |
| 57 | class BatchSentimentResult: | 67 | class BatchSentimentResult: |
| 58 | """批量情感分析结果数据类""" | 68 | """批量情感分析结果数据类""" |
| 69 | + | ||
| 59 | results: List[SentimentResult] | 70 | results: List[SentimentResult] |
| 60 | total_processed: int | 71 | total_processed: int |
| 61 | success_count: int | 72 | success_count: int |
| @@ -85,7 +96,7 @@ class WeiboMultilingualSentimentAnalyzer: | @@ -85,7 +96,7 @@ class WeiboMultilingualSentimentAnalyzer: | ||
| 85 | 1: "负面", | 96 | 1: "负面", |
| 86 | 2: "中性", | 97 | 2: "中性", |
| 87 | 3: "正面", | 98 | 3: "正面", |
| 88 | - 4: "非常正面" | 99 | + 4: "非常正面", |
| 89 | } | 100 | } |
| 90 | 101 | ||
| 91 | if not SENTIMENT_ANALYSIS_ENABLED: | 102 | if not SENTIMENT_ANALYSIS_ENABLED: |
| @@ -96,9 +107,13 @@ class WeiboMultilingualSentimentAnalyzer: | @@ -96,9 +107,13 @@ class WeiboMultilingualSentimentAnalyzer: | ||
| 96 | 107 | ||
| 97 | if self.is_disabled: | 108 | if self.is_disabled: |
| 98 | reason = self.disable_reason or "Sentiment analysis disabled." | 109 | reason = self.disable_reason or "Sentiment analysis disabled." |
| 99 | - print(f"WeiboMultilingualSentimentAnalyzer initialized but disabled: {reason}") | 110 | + print( |
| 111 | + f"WeiboMultilingualSentimentAnalyzer initialized but disabled: {reason}" | ||
| 112 | + ) | ||
| 100 | else: | 113 | else: |
| 101 | - print("WeiboMultilingualSentimentAnalyzer 已创建,调用 initialize() 来加载模型") | 114 | + print( |
| 115 | + "WeiboMultilingualSentimentAnalyzer 已创建,调用 initialize() 来加载模型" | ||
| 116 | + ) | ||
| 102 | 117 | ||
| 103 | def disable(self, reason: Optional[str] = None, drop_state: bool = False) -> None: | 118 | def disable(self, reason: Optional[str] = None, drop_state: bool = False) -> None: |
| 104 | """Disable sentiment analysis, optionally clearing loaded resources.""" | 119 | """Disable sentiment analysis, optionally clearing loaded resources.""" |
| @@ -130,7 +145,11 @@ class WeiboMultilingualSentimentAnalyzer: | @@ -130,7 +145,11 @@ class WeiboMultilingualSentimentAnalyzer: | ||
| 130 | if torch.cuda.is_available(): | 145 | if torch.cuda.is_available(): |
| 131 | return torch.device("cuda") | 146 | return torch.device("cuda") |
| 132 | mps_backend = getattr(torch.backends, "mps", None) | 147 | mps_backend = getattr(torch.backends, "mps", None) |
| 133 | - if mps_backend and getattr(mps_backend, "is_available", lambda: False)() and getattr(mps_backend, "is_built", lambda: False)(): | 148 | + if ( |
| 149 | + mps_backend | ||
| 150 | + and getattr(mps_backend, "is_available", lambda: False)() | ||
| 151 | + and getattr(mps_backend, "is_built", lambda: False)() | ||
| 152 | + ): | ||
| 134 | return torch.device("mps") | 153 | return torch.device("mps") |
| 135 | return torch.device("cpu") | 154 | return torch.device("cpu") |
| 136 | 155 | ||
| @@ -167,12 +186,16 @@ class WeiboMultilingualSentimentAnalyzer: | @@ -167,12 +186,16 @@ class WeiboMultilingualSentimentAnalyzer: | ||
| 167 | if os.path.exists(local_model_path): | 186 | if os.path.exists(local_model_path): |
| 168 | print("从本地加载模型...") | 187 | print("从本地加载模型...") |
| 169 | self.tokenizer = AutoTokenizer.from_pretrained(local_model_path) | 188 | self.tokenizer = AutoTokenizer.from_pretrained(local_model_path) |
| 170 | - self.model = AutoModelForSequenceClassification.from_pretrained(local_model_path) | 189 | + self.model = AutoModelForSequenceClassification.from_pretrained( |
| 190 | + local_model_path | ||
| 191 | + ) | ||
| 171 | else: | 192 | else: |
| 172 | print("首次使用,正在下载模型到本地...") | 193 | print("首次使用,正在下载模型到本地...") |
| 173 | # 下载并保存到本地 | 194 | # 下载并保存到本地 |
| 174 | self.tokenizer = AutoTokenizer.from_pretrained(model_name) | 195 | self.tokenizer = AutoTokenizer.from_pretrained(model_name) |
| 175 | - self.model = AutoModelForSequenceClassification.from_pretrained(model_name) | 196 | + self.model = AutoModelForSequenceClassification.from_pretrained( |
| 197 | + model_name | ||
| 198 | + ) | ||
| 176 | 199 | ||
| 177 | # 保存到本地 | 200 | # 保存到本地 |
| 178 | os.makedirs(local_model_path, exist_ok=True) | 201 | os.makedirs(local_model_path, exist_ok=True) |
| @@ -227,7 +250,7 @@ class WeiboMultilingualSentimentAnalyzer: | @@ -227,7 +250,7 @@ class WeiboMultilingualSentimentAnalyzer: | ||
| 227 | return "" | 250 | return "" |
| 228 | 251 | ||
| 229 | # 去除多余空格 | 252 | # 去除多余空格 |
| 230 | - text = re.sub(r'\s+', ' ', text.strip()) | 253 | + text = re.sub(r"\s+", " ", text.strip()) |
| 231 | 254 | ||
| 232 | return text | 255 | return text |
| 233 | 256 | ||
| @@ -249,7 +272,7 @@ class WeiboMultilingualSentimentAnalyzer: | @@ -249,7 +272,7 @@ class WeiboMultilingualSentimentAnalyzer: | ||
| 249 | probability_distribution={}, | 272 | probability_distribution={}, |
| 250 | success=False, | 273 | success=False, |
| 251 | error_message=self.disable_reason or "情感分析功能已禁用", | 274 | error_message=self.disable_reason or "情感分析功能已禁用", |
| 252 | - analysis_performed=False | 275 | + analysis_performed=False, |
| 253 | ) | 276 | ) |
| 254 | 277 | ||
| 255 | if not self.is_initialized: | 278 | if not self.is_initialized: |
| @@ -260,7 +283,7 @@ class WeiboMultilingualSentimentAnalyzer: | @@ -260,7 +283,7 @@ class WeiboMultilingualSentimentAnalyzer: | ||
| 260 | probability_distribution={}, | 283 | probability_distribution={}, |
| 261 | success=False, | 284 | success=False, |
| 262 | error_message="模型未初始化,请先调用initialize() 方法", | 285 | error_message="模型未初始化,请先调用initialize() 方法", |
| 263 | - analysis_performed=False | 286 | + analysis_performed=False, |
| 264 | ) | 287 | ) |
| 265 | 288 | ||
| 266 | try: | 289 | try: |
| @@ -275,7 +298,7 @@ class WeiboMultilingualSentimentAnalyzer: | @@ -275,7 +298,7 @@ class WeiboMultilingualSentimentAnalyzer: | ||
| 275 | probability_distribution={}, | 298 | probability_distribution={}, |
| 276 | success=False, | 299 | success=False, |
| 277 | error_message="输入文本为空或无效内容", | 300 | error_message="输入文本为空或无效内容", |
| 278 | - analysis_performed=False | 301 | + analysis_performed=False, |
| 279 | ) | 302 | ) |
| 280 | 303 | ||
| 281 | # 分词编码 | 304 | # 分词编码 |
| @@ -284,7 +307,7 @@ class WeiboMultilingualSentimentAnalyzer: | @@ -284,7 +307,7 @@ class WeiboMultilingualSentimentAnalyzer: | ||
| 284 | max_length=512, | 307 | max_length=512, |
| 285 | padding=True, | 308 | padding=True, |
| 286 | truncation=True, | 309 | truncation=True, |
| 287 | - return_tensors='pt' | 310 | + return_tensors="pt", |
| 288 | ) | 311 | ) |
| 289 | 312 | ||
| 290 | # 转移到设备 | 313 | # 转移到设备 |
| @@ -311,7 +334,7 @@ class WeiboMultilingualSentimentAnalyzer: | @@ -311,7 +334,7 @@ class WeiboMultilingualSentimentAnalyzer: | ||
| 311 | sentiment_label=label, | 334 | sentiment_label=label, |
| 312 | confidence=confidence, | 335 | confidence=confidence, |
| 313 | probability_distribution=prob_dist, | 336 | probability_distribution=prob_dist, |
| 314 | - success=True | 337 | + success=True, |
| 315 | ) | 338 | ) |
| 316 | 339 | ||
| 317 | except Exception as e: | 340 | except Exception as e: |
| @@ -322,10 +345,12 @@ class WeiboMultilingualSentimentAnalyzer: | @@ -322,10 +345,12 @@ class WeiboMultilingualSentimentAnalyzer: | ||
| 322 | probability_distribution={}, | 345 | probability_distribution={}, |
| 323 | success=False, | 346 | success=False, |
| 324 | error_message=f"预测时发生错误: {str(e)}", | 347 | error_message=f"预测时发生错误: {str(e)}", |
| 325 | - analysis_performed=False | 348 | + analysis_performed=False, |
| 326 | ) | 349 | ) |
| 327 | 350 | ||
| 328 | - def analyze_batch(self, texts: List[str], show_progress: bool = True) -> BatchSentimentResult: | 351 | + def analyze_batch( |
| 352 | + self, texts: List[str], show_progress: bool = True | ||
| 353 | + ) -> BatchSentimentResult: | ||
| 329 | """ | 354 | """ |
| 330 | 批量情感分析 | 355 | 批量情感分析 |
| 331 | 356 | ||
| @@ -343,7 +368,7 @@ class WeiboMultilingualSentimentAnalyzer: | @@ -343,7 +368,7 @@ class WeiboMultilingualSentimentAnalyzer: | ||
| 343 | success_count=0, | 368 | success_count=0, |
| 344 | failed_count=0, | 369 | failed_count=0, |
| 345 | average_confidence=0.0, | 370 | average_confidence=0.0, |
| 346 | - analysis_performed=not self.is_disabled and self.is_initialized | 371 | + analysis_performed=not self.is_disabled and self.is_initialized, |
| 347 | ) | 372 | ) |
| 348 | 373 | ||
| 349 | if self.is_disabled or not self.is_initialized: | 374 | if self.is_disabled or not self.is_initialized: |
| @@ -355,7 +380,7 @@ class WeiboMultilingualSentimentAnalyzer: | @@ -355,7 +380,7 @@ class WeiboMultilingualSentimentAnalyzer: | ||
| 355 | probability_distribution={}, | 380 | probability_distribution={}, |
| 356 | success=False, | 381 | success=False, |
| 357 | error_message=self.disable_reason or "情感分析功能不可用", | 382 | error_message=self.disable_reason or "情感分析功能不可用", |
| 358 | - analysis_performed=False | 383 | + analysis_performed=False, |
| 359 | ) | 384 | ) |
| 360 | for text in texts | 385 | for text in texts |
| 361 | ] | 386 | ] |
| @@ -365,7 +390,7 @@ class WeiboMultilingualSentimentAnalyzer: | @@ -365,7 +390,7 @@ class WeiboMultilingualSentimentAnalyzer: | ||
| 365 | success_count=0, | 390 | success_count=0, |
| 366 | failed_count=len(texts), | 391 | failed_count=len(texts), |
| 367 | average_confidence=0.0, | 392 | average_confidence=0.0, |
| 368 | - analysis_performed=False | 393 | + analysis_performed=False, |
| 369 | ) | 394 | ) |
| 370 | 395 | ||
| 371 | results = [] | 396 | results = [] |
| @@ -374,7 +399,7 @@ class WeiboMultilingualSentimentAnalyzer: | @@ -374,7 +399,7 @@ class WeiboMultilingualSentimentAnalyzer: | ||
| 374 | 399 | ||
| 375 | for i, text in enumerate(texts): | 400 | for i, text in enumerate(texts): |
| 376 | if show_progress and len(texts) > 1: | 401 | if show_progress and len(texts) > 1: |
| 377 | - print(f"处理进度: {i+1}/{len(texts)}") | 402 | + print(f"处理进度: {i + 1}/{len(texts)}") |
| 378 | 403 | ||
| 379 | result = self.analyze_single_text(text) | 404 | result = self.analyze_single_text(text) |
| 380 | results.append(result) | 405 | results.append(result) |
| @@ -383,7 +408,9 @@ class WeiboMultilingualSentimentAnalyzer: | @@ -383,7 +408,9 @@ class WeiboMultilingualSentimentAnalyzer: | ||
| 383 | success_count += 1 | 408 | success_count += 1 |
| 384 | total_confidence += result.confidence | 409 | total_confidence += result.confidence |
| 385 | 410 | ||
| 386 | - average_confidence = total_confidence / success_count if success_count > 0 else 0.0 | 411 | + average_confidence = ( |
| 412 | + total_confidence / success_count if success_count > 0 else 0.0 | ||
| 413 | + ) | ||
| 387 | failed_count = len(texts) - success_count | 414 | failed_count = len(texts) - success_count |
| 388 | 415 | ||
| 389 | return BatchSentimentResult( | 416 | return BatchSentimentResult( |
| @@ -392,7 +419,7 @@ class WeiboMultilingualSentimentAnalyzer: | @@ -392,7 +419,7 @@ class WeiboMultilingualSentimentAnalyzer: | ||
| 392 | success_count=success_count, | 419 | success_count=success_count, |
| 393 | failed_count=failed_count, | 420 | failed_count=failed_count, |
| 394 | average_confidence=average_confidence, | 421 | average_confidence=average_confidence, |
| 395 | - analysis_performed=True | 422 | + analysis_performed=True, |
| 396 | ) | 423 | ) |
| 397 | 424 | ||
| 398 | def _build_passthrough_analysis( | 425 | def _build_passthrough_analysis( |
| @@ -400,7 +427,7 @@ class WeiboMultilingualSentimentAnalyzer: | @@ -400,7 +427,7 @@ class WeiboMultilingualSentimentAnalyzer: | ||
| 400 | original_data: List[Dict[str, Any]], | 427 | original_data: List[Dict[str, Any]], |
| 401 | reason: str, | 428 | reason: str, |
| 402 | texts: Optional[List[str]] = None, | 429 | texts: Optional[List[str]] = None, |
| 403 | - results: Optional[List[SentimentResult]] = None | 430 | + results: Optional[List[SentimentResult]] = None, |
| 404 | ) -> Dict[str, Any]: | 431 | ) -> Dict[str, Any]: |
| 405 | """ | 432 | """ |
| 406 | 构建在情感分析不可用时的透传结果 | 433 | 构建在情感分析不可用时的透传结果 |
| @@ -416,7 +443,7 @@ class WeiboMultilingualSentimentAnalyzer: | @@ -416,7 +443,7 @@ class WeiboMultilingualSentimentAnalyzer: | ||
| 416 | "sentiment_distribution": {}, | 443 | "sentiment_distribution": {}, |
| 417 | "high_confidence_results": [], | 444 | "high_confidence_results": [], |
| 418 | "summary": f"情感分析未执行:{reason}", | 445 | "summary": f"情感分析未执行:{reason}", |
| 419 | - "original_texts": original_data | 446 | + "original_texts": original_data, |
| 420 | } | 447 | } |
| 421 | } | 448 | } |
| 422 | 449 | ||
| @@ -431,9 +458,12 @@ class WeiboMultilingualSentimentAnalyzer: | @@ -431,9 +458,12 @@ class WeiboMultilingualSentimentAnalyzer: | ||
| 431 | 458 | ||
| 432 | return response | 459 | return response |
| 433 | 460 | ||
| 434 | - def analyze_query_results(self, query_results: List[Dict[str, Any]], | 461 | + def analyze_query_results( |
| 462 | + self, | ||
| 463 | + query_results: List[Dict[str, Any]], | ||
| 435 | text_field: str = "content", | 464 | text_field: str = "content", |
| 436 | - min_confidence: float = 0.5) -> Dict[str, Any]: | 465 | + min_confidence: float = 0.5, |
| 466 | + ) -> Dict[str, Any]: | ||
| 437 | """ | 467 | """ |
| 438 | 对查询结果进行情感分析 | 468 | 对查询结果进行情感分析 |
| 439 | 专门用于分析从MediaCrawlerDB返回的查询结果 | 469 | 专门用于分析从MediaCrawlerDB返回的查询结果 |
| @@ -452,7 +482,7 @@ class WeiboMultilingualSentimentAnalyzer: | @@ -452,7 +482,7 @@ class WeiboMultilingualSentimentAnalyzer: | ||
| 452 | "total_analyzed": 0, | 482 | "total_analyzed": 0, |
| 453 | "sentiment_distribution": {}, | 483 | "sentiment_distribution": {}, |
| 454 | "high_confidence_results": [], | 484 | "high_confidence_results": [], |
| 455 | - "summary": "没有内容需要分析" | 485 | + "summary": "没有内容需要分析", |
| 456 | } | 486 | } |
| 457 | } | 487 | } |
| 458 | 488 | ||
| @@ -478,7 +508,7 @@ class WeiboMultilingualSentimentAnalyzer: | @@ -478,7 +508,7 @@ class WeiboMultilingualSentimentAnalyzer: | ||
| 478 | "total_analyzed": 0, | 508 | "total_analyzed": 0, |
| 479 | "sentiment_distribution": {}, | 509 | "sentiment_distribution": {}, |
| 480 | "high_confidence_results": [], | 510 | "high_confidence_results": [], |
| 481 | - "summary": "查询结果中没有找到可分析的文本内容" | 511 | + "summary": "查询结果中没有找到可分析的文本内容", |
| 482 | } | 512 | } |
| 483 | } | 513 | } |
| 484 | 514 | ||
| @@ -486,7 +516,7 @@ class WeiboMultilingualSentimentAnalyzer: | @@ -486,7 +516,7 @@ class WeiboMultilingualSentimentAnalyzer: | ||
| 486 | return self._build_passthrough_analysis( | 516 | return self._build_passthrough_analysis( |
| 487 | original_data=original_data, | 517 | original_data=original_data, |
| 488 | reason=self.disable_reason or "情感分析模型不可用", | 518 | reason=self.disable_reason or "情感分析模型不可用", |
| 489 | - texts=texts_to_analyze | 519 | + texts=texts_to_analyze, |
| 490 | ) | 520 | ) |
| 491 | 521 | ||
| 492 | # 执行批量情感分析 | 522 | # 执行批量情感分析 |
| @@ -496,14 +526,17 @@ class WeiboMultilingualSentimentAnalyzer: | @@ -496,14 +526,17 @@ class WeiboMultilingualSentimentAnalyzer: | ||
| 496 | if not batch_result.analysis_performed: | 526 | if not batch_result.analysis_performed: |
| 497 | reason = self.disable_reason or "情感分析功能不可用" | 527 | reason = self.disable_reason or "情感分析功能不可用" |
| 498 | if batch_result.results: | 528 | if batch_result.results: |
| 499 | - candidate_error = next((r.error_message for r in batch_result.results if r.error_message), None) | 529 | + candidate_error = next( |
| 530 | + (r.error_message for r in batch_result.results if r.error_message), | ||
| 531 | + None, | ||
| 532 | + ) | ||
| 500 | if candidate_error: | 533 | if candidate_error: |
| 501 | reason = candidate_error | 534 | reason = candidate_error |
| 502 | return self._build_passthrough_analysis( | 535 | return self._build_passthrough_analysis( |
| 503 | original_data=original_data, | 536 | original_data=original_data, |
| 504 | reason=reason, | 537 | reason=reason, |
| 505 | texts=texts_to_analyze, | 538 | texts=texts_to_analyze, |
| 506 | - results=batch_result.results | 539 | + results=batch_result.results, |
| 507 | ) | 540 | ) |
| 508 | 541 | ||
| 509 | # 统计情感分布 | 542 | # 统计情感分布 |
| @@ -520,18 +553,22 @@ class WeiboMultilingualSentimentAnalyzer: | @@ -520,18 +553,22 @@ class WeiboMultilingualSentimentAnalyzer: | ||
| 520 | 553 | ||
| 521 | # 收集高置信度结果 | 554 | # 收集高置信度结果 |
| 522 | if result.confidence >= min_confidence: | 555 | if result.confidence >= min_confidence: |
| 523 | - high_confidence_results.append({ | 556 | + high_confidence_results.append( |
| 557 | + { | ||
| 524 | "original_data": original_item, | 558 | "original_data": original_item, |
| 525 | "sentiment": result.sentiment_label, | 559 | "sentiment": result.sentiment_label, |
| 526 | "confidence": result.confidence, | 560 | "confidence": result.confidence, |
| 527 | - "text_preview": result.text[:100] + "..." if len(result.text) > 100 else result.text | ||
| 528 | - }) | 561 | + "text_preview": result.text[:100] + "..." |
| 562 | + if len(result.text) > 100 | ||
| 563 | + else result.text, | ||
| 564 | + } | ||
| 565 | + ) | ||
| 529 | 566 | ||
| 530 | # 生成情感分析摘要 | 567 | # 生成情感分析摘要 |
| 531 | total_analyzed = batch_result.success_count | 568 | total_analyzed = batch_result.success_count |
| 532 | if total_analyzed > 0: | 569 | if total_analyzed > 0: |
| 533 | dominant_sentiment = max(sentiment_distribution.items(), key=lambda x: x[1]) | 570 | dominant_sentiment = max(sentiment_distribution.items(), key=lambda x: x[1]) |
| 534 | - sentiment_summary = f"共分析{total_analyzed}条内容,主要情感倾向为'{dominant_sentiment[0]}'({dominant_sentiment[1]}条,占{dominant_sentiment[1]/total_analyzed*100:.1f}%)" | 571 | + sentiment_summary = f"共分析{total_analyzed}条内容,主要情感倾向为'{dominant_sentiment[0]}'({dominant_sentiment[1]}条,占{dominant_sentiment[1] / total_analyzed * 100:.1f}%)" |
| 535 | else: | 572 | else: |
| 536 | sentiment_summary = "情感分析失败" | 573 | sentiment_summary = "情感分析失败" |
| 537 | 574 | ||
| @@ -542,7 +579,7 @@ class WeiboMultilingualSentimentAnalyzer: | @@ -542,7 +579,7 @@ class WeiboMultilingualSentimentAnalyzer: | ||
| 542 | "average_confidence": round(batch_result.average_confidence, 4), | 579 | "average_confidence": round(batch_result.average_confidence, 4), |
| 543 | "sentiment_distribution": sentiment_distribution, | 580 | "sentiment_distribution": sentiment_distribution, |
| 544 | "high_confidence_results": high_confidence_results, # 返回所有高置信度结果,不做限制 | 581 | "high_confidence_results": high_confidence_results, # 返回所有高置信度结果,不做限制 |
| 545 | - "summary": sentiment_summary | 582 | + "summary": sentiment_summary, |
| 546 | } | 583 | } |
| 547 | } | 584 | } |
| 548 | 585 | ||
| @@ -556,14 +593,32 @@ class WeiboMultilingualSentimentAnalyzer: | @@ -556,14 +593,32 @@ class WeiboMultilingualSentimentAnalyzer: | ||
| 556 | return { | 593 | return { |
| 557 | "model_name": "tabularisai/multilingual-sentiment-analysis", | 594 | "model_name": "tabularisai/multilingual-sentiment-analysis", |
| 558 | "supported_languages": [ | 595 | "supported_languages": [ |
| 559 | - "中文", "英文", "西班牙文", "阿拉伯文", "日文", "韩文", | ||
| 560 | - "德文", "法文", "意大利文", "葡萄牙文", "俄文", "荷兰文", | ||
| 561 | - "波兰文", "土耳其文", "丹麦文", "希腊文", "芬兰文", | ||
| 562 | - "瑞典文", "挪威文", "匈牙利文", "捷克文", "保加利亚文" | 596 | + "中文", |
| 597 | + "英文", | ||
| 598 | + "西班牙文", | ||
| 599 | + "阿拉伯文", | ||
| 600 | + "日文", | ||
| 601 | + "韩文", | ||
| 602 | + "德文", | ||
| 603 | + "法文", | ||
| 604 | + "意大利文", | ||
| 605 | + "葡萄牙文", | ||
| 606 | + "俄文", | ||
| 607 | + "荷兰文", | ||
| 608 | + "波兰文", | ||
| 609 | + "土耳其文", | ||
| 610 | + "丹麦文", | ||
| 611 | + "希腊文", | ||
| 612 | + "芬兰文", | ||
| 613 | + "瑞典文", | ||
| 614 | + "挪威文", | ||
| 615 | + "匈牙利文", | ||
| 616 | + "捷克文", | ||
| 617 | + "保加利亚文", | ||
| 563 | ], | 618 | ], |
| 564 | "sentiment_levels": list(self.sentiment_map.values()), | 619 | "sentiment_levels": list(self.sentiment_map.values()), |
| 565 | "is_initialized": self.is_initialized, | 620 | "is_initialized": self.is_initialized, |
| 566 | - "device": str(self.device) if self.device else "未设置" | 621 | + "device": str(self.device) if self.device else "未设置", |
| 567 | } | 622 | } |
| 568 | 623 | ||
| 569 | 624 | ||
| @@ -576,13 +631,16 @@ def enable_sentiment_analysis() -> bool: | @@ -576,13 +631,16 @@ def enable_sentiment_analysis() -> bool: | ||
| 576 | return multilingual_sentiment_analyzer.enable() | 631 | return multilingual_sentiment_analyzer.enable() |
| 577 | 632 | ||
| 578 | 633 | ||
| 579 | -def disable_sentiment_analysis(reason: Optional[str] = None, drop_state: bool = False) -> None: | 634 | +def disable_sentiment_analysis( |
| 635 | + reason: Optional[str] = None, drop_state: bool = False | ||
| 636 | +) -> None: | ||
| 580 | """Public helper to disable sentiment analysis at runtime.""" | 637 | """Public helper to disable sentiment analysis at runtime.""" |
| 581 | multilingual_sentiment_analyzer.disable(reason=reason, drop_state=drop_state) | 638 | multilingual_sentiment_analyzer.disable(reason=reason, drop_state=drop_state) |
| 582 | 639 | ||
| 583 | 640 | ||
| 584 | -def analyze_sentiment(text_or_texts: Union[str, List[str]], | ||
| 585 | - initialize_if_needed: bool = True) -> Union[SentimentResult, BatchSentimentResult]: | 641 | +def analyze_sentiment( |
| 642 | + text_or_texts: Union[str, List[str]], initialize_if_needed: bool = True | ||
| 643 | +) -> Union[SentimentResult, BatchSentimentResult]: | ||
| 586 | """ | 644 | """ |
| 587 | 便捷的情感分析函数 | 645 | 便捷的情感分析函数 |
| 588 | 646 | ||
| @@ -614,20 +672,26 @@ if __name__ == "__main__": | @@ -614,20 +672,26 @@ if __name__ == "__main__": | ||
| 614 | if analyzer.initialize(): | 672 | if analyzer.initialize(): |
| 615 | # 测试单个文本 | 673 | # 测试单个文本 |
| 616 | result = analyzer.analyze_single_text("今天天气真好,心情特别棒!") | 674 | result = analyzer.analyze_single_text("今天天气真好,心情特别棒!") |
| 617 | - print(f"单个文本分析: {result.sentiment_label} (置信度: {result.confidence:.4f})") | 675 | + print( |
| 676 | + f"单个文本分析: {result.sentiment_label} (置信度: {result.confidence:.4f})" | ||
| 677 | + ) | ||
| 618 | 678 | ||
| 619 | # 测试批量文本 | 679 | # 测试批量文本 |
| 620 | test_texts = [ | 680 | test_texts = [ |
| 621 | "这家餐厅的菜味道非常棒!", | 681 | "这家餐厅的菜味道非常棒!", |
| 622 | "服务态度太差了,很失望", | 682 | "服务态度太差了,很失望", |
| 623 | "I absolutely love this product!", | 683 | "I absolutely love this product!", |
| 624 | - "The customer service was disappointing." | 684 | + "The customer service was disappointing.", |
| 625 | ] | 685 | ] |
| 626 | 686 | ||
| 627 | batch_result = analyzer.analyze_batch(test_texts) | 687 | batch_result = analyzer.analyze_batch(test_texts) |
| 628 | - print(f"\n批量分析: 成功 {batch_result.success_count}/{batch_result.total_processed}") | 688 | + print( |
| 689 | + f"\n批量分析: 成功 {batch_result.success_count}/{batch_result.total_processed}" | ||
| 690 | + ) | ||
| 629 | 691 | ||
| 630 | for result in batch_result.results: | 692 | for result in batch_result.results: |
| 631 | - print(f"'{result.text[:30]}...' -> {result.sentiment_label} ({result.confidence:.4f})") | 693 | + print( |
| 694 | + f"'{result.text[:30]}...' -> {result.sentiment_label} ({result.confidence:.4f})" | ||
| 695 | + ) | ||
| 632 | else: | 696 | else: |
| 633 | print("模型初始化失败,无法进行测试") | 697 | print("模型初始化失败,无法进行测试") |
-
Please register or login to post a comment