666ghj

Completed requirements.txt, fixed the Dockerfile, and updated the README. Significantly refactored the sentiment analyzer to be more robust against missing machine learning dependencies and controllable via a toggle.
@@ -43,8 +43,6 @@ WORKDIR /app @@ -43,8 +43,6 @@ WORKDIR /app
43 # Install Python dependencies first to leverage Docker layer caching 43 # Install Python dependencies first to leverage Docker layer caching
44 COPY requirements.txt ./ 44 COPY requirements.txt ./
45 RUN uv pip install --system -r requirements.txt && \ 45 RUN uv pip install --system -r requirements.txt && \
46 - uv pip install --system torch torchvision torchaudio && \  
47 - uv pip install --system transformers scikit-learn xgboost && \  
48 python -m playwright install chromium 46 python -m playwright install chromium
49 47
50 # Copy application source 48 # Copy application source
@@ -3,14 +3,39 @@ @@ -3,14 +3,39 @@
3 基于WeiboMultilingualSentiment模型为InsightEngine提供情感分析功能 3 基于WeiboMultilingualSentiment模型为InsightEngine提供情感分析功能
4 """ 4 """
5 5
6 -import torch  
7 -from transformers import AutoTokenizer, AutoModelForSequenceClassification  
8 import os 6 import os
9 import sys 7 import sys
10 from typing import List, Dict, Any, Optional, Union 8 from typing import List, Dict, Any, Optional, Union
11 from dataclasses import dataclass 9 from dataclasses import dataclass
12 import re 10 import re
13 11
  12 +try:
  13 + import torch
  14 + TORCH_AVAILABLE = True
  15 +except ImportError:
  16 + torch = None # type: ignore
  17 + TORCH_AVAILABLE = False
  18 +
  19 +try:
  20 + from transformers import AutoTokenizer, AutoModelForSequenceClassification
  21 + TRANSFORMERS_AVAILABLE = True
  22 +except ImportError:
  23 + AutoTokenizer = None # type: ignore
  24 + AutoModelForSequenceClassification = None # type: ignore
  25 + TRANSFORMERS_AVAILABLE = False
  26 +
  27 +
  28 +# INFO:若想跳过情感分析,可手动切换此开关为False
  29 +SENTIMENT_ANALYSIS_ENABLED = True
  30 +
  31 +def _describe_missing_dependencies() -> str:
  32 + missing = []
  33 + if not TORCH_AVAILABLE:
  34 + missing.append("PyTorch")
  35 + if not TRANSFORMERS_AVAILABLE:
  36 + missing.append("Transformers")
  37 + return " / ".join(missing)
  38 +
14 # 添加项目根目录到路径,以便导入WeiboMultilingualSentiment 39 # 添加项目根目录到路径,以便导入WeiboMultilingualSentiment
15 project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 40 project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
16 weibo_sentiment_path = os.path.join(project_root, "SentimentAnalysisModel", "WeiboMultilingualSentiment") 41 weibo_sentiment_path = os.path.join(project_root, "SentimentAnalysisModel", "WeiboMultilingualSentiment")
@@ -52,6 +77,7 @@ class WeiboMultilingualSentimentAnalyzer: @@ -52,6 +77,7 @@ class WeiboMultilingualSentimentAnalyzer:
52 self.device = None 77 self.device = None
53 self.is_initialized = False 78 self.is_initialized = False
54 self.is_disabled = False 79 self.is_disabled = False
  80 + self.disable_reason: Optional[str] = None
55 81
56 # 情感标签映射(5级分类) 82 # 情感标签映射(5级分类)
57 self.sentiment_map = { 83 self.sentiment_map = {
@@ -61,8 +87,52 @@ class WeiboMultilingualSentimentAnalyzer: @@ -61,8 +87,52 @@ class WeiboMultilingualSentimentAnalyzer:
61 3: "正面", 87 3: "正面",
62 4: "非常正面" 88 4: "非常正面"
63 } 89 }
64 -  
65 - print("WeiboMultilingualSentimentAnalyzer 已创建,调用 initialize() 来加载模型") 90 +
  91 + if not SENTIMENT_ANALYSIS_ENABLED:
  92 + self.disable("情感分析功能已在配置中关闭。")
  93 + elif not (TORCH_AVAILABLE and TRANSFORMERS_AVAILABLE):
  94 + missing = _describe_missing_dependencies() or "未知依赖"
  95 + self.disable(f"缺少依赖: {missing},情感分析已禁用。")
  96 +
  97 + if self.is_disabled:
  98 + reason = self.disable_reason or "Sentiment analysis disabled."
  99 + print(f"WeiboMultilingualSentimentAnalyzer initialized but disabled: {reason}")
  100 + else:
  101 + print("WeiboMultilingualSentimentAnalyzer 已创建,调用 initialize() 来加载模型")
  102 +
  103 + def disable(self, reason: Optional[str] = None, drop_state: bool = False) -> None:
  104 + """Disable sentiment analysis, optionally clearing loaded resources."""
  105 + self.is_disabled = True
  106 + self.disable_reason = reason or "Sentiment analysis disabled."
  107 + if drop_state:
  108 + self.model = None
  109 + self.tokenizer = None
  110 + self.device = None
  111 + self.is_initialized = False
  112 +
  113 + def enable(self) -> bool:
  114 + """Attempt to enable sentiment analysis; returns True if enabled."""
  115 + if not SENTIMENT_ANALYSIS_ENABLED:
  116 + self.disable("情感分析功能已在配置中关闭。")
  117 + return False
  118 + if not (TORCH_AVAILABLE and TRANSFORMERS_AVAILABLE):
  119 + missing = _describe_missing_dependencies() or "未知依赖"
  120 + self.disable(f"缺少依赖: {missing},情感分析已禁用。")
  121 + return False
  122 + self.is_disabled = False
  123 + self.disable_reason = None
  124 + return True
  125 +
  126 + def _select_device(self):
  127 + """Select the best available torch device."""
  128 + if not TORCH_AVAILABLE:
  129 + return None
  130 + if torch.cuda.is_available():
  131 + return torch.device("cuda")
  132 + mps_backend = getattr(torch.backends, "mps", None)
  133 + if mps_backend and getattr(mps_backend, "is_available", lambda: False)() and getattr(mps_backend, "is_built", lambda: False)():
  134 + return torch.device("mps")
  135 + return torch.device("cpu")
66 136
67 def initialize(self) -> bool: 137 def initialize(self) -> bool:
68 """ 138 """
@@ -72,7 +142,14 @@ class WeiboMultilingualSentimentAnalyzer: @@ -72,7 +142,14 @@ class WeiboMultilingualSentimentAnalyzer:
72 是否初始化成功 142 是否初始化成功
73 """ 143 """
74 if self.is_disabled: 144 if self.is_disabled:
75 - print("情感分析功能已禁用,跳过模型加载") 145 + reason = self.disable_reason or "情感分析功能已禁用"
  146 + print(f"情感分析功能已禁用,跳过模型加载:{reason}")
  147 + return False
  148 +
  149 + if not (TORCH_AVAILABLE and TRANSFORMERS_AVAILABLE):
  150 + missing = _describe_missing_dependencies() or "未知依赖"
  151 + self.disable(f"缺少依赖: {missing},情感分析已禁用。", drop_state=True)
  152 + print(f"缺少依赖: {missing},无法加载情感分析模型。")
76 return False 153 return False
77 154
78 if self.is_initialized: 155 if self.is_initialized:
@@ -104,11 +181,23 @@ class WeiboMultilingualSentimentAnalyzer: @@ -104,11 +181,23 @@ class WeiboMultilingualSentimentAnalyzer:
104 print(f"模型已保存到: {local_model_path}") 181 print(f"模型已保存到: {local_model_path}")
105 182
106 # 设置设备 183 # 设置设备
107 - self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 184 + device = self._select_device()
  185 + if device is None:
  186 + raise RuntimeError("未检测到可用的计算设备")
  187 +
  188 + self.device = device
108 self.model.to(self.device) 189 self.model.to(self.device)
109 self.model.eval() 190 self.model.eval()
110 self.is_initialized = True 191 self.is_initialized = True
111 - self.is_disabled = False 192 + self.enable()
  193 +
  194 + device_type = getattr(self.device, "type", str(self.device))
  195 + if device_type == "cuda":
  196 + print("检测到可用 GPU,已优先使用 CUDA 进行推理。")
  197 + elif device_type == "mps":
  198 + print("检测到 Apple MPS 设备,已使用 MPS 进行推理。")
  199 + else:
  200 + print("未检测到 GPU,自动使用 CPU 进行推理。")
112 201
113 print(f"模型加载成功! 使用设备: {self.device}") 202 print(f"模型加载成功! 使用设备: {self.device}")
114 print("支持语言: 中文、英文、西班牙文、阿拉伯文、日文、韩文等22种语言") 203 print("支持语言: 中文、英文、西班牙文、阿拉伯文、日文、韩文等22种语言")
@@ -117,14 +206,10 @@ class WeiboMultilingualSentimentAnalyzer: @@ -117,14 +206,10 @@ class WeiboMultilingualSentimentAnalyzer:
117 return True 206 return True
118 207
119 except Exception as e: 208 except Exception as e:
120 - print(f"模型加载失败: {e}") 209 + error_message = f"模型加载失败: {e}"
  210 + print(error_message)
121 print("请检查网络连接或模型文件") 211 print("请检查网络连接或模型文件")
122 - self.is_initialized = False  
123 - self.is_disabled = True  
124 - self.model = None  
125 - self.tokenizer = None  
126 - self.device = None  
127 - print("情感分析功能已禁用,将直接返回原始文本内容") 212 + self.disable(error_message, drop_state=True)
128 return False 213 return False
129 214
130 def _preprocess_text(self, text: str) -> str: 215 def _preprocess_text(self, text: str) -> str:
@@ -163,7 +248,7 @@ class WeiboMultilingualSentimentAnalyzer: @@ -163,7 +248,7 @@ class WeiboMultilingualSentimentAnalyzer:
163 confidence=0.0, 248 confidence=0.0,
164 probability_distribution={}, 249 probability_distribution={},
165 success=False, 250 success=False,
166 - error_message="情感分析功能已禁用", 251 + error_message=self.disable_reason or "情感分析功能已禁用",
167 analysis_performed=False 252 analysis_performed=False
168 ) 253 )
169 254
@@ -269,7 +354,7 @@ class WeiboMultilingualSentimentAnalyzer: @@ -269,7 +354,7 @@ class WeiboMultilingualSentimentAnalyzer:
269 confidence=0.0, 354 confidence=0.0,
270 probability_distribution={}, 355 probability_distribution={},
271 success=False, 356 success=False,
272 - error_message="情感分析功能不可用", 357 + error_message=self.disable_reason or "情感分析功能不可用",
273 analysis_performed=False 358 analysis_performed=False
274 ) 359 )
275 for text in texts 360 for text in texts
@@ -318,7 +403,7 @@ class WeiboMultilingualSentimentAnalyzer: @@ -318,7 +403,7 @@ class WeiboMultilingualSentimentAnalyzer:
318 results: Optional[List[SentimentResult]] = None 403 results: Optional[List[SentimentResult]] = None
319 ) -> Dict[str, Any]: 404 ) -> Dict[str, Any]:
320 """ 405 """
321 - 构建在情感分析不可用时的透传结�? 406 + 构建在情感分析不可用时的透传结果
322 """ 407 """
323 total_items = len(texts) if texts is not None else len(original_data) 408 total_items = len(texts) if texts is not None else len(original_data)
324 response: Dict[str, Any] = { 409 response: Dict[str, Any] = {
@@ -400,7 +485,7 @@ class WeiboMultilingualSentimentAnalyzer: @@ -400,7 +485,7 @@ class WeiboMultilingualSentimentAnalyzer:
400 if self.is_disabled: 485 if self.is_disabled:
401 return self._build_passthrough_analysis( 486 return self._build_passthrough_analysis(
402 original_data=original_data, 487 original_data=original_data,
403 - reason="情感分析模型不可用", 488 + reason=self.disable_reason or "情感分析模型不可用",
404 texts=texts_to_analyze 489 texts=texts_to_analyze
405 ) 490 )
406 491
@@ -409,7 +494,7 @@ class WeiboMultilingualSentimentAnalyzer: @@ -409,7 +494,7 @@ class WeiboMultilingualSentimentAnalyzer:
409 batch_result = self.analyze_batch(texts_to_analyze, show_progress=True) 494 batch_result = self.analyze_batch(texts_to_analyze, show_progress=True)
410 495
411 if not batch_result.analysis_performed: 496 if not batch_result.analysis_performed:
412 - reason = "情感分析功能不可用" 497 + reason = self.disable_reason or "情感分析功能不可用"
413 if batch_result.results: 498 if batch_result.results:
414 candidate_error = next((r.error_message for r in batch_result.results if r.error_message), None) 499 candidate_error = next((r.error_message for r in batch_result.results if r.error_message), None)
415 if candidate_error: 500 if candidate_error:
@@ -486,6 +571,16 @@ class WeiboMultilingualSentimentAnalyzer: @@ -486,6 +571,16 @@ class WeiboMultilingualSentimentAnalyzer:
486 multilingual_sentiment_analyzer = WeiboMultilingualSentimentAnalyzer() 571 multilingual_sentiment_analyzer = WeiboMultilingualSentimentAnalyzer()
487 572
488 573
  574 +def enable_sentiment_analysis() -> bool:
  575 + """Public helper to enable sentiment analysis at runtime."""
  576 + return multilingual_sentiment_analyzer.enable()
  577 +
  578 +
  579 +def disable_sentiment_analysis(reason: Optional[str] = None, drop_state: bool = False) -> None:
  580 + """Public helper to disable sentiment analysis at runtime."""
  581 + multilingual_sentiment_analyzer.disable(reason=reason, drop_state=drop_state)
  582 +
  583 +
489 def analyze_sentiment(text_or_texts: Union[str, List[str]], 584 def analyze_sentiment(text_or_texts: Union[str, List[str]],
490 initialize_if_needed: bool = True) -> Union[SentimentResult, BatchSentimentResult]: 585 initialize_if_needed: bool = True) -> Union[SentimentResult, BatchSentimentResult]:
491 """ 586 """
@@ -204,17 +204,7 @@ conda activate your_conda_name @@ -204,17 +204,7 @@ conda activate your_conda_name
204 ```bash 204 ```bash
205 # Basic dependency installation 205 # Basic dependency installation
206 pip install -r requirements.txt 206 pip install -r requirements.txt
207 -  
208 -#========Below are optional========  
209 -# If you need local sentiment analysis functionality, install PyTorch  
210 -# CPU version  
211 -pip install torch torchvision torchaudio  
212 -  
213 -# CUDA 11.8 version (if you have GPU)  
214 -pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118  
215 -  
216 -# Install transformers and other AI-related dependencies  
217 -pip install transformers scikit-learn xgboost 207 +# If you do not want to use the local sentiment analysis model (it has low computational requirements and installs the CPU version by default), comment out the 'Machine Learning' section in requirements.txt before running the command above.
218 ``` 208 ```
219 209
220 ### 3. Install Playwright Browser Drivers 210 ### 3. Install Playwright Browser Drivers
@@ -206,17 +206,7 @@ conda activate your_conda_name @@ -206,17 +206,7 @@ conda activate your_conda_name
206 ```bash 206 ```bash
207 # 基础依赖安装 207 # 基础依赖安装
208 pip install -r requirements.txt 208 pip install -r requirements.txt
209 -  
210 -#========下面是可选项========  
211 -# 如果需要本地情感分析功能,安装PyTorch  
212 -# CPU版本  
213 -pip install torch torchvision torchaudio  
214 -  
215 -# CUDA 11.8版本(如有GPU)  
216 -pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118  
217 -  
218 -# 安装transformers等AI相关依赖  
219 -pip install transformers scikit-learn xgboost 209 +# 如果不想使用本地情感分析模型(算力需求很小,默认安装CPU版本),可以先将 requirements.txt 中的“机器学习”部分注释掉,再执行上面的指令
220 ``` 210 ```
221 211
222 ### 3. 安装Playwright浏览器驱动 212 ### 3. 安装Playwright浏览器驱动