Doiiars

PlatformCrawler: 初始化时提前检测MediaCrawler子模块是否完整

@@ -33,10 +33,14 @@ class PlatformCrawler: @@ -33,10 +33,14 @@ class PlatformCrawler:
33 self.supported_platforms = ['xhs', 'dy', 'ks', 'bili', 'wb', 'tieba', 'zhihu'] 33 self.supported_platforms = ['xhs', 'dy', 'ks', 'bili', 'wb', 'tieba', 'zhihu']
34 self.crawl_stats = {} 34 self.crawl_stats = {}
35 35
36 - # 确保MediaCrawler目录存在  
37 - if not self.mediacrawler_path.exists():  
38 - raise FileNotFoundError(f"MediaCrawler目录不存在: {self.mediacrawler_path}")  
39 -
  36 + # 确保MediaCrawler子模块已初始化
  37 + db_config_path = self.mediacrawler_path / "config" / "db_config.py"
  38 + if not self.mediacrawler_path.exists() or not db_config_path.exists():
  39 + logger.error("MediaCrawler子模块未初始化或不完整")
  40 + logger.error("请在项目根目录运行以下命令初始化子模块:")
  41 + logger.error(" git submodule update --init --recursive")
  42 + raise FileNotFoundError("MediaCrawler子模块未初始化,请先运行: git submodule update --init --recursive")
  43 +
40 logger.info(f"初始化平台爬虫管理器,MediaCrawler路径: {self.mediacrawler_path}") 44 logger.info(f"初始化平台爬虫管理器,MediaCrawler路径: {self.mediacrawler_path}")
41 45
42 def configure_mediacrawler_db(self): 46 def configure_mediacrawler_db(self):