# init_database.py 5.26 KB
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
MindSpider AI爬虫项目 - 数据库初始化脚本
用于创建项目所需的所有数据库表
"""

import os
import sys
import pymysql
from pathlib import Path

# Append the project root (the parent of this script's directory) to sys.path
# so that the `import config` below can be resolved from there.
project_root = Path(__file__).parent.parent
sys.path.append(str(project_root))

# Import the project configuration; without it the script cannot continue,
# so print a hint and exit with status 1.
try:
    import config
except ImportError:
    print("错误: 无法导入config.py配置文件")
    print("请确保config.py文件存在于项目根目录")
    sys.exit(1)

def create_database_connection():
    """Open a connection to the MySQL server described by ``config``.

    No default database is selected here; the connection runs in autocommit
    mode.

    Returns:
        The open pymysql connection, or ``None`` when connecting failed
        (the error is printed rather than raised).
    """
    try:
        # Gather the settings first; reading them inside the ``try`` means a
        # missing config attribute is reported like any other failure.
        settings = {
            "host": config.DB_HOST,
            "port": config.DB_PORT,
            "user": config.DB_USER,
            "password": config.DB_PASSWORD,
            "charset": config.DB_CHARSET,
            "autocommit": True,
        }
        server = pymysql.connect(**settings)
        print(f"成功连接到MySQL服务器: {config.DB_HOST}:{config.DB_PORT}")
        return server
    except Exception as exc:
        print(f"连接数据库失败: {exc}")
        return None

def create_database(connection):
    """Create the configured database if it is missing and select it.

    Issues ``CREATE DATABASE IF NOT EXISTS`` (utf8mb4 / utf8mb4_unicode_ci)
    for ``config.DB_NAME`` followed by ``USE`` so that later statements on
    the same connection run against that database.

    Args:
        connection: An open pymysql connection.

    Returns:
        bool: ``True`` on success, ``False`` when any step failed (the error
        is printed rather than raised).
    """
    try:
        # The context manager closes the cursor even if a statement raises.
        with connection.cursor() as cursor:
            cursor.execute(f"CREATE DATABASE IF NOT EXISTS `{config.DB_NAME}` CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci")
            cursor.execute(f"USE `{config.DB_NAME}`")
        print(f"数据库 '{config.DB_NAME}' 创建/选择成功")
        return True
    except Exception as e:
        print(f"创建数据库失败: {e}")
        return False

def _strip_leading_sql_comments(statement):
    """Return *statement* without the blank and ``--``/``#`` comment lines preceding its SQL."""
    lines = statement.splitlines(keepends=True)
    for index, line in enumerate(lines):
        text = line.strip()
        if text and not text.startswith(('--', '#')):
            # Keep everything from the first real SQL line onwards untouched.
            return ''.join(lines[index:]).strip()
    return ''


def execute_sql_file(connection, sql_file_path, description=""):
    """Execute the statements of a SQL script one by one.

    The script is split on ``;`` (a deliberately simple splitter: semicolons
    inside string literals or comments are not recognised). Comment lines
    that precede a statement are removed before execution, so a statement
    introduced by a ``--`` header comment is still executed; chunks that
    contain only comments or whitespace are skipped.

    Args:
        connection: An open pymysql connection.
        sql_file_path: Path of the UTF-8 encoded SQL file.
        description: Label used in the printed summary line.

    Returns:
        bool: ``True`` when the file exists and every statement succeeded;
        ``False`` when the file is missing, cannot be read, or at least one
        statement failed. Failures are printed and never raised.
    """
    if not os.path.exists(sql_file_path):
        print(f"警告: SQL文件不存在: {sql_file_path}")
        return False

    try:
        with open(sql_file_path, 'r', encoding='utf-8') as f:
            sql_content = f.read()

        # Split statements (simple implementation: split on semicolons)
        sql_statements = [
            _strip_leading_sql_comments(chunk) for chunk in sql_content.split(';')
        ]

        success_count = 0
        error_count = 0

        # The context manager closes the cursor even if something raises.
        with connection.cursor() as cursor:
            for stmt in sql_statements:
                if not stmt:
                    continue
                try:
                    cursor.execute(stmt)
                    success_count += 1
                except Exception as e:
                    # Best effort: record the failure and keep going.
                    error_count += 1
                    print(f"执行SQL语句失败: {str(e)[:100]}...")

        print(f"{description} - 成功执行: {success_count} 条语句, 失败: {error_count} 条语句")
        return error_count == 0

    except Exception as e:
        print(f"执行SQL文件失败 {sql_file_path}: {e}")
        return False

def main():
    """Run the complete database initialisation workflow.

    Prints the configured connection settings, connects to the MySQL server,
    creates/selects the database, runs the MediaCrawler base schema (when the
    file exists) followed by the MindSpider extension schema, and finally
    lists the tables present in the database. The connection is always
    closed before returning.

    Returns:
        bool: ``False`` when connecting, creating the database or locating
        the MindSpider SQL file fails, or when an unexpected exception is
        raised; ``True`` otherwise. Statement-level failures inside the SQL
        scripts are reported with a warning instead of the final success
        message but, as before, do not change the return value.
    """
    print("=" * 60)
    print("MindSpider AI爬虫项目 - 数据库初始化")
    print("=" * 60)

    # Show the configuration in use
    print("检查数据库配置...")
    print(f"数据库主机: {config.DB_HOST}")
    print(f"数据库端口: {config.DB_PORT}")
    print(f"数据库名称: {config.DB_NAME}")
    print(f"数据库用户: {config.DB_USER}")
    print(f"字符集: {config.DB_CHARSET}")
    print()

    # Create the database connection
    print("正在连接数据库...")
    connection = create_database_connection()
    if not connection:
        print("数据库初始化失败!")
        return False

    try:
        # Create the database
        print("正在创建/选择数据库...")
        if not create_database(connection):
            return False

        # Locate the SQL files relative to this script
        schema_dir = Path(__file__).parent
        mediacrawler_sql = schema_dir.parent / "DeepSentimentCrawling" / "MediaCrawler" / "schema" / "tables.sql"
        mindspider_sql = schema_dir / "mindspider_tables.sql"

        print()
        print("开始执行SQL脚本...")

        # Remember whether any script reported failed statements so the
        # final message does not claim an unqualified success.
        scripts_ok = True

        # 1. Run the original MediaCrawler table definitions (optional)
        if mediacrawler_sql.exists():
            print("1. 创建MediaCrawler基础表...")
            if not execute_sql_file(connection, str(mediacrawler_sql), "MediaCrawler基础表"):
                scripts_ok = False
        else:
            print("警告: MediaCrawler SQL文件不存在,跳过基础表创建")

        # 2. Run the MindSpider extension table definitions (required)
        print("2. 创建MindSpider扩展表...")
        if mindspider_sql.exists():
            if not execute_sql_file(connection, str(mindspider_sql), "MindSpider扩展表"):
                scripts_ok = False
        else:
            print("错误: MindSpider SQL文件不存在")
            return False

        print()
        print("=" * 60)
        print("数据库初始化完成!")
        print("=" * 60)

        # List the tables now present; the cursor is closed on block exit
        with connection.cursor() as cursor:
            cursor.execute("SHOW TABLES")
            tables = cursor.fetchall()

        print(f"数据库 '{config.DB_NAME}' 中共创建了 {len(tables)} 个表:")
        for table in tables:
            print(f"  - {table[0]}")

        print()
        if scripts_ok:
            print("数据库初始化成功完成!您现在可以开始使用MindSpider了。")
        else:
            print("警告: 部分SQL语句执行失败,请检查上面的错误信息。")
        return True

    except Exception as e:
        print(f"数据库初始化过程中发生错误: {e}")
        return False

    finally:
        if connection:
            connection.close()
            print("数据库连接已关闭")

if __name__ == "__main__":
    # Map main()'s boolean result onto the process exit status (0 = success).
    sys.exit(0 if main() else 1)