# __init__.py 4.92 KB
# AIfeng/2025-07-11 13:36:00
"""
豆包ASR语音识别服务模块
提供完整的语音识别功能,支持流式和非流式识别
"""

# Package metadata. These three names are listed in __all__ and are printed
# by print_info().
__version__ = "1.0.0"
__author__ = "AIfeng"
__description__ = "豆包ASR语音识别服务模块"

# 导入核心类和函数
from .asr_client import DoubaoASRClient
from .config_manager import ConfigManager
from .service_factory import (
    DoubaoASRService,
    create_asr_service,
    recognize_file,
    recognize_audio_data,
    run_recognition
)
from .protocol import DoubaoProtocol, MessageType, MessageFlags, SerializationMethod, CompressionType
from .audio_utils import AudioProcessor
from .result_processor import (
    DoubaoResultProcessor,
    ASRResult,
    create_text_only_callback,
    extract_text_only
)

# Public API: the names exported by `from <package> import *`.
__all__ = [
    # Core classes
    'DoubaoASRClient',
    'DoubaoASRService',
    'ConfigManager',
    'DoubaoProtocol',
    'AudioProcessor',
    'DoubaoResultProcessor',
    'ASRResult',
    
    # Convenience functions
    'create_asr_service',
    'recognize_file',
    'recognize_audio_data',
    'run_recognition',
    'create_text_only_callback',
    'extract_text_only',
    
    # Protocol constants
    'MessageType',
    'MessageFlags',
    'SerializationMethod',
    'CompressionType',
    
    # Version information
    '__version__',
    '__author__',
    '__description__'
]


# 快速开始示例
def get_quick_start_example() -> str:
    """Return sample source code showing how to call this package.

    The snippet demonstrates four call patterns: the ``recognize_file``
    convenience coroutine, a service instance built with
    ``create_asr_service`` and closed in a ``finally`` clause, recognition
    driven by a configuration file path, and the synchronous
    ``run_recognition`` helper.

    Returns:
        str: The example code as a single multi-line string. The text begins
        with a newline and ends with a newline.
    """
    # The literal is kept at column 0 so the returned text carries no extra
    # indentation and can be pasted into a file as-is.
    example_source = """
# 豆包ASR快速开始示例

import asyncio
from asr.doubao import recognize_file, create_asr_service

# 方式1: 使用便捷函数(推荐用于简单场景)
async def simple_recognition():
    result = await recognize_file(
        audio_path="path/to/your/audio.wav",
        app_key="your_app_key",
        access_key="your_access_key",
        streaming=True
    )
    print(result)

# 方式2: 使用服务实例(推荐用于复杂场景)
async def advanced_recognition():
    # 创建服务实例
    service = create_asr_service(
        app_key="your_app_key",
        access_key="your_access_key",
        streaming=True,
        debug=True
    )
    
    # 定义结果回调函数
    def on_result(result):
        if result.get('payload_msg'):
            print(f"实时结果: {result['payload_msg']}")
    
    try:
        # 执行识别
        result = await service.recognize_file(
            "path/to/your/audio.wav",
            result_callback=on_result
        )
        print(f"最终结果: {result}")
    finally:
        await service.close()

# 方式3: 使用配置文件
async def config_based_recognition():
    result = await recognize_file(
        audio_path="path/to/your/audio.wav",
        config_path="path/to/config.json"
    )
    print(result)

# 同步方式(简单场景)
def sync_recognition():
    from asr.doubao import run_recognition
    
    result = run_recognition(
        audio_path="path/to/your/audio.wav",
        app_key="your_app_key",
        access_key="your_access_key"
    )
    print(result)

# 运行示例
if __name__ == "__main__":
    # 选择一种方式运行
    asyncio.run(simple_recognition())
    # asyncio.run(advanced_recognition())
    # asyncio.run(config_based_recognition())
    # sync_recognition()
"""
    return example_source


def get_config_template() -> str:
    """Return a JSON configuration template as text.

    The template contains five top-level sections: ``asr_config``,
    ``auth_config``, ``audio_config``, ``connection_config`` and
    ``logging_config``. The credential fields hold placeholder values.

    Returns:
        str: The JSON template text. The text begins with a newline and ends
        with a newline.
    """
    # Kept at column 0 so the returned JSON has no extra indentation.
    template_text = """
{
  "asr_config": {
    "ws_url": "wss://openspeech.bytedance.com/api/v3/sauc/bigmodel",
    "ws_url_nostream": "wss://openspeech.bytedance.com/api/v3/sauc/bigmodel_nostream",
    "resource_id": "volc.bigasr.sauc.duration",
    "model_name": "bigmodel",
    "enable_punc": true,
    "streaming_mode": true,
    "seg_duration": 200,
    "mp3_seg_size": 1000
  },
  "auth_config": {
    "app_key": "your_app_key_here",
    "access_key": "your_access_key_here"
  },
  "audio_config": {
    "default_format": "wav",
    "default_rate": 16000,
    "default_bits": 16,
    "default_channel": 1,
    "default_codec": "raw",
    "supported_formats": ["wav", "mp3", "pcm"]
  },
  "connection_config": {
    "max_size": 1000000000,
    "timeout": 30,
    "retry_times": 3,
    "retry_delay": 1
  },
  "logging_config": {
    "enable_debug": false,
    "log_requests": true,
    "log_responses": true
  }
}
"""
    return template_text


def print_info():
    """Print a summary of this module to standard output.

    The output consists of the module title with its version, the author,
    the description, a list of supported features, and a two-line quick-start
    snippet. Each entry is written with its own ``print`` call.
    """
    # Header lines built from the module-level metadata constants.
    header_lines = (
        f"豆包ASR语音识别服务模块 v{__version__}",
        f"作者: {__author__}",
        f"描述: {__description__}",
    )
    # The leading "\n" on the section titles produces a blank separator line.
    feature_lines = (
        "\n支持的功能:",
        "- 流式语音识别",
        "- 非流式语音识别",
        "- 多种音频格式支持 (WAV, MP3, PCM)",
        "- 灵活的配置管理",
        "- 异步和同步API",
        "- 实时结果回调",
    )
    quick_start_lines = (
        "\n快速开始:",
        "from asr.doubao import recognize_file",
        "result = await recognize_file('audio.wav', app_key='...', access_key='...')",
    )

    for line in (*header_lines, *feature_lines, *quick_start_lines):
        print(line)


# Print the module summary when this file is executed directly.
if __name__ == "__main__":
    print_info()