recorder_sync.py 20.9 KB

Raw Blame History Permalink

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
AIfeng/2025-01-02 10:27:06
服务端音频录音模块 - 同步版本
基于eman-Fay-main-copy项目的同步实现模式
"""

import audioop
import math
import time
import threading
from abc import abstractmethod
from queue import Queue
import numpy as np
import tempfile
import wave
from typing import Optional, Callable

from funasr_asr_sync import FunASRSync
from core import get_web_instance, get_instance
from scheduler.thread_manager import MyThread
from utils import util
from utils import config_util as cfg
from core import fay_core
from core import interact

try:
    import pyaudio
    PYAUDIO_AVAILABLE = True
except ImportError:
    PYAUDIO_AVAILABLE = False
    print("警告: pyaudio未安装，服务端录音功能不可用")

# 麦克风启动时间 (秒)
_ATTACK = 0.1

# 麦克风释放时间 (秒)
_RELEASE = 0.5

class RecorderSync:
    """同步录音器 - 基于参考项目实现"""

    def __init__(self, fay):
        self.__fay = fay
        self.__running = True
        self.__processing = False
        self.__history_level = []
        self.__history_data = []
        self.__dynamic_threshold = 0.05  # 动态阈值，进一步降低以提高语音检测灵敏度

        self.__MAX_LEVEL = 25000
        self.__MAX_BLOCK = 100

        # ASR模式配置
        self.ASRMode = cfg.ASR_mode
        self.__aLiNls = None
        self.is_awake = False
        self.wakeup_matched = False

        if cfg.config['source']['wake_word_enabled']:
            self.timer = threading.Timer(60, self.reset_wakeup_status)

        self.username = 'User'  # 默认用户
        self.channels = 1
        self.sample_rate = 16000  # 固定使用16000Hz，与ASR服务匹配
        self.is_reading = False
        self.stream = None

        self.__last_ws_notify_time = 0
        self.__ws_notify_interval = 0.5  # 最小通知间隔（秒）
        self.__ws_notify_thread = None

        # PyAudio配置
        self.chunk_size = 1024
        self.device_index = None
        self.pyaudio_instance = None
        self.audio_stream = None

        # 状态变化回调
        self.status_change_callback = None

        util.log(1, f"同步录音器初始化完成: {self.sample_rate}Hz, {self.channels}ch")

    def asrclient(self):
        """创建ASR客户端"""
        if self.ASRMode == "funasr" or self.ASRMode == "sensevoice":
            asrcli = FunASRSync(self.username)
        else:
            # 可以扩展其他ASR实现
            asrcli = FunASRSync(self.username)
        return asrcli

    def save_buffer_to_file(self, buffer):
        """保存音频缓冲区到临时文件"""
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".wav", dir="cache_data")
        wf = wave.open(temp_file.name, 'wb')
        wf.setnchannels(1)
        wf.setsampwidth(2)
        wf.setframerate(16000)
        wf.writeframes(buffer)
        wf.close()
        return temp_file.name

    def __get_history_average(self, number):
        """获取历史音量平均值"""
        total = 0
        num = 0
        for i in range(len(self.__history_level) - 1, -1, -1):
            level = self.__history_level[i]
            total += level
            num += 1
            if num >= number:
                break
        return total / num if num > 0 else 0

    def __get_history_percentage(self, number):
        """获取历史音量百分比"""
        return (self.__get_history_average(number) / self.__MAX_LEVEL) * 1.05 + 0.02

    def reset_wakeup_status(self):
        """重置唤醒状态"""
        self.wakeup_matched = False
        with fay_core.auto_play_lock:
            fay_core.can_auto_play = True
        util.log(1, "唤醒状态已重置")

    def __waitingResult(self, iat, audio_data):
        """等待ASR识别结果"""
        self.__processing = True
        # 通知处理状态变化
        self._notify_status_change()

        t = time.time()
        tm = time.time()

        if self.ASRMode == "funasr" or self.ASRMode == "sensevoice":
            file_url = self.save_buffer_to_file(audio_data)
            util.log(1, f"音频数据已保存到文件: {file_url}, 大小: {len(audio_data)} bytes")
            iat.send_url(file_url)
            util.log(1, f"音频文件URL已发送到ASR客户端: {file_url}")

        # 等待结果返回
        while not iat.done and time.time() - t < 10:  # 增加超时时间
            time.sleep(0.01)

        text = iat.finalResults
        util.log(1, f"语音处理完成！耗时: {math.floor((time.time() - tm) * 1000)} ms")

        if len(text) > 0:
            if cfg.config['source']['wake_word_enabled']:
                self.__handle_wake_word(text)
            else:
                # 非唤醒模式
                self.on_speaking(text)
                self.__processing = False
                # 通知处理状态变化
                self._notify_status_change()
        else:
            self.__processing = False
            # 通知处理状态变化
            self._notify_status_change()
            util.log(1, "[!] 语音未检测到内容！")
            self.__dynamic_threshold = self.__get_history_percentage(30)

            # 通知Web客户端
            if get_web_instance().is_connected(self.username):
                import asyncio
                # 创建状态消息直接推送
                status_message = {
                    "type": "status_update",
                    "panelMsg": "",
                    'Username': self.username,
                    'robot': f'{cfg.fay_url}/robot/Normal.jpg'
                }
                # 使用直接发送方法，避免wsa_command封装
                asyncio.create_task(get_web_instance().send_direct_message(status_message))

            # Human客户端通知改为日志记录（避免重复通知当前服务）
            util.log(1, f"语音未检测到内容[{self.username}]")

    def __handle_wake_word(self, text):
        """处理唤醒词逻辑"""
        util.log(1, f"检测到语音: {text}")

        if cfg.config['source']['wake_word_type'] == 'common':
            self.__handle_common_wake_word(text)
        elif cfg.config['source']['wake_word_type'] == 'front':
            self.__handle_front_wake_word(text)

        # 通知处理状态变化
        self._notify_status_change()

    def __handle_common_wake_word(self, text):
        """处理普通唤醒模式"""
        if not self.wakeup_matched:
            wake_word = cfg.config['source']['wake_word']
            wake_word_list = wake_word.split(',')
            wake_up = False

            for word in wake_word_list:
                if word in text:
                    wake_up = True
                    break

            if wake_up:
                util.log(1, "唤醒成功！")
                self.__notify_wake_success()
                self.wakeup_matched = True

                with fay_core.auto_play_lock:
                    fay_core.can_auto_play = False

                intt = interact.Interact("auto_play", 2, {'user': self.username, 'text': "在呢，你说？"})
                self.__fay.on_interact(intt)
                self.__processing = False
                self.timer.cancel()
            else:
                util.log(1, "[!] 待唤醒！")
                self.__notify_waiting_wake()
        else:
            self.on_speaking(text)
            self.__processing = False
            self.timer.cancel()
            self.timer = threading.Timer(60, self.reset_wakeup_status)
            self.timer.start()

    def __handle_front_wake_word(self, text):
        """处理前置唤醒词模式"""
        wake_word = cfg.config['source']['wake_word']
        wake_word_list = wake_word.split(',')
        wake_up = False
        wake_up_word = ""

        for word in wake_word_list:
            if text.startswith(word):
                wake_up_word = word
                wake_up = True
                break

        if wake_up:
            util.log(1, "唤醒成功！")
            self.__notify_wake_success()

            # 去除唤醒词后的语句
            question = text
            self.__fay.sound_query = Queue()
            time.sleep(0.3)
            self.on_speaking(question)
            self.__processing = False
        else:
            util.log(1, "[!] 待唤醒！")
            self.__notify_waiting_wake()

    def __notify_wake_success(self):
        """通知唤醒成功"""
        if get_web_instance().is_connected_human(self.username):
            import asyncio
            # 创建唤醒成功消息直接推送
            wake_message = {
                "type": "status_update",
                "panelMsg": "唤醒成功！",
                "Username": self.username,
                'robot': f'{cfg.fay_url}/robot/Listening.jpg'
            }
            # 使用直接发送方法，避免wsa_command封装
            asyncio.create_task(get_web_instance().send_direct_message(wake_message))

        # Human客户端通知改为日志记录（避免重复通知当前服务）
        util.log(1, f"唤醒成功[{self.username}]")

    def __notify_waiting_wake(self):
        """通知待唤醒状态"""
        if get_web_instance().is_connected(self.username):
            import asyncio
            # 创建待唤醒状态消息直接推送
            wait_message = {
                "type": "status_update",
                "panelMsg": "[!] 待唤醒！",
                "Username": self.username,
                'robot': f'{cfg.fay_url}/robot/Normal.jpg'
            }
            # 使用直接发送方法，避免wsa_command封装
            asyncio.create_task(get_web_instance().send_direct_message(wait_message))

        # Human客户端通知改为日志记录（避免重复通知当前服务）
        util.log(1, f"待唤醒状态[{self.username}]")

    def list_audio_devices(self) -> list:
        """列出可用的音频设备"""
        if not PYAUDIO_AVAILABLE:
            return []

        devices = []
        try:
            if not self.pyaudio_instance:
                self.pyaudio_instance = pyaudio.PyAudio()

            for i in range(self.pyaudio_instance.get_device_count()):
                device_info = self.pyaudio_instance.get_device_info_by_index(i)
                if device_info['maxInputChannels'] > 0:
                    devices.append({
                        'index': i,
                        'name': device_info['name'],
                        'channels': device_info['maxInputChannels'],
                        'sample_rate': int(device_info['defaultSampleRate'])
                    })
        except Exception as e:
            util.log(3, f"列出音频设备时出错: {e}")

        return devices

    def get_stream(self):
        """获取音频流 - 子类需要实现"""
        if not PYAUDIO_AVAILABLE:
            raise ImportError("pyaudio未安装，无法创建音频流")

        try:
            if not self.pyaudio_instance:
                self.pyaudio_instance = pyaudio.PyAudio()

            # 获取设备信息并适配采样率
            device_sample_rate = self.sample_rate
            if self.device_index is not None:
                try:
                    device_info = self.pyaudio_instance.get_device_info_by_index(self.device_index)
                    device_sample_rate = int(device_info['defaultSampleRate'])
                    util.log(1, f"设备默认采样率: {device_sample_rate}Hz")

                    # 如果设备采样率与目标不同，记录但仍使用16000Hz
                    if device_sample_rate != self.sample_rate:
                        util.log(2, f"设备采样率({device_sample_rate}Hz)与目标采样率({self.sample_rate}Hz)不匹配，将使用目标采样率")
                except Exception as e:
                    util.log(2, f"获取设备信息失败: {e}，使用默认采样率")

            self.audio_stream = self.pyaudio_instance.open(
                format=pyaudio.paInt16,
                channels=self.channels,
                rate=self.sample_rate,  # 始终使用16000Hz以匹配ASR服务
                input=True,
                input_device_index=self.device_index,
                frames_per_buffer=self.chunk_size
            )

            util.log(1, f"音频流创建成功: {self.sample_rate}Hz, 设备索引: {self.device_index}")
            return self.audio_stream

        except Exception as e:
            util.log(3, f"创建音频流时出错: {e}")
            raise e

    def start_recording(self):
        """开始录音"""
        if not PYAUDIO_AVAILABLE:
            util.log(3, "pyaudio未安装，无法开始录音")
            return False

        try:
            self.__running = True
            self.get_stream()  # 创建音频流

            # 启动录音线程
            self.recording_thread = threading.Thread(target=self.__record)
            self.recording_thread.daemon = True
            self.recording_thread.start()

            util.log(1, "录音已开始")
            return True

        except Exception as e:
            util.log(3, f"开始录音时出错: {e}")
            return False

    def stop_recording(self):
        """停止录音"""
        self.__running = False

        if self.audio_stream:
            try:
                self.audio_stream.stop_stream()
                self.audio_stream.close()
            except Exception as e:
                util.log(2, f"关闭音频流时出错: {e}")
            finally:
                self.audio_stream = None

        if self.pyaudio_instance:
            try:
                self.pyaudio_instance.terminate()
            except Exception as e:
                util.log(2, f"终止PyAudio时出错: {e}")
            finally:
                self.pyaudio_instance = None

        util.log(1, "录音已停止")

    def __record(self):
        """录音主循环"""
        try:
            stream = self.get_stream()
            util.log(1, "开始录音循环")

            # 预缓冲区，用于保存语音开始前的音频数据
            pre_buffer = []
            pre_buffer_size = 10  # 保存语音开始前10个数据块

            while self.__running:
                try:
                    # 读取音频数据
                    data = stream.read(self.chunk_size, exception_on_overflow=False)

                    # 计算音量
                    level = audioop.rms(data, 2)
                    self.__history_level.append(level)

                    if len(self.__history_level) > self.__MAX_BLOCK:
                        self.__history_level.pop(0)

                    # 检测语音活动
                    threshold_value = self.__dynamic_threshold * self.__MAX_LEVEL

                    # 每100次循环输出一次调试信息
                    if hasattr(self, '_debug_counter'):
                        self._debug_counter += 1
                    else:
                        self._debug_counter = 0

                    if self._debug_counter % 100 == 0:
                        util.log(1, f"音量监控: 当前={level}, 阈值={threshold_value:.2f}, 动态阈值={self.__dynamic_threshold:.3f}")

                    # 维护预缓冲区
                    pre_buffer.append(data)
                    if len(pre_buffer) > pre_buffer_size:
                        pre_buffer.pop(0)

                    if level > threshold_value:
                        if not self.is_reading:
                            self.is_reading = True
                            # 将预缓冲区的数据加入到录音数据中，避免丢失开头
                            self.__history_data = list(pre_buffer)
                            util.log(1, f"检测到语音开始，音量: {level}, 阈值: {threshold_value:.2f}, 预缓冲: {len(pre_buffer)} 块")
                            # 通知状态变化
                            self._notify_status_change()

                        self.__history_data.append(data)
                    else:
                        if self.is_reading:
                            self.is_reading = False
                            util.log(1, f"语音结束，数据块数: {len(self.__history_data)}")
                            # 通知状态变化
                            self._notify_status_change()

                            if len(self.__history_data) > 3:  # 降低最小音频数据要求
                                audio_data = b''.join(self.__history_data)
                                util.log(1, f"开始处理音频数据，总长度: {len(audio_data)} bytes")

                                # 创建ASR客户端并处理音频
                                iat = self.asrclient()
                                iat.start()

                                # 等待连接建立
                                time.sleep(0.1)

                                # 处理音频数据
                                self.__waitingResult(iat, audio_data)

                                # 结束ASR
                                iat.end()
                            else:
                                util.log(1, f"音频数据不足，跳过处理: {len(self.__history_data)} 块")

                            self.__history_data = []

                    time.sleep(0.01)  # 短暂休眠

                except Exception as e:
                    util.log(3, f"录音循环中出错: {e}")
                    time.sleep(0.1)

        except Exception as e:
            util.log(3, f"录音主循环出错: {e}")
        finally:
            util.log(1, "录音循环结束")

    @abstractmethod
    def on_speaking(self, text):
        """语音识别结果回调 - 子类需要实现"""
        pass

    def is_recording(self):
        """检查是否正在录音"""
        return self.__running

    def get_status(self):
        """获取录音状态"""
        return {
            'is_recording': self.__running,
            'is_processing': self.__processing,
            'is_reading': self.is_reading,
            'dynamic_threshold': self.__dynamic_threshold,
            'username': self.username,
            'sample_rate': self.sample_rate,
            'device_index': self.device_index
        }

    def set_status_change_callback(self, callback):
        """设置状态变化回调函数"""
        self.status_change_callback = callback

    def _notify_status_change(self):
        """通知状态变化"""
        if self.status_change_callback:
            try:
                status = self.get_status()
                self.status_change_callback(status)
            except Exception as e:
                util.log(2, f"状态变化通知失败: {e}")

# 全局录音器实例
_global_recorder = None

def get_global_recorder():
    """获取全局录音器实例"""
    return _global_recorder

def create_global_recorder(fay):
    """创建全局录音器实例"""
    global _global_recorder
    if _global_recorder is None:
        _global_recorder = RecorderSync(fay)
    return _global_recorder

def destroy_global_recorder():
    """销毁全局录音器实例"""
    global _global_recorder
    if _global_recorder:
        _global_recorder.stop_recording()
        _global_recorder = None

if __name__ == "__main__":
    # 测试代码
    class TestFay:
        def on_interact(self, interact):
            print(f"收到交互: {interact}")

    class TestRecorder(RecorderSync):
        def on_speaking(self, text):
            print(f"识别结果: {text}")

    # 创建测试实例
    test_fay = TestFay()
    recorder = TestRecorder(test_fay)

    # 列出音频设备
    devices = recorder.list_audio_devices()
    print("可用音频设备:")
    for device in devices:
        print(f"  {device['index']}: {device['name']} ({device['channels']}ch, {device['sample_rate']}Hz)")

    # 开始录音测试
    if devices:
        print("\n开始录音测试，按Ctrl+C停止...")
        recorder.start_recording()

        try:
            while True:
                time.sleep(1)
                status = recorder.get_status()
                print(f"状态: {status}")
        except KeyboardInterrupt:
            print("\n停止录音测试")
            recorder.stop_recording()
    else:
        print("没有可用的音频设备")