funasr_asr_sync.py 18.2 KB

Raw Blame History Permalink

# -*- coding: utf-8 -*-
"""
AIfeng/2025-01-02 10:27:06
FunASR语音识别模块 - 同步版本
基于eman-Fay-main-copy项目的同步实现模式
"""

from threading import Thread
import websocket
import json
import time
import ssl
import _thread as thread
import os
import asyncio
import threading

from core import get_web_instance, get_instance
from utils import config_util as cfg
from utils import util

class FunASRSync:
    """FunASR同步客户端 - 基于参考项目实现"""

    def __init__(self, username):
        self.__URL = "ws://{}:{}".format(cfg.local_asr_ip, cfg.local_asr_port)
        self.__ws = None
        self.__connected = False
        self.__frames = []
        self.__state = 0
        self.__closing = False
        self.__task_id = ''
        self.done = False
        self.finalResults = ""
        self.__reconnect_delay = 1
        self.__reconnecting = False
        self.username = username
        self.started = True
        self.__result_callback = None  # 添加结果回调

        util.log(1, f"FunASR同步客户端初始化完成，用户: {username}")

    def on_message(self, ws, message):
        """收到websocket消息的处理"""
        try:
            util.log(1, f"收到FunASR消息: {message}")

            # 尝试解析JSON消息以区分状态消息和识别结果
            try:
                import json
                parsed_message = json.loads(message)

                # 检查是否为状态消息（如分块准备消息）
                if isinstance(parsed_message, dict) and 'status' in parsed_message:
                    status = parsed_message.get('status')
                    if status == 'ready':
                        util.log(1, f"收到分块准备状态: {parsed_message.get('message', '')}")
                        return  # 状态消息不触发回调
                    elif status in ['processing', 'chunk_received']:
                        util.log(1, f"收到处理状态: {status}")
                        return  # 处理状态消息不触发回调
                    elif status == 'error':
                        util.log(3, f"收到错误状态: {parsed_message.get('message', '')}")
                        return

                # 如果是字典但不是状态消息，可能是结构化的识别结果
                if isinstance(parsed_message, dict) and 'text' in parsed_message:
                    # 结构化识别结果
                    recognition_text = parsed_message.get('text', '')
                    if recognition_text.strip():  # 只有非空结果才处理
                        self.done = True
                        self.finalResults = recognition_text
                        util.log(1, f"收到结构化识别结果: {recognition_text}")
                        self._trigger_result_callback()
                    return

            except json.JSONDecodeError:
                # 不是JSON格式，可能是纯文本识别结果
                pass

            # 处理纯文本识别结果
            if isinstance(message, str) and message.strip():
                # 过滤掉明显的状态消息
                if any(keyword in message.lower() for keyword in ['status', 'ready', '准备接收', 'processing', 'chunk']):
                    util.log(1, f"跳过状态消息: {message}")
                    return

                # 这是真正的识别结果
                self.done = True
                self.finalResults = message
                util.log(1, f"收到文本识别结果: {message}")
                self._trigger_result_callback()

        except Exception as e:
            util.log(3, f"处理识别结果时出错: {e}")

        if self.__closing:
            try:
                self.__ws.close()
            except Exception as e:
                util.log(2, f"关闭WebSocket时出错: {e}")

    def _trigger_result_callback(self):
        """触发结果回调函数"""
        if self.__result_callback:
            try:
                # 创建chat_message直接推送
                chat_message = {
                    "type":"chat_message",
                    "sender":"回音",
                    "text": self.finalResults,
                    "Username": self.username,
                    "model_info":"Funasr"
                }

                self.__result_callback(chat_message)
                util.log(1, f"已触发结果回调: {self.finalResults}")
            except Exception as e:
                util.log(3, f"调用结果回调时出错: {e}")

            # 发送到Web客户端（改进的异步调用方式）
            # try:
            #     # 先检查WSA服务是否已初始化
            #     web_instance = get_web_instance()
            #     if web_instance and web_instance.is_connected(self.username):
            #         # 创建chat_message直接推送
            #         chat_message = {
            #             "type":"chat_message",
            #             "sender":"回音",
            #             "content": self.finalResults,
            #             "Username": self.username,
            #             "model_info":"Funasr"
            #         }
            #         # 方案1: 使用add_cmd推送wsa_command类型数据
            #         # web_instance.add_cmd(chat_message)

            #         util.log(1, f"FunASR识别结果已推送到Web客户端[{self.username}]: {self.finalResults}")
            #     else:
            #         util.log(2, f"用户{self.username}未连接到Web客户端，跳过推送")
            # except RuntimeError as e:
            #     # WSA服务未初始化，这是正常情况（服务启动顺序问题）
            #     util.log(2, f"WSA服务未初始化，跳过Web客户端通知: {e}")
            # except Exception as e:
            #     util.log(3, f"发送到Web客户端时出错: {e}")

            # Human客户端通知改为日志记录（避免重复通知当前服务）
            # util.log(1, f"FunASR识别结果[{self.username}]: {self.finalResults}")

        if self.__closing:
            try:
                self.__ws.close()
            except Exception as e:
                util.log(2, f"关闭WebSocket时出错: {e}")

    def on_close(self, ws, code, msg):
        """收到websocket关闭的处理"""
        self.__connected = False
        util.log(2, f"FunASR连接关闭: {msg}")
        self.__ws = None

    def on_error(self, ws, error):
        """收到websocket错误的处理"""
        self.__connected = False
        util.log(3, f"FunASR连接错误: {error}")
        self.__ws = None

    def __attempt_reconnect(self):
        """重连机制"""
        if not self.__reconnecting:
            self.__reconnecting = True
            util.log(1, "尝试重连FunASR...")
            while not self.__connected:
                time.sleep(self.__reconnect_delay)
                self.start()
                self.__reconnect_delay *= 2
            self.__reconnect_delay = 1
            self.__reconnecting = False

    def on_open(self, ws):
        """收到websocket连接建立的处理"""
        self.__connected = True
        util.log(1, "FunASR WebSocket连接建立")

        def run(*args):
            while self.__connected:
                try:
                    if len(self.__frames) > 0:
                        frame = self.__frames[0]
                        self.__frames.pop(0)

                        if type(frame) == dict:
                            ws.send(json.dumps(frame))
                        elif type(frame) == bytes:
                            ws.send(frame, websocket.ABNF.OPCODE_BINARY)

                except Exception as e:
                    util.log(3, f"发送帧数据时出错: {e}")
                # 优化发送间隔，从0.04秒减少到0.02秒提高效率
                time.sleep(0.02)

        thread.start_new_thread(run, ())

    def __connect(self):
        """建立WebSocket连接"""
        self.finalResults = ""
        self.done = False
        self.__frames.clear()
        websocket.enableTrace(False)

        self.__ws = websocket.WebSocketApp(
            self.__URL,
            on_message=self.on_message,
            on_close=self.on_close,
            on_error=self.on_error
        )
        self.__ws.on_open = self.on_open
        self.__ws.run_forever(sslopt={"cert_reqs": ssl.CERT_NONE})

    def add_frame(self, frame):
        """添加帧到发送队列"""
        self.__frames.append(frame)

    def send(self, buf):
        """发送音频数据"""
        self.__frames.append(buf)

    def send_url(self, url):
        """发送音频文件URL"""
        # 确保使用绝对路径，相对路径对funasr服务无效
        absolute_url = os.path.abspath(url)
        frame = {'url': absolute_url}
        if self.__ws and self.__connected:
            util.log(1, f"发送音频文件URL到FunASR: {absolute_url}")
            self.__ws.send(json.dumps(frame))
            util.log(1, f"音频文件URL已发送: {frame}")
        else:
            util.log(2, f"WebSocket未连接，无法发送URL: {absolute_url}")

    def send_audio_data(self, audio_bytes, filename="audio.wav"):
        """发送音频数据（支持大文件分块）"""
        import base64
        import math

        try:
            # 确保audio_bytes是bytes类型，避免memoryview缓冲区问题
            if hasattr(audio_bytes, 'tobytes'):
                audio_bytes = bytes(audio_bytes.tobytes())  # Fix BufferError: memoryview has 1 exported buffer
            elif isinstance(audio_bytes, memoryview):
                audio_bytes = bytes(audio_bytes)

            total_size = len(audio_bytes)

            # 大文件阈值：1MB，超过则使用分块发送
            large_file_threshold = 512 * 1024  # aiohttp限制默认1M，但再处理base64,会增加33%

            if total_size > large_file_threshold:
                util.log(1, f"检测到大文件({total_size} bytes)，使用分块发送模式")
                return self._send_audio_data_chunked(audio_bytes, filename)
            else:
                # 小文件使用原有方式
                return self._send_audio_data_simple(audio_bytes, filename)

        except Exception as e:
            util.log(3, f"发送音频数据时出错: {e}")
            return False

    def _send_audio_data_simple(self, audio_bytes, filename):
        """简单发送模式（小文件）"""
        import base64

        try:
            # 将音频字节数据编码为Base64
            audio_data_b64 = base64.b64encode(audio_bytes).decode('utf-8')

            # 构造发送格式，与funasr服务的process_audio_data函数兼容
            frame = {
                'audio_data': audio_data_b64,
                'filename': filename
            }

            if self.__ws and self.__connected:
                util.log(1, f"发送音频数据到FunASR: {filename}, 大小: {len(audio_bytes)} bytes")
                success = self._send_frame_with_retry(frame)
                if success:
                    util.log(1, f"音频数据已发送: {filename}")
                    return True
                else:
                    util.log(3, f"音频数据发送失败: {filename}")
                    return False
            else:
                util.log(2, f"WebSocket未连接，无法发送音频数据: {filename}")
                return False

        except Exception as e:
            util.log(3, f"简单发送音频数据时出错: {e}")
            return False

    def _send_audio_data_chunked(self, audio_bytes, filename, chunk_size=512*1024):
        """分块发送音频数据（大文件）"""
        import base64
        import math

        try:
            total_size = len(audio_bytes)
            total_chunks = math.ceil(total_size / chunk_size)

            util.log(1, f"开始分块发送: {filename}, 总大小: {total_size} bytes, 分块数: {total_chunks}")

            # 发送开始信号
            start_frame = {
                'type': 'audio_start',
                'filename': filename,
                'total_size': total_size,
                'total_chunks': total_chunks,
                'chunk_size': chunk_size
            }

            if not self._send_frame_with_retry(start_frame):
                util.log(3, f"发送开始信号失败: {filename}")
                return False

            # 分块发送
            for i in range(total_chunks):
                start_pos = i * chunk_size
                end_pos = min(start_pos + chunk_size, total_size)
                chunk_data = audio_bytes[start_pos:end_pos]

                # Base64编码分块
                chunk_b64 = base64.b64encode(chunk_data).decode('utf-8')

                chunk_frame = {
                    'type': 'audio_chunk',
                    'filename': filename,
                    'chunk_index': i,
                    'chunk_data': chunk_b64,
                    'is_last': (i == total_chunks - 1)
                }

                # 发送分块并检查结果
                success = self._send_frame_with_retry(chunk_frame)
                if not success:
                    util.log(3, f"分块 {i+1}/{total_chunks} 发送失败")
                    return False

                # 进度日志
                if (i + 1) % 10 == 0 or i == total_chunks - 1:
                    progress = ((i + 1) / total_chunks) * 100
                    util.log(1, f"发送进度: {progress:.1f}% ({i+1}/{total_chunks})")

                # 流控延迟
                time.sleep(0.01)

            # 发送结束信号
            end_frame = {
                'type': 'audio_end',
                'filename': filename
            }

            if self._send_frame_with_retry(end_frame):
                util.log(1, f"音频数据分块发送完成: {filename}")
                return True
            else:
                util.log(3, f"发送结束信号失败: {filename}")
                return False

        except Exception as e:
            util.log(3, f"分块发送音频数据时出错: {e}")
            return False

    def _send_frame_with_retry(self, frame, max_retries=3, timeout=10):
        """带重试的帧发送"""
        for attempt in range(max_retries):
            try:
                if self.__ws and self.__connected:
                    # 设置发送超时
                    start_time = time.time()
                    self.__ws.send(json.dumps(frame))

                    # 简单的发送确认检查
                    time.sleep(0.05)  # 等待发送完成

                    if time.time() - start_time < timeout:
                        return True
                    else:
                        util.log(2, f"发送超时，尝试 {attempt + 1}/{max_retries}")
                else:
                    util.log(2, f"连接不可用，尝试 {attempt + 1}/{max_retries}")

            except Exception as e:
                util.log(2, f"发送失败，尝试 {attempt + 1}/{max_retries}: {e}")

            if attempt < max_retries - 1:
                time.sleep(0.5 * (attempt + 1))  # 指数退避

        return False

    def set_result_callback(self, callback):
        """设置结果回调函数"""
        self.__result_callback = callback
        util.log(1, f"已设置结果回调函数")


    def connect(self):
        """连接到FunASR服务（同步版本）"""
        try:
            if not self.__connected:
                self.start()  # 调用现有的start方法

                # 等待连接建立，最多等待30秒（针对大文件处理优化）
                max_wait_time = 30.0
                wait_interval = 0.1
                waited_time = 0.0

                while not self.__connected and waited_time < max_wait_time:
                    time.sleep(wait_interval)
                    waited_time += wait_interval

                    # 每5秒输出一次等待日志
                    if waited_time % 5.0 < wait_interval:
                        util.log(1, f"等待FunASR连接中... {waited_time:.1f}s/{max_wait_time}s")

                if self.__connected:
                    util.log(1, f"FunASR连接成功，耗时: {waited_time:.2f}秒")
                else:
                    util.log(3, f"FunASR连接超时，等待了{waited_time:.2f}秒")

                return self.__connected
            return True
        except Exception as e:
            util.log(3, f"连接FunASR服务时出错: {e}")
            return False

    def start(self):
        """启动FunASR客户端"""
        Thread(target=self.__connect, args=[]).start()
        data = {
            'vad_need': False,
            'state': 'StartTranscription'
        }
        self.add_frame(data)
        util.log(1, "FunASR客户端启动")

    def is_connected(self):
        """检查连接状态"""
        return self.__connected

    def end(self):
        """结束FunASR客户端"""
        if self.__connected:
            try:
                # 发送剩余帧
                for frame in self.__frames:
                    self.__frames.pop(0)
                    if type(frame) == dict:
                        self.__ws.send(json.dumps(frame))
                    elif type(frame) == bytes:
                        self.__ws.send(frame, websocket.ABNF.OPCODE_BINARY)

                self.__frames.clear()

                # 发送停止信号
                frame = {'vad_need': False, 'state': 'StopTranscription'}
                self.__ws.send(json.dumps(frame))

            except Exception as e:
                util.log(3, f"结束FunASR时出错: {e}")

        self.__closing = True
        self.__connected = False
        util.log(1, "FunASR客户端结束")