# audio_recognition_app.py
import tkinter as tk
from tkinter import filedialog
import websocket
import threading
import pyaudio
import wave
import os
import json
import struct
import time
import datetime

class AudioRecognitionApp:
    def __init__(self, root):
        self.root = root
        self.root.title("音频识别应用")
        self.root.geometry("500x400")
        
        self.ws = None
        self.is_recording = False
        self.is_testing_mic = False
        self.is_cache_recording = False  # state flag for "cache, then send" recording
        self.audio_stream = None
        self.p = pyaudio.PyAudio()

        # Audio parameters
        self.sample_rate = 16000  # 16 kHz sample rate
        self.chunk_size = 1600    # 100 ms of audio per chunk (16000 * 0.1 samples)
        self.format = pyaudio.paInt16  # 16-bit samples
        self.channels = 1  # mono

        # Silence-detection parameters: the threshold should separate speech from background noise
        self.silence_threshold = 300  # RMS value below which a chunk counts as silence
        self.silence_frames = 0  # consecutive silent chunks seen so far
        self.max_silence_frames = 20  # 2 s of silence (16000 Hz / 1600 samples per chunk = 10 chunks/s, x 2 s)

        # Tk variable backing the silence-threshold slider
        self.silence_threshold_var = tk.IntVar(value=self.silence_threshold)
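        # Derived timing relationships (from the values above):
        #   1 chunk  = 1600 samples = 100 ms = 3200 bytes of 16-bit mono PCM
        #   10 chunks = 1 s; max_silence_frames = 20 chunks = 2 s of silence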
        
        # Other parameters
        self.recording_frames = []  # accumulated recording frames
        self.ws_timeout = 10  # WebSocket connection timeout (seconds)
        self.volume_gain = 5.0  # volume gain multiplier

        # Accumulation buffer and send-rate control
        self.buffer_frames = []  # audio chunks waiting to be sent
        self.buffer_max_frames = 16  # send after 16 buffered chunks (~1.6 s at 100 ms per chunk)
        self.last_send_time = 0  # time of the last send
        self.min_send_interval = 0.8  # minimum interval between sends (seconds)

        # Parameters for "cache, then send" recording
        self.cache_segment_duration = 1.0  # duration of each cached segment (seconds)
        self.cache_segment_frames = int(self.sample_rate * self.cache_segment_duration / self.chunk_size)  # chunks per segment
        self.cache_dir = "cache"  # cache directory
        self.cache_files = []  # paths of the cache files written so far
        self.current_cache_frames = []  # chunks of the segment currently being filled
        
        # Create the cache directory if it does not exist
        if not os.path.exists(self.cache_dir):
            os.makedirs(self.cache_dir)

        # Build the UI
        self.setup_ui()
        
    def setup_ui(self):
        # 调整窗口大小以适应更多内容
        self.root.geometry("600x700")  # 增加高度以容纳新控件
        
        # 创建顶部控制区域框架
        control_frame = tk.Frame(self.root)
        control_frame.pack(fill=tk.X, padx=10, pady=10)
        
        # 连接状态标签
        self.status_label = tk.Label(control_frame, text="未连接", fg="red")
        self.status_label.pack(side=tk.LEFT, padx=5)
        
        # 创建麦克风设备选择区域
        mic_frame = tk.Frame(self.root)
        mic_frame.pack(fill=tk.X, padx=10, pady=5)
        
        # 麦克风设备标签
        mic_label = tk.Label(mic_frame, text="选择麦克风设备:")
        mic_label.pack(side=tk.LEFT, padx=5)
        
        # 获取可用的音频输入设备
        self.mic_devices = []
        self.mic_device_names = []
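        # PyAudio lists every device of every host API; entries with
        # maxInputChannels > 0 are the ones that can capture audio.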
        for i in range(self.p.get_device_count()):
            device_info = self.p.get_device_info_by_index(i)
            if device_info['maxInputChannels'] > 0:  # 只显示输入设备
                self.mic_devices.append(i)
                name = device_info['name']
                self.mic_device_names.append(f"{i}: {name}")
        
        # 默认选择第一个设备
        self.selected_mic_index = tk.StringVar()
        if self.mic_device_names:
            self.selected_mic_index.set(self.mic_device_names[0])
        
        # Microphone device dropdown (OptionMenu needs at least one value, so fall back to a placeholder)
        self.mic_dropdown = tk.OptionMenu(mic_frame, self.selected_mic_index, *(self.mic_device_names or ["无可用输入设备"]))
        self.mic_dropdown.pack(side=tk.LEFT, padx=5)
        
        # 刷新麦克风列表按钮
        refresh_button = tk.Button(mic_frame, text="刷新设备列表", command=self.refresh_mic_devices)
        refresh_button.pack(side=tk.LEFT, padx=5)
        
        # 添加音量增益控制区域
        gain_frame = tk.Frame(self.root)
        gain_frame.pack(fill=tk.X, padx=10, pady=5)
        
        gain_label = tk.Label(gain_frame, text="音量增益:")
        gain_label.pack(side=tk.LEFT, padx=5)
        
        self.gain_value = tk.DoubleVar(value=self.volume_gain)
        self.gain_slider = tk.Scale(gain_frame, from_=1.0, to=10.0, resolution=0.5, 
                                   orient=tk.HORIZONTAL, length=200, 
                                   variable=self.gain_value, command=self.update_gain)
        self.gain_slider.pack(side=tk.LEFT, padx=5)
        self.gain_label = tk.Label(gain_frame, text=f"{self.volume_gain:.1f}x")
        self.gain_label.pack(side=tk.LEFT, padx=5)
        
        # 连接按钮
        self.connect_button = tk.Button(control_frame, text="连接WebSocket", command=self.connect_websocket)
        self.connect_button.pack(side=tk.LEFT, padx=5)
        
        # 断开连接按钮
        self.disconnect_button = tk.Button(control_frame, text="断开连接", command=self.disconnect_websocket, state=tk.DISABLED)
        self.disconnect_button.pack(side=tk.LEFT, padx=5)
        
        # 创建操作区域框架
        operation_frame = tk.Frame(self.root)
        operation_frame.pack(fill=tk.X, padx=10, pady=5)
        
        # 上传文件按钮
        self.upload_button = tk.Button(operation_frame, text="上传音频文件", command=self.upload_audio_file, state=tk.DISABLED)
        self.upload_button.pack(side=tk.LEFT, padx=5)
        
        # 麦克风录音按钮
        self.mic_button = tk.Button(operation_frame, text="开始麦克风录音", command=self.toggle_microphone, state=tk.DISABLED)
        self.mic_button.pack(side=tk.LEFT, padx=5)
        
        # 麦克风测试按钮
        self.test_mic_button = tk.Button(operation_frame, text="测试麦克风", command=self.toggle_test_microphone)
        self.test_mic_button.pack(side=tk.LEFT, padx=5)
        
        # 添加"先存后传录音"按钮
        self.cache_record_button = tk.Button(operation_frame, text="先存后传录音", command=self.toggle_cache_recording, state=tk.DISABLED)
        self.cache_record_button.pack(side=tk.LEFT, padx=5)
        
        # 创建客户端操作日志区域
        client_log_frame = tk.LabelFrame(self.root, text="客户端操作日志")
        client_log_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=5)
        
        # Scrollbar is a sibling of the Text widget inside the frame
        self.client_log_text = tk.Text(client_log_frame, height=6, width=60)
        client_scroll = tk.Scrollbar(client_log_frame, command=self.client_log_text.yview)
        client_scroll.pack(side=tk.RIGHT, fill=tk.Y)
        self.client_log_text.pack(fill=tk.BOTH, expand=True, padx=5, pady=5)
        self.client_log_text.config(yscrollcommand=client_scroll.set)
        
        # 创建服务端识别结果区域
        server_result_frame = tk.LabelFrame(self.root, text="服务端识别结果")
        server_result_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=5)
        
        # Scrollbar is a sibling of the Text widget inside the frame
        self.result_text = tk.Text(server_result_frame, height=8, width=60)
        result_scroll = tk.Scrollbar(server_result_frame, command=self.result_text.yview)
        result_scroll.pack(side=tk.RIGHT, fill=tk.Y)
        self.result_text.pack(fill=tk.BOTH, expand=True, padx=5, pady=5)
        self.result_text.config(yscrollcommand=result_scroll.set)
        
        # 添加音量增益控制区域后,添加静默阈值控制
        silence_frame = tk.Frame(self.root)
        silence_frame.pack(fill=tk.X, padx=10, pady=5)
        
        silence_label = tk.Label(silence_frame, text="静默阈值:")
        silence_label.pack(side=tk.LEFT, padx=5)
        
        self.silence_slider = tk.Scale(silence_frame, from_=100, to=2000, resolution=50, 
                                      orient=tk.HORIZONTAL, length=200, 
                                      variable=self.silence_threshold_var, command=self.update_silence_threshold)
        self.silence_slider.pack(side=tk.LEFT, padx=5)
        self.silence_value_label = tk.Label(silence_frame, text=f"{self.silence_threshold}")
        self.silence_value_label.pack(side=tk.LEFT, padx=5)
        
    def connect_websocket(self):
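        # Message shapes handled below (the exact schema is an assumption about the
        # server, inferred from this handler):
        #   {"text": "..."}                      -> recognition result
        #   {"status": "...", "message": "..."}  -> server status update
        # Anything else is logged verbatim in the client log.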
        def on_message(ws, message):
            try:
                self.update_result(f"收到WebSocket消息,长度:{len(message)}字节")
                result = json.loads(message)
                if "text" in result:
                    # 将服务端返回的识别结果显示在结果区域
                    self.update_result(result["text"], is_server_result=True)
                    # 在客户端日志中记录收到结果
                    self.update_result(f"收到识别结果: {result['text'][:20]}{'...' if len(result['text']) > 20 else ''}")
                elif "status" in result:
                    # 服务端状态消息显示在客户端日志中
                    self.update_result(f"服务端状态: {result['status']} - {result.get('message', '')}")
                else:
                    # 其他消息显示在客户端日志中
                    self.update_result(f"收到消息: {message}")
            except json.JSONDecodeError:
                self.update_result(f"收到非JSON消息: {message}")
            except Exception as e:
                self.update_result(f"处理WebSocket消息时发生异常:{str(e)}")
        
        def on_error(ws, error):
            self.update_status("连接错误", "red")
            self.update_result(f"WebSocket错误: {error}")
            print(f"错误: {error}")
        
        def on_close(ws, close_status_code, close_msg):
            self.update_status("连接已关闭", "red")
            self.update_result(f"WebSocket连接已关闭,状态码:{close_status_code},消息:{close_msg}")
            self.upload_button.config(state=tk.DISABLED)
            self.mic_button.config(state=tk.DISABLED)
            self.cache_record_button.config(state=tk.DISABLED)  # 禁用先存后传录音按钮
            self.disconnect_button.config(state=tk.DISABLED)
            self.connect_button.config(state=tk.NORMAL)
            print("连接已关闭")
        
        def on_open(ws):
            self.update_status("已连接", "green")
            self.update_result("WebSocket连接已建立,准备发送/接收数据")
            self.upload_button.config(state=tk.NORMAL)
            self.mic_button.config(state=tk.NORMAL)
            self.cache_record_button.config(state=tk.NORMAL)  # 启用先存后传录音按钮
            self.disconnect_button.config(state=tk.NORMAL)
            self.connect_button.config(state=tk.DISABLED)
            print("连接已建立")
        
        try:
            # 如果已经有连接,先关闭
            if self.ws:
                self.disconnect_websocket()
            # "ws://sitigrs.boeart.cn/jeecg-boot/api/speech/recognize"    # 
            self.ws = websocket.WebSocketApp("ws://localhost:10197",
                                            on_open=on_open,
                                            on_message=on_message,
                                            on_error=on_error,
                                            on_close=on_close)
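            # run_forever() blocks, so it runs on a daemon thread; the 30 s ping with a
            # 10 s timeout keeps the connection alive and detects a dead server.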
            
            wst = threading.Thread(target=lambda: self.ws.run_forever(ping_interval=30, ping_timeout=10, ping_payload="ping"))
            wst.daemon = True
            wst.start()
            
            self.update_status("正在连接...", "orange")
        except Exception as e:
            self.update_status(f"连接失败: {str(e)}", "red")
    
    def update_gain(self, value):
        """更新音量增益值"""
        self.volume_gain = float(value)
        self.gain_label.config(text=f"{self.volume_gain:.1f}x")
        self.update_result(f"音量增益已调整为: {self.volume_gain:.1f}x")
    
    def update_silence_threshold(self, value):
        """更新静默阈值"""
        self.silence_threshold = int(value)
        self.silence_value_label.config(text=f"{self.silence_threshold}")
        self.update_result(f"静默阈值已调整为: {self.silence_threshold}")
    
    def upload_audio_file(self):
        file_path = filedialog.askopenfilename(
            filetypes=[("音频文件", "*.wav *.mp3")]
        )
        
        if file_path:
            try:
                if file_path.lower().endswith('.wav'):
                    with open(file_path, 'rb') as f:
                        audio_data = f.read()
                    if self.ws and self.ws.sock:
                        self.ws.send(audio_data, websocket.ABNF.OPCODE_BINARY)
                        self.update_result(f"已发送文件: {os.path.basename(file_path)}")
                else:
                    self.update_result("目前只支持WAV格式文件")
            except Exception as e:
                self.update_result(f"发送文件错误: {str(e)}")
    
    def toggle_microphone(self):
        if not self.is_recording:
            self.start_recording()
            self.mic_button.config(text="停止麦克风录音")
        else:
            self.stop_recording()
            self.mic_button.config(text="开始麦克风录音")
    
    def toggle_test_microphone(self):
        if not self.is_testing_mic:
            self.start_test_recording()
            self.test_mic_button.config(text="停止麦克风测试")
        else:
            self.stop_test_recording()
            self.test_mic_button.config(text="测试麦克风")
    
    def start_recording(self):
        self.is_recording = True
        self.recording_frames = []  # 重置录音帧
        self.buffer_frames = []     # 重置缓冲区
        self.silence_frames = 0     # 重置静默计数
        self.last_send_time = time.time()
        
        # 获取选中的麦克风设备索引
        selected_device = self.selected_mic_index.get()
        if selected_device:
            device_index = int(selected_device.split(":")[0])
            self.update_result(f"使用麦克风设备: {selected_device}")
        else:
            device_index = None  # 使用系统默认设备
            self.update_result("使用系统默认麦克风设备")
        
        self.update_result("开始录音...")
        
        def audio_callback(in_data, frame_count, time_info, status):
            if self.is_recording:
                # 确保使用原始字节数据
                audio_data = in_data
                
                # 计算RMS值用于音量显示和静默检测
                rms = self.calculate_rms(audio_data)
                
                # 应用音量增益
                amplified_data = self.apply_volume_gain(audio_data)
                
                # 更新音量指示器
                self.update_volume_indicator(rms)
                
                # 存储放大后的录音数据 - 始终保存到录音帧中,不再重置
                self.recording_frames.append(amplified_data)
                
                # 添加到发送缓冲区
                self.buffer_frames.append(amplified_data)
                
                # 静默检测 - 只用于显示,不再中断录音或发送结束命令
                if rms < self.silence_threshold:
                    self.silence_frames += 1
                else:
                    self.silence_frames = 0
                
                # 当累积足够的帧数或距离上次发送已经过了足够时间时发送数据
                current_time = time.time()
                if (len(self.buffer_frames) >= self.buffer_max_frames or 
                    (current_time - self.last_send_time >= self.min_send_interval and self.buffer_frames)):
                    
                    buffer_data = b''.join(self.buffer_frames)
                    buffer_size = len(buffer_data)
                    
                    if buffer_size > 0:
                        self.send_audio_data(buffer_data)
                        self.last_send_time = current_time
                    
                    # 清空缓冲区,但不清空录音帧
                    self.buffer_frames = []
            
            return (in_data, pyaudio.paContinue)
        
        # 打开音频流
        self.audio_stream = self.p.open(
            format=self.format,
            channels=self.channels,
            rate=self.sample_rate,
            input=True,
            output=False,
            frames_per_buffer=self.chunk_size,
            input_device_index=device_index,
            stream_callback=audio_callback
        )
        
        self.audio_stream.start_stream()
        self.update_result("开始录音...")
    
    def start_test_recording(self):
        self.is_testing_mic = True
        self.recording_frames = []
        
        # 获取选中的麦克风设备索引
        selected_device = self.selected_mic_index.get()
        if selected_device:
            device_index = int(selected_device.split(":")[0])
            self.update_result(f"测试麦克风设备: {selected_device}")
        else:
            device_index = None  # 使用系统默认设备
            self.update_result("测试系统默认麦克风设备")
        
        def audio_callback(in_data, frame_count, time_info, status):
            if self.is_testing_mic:
                # 计算音量,用于显示音量指示
                audio_data = in_data
                rms = self.calculate_rms(audio_data)
                
                # 应用音量增益
                amplified_data = self.apply_volume_gain(audio_data)
                
                # 存储放大后的录音数据
                self.recording_frames.append(amplified_data)
                
                # 更新音量指示器
                self.update_volume_indicator(rms)
            return (in_data, pyaudio.paContinue)
        
        # 打开音频流
        self.audio_stream = self.p.open(
            format=self.format,
            channels=self.channels,
            rate=self.sample_rate,
            input=True,
            output=False,
            frames_per_buffer=self.chunk_size,
            input_device_index=device_index,
            stream_callback=audio_callback
        )
        
        self.audio_stream.start_stream()
        self.update_result("麦克风测试已开始")
    
    def update_volume_indicator(self, rms):
        """更新音量指示器"""
        # 创建一个更直观的音量指示
        volume_level = min(int(rms / 10), 100)  # 将音量标准化到0-100范围
        volume_text = "█" * (volume_level // 5)  # 每5个单位显示一个方块
        
        # 根据音量大小设置不同颜色
        if rms > self.silence_threshold:
            color = "green"
            status = "检测到声音"
        else:
            color = "orange"
            status = "静默"
        
        # 更新状态标签
        self.root.after(0, lambda: self.status_label.config(
            text=f"{status}: {int(rms)} [{volume_text}]", 
            fg=color
        ))
        
        # 在日志中记录较大的音量变化(避免过多日志)
        if rms > self.silence_threshold * 2 and volume_level % 20 == 0:
            self.update_result(f"当前音量: {int(rms)}")
    
    def stop_recording(self):
        # 先标记录音状态为False,防止回调函数继续处理
        self.is_recording = False
        
        # 安全关闭音频流
        if self.audio_stream:
            try:
                # 检查流是否打开
                if self.audio_stream.is_active():
                    self.audio_stream.stop_stream()
                self.audio_stream.close()
            except OSError as e:
                # 捕获可能的超时或流已关闭错误
                self.update_result(f"关闭音频流时出现错误: {str(e)}")
            finally:
                self.audio_stream = None
        
        # 发送剩余缓冲区数据
        if self.buffer_frames and self.ws and self.ws.sock and self.ws.sock.connected:
            buffer_data = b''.join(self.buffer_frames)
            if len(buffer_data) > 0:
                self.send_audio_data(buffer_data)
            self.buffer_frames = []
        
        # 发送结束命令
        if self.ws and self.ws.sock and self.ws.sock.connected:
            try:
                self.ws.send(json.dumps({"command": "end"}), websocket.ABNF.OPCODE_TEXT)
                self.update_result("已发送结束命令")
            except Exception as e:
                self.update_result(f"发送结束命令时出现错误: {str(e)}")
        
        # 保存完整录音 - 只在停止录音时保存一次
        if self.recording_frames:
            filename = self.save_recording()
            self.update_result(f"已保存完整录音: {filename}")
            self.recording_frames = []  # 保存后清空录音帧
        
        self.update_result("录音已停止")
    
    def stop_test_recording(self):
        # 先标记测试状态为False,防止回调函数继续处理
        self.is_testing_mic = False
        
        # 安全关闭音频流
        if self.audio_stream:
            try:
                # 检查流是否打开
                if self.audio_stream.is_active():
                    self.audio_stream.stop_stream()
                self.audio_stream.close()
            except OSError as e:
                # 捕获可能的超时或流已关闭错误
                self.update_result(f"关闭测试音频流时出现错误: {str(e)}")
            finally:
                self.audio_stream = None
        
        # 保存测试录音
        if self.recording_frames:
            filename = self.save_recording("test_recording")
            self.update_result(f"测试录音已保存到: {filename}")
            self.recording_frames = []
        
        self.update_result("麦克风测试已停止")
    
    def update_status(self, text, color):
        self.status_label.config(text=text, fg=color)
    
    def update_result(self, text, is_server_result=False):
        """更新结果显示
        
        Args:
            text: 要显示的文本
            is_server_result: 是否为服务端返回的识别结果
        """
        if is_server_result:
            # 服务端识别结果显示在结果区域
            self.root.after(0, lambda: self.result_text.insert(tk.END, text + "\n"))
            self.root.after(0, lambda: self.result_text.see(tk.END))
        else:
            # 客户端操作日志显示在日志区域
            self.root.after(0, lambda: self.client_log_text.insert(tk.END, text + "\n"))
            self.root.after(0, lambda: self.client_log_text.see(tk.END))
    
    def calculate_rms(self, data):
        """计算音频数据的RMS值,用于检测静默"""
        try:
            # 将字节数据转换为16位整数数组
            count = len(data) // 2
            if count == 0:
                self.update_result("警告:计算RMS时收到空数据")
                return 0
                
            format = f"{count}h"
            shorts = struct.unpack(format, bytes(data))
            
            # 计算RMS值
            sum_squares = 0
            for sample in shorts:
                sum_squares += sample * sample
            
            rms_value = (sum_squares / count) ** 0.5 if count > 0 else 0
            return rms_value
        except Exception as e:
            self.update_result(f"错误:计算RMS值时发生异常:{str(e)}")
            return 0
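    # RMS here is sqrt(sum(x_i^2) / N) over the int16 samples, so it ranges from 0
    # (digital silence) to 32767 (full scale); the silence threshold is compared
    # against this value.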
        
    def send_audio_data(self, audio_data):
        """发送音频数据到WebSocket服务器"""
        if not self.ws or not self.ws.sock or not self.ws.sock.connected:
            self.update_result("WebSocket未连接,无法发送数据")
            return
        
        data_size = len(audio_data)
        if data_size == 0:
            self.update_result("警告:尝试发送空数据")
            return
        
        # 计算RMS值用于日志
        rms = self.calculate_rms(audio_data)
        
        # 记录发送信息
        self.update_result(f"发送WebSocket数据:{data_size}字节,RMS值:{int(rms)}")
        
        # 记录数据前几个字节用于调试
        if data_size > 10:
            first_bytes = audio_data[:10]
            self.update_result(f"数据前10字节:{first_bytes}")
        
        try:
            # 发送数据
            result = self.ws.send(audio_data, websocket.ABNF.OPCODE_BINARY)
            self.update_result(f"WebSocket发送结果:{result}")
        except Exception as e:
            self.update_result(f"发送数据错误: {str(e)}")
    
    def apply_volume_gain(self, data):
        """应用音量增益到音频数据"""
        # 将字节数据转换为16位整数数组
        count = len(data) // 2
        if count == 0:
            self.update_result("错误:应用音量增益时收到空数据")
            return data
            
        format = f"{count}h"
        try:
            shorts = struct.unpack(format, bytes(data))
            
            # 检查是否全是0或接近0的值
            is_silent = True
            for sample in shorts[:100]:  # 检查前100个样本
                if abs(sample) > 10:  # 非零阈值
                    is_silent = False
                    break
                    
            if is_silent:
                # 记录警告但仍处理数据
                if len(self.recording_frames) % 50 == 0:  # 减少日志频率
                    self.update_result("警告:检测到可能的静默数据")
            
            # 计算原始数据的RMS值
            original_rms = 0
            for sample in shorts:
                original_rms += sample * sample
            original_rms = (original_rms / count) ** 0.5 if count > 0 else 0
            
            # 应用音量增益
            amplified_shorts = []
            for sample in shorts:
                # 应用增益并确保不超过16位整数范围
                amplified_sample = int(sample * self.volume_gain)
                if amplified_sample > 32767:
                    amplified_sample = 32767
                elif amplified_sample < -32768:
                    amplified_sample = -32768
                amplified_shorts.append(amplified_sample)
            
            # 计算放大后数据的RMS值
            amplified_rms = 0
            for sample in amplified_shorts:
                amplified_rms += sample * sample
            amplified_rms = (amplified_rms / count) ** 0.5 if count > 0 else 0
            
            # 每16帧记录一次增益效果,避免日志过多
            if len(self.recording_frames) % 16 == 0:
                self.update_result(f"音量增益:原始RMS={int(original_rms)} -> 放大后RMS={int(amplified_rms)},增益倍数={self.volume_gain:.1f}x")
            
            # 将放大后的数据打包回字节数组
            result = struct.pack(format, *amplified_shorts)
            return result
        except Exception as e:
            self.update_result(f"错误:应用音量增益时发生异常:{str(e)}")
            return data  # 出错时返回原始数据
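    # The per-sample loop above could be vectorized with numpy (a sketch, assuming
    # numpy were added as a dependency; not used here to keep the script stdlib-only):
    #   samples = np.frombuffer(data, dtype=np.int16).astype(np.int32)
    #   amplified = np.clip(samples * self.volume_gain, -32768, 32767).astype(np.int16)
    #   return amplified.tobytes()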
    
    def save_recording(self, prefix="recording"):
        """保存录音数据到WAV文件"""
        if not self.recording_frames:
            self.update_result("没有录音数据可保存")
            return None
        
        # 合并所有帧
        audio_data = b''.join(self.recording_frames)
        data_size = len(audio_data)
        frame_count = data_size // (2 * self.channels)  # 16位采样,每帧2字节
        duration = frame_count / self.sample_rate  # 计算录音时长(秒)
        
        # Warn (but still save) if the recording is very short
        if data_size < 8000:  # less than ~0.25 s of audio (32,000 bytes per second at 16 kHz, 16-bit mono)
            self.update_result(f"警告:录音数据较短({data_size}字节,约{duration:.2f}秒)")
        
        # 创建时间戳文件名
        timestamp = time.strftime("%Y%m%d-%H%M%S")
        filename = f"{prefix}_{timestamp}.wav"
        
        self.update_result(f"准备保存录音文件:{filename},数据大小:{data_size}字节,时长:{duration:.1f}秒")
        
        # 检查数据有效性
        if data_size == 0:
            self.update_result("错误:没有有效的音频数据可保存")
            return None
            
        # 保存为WAV文件
        try:
            wf = wave.open(filename, 'wb')
            wf.setnchannels(self.channels)
            wf.setsampwidth(self.p.get_sample_size(self.format))
            wf.setframerate(self.sample_rate)
            wf.writeframes(audio_data)
            wf.close()
            
            file_size = os.path.getsize(filename)
            self.update_result(f"录音文件已保存:{filename},文件大小:{file_size}字节")
            
            return filename
        except Exception as e:
            self.update_result(f"保存录音文件时出错:{str(e)}")
            return None
    
    def disconnect_websocket(self):
        if self.ws:
            # Stop any active capture first, so buffered audio and the end command can still be sent
            if self.is_recording:
                self.stop_recording()
            if self.is_cache_recording:
                self.stop_cache_recording()
            self.ws.close()
            self.ws = None
            self.update_status("已断开连接", "red")
            self.upload_button.config(state=tk.DISABLED)
            self.mic_button.config(state=tk.DISABLED)
            self.cache_record_button.config(state=tk.DISABLED)
            self.disconnect_button.config(state=tk.DISABLED)
            self.connect_button.config(state=tk.NORMAL)
            self.update_result("已断开与服务器的连接")
    
    def refresh_mic_devices(self):
        """刷新麦克风设备列表"""
        # 清空当前设备列表
        self.mic_devices = []
        self.mic_device_names = []
        
        # 重新获取设备列表
        for i in range(self.p.get_device_count()):
            device_info = self.p.get_device_info_by_index(i)
            if device_info['maxInputChannels'] > 0:  # 只显示输入设备
                self.mic_devices.append(i)
                name = device_info['name']
                self.mic_device_names.append(f"{i}: {name}")
        
        # 更新下拉菜单
        menu = self.mic_dropdown["menu"]
        menu.delete(0, "end")
        for name in self.mic_device_names:
            menu.add_command(label=name, command=lambda value=name: self.selected_mic_index.set(value))
        
        # 如果有设备,选择第一个
        if self.mic_device_names:
            self.selected_mic_index.set(self.mic_device_names[0])
            self.update_result(f"已刷新麦克风设备列表,找到 {len(self.mic_device_names)} 个设备")
        else:
            self.update_result("未找到麦克风设备")
    
    def on_closing(self):
        if self.is_recording:
            self.stop_recording()
        
        if self.is_testing_mic:
            self.stop_test_recording()
        
        if self.is_cache_recording:  # 添加对先存后传录音的处理
            self.stop_cache_recording()
        
        if self.ws:
            self.ws.close()
        
        self.p.terminate()
        self.root.destroy()
        
    def toggle_cache_recording(self):
        """切换先存后传录音状态"""
        if not self.is_cache_recording:
            self.start_cache_recording()
            self.cache_record_button.config(text="停止先存后传")
        else:
            self.stop_cache_recording()
            self.cache_record_button.config(text="先存后传录音")
    
    def start_cache_recording(self):
        """开始先存后传录音"""
        self.is_cache_recording = True
        self.current_cache_frames = []  # 重置当前缓存段
        self.cache_files = []  # 重置缓存文件列表
        self.silence_frames = 0  # 重置静默计数
        
        # 清空缓存目录中的旧文件
        for file in os.listdir(self.cache_dir):
            if file.endswith(".wav"):
                try:
                    os.remove(os.path.join(self.cache_dir, file))
                except Exception as e:
                    self.update_result(f"清理缓存文件时出错: {str(e)}")
        
        # 获取选中的麦克风设备索引
        selected_device = self.selected_mic_index.get()
        if selected_device:
            device_index = int(selected_device.split(":")[0])
            self.update_result(f"使用麦克风设备: {selected_device}")
        else:
            device_index = None  # 使用系统默认设备
            self.update_result("使用系统默认麦克风设备")
        
        self.update_result(f"开始先存后传录音... 静默阈值: {self.silence_threshold}")
        
        def audio_callback(in_data, frame_count, time_info, status):
            if self.is_cache_recording:
                # 确保使用原始字节数据
                audio_data = in_data
                
                # 计算RMS值用于音量显示和静默检测
                rms = self.calculate_rms(audio_data)
                
                # 应用音量增益
                amplified_data = self.apply_volume_gain(audio_data)
                
                # 更新音量指示器
                self.update_volume_indicator(rms)
                
                # 静默检测
                is_silent = rms < self.silence_threshold
                
                if is_silent:
                    self.silence_frames += 1
                    if self.silence_frames % 10 == 0:  # 每10帧记录一次静默状态
                        self.update_result(f"缓存录音检测到静默: {self.silence_frames}帧, RMS={int(rms)}")
                else:
                    if self.silence_frames > 0:
                        self.update_result(f"缓存录音静默结束,持续了{self.silence_frames}帧")
                    self.silence_frames = 0
                
                # 添加到当前缓存段 - 无论是否静默都保存,但标记静默状态
                frame_info = {
                    'data': amplified_data,
                    'is_silent': is_silent,
                    'rms': rms
                }
                self.current_cache_frames.append(frame_info)
                
                # 当累积足够的帧数时,保存为一个缓存文件
                if len(self.current_cache_frames) >= self.cache_segment_frames:
                    cache_filename = self.save_cache_segment()
                    if cache_filename:
                        self.cache_files.append(cache_filename)
                        # 如果WebSocket已连接,立即发送该文件(只有非静默文件才发送)
                        if not self.is_segment_silent(self.current_cache_frames):
                            self.send_cache_file(cache_filename)
                        else:
                            self.update_result(f"缓存段静默比例过高,不发送: {os.path.basename(cache_filename)}")
                    # 重置当前缓存段
                    self.current_cache_frames = []
            
            return (in_data, pyaudio.paContinue)
        
        # 打开音频流
        self.audio_stream = self.p.open(
            format=self.format,
            channels=self.channels,
            rate=self.sample_rate,
            input=True,
            output=False,
            frames_per_buffer=self.chunk_size,
            input_device_index=device_index,
            stream_callback=audio_callback
        )
        
        self.audio_stream.start_stream()
    
    def save_cache_segment(self):
        """保存当前缓存段为WAV文件"""
        if not self.current_cache_frames:
            return None
        
        # 提取音频数据
        audio_data_list = [frame['data'] for frame in self.current_cache_frames]
        
        # 合并所有帧
        audio_data = b''.join(audio_data_list)
        data_size = len(audio_data)
        
        # Skip segments with too little data
        if data_size < 1000:  # less than ~31 ms of audio (32,000 bytes per second at 16 kHz, 16-bit mono)
            self.update_result("缓存段数据太少,不保存")
            return None
        
        # 创建时间戳文件名
        timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S-%f")[:19]  # 精确到毫秒
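        # %f is microseconds (6 digits); slicing to 19 characters keeps the first 3,
        # i.e. millisecond precision, e.g. "20240101-123456-123".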
        
        # 判断是否为静默段
        is_silent = self.is_segment_silent(self.current_cache_frames)
        prefix = "silent" if is_silent else "cache"
        
        filename = os.path.join(self.cache_dir, f"{prefix}_{timestamp}.wav")
        
        # 保存为WAV文件
        try:
            wf = wave.open(filename, 'wb')
            wf.setnchannels(self.channels)
            wf.setsampwidth(self.p.get_sample_size(self.format))
            wf.setframerate(self.sample_rate)
            wf.writeframes(audio_data)
            wf.close()
            
            file_size = os.path.getsize(filename)
            duration = len(self.current_cache_frames) * self.chunk_size / self.sample_rate
            
            status = "静默" if is_silent else "有声音"
            self.update_result(f"已保存{status}缓存段: {filename}, 大小: {file_size}字节, 时长: {duration:.2f}秒")
            
            return filename
        except Exception as e:
            self.update_result(f"保存缓存段时出错: {str(e)}")
            return None
    
    # 添加缺失的is_segment_silent方法
    def is_segment_silent(self, frames):
        """判断一个缓存段是否大部分是静默"""
        if not frames:
            return True
            
        silent_frames = sum(1 for frame in frames if frame['is_silent'])
        silent_ratio = silent_frames / len(frames)
        
        # 如果静默帧占比超过70%,则认为整个段是静默的
        is_silent = silent_ratio > 0.7
        
        if is_silent:
            avg_rms = sum(frame['rms'] for frame in frames) / len(frames)
            self.update_result(f"缓存段静默比例: {silent_ratio:.2f}, 平均RMS: {int(avg_rms)}")
            
        return is_silent
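    # Example: with 10 chunks per 1 s segment, 8 silent chunks give a ratio of 0.8,
    # which exceeds 0.7, so the whole segment is treated as silent and not sent.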
    
    def send_cache_file(self, filename):
        """发送缓存文件到WebSocket服务器"""
        if not self.ws or not self.ws.sock or not self.ws.sock.connected:
            self.update_result(f"WebSocket未连接,无法发送缓存文件: {filename}")
            return False
        
        try:
            with open(filename, 'rb') as f:
                audio_data = f.read()
                
            # 发送文件数据
            self.update_result(f"发送缓存文件: {os.path.basename(filename)}, 大小: {len(audio_data)}字节")
            self.ws.send(audio_data, websocket.ABNF.OPCODE_BINARY)
            return True
        except Exception as e:
            self.update_result(f"发送缓存文件时出错: {str(e)}")
            return False
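    # Sent files are not deleted here; everything in cache/ is kept until the next
    # start_cache_recording() call, which clears old *.wav files from the directory.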
    
    def stop_cache_recording(self):
        """停止先存后传录音"""
        # 先标记录音状态为False,防止回调函数继续处理
        self.is_cache_recording = False
        
        # 安全关闭音频流
        if self.audio_stream:
            try:
                if self.audio_stream.is_active():
                    self.audio_stream.stop_stream()
                self.audio_stream.close()
            except OSError as e:
                self.update_result(f"关闭音频流时出现错误: {str(e)}")
            finally:
                self.audio_stream = None
        
        # 保存最后一个缓存段
        if self.current_cache_frames:
            cache_filename = self.save_cache_segment()
            if cache_filename:
                self.cache_files.append(cache_filename)
                # 如果WebSocket已连接,发送该文件
                self.send_cache_file(cache_filename)
        
        # 发送结束命令
        if self.ws and self.ws.sock and self.ws.sock.connected:
            try:
                self.ws.send(json.dumps({"command": "end"}), websocket.ABNF.OPCODE_TEXT)
                self.update_result("已发送结束命令")
            except Exception as e:
                self.update_result(f"发送结束命令时出现错误: {str(e)}")
        
        # 显示录音统计信息
        if self.cache_files:
            total_size = sum(os.path.getsize(f) for f in self.cache_files)
            self.update_result(f"先存后传录音已完成,共{len(self.cache_files)}个文件,总大小: {total_size}字节")
        else:
            self.update_result("先存后传录音已完成,但没有保存任何文件")
        
        self.current_cache_frames = []


if __name__ == "__main__":
    root = tk.Tk()
    app = AudioRecognitionApp(root)
    root.protocol("WM_DELETE_WINDOW", app.on_closing)
    root.mainloop()