streaming_config.json
3.28 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
{
"_comment": "AIfeng/2025-07-07 09:34:55 - Streaming speech recognition system configuration",
"streaming_vad": {
"sample_rate": 16000,
"chunk_size": 1024,
"volume_threshold": 0.002,
"silence_duration": 2,
"min_speech_duration": 0.2,
"max_speech_duration": 15.0,
"partial_result_interval": 2.0,
"dynamic_threshold": {
"enabled": false,
"adaptation_rate": 0.1,
"min_threshold": 0.003,
"max_threshold": 0.1,
"noise_floor_samples": 50
},
"pre_buffer": {
"enabled": true,
"duration": 0.5,
"max_frames": 32
}
},
"streaming_recognition": {
"confidence_threshold": 0.6,
"max_session_duration": 30.0,
"result_merge_window": 1.0,
"auto_cleanup_interval": 60.0,
"max_partial_results": 100,
"max_final_results": 50,
"similarity_threshold": 0.8,
"duplicate_detection": {
"enabled": true,
"time_window": 2.0,
"text_similarity_threshold": 0.9
}
},
"streaming_recorder": {
"audio": {
"format": "paInt16",
"channels": 1,
"rate": 16000,
"chunk": 1024,
"input_device_index": null,
"enable_audio_gain": true,
"audio_gain": 3.0
},
"processing": {
"max_concurrent_requests": 3,
"request_timeout": 10.0,
"retry_attempts": 2,
"retry_delay": 1.0
},
"callbacks": {
"status_update_interval": 0.1,
"partial_result_debounce": 0.2,
"final_result_delay": 0.5
}
},
"asr_integration": {
"service_type": "funasr",
"connection": {
"max_retries": 3,
"retry_delay": 2.0,
"connection_timeout": 5.0,
"heartbeat_interval": 30.0
},
"request_format": {
"audio_encoding": "wav",
"sample_rate": 16000,
"language": "zh-CN",
"enable_partial_results": true,
"enable_word_confidence": true
}
},
"performance": {
"memory_management": {
"max_audio_buffer_size": 1048576,
"cleanup_interval": 30.0,
"gc_threshold": 0.8
},
"threading": {
"max_worker_threads": 4,
"thread_pool_timeout": 30.0,
"queue_max_size": 100
},
"monitoring": {
"enable_performance_metrics": true,
"metrics_interval": 10.0,
"log_slow_operations": true,
"slow_operation_threshold": 1.0
}
},
"logging": {
"level": "DEBUG",
"format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
"file": "logs/streaming_recognition.log",
"max_file_size": "10MB",
"backup_count": 5,
"modules": {
"streaming_vad": "DEBUG",
"streaming_recognition_manager": "DEBUG",
"streaming_recorder": "DEBUG",
"asr_client": "DEBUG"
}
},
"debug": {
"enabled": true,
"save_audio_segments": true,
"audio_segments_dir": "debug/audio_segments",
"save_recognition_results": true,
"results_dir": "debug/recognition_results",
"performance_profiling": true,
"verbose_callbacks": true
},
"experimental": {
"adaptive_thresholds": {
"enabled": false,
"learning_rate": 0.01,
"adaptation_window": 100
},
"semantic_segmentation": {
"enabled": false,
"model_path": null,
"confidence_threshold": 0.7
},
"noise_reduction": {
"enabled": false,
"algorithm": "spectral_subtraction",
"strength": 0.5
}
}
}