@@ -72,24 +72,33 @@ def save_audio_to_file(self, opus_data: List[bytes], session_id: str) -> str:
72
72
wav_path = os .path .join (self .output_dir , f"{ base_name } .wav" )
73
73
opus_path = os .path .join (self .output_dir , f"{ base_name } .opus" )
74
74
75
- # 先保存原始Opus数据到临时文件
76
- with tempfile .NamedTemporaryFile (suffix = '.raw' , delete = False ) as temp_raw :
77
- for packet in opus_data :
78
- temp_raw .write (packet )
79
- temp_raw_path = temp_raw .name
75
+ # 解码Opus数据为PCM并保存为WAV文件
76
+ decoder = opuslib_next .Decoder (16000 , 1 ) # 16kHz, 单声道
77
+ pcm_data = []
78
+
79
+ for opus_packet in opus_data :
80
+ try :
81
+ pcm_frame = decoder .decode (opus_packet , 960 ) # 960 samples = 60ms
82
+ pcm_data .append (pcm_frame )
83
+ except opuslib_next .OpusError as e :
84
+ logger .bind (tag = TAG ).error (f"Opus解码错误: { e } " , exc_info = True )
85
+
86
+ with wave .open (wav_path , "wb" ) as wf :
87
+ wf .setnchannels (1 )
88
+ wf .setsampwidth (2 ) # 2 bytes = 16-bit
89
+ wf .setframerate (16000 )
90
+ wf .writeframes (b"" .join (pcm_data ))
80
91
92
+ # 使用FFmpeg将WAV转换为标准Ogg Opus文件
81
93
try :
82
- # 使用FFmpeg将原始Opus数据转换为标准Ogg Opus文件
83
94
cmd = [
84
95
'ffmpeg' ,
85
- '-f' , 'data' , # 输入格式为原始数据
86
- '-i' , temp_raw_path , # 输入文件
87
- '-c:a' , 'copy' , # 复制音频流,不重新编码
88
- '-f' , 'opus' , # 输出格式为opus
96
+ '-i' , wav_path , # 输入WAV文件
97
+ '-c:a' , 'libopus' , # 使用libopus编码器
98
+ '-b:a' , '32k' , # 比特率
89
99
opus_path # 输出文件
90
100
]
91
101
92
- # 执行FFmpeg命令
93
102
result = subprocess .run (
94
103
cmd ,
95
104
stdout = subprocess .PIPE ,
@@ -104,29 +113,7 @@ def save_audio_to_file(self, opus_data: List[bytes], session_id: str) -> str:
104
113
105
114
except Exception as e :
106
115
logger .bind (tag = TAG ).error (f"保存Ogg Opus文件失败: { e } " , exc_info = True )
107
-
108
- finally :
109
- # 删除临时文件
110
- if os .path .exists (temp_raw_path ):
111
- os .remove (temp_raw_path )
112
-
113
- # 保存WAV文件的现有逻辑
114
- decoder = opuslib_next .Decoder (16000 , 1 )
115
- pcm_data = []
116
-
117
- for opus_packet in opus_data :
118
- try :
119
- pcm_frame = decoder .decode (opus_packet , 960 ) # 960 samples = 60ms
120
- pcm_data .append (pcm_frame )
121
- except opuslib_next .OpusError as e :
122
- logger .bind (tag = TAG ).error (f"Opus解码错误: { e } " , exc_info = True )
123
-
124
- with wave .open (wav_path , "wb" ) as wf :
125
- wf .setnchannels (1 )
126
- wf .setsampwidth (2 ) # 2 bytes = 16-bit
127
- wf .setframerate (16000 )
128
- wf .writeframes (b"" .join (pcm_data ))
129
-
116
+
130
117
return wav_path
131
118
132
119
def speech_to_text (self , opus_data : List [bytes ], session_id : str ) -> Tuple [Optional [str ], Optional [str ]]:
0 commit comments