Skip to content

Commit 8eb5e07

Browse files
author
jiangtao
committed
支持输出文字
1 parent f0e9c2e commit 8eb5e07

File tree

1 file changed

+21
-34
lines changed

1 file changed

+21
-34
lines changed

core/utils/asr.py

Lines changed: 21 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -72,24 +72,33 @@ def save_audio_to_file(self, opus_data: List[bytes], session_id: str) -> str:
7272
wav_path = os.path.join(self.output_dir, f"{base_name}.wav")
7373
opus_path = os.path.join(self.output_dir, f"{base_name}.opus")
7474

75-
# 先保存原始Opus数据到临时文件
76-
with tempfile.NamedTemporaryFile(suffix='.raw', delete=False) as temp_raw:
77-
for packet in opus_data:
78-
temp_raw.write(packet)
79-
temp_raw_path = temp_raw.name
75+
# 解码Opus数据为PCM并保存为WAV文件
76+
decoder = opuslib_next.Decoder(16000, 1) # 16kHz, 单声道
77+
pcm_data = []
78+
79+
for opus_packet in opus_data:
80+
try:
81+
pcm_frame = decoder.decode(opus_packet, 960) # 960 samples = 60ms
82+
pcm_data.append(pcm_frame)
83+
except opuslib_next.OpusError as e:
84+
logger.bind(tag=TAG).error(f"Opus解码错误: {e}", exc_info=True)
85+
86+
with wave.open(wav_path, "wb") as wf:
87+
wf.setnchannels(1)
88+
wf.setsampwidth(2) # 2 bytes = 16-bit
89+
wf.setframerate(16000)
90+
wf.writeframes(b"".join(pcm_data))
8091

92+
# 使用FFmpeg将WAV转换为标准Ogg Opus文件
8193
try:
82-
# 使用FFmpeg将原始Opus数据转换为标准Ogg Opus文件
8394
cmd = [
8495
'ffmpeg',
85-
'-f', 'data', # 输入格式为原始数据
86-
'-i', temp_raw_path, # 输入文件
87-
'-c:a', 'copy', # 复制音频流,不重新编码
88-
'-f', 'opus', # 输出格式为opus
96+
'-i', wav_path, # 输入WAV文件
97+
'-c:a', 'libopus', # 使用libopus编码器
98+
'-b:a', '32k', # 比特率
8999
opus_path # 输出文件
90100
]
91101

92-
# 执行FFmpeg命令
93102
result = subprocess.run(
94103
cmd,
95104
stdout=subprocess.PIPE,
@@ -104,29 +113,7 @@ def save_audio_to_file(self, opus_data: List[bytes], session_id: str) -> str:
104113

105114
except Exception as e:
106115
logger.bind(tag=TAG).error(f"保存Ogg Opus文件失败: {e}", exc_info=True)
107-
108-
finally:
109-
# 删除临时文件
110-
if os.path.exists(temp_raw_path):
111-
os.remove(temp_raw_path)
112-
113-
# 保存WAV文件的现有逻辑
114-
decoder = opuslib_next.Decoder(16000, 1)
115-
pcm_data = []
116-
117-
for opus_packet in opus_data:
118-
try:
119-
pcm_frame = decoder.decode(opus_packet, 960) # 960 samples = 60ms
120-
pcm_data.append(pcm_frame)
121-
except opuslib_next.OpusError as e:
122-
logger.bind(tag=TAG).error(f"Opus解码错误: {e}", exc_info=True)
123-
124-
with wave.open(wav_path, "wb") as wf:
125-
wf.setnchannels(1)
126-
wf.setsampwidth(2) # 2 bytes = 16-bit
127-
wf.setframerate(16000)
128-
wf.writeframes(b"".join(pcm_data))
129-
116+
130117
return wav_path
131118

132119
def speech_to_text(self, opus_data: List[bytes], session_id: str) -> Tuple[Optional[str], Optional[str]]:

0 commit comments

Comments
 (0)