Skip to content

Commit add3fb4

Browse files
author
jiangtao
committed
支持输出文字
1 parent 60752f6 commit add3fb4

File tree

1 file changed

+28
-5
lines changed

1 file changed

+28
-5
lines changed

core/utils/asr.py

Lines changed: 28 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,8 @@
1414
from funasr import AutoModel
1515
from funasr.utils.postprocess_utils import rich_transcription_postprocess
1616

17+
import struct
18+
1719
TAG = __name__
1820
logger = setup_logging()
1921

@@ -63,19 +65,40 @@ def __init__(self, config: dict, delete_audio_file: bool):
6365
)
6466

6567
def save_audio_to_file(self, opus_data: List[bytes], session_id: str) -> str:
66-
"""将Opus音频数据解码并保存为WAV文件和Opus文件"""
68+
"""将Opus音频数据解码并保存为WAV文件和标准的Ogg Opus文件"""
6769
base_name = f"asr_{session_id}_{uuid.uuid4()}"
6870
wav_path = os.path.join(self.output_dir, f"{base_name}.wav")
6971
opus_path = os.path.join(self.output_dir, f"{base_name}.opus")
7072

7173
# 保存标准的Ogg Opus文件
7274
try:
73-
with pyogg.OggOpusWriter(opus_path, sample_rate=16000, channels=1) as writer:
75+
with open(opus_path, 'wb') as f:
76+
# Ogg header
77+
f.write(b'OggS\x00') # 捕获模式
78+
f.write(b'\x02') # 版本
79+
f.write(b'\x00') # header type
80+
81+
# Opus header
82+
header = bytearray()
83+
header.extend(b'OpusHead') # Magic signature
84+
header.extend([1]) # 版本
85+
header.extend([1]) # 声道数
86+
header.extend(struct.pack('<H', 0)) # 预跳过采样数
87+
header.extend(struct.pack('<I', 16000)) # 采样率
88+
header.extend(struct.pack('<H', 0)) # 输出增益
89+
90+
# 写入头部长度
91+
f.write(struct.pack('<I', len(header)))
92+
f.write(header)
93+
94+
# 写入音频数据
7495
for packet in opus_data:
75-
writer.write(packet)
76-
logger.bind(tag=TAG).info(f"已保存Opus文件: {opus_path}")
96+
f.write(struct.pack('<I', len(packet))) # 包长度
97+
f.write(packet) # 包数据
98+
99+
logger.bind(tag=TAG).info(f"已保存Ogg Opus文件: {opus_path}")
77100
except Exception as e:
78-
logger.bind(tag=TAG).error(f"保存Opus文件失败: {e}", exc_info=True)
101+
logger.bind(tag=TAG).error(f"保存Ogg Opus文件失败: {e}", exc_info=True)
79102

80103
# 保存WAV文件的现有逻辑
81104
decoder = opuslib_next.Decoder(16000, 1)

0 commit comments

Comments
 (0)