Skip to content

Commit d75a17a

Browse files
author
jiangtao
committed
语音发送新增同步机制
1 parent f516a42 commit d75a17a

File tree

2 files changed

+9
-75
lines changed

2 files changed

+9
-75
lines changed

core/providers/tts/base.py

Lines changed: 5 additions & 48 deletions
Original file line number | Diff line number | Diff line change
@@ -42,56 +42,13 @@ def to_tts(self, text):
4242
async def text_to_speak(self, text, output_file):
4343
pass
4444

45-
def get_opus_data(self, file_path):
46-
"""直接从opus文件获取数据和时长"""
47-
try:
48-
# 读取opus文件
49-
with open(file_path, 'rb') as f:
50-
opus_data = f.read()
51-
52-
# 获取音频时长
53-
duration = self.get_audio_duration(file_path)
54-
55-
opus_datas = []
56-
current_pos = 0
57-
58-
while current_pos < len(opus_data):
59-
# 读取帧长度(前2个字节)
60-
if current_pos + 2 > len(opus_data):
61-
break
62-
63-
frame_length = int.from_bytes(opus_data[current_pos:current_pos + 2], 'little')
64-
current_pos += 2
65-
66-
# 确保有足够的数据读取
67-
if current_pos + frame_length > len(opus_data):
68-
break
69-
70-
# 读取帧数据
71-
frame_data = opus_data[current_pos:current_pos + frame_length]
72-
opus_datas.append(frame_data)
73-
current_pos += frame_length
74-
75-
return opus_datas, duration
76-
77-
except Exception as e:
78-
logger.bind(tag=TAG).error(f"处理opus文件失败: {e}")
79-
return [], 0
80-
81-
@abstractmethod
82-
def get_audio_duration(self, file_path):
83-
"""获取音频时长的抽象方法,由具体实现类提供"""
84-
pass
85-
86-
def wav_to_opus_data(self, file_path):
87-
"""保持原有接口兼容"""
88-
if file_path.endswith('.opus'):
89-
return self.get_opus_data(file_path)
90-
91-
file_type = os.path.splitext(file_path)[1]
45+
def wav_to_opus_data(self, wav_file_path):
46+
# 使用pydub加载PCM文件
47+
# 获取文件后缀名
48+
file_type = os.path.splitext(wav_file_path)[1]
9249
if file_type:
9350
file_type = file_type.lstrip('.')
94-
audio = AudioSegment.from_file(file_path, format=file_type)
51+
audio = AudioSegment.from_file(wav_file_path, format=file_type)
9552

9653
duration = len(audio) / 1000.0
9754

core/providers/tts/doubao.py

Lines changed: 4 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -35,11 +35,11 @@ async def text_to_speak(self, text, output_file):
3535
},
3636
"audio": {
3737
"voice_type": self.voice,
38-
"encoding": "ogg_opus",
38+
"encoding": "wav",
3939
"speed_ratio": 1.0,
4040
"volume_ratio": 1.0,
4141
"pitch_ratio": 1.0,
42-
"rate": 16000
42+
"rate": 16000,
4343
},
4444
"request": {
4545
"reqid": str(uuid.uuid4()),
@@ -53,29 +53,6 @@ async def text_to_speak(self, text, output_file):
5353

5454
resp = requests.post(self.api_url, json.dumps(request_json), headers=self.header)
5555
if "data" in resp.json():
56-
duration = resp.json()["addition"]["duration"]
5756
data = resp.json()["data"]
58-
59-
# 保存音频数据
60-
with open(output_file, "wb") as f:
61-
f.write(base64.b64decode(data))
62-
63-
# 保存duration信息,去掉.opus后缀
64-
base_path = output_file.rsplit('.opus', 1)[0]
65-
duration_file = base_path + '.duration'
66-
with open(duration_file, "w") as f:
67-
f.write(str(duration))
68-
69-
self.logger.bind(tag=TAG).info(f"音频文件生成成功: {text}")
70-
71-
def get_audio_duration(self, file_path):
72-
"""从duration文件中读取音频时长"""
73-
try:
74-
base_path = file_path.rsplit('.opus', 1)[0]
75-
duration_file = base_path + '.duration'
76-
with open(duration_file, "r") as f:
77-
duration = float(f.read().strip()) / 1000 # 转换为秒
78-
return duration
79-
except Exception as e:
80-
self.logger.bind(tag=TAG).error(f"读取音频时长失败: {e}")
81-
return 0
57+
file_to_save = open(output_file, "wb")
58+
file_to_save.write(base64.b64decode(data))

0 commit comments

Comments
 (0)