语音发送新增同步机制

jiangtao · jiangtao · commit d75a17a33dc0 · 2025-02-26T23:07:01.000+08:00
diff --git a/core/providers/tts/base.py b/core/providers/tts/base.py
@@ -42,56 +42,13 @@ def to_tts(self, text):
     async def text_to_speak(self, text, output_file):
         pass
 
-    def get_opus_data(self, file_path):
-        """直接从opus文件获取数据和时长"""
-        try:
-            # 读取opus文件
-            with open(file_path, 'rb') as f:
-                opus_data = f.read()
-            
-            # 获取音频时长
-            duration = self.get_audio_duration(file_path)
-            
-            opus_datas = []
-            current_pos = 0
-            
-            while current_pos < len(opus_data):
-                # 读取帧长度（前2个字节）
-                if current_pos + 2 > len(opus_data):
-                    break
-                    
-                frame_length = int.from_bytes(opus_data[current_pos:current_pos + 2], 'little')
-                current_pos += 2
-                
-                # 确保有足够的数据读取
-                if current_pos + frame_length > len(opus_data):
-                    break
-                    
-                # 读取帧数据
-                frame_data = opus_data[current_pos:current_pos + frame_length]
-                opus_datas.append(frame_data)
-                current_pos += frame_length
-            
-            return opus_datas, duration
-            
-        except Exception as e:
-            logger.bind(tag=TAG).error(f"处理opus文件失败: {e}")
-            return [], 0
-    
-    @abstractmethod
-    def get_audio_duration(self, file_path):
-        """获取音频时长的抽象方法，由具体实现类提供"""
-        pass
-
-    def wav_to_opus_data(self, file_path):
-        """保持原有接口兼容"""
-        if file_path.endswith('.opus'):
-            return self.get_opus_data(file_path)
-
-        file_type = os.path.splitext(file_path)[1]
+    def wav_to_opus_data(self, wav_file_path):
+        # 使用pydub加载PCM文件
+        # 获取文件后缀名
+        file_type = os.path.splitext(wav_file_path)[1]
         if file_type:
             file_type = file_type.lstrip('.')
-        audio = AudioSegment.from_file(file_path, format=file_type)
+        audio = AudioSegment.from_file(wav_file_path, format=file_type)
 
         duration = len(audio) / 1000.0
 
diff --git a/core/providers/tts/doubao.py b/core/providers/tts/doubao.py
@@ -35,11 +35,11 @@ async def text_to_speak(self, text, output_file):
             },
             "audio": {
                 "voice_type": self.voice,
-                "encoding": "ogg_opus",
+                "encoding": "wav",
                 "speed_ratio": 1.0,
                 "volume_ratio": 1.0,
                 "pitch_ratio": 1.0,
-                "rate": 16000
+                "rate": 16000,
             },
             "request": {
                 "reqid": str(uuid.uuid4()),
@@ -53,29 +53,6 @@ async def text_to_speak(self, text, output_file):
 
         resp = requests.post(self.api_url, json.dumps(request_json), headers=self.header)
         if "data" in resp.json():
-            duration = resp.json()["addition"]["duration"]
             data = resp.json()["data"]
-            
-            # 保存音频数据
-            with open(output_file, "wb") as f:
-                f.write(base64.b64decode(data))
-            
-            # 保存duration信息，去掉.opus后缀
-            base_path = output_file.rsplit('.opus', 1)[0]
-            duration_file = base_path + '.duration'
-            with open(duration_file, "w") as f:
-                f.write(str(duration))
-                
-            self.logger.bind(tag=TAG).info(f"音频文件生成成功: {text}")
-
-    def get_audio_duration(self, file_path):
-        """从duration文件中读取音频时长"""
-        try:
-            base_path = file_path.rsplit('.opus', 1)[0]
-            duration_file = base_path + '.duration'
-            with open(duration_file, "r") as f:
-                duration = float(f.read().strip()) / 1000  # 转换为秒
-            return duration
-        except Exception as e:
-            self.logger.bind(tag=TAG).error(f"读取音频时长失败: {e}")
-            return 0
+            with open(output_file, "wb") as file_to_save:  # closes the handle even if decode/write raises
+                file_to_save.write(base64.b64decode(data))