Skip to content

Commit 3d3d16a

Browse files
feat(api): realtime API updates
1 parent 463e870 commit 3d3d16a

File tree

163 files changed

+7657
-486
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

163 files changed

+7657
-486
lines changed

.stats.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
configured_endpoints: 119
2-
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-8517ffa1004e31ca2523d617629e64be6fe4f13403ddfd9db5b3be002656cbde.yml
3-
openapi_spec_hash: b64dd8c8b23082a7aa2a3e5c5fffd8bd
4-
config_hash: fe0ea26680ac2075a6cd66416aefe7db
1+
configured_endpoints: 118
2+
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-356b4364203ff36d7724074cd04f6e684253bfcc3c9d969122d730aa7bc51b46.yml
3+
openapi_spec_hash: 4ab8e96f52699bc3d2b0c4432aa92af8
4+
config_hash: b854932c0ea24b400bdd64e4376936bd

README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,7 @@ async def main():
226226
asyncio.run(main())
227227
```
228228

229-
## Realtime API beta
229+
## Realtime API
230230

231231
The Realtime API enables you to build low-latency, multi-modal conversational experiences. It currently supports text and audio as both input and output, as well as [function calling](https://platform.openai.com/docs/guides/function-calling) through a WebSocket connection.
232232

@@ -243,7 +243,7 @@ from openai import AsyncOpenAI
243243
async def main():
244244
client = AsyncOpenAI()
245245

246-
async with client.beta.realtime.connect(model="gpt-4o-realtime-preview") as connection:
246+
async with client.realtime.connect(model="gpt-realtime") as connection:
247247
await connection.session.update(session={'modalities': ['text']})
248248

249249
await connection.conversation.item.create(
@@ -277,7 +277,7 @@ Whenever an error occurs, the Realtime API will send an [`error` event](https://
277277
```py
278278
client = AsyncOpenAI()
279279

280-
async with client.beta.realtime.connect(model="gpt-4o-realtime-preview") as connection:
280+
async with client.realtime.connect(model="gpt-realtime") as connection:
281281
...
282282
async for event in connection:
283283
if event.type == 'error':

api.md

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -431,6 +431,7 @@ from openai.types.webhooks import (
431431
FineTuningJobCancelledWebhookEvent,
432432
FineTuningJobFailedWebhookEvent,
433433
FineTuningJobSucceededWebhookEvent,
434+
RealtimeCallIncomingWebhookEvent,
434435
ResponseCancelledWebhookEvent,
435436
ResponseCompletedWebhookEvent,
436437
ResponseFailedWebhookEvent,
@@ -832,6 +833,7 @@ from openai.types.responses import (
832833
ToolChoiceMcp,
833834
ToolChoiceOptions,
834835
ToolChoiceTypes,
836+
WebSearchPreviewTool,
835837
WebSearchTool,
836838
)
837839
```
@@ -855,6 +857,115 @@ Methods:
855857

856858
- <code title="get /responses/{response_id}/input_items">client.responses.input_items.<a href="./src/openai/resources/responses/input_items.py">list</a>(response_id, \*\*<a href="src/openai/types/responses/input_item_list_params.py">params</a>) -> <a href="./src/openai/types/responses/response_item.py">SyncCursorPage[ResponseItem]</a></code>
857859

860+
# Realtime
861+
862+
Types:
863+
864+
```python
865+
from openai.types.realtime import (
866+
ConversationCreatedEvent,
867+
ConversationItem,
868+
ConversationItemAdded,
869+
ConversationItemCreateEvent,
870+
ConversationItemCreatedEvent,
871+
ConversationItemDeleteEvent,
872+
ConversationItemDeletedEvent,
873+
ConversationItemDone,
874+
ConversationItemInputAudioTranscriptionCompletedEvent,
875+
ConversationItemInputAudioTranscriptionDeltaEvent,
876+
ConversationItemInputAudioTranscriptionFailedEvent,
877+
ConversationItemInputAudioTranscriptionSegment,
878+
ConversationItemRetrieveEvent,
879+
ConversationItemTruncateEvent,
880+
ConversationItemTruncatedEvent,
881+
ConversationItemWithReference,
882+
InputAudioBufferAppendEvent,
883+
InputAudioBufferClearEvent,
884+
InputAudioBufferClearedEvent,
885+
InputAudioBufferCommitEvent,
886+
InputAudioBufferCommittedEvent,
887+
InputAudioBufferSpeechStartedEvent,
888+
InputAudioBufferSpeechStoppedEvent,
889+
InputAudioBufferTimeoutTriggered,
890+
LogProbProperties,
891+
McpListToolsCompleted,
892+
McpListToolsFailed,
893+
McpListToolsInProgress,
894+
OutputAudioBufferClearEvent,
895+
RateLimitsUpdatedEvent,
896+
RealtimeAudioConfig,
897+
RealtimeClientEvent,
898+
RealtimeClientSecretConfig,
899+
RealtimeConversationItemAssistantMessage,
900+
RealtimeConversationItemFunctionCall,
901+
RealtimeConversationItemFunctionCallOutput,
902+
RealtimeConversationItemSystemMessage,
903+
RealtimeConversationItemUserMessage,
904+
RealtimeError,
905+
RealtimeErrorEvent,
906+
RealtimeMcpApprovalRequest,
907+
RealtimeMcpApprovalResponse,
908+
RealtimeMcpListTools,
909+
RealtimeMcpProtocolError,
910+
RealtimeMcpToolCall,
911+
RealtimeMcpToolExecutionError,
912+
RealtimeMcphttpError,
913+
RealtimeResponse,
914+
RealtimeResponseStatus,
915+
RealtimeResponseUsage,
916+
RealtimeResponseUsageInputTokenDetails,
917+
RealtimeResponseUsageOutputTokenDetails,
918+
RealtimeServerEvent,
919+
RealtimeSession,
920+
RealtimeSessionCreateRequest,
921+
RealtimeToolChoiceConfig,
922+
RealtimeToolsConfig,
923+
RealtimeToolsConfigUnion,
924+
RealtimeTracingConfig,
925+
RealtimeTranscriptionSessionCreateRequest,
926+
RealtimeTruncation,
927+
ResponseAudioDeltaEvent,
928+
ResponseAudioDoneEvent,
929+
ResponseAudioTranscriptDeltaEvent,
930+
ResponseAudioTranscriptDoneEvent,
931+
ResponseCancelEvent,
932+
ResponseContentPartAddedEvent,
933+
ResponseContentPartDoneEvent,
934+
ResponseCreateEvent,
935+
ResponseCreatedEvent,
936+
ResponseDoneEvent,
937+
ResponseFunctionCallArgumentsDeltaEvent,
938+
ResponseFunctionCallArgumentsDoneEvent,
939+
ResponseMcpCallArgumentsDelta,
940+
ResponseMcpCallArgumentsDone,
941+
ResponseMcpCallCompleted,
942+
ResponseMcpCallFailed,
943+
ResponseMcpCallInProgress,
944+
ResponseOutputItemAddedEvent,
945+
ResponseOutputItemDoneEvent,
946+
ResponseTextDeltaEvent,
947+
ResponseTextDoneEvent,
948+
SessionCreatedEvent,
949+
SessionUpdateEvent,
950+
SessionUpdatedEvent,
951+
TranscriptionSessionCreated,
952+
TranscriptionSessionUpdate,
953+
TranscriptionSessionUpdatedEvent,
954+
)
955+
```
956+
957+
## ClientSecrets
958+
959+
Types:
960+
961+
```python
962+
from openai.types.realtime import RealtimeSessionCreateResponse, ClientSecretCreateResponse
963+
```
964+
965+
Methods:
966+
967+
- <code title="post /realtime/client_secrets">client.realtime.client_secrets.<a href="./src/openai/resources/realtime/client_secrets.py">create</a>(\*\*<a href="src/openai/types/realtime/client_secret_create_params.py">params</a>) -> <a href="./src/openai/types/realtime/client_secret_create_response.py">ClientSecretCreateResponse</a></code>
968+
858969
# Conversations
859970

860971
Types:

examples/realtime/audio_util.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
import sounddevice as sd
1212
from pydub import AudioSegment
1313

14-
from openai.resources.beta.realtime.realtime import AsyncRealtimeConnection
14+
from openai.resources.realtime.realtime import AsyncRealtimeConnection
1515

1616
CHUNK_LENGTH_S = 0.05 # 50ms
1717
SAMPLE_RATE = 24000

examples/realtime/azure_realtime.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,16 @@ async def main() -> None:
2626
azure_ad_token_provider=get_bearer_token_provider(credential, "https://cognitiveservices.azure.com/.default"),
2727
api_version="2024-10-01-preview",
2828
)
29-
async with client.beta.realtime.connect(
30-
model="gpt-4o-realtime-preview", # deployment name for your model
29+
async with client.realtime.connect(
30+
model="gpt-realtime", # deployment name for your model
3131
) as connection:
32-
await connection.session.update(session={"modalities": ["text"]}) # type: ignore
32+
await connection.session.update(
33+
session={
34+
"output_modalities": ["text"],
35+
"model": "gpt-realtime",
36+
"type": "realtime",
37+
}
38+
)
3339
while True:
3440
user_input = input("Enter a message: ")
3541
if user_input == "q":
@@ -44,9 +50,9 @@ async def main() -> None:
4450
)
4551
await connection.response.create()
4652
async for event in connection:
47-
if event.type == "response.text.delta":
53+
if event.type == "response.output_text.delta":
4854
print(event.delta, flush=True, end="")
49-
elif event.type == "response.text.done":
55+
elif event.type == "response.output_text.done":
5056
print()
5157
elif event.type == "response.done":
5258
break

examples/realtime/push_to_talk_app.py

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,8 @@
3838
from textual.containers import Container
3939

4040
from openai import AsyncOpenAI
41-
from openai.types.beta.realtime.session import Session
42-
from openai.resources.beta.realtime.realtime import AsyncRealtimeConnection
41+
from openai.types.realtime.session import Session
42+
from openai.resources.realtime.realtime import AsyncRealtimeConnection
4343

4444

4545
class SessionDisplay(Static):
@@ -154,13 +154,21 @@ async def on_mount(self) -> None:
154154
self.run_worker(self.send_mic_audio())
155155

156156
async def handle_realtime_connection(self) -> None:
157-
async with self.client.beta.realtime.connect(model="gpt-4o-realtime-preview") as conn:
157+
async with self.client.realtime.connect(model="gpt-realtime") as conn:
158158
self.connection = conn
159159
self.connected.set()
160160

161161
# note: this is the default and can be omitted
162162
# if you want to manually handle VAD yourself, then set `'turn_detection': None`
163-
await conn.session.update(session={"turn_detection": {"type": "server_vad"}})
163+
await conn.session.update(
164+
session={
165+
"audio": {
166+
"input": {"turn_detection": {"type": "server_vad"}},
167+
},
168+
"model": "gpt-realtime",
169+
"type": "realtime",
170+
}
171+
)
164172

165173
acc_items: dict[str, Any] = {}
166174

@@ -176,7 +184,7 @@ async def handle_realtime_connection(self) -> None:
176184
self.session = event.session
177185
continue
178186

179-
if event.type == "response.audio.delta":
187+
if event.type == "response.output_audio.delta":
180188
if event.item_id != self.last_audio_item_id:
181189
self.audio_player.reset_frame_count()
182190
self.last_audio_item_id = event.item_id
@@ -185,7 +193,7 @@ async def handle_realtime_connection(self) -> None:
185193
self.audio_player.add_data(bytes_data)
186194
continue
187195

188-
if event.type == "response.audio_transcript.delta":
196+
if event.type == "response.output_audio_transcript.delta":
189197
try:
190198
text = acc_items[event.item_id]
191199
except KeyError:

examples/realtime/realtime.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
#!/usr/bin/env rye run python
2+
import asyncio
3+
4+
from openai import AsyncOpenAI
5+
6+
# Azure OpenAI Realtime Docs (this example uses the standard OpenAI client; see azure_realtime.py for the Azure variant)
7+
8+
# How-to: https://learn.microsoft.com/azure/ai-services/openai/how-to/realtime-audio
9+
# Supported models and API versions: https://learn.microsoft.com/azure/ai-services/openai/how-to/realtime-audio#supported-models
10+
# Entra ID auth: https://learn.microsoft.com/azure/ai-services/openai/how-to/managed-identity
11+
12+
13+
async def main() -> None:
14+
"""The following example demonstrates how to configure OpenAI to use the Realtime API.
15+
For an audio example, see push_to_talk_app.py and update the client and model parameters accordingly.
16+
17+
When prompted for user input, type a message and hit enter to send it to the model.
18+
Enter "q" to quit the conversation.
19+
"""
20+
21+
client = AsyncOpenAI()
22+
async with client.realtime.connect(
23+
model="gpt-realtime",
24+
) as connection:
25+
await connection.session.update(
26+
session={
27+
"output_modalities": ["text"],
28+
"model": "gpt-realtime",
29+
"type": "realtime",
30+
}
31+
)
32+
while True:
33+
user_input = input("Enter a message: ")
34+
if user_input == "q":
35+
break
36+
37+
await connection.conversation.item.create(
38+
item={
39+
"type": "message",
40+
"role": "user",
41+
"content": [{"type": "input_text", "text": user_input}],
42+
}
43+
)
44+
await connection.response.create()
45+
async for event in connection:
46+
if event.type == "response.output_text.delta":
47+
print(event.delta, flush=True, end="")
48+
elif event.type == "response.output_text.done":
49+
print()
50+
elif event.type == "response.done":
51+
break
52+
53+
54+
asyncio.run(main())

src/openai/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -379,6 +379,7 @@ def _reset_client() -> None: # type: ignore[reportUnusedFunction]
379379
models as models,
380380
batches as batches,
381381
uploads as uploads,
382+
realtime as realtime,
382383
webhooks as webhooks,
383384
responses as responses,
384385
containers as containers,

0 commit comments

Comments
 (0)