From 847ff0b83841d9262ba0d9c4fdf46f0478004ad0 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Wed, 10 Sep 2025 11:03:41 -0400 Subject: [PATCH] release: 1.107.1 (#2619) * chore(api): fix realtime GA types * release: 1.107.1 --------- Co-authored-by: stainless-app[bot] <142633134+stainless-app[bot]@users.noreply.github.com> --- .release-please-manifest.json | 2 +- .stats.yml | 6 +- CHANGELOG.md | 8 ++ api.md | 5 +- pyproject.toml | 2 +- src/openai/_version.py | 2 +- src/openai/resources/realtime/realtime.py | 38 +------ src/openai/types/realtime/__init__.py | 14 +-- .../realtime/client_secret_create_response.py | 7 +- .../realtime_audio_input_turn_detection.py | 2 +- ...altime_audio_input_turn_detection_param.py | 2 +- .../types/realtime/realtime_client_event.py | 2 - .../realtime/realtime_client_event_param.py | 2 - .../{models.py => realtime_function_tool.py} | 4 +- ...ram.py => realtime_function_tool_param.py} | 4 +- .../realtime_response_create_params.py | 4 +- .../realtime_response_create_params_param.py | 4 +- .../types/realtime/realtime_server_event.py | 4 - .../realtime_session_create_response.py | 18 ++-- .../realtime/realtime_tools_config_param.py | 4 +- .../realtime/realtime_tools_config_union.py | 4 +- .../realtime_tools_config_union_param.py | 4 +- ...ime_transcription_session_client_secret.py | 20 ---- ...e_transcription_session_create_response.py | 61 ++++++++---- ...ption_session_input_audio_transcription.py | 36 ------- .../realtime/transcription_session_created.py | 24 ----- .../realtime/transcription_session_update.py | 98 ------------------ .../transcription_session_update_param.py | 99 ------------------- .../transcription_session_updated_event.py | 24 ----- 29 files changed, 94 insertions(+), 410 deletions(-) rename src/openai/types/realtime/{models.py => realtime_function_tool.py} (89%) rename src/openai/types/realtime/{models_param.py => realtime_function_tool_param.py} (85%) delete mode 100644 src/openai/types/realtime/realtime_transcription_session_client_secret.py delete mode 100644 src/openai/types/realtime/realtime_transcription_session_input_audio_transcription.py delete mode 100644 src/openai/types/realtime/transcription_session_created.py delete mode 100644 src/openai/types/realtime/transcription_session_update.py delete mode 100644 src/openai/types/realtime/transcription_session_update_param.py delete mode 100644 src/openai/types/realtime/transcription_session_updated_event.py diff --git a/.release-please-manifest.json b/.release-please-manifest.json index 12cec28d56..25880b2e7b 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "1.107.0" + ".": "1.107.1" } \ No newline at end of file diff --git a/.stats.yml b/.stats.yml index 36a3c7f587..2aa16be875 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,4 +1,4 @@ configured_endpoints: 118 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-7807ec6037efcee1af7decbfd3974a42b761fb6c6a71b4050fe43484d7fcbac4.yml -openapi_spec_hash: da6851e3891ad2659a50ed6a736fd32a -config_hash: 74d955cdc2377213f5268ea309090f6c +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-16cb18bed32bae8c5840fb39a1bf664026cc40463ad0c487dcb0df1bd3d72db0.yml +openapi_spec_hash: 4cb51b22f98dee1a90bc7add82d1d132 +config_hash: 930dac3aa861344867e4ac84f037b5df diff --git a/CHANGELOG.md b/CHANGELOG.md index 76d5dcb2dd..19eab7da7e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,13 @@ # Changelog +## 1.107.1 (2025-09-10) + +Full Changelog: [v1.107.0...v1.107.1](https://github.com/openai/openai-python/compare/v1.107.0...v1.107.1) + +### Chores + +* **api:** fix realtime GA types ([570fc5a](https://github.com/openai/openai-python/commit/570fc5a28ada665fd658b24675361680cfeb086f)) + ## 1.107.0 (2025-09-08) Full Changelog: [v1.106.1...v1.107.0](https://github.com/openai/openai-python/compare/v1.106.1...v1.107.0) diff --git a/api.md b/api.md index 7c947fffe1..73b8427387 100644 --- a/api.md +++ b/api.md @@ -892,7 +892,6 @@ from openai.types.realtime import ( McpListToolsCompleted, McpListToolsFailed, McpListToolsInProgress, - Models, NoiseReductionType, OutputAudioBufferClearEvent, RateLimitsUpdatedEvent, @@ -909,6 +908,7 @@ from openai.types.realtime import ( RealtimeConversationItemUserMessage, RealtimeError, RealtimeErrorEvent, + RealtimeFunctionTool, RealtimeMcpApprovalRequest, RealtimeMcpApprovalResponse, RealtimeMcpListTools, @@ -961,7 +961,6 @@ from openai.types.realtime import ( SessionCreatedEvent, SessionUpdateEvent, SessionUpdatedEvent, - TranscriptionSessionCreated, TranscriptionSessionUpdate, TranscriptionSessionUpdatedEvent, ) @@ -975,9 +974,7 @@ Types: from openai.types.realtime import ( RealtimeSessionClientSecret, RealtimeSessionCreateResponse, - RealtimeTranscriptionSessionClientSecret, RealtimeTranscriptionSessionCreateResponse, - RealtimeTranscriptionSessionInputAudioTranscription, RealtimeTranscriptionSessionTurnDetection, ClientSecretCreateResponse, ) diff --git a/pyproject.toml b/pyproject.toml index 5c3985cc7c..326dc5a004 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "openai" -version = "1.107.0" +version = "1.107.1" description = "The official Python library for the openai API" dynamic = ["readme"] license = "Apache-2.0" diff --git a/src/openai/_version.py b/src/openai/_version.py index 06826fc4de..f337b21cd5 100644 --- a/src/openai/_version.py +++ b/src/openai/_version.py @@ -1,4 +1,4 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. __title__ = "openai" -__version__ = "1.107.0" # x-release-please-version +__version__ = "1.107.1" # x-release-please-version diff --git a/src/openai/resources/realtime/realtime.py b/src/openai/resources/realtime/realtime.py index 81e6dc54f5..64fca72915 100644 --- a/src/openai/resources/realtime/realtime.py +++ b/src/openai/resources/realtime/realtime.py @@ -32,7 +32,7 @@ ClientSecretsWithStreamingResponse, AsyncClientSecretsWithStreamingResponse, ) -from ...types.realtime import session_update_event_param, transcription_session_update_param +from ...types.realtime import session_update_event_param from ...types.websocket_connection_options import WebsocketConnectionOptions from ...types.realtime.realtime_client_event import RealtimeClientEvent from ...types.realtime.realtime_server_event import RealtimeServerEvent @@ -199,7 +199,6 @@ class AsyncRealtimeConnection: input_audio_buffer: AsyncRealtimeInputAudioBufferResource conversation: AsyncRealtimeConversationResource output_audio_buffer: AsyncRealtimeOutputAudioBufferResource - transcription_session: AsyncRealtimeTranscriptionSessionResource _connection: AsyncWebsocketConnection @@ -211,7 +210,6 @@ def __init__(self, connection: AsyncWebsocketConnection) -> None: self.input_audio_buffer = AsyncRealtimeInputAudioBufferResource(self) self.conversation = AsyncRealtimeConversationResource(self) self.output_audio_buffer = AsyncRealtimeOutputAudioBufferResource(self) - self.transcription_session = AsyncRealtimeTranscriptionSessionResource(self) async def __aiter__(self) -> AsyncIterator[RealtimeServerEvent]: """ @@ -381,7 +379,6 @@ class RealtimeConnection: input_audio_buffer: RealtimeInputAudioBufferResource conversation: RealtimeConversationResource output_audio_buffer: RealtimeOutputAudioBufferResource - transcription_session: RealtimeTranscriptionSessionResource _connection: WebsocketConnection @@ -393,7 +390,6 @@ def __init__(self, connection: WebsocketConnection) -> None: self.input_audio_buffer = RealtimeInputAudioBufferResource(self) self.conversation = RealtimeConversationResource(self) self.output_audio_buffer = RealtimeOutputAudioBufferResource(self) - self.transcription_session = RealtimeTranscriptionSessionResource(self) def __iter__(self) -> Iterator[RealtimeServerEvent]: """ @@ -565,8 +561,7 @@ def update(self, *, session: session_update_event_param.Session, event_id: str | """ Send this event to update the session’s configuration. The client may send this event at any time to update any field - except for `voice` and `model`. `voice` can be updated only if there have been no other - audio outputs yet. + except for `voice` and `model`. `voice` can be updated only if there have been no other audio outputs yet. When the server receives a `session.update`, it will respond with a `session.updated` event showing the full, effective configuration. @@ -800,19 +795,6 @@ def clear(self, *, event_id: str | NotGiven = NOT_GIVEN) -> None: ) -class RealtimeTranscriptionSessionResource(BaseRealtimeConnectionResource): - def update( - self, *, session: transcription_session_update_param.Session, event_id: str | NotGiven = NOT_GIVEN - ) -> None: - """Send this event to update a transcription session.""" - self._connection.send( - cast( - RealtimeClientEventParam, - strip_not_given({"type": "transcription_session.update", "session": session, "event_id": event_id}), - ) - ) - - class BaseAsyncRealtimeConnectionResource: def __init__(self, connection: AsyncRealtimeConnection) -> None: self._connection = connection @@ -825,8 +807,7 @@ async def update( """ Send this event to update the session’s configuration. The client may send this event at any time to update any field - except for `voice` and `model`. `voice` can be updated only if there have been no other - audio outputs yet. + except for `voice` and `model`. `voice` can be updated only if there have been no other audio outputs yet. When the server receives a `session.update`, it will respond with a `session.updated` event showing the full, effective configuration. @@ -1058,16 +1039,3 @@ async def clear(self, *, event_id: str | NotGiven = NOT_GIVEN) -> None: await self._connection.send( cast(RealtimeClientEventParam, strip_not_given({"type": "output_audio_buffer.clear", "event_id": event_id})) ) - - -class AsyncRealtimeTranscriptionSessionResource(BaseAsyncRealtimeConnectionResource): - async def update( - self, *, session: transcription_session_update_param.Session, event_id: str | NotGiven = NOT_GIVEN - ) -> None: - """Send this event to update a transcription session.""" - await self._connection.send( - cast( - RealtimeClientEventParam, - strip_not_given({"type": "transcription_session.update", "session": session, "event_id": event_id}), - ) - ) diff --git a/src/openai/types/realtime/__init__.py b/src/openai/types/realtime/__init__.py index 6873ba6a2a..2d947c8a2f 100644 --- a/src/openai/types/realtime/__init__.py +++ b/src/openai/types/realtime/__init__.py @@ -2,8 +2,6 @@ from __future__ import annotations -from .models import Models as Models -from .models_param import ModelsParam as ModelsParam from .realtime_error import RealtimeError as RealtimeError from .conversation_item import ConversationItem as ConversationItem from .realtime_response import RealtimeResponse as RealtimeResponse @@ -25,6 +23,7 @@ from .session_updated_event import SessionUpdatedEvent as SessionUpdatedEvent from .conversation_item_done import ConversationItemDone as ConversationItemDone from .realtime_audio_formats import RealtimeAudioFormats as RealtimeAudioFormats +from .realtime_function_tool import RealtimeFunctionTool as RealtimeFunctionTool from .realtime_mcp_tool_call import RealtimeMcpToolCall as RealtimeMcpToolCall from .realtime_mcphttp_error import RealtimeMcphttpError as RealtimeMcphttpError from .response_created_event import ResponseCreatedEvent as ResponseCreatedEvent @@ -60,15 +59,14 @@ from .response_mcp_call_completed import ResponseMcpCallCompleted as ResponseMcpCallCompleted from .realtime_audio_config_output import RealtimeAudioConfigOutput as RealtimeAudioConfigOutput from .realtime_audio_formats_param import RealtimeAudioFormatsParam as RealtimeAudioFormatsParam +from .realtime_function_tool_param import RealtimeFunctionToolParam as RealtimeFunctionToolParam from .realtime_mcp_tool_call_param import RealtimeMcpToolCallParam as RealtimeMcpToolCallParam from .realtime_mcphttp_error_param import RealtimeMcphttpErrorParam as RealtimeMcphttpErrorParam -from .transcription_session_update import TranscriptionSessionUpdate as TranscriptionSessionUpdate from .client_secret_create_response import ClientSecretCreateResponse as ClientSecretCreateResponse from .realtime_mcp_approval_request import RealtimeMcpApprovalRequest as RealtimeMcpApprovalRequest from .realtime_mcp_list_tools_param import RealtimeMcpListToolsParam as RealtimeMcpListToolsParam from .realtime_tracing_config_param import RealtimeTracingConfigParam as RealtimeTracingConfigParam from .response_mcp_call_in_progress import ResponseMcpCallInProgress as ResponseMcpCallInProgress -from .transcription_session_created import TranscriptionSessionCreated as TranscriptionSessionCreated from .conversation_item_create_event import ConversationItemCreateEvent as ConversationItemCreateEvent from .conversation_item_delete_event import ConversationItemDeleteEvent as ConversationItemDeleteEvent from .input_audio_buffer_clear_event import InputAudioBufferClearEvent as InputAudioBufferClearEvent @@ -100,11 +98,9 @@ from .response_mcp_call_arguments_delta import ResponseMcpCallArgumentsDelta as ResponseMcpCallArgumentsDelta from .input_audio_buffer_committed_event import InputAudioBufferCommittedEvent as InputAudioBufferCommittedEvent from .realtime_audio_config_output_param import RealtimeAudioConfigOutputParam as RealtimeAudioConfigOutputParam -from .transcription_session_update_param import TranscriptionSessionUpdateParam as TranscriptionSessionUpdateParam from .realtime_audio_input_turn_detection import RealtimeAudioInputTurnDetection as RealtimeAudioInputTurnDetection from .realtime_mcp_approval_request_param import RealtimeMcpApprovalRequestParam as RealtimeMcpApprovalRequestParam from .realtime_truncation_retention_ratio import RealtimeTruncationRetentionRatio as RealtimeTruncationRetentionRatio -from .transcription_session_updated_event import TranscriptionSessionUpdatedEvent as TranscriptionSessionUpdatedEvent from .conversation_item_create_event_param import ConversationItemCreateEventParam as ConversationItemCreateEventParam from .conversation_item_delete_event_param import ConversationItemDeleteEventParam as ConversationItemDeleteEventParam from .input_audio_buffer_clear_event_param import InputAudioBufferClearEventParam as InputAudioBufferClearEventParam @@ -181,9 +177,6 @@ from .realtime_response_usage_output_token_details import ( RealtimeResponseUsageOutputTokenDetails as RealtimeResponseUsageOutputTokenDetails, ) -from .realtime_transcription_session_client_secret import ( - RealtimeTranscriptionSessionClientSecret as RealtimeTranscriptionSessionClientSecret, -) from .response_function_call_arguments_delta_event import ( ResponseFunctionCallArgumentsDeltaEvent as ResponseFunctionCallArgumentsDeltaEvent, ) @@ -229,9 +222,6 @@ from .conversation_item_input_audio_transcription_failed_event import ( ConversationItemInputAudioTranscriptionFailedEvent as ConversationItemInputAudioTranscriptionFailedEvent, ) -from .realtime_transcription_session_input_audio_transcription import ( - RealtimeTranscriptionSessionInputAudioTranscription as RealtimeTranscriptionSessionInputAudioTranscription, -) from .realtime_transcription_session_audio_input_turn_detection import ( RealtimeTranscriptionSessionAudioInputTurnDetection as RealtimeTranscriptionSessionAudioInputTurnDetection, ) diff --git a/src/openai/types/realtime/client_secret_create_response.py b/src/openai/types/realtime/client_secret_create_response.py index 8d61be3ab7..2aed66a25b 100644 --- a/src/openai/types/realtime/client_secret_create_response.py +++ b/src/openai/types/realtime/client_secret_create_response.py @@ -1,15 +1,18 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing import Union -from typing_extensions import TypeAlias +from typing_extensions import Annotated, TypeAlias +from ..._utils import PropertyInfo from ..._models import BaseModel from .realtime_session_create_response import RealtimeSessionCreateResponse from .realtime_transcription_session_create_response import RealtimeTranscriptionSessionCreateResponse __all__ = ["ClientSecretCreateResponse", "Session"] -Session: TypeAlias = Union[RealtimeSessionCreateResponse, RealtimeTranscriptionSessionCreateResponse] +Session: TypeAlias = Annotated[ + Union[RealtimeSessionCreateResponse, RealtimeTranscriptionSessionCreateResponse], PropertyInfo(discriminator="type") +] class ClientSecretCreateResponse(BaseModel): diff --git a/src/openai/types/realtime/realtime_audio_input_turn_detection.py b/src/openai/types/realtime/realtime_audio_input_turn_detection.py index ea9423f6a1..1c736ab2b7 100644 --- a/src/openai/types/realtime/realtime_audio_input_turn_detection.py +++ b/src/openai/types/realtime/realtime_audio_input_turn_detection.py @@ -27,7 +27,7 @@ class RealtimeAudioInputTurnDetection(BaseModel): idle_timeout_ms: Optional[int] = None """ Optional idle timeout after which turn detection will auto-timeout when no - additional audio is received. + additional audio is received and emits a `timeout_triggered` event. """ interrupt_response: Optional[bool] = None diff --git a/src/openai/types/realtime/realtime_audio_input_turn_detection_param.py b/src/openai/types/realtime/realtime_audio_input_turn_detection_param.py index ec398f52e6..79cabec708 100644 --- a/src/openai/types/realtime/realtime_audio_input_turn_detection_param.py +++ b/src/openai/types/realtime/realtime_audio_input_turn_detection_param.py @@ -27,7 +27,7 @@ class RealtimeAudioInputTurnDetectionParam(TypedDict, total=False): idle_timeout_ms: Optional[int] """ Optional idle timeout after which turn detection will auto-timeout when no - additional audio is received. + additional audio is received and emits a `timeout_triggered` event. """ interrupt_response: bool diff --git a/src/openai/types/realtime/realtime_client_event.py b/src/openai/types/realtime/realtime_client_event.py index 8c2c95e849..3b1c348daa 100644 --- a/src/openai/types/realtime/realtime_client_event.py +++ b/src/openai/types/realtime/realtime_client_event.py @@ -7,7 +7,6 @@ from .session_update_event import SessionUpdateEvent from .response_cancel_event import ResponseCancelEvent from .response_create_event import ResponseCreateEvent -from .transcription_session_update import TranscriptionSessionUpdate from .conversation_item_create_event import ConversationItemCreateEvent from .conversation_item_delete_event import ConversationItemDeleteEvent from .input_audio_buffer_clear_event import InputAudioBufferClearEvent @@ -32,7 +31,6 @@ ResponseCancelEvent, ResponseCreateEvent, SessionUpdateEvent, - TranscriptionSessionUpdate, ], PropertyInfo(discriminator="type"), ] diff --git a/src/openai/types/realtime/realtime_client_event_param.py b/src/openai/types/realtime/realtime_client_event_param.py index 8e042dd64b..cda5766e2a 100644 --- a/src/openai/types/realtime/realtime_client_event_param.py +++ b/src/openai/types/realtime/realtime_client_event_param.py @@ -8,7 +8,6 @@ from .session_update_event_param import SessionUpdateEventParam from .response_cancel_event_param import ResponseCancelEventParam from .response_create_event_param import ResponseCreateEventParam -from .transcription_session_update_param import TranscriptionSessionUpdateParam from .conversation_item_create_event_param import ConversationItemCreateEventParam from .conversation_item_delete_event_param import ConversationItemDeleteEventParam from .input_audio_buffer_clear_event_param import InputAudioBufferClearEventParam @@ -32,5 +31,4 @@ ResponseCancelEventParam, ResponseCreateEventParam, SessionUpdateEventParam, - TranscriptionSessionUpdateParam, ] diff --git a/src/openai/types/realtime/models.py b/src/openai/types/realtime/realtime_function_tool.py similarity index 89% rename from src/openai/types/realtime/models.py rename to src/openai/types/realtime/realtime_function_tool.py index d4827538a3..48dbf9929d 100644 --- a/src/openai/types/realtime/models.py +++ b/src/openai/types/realtime/realtime_function_tool.py @@ -5,10 +5,10 @@ from ..._models import BaseModel -__all__ = ["Models"] +__all__ = ["RealtimeFunctionTool"] -class Models(BaseModel): +class RealtimeFunctionTool(BaseModel): description: Optional[str] = None """ The description of the function, including guidance on when and how to call it, diff --git a/src/openai/types/realtime/models_param.py b/src/openai/types/realtime/realtime_function_tool_param.py similarity index 85% rename from src/openai/types/realtime/models_param.py rename to src/openai/types/realtime/realtime_function_tool_param.py index 1db2d7e464..f42e3e497c 100644 --- a/src/openai/types/realtime/models_param.py +++ b/src/openai/types/realtime/realtime_function_tool_param.py @@ -4,10 +4,10 @@ from typing_extensions import Literal, TypedDict -__all__ = ["ModelsParam"] +__all__ = ["RealtimeFunctionToolParam"] -class ModelsParam(TypedDict, total=False): +class RealtimeFunctionToolParam(TypedDict, total=False): description: str """ The description of the function, including guidance on when and how to call it, diff --git a/src/openai/types/realtime/realtime_response_create_params.py b/src/openai/types/realtime/realtime_response_create_params.py index 3b5a8907a1..4dfd1fd386 100644 --- a/src/openai/types/realtime/realtime_response_create_params.py +++ b/src/openai/types/realtime/realtime_response_create_params.py @@ -3,10 +3,10 @@ from typing import List, Union, Optional from typing_extensions import Literal, TypeAlias -from .models import Models from ..._models import BaseModel from ..shared.metadata import Metadata from .conversation_item import ConversationItem +from .realtime_function_tool import RealtimeFunctionTool from ..responses.response_prompt import ResponsePrompt from ..responses.tool_choice_mcp import ToolChoiceMcp from ..responses.tool_choice_options import ToolChoiceOptions @@ -18,7 +18,7 @@ ToolChoice: TypeAlias = Union[ToolChoiceOptions, ToolChoiceFunction, ToolChoiceMcp] -Tool: TypeAlias = Union[Models, RealtimeResponseCreateMcpTool] +Tool: TypeAlias = Union[RealtimeFunctionTool, RealtimeResponseCreateMcpTool] class RealtimeResponseCreateParams(BaseModel): diff --git a/src/openai/types/realtime/realtime_response_create_params_param.py b/src/openai/types/realtime/realtime_response_create_params_param.py index 6800d36a31..eceffcccb7 100644 --- a/src/openai/types/realtime/realtime_response_create_params_param.py +++ b/src/openai/types/realtime/realtime_response_create_params_param.py @@ -5,9 +5,9 @@ from typing import List, Union, Iterable, Optional from typing_extensions import Literal, TypeAlias, TypedDict -from .models_param import ModelsParam from ..shared_params.metadata import Metadata from .conversation_item_param import ConversationItemParam +from .realtime_function_tool_param import RealtimeFunctionToolParam from ..responses.tool_choice_options import ToolChoiceOptions from ..responses.response_prompt_param import ResponsePromptParam from ..responses.tool_choice_mcp_param import ToolChoiceMcpParam @@ -19,7 +19,7 @@ ToolChoice: TypeAlias = Union[ToolChoiceOptions, ToolChoiceFunctionParam, ToolChoiceMcpParam] -Tool: TypeAlias = Union[ModelsParam, RealtimeResponseCreateMcpToolParam] +Tool: TypeAlias = Union[RealtimeFunctionToolParam, RealtimeResponseCreateMcpToolParam] class RealtimeResponseCreateParamsParam(TypedDict, total=False): diff --git a/src/openai/types/realtime/realtime_server_event.py b/src/openai/types/realtime/realtime_server_event.py index 8094bcfa96..1605b81a97 100644 --- a/src/openai/types/realtime/realtime_server_event.py +++ b/src/openai/types/realtime/realtime_server_event.py @@ -25,7 +25,6 @@ from .response_audio_delta_event import ResponseAudioDeltaEvent from .response_mcp_call_completed import ResponseMcpCallCompleted from .response_mcp_call_in_progress import ResponseMcpCallInProgress -from .transcription_session_created import TranscriptionSessionCreated from .conversation_item_created_event import ConversationItemCreatedEvent from .conversation_item_deleted_event import ConversationItemDeletedEvent from .response_output_item_done_event import ResponseOutputItemDoneEvent @@ -37,7 +36,6 @@ from .response_content_part_added_event import ResponseContentPartAddedEvent from .response_mcp_call_arguments_delta import ResponseMcpCallArgumentsDelta from .input_audio_buffer_committed_event import InputAudioBufferCommittedEvent -from .transcription_session_updated_event import TranscriptionSessionUpdatedEvent from .input_audio_buffer_timeout_triggered import InputAudioBufferTimeoutTriggered from .response_audio_transcript_done_event import ResponseAudioTranscriptDoneEvent from .response_audio_transcript_delta_event import ResponseAudioTranscriptDeltaEvent @@ -137,8 +135,6 @@ class OutputAudioBufferCleared(BaseModel): ResponseTextDoneEvent, SessionCreatedEvent, SessionUpdatedEvent, - TranscriptionSessionUpdatedEvent, - TranscriptionSessionCreated, OutputAudioBufferStarted, OutputAudioBufferStopped, OutputAudioBufferCleared, diff --git a/src/openai/types/realtime/realtime_session_create_response.py b/src/openai/types/realtime/realtime_session_create_response.py index 9c10b84588..7779f07a6e 100644 --- a/src/openai/types/realtime/realtime_session_create_response.py +++ b/src/openai/types/realtime/realtime_session_create_response.py @@ -3,12 +3,12 @@ from typing import Dict, List, Union, Optional from typing_extensions import Literal, TypeAlias -from .models import Models from ..._models import BaseModel from .audio_transcription import AudioTranscription from .realtime_truncation import RealtimeTruncation from .noise_reduction_type import NoiseReductionType from .realtime_audio_formats import RealtimeAudioFormats +from .realtime_function_tool import RealtimeFunctionTool from ..responses.response_prompt import ResponsePrompt from ..responses.tool_choice_mcp import ToolChoiceMcp from ..responses.tool_choice_options import ToolChoiceOptions @@ -64,7 +64,7 @@ class AudioInputTurnDetection(BaseModel): idle_timeout_ms: Optional[int] = None """ Optional idle timeout after which turn detection will auto-timeout when no - additional audio is received. + additional audio is received and emits a `timeout_triggered` event. """ interrupt_response: Optional[bool] = None @@ -298,7 +298,7 @@ class ToolMcpTool(BaseModel): """ -Tool: TypeAlias = Union[Models, ToolMcpTool] +Tool: TypeAlias = Union[RealtimeFunctionTool, ToolMcpTool] class TracingTracingConfiguration(BaseModel): @@ -325,12 +325,15 @@ class TracingTracingConfiguration(BaseModel): class RealtimeSessionCreateResponse(BaseModel): + client_secret: RealtimeSessionClientSecret + """Ephemeral key returned by the API.""" + + type: Literal["realtime"] + """The type of session to create. Always `realtime` for the Realtime API.""" + audio: Optional[Audio] = None """Configuration for input and output audio.""" - client_secret: Optional[RealtimeSessionClientSecret] = None - """Ephemeral key returned by the API.""" - include: Optional[List[Literal["item.input_audio_transcription.logprobs"]]] = None """Additional fields to include in server outputs. @@ -415,6 +418,3 @@ class RealtimeSessionCreateResponse(BaseModel): Controls how the realtime conversation is truncated prior to model inference. The default is `auto`. """ - - type: Optional[Literal["realtime"]] = None - """The type of session to create. Always `realtime` for the Realtime API.""" diff --git a/src/openai/types/realtime/realtime_tools_config_param.py b/src/openai/types/realtime/realtime_tools_config_param.py index 700b548fe2..630fc74691 100644 --- a/src/openai/types/realtime/realtime_tools_config_param.py +++ b/src/openai/types/realtime/realtime_tools_config_param.py @@ -6,7 +6,7 @@ from typing_extensions import Literal, Required, TypeAlias, TypedDict from ..._types import SequenceNotStr -from .models_param import ModelsParam +from .realtime_function_tool_param import RealtimeFunctionToolParam __all__ = [ "RealtimeToolsConfigParam", @@ -138,6 +138,6 @@ class Mcp(TypedDict, total=False): """ -RealtimeToolsConfigUnionParam: TypeAlias = Union[ModelsParam, Mcp] +RealtimeToolsConfigUnionParam: TypeAlias = Union[RealtimeFunctionToolParam, Mcp] RealtimeToolsConfigParam: TypeAlias = List[RealtimeToolsConfigUnionParam] diff --git a/src/openai/types/realtime/realtime_tools_config_union.py b/src/openai/types/realtime/realtime_tools_config_union.py index 8a064d78d4..e7126ed60d 100644 --- a/src/openai/types/realtime/realtime_tools_config_union.py +++ b/src/openai/types/realtime/realtime_tools_config_union.py @@ -3,9 +3,9 @@ from typing import Dict, List, Union, Optional from typing_extensions import Literal, Annotated, TypeAlias -from .models import Models from ..._utils import PropertyInfo from ..._models import BaseModel +from .realtime_function_tool import RealtimeFunctionTool __all__ = [ "RealtimeToolsConfigUnion", @@ -138,4 +138,4 @@ class Mcp(BaseModel): """ -RealtimeToolsConfigUnion: TypeAlias = Annotated[Union[Models, Mcp], PropertyInfo(discriminator="type")] +RealtimeToolsConfigUnion: TypeAlias = Annotated[Union[RealtimeFunctionTool, Mcp], PropertyInfo(discriminator="type")] diff --git a/src/openai/types/realtime/realtime_tools_config_union_param.py b/src/openai/types/realtime/realtime_tools_config_union_param.py index 179ad040d9..9ee58fdbe6 100644 --- a/src/openai/types/realtime/realtime_tools_config_union_param.py +++ b/src/openai/types/realtime/realtime_tools_config_union_param.py @@ -6,7 +6,7 @@ from typing_extensions import Literal, Required, TypeAlias, TypedDict from ..._types import SequenceNotStr -from .models_param import ModelsParam +from .realtime_function_tool_param import RealtimeFunctionToolParam __all__ = [ "RealtimeToolsConfigUnionParam", @@ -137,4 +137,4 @@ class Mcp(TypedDict, total=False): """ -RealtimeToolsConfigUnionParam: TypeAlias = Union[ModelsParam, Mcp] +RealtimeToolsConfigUnionParam: TypeAlias = Union[RealtimeFunctionToolParam, Mcp] diff --git a/src/openai/types/realtime/realtime_transcription_session_client_secret.py b/src/openai/types/realtime/realtime_transcription_session_client_secret.py deleted file mode 100644 index 0cfde4c0a2..0000000000 --- a/src/openai/types/realtime/realtime_transcription_session_client_secret.py +++ /dev/null @@ -1,20 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from ..._models import BaseModel - -__all__ = ["RealtimeTranscriptionSessionClientSecret"] - - -class RealtimeTranscriptionSessionClientSecret(BaseModel): - expires_at: int - """Timestamp for when the token expires. - - Currently, all tokens expire after one minute. - """ - - value: str - """ - Ephemeral key usable in client environments to authenticate connections to the - Realtime API. Use this in client-side environments rather than a standard API - token, which should only be used server-side. - """ diff --git a/src/openai/types/realtime/realtime_transcription_session_create_response.py b/src/openai/types/realtime/realtime_transcription_session_create_response.py index a08538aa8f..301af1ac3f 100644 --- a/src/openai/types/realtime/realtime_transcription_session_create_response.py +++ b/src/openai/types/realtime/realtime_transcription_session_create_response.py @@ -4,33 +4,32 @@ from typing_extensions import Literal from ..._models import BaseModel -from .realtime_transcription_session_client_secret import RealtimeTranscriptionSessionClientSecret +from .audio_transcription import AudioTranscription +from .noise_reduction_type import NoiseReductionType +from .realtime_audio_formats import RealtimeAudioFormats from .realtime_transcription_session_turn_detection import RealtimeTranscriptionSessionTurnDetection -from .realtime_transcription_session_input_audio_transcription import ( - RealtimeTranscriptionSessionInputAudioTranscription, -) -__all__ = ["RealtimeTranscriptionSessionCreateResponse"] +__all__ = ["RealtimeTranscriptionSessionCreateResponse", "Audio", "AudioInput", "AudioInputNoiseReduction"] -class RealtimeTranscriptionSessionCreateResponse(BaseModel): - client_secret: RealtimeTranscriptionSessionClientSecret - """Ephemeral key returned by the API. +class AudioInputNoiseReduction(BaseModel): + type: Optional[NoiseReductionType] = None + """Type of noise reduction. - Only present when the session is created on the server via REST API. + `near_field` is for close-talking microphones such as headphones, `far_field` is + for far-field microphones such as laptop or conference room microphones. """ - input_audio_format: Optional[str] = None - """The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.""" - input_audio_transcription: Optional[RealtimeTranscriptionSessionInputAudioTranscription] = None - """Configuration of the transcription model.""" +class AudioInput(BaseModel): + format: Optional[RealtimeAudioFormats] = None + """The PCM audio format. Only a 24kHz sample rate is supported.""" - modalities: Optional[List[Literal["text", "audio"]]] = None - """The set of modalities the model can respond with. + noise_reduction: Optional[AudioInputNoiseReduction] = None + """Configuration for input audio noise reduction.""" - To disable audio, set this to ["text"]. - """ + transcription: Optional[AudioTranscription] = None + """Configuration of the transcription model.""" turn_detection: Optional[RealtimeTranscriptionSessionTurnDetection] = None """Configuration for turn detection. @@ -39,3 +38,31 @@ class RealtimeTranscriptionSessionCreateResponse(BaseModel): the start and end of speech based on audio volume and respond at the end of user speech. """ + + +class Audio(BaseModel): + input: Optional[AudioInput] = None + + +class RealtimeTranscriptionSessionCreateResponse(BaseModel): + id: str + """Unique identifier for the session that looks like `sess_1234567890abcdef`.""" + + object: str + """The object type. Always `realtime.transcription_session`.""" + + type: Literal["transcription"] + """The type of session. Always `transcription` for transcription sessions.""" + + audio: Optional[Audio] = None + """Configuration for input audio for the session.""" + + expires_at: Optional[int] = None + """Expiration timestamp for the session, in seconds since epoch.""" + + include: Optional[List[Literal["item.input_audio_transcription.logprobs"]]] = None + """Additional fields to include in server outputs. + + - `item.input_audio_transcription.logprobs`: Include logprobs for input audio + transcription. + """ diff --git a/src/openai/types/realtime/realtime_transcription_session_input_audio_transcription.py b/src/openai/types/realtime/realtime_transcription_session_input_audio_transcription.py deleted file mode 100644 index 52254bed33..0000000000 --- a/src/openai/types/realtime/realtime_transcription_session_input_audio_transcription.py +++ /dev/null @@ -1,36 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import Optional -from typing_extensions import Literal - -from ..._models import BaseModel - -__all__ = ["RealtimeTranscriptionSessionInputAudioTranscription"] - - -class RealtimeTranscriptionSessionInputAudioTranscription(BaseModel): - language: Optional[str] = None - """The language of the input audio. - - Supplying the input language in - [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) - format will improve accuracy and latency. - """ - - model: Optional[Literal["whisper-1", "gpt-4o-transcribe-latest", "gpt-4o-mini-transcribe", "gpt-4o-transcribe"]] = ( - None - ) - """The model to use for transcription. - - Current options are `whisper-1`, `gpt-4o-transcribe-latest`, - `gpt-4o-mini-transcribe`, and `gpt-4o-transcribe`. - """ - - prompt: Optional[str] = None - """ - An optional text to guide the model's style or continue a previous audio - segment. For `whisper-1`, the - [prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting). - For `gpt-4o-transcribe` models, the prompt is a free text string, for example - "expect words related to technology". - """ diff --git a/src/openai/types/realtime/transcription_session_created.py b/src/openai/types/realtime/transcription_session_created.py deleted file mode 100644 index c358c5e8b0..0000000000 --- a/src/openai/types/realtime/transcription_session_created.py +++ /dev/null @@ -1,24 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing_extensions import Literal - -from ..._models import BaseModel -from .realtime_transcription_session_create_response import RealtimeTranscriptionSessionCreateResponse - -__all__ = ["TranscriptionSessionCreated"] - - -class TranscriptionSessionCreated(BaseModel): - event_id: str - """The unique ID of the server event.""" - - session: RealtimeTranscriptionSessionCreateResponse - """A new Realtime transcription session configuration. - - When a session is created on the server via REST API, the session object also - contains an ephemeral key. Default TTL for keys is 10 minutes. This property is - not present when a session is updated via the WebSocket API. - """ - - type: Literal["transcription_session.created"] - """The event type, must be `transcription_session.created`.""" diff --git a/src/openai/types/realtime/transcription_session_update.py b/src/openai/types/realtime/transcription_session_update.py deleted file mode 100644 index 0faff9cb57..0000000000 --- a/src/openai/types/realtime/transcription_session_update.py +++ /dev/null @@ -1,98 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import List, Optional -from typing_extensions import Literal - -from ..._models import BaseModel -from .audio_transcription import AudioTranscription -from .noise_reduction_type import NoiseReductionType - -__all__ = ["TranscriptionSessionUpdate", "Session", "SessionInputAudioNoiseReduction", "SessionTurnDetection"] - - -class SessionInputAudioNoiseReduction(BaseModel): - type: Optional[NoiseReductionType] = None - """Type of noise reduction. - - `near_field` is for close-talking microphones such as headphones, `far_field` is - for far-field microphones such as laptop or conference room microphones. - """ - - -class SessionTurnDetection(BaseModel): - prefix_padding_ms: Optional[int] = None - """Amount of audio to include before the VAD detected speech (in milliseconds). - - Defaults to 300ms. - """ - - silence_duration_ms: Optional[int] = None - """Duration of silence to detect speech stop (in milliseconds). - - Defaults to 500ms. With shorter values the model will respond more quickly, but - may jump in on short pauses from the user. - """ - - threshold: Optional[float] = None - """Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. - - A higher threshold will require louder audio to activate the model, and thus - might perform better in noisy environments. - """ - - type: Optional[Literal["server_vad"]] = None - """Type of turn detection. - - Only `server_vad` is currently supported for transcription sessions. - """ - - -class Session(BaseModel): - include: Optional[List[Literal["item.input_audio_transcription.logprobs"]]] = None - """The set of items to include in the transcription. - - Current available items are: `item.input_audio_transcription.logprobs` - """ - - input_audio_format: Optional[Literal["pcm16", "g711_ulaw", "g711_alaw"]] = None - """The format of input audio. - - Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For `pcm16`, input audio must - be 16-bit PCM at a 24kHz sample rate, single channel (mono), and little-endian - byte order. - """ - - input_audio_noise_reduction: Optional[SessionInputAudioNoiseReduction] = None - """Configuration for input audio noise reduction. - - This can be set to `null` to turn off. Noise reduction filters audio added to - the input audio buffer before it is sent to VAD and the model. Filtering the - audio can improve VAD and turn detection accuracy (reducing false positives) and - model performance by improving perception of the input audio. - """ - - input_audio_transcription: Optional[AudioTranscription] = None - """Configuration for input audio transcription. - - The client can optionally set the language and prompt for transcription, these - offer additional guidance to the transcription service. - """ - - turn_detection: Optional[SessionTurnDetection] = None - """Configuration for turn detection. - - Can be set to `null` to turn off. Server VAD means that the model will detect - the start and end of speech based on audio volume and respond at the end of user - speech. - """ - - -class TranscriptionSessionUpdate(BaseModel): - session: Session - """Realtime transcription session object configuration.""" - - type: Literal["transcription_session.update"] - """The event type, must be `transcription_session.update`.""" - - event_id: Optional[str] = None - """Optional client-generated ID used to identify this event.""" diff --git a/src/openai/types/realtime/transcription_session_update_param.py b/src/openai/types/realtime/transcription_session_update_param.py deleted file mode 100644 index 55c67798b6..0000000000 --- a/src/openai/types/realtime/transcription_session_update_param.py +++ /dev/null @@ -1,99 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing import List -from typing_extensions import Literal, Required, TypedDict - -from .noise_reduction_type import NoiseReductionType -from .audio_transcription_param import AudioTranscriptionParam - -__all__ = ["TranscriptionSessionUpdateParam", "Session", "SessionInputAudioNoiseReduction", "SessionTurnDetection"] - - -class SessionInputAudioNoiseReduction(TypedDict, total=False): - type: NoiseReductionType - """Type of noise reduction. - - `near_field` is for close-talking microphones such as headphones, `far_field` is - for far-field microphones such as laptop or conference room microphones. - """ - - -class SessionTurnDetection(TypedDict, total=False): - prefix_padding_ms: int - """Amount of audio to include before the VAD detected speech (in milliseconds). - - Defaults to 300ms. - """ - - silence_duration_ms: int - """Duration of silence to detect speech stop (in milliseconds). - - Defaults to 500ms. With shorter values the model will respond more quickly, but - may jump in on short pauses from the user. - """ - - threshold: float - """Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. - - A higher threshold will require louder audio to activate the model, and thus - might perform better in noisy environments. - """ - - type: Literal["server_vad"] - """Type of turn detection. - - Only `server_vad` is currently supported for transcription sessions. - """ - - -class Session(TypedDict, total=False): - include: List[Literal["item.input_audio_transcription.logprobs"]] - """The set of items to include in the transcription. - - Current available items are: `item.input_audio_transcription.logprobs` - """ - - input_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] - """The format of input audio. - - Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For `pcm16`, input audio must - be 16-bit PCM at a 24kHz sample rate, single channel (mono), and little-endian - byte order. - """ - - input_audio_noise_reduction: SessionInputAudioNoiseReduction - """Configuration for input audio noise reduction. - - This can be set to `null` to turn off. Noise reduction filters audio added to - the input audio buffer before it is sent to VAD and the model. Filtering the - audio can improve VAD and turn detection accuracy (reducing false positives) and - model performance by improving perception of the input audio. - """ - - input_audio_transcription: AudioTranscriptionParam - """Configuration for input audio transcription. - - The client can optionally set the language and prompt for transcription, these - offer additional guidance to the transcription service. - """ - - turn_detection: SessionTurnDetection - """Configuration for turn detection. - - Can be set to `null` to turn off. Server VAD means that the model will detect - the start and end of speech based on audio volume and respond at the end of user - speech. - """ - - -class TranscriptionSessionUpdateParam(TypedDict, total=False): - session: Required[Session] - """Realtime transcription session object configuration.""" - - type: Required[Literal["transcription_session.update"]] - """The event type, must be `transcription_session.update`.""" - - event_id: str - """Optional client-generated ID used to identify this event.""" diff --git a/src/openai/types/realtime/transcription_session_updated_event.py b/src/openai/types/realtime/transcription_session_updated_event.py deleted file mode 100644 index f6a52a12f3..0000000000 --- a/src/openai/types/realtime/transcription_session_updated_event.py +++ /dev/null @@ -1,24 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing_extensions import Literal - -from ..._models import BaseModel -from .realtime_transcription_session_create_response import RealtimeTranscriptionSessionCreateResponse - -__all__ = ["TranscriptionSessionUpdatedEvent"] - - -class TranscriptionSessionUpdatedEvent(BaseModel): - event_id: str - """The unique ID of the server event.""" - - session: RealtimeTranscriptionSessionCreateResponse - """A new Realtime transcription session configuration. - - When a session is created on the server via REST API, the session object also - contains an ephemeral key. Default TTL for keys is 10 minutes. This property is - not present when a session is updated via the WebSocket API. - """ - - type: Literal["transcription_session.updated"] - """The event type, must be `transcription_session.updated`."""