diff --git a/AGENTS.md b/AGENTS.md
new file mode 100644
index 000000000..ff37db326
--- /dev/null
+++ b/AGENTS.md
@@ -0,0 +1,69 @@
+Welcome to the OpenAI Agents SDK repository. This file contains the main points for new contributors.
+
+## Repository overview
+
+- **Source code**: `src/agents/` contains the implementation.
+- **Tests**: `tests/` with a short guide in `tests/README.md`.
+- **Examples**: under `examples/`.
+- **Documentation**: markdown pages live in `docs/` with `mkdocs.yml` controlling the site.
+- **Utilities**: developer commands are defined in the `Makefile`.
+- **PR template**: `.github/PULL_REQUEST_TEMPLATE/pull_request_template.md` describes the information every PR must include.
+
+## Local workflow
+
+1. Format, lint and type-check your changes:
+
+   ```bash
+   make format
+   make lint
+   make mypy
+   ```
+
+2. Run the tests:
+
+   ```bash
+   make tests
+   ```
+
+   To run a single test, use `uv run pytest -s -k <test_name>`.
+
+3. Build the documentation (optional but recommended for docs changes):
+
+   ```bash
+   make build-docs
+   ```
+
+   Separately, a test coverage report can be generated with `make coverage`.
+
+## Snapshot tests
+
+Some tests rely on inline snapshots. See `tests/README.md` for details on updating them:
+
+```bash
+make snapshots-fix      # update existing snapshots
+make snapshots-create   # create new snapshots
+```
+
+Run `make tests` again after updating snapshots to ensure they pass.
+
+## Style notes
+
+- Write comments as full sentences and end them with a period.
+
+## Pull request expectations
+
+PRs should use the template located at `.github/PULL_REQUEST_TEMPLATE/pull_request_template.md`. Provide a summary, test plan and issue number if applicable, then check that:
+
+- New tests are added when needed.
+- Documentation is updated.
+- `make lint` and `make format` have been run.
+- The full test suite passes.
+
+Commit messages should be concise and written in the imperative mood. Small, focused commits are preferred.
+
+## What reviewers look for
+
+- Tests covering new behaviour.
+- Consistent style: code formatted with `ruff format`, imports sorted, and type hints passing `mypy`.
+- Clear documentation for any public API changes.
+- Clean history and a helpful PR description.
diff --git a/docs/tracing.md b/docs/tracing.md
index dd883c5aa..4a9c1bd90 100644
--- a/docs/tracing.md
+++ b/docs/tracing.md
@@ -115,3 +115,4 @@ To customize this default setup, to send traces to alternative or additional bac
 -   [Langfuse](https://langfuse.com/docs/integrations/openaiagentssdk/openai-agents)
 -   [Langtrace](https://docs.langtrace.ai/supported-integrations/llm-frameworks/openai-agents-sdk)
 -   [Okahu-Monocle](https://github.com/monocle2ai/monocle)
+-   [Galileo](https://v2docs.galileo.ai/integrations/openai-agent-integration#openai-agent-integration)
diff --git a/examples/hosted_mcp/__init__.py b/examples/hosted_mcp/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/examples/hosted_mcp/approvals.py b/examples/hosted_mcp/approvals.py
new file mode 100644
index 000000000..2cabb3ee2
--- /dev/null
+++ b/examples/hosted_mcp/approvals.py
@@ -0,0 +1,61 @@
+import argparse
+import asyncio
+
+from agents import (
+    Agent,
+    HostedMCPTool,
+    MCPToolApprovalFunctionResult,
+    MCPToolApprovalRequest,
+    Runner,
+)
+
+"""This example demonstrates how to use the hosted MCP support in the OpenAI Responses API, with
+approval callbacks."""
+
+
+def approval_callback(request: MCPToolApprovalRequest) -> MCPToolApprovalFunctionResult:
+    """Ask on stdin whether the requested MCP tool may run; only the answer "y" approves."""
+    reply = input(f"Approve running the tool `{request.data.name}`? (y/n) ")
+    if reply == "y":
+        return {"approve": True}
+    return {"approve": False, "reason": "User denied"}
+
+
+async def main(verbose: bool, stream: bool):
+    """Run the approval-gated hosted MCP example; `verbose` also prints each run item."""
+    mcp_tool = HostedMCPTool(
+        tool_config={
+            "type": "mcp",
+            "server_label": "gitmcp",
+            "server_url": "https://gitmcp.io/openai/codex",
+            "require_approval": "always",
+        },
+        on_approval_request=approval_callback,
+    )
+    agent = Agent(name="Assistant", tools=[mcp_tool])
+
+    # Each branch saves its run items so that --verbose works with and without --stream.
+    if stream:
+        result = Runner.run_streamed(agent, "Which language is this repo written in?")
+        async for event in result.stream_events():
+            if event.type == "run_item_stream_event":
+                print(f"Got event of type {event.item.__class__.__name__}")
+        print(f"Done streaming; final result: {result.final_output}")
+        new_items = result.new_items
+    else:
+        res = await Runner.run(agent, "Which language is this repo written in?")
+        print(res.final_output)
+        new_items = res.new_items
+
+    if verbose:
+        for item in new_items:
+            print(item)
+
+
+if __name__ == "__main__":
+    # Both switches are opt-in and default to off.
+    cli = argparse.ArgumentParser()
+    for flag in ("--verbose", "--stream"):
+        cli.add_argument(flag, action="store_true", default=False)
+    options = cli.parse_args()
+    asyncio.run(main(options.verbose, options.stream))
diff --git a/examples/hosted_mcp/simple.py b/examples/hosted_mcp/simple.py
new file mode 100644
index 000000000..508c3a7ae
--- /dev/null
+++ b/examples/hosted_mcp/simple.py
@@ -0,0 +1,47 @@
+import argparse
+import asyncio
+
+from agents import Agent, HostedMCPTool, Runner
+
+"""This example demonstrates how to use the hosted MCP support in the OpenAI Responses API, with
+approvals not required for any tools. You should only use this for trusted MCP servers."""
+
+
+async def main(verbose: bool, stream: bool):
+    """Run the hosted MCP example without approvals; `verbose` also prints each run item."""
+    mcp_tool = HostedMCPTool(
+        tool_config={
+            "type": "mcp",
+            "server_label": "gitmcp",
+            "server_url": "https://gitmcp.io/openai/codex",
+            "require_approval": "never",
+        }
+    )
+    agent = Agent(name="Assistant", tools=[mcp_tool])
+
+    # Each branch records its run items so that --verbose works with and without --stream.
+    if stream:
+        result = Runner.run_streamed(agent, "Which language is this repo written in?")
+        async for event in result.stream_events():
+            if event.type == "run_item_stream_event":
+                print(f"Got event of type {event.item.__class__.__name__}")
+        print(f"Done streaming; final result: {result.final_output}")
+        new_items = result.new_items
+    else:
+        res = await Runner.run(agent, "Which language is this repo written in?")
+        print(res.final_output)
+        # The repository is primarily written in multiple languages, including Rust and TypeScript...
+        new_items = res.new_items
+
+    if verbose:
+        for item in new_items:
+            print(item)
+
+
+if __name__ == "__main__":
+    # Parse the two opt-in switches, then hand them to the async entry point.
+    arg_parser = argparse.ArgumentParser()
+    arg_parser.add_argument("--verbose", default=False, action="store_true")
+    arg_parser.add_argument("--stream", default=False, action="store_true")
+    cli_args = arg_parser.parse_args()
+    asyncio.run(main(verbose=cli_args.verbose, stream=cli_args.stream))
diff --git a/examples/tools/code_interpreter.py b/examples/tools/code_interpreter.py
new file mode 100644
index 000000000..a5843ce3f
--- /dev/null
+++ b/examples/tools/code_interpreter.py
@@ -0,0 +1,34 @@
+import asyncio
+
+from agents import Agent, CodeInterpreterTool, Runner, trace
+
+
+async def main():
+    """Stream a code-interpreter run, printing the generated code and the final output."""
+    interpreter = CodeInterpreterTool(
+        tool_config={"type": "code_interpreter", "container": {"type": "auto"}},
+    )
+    agent = Agent(
+        name="Code interpreter",
+        instructions="You love doing math.",
+        tools=[interpreter],
+    )
+
+    with trace("Code interpreter example"):
+        print("Solving math problem...")
+        result = Runner.run_streamed(agent, "What is the square root of 273 * 312821 plus 1782?")
+        async for event in result.stream_events():
+            if event.type != "run_item_stream_event":
+                # Only run-item events are reported; everything else is skipped.
+                continue
+            item = event.item
+            if item.type == "tool_call_item" and item.raw_item.type == "code_interpreter_call":
+                print(f"Code interpreter code:\n```\n{item.raw_item.code}\n```\n")
+            else:
+                print(f"Other event: {item.type}")
+
+        print(f"Final output: {result.final_output}")
+
+
+if __name__ == "__main__":
+    asyncio.run(main())  # Runs the async example to completion when executed as a script.
diff --git a/examples/tools/image_generator.py b/examples/tools/image_generator.py
new file mode 100644
index 000000000..fd6fcc6ba
--- /dev/null
+++ b/examples/tools/image_generator.py
@@ -0,0 +1,54 @@
+import asyncio
+import base64
+import os
+import subprocess
+import sys
+import tempfile
+
+from agents import Agent, ImageGenerationTool, Runner, trace
+
+
+def open_file(path: str) -> None:
+    """Open `path` with the platform's default application, or print a notice if unsupported."""
+    if os.name == "nt":  # Windows
+        os.startfile(path)  # type: ignore
+    elif sys.platform.startswith("darwin") or os.name == "posix":
+        opener = "open" if sys.platform.startswith("darwin") else "xdg-open"  # macOS / Linux
+        subprocess.run([opener, path], check=False)
+    else:
+        print(f"Don't know how to open files on this platform: {sys.platform}")
+
+
+async def main():
+    """Generate an image with the hosted tool, save it to a temporary PNG and open it."""
+    image_tool = ImageGenerationTool(
+        tool_config={"type": "image_generation", "quality": "low"},
+    )
+    agent = Agent(
+        name="Image generator",
+        instructions="You are a helpful agent.",
+        tools=[image_tool],
+    )
+    prompt = "Create an image of a frog eating a pizza, comic book style."
+
+    with trace("Image generation example"):
+        print("Generating image, this may take a while...")
+        result = await Runner.run(agent, prompt)
+        print(result.final_output)
+        for item in result.new_items:
+            if item.type != "tool_call_item":
+                continue
+            if item.raw_item.type != "image_generation_call":
+                continue
+            encoded = item.raw_item.result
+            if not encoded:
+                continue  # The call carries no image data to save.
+
+            # delete=False keeps the file on disk after the handle closes so it can be opened.
+            with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as png:
+                png.write(base64.b64decode(encoded))
+            open_file(png.name)
+
+
+if __name__ == "__main__":
+    asyncio.run(main())  # Runs the async example to completion when executed as a script.
diff --git a/pyproject.toml b/pyproject.toml
index 672258c42..38a2f2b64 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,13 +1,13 @@
 [project]
 name = "openai-agents"
-version = "0.0.15"
+version = "0.0.16"
 description = "OpenAI Agents SDK"
 readme = "README.md"
 requires-python = ">=3.9"
 license = "MIT"
 authors = [{ name = "OpenAI", email = "support@openai.com" }]
 dependencies = [
-    "openai>=1.76.0",
+    "openai>=1.81.0",
     "pydantic>=2.10, <3",
     "griffe>=1.5.6, <2",
     "typing-extensions>=4.12.2, <5",
diff --git a/src/agents/__init__.py b/src/agents/__init__.py
index 6d7c90b4f..58949157a 100644
--- a/src/agents/__init__.py
+++ b/src/agents/__init__.py
@@ -54,10 +54,19 @@
     StreamEvent,
 )
 from .tool import (
+    CodeInterpreterTool,
     ComputerTool,
     FileSearchTool,
     FunctionTool,
     FunctionToolResult,
+    HostedMCPTool,
+    ImageGenerationTool,
+    LocalShellCommandRequest,
+    LocalShellExecutor,
+    LocalShellTool,
+    MCPToolApprovalFunction,
+    MCPToolApprovalFunctionResult,
+    MCPToolApprovalRequest,
     Tool,
     WebSearchTool,
     default_tool_error_function,
@@ -206,8 +215,17 @@ def enable_verbose_stdout_logging():
     "FunctionToolResult",
     "ComputerTool",
     "FileSearchTool",
+    "CodeInterpreterTool",
+    "ImageGenerationTool",
+    "LocalShellCommandRequest",
+    "LocalShellExecutor",
+    "LocalShellTool",
     "Tool",
     "WebSearchTool",
+    "HostedMCPTool",
+    "MCPToolApprovalFunction",
+    "MCPToolApprovalRequest",
+    "MCPToolApprovalFunctionResult",
     "function_tool",
     "Usage",
     "add_trace_processor",
diff --git a/src/agents/_run_impl.py b/src/agents/_run_impl.py
index b5a83685c..2cfa270e0 100644
--- a/src/agents/_run_impl.py
+++ b/src/agents/_run_impl.py
@@ -14,6 +14,9 @@
     ResponseFunctionWebSearch,
     ResponseOutputMessage,
 )
+from openai.types.responses.response_code_interpreter_tool_call import (
+    ResponseCodeInterpreterToolCall,
+)
 from openai.types.responses.response_computer_tool_call import (
     ActionClick,
     ActionDoubleClick,
@@ -25,7 +28,13 @@
     ActionType,
     ActionWait,
 )
-from openai.types.responses.response_input_param import ComputerCallOutput
+from openai.types.responses.response_input_param import ComputerCallOutput, McpApprovalResponse
+from openai.types.responses.response_output_item import (
+    ImageGenerationCall,
+    LocalShellCall,
+    McpApprovalRequest,
+    McpListTools,
+)
 from openai.types.responses.response_reasoning_item import ResponseReasoningItem
 
 from .agent import Agent, ToolsToFinalOutputResult
@@ -38,6 +47,9 @@
     HandoffCallItem,
     HandoffOutputItem,
     ItemHelpers,
+    MCPApprovalRequestItem,
+    MCPApprovalResponseItem,
+    MCPListToolsItem,
     MessageOutputItem,
     ModelResponse,
     ReasoningItem,
@@ -52,7 +64,16 @@
 from .models.interface import ModelTracing
 from .run_context import RunContextWrapper, TContext
 from .stream_events import RunItemStreamEvent, StreamEvent
-from .tool import ComputerTool, FunctionTool, FunctionToolResult, Tool
+from .tool import (
+    ComputerTool,
+    FunctionTool,
+    FunctionToolResult,
+    HostedMCPTool,
+    LocalShellCommandRequest,
+    LocalShellTool,
+    MCPToolApprovalRequest,
+    Tool,
+)
 from .tracing import (
     SpanError,
     Trace,
@@ -112,15 +133,29 @@ class ToolRunComputerAction:
     computer_tool: ComputerTool
 
 
+@dataclass
+class ToolRunMCPApprovalRequest:
+    request_item: McpApprovalRequest  # The approval request item from the model response.
+    mcp_tool: HostedMCPTool  # The hosted MCP tool whose server_label matched the request.
+
+
+@dataclass
+class ToolRunLocalShellCall:
+    tool_call: LocalShellCall  # The local shell call item from the model response.
+    local_shell_tool: LocalShellTool  # The tool whose executor runs the call.
+
+
 @dataclass
 class ProcessedResponse:
     new_items: list[RunItem]
     handoffs: list[ToolRunHandoff]
     functions: list[ToolRunFunction]
     computer_actions: list[ToolRunComputerAction]
+    local_shell_calls: list[ToolRunLocalShellCall]
     tools_used: list[str]  # Names of all tools used, including hosted tools
+    mcp_approval_requests: list[ToolRunMCPApprovalRequest]  # Only requests with callbacks
 
-    def has_tools_to_run(self) -> bool:
+    def has_tools_or_approvals_to_run(self) -> bool:
         # Handoffs, functions and computer actions need local processing
         # Hosted tools have already run, so there's nothing to do.
         return any(
@@ -128,6 +163,8 @@ def has_tools_to_run(self) -> bool:
                 self.handoffs,
                 self.functions,
                 self.computer_actions,
+                self.local_shell_calls,
+                self.mcp_approval_requests,
             ]
         )
 
@@ -226,7 +263,16 @@ async def execute_tools_and_side_effects(
         new_step_items.extend([result.run_item for result in function_results])
         new_step_items.extend(computer_results)
 
-        # Second, check if there are any handoffs
+        # Next, run the MCP approval requests
+        if processed_response.mcp_approval_requests:
+            approval_results = await cls.execute_mcp_approval_requests(
+                agent=agent,
+                approval_requests=processed_response.mcp_approval_requests,
+                context_wrapper=context_wrapper,
+            )
+            new_step_items.extend(approval_results)
+
+        # Next, check if there are any handoffs
         if run_handoffs := processed_response.handoffs:
             return await cls.execute_handoffs(
                 agent=agent,
@@ -240,7 +286,7 @@ async def execute_tools_and_side_effects(
                 run_config=run_config,
             )
 
-        # Third, we'll check if the tool use should result in a final output
+        # Next, we'll check if the tool use should result in a final output
         check_tool_use = await cls._check_for_final_output_from_tools(
             agent=agent,
             tool_results=function_results,
@@ -295,7 +341,7 @@ async def execute_tools_and_side_effects(
             )
         elif (
             not output_schema or output_schema.is_plain_text()
-        ) and not processed_response.has_tools_to_run():
+        ) and not processed_response.has_tools_or_approvals_to_run():
             return await cls.execute_final_output(
                 agent=agent,
                 original_input=original_input,
@@ -343,10 +389,20 @@ def process_model_response(
         run_handoffs = []
         functions = []
         computer_actions = []
+        local_shell_calls = []
+        mcp_approval_requests = []
         tools_used: list[str] = []
         handoff_map = {handoff.tool_name: handoff for handoff in handoffs}
         function_map = {tool.name: tool for tool in all_tools if isinstance(tool, FunctionTool)}
         computer_tool = next((tool for tool in all_tools if isinstance(tool, ComputerTool)), None)
+        local_shell_tool = next(
+            (tool for tool in all_tools if isinstance(tool, LocalShellTool)), None
+        )
+        hosted_mcp_server_map = {
+            tool.tool_config["server_label"]: tool
+            for tool in all_tools
+            if isinstance(tool, HostedMCPTool)
+        }
 
         for output in response.output:
             if isinstance(output, ResponseOutputMessage):
@@ -375,6 +431,54 @@ def process_model_response(
                 computer_actions.append(
                     ToolRunComputerAction(tool_call=output, computer_tool=computer_tool)
                 )
+            elif isinstance(output, McpApprovalRequest):
+                items.append(MCPApprovalRequestItem(raw_item=output, agent=agent))
+                if output.server_label not in hosted_mcp_server_map:
+                    _error_tracing.attach_error_to_current_span(
+                        SpanError(
+                            message="MCP server label not found",
+                            data={"server_label": output.server_label},
+                        )
+                    )
+                    raise ModelBehaviorError(f"MCP server label {output.server_label} not found")
+                else:
+                    server = hosted_mcp_server_map[output.server_label]
+                    if server.on_approval_request:
+                        mcp_approval_requests.append(
+                            ToolRunMCPApprovalRequest(
+                                request_item=output,
+                                mcp_tool=server,
+                            )
+                        )
+                    else:
+                        logger.warning(
+                            f"MCP server {output.server_label} has no on_approval_request hook"
+                        )
+            elif isinstance(output, McpListTools):
+                items.append(MCPListToolsItem(raw_item=output, agent=agent))
+            elif isinstance(output, ImageGenerationCall):
+                items.append(ToolCallItem(raw_item=output, agent=agent))
+                tools_used.append("image_generation")
+            elif isinstance(output, ResponseCodeInterpreterToolCall):
+                items.append(ToolCallItem(raw_item=output, agent=agent))
+                tools_used.append("code_interpreter")
+            elif isinstance(output, LocalShellCall):
+                items.append(ToolCallItem(raw_item=output, agent=agent))
+                tools_used.append("local_shell")
+                if not local_shell_tool:
+                    _error_tracing.attach_error_to_current_span(
+                        SpanError(
+                            message="Local shell tool not found",
+                            data={},
+                        )
+                    )
+                    raise ModelBehaviorError(
+                        "Model produced local shell call without a local shell tool."
+                    )
+                local_shell_calls.append(
+                    ToolRunLocalShellCall(tool_call=output, local_shell_tool=local_shell_tool)
+                )
+
             elif not isinstance(output, ResponseFunctionToolCall):
                 logger.warning(f"Unexpected output type, ignoring: {type(output)}")
                 continue
@@ -416,7 +520,9 @@ def process_model_response(
             handoffs=run_handoffs,
             functions=functions,
             computer_actions=computer_actions,
+            local_shell_calls=local_shell_calls,
             tools_used=tools_used,
+            mcp_approval_requests=mcp_approval_requests,
         )
 
     @classmethod
@@ -489,6 +595,30 @@ async def run_single_tool(
             for tool_run, result in zip(tool_runs, results)
         ]
 
+    @classmethod
+    async def execute_local_shell_calls(
+        cls,
+        *,
+        agent: Agent[TContext],
+        calls: list[ToolRunLocalShellCall],
+        context_wrapper: RunContextWrapper[TContext],
+        hooks: RunHooks[TContext],
+        config: RunConfig,
+    ) -> list[RunItem]:
+        """Run the local shell calls strictly in order and return their output items."""
+        # Awaited one by one rather than gathered, because each call can affect the
+        # local shell state.
+        return [
+            await LocalShellAction.execute(
+                agent=agent,
+                call=shell_call,
+                hooks=hooks,
+                context_wrapper=context_wrapper,
+                config=config,
+            )
+            for shell_call in calls
+        ]
+
     @classmethod
     async def execute_computer_actions(
         cls,
@@ -643,6 +773,40 @@ async def execute_handoffs(
             next_step=NextStepHandoff(new_agent),
         )
 
+    @classmethod
+    async def execute_mcp_approval_requests(
+        cls,
+        *,
+        agent: Agent[TContext],
+        approval_requests: list[ToolRunMCPApprovalRequest],
+        context_wrapper: RunContextWrapper[TContext],
+    ) -> list[RunItem]:
+        """Run the approval callbacks concurrently and wrap each decision as an item."""
+
+        async def resolve(pending: ToolRunMCPApprovalRequest) -> RunItem:
+            on_request = pending.mcp_tool.on_approval_request
+            assert on_request is not None, "Callback is required for MCP approval requests"
+            # The callback may be sync or async, so only await when it returned an awaitable.
+            outcome = on_request(MCPToolApprovalRequest(context_wrapper, pending.request_item))
+            if inspect.isawaitable(outcome):
+                decision = await outcome
+            else:
+                decision = outcome
+
+            response: McpApprovalResponse = {
+                "approval_request_id": pending.request_item.id,
+                "approve": decision["approve"],
+                "type": "mcp_approval_response",
+            }
+            # A reason is only forwarded for rejections, and only when one was given.
+            denial_reason = decision.get("reason", None)
+            if denial_reason and not decision["approve"]:
+                response["reason"] = denial_reason
+            return MCPApprovalResponseItem(raw_item=response, agent=agent)
+
+        pending_responses = [resolve(pending) for pending in approval_requests]
+        return await asyncio.gather(*pending_responses)
+
     @classmethod
     async def execute_final_output(
         cls,
@@ -727,6 +891,11 @@ def stream_step_result_to_queue(
                 event = RunItemStreamEvent(item=item, name="tool_output")
             elif isinstance(item, ReasoningItem):
                 event = RunItemStreamEvent(item=item, name="reasoning_item_created")
+            elif isinstance(item, MCPApprovalRequestItem):
+                event = RunItemStreamEvent(item=item, name="mcp_approval_requested")
+            elif isinstance(item, MCPListToolsItem):
+                event = RunItemStreamEvent(item=item, name="mcp_list_tools")
+
             else:
                 logger.warning(f"Unexpected item type: {type(item)}")
                 event = None
@@ -919,3 +1088,54 @@ async def _get_screenshot_async(
             await computer.wait()
 
         return await computer.screenshot()
+
+
+class LocalShellAction:
+    """Runs one local shell call through the executor configured on the LocalShellTool."""
+
+    @classmethod
+    async def execute(
+        cls,
+        *,
+        agent: Agent[TContext],
+        call: ToolRunLocalShellCall,
+        hooks: RunHooks[TContext],
+        context_wrapper: RunContextWrapper[TContext],
+        config: RunConfig,
+    ) -> RunItem:
+        """Run the shell executor between the tool start/end hooks and wrap its output."""
+        await asyncio.gather(
+            hooks.on_tool_start(context_wrapper, agent, call.local_shell_tool),
+            (
+                agent.hooks.on_tool_start(context_wrapper, agent, call.local_shell_tool)
+                if agent.hooks
+                else _coro.noop_coroutine()
+            ),
+        )
+
+        request = LocalShellCommandRequest(ctx_wrapper=context_wrapper, data=call.tool_call)
+        output = call.local_shell_tool.executor(request)
+        if inspect.isawaitable(output):
+            result = await output
+        else:
+            result = output
+
+        await asyncio.gather(
+            hooks.on_tool_end(context_wrapper, agent, call.local_shell_tool, result),
+            (
+                agent.hooks.on_tool_end(context_wrapper, agent, call.local_shell_tool, result)
+                if agent.hooks
+                else _coro.noop_coroutine()
+            ),
+        )
+
+        return ToolCallOutputItem(
+            agent=agent,
+            output=result,  # Resolved value; `output` itself may be a spent awaitable.
+            raw_item={
+                "type": "local_shell_call_output",
+                "id": call.tool_call.call_id,
+                "output": result,
+                # "id": "out" + call.tool_call.id,  # TODO remove this, it should be optional
+            },
+        )
diff --git a/src/agents/extensions/models/litellm_model.py b/src/agents/extensions/models/litellm_model.py
index d3b25a198..ffb2c3c1c 100644
--- a/src/agents/extensions/models/litellm_model.py
+++ b/src/agents/extensions/models/litellm_model.py
@@ -6,6 +6,7 @@
 from typing import Any, Literal, cast, overload
 
 import litellm.types
+from openai.types.responses.response_usage import InputTokensDetails, OutputTokensDetails
 
 from agents.exceptions import ModelBehaviorError
 
@@ -107,6 +108,16 @@ async def get_response(
                         input_tokens=response_usage.prompt_tokens,
                         output_tokens=response_usage.completion_tokens,
                         total_tokens=response_usage.total_tokens,
+                        input_tokens_details=InputTokensDetails(
+                            cached_tokens=getattr(
+                                response_usage.prompt_tokens_details, "cached_tokens", 0
+                            )
+                        ),
+                        output_tokens_details=OutputTokensDetails(
+                            reasoning_tokens=getattr(
+                                response_usage.completion_tokens_details, "reasoning_tokens", 0
+                            )
+                        ),
                     )
                     if response.usage
                     else Usage()
diff --git a/src/agents/items.py b/src/agents/items.py
index 8fb2b52a3..64797ad22 100644
--- a/src/agents/items.py
+++ b/src/agents/items.py
@@ -18,7 +18,22 @@
     ResponseOutputText,
     ResponseStreamEvent,
 )
-from openai.types.responses.response_input_item_param import ComputerCallOutput, FunctionCallOutput
+from openai.types.responses.response_code_interpreter_tool_call import (
+    ResponseCodeInterpreterToolCall,
+)
+from openai.types.responses.response_input_item_param import (
+    ComputerCallOutput,
+    FunctionCallOutput,
+    LocalShellCallOutput,
+    McpApprovalResponse,
+)
+from openai.types.responses.response_output_item import (
+    ImageGenerationCall,
+    LocalShellCall,
+    McpApprovalRequest,
+    McpCall,
+    McpListTools,
+)
 from openai.types.responses.response_reasoning_item import ResponseReasoningItem
 from pydantic import BaseModel
 from typing_extensions import TypeAlias
@@ -108,6 +123,10 @@ class HandoffOutputItem(RunItemBase[TResponseInputItem]):
     ResponseComputerToolCall,
     ResponseFileSearchToolCall,
     ResponseFunctionWebSearch,
+    McpCall,
+    ResponseCodeInterpreterToolCall,
+    ImageGenerationCall,
+    LocalShellCall,
 ]
 """A type that represents a tool call item."""
 
@@ -123,10 +142,12 @@ class ToolCallItem(RunItemBase[ToolCallItemTypes]):
 
 
 @dataclass
-class ToolCallOutputItem(RunItemBase[Union[FunctionCallOutput, ComputerCallOutput]]):
+class ToolCallOutputItem(
+    RunItemBase[Union[FunctionCallOutput, ComputerCallOutput, LocalShellCallOutput]]
+):
     """Represents the output of a tool call."""
 
-    raw_item: FunctionCallOutput | ComputerCallOutput
+    raw_item: FunctionCallOutput | ComputerCallOutput | LocalShellCallOutput
     """The raw item from the model."""
 
     output: Any
@@ -147,6 +168,36 @@ class ReasoningItem(RunItemBase[ResponseReasoningItem]):
     type: Literal["reasoning_item"] = "reasoning_item"
 
 
+@dataclass
+class MCPListToolsItem(RunItemBase[McpListTools]):
+    """Represents an MCP list-tools output item found in a model response."""
+
+    raw_item: McpListTools
+    """The raw MCP list tools call."""
+
+    type: Literal["mcp_list_tools_item"] = "mcp_list_tools_item"
+
+
+@dataclass
+class MCPApprovalRequestItem(RunItemBase[McpApprovalRequest]):
+    """Represents a request for approval before a hosted MCP tool call may run."""
+
+    raw_item: McpApprovalRequest
+    """The raw MCP approval request."""
+
+    type: Literal["mcp_approval_request_item"] = "mcp_approval_request_item"
+
+
+@dataclass
+class MCPApprovalResponseItem(RunItemBase[McpApprovalResponse]):
+    """Represents the approve/reject answer given to an MCP approval request."""
+
+    raw_item: McpApprovalResponse
+    """The raw MCP approval response."""
+
+    type: Literal["mcp_approval_response_item"] = "mcp_approval_response_item"
+
+
 RunItem: TypeAlias = Union[
     MessageOutputItem,
     HandoffCallItem,
@@ -154,6 +205,9 @@ class ReasoningItem(RunItemBase[ResponseReasoningItem]):
     ToolCallItem,
     ToolCallOutputItem,
     ReasoningItem,
+    MCPListToolsItem,
+    MCPApprovalRequestItem,
+    MCPApprovalResponseItem,
 ]
 """An item generated by an agent."""
 
diff --git a/src/agents/mcp/server.py b/src/agents/mcp/server.py
index c5255ead7..414b517ab 100644
--- a/src/agents/mcp/server.py
+++ b/src/agents/mcp/server.py
@@ -12,7 +12,7 @@
 from mcp.client.sse import sse_client
 from mcp.client.streamable_http import GetSessionIdCallback, streamablehttp_client
 from mcp.shared.message import SessionMessage
-from mcp.types import CallToolResult
+from mcp.types import CallToolResult, InitializeResult
 from typing_extensions import NotRequired, TypedDict
 
 from ..exceptions import UserError
@@ -73,6 +73,7 @@ def __init__(self, cache_tools_list: bool, client_session_timeout_seconds: float
         self.exit_stack: AsyncExitStack = AsyncExitStack()
         self._cleanup_lock: asyncio.Lock = asyncio.Lock()
         self.cache_tools_list = cache_tools_list
+        self.server_initialize_result: InitializeResult | None = None
 
         self.client_session_timeout_seconds = client_session_timeout_seconds
 
@@ -122,7 +123,8 @@ async def connect(self):
                     else None,
                 )
             )
-            await session.initialize()
+            server_result = await session.initialize()
+            self.server_initialize_result = server_result
             self.session = session
         except Exception as e:
             logger.error(f"Error initializing MCP server: {e}")
diff --git a/src/agents/models/chatcmpl_stream_handler.py b/src/agents/models/chatcmpl_stream_handler.py
index c71adeb55..d18f5912a 100644
--- a/src/agents/models/chatcmpl_stream_handler.py
+++ b/src/agents/models/chatcmpl_stream_handler.py
@@ -38,6 +38,16 @@ class StreamingState:
     function_calls: dict[int, ResponseFunctionToolCall] = field(default_factory=dict)
 
 
+class SequenceNumber:
+    def __init__(self):
+        self._sequence_number = 0
+
+    def get_and_increment(self) -> int:
+        num = self._sequence_number
+        self._sequence_number += 1
+        return num
+
+
 class ChatCmplStreamHandler:
     @classmethod
     async def handle_stream(
@@ -47,13 +57,14 @@ async def handle_stream(
     ) -> AsyncIterator[TResponseStreamEvent]:
         usage: CompletionUsage | None = None
         state = StreamingState()
-
+        sequence_number = SequenceNumber()
         async for chunk in stream:
             if not state.started:
                 state.started = True
                 yield ResponseCreatedEvent(
                     response=response,
                     type="response.created",
+                    sequence_number=sequence_number.get_and_increment(),
                 )
 
             # This is always set by the OpenAI API, but not by others e.g. LiteLLM
@@ -89,6 +100,7 @@ async def handle_stream(
                         item=assistant_item,
                         output_index=0,
                         type="response.output_item.added",
+                        sequence_number=sequence_number.get_and_increment(),
                     )
                     yield ResponseContentPartAddedEvent(
                         content_index=state.text_content_index_and_output[0],
@@ -100,6 +112,7 @@ async def handle_stream(
                             annotations=[],
                         ),
                         type="response.content_part.added",
+                        sequence_number=sequence_number.get_and_increment(),
                     )
                 # Emit the delta for this segment of content
                 yield ResponseTextDeltaEvent(
@@ -108,6 +121,7 @@ async def handle_stream(
                     item_id=FAKE_RESPONSES_ID,
                     output_index=0,
                     type="response.output_text.delta",
+                    sequence_number=sequence_number.get_and_increment(),
                 )
                 # Accumulate the text into the response part
                 state.text_content_index_and_output[1].text += delta.content
@@ -134,6 +148,7 @@ async def handle_stream(
                         item=assistant_item,
                         output_index=0,
                         type="response.output_item.added",
+                        sequence_number=sequence_number.get_and_increment(),
                     )
                     yield ResponseContentPartAddedEvent(
                         content_index=state.refusal_content_index_and_output[0],
@@ -145,6 +160,7 @@ async def handle_stream(
                             annotations=[],
                         ),
                         type="response.content_part.added",
+                        sequence_number=sequence_number.get_and_increment(),
                     )
                 # Emit the delta for this segment of refusal
                 yield ResponseRefusalDeltaEvent(
@@ -153,6 +169,7 @@ async def handle_stream(
                     item_id=FAKE_RESPONSES_ID,
                     output_index=0,
                     type="response.refusal.delta",
+                    sequence_number=sequence_number.get_and_increment(),
                 )
                 # Accumulate the refusal string in the output part
                 state.refusal_content_index_and_output[1].refusal += delta.refusal
@@ -190,6 +207,7 @@ async def handle_stream(
                 output_index=0,
                 part=state.text_content_index_and_output[1],
                 type="response.content_part.done",
+                sequence_number=sequence_number.get_and_increment(),
             )
 
         if state.refusal_content_index_and_output:
@@ -201,6 +219,7 @@ async def handle_stream(
                 output_index=0,
                 part=state.refusal_content_index_and_output[1],
                 type="response.content_part.done",
+                sequence_number=sequence_number.get_and_increment(),
             )
 
         # Actually send events for the function calls
@@ -216,6 +235,7 @@ async def handle_stream(
                 ),
                 output_index=function_call_starting_index,
                 type="response.output_item.added",
+                sequence_number=sequence_number.get_and_increment(),
             )
             # Then, yield the args
             yield ResponseFunctionCallArgumentsDeltaEvent(
@@ -223,6 +243,7 @@ async def handle_stream(
                 item_id=FAKE_RESPONSES_ID,
                 output_index=function_call_starting_index,
                 type="response.function_call_arguments.delta",
+                sequence_number=sequence_number.get_and_increment(),
             )
             # Finally, the ResponseOutputItemDone
             yield ResponseOutputItemDoneEvent(
@@ -235,6 +256,7 @@ async def handle_stream(
                 ),
                 output_index=function_call_starting_index,
                 type="response.output_item.done",
+                sequence_number=sequence_number.get_and_increment(),
             )
 
         # Finally, send the Response completed event
@@ -258,6 +280,7 @@ async def handle_stream(
                 item=assistant_msg,
                 output_index=0,
                 type="response.output_item.done",
+                sequence_number=sequence_number.get_and_increment(),
             )
 
         for function_call in state.function_calls.values():
@@ -289,4 +312,5 @@ async def handle_stream(
         yield ResponseCompletedEvent(
             response=final_response,
             type="response.completed",
+            sequence_number=sequence_number.get_and_increment(),
         )
diff --git a/src/agents/models/openai_chatcompletions.py b/src/agents/models/openai_chatcompletions.py
index 89619f838..4465ff2fd 100644
--- a/src/agents/models/openai_chatcompletions.py
+++ b/src/agents/models/openai_chatcompletions.py
@@ -9,6 +9,7 @@
 from openai.types import ChatModel
 from openai.types.chat import ChatCompletion, ChatCompletionChunk
 from openai.types.responses import Response
+from openai.types.responses.response_usage import InputTokensDetails, OutputTokensDetails
 
 from .. import _debug
 from ..agent_output import AgentOutputSchemaBase
@@ -83,6 +84,18 @@ async def get_response(
                     input_tokens=response.usage.prompt_tokens,
                     output_tokens=response.usage.completion_tokens,
                     total_tokens=response.usage.total_tokens,
+                    input_tokens_details=InputTokensDetails(
+                        cached_tokens=getattr(
+                            response.usage.prompt_tokens_details, "cached_tokens", 0
+                        )
+                        or 0,
+                    ),
+                    output_tokens_details=OutputTokensDetails(
+                        reasoning_tokens=getattr(
+                            response.usage.completion_tokens_details, "reasoning_tokens", 0
+                        )
+                        or 0,
+                    ),
                 )
                 if response.usage
                 else Usage()
@@ -252,7 +265,7 @@ async def _fetch_response(
             stream_options=self._non_null_or_not_given(stream_options),
             store=self._non_null_or_not_given(store),
             reasoning_effort=self._non_null_or_not_given(reasoning_effort),
-            extra_headers={ **HEADERS, **(model_settings.extra_headers or {}) },
+            extra_headers={**HEADERS, **(model_settings.extra_headers or {})},
             extra_query=model_settings.extra_query,
             extra_body=model_settings.extra_body,
             metadata=self._non_null_or_not_given(model_settings.metadata),
diff --git a/src/agents/models/openai_responses.py b/src/agents/models/openai_responses.py
index c1ff85b98..86c8e69cb 100644
--- a/src/agents/models/openai_responses.py
+++ b/src/agents/models/openai_responses.py
@@ -10,6 +10,7 @@
 from openai.types.responses import (
     Response,
     ResponseCompletedEvent,
+    ResponseIncludable,
     ResponseStreamEvent,
     ResponseTextConfigParam,
     ToolParam,
@@ -23,7 +24,17 @@
 from ..handoffs import Handoff
 from ..items import ItemHelpers, ModelResponse, TResponseInputItem
 from ..logger import logger
-from ..tool import ComputerTool, FileSearchTool, FunctionTool, Tool, WebSearchTool
+from ..tool import (
+    CodeInterpreterTool,
+    ComputerTool,
+    FileSearchTool,
+    FunctionTool,
+    HostedMCPTool,
+    ImageGenerationTool,
+    LocalShellTool,
+    Tool,
+    WebSearchTool,
+)
 from ..tracing import SpanError, response_span
 from ..usage import Usage
 from ..version import __version__
@@ -36,13 +47,6 @@
 _USER_AGENT = f"Agents/Python {__version__}"
 _HEADERS = {"User-Agent": _USER_AGENT}
 
-# From the Responses API
-IncludeLiteral = Literal[
-    "file_search_call.results",
-    "message.input_image.image_url",
-    "computer_call_output.output.image_url",
-]
-
 
 class OpenAIResponsesModel(Model):
     """
@@ -98,6 +102,8 @@ async def get_response(
                         input_tokens=response.usage.input_tokens,
                         output_tokens=response.usage.output_tokens,
                         total_tokens=response.usage.total_tokens,
+                        input_tokens_details=response.usage.input_tokens_details,
+                        output_tokens_details=response.usage.output_tokens_details,
                     )
                     if response.usage
                     else Usage()
@@ -271,7 +277,7 @@ def _get_client(self) -> AsyncOpenAI:
 @dataclass
 class ConvertedTools:
     tools: list[ToolParam]
-    includes: list[IncludeLiteral]
+    includes: list[ResponseIncludable]
 
 
 class Converter:
@@ -299,6 +305,18 @@ def convert_tool_choice(
             return {
                 "type": "computer_use_preview",
             }
+        elif tool_choice == "image_generation":
+            return {
+                "type": "image_generation",
+            }
+        elif tool_choice == "code_interpreter":
+            return {
+                "type": "code_interpreter",
+            }
+        elif tool_choice == "mcp":
+            return {
+                "type": "mcp",
+            }
         else:
             return {
                 "type": "function",
@@ -328,7 +346,7 @@ def convert_tools(
         handoffs: list[Handoff[Any]],
     ) -> ConvertedTools:
         converted_tools: list[ToolParam] = []
-        includes: list[IncludeLiteral] = []
+        includes: list[ResponseIncludable] = []
 
         computer_tools = [tool for tool in tools if isinstance(tool, ComputerTool)]
         if len(computer_tools) > 1:
@@ -346,7 +364,7 @@ def convert_tools(
         return ConvertedTools(tools=converted_tools, includes=includes)
 
     @classmethod
-    def _convert_tool(cls, tool: Tool) -> tuple[ToolParam, IncludeLiteral | None]:
+    def _convert_tool(cls, tool: Tool) -> tuple[ToolParam, ResponseIncludable | None]:
         """Returns converted tool and includes"""
 
         if isinstance(tool, FunctionTool):
@@ -357,7 +375,7 @@ def _convert_tool(cls, tool: Tool) -> tuple[ToolParam, IncludeLiteral | None]:
                 "type": "function",
                 "description": tool.description,
             }
-            includes: IncludeLiteral | None = None
+            includes: ResponseIncludable | None = None
         elif isinstance(tool, WebSearchTool):
             ws: WebSearchToolParam = {
                 "type": "web_search_preview",
@@ -387,7 +405,20 @@ def _convert_tool(cls, tool: Tool) -> tuple[ToolParam, IncludeLiteral | None]:
                 "display_height": tool.computer.dimensions[1],
             }
             includes = None
-
+        elif isinstance(tool, HostedMCPTool):
+            converted_tool = tool.tool_config
+            includes = None
+        elif isinstance(tool, ImageGenerationTool):
+            converted_tool = tool.tool_config
+            includes = None
+        elif isinstance(tool, CodeInterpreterTool):
+            converted_tool = tool.tool_config
+            includes = None
+        elif isinstance(tool, LocalShellTool):
+            converted_tool = {
+                "type": "local_shell",
+            }
+            includes = None
         else:
             raise UserError(f"Unknown tool type: {type(tool)}, tool")
 
diff --git a/src/agents/run.py b/src/agents/run.py
index 849da7bfc..b196c3bf1 100644
--- a/src/agents/run.py
+++ b/src/agents/run.py
@@ -689,6 +689,8 @@ async def _run_single_turn_streamed(
                         input_tokens=event.response.usage.input_tokens,
                         output_tokens=event.response.usage.output_tokens,
                         total_tokens=event.response.usage.total_tokens,
+                        input_tokens_details=event.response.usage.input_tokens_details,
+                        output_tokens_details=event.response.usage.output_tokens_details,
                     )
                     if event.response.usage
                     else Usage()
diff --git a/src/agents/stream_events.py b/src/agents/stream_events.py
index bd37d11f3..111d0b951 100644
--- a/src/agents/stream_events.py
+++ b/src/agents/stream_events.py
@@ -35,6 +35,8 @@ class RunItemStreamEvent:
         "tool_called",
         "tool_output",
         "reasoning_item_created",
+        "mcp_approval_requested",
+        "mcp_list_tools",
     ]
     """The name of the event."""
 
diff --git a/src/agents/tool.py b/src/agents/tool.py
index c1c162423..fd5a21c89 100644
--- a/src/agents/tool.py
+++ b/src/agents/tool.py
@@ -7,9 +7,11 @@
 from typing import Any, Callable, Literal, Union, overload
 
 from openai.types.responses.file_search_tool_param import Filters, RankingOptions
+from openai.types.responses.response_output_item import LocalShellCall, McpApprovalRequest
+from openai.types.responses.tool_param import CodeInterpreter, ImageGeneration, Mcp
 from openai.types.responses.web_search_tool_param import UserLocation
 from pydantic import ValidationError
-from typing_extensions import Concatenate, ParamSpec
+from typing_extensions import Concatenate, NotRequired, ParamSpec, TypedDict
 
 from . import _debug
 from .computer import AsyncComputer, Computer
@@ -130,7 +132,115 @@ def name(self):
         return "computer_use_preview"
 
 
-Tool = Union[FunctionTool, FileSearchTool, WebSearchTool, ComputerTool]
+@dataclass
+class MCPToolApprovalRequest:
+    """A request to approve a tool call."""
+
+    ctx_wrapper: RunContextWrapper[Any]
+    """The run context."""
+
+    data: McpApprovalRequest
+    """The data from the MCP tool approval request."""
+
+
+class MCPToolApprovalFunctionResult(TypedDict):
+    """The result of an MCP tool approval function."""
+
+    approve: bool
+    """Whether to approve the tool call."""
+
+    reason: NotRequired[str]
+    """An optional reason, if rejected."""
+
+
+MCPToolApprovalFunction = Callable[
+    [MCPToolApprovalRequest], MaybeAwaitable[MCPToolApprovalFunctionResult]
+]
+"""A function that approves or rejects a tool call."""
+
+
+@dataclass
+class HostedMCPTool:
+    """A tool that allows the LLM to use a remote MCP server. The LLM will automatically list and
+    call tools, without requiring a round trip back to your code.
+    If you want to run MCP servers locally via stdio, in a VPC or other non-publicly-accessible
+    environment, or you just prefer to run tool calls locally, then you can instead use the servers
+    in `agents.mcp` and pass `Agent(mcp_servers=[...])` to the agent."""
+
+    tool_config: Mcp
+    """The MCP tool config, which includes the server URL and other settings."""
+
+    on_approval_request: MCPToolApprovalFunction | None = None
+    """An optional function that will be called if approval is requested for an MCP tool. If not
+    provided, you will need to manually add approvals/rejections to the input and call
+    `Runner.run(...)` again."""
+
+    @property
+    def name(self):
+        return "hosted_mcp"
+
+
+@dataclass
+class CodeInterpreterTool:
+    """A tool that allows the LLM to execute code in a sandboxed environment."""
+
+    tool_config: CodeInterpreter
+    """The tool config, which includes the container and other settings."""
+
+    @property
+    def name(self):
+        return "code_interpreter"
+
+
+@dataclass
+class ImageGenerationTool:
+    """A tool that allows the LLM to generate images."""
+
+    tool_config: ImageGeneration
+    """The tool config, which includes image generation settings."""
+
+    @property
+    def name(self):
+        return "image_generation"
+
+
+@dataclass
+class LocalShellCommandRequest:
+    """A request to execute a command on a shell."""
+
+    ctx_wrapper: RunContextWrapper[Any]
+    """The run context."""
+
+    data: LocalShellCall
+    """The data from the local shell tool call."""
+
+
+LocalShellExecutor = Callable[[LocalShellCommandRequest], MaybeAwaitable[str]]
+"""A function that executes a command on a shell."""
+
+
+@dataclass
+class LocalShellTool:
+    """A tool that allows the LLM to execute commands on a shell."""
+
+    executor: LocalShellExecutor
+    """A function that executes a command on a shell."""
+
+    @property
+    def name(self):
+        return "local_shell"
+
+
+Tool = Union[
+    FunctionTool,
+    FileSearchTool,
+    WebSearchTool,
+    ComputerTool,
+    HostedMCPTool,
+    LocalShellTool,
+    ImageGenerationTool,
+    CodeInterpreterTool,
+]
 """A tool that can be used in an agent."""
 
 
diff --git a/src/agents/usage.py b/src/agents/usage.py
index 23d989b4b..843f62937 100644
--- a/src/agents/usage.py
+++ b/src/agents/usage.py
@@ -1,4 +1,6 @@
-from dataclasses import dataclass
+from dataclasses import dataclass, field
+
+from openai.types.responses.response_usage import InputTokensDetails, OutputTokensDetails
 
 
 @dataclass
@@ -9,9 +11,18 @@ class Usage:
     input_tokens: int = 0
     """Total input tokens sent, across all requests."""
 
+    input_tokens_details: InputTokensDetails = field(
+        default_factory=lambda: InputTokensDetails(cached_tokens=0)
+    )
+    """Details about the input tokens, matching responses API usage details."""
     output_tokens: int = 0
     """Total output tokens received, across all requests."""
 
+    output_tokens_details: OutputTokensDetails = field(
+        default_factory=lambda: OutputTokensDetails(reasoning_tokens=0)
+    )
+    """Details about the output tokens, matching responses API usage details."""
+
     total_tokens: int = 0
     """Total tokens sent and received, across all requests."""
 
@@ -20,3 +31,12 @@ def add(self, other: "Usage") -> None:
         self.input_tokens += other.input_tokens if other.input_tokens else 0
         self.output_tokens += other.output_tokens if other.output_tokens else 0
         self.total_tokens += other.total_tokens if other.total_tokens else 0
+        self.input_tokens_details = InputTokensDetails(
+            cached_tokens=self.input_tokens_details.cached_tokens
+            + other.input_tokens_details.cached_tokens
+        )
+
+        self.output_tokens_details = OutputTokensDetails(
+            reasoning_tokens=self.output_tokens_details.reasoning_tokens
+            + other.output_tokens_details.reasoning_tokens
+        )
diff --git a/tests/fake_model.py b/tests/fake_model.py
index 32f919ef1..9f0c83a2f 100644
--- a/tests/fake_model.py
+++ b/tests/fake_model.py
@@ -129,6 +129,7 @@ async def stream_response(
             yield ResponseCompletedEvent(
                 type="response.completed",
                 response=get_response_obj(output, usage=self.hardcoded_usage),
+                sequence_number=0,
             )
 
 
diff --git a/tests/models/test_litellm_chatcompletions_stream.py b/tests/models/test_litellm_chatcompletions_stream.py
index 80bd8ea22..06e46b39c 100644
--- a/tests/models/test_litellm_chatcompletions_stream.py
+++ b/tests/models/test_litellm_chatcompletions_stream.py
@@ -8,7 +8,11 @@
     ChoiceDeltaToolCall,
     ChoiceDeltaToolCallFunction,
 )
-from openai.types.completion_usage import CompletionUsage
+from openai.types.completion_usage import (
+    CompletionTokensDetails,
+    CompletionUsage,
+    PromptTokensDetails,
+)
 from openai.types.responses import (
     Response,
     ResponseFunctionToolCall,
@@ -46,7 +50,13 @@ async def test_stream_response_yields_events_for_text_content(monkeypatch) -> No
         model="fake",
         object="chat.completion.chunk",
         choices=[Choice(index=0, delta=ChoiceDelta(content="llo"))],
-        usage=CompletionUsage(completion_tokens=5, prompt_tokens=7, total_tokens=12),
+        usage=CompletionUsage(
+            completion_tokens=5,
+            prompt_tokens=7,
+            total_tokens=12,
+            completion_tokens_details=CompletionTokensDetails(reasoning_tokens=2),
+            prompt_tokens_details=PromptTokensDetails(cached_tokens=6),
+        ),
     )
 
     async def fake_stream() -> AsyncIterator[ChatCompletionChunk]:
@@ -112,6 +122,8 @@ async def patched_fetch_response(self, *args, **kwargs):
     assert completed_resp.usage.input_tokens == 7
     assert completed_resp.usage.output_tokens == 5
     assert completed_resp.usage.total_tokens == 12
+    assert completed_resp.usage.input_tokens_details.cached_tokens == 6
+    assert completed_resp.usage.output_tokens_details.reasoning_tokens == 2
 
 
 @pytest.mark.allow_call_model_methods
diff --git a/tests/test_extra_headers.py b/tests/test_extra_headers.py
index f29c25408..a6af30077 100644
--- a/tests/test_extra_headers.py
+++ b/tests/test_extra_headers.py
@@ -1,6 +1,7 @@
 import pytest
 from openai.types.chat.chat_completion import ChatCompletion, Choice
 from openai.types.chat.chat_completion_message import ChatCompletionMessage
+from openai.types.responses.response_usage import InputTokensDetails, OutputTokensDetails
 
 from agents import ModelSettings, ModelTracing, OpenAIChatCompletionsModel, OpenAIResponsesModel
 
@@ -17,21 +18,29 @@ class DummyResponses:
         async def create(self, **kwargs):
             nonlocal called_kwargs
             called_kwargs = kwargs
+
             class DummyResponse:
                 id = "dummy"
                 output = []
                 usage = type(
-                    "Usage", (), {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
+                    "Usage",
+                    (),
+                    {
+                        "input_tokens": 0,
+                        "output_tokens": 0,
+                        "total_tokens": 0,
+                        "input_tokens_details": InputTokensDetails(cached_tokens=0),
+                        "output_tokens_details": OutputTokensDetails(reasoning_tokens=0),
+                    },
                 )()
+
             return DummyResponse()
 
     class DummyClient:
         def __init__(self):
             self.responses = DummyResponses()
 
-
-
-    model = OpenAIResponsesModel(model="gpt-4", openai_client=DummyClient()) # type: ignore
+    model = OpenAIResponsesModel(model="gpt-4", openai_client=DummyClient())  # type: ignore
     extra_headers = {"X-Test-Header": "test-value"}
     await model.get_response(
         system_instructions=None,
@@ -47,7 +56,6 @@ def __init__(self):
     assert called_kwargs["extra_headers"]["X-Test-Header"] == "test-value"
 
 
-
 @pytest.mark.allow_call_model_methods
 @pytest.mark.asyncio
 async def test_extra_headers_passed_to_openai_client():
@@ -76,7 +84,7 @@ def __init__(self):
             self.chat = type("_Chat", (), {"completions": DummyCompletions()})()
             self.base_url = "https://api.openai.com"
 
-    model = OpenAIChatCompletionsModel(model="gpt-4", openai_client=DummyClient()) # type: ignore
+    model = OpenAIChatCompletionsModel(model="gpt-4", openai_client=DummyClient())  # type: ignore
     extra_headers = {"X-Test-Header": "test-value"}
     await model.get_response(
         system_instructions=None,
diff --git a/tests/test_openai_chatcompletions.py b/tests/test_openai_chatcompletions.py
index ba3ec68d0..ba4605d08 100644
--- a/tests/test_openai_chatcompletions.py
+++ b/tests/test_openai_chatcompletions.py
@@ -13,7 +13,10 @@
     ChatCompletionMessageToolCall,
     Function,
 )
-from openai.types.completion_usage import CompletionUsage
+from openai.types.completion_usage import (
+    CompletionUsage,
+    PromptTokensDetails,
+)
 from openai.types.responses import (
     Response,
     ResponseFunctionToolCall,
@@ -51,7 +54,13 @@ async def test_get_response_with_text_message(monkeypatch) -> None:
         model="fake",
         object="chat.completion",
         choices=[choice],
-        usage=CompletionUsage(completion_tokens=5, prompt_tokens=7, total_tokens=12),
+        usage=CompletionUsage(
+            completion_tokens=5,
+            prompt_tokens=7,
+            total_tokens=12,
+            # completion_tokens_details is left unset to test the default.
+            prompt_tokens_details=PromptTokensDetails(cached_tokens=3),
+        ),
     )
 
     async def patched_fetch_response(self, *args, **kwargs):
@@ -81,6 +90,8 @@ async def patched_fetch_response(self, *args, **kwargs):
     assert resp.usage.input_tokens == 7
     assert resp.usage.output_tokens == 5
     assert resp.usage.total_tokens == 12
+    assert resp.usage.input_tokens_details.cached_tokens == 3
+    assert resp.usage.output_tokens_details.reasoning_tokens == 0
     assert resp.response_id is None
 
 
@@ -127,6 +138,8 @@ async def patched_fetch_response(self, *args, **kwargs):
     assert resp.usage.requests == 0
     assert resp.usage.input_tokens == 0
     assert resp.usage.output_tokens == 0
+    assert resp.usage.input_tokens_details.cached_tokens == 0
+    assert resp.usage.output_tokens_details.reasoning_tokens == 0
 
 
 @pytest.mark.allow_call_model_methods
diff --git a/tests/test_openai_chatcompletions_stream.py b/tests/test_openai_chatcompletions_stream.py
index b82f24303..5c8bb9e3a 100644
--- a/tests/test_openai_chatcompletions_stream.py
+++ b/tests/test_openai_chatcompletions_stream.py
@@ -8,7 +8,11 @@
     ChoiceDeltaToolCall,
     ChoiceDeltaToolCallFunction,
 )
-from openai.types.completion_usage import CompletionUsage
+from openai.types.completion_usage import (
+    CompletionTokensDetails,
+    CompletionUsage,
+    PromptTokensDetails,
+)
 from openai.types.responses import (
     Response,
     ResponseFunctionToolCall,
@@ -46,7 +50,13 @@ async def test_stream_response_yields_events_for_text_content(monkeypatch) -> No
         model="fake",
         object="chat.completion.chunk",
         choices=[Choice(index=0, delta=ChoiceDelta(content="llo"))],
-        usage=CompletionUsage(completion_tokens=5, prompt_tokens=7, total_tokens=12),
+        usage=CompletionUsage(
+            completion_tokens=5,
+            prompt_tokens=7,
+            total_tokens=12,
+            prompt_tokens_details=PromptTokensDetails(cached_tokens=2),
+            completion_tokens_details=CompletionTokensDetails(reasoning_tokens=3),
+        ),
     )
 
     async def fake_stream() -> AsyncIterator[ChatCompletionChunk]:
@@ -112,6 +122,8 @@ async def patched_fetch_response(self, *args, **kwargs):
     assert completed_resp.usage.input_tokens == 7
     assert completed_resp.usage.output_tokens == 5
     assert completed_resp.usage.total_tokens == 12
+    assert completed_resp.usage.input_tokens_details.cached_tokens == 2
+    assert completed_resp.usage.output_tokens_details.reasoning_tokens == 3
 
 
 @pytest.mark.allow_call_model_methods
diff --git a/tests/test_responses_tracing.py b/tests/test_responses_tracing.py
index 0bc97a953..db24fe496 100644
--- a/tests/test_responses_tracing.py
+++ b/tests/test_responses_tracing.py
@@ -1,7 +1,10 @@
+from typing import Optional
+
 import pytest
 from inline_snapshot import snapshot
 from openai import AsyncOpenAI
 from openai.types.responses import ResponseCompletedEvent
+from openai.types.responses.response_usage import InputTokensDetails, OutputTokensDetails
 
 from agents import ModelSettings, ModelTracing, OpenAIResponsesModel, trace
 from agents.tracing.span_data import ResponseSpanData
@@ -16,10 +19,25 @@ def is_disabled(self):
 
 
 class DummyUsage:
-    def __init__(self, input_tokens=1, output_tokens=1, total_tokens=2):
+    def __init__(
+        self,
+        input_tokens: int = 1,
+        input_tokens_details: Optional[InputTokensDetails] = None,
+        output_tokens: int = 1,
+        output_tokens_details: Optional[OutputTokensDetails] = None,
+        total_tokens: int = 2,
+    ):
         self.input_tokens = input_tokens
         self.output_tokens = output_tokens
         self.total_tokens = total_tokens
+        self.input_tokens_details = (
+            input_tokens_details if input_tokens_details else InputTokensDetails(cached_tokens=0)
+        )
+        self.output_tokens_details = (
+            output_tokens_details
+            if output_tokens_details
+            else OutputTokensDetails(reasoning_tokens=0)
+        )
 
 
 class DummyResponse:
@@ -32,6 +50,7 @@ def __aiter__(self):
         yield ResponseCompletedEvent(
             type="response.completed",
             response=fake_model.get_response_obj(self.output),
+            sequence_number=0,
         )
 
 
@@ -183,6 +202,7 @@ async def __aiter__(self):
                     yield ResponseCompletedEvent(
                         type="response.completed",
                         response=fake_model.get_response_obj([], "dummy-id-123"),
+                        sequence_number=0,
                     )
 
             return DummyStream()
@@ -235,6 +255,7 @@ async def __aiter__(self):
                     yield ResponseCompletedEvent(
                         type="response.completed",
                         response=fake_model.get_response_obj([], "dummy-id-123"),
+                        sequence_number=0,
                     )
 
             return DummyStream()
@@ -286,6 +307,7 @@ async def __aiter__(self):
                     yield ResponseCompletedEvent(
                         type="response.completed",
                         response=fake_model.get_response_obj([], "dummy-id-123"),
+                        sequence_number=0,
                     )
 
             return DummyStream()
diff --git a/tests/test_usage.py b/tests/test_usage.py
new file mode 100644
index 000000000..405f99ddf
--- /dev/null
+++ b/tests/test_usage.py
@@ -0,0 +1,52 @@
+from openai.types.responses.response_usage import InputTokensDetails, OutputTokensDetails
+
+from agents.usage import Usage
+
+
+def test_usage_add_aggregates_all_fields():
+    u1 = Usage(
+        requests=1,
+        input_tokens=10,
+        input_tokens_details=InputTokensDetails(cached_tokens=3),
+        output_tokens=20,
+        output_tokens_details=OutputTokensDetails(reasoning_tokens=5),
+        total_tokens=30,
+    )
+    u2 = Usage(
+        requests=2,
+        input_tokens=7,
+        input_tokens_details=InputTokensDetails(cached_tokens=4),
+        output_tokens=8,
+        output_tokens_details=OutputTokensDetails(reasoning_tokens=6),
+        total_tokens=15,
+    )
+
+    u1.add(u2)
+
+    assert u1.requests == 3
+    assert u1.input_tokens == 17
+    assert u1.output_tokens == 28
+    assert u1.total_tokens == 45
+    assert u1.input_tokens_details.cached_tokens == 7
+    assert u1.output_tokens_details.reasoning_tokens == 11
+
+
+def test_usage_add_aggregates_with_none_values():
+    u1 = Usage()
+    u2 = Usage(
+        requests=2,
+        input_tokens=7,
+        input_tokens_details=InputTokensDetails(cached_tokens=4),
+        output_tokens=8,
+        output_tokens_details=OutputTokensDetails(reasoning_tokens=6),
+        total_tokens=15,
+    )
+
+    u1.add(u2)
+
+    assert u1.requests == 2
+    assert u1.input_tokens == 7
+    assert u1.output_tokens == 8
+    assert u1.total_tokens == 15
+    assert u1.input_tokens_details.cached_tokens == 4
+    assert u1.output_tokens_details.reasoning_tokens == 6
diff --git a/tests/voice/test_workflow.py b/tests/voice/test_workflow.py
index 2bdf2a657..035a05d56 100644
--- a/tests/voice/test_workflow.py
+++ b/tests/voice/test_workflow.py
@@ -81,11 +81,13 @@ async def stream_response(
                     type="response.output_text.delta",
                     output_index=0,
                     item_id=item.id,
+                    sequence_number=0,
                 )
 
         yield ResponseCompletedEvent(
             type="response.completed",
             response=get_response_obj(output),
+            sequence_number=1,
         )
 
 
diff --git a/uv.lock b/uv.lock
index 6ccc19966..6f2f3f843 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1461,7 +1461,7 @@ wheels = [
 
 [[package]]
 name = "openai"
-version = "1.76.0"
+version = "1.81.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "anyio" },
@@ -1473,14 +1473,14 @@ dependencies = [
     { name = "tqdm" },
     { name = "typing-extensions" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/84/51/817969ec969b73d8ddad085670ecd8a45ef1af1811d8c3b8a177ca4d1309/openai-1.76.0.tar.gz", hash = "sha256:fd2bfaf4608f48102d6b74f9e11c5ecaa058b60dad9c36e409c12477dfd91fb2", size = 434660 }
+sdist = { url = "https://files.pythonhosted.org/packages/1c/89/a1e4f3fa7ca4f7fec90dbf47d93b7cd5ff65924926733af15044e302a192/openai-1.81.0.tar.gz", hash = "sha256:349567a8607e0bcffd28e02f96b5c2397d0d25d06732d90ab3ecbf97abf030f9", size = 456861 }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/59/aa/84e02ab500ca871eb8f62784426963a1c7c17a72fea3c7f268af4bbaafa5/openai-1.76.0-py3-none-any.whl", hash = "sha256:a712b50e78cf78e6d7b2a8f69c4978243517c2c36999756673e07a14ce37dc0a", size = 661201 },
+    { url = "https://files.pythonhosted.org/packages/02/66/bcc7f9bf48e8610a33e3b5c96a5a644dad032d92404ea2a5e8b43ba067e8/openai-1.81.0-py3-none-any.whl", hash = "sha256:1c71572e22b43876c5d7d65ade0b7b516bb527c3d44ae94111267a09125f7bae", size = 717529 },
 ]
 
 [[package]]
 name = "openai-agents"
-version = "0.0.14"
+version = "0.0.16"
 source = { editable = "." }
 dependencies = [
     { name = "griffe" },
@@ -1536,7 +1536,7 @@ requires-dist = [
     { name = "litellm", marker = "extra == 'litellm'", specifier = ">=1.67.4.post1,<2" },
     { name = "mcp", marker = "python_full_version >= '3.10'", specifier = ">=1.8.0,<2" },
     { name = "numpy", marker = "python_full_version >= '3.10' and extra == 'voice'", specifier = ">=2.2.0,<3" },
-    { name = "openai", specifier = ">=1.76.0" },
+    { name = "openai", specifier = ">=1.81.0" },
     { name = "pydantic", specifier = ">=2.10,<3" },
     { name = "requests", specifier = ">=2.0,<3" },
     { name = "types-requests", specifier = ">=2.0,<3" },