[5/n] MCP tracing #342

Merged
merged 1 commit into from Mar 25, 2025
9 changes: 6 additions & 3 deletions examples/mcp/filesystem_example/main.py
@@ -2,7 +2,7 @@
import os
import shutil

from agents import Agent, Runner, trace
from agents import Agent, Runner, gen_trace_id, trace
from agents.mcp import MCPServer, MCPServerStdio


@@ -37,12 +37,15 @@ async def main():
samples_dir = os.path.join(current_dir, "sample_files")

async with MCPServerStdio(
name="Filesystem Server, via npx",
params={
"command": "npx",
"args": ["-y", "@modelcontextprotocol/server-filesystem", samples_dir],
}
},
) as server:
with trace(workflow_name="MCP Filesystem Example"):
trace_id = gen_trace_id()
with trace(workflow_name="MCP Filesystem Example", trace_id=trace_id):
print(f"View trace: https://platform.openai.com/traces/{trace_id}\n")
await run(server)
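
The same pattern as a standalone sketch (not part of this diff): pre-generating the trace ID lets the script print a link to the trace before the run starts. The agent name, instructions, and prompt below are placeholders.

import asyncio

from agents import Agent, Runner, gen_trace_id, trace


async def demo() -> None:
    # Generate the trace ID up front so the trace URL can be printed immediately.
    trace_id = gen_trace_id()
    with trace(workflow_name="Demo workflow", trace_id=trace_id):
        print(f"View trace: https://platform.openai.com/traces/{trace_id}\n")
        agent = Agent(name="Assistant", instructions="Answer concisely.")
        result = await Runner.run(agent, "Say hello.")
        print(result.final_output)


if __name__ == "__main__":
    asyncio.run(demo())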


4 changes: 4 additions & 0 deletions src/agents/__init__.py
@@ -70,6 +70,7 @@
GenerationSpanData,
GuardrailSpanData,
HandoffSpanData,
MCPListToolsSpanData,
Span,
SpanData,
SpanError,
@@ -89,6 +90,7 @@
get_current_trace,
guardrail_span,
handoff_span,
mcp_tools_span,
set_trace_processors,
set_tracing_disabled,
set_tracing_export_api_key,
@@ -220,6 +222,7 @@ def enable_verbose_stdout_logging():
"speech_group_span",
"transcription_span",
"speech_span",
"mcp_tools_span",
"trace",
"Trace",
"TracingProcessor",
@@ -234,6 +237,7 @@ def enable_verbose_stdout_logging():
"HandoffSpanData",
"SpeechGroupSpanData",
"SpeechSpanData",
"MCPListToolsSpanData",
"TranscriptionSpanData",
"set_default_openai_key",
"set_default_openai_client",
3 changes: 2 additions & 1 deletion src/agents/agent.py
@@ -228,4 +228,5 @@ async def get_mcp_tools(self) -> list[Tool]:

async def get_all_tools(self) -> list[Tool]:
"""All agent tools, including MCP tools and function tools."""
return await MCPUtil.get_all_function_tools(self.mcp_servers) + self.tools
mcp_tools = await self.get_mcp_tools()
return mcp_tools + self.tools
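
A tiny usage sketch (not in this PR) of the refactored method, assuming `agent` is an Agent configured with both `mcp_servers` and function `tools`:

all_tools = await agent.get_all_tools()
print([tool.name for tool in all_tools])  # MCP-derived tools first, then the agent's function tools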
38 changes: 36 additions & 2 deletions src/agents/mcp/server.py
@@ -27,6 +27,12 @@ async def connect(self):
"""
pass

@property
@abc.abstractmethod
def name(self) -> str:
"""A readable name for the server."""
pass

@abc.abstractmethod
async def cleanup(self):
"""Cleanup the server. For example, this might mean closing a subprocess or
@@ -171,7 +177,12 @@ class MCPServerStdio(_MCPServerWithClientSession):
details.
"""

def __init__(self, params: MCPServerStdioParams, cache_tools_list: bool = False):
def __init__(
self,
params: MCPServerStdioParams,
cache_tools_list: bool = False,
name: str | None = None,
):
"""Create a new MCP server based on the stdio transport.

Args:
@@ -185,6 +196,8 @@ def __init__(self, params: MCPServerStdioParams, cache_tools_list: bool = False)
invalidated by calling `invalidate_tools_cache()`. You should set this to `True`
if you know the server will not change its tools list, because it can drastically
improve latency (by avoiding a round-trip to the server every time).
name: A readable name for the server. If not provided, we'll create one from the
command.
"""
super().__init__(cache_tools_list)

@@ -197,6 +210,8 @@ def __init__(self, params: MCPServerStdioParams, cache_tools_list: bool = False)
encoding_error_handler=params.get("encoding_error_handler", "strict"),
)

self._name = name or f"stdio: {self.params.command}"

def create_streams(
self,
) -> AbstractAsyncContextManager[
@@ -208,6 +223,11 @@ def create_streams(
"""Create the streams for the server."""
return stdio_client(self.params)

@property
def name(self) -> str:
"""A readable name for the server."""
return self._name


class MCPServerSseParams(TypedDict):
"""Mirrors the params in`mcp.client.sse.sse_client`."""
@@ -231,7 +251,12 @@ class MCPServerSse(_MCPServerWithClientSession):
for details.
"""

def __init__(self, params: MCPServerSseParams, cache_tools_list: bool = False):
def __init__(
self,
params: MCPServerSseParams,
cache_tools_list: bool = False,
name: str | None = None,
):
"""Create a new MCP server based on the HTTP with SSE transport.

Args:
@@ -245,10 +270,14 @@ def __init__(self, params: MCPServerSseParams, cache_tools_list: bool = False):
invalidated by calling `invalidate_tools_cache()`. You should set this to `True`
if you know the server will not change its tools list, because it can drastically
improve latency (by avoiding a round-trip to the server every time).

name: A readable name for the server. If not provided, we'll create one from the
URL.
"""
super().__init__(cache_tools_list)

self.params = params
self._name = name or f"sse: {self.params['url']}"

def create_streams(
self,
@@ -265,3 +294,8 @@ def create_streams(
timeout=self.params.get("timeout", 5),
sse_read_timeout=self.params.get("sse_read_timeout", 60 * 5),
)

@property
def name(self) -> str:
"""A readable name for the server."""
return self._name
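
A construction sketch (not part of this diff) showing the new optional `name` argument and the derived defaults; the path and URL are placeholders, and it assumes `MCPServerSse` is exported from `agents.mcp` alongside `MCPServerStdio`.

from agents.mcp import MCPServerSse, MCPServerStdio

stdio_server = MCPServerStdio(
    params={"command": "npx", "args": ["-y", "@modelcontextprotocol/server-filesystem", "."]},
    name="Filesystem Server, via npx",  # explicit readable name
)

sse_server = MCPServerSse(
    params={"url": "http://localhost:8000/sse"},
    # name omitted: falls back to "sse: http://localhost:8000/sse"
)

print(stdio_server.name, sse_server.name)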
27 changes: 23 additions & 4 deletions src/agents/mcp/util.py
@@ -7,6 +7,7 @@
from ..logger import logger
from ..run_context import RunContextWrapper
from ..tool import FunctionTool, Tool
from ..tracing import FunctionSpanData, get_current_span, mcp_tools_span

if TYPE_CHECKING:
from mcp.types import Tool as MCPTool
@@ -38,7 +39,11 @@ async def get_all_function_tools(cls, servers: list["MCPServer"]) -> list[Tool]:
@classmethod
async def get_function_tools(cls, server: "MCPServer") -> list[Tool]:
"""Get all function tools from a single MCP server."""
tools = await server.list_tools()

with mcp_tools_span(server=server.name) as span:
tools = await server.list_tools()
span.span_data.result = [tool.name for tool in tools]

return [cls.to_function_tool(tool, server) for tool in tools]

@classmethod
@@ -88,9 +93,23 @@ async def invoke_mcp_tool(
# The MCP tool result is a list of content items, whereas OpenAI tool outputs are a single
# string. We'll try to convert.
if len(result.content) == 1:
return result.content[0].model_dump_json()
tool_output = result.content[0].model_dump_json()
elif len(result.content) > 1:
return json.dumps([item.model_dump() for item in result.content])
tool_output = json.dumps([item.model_dump() for item in result.content])
else:
logger.error(f"Errored MCP tool result: {result}")
return "Error running tool."
tool_output = "Error running tool."

current_span = get_current_span()
if current_span:
if isinstance(current_span.span_data, FunctionSpanData):
current_span.span_data.output = tool_output
current_span.span_data.mcp_data = {
"server": server.name,
}
else:
logger.warning(
f"Current span is not a FunctionSpanData, skipping tool output: {current_span}"
)

return tool_output
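
The content-to-string conversion above, restated as a self-contained sketch (plain dicts stand in for the pydantic content models, so `json.dumps` replaces `model_dump_json`/`model_dump`):

import json


def content_to_tool_output(items: list[dict]) -> str:
    # One content item: serialize it directly; several: serialize the list;
    # none: report an error, mirroring invoke_mcp_tool above.
    if len(items) == 1:
        return json.dumps(items[0])
    elif len(items) > 1:
        return json.dumps(items)
    return "Error running tool."


print(content_to_tool_output([{"type": "text", "text": "done"}]))  # '{"type": "text", "text": "done"}'
print(content_to_tool_output([]))                                  # 'Error running tool.'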
17 changes: 9 additions & 8 deletions src/agents/run.py
@@ -7,8 +7,6 @@

from openai.types.responses import ResponseCompletedEvent

from agents.tool import Tool

from ._run_impl import (
AgentToolUseTracker,
NextStepFinalOutput,
@@ -40,6 +38,7 @@
from .result import RunResult, RunResultStreaming
from .run_context import RunContextWrapper, TContext
from .stream_events import AgentUpdatedStreamEvent, RawResponsesStreamEvent
from .tool import Tool
from .tracing import Span, SpanError, agent_span, get_current_trace, trace
from .tracing.span_data import AgentSpanData
from .usage import Usage
@@ -182,8 +181,6 @@ async def run(
# agent changes, or if the agent loop ends.
if current_span is None:
handoff_names = [h.agent_name for h in cls._get_handoffs(current_agent)]
all_tools = await cls._get_all_tools(current_agent)
tool_names = [t.name for t in all_tools]
if output_schema := cls._get_output_schema(current_agent):
output_type_name = output_schema.output_type_name()
else:
@@ -192,11 +189,13 @@
current_span = agent_span(
name=current_agent.name,
handoffs=handoff_names,
tools=tool_names,
output_type=output_type_name,
)
current_span.start(mark_as_current=True)

all_tools = await cls._get_all_tools(current_agent)
current_span.span_data.tools = [t.name for t in all_tools]

current_turn += 1
if current_turn > max_turns:
_error_tracing.attach_error_to_span(
@@ -504,7 +503,6 @@ async def _run_streamed_impl(
# agent changes, or if the agent loop ends.
if current_span is None:
handoff_names = [h.agent_name for h in cls._get_handoffs(current_agent)]
tool_names = [t.name for t in current_agent.tools]
if output_schema := cls._get_output_schema(current_agent):
output_type_name = output_schema.output_type_name()
else:
@@ -513,11 +511,13 @@
current_span = agent_span(
name=current_agent.name,
handoffs=handoff_names,
tools=tool_names,
output_type=output_type_name,
)
current_span.start(mark_as_current=True)

all_tools = await cls._get_all_tools(current_agent)
tool_names = [t.name for t in all_tools]
current_span.span_data.tools = tool_names
current_turn += 1
streamed_result.current_turn = current_turn

@@ -553,6 +553,7 @@ async def _run_streamed_impl(
run_config,
should_run_agent_start_hooks,
tool_use_tracker,
all_tools,
)
should_run_agent_start_hooks = False

@@ -621,6 +622,7 @@ async def _run_single_turn_streamed(
run_config: RunConfig,
should_run_agent_start_hooks: bool,
tool_use_tracker: AgentToolUseTracker,
all_tools: list[Tool],
) -> SingleStepResult:
if should_run_agent_start_hooks:
await asyncio.gather(
@@ -640,7 +642,6 @@
system_prompt = await agent.get_system_prompt(context_wrapper)

handoffs = cls._get_handoffs(agent)
all_tools = await cls._get_all_tools(agent)
model = cls._get_model(agent, run_config)
model_settings = agent.model_settings.resolve(run_config.model_settings)
model_settings = RunImpl.maybe_reset_tool_choice(agent, tool_use_tracker, model_settings)
4 changes: 4 additions & 0 deletions src/agents/tracing/__init__.py
@@ -9,6 +9,7 @@
get_current_trace,
guardrail_span,
handoff_span,
mcp_tools_span,
response_span,
speech_group_span,
speech_span,
@@ -25,6 +26,7 @@
GenerationSpanData,
GuardrailSpanData,
HandoffSpanData,
MCPListToolsSpanData,
ResponseSpanData,
SpanData,
SpeechGroupSpanData,
@@ -59,6 +61,7 @@
"GenerationSpanData",
"GuardrailSpanData",
"HandoffSpanData",
"MCPListToolsSpanData",
"ResponseSpanData",
"SpeechGroupSpanData",
"SpeechSpanData",
@@ -69,6 +72,7 @@
"speech_group_span",
"speech_span",
"transcription_span",
"mcp_tools_span",
]


29 changes: 29 additions & 0 deletions src/agents/tracing/create.py
@@ -12,6 +12,7 @@
GenerationSpanData,
GuardrailSpanData,
HandoffSpanData,
MCPListToolsSpanData,
ResponseSpanData,
SpeechGroupSpanData,
SpeechSpanData,
@@ -424,3 +425,31 @@ def speech_group_span(
parent=parent,
disabled=disabled,
)


def mcp_tools_span(
server: str | None = None,
result: list[str] | None = None,
span_id: str | None = None,
parent: Trace | Span[Any] | None = None,
disabled: bool = False,
) -> Span[MCPListToolsSpanData]:
"""Create a new MCP list tools span. The span will not be started automatically, you should
either do `with mcp_tools_span() ...` or call `span.start()` + `span.finish()` manually.

Args:
server: The name of the MCP server.
result: The result of the MCP list tools call.
span_id: The ID of the span. Optional. If not provided, we will generate an ID. We
recommend using `util.gen_span_id()` to generate a span ID, to guarantee that IDs are
correctly formatted.
parent: The parent span or trace. If not provided, we will automatically use the current
trace/span as the parent.
disabled: If True, we will return a Span but the Span will not be recorded.
"""
return GLOBAL_TRACE_PROVIDER.create_span(
span_data=MCPListToolsSpanData(server=server, result=result),
span_id=span_id,
parent=parent,
disabled=disabled,
)
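
A usage sketch (not part of this diff) of the two styles the docstring mentions; the server name and tool names are placeholders, and it assumes an enclosing `trace(...)` is active so the spans are recorded.

from agents.tracing import mcp_tools_span

# Context-manager style: the span is started and finished automatically.
with mcp_tools_span(server="filesystem") as span:
    span.span_data.result = ["read_file", "write_file"]

# Manual style: start and finish the span explicitly.
span = mcp_tools_span(server="filesystem")
span.start()
span.span_data.result = ["read_file"]
span.finish()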