
Commit b5ba229

[5/n] MCP tracing (#342)
## Summary
Adds tracing and tests for tracing:
- Tools (including MCP tools) are added to the agent spans
- There's a span for the MCP tools lookup
- Function spans carry MCP data

## Test Plan
Unit tests.
Merge commit b5ba229 (2 parents: dd881ee + 0100227)
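For orientation, here is a minimal sketch of how the pieces land from the caller's side after this change. The agent name, instructions, workflow name, and filesystem path are illustrative placeholders, not code from the commit.

import asyncio

from agents import Agent, Runner, gen_trace_id, trace
from agents.mcp import MCPServerStdio


async def main() -> None:
    async with MCPServerStdio(
        name="Filesystem Server, via npx",  # new `name` param; used as the server label in spans
        params={"command": "npx", "args": ["-y", "@modelcontextprotocol/server-filesystem", "."]},
    ) as server:
        agent = Agent(
            name="Assistant",  # placeholder agent
            instructions="Use the filesystem tools to answer questions about files.",
            mcp_servers=[server],
        )
        trace_id = gen_trace_id()
        with trace(workflow_name="MCP tracing sketch", trace_id=trace_id):
            # The run emits an agent span whose `tools` include the MCP tools, an
            # mcp_tools span for the list_tools() call, and function spans that
            # carry mcp_data with the server name.
            result = await Runner.run(starting_agent=agent, input="List the files you can see.")
            print(result.final_output)
            print(f"View trace: https://platform.openai.com/traces/{trace_id}")


if __name__ == "__main__":
    asyncio.run(main())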

13 files changed (+352, -22 lines)


examples/mcp/filesystem_example/main.py

Lines changed: 6 additions & 3 deletions
@@ -2,7 +2,7 @@
 import os
 import shutil
 
-from agents import Agent, Runner, trace
+from agents import Agent, Runner, gen_trace_id, trace
 from agents.mcp import MCPServer, MCPServerStdio
 
 
@@ -37,12 +37,15 @@ async def main():
     samples_dir = os.path.join(current_dir, "sample_files")
 
     async with MCPServerStdio(
+        name="Filesystem Server, via npx",
         params={
             "command": "npx",
             "args": ["-y", "@modelcontextprotocol/server-filesystem", samples_dir],
-        }
+        },
     ) as server:
-        with trace(workflow_name="MCP Filesystem Example"):
+        trace_id = gen_trace_id()
+        with trace(workflow_name="MCP Filesystem Example", trace_id=trace_id):
+            print(f"View trace: https://platform.openai.com/traces/{trace_id}\n")
             await run(server)
 
 

src/agents/__init__.py

Lines changed: 4 additions & 0 deletions
@@ -70,6 +70,7 @@
     GenerationSpanData,
     GuardrailSpanData,
     HandoffSpanData,
+    MCPListToolsSpanData,
     Span,
     SpanData,
     SpanError,
@@ -89,6 +90,7 @@
     get_current_trace,
     guardrail_span,
     handoff_span,
+    mcp_tools_span,
     set_trace_processors,
     set_tracing_disabled,
     set_tracing_export_api_key,
@@ -220,6 +222,7 @@ def enable_verbose_stdout_logging():
     "speech_group_span",
     "transcription_span",
     "speech_span",
+    "mcp_tools_span",
     "trace",
     "Trace",
     "TracingProcessor",
@@ -234,6 +237,7 @@ def enable_verbose_stdout_logging():
     "HandoffSpanData",
     "SpeechGroupSpanData",
     "SpeechSpanData",
+    "MCPListToolsSpanData",
     "TranscriptionSpanData",
     "set_default_openai_key",
     "set_default_openai_client",

src/agents/agent.py

Lines changed: 2 additions & 1 deletion
@@ -228,4 +228,5 @@ async def get_mcp_tools(self) -> list[Tool]:
 
     async def get_all_tools(self) -> list[Tool]:
         """All agent tools, including MCP tools and function tools."""
-        return await MCPUtil.get_all_function_tools(self.mcp_servers) + self.tools
+        mcp_tools = await self.get_mcp_tools()
+        return mcp_tools + self.tools

src/agents/mcp/server.py

Lines changed: 36 additions & 2 deletions
@@ -27,6 +27,12 @@ async def connect(self):
         """
         pass
 
+    @property
+    @abc.abstractmethod
+    def name(self) -> str:
+        """A readable name for the server."""
+        pass
+
     @abc.abstractmethod
     async def cleanup(self):
         """Cleanup the server. For example, this might mean closing a subprocess or
@@ -171,7 +177,12 @@ class MCPServerStdio(_MCPServerWithClientSession):
     details.
     """
 
-    def __init__(self, params: MCPServerStdioParams, cache_tools_list: bool = False):
+    def __init__(
+        self,
+        params: MCPServerStdioParams,
+        cache_tools_list: bool = False,
+        name: str | None = None,
+    ):
         """Create a new MCP server based on the stdio transport.
 
         Args:
@@ -185,6 +196,8 @@ def __init__(self, params: MCPServerStdioParams, cache_tools_list: bool = False)
                 invalidated by calling `invalidate_tools_cache()`. You should set this to `True`
                 if you know the server will not change its tools list, because it can drastically
                 improve latency (by avoiding a round-trip to the server every time).
+            name: A readable name for the server. If not provided, we'll create one from the
+                command.
         """
         super().__init__(cache_tools_list)
 
@@ -197,6 +210,8 @@ def __init__(self, params: MCPServerStdioParams, cache_tools_list: bool = False)
             encoding_error_handler=params.get("encoding_error_handler", "strict"),
         )
 
+        self._name = name or f"stdio: {self.params.command}"
+
     def create_streams(
         self,
     ) -> AbstractAsyncContextManager[
@@ -208,6 +223,11 @@ def create_streams(
         """Create the streams for the server."""
         return stdio_client(self.params)
 
+    @property
+    def name(self) -> str:
+        """A readable name for the server."""
+        return self._name
+
 
 class MCPServerSseParams(TypedDict):
     """Mirrors the params in`mcp.client.sse.sse_client`."""
@@ -231,7 +251,12 @@ class MCPServerSse(_MCPServerWithClientSession):
     for details.
     """
 
-    def __init__(self, params: MCPServerSseParams, cache_tools_list: bool = False):
+    def __init__(
+        self,
+        params: MCPServerSseParams,
+        cache_tools_list: bool = False,
+        name: str | None = None,
+    ):
         """Create a new MCP server based on the HTTP with SSE transport.
 
         Args:
@@ -245,10 +270,14 @@ def __init__(self, params: MCPServerSseParams, cache_tools_list: bool = False):
                 invalidated by calling `invalidate_tools_cache()`. You should set this to `True`
                 if you know the server will not change its tools list, because it can drastically
                 improve latency (by avoiding a round-trip to the server every time).
+
+            name: A readable name for the server. If not provided, we'll create one from the
+                URL.
         """
         super().__init__(cache_tools_list)
 
         self.params = params
+        self._name = name or f"sse: {self.params['url']}"
 
     def create_streams(
         self,
@@ -265,3 +294,8 @@ def create_streams(
             timeout=self.params.get("timeout", 5),
             sse_read_timeout=self.params.get("sse_read_timeout", 60 * 5),
         )
+
+    @property
+    def name(self) -> str:
+        """A readable name for the server."""
+        return self._name
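
A short sketch of the new name parameter on both server classes; the URL, command, and server names below are placeholders rather than values from the commit.

from agents.mcp import MCPServerSse, MCPServerStdio

# Explicit name: this string becomes the server label on mcp_tools spans and mcp_data.
sse_server = MCPServerSse(
    params={"url": "http://localhost:8000/sse"},  # placeholder URL
    name="Local SSE server",
)

# No name given: the server falls back to a generated one, e.g. "stdio: <command>" or "sse: <url>".
stdio_server = MCPServerStdio(
    params={"command": "npx", "args": ["-y", "@modelcontextprotocol/server-everything"]},
)
print(stdio_server.name)  # -> "stdio: npx"
print(sse_server.name)    # -> "Local SSE server"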

src/agents/mcp/util.py

Lines changed: 23 additions & 4 deletions
@@ -7,6 +7,7 @@
 from ..logger import logger
 from ..run_context import RunContextWrapper
 from ..tool import FunctionTool, Tool
+from ..tracing import FunctionSpanData, get_current_span, mcp_tools_span
 
 if TYPE_CHECKING:
     from mcp.types import Tool as MCPTool
@@ -38,7 +39,11 @@ async def get_all_function_tools(cls, servers: list["MCPServer"]) -> list[Tool]:
     @classmethod
     async def get_function_tools(cls, server: "MCPServer") -> list[Tool]:
         """Get all function tools from a single MCP server."""
-        tools = await server.list_tools()
+
+        with mcp_tools_span(server=server.name) as span:
+            tools = await server.list_tools()
+            span.span_data.result = [tool.name for tool in tools]
+
         return [cls.to_function_tool(tool, server) for tool in tools]
 
     @classmethod
@@ -88,9 +93,23 @@ async def invoke_mcp_tool(
         # The MCP tool result is a list of content items, whereas OpenAI tool outputs are a single
         # string. We'll try to convert.
         if len(result.content) == 1:
-            return result.content[0].model_dump_json()
+            tool_output = result.content[0].model_dump_json()
         elif len(result.content) > 1:
-            return json.dumps([item.model_dump() for item in result.content])
+            tool_output = json.dumps([item.model_dump() for item in result.content])
         else:
             logger.error(f"Errored MCP tool result: {result}")
-            return "Error running tool."
+            tool_output = "Error running tool."
+
+        current_span = get_current_span()
+        if current_span:
+            if isinstance(current_span.span_data, FunctionSpanData):
+                current_span.span_data.output = tool_output
+                current_span.span_data.mcp_data = {
+                    "server": server.name,
+                }
+            else:
+                logger.warning(
+                    f"Current span is not a FunctionSpanData, skipping tool output: {current_span}"
+                )
+
+        return tool_output
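
To see the new fields end to end, here is a hedged sketch of a custom tracing processor that prints them. It assumes the span-data attributes added elsewhere in this commit (MCPListToolsSpanData.server/result and FunctionSpanData.mcp_data) and uses the SDK's existing TracingProcessor and add_trace_processor hooks.

from typing import Any

from agents import add_trace_processor
from agents.tracing import Span, Trace, TracingProcessor
from agents.tracing.span_data import FunctionSpanData, MCPListToolsSpanData


class PrintMCPSpans(TracingProcessor):
    """Illustrative processor: print the MCP-related fields as spans finish."""

    def on_trace_start(self, trace: Trace) -> None: ...

    def on_trace_end(self, trace: Trace) -> None: ...

    def on_span_start(self, span: Span[Any]) -> None: ...

    def on_span_end(self, span: Span[Any]) -> None:
        data = span.span_data
        if isinstance(data, MCPListToolsSpanData):
            # Populated by MCPUtil.get_function_tools via mcp_tools_span.
            print(f"Listed tools on {data.server}: {data.result}")
        elif isinstance(data, FunctionSpanData) and getattr(data, "mcp_data", None):
            # Populated by MCPUtil.invoke_mcp_tool on the surrounding function span.
            print(f"{data.name} ran on MCP server {data.mcp_data['server']}")

    def shutdown(self) -> None: ...

    def force_flush(self) -> None: ...


add_trace_processor(PrintMCPSpans())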

src/agents/run.py

Lines changed: 9 additions & 8 deletions
@@ -7,8 +7,6 @@
 
 from openai.types.responses import ResponseCompletedEvent
 
-from agents.tool import Tool
-
 from ._run_impl import (
     AgentToolUseTracker,
     NextStepFinalOutput,
@@ -40,6 +38,7 @@
 from .result import RunResult, RunResultStreaming
 from .run_context import RunContextWrapper, TContext
 from .stream_events import AgentUpdatedStreamEvent, RawResponsesStreamEvent
+from .tool import Tool
 from .tracing import Span, SpanError, agent_span, get_current_trace, trace
 from .tracing.span_data import AgentSpanData
 from .usage import Usage
@@ -182,8 +181,6 @@ async def run(
                     # agent changes, or if the agent loop ends.
                     if current_span is None:
                         handoff_names = [h.agent_name for h in cls._get_handoffs(current_agent)]
-                        all_tools = await cls._get_all_tools(current_agent)
-                        tool_names = [t.name for t in all_tools]
                         if output_schema := cls._get_output_schema(current_agent):
                             output_type_name = output_schema.output_type_name()
                         else:
@@ -192,11 +189,13 @@
                         current_span = agent_span(
                             name=current_agent.name,
                             handoffs=handoff_names,
-                            tools=tool_names,
                             output_type=output_type_name,
                         )
                         current_span.start(mark_as_current=True)
 
+                    all_tools = await cls._get_all_tools(current_agent)
+                    current_span.span_data.tools = [t.name for t in all_tools]
+
                     current_turn += 1
                     if current_turn > max_turns:
                         _error_tracing.attach_error_to_span(
@@ -504,7 +503,6 @@ async def _run_streamed_impl(
                 # agent changes, or if the agent loop ends.
                 if current_span is None:
                     handoff_names = [h.agent_name for h in cls._get_handoffs(current_agent)]
-                    tool_names = [t.name for t in current_agent.tools]
                     if output_schema := cls._get_output_schema(current_agent):
                         output_type_name = output_schema.output_type_name()
                     else:
@@ -513,11 +511,13 @@
                     current_span = agent_span(
                         name=current_agent.name,
                         handoffs=handoff_names,
-                        tools=tool_names,
                        output_type=output_type_name,
                    )
                    current_span.start(mark_as_current=True)
 
+                all_tools = await cls._get_all_tools(current_agent)
+                tool_names = [t.name for t in all_tools]
+                current_span.span_data.tools = tool_names
                current_turn += 1
                streamed_result.current_turn = current_turn
 
@@ -553,6 +553,7 @@
                    run_config,
                    should_run_agent_start_hooks,
                    tool_use_tracker,
+                    all_tools,
                )
                should_run_agent_start_hooks = False
 
@@ -621,6 +622,7 @@ async def _run_single_turn_streamed(
        run_config: RunConfig,
        should_run_agent_start_hooks: bool,
        tool_use_tracker: AgentToolUseTracker,
+        all_tools: list[Tool],
    ) -> SingleStepResult:
        if should_run_agent_start_hooks:
            await asyncio.gather(
@@ -640,7 +642,6 @@
        system_prompt = await agent.get_system_prompt(context_wrapper)
 
        handoffs = cls._get_handoffs(agent)
-        all_tools = await cls._get_all_tools(agent)
        model = cls._get_model(agent, run_config)
        model_settings = agent.model_settings.resolve(run_config.model_settings)
        model_settings = RunImpl.maybe_reset_tool_choice(agent, tool_use_tracker, model_settings)

src/agents/tracing/__init__.py

Lines changed: 4 additions & 0 deletions
@@ -9,6 +9,7 @@
     get_current_trace,
     guardrail_span,
     handoff_span,
+    mcp_tools_span,
     response_span,
     speech_group_span,
     speech_span,
@@ -25,6 +26,7 @@
     GenerationSpanData,
     GuardrailSpanData,
     HandoffSpanData,
+    MCPListToolsSpanData,
     ResponseSpanData,
     SpanData,
     SpeechGroupSpanData,
@@ -59,6 +61,7 @@
     "GenerationSpanData",
     "GuardrailSpanData",
     "HandoffSpanData",
+    "MCPListToolsSpanData",
     "ResponseSpanData",
     "SpeechGroupSpanData",
     "SpeechSpanData",
@@ -69,6 +72,7 @@
     "speech_group_span",
     "speech_span",
     "transcription_span",
+    "mcp_tools_span",
 ]

src/agents/tracing/create.py

Lines changed: 29 additions & 0 deletions
@@ -12,6 +12,7 @@
     GenerationSpanData,
     GuardrailSpanData,
     HandoffSpanData,
+    MCPListToolsSpanData,
     ResponseSpanData,
     SpeechGroupSpanData,
     SpeechSpanData,
@@ -424,3 +425,31 @@ def speech_group_span(
         parent=parent,
         disabled=disabled,
     )
+
+
+def mcp_tools_span(
+    server: str | None = None,
+    result: list[str] | None = None,
+    span_id: str | None = None,
+    parent: Trace | Span[Any] | None = None,
+    disabled: bool = False,
+) -> Span[MCPListToolsSpanData]:
+    """Create a new MCP list tools span. The span will not be started automatically, you should
+    either do `with mcp_tools_span() ...` or call `span.start()` + `span.finish()` manually.
+
+    Args:
+        server: The name of the MCP server.
+        result: The result of the MCP list tools call.
+        span_id: The ID of the span. Optional. If not provided, we will generate an ID. We
+            recommend using `util.gen_span_id()` to generate a span ID, to guarantee that IDs are
+            correctly formatted.
+        parent: The parent span or trace. If not provided, we will automatically use the current
+            trace/span as the parent.
+        disabled: If True, we will return a Span but the Span will not be recorded.
+    """
+    return GLOBAL_TRACE_PROVIDER.create_span(
+        span_data=MCPListToolsSpanData(server=server, result=result),
+        span_id=span_id,
+        parent=parent,
+        disabled=disabled,
+    )
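
As the docstring notes, the span can be used either as a context manager or started and finished manually. A small sketch; the server and tool names are placeholders.

from agents import mcp_tools_span

# Context-manager form, mirroring MCPUtil.get_function_tools:
with mcp_tools_span(server="placeholder-server") as span:
    span.span_data.result = ["read_file", "write_file"]  # placeholder tool names

# Manual form, per the docstring:
span = mcp_tools_span(server="placeholder-server")
span.start()
span.span_data.result = ["read_file", "write_file"]
span.finish()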
