diff --git a/Makefile b/Makefile
index 16ed5fe55..f6b779e36 100644
--- a/Makefile
+++ b/Makefile
@@ -5,6 +5,7 @@ sync:
 .PHONY: format
 format: 
 	uv run ruff format
+	uv run ruff check --fix
 
 .PHONY: lint
 lint: 
diff --git a/tests/test_agent_tracing.py b/tests/test_agent_tracing.py
index 3d7196ab0..8318b60b3 100644
--- a/tests/test_agent_tracing.py
+++ b/tests/test_agent_tracing.py
@@ -9,7 +9,7 @@
 
 from .fake_model import FakeModel
 from .test_responses import get_text_message
-from .testing_processor import fetch_normalized_spans, fetch_ordered_spans, fetch_traces
+from .testing_processor import fetch_normalized_spans, fetch_traces
 
 
 @pytest.mark.asyncio
@@ -23,9 +23,6 @@ async def test_single_run_is_single_trace():
 
     await Runner.run(agent, input="first_test")
 
-    traces = fetch_traces()
-    assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
-
     assert fetch_normalized_spans() == snapshot(
         [
             {
@@ -45,12 +42,6 @@ async def test_single_run_is_single_trace():
         ]
     )
 
-    spans = fetch_ordered_spans()
-    assert len(spans) == 1, (
-        f"Got {len(spans)}, but expected 1: the agent span. data:"
-        f"{[span.span_data for span in spans]}"
-    )
-
 
 @pytest.mark.asyncio
 async def test_multiple_runs_are_multiple_traces():
@@ -69,9 +60,6 @@ async def test_multiple_runs_are_multiple_traces():
     await Runner.run(agent, input="first_test")
     await Runner.run(agent, input="second_test")
 
-    traces = fetch_traces()
-    assert len(traces) == 2, f"Expected 2 traces, got {len(traces)}"
-
     assert fetch_normalized_spans() == snapshot(
         [
             {
@@ -105,9 +93,6 @@ async def test_multiple_runs_are_multiple_traces():
         ]
     )
 
-    spans = fetch_ordered_spans()
-    assert len(spans) == 2, f"Got {len(spans)}, but expected 2: agent span per run"
-
 
 @pytest.mark.asyncio
 async def test_wrapped_trace_is_single_trace():
@@ -129,9 +114,6 @@ async def test_wrapped_trace_is_single_trace():
         await Runner.run(agent, input="second_test")
         await Runner.run(agent, input="third_test")
 
-    traces = fetch_traces()
-    assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
-
     assert fetch_normalized_spans() == snapshot(
         [
             {
@@ -169,9 +151,6 @@ async def test_wrapped_trace_is_single_trace():
         ]
     )
 
-    spans = fetch_ordered_spans()
-    assert len(spans) == 3, f"Got {len(spans)}, but expected 3: the agent span per run"
-
 
 @pytest.mark.asyncio
 async def test_parent_disabled_trace_disabled_agent_trace():
@@ -185,15 +164,8 @@ async def test_parent_disabled_trace_disabled_agent_trace():
 
         await Runner.run(agent, input="first_test")
 
-    traces = fetch_traces()
-    assert len(traces) == 0, f"Expected 0 traces, got {len(traces)}"
     assert fetch_normalized_spans() == snapshot([])
 
-    spans = fetch_ordered_spans()
-    assert len(spans) == 0, (
-        f"Expected no spans, got {len(spans)}, with {[x.span_data for x in spans]}"
-    )
-
 
 @pytest.mark.asyncio
 async def test_manual_disabling_works():
@@ -206,13 +178,8 @@ async def test_manual_disabling_works():
 
     await Runner.run(agent, input="first_test", run_config=RunConfig(tracing_disabled=True))
 
-    traces = fetch_traces()
-    assert len(traces) == 0, f"Expected 0 traces, got {len(traces)}"
     assert fetch_normalized_spans() == snapshot([])
 
-    spans = fetch_ordered_spans()
-    assert len(spans) == 0, f"Got {len(spans)}, but expected no spans"
-
 
 @pytest.mark.asyncio
 async def test_trace_config_works():
@@ -255,9 +222,6 @@ async def test_not_starting_streaming_creates_trace():
             break
         await asyncio.sleep(0.1)
 
-    traces = fetch_traces()
-    assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
-
     assert fetch_normalized_spans() == snapshot(
         [
             {
@@ -277,9 +241,6 @@ async def test_not_starting_streaming_creates_trace():
         ]
     )
 
-    spans = fetch_ordered_spans()
-    assert len(spans) == 1, f"Got {len(spans)}, but expected 1: the agent span"
-
     # Await the stream to avoid warnings about it not being awaited
     async for _ in result.stream_events():
         pass
diff --git a/tests/test_responses_tracing.py b/tests/test_responses_tracing.py
index 41b87eb35..eda65cf17 100644
--- a/tests/test_responses_tracing.py
+++ b/tests/test_responses_tracing.py
@@ -64,13 +64,6 @@ async def dummy_fetch_response(
         ]
     )
 
-    spans = fetch_ordered_spans()
-    assert len(spans) == 1
-
-    assert isinstance(spans[0].span_data, ResponseSpanData)
-    assert spans[0].span_data.response is not None
-    assert spans[0].span_data.response.id == "dummy-id"
-
 
 @pytest.mark.allow_call_model_methods
 @pytest.mark.asyncio
@@ -164,12 +157,6 @@ async def __aiter__(self):
         ]
     )
 
-    spans = fetch_ordered_spans()
-    assert len(spans) == 1
-    assert isinstance(spans[0].span_data, ResponseSpanData)
-    assert spans[0].span_data.response is not None
-    assert spans[0].span_data.response.id == "dummy-id-123"
-
 
 @pytest.mark.allow_call_model_methods
 @pytest.mark.asyncio
diff --git a/tests/test_tracing_errors.py b/tests/test_tracing_errors.py
index 5dbd7c1b4..baa776815 100644
--- a/tests/test_tracing_errors.py
+++ b/tests/test_tracing_errors.py
@@ -18,7 +18,6 @@
     Runner,
     TResponseInputItem,
 )
-from agents.tracing import AgentSpanData, FunctionSpanData, GenerationSpanData
 
 from .fake_model import FakeModel
 from .test_responses import (
@@ -28,7 +27,7 @@
     get_handoff_tool_call,
     get_text_message,
 )
-from .testing_processor import fetch_normalized_spans, fetch_ordered_spans, fetch_traces
+from .testing_processor import fetch_normalized_spans
 
 
 @pytest.mark.asyncio
@@ -43,9 +42,6 @@ async def test_single_turn_model_error():
     with pytest.raises(ValueError):
         await Runner.run(agent, input="first_test")
 
-    traces = fetch_traces()
-    assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
-
     assert fetch_normalized_spans() == snapshot(
         [
             {
@@ -74,13 +70,6 @@ async def test_single_turn_model_error():
         ]
     )
 
-    spans = fetch_ordered_spans()
-    assert len(spans) == 2, f"should have agent and generation spans, got {len(spans)}"
-
-    generation_span = spans[1]
-    assert isinstance(generation_span.span_data, GenerationSpanData)
-    assert generation_span.error, "should have error"
-
 
 @pytest.mark.asyncio
 async def test_multi_turn_no_handoffs():
@@ -106,9 +95,6 @@ async def test_multi_turn_no_handoffs():
     with pytest.raises(ValueError):
         await Runner.run(agent, input="first_test")
 
-    traces = fetch_traces()
-    assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
-
     assert fetch_normalized_spans() == snapshot(
         [
             {
@@ -146,15 +132,6 @@ async def test_multi_turn_no_handoffs():
         ]
     )
 
-    spans = fetch_ordered_spans()
-    assert len(spans) == 4, (
-        f"should have agent, generation, tool, generation, got {len(spans)} with data: "
-        f"{[x.span_data for x in spans]}"
-    )
-
-    last_generation_span = [x for x in spans if isinstance(x.span_data, GenerationSpanData)][-1]
-    assert last_generation_span.error, "should have error"
-
 
 @pytest.mark.asyncio
 async def test_tool_call_error():
@@ -173,9 +150,6 @@ async def test_tool_call_error():
     with pytest.raises(ModelBehaviorError):
         await Runner.run(agent, input="first_test")
 
-    traces = fetch_traces()
-    assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
-
     assert fetch_normalized_spans() == snapshot(
         [
             {
@@ -209,15 +183,6 @@ async def test_tool_call_error():
         ]
     )
 
-    spans = fetch_ordered_spans()
-    assert len(spans) == 3, (
-        f"should have agent, generation, tool spans, got {len(spans)} with data: "
-        f"{[x.span_data for x in spans]}"
-    )
-
-    function_span = [x for x in spans if isinstance(x.span_data, FunctionSpanData)][0]
-    assert function_span.error, "should have error"
-
 
 @pytest.mark.asyncio
 async def test_multiple_handoff_doesnt_error():
@@ -255,9 +220,6 @@ async def test_multiple_handoff_doesnt_error():
     result = await Runner.run(agent_3, input="user_message")
     assert result.last_agent == agent_1, "should have picked first handoff"
 
-    traces = fetch_traces()
-    assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
-
     assert fetch_normalized_spans() == snapshot(
         [
             {
@@ -295,12 +257,6 @@ async def test_multiple_handoff_doesnt_error():
         ]
     )
 
-    spans = fetch_ordered_spans()
-    assert len(spans) == 7, (
-        f"should have 2 agent, 1 function, 3 generation, 1 handoff, got {len(spans)} with data: "
-        f"{[x.span_data for x in spans]}"
-    )
-
 
 class Foo(TypedDict):
     bar: str
@@ -326,9 +282,6 @@ async def test_multiple_final_output_doesnt_error():
     result = await Runner.run(agent_1, input="user_message")
     assert result.final_output == Foo(bar="abc")
 
-    traces = fetch_traces()
-    assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
-
     assert fetch_normalized_spans() == snapshot(
         [
             {
@@ -344,12 +297,6 @@ async def test_multiple_final_output_doesnt_error():
         ]
     )
 
-    spans = fetch_ordered_spans()
-    assert len(spans) == 2, (
-        f"should have 1 agent, 1 generation, got {len(spans)} with data: "
-        f"{[x.span_data for x in spans]}"
-    )
-
 
 @pytest.mark.asyncio
 async def test_handoffs_lead_to_correct_agent_spans():
@@ -399,9 +346,6 @@ async def test_handoffs_lead_to_correct_agent_spans():
         f"should have ended on the third agent, got {result.last_agent.name}"
     )
 
-    traces = fetch_traces()
-    assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
-
     assert fetch_normalized_spans() == snapshot(
         [
             {
@@ -472,12 +416,6 @@ async def test_handoffs_lead_to_correct_agent_spans():
         ]
     )
 
-    spans = fetch_ordered_spans()
-    assert len(spans) == 12, (
-        f"should have 3 agents, 2 function, 5 generation, 2 handoff, got {len(spans)} with data: "
-        f"{[x.span_data for x in spans]}"
-    )
-
 
 @pytest.mark.asyncio
 async def test_max_turns_exceeded():
@@ -503,9 +441,6 @@ async def test_max_turns_exceeded():
     with pytest.raises(MaxTurnsExceeded):
         await Runner.run(agent, input="user_message", max_turns=2)
 
-    traces = fetch_traces()
-    assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
-
     assert fetch_normalized_spans() == snapshot(
         [
             {
@@ -538,15 +473,6 @@ async def test_max_turns_exceeded():
         ]
     )
 
-    spans = fetch_ordered_spans()
-    assert len(spans) == 5, (
-        f"should have 1 agent span, 2 generations, 2 function calls, got "
-        f"{len(spans)} with data: {[x.span_data for x in spans]}"
-    )
-
-    agent_span = [x for x in spans if isinstance(x.span_data, AgentSpanData)][-1]
-    assert agent_span.error, "last agent should have error"
-
 
 def guardrail_function(
     context: RunContextWrapper[Any], agent: Agent[Any], input: str | list[TResponseInputItem]
@@ -568,9 +494,6 @@ async def test_guardrail_error():
     with pytest.raises(InputGuardrailTripwireTriggered):
         await Runner.run(agent, input="user_message")
 
-    traces = fetch_traces()
-    assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
-
     assert fetch_normalized_spans() == snapshot(
         [
             {
@@ -594,12 +517,3 @@ async def test_guardrail_error():
             }
         ]
     )
-
-    spans = fetch_ordered_spans()
-    assert len(spans) == 2, (
-        f"should have 1 agent, 1 guardrail, got {len(spans)} with data: "
-        f"{[x.span_data for x in spans]}"
-    )
-
-    agent_span = [x for x in spans if isinstance(x.span_data, AgentSpanData)][-1]
-    assert agent_span.error, "last agent should have error"
diff --git a/tests/test_tracing_errors_streamed.py b/tests/test_tracing_errors_streamed.py
index 74cda2de1..7e65ff124 100644
--- a/tests/test_tracing_errors_streamed.py
+++ b/tests/test_tracing_errors_streamed.py
@@ -10,9 +10,6 @@
 
 from agents import (
     Agent,
-    AgentSpanData,
-    FunctionSpanData,
-    GenerationSpanData,
     GuardrailFunctionOutput,
     InputGuardrail,
     InputGuardrailTripwireTriggered,
@@ -33,7 +30,7 @@
     get_handoff_tool_call,
     get_text_message,
 )
-from .testing_processor import fetch_normalized_spans, fetch_ordered_spans, fetch_traces
+from .testing_processor import fetch_normalized_spans
 
 
 @pytest.mark.asyncio
@@ -50,9 +47,6 @@ async def test_single_turn_model_error():
         async for _ in result.stream_events():
             pass
 
-    traces = fetch_traces()
-    assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
-
     assert fetch_normalized_spans() == snapshot(
         [
             {
@@ -82,13 +76,6 @@ async def test_single_turn_model_error():
         ]
     )
 
-    spans = fetch_ordered_spans()
-    assert len(spans) == 2, f"should have agent and generation spans, got {len(spans)}"
-
-    generation_span = spans[1]
-    assert isinstance(generation_span.span_data, GenerationSpanData)
-    assert generation_span.error, "should have error"
-
 
 @pytest.mark.asyncio
 async def test_multi_turn_no_handoffs():
@@ -116,9 +103,6 @@ async def test_multi_turn_no_handoffs():
         async for _ in result.stream_events():
             pass
 
-    traces = fetch_traces()
-    assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
-
     assert fetch_normalized_spans() == snapshot(
         [
             {
@@ -157,15 +141,6 @@ async def test_multi_turn_no_handoffs():
         ]
     )
 
-    spans = fetch_ordered_spans()
-    assert len(spans) == 4, (
-        f"should have agent, generation, tool, generation, got {len(spans)} with data: "
-        f"{[x.span_data for x in spans]}"
-    )
-
-    last_generation_span = [x for x in spans if isinstance(x.span_data, GenerationSpanData)][-1]
-    assert last_generation_span.error, "should have error"
-
 
 @pytest.mark.asyncio
 async def test_tool_call_error():
@@ -186,9 +161,6 @@ async def test_tool_call_error():
         async for _ in result.stream_events():
             pass
 
-    traces = fetch_traces()
-    assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
-
     assert fetch_normalized_spans() == snapshot(
         [
             {
@@ -226,15 +198,6 @@ async def test_tool_call_error():
         ]
     )
 
-    spans = fetch_ordered_spans()
-    assert len(spans) == 3, (
-        f"should have agent, generation, tool spans, got {len(spans)} with data: "
-        f"{[x.span_data for x in spans]}"
-    )
-
-    function_span = [x for x in spans if isinstance(x.span_data, FunctionSpanData)][0]
-    assert function_span.error, "should have error"
-
 
 @pytest.mark.asyncio
 async def test_multiple_handoff_doesnt_error():
@@ -275,9 +238,6 @@ async def test_multiple_handoff_doesnt_error():
 
     assert result.last_agent == agent_1, "should have picked first handoff"
 
-    traces = fetch_traces()
-    assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
-
     assert fetch_normalized_spans() == snapshot(
         [
             {
@@ -315,12 +275,6 @@ async def test_multiple_handoff_doesnt_error():
         ]
     )
 
-    spans = fetch_ordered_spans()
-    assert len(spans) == 7, (
-        f"should have 2 agent, 1 function, 3 generation, 1 handoff, got {len(spans)} with data: "
-        f"{[x.span_data for x in spans]}"
-    )
-
 
 class Foo(TypedDict):
     bar: str
@@ -350,9 +304,6 @@ async def test_multiple_final_output_no_error():
     assert isinstance(result.final_output, dict)
     assert result.final_output["bar"] == "abc"
 
-    traces = fetch_traces()
-    assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
-
     assert fetch_normalized_spans() == snapshot(
         [
             {
@@ -368,12 +319,6 @@ async def test_multiple_final_output_no_error():
         ]
     )
 
-    spans = fetch_ordered_spans()
-    assert len(spans) == 2, (
-        f"should have 1 agent, 1 generation, got {len(spans)} with data: "
-        f"{[x.span_data for x in spans]}"
-    )
-
 
 @pytest.mark.asyncio
 async def test_handoffs_lead_to_correct_agent_spans():
@@ -425,85 +370,6 @@ async def test_handoffs_lead_to_correct_agent_spans():
         f"should have ended on the third agent, got {result.last_agent.name}"
     )
 
-    traces = fetch_traces()
-    assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
-
-    assert fetch_normalized_spans() == snapshot(
-        [
-            {
-                "workflow_name": "Agent workflow",
-                "children": [
-                    {
-                        "type": "agent",
-                        "data": {
-                            "name": "test_agent_3",
-                            "handoffs": ["test_agent_1", "test_agent_2"],
-                            "tools": ["some_function"],
-                            "output_type": "str",
-                        },
-                        "children": [
-                            {"type": "generation"},
-                            {
-                                "type": "function",
-                                "data": {
-                                    "name": "some_function",
-                                    "input": '{"a": "b"}',
-                                    "output": "result",
-                                },
-                            },
-                            {"type": "generation"},
-                            {
-                                "type": "handoff",
-                                "data": {"from_agent": "test_agent_3", "to_agent": "test_agent_1"},
-                            },
-                        ],
-                    },
-                    {
-                        "type": "agent",
-                        "data": {
-                            "name": "test_agent_1",
-                            "handoffs": ["test_agent_3"],
-                            "tools": ["some_function"],
-                            "output_type": "str",
-                        },
-                        "children": [
-                            {"type": "generation"},
-                            {
-                                "type": "function",
-                                "data": {
-                                    "name": "some_function",
-                                    "input": '{"a": "b"}',
-                                    "output": "result",
-                                },
-                            },
-                            {"type": "generation"},
-                            {
-                                "type": "handoff",
-                                "data": {"from_agent": "test_agent_1", "to_agent": "test_agent_3"},
-                            },
-                        ],
-                    },
-                    {
-                        "type": "agent",
-                        "data": {
-                            "name": "test_agent_3",
-                            "handoffs": ["test_agent_1", "test_agent_2"],
-                            "tools": ["some_function"],
-                            "output_type": "str",
-                        },
-                        "children": [{"type": "generation"}],
-                    },
-                ],
-            }
-        ]
-    )
-
-    spans = fetch_ordered_spans()
-    assert len(spans) == 12, (
-        f"should have 3 agents, 2 function, 5 generation, 2 handoff, got {len(spans)} with data: "
-        f"{[x.span_data for x in spans]}"
-    )
-
     assert fetch_normalized_spans() == snapshot(
         [
             {
@@ -601,9 +467,6 @@ async def test_max_turns_exceeded():
         async for _ in result.stream_events():
             pass
 
-    traces = fetch_traces()
-    assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
-
     assert fetch_normalized_spans() == snapshot(
         [
             {
@@ -636,15 +499,6 @@ async def test_max_turns_exceeded():
         ]
     )
 
-    spans = fetch_ordered_spans()
-    assert len(spans) == 5, (
-        f"should have 1 agent, 2 generations, 2 function calls, got "
-        f"{len(spans)} with data: {[x.span_data for x in spans]}"
-    )
-
-    agent_span = [x for x in spans if isinstance(x.span_data, AgentSpanData)][-1]
-    assert agent_span.error, "last agent should have error"
-
 
 def input_guardrail_function(
     context: RunContextWrapper[Any], agent: Agent[Any], input: str | list[TResponseInputItem]
@@ -673,9 +527,6 @@ async def test_input_guardrail_error():
 
     await asyncio.sleep(1)
 
-    traces = fetch_traces()
-    assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
-
     assert fetch_normalized_spans() == snapshot(
         [
             {
@@ -703,15 +554,6 @@ async def test_input_guardrail_error():
         ]
     )
 
-    spans = fetch_ordered_spans()
-    assert len(spans) == 2, (
-        f"should have 1 agent, 1 guardrail, got {len(spans)} with data: "
-        f"{[x.span_data for x in spans]}"
-    )
-
-    agent_span = [x for x in spans if isinstance(x.span_data, AgentSpanData)][-1]
-    assert agent_span.error, "last agent should have error"
-
 
 def output_guardrail_function(
     context: RunContextWrapper[Any], agent: Agent[Any], agent_output: Any
@@ -740,9 +582,6 @@ async def test_output_guardrail_error():
 
     await asyncio.sleep(1)
 
-    traces = fetch_traces()
-    assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
-
     assert fetch_normalized_spans() == snapshot(
         [
             {
@@ -766,12 +605,3 @@ async def test_output_guardrail_error():
             }
         ]
     )
-
-    spans = fetch_ordered_spans()
-    assert len(spans) == 2, (
-        f"should have 1 agent, 1 guardrail, got {len(spans)} with data: "
-        f"{[x.span_data for x in spans]}"
-    )
-
-    agent_span = [x for x in spans if isinstance(x.span_data, AgentSpanData)][-1]
-    assert agent_span.error, "last agent should have error"