diff --git a/tests/test_agent_tracing.py b/tests/test_agent_tracing.py
index 8318b60b3..bb16cab26 100644
--- a/tests/test_agent_tracing.py
+++ b/tests/test_agent_tracing.py
@@ -9,7 +9,7 @@
 
 from .fake_model import FakeModel
 from .test_responses import get_text_message
-from .testing_processor import fetch_normalized_spans, fetch_traces
+from .testing_processor import assert_no_traces, fetch_normalized_spans
 
 
 @pytest.mark.asyncio
@@ -164,7 +164,7 @@ async def test_parent_disabled_trace_disabled_agent_trace():
 
         await Runner.run(agent, input="first_test")
 
-    assert fetch_normalized_spans() == snapshot([])
+    assert_no_traces()
 
 
 @pytest.mark.asyncio
@@ -178,7 +178,7 @@ async def test_manual_disabling_works():
 
     await Runner.run(agent, input="first_test", run_config=RunConfig(tracing_disabled=True))
 
-    assert fetch_normalized_spans() == snapshot([])
+    assert_no_traces()
 
 
 @pytest.mark.asyncio
@@ -193,16 +193,29 @@ async def test_trace_config_works():
     await Runner.run(
         agent,
         input="first_test",
-        run_config=RunConfig(workflow_name="Foo bar", group_id="123", trace_id="456"),
+        run_config=RunConfig(workflow_name="Foo bar", group_id="123", trace_id="trace_456"),
     )
 
-    traces = fetch_traces()
-    assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
-    export = traces[0].export()
-    assert export is not None, "Trace export should not be None"
-    assert export["workflow_name"] == "Foo bar"
-    assert export["group_id"] == "123"
-    assert export["id"] == "456"
+    assert fetch_normalized_spans(keep_trace_id=True) == snapshot(
+        [
+            {
+                "id": "trace_456",
+                "workflow_name": "Foo bar",
+                "group_id": "123",
+                "children": [
+                    {
+                        "type": "agent",
+                        "data": {
+                            "name": "test_agent",
+                            "handoffs": [],
+                            "tools": [],
+                            "output_type": "str",
+                        },
+                    }
+                ],
+            }
+        ]
+    )
 
 
 @pytest.mark.asyncio
@@ -259,8 +272,24 @@ async def test_streaming_single_run_is_single_trace():
     async for _ in x.stream_events():
         pass
 
-    traces = fetch_traces()
-    assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
+    assert fetch_normalized_spans() == snapshot(
+        [
+            {
+                "workflow_name": "Agent workflow",
+                "children": [
+                    {
+                        "type": "agent",
+                        "data": {
+                            "name": "test_agent",
+                            "handoffs": [],
+                            "tools": [],
+                            "output_type": "str",
+                        },
+                    }
+                ],
+            }
+        ]
+    )
 
 
 @pytest.mark.asyncio
@@ -285,8 +314,38 @@ async def test_multiple_streamed_runs_are_multiple_traces():
     async for _ in x.stream_events():
         pass
 
-    traces = fetch_traces()
-    assert len(traces) == 2, f"Expected 2 traces, got {len(traces)}"
+    assert fetch_normalized_spans() == snapshot(
+        [
+            {
+                "workflow_name": "Agent workflow",
+                "children": [
+                    {
+                        "type": "agent",
+                        "data": {
+                            "name": "test_agent_1",
+                            "handoffs": [],
+                            "tools": [],
+                            "output_type": "str",
+                        },
+                    }
+                ],
+            },
+            {
+                "workflow_name": "Agent workflow",
+                "children": [
+                    {
+                        "type": "agent",
+                        "data": {
+                            "name": "test_agent_1",
+                            "handoffs": [],
+                            "tools": [],
+                            "output_type": "str",
+                        },
+                    }
+                ],
+            },
+        ]
+    )
 
 
 @pytest.mark.asyncio
@@ -317,8 +376,42 @@ async def test_wrapped_streaming_trace_is_single_trace():
         async for _ in x.stream_events():
             pass
 
-    traces = fetch_traces()
-    assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
+    assert fetch_normalized_spans() == snapshot(
+        [
+            {
+                "workflow_name": "test_workflow",
+                "children": [
+                    {
+                        "type": "agent",
+                        "data": {
+                            "name": "test_agent_1",
+                            "handoffs": [],
+                            "tools": [],
+                            "output_type": "str",
+                        },
+                    },
+                    {
+                        "type": "agent",
+                        "data": {
+                            "name": "test_agent_1",
+                            "handoffs": [],
+                            "tools": [],
+                            "output_type": "str",
+                        },
+                    },
+                    {
+                        "type": "agent",
+                        "data": {
+                            "name": "test_agent_1",
+                            "handoffs": [],
+                            "tools": [],
+                            "output_type": "str",
+                        },
+                    },
+                ],
+            }
+        ]
+    )
 
 
 @pytest.mark.asyncio
@@ -347,8 +440,42 @@ async def test_wrapped_mixed_trace_is_single_trace():
         async for _ in x.stream_events():
             pass
 
-    traces = fetch_traces()
-    assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}"
+    assert fetch_normalized_spans() == snapshot(
+        [
+            {
+                "workflow_name": "test_workflow",
+                "children": [
+                    {
+                        "type": "agent",
+                        "data": {
+                            "name": "test_agent_1",
+                            "handoffs": [],
+                            "tools": [],
+                            "output_type": "str",
+                        },
+                    },
+                    {
+                        "type": "agent",
+                        "data": {
+                            "name": "test_agent_1",
+                            "handoffs": [],
+                            "tools": [],
+                            "output_type": "str",
+                        },
+                    },
+                    {
+                        "type": "agent",
+                        "data": {
+                            "name": "test_agent_1",
+                            "handoffs": [],
+                            "tools": [],
+                            "output_type": "str",
+                        },
+                    },
+                ],
+            }
+        ]
+    )
 
 
 @pytest.mark.asyncio
@@ -370,8 +497,7 @@ async def test_parent_disabled_trace_disables_streaming_agent_trace():
         async for _ in x.stream_events():
             pass
 
-    traces = fetch_traces()
-    assert len(traces) == 0, f"Expected 0 traces, got {len(traces)}"
+    assert_no_traces()
 
 
 @pytest.mark.asyncio
@@ -392,5 +518,4 @@ async def test_manual_streaming_disabling_works():
     async for _ in x.stream_events():
         pass
 
-    traces = fetch_traces()
-    assert len(traces) == 0, f"Expected 0 traces, got {len(traces)}"
+    assert_no_traces()
diff --git a/tests/test_responses_tracing.py b/tests/test_responses_tracing.py
index eda65cf17..40bdfafbb 100644
--- a/tests/test_responses_tracing.py
+++ b/tests/test_responses_tracing.py
@@ -7,7 +7,7 @@
 from agents.tracing.span_data import ResponseSpanData
 from tests import fake_model
 
-from .testing_processor import fetch_normalized_spans, fetch_ordered_spans
+from .testing_processor import assert_no_spans, fetch_normalized_spans, fetch_ordered_spans
 
 
 class DummyTracing:
@@ -89,9 +89,8 @@ async def dummy_fetch_response(
         [{"workflow_name": "test", "children": [{"type": "response"}]}]
     )
 
-    spans = fetch_ordered_spans()
-    assert len(spans) == 1
-    assert spans[0].span_data.response is None
+    [span] = fetch_ordered_spans()
+    assert span.span_data.response is None
 
 
 @pytest.mark.allow_call_model_methods
@@ -116,8 +115,7 @@ async def dummy_fetch_response(
 
     assert fetch_normalized_spans() == snapshot([{"workflow_name": "test"}])
 
-    spans = fetch_ordered_spans()
-    assert len(spans) == 0
+    assert_no_spans()
 
 
 @pytest.mark.allow_call_model_methods
@@ -190,10 +188,9 @@ async def __aiter__(self):
         [{"workflow_name": "test", "children": [{"type": "response"}]}]
     )
 
-    spans = fetch_ordered_spans()
-    assert len(spans) == 1
-    assert isinstance(spans[0].span_data, ResponseSpanData)
-    assert spans[0].span_data.response is None
+    [span] = fetch_ordered_spans()
+    assert isinstance(span.span_data, ResponseSpanData)
+    assert span.span_data.response is None
 
 
 @pytest.mark.allow_call_model_methods
@@ -226,5 +223,4 @@ async def __aiter__(self):
 
     assert fetch_normalized_spans() == snapshot([{"workflow_name": "test"}])
 
-    spans = fetch_ordered_spans()
-    assert len(spans) == 0
+    assert_no_spans()
diff --git a/tests/test_tracing.py b/tests/test_tracing.py
index c54c3d86b..8f7635093 100644
--- a/tests/test_tracing.py
+++ b/tests/test_tracing.py
@@ -4,6 +4,7 @@
 from typing import Any
 
 import pytest
+from inline_snapshot import snapshot
 
 from agents.tracing import (
     Span,
@@ -17,7 +18,12 @@
 )
 from agents.tracing.spans import SpanError
 
-from .testing_processor import fetch_events, fetch_ordered_spans, fetch_traces
+from .testing_processor import (
+    SPAN_PROCESSOR_TESTING,
+    assert_no_traces,
+    fetch_events,
+    fetch_normalized_spans,
+)
 
 ### HELPERS
 
@@ -47,7 +53,7 @@ def simple_tracing():
     x = trace("test")
     x.start()
 
-    span_1 = agent_span(name="agent_1", parent=x)
+    span_1 = agent_span(name="agent_1", span_id="span_1", parent=x)
     span_1.start()
     span_1.finish()
 
@@ -66,33 +72,36 @@ def simple_tracing():
 def test_simple_tracing() -> None:
     simple_tracing()
 
-    spans, traces = fetch_ordered_spans(), fetch_traces()
-    assert len(spans) == 3
-    assert len(traces) == 1
-
-    trace = traces[0]
-    standard_trace_checks(trace, name_check="test")
-    trace_id = trace.trace_id
-
-    first_span = spans[0]
-    standard_span_checks(first_span, trace_id=trace_id, parent_id=None, span_type="agent")
-    assert first_span.span_data.name == "agent_1"
-
-    second_span = spans[1]
-    standard_span_checks(second_span, trace_id=trace_id, parent_id=None, span_type="custom")
-    assert second_span.span_id == "span_2"
-    assert second_span.span_data.name == "custom_1"
-
-    third_span = spans[2]
-    standard_span_checks(
-        third_span, trace_id=trace_id, parent_id=second_span.span_id, span_type="custom"
+    assert fetch_normalized_spans(keep_span_id=True) == snapshot(
+        [
+            {
+                "workflow_name": "test",
+                "children": [
+                    {
+                        "type": "agent",
+                        "id": "span_1",
+                        "data": {"name": "agent_1"},
+                    },
+                    {
+                        "type": "custom",
+                        "id": "span_2",
+                        "data": {"name": "custom_1", "data": {}},
+                        "children": [
+                            {
+                                "type": "custom",
+                                "id": "span_3",
+                                "data": {"name": "custom_2", "data": {}},
+                            }
+                        ],
+                    },
+                ],
+            }
+        ]
     )
-    assert third_span.span_id == "span_3"
-    assert third_span.span_data.name == "custom_2"
 
 
 def ctxmanager_spans():
-    with trace(workflow_name="test", trace_id="123", group_id="456"):
+    with trace(workflow_name="test", trace_id="trace_123", group_id="456"):
         with custom_span(name="custom_1", span_id="span_1"):
             with custom_span(name="custom_2", span_id="span_1_inner"):
                 pass
@@ -104,36 +113,38 @@ def ctxmanager_spans():
 def test_ctxmanager_spans() -> None:
     ctxmanager_spans()
 
-    spans, traces = fetch_ordered_spans(), fetch_traces()
-    assert len(spans) == 3
-    assert len(traces) == 1
-
-    trace = traces[0]
-    standard_trace_checks(trace, name_check="test")
-    trace_id = trace.trace_id
-
-    first_span = spans[0]
-    standard_span_checks(first_span, trace_id=trace_id, parent_id=None, span_type="custom")
-    assert first_span.span_id == "span_1"
-
-    first_inner_span = spans[1]
-    standard_span_checks(
-        first_inner_span, trace_id=trace_id, parent_id=first_span.span_id, span_type="custom"
+    assert fetch_normalized_spans(keep_span_id=True) == snapshot(
+        [
+            {
+                "workflow_name": "test",
+                "group_id": "456",
+                "children": [
+                    {
+                        "type": "custom",
+                        "id": "span_1",
+                        "data": {"name": "custom_1", "data": {}},
+                        "children": [
+                            {
+                                "type": "custom",
+                                "id": "span_1_inner",
+                                "data": {"name": "custom_2", "data": {}},
+                            }
+                        ],
+                    },
+                    {"type": "custom", "id": "span_2", "data": {"name": "custom_2", "data": {}}},
+                ],
+            }
+        ]
     )
-    assert first_inner_span.span_id == "span_1_inner"
-
-    second_span = spans[2]
-    standard_span_checks(second_span, trace_id=trace_id, parent_id=None, span_type="custom")
-    assert second_span.span_id == "span_2"
 
 
 async def run_subtask(span_id: str | None = None) -> None:
     with generation_span(span_id=span_id):
-        await asyncio.sleep(0.01)
+        await asyncio.sleep(0.0001)
 
 
 async def simple_async_tracing():
-    with trace(workflow_name="test", trace_id="123", group_id="456"):
+    with trace(workflow_name="test", trace_id="trace_123", group_id="group_456"):
         await run_subtask(span_id="span_1")
         await run_subtask(span_id="span_2")
 
@@ -142,21 +153,18 @@ async def simple_async_tracing():
 async def test_async_tracing() -> None:
     await simple_async_tracing()
 
-    spans, traces = fetch_ordered_spans(), fetch_traces()
-    assert len(spans) == 2
-    assert len(traces) == 1
-
-    trace = traces[0]
-    standard_trace_checks(trace, name_check="test")
-    trace_id = trace.trace_id
-
-    # We don't care about ordering here, just that they're there
-    for s in spans:
-        standard_span_checks(s, trace_id=trace_id, parent_id=None, span_type="generation")
-
-    ids = [span.span_id for span in spans]
-    assert "span_1" in ids
-    assert "span_2" in ids
+    assert fetch_normalized_spans(keep_span_id=True) == snapshot(
+        [
+            {
+                "workflow_name": "test",
+                "group_id": "group_456",
+                "children": [
+                    {"type": "generation", "id": "span_1"},
+                    {"type": "generation", "id": "span_2"},
+                ],
+            }
+        ]
+    )
 
 
 async def run_tasks_parallel(span_ids: list[str]) -> None:
@@ -171,13 +179,11 @@ async def run_tasks_as_children(first_span_id: str, second_span_id: str) -> None
 
 
 async def complex_async_tracing():
-    with trace(workflow_name="test", trace_id="123", group_id="456"):
-        await asyncio.sleep(0.01)
+    with trace(workflow_name="test", trace_id="trace_123", group_id="456"):
         await asyncio.gather(
             run_tasks_parallel(["span_1", "span_2"]),
             run_tasks_parallel(["span_3", "span_4"]),
         )
-        await asyncio.sleep(0.01)
         await asyncio.gather(
             run_tasks_as_children("span_5", "span_6"),
             run_tasks_as_children("span_7", "span_8"),
@@ -186,39 +192,38 @@ async def complex_async_tracing():
 
 @pytest.mark.asyncio
 async def test_complex_async_tracing() -> None:
-    await complex_async_tracing()
-
-    spans, traces = fetch_ordered_spans(), fetch_traces()
-    assert len(spans) == 8
-    assert len(traces) == 1
-
-    trace = traces[0]
-    standard_trace_checks(trace, name_check="test")
-    trace_id = trace.trace_id
-
-    # First ensure 1,2,3,4 exist and are in parallel with the trace as parent
-    for span_id in ["span_1", "span_2", "span_3", "span_4"]:
-        span = next((s for s in spans if s.span_id == span_id), None)
-        assert span is not None
-        standard_span_checks(span, trace_id=trace_id, parent_id=None, span_type="generation")
-
-    # Ensure 5 and 7 exist and have the trace as parent
-    for span_id in ["span_5", "span_7"]:
-        span = next((s for s in spans if s.span_id == span_id), None)
-        assert span is not None
-        standard_span_checks(span, trace_id=trace_id, parent_id=None, span_type="generation")
-
-    # Ensure 6 and 8 exist and have 5 and 7 as parents
-    six = next((s for s in spans if s.span_id == "span_6"), None)
-    assert six is not None
-    standard_span_checks(six, trace_id=trace_id, parent_id="span_5", span_type="generation")
-    eight = next((s for s in spans if s.span_id == "span_8"), None)
-    assert eight is not None
-    standard_span_checks(eight, trace_id=trace_id, parent_id="span_7", span_type="generation")
+    for _ in range(300):
+        SPAN_PROCESSOR_TESTING.clear()
+        await complex_async_tracing()
+
+        assert fetch_normalized_spans(keep_span_id=True) == (
+            [
+                {
+                    "workflow_name": "test",
+                    "group_id": "456",
+                    "children": [
+                        {"type": "generation", "id": "span_1"},
+                        {"type": "generation", "id": "span_2"},
+                        {"type": "generation", "id": "span_3"},
+                        {"type": "generation", "id": "span_4"},
+                        {
+                            "type": "generation",
+                            "id": "span_5",
+                            "children": [{"type": "generation", "id": "span_6"}],
+                        },
+                        {
+                            "type": "generation",
+                            "id": "span_7",
+                            "children": [{"type": "generation", "id": "span_8"}],
+                        },
+                    ],
+                }
+            ]
+        )
 
 
 def spans_with_setters():
-    with trace(workflow_name="test", trace_id="123", group_id="456"):
+    with trace(workflow_name="test", trace_id="trace_123", group_id="456"):
         with agent_span(name="agent_1") as span_a:
             span_a.span_data.name = "agent_2"
 
@@ -236,34 +241,33 @@ def spans_with_setters():
 def test_spans_with_setters() -> None:
     spans_with_setters()
 
-    spans, traces = fetch_ordered_spans(), fetch_traces()
-    assert len(spans) == 4
-    assert len(traces) == 1
-
-    trace = traces[0]
-    standard_trace_checks(trace, name_check="test")
-    trace_id = trace.trace_id
-
-    # Check the spans
-    first_span = spans[0]
-    standard_span_checks(first_span, trace_id=trace_id, parent_id=None, span_type="agent")
-    assert first_span.span_data.name == "agent_2"
-
-    second_span = spans[1]
-    standard_span_checks(
-        second_span, trace_id=trace_id, parent_id=first_span.span_id, span_type="function"
-    )
-    assert second_span.span_data.input == "i"
-    assert second_span.span_data.output == "o"
-
-    third_span = spans[2]
-    standard_span_checks(
-        third_span, trace_id=trace_id, parent_id=first_span.span_id, span_type="generation"
-    )
-
-    fourth_span = spans[3]
-    standard_span_checks(
-        fourth_span, trace_id=trace_id, parent_id=first_span.span_id, span_type="handoff"
+    assert fetch_normalized_spans() == snapshot(
+        [
+            {
+                "workflow_name": "test",
+                "group_id": "456",
+                "children": [
+                    {
+                        "type": "agent",
+                        "data": {"name": "agent_2"},
+                        "children": [
+                            {
+                                "type": "function",
+                                "data": {"name": "function_1", "input": "i", "output": "o"},
+                            },
+                            {
+                                "type": "generation",
+                                "data": {"input": [{"foo": "bar"}]},
+                            },
+                            {
+                                "type": "handoff",
+                                "data": {"from_agent": "agent_1", "to_agent": "agent_2"},
+                            },
+                        ],
+                    }
+                ],
+            }
+        ]
     )
 
 
@@ -276,14 +280,11 @@ def disabled_tracing():
 
 def test_disabled_tracing():
     disabled_tracing()
-
-    spans, traces = fetch_ordered_spans(), fetch_traces()
-    assert len(spans) == 0
-    assert len(traces) == 0
+    assert_no_traces()
 
 
 def enabled_trace_disabled_span():
-    with trace(workflow_name="test", trace_id="123"):
+    with trace(workflow_name="test", trace_id="trace_123"):
         with agent_span(name="agent_1"):
             with function_span(name="function_1", disabled=True):
                 with generation_span():
@@ -293,17 +294,19 @@ def enabled_trace_disabled_span():
 def test_enabled_trace_disabled_span():
     enabled_trace_disabled_span()
 
-    spans, traces = fetch_ordered_spans(), fetch_traces()
-    assert len(spans) == 1  # Only the agent span is recorded
-    assert len(traces) == 1  # The trace is recorded
-
-    trace = traces[0]
-    standard_trace_checks(trace, name_check="test")
-    trace_id = trace.trace_id
-
-    first_span = spans[0]
-    standard_span_checks(first_span, trace_id=trace_id, parent_id=None, span_type="agent")
-    assert first_span.span_data.name == "agent_1"
+    assert fetch_normalized_spans() == snapshot(
+        [
+            {
+                "workflow_name": "test",
+                "children": [
+                    {
+                        "type": "agent",
+                        "data": {"name": "agent_1"},
+                    }
+                ],
+            }
+        ]
+    )
 
 
 def test_start_and_end_called_manual():
@@ -367,9 +370,7 @@ async def test_noop_span_doesnt_record():
         with custom_span(name="span_1") as span:
             span.set_error(SpanError(message="test", data={}))
 
-    spans, traces = fetch_ordered_spans(), fetch_traces()
-    assert len(spans) == 0
-    assert len(traces) == 0
+    assert_no_traces()
 
     assert t.export() is None
     assert span.export() is None
diff --git a/tests/testing_processor.py b/tests/testing_processor.py
index 371ea8658..a38c3956f 100644
--- a/tests/testing_processor.py
+++ b/tests/testing_processor.py
@@ -80,26 +80,44 @@ def fetch_events() -> list[TestSpanProcessorEvent]:
     return SPAN_PROCESSOR_TESTING._events
 
 
-def fetch_normalized_spans():
+def assert_no_spans():
+    spans = fetch_ordered_spans()
+    if spans:
+        raise AssertionError(f"Expected 0 spans, got {len(spans)}")
+
+
+def assert_no_traces():
+    traces = fetch_traces()
+    if traces:
+        raise AssertionError(f"Expected 0 traces, got {len(traces)}")
+    assert_no_spans()
+
+
+def fetch_normalized_spans(
+    keep_span_id: bool = False, keep_trace_id: bool = False
+) -> list[dict[str, Any]]:
     nodes: dict[tuple[str, str | None], dict[str, Any]] = {}
     traces = []
     for trace_obj in fetch_traces():
         trace = trace_obj.export()
         assert trace
         assert trace.pop("object") == "trace"
-        assert trace.pop("id").startswith("trace_")
+        assert trace["id"].startswith("trace_")
+        if not keep_trace_id:
+            del trace["id"]
         trace = {k: v for k, v in trace.items() if v is not None}
         nodes[(trace_obj.trace_id, None)] = trace
         traces.append(trace)
 
-    if not traces:
-        assert not fetch_ordered_spans()
+    assert traces, "Use assert_no_traces() to check for empty traces"
 
     for span_obj in fetch_ordered_spans():
         span = span_obj.export()
         assert span
         assert span.pop("object") == "trace.span"
-        assert span.pop("id").startswith("span_")
+        assert span["id"].startswith("span_")
+        if not keep_span_id:
+            del span["id"]
         assert datetime.fromisoformat(span.pop("started_at"))
         assert datetime.fromisoformat(span.pop("ended_at"))
         parent_id = span.pop("parent_id")
diff --git a/uv.lock b/uv.lock
index 15e091a17..a9c79e214 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1050,7 +1050,7 @@ wheels = [
 
 [[package]]
 name = "openai-agents"
-version = "0.0.5"
+version = "0.0.6"
 source = { editable = "." }
 dependencies = [
     { name = "griffe" },