diff --git a/Makefile b/Makefile index 16ed5fe5..f6b779e3 100644 --- a/Makefile +++ b/Makefile @@ -5,6 +5,7 @@ sync: .PHONY: format format: uv run ruff format + uv run ruff check --fix .PHONY: lint lint: diff --git a/tests/test_agent_tracing.py b/tests/test_agent_tracing.py index 3d7196ab..8318b60b 100644 --- a/tests/test_agent_tracing.py +++ b/tests/test_agent_tracing.py @@ -9,7 +9,7 @@ from .fake_model import FakeModel from .test_responses import get_text_message -from .testing_processor import fetch_normalized_spans, fetch_ordered_spans, fetch_traces +from .testing_processor import fetch_normalized_spans, fetch_traces @pytest.mark.asyncio @@ -23,9 +23,6 @@ async def test_single_run_is_single_trace(): await Runner.run(agent, input="first_test") - traces = fetch_traces() - assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}" - assert fetch_normalized_spans() == snapshot( [ { @@ -45,12 +42,6 @@ async def test_single_run_is_single_trace(): ] ) - spans = fetch_ordered_spans() - assert len(spans) == 1, ( - f"Got {len(spans)}, but expected 1: the agent span. data:" - f"{[span.span_data for span in spans]}" - ) - @pytest.mark.asyncio async def test_multiple_runs_are_multiple_traces(): @@ -69,9 +60,6 @@ async def test_multiple_runs_are_multiple_traces(): await Runner.run(agent, input="first_test") await Runner.run(agent, input="second_test") - traces = fetch_traces() - assert len(traces) == 2, f"Expected 2 traces, got {len(traces)}" - assert fetch_normalized_spans() == snapshot( [ { @@ -105,9 +93,6 @@ async def test_multiple_runs_are_multiple_traces(): ] ) - spans = fetch_ordered_spans() - assert len(spans) == 2, f"Got {len(spans)}, but expected 2: agent span per run" - @pytest.mark.asyncio async def test_wrapped_trace_is_single_trace(): @@ -129,9 +114,6 @@ async def test_wrapped_trace_is_single_trace(): await Runner.run(agent, input="second_test") await Runner.run(agent, input="third_test") - traces = fetch_traces() - assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}" - assert fetch_normalized_spans() == snapshot( [ { @@ -169,9 +151,6 @@ async def test_wrapped_trace_is_single_trace(): ] ) - spans = fetch_ordered_spans() - assert len(spans) == 3, f"Got {len(spans)}, but expected 3: the agent span per run" - @pytest.mark.asyncio async def test_parent_disabled_trace_disabled_agent_trace(): @@ -185,15 +164,8 @@ async def test_parent_disabled_trace_disabled_agent_trace(): await Runner.run(agent, input="first_test") - traces = fetch_traces() - assert len(traces) == 0, f"Expected 0 traces, got {len(traces)}" assert fetch_normalized_spans() == snapshot([]) - spans = fetch_ordered_spans() - assert len(spans) == 0, ( - f"Expected no spans, got {len(spans)}, with {[x.span_data for x in spans]}" - ) - @pytest.mark.asyncio async def test_manual_disabling_works(): @@ -206,13 +178,8 @@ async def test_manual_disabling_works(): await Runner.run(agent, input="first_test", run_config=RunConfig(tracing_disabled=True)) - traces = fetch_traces() - assert len(traces) == 0, f"Expected 0 traces, got {len(traces)}" assert fetch_normalized_spans() == snapshot([]) - spans = fetch_ordered_spans() - assert len(spans) == 0, f"Got {len(spans)}, but expected no spans" - @pytest.mark.asyncio async def test_trace_config_works(): @@ -255,9 +222,6 @@ async def test_not_starting_streaming_creates_trace(): break await asyncio.sleep(0.1) - traces = fetch_traces() - assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}" - assert fetch_normalized_spans() == snapshot( [ { @@ -277,9 +241,6 @@ async def test_not_starting_streaming_creates_trace(): ] ) - spans = fetch_ordered_spans() - assert len(spans) == 1, f"Got {len(spans)}, but expected 1: the agent span" - # Await the stream to avoid warnings about it not being awaited async for _ in result.stream_events(): pass diff --git a/tests/test_responses_tracing.py b/tests/test_responses_tracing.py index 41b87eb3..eda65cf1 100644 --- a/tests/test_responses_tracing.py +++ b/tests/test_responses_tracing.py @@ -64,13 +64,6 @@ async def dummy_fetch_response( ] ) - spans = fetch_ordered_spans() - assert len(spans) == 1 - - assert isinstance(spans[0].span_data, ResponseSpanData) - assert spans[0].span_data.response is not None - assert spans[0].span_data.response.id == "dummy-id" - @pytest.mark.allow_call_model_methods @pytest.mark.asyncio @@ -164,12 +157,6 @@ async def __aiter__(self): ] ) - spans = fetch_ordered_spans() - assert len(spans) == 1 - assert isinstance(spans[0].span_data, ResponseSpanData) - assert spans[0].span_data.response is not None - assert spans[0].span_data.response.id == "dummy-id-123" - @pytest.mark.allow_call_model_methods @pytest.mark.asyncio diff --git a/tests/test_tracing_errors.py b/tests/test_tracing_errors.py index 5dbd7c1b..baa77681 100644 --- a/tests/test_tracing_errors.py +++ b/tests/test_tracing_errors.py @@ -18,7 +18,6 @@ Runner, TResponseInputItem, ) -from agents.tracing import AgentSpanData, FunctionSpanData, GenerationSpanData from .fake_model import FakeModel from .test_responses import ( @@ -28,7 +27,7 @@ get_handoff_tool_call, get_text_message, ) -from .testing_processor import fetch_normalized_spans, fetch_ordered_spans, fetch_traces +from .testing_processor import fetch_normalized_spans @pytest.mark.asyncio @@ -43,9 +42,6 @@ async def test_single_turn_model_error(): with pytest.raises(ValueError): await Runner.run(agent, input="first_test") - traces = fetch_traces() - assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}" - assert fetch_normalized_spans() == snapshot( [ { @@ -74,13 +70,6 @@ async def test_single_turn_model_error(): ] ) - spans = fetch_ordered_spans() - assert len(spans) == 2, f"should have agent and generation spans, got {len(spans)}" - - generation_span = spans[1] - assert isinstance(generation_span.span_data, GenerationSpanData) - assert generation_span.error, "should have error" - @pytest.mark.asyncio async def test_multi_turn_no_handoffs(): @@ -106,9 +95,6 @@ async def test_multi_turn_no_handoffs(): with pytest.raises(ValueError): await Runner.run(agent, input="first_test") - traces = fetch_traces() - assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}" - assert fetch_normalized_spans() == snapshot( [ { @@ -146,15 +132,6 @@ async def test_multi_turn_no_handoffs(): ] ) - spans = fetch_ordered_spans() - assert len(spans) == 4, ( - f"should have agent, generation, tool, generation, got {len(spans)} with data: " - f"{[x.span_data for x in spans]}" - ) - - last_generation_span = [x for x in spans if isinstance(x.span_data, GenerationSpanData)][-1] - assert last_generation_span.error, "should have error" - @pytest.mark.asyncio async def test_tool_call_error(): @@ -173,9 +150,6 @@ async def test_tool_call_error(): with pytest.raises(ModelBehaviorError): await Runner.run(agent, input="first_test") - traces = fetch_traces() - assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}" - assert fetch_normalized_spans() == snapshot( [ { @@ -209,15 +183,6 @@ async def test_tool_call_error(): ] ) - spans = fetch_ordered_spans() - assert len(spans) == 3, ( - f"should have agent, generation, tool spans, got {len(spans)} with data: " - f"{[x.span_data for x in spans]}" - ) - - function_span = [x for x in spans if isinstance(x.span_data, FunctionSpanData)][0] - assert function_span.error, "should have error" - @pytest.mark.asyncio async def test_multiple_handoff_doesnt_error(): @@ -255,9 +220,6 @@ async def test_multiple_handoff_doesnt_error(): result = await Runner.run(agent_3, input="user_message") assert result.last_agent == agent_1, "should have picked first handoff" - traces = fetch_traces() - assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}" - assert fetch_normalized_spans() == snapshot( [ { @@ -295,12 +257,6 @@ async def test_multiple_handoff_doesnt_error(): ] ) - spans = fetch_ordered_spans() - assert len(spans) == 7, ( - f"should have 2 agent, 1 function, 3 generation, 1 handoff, got {len(spans)} with data: " - f"{[x.span_data for x in spans]}" - ) - class Foo(TypedDict): bar: str @@ -326,9 +282,6 @@ async def test_multiple_final_output_doesnt_error(): result = await Runner.run(agent_1, input="user_message") assert result.final_output == Foo(bar="abc") - traces = fetch_traces() - assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}" - assert fetch_normalized_spans() == snapshot( [ { @@ -344,12 +297,6 @@ async def test_multiple_final_output_doesnt_error(): ] ) - spans = fetch_ordered_spans() - assert len(spans) == 2, ( - f"should have 1 agent, 1 generation, got {len(spans)} with data: " - f"{[x.span_data for x in spans]}" - ) - @pytest.mark.asyncio async def test_handoffs_lead_to_correct_agent_spans(): @@ -399,9 +346,6 @@ async def test_handoffs_lead_to_correct_agent_spans(): f"should have ended on the third agent, got {result.last_agent.name}" ) - traces = fetch_traces() - assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}" - assert fetch_normalized_spans() == snapshot( [ { @@ -472,12 +416,6 @@ async def test_handoffs_lead_to_correct_agent_spans(): ] ) - spans = fetch_ordered_spans() - assert len(spans) == 12, ( - f"should have 3 agents, 2 function, 5 generation, 2 handoff, got {len(spans)} with data: " - f"{[x.span_data for x in spans]}" - ) - @pytest.mark.asyncio async def test_max_turns_exceeded(): @@ -503,9 +441,6 @@ async def test_max_turns_exceeded(): with pytest.raises(MaxTurnsExceeded): await Runner.run(agent, input="user_message", max_turns=2) - traces = fetch_traces() - assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}" - assert fetch_normalized_spans() == snapshot( [ { @@ -538,15 +473,6 @@ async def test_max_turns_exceeded(): ] ) - spans = fetch_ordered_spans() - assert len(spans) == 5, ( - f"should have 1 agent span, 2 generations, 2 function calls, got " - f"{len(spans)} with data: {[x.span_data for x in spans]}" - ) - - agent_span = [x for x in spans if isinstance(x.span_data, AgentSpanData)][-1] - assert agent_span.error, "last agent should have error" - def guardrail_function( context: RunContextWrapper[Any], agent: Agent[Any], input: str | list[TResponseInputItem] @@ -568,9 +494,6 @@ async def test_guardrail_error(): with pytest.raises(InputGuardrailTripwireTriggered): await Runner.run(agent, input="user_message") - traces = fetch_traces() - assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}" - assert fetch_normalized_spans() == snapshot( [ { @@ -594,12 +517,3 @@ async def test_guardrail_error(): } ] ) - - spans = fetch_ordered_spans() - assert len(spans) == 2, ( - f"should have 1 agent, 1 guardrail, got {len(spans)} with data: " - f"{[x.span_data for x in spans]}" - ) - - agent_span = [x for x in spans if isinstance(x.span_data, AgentSpanData)][-1] - assert agent_span.error, "last agent should have error" diff --git a/tests/test_tracing_errors_streamed.py b/tests/test_tracing_errors_streamed.py index 74cda2de..7e65ff12 100644 --- a/tests/test_tracing_errors_streamed.py +++ b/tests/test_tracing_errors_streamed.py @@ -10,9 +10,6 @@ from agents import ( Agent, - AgentSpanData, - FunctionSpanData, - GenerationSpanData, GuardrailFunctionOutput, InputGuardrail, InputGuardrailTripwireTriggered, @@ -33,7 +30,7 @@ get_handoff_tool_call, get_text_message, ) -from .testing_processor import fetch_normalized_spans, fetch_ordered_spans, fetch_traces +from .testing_processor import fetch_normalized_spans @pytest.mark.asyncio @@ -50,9 +47,6 @@ async def test_single_turn_model_error(): async for _ in result.stream_events(): pass - traces = fetch_traces() - assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}" - assert fetch_normalized_spans() == snapshot( [ { @@ -82,13 +76,6 @@ async def test_single_turn_model_error(): ] ) - spans = fetch_ordered_spans() - assert len(spans) == 2, f"should have agent and generation spans, got {len(spans)}" - - generation_span = spans[1] - assert isinstance(generation_span.span_data, GenerationSpanData) - assert generation_span.error, "should have error" - @pytest.mark.asyncio async def test_multi_turn_no_handoffs(): @@ -116,9 +103,6 @@ async def test_multi_turn_no_handoffs(): async for _ in result.stream_events(): pass - traces = fetch_traces() - assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}" - assert fetch_normalized_spans() == snapshot( [ { @@ -157,15 +141,6 @@ async def test_multi_turn_no_handoffs(): ] ) - spans = fetch_ordered_spans() - assert len(spans) == 4, ( - f"should have agent, generation, tool, generation, got {len(spans)} with data: " - f"{[x.span_data for x in spans]}" - ) - - last_generation_span = [x for x in spans if isinstance(x.span_data, GenerationSpanData)][-1] - assert last_generation_span.error, "should have error" - @pytest.mark.asyncio async def test_tool_call_error(): @@ -186,9 +161,6 @@ async def test_tool_call_error(): async for _ in result.stream_events(): pass - traces = fetch_traces() - assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}" - assert fetch_normalized_spans() == snapshot( [ { @@ -226,15 +198,6 @@ async def test_tool_call_error(): ] ) - spans = fetch_ordered_spans() - assert len(spans) == 3, ( - f"should have agent, generation, tool spans, got {len(spans)} with data: " - f"{[x.span_data for x in spans]}" - ) - - function_span = [x for x in spans if isinstance(x.span_data, FunctionSpanData)][0] - assert function_span.error, "should have error" - @pytest.mark.asyncio async def test_multiple_handoff_doesnt_error(): @@ -275,9 +238,6 @@ async def test_multiple_handoff_doesnt_error(): assert result.last_agent == agent_1, "should have picked first handoff" - traces = fetch_traces() - assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}" - assert fetch_normalized_spans() == snapshot( [ { @@ -315,12 +275,6 @@ async def test_multiple_handoff_doesnt_error(): ] ) - spans = fetch_ordered_spans() - assert len(spans) == 7, ( - f"should have 2 agent, 1 function, 3 generation, 1 handoff, got {len(spans)} with data: " - f"{[x.span_data for x in spans]}" - ) - class Foo(TypedDict): bar: str @@ -350,9 +304,6 @@ async def test_multiple_final_output_no_error(): assert isinstance(result.final_output, dict) assert result.final_output["bar"] == "abc" - traces = fetch_traces() - assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}" - assert fetch_normalized_spans() == snapshot( [ { @@ -368,12 +319,6 @@ async def test_multiple_final_output_no_error(): ] ) - spans = fetch_ordered_spans() - assert len(spans) == 2, ( - f"should have 1 agent, 1 generation, got {len(spans)} with data: " - f"{[x.span_data for x in spans]}" - ) - @pytest.mark.asyncio async def test_handoffs_lead_to_correct_agent_spans(): @@ -425,85 +370,6 @@ async def test_handoffs_lead_to_correct_agent_spans(): f"should have ended on the third agent, got {result.last_agent.name}" ) - traces = fetch_traces() - assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}" - - assert fetch_normalized_spans() == snapshot( - [ - { - "workflow_name": "Agent workflow", - "children": [ - { - "type": "agent", - "data": { - "name": "test_agent_3", - "handoffs": ["test_agent_1", "test_agent_2"], - "tools": ["some_function"], - "output_type": "str", - }, - "children": [ - {"type": "generation"}, - { - "type": "function", - "data": { - "name": "some_function", - "input": '{"a": "b"}', - "output": "result", - }, - }, - {"type": "generation"}, - { - "type": "handoff", - "data": {"from_agent": "test_agent_3", "to_agent": "test_agent_1"}, - }, - ], - }, - { - "type": "agent", - "data": { - "name": "test_agent_1", - "handoffs": ["test_agent_3"], - "tools": ["some_function"], - "output_type": "str", - }, - "children": [ - {"type": "generation"}, - { - "type": "function", - "data": { - "name": "some_function", - "input": '{"a": "b"}', - "output": "result", - }, - }, - {"type": "generation"}, - { - "type": "handoff", - "data": {"from_agent": "test_agent_1", "to_agent": "test_agent_3"}, - }, - ], - }, - { - "type": "agent", - "data": { - "name": "test_agent_3", - "handoffs": ["test_agent_1", "test_agent_2"], - "tools": ["some_function"], - "output_type": "str", - }, - "children": [{"type": "generation"}], - }, - ], - } - ] - ) - - spans = fetch_ordered_spans() - assert len(spans) == 12, ( - f"should have 3 agents, 2 function, 5 generation, 2 handoff, got {len(spans)} with data: " - f"{[x.span_data for x in spans]}" - ) - assert fetch_normalized_spans() == snapshot( [ { @@ -601,9 +467,6 @@ async def test_max_turns_exceeded(): async for _ in result.stream_events(): pass - traces = fetch_traces() - assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}" - assert fetch_normalized_spans() == snapshot( [ { @@ -636,15 +499,6 @@ async def test_max_turns_exceeded(): ] ) - spans = fetch_ordered_spans() - assert len(spans) == 5, ( - f"should have 1 agent, 2 generations, 2 function calls, got " - f"{len(spans)} with data: {[x.span_data for x in spans]}" - ) - - agent_span = [x for x in spans if isinstance(x.span_data, AgentSpanData)][-1] - assert agent_span.error, "last agent should have error" - def input_guardrail_function( context: RunContextWrapper[Any], agent: Agent[Any], input: str | list[TResponseInputItem] @@ -673,9 +527,6 @@ async def test_input_guardrail_error(): await asyncio.sleep(1) - traces = fetch_traces() - assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}" - assert fetch_normalized_spans() == snapshot( [ { @@ -703,15 +554,6 @@ async def test_input_guardrail_error(): ] ) - spans = fetch_ordered_spans() - assert len(spans) == 2, ( - f"should have 1 agent, 1 guardrail, got {len(spans)} with data: " - f"{[x.span_data for x in spans]}" - ) - - agent_span = [x for x in spans if isinstance(x.span_data, AgentSpanData)][-1] - assert agent_span.error, "last agent should have error" - def output_guardrail_function( context: RunContextWrapper[Any], agent: Agent[Any], agent_output: Any @@ -740,9 +582,6 @@ async def test_output_guardrail_error(): await asyncio.sleep(1) - traces = fetch_traces() - assert len(traces) == 1, f"Expected 1 trace, got {len(traces)}" - assert fetch_normalized_spans() == snapshot( [ { @@ -766,12 +605,3 @@ async def test_output_guardrail_error(): } ] ) - - spans = fetch_ordered_spans() - assert len(spans) == 2, ( - f"should have 1 agent, 1 guardrail, got {len(spans)} with data: " - f"{[x.span_data for x in spans]}" - ) - - agent_span = [x for x in spans if isinstance(x.span_data, AgentSpanData)][-1] - assert agent_span.error, "last agent should have error"