diff --git a/docs/agents.md b/docs/agents.md
index 39d4afd5..7f164974 100644
--- a/docs/agents.md
+++ b/docs/agents.md
@@ -9,6 +9,10 @@ The most common properties of an agent you'll configure are:
 - `instructions`: also known as a developer message or system prompt.
 - `model`: which LLM to use, and optional `model_settings` to configure model tuning parameters like temperature, top_p, etc.
 - `tools`: Tools that the agent can use to achieve its tasks.
+- `memory`: enables conversation memory for the agent. Accepts `bool | SessionMemory | None`.
+  - `True`: uses the default `SQLiteSessionMemory` (in-memory by default, suitable for single-process applications).
+  - A `SessionMemory` instance: uses the provided custom memory implementation (e.g., for persistent storage or custom history management).
+  - `None` (default): no memory is used. The agent will not remember previous turns, and conversation history must be managed manually by passing all previous messages in the `input` to `Runner.run()`.
 
 ```python
 from agents import Agent, ModelSettings, function_tool
@@ -131,6 +135,150 @@ robot_agent = pirate_agent.clone(
 )
 ```
 
+## Agent Memory
+
+The `memory` parameter on the `Agent` class enables conversation memory, so the agent can remember previous turns of a conversation.
+
+When `memory` is enabled, the agent automatically loads the stored history before calling the LLM and saves the new turn's interactions (input and output) after the LLM responds.
+
+### Default Memory
+
+Setting `memory=True` uses the default `SQLiteSessionMemory`, which stores the conversation in an in-memory SQLite database. This is convenient for quick setups and single-process applications.
+
+```python
+import asyncio
+
+from agents import Agent, Runner
+
+
+async def run_conversation_with_default_memory():
+    agent = Agent(
+        name="ConversationalAgent",
+        instructions="Remember our previous conversation. Be friendly!",
+        model="o3-mini",  # Use whichever model is available in your setup
+        memory=True,  # Enable the default SQLite memory
+    )
+
+    # This example simulates the LLM responses so the output is predictable.
+    # In a real run, `Runner.run(agent, user_input)` would call the model and
+    # update the memory automatically.
+    print("Simulating conversation with default memory:")
+
+    # Turn 1
+    user_input_1 = "My favorite color is blue."
+    print(f"User: {user_input_1}")
+    # In a real run: result1 = await Runner.run(agent, user_input_1)
+    agent_response_1 = "Okay, I'll remember that your favorite color is blue."
+    print(f"Agent: {agent_response_1}")
+    # Manually add the turn to memory to keep the simulation consistent.
+    if agent.memory:  # Check that memory is enabled
+        await agent.memory.add_items([
+            {"role": "user", "content": user_input_1},
+            {"role": "assistant", "content": agent_response_1},  # Or the structured output
+        ])
+
+    # Turn 2
+    user_input_2 = "What did I say my favorite color was?"
+    print(f"User: {user_input_2}")
+    # In a real run: result2 = await Runner.run(agent, user_input_2)
+    # The LLM would receive the stored history:
+    # [user: "My favorite color is blue.", assistant: "Okay, I'll remember ..."]
+    agent_response_2 = "You said your favorite color is blue."
+    print(f"Agent: {agent_response_2}")
+
+    # To actually run this example, configure a real model (or set a mock model,
+    # e.g. agent.model = my_mock_model) and use the `Runner.run` calls:
+    # result1 = await Runner.run(agent, user_input_1)
+    # print(f"Agent: {result1.final_output}")
+    # result2 = await Runner.run(agent, user_input_2)
+    # print(f"Agent: {result2.final_output}")
+
+
+# To run this example as a script:
+# if __name__ == "__main__":
+#     asyncio.run(run_conversation_with_default_memory())
+```
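+
+The default database is in-memory, so history is lost when the process exits. If you want the default implementation but with persistence, `SQLiteSessionMemory` also accepts a file path (see its constructor in `src/agents/memory.py`). A minimal sketch, assuming only what that constructor shows; the `conversations.db` filename is just an illustration:
+
+```python
+from agents import Agent
+from agents.memory import SQLiteSessionMemory
+
+# Store history in a SQLite file on disk instead of the default in-memory database.
+persistent_memory = SQLiteSessionMemory(db_path="conversations.db")
+
+agent = Agent(
+    name="PersistentAgent",
+    instructions="Remember our previous conversation.",
+    memory=persistent_memory,  # Any SessionMemory implementation can be passed here
+)
+```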
+ print(f"User: {user_input_2}") + # In a real run: result2 = await Runner.run(agent, user_input_2) + # Simulated agent response (assuming LLM uses memory): + # The LLM would have access to the history: [user: "My fav color is blue", assistant: "Okay..."] + agent_response_2 = "You said your favorite color is blue." + print(f"Agent: {agent_response_2}") + + # To actually run this example, you would need a configured model + # and uncomment the Runner.run calls, e.g.: + # agent_llm_mock = ... # setup a mock model for testing if needed + # agent.model = agent_llm_mock + # result1 = await Runner.run(agent, user_input_1) + # print(f"Agent: {result1.final_output}") + # result2 = await Runner.run(agent, user_input_2) + # print(f"Agent: {result2.final_output}") + + +# Example of how you might run it (if it were a fully runnable example): +# if __name__ == "__main__": +# asyncio.run(run_conversation_with_default_memory()) +``` + +### Custom Memory + +For more control, such as using persistent storage (e.g., a different database, file system) or implementing custom history management logic (e.g., summarization, windowing), you can provide your own session memory implementation. + +The [`SessionMemory`][agents.memory.SessionMemory] type is a `typing.Protocol` (specifically, a `@runtime_checkable` protocol). This means your custom memory class must define all the methods specified by the protocol (like `get_history`, `add_items`, `add_message`, and `clear`) with matching signatures. While explicit inheritance from `SessionMemory` is not strictly required by the protocol mechanism for runtime checks (thanks to `@runtime_checkable`), inheriting is still good practice for clarity and to help with static type checking. + +The example below demonstrates creating a custom memory class by inheriting from `SessionMemory`: + +```python +from agents.memory import SessionMemory, TResponseInputItem # Adjust imports as necessary + +class MyCustomMemory(SessionMemory): + def __init__(self): + self.history: list[TResponseInputItem] = [] + + async def get_history(self) -> list[TResponseInputItem]: + # In a real implementation, this might fetch from a DB + print(f"CustomMemory: Getting history (current length {len(self.history)})") + return list(self.history) # Return a copy + + async def add_items(self, items: list[TResponseInputItem]) -> None: + # In a real implementation, this might save to a DB + print(f"CustomMemory: Adding {len(items)} items.") + self.history.extend(items) + + async def add_message(self, item: TResponseInputItem) -> None: + # Helper, could be part of add_items + print(f"CustomMemory: Adding 1 message.") + self.history.append(item) + + async def clear(self) -> None: + print("CustomMemory: Clearing history.") + self.history.clear() + +# How to use the custom memory: +custom_memory_instance = MyCustomMemory() +custom_agent = Agent( + name="CustomMemoryAgent", + instructions="I have a special memory.", + model="o3-mini", # Example model + memory=custom_memory_instance +) + +# Example usage (conceptual) +async def run_with_custom_memory(): + print("\nSimulating conversation with custom memory:") + user_q1 = "My name is Bob." + print(f"User: {user_q1}") + # await Runner.run(custom_agent, user_q1) # Actual run + # Simulated interaction: + await custom_agent.memory.add_items([{"role": "user", "content": user_q1}, {"role": "assistant", "content": "Nice to meet you, Bob!"}]) + print(f"Agent: Nice to meet you, Bob!") + + + user_q2 = "What's my name?" 
+ print(f"User: {user_q2}") + # history_for_llm = await custom_agent.memory.get_history() + # print(f"History provided to LLM for 2nd turn: {history_for_llm}") + # await Runner.run(custom_agent, user_q2) # Actual run + # Simulated interaction: + print(f"Agent: Your name is Bob.") # Assuming LLM uses memory + +# if __name__ == "__main__": +# asyncio.run(run_conversation_with_default_memory()) +# asyncio.run(run_with_custom_memory()) + +``` +As mentioned, the `SessionMemory` protocol defines `get_history`, `add_items`, `add_message`, and `clear` methods that your custom class must implement. + + ## Forcing tool use Supplying a list of tools doesn't always mean the LLM will use a tool. You can force tool use by setting [`ModelSettings.tool_choice`][agents.model_settings.ModelSettings.tool_choice]. Valid values are: diff --git a/docs/running_agents.md b/docs/running_agents.md index f631cf46..b4298b03 100644 --- a/docs/running_agents.md +++ b/docs/running_agents.md @@ -84,6 +84,39 @@ async def main(): # California ``` +!!! note "Simplified Conversations with Agent Memory" + + The above example demonstrates manual conversation management. If the agent is configured with memory (e.g., `Agent(..., memory=True)`), the history is automatically managed. The same conversation would look like this: + + ```python + async def main_with_memory(): + # Note: Agent is initialized with memory=True + agent_with_memory = Agent( + name="Assistant", + instructions="Reply very concisely. Remember our conversation.", + memory=True # Enables automatic memory management + ) + + with trace(workflow_name="ConversationWithMemory", group_id=thread_id): # Assuming thread_id is defined + # First turn + # The agent's memory is empty initially. + result1 = await Runner.run(agent_with_memory, "What city is the Golden Gate Bridge in?") + print(result1.final_output) + # Expected: San Francisco + # The agent's memory now contains: + # - User: "What city is the Golden Gate Bridge in?" + # - Assistant: "San Francisco" (or its structured representation) + + # Second turn + # Runner.run will automatically use the history from agent_with_memory.memory + result2 = await Runner.run(agent_with_memory, "What state is it in?") + print(result2.final_output) + # Expected: California + # The agent's memory now contains the full conversation. + ``` + Refer to the [Agent Memory documentation in `agents.md`](agents.md#agent-memory) for more details on configuring memory. + + ## Exceptions The SDK raises exceptions in certain cases. The full list is in [`agents.exceptions`][]. As an overview: diff --git a/src/agents/agent.py b/src/agents/agent.py index e22f579f..7a959f18 100644 --- a/src/agents/agent.py +++ b/src/agents/agent.py @@ -13,6 +13,7 @@ from .handoffs import Handoff from .items import ItemHelpers from .logger import logger +from .memory import SessionMemory, SQLiteSessionMemory from .mcp import MCPUtil from .model_settings import ModelSettings from .models.interface import Model @@ -39,6 +40,12 @@ class ToolsToFinalOutputResult: `output_type` of the agent. """ + memory: bool | SessionMemory | None = field(default=None, repr=False) + """If True, a default SQLiteSessionMemory will be used. If a SessionMemory instance is + provided, it will be used directly. If None or False, no memory will be used. + Set to `repr=False` because it can be a complex object. 
+ """ + ToolsToFinalOutputFunction: TypeAlias = Callable[ [RunContextWrapper[TContext], list[FunctionToolResult]], @@ -178,6 +185,14 @@ class Agent(Generic[TContext]): """Whether to reset the tool choice to the default value after a tool has been called. Defaults to True. This ensures that the agent doesn't enter an infinite loop of tool usage.""" + def __post_init__(self): + if self.memory is True: + # Defaulting to an in-memory SQLite database for now. + # This could be made configurable later if needed (e.g., via Agent constructor or a global config). + self.memory = SQLiteSessionMemory() + elif self.memory is False: # Explicitly setting memory to False + self.memory = None + def clone(self, **kwargs: Any) -> Agent[TContext]: """Make a copy of the agent, with the given arguments changed. For example, you could do: ``` diff --git a/src/agents/memory.py b/src/agents/memory.py new file mode 100644 index 00000000..208b60ea --- /dev/null +++ b/src/agents/memory.py @@ -0,0 +1,117 @@ +from __future__ import annotations + +# Removed abc import as it's no longer needed by SessionMemory +import sqlite3 +import json +import time +from typing import TYPE_CHECKING, Protocol, runtime_checkable # Added Protocol, runtime_checkable + +if TYPE_CHECKING: + from .items import TResponseInputItem + + +@runtime_checkable +class SessionMemory(Protocol): # Changed from abc.ABC to Protocol + """Protocol for session memory implementations.""" + + async def get_history(self) -> list[TResponseInputItem]: + """Returns the conversation history as a list of input items.""" + ... # Changed from pass to ... + + async def add_message(self, item: TResponseInputItem) -> None: + """Adds a single message/item to the history.""" + ... # Changed from pass to ... + + async def add_items(self, items: list[TResponseInputItem]) -> None: + """Adds a list of items to the history.""" + ... # Changed from pass to ... + + async def clear(self) -> None: + """Clears the entire history.""" + ... # Changed from pass to ... + + +class SQLiteSessionMemory(SessionMemory): # SQLiteSessionMemory still "implements" the protocol + """ + A SessionMemory implementation that uses an SQLite database to store conversation history. + Each message is stored as a JSON string in the database. + """ + + def __init__(self, db_path: str | None = None, *, table_name: str = "chat_history"): + """ + Initializes the SQLite session memory. + + Args: + db_path: Path to the SQLite database file. If None, an in-memory database is used. + table_name: The name of the table to store chat history. + """ + self.db_path = db_path if db_path else ":memory:" + self.table_name = table_name + self._init_db() + + def _get_conn(self): + # For a simple default, synchronous sqlite3 is okay. + # For production async, aiosqlite would be better. 
+        if self._shared_conn is not None:
+            return self._shared_conn
+        return sqlite3.connect(self.db_path)
+
+    def _init_db(self):
+        with self._get_conn() as conn:
+            cursor = conn.cursor()
+            # A single shared table is used; per-session separation could be added later
+            # via a session_id column.
+            cursor.execute(f"""
+                CREATE TABLE IF NOT EXISTS {self.table_name} (
+                    id INTEGER PRIMARY KEY AUTOINCREMENT,
+                    timestamp REAL NOT NULL,
+                    item_json TEXT NOT NULL
+                )
+            """)
+            cursor.execute(f"CREATE INDEX IF NOT EXISTS idx_timestamp ON {self.table_name} (timestamp)")
+            conn.commit()
+
+    async def get_history(self) -> list[TResponseInputItem]:
+        """Returns the conversation history, ordered by timestamp."""
+        with self._get_conn() as conn:
+            cursor = conn.cursor()
+            cursor.execute(f"SELECT item_json FROM {self.table_name} ORDER BY timestamp ASC")
+            rows = cursor.fetchall()
+        history = []
+        for row in rows:
+            try:
+                item = json.loads(row[0])
+                history.append(item)
+            except json.JSONDecodeError as e:
+                # In a real app, use logging
+                print(f"Warning: SQLiteSessionMemory - Could not decode JSON from database: {row[0]}. Error: {e}")
+        return history
+
+    async def add_message(self, item: TResponseInputItem) -> None:
+        """Adds a single message/item to the history."""
+        # This could be implemented more efficiently if needed, but for simplicity:
+        await self.add_items([item])
+
+    async def add_items(self, items: list[TResponseInputItem]) -> None:
+        """Adds a list of items to the history."""
+        current_timestamp = time.time()
+        with self._get_conn() as conn:
+            cursor = conn.cursor()
+            for i, item in enumerate(items):
+                # Ensure unique timestamps for ordering within a batch
+                item_timestamp = current_timestamp + (i * 1e-7)  # Small offset for ordering
+                try:
+                    item_json = json.dumps(item)
+                except TypeError as e:
+                    print(f"Warning: SQLiteSessionMemory - Error serializing item to JSON: {item}. Error: {e}")
+                    continue
+
+                cursor.execute(
+                    f"INSERT INTO {self.table_name} (timestamp, item_json) VALUES (?, ?)",
+                    (item_timestamp, item_json),
+                )
+            conn.commit()
+
+    async def clear(self) -> None:
+        """Clears the entire history from the table."""
+        with self._get_conn() as conn:
+            cursor = conn.cursor()
+            cursor.execute(f"DELETE FROM {self.table_name}")
+            conn.commit()
diff --git a/src/agents/result.py b/src/agents/result.py
index 243db155..97698d8b 100644
--- a/src/agents/result.py
+++ b/src/agents/result.py
@@ -78,7 +78,24 @@ def final_output_as(self, cls: type[T], raise_if_incorrect_type: bool = False) -> T:
         return cast(T, self.final_output)
 
     def to_input_list(self) -> list[TResponseInputItem]:
-        """Creates a new input list, merging the original input with all the new items generated."""
+        """
+        Creates a new list of input items representing the sequence of interactions
+        for the specific agent run that produced this result. It merges `self.input`
+        (the input items that initiated this particular run) with all `self.new_items`
+        (items generated during this run, like messages, tool calls, and tool results).
+
+        This method is useful for:
+        - Manually continuing a conversation if the agent is run without session memory.
+        - Inspecting the specific inputs and outputs of a single `Runner.run()` call,
+          even if that run was part of a larger conversation managed by built-in agent memory.
+        - Extracting a specific segment of a conversation for logging or debugging.
+
+        Note: If the agent has active session memory, this list does NOT include
+        items from turns prior to the run that produced this `RunResult`. To get the
+        complete history from an agent with memory, access the agent's memory object
+        directly (e.g., `await agent.memory.get_history()`).
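+
+        Illustrative sketch of manual continuation (this assumes the usual
+        `Runner.run` call pattern shown in `docs/running_agents.md`):
+
+            result = await Runner.run(agent, "What city is the Golden Gate Bridge in?")
+            next_input = result.to_input_list() + [{"role": "user", "content": "What state is it in?"}]
+            result = await Runner.run(agent, next_input)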
+        """
         original_items: list[TResponseInputItem] = ItemHelpers.input_to_new_input_list(self.input)
         new_items = [item.to_input_item() for item in self.new_items]
diff --git a/src/agents/run.py b/src/agents/run.py
index b196c3bf..bf261a78 100644
--- a/src/agents/run.py
+++ b/src/agents/run.py
@@ -32,6 +32,7 @@ from .items import ItemHelpers, ModelResponse, RunItem, TResponseInputItem
 from .lifecycle import RunHooks
 from .logger import logger
+from .memory import SessionMemory  # Added for session memory
 from .model_settings import ModelSettings
 from .models.interface import Model, ModelProvider
 from .models.multi_provider import MultiProvider
@@ -666,13 +667,23 @@ async def _run_single_turn_streamed(
 
         final_response: ModelResponse | None = None
 
-        input = ItemHelpers.input_to_new_input_list(streamed_result.input)
-        input.extend([item.to_input_item() for item in streamed_result.new_items])
+        # Start: Memory integration - Input Preparation for _run_single_turn_streamed
+        current_turn_input_items_for_llm_non_history: list[TResponseInputItem] = ItemHelpers.input_to_new_input_list(streamed_result.input)
+        # streamed_result.new_items are items from previous turns in the streaming context
+        current_turn_input_items_for_llm_non_history.extend([item.to_input_item() for item in streamed_result.new_items])
+
+        complete_input_for_model: list[TResponseInputItem] = []
+        if agent.memory and isinstance(agent.memory, SessionMemory):
+            history_items = await agent.memory.get_history()
+            complete_input_for_model.extend(history_items)
+
+        complete_input_for_model.extend(current_turn_input_items_for_llm_non_history)
+        # End: Memory integration - Input Preparation
 
         # 1. Stream the output events
         async for event in model.stream_response(
             system_prompt,
-            input,
+            complete_input_for_model,  # Use the potentially history-augmented input
             model_settings,
             all_tools,
             output_schema,
@@ -723,6 +734,20 @@
             tool_use_tracker=tool_use_tracker,
         )
 
+        # Start: Memory integration - History Update for _run_single_turn_streamed
+        if agent.memory and isinstance(agent.memory, SessionMemory):
+            items_to_save_to_memory: list[TResponseInputItem] = []
+            # Add the inputs that were new for this turn (excluding prior history from memory)
+            items_to_save_to_memory.extend(current_turn_input_items_for_llm_non_history)
+
+            # Add all items newly generated in this turn (LLM response, tool calls, tool results).
+            # single_step_result.generated_items are RunItem objects.
+            for run_item in single_step_result.generated_items:  # These are new items from *this* turn's execution
+                items_to_save_to_memory.append(run_item.to_input_item())
+
+            await agent.memory.add_items(items_to_save_to_memory)
+        # End: Memory integration - History Update
+
         RunImpl.stream_step_result_to_queue(single_step_result, streamed_result._event_queue)
         return single_step_result
 
@@ -756,13 +781,24 @@
         output_schema = cls._get_output_schema(agent)
         handoffs = cls._get_handoffs(agent)
-        input = ItemHelpers.input_to_new_input_list(original_input)
-        input.extend([generated_item.to_input_item() for generated_item in generated_items])
+
+        # Start: Memory integration - Input Preparation for _run_single_turn
+        current_turn_input_items_for_llm_non_history: list[TResponseInputItem] = ItemHelpers.input_to_new_input_list(original_input)
+        # The 'generated_items' passed into this function are from *previous* turns.
+        current_turn_input_items_for_llm_non_history.extend([generated_item.to_input_item() for generated_item in generated_items])
+
+        complete_input_for_model: list[TResponseInputItem] = []
+        if agent.memory and isinstance(agent.memory, SessionMemory):
+            history_items = await agent.memory.get_history()
+            complete_input_for_model.extend(history_items)
+
+        complete_input_for_model.extend(current_turn_input_items_for_llm_non_history)
+        # End: Memory integration - Input Preparation
 
         new_response = await cls._get_new_response(
             agent,
             system_prompt,
-            input,
+            complete_input_for_model,  # Use the potentially history-augmented input
             output_schema,
             all_tools,
             handoffs,
@@ -786,6 +822,22 @@
             tool_use_tracker=tool_use_tracker,
         )
 
+        # Start: Memory integration - History Update for _run_single_turn
+        if agent.memory and isinstance(agent.memory, SessionMemory):
+            items_to_save_to_memory: list[TResponseInputItem] = []
+            # Add the inputs that were new for this turn (excluding prior history from memory)
+            items_to_save_to_memory.extend(current_turn_input_items_for_llm_non_history)
+
+            # Add all items newly generated in this turn (LLM response, tool calls, tool results).
+            # single_step_result.generated_items are RunItem objects.
+            for run_item in single_step_result.generated_items:  # These are new items from *this* turn's execution
+                items_to_save_to_memory.append(run_item.to_input_item())
+
+            await agent.memory.add_items(items_to_save_to_memory)
+        # End: Memory integration - History Update
+
+        return single_step_result
+
     @classmethod
     async def _get_single_step_result_from_response(
         cls,
diff --git a/tests/test_agent_memory.py b/tests/test_agent_memory.py
new file mode 100644
index 00000000..3d5541f7
--- /dev/null
+++ b/tests/test_agent_memory.py
@@ -0,0 +1,233 @@
+import asyncio
+
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+# For these tests, constructing plain dicts that match the TResponseInputItem structure
+# is sufficient; TResponseInputItem itself is imported below for type hints.
+
+from src.agents.agent import Agent
+from src.agents.memory import SessionMemory, SQLiteSessionMemory
+from src.agents.run import Runner, RunConfig
+from src.agents.models.interface import Model  # For mock model
+from src.agents.items import ModelResponse, TResponseInputItem, TResponseOutputItem  # For constructing mock responses
+
+
+# Test data - sample input items
+user_msg_1: TResponseInputItem = {"role": "user", "content": "Hello there!"}
+asst_msg_1: TResponseInputItem = {"role": "assistant", "content": "Hi! How can I help?"}
+user_msg_2: TResponseInputItem = {"role": "user", "content": "What's the weather?"}
+asst_msg_2: TResponseInputItem = {"role": "assistant", "content": "It's sunny!"}
+
+
+@pytest.fixture
+def in_memory_sqlite_memory() -> SQLiteSessionMemory:
+    """Provides an in-memory SQLiteSessionMemory instance for testing."""
+    memory = SQLiteSessionMemory(db_path=":memory:")
+    # Ensure a fresh table for each test using this fixture.
+    asyncio.run(memory.clear())
+    memory._init_db()  # Re-initialize the schema
+    return memory
+
+
+class TestSQLiteSessionMemory:
+    @pytest.mark.asyncio
+    async def test_add_and_get_history(self, in_memory_sqlite_memory: SQLiteSessionMemory):
+        memory = in_memory_sqlite_memory
+        await memory.add_message(user_msg_1)
+        await memory.add_items([asst_msg_1, user_msg_2])
+
+        history = await memory.get_history()
+        assert len(history) == 3
+        assert history[0] == user_msg_1
+        assert history[1] == asst_msg_1
+        assert history[2] == user_msg_2
+
+    @pytest.mark.asyncio
+    async def test_clear_history(self, in_memory_sqlite_memory: SQLiteSessionMemory):
+        memory = in_memory_sqlite_memory
+        await memory.add_message(user_msg_1)
+        await memory.clear()
+        history = await memory.get_history()
+        assert len(history) == 0
+
+    @pytest.mark.asyncio
+    async def test_add_items_maintains_order(self, in_memory_sqlite_memory: SQLiteSessionMemory):
+        memory = in_memory_sqlite_memory
+        items = [{"role": "user", "content": f"Message {i}"} for i in range(5)]
+        await memory.add_items(items)
+        history = await memory.get_history()
+        assert history == items
+
+    def test_persistent_db(self, tmp_path):
+        db_file = tmp_path / "test_persistent.db"
+        memory1 = SQLiteSessionMemory(db_path=str(db_file))
+        asyncio.run(memory1.add_message(user_msg_1))
+
+        # Create a new instance with the same file
+        memory2 = SQLiteSessionMemory(db_path=str(db_file))
+        history = asyncio.run(memory2.get_history())
+        assert len(history) == 1
+        assert history[0] == user_msg_1
+        asyncio.run(memory2.clear())  # Clean up
+
+
+class TestAgentMemoryInitialization:
+    def test_agent_memory_true_initializes_sqlite(self):
+        agent = Agent(name="TestAgent", memory=True)
+        assert isinstance(agent.memory, SQLiteSessionMemory)
+
+    def test_agent_memory_false_sets_none(self):
+        agent = Agent(name="TestAgent", memory=False)
+        assert agent.memory is None
+
+    def test_agent_memory_none_is_default(self):
+        agent = Agent(name="TestAgent")
+        assert agent.memory is None
+
+    def test_agent_custom_memory_instance(self):
+        custom_memory_mock = AsyncMock(spec=SessionMemory)
+        agent = Agent(name="TestAgent", memory=custom_memory_mock)
+        assert agent.memory is custom_memory_mock
+
+
+@pytest.mark.asyncio
+async def test_runner_with_memory_integration():
+    """Test Runner.run with an agent that has memory over multiple turns."""
+
+    # Mock the model provider and model
+    mock_model = AsyncMock(spec=Model)
+
+    # Define model responses for multiple turns
Assistant says "Hi" + model_response_1_output: list[TResponseOutputItem] = [{"type": "message", "role": "assistant", "content": [{"type": "text", "text": "Hi"}]}] + model_response_1 = ModelResponse(output=model_response_1_output, usage=MagicMock(), response_id="res1") + + # Turn 2: User says "State?", Assistant says "California" + model_response_2_output: list[TResponseOutputItem] = [{"type": "message", "role": "assistant", "content": [{"type": "text", "text": "California"}]}] + model_response_2 = ModelResponse(output=model_response_2_output, usage=MagicMock(), response_id="res2") + + mock_model.get_response.side_effect = [model_response_1, model_response_2] + # stream_response would also need mocking if testing streaming runs here + + mock_provider = MagicMock() + mock_provider.get_model.return_value = mock_model + + run_config = RunConfig(model_provider=mock_provider) + + # Use a real SQLiteSessionMemory (in-memory) + agent_memory = SQLiteSessionMemory(db_path=":memory:") + agent = Agent(name="TestAgentWithMemory", model="test-model", memory=agent_memory) + + # Turn 1 + input_turn_1 = "Hello" + result_turn_1 = await Runner.run(agent, input_turn_1, run_config=run_config) + + assert result_turn_1.final_output == "Hi" # Assuming default output_type is str + + # Check memory after turn 1 + history_after_turn_1 = await agent_memory.get_history() + assert len(history_after_turn_1) == 2 # User: Hello, Assistant: Hi + assert history_after_turn_1[0]["role"] == "user" + assert history_after_turn_1[0]["content"] == "Hello" + assert history_after_turn_1[1]["role"] == "assistant" + assert history_after_turn_1[1]["content"] == [{"type": "text", "text": "Hi"}] # ModelResponse output structure + + # Verify model was called with correct history (i.e., empty for first turn) + args_call_1, kwargs_call_1 = mock_model.get_response.call_args_list[0] + input_to_model_turn_1 = kwargs_call_1.get('input') + assert len(input_to_model_turn_1) == 1 + assert input_to_model_turn_1[0]["content"] == "Hello" + + + # Turn 2 - using the *same agent instance* which now has memory + input_turn_2 = "What state is it in?" # Example from issue + result_turn_2 = await Runner.run(agent, input_turn_2, run_config=run_config) + + assert result_turn_2.final_output == "California" + + # Check memory after turn 2 + history_after_turn_2 = await agent_memory.get_history() + assert len(history_after_turn_2) == 4 # User: Hello, Asst: Hi, User: State?, Asst: California + assert history_after_turn_2[2]["role"] == "user" + assert history_after_turn_2[2]["content"] == "What state is it in?" + assert history_after_turn_2[3]["role"] == "assistant" + assert history_after_turn_2[3]["content"] == [{"type": "text", "text": "California"}] + + + # Verify model was called with history for second turn + args_call_2, kwargs_call_2 = mock_model.get_response.call_args_list[1] + input_to_model_turn_2 = kwargs_call_2.get('input') + + assert len(input_to_model_turn_2) == 3 # History (User:Hello, Asst:Hi) + New (User:State?) + assert input_to_model_turn_2[0]["role"] == "user" + assert input_to_model_turn_2[0]["content"] == "Hello" + assert input_to_model_turn_2[1]["role"] == "assistant" + # The content saved from ModelResponse is the list of dicts, not just plain text + assert input_to_model_turn_2[1]["content"] == [{"type": "text", "text": "Hi"}] + assert input_to_model_turn_2[2]["role"] == "user" + assert input_to_model_turn_2[2]["content"] == "What state is it in?" 
+
+    # Test clearing memory via the agent reference (if Agent had a clear_memory method, or directly)
+    await agent.memory.clear()
+    history_after_clear = await agent_memory.get_history()
+    assert len(history_after_clear) == 0
+
+
+@pytest.mark.asyncio
+async def test_issue_example_with_memory():
+    """Test the specific example from the issue description."""
+    mock_model = AsyncMock(spec=Model)
+
+    # Mock responses
+    # 1. "What city is the Golden Gate Bridge in?" -> "San Francisco"
+    res1_output: list[TResponseOutputItem] = [{"type": "message", "role": "assistant", "content": [{"type": "text", "text": "San Francisco"}]}]
+    res1 = ModelResponse(output=res1_output, usage=MagicMock(), response_id="res_city")
+
+    # 2. "What state is it in?" -> "California"
+    res2_output: list[TResponseOutputItem] = [{"type": "message", "role": "assistant", "content": [{"type": "text", "text": "California"}]}]
+    res2 = ModelResponse(output=res2_output, usage=MagicMock(), response_id="res_state")
+
+    mock_model.get_response.side_effect = [res1, res2]
+
+    mock_provider = MagicMock()
+    mock_provider.get_model.return_value = mock_model
+    run_config = RunConfig(model_provider=mock_provider)
+
+    # An agent with memory=True uses the default SQLiteSessionMemory
+    agent = Agent(name="Assistant", instructions="Reply very concisely.", model="test-model", memory=True)
+
+    # First turn
+    result1 = await Runner.run(agent, "What city is the Golden Gate Bridge in?", run_config=run_config)
+    # print(f"Result 1: {result1.final_output}")  # For debugging if needed
+    assert result1.final_output == "San Francisco"
+
+    # Check the memory content (optional, but a good deep check)
+    if isinstance(agent.memory, SQLiteSessionMemory):  # Should be the default SQLiteSessionMemory
+        history1 = await agent.memory.get_history()
+        assert len(history1) == 2
+        assert history1[0]["content"] == "What city is the Golden Gate Bridge in?"
+        assert history1[1]["content"] == [{"type": "text", "text": "San Francisco"}]
+
+    # Second turn - memory should be used automatically
+    result2 = await Runner.run(agent, "What state is it in?", run_config=run_config)
+    # print(f"Result 2: {result2.final_output}")  # For debugging
+    assert result2.final_output == "California"
+
+    # Check that the model received the history in the second call
+    assert mock_model.get_response.call_count == 2
+    args_call_2, kwargs_call_2 = mock_model.get_response.call_args_list[1]
+    input_to_model_turn_2 = kwargs_call_2.get('input')
+
+    assert len(input_to_model_turn_2) == 3  # History (User, Asst) + new user question
+    assert input_to_model_turn_2[0]["content"] == "What city is the Golden Gate Bridge in?"
+    assert input_to_model_turn_2[1]["content"] == [{"type": "text", "text": "San Francisco"}]
+    assert input_to_model_turn_2[2]["content"] == "What state is it in?"
+
+
+# TODO: Add tests for streaming runs with memory if time permits.
+# The logic in _run_single_turn_streamed is very similar to _run_single_turn,
+# so these tests provide good coverage of the core memory handling.
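+
+
+# A minimal additional sketch of a check: each `memory=True` agent should get its own
+# independent default memory, since `Agent.__post_init__` constructs a fresh
+# `SQLiteSessionMemory` per agent instance.
+@pytest.mark.asyncio
+async def test_each_agent_gets_its_own_default_memory():
+    agent_a = Agent(name="AgentA", memory=True)
+    agent_b = Agent(name="AgentB", memory=True)
+
+    # Different agents must not share a memory instance or its contents.
+    assert agent_a.memory is not agent_b.memory
+
+    await agent_a.memory.add_message({"role": "user", "content": "only for A"})
+    assert len(await agent_a.memory.get_history()) == 1
+    assert await agent_b.memory.get_history() == []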