added playwright example

tianx-dev · tianx-dev · commit 743877cb7e00 · 2025-03-27T12:09:32.000-04:00
diff --git a/examples/mcp/playwright_example/README.md b/examples/mcp/playwright_example/README.md
@@ -0,0 +1,56 @@
+# Playwright Screenshot Example
+
+This example demonstrates how to use the OpenAI Agents SDK with Playwright MCP (Model Context Protocol) to automate browser interactions and capture screenshots.
+
+## Features
+
+- Navigates to websites
+- Captures full-page screenshots
+- Saves screenshots with descriptive filenames
+- Reports file information
+
+## Requirements
+
+- Node.js and npm
+- Python 3.9+ with the OpenAI Agents SDK installed
+
+## Installation
+
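+The example launches the Playwright MCP server with `npx -y @playwright/mcp@latest`, so no separate installation step is required. If you prefer to have the package installed globally ahead of time: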
+
+```bash
+# This will be installed automatically when you run the example
+npm install -g @playwright/mcp
+```
+
+## Usage
+
+Run the example:
+
+```bash
+python main.py
+```
+
+The script will:
+1. Launch a headless Playwright browser via the MCP server
+2. Navigate to https://example.com and https://httpbin.org
+3. Capture screenshots
+4. Save them to the `screenshots` directory
+
+## Customization
+
+- Modify `user_request` in `main.py` to capture different websites
+- Remove the `--headless` flag in the MCP server configuration to see the browser in action
+- Extend the agent's instructions to handle more complex browser automation tasks
+
+## Additional Capabilities
+
+This example only demonstrates basic screenshot functionality, but the Playwright MCP server can be used for:
+
+- Form filling and submission
+- Web scraping
+- UI testing
+- Authentication flows
+- And much more!
+
+For more information on Playwright capabilities, see the [Playwright documentation](https://playwright.dev/docs/intro).
diff --git a/examples/mcp/playwright_example/main.py b/examples/mcp/playwright_example/main.py
@@ -0,0 +1,106 @@
+import asyncio
+import glob
+import os
+import shutil
+from datetime import datetime
+
+from agents import Agent, Runner, gen_trace_id, trace
+from agents.mcp import MCPServer, MCPServerStdio
+
+
+async def run(mcp_server: MCPServer):
+    # Create a directory for screenshots if it doesn't exist
+    screenshots_dir = os.path.join(os.path.dirname(__file__), "screenshots")
+    os.makedirs(screenshots_dir, exist_ok=True)
+
+    # agent with enhanced instructions for screenshot capability
+    agent = Agent(
+        name="Web Screenshot Assistant",
+        instructions="""
+        You are a web screenshot assistant that can navigate to websites and capture screenshots.
+
+        Use the Playwright tools to:
+        1. Navigate to websites requested by the user
+        2. Take screenshots of the entire page or specific elements
+        3. Save screenshots with descriptive filenames that include the website name and current date
+        4. Report back with the location of saved screenshots
+
+        Follow these guidelines:
+        - Always wait for the page to fully load before taking screenshots
+        - If asked to capture a specific element, use page.locator() and then screenshot that element
+        - If no specific path is provided, save screenshots to the "screenshots" directory
+        - Generate descriptive filenames that include the website domain and current date
+        - Always confirm successful screenshot capture and provide the filepath where it was saved
+        - For full-page screenshots, capture the entire page scrolling content
+
+        When saving screenshots:
+        - Use PNG format for better quality
+        - Include helpful information in the filename (website, date, element if applicable)
+        - Report success with the full path where the screenshot was saved
+        """,
+        mcp_servers=[mcp_server],
+    )
+
+    # Gather the available tools to understand what we can do with Playwright
+    tools = await agent.get_all_tools()
+    print("Available tools:", [tool.name for tool in tools])
+
+    # Define the default screenshot path with current date
+    today_date = datetime.now().strftime("%Y-%m-%d")
+    default_screenshot_path = os.path.join(screenshots_dir, f"openai_{today_date}.png")
+
+    # Run the agent with a screenshot task
+    user_request = f"""
+    Please perform these steps:
+    1. Navigate to https://example.com
+    2. Wait for the page to fully load
+    3. Take a screenshot of the entire page
+    4. Save it to {default_screenshot_path}
+    5. Then navigate to https://httpbin.org
+    6. Take another screenshot with a descriptive filename in the screenshots directory
+    """
+
+    print(f"\nProcessing request: {user_request}\n")
+
+    # Run the agent with our request
+    result = await Runner.run(starting_agent=agent, input=user_request)
+
+    # Display the result
+    print("\nResult:", result)
+
+    # Verify the screenshots were created
+    if os.path.exists(default_screenshot_path):
+        print(f"\nSuccessfully created primary screenshot at: {default_screenshot_path}")
+        # Get the file size
+        file_size = os.path.getsize(default_screenshot_path) / 1024  # KB
+        print(f"Screenshot size: {file_size:.2f} KB")
+
+    # List all screenshots created
+    all_screenshots = glob.glob(os.path.join(screenshots_dir, "*.png"))
+    print(f"\nAll screenshots in directory ({len(all_screenshots)}):")
+    for screenshot in all_screenshots:
+        file_size = os.path.getsize(screenshot) / 1024  # KB
+        print(f" - {os.path.basename(screenshot)} ({file_size:.2f} KB)")
+
+
+async def main():
+    # Configure the MCP server
+    # Note: remove --headless if you want to see the browser in action
+    async with MCPServerStdio(
+        name="Playwright Screenshot Server",
+        params={
+            "command": "npx",
+            "args": ["-y", "@playwright/mcp@latest", "--headless"],
+        },
+    ) as server:
+        trace_id = gen_trace_id()
+        with trace(workflow_name="Playwright Screenshot Example", trace_id=trace_id):
+            print(f"View trace: https://platform.openai.com/traces/{trace_id}\n")
+            print(f"Starting Playwright server: {server}")
+
+            # Run our screenshot example
+            await run(server)
+
+
+# Run the example only when this file is executed directly as a script.
+if __name__ == "__main__":
+    asyncio.run(main())