diff --git a/playwright/_impl/_browser_type.py b/playwright/_impl/_browser_type.py index 8366dd220..a27d5099c 100644 --- a/playwright/_impl/_browser_type.py +++ b/playwright/_impl/_browser_type.py @@ -15,7 +15,7 @@ import asyncio import pathlib from pathlib import Path -from typing import Dict, List, Optional, Union, cast +from typing import TYPE_CHECKING, Dict, List, Optional, Union, cast from playwright._impl._api_structures import ( Geolocation, @@ -42,6 +42,9 @@ from playwright._impl._transport import WebSocketTransport from playwright._impl._wait_helper import throw_on_timeout +if TYPE_CHECKING: + from playwright._impl._playwright import Playwright + class BrowserType(ChannelOwner): def __init__( @@ -191,23 +194,22 @@ async def connect( self._connection._dispatcher_fiber, self._connection._object_factory, transport, + self._connection._loop, ) connection._is_sync = self._connection._is_sync - connection._loop = self._connection._loop connection._loop.create_task(connection.run()) - future = connection._loop.create_task( - connection.wait_for_object_with_known_name("Playwright") - ) + playwright_future = connection.get_playwright_future() + timeout_future = throw_on_timeout(timeout, Error("Connection timed out")) done, pending = await asyncio.wait( - {transport.on_error_future, future, timeout_future}, + {transport.on_error_future, playwright_future, timeout_future}, return_when=asyncio.FIRST_COMPLETED, ) - if not future.done(): - future.cancel() + if not playwright_future.done(): + playwright_future.cancel() if not timeout_future.done(): timeout_future.cancel() - playwright = next(iter(done)).result() + playwright: "Playwright" = next(iter(done)).result() self._connection._child_ws_connections.append(connection) pre_launched_browser = playwright._initializer.get("preLaunchedBrowser") assert pre_launched_browser diff --git a/playwright/_impl/_connection.py b/playwright/_impl/_connection.py index 37aa96cb1..032879942 100644 --- a/playwright/_impl/_connection.py +++ b/playwright/_impl/_connection.py @@ -16,7 +16,7 @@ import sys import traceback from pathlib import Path -from typing import Any, Callable, Dict, List, Optional, Union +from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Union from greenlet import greenlet from pyee import AsyncIOEventEmitter @@ -24,6 +24,9 @@ from playwright._impl._helper import ParsedMessagePayload, parse_error from playwright._impl._transport import Transport +if TYPE_CHECKING: + from playwright._impl._playwright import Playwright + class Channel(AsyncIOEventEmitter): def __init__(self, connection: "Connection", guid: str) -> None: @@ -119,7 +122,17 @@ def __init__(self, loop: asyncio.AbstractEventLoop) -> None: class RootChannelOwner(ChannelOwner): def __init__(self, connection: "Connection") -> None: - super().__init__(connection, "", "", {}) + super().__init__(connection, "Root", "", {}) + + async def initialize(self) -> "Playwright": + return from_channel( + await self._channel.send( + "initialize", + { + "sdkLanguage": "python", + }, + ) + ) class Connection: @@ -128,6 +141,7 @@ def __init__( dispatcher_fiber: Any, object_factory: Callable[[ChannelOwner, str, str, Dict], ChannelOwner], transport: Transport, + loop: asyncio.AbstractEventLoop, ) -> None: self._dispatcher_fiber = dispatcher_fiber self._transport = transport @@ -140,6 +154,8 @@ def __init__( self._is_sync = False self._api_name = "" self._child_ws_connections: List["Connection"] = [] + self._loop = loop + self._playwright_future: asyncio.Future["Playwright"] = loop.create_future() async def run_as_sync(self) -> None: self._is_sync = True @@ -148,8 +164,17 @@ async def run_as_sync(self) -> None: async def run(self) -> None: self._loop = asyncio.get_running_loop() self._root_object = RootChannelOwner(self) + + async def init() -> None: + self._playwright_future.set_result(await self._root_object.initialize()) + + await self._transport.connect() + self._loop.create_task(init()) await self._transport.run() + def get_playwright_future(self) -> asyncio.Future: + return self._playwright_future + def stop_sync(self) -> None: self._transport.request_stop() self._dispatcher_fiber.switch() @@ -164,17 +189,6 @@ def cleanup(self) -> None: for ws_connection in self._child_ws_connections: ws_connection._transport.dispose() - async def wait_for_object_with_known_name(self, guid: str) -> ChannelOwner: - if guid in self._objects: - return self._objects[guid] - callback: asyncio.Future[ChannelOwner] = self._loop.create_future() - - def callback_wrapper(result: ChannelOwner) -> None: - callback.set_result(result) - - self._waiting_for_object[guid] = callback_wrapper - return await callback - def call_on_object_with_known_name( self, guid: str, callback: Callable[[ChannelOwner], None] ) -> None: diff --git a/playwright/_impl/_transport.py b/playwright/_impl/_transport.py index d5b3ec0af..5c7d6fc41 100644 --- a/playwright/_impl/_transport.py +++ b/playwright/_impl/_transport.py @@ -60,6 +60,10 @@ def dispose(self) -> None: async def wait_until_stopped(self) -> None: pass + @abstractmethod + async def connect(self) -> None: + pass + @abstractmethod async def run(self) -> None: pass @@ -91,6 +95,7 @@ def __init__( self._driver_executable = driver_executable def request_stop(self) -> None: + assert self._output self._stopped = True self._output.close() @@ -98,7 +103,7 @@ async def wait_until_stopped(self) -> None: await self._stopped_future await self._proc.wait() - async def run(self) -> None: + async def connect(self) -> None: self._stopped_future: asyncio.Future = asyncio.Future() # Hide the command-line window on Windows when using Pythonw.exe creationflags = 0 @@ -111,7 +116,7 @@ async def run(self) -> None: if getattr(sys, "frozen", False): env["PLAYWRIGHT_BROWSERS_PATH"] = "0" - self._proc = proc = await asyncio.create_subprocess_exec( + self._proc = await asyncio.create_subprocess_exec( str(self._driver_executable), "run-driver", stdin=asyncio.subprocess.PIPE, @@ -123,20 +128,21 @@ async def run(self) -> None: ) except Exception as exc: self.on_error_future.set_exception(exc) - return + raise exc - assert proc.stdout - assert proc.stdin - self._output = proc.stdin + self._output = self._proc.stdin + async def run(self) -> None: + assert self._proc.stdout + assert self._proc.stdin while not self._stopped: try: - buffer = await proc.stdout.readexactly(4) + buffer = await self._proc.stdout.readexactly(4) length = int.from_bytes(buffer, byteorder="little", signed=False) buffer = bytes(0) while length: to_read = min(length, 32768) - data = await proc.stdout.readexactly(to_read) + data = await self._proc.stdout.readexactly(to_read) length -= to_read if len(buffer): buffer = buffer + data @@ -151,6 +157,7 @@ async def run(self) -> None: self._stopped_future.set_result(None) def send(self, message: Dict) -> None: + assert self._output data = self.serialize_message(message) self._output.write( len(data).to_bytes(4, byteorder="little", signed=False) + data @@ -184,15 +191,16 @@ def dispose(self) -> None: async def wait_until_stopped(self) -> None: await self._connection.wait_closed() - async def run(self) -> None: + async def connect(self) -> None: try: self._connection = await websocket_connect( self.ws_endpoint, extra_headers=self.headers ) except Exception as exc: self.on_error_future.set_exception(Error(f"websocket.connect: {str(exc)}")) - return + raise exc + async def run(self) -> None: while not self._stopped: try: message = await self._connection.recv() @@ -220,7 +228,7 @@ async def run(self) -> None: break def send(self, message: Dict) -> None: - if self._stopped or self._connection.closed: + if self._stopped or (hasattr(self, "_connection") and self._connection.closed): raise Error("Playwright connection closed") data = self.serialize_message(message) self._loop.create_task(self._connection.send(data)) diff --git a/playwright/async_api/_context_manager.py b/playwright/async_api/_context_manager.py index 1f60509b5..516454782 100644 --- a/playwright/async_api/_context_manager.py +++ b/playwright/async_api/_context_manager.py @@ -32,12 +32,11 @@ async def __aenter__(self) -> AsyncPlaywright: None, create_remote_object, PipeTransport(loop, compute_driver_executable()), + loop, ) - self._connection._loop = loop loop.create_task(self._connection.run()) - playwright_future = loop.create_task( - self._connection.wait_for_object_with_known_name("Playwright") - ) + playwright_future = self._connection.get_playwright_future() + done, pending = await asyncio.wait( {self._connection._transport.on_error_future, playwright_future}, return_when=asyncio.FIRST_COMPLETED, diff --git a/playwright/async_api/_generated.py b/playwright/async_api/_generated.py index 01e489828..53e1d40f4 100644 --- a/playwright/async_api/_generated.py +++ b/playwright/async_api/_generated.py @@ -2748,18 +2748,18 @@ async def goto( Returns the main resource response. In case of multiple redirects, the navigation will resolve with the response of the last redirect. - `frame.goto` will throw an error if: + The method will throw an error if: - there's an SSL error (e.g. in case of self-signed certificates). - target URL is invalid. - the `timeout` is exceeded during navigation. - the remote server does not respond or is unreachable. - the main resource failed to load. - `frame.goto` will not throw an error when any valid HTTP status code is returned by the remote server, including 404 - \"Not Found\" and 500 \"Internal Server Error\". The status code for such responses can be retrieved by calling + The method will not throw an error when any valid HTTP status code is returned by the remote server, including 404 \"Not + Found\" and 500 \"Internal Server Error\". The status code for such responses can be retrieved by calling `response.status()`. - > NOTE: `frame.goto` either throws an error or returns a main resource response. The only exceptions are navigation to + > NOTE: The method either throws an error or returns a main resource response. The only exceptions are navigation to `about:blank` or navigation to the same URL with a different hash, which would succeed and return `null`. > NOTE: Headless mode doesn't support navigation to a PDF document. See the [upstream issue](https://bugs.chromium.org/p/chromium/issues/detail?id=761295). @@ -4936,7 +4936,42 @@ async def register( An example of registering selector engine that queries elements based on a tag name: ```py - # FIXME: add snippet + import asyncio + from playwright.async_api import async_playwright + + async def run(playwright): + tag_selector = \"\"\" + { + // Returns the first element matching given selector in the root's subtree. + query(root, selector) { + return root.querySelector(selector); + }, + // Returns all elements matching given selector in the root's subtree. + queryAll(root, selector) { + return Array.from(root.querySelectorAll(selector)); + } + }\"\"\" + + # Register the engine. Selectors will be prefixed with \"tag=\". + await playwright.selectors.register(\"tag\", tag_selector) + browser = await playwright.chromium.launch() + page = await browser.new_page() + await page.set_content('
') + + # Use the selector prefixed with its name. + button = await page.query_selector('tag=button') + # Combine it with other selector engines. + await page.click('tag=div >> text=\"Click me\"') + # Can use it in any methods supporting selectors. + button_count = await page.eval_on_selector_all('tag=button', 'buttons => buttons.length') + print(button_count) + await browser.close() + + async def main(): + async with async_playwright() as playwright: + await run(playwright) + + asyncio.run(main()) ``` Parameters @@ -6389,18 +6424,18 @@ async def goto( Returns the main resource response. In case of multiple redirects, the navigation will resolve with the response of the last redirect. - `page.goto` will throw an error if: + The method will throw an error if: - there's an SSL error (e.g. in case of self-signed certificates). - target URL is invalid. - the `timeout` is exceeded during navigation. - the remote server does not respond or is unreachable. - the main resource failed to load. - `page.goto` will not throw an error when any valid HTTP status code is returned by the remote server, including 404 \"Not + The method will not throw an error when any valid HTTP status code is returned by the remote server, including 404 \"Not Found\" and 500 \"Internal Server Error\". The status code for such responses can be retrieved by calling `response.status()`. - > NOTE: `page.goto` either throws an error or returns a main resource response. The only exceptions are navigation to + > NOTE: The method either throws an error or returns a main resource response. The only exceptions are navigation to `about:blank` or navigation to the same URL with a different hash, which would succeed and return `null`. > NOTE: Headless mode doesn't support navigation to a PDF document. See the [upstream issue](https://bugs.chromium.org/p/chromium/issues/detail?id=761295). @@ -10144,7 +10179,8 @@ async def launch( Network proxy settings. downloads_path : Union[pathlib.Path, str, NoneType] If specified, accepted downloads are downloaded into this directory. Otherwise, temporary directory is created and is - deleted when browser is closed. + deleted when browser is closed. In either case, the downloads are deleted when the browser context they were created in + is closed. slow_mo : Union[float, NoneType] Slows down Playwright operations by the specified amount of milliseconds. Useful so that you can see what is going on. traces_dir : Union[pathlib.Path, str, NoneType] @@ -10283,7 +10319,8 @@ async def launch_persistent_context( Network proxy settings. downloads_path : Union[pathlib.Path, str, NoneType] If specified, accepted downloads are downloaded into this directory. Otherwise, temporary directory is created and is - deleted when browser is closed. + deleted when browser is closed. In either case, the downloads are deleted when the browser context they were created in + is closed. slow_mo : Union[float, NoneType] Slows down Playwright operations by the specified amount of milliseconds. Useful so that you can see what is going on. viewport : Union[{width: int, height: int}, NoneType] diff --git a/playwright/sync_api/_context_manager.py b/playwright/sync_api/_context_manager.py index 289483bb9..47a41794e 100644 --- a/playwright/sync_api/_context_manager.py +++ b/playwright/sync_api/_context_manager.py @@ -56,6 +56,7 @@ def greenlet_main() -> None: dispatcher_fiber, create_remote_object, PipeTransport(loop, compute_driver_executable()), + loop, ) g_self = greenlet.getcurrent() diff --git a/playwright/sync_api/_generated.py b/playwright/sync_api/_generated.py index 01d7ff67f..5c26ddf40 100644 --- a/playwright/sync_api/_generated.py +++ b/playwright/sync_api/_generated.py @@ -2731,18 +2731,18 @@ def goto( Returns the main resource response. In case of multiple redirects, the navigation will resolve with the response of the last redirect. - `frame.goto` will throw an error if: + The method will throw an error if: - there's an SSL error (e.g. in case of self-signed certificates). - target URL is invalid. - the `timeout` is exceeded during navigation. - the remote server does not respond or is unreachable. - the main resource failed to load. - `frame.goto` will not throw an error when any valid HTTP status code is returned by the remote server, including 404 - \"Not Found\" and 500 \"Internal Server Error\". The status code for such responses can be retrieved by calling + The method will not throw an error when any valid HTTP status code is returned by the remote server, including 404 \"Not + Found\" and 500 \"Internal Server Error\". The status code for such responses can be retrieved by calling `response.status()`. - > NOTE: `frame.goto` either throws an error or returns a main resource response. The only exceptions are navigation to + > NOTE: The method either throws an error or returns a main resource response. The only exceptions are navigation to `about:blank` or navigation to the same URL with a different hash, which would succeed and return `null`. > NOTE: Headless mode doesn't support navigation to a PDF document. See the [upstream issue](https://bugs.chromium.org/p/chromium/issues/detail?id=761295). @@ -4909,7 +4909,38 @@ def register( An example of registering selector engine that queries elements based on a tag name: ```py - # FIXME: add snippet + from playwright.sync_api import sync_playwright + + def run(playwright): + tag_selector = \"\"\" + { + // Returns the first element matching given selector in the root's subtree. + query(root, selector) { + return root.querySelector(selector); + }, + // Returns all elements matching given selector in the root's subtree. + queryAll(root, selector) { + return Array.from(root.querySelectorAll(selector)); + } + }\"\"\" + + # Register the engine. Selectors will be prefixed with \"tag=\". + playwright.selectors.register(\"tag\", tag_selector) + browser = playwright.chromium.launch() + page = browser.new_page() + page.set_content('
') + + # Use the selector prefixed with its name. + button = page.query_selector('tag=button') + # Combine it with other selector engines. + page.click('tag=div >> text=\"Click me\"') + # Can use it in any methods supporting selectors. + button_count = page.eval_on_selector_all('tag=button', 'buttons => buttons.length') + print(button_count) + browser.close() + + with sync_playwright() as playwright: + run(playwright) ``` Parameters @@ -6347,18 +6378,18 @@ def goto( Returns the main resource response. In case of multiple redirects, the navigation will resolve with the response of the last redirect. - `page.goto` will throw an error if: + The method will throw an error if: - there's an SSL error (e.g. in case of self-signed certificates). - target URL is invalid. - the `timeout` is exceeded during navigation. - the remote server does not respond or is unreachable. - the main resource failed to load. - `page.goto` will not throw an error when any valid HTTP status code is returned by the remote server, including 404 \"Not + The method will not throw an error when any valid HTTP status code is returned by the remote server, including 404 \"Not Found\" and 500 \"Internal Server Error\". The status code for such responses can be retrieved by calling `response.status()`. - > NOTE: `page.goto` either throws an error or returns a main resource response. The only exceptions are navigation to + > NOTE: The method either throws an error or returns a main resource response. The only exceptions are navigation to `about:blank` or navigation to the same URL with a different hash, which would succeed and return `null`. > NOTE: Headless mode doesn't support navigation to a PDF document. See the [upstream issue](https://bugs.chromium.org/p/chromium/issues/detail?id=761295). @@ -10083,7 +10114,8 @@ def launch( Network proxy settings. downloads_path : Union[pathlib.Path, str, NoneType] If specified, accepted downloads are downloaded into this directory. Otherwise, temporary directory is created and is - deleted when browser is closed. + deleted when browser is closed. In either case, the downloads are deleted when the browser context they were created in + is closed. slow_mo : Union[float, NoneType] Slows down Playwright operations by the specified amount of milliseconds. Useful so that you can see what is going on. traces_dir : Union[pathlib.Path, str, NoneType] @@ -10222,7 +10254,8 @@ def launch_persistent_context( Network proxy settings. downloads_path : Union[pathlib.Path, str, NoneType] If specified, accepted downloads are downloaded into this directory. Otherwise, temporary directory is created and is - deleted when browser is closed. + deleted when browser is closed. In either case, the downloads are deleted when the browser context they were created in + is closed. slow_mo : Union[float, NoneType] Slows down Playwright operations by the specified amount of milliseconds. Useful so that you can see what is going on. viewport : Union[{width: int, height: int}, NoneType] diff --git a/setup.py b/setup.py index b5595a6c5..faf0b1add 100644 --- a/setup.py +++ b/setup.py @@ -28,7 +28,7 @@ InWheel = None from wheel.bdist_wheel import bdist_wheel as BDistWheelCommand -driver_version = "1.15.0-next-1629385562000" +driver_version = "1.15.0-next-1629487941000" def extractall(zip: zipfile.ZipFile, path: str) -> None: