Skip to content

Commit 77880ab

Browse files
committed
gh-136810: Emscripten: Add support for async input devices
This is useful for implementing a proper `input()`. It requires the JavaScript engine to support the wasm JSPI spec, which is now at stage 4. JSPI is supported in Chrome since version 137, and in Firefox and Node behind a flag. We override the `__wasi_fd_read()` syscall with our own variant that checks whether the stream has a `readAsync` operation. If it does, we use our own async variant of `fd_read()`; otherwise we use the original `fd_read()`. We also add a variant of `FS.createDevice()` called `FS.createAsyncInputDevice()`. Finally, if JSPI is available, we wrap the `main()` symbol with `WebAssembly.promising()` so that we can stack switch from `fd_read()`. If JSPI is not available, attempting to read from an AsyncInputDevice will raise an `OSError`.
1 parent 28937d3 commit 77880ab

File tree

4 files changed

+244
-1
lines changed

4 files changed

+244
-1
lines changed

Lib/test/test_capi/test_emscripten.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
import unittest
2+
from test.support import is_emscripten
3+
4+
# A bare ``unittest.skipUnless(...)`` call only builds a decorator and throws
# it away, so it never skips anything. Raise SkipTest at import time instead,
# before importing the Emscripten-only helper below.
if not is_emscripten:
    raise unittest.SkipTest("only available on Emscripten")

from _testinternalcapi import emscripten_set_up_async_input_device
from pathlib import Path


class EmscriptenAsyncInputDeviceTest(unittest.TestCase):
    """Exercise reads from a device created with FS.createAsyncInputDevice()."""

    def test_emscripten_async_input_device(self):
        # The helper registers /dev/blah and reports whether the JS engine
        # exposes WebAssembly.promising (i.e. whether JSPI is usable).
        supported = emscripten_set_up_async_input_device()
        p = Path("/dev/blah")
        self.addCleanup(p.unlink)
        if not supported:
            # Without JSPI, reading from the async device must fail cleanly.
            with open(p, "r") as f:
                self.assertRaises(OSError, f.readline)
            return

        # The device hands out "ab", "fi", "xy" in rotation, one line per read.
        with open(p, "r") as f:
            for _ in range(10):
                self.assertEqual(f.readline().strip(), "ab")
                self.assertEqual(f.readline().strip(), "fi")
                self.assertEqual(f.readline().strip(), "xy")

Modules/_testinternalcapi.c

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2345,6 +2345,37 @@ incref_decref_delayed(PyObject *self, PyObject *op)
23452345
Py_RETURN_NONE;
23462346
}
23472347

2348+
#ifdef __EMSCRIPTEN__
2349+
#include "emscripten.h"
2350+
2351+
EM_JS(int, emscripten_set_up_async_input_device_js, (void), {
    // Test fixture: register /dev/blah as an async input device that hands
    // out the lines "ab", "fi" and "xy" in rotation, one chunk per call, each
    // after a short delay.
    const encoder = new TextEncoder();
    const chunks = ["ab\n", "fi\n", "xy\n"].map((line) => encoder.encode(line));
    let next = 0;
    const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
    FS.createAsyncInputDevice("/dev", "blah", async () => {
        await sleep(5);
        const chunk = chunks[next % 3];
        next++;
        return chunk;
    });
    // Report whether WebAssembly.promising exists in this JS engine.
    return !!WebAssembly.promising;
});
2368+
2369+
/* Python-level wrapper: run the JS setup helper and return its result as a
 * bool (True when the helper returns non-zero, False otherwise). */
static PyObject *
emscripten_set_up_async_input_device(PyObject *self, PyObject *Py_UNUSED(ignored))
{
    int supported = emscripten_set_up_async_input_device_js();
    return PyBool_FromLong(supported);
}
2377+
#endif
2378+
23482379
static PyMethodDef module_functions[] = {
23492380
{"get_configs", get_configs, METH_NOARGS},
23502381
{"get_recursion_depth", get_recursion_depth, METH_NOARGS},
@@ -2447,6 +2478,9 @@ static PyMethodDef module_functions[] = {
24472478
{"is_static_immortal", is_static_immortal, METH_O},
24482479
{"incref_decref_delayed", incref_decref_delayed, METH_O},
24492480
GET_NEXT_DICT_KEYS_VERSION_METHODDEF
2481+
#ifdef __EMSCRIPTEN__
2482+
{"emscripten_set_up_async_input_device", emscripten_set_up_async_input_device, METH_NOARGS},
2483+
#endif
24502484
{NULL, NULL} /* sentinel */
24512485
};
24522486

Python/emscripten_syscalls.c

Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,3 +37,178 @@ EM_JS(int, __syscall_umask_js, (int mask), {
3737
// C entry point for the umask syscall: forwards the mask to the JS
// implementation and returns whatever it returns.
int __syscall_umask(int mask)
{
    int result = __syscall_umask_js(mask);
    return result;
}
40+
41+
#include <wasi/api.h>
42+
#include <errno.h>
43+
#undef errno
44+
45+
// Variant of EM_JS that does C preprocessor substitution on the body: the
// extra macro layer lets macros used in the body (for example errno
// constants) be expanded before the body is handed to EM_JS.
#define EM_JS_MACROS(ret, func_name, args, ...) \
    EM_JS(ret, func_name, args, __VA_ARGS__)
48+
49+
EM_JS_MACROS(void, _emscripten_promising_main_js, (void), {
    // Define FS.createAsyncInputDevice(). This is quite similar to
    // FS.createDevice() defined here:
    // https://github.com/emscripten-core/emscripten/blob/4.0.11/src/lib/libfs.js?plain=1#L1642
    // but instead of returning one byte at a time, the input() function should
    // return a Uint8Array. This makes the handler code simpler, the
    // `createAsyncInputDevice` simpler, and everything faster.
    FS.createAsyncInputDevice = function(parent, name, input) {
        parent = typeof parent == 'string' ? parent : FS.getPath(parent);
        var path = PATH.join2(parent, name);
        var mode = FS_getMode(true, false);
        FS.createDevice.major ||= 64;
        var dev = FS.makedev(FS.createDevice.major++, 0);

        // Await the next chunk from input() and stash it in ops._dataBuf.
        // A failing input() is reported as EIO, an empty or missing chunk
        // as EAGAIN.
        async function getDataBuf() {
            var buf;
            try {
                buf = await input();
            } catch (e) {
                throw new FS.ErrnoError(EIO);
            }
            if (!buf?.byteLength) {
                throw new FS.ErrnoError(EAGAIN);
            }
            ops._dataBuf = buf;
        }

        var ops = {
            // Bytes received from input() that have not been consumed yet.
            _dataBuf: new Uint8Array(0),
            open(stream) {
                stream.seekable = false;
            },
            // Copy up to `length` pending bytes into buffer[offset...] and
            // return the number of bytes copied. Fetches a new chunk first
            // when nothing is pending.
            async readAsync(stream, buffer, offset, length, pos /* ignored */) {
                buffer = buffer.subarray(offset, offset + length);
                if (!ops._dataBuf.byteLength) {
                    await getDataBuf();
                }
                var toRead = Math.min(ops._dataBuf.byteLength, buffer.byteLength);
                // Slice the *source* down to toRead bytes. TypedArray.set()
                // throws a RangeError when the source is longer than the
                // target, which is what happened whenever the pending chunk
                // was larger than the destination buffer.
                buffer.set(ops._dataBuf.subarray(0, toRead));
                ops._dataBuf = ops._dataBuf.subarray(toRead);
                if (toRead) {
                    stream.node.atime = Date.now();
                }
                return toRead;
            },
        };
        FS.registerDevice(dev, ops);
        return FS.mkdev(path, mode, dev);
    };
    if (!WebAssembly.promising) {
        // No stack switching support =(
        return;
    }
    const origResolveGlobalSymbol = resolveGlobalSymbol;
    // Optional chaining does not protect against an *undeclared* identifier:
    // `process?.exit` throws a ReferenceError where `process` does not exist
    // (e.g. in browsers). Probe with typeof instead.
    const hasProcessExit =
        typeof process !== "undefined" && typeof process.exit === "function";
    if (!Module.onExit && hasProcessExit) {
        Module.onExit = (code) => process.exit(code);
    }
    // * wrap the main symbol with WebAssembly.promising,
    // * call exit_with_live_runtime() to prevent emscripten from shutting down
    //   the runtime before the promise resolves,
    // * call process.exit ourselves (when it exists), since
    //   exit_with_live_runtime() prevented Emscripten from calling it normally.
    resolveGlobalSymbol = function (name, direct = false) {
        const orig = origResolveGlobalSymbol(name, direct);
        if (name === "main") {
            const main = WebAssembly.promising(orig.sym);
            orig.sym = (...args) => {
                (async () => {
                    const ret = await main(...args);
                    if (hasProcessExit) {
                        process.exit(ret);
                    }
                })();
                _emscripten_exit_with_live_runtime();
            };
        }
        return orig;
    };
})
126+
127+
// Constructor hook: runs the JS-side setup (_emscripten_promising_main_js)
// when constructors are executed.
__attribute__((constructor))
void
_emscripten_promising_main(void)
{
    _emscripten_promising_main_js();
}
130+
131+
132+
#define IOVEC_T_BUF_OFFSET 0
133+
#define IOVEC_T_BUF_LEN_OFFSET 4
134+
#define IOVEC_T_SIZE 8
135+
_Static_assert(offsetof(__wasi_iovec_t, buf) == IOVEC_T_BUF_OFFSET,
136+
"Unexpected __wasi_iovec_t layout");
137+
_Static_assert(offsetof(__wasi_iovec_t, buf_len) == IOVEC_T_BUF_LEN_OFFSET,
138+
"Unexpected __wasi_iovec_t layout");
139+
_Static_assert(sizeof(__wasi_iovec_t) == IOVEC_T_SIZE,
140+
"Unexpected __wasi_iovec_t layout");
141+
142+
// If the stream has a readAsync handler, read to buffer defined in iovs, write
143+
// number of bytes read to *nread, and return a promise that resolves to the
144+
// errno. Otherwise, return null.
145+
EM_JS_MACROS(__externref_t, __maybe_fd_read_async, (
146+
__wasi_fd_t fd,
147+
const __wasi_iovec_t *iovs,
148+
size_t iovcnt,
149+
__wasi_size_t *nread
150+
), {
151+
var stream = SYSCALLS.getStreamFromFD(fd);
152+
if (!WebAssembly.promising) {
153+
return null;
154+
}
155+
if (!stream.stream_ops.readAsync) {
156+
// Not an async device. Fall back to __wasi_fd_read_orig().
157+
return null;
158+
}
159+
return (async () => {
160+
// This is the same as libwasi.js fd_read() and doReadv() except we use
161+
// readAsync and we await it.
162+
// https://github.com/emscripten-core/emscripten/blob/4.0.11/src/lib/libwasi.js?plain=1#L331
163+
// https://github.com/emscripten-core/emscripten/blob/4.0.11/src/lib/libwasi.js?plain=1#L197
164+
try {
165+
var ret = 0;
166+
for (var i = 0; i < iovcnt; i++) {
167+
var ptr = HEAP32[(iovs + IOVEC_T_BUF_OFFSET)/4];
168+
var len = HEAP32[(iovs + IOVEC_T_BUF_LEN_OFFSET)/4];
169+
iovs += IOVEC_T_SIZE;
170+
var curr = await stream.stream_ops.readAsync(stream, HEAP8, ptr, len);
171+
if (curr < 0) return -1;
172+
ret += curr;
173+
if (curr < len) break; // nothing more to read
174+
}
175+
HEAP32[nread/4] = ret;
176+
return 0;
177+
} catch (e) {
178+
if (e.name !== 'ErrnoError') {
179+
throw e;
180+
}
181+
return e.errno;
182+
}
183+
})();
184+
};
185+
);
186+
187+
// Bind original fd_read syscall to __wasi_fd_read_orig().
188+
__wasi_errno_t __wasi_fd_read_orig(__wasi_fd_t fd, const __wasi_iovec_t *iovs,
189+
size_t iovs_len, __wasi_size_t *nread)
190+
__attribute__((__import_module__("wasi_snapshot_preview1"),
191+
__import_name__("fd_read"), __warn_unused_result__));
192+
193+
// Take a promise that resolves to __wasi_errno_t and suspend until it resolves,
194+
// get the output.
195+
EM_JS(__wasi_errno_t, __block_for_errno, (__externref_t p), {
196+
return p;
197+
}
198+
if (WebAssembly.Suspending) {
199+
__block_for_errno = new WebAssembly.Suspending(__block_for_errno);
200+
}
201+
)
202+
203+
// Replacement for fd_read syscall. Call __maybe_fd_read_async. If it returned
204+
// null, delegate back to __wasi_fd_read_orig. Otherwise, use __block_for_errno
205+
// to get the result.
206+
__wasi_errno_t __wasi_fd_read(__wasi_fd_t fd, const __wasi_iovec_t *iovs,
207+
size_t iovs_len, __wasi_size_t *nread) {
208+
__externref_t p = __maybe_fd_read_async(fd, iovs, iovs_len, nread);
209+
if (__builtin_wasm_ref_is_null_extern(p)) {
210+
return __wasi_fd_read_orig(fd, iovs, iovs_len, nread);
211+
}
212+
__wasi_errno_t res = __block_for_errno(p);
213+
return res;
214+
}

Tools/wasm/emscripten/__main__.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -263,10 +263,20 @@ def configure_emscripten_python(context, working_dir):
263263
REALPATH=abs_path
264264
fi
265265
266+
# Before node 24, --experimental-wasm-jspi uses different API,
267+
# After node 24 JSPI is on by default.
268+
ARGS=$({host_runner} -e "$(cat <<"EOF"
269+
const major_version = Number(process.version.split(".")[0].slice(1));
270+
if (major_version === 24) {{
271+
process.stdout.write("--experimental-wasm-jspi");
272+
}}
273+
EOF
274+
)")
275+
266276
# We compute our own path, not following symlinks and pass it in so that
267277
# node_entry.mjs can set sys.executable correctly.
268278
# Intentionally allow word splitting on NODEFLAGS.
269-
exec {host_runner} $NODEFLAGS {node_entry} --this-program="$($REALPATH "$0")" "$@"
279+
exec {host_runner} $NODEFLAGS $ARGS {node_entry} --this-program="$($REALPATH "$0")" "$@"
270280
"""
271281
)
272282
)

0 commit comments

Comments
 (0)