Skip to content

Commit d118bc0

Browse files
[3.14] gh-124621: Emscripten: Add support for async input devices (GH-136822) (GH-136935)
This is useful for implementing proper `input()`. It requires the JavaScript engine to support the wasm JSPI spec which is now stage 4. It is supported on Chrome since version 137 and on Firefox and node behind a flag. We override the `__wasi_fd_read()` syscall with our own variant that checks for a readAsync operation. If it has it, we use our own async variant of `fd_read()`, otherwise we use the original `fd_read()`. We also add a variant of `FS.createDevice()` called `FS.createAsyncInputDevice()`. Finally, if JSPI is available, we wrap the `main()` symbol with `WebAssembly.promising()` so that we can stack switch from `fd_read()`. If JSPI is not available, attempting to read from an AsyncInputDevice will raise an `OSError`. (cherry picked from commit 7ae4749) Co-authored-by: Hood Chatham <roberthoodchatham@gmail.com>
1 parent 8e43b13 commit d118bc0

File tree

4 files changed

+252
-1
lines changed

4 files changed

+252
-1
lines changed

Lib/test/test_capi/test_emscripten.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
import unittest
2+
from test.support import is_emscripten
3+
4+
if not is_emscripten:
5+
raise unittest.SkipTest("Emscripten-only test")
6+
7+
from _testinternalcapi import emscripten_set_up_async_input_device
8+
from pathlib import Path
9+
10+
11+
class EmscriptenAsyncInputDeviceTest(unittest.TestCase):
    # Exercises reads from the /dev/blah device that the
    # emscripten_set_up_async_input_device() helper registers through
    # FS.createAsyncInputDevice().

    def test_emscripten_async_input_device(self):
        # The helper returns a truthy value when WebAssembly.promising exists.
        has_jspi = emscripten_set_up_async_input_device()
        device = Path("/dev/blah")
        self.addCleanup(device.unlink)

        with open(device, "r") as stream:
            if not has_jspi:
                # Without JSPI a read from the async device must fail.
                self.assertRaises(OSError, stream.readline)
                return

            # The device hands out "ab", "fi", "xy" in a repeating cycle.
            for _ in range(10):
                for expected in ("ab", "fi", "xy"):
                    self.assertEqual(stream.readline().strip(), expected)

Modules/_testinternalcapi.c

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2346,6 +2346,37 @@ incref_decref_delayed(PyObject *self, PyObject *op)
23462346
Py_RETURN_NONE;
23472347
}
23482348

2349+
#ifdef __EMSCRIPTEN__
2350+
#include "emscripten.h"
2351+
2352+
// Test helper: registers /dev/blah as an async input device that yields the
// lines "ab", "fi", "xy" in a repeating cycle, each after a 5 ms delay.
// Returns nonzero when WebAssembly.promising is available.
EM_JS(int, emscripten_set_up_async_input_device_js, (void), {
    const encoder = new TextEncoder();
    const chunks = ["ab\n", "fi\n", "xy\n"].map((text) => encoder.encode(text));
    let served = 0;
    const delay = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
    FS.createAsyncInputDevice("/dev", "blah", async () => {
        await delay(5);
        const chunk = chunks[served % 3];
        served += 1;
        return chunk;
    });
    return Boolean(WebAssembly.promising);
});
2369+
2370+
// Python-visible wrapper: sets up the async test device and returns True when
// the JavaScript helper reports a nonzero result, False otherwise.
static PyObject *
emscripten_set_up_async_input_device(PyObject *self, PyObject *Py_UNUSED(ignored)) {
    int jspi_supported = emscripten_set_up_async_input_device_js();
    return PyBool_FromLong(jspi_supported);
}
2378+
#endif
2379+
23492380
static PyMethodDef module_functions[] = {
23502381
{"get_configs", get_configs, METH_NOARGS},
23512382
{"get_recursion_depth", get_recursion_depth, METH_NOARGS},
@@ -2448,6 +2479,9 @@ static PyMethodDef module_functions[] = {
24482479
{"is_static_immortal", is_static_immortal, METH_O},
24492480
{"incref_decref_delayed", incref_decref_delayed, METH_O},
24502481
GET_NEXT_DICT_KEYS_VERSION_METHODDEF
2482+
#ifdef __EMSCRIPTEN__
2483+
{"emscripten_set_up_async_input_device", emscripten_set_up_async_input_device, METH_NOARGS},
2484+
#endif
24512485
{NULL, NULL} /* sentinel */
24522486
};
24532487

Python/emscripten_syscalls.c

Lines changed: 182 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,3 +37,185 @@ EM_JS(int, __syscall_umask_js, (int mask), {
3737
// Forwards `mask` to the JavaScript helper __syscall_umask_js() and returns
// its result unchanged.
int __syscall_umask(int mask) {
3838
return __syscall_umask_js(mask);
3939
}
40+
41+
#include <wasi/api.h>
42+
#include <errno.h>
43+
#undef errno
44+
45+
// Variant of EM_JS that does C preprocessor substitution on the body
46+
// Thin wrapper that passes its arguments straight on to EM_JS. Bodies written
// with it use C macros such as EIO, EAGAIN and IOVEC_T_SIZE by name and get
// their values substituted. `body...` is the GNU named-variadic form, so a
// body may contain top-level commas.
#define EM_JS_MACROS(ret, func_name, args, body...) \
47+
EM_JS(ret, func_name, args, body)
48+
49+
// Startup hook body. It always installs FS.createAsyncInputDevice(). When the
// engine exposes WebAssembly.promising (JSPI) it additionally replaces
// resolveGlobalSymbol() so that the "main" symbol is wrapped with
// WebAssembly.promising() and can be suspended while an async read is pending.
EM_JS_MACROS(void, _emscripten_promising_main_js, (void), {
    // Define FS.createAsyncInputDevice(). This is quite similar to
    // FS.createDevice() defined here:
    // https://github.com/emscripten-core/emscripten/blob/4.0.11/src/lib/libfs.js?plain=1#L1642
    // but instead of returning one byte at a time, the input() function should
    // return a Uint8Array. This makes the handler code simpler, the
    // `createAsyncInputDevice` simpler, and everything faster.
    FS.createAsyncInputDevice = function(parent, name, input) {
        parent = typeof parent == 'string' ? parent : FS.getPath(parent);
        var path = PATH.join2(parent, name);
        var mode = FS_getMode(true, false);
        FS.createDevice.major ||= 64;
        var dev = FS.makedev(FS.createDevice.major++, 0);

        // Fetch the next chunk from input() into ops._dataBuf. A failing
        // input() is reported as EIO, an empty or missing chunk as EAGAIN.
        async function getDataBuf() {
            var buf;
            try {
                buf = await input();
            } catch (e) {
                throw new FS.ErrnoError(EIO);
            }
            if (!buf?.byteLength) {
                throw new FS.ErrnoError(EAGAIN);
            }
            ops._dataBuf = buf;
        }

        var ops = {
            // Bytes already received from input() but not yet handed out.
            _dataBuf: new Uint8Array(0),
            open(stream) {
                stream.seekable = false;
            },
            // Copy up to `length` buffered bytes into buffer[offset...] and
            // return the number of bytes copied. Only awaits input() when
            // nothing is buffered.
            async readAsync(stream, buffer, offset, length, pos /* ignored */) {
                buffer = buffer.subarray(offset, offset + length);
                if (!ops._dataBuf.byteLength) {
                    await getDataBuf();
                }
                var toRead = Math.min(ops._dataBuf.byteLength, buffer.byteLength);
                // Copy only the first toRead bytes. TypedArray.set() throws a
                // RangeError when the source is longer than the destination,
                // which is the case whenever input() delivered more data than
                // this read asked for.
                buffer.set(ops._dataBuf.subarray(0, toRead));
                // Keep the remainder for the next read.
                ops._dataBuf = ops._dataBuf.subarray(toRead);
                if (toRead) {
                    stream.node.atime = Date.now();
                }
                return toRead;
            },
        };
        FS.registerDevice(dev, ops);
        return FS.mkdev(path, mode, dev);
    };
    if (!WebAssembly.promising) {
        // No stack switching support =(
        return;
    }
    const origResolveGlobalSymbol = resolveGlobalSymbol;
    // Outside of Node `process` is not declared at all, and optional chaining
    // on an undeclared identifier throws a ReferenceError, so probe with
    // typeof before touching it.
    const hasProcessExit = () =>
        typeof process !== "undefined" && Boolean(process?.exit);
    if (!Module.onExit && hasProcessExit()) {
        Module.onExit = (code) => process.exit(code);
    }
    // * wrap the main symbol with WebAssembly.promising,
    // * call exit_with_live_runtime() to prevent emscripten from shutting down
    //   the runtime before the promise resolves,
    // * call onExit / process.exit ourselves, since exit_with_live_runtime()
    //   prevented Emscripten from calling it normally.
    resolveGlobalSymbol = function (name, direct = false) {
        const orig = origResolveGlobalSymbol(name, direct);
        if (name === "main") {
            const main = WebAssembly.promising(orig.sym);
            orig.sym = (...args) => {
                (async () => {
                    const ret = await main(...args);
                    if (hasProcessExit()) {
                        process.exit(ret);
                    } else {
                        // No process.exit() (e.g. in a browser): report the
                        // exit code through the embedder callback, if any.
                        Module.onExit?.(ret);
                    }
                })();
                _emscripten_exit_with_live_runtime();
            };
        }
        return orig;
    };
})
126+
127+
// Declared with __attribute__((constructor)) so that the JavaScript setup in
// _emscripten_promising_main_js() runs automatically at startup; its only job
// is to call that helper.
__attribute__((constructor)) void _emscripten_promising_main(void) {
128+
_emscripten_promising_main_js();
129+
}
130+
131+
132+
// Byte offsets of the `buf` and `buf_len` members of __wasi_iovec_t and the
// size of the struct, spelled as plain macros so that EM_JS_MACROS bodies can
// use them when reading iovec entries out of the heap.
#define IOVEC_T_BUF_OFFSET 0
133+
#define IOVEC_T_BUF_LEN_OFFSET 4
134+
#define IOVEC_T_SIZE 8
135+
// Fail the build if the hard-coded numbers above ever disagree with the real
// struct layout.
_Static_assert(offsetof(__wasi_iovec_t, buf) == IOVEC_T_BUF_OFFSET,
136+
"Unexpected __wasi_iovec_t layout");
137+
_Static_assert(offsetof(__wasi_iovec_t, buf_len) == IOVEC_T_BUF_LEN_OFFSET,
138+
"Unexpected __wasi_iovec_t layout");
139+
_Static_assert(sizeof(__wasi_iovec_t) == IOVEC_T_SIZE,
140+
"Unexpected __wasi_iovec_t layout");
141+
142+
// If the stream has a readAsync handler, read to buffer defined in iovs, write
143+
// number of bytes read to *nread, and return a promise that resolves to the
144+
// errno. Otherwise, return null.
145+
// Async read path for fd_read. Returns null when the read should be handled by
// the original synchronous fd_read: no JSPI support, an fd that cannot be
// resolved to a stream, or a stream without a readAsync handler. Otherwise
// returns a promise that fills the buffers described by iovs, stores the byte
// count in *nread, and resolves to the errno.
EM_JS_MACROS(__externref_t, __maybe_fd_read_async, (
    __wasi_fd_t fd,
    const __wasi_iovec_t *iovs,
    size_t iovcnt,
    __wasi_size_t *nread
), {
    if (!WebAssembly.promising) {
        return null;
    }
    let stream;
    try {
        stream = SYSCALLS.getStreamFromFD(fd);
    } catch (e) {
        // getStreamFromFD() raises for an fd that was already closed or never
        // existed. Let __wasi_fd_read_orig() take care of setting errno.
        return null;
    }
    if (!stream.stream_ops.readAsync) {
        // Not an async device. Fall back to __wasi_fd_read_orig().
        return null;
    }
    // Same logic as libwasi.js fd_read() and doReadv(), except that it goes
    // through readAsync and awaits it.
    // https://github.com/emscripten-core/emscripten/blob/4.0.11/src/lib/libwasi.js?plain=1#L331
    // https://github.com/emscripten-core/emscripten/blob/4.0.11/src/lib/libwasi.js?plain=1#L197
    async function readIovecs() {
        try {
            let total = 0;
            for (let i = 0; i < iovcnt; i++) {
                const entry = iovs + i * IOVEC_T_SIZE;
                const ptr = HEAP32[(entry + IOVEC_T_BUF_OFFSET)/4];
                const len = HEAP32[(entry + IOVEC_T_BUF_LEN_OFFSET)/4];
                const count = await stream.stream_ops.readAsync(stream, HEAP8, ptr, len);
                if (count < 0) {
                    return -1;
                }
                total += count;
                if (count < len) {
                    // Short read: nothing more to read.
                    break;
                }
            }
            HEAP32[nread/4] = total;
            return 0;
        } catch (e) {
            if (e.name === 'ErrnoError') {
                return e.errno;
            }
            throw e;
        }
    }
    return readIovecs();
};
);
193+
194+
// Bind original fd_read syscall to __wasi_fd_read_orig().
195+
// Declaration only: the import attributes name module "wasi_snapshot_preview1"
// and import "fd_read", so calls to __wasi_fd_read_orig() go to that import.
// __warn_unused_result__ makes the compiler warn when the returned errno is
// ignored.
__wasi_errno_t __wasi_fd_read_orig(__wasi_fd_t fd, const __wasi_iovec_t *iovs,
196+
size_t iovs_len, __wasi_size_t *nread)
197+
__attribute__((__import_module__("wasi_snapshot_preview1"),
198+
__import_name__("fd_read"), __warn_unused_result__));
199+
200+
// Take a promise that resolves to __wasi_errno_t and suspend until it resolves,
201+
// get the output.
202+
// The JavaScript function itself just returns the promise it was given. The
// text after its closing brace re-binds __block_for_errno to a
// WebAssembly.Suspending wrapper when that API exists.
// NOTE(review): this appears to rely on EM_JS emitting the trailing text
// verbatim after the generated function definition - confirm against the
// Emscripten EM_JS implementation.
EM_JS(__wasi_errno_t, __block_for_errno, (__externref_t p), {
203+
return p;
204+
}
205+
if (WebAssembly.Suspending) {
206+
__block_for_errno = new WebAssembly.Suspending(__block_for_errno);
207+
}
208+
)
209+
210+
// Replacement for fd_read syscall. Call __maybe_fd_read_async. If it returned
211+
// null, delegate back to __wasi_fd_read_orig. Otherwise, use __block_for_errno
212+
// to get the result.
213+
// fd_read replacement. Asks __maybe_fd_read_async() for a pending async read;
// if there is one, blocks on it via __block_for_errno(), otherwise defers to
// the original fd_read import.
__wasi_errno_t __wasi_fd_read(__wasi_fd_t fd, const __wasi_iovec_t *iovs,
                              size_t iovs_len, __wasi_size_t *nread) {
    __externref_t pending = __maybe_fd_read_async(fd, iovs, iovs_len, nread);
    if (!__builtin_wasm_ref_is_null_extern(pending)) {
        // Async device: suspend until the promise resolves to the errno.
        return __block_for_errno(pending);
    }
    // Not an async read: use the synchronous implementation.
    return __wasi_fd_read_orig(fd, iovs, iovs_len, nread);
}

Tools/wasm/emscripten/__main__.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -274,10 +274,20 @@ def configure_emscripten_python(context, working_dir):
274274
REALPATH=abs_path
275275
fi
276276
277+
# Before node 24, --experimental-wasm-jspi uses different API,
278+
# After node 24 JSPI is on by default.
279+
ARGS=$({host_runner} -e "$(cat <<"EOF"
280+
const major_version = Number(process.version.split(".")[0].slice(1));
281+
if (major_version === 24) {{
282+
process.stdout.write("--experimental-wasm-jspi");
283+
}}
284+
EOF
285+
)")
286+
277287
# We compute our own path, not following symlinks and pass it in so that
278288
# node_entry.mjs can set sys.executable correctly.
279289
# Intentionally allow word splitting on NODEFLAGS.
280-
exec {host_runner} $NODEFLAGS {node_entry} --this-program="$($REALPATH "$0")" "$@"
290+
exec {host_runner} $NODEFLAGS $ARGS {node_entry} --this-program="$($REALPATH "$0")" "$@"
281291
"""
282292
)
283293
)

0 commit comments

Comments
 (0)