-
Notifications
You must be signed in to change notification settings - Fork 38
/
Copy pathwasi_ephemeral_nn.witx
169 lines (146 loc) · 6.11 KB
/
wasi_ephemeral_nn.witx
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
;; WASI Machine Learning API.
;;
;; This module draws inspiration from the inference side of [WebNN](https://webmachinelearning.github.io/webnn/#api).
;; In other words, this API will allow a user to execute an inference using an execution graph (i.e. a model), but not
;; train or design the graph. The graph is an opaque byte sequence to this API and must be interpreted by the underlying
;; implementation of this API.
;;
;; This is a `witx` file. See [here](https://github.com/WebAssembly/WASI/tree/master/docs/witx.md)
;; for an explanation of what that means.
;;; The size of a buffer, in bytes. In this file it is the type of the `$out_buffer_max_size` parameter and of the
;;; `$bytes_written` result of `get_output`. This is equivalent to `$size` in `typenames.witx` but renamed since
;;; `typenames.witx` is not included here but is included in the overall ephemeral phase.
(typename $buffer_size u32)
;;; Error codes returned by functions in this API. The name is prefixed with `nn_` to avoid conflicts with the
;;; `$errno` in `typenames.witx`.
;;
;; NOTE(review): `set_input`, `get_output` and `compute` each document an error condition marked "TODO define"
;; (mismatched input tensor, inference not yet run, inputs not all defined); none of them has a dedicated variant
;; in this enum yet.
(typename $nn_errno
(enum u16
;;; No error occurred.
$success
;;; Caller module passed an invalid argument.
$invalid_argument
;;; Caller module is missing a memory export.
$missing_memory
;;; Device or resource busy.
$busy
)
)
;;; The dimensions of a tensor.
;;;
;;; The array length matches the tensor rank, and each element of the array
;;; gives the size of the corresponding dimension.
(typename $tensor_dimensions (array u32))
;;; The type of the elements in a tensor.
;;
;; NOTE(review): the variants below are undocumented. The names presumably follow the usual width-suffixed
;; convention (16-/32-bit float, 8-bit unsigned integer, 32-bit signed integer); only the 4-byte size of `$f32`
;; is stated elsewhere in this file (in the `$tensor_data` docs) -- TODO confirm and document each variant.
(typename $tensor_type
(enum u8
$f16
$f32
$u8
$i32
)
)
;;; The tensor data, as raw bytes.
;;;
;;; Initially conceived as a sparse representation, each empty cell would be filled with zeroes and
;;; the array length must match the product of all of the dimensions multiplied by the number of bytes in the
;;; element type (e.g. a 2x2 tensor with 4-byte f32 elements would have a data array of length 16). Naturally, this
;;; representation requires some knowledge of how to lay out data in memory--e.g. using row-major ordering--and
;;; could perhaps be improved by future witx features (TODO).
(typename $tensor_data (array u8))
;;; A tensor: its dimensions, its element type and its data.
(typename $tensor
(struct
;;; Describe the size of the tensor (e.g. 2x2x2x2 -> [2, 2, 2, 2]). To represent a tensor containing a single value,
;;; use `[1]` for the tensor dimensions.
(field $dimensions $tensor_dimensions)
;;; Describe the type of element in the tensor (e.g. f32).
(field $type $tensor_type)
;;; Contains the tensor data.
(field $data $tensor_data)
)
)
;;; A single buffer of graph initialization data, as raw bytes.
(typename $graph_builder (array u8))
;;; The graph initialization data. This consists of an array of buffers because implementing backends may encode their
;;; graph IR in parts (e.g. OpenVINO stores its IR and weights separately).
(typename $graph_builder_array (array $graph_builder))
;;; An execution graph for performing inference (i.e. a model). A value of this type is returned by `load` and
;;; passed to `init_execution_context`.
(typename $graph (handle))
;;; Describes the encoding of the graph. This allows the API to be implemented by various backends that encode (i.e.
;;; serialize) their graph IR differently. Passed to `load` as `$encoding`.
(typename $graph_encoding
(enum u8
;;; The OpenVINO encoding.
;;; TODO document buffer order
;; NOTE(review): "buffer order" presumably means the order in which the buffers of the `$graph_builder_array`
;; must be supplied for this encoding -- confirm before documenting.
$openvino
)
)
;;; Define where the graph should be executed. Passed to `load` as `$target`.
(typename $execution_target
(enum u8
$cpu
$gpu
$tpu
)
)
;;; A $graph_execution_context allows for attaching inputs (`set_input`) prior to calling `compute` on a graph and
;;; retrieving outputs (`get_output`) after the computation has completed. It is returned by `init_execution_context`.
;;; TODO a handle may not be the right type but we want it to be opaque to users.
(typename $graph_execution_context (handle))
;;; The inference API. The call sequence implied by the function docs below is:
;;; `load` -> `init_execution_context` -> `set_input` (once per input) -> `compute` -> `get_output`.
(module $wasi_ephemeral_nn
;;; Linear memory to be accessed by WASI functions that need it.
(import "memory" (memory))
;;; Load an opaque sequence of bytes to use for inference.
;;;
;;; This allows runtime implementations to support multiple graph encoding formats. For unsupported graph encodings,
;;; return `errno::inval`.
;; NOTE(review): `errno::inval` is not defined in this file. The `$nn_errno` type returned here has an
;; `$invalid_argument` variant, which is presumably what is meant -- confirm and update the sentence above.
(@interface func (export "load")
;;; The bytes necessary to build the graph.
(param $builder $graph_builder_array)
;;; The encoding of the graph.
(param $encoding $graph_encoding)
;;; Where to execute the graph.
(param $target $execution_target)
(result $error $nn_errno)
;;; The loaded graph.
(result $graph $graph)
)
;; TODO Functions like `describe_graph_inputs` and `describe_graph_outputs` (returning
;; an array of `$tensor_description`s) might be useful for introspecting the graph but are not yet included here.
;;; Create an execution instance of a loaded graph.
;;; TODO this may need to accept flags that might affect the compilation or execution of the graph.
(@interface func (export "init_execution_context")
;;; The loaded graph to create an execution instance of.
(param $graph $graph)
(result $error $nn_errno)
;;; The created execution instance.
(result $context $graph_execution_context)
)
;;; Define the inputs to use for inference.
;;;
;;; This should return an $nn_errno (TODO define) if the input tensor does not match the expected dimensions and type.
(@interface func (export "set_input")
;;; The execution context whose input is being set.
(param $context $graph_execution_context)
;;; The index of the input to change.
(param $index u32)
;;; The tensor to set as the input.
(param $tensor $tensor)
(result $error $nn_errno)
)
;;; Extract the outputs after inference.
;;;
;;; This should return an $nn_errno (TODO define) if the inference has not yet run.
(@interface func (export "get_output")
;;; The execution context to retrieve the output from.
(param $context $graph_execution_context)
;;; The index of the output to retrieve.
(param $index u32)
;;; An out parameter to which to copy the tensor data. The caller is responsible for allocating enough memory for
;;; the tensor data or an error will be returned. Currently there is no dynamic way to extract the additional
;;; tensor metadata (i.e. dimension, element type) but this should be added at some point.
(param $out_buffer (@witx pointer u8))
;;; The size of the memory the caller allocated for `$out_buffer`.
(param $out_buffer_max_size $buffer_size)
(result $error $nn_errno)
;;; The number of bytes of tensor data written to the `$out_buffer`.
(result $bytes_written $buffer_size)
)
;;; Compute the inference on the given inputs (see `set_input`).
;;;
;;; This should return an $nn_errno (TODO define) if the inputs are not all defined.
(@interface func (export "compute")
;;; The execution context on which to run the inference.
(param $context $graph_execution_context)
(result $error $nn_errno)
)
)