From 4820d6534832041ddb1046d223145405db69472a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= <xadupre@users.noreply.github.com>
Date: Sun, 26 Nov 2023 14:55:58 -0800
Subject: [PATCH 01/44] Upgrade version to 0.2.0 (#55)

---
 _doc/index.rst             | 1 +
 onnx_array_api/__init__.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/_doc/index.rst b/_doc/index.rst
index 93ca000..f2f8998 100644
--- a/_doc/index.rst
+++ b/_doc/index.rst
@@ -142,4 +142,5 @@ Light API
 Older versions
 ++++++++++++++
 
+* `0.1.3 <../v0.1.3/index.html>`_
 * `0.1.2 <../v0.1.2/index.html>`_
diff --git a/onnx_array_api/__init__.py b/onnx_array_api/__init__.py
index 09a2edd..c4bc456 100644
--- a/onnx_array_api/__init__.py
+++ b/onnx_array_api/__init__.py
@@ -3,5 +3,5 @@
 APIs to create ONNX Graphs.
 """
 
-__version__ = "0.1.3"
+__version__ = "0.2.0"
 __author__ = "Xavier Dupré"

From bd1a44104bde6cb6b8f05403597cdf9433ddc94f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= <xadupre@users.noreply.github.com>
Date: Wed, 20 Dec 2023 15:00:07 +0100
Subject: [PATCH 02/44] Delay unnecessary import. (#56)

* delay import

* lint
---
 onnx_array_api/npx/npx_functions.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/onnx_array_api/npx/npx_functions.py b/onnx_array_api/npx/npx_functions.py
index db29ca2..2f547d6 100644
--- a/onnx_array_api/npx/npx_functions.py
+++ b/onnx_array_api/npx/npx_functions.py
@@ -1,5 +1,4 @@
 from typing import Tuple, Union
-import array_api_compat.numpy as np_array_api
 import numpy as np
 from onnx import FunctionProto, ModelProto, NodeProto, TensorProto
 from onnx.helper import make_tensor, tensor_dtype_to_np_dtype
@@ -624,6 +623,8 @@ def isdtype(
     See :epkg:`BaseArrayAPI:isdtype`.
     This function is not converted into an onnx graph.
     """
+    import array_api_compat.numpy as np_array_api
+
     if isinstance(dtype, DType):
         dti = tensor_dtype_to_np_dtype(dtype.code)
         return np_array_api.isdtype(dti, kind)

From 954b9595637bfb4932be40921b71d11b70f70424 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= <xadupre@users.noreply.github.com>
Date: Mon, 25 Dec 2023 18:18:55 +0100
Subject: [PATCH 03/44] Implements GraphBuilder for the graph_api (#57)

* add graph_builder

* documentation

* documentation

* remove some torch issues

* better constant

* fix tiny bug

* tiny changes

* add method check_order

* fix unused

* documentation

* more coverage

* improve code coverage
---
 .gitignore                                    |   1 +
 CHANGELOGS.rst                                |   1 +
 _doc/api/graph_api.rst                        |  16 +
 _doc/api/index.rst                            |   1 +
 .../ut_graph_api/data/debug_7951-CPUep.0.onnx | Bin 0 -> 7951 bytes
 _unittests/ut_graph_api/test_graph_builder.py | 381 ++++++++
 .../ut_graph_api/test_graph_builder_optim.py  |  38 +
 onnx_array_api/graph_api/__init__.py          |   1 +
 onnx_array_api/graph_api/graph_builder.py     | 840 ++++++++++++++++++
 pyproject.toml                                |   1 +
 10 files changed, 1280 insertions(+)
 create mode 100644 _doc/api/graph_api.rst
 create mode 100644 _unittests/ut_graph_api/data/debug_7951-CPUep.0.onnx
 create mode 100644 _unittests/ut_graph_api/test_graph_builder.py
 create mode 100644 _unittests/ut_graph_api/test_graph_builder_optim.py
 create mode 100644 onnx_array_api/graph_api/__init__.py
 create mode 100644 onnx_array_api/graph_api/graph_builder.py

diff --git a/.gitignore b/.gitignore
index 303cd33..ca8ce49 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,6 +14,7 @@ build/*
 *egg-info/*
 onnxruntime_profile*
 prof
+_doc/sg_execution_times.rst
 _doc/auto_examples/*
 _doc/examples/_cache/*
 _doc/examples/onnxruntime_profile*
diff --git a/CHANGELOGS.rst b/CHANGELOGS.rst
index 9fb4ed8..a5b1577 100644
--- a/CHANGELOGS.rst
+++ b/CHANGELOGS.rst
@@ -4,6 +4,7 @@ Change Logs
 0.1.3
 +++++
 
+* :pr:`57`: implements GraphBuilder
 * :pr:`49`: adds command line to export a model into code
 * :pr:`48`: support for subgraph in light API
 * :pr:`47`: extends export onnx to code to support inner API
diff --git a/_doc/api/graph_api.rst b/_doc/api/graph_api.rst
new file mode 100644
index 0000000..2cb5045
--- /dev/null
+++ b/_doc/api/graph_api.rst
@@ -0,0 +1,16 @@
+========================
+onnx_array_api.graph_api
+========================
+
+
+GraphBuilder
+============
+
+.. autoclass:: onnx_array_api.graph_api.GraphBuilder
+    :members:
+
+OptimizationOptions
+===================
+
+.. autoclass:: onnx_array_api.graph_api.graph_builder.OptimizationOptions
+    :members:
diff --git a/_doc/api/index.rst b/_doc/api/index.rst
index 0f595f0..121c416 100644
--- a/_doc/api/index.rst
+++ b/_doc/api/index.rst
@@ -7,6 +7,7 @@ API
     :maxdepth: 1
 
     array_api
+    graph_api
     light_api
     npx_core_api
     npx_functions
diff --git a/_unittests/ut_graph_api/data/debug_7951-CPUep.0.onnx b/_unittests/ut_graph_api/data/debug_7951-CPUep.0.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..77ba3775b1d901f844c3af10d571dd1c684c02fa
GIT binary patch
literal 7951
zcmb_hO>Eo96&7X5lEyz#W)nxz=7(UKZbCSQNd4JDvvu8cg9O{`!cL38iv>YROhj2C
z6_Ltcdq}ZB(aQq86}=R>6b1IShhBOwTKL#Yuf4QDd+MRLqCnrwaL6Go$;u)IERi$w
z-n{qbd-Hyj&*R+arSFbAy>eFH(zhzDe_lWzqQYow56yvR)bWyUXyv@=TaMuyy0Xlk
z^i-=fI(o9@I?fBPGqy+m7Ug0L?6yeXB=mYnUtNg4hI7U<b6{v%8GYY%Jl}MDqo(Bf
zvw9X~VSZw%*<#_olzJm&^c4Ds6p6hPJ!;XTjyPYu(s7+<?qK5Eu2U`<R4HM|AbJ^f
zWn=n`icz6!4@^Ss?Ml1S8!4x*q2(uIcQW!y@S0XHpmlfRk0!n`GP_;dd9s_@1xa%t
zd<I55^2&#bDChZOyK8Y~bItpmNTVv2{Lr|Kg^at6$cd6;X@-=|#VK_{Sq&+-7oxn2
zM0?}R*cwdAE7a)Z(b!Q6;mcgrw~^%I49KsQ83jC6782pFX+*ch7l!5wW8}I6y=y$T
z{hnbvUAtp>_~MKSfd4fH1HK?e$DpEVihSB4wTKEG%N`g*w`-LE3u(YYp5uta=p0%(
zvc`@zFg*K}wVT<^5YWJ=V|4AIXP7SlCF$KIGWs4u!Kmw40)t@il{I!b#Ll6dfI~id
z1}!t-@qNyt^@-Cq{Z7y78e`KrD#1qSxo7fG;H`S4yuv-y&~(9F74EJZ`E)UNzbK{H
zpZtP-GynY@%{rREZS@MyJhQFm<y8t6@@Qz)06OGtu9#lrH8n)}5G-ZEH;2RW8s$iz
z4Xvi2=v6&~vgTmaGtZ||ufO|7lGJsSX<L5yK6leQKuk8k%M|BYAUb=gFN*q?k?0K+
zaAIDZ3q+19(0F$We1!NGfeLy|h2}R<VLMPkCo1ST;IGG3*nVd!U^Fys$9OU}N4>+#
zktOT6h}QEGMrbMjS|T6$!%NY06Q4nAFkL|DJX>;jJ(}IYCA0yv7-l8<&zVSv*P>+_
z-azMISrL}V!O|>zXAdt#i*yhn4~sHn(czV7qKeO=Gcd72g+rPAn}17(rD(neGX<E>
zGU0`phBzaMn{5l5q4ouG1kIC%Xm5~->IoCHuLPH9q!!VhsP<Fx-@b_G<vd03b`{OO
zV5ymM)J&Y;Rp58EU1FXZN=U3t?EpIkjvxrhPBEO)lT#8*ak$}iv<kM%!!~3~)Tg%4
z!kq{nC^Gug77;Ic(1hT6JgChu36R4z!w-_mPLHdv&Ajde(8R3KFY((Xf+kbBU*>vg
zCv@)5$9#V-=0V9Y6`X_l{vwzM3x~X7nD3v0dC)r~jALG$iDO>eEQWb@&QmbY&iN$f
zK{}Q=%sSxNnD2iR%!5ueI)z9KG2fq!`EV-2DIp_Rg{fJX-@AlnCh<jSFN0D=ymt*<
zot5Zsb==|5b&Arl^vetiDg!0sS%b%Fer$P@0i<WGCZZ#d^+7sOPCFGPvky<L)7ZA^
zIA=S4qXzXu+R=o3qRjJ&^<u6irT6$U$Rk>*)-A-};ox8+yc%#ZI;Q6XE0UVP)5rFa
z1<OBRt59qe&OkBHg|$^B`-7(wa{vqf3|9{s#i432UsKZe9;=_B6~6}sQXBT`^z?gZ
zE6^G4C}IUzSWyZOVb&YDo&}{sW@L?rkg<`9pq0Xu{4NPmMx%|$)kerL%x2Y3(26zi
zEYAEkk^|;kjtXU;zt^=K-}YY;c7%YlTSehQ%CME%yW-z?XgQuczG2>!$#LD3Z+$3-
zL-!Xs^ZOJY`!fM_Ed3Xu8TFHrwH>I8l;!7^{iNqBnYL|u3i7#ux@v*LN7H#}q|>rV
zj|wk97UKgY$#6uCupqKpDA*Zifzlub)D}gh454v2LX42nYe26@K)J*wHJo=`gXYpO
zUsaYKxIVz>KN1SZ3BXB(Sce4+Nnc2DC_P0g+k^N(Meu>M2+<N4$`>DMNeUx$BQOXP
z0CFqFz6!LOQuvYMJ)KzAD?mM7C3s~B(e?HUaTy{=btdEjO9$&}Od0`ls}bZxhP424
ztv@ay;e10MY67D=^`XBfH2;f)elKyk>iobU_XentTr~8nH7GisMZQ%QN^`KvS5gER
zsF$*|M2C{*D=Dbf)o&#WbxM^e-l2A*@S}6|N!<vGb~?ymnlNrX>CS%uJw<>1orHcd
zQ%~->4QlTU(NukF1>Z_^;TY@f=;~<jb9|i^;2U`<$k(99`18au`W(R93^Jes?dL%0
zI|aaMcvpdM6W81hk+K8ynvy%12H{#$5Os%P_{VWG-$B<S-Z&Ke<lQL%8Q89&XlY=A
zy&ZB4kGeAH9>}H-jhptww>)_?wr)X01V6I6vhT`|ZC?gAlRtUz;F0W&tj^XuxmXXO
zMo)H@0D*_`{?oDa3b7UmDK>+kjK-Ef89Su8)`GN#-27M=V<6jy^$;W8Cv-uWNXCfM
z@BZJeqf`4Kfl%wD8DsGKzM`U_Ap=bwU2KKuL90UNjSM~Po#;_tMNjlEbfH6{^BxLm
zjD#<wq0(vu)_B`U@;#i`kD@f{R9;1e@S3K#PGUI(gjTZULIVtbFW!flp1j;)!D!lA
z{60Be+QTty&xmsaDTU1D%QTv|7%x*!Bm%J@^<~1xe}ckPE%2w%%j2n9il=H7nyNv(
zq8Q`}TP4_u#p7A=N~2dQ6{*5p{o}Z+w-CoR6P$!+2%Xb|mB3Bocih(!w8e<=bi5*f
zlY~>+Dg=IYCHF(~#ixK=avz6XJ$l5^t9mgRj6%q&SJ5i(^YzS;MPrBvcL-Kd#Nm)v
z&&PvK;1YnMpG7iD20rr`n$x6ZA4%MZB%UMHl_^pQ9s~;UI7bB*ra*=o&R9;@1M@V6
zOy!bz(NCdj!m0b_*e7(`0^Q{kbfWvhc6FQ~z6)eK3z1PrC;pbS4rziC0T2}*QLZ@K
zwFbUfUUHzpoIM!W9ab!G3?Md-3;edKlBJrmbTDayY=2vt?^20N4-1zb;iJ=7`oDyq
z{Y&D(G|0a~H=7zvbtIyf#nT#I6A(=kPcf)+{v_WJJGfK%Z=y(f#=Q}lgCus<8R5EF
zX`e-LIdF=ET5iiG^Xp$HQYFYOHe)))l?gJ8$j{NYBbL=jG?q!sxDQUswjGy^r5CN#
zJBj7T(x2d0tFWETA|iZ3=U){9bL%=>lrRRK0-v;Afhw95u`2W@eYcc@iw975Gq_KK
zWr^4h2fc53<g$VAULg(hL>0IxAG{pibNtUfm&rbfqVUvu?^kIgG|ss$L(_-a3O@Ci
zMxP=wrqNG52)=^GfF98Bt~DIOZ*8mr554!VB=$(ManggQL-YwWdPIBSV#cx=l(aJ0
zNqo^aWfhd|m;-Yx6GpdTI(Vhc{@a?M{At<FVd58o!{ko}$I@@0cd!q=h@kL|S>ob}
zDmHt-4ykpSr2=ugfHe=`bU<U}p?PG{6cY;PBiVgSpJ^p0!{rb4{Z=6RX}y+X=?lo~
z?|IAyZz*MoNhy}nOu*Jnu|AHHBKRFFOp565U;y0@*bP!)Us?jXQL?gE+ow6K{JdvD
day=GQ?wOuU4N=*Y-LY(YvMGD?j2AV-{{e(m=?ee=

literal 0
HcmV?d00001

diff --git a/_unittests/ut_graph_api/test_graph_builder.py b/_unittests/ut_graph_api/test_graph_builder.py
new file mode 100644
index 0000000..3369b2c
--- /dev/null
+++ b/_unittests/ut_graph_api/test_graph_builder.py
@@ -0,0 +1,381 @@
+import contextlib
+import io
+import unittest
+import numpy as np
+import onnx
+from onnx_array_api.ext_test_case import ExtTestCase
+from onnx_array_api.graph_api.graph_builder import GraphBuilder, OptimizationOptions
+from onnx_array_api.reference import (
+    from_array_extended,
+    ExtendedReferenceEvaluator as ReferenceEvaluator,
+)
+
+
+class TestGraphBuilder(ExtTestCase):
+    def call_optimizer(self, onx):
+        gr = GraphBuilder(onx)
+        gr.remove_unused()
+        return gr.to_onnx()
+
+    def test_remove_unused_nodes(self):
+        model = onnx.parser.parse_model(
+            """
+            <ir_version: 8, opset_import: [ "": 18]>
+            agraph (float[N] x) => (float[N] z) {
+                two = Constant <value_float=2.0> ()
+                four = Add(two, two)
+                z = Mul(x, x)
+            }"""
+        )
+        onx = self.call_optimizer(model)
+        self.assertEqual(len(onx.graph.node), 1)
+        self.assertEqual(onx.graph.node[0].op_type, "Mul")
+
+    def test_initializers(self):
+        model = onnx.parser.parse_model(
+            """
+            <ir_version: 8, opset_import: [ "": 18]>
+            agraph (float[N] x) => (float[N] z)
+            <float two = {2.0}> {
+                four = Add(two, two)
+                z = Mul(x, x)
+            }"""
+        )
+        self.assertEqual(len(model.graph.initializer), 1)
+        onx = self.call_optimizer(model)
+        self.assertEqual(len(onx.graph.node), 1)
+        self.assertEqual(onx.graph.node[0].op_type, "Mul")
+        self.assertEqual(len(onx.graph.initializer), 0)
+
+    def test_keep_unused_outputs(self):
+        model = onnx.parser.parse_model(
+            """
+            <ir_version: 8, opset_import: [ "": 18]>
+            agraph (float[N] x) => (float[M] z) {
+                w1, w2, w3 = Split (x)
+                z = Mul(w3, w3)
+            }"""
+        )
+        onx = self.call_optimizer(model)
+        self.assertEqual(len(onx.graph.node), 2)
+        self.assertEqual(onx.graph.node[0].op_type, "Split")
+
+    def test_exc(self):
+        self.assertRaise(lambda: GraphBuilder([]), NotImplementedError)
+
+    def test_simple(self):
+        with contextlib.redirect_stdout(io.StringIO()):
+            g = GraphBuilder(verbose=10)
+
+            shape = (10, 4)
+            w = np.random.randn(*shape).astype(np.float32)
+
+            x = g.make_tensor_input("X", np.float32, shape)
+            weight = g.make_initializer(w)
+            one = g.make_initializer(np.array([-1, 1], dtype=np.int64))
+            transposed = g.make_node("Transpose", [weight], perm=[1, 0])
+            res = g.op.MatMul(x, transposed)
+            g.op.Reshape(res, one, outputs="y")
+            g.make_tensor_output("y", np.float32, (10, 1))
+            onx = g.to_onnx()
+            ref = ReferenceEvaluator(onx)
+            x = np.random.randn(*shape).astype(np.float32)
+            expected = (x @ w.T).reshape((-1, 1))
+            feeds = {"X": x}
+            got = ref.run(None, feeds)
+            self.assertEqualArray(expected, got[0])
+
+    def test_simple_big(self):
+        with contextlib.redirect_stdout(io.StringIO()):
+            g = GraphBuilder(verbose=10)
+
+            shape = (30, 40)
+            w = np.random.randn(*shape).astype(np.float32)
+
+            x = g.make_tensor_input("X", np.float32, shape)
+            weight = g.make_initializer(w)
+            one = g.make_initializer(np.array([-1, 1], dtype=np.int64))
+            transposed = g.make_node("Transpose", [weight], perm=[1, 0])
+            res = g.op.MatMul(x, transposed)
+            g.op.Reshape(res, one, outputs="y")
+            g.make_tensor_output("y", np.float32, (30, 1))
+            onx = g.to_onnx()
+            ref = ReferenceEvaluator(onx)
+            x = np.random.randn(*shape).astype(np.float32)
+            expected = (x @ w.T).reshape((-1, 1))
+            feeds = {"X": x}
+            got = ref.run(None, feeds)
+            self.assertEqualArray(expected, got[0])
+
+    def test_constant_folding(self):
+        with contextlib.redirect_stdout(io.StringIO()):
+            g = GraphBuilder(verbose=10)
+
+            shape = (10, 4)
+            w = np.random.randn(*shape).astype(np.float32)
+            x = g.make_tensor_input("X", np.float32, shape)
+            weight = g.make_initializer(w)
+            one = g.make_initializer(np.array([-1, 1], dtype=np.int64))
+            transposed = g.make_node("Transpose", [weight], perm=[1, 0])
+            res = g.op.MatMul(x, transposed)
+            g.op.Reshape(res, one, outputs="y")
+            g.make_tensor_output("y", np.float32, (10, 1))
+
+            g.constant_folding()
+
+            onx = g.to_onnx()
+            node_types = [n.op_type for n in onx.graph.node]
+            self.assertNotIn("Transpose", node_types)
+            ref = ReferenceEvaluator(onx)
+            x = np.random.randn(*shape).astype(np.float32)
+            expected = (x @ w.T).reshape((-1, 1))
+            feeds = {"X": x}
+            got = ref.run(None, feeds)
+            self.assertEqualArray(expected, got[0])
+
+    def test_constant_folding2(self):
+        g = GraphBuilder(
+            optimization_options=OptimizationOptions(constant_folding=True)
+        )
+
+        shape = (10, 4)
+        w = np.random.randn(*shape).astype(np.float32)
+        x = g.make_tensor_input("X", np.float32, shape)
+        weight = g.make_initializer(w)
+        cst = g.get_constant(weight)
+        self.assertEqualArray(w, cst)
+        one = g.make_initializer(np.array([-1, 1], dtype=np.int64))
+        transposed = g.make_node("Transpose", [weight], perm=[1, 0])
+        res = g.op.MatMul(x, transposed)
+        g.op.Reshape(res, one, outputs="y")
+        g.make_tensor_output("y", np.float32, (10, 1))
+
+        g.optimize()
+
+        onx = g.to_onnx()
+        node_types = [n.op_type for n in onx.graph.node]
+        self.assertNotIn("Transpose", node_types)
+        ref = ReferenceEvaluator(onx)
+        x = np.random.randn(*shape).astype(np.float32)
+        expected = (x @ w.T).reshape((-1, 1))
+        feeds = {"X": x}
+        got = ref.run(None, feeds)
+        self.assertEqualArray(expected, got[0])
+
+    def test_remove_identity(self):
+        with contextlib.redirect_stdout(io.StringIO()):
+            g = GraphBuilder(verbose=10)
+
+            shape = (10, 4)
+            w = np.random.randn(*shape).astype(np.float32)
+            x = g.make_tensor_input("X", np.float32, shape)
+            weight = g.make_initializer(w)
+            one = g.make_initializer(np.array([-1, 1], dtype=np.int64))
+            transposed = g.make_node("Transpose", [weight], perm=[1, 0])
+            res = g.op.Identity(g.op.MatMul(x, transposed))
+            g.op.Reshape(res, one, outputs="y")
+            g.make_tensor_output("y", np.float32, (10, 1))
+
+            g.remove_identity_nodes()
+
+            onx = g.to_onnx()
+            node_types = [n.op_type for n in onx.graph.node]
+            self.assertNotIn("Identity", node_types)
+            ref = ReferenceEvaluator(onx)
+            x = np.random.randn(*shape).astype(np.float32)
+            expected = (x @ w.T).reshape((-1, 1))
+            feeds = {"X": x}
+            got = ref.run(None, feeds)
+            self.assertEqualArray(expected, got[0])
+
+    def test_remove_identity_input(self):
+        with contextlib.redirect_stdout(io.StringIO()):
+            g = GraphBuilder(verbose=10)
+
+            shape = (10, 4)
+            w = np.random.randn(*shape).astype(np.float32)
+            x = g.make_tensor_input("X", np.float32, shape)
+            x = g.op.Identity(x)
+            weight = g.make_initializer(w)
+            one = g.make_initializer(np.array([-1, 1], dtype=np.int64))
+            transposed = g.make_node("Transpose", [weight], perm=[1, 0])
+            res = g.op.MatMul(x, transposed)
+            g.op.Reshape(res, one, outputs="y")
+            g.make_tensor_output("y", np.float32, (10, 1))
+
+            g.remove_identity_nodes()
+
+            onx = g.to_onnx()
+            node_types = [n.op_type for n in onx.graph.node]
+            self.assertNotIn("Identity", node_types)
+            ref = ReferenceEvaluator(onx)
+            x = np.random.randn(*shape).astype(np.float32)
+            expected = (x @ w.T).reshape((-1, 1))
+            feeds = {"X": x}
+            got = ref.run(None, feeds)
+            self.assertEqualArray(expected, got[0])
+
+    def test_remove_identity_output(self):
+        with contextlib.redirect_stdout(io.StringIO()):
+            g = GraphBuilder(verbose=10)
+
+            shape = (10, 4)
+            w = np.random.randn(*shape).astype(np.float32)
+            x = g.make_tensor_input("X", np.float32, shape)
+            weight = g.make_initializer(w)
+            one = g.make_initializer(np.array([-1, 1], dtype=np.int64))
+            transposed = g.make_node("Transpose", [weight], perm=[1, 0])
+            res = g.op.MatMul(x, transposed)
+            r = g.op.Reshape(res, one)
+            g.op.Identity(r, outputs=["y"])
+            g.make_tensor_output("y", np.float32, (10, 1))
+
+            g.remove_identity_nodes()
+
+            onx = g.to_onnx()
+            node_types = [n.op_type for n in onx.graph.node]
+            self.assertNotIn("Identity", node_types)
+            ref = ReferenceEvaluator(onx)
+            x = np.random.randn(*shape).astype(np.float32)
+            expected = (x @ w.T).reshape((-1, 1))
+            feeds = {"X": x}
+            got = ref.run(None, feeds)
+            self.assertEqualArray(expected, got[0])
+
+    def test_remove_unused_nodes_simple(self):
+        with contextlib.redirect_stdout(io.StringIO()):
+            g = GraphBuilder(verbose=10)
+
+            shape = (10, 4)
+            w = np.random.randn(*shape).astype(np.float32)
+            x = g.make_tensor_input("X", np.float32, shape)
+            weight = g.make_initializer(w)
+            cst = g.make_initializer(np.array([2], dtype=np.float32))
+            one = g.make_initializer(np.array([-1, 1], dtype=np.int64))
+            transposed = g.make_node("Transpose", [weight], perm=[1, 0])
+            res = g.op.MatMul(x, transposed)
+            g.op.Add(res, cst)
+            g.op.Reshape(res, one, outputs=["y"])
+            g.make_tensor_output("y", np.float32, (10, 1))
+
+            g.remove_identity_nodes()
+
+            onx = g.to_onnx()
+            node_types = [n.op_type for n in onx.graph.node]
+            self.assertNotIn("Add", node_types)
+            ref = ReferenceEvaluator(onx)
+            x = np.random.randn(*shape).astype(np.float32)
+            expected = (x @ w.T).reshape((-1, 1))
+            feeds = {"X": x}
+            got = ref.run(None, feeds)
+            self.assertEqualArray(expected, got[0])
+
+    def test_constant_array(self):
+        with contextlib.redirect_stdout(io.StringIO()):
+            g = GraphBuilder(verbose=10)
+
+            shape = (10, 4)
+            w = np.random.randn(*shape).astype(np.float32)
+
+            x = g.make_tensor_input("X", np.float32, shape)
+            one = g.make_initializer(np.array([-1, 1], dtype=np.int64))
+            res = g.op.MatMul(x, w.T)
+            g.op.Reshape(res, one, outputs="y")
+            g.make_tensor_output("y", np.float32, (10, 1))
+            onx = g.to_onnx()
+            ref = ReferenceEvaluator(onx)
+            x = np.random.randn(*shape).astype(np.float32)
+            expected = (x @ w.T).reshape((-1, 1))
+            feeds = {"X": x}
+            got = ref.run(None, feeds)
+            self.assertEqualArray(expected, got[0])
+
+    def test_constant_array_2(self):
+        with contextlib.redirect_stdout(io.StringIO()):
+            g = GraphBuilder(verbose=10)
+
+            shape = (10, 4)
+            w = np.random.randn(*shape).astype(np.float32)
+
+            x = g.make_tensor_input("X", np.float32, shape)
+            one = g.make_initializer(np.array([-1, 1], dtype=np.int64))
+            opc = g.op.Constant(value=from_array_extended(w.T))
+            res = g.op.MatMul(x, opc)
+            g.op.Reshape(res, one, outputs="y")
+            g.make_tensor_output("y", np.float32, (10, 1))
+            self.assertTrue(g.has_shape("X"))
+            self.assertTrue(g.has_type("X"))
+            self.assertEqual(g.get_type("X"), 1)
+            self.assertEqual(g.get_shape("X"), (10, 4))
+            self.assertEqual(g.rank("X"), 2)
+            onx = g.to_onnx()
+            ref = ReferenceEvaluator(onx)
+            x = np.random.randn(*shape).astype(np.float32)
+            expected = (x @ w.T).reshape((-1, 1))
+            feeds = {"X": x}
+            got = ref.run(None, feeds)
+            self.assertEqualArray(expected, got[0])
+
+    def test_get_type(self):
+        g = GraphBuilder()
+        self.assertEqual(g._get_type(np.float32), onnx.TensorProto.FLOAT)
+        self.assertEqual(g._get_type(np.int64), onnx.TensorProto.INT64)
+        self.assertEqual(g._get_type(None), onnx.TensorProto.UNDEFINED)
+
+    def test_make_nodes_prefix(self):
+        g1 = GraphBuilder()
+        g1.make_tensor_input("X", np.float32, shape=None)
+        g1.op.Add("X", np.array([1], dtype=np.float32), outputs=["y"])
+        g1.make_tensor_output("y", np.float32, shape=None)
+
+        g = GraphBuilder()
+
+        shape = (10, 4)
+        w = np.random.randn(*shape).astype(np.float32)
+
+        x = g.make_tensor_input("X", np.float32, shape)
+        weight = g.make_initializer(w)
+        one = g.make_initializer(np.array([-1, 1], dtype=np.int64))
+        transposed = g.make_node("Transpose", [weight], perm=[1, 0])
+        res = g.op.MatMul(x, transposed)
+        res2 = g.make_nodes(g1, [res], ["k"], prefix="J")
+        g.op.Reshape(res2, one, outputs="y")
+        g.make_tensor_output("y", np.float32, (10, 1))
+        onx = g.to_onnx()
+        ref = ReferenceEvaluator(onx)
+        x = np.random.randn(*shape).astype(np.float32)
+        expected = (x @ w.T).reshape((-1, 1)) + 1
+        feeds = {"X": x}
+        got = ref.run(None, feeds)
+        self.assertEqualArray(expected, got[0])
+
+    def test_make_nodes_noprefix(self):
+        g1 = GraphBuilder()
+        g1.make_tensor_input("X", np.float32, shape=None)
+        g1.op.Add("X", np.array([1], dtype=np.float32), outputs=["y"])
+        g1.make_tensor_output("y", np.float32, shape=None)
+
+        g = GraphBuilder()
+
+        shape = (10, 4)
+        w = np.random.randn(*shape).astype(np.float32)
+
+        x = g.make_tensor_input("X", np.float32, shape)
+        weight = g.make_initializer(w)
+        one = g.make_initializer(np.array([-1, 1], dtype=np.int64))
+        transposed = g.make_node("Transpose", [weight], perm=[1, 0])
+        res = g.op.MatMul(x, transposed)
+        res2 = g.make_nodes(g1, [res], ["k"])
+        g.op.Reshape(res2, one, outputs="y")
+        g.make_tensor_output("y", np.float32, (10, 1))
+        onx = g.to_onnx()
+        ref = ReferenceEvaluator(onx)
+        x = np.random.randn(*shape).astype(np.float32)
+        expected = (x @ w.T).reshape((-1, 1)) + 1
+        feeds = {"X": x}
+        got = ref.run(None, feeds)
+        self.assertEqualArray(expected, got[0])
+
+
+if __name__ == "__main__":
+    unittest.main(verbosity=2)
diff --git a/_unittests/ut_graph_api/test_graph_builder_optim.py b/_unittests/ut_graph_api/test_graph_builder_optim.py
new file mode 100644
index 0000000..5ec827d
--- /dev/null
+++ b/_unittests/ut_graph_api/test_graph_builder_optim.py
@@ -0,0 +1,38 @@
+import os
+import unittest
+import onnx
+from onnx.inliner import inline_local_functions
+from onnx_array_api.ext_test_case import ExtTestCase
+from onnx_array_api.graph_api.graph_builder import GraphBuilder
+
+
+class TestGraphBuilderOptim(ExtTestCase):
+    def test_wcheck_afiles(self):
+        import onnxruntime
+
+        data = os.path.join(os.path.dirname(__file__), "data")
+        filename = [f for f in os.listdir(data) if f.endswith(".onnx")]
+        for f in filename:
+            with self.subTest(f=f):
+                onx = onnx.load(os.path.join(data, f))
+                sess = onnxruntime.InferenceSession(
+                    os.path.join(data, f), providers=["CPUExecutionProvider"]
+                )
+                assert sess
+                onxi = inline_local_functions(onx)
+                sess = onnxruntime.InferenceSession(
+                    onxi.SerializeToString(), providers=["CPUExecutionProvider"]
+                )
+                assert sess
+                g = GraphBuilder(onxi)
+                g.optimize(check_order=True)
+                g.check_order()
+                onx2 = g.to_onnx()
+                sess2 = onnxruntime.InferenceSession(
+                    onx2.SerializeToString(), providers=["CPUExecutionProvider"]
+                )
+                assert sess2
+
+
+if __name__ == "__main__":
+    unittest.main(verbosity=2)
diff --git a/onnx_array_api/graph_api/__init__.py b/onnx_array_api/graph_api/__init__.py
new file mode 100644
index 0000000..ea89a2e
--- /dev/null
+++ b/onnx_array_api/graph_api/__init__.py
@@ -0,0 +1 @@
+from .graph_builder import GraphBuilder
diff --git a/onnx_array_api/graph_api/graph_builder.py b/onnx_array_api/graph_api/graph_builder.py
new file mode 100644
index 0000000..b92d96b
--- /dev/null
+++ b/onnx_array_api/graph_api/graph_builder.py
@@ -0,0 +1,840 @@
+import sys
+from functools import partial
+from typing import Any, Dict, List, Optional, Sequence, Set, Tuple, Union
+import numpy as np
+from onnx.defs import onnx_opset_version
+import onnx.helper as oh
+import onnx.numpy_helper as onh
+from onnx import (
+    AttributeProto,
+    FunctionProto,
+    GraphProto,
+    ModelProto,
+    NodeProto,
+    TensorProto,
+)
+from onnx.reference import ReferenceEvaluator
+
+T = "TENSOR"
+
+
+class OptimizationOptions:
+    def __init__(
+        self,
+        remove_unused: bool = True,
+        constant_folding: bool = False,
+        constant_size: int = 1024,
+    ):
+        self.remove_unused = remove_unused
+        self.constant_folding = constant_folding
+        self.constant_size = constant_size
+
+
+class Opset:
+    """Shortcut to add nodes to a builder: ``op.Add(x, y)`` adds a node Add."""
+
+    # defined for opset >= 18
+    # name: number of expected outputs
+    _implemented = {
+        "Add": 1,
+        "And": 1,
+        "Cast": 1,
+        "Concat": 1,
+        "Constant": 1,
+        "Div": 1,
+        "Exp": 1,
+        "Expand": 1,
+        "GatherElements": 1,
+        "Gemm": 1,
+        "Identity": 1,
+        "MatMul": 1,
+        "MaxPool": 2,
+        "Mul": 1,
+        "Log": 1,
+        "Or": 1,
+        "Relu": 1,
+        "Reshape": 1,
+        "Shape": 1,
+        "Slice": 1,
+        "Squeeze": 1,
+        "Sub": 1,
+        "Transpose": 1,
+        "Unsqueeze": 1,
+    }
+
+    def __init__(self, builder: "GraphBuilder", opset: int):
+        self.opset = opset
+        self.builder = builder
+
+    def __getattr__(self, name):
+        # Only called when the regular lookup fails: a known operator becomes
+        # a node factory, anything else is a genuine missing attribute.
+        if name in self._implemented:
+            return partial(self.make_node, name)
+        raise AttributeError(f"Unable to access attribute {name!r}.")
+
+    def make_node(
+        self,
+        op_type: str,
+        *inputs: Optional[Union[str, List[str]]],
+        outputs: Optional[Union[int, List[str], str]] = None,
+        domain: str = "",
+        **kwargs,
+    ):
+        """Adds a node to the builder, non-string inputs become initializers."""
+        if outputs is None:
+            outputs = self._implemented[op_type]
+        new_inputs = []
+        for i in inputs:
+            if not isinstance(i, str):
+                name = self.builder.unique_name("cst")
+                self.builder.make_initializer(i, name=name, exists=True)
+                new_inputs.append(name)
+            else:
+                new_inputs.append(i)
+
+        return self.builder.make_node(
+            op_type, new_inputs, outputs=outputs, domain=domain, **kwargs
+        )
+
+
+class GraphBuilder:
+    def __init__(
+        self,
+        target_opset_or_existing_proto: Optional[
+            Union[int, Dict[str, int], ModelProto, FunctionProto]
+        ] = None,
+        input_names: Optional[Sequence[str]] = None,
+        as_function: bool = False,
+        optimization_options: Optional[OptimizationOptions] = None,
+        args: Optional[List[Any]] = None,
+        verbose: int = 0,
+    ):
+        """Creates an empty graph for an opset or loads an existing ModelProto."""
+        self.optimization_options = optimization_options or OptimizationOptions()
+        self.as_function = as_function
+        self.input_args = args
+        self.verbose = verbose
+        self._unique_names = set()
+        self._known_shapes = {}
+        self._known_types = {}
+        self.constants_ = {}
+        if target_opset_or_existing_proto is None:
+            target_opset_or_existing_proto = onnx_opset_version() - 1
+        if isinstance(target_opset_or_existing_proto, (int, dict)):
+            self.opsets = (
+                {"": target_opset_or_existing_proto}
+                if isinstance(target_opset_or_existing_proto, int)
+                else target_opset_or_existing_proto
+            )
+            self.nodes = []
+            self.initializers_dict = {}
+            self.inputs = []
+            self.outputs = []
+            self.input_names = input_names or []
+            self.current_input = 0
+        elif isinstance(target_opset_or_existing_proto, ModelProto):
+            assert (
+                not input_names
+            ), "input_names must be empty if the input is an existing model."
+            proto = target_opset_or_existing_proto
+            self.opsets = {d.domain: d.version for d in proto.opset_import}
+            self.nodes = list(proto.graph.node)
+            self.initializers_dict = {i.name: i for i in proto.graph.initializer}
+            self.initializers_dict.update(
+                {i.name: i for i in proto.graph.sparse_initializer}
+            )
+            self.inputs = list(proto.graph.input)
+            self.outputs = list(proto.graph.output)
+            self.input_names = [i.name for i in proto.graph.input]
+            self.current_input = len(self.inputs)
+            # names already taken by the model, unique_name must not reuse them
+            self._unique_names.update(self.initializers_dict, self.input_names)
+            # Shapes and types are only known for initializers and constants so far.
+            for k, v in self.initializers_dict.items():
+                self.constants_[k] = None
+                self.set_shape(k, self._get_tensor_shape(v))
+                self.set_type(k, self._get_tensor_type(v))
+            for node in self.nodes:
+                self._unique_names.update(node.output)
+                if node.op_type == "Constant":
+                    self.constants_[node.output[0]] = node
+                    self.set_shape(node.output[0], self._get_tensor_shape(node))
+                    self.set_type(node.output[0], self._get_tensor_type(node))
+        else:
+            raise NotImplementedError(
+                f"{type(target_opset_or_existing_proto)} is not supported."
+            )
+
+        self.op = Opset(self, self.opsets[""])
+        self._cache_array = []
+
+    def _get_tensor_shape(
+        self, proto: Union[NodeProto, TensorProto]
+    ) -> Tuple[int, ...]:
+        """Returns the shape of an initializer or of the value of a Constant node."""
+        if isinstance(proto, TensorProto):
+            return tuple(proto.dims)
+        if isinstance(proto, NodeProto):
+            for att in proto.attribute:
+                if att.name in ("value_float", "value_int"):
+                    # a scalar
+                    return tuple()
+                if att.name == "value_floats":
+                    # 1D tensor: the shape is the number of values, not the values
+                    return (len(att.floats),)
+                if att.name == "value_ints":
+                    return (len(att.ints),)
+                if att.name == "value":
+                    return onh.to_array(att.t).shape
+        raise TypeError(
+            f"Unexpected or unsupported scenario type {type(proto)}: {proto}."
+        )
+
+    def _get_tensor_type(self, proto: Union[NodeProto, TensorProto]) -> int:
+        if isinstance(proto, TensorProto):
+            return proto.data_type
+        if isinstance(proto, NodeProto):
+            for att in proto.attribute:
+                if att.name == "value_float":
+                    return TensorProto.FLOAT
+                if att.name == "value_int":
+                    return TensorProto.INT64
+                if att.name == "value_floats":
+                    return TensorProto.FLOAT
+                if att.name == "value_ints":
+                    return TensorProto.INT64
+                if att.name == "value":
+                    t = onh.to_array(att.t)
+                    return oh.np_dtype_to_tensor_dtype(t.dtype)
+        raise ValueError(f"Unexpected type or value {type(proto)}: {proto}.")
+
+    def is_constant(self, name: str) -> bool:
+        """Tells if a result is a constant."""
+        return name in self.constants_
+
+    def get_constant(self, name: str) -> np.ndarray:
+        assert self.is_constant(name), f"Result {name!r} is not a constant."
+        assert (
+            name in self.initializers_dict
+        ), f"Result {name!r} was never evaluated within method 'constant_folding'."
+        value = self.initializers_dict[name]
+        if isinstance(value, np.ndarray):
+            return value
+
+        raise TypeError(f"Unable to convert type {type(value)} into numpy array.")
+
+    def set_shape(self, name: str, shape: Tuple[int, ...]):
+        assert isinstance(
+            name, str
+        ), f"Unexpected type {type(name)} for name, it should be a string."
+        if name in self._known_shapes:
+            assert shape == self._known_shapes[name], (
+                f"Name {name!r} already exists and it is different "
+                f"{self._known_shapes[name]} != {shape}"
+            )
+            return
+        assert isinstance(
+            shape, tuple
+        ), f"Unexpected shape type {type(shape)}, it should be a tuple."
+        self._known_shapes[name] = shape
+
+    def set_type(self, name: str, dtype: int):
+        assert isinstance(name, str), f"Unexpected type {type(name)} for name."
+        int_type = dtype if isinstance(dtype, int) else self._get_type(dtype)
+        if name in self._known_types:
+            assert int_type == self._known_types[name], (
+                f"Name {name!r} already exists and it is different "
+                f"{self._known_types[name]} != {int_type}."
+            )
+        self._known_types[name] = int_type
+
+    def rank(self, name: str) -> int:
+        return len(self.get_shape(name))
+
+    def has_shape(self, name: str) -> bool:
+        return name in self._known_shapes
+
+    def get_shape(self, name: str) -> Tuple[int, ...]:
+        assert name in self._known_shapes, (  # the shape must be known
+            f"Shape is unknown for result {name!r}, "
+            f"known_shapes={self._known_shapes}."
+        )
+        return self._known_shapes[name]  # value stored by set_shape
+
+    def has_type(self, name: str) -> bool:
+        return name in self._known_types
+
+    def get_type(self, name: str) -> int:
+        assert name in self._known_types, (
+            f"Type is unknown for result {name!r}, " f"known_types={self._known_types}."
+        )
+        return self._known_types[name]
+
+    def unique_name(self, prefix: str) -> str:
+        if prefix in self._unique_names:
+            i = 2
+            sug = f"{prefix}2"
+            while sug in self._unique_names:
+                i += 1
+                sug = f"{prefix}{i}"
+            self._unique_names.add(sug)
+            return sug
+        self._unique_names.add(prefix)
+        return prefix
+
+    def _prepare_inputs(self, schema: Optional[Any], *inputs: List[Any]) -> List[str]:
+        """Declares every input as a graph input and returns the input names."""
+        # 'schema' is not used, every input must expose name, dtype and shape
+        for i in inputs:
+            self.make_tensor_input(i.name, i.dtype, i.shape)
+        return [i.name for i in inputs]
+
+    def _get_type(self, elem_type: Any, exc: bool = True) -> int:
+        if not isinstance(elem_type, int):
+            st = str(elem_type)
+            if "float32" in st:
+                elem_type = TensorProto.FLOAT
+            elif "int64" in st:
+                elem_type = TensorProto.INT64
+            elif elem_type is None:
+                elem_type = TensorProto.UNDEFINED
+            elif exc:
+                raise ValueError(f"Unable to interpret elem_type {elem_type!r}.")
+        return elem_type
+
+    def make_initializer(
+        self, value: Any, name: str = "", external: bool = False, exists: bool = False
+    ) -> str:
+        if external:
+            raise NotImplementedError("External initializers are not implemented yet.")
+        if name == "":
+            if exists:
+                raise ValueError("Undefined name cannot exist.")
+            name = self.unique_name("cst")
+        elif not exists:
+            if name in self._unique_names:
+                raise ValueError(f"{name!r} is already assigned.")
+            self._unique_names.add(name)
+        self.set_shape(name, value.shape)
+        self.set_type(name, self._get_type(value.dtype))
+        self.initializers_dict[name] = value
+        self.constants_[name] = None
+        if self.verbose and np.prod(value.shape) > 100:
+            print(
+                f"[GraphBuilder] make_initializer:{name}[{value.dtype}:{value.shape}]"
+            )
+        return name
+
+    def make_tensor_input(
+        self, name: str, elem_type: Any, shape: Tuple[int, ...]
+    ) -> str:
+        if self.current_input < len(self.input_names):
+            # The input needs to be renamed, an identity node is added.
+            input_name = self.input_names[self.current_input]
+            self.make_node("Identity", [input_name], [name])
+        else:
+            self.input_names.append(name)
+            input_name = name
+            if name in self._unique_names:
+                raise ValueError(f"{name!r} is already assigned.")
+            self._unique_names.add(name)
+        self.current_input += 1
+        elem_type = self._get_type(elem_type)
+        self.inputs.append(oh.make_tensor_value_info(input_name, elem_type, shape))
+        if self.verbose:
+            print(f"[GraphBuilder] make_tensor_input:{name}[{elem_type}:{shape}]")
+        if shape:
+            self.set_shape(name, shape)
+        if elem_type:
+            self.set_type(name, elem_type)
+        return name
+
+    def make_tensor_output(
+        self,
+        name: Union[str, List[str]],
+        elem_type: Optional[int] = None,
+        shape: Optional[Tuple[int, ...]] = None,
+    ) -> Union[str, List[str]]:
+        if isinstance(name, list):
+            res = []
+            for n in name:
+                res.append(self.make_tensor_output(n, elem_type, shape))
+            return res
+
+        elem_type = self._get_type(elem_type, False)
+        assert (
+            self.as_function or elem_type != 0
+        ), f"Undefined element type for {name!r}."
+        self.outputs.append(oh.make_tensor_value_info(name, elem_type, shape))
+        if self.verbose:
+            print(f"[GraphBuilder] make_tensor_output:{name}[{elem_type}:{shape}]")
+        if shape:
+            self.set_shape(name, shape)
+        if elem_type:
+            self.set_type(name, elem_type)
+        return name
+
+    def make_node(
+        self,
+        op_type: str,
+        inputs: Union[str, List[str]],
+        outputs: Union[int, List[str], str] = 1,
+        domain: str = "",
+        attributes: Optional[List[AttributeProto]] = None,
+        **kwargs,
+    ) -> Union[str, List[str]]:
+        """Adds a node, returns its output name or the list of its output names."""
+        assert (
+            not kwargs or not attributes
+        ), f"Only attributes or kwargs can be filled for node {op_type!r}."
+        if isinstance(inputs, tuple):
+            inputs = list(inputs)
+        if isinstance(outputs, int):
+            assert outputs > 0, f"outputs={outputs} must be > 0."
+            lower = op_type.lower()
+            output_names = [
+                self.unique_name(f"_onx_{lower}{i}") for i in range(outputs)
+            ]
+        elif isinstance(outputs, str):
+            output_names = [outputs]
+        else:
+            output_names = outputs
+        if isinstance(inputs, str):
+            inputs = [inputs]
+
+        # creates the node, a failure is reported with the arguments received
+        try:
+            node = oh.make_node(op_type, inputs, output_names, domain=domain, **kwargs)
+        except TypeError as e:
+            raise TypeError(
+                f"A node {op_type!r} cannot be created with "
+                f"inputs={inputs} (types={[type(i) for i in inputs]}), "
+                f"outputs={outputs} "
+                f"(types={[type(o) for o in outputs] if isinstance(outputs, (tuple, list)) else outputs}), "
+                f"domain={domain!r}, kwargs={kwargs}."
+            ) from e
+        if attributes:
+            node.attribute.extend(attributes)
+
+        # constant handling, shape, type
+        if node.op_type == "Constant":
+            size = len(node.SerializeToString())
+            assert size < self.optimization_options.constant_size, (
+                f"A node Constant holds a tensor bigger than "
+                f"the constant: {size} >= {self.optimization_options.constant_size}."
+            )
+            k = node.output[0]
+            self.constants_[k] = node
+            shape = self._get_tensor_shape(node)
+            dtype = self._get_tensor_type(node)
+            self.set_shape(k, shape)
+            self.set_type(k, dtype)
+            if self.verbose and np.prod(shape) > 100:
+                print(f"[GraphBuilder] make_constant:{k}[{dtype}:{shape}]")
+        elif node.op_type == "Identity":
+            if node.input[0] in self._known_shapes:
+                self.set_shape(node.output[0], self._known_shapes[node.input[0]])
+            if node.input[0] in self._known_types:
+                self.set_type(node.output[0], self._known_types[node.input[0]])
+            if self.is_constant(node.input[0]):
+                self.constants_[node.output[0]] = node
+        elif all(map(self.is_constant, node.input)):
+            for o in node.output:
+                self.constants_[o] = node
+
+        # add the node
+        self.nodes.append(node)
+        if len(output_names) == 1:
+            return output_names[0]
+        return output_names
+
+    def make_nodes(
+        self,
+        builder: "GraphBuilder",
+        input_names: List[str],
+        output_names: List[str],
+        prefix: str = "",
+    ) -> Union[str, List[str]]:
+        """
+        Appends all nodes and initializers from another builder.
+        Handles the renaming of results.
+        The content stored in 'builder' is modified inplace to avoid copying.
+
+        :param builder: other builder
+        :param input_names: input names
+        :param output_names: output names
+        :param prefix: prefix added to every name coming from the other builder
+        :return: output names
+        """
+        renaming = {}
+        for init, value in builder.initializers_dict.items():
+            name = self.unique_name(f"{prefix}{init}")
+            renaming[init] = name
+            if isinstance(value, TensorProto):
+                value.name = name
+            self.initializers_dict[name] = value
+
+            self.constants_[name] = None
+            self.set_shape(name, builder._known_shapes[init])
+            self.set_type(name, builder._known_types[init])
+
+        assert len(input_names) == len(builder.inputs), (
+            f"Inconsistency between input_names={input_names} "
+            f"and the other builder inputs={builder.inputs}."
+        )
+
+        for name, inp in zip(input_names, builder.inputs):
+            new_name = self.unique_name(f"{prefix}{inp.name}")
+            renaming[inp.name] = new_name
+            if builder.has_shape(inp.name):
+                self.set_shape(new_name, builder.get_shape(inp.name))
+            if builder.has_type(inp.name):
+                self.set_type(new_name, builder.get_type(inp.name))
+            self.make_node("Identity", [name], [new_name])
+
+        for node in builder.nodes:
+            new_inputs = [renaming[i] for i in node.input]
+            new_outputs = [self.unique_name(f"{prefix}{o}") for o in node.output]
+            for o, no in zip(node.output, new_outputs):
+                renaming[o] = no
+            self.make_node(
+                node.op_type,
+                new_inputs,
+                new_outputs,
+                domain=node.domain,
+                attributes=node.attribute,
+            )
+            for o, no in zip(node.output, new_outputs):
+                if builder.has_shape(o):
+                    self.set_shape(no, builder.get_shape(o))
+                if builder.has_type(o):
+                    self.set_type(no, builder.get_type(o))
+
+        assert len(output_names) == len(builder.outputs), (
+            f"Inconsistency between output_names={output_names} and "
+            f"outputs={builder.outputs}, renaming={renaming}."
+        )
+        for name, out in zip(output_names, builder.outputs):
+            self.make_node("Identity", [renaming[out.name]], [name])
+
+        # opsets and domains
+        for o, v in builder.opsets.items():
+            if o in self.opsets:
+                assert self.opsets[o] == builder.opsets[o], (
+                    f"Opset mismatch for domain {o!r}, "
+                    f"{self.opsets[o]} != {builder.opsets[o]}."
+                )
+                continue
+            self.opsets[o] = v
+
+        if len(output_names) == 1:
+            return output_names[0]
+        return output_names
+
+    def from_array(self, arr: T, name: str = None) -> TensorProto:  # noqa: F821
+        if isinstance(arr, np.ndarray):
+            return self.from_np_array(arr, name)
+        raise NotImplementedError(
+            f"{type(arr)} is not supported yet but initializer {name or ''!r} is."
+        )
+
+    def from_np_array(self, arr: np.ndarray, name: str = None) -> TensorProto:
+        """
+        Converts a numpy array into a TensorProto storing its content in
+        ``raw_data``. ONNX expects little-endian bytes, the content is
+        swapped on big-endian machines.
+
+        :param arr: array to convert
+        :param name: name of the tensor, left empty if not specified
+        :return: TensorProto
+        """
+        arr_cpu = np.ascontiguousarray(arr) if not arr.flags["C_CONTIGUOUS"] else arr
+        if sys.byteorder == "big":
+            # ndarray.byteswap returns a swapped copy, arr is left untouched
+            arr_cpu = arr_cpu.byteswap()
+        # let's keep the tensor until the builder is released
+        # so the pointer does not disappear
+        self._cache_array.append(arr_cpu)
+
+        tensor = TensorProto()
+        tensor.dims.extend(arr_cpu.shape)
+        if name:
+            # a protobuf string field cannot be set to None
+            tensor.name = name
+        tensor.data_type = self._get_type(arr_cpu.dtype)
+        # raw_data needs a copy of the bytes, a pointer does not work
+        tensor.raw_data = arr_cpu.tobytes()
+        if self.verbose and np.prod(arr_cpu.shape) > 100:
+            print(
+                f"[GraphBuilder] from_array:{tensor.data_type}[{arr_cpu.shape}]:"
+                f"{'swapped' if sys.byteorder == 'big' else ''}"
+            )
+        return tensor
+
+    def _build_initializers(self) -> List[TensorProto]:
+        res = []
+        for k, v in sorted(self.initializers_dict.items()):
+            if isinstance(v, np.ndarray):
+                if np.prod(v.shape) > 100:
+                    if self.verbose:
+                        print(f"[GraphBuilder] from_array:{k}:{v.dtype}[{v.shape}]")
+                    t = self.from_array(v, name=k)
+                else:
+                    t = onh.from_array(v, name=k)
+                res.append(t)
+                continue
+            raise TypeError(
+                f"Unable to convert initializer {k!r} with type "
+                f"{type(v)} into a TensorProto."
+            )
+        return res
+
+    def process(
+        self,
+        graph_module: Any,
+        interpreter: "Interpreter",  # noqa: F821
+    ):
+        for node in graph_module.graph.nodes:
+            interpreter.run_node(node)
+
+    def to_onnx(
+        self, as_function: bool = False, optimize: bool = True
+    ) -> Union[FunctionProto, ModelProto]:
+        """
+        Converts the builder into a ModelProto.
+
+        :param as_function: exports as a FunctionProto, not implemented yet
+        :param optimize: calls method ``optimize`` before the conversion
+        :return: ModelProto
+        :raises NotImplementedError: if *as_function* is True
+        """
+        if optimize:
+            self.optimize()
+        if as_function:
+            raise NotImplementedError("Export as FunctionProto is not implemented yet.")
+        dense = self._build_initializers()
+        opsets = [oh.make_opsetid(*o) for o in self.opsets.items()]
+
+        if self.verbose:
+            print("[GraphBuilder] onh.make_graph")
+        graph = oh.make_graph(
+            self.nodes, "experiment", self.inputs, self.outputs, dense
+        )
+        if self.verbose:
+            print("[GraphBuilder] onh.make_model")
+        model = oh.make_model(graph, opset_imports=opsets)
+        return model
+
+    def _check_order_node(self, ind: int, node: NodeProto, existing: Set[str]):
+        for i in node.input:
+            assert i in existing, (
+                f"Unknown input {i!r} from node {ind}:{node.op_type}:{node.name}. "
+                f"Known: {existing}."
+            )
+        for att in node.attribute:
+            if att.type == AttributeProto.GRAPH and att.g:
+                g_existing = existing.copy()
+                for i in att.g.input:
+                    g_existing.add(i.name)
+                for ind2, node2 in enumerate(att.g.node):
+                    self._check_order_node((ind, ind2), node2, g_existing)
+                for o in att.g.output:
+                    assert (
+                        o.name in g_existing
+                    ), f"Unknown output {o.name!r}. Known: {g_existing}."
+        for o in node.output:
+            existing.add(o)
+
+    def check_order(self):
+        existing = set(self.initializers_dict)
+        for i in self.inputs:
+            existing.add(i.name)
+        for ind, node in enumerate(self.nodes):
+            self._check_order_node(ind, node, existing)
+        for o in self.outputs:
+            assert o.name in existing, f"Unknown output {o.name!r}. Known: {existing}."
+
+    def optimize(self, check_order: bool = False):
+        if check_order:
+            self.check_order()
+        self.remove_identity_nodes()
+        if check_order:
+            self.check_order()
+        if self.optimization_options.remove_unused:
+            self.remove_unused()
+            if check_order:
+                self.check_order()
+        if self.optimization_options.constant_folding:
+            self.constant_folding()
+            if check_order:
+                self.check_order()
+            if self.optimization_options.remove_unused:
+                self.remove_unused()
+                if check_order:
+                    self.check_order()
+
+    def hidden_inputs_graph(self, graph: GraphProto) -> Set[str]:
+        hidden = set()
+        memo = set(i.name for i in graph.initializer)
+        memo |= set(i.name for i in graph.sparse_initializer)
+        for node in graph.node:
+            for i in node.input:
+                if i not in memo:
+                    hidden.add(i)
+            for att in node.attribute:
+                if att.type == AttributeProto.GRAPH and att.g:
+                    hid = self.hidden_inputs_graph(att.g)
+                    less = set(h for h in hid if h not in memo)
+                    hidden |= less
+            memo |= set(node.output)
+        return hidden
+
+    def remove_unused(self):
+        """
+        Simple function to remove unused nodes.
+        It does not look into subgraphs and assumes there is none.
+        Everything is done in one pass.
+        """
+
+        # mark outputs
+        marked = {o.name: set() for o in self.outputs}
+        for node in reversed(self.nodes):
+            used = False
+            for o in node.output:
+                if o in marked:
+                    for i in node.input:
+                        marked[o].add(i)
+                        used = True
+            for att in node.attribute:
+                if att.type == AttributeProto.GRAPH and att.g:
+                    hidden_inputs = self.hidden_inputs_graph(att.g)
+                    for i in hidden_inputs:
+                        marked[i] = set()
+            if used:
+                for i in node.input:
+                    marked[i] = set()
+
+        # removed nodes
+        removed = set()
+        marked_set = set(marked)
+        for ind, node in enumerate(self.nodes):
+            if not (set(node.output) & marked_set):
+                removed.add(ind)
+
+        if self.verbose:
+            for k, v in self.initializers_dict.items():
+                if k not in marked:
+                    v = self.initializers_dict[k]
+                    print(f"[GraphBuilder] remove_initializer:{k}:{v.dtype}[{v.shape}]")
+        self.initializers_dict = {
+            k: v for k, v in self.initializers_dict.items() if k in marked
+        }
+        self.constants_ = {k: v for k, v in self.constants_.items() if k in marked}
+        self.nodes = [node for i, node in enumerate(self.nodes) if i not in removed]
+
+    def _apply_transpose(self, node: NodeProto, feeds: Dict[str, T]) -> T:  # noqa: F821
+        perm = None
+        for att in node.attribute:
+            if att.name == "perm":
+                perm = tuple(att.ints)
+                break
+        assert perm, f"perm not here in node {node}"
+        return [np.transpose(feeds[node.input[0]], perm)]
+
+    def constant_folding(self):
+        """
+        Folds all constants. Constants are marked during the creation of the graph.
+        There is no need to propagate this information.
+        """
+
+        updates = {}
+        node_to_remove = set()
+        for k, v in self.constants_.items():
+            if v is None:
+                # this is an initializer
+                continue
+            # a node
+            if all(map(self.is_constant, v.output)):
+                node_to_remove.add(tuple(v.output))
+                # node evaluation
+                if v.op_type == "Transpose":
+                    # bypassing onnx.numpy_helper.from_array, too slow
+                    feeds = {i: self.initializers_dict[i] for i in v.input}
+                    output = self._apply_transpose(v, feeds)
+                else:
+                    ref = ReferenceEvaluator(v)
+                    feeds = {i: self.get_constant(i) for i in v.input}
+                    output = ref.run(None, feeds)
+                for name, value in zip(v.output, output):
+                    updates[name] = None
+                    self.initializers_dict[name] = value
+                    if self.verbose:
+                        print(
+                            f"[GraphBuilder] fold_constant:{v.op_type}:{name}[{value.dtype}:"
+                            f"{value.shape}]:from:{','.join(sorted(feeds))}"
+                        )
+
+        self.constants_.update(updates)
+        new_nodes = []
+        for node in self.nodes:
+            if tuple(node.output) in node_to_remove:
+                continue
+            new_nodes.append(node)
+        self.nodes = new_nodes
+
+    def remove_identity_nodes(self):
+        """
+        Removes identity nodes.
+        """
+        # first pass: detect replacements
+        new_nodes = []
+        input_names = set(i.name for i in self.inputs)
+        output_names = set(i.name for i in self.outputs)
+        replacements = {}
+        for node in self.nodes:
+            if node.op_type != "Identity":
+                new_nodes.append(node)
+                continue
+
+            if node.output[0] not in output_names:
+                old_name, new_name = node.output[0], node.input[0]
+            elif node.input[0] not in input_names:
+                old_name, new_name = node.input[0], node.output[0]
+            else:
+                new_nodes.append(node)
+                continue
+
+            # the new name can be set for replacements as well
+            assert old_name not in replacements
+            if new_name in replacements:
+                new_name = replacements[new_name]
+                assert new_name not in replacements
+            replacements[old_name] = new_name
+
+        # second pass: replacements in initializer
+        for k, v in replacements.items():
+            if k in self.initializers_dict:
+                self.initializers_dict[v] = self.initializers_dict[k]
+                del self.initializers_dict[k]
+                assert self.constants_[v]
+                self.constants_[v] = self.constants_[k]
+                del self.constants_[k]
+
+        # third pass: replacements in node
+        self.nodes = []
+        for node in new_nodes:
+            repo = {o for o in node.output if o in replacements}
+            repi = {o for o in node.input if o in replacements}
+            if repi or repo:
+                new_node = oh.make_node(
+                    node.op_type,
+                    [replacements.get(i, i) for i in node.input],
+                    [replacements.get(i, i) for i in node.output],
+                    domain=node.domain,
+                    name=node.name,
+                )
+                new_node.attribute.extend(node.attribute)
+                self.nodes.append(new_node)
+            else:
+                self.nodes.append(node)
diff --git a/pyproject.toml b/pyproject.toml
index 4101adf..fd94bd3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -19,6 +19,7 @@ max-complexity = 10
 "_doc/examples/plot_first_example.py" = ["E402", "F811"]
 "_doc/examples/plot_onnxruntime.py" = ["E402", "F811"]
 "onnx_array_api/array_api/_onnx_common.py" = ["F821"]
+"onnx_array_api/graph_api/__init__.py" = ["F401"]
 "onnx_array_api/light_api/__init__.py" = ["F401"]
 "onnx_array_api/light_api/_op_var.py" = ["F821"]
 "onnx_array_api/light_api/_op_vars.py" = ["F821"]

From 6718ee8f0cc9f60538080b99c82f29c38b117945 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= <xadupre@users.noreply.github.com>
Date: Tue, 26 Dec 2023 22:18:09 +0100
Subject: [PATCH 04/44] Adds graph API to the tutorial (#58)

---
 _doc/tutorial/graph_api.rst               | 59 +++++++++++++++++++
 _doc/tutorial/index.rst                   |  1 +
 _doc/tutorial/onnx_api.rst                | 70 ++++++++++++++++-------
 onnx_array_api/graph_api/graph_builder.py |  2 +
 onnx_array_api/plotting/text_plot.py      | 24 +++-----
 5 files changed, 119 insertions(+), 37 deletions(-)
 create mode 100644 _doc/tutorial/graph_api.rst

diff --git a/_doc/tutorial/graph_api.rst b/_doc/tutorial/graph_api.rst
new file mode 100644
index 0000000..b373cc3
--- /dev/null
+++ b/_doc/tutorial/graph_api.rst
@@ -0,0 +1,59 @@
+.. _l-graph-api:
+
+=================================
+GraphBuilder: common API for ONNX
+=================================
+
+This is a very common way to build ONNX graphs. There are some
+annoying steps while building an ONNX graph. The first one is to
+give unique names to every intermediate result in the graph. The second
+is the conversion from numpy arrays to onnx tensors. A *graph builder*,
+here implemented by class
+:class:`GraphBuilder <onnx_array_api.graph_api.GraphBuilder>`
+usually makes these two frequent tasks easier.
+
+.. runpython::
+    :showcode:
+
+    import numpy as np
+    from onnx_array_api.graph_api  import GraphBuilder
+    from onnx_array_api.plotting.text_plot import onnx_simple_text_plot
+
+    g = GraphBuilder()
+    g.make_tensor_input("X", np.float32, (None, None))
+    g.make_tensor_input("Y", np.float32, (None, None))
+    r1 = g.make_node("Sub", ["X", "Y"])  # the name given to the output is given by the class,
+                                         # it ensures the name is unique
+    init = g.make_initializer(np.array([2], dtype=np.int64))  # the class automatically
+                                                              # converts the array to a tensor
+    r2 = g.make_node("Pow", [r1, init])
+    g.make_node("ReduceSum", [r2], outputs=["Z"])  # the output name is given because
+                                                   # the user wants to choose the name
+    g.make_tensor_output("Z", np.float32, (None, None))
+
+    onx = g.to_onnx()  # final conversion to onnx
+
+    print(onnx_simple_text_plot(onx))
+
+A simpler version of the same code produces the same graph.
+
+.. runpython::
+    :showcode:
+
+    import numpy as np
+    from onnx_array_api.graph_api  import GraphBuilder
+    from onnx_array_api.plotting.text_plot import onnx_simple_text_plot
+
+    g = GraphBuilder()
+    g.make_tensor_input("X", np.float32, (None, None))
+    g.make_tensor_input("Y", np.float32, (None, None))
+    r1 = g.op.Sub("X", "Y")  # the method name indicates which operator to use,
+                             # this can be used when there is no ambiguity about the
+                             # number of outputs
+    r2 = g.op.Pow(r1, np.array([2], dtype=np.int64))
+    g.op.ReduceSum(r2, outputs=["Z"])  # the user still wants to specify the name
+    g.make_tensor_output("Z", np.float32, (None, None))
+    
+    onx = g.to_onnx()
+
+    print(onnx_simple_text_plot(onx))
diff --git a/_doc/tutorial/index.rst b/_doc/tutorial/index.rst
index e3ca8d7..f4cce00 100644
--- a/_doc/tutorial/index.rst
+++ b/_doc/tutorial/index.rst
@@ -7,6 +7,7 @@ Tutorial
     :maxdepth: 1
 
     onnx_api
+    graph_api
     light_api
     numpy_api
     benchmarks
diff --git a/_doc/tutorial/onnx_api.rst b/_doc/tutorial/onnx_api.rst
index f27eb05..a4f80be 100644
--- a/_doc/tutorial/onnx_api.rst
+++ b/_doc/tutorial/onnx_api.rst
@@ -584,37 +584,31 @@ The second part modifies it.
 
     onnx.save(gs.export_onnx(graph), "modified.onnx")
 
-numpy API for onnx
-++++++++++++++++++
+Graph Builder API
++++++++++++++++++
 
-See :ref:`l-numpy-api-onnx`. This API was introduced to create graphs
-by using numpy API. If a function is defined only with numpy,
-it should be possible to use the exact same code to create the
-corresponding onnx graph. That's what this API tries to achieve.
-It works with the exception of control flow. In that case, the function
-produces different onnx graphs depending on the execution path.
+See :ref:`l-graph-api`. This API is very similar to what *skl2onnx* implements.
+It is still about adding nodes to a graph but some tasks are automated such as
+naming the results or converting constants to onnx classes.
 
 .. runpython::
     :showcode:
 
     import numpy as np
-    from onnx_array_api.npx import jit_onnx
+    from onnx_array_api.graph_api  import GraphBuilder
     from onnx_array_api.plotting.text_plot import onnx_simple_text_plot
 
-    def l2_loss(x, y):
-        return ((x - y) ** 2).sum(keepdims=1)
-
-    jitted_myloss = jit_onnx(l2_loss)
-    dummy = np.array([0], dtype=np.float32)
-
-    # The function is executed. Only then a onnx graph is created.
-    # One is created depending on the input type.
-    jitted_myloss(dummy, dummy)
+    g = GraphBuilder()
+    g.make_tensor_input("X", np.float32, (None, None))
+    g.make_tensor_input("Y", np.float32, (None, None))
+    r1 = g.op.Sub("X", "Y")
+    r2 = g.op.Pow(r1, np.array([2], dtype=np.int64))
+    g.op.ReduceSum(r2, outputs=["Z"])
+    g.make_tensor_output("Z", np.float32, (None, None))
+    
+    onx = g.to_onnx()
 
-    # get_onnx only works if it was executed once or at least with
-    # the same input type
-    model = jitted_myloss.get_onnx()
-    print(onnx_simple_text_plot(model))
+    print(onnx_simple_text_plot(onx))
 
 Light API
 +++++++++
@@ -647,3 +641,35 @@ There is no eager mode.
     )
 
     print(onnx_simple_text_plot(model))
+
+numpy API for onnx
+++++++++++++++++++
+
+See :ref:`l-numpy-api-onnx`. This API was introduced to create graphs
+by using numpy API. If a function is defined only with numpy,
+it should be possible to use the exact same code to create the
+corresponding onnx graph. That's what this API tries to achieve.
+It works with the exception of control flow. In that case, the function
+produces different onnx graphs depending on the execution path.
+
+.. runpython::
+    :showcode:
+
+    import numpy as np
+    from onnx_array_api.npx import jit_onnx
+    from onnx_array_api.plotting.text_plot import onnx_simple_text_plot
+
+    def l2_loss(x, y):
+        return ((x - y) ** 2).sum(keepdims=1)
+
+    jitted_myloss = jit_onnx(l2_loss)
+    dummy = np.array([0], dtype=np.float32)
+
+    # The function is executed. Only then a onnx graph is created.
+    # One is created depending on the input type.
+    jitted_myloss(dummy, dummy)
+
+    # get_onnx only works if it was executed once or at least with
+    # the same input type
+    model = jitted_myloss.get_onnx()
+    print(onnx_simple_text_plot(model))
diff --git a/onnx_array_api/graph_api/graph_builder.py b/onnx_array_api/graph_api/graph_builder.py
index b92d96b..0080cff 100644
--- a/onnx_array_api/graph_api/graph_builder.py
+++ b/onnx_array_api/graph_api/graph_builder.py
@@ -50,7 +50,9 @@ class Opset:
         "Mul": 1,
         "Log": 1,
         "Or": 1,
+        "Pow": 1,
         "Relu": 1,
+        "ReduceSum": 1,
         "Reshape": 1,
         "Shape": 1,
         "Slice": 1,
diff --git a/onnx_array_api/plotting/text_plot.py b/onnx_array_api/plotting/text_plot.py
index 36f9feb..9449acb 100644
--- a/onnx_array_api/plotting/text_plot.py
+++ b/onnx_array_api/plotting/text_plot.py
@@ -184,9 +184,7 @@ def iterate(nodes, node, depth=0, true_false=""):
             rows.extend(r)
         return "\n".join(rows)
 
-    raise NotImplementedError(  # pragma: no cover
-        f"Type {node.op_type!r} cannot be displayed."
-    )
+    raise NotImplementedError(f"Type {node.op_type!r} cannot be displayed.")
 
 
 def _append_succ_pred(
@@ -403,7 +401,7 @@ def _find_sequence(node_name, known, done):
                 )
 
         if not sequences:
-            raise RuntimeError(  # pragma: no cover
+            raise RuntimeError(
                 "Unexpected empty sequence (len(possibles)=%d, "
                 "len(done)=%d, len(nodes)=%d). This is usually due to "
                 "a name used both as result name and node node. "
@@ -434,7 +432,7 @@ def _find_sequence(node_name, known, done):
                         best = k
 
         if best is None:
-            raise RuntimeError(  # pragma: no cover
+            raise RuntimeError(
                 f"Wrong implementation (len(sequence)={len(sequences)})."
             )
         if verbose:
@@ -453,7 +451,7 @@ def _find_sequence(node_name, known, done):
             known |= set(v.output)
 
     if len(new_nodes) != len(nodes):
-        raise RuntimeError(  # pragma: no cover
+        raise RuntimeError(
             "The returned new nodes are different. "
             "len(nodes=%d) != %d=len(new_nodes). done=\n%r"
             "\n%s\n----------\n%s"
@@ -486,7 +484,7 @@ def _find_sequence(node_name, known, done):
     n0s = set(n.name for n in nodes)
     n1s = set(n.name for n in new_nodes)
     if n0s != n1s:
-        raise RuntimeError(  # pragma: no cover
+        raise RuntimeError(
             "The returned new nodes are different.\n"
             "%r !=\n%r\ndone=\n%r"
             "\n----------\n%s\n----------\n%s"
@@ -758,7 +756,7 @@ def str_node(indent, node):
                     try:
                         val = str(to_array(att.t).tolist())
                     except TypeError as e:
-                        raise TypeError(  # pragma: no cover
+                        raise TypeError(
                             "Unable to display tensor type %r.\n%s"
                             % (att.type, str(att))
                         ) from e
@@ -853,9 +851,7 @@ def str_node(indent, node):
             if isinstance(att, str):
                 rows.append(f"attribute: {att!r}")
             else:
-                raise NotImplementedError(  # pragma: no cover
-                    "Not yet introduced in onnx."
-                )
+                raise NotImplementedError("Not yet introduced in onnx.")
 
     # initializer
     if hasattr(model, "initializer"):
@@ -894,7 +890,7 @@ def str_node(indent, node):
 
     try:
         nodes = reorder_nodes_for_display(model.node, verbose=verbose)
-    except RuntimeError as e:  # pragma: no cover
+    except RuntimeError as e:
         if raise_exc:
             raise e
         else:
@@ -924,9 +920,7 @@ def str_node(indent, node):
                 indent = mi
                 if previous_indent is not None and indent < previous_indent:
                     if verbose:
-                        print(  # pragma: no cover
-                            f"[onnx_simple_text_plot] break2 {node.op_type}"
-                        )
+                        print(f"[onnx_simple_text_plot] break2 {node.op_type}")
                     add_break = True
             if not add_break and previous_out is not None:
                 if not (set(node.input) & previous_out):

From 71aa3a0a1a8bd16e6ae0090bff3f9419a1b55f17 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= <xadupre@users.noreply.github.com>
Date: Thu, 4 Jan 2024 20:25:15 +0100
Subject: [PATCH 05/44] Add methods to update nodes in GraphAPI (#59)

* Add methods to update nodes

* update doc
---
 CHANGELOGS.rst                                |   5 +
 _doc/api/graph_api.rst                        |   6 +
 _unittests/ut_graph_api/test_graph_builder.py |  58 +++++++++
 onnx_array_api/graph_api/__init__.py          |   2 +-
 onnx_array_api/graph_api/graph_builder.py     | 121 +++++++++++++++++-
 5 files changed, 188 insertions(+), 4 deletions(-)

diff --git a/CHANGELOGS.rst b/CHANGELOGS.rst
index a5b1577..c3c667d 100644
--- a/CHANGELOGS.rst
+++ b/CHANGELOGS.rst
@@ -1,6 +1,11 @@
 Change Logs
 ===========
 
+0.2.0
++++++
+
+* :pr:`59`: add methods to update nodes in GraphAPI 
+
 0.1.3
 +++++
 
diff --git a/_doc/api/graph_api.rst b/_doc/api/graph_api.rst
index 2cb5045..f618b7b 100644
--- a/_doc/api/graph_api.rst
+++ b/_doc/api/graph_api.rst
@@ -9,6 +9,12 @@ GraphBuilder
 .. autoclass:: onnx_array_api.graph_api.GraphBuilder
     :members:
 
+NodePattern
+===========
+
+.. autoclass:: onnx_array_api.graph_api.NodePattern
+    :members:
+
 OptimizationOptions
 ===================
 
diff --git a/_unittests/ut_graph_api/test_graph_builder.py b/_unittests/ut_graph_api/test_graph_builder.py
index 3369b2c..33c3155 100644
--- a/_unittests/ut_graph_api/test_graph_builder.py
+++ b/_unittests/ut_graph_api/test_graph_builder.py
@@ -376,6 +376,64 @@ def test_make_nodes_noprefix(self):
         got = ref.run(None, feeds)
         self.assertEqualArray(expected, got[0])
 
+    def test_node_pattern(self):
+        model = onnx.parser.parse_model(
+            """
+            <ir_version: 8, opset_import: [ "": 18]>
+            agraph (float[N] x) => (float[N] z) {
+                two = Constant <value_float=2.0> ()
+                four = Add(two, two)
+                z = Mul(x, four)
+            }"""
+        )
+        gr = GraphBuilder(model)
+        p = gr.np(index=0)
+        r = repr(p)
+        self.assertEqual("NodePattern(index=0, op_type=None, name=None)", r)
+
+    def test_update_node_attribute(self):
+        model = onnx.parser.parse_model(
+            """
+            <ir_version: 8, opset_import: [ "": 18]>
+            agraph (float[N] x) => (float[N] z) {
+                two = Constant <value_float=2.0> ()
+                four = Add(two, two)
+                z = Mul(x, four)
+            }"""
+        )
+        gr = GraphBuilder(model)
+        self.assertEqual(len(gr.nodes), 3)
+        m = gr.update_attribute(gr.np(op_type="Constant"), value_float=float(1))
+        self.assertEqual(m, 1)
+        self.assertEqual(len(gr.nodes), 3)
+        onx = gr.to_onnx()
+        self.assertEqual(len(onx.graph.node), 3)
+        node = onx.graph.node[0]
+        self.assertIn("f: 1", str(node))
+
+    def test_delete_node_attribute(self):
+        model = onnx.parser.parse_model(
+            """
+            <ir_version: 8, opset_import: [ "": 18]>
+            agraph (float[N] x) => (float[N] z) {
+                two = Constant <value_float=2.0> ()
+                four = Add(two, two)
+                z = Mul(x, four)
+            }"""
+        )
+        gr = GraphBuilder(model)
+        self.assertEqual(len(gr.nodes), 3)
+        m = gr.update_attribute(
+            gr.np(op_type="Constant"), value_float=gr.DELETE, value_int=1
+        )
+        self.assertEqual(m, 1)
+        self.assertEqual(len(gr.nodes), 3)
+        onx = gr.to_onnx()
+        self.assertEqual(len(onx.graph.node), 3)
+        node = onx.graph.node[0]
+        self.assertNotIn('name: "value_float"', str(node))
+        self.assertIn("i: 1", str(node))
+
 
 if __name__ == "__main__":
     unittest.main(verbosity=2)
diff --git a/onnx_array_api/graph_api/__init__.py b/onnx_array_api/graph_api/__init__.py
index ea89a2e..15e274e 100644
--- a/onnx_array_api/graph_api/__init__.py
+++ b/onnx_array_api/graph_api/__init__.py
@@ -1 +1 @@
-from .graph_builder import GraphBuilder
+from .graph_builder import GraphBuilder, NodePattern
diff --git a/onnx_array_api/graph_api/graph_builder.py b/onnx_array_api/graph_api/graph_builder.py
index 0080cff..85a838f 100644
--- a/onnx_array_api/graph_api/graph_builder.py
+++ b/onnx_array_api/graph_api/graph_builder.py
@@ -1,6 +1,6 @@
 import sys
 from functools import partial
-from typing import Any, Dict, List, Optional, Sequence, Set, Tuple, Union
+from typing import Any, Dict, Iterator, List, Optional, Sequence, Set, Tuple, Union
 import numpy as np
 from onnx.defs import onnx_opset_version
 import onnx.helper as oh
@@ -30,6 +30,51 @@ def __init__(
         self.constant_size = constant_size
 
 
+class NodePattern:
+    """
+    Pattern selecting nodes in a graph, a criterion left to None is ignored.
+    """
+
+    def __init__(
+        self,
+        index: Optional[int] = None,
+        op_type: Optional[str] = None,
+        name: Optional[str] = None,
+    ):
+        self.index = index
+        self.op_type = op_type
+        self.name = name
+
+    def __repr__(self):
+        "usual"
+        args = ["index", "op_type", "name"]
+        sargs = []
+        for a in args:
+            # every criterion is displayed, including those left to None
+            sargs.append(f"{a}={getattr(self, a)!r}")
+        return f"{self.__class__.__name__}({', '.join(sargs)})"
+
+    def find(self, graph: "GraphBuilder") -> Iterator:
+        """
+        Iterates on the nodes of ``graph.nodes`` matching the pattern.
+        """
+        for index, node in enumerate(graph.nodes):
+            if self.match(index, node):
+                yield node
+
+    def match(self, index: int, node: NodeProto) -> bool:
+        """
+        Tells if a node, at position *index*, satisfies every criterion set.
+        """
+        if self.index is not None and self.index != index:
+            return False
+        if self.op_type is not None and self.op_type != node.op_type:
+            return False
+        if self.name is not None and self.name != node.name:
+            return False
+        return True
+
+
 class Opset:
     # defined for opset >= 18
     # name: number of expected outputs
@@ -168,7 +213,7 @@ def __init__(
                 f"{type(target_opset_or_existing_proto)} is not supported."
             )
 
-        self.op = Opset(self, self.opsets[""])
+        self.op = Opset(self, self.opsets[""]) if "" in self.opsets else None
         self._cache_array = []
 
     def _get_tensor_shape(
@@ -749,7 +794,6 @@ def constant_folding(self):
         Folds all constants. Constants are marked during the creation of the graph.
         There is no need to propagate this information.
         """
-
         updates = {}
         node_to_remove = set()
         for k, v in self.constants_.items():
@@ -840,3 +884,74 @@ def remove_identity_nodes(self):
                 self.nodes.append(new_node)
             else:
                 self.nodes.append(node)
+
+    def np(
+        self,
+        index: Optional[int] = None,
+        op_type: Optional[str] = None,
+        name: Optional[str] = None,
+    ) -> NodePattern:
+        """
+        Returns an instance of :class:`NodePattern
+        <onnx_array_api.graph_api.graph_builder.NodePattern>`.
+        """
+        return NodePattern(index=index, op_type=op_type, name=name)
+
+    def update_attribute(
+        self,
+        pat: NodePattern,
+        recursive: bool = False,
+        **kwargs: Dict[str, Any],
+    ) -> int:
+        """
+        Updates attributes for nodes matching the pattern.
+
+        :param pat: returned by method :meth:`GraphBuilder.np`
+        :param recursive: walk through subgraph
+        :param kwargs: attributes to modify
+        :return: number of modified nodes
+        """
+        assert not recursive, "recursive=True is not implemented."
+        modified = 0
+        for node in pat.find(self):
+            up = self.update_node(node, **kwargs)
+            if up:
+                modified += 1
+        return modified
+
+    DELETE = object()
+
+    def update_node(self, node: NodeProto, **kwargs) -> bool:
+        """
+        Updates, adds or removes (value ``DELETE``) attributes of a node proto.
+        Returns True if at least one attribute was changed, added or removed.
+        """
+        processed = set()
+        modified = False  # nothing changed yet, set once an attribute differs
+        atts = []
+        for att in node.attribute:
+            if att.name in kwargs:
+                processed.add(att.name)
+                modified = True  # replaced or deleted, either way it changes
+                if kwargs[att.name] is GraphBuilder.DELETE:
+                    continue
+                new_att = oh.make_attribute(att.name, kwargs[att.name])
+                assert new_att.type == att.type, (
+                    f"Mismatch value for attribute {att.name!r} has type "
+                    f"{att.type} but the new value leads to "
+                    f"type={new_att.type}."
+                )
+                atts.append(new_att)
+                continue
+            atts.append(att)
+        for k, v in kwargs.items():
+            if k in processed or v is GraphBuilder.DELETE:
+                continue
+            modified = True
+            new_att = oh.make_attribute(k, v)
+            atts.append(new_att)
+
+        if modified:
+            del node.attribute[:]
+            node.attribute.extend(atts)
+        return modified

From 7895c275e6a3dc4221aa44fc8ec2e6dbe4688f5b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= <xadupre@users.noreply.github.com>
Date: Mon, 8 Jan 2024 11:32:06 +0100
Subject: [PATCH 06/44] Support translation of local functions (#60)

* add function to translate functions

* doc

* fix translation of local functions

* refactoring

* fix missing import

* verbose

* link
---
 CHANGELOGS.rst                                |   1 +
 _doc/api/light_api.rst                        |  23 ++-
 .../custom_ops_type_inference_fails_0.onnx    | Bin 0 -> 2086 bytes
 .../ut_light_api/test_backend_export.py       |  11 +-
 _unittests/ut_light_api/test_translate.py     |   2 +-
 .../ut_light_api/test_translate_classic.py    | 124 ++++++++++++-
 onnx_array_api/light_api/__init__.py          |   2 +-
 .../light_api/{emitter.py => base_emitter.py} | 166 +++++++-----------
 onnx_array_api/light_api/inner_emitter.py     |  77 +++++++-
 onnx_array_api/light_api/light_emitter.py     | 104 +++++++++++
 onnx_array_api/light_api/make_helper.py       |  65 +++++++
 onnx_array_api/light_api/translate.py         |  64 +++++--
 12 files changed, 492 insertions(+), 147 deletions(-)
 create mode 100644 _unittests/ut_light_api/_data/custom_ops_type_inference_fails_0.onnx
 rename onnx_array_api/light_api/{emitter.py => base_emitter.py} (57%)
 create mode 100644 onnx_array_api/light_api/light_emitter.py
 create mode 100644 onnx_array_api/light_api/make_helper.py

diff --git a/CHANGELOGS.rst b/CHANGELOGS.rst
index c3c667d..39aaea9 100644
--- a/CHANGELOGS.rst
+++ b/CHANGELOGS.rst
@@ -4,6 +4,7 @@ Change Logs
 0.2.0
 +++++
 
+* :pr:`60`: supports translation of local functions
 * :pr:`59`: add methods to update nodes in GraphAPI 
 
 0.1.3
diff --git a/_doc/api/light_api.rst b/_doc/api/light_api.rst
index 544b35f..15342c1 100644
--- a/_doc/api/light_api.rst
+++ b/_doc/api/light_api.rst
@@ -16,6 +16,13 @@ translate
 
 .. autofunction:: onnx_array_api.light_api.translate
 
+make_helper
++++++++++++
+
+.. autofunction:: onnx_array_api.light_api.make_helper.make_node_extended
+
+.. autofunction:: onnx_array_api.light_api.make_helper.make_ref_attribute
+
 Classes for the Light API
 =========================
 
@@ -68,19 +75,13 @@ Classes for the Translater
 BaseEmitter
 +++++++++++
 
-.. autoclass:: onnx_array_api.light_api.emitter.BaseEmitter
-    :members:
-
-Emitter
-+++++++
-
-.. autoclass:: onnx_array_api.light_api.emitter.Emitter
+.. autoclass:: onnx_array_api.light_api.base_emitter.BaseEmitter
     :members:
 
 EventType
 +++++++++
 
-.. autoclass:: onnx_array_api.light_api.translate.EventType
+.. autoclass:: onnx_array_api.light_api.base_emitter.EventType
     :members:
 
 InnerEmitter
@@ -89,6 +90,12 @@ InnerEmitter
 .. autoclass:: onnx_array_api.light_api.inner_emitter.InnerEmitter
     :members:
 
+LightEmitter
+++++++++++++
+
+.. autoclass:: onnx_array_api.light_api.light_emitter.LightEmitter
+    :members:
+
 Translater
 ++++++++++
 
diff --git a/_unittests/ut_light_api/_data/custom_ops_type_inference_fails_0.onnx b/_unittests/ut_light_api/_data/custom_ops_type_inference_fails_0.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..8116ec338064567cea06fafe45168567813071ed
GIT binary patch
literal 2086
zcmah}|4!pZ5cb+m;ta<T77lUzIJIsMom?q`aH^26sKG!^MUb|HQvp@7eD<ce#CEVZ
zT;L)445)v$QXipzR@J>i-=I<-p}o%9agw8mMoQutf3x%L%zUG1R9cX9>6uiM%wJS!
z0y(DYvEAOFrDHpBoemS`byt71a}_#)?|$8LLhfI)eLrMQY?MLf(fsrckxkl(5MR9z
zfT|Y-jvvAw1k%%><R;u+v)4d^{9u?6zBGsMQ%#uPu^`arwtESWf|vN*=tK_R*nNJm
z-e?+C42gnnkF%qzF4LE`Us?NGEqnXRgRfp4?b_?LhTYg|9oa{X_1X<lj0aHgI3ni>
z@-wDDi=#IO&i7F~FA2va6nX4~$=3U(m73;<Q5g1Z*5@qb%&SH*kGe^f_jnZhculw4
zgBLWtLJ%Zp4)W2qqcGukdqao9rPJv#V@^{lB~y`Q*&2u55EV{gc;T!PgJUQ;(H9RK
ze$eY2vh?61L9!bq%yQiGHKeU}Ssc_!9dA*(tL#*{jw0i+CTnVC+R976r%B&-BG12w
zjYk!d@flQqPyOxlpI@9#iS7ev8lfsSN_xEv<b=+{96Bh-nhY8(_QG@9UKN&wpvsK(
z6#l<dm~Rg57!dS71aSPSD0VgIROGkh&m!DCY&z*1<jR<6iLp$7JfnQd;;0+>A+O;e
znpIl3Mn`+B7mIl>rYb~NCHz<!2Ag1hMzxT=x!t~%JY+#5*gklde~2}Ni|7T&P6E=W
zs>q9JorLfE=gh=2QYLi7P*=rUoJwSh^Y;@GjHg9#A#}oiS1){#G@YhNA+xC}+`7_?
zIHpRCA=n*DHF^gr3o7^9df}Th7Bh1W&=6l*>L)18nCS{mmT5q4Q>EDp^ztF|dM<1A
z0-=+0#=4##B$*PPWqe$!?6B}&<SdB8K;Q)UQdf5TOT3SGriHX+y4t#kx%n|wF1e^z
zE}0o9r;@R;mBZqE1Of%aqSG>(D99v=_0m8cW`dEqmqNn5_5kr<-+9eCyP+F-EH>t0
z;+$P2;`~paC-vz%t<ThFQ9;HEOs1(5yDhd&viZaE@7Ct8l@n|8>EE-B&3Av%#tkW&
zV{>X&VJs6>Mb>*OvjiyyvLa9g1I9Y|WZ(zkr{e>jRj`VEhjBNIrizj){o(stc`p^_
z-YsPv-l4y@<VO|m+cu~<y^!hyrDuLJ9P_+wk8`O^rcxo2m5!iL$10VBwXqI=kV9wx
zqbdaE5w&|F{u${|4sgS(=8O<TDdlG)(U#c5<|5>tTo2x=UVQ0rRD`(<*>wPOQmuo6
zv~kjhuWKNCxZHv@7`~%rToHD*BZ@e$uEUK9P@TR%(8rSK&Im-6esVS%&lM0hI(e*@
zhpQY_Hr(QMS?qCK7-^1-Gg7Dx*cX$I?=lZ>C;rV17&vaRoM`)@)47l56J)|;7zc{s
M$%T|n&0SOSFP?~MApigX

literal 0
HcmV?d00001

diff --git a/_unittests/ut_light_api/test_backend_export.py b/_unittests/ut_light_api/test_backend_export.py
index b0c1cbc..f597d21 100644
--- a/_unittests/ut_light_api/test_backend_export.py
+++ b/_unittests/ut_light_api/test_backend_export.py
@@ -1,3 +1,4 @@
+import sys
 import unittest
 from typing import Any, Dict, List, Optional
 from difflib import unified_diff
@@ -17,12 +18,16 @@
     make_opsetid,
     make_tensor_value_info,
 )
+from onnx.reference.op_run import to_array_extended
 from onnx.numpy_helper import from_array, to_array
 from onnx.backend.base import Device, DeviceType
 from onnx_array_api.reference import ExtendedReferenceEvaluator
+from onnx_array_api.light_api.make_helper import make_node_extended
 from onnx_array_api.light_api import translate
 from onnx_array_api.plotting.text_plot import onnx_simple_text_plot
 
+verbosity = 10 if "-v" in sys.argv or "--verbose" in sys.argv else 0
+
 
 class ReferenceImplementationError(RuntimeError):
     "Fails, export cannot be compared."
@@ -34,7 +39,7 @@ class ExportWrapper:
 
     def __init__(self, model):
         self.model = model
-        self.expected_sess = ExtendedReferenceEvaluator(self.model)
+        self.expected_sess = ExtendedReferenceEvaluator(self.model, verbose=verbosity)
 
     @property
     def input_names(self):
@@ -85,6 +90,7 @@ def run(
             locs = {
                 "np": numpy,
                 "to_array": to_array,
+                "to_array_extended": to_array_extended,
                 "from_array": from_array,
                 "TensorProto": TensorProto,
                 "make_function": make_function,
@@ -92,6 +98,7 @@ def run(
                 "make_model": make_model,
                 "make_graph": make_graph,
                 "make_node": make_node,
+                "make_node_extended": make_node_extended,
                 "make_tensor_value_info": make_tensor_value_info,
             }
             globs = locs.copy()
@@ -105,7 +112,7 @@ def run(
                     f"Unable to executed code for api {api!r}\n{new_code}"
                 ) from e
             export_model = locs["model"]
-            ref = ExtendedReferenceEvaluator(export_model)
+            ref = ExtendedReferenceEvaluator(export_model, verbose=verbosity)
             try:
                 got = ref.run(names, feeds)
             except (TypeError, AttributeError) as e:
diff --git a/_unittests/ut_light_api/test_translate.py b/_unittests/ut_light_api/test_translate.py
index c2b2c70..9974f81 100644
--- a/_unittests/ut_light_api/test_translate.py
+++ b/_unittests/ut_light_api/test_translate.py
@@ -6,7 +6,7 @@
 from onnx.reference import ReferenceEvaluator
 from onnx_array_api.ext_test_case import ExtTestCase
 from onnx_array_api.light_api import start, translate, g
-from onnx_array_api.light_api.emitter import EventType
+from onnx_array_api.light_api.base_emitter import EventType
 
 OPSET_API = min(19, onnx_opset_version() - 1)
 
diff --git a/_unittests/ut_light_api/test_translate_classic.py b/_unittests/ut_light_api/test_translate_classic.py
index cb7d6a4..4d52183 100644
--- a/_unittests/ut_light_api/test_translate_classic.py
+++ b/_unittests/ut_light_api/test_translate_classic.py
@@ -5,6 +5,7 @@
 from onnx import ModelProto, TensorProto, load
 from onnx.defs import onnx_opset_version
 from onnx.reference import ReferenceEvaluator
+from onnx.reference.op_run import OpRun
 from onnx.helper import (
     make_tensor_value_info,
     make_node,
@@ -68,7 +69,7 @@ def test_exp(self):
         functions = []
         inputs.append(make_tensor_value_info('X', TensorProto.FLOAT, shape=[]))
         nodes.append(
-            make_node(
+            make_node_extended(
                 'Exp',
                 ['X'],
                 ['Y']
@@ -144,14 +145,14 @@ def test_transpose(self):
             )
             inputs.append(make_tensor_value_info('X', TensorProto.FLOAT, shape=[]))
             nodes.append(
-                make_node(
+                make_node_extended(
                     'Reshape',
                     ['X', 'r'],
                     ['r0_0']
                 )
             )
             nodes.append(
-                make_node(
+                make_node_extended(
                     'Transpose',
                     ['r0_0'],
                     ['Y'],
@@ -210,7 +211,7 @@ def test_topk_reverse(self):
             inputs.append(make_tensor_value_info('X', TensorProto.FLOAT, shape=[]))
             inputs.append(make_tensor_value_info('K', TensorProto.INT64, shape=[]))
             nodes.append(
-                make_node(
+                make_node_extended(
                     'TopK',
                     ['X', 'K'],
                     ['Values', 'Indices'],
@@ -264,7 +265,6 @@ def test_aionnxml(self):
             .to_onnx()
         )
         code = translate(onx, api="onnx")
-        print(code)
         expected = dedent(
             """
             opset_imports = [
@@ -285,14 +285,14 @@ def test_aionnxml(self):
             )
             inputs.append(make_tensor_value_info('X', TensorProto.FLOAT, shape=[]))
             nodes.append(
-                make_node(
+                make_node_extended(
                     'Reshape',
                     ['X', 'r'],
                     ['USE']
                 )
             )
             nodes.append(
-                make_node(
+                make_node_extended(
                     'Normalizer',
                     ['USE'],
                     ['Y'],
@@ -318,7 +318,115 @@ def test_aionnxml(self):
         self.maxDiff = None
         self.assertEqual(expected, code)
 
+    @classmethod
+    def _code_line(cls, code):
+        lines = code.split("\n")
+        return "\n".join(f"{i+1:03d} {line}" for i, line in enumerate(lines))
+
+    @classmethod
+    def _run(cls, code):
+        try:
+            code_compiled = compile(code, "<string>", mode="exec")
+        except Exception as e:
+            raise AssertionError(
+                f"Compilation failed due to {e}\n---\n{cls._code_line(code)}\n---\n{e}"
+            ) from e
+
+        import onnx
+        import onnx.helper
+        import onnx.numpy_helper
+        import onnx_array_api.light_api.make_helper
+        import onnx.reference.custom_element_types
+
+        def from_array_extended(tensor, name=None):
+            dt = tensor.dtype
+            if (
+                dt == onnx.reference.custom_element_types.float8e4m3fn
+                and dt.descr[0][0] == "e4m3fn"
+            ):
+                to = TensorProto.FLOAT8E4M3FN
+                dt_to = np.uint8
+            elif (
+                dt == onnx.reference.custom_element_types.bfloat16
+                and dt.descr[0][0] == "bfloat16"
+            ):
+                to = TensorProto.BFLOAT16
+                dt_to = np.uint16
+            else:
+                return onnx.numpy_helper.from_array(tensor, name)
+
+            t = onnx.numpy_helper.from_array(tensor.astype(dt_to), name)
+            t.data_type = to
+            return t
+
+        globs = onnx.__dict__.copy()
+        globs.update(onnx.helper.__dict__)
+        globs.update(onnx.numpy_helper.__dict__)
+        globs.update(onnx_array_api.light_api.make_helper.__dict__)
+        globs.update(onnx.reference.custom_element_types.__dict__)
+        globs["from_array_extended"] = from_array_extended
+        locs = {}
+        try:
+            exec(code_compiled, globs, locs)
+        except Exception as e:
+            raise AssertionError(
+                f"Execution failed due to {e}\n---\n{cls._code_line(code)}\n---\n{e}"
+            ) from e
+        return globs, locs
+
+    def test_remove_nodes(self):
+        path = os.path.join(
+            os.path.dirname(__file__), "_data", "custom_ops_type_inference_fails_0.onnx"
+        )
+        onx = load(path)
+        code = translate(onx, api="onnx")
+        _, locs = self._run(code)
+        self.assertIn("model", locs)
+        model = locs["model"]
+        x = np.arange(4).reshape((-1, 2)).astype(np.float32)
+        feeds = {"X": x}
+
+        class CustomGemmFloat8E4M3FN(OpRun):
+            op_domain = "onnx_extented.ortops.tutorial.cpu"
+
+            def _run(
+                self,
+                x,
+                y,
+                bias=None,
+                scale_x=None,
+                scale_y=None,
+                scale_z=None,
+                transA=False,
+                transB=False,
+                dtype=None,
+                rowMajor=None,
+                computeType=None,
+            ):
+                if scale_x is not None:
+                    x = x * scale_x
+                if transA:
+                    x = x.T
+                if scale_y is not None:
+                    y = y * scale_y
+                if transB:
+                    y = y.T
+                z = x @ y
+                if bias is not None:
+                    z += bias
+                if scale_z is not None:
+                    z = z / scale_z
+                return (z,)
+
+        ref = ReferenceEvaluator(onx, new_ops=[CustomGemmFloat8E4M3FN])
+        expected = ref.run(None, feeds)[0]
+        ref2 = ReferenceEvaluator(model, new_ops=[CustomGemmFloat8E4M3FN])
+        got = ref2.run(None, feeds)[0]
+        self.assertEqualArray(expected, got)
+
+        # with open("debug_test_remove_nodes.py", "w") as f:
+        #     f.write(code)
+
 
 if __name__ == "__main__":
-    # TestLightApi().test_topk()
     unittest.main(verbosity=2)
diff --git a/onnx_array_api/light_api/__init__.py b/onnx_array_api/light_api/__init__.py
index be6e9dd..558e626 100644
--- a/onnx_array_api/light_api/__init__.py
+++ b/onnx_array_api/light_api/__init__.py
@@ -67,7 +67,7 @@ def translate(proto: ModelProto, single_line: bool = False, api: str = "light")
     :param single_line: as a single line or not
     :param api: API to export into,
         default is `"light"` and this is handle by class
-        :class:`onnx_array_api.light_api.emitter.Emitter`,
+        :class:`onnx_array_api.light_api.light_emitter.LightEmitter`,
         another value is `"onnx"` which is the inner API implemented
         in onnx package.
     :return: code
diff --git a/onnx_array_api/light_api/emitter.py b/onnx_array_api/light_api/base_emitter.py
similarity index 57%
rename from onnx_array_api/light_api/emitter.py
rename to onnx_array_api/light_api/base_emitter.py
index a1b0e40..3a0dfb6 100644
--- a/onnx_array_api/light_api/emitter.py
+++ b/onnx_array_api/light_api/base_emitter.py
@@ -1,9 +1,8 @@
 import inspect
-from typing import Any, Dict, List, Tuple
+from typing import Any, Dict, List, Optional, Tuple
 from enum import IntEnum
 import numpy as np
 from onnx import AttributeProto
-from .annotations import ELEMENT_TYPE_NAME
 
 
 class EventType(IntEnum):
@@ -11,13 +10,17 @@ class EventType(IntEnum):
     INPUT = 1
     OUTPUT = 2
     NODE = 3
-    TO_ONNX = 4
+    TO_ONNX_MODEL = 4
     BEGIN_GRAPH = 5
     END_GRAPH = 6
     BEGIN_FUNCTION = 7
     END_FUNCTION = 8
     INITIALIZER = 9
     SPARSE_INITIALIZER = 10
+    FUNCTION_INPUT = 11
+    FUNCTION_OUTPUT = 12
+    FUNCTION_ATTRIBUTES = 13
+    TO_ONNX_FUNCTION = 14
 
     @classmethod
     def to_str(cls, self) -> str:
@@ -54,8 +57,11 @@ def __call__(self, event: EventType, **kwargs: Dict[str, Any]) -> List[str]:
         if event == EventType.START:
             return self._emit_start(**kwargs)
 
-        if event == EventType.TO_ONNX:
-            return self._emit_to_onnx(**kwargs)
+        if event == EventType.TO_ONNX_MODEL:
+            return self._emit_to_onnx_model(**kwargs)
+
+        if event == EventType.TO_ONNX_FUNCTION:
+            return self._emit_to_onnx_function(**kwargs)
 
         if event == EventType.BEGIN_GRAPH:
             return self._emit_begin_graph(**kwargs)
@@ -63,6 +69,21 @@ def __call__(self, event: EventType, **kwargs: Dict[str, Any]) -> List[str]:
         if event == EventType.END_GRAPH:
             return self._emit_end_graph(**kwargs)
 
+        if event == EventType.BEGIN_FUNCTION:
+            return self._emit_begin_function(**kwargs)
+
+        if event == EventType.END_FUNCTION:
+            return self._emit_end_function(**kwargs)
+
+        if event == EventType.FUNCTION_INPUT:
+            return self._emit_function_input(**kwargs)
+
+        if event == EventType.FUNCTION_OUTPUT:
+            return self._emit_function_output(**kwargs)
+
+        if event == EventType.FUNCTION_ATTRIBUTES:
+            return self._emit_function_attributes(**kwargs)
+
         raise ValueError(f"Unexpected event {EventType.to_str(event)}.")
 
     def render_attribute_value(self, value: Any) -> Tuple[List[str], str]:
@@ -104,11 +125,27 @@ def render_attribute_value(self, value: Any) -> Tuple[List[str], str]:
             srows = ".".join(rows[:-1])
             return [], f"g().{srows}"
 
+        if isinstance(value, tuple) and len(value) == 2 and value[1] is None:
+            # in a function, an attribute taking its value from a function attribute
+            v = value[0]
+            name = v.name
+            ref = v.ref_attr_name
+            dt = v.type
+            return [], self._make_attribute(name=name, ref_attr_name=ref, attr_type=dt)
+
         raise ValueError(
             f"Unable to render an attribute {type(v)}, "
             f"attribute type={value[0].type}, "
             f"dtype={getattr(v, 'dtype', '-')}, "
-            f"shape={getattr(v, 'shape', '-')}, {value}."
+            f"shape={getattr(v, 'shape', '-')}, type(value)={type(value)}, "
+            f"value={value!r}."
+        )
+
+    def _make_attribute(
+        self, name: str, attr_type: int, ref_attr_name: Optional[str] = None
+    ) -> str:
+        raise NotImplementedError(
+            f"Method {inspect.currentframe().f_code.co_name!r} was not overloaded."
         )
 
     def join(self, rows: List[str], single_line: bool = False) -> str:
@@ -121,7 +158,12 @@ def _emit_start(self, **kwargs: Dict[str, Any]) -> List[str]:
             f"Method {inspect.currentframe().f_code.co_name!r} was not overloaded."
         )
 
-    def _emit_to_onnx(self, **kwargs: Dict[str, Any]) -> List[str]:
+    def _emit_to_onnx_model(self, **kwargs: Dict[str, Any]) -> List[str]:
+        raise NotImplementedError(
+            f"Method {inspect.currentframe().f_code.co_name!r} was not overloaded."
+        )
+
+    def _emit_to_onnx_function(self, **kwargs: Dict[str, Any]) -> List[str]:
         raise NotImplementedError(
             f"Method {inspect.currentframe().f_code.co_name!r} was not overloaded."
         )
@@ -161,100 +203,22 @@ def _emit_sparse_initializer(self, **kwargs: Dict[str, Any]) -> List[str]:
             f"Method {inspect.currentframe().f_code.co_name!r} was not overloaded."
         )
 
+    def _emit_begin_function(self, **kwargs: Dict[str, Any]) -> List[str]:
+        raise NotImplementedError(
+            f"Method {inspect.currentframe().f_code.co_name!r} was not overloaded."
+        )
 
-class Emitter(BaseEmitter):
-    """
-    Converts event into proper code.
-    """
-
-    def join(self, rows: List[str], single_line: bool = False) -> str:
-        "Join the rows"
-        if single_line:
-            return ".".join(rows)
-        return "".join(["(\n    ", "\n    .".join(rows), "\n)"])
-
-    def _emit_start(self, **kwargs: Dict[str, Any]) -> List[str]:
-        opsets = kwargs.get("opsets", {})
-        opset = opsets.get("", None)
-        if opset is not None:
-            del opsets[""]
-        args = []
-        if opset:
-            args.append(f"opset={opset}")
-        if opsets:
-            args.append(f"opsets={opsets}")
-        return [f"start({', '.join(args)})"]
-
-    def _emit_to_onnx(self, **kwargs: Dict[str, Any]) -> List[str]:
-        return ["to_onnx()"]
-
-    def _emit_begin_graph(self, **kwargs: Dict[str, Any]) -> List[str]:
-        return []
-
-    def _emit_end_graph(self, **kwargs: Dict[str, Any]) -> List[str]:
-        return []
-
-    def _emit_initializer(self, **kwargs: Dict[str, Any]) -> List[str]:
-        name = kwargs["name"]
-        value = kwargs["value"]
-        repl = {"bool": "bool_", "object": "object_", "str": "str_"}
-        sdtype = repl.get(str(value.dtype), str(str(value.dtype)))
-        return [
-            f"cst(np.array({value.tolist()}, dtype=np.{sdtype}))",
-            f"rename({name!r})",
-        ]
-
-    def _emit_input(self, **kwargs: Dict[str, Any]) -> List[str]:
-        name = kwargs["name"]
-        elem_type = kwargs.get("elem_type", None)
-        shape = kwargs.get("shape", None)
-        if elem_type and shape:
-            return [
-                f"vin({name!r}, elem_type=TensorProto.{ELEMENT_TYPE_NAME[elem_type]}, shape={shape!r})"
-            ]
-        if elem_type:
-            return [
-                f"vin({name!r}, elem_type=TensorProto.{ELEMENT_TYPE_NAME[elem_type]})"
-            ]
-        return [f"vin({name!r})"]
+    def _emit_function_input(self, **kwargs: Dict[str, Any]) -> List[str]:
+        raise NotImplementedError(
+            f"Method {inspect.currentframe().f_code.co_name!r} was not overloaded."
+        )
 
-    def _emit_output(self, **kwargs: Dict[str, Any]) -> List[str]:
-        inst = []
-        if "name" in kwargs:
-            name = kwargs["name"]
-            inst.append(f"bring({name!r})")
-        elem_type = kwargs.get("elem_type", None)
-        shape = kwargs.get("shape", None)
-        if elem_type and shape:
-            inst.append(
-                f"vout(elem_type=TensorProto.{ELEMENT_TYPE_NAME[elem_type]}, shape={shape!r})"
-            )
-        elif elem_type:
-            inst.append(f"vout(elem_type=TensorProto.{ELEMENT_TYPE_NAME[elem_type]})")
-        else:
-            inst.append("vout()")
-        return inst
+    def _emit_function_output(self, **kwargs: Dict[str, Any]) -> List[str]:
+        raise NotImplementedError(
+            f"Method {inspect.currentframe().f_code.co_name!r} was not overloaded."
+        )
 
-    def _emit_node(self, **kwargs: Dict[str, Any]) -> List[str]:
-        op_type = kwargs["op_type"]
-        inputs = kwargs["inputs"]
-        outputs = kwargs["outputs"]
-        if kwargs.get("domain", "") != "":
-            domain = kwargs["domain"]
-            op_type = f"{domain}.{op_type}"
-        atts = kwargs.get("atts", {})
-        args = []
-        for k, v in atts.items():
-            before, vatt = self.render_attribute_value(v)
-            if before:
-                raise NotImplementedError("Graph attribute not supported yet.")
-            args.append(f"{k}={vatt}")
-
-        str_inputs = ", ".join([f"{i!r}" for i in inputs])
-        inst = [f"bring({str_inputs})", f"{op_type}({', '.join(args)})"]
-        if len(outputs) == 1:
-            inst.append(f"rename({outputs[0]!r})")
-        else:
-            str_outputs = ", ".join([f"{o!r}" for o in outputs])
-            inst.append(f"rename({str_outputs})")
-        return inst
+    def _emit_function_attributes(self, **kwargs: Dict[str, Any]) -> List[str]:
+        raise NotImplementedError(
+            f"Method {inspect.currentframe().f_code.co_name!r} was not overloaded."
+        )
diff --git a/onnx_array_api/light_api/inner_emitter.py b/onnx_array_api/light_api/inner_emitter.py
index f5d5e4d..72ee725 100644
--- a/onnx_array_api/light_api/inner_emitter.py
+++ b/onnx_array_api/light_api/inner_emitter.py
@@ -1,7 +1,7 @@
-from typing import Any, Dict, List, Tuple
+from typing import Any, Dict, List, Optional, Tuple
 from onnx import AttributeProto
 from .annotations import ELEMENT_TYPE_NAME
-from .emitter import BaseEmitter
+from .base_emitter import BaseEmitter
 from .translate import Translater
 
 
@@ -31,6 +31,15 @@ def render_attribute_value(self, value: Any) -> Tuple[List[str], str]:
 
         return super().render_attribute_value(value)
 
+    def _make_attribute(
+        self, name: str, attr_type: int, ref_attr_name: Optional[str] = None
+    ) -> str:
+        if ref_attr_name is None:
+            raise NotImplementedError(
+                f"Cannot create attribute with name={name!r}, attr_type={attr_type}."
+            )
+        return f"make_ref_attribute(key={name!r}, attr_type={attr_type}, ref_attr_name={ref_attr_name!r})"
+
     def join(self, rows: List[str], single_line: bool = False) -> str:
         "Returns the separators. `single_line` is unused."
         return "\n".join(rows)
@@ -43,7 +52,7 @@ def _emit_start(self, **kwargs: Dict[str, Any]) -> List[str]:
         lines.append("]")
         return lines
 
-    def _emit_to_onnx(self, **kwargs: Dict[str, Any]) -> List[str]:
+    def _emit_to_onnx_model(self, **kwargs: Dict[str, Any]) -> List[str]:
         lines = [
             "model = make_model(",
             "    graph,",
@@ -82,11 +91,22 @@ def _emit_initializer(self, **kwargs: Dict[str, Any]) -> List[str]:
         name = kwargs["name"]
         value = kwargs["value"]
         repl = {"bool": "bool_", "object": "object_", "str": "str_"}
-        sdtype = repl.get(str(value.dtype), str(str(value.dtype)))
+        fra = "from_array"
+        sdtype = repl.get(str(value.dtype), str(value.dtype))
+        if sdtype.startswith("("):
+            from onnx.reference.custom_element_types import float8e4m3fn
+
+            if sdtype == str(float8e4m3fn):
+                sdtype = "float8e4m3fn"
+                fra = "from_array_extended"
+            else:
+                raise NotImplementedError(f"Unexpected dtype={sdtype}.")
+        else:
+            sdtype = f"np.{sdtype}"
         return [
             "initializers.append(",
-            "    from_array(",
-            f"        np.array({value.tolist()}, dtype=np.{sdtype}),",
+            f"    {fra}(",
+            f"        np.array({value.tolist()}, dtype={sdtype}),",
             f"        name={name!r}",
             "    )",
             ")",
@@ -124,7 +144,7 @@ def _emit_node(self, **kwargs: Dict[str, Any]) -> List[str]:
         before_lines = []
         lines = [
             "nodes.append(",
-            "    make_node(",
+            "    make_node_extended(",
             f"        {op_type!r},",
             f"        {inputs},",
             f"        {outputs},",
@@ -140,3 +160,46 @@ def _emit_node(self, **kwargs: Dict[str, Any]) -> List[str]:
         lines[-1] = lines[-1][:-1]
         lines.extend(["    )", ")"])
         return before_lines + lines
+
+    def _emit_begin_function(self, **kwargs: Dict[str, Any]) -> List[str]:
+        lines = [
+            "",
+            f"name_f = {kwargs['name']!r}",
+            f"domain_f = {kwargs['domain']!r}",
+            "nodes = []",
+            "inputs = []",
+            "outputs = []",
+            "atts = []",
+        ]
+        return lines
+
+    def _emit_to_onnx_function(self, **kwargs: Dict[str, Any]) -> List[str]:
+        return []
+
+    def _emit_function_input(self, **kwargs: Dict[str, Any]) -> List[str]:
+        return [f"inputs.append({kwargs['name']!r})"]
+
+    def _emit_function_output(self, **kwargs: Dict[str, Any]) -> List[str]:
+        return [f"outputs.append({kwargs['name']!r})"]
+
+    def _emit_function_attributes(self, **kwargs: Dict[str, Any]) -> List[str]:
+        atts = kwargs["attributes"]
+        if isinstance(atts, list) and all(map(lambda t: isinstance(t, str), atts)):
+            return [f"atts.extend({atts!r})"]
+        raise NotImplementedError(f"Unable to process function attributes {atts!r}.")
+
+    def _emit_end_function(self, **kwargs: Dict[str, Any]) -> List[str]:
+        lines = [
+            "functions.append(",
+            "    make_function(",
+            "        domain_f, ",
+            "        name_f, ",
+            "        inputs, ",
+            "        outputs, ",
+            "        nodes, ",
+            "        attributes=atts, ",
+            "        opset_imports=opset_imports,",
+            "   )",
+            ")",
+        ]
+        return lines
diff --git a/onnx_array_api/light_api/light_emitter.py b/onnx_array_api/light_api/light_emitter.py
new file mode 100644
index 0000000..c2925b5
--- /dev/null
+++ b/onnx_array_api/light_api/light_emitter.py
@@ -0,0 +1,104 @@
+from typing import Any, Dict, List
+from .annotations import ELEMENT_TYPE_NAME
+from .base_emitter import BaseEmitter
+
+
+class LightEmitter(BaseEmitter):
+    """
+    Converts event into proper code.
+    """
+
+    def join(self, rows: List[str], single_line: bool = False) -> str:
+        "Join the rows"
+        if single_line:
+            return ".".join(rows)
+        return "".join(["(\n    ", "\n    .".join(rows), "\n)"])
+
+    def _emit_start(self, **kwargs: Dict[str, Any]) -> List[str]:
+        opsets = kwargs.get("opsets", {})
+        opset = opsets.get("", None)
+        if opset is not None:
+            del opsets[""]
+        args = []
+        if opset:
+            args.append(f"opset={opset}")
+        if opsets:
+            args.append(f"opsets={opsets}")
+        return [f"start({', '.join(args)})"]
+
+    def _emit_to_onnx_model(self, **kwargs: Dict[str, Any]) -> List[str]:
+        return ["to_onnx()"]
+
+    def _emit_to_onnx_function(self, **kwargs: Dict[str, Any]) -> List[str]:
+        return []
+
+    def _emit_begin_graph(self, **kwargs: Dict[str, Any]) -> List[str]:
+        return []
+
+    def _emit_end_graph(self, **kwargs: Dict[str, Any]) -> List[str]:
+        return []
+
+    def _emit_initializer(self, **kwargs: Dict[str, Any]) -> List[str]:
+        name = kwargs["name"]
+        value = kwargs["value"]
+        repl = {"bool": "bool_", "object": "object_", "str": "str_"}
+        sdtype = repl.get(str(value.dtype), str(str(value.dtype)))
+        return [
+            f"cst(np.array({value.tolist()}, dtype=np.{sdtype}))",
+            f"rename({name!r})",
+        ]
+
+    def _emit_input(self, **kwargs: Dict[str, Any]) -> List[str]:
+        name = kwargs["name"]
+        elem_type = kwargs.get("elem_type", None)
+        shape = kwargs.get("shape", None)
+        if elem_type and shape:
+            return [
+                f"vin({name!r}, elem_type=TensorProto.{ELEMENT_TYPE_NAME[elem_type]}, shape={shape!r})"
+            ]
+        if elem_type:
+            return [
+                f"vin({name!r}, elem_type=TensorProto.{ELEMENT_TYPE_NAME[elem_type]})"
+            ]
+        return [f"vin({name!r})"]
+
+    def _emit_output(self, **kwargs: Dict[str, Any]) -> List[str]:
+        inst = []
+        if "name" in kwargs:
+            name = kwargs["name"]
+            inst.append(f"bring({name!r})")
+        elem_type = kwargs.get("elem_type", None)
+        shape = kwargs.get("shape", None)
+        if elem_type and shape:
+            inst.append(
+                f"vout(elem_type=TensorProto.{ELEMENT_TYPE_NAME[elem_type]}, shape={shape!r})"
+            )
+        elif elem_type:
+            inst.append(f"vout(elem_type=TensorProto.{ELEMENT_TYPE_NAME[elem_type]})")
+        else:
+            inst.append("vout()")
+        return inst
+
+    def _emit_node(self, **kwargs: Dict[str, Any]) -> List[str]:
+        op_type = kwargs["op_type"]
+        inputs = kwargs["inputs"]
+        outputs = kwargs["outputs"]
+        if kwargs.get("domain", "") != "":
+            domain = kwargs["domain"]
+            op_type = f"{domain}.{op_type}"
+        atts = kwargs.get("atts", {})
+        args = []
+        for k, v in atts.items():
+            before, vatt = self.render_attribute_value(v)
+            if before:
+                raise NotImplementedError("Graph attribute not supported yet.")
+            args.append(f"{k}={vatt}")
+
+        str_inputs = ", ".join([f"{i!r}" for i in inputs])
+        inst = [f"bring({str_inputs})", f"{op_type}({', '.join(args)})"]
+        if len(outputs) == 1:
+            inst.append(f"rename({outputs[0]!r})")
+        else:
+            str_outputs = ", ".join([f"{o!r}" for o in outputs])
+            inst.append(f"rename({str_outputs})")
+        return inst
diff --git a/onnx_array_api/light_api/make_helper.py b/onnx_array_api/light_api/make_helper.py
new file mode 100644
index 0000000..8b2703c
--- /dev/null
+++ b/onnx_array_api/light_api/make_helper.py
@@ -0,0 +1,65 @@
+from typing import Any, Optional, Sequence
+from onnx import AttributeProto, NodeProto
+from onnx.helper import make_attribute
+
+
+def make_ref_attribute(
+    key: str, attr_type: int, ref_attr_name: Optional[str] = None
+) -> AttributeProto:
+    """
+    Creates an attribute.
+
+    :param key: attribute name
+    :param attr_type: attribute type
+    :param ref_attr_name: if not None, link this attribute
+        to a function attribute
+    :return: attribute
+    """
+    att = AttributeProto()
+    att.name = key
+    att.type = attr_type
+    att.ref_attr_name = ref_attr_name
+    return att
+
+
+def make_node_extended(
+    op_type: str,
+    inputs: Sequence[str],
+    outputs: Sequence[str],
+    name: Optional[str] = None,
+    doc_string: Optional[str] = None,
+    domain: Optional[str] = None,
+    **kwargs: Any,
+) -> NodeProto:
+    """
+    Constructs a NodeProto.
+
+    :param op_type: The name of the operator to construct
+    :param inputs: list of input names
+    :param outputs: list of output names
+    :param name: optional unique identifier for NodeProto
+    :param doc_string: optional documentation string for NodeProto
+    :param domain: optional domain for NodeProto.
+        If it's None, we will just use default domain (which is empty)
+    :param kwargs: the attributes of the node.
+    :return: node proto
+    """
+    node = NodeProto()
+    node.op_type = op_type
+    node.input.extend(inputs)
+    node.output.extend(outputs)
+    if name:
+        node.name = name
+    if doc_string:
+        node.doc_string = doc_string
+    if domain is not None:
+        node.domain = domain
+    if kwargs:
+        for key, value in sorted(kwargs.items()):
+            if value is None:
+                continue
+            if isinstance(value, AttributeProto):
+                node.attribute.append(value)
+            else:
+                node.attribute.append(make_attribute(key, value))
+    return node
diff --git a/onnx_array_api/light_api/translate.py b/onnx_array_api/light_api/translate.py
index a61ce24..31c1bce 100644
--- a/onnx_array_api/light_api/translate.py
+++ b/onnx_array_api/light_api/translate.py
@@ -2,7 +2,9 @@
 import numpy as np
 from onnx import AttributeProto, FunctionProto, GraphProto, ModelProto, NodeProto
 from onnx.numpy_helper import to_array
-from .emitter import EventType, Emitter
+from ..reference import to_array_extended
+from .base_emitter import EventType
+from .light_emitter import LightEmitter
 
 
 class Translater:
@@ -13,10 +15,10 @@ class Translater:
     def __init__(
         self,
         proto: Union[ModelProto, FunctionProto, GraphProto],
-        emitter: Optional[Emitter] = None,
+        emitter: Optional[LightEmitter] = None,
     ):
         self.proto_ = proto
-        self.emitter = emitter or Emitter()
+        self.emitter = emitter or LightEmitter()
 
     def __repr__(self) -> str:
         return f"{self.__class__.__name__}(<{type(self.proto_)})"
@@ -30,6 +32,7 @@ def export(self, as_str, single_line: bool = False) -> Union[str, List[str]]:
         :return: list of instructions
         """
         rows = []
+        last_event = None
         if isinstance(self.proto_, ModelProto):
             opsets = {d.domain: d.version for d in self.proto_.opset_import}
             rows.extend(self.emitter(EventType.START, opsets=opsets))
@@ -38,6 +41,9 @@ def export(self, as_str, single_line: bool = False) -> Union[str, List[str]]:
             nodes = self.proto_.graph.node
             initializers = self.proto_.graph.initializer
             sparse_initializers = self.proto_.graph.sparse_initializer
+            attributes = []
+            last_event = EventType.TO_ONNX_MODEL
+            is_function = False
         elif isinstance(self.proto_, (FunctionProto, GraphProto)):
             inputs = self.proto_.input
             outputs = self.proto_.output
@@ -48,30 +54,43 @@ def export(self, as_str, single_line: bool = False) -> Union[str, List[str]]:
             else:
                 initializers = []
                 sparse_initializers = []
+            attributes = (
+                self.proto_.attribute if hasattr(self.proto_, "attribute") else []
+            )
+            is_function = isinstance(self.proto_, FunctionProto)
+            last_event = (
+                EventType.TO_ONNX_FUNCTION if is_function else EventType.TO_ONNX_MODEL
+            )
         else:
             raise ValueError(f"Unexpected type {type(self.proto_)} for proto.")
 
         if sparse_initializers:
             raise NotImplementedError("Sparse initializer not supported yet.")
 
-        rows.extend(
-            self.emitter(
-                EventType.BEGIN_FUNCTION
-                if isinstance(self.proto_, FunctionProto)
-                else EventType.BEGIN_GRAPH
+        if is_function:
+            rows.extend(
+                self.emitter(
+                    EventType.BEGIN_FUNCTION,
+                    name=self.proto_.name,
+                    domain=self.proto_.domain,
+                )
             )
-        )
+        else:
+            rows.extend(self.emitter(EventType.BEGIN_GRAPH))
 
         for i in initializers:
             rows.extend(
                 self.emitter(
-                    EventType.INITIALIZER, name=i.name, init=i, value=to_array(i)
+                    EventType.INITIALIZER,
+                    name=i.name,
+                    init=i,
+                    value=to_array_extended(i),
                 )
             )
 
         for i in inputs:
-            if isinstance(i, str):
-                rows.extend(self.emitter(EventType.INPUT, name=i))
+            if is_function:
+                rows.extend(self.emitter(EventType.FUNCTION_INPUT, name=i))
             else:
                 rows.extend(
                     self.emitter(
@@ -85,6 +104,11 @@ def export(self, as_str, single_line: bool = False) -> Union[str, List[str]]:
                     )
                 )
 
+        if is_function and attributes:
+            rows.extend(
+                self.emitter(EventType.FUNCTION_ATTRIBUTES, attributes=list(attributes))
+            )
+
         for node in nodes:
             atts = self.extract_attributes(node)
             rows.extend(
@@ -99,8 +123,8 @@ def export(self, as_str, single_line: bool = False) -> Union[str, List[str]]:
             )
 
         for o in outputs:
-            if isinstance(o, str):
-                rows.extend(self.emitter(EventType.INPUT, name=o))
+            if is_function:
+                rows.extend(self.emitter(EventType.FUNCTION_OUTPUT, name=o))
             else:
                 rows.extend(
                     self.emitter(
@@ -117,19 +141,21 @@ def export(self, as_str, single_line: bool = False) -> Union[str, List[str]]:
             name = self.proto_.name
         else:
             name = self.proto_.graph.name
+
         rows.extend(
             self.emitter(
-                EventType.END_FUNCTION
-                if isinstance(self.proto_, FunctionProto)
-                else EventType.END_GRAPH,
+                EventType.END_FUNCTION if is_function else EventType.END_GRAPH,
                 name=name,
             )
         )
 
         if isinstance(self.proto_, ModelProto) and len(self.proto_.functions) > 0:
-            raise NotImplementedError("Local functions are not yet implemented.")
+            for fu in self.proto_.functions:
+                cl = self.__class__(fu, self.emitter)
+                text = cl.export(False, single_line=False)
+                rows.extend(text)
 
-        rows.extend(self.emitter(EventType.TO_ONNX))
+        rows.extend(self.emitter(last_event))
         if as_str:
             return self.emitter.join(rows, single_line=single_line)
         return rows

From ebafa262a60a284f90a372d93c5ac1ed4f93cd64 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= <xadupre@users.noreply.github.com>
Date: Mon, 8 Jan 2024 23:54:34 +0100
Subject: [PATCH 07/44] Adds function to plot onnx model as graphs (#61)

* Add methods to draw onnx plots

* improve versatility

* doc

* disable test when graphviz not installed

* documentation

* add missing function
---
 .gitignore                                 |   1 +
 CHANGELOGS.rst                             |   1 +
 _unittests/ut_plotting/test_graphviz.py    |  51 +++++
 onnx_array_api/ext_test_case.py            |  18 ++
 onnx_array_api/plotting/graphviz_helper.py | 236 +++++++++++++++++++++
 5 files changed, 307 insertions(+)
 create mode 100644 _unittests/ut_plotting/test_graphviz.py
 create mode 100644 onnx_array_api/plotting/graphviz_helper.py

diff --git a/.gitignore b/.gitignore
index ca8ce49..64d45d6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,6 +14,7 @@ build/*
 *egg-info/*
 onnxruntime_profile*
 prof
+test*.png
 _doc/sg_execution_times.rst
 _doc/auto_examples/*
 _doc/examples/_cache/*
diff --git a/CHANGELOGS.rst b/CHANGELOGS.rst
index 39aaea9..dad0930 100644
--- a/CHANGELOGS.rst
+++ b/CHANGELOGS.rst
@@ -4,6 +4,7 @@ Change Logs
 0.2.0
 +++++
 
+* :pr:`61`: adds function to plot onnx model as graphs
 * :pr:`60`: supports translation of local functions
 * :pr:`59`: add methods to update nodes in GraphAPI 
 
diff --git a/_unittests/ut_plotting/test_graphviz.py b/_unittests/ut_plotting/test_graphviz.py
new file mode 100644
index 0000000..420779e
--- /dev/null
+++ b/_unittests/ut_plotting/test_graphviz.py
@@ -0,0 +1,51 @@
+import os
+import unittest
+import onnx.parser
+from onnx_array_api.ext_test_case import (
+    ExtTestCase,
+    skipif_ci_windows,
+    skipif_ci_apple,
+)
+from onnx_array_api.plotting.dot_plot import to_dot
+from onnx_array_api.plotting.graphviz_helper import draw_graph_graphviz, plot_dot
+
+
+class TestGraphviz(ExtTestCase):
+    @classmethod
+    def _get_graph(cls):
+        return onnx.parser.parse_model(
+            """
+            <ir_version: 8, opset_import: [ "": 18]>
+            agraph (float[N] x) => (float[N] z) {
+                two = Constant <value_float=2.0> ()
+                four = Add(two, two)
+                z = Mul(x, x)
+            }"""
+        )
+
+    @skipif_ci_windows("graphviz not installed")
+    @skipif_ci_apple("graphviz not installed")
+    def test_draw_graph_graphviz(self):
+        fout = "test_draw_graph_graphviz.png"
+        dot = to_dot(self._get_graph())
+        draw_graph_graphviz(dot, image=fout)
+        self.assertExists(os.path.exists(fout))
+
+    @skipif_ci_windows("graphviz not installed")
+    @skipif_ci_apple("graphviz not installed")
+    def test_draw_graph_graphviz_proto(self):
+        fout = "test_draw_graph_graphviz_proto.png"
+        dot = self._get_graph()
+        draw_graph_graphviz(dot, image=fout)
+        self.assertExists(os.path.exists(fout))
+
+    @skipif_ci_windows("graphviz not installed")
+    @skipif_ci_apple("graphviz not installed")
+    def test_plot_dot(self):
+        dot = to_dot(self._get_graph())
+        ax = plot_dot(dot)
+        ax.get_figure().savefig("test_plot_dot.png")
+
+
+if __name__ == "__main__":
+    unittest.main(verbosity=2)
diff --git a/onnx_array_api/ext_test_case.py b/onnx_array_api/ext_test_case.py
index 1068bda..3c12e65 100644
--- a/onnx_array_api/ext_test_case.py
+++ b/onnx_array_api/ext_test_case.py
@@ -19,6 +19,10 @@ def is_windows() -> bool:
     return sys.platform == "win32"
 
 
+def is_apple() -> bool:
+    return sys.platform == "darwin"
+
+
 def skipif_ci_windows(msg) -> Callable:
     """
     Skips a unit test if it runs on :epkg:`azure pipeline` on :epkg:`Windows`.
@@ -29,6 +33,16 @@ def skipif_ci_windows(msg) -> Callable:
     return lambda x: x
 
 
+def skipif_ci_apple(msg) -> Callable:
+    """
+    Skips a unit test if it runs on :epkg:`azure pipeline` on macOS (Apple).
+    """
+    if is_apple() and is_azure():
+        msg = f"Test does not work on azure pipeline (Apple). {msg}"
+        return unittest.skip(msg)
+    return lambda x: x
+
+
 def ignore_warnings(warns: List[Warning]) -> Callable:
     """
     Catches warnings.
@@ -230,6 +244,10 @@ def assertEmpty(self, value: Any):
             return
         raise AssertionError(f"value is not empty: {value!r}.")
 
+    def assertExists(self, name):
+        if not os.path.exists(name):
+            raise AssertionError(f"File or folder {name!r} does not exists.")
+
     def assertHasAttr(self, cls: type, name: str):
         if not hasattr(cls, name):
             raise AssertionError(f"Class {cls} has no attribute {name!r}.")
diff --git a/onnx_array_api/plotting/graphviz_helper.py b/onnx_array_api/plotting/graphviz_helper.py
new file mode 100644
index 0000000..2dd93c2
--- /dev/null
+++ b/onnx_array_api/plotting/graphviz_helper.py
@@ -0,0 +1,236 @@
+import os
+import subprocess
+import sys
+import tempfile
+from typing import List, Optional, Tuple, Union
+import numpy as np
+from onnx import ModelProto
+
+
+def _find_in_PATH(prog: str) -> Optional[str]:
+    """
+    Looks for a specific file in every folder listed in ``%PATH%``,
+    it returns None if the file is not found.
+
+    :param prog: program to look for
+    :return: folder containing *prog* or None
+    """
+    sep = ";" if sys.platform.startswith("win") else ":"
+    path = os.environ["PATH"]
+    for p in path.split(sep):
+        f = os.path.join(p, prog)
+        if os.path.exists(f):
+            return p
+    return None
+
+
+def _find_graphviz_dot(exc: bool = True) -> str:
+    """
+    Determines the path to graphviz (on Windows),
+    the function tests the existence of versions 34 to 59
+    assuming it was installed in a standard folder:
+    ``C:\\Program Files (x86)\\Graphviz2.<version>\\bin``.
+
+    :param exc: raise an exception or be silent
+    :return: path to dot
+    :raises FileNotFoundError: if graphviz not found
+    """
+    if sys.platform.startswith("win"):
+        version = list(range(34, 60))
+        version.extend([f"{v}.1" for v in version])
+        for v in version:
+            graphviz_dot = f"C:\\Program Files (x86)\\Graphviz2.{v}\\bin\\dot.exe"
+            if os.path.exists(graphviz_dot):
+                return graphviz_dot
+        extra = ["build/update_modules/Graphviz/bin"]
+        for ext in extra:
+            graphviz_dot = os.path.join(ext, "dot.exe")
+            if os.path.exists(graphviz_dot):
+                return graphviz_dot
+        p = _find_in_PATH("dot.exe")
+        if p is None:
+            if exc:
+                raise FileNotFoundError(
+                    f"Unable to find graphviz, look into paths such as {graphviz_dot}."
+                )
+            return None
+        return os.path.join(p, "dot.exe")
+    # linux
+    return "dot"
+
+
+def _run_subprocess(
+    args: List[str],
+    cwd: Optional[str] = None,
+):
+    assert not isinstance(
+        args, str
+    ), "args should be a sequence of strings, not a string."
+
+    p = subprocess.Popen(
+        args,
+        cwd=cwd,
+        shell=False,
+        env=os.environ,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.STDOUT,
+    )
+    raise_exception = False
+    output = ""
+    while True:
+        output = p.stdout.readline().decode(errors="ignore")
+        if output == "" and p.poll() is not None:
+            break
+        if output:
+            if (
+                "fatal error" in output
+                or "CMake Error" in output
+                or "gmake: ***" in output
+                or "): error C" in output
+                or ": error: " in output
+            ):
+                raise_exception = True
+    p.poll()
+    p.stdout.close()
+    if raise_exception:
+        raise RuntimeError(
+            "An error was found in the output. The build is stopped.\n{output}"
+        )
+    return output
+
+
+def _run_graphviz(filename: str, image: str, engine: str = "dot") -> str:
+    """
+    Run :epkg:`Graphviz`.
+
+    :param filename: filename which contains the graph definition
+    :param image: output image
+    :param engine: *dot* or *neato*
+    :return: output of graphviz
+    """
+    ext = os.path.splitext(image)[-1]
+    assert ext in {
+        ".png",
+        ".bmp",
+        ".fig",
+        ".gif",
+        ".ico",
+        ".jpg",
+        ".jpeg",
+        ".pdf",
+        ".ps",
+        ".svg",
+        ".vrml",
+        ".tif",
+        ".tiff",
+        ".wbmp",
+    }, f"Unexpected extension {ext!r} for {image!r}."
+    if sys.platform.startswith("win"):
+        bin_ = os.path.dirname(_find_graphviz_dot())
+        # if bin not in os.environ["PATH"]:
+        #    os.environ["PATH"] = os.environ["PATH"] + ";" + bin
+        exe = os.path.join(bin_, engine)
+    else:
+        exe = engine
+    if os.path.exists(image):
+        os.remove(image)
+    output = _run_subprocess([exe, f"-T{ext[1:]}", filename, "-o", image])
+    assert os.path.exists(image), f"Graphviz failed due to {output}"
+    return output
+
+
+def draw_graph_graphviz(
+    dot: Union[str, ModelProto],
+    image: str,
+    engine: str = "dot",
+) -> str:
+    """
+    Draws a graph using :epkg:`Graphviz`.
+
+    :param dot: dot graph or ModelProto
+    :param image: output image, its extension selects the output format
+    :param engine: *dot* or *neato*
+    :return: output of the command line
+        running :epkg:`Graphviz`
+
+    The function creates a temporary file to store the dot file.
+    """
+    if isinstance(dot, ModelProto):
+        from .dot_plot import to_dot
+
+        sdot = to_dot(dot)
+    else:
+        sdot = dot
+    with tempfile.NamedTemporaryFile(delete=False) as fp:
+        fp.write(sdot.encode("utf-8"))
+        fp.close()
+
+        filename = fp.name
+        assert os.path.exists(
+            filename
+        ), f"File {filename!r} cannot be created to store the graph."
+        out = _run_graphviz(filename, image, engine=engine)
+        assert os.path.exists(
+            image
+        ), f"Graphviz failed with no reason, {image!r} not found, output is {out}."
+        os.remove(filename)
+        return out
+
+
+def plot_dot(
+    dot: Union[str, ModelProto],
+    ax: Optional["matplotlib.axis.Axis"] = None,  # noqa: F821
+    engine: str = "dot",
+    figsize: Optional[Tuple[int, int]] = None,
+) -> "matplotlib.axis.Axis":  # noqa: F821
+    """
+    Draws a dot graph into a matplotlib graph.
+
+    :param dot: dot graph or ModelProto
+    :param ax: existing axis, a new one is created if None
+    :param engine: *dot* or *neato*
+    :param figsize: figsize used to create the axis if *ax* is None
+    :return: the matplotlib axis
+        the graph is drawn on
+
+    .. plot::
+
+        import matplotlib.pyplot as plt
+        import onnx.parser
+        from onnx_array_api.plotting.graphviz_helper import plot_dot
+        model = onnx.parser.parse_model(
+                    '''
+                    <ir_version: 8, opset_import: [ "": 18]>
+                    agraph (float[N] x) => (float[N] z) {
+                        two = Constant <value_float=2.0> ()
+                        four = Add(two, two)
+                        z = Mul(four, four)
+                    }''')
+        ax = plot_dot(model)
+        ax.set_title("Dummy graph")
+        plt.show()
+    """
+    if ax is None:
+        import matplotlib.pyplot as plt
+
+        _, ax = plt.subplots(1, 1, figsize=figsize)
+        clean = True
+    else:
+        clean = False
+
+    from PIL import Image
+
+    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as fp:
+        fp.close()
+
+        draw_graph_graphviz(dot, fp.name, engine=engine)
+        img = np.asarray(Image.open(fp.name))
+        os.remove(fp.name)
+
+        ax.imshow(img)
+
+    if clean:
+        ax.get_xaxis().set_visible(False)
+        ax.get_yaxis().set_visible(False)
+        ax.get_figure().tight_layout()
+    return ax

From c7375caf77529a927d2cf95cc24e924c98851739 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= <xadupre@users.noreply.github.com>
Date: Tue, 9 Jan 2024 11:41:07 +0100
Subject: [PATCH 08/44] Refactoring and fixes minor bugs in light API (#62)

* fix minor bugs in light API

* refactoring

* complete refactoring

* fix unit test file

* fix wrong import

* improve shape handling

* move files

* fix documentation

* doc
---
 _doc/api/index.rst                            |   1 +
 _doc/api/light_api.rst                        |  46 +------------
 _doc/api/translate_api.rst                    |  52 ++++++++++++++
 .../ut_light_api/test_backend_export.py       |   4 +-
 _unittests/ut_light_api/test_light_api.py     |  20 +++++-
 .../custom_ops_type_inference_fails_0.onnx    | Bin
 .../_data/stft_inlined_batch_1.onnx           | Bin
 .../test_translate.py                         |   5 +-
 .../test_translate_classic.py                 |   7 +-
 onnx_array_api/_command_lines_parser.py       |   2 +-
 onnx_array_api/{light_api => }/annotations.py |  14 +++-
 onnx_array_api/graph_api/graph_builder.py     |   3 +
 onnx_array_api/light_api/__init__.py          |  63 +----------------
 onnx_array_api/light_api/_op_var.py           |   2 +-
 onnx_array_api/light_api/_op_vars.py          |  24 +++----
 onnx_array_api/light_api/model.py             |   8 ++-
 onnx_array_api/light_api/var.py               |   6 +-
 onnx_array_api/translate_api/__init__.py      |  64 ++++++++++++++++++
 .../base_emitter.py                           |   0
 .../inner_emitter.py                          |   2 +-
 .../light_emitter.py                          |   2 +-
 .../make_helper.py                            |   0
 .../{light_api => translate_api}/translate.py |   0
 pyproject.toml                                |   3 +-
 24 files changed, 189 insertions(+), 139 deletions(-)
 create mode 100644 _doc/api/translate_api.rst
 rename _unittests/{ut_light_api => ut_translate_api}/_data/custom_ops_type_inference_fails_0.onnx (100%)
 rename _unittests/{ut_light_api => ut_translate_api}/_data/stft_inlined_batch_1.onnx (100%)
 rename _unittests/{ut_light_api => ut_translate_api}/test_translate.py (97%)
 rename _unittests/{ut_light_api => ut_translate_api}/test_translate_classic.py (98%)
 rename onnx_array_api/{light_api => }/annotations.py (87%)
 create mode 100644 onnx_array_api/translate_api/__init__.py
 rename onnx_array_api/{light_api => translate_api}/base_emitter.py (100%)
 rename onnx_array_api/{light_api => translate_api}/inner_emitter.py (99%)
 rename onnx_array_api/{light_api => translate_api}/light_emitter.py (98%)
 rename onnx_array_api/{light_api => translate_api}/make_helper.py (100%)
 rename onnx_array_api/{light_api => translate_api}/translate.py (100%)

diff --git a/_doc/api/index.rst b/_doc/api/index.rst
index 121c416..8cfe033 100644
--- a/_doc/api/index.rst
+++ b/_doc/api/index.rst
@@ -9,6 +9,7 @@ API
     array_api
     graph_api
     light_api
+    translate_api
     npx_core_api
     npx_functions
     npx_jit_eager
diff --git a/_doc/api/light_api.rst b/_doc/api/light_api.rst
index 15342c1..e2a2d32 100644
--- a/_doc/api/light_api.rst
+++ b/_doc/api/light_api.rst
@@ -11,17 +11,10 @@ start
 
 .. autofunction:: onnx_array_api.light_api.start
 
-translate
-+++++++++
-
-.. autofunction:: onnx_array_api.light_api.translate
-
-make_helper
-+++++++++++
+g
++
 
-.. autofunction:: onnx_array_api.light_api.make_helper.make_node_extended
-
-.. autofunction:: onnx_array_api.light_api.make_helper.make_ref_attribute
+.. autofunction:: onnx_array_api.light_api.g
 
 Classes for the Light API
 =========================
@@ -69,39 +62,6 @@ Vars
     :members:
     :inherited-members:
 
-Classes for the Translater
-==========================
-
-BaseEmitter
-+++++++++++
-
-.. autoclass:: onnx_array_api.light_api.base_emitter.BaseEmitter
-    :members:
-
-EventType
-+++++++++
-
-.. autoclass:: onnx_array_api.light_api.base_emitter.EventType
-    :members:
-
-InnerEmitter
-++++++++++++
-
-.. autoclass:: onnx_array_api.light_api.inner_emitter.InnerEmitter
-    :members:
-
-LightEmitter
-++++++++++++
-
-.. autoclass:: onnx_array_api.light_api.light_emitter.LightEmitter
-    :members:
-
-Translater
-++++++++++
-
-.. autoclass:: onnx_array_api.light_api.translate.Translater
-    :members:
-
 Available operators
 ===================
 
diff --git a/_doc/api/translate_api.rst b/_doc/api/translate_api.rst
new file mode 100644
index 0000000..b554538
--- /dev/null
+++ b/_doc/api/translate_api.rst
@@ -0,0 +1,52 @@
+============================
+onnx_array_api.translate_api
+============================
+
+
+Main API
+========
+
+translate
++++++++++
+
+.. autofunction:: onnx_array_api.translate_api.translate
+
+make_helper
++++++++++++
+
+.. autofunction:: onnx_array_api.translate_api.make_helper.make_node_extended
+
+.. autofunction:: onnx_array_api.translate_api.make_helper.make_ref_attribute
+
+Classes for the Translater
+==========================
+
+BaseEmitter
++++++++++++
+
+.. autoclass:: onnx_array_api.translate_api.base_emitter.BaseEmitter
+    :members:
+
+EventType
++++++++++
+
+.. autoclass:: onnx_array_api.translate_api.base_emitter.EventType
+    :members:
+
+InnerEmitter
+++++++++++++
+
+.. autoclass:: onnx_array_api.translate_api.inner_emitter.InnerEmitter
+    :members:
+
+LightEmitter
+++++++++++++
+
+.. autoclass:: onnx_array_api.translate_api.light_emitter.LightEmitter
+    :members:
+
+Translater
+++++++++++
+
+.. autoclass:: onnx_array_api.translate_api.translate.Translater
+    :members:
diff --git a/_unittests/ut_light_api/test_backend_export.py b/_unittests/ut_light_api/test_backend_export.py
index f597d21..42ac7f5 100644
--- a/_unittests/ut_light_api/test_backend_export.py
+++ b/_unittests/ut_light_api/test_backend_export.py
@@ -22,8 +22,8 @@
 from onnx.numpy_helper import from_array, to_array
 from onnx.backend.base import Device, DeviceType
 from onnx_array_api.reference import ExtendedReferenceEvaluator
-from onnx_array_api.light_api.make_helper import make_node_extended
-from onnx_array_api.light_api import translate
+from onnx_array_api.translate_api.make_helper import make_node_extended
+from onnx_array_api.translate_api import translate
 from onnx_array_api.plotting.text_plot import onnx_simple_text_plot
 
 verbosity = 10 if "-v" in sys.argv or "--verbose" in sys.argv else 0
diff --git a/_unittests/ut_light_api/test_light_api.py b/_unittests/ut_light_api/test_light_api.py
index f6ae051..6b22ae9 100644
--- a/_unittests/ut_light_api/test_light_api.py
+++ b/_unittests/ut_light_api/test_light_api.py
@@ -211,7 +211,7 @@ def test_neg(self):
         self.assertIsInstance(v, Var)
         self.assertEqual(["X"], v.parent.input_names)
         s = str(v)
-        self.assertEqual("X:FLOAT", s)
+        self.assertEqual("X:FLOAT:[]", s)
         onx = start().vin("X").Neg().rename("Y").vout().to_onnx()
         self.assertIsInstance(onx, ModelProto)
         ref = ReferenceEvaluator(onx)
@@ -510,7 +510,23 @@ def ah(self):
         expected = (a > 0).astype(int).astype(np.float32).reshape((-1, 1))
         self.assertEqualArray(expected, got)
 
+    def test_input_shape(self):
+        kernel = (np.arange(9) + 1).reshape(3, 3).astype(np.float32)
+        model = (
+            start()
+            .vin("X", shape=[None, None])
+            .cst(kernel[np.newaxis, np.newaxis, ...])
+            .rename("W")
+            .bring("X", "W")
+            .Conv(pads=[1, 1, 1, 1])
+            .rename("Y")
+            .vout(shape=[])
+            .to_onnx()
+        )
+        i = str(model.graph.input[0]).replace("\n", "").replace(" ", "")
+        self.assertNotIn("shape{}", i)
+
 
 if __name__ == "__main__":
-    TestLightApi().test_domain()
+    TestLightApi().test_add()
     unittest.main(verbosity=2)
diff --git a/_unittests/ut_light_api/_data/custom_ops_type_inference_fails_0.onnx b/_unittests/ut_translate_api/_data/custom_ops_type_inference_fails_0.onnx
similarity index 100%
rename from _unittests/ut_light_api/_data/custom_ops_type_inference_fails_0.onnx
rename to _unittests/ut_translate_api/_data/custom_ops_type_inference_fails_0.onnx
diff --git a/_unittests/ut_light_api/_data/stft_inlined_batch_1.onnx b/_unittests/ut_translate_api/_data/stft_inlined_batch_1.onnx
similarity index 100%
rename from _unittests/ut_light_api/_data/stft_inlined_batch_1.onnx
rename to _unittests/ut_translate_api/_data/stft_inlined_batch_1.onnx
diff --git a/_unittests/ut_light_api/test_translate.py b/_unittests/ut_translate_api/test_translate.py
similarity index 97%
rename from _unittests/ut_light_api/test_translate.py
rename to _unittests/ut_translate_api/test_translate.py
index 9974f81..d505135 100644
--- a/_unittests/ut_light_api/test_translate.py
+++ b/_unittests/ut_translate_api/test_translate.py
@@ -5,8 +5,9 @@
 from onnx.defs import onnx_opset_version
 from onnx.reference import ReferenceEvaluator
 from onnx_array_api.ext_test_case import ExtTestCase
-from onnx_array_api.light_api import start, translate, g
-from onnx_array_api.light_api.base_emitter import EventType
+from onnx_array_api.light_api import start, g
+from onnx_array_api.translate_api import translate
+from onnx_array_api.translate_api.base_emitter import EventType
 
 OPSET_API = min(19, onnx_opset_version() - 1)
 
diff --git a/_unittests/ut_light_api/test_translate_classic.py b/_unittests/ut_translate_api/test_translate_classic.py
similarity index 98%
rename from _unittests/ut_light_api/test_translate_classic.py
rename to _unittests/ut_translate_api/test_translate_classic.py
index 4d52183..c6cb412 100644
--- a/_unittests/ut_light_api/test_translate_classic.py
+++ b/_unittests/ut_translate_api/test_translate_classic.py
@@ -15,7 +15,8 @@
 )
 from onnx.checker import check_model
 from onnx_array_api.ext_test_case import ExtTestCase
-from onnx_array_api.light_api import start, translate
+from onnx_array_api.light_api import start
+from onnx_array_api.translate_api import translate
 
 OPSET_API = min(19, onnx_opset_version() - 1)
 
@@ -335,7 +336,7 @@ def _run(cls, code):
         import onnx
         import onnx.helper
         import onnx.numpy_helper
-        import onnx_array_api.light_api.make_helper
+        import onnx_array_api.translate_api.make_helper
         import onnx.reference.custom_element_types
 
         def from_array_extended(tensor, name=None):
@@ -362,7 +363,7 @@ def from_array_extended(tensor, name=None):
         globs = onnx.__dict__.copy()
         globs.update(onnx.helper.__dict__)
         globs.update(onnx.numpy_helper.__dict__)
-        globs.update(onnx_array_api.light_api.make_helper.__dict__)
+        globs.update(onnx_array_api.translate_api.make_helper.__dict__)
         globs.update(onnx.reference.custom_element_types.__dict__)
         globs["from_array_extended"] = from_array_extended
         locs = {}
diff --git a/onnx_array_api/_command_lines_parser.py b/onnx_array_api/_command_lines_parser.py
index 3860f18..71f5a35 100644
--- a/onnx_array_api/_command_lines_parser.py
+++ b/onnx_array_api/_command_lines_parser.py
@@ -56,7 +56,7 @@ def get_parser_translate() -> ArgumentParser:
 
 
 def _cmd_translate(argv: List[Any]):
-    from .light_api import translate
+    from .translate_api import translate
 
     parser = get_parser_translate()
     args = parser.parse_args(argv[1:])
diff --git a/onnx_array_api/light_api/annotations.py b/onnx_array_api/annotations.py
similarity index 87%
rename from onnx_array_api/light_api/annotations.py
rename to onnx_array_api/annotations.py
index 3fe7973..9941f95 100644
--- a/onnx_array_api/light_api/annotations.py
+++ b/onnx_array_api/annotations.py
@@ -81,9 +81,17 @@ def elem_type_int(elem_type: ELEMENT_TYPE) -> int:
     return np_dtype_to_tensor_dtype(elem_type)
 
 
-def make_shape(shape: TensorShapeProto) -> SHAPE_TYPE:
+def _pick_dim(d, empty_dim):
+    if d.dim_value:
+        return d.dim_value
+    if d.dim_param:
+        return d.dim_param
+    return empty_dim
+
+
+def make_shape(shape: TensorShapeProto, empty_dim: Optional[Any] = None) -> SHAPE_TYPE:
     "Extracts a shape from a tensor type."
-    if hasattr(shape, "dims"):
-        res = [(d.dim_value if d.dim_value else d.dim_param) for d in shape.dims]
+    if hasattr(shape, "dim"):
+        res = [_pick_dim(d, empty_dim=empty_dim) for i, d in enumerate(shape.dim)]
         return tuple(res)
     return None
diff --git a/onnx_array_api/graph_api/graph_builder.py b/onnx_array_api/graph_api/graph_builder.py
index 85a838f..f238eee 100644
--- a/onnx_array_api/graph_api/graph_builder.py
+++ b/onnx_array_api/graph_api/graph_builder.py
@@ -631,6 +631,9 @@ def _build_initializers(self) -> List[TensorProto]:
                     t = onh.from_array(v, name=k)
                 res.append(t)
                 continue
+            if isinstance(v, TensorProto):
+                res.append(v)
+                continue
             raise TypeError(
                 f"Unable to convert initializer {k!r} with type "
                 f"{type(v)} into a TensorProto."
diff --git a/onnx_array_api/light_api/__init__.py b/onnx_array_api/light_api/__init__.py
index 558e626..3fe9489 100644
--- a/onnx_array_api/light_api/__init__.py
+++ b/onnx_array_api/light_api/__init__.py
@@ -1,10 +1,8 @@
 from typing import Dict, Optional
 from onnx import ModelProto
-from .annotations import domain
+from ..annotations import domain
 from .model import OnnxGraph, ProtoType
-from .translate import Translater
 from .var import Var, Vars
-from .inner_emitter import InnerEmitter
 
 
 def start(
@@ -56,62 +54,3 @@ def g() -> OnnxGraph:
     :return: an instance of :class:`onnx_array_api.light_api.OnnxGraph`
     """
     return OnnxGraph(proto_type=ProtoType.GRAPH)
-
-
-def translate(proto: ModelProto, single_line: bool = False, api: str = "light") -> str:
-    """
-    Translates an ONNX proto into a code using :ref:`l-light-api`
-    to describe the ONNX graph.
-
-    :param proto: model to translate
-    :param single_line: as a single line or not
-    :param api: API to export into,
-        default is `"light"` and this is handle by class
-        :class:`onnx_array_api.light_api.light_emitter.LightEmitter`,
-        another value is `"onnx"` which is the inner API implemented
-        in onnx package.
-    :return: code
-
-    .. runpython::
-        :showcode:
-
-        from onnx_array_api.light_api import start, translate
-
-        onx = (
-            start()
-            .vin("X")
-            .reshape((-1, 1))
-            .Transpose(perm=[1, 0])
-            .rename("Y")
-            .vout()
-            .to_onnx()
-        )
-        code = translate(onx)
-        print(code)
-
-    The inner API from onnx packahe is also available.
-
-    .. runpython::
-        :showcode:
-
-        from onnx_array_api.light_api import start, translate
-
-        onx = (
-            start()
-            .vin("X")
-            .reshape((-1, 1))
-            .Transpose(perm=[1, 0])
-            .rename("Y")
-            .vout()
-            .to_onnx()
-        )
-        code = translate(onx, api="onnx")
-        print(code)
-    """
-    if api == "light":
-        tr = Translater(proto)
-        return tr.export(single_line=single_line, as_str=True)
-    if api == "onnx":
-        tr = Translater(proto, emitter=InnerEmitter())
-        return tr.export(as_str=True)
-    raise ValueError(f"Unexpected value {api!r} for api.")
diff --git a/onnx_array_api/light_api/_op_var.py b/onnx_array_api/light_api/_op_var.py
index 8a995b3..27a04d1 100644
--- a/onnx_array_api/light_api/_op_var.py
+++ b/onnx_array_api/light_api/_op_var.py
@@ -1,5 +1,5 @@
 from typing import List, Optional, Union
-from .annotations import AI_ONNX_ML, domain
+from ..annotations import AI_ONNX_ML, domain
 
 
 class OpsVar:
diff --git a/onnx_array_api/light_api/_op_vars.py b/onnx_array_api/light_api/_op_vars.py
index f4dee1c..64d0d2d 100644
--- a/onnx_array_api/light_api/_op_vars.py
+++ b/onnx_array_api/light_api/_op_vars.py
@@ -49,19 +49,17 @@ def Conv(
         pads: Optional[List[int]] = None,
         strides: Optional[List[int]] = None,
     ) -> "Var":
-        dilations = dilations or []
-        kernel_shape = kernel_shape or []
-        pads = pads or []
-        strides = strides or []
-        return self.make_node(
-            "Conv",
-            *self.vars_,
-            auto_pad=auto_pad,
-            dilations=dilations,
-            group=group,
-            kernel_shape=kernel_shape,
-            pads=pads,
-            strides=strides,
+        kwargs = {}
+        if dilations is not None:
+            kwargs["dilations"] = dilations
+        if kernel_shape is not None:
+            kwargs["kernel_shape"] = kernel_shape
+        if pads is not None:
+            kwargs["pads"] = pads
+        if strides is not None:
+            kwargs["strides"] = strides
+        return self.make_node(
+            "Conv", *self.vars_, auto_pad=auto_pad, group=group, **kwargs
         )
 
     def ConvInteger(
diff --git a/onnx_array_api/light_api/model.py b/onnx_array_api/light_api/model.py
index 67fc18e..5a7eef5 100644
--- a/onnx_array_api/light_api/model.py
+++ b/onnx_array_api/light_api/model.py
@@ -14,7 +14,7 @@
 )
 from onnx.numpy_helper import from_array
 from ..ext_test_case import is_azure, is_windows
-from .annotations import (
+from ..annotations import (
     elem_type_int,
     make_shape,
     GRAPH_PROTO,
@@ -180,6 +180,8 @@ def make_output(
         :param elem_type: element type (the input is assumed to be a tensor)
         :param shape: shape
         :return: an instance of ValueInfoProto
+
+        If the checker fails, try `shape=[]`.
         """
         if not self.has_name(name):
             raise ValueError(f"Name {name!r} does not exist.")
@@ -332,7 +334,7 @@ def _fix_name_tensor_input(
     ) -> Union[TensorProto, SparseTensorProto, ValueInfoProto]:
         obj = self._fix_name_tensor(obj)
         shape = make_shape(obj.type.tensor_type.shape)
-        if shape is None:
+        if not shape:
             tensor_type_proto = make_tensor_type_proto(
                 obj.type.tensor_type.elem_type, []
             )
@@ -344,7 +346,7 @@ def _fix_name_tensor_output(
     ) -> Union[TensorProto, SparseTensorProto, ValueInfoProto]:
         obj = self._fix_name_tensor(obj)
         shape = make_shape(obj.type.tensor_type.shape)
-        if shape is None:
+        if not shape:
             tensor_type_proto = make_tensor_type_proto(
                 obj.type.tensor_type.elem_type, []
             )
diff --git a/onnx_array_api/light_api/var.py b/onnx_array_api/light_api/var.py
index 882dcb7..2d7eac8 100644
--- a/onnx_array_api/light_api/var.py
+++ b/onnx_array_api/light_api/var.py
@@ -3,7 +3,7 @@
 import numpy as np
 from onnx import TensorProto
 from onnx.defs import get_schema
-from .annotations import (
+from ..annotations import (
     elem_type_int,
     make_shape,
     ELEMENT_TYPE,
@@ -318,6 +318,8 @@ def vout(
         :param elem_type: element_type
         :param shape: shape
         :return: instance of :class:`onnx_array_api.light_api.Var`
+
+        If the checker fails, try `shape=[]`.
         """
         output = self.parent.make_output(self.name, elem_type=elem_type, shape=shape)
         return Var(
@@ -461,6 +463,8 @@ def vout(
 
         :param elem_type_shape: list of tuple(element_type, shape)
         :return: instance of :class:`onnx_array_api.light_api.Vars`
+
+        If the checker fails, try `shape=[]`.
         """
         vars = []
         for i, v in enumerate(self.vars_):
diff --git a/onnx_array_api/translate_api/__init__.py b/onnx_array_api/translate_api/__init__.py
new file mode 100644
index 0000000..25daef6
--- /dev/null
+++ b/onnx_array_api/translate_api/__init__.py
@@ -0,0 +1,64 @@
+from onnx import ModelProto
+from .translate import Translater
+from .inner_emitter import InnerEmitter
+
+
+def translate(proto: ModelProto, single_line: bool = False, api: str = "light") -> str:
+    """
+    Translates an ONNX proto into a code using :ref:`l-light-api`
+    to describe the ONNX graph.
+
+    :param proto: model to translate
+    :param single_line: as a single line or not
+    :param api: API to export into,
+        default is `"light"` and this is handled by class
+        :class:`onnx_array_api.translate_api.light_emitter.LightEmitter`,
+        another value is `"onnx"` which is the inner API implemented
+        in onnx package.
+    :return: code
+
+    .. runpython::
+        :showcode:
+
+        from onnx_array_api.light_api import start
+        from onnx_array_api.translate_api import translate
+
+        onx = (
+            start()
+            .vin("X")
+            .reshape((-1, 1))
+            .Transpose(perm=[1, 0])
+            .rename("Y")
+            .vout()
+            .to_onnx()
+        )
+        code = translate(onx)
+        print(code)
+
+    The inner API from onnx package is also available.
+
+    .. runpython::
+        :showcode:
+
+        from onnx_array_api.light_api import start
+        from onnx_array_api.translate_api import translate
+
+        onx = (
+            start()
+            .vin("X")
+            .reshape((-1, 1))
+            .Transpose(perm=[1, 0])
+            .rename("Y")
+            .vout()
+            .to_onnx()
+        )
+        code = translate(onx, api="onnx")
+        print(code)
+    """
+    if api == "light":
+        tr = Translater(proto)
+        return tr.export(single_line=single_line, as_str=True)
+    if api == "onnx":
+        tr = Translater(proto, emitter=InnerEmitter())
+        return tr.export(as_str=True)
+    raise ValueError(f"Unexpected value {api!r} for api.")
diff --git a/onnx_array_api/light_api/base_emitter.py b/onnx_array_api/translate_api/base_emitter.py
similarity index 100%
rename from onnx_array_api/light_api/base_emitter.py
rename to onnx_array_api/translate_api/base_emitter.py
diff --git a/onnx_array_api/light_api/inner_emitter.py b/onnx_array_api/translate_api/inner_emitter.py
similarity index 99%
rename from onnx_array_api/light_api/inner_emitter.py
rename to onnx_array_api/translate_api/inner_emitter.py
index 72ee725..50d4f5e 100644
--- a/onnx_array_api/light_api/inner_emitter.py
+++ b/onnx_array_api/translate_api/inner_emitter.py
@@ -1,6 +1,6 @@
 from typing import Any, Dict, List, Optional, Tuple
 from onnx import AttributeProto
-from .annotations import ELEMENT_TYPE_NAME
+from ..annotations import ELEMENT_TYPE_NAME
 from .base_emitter import BaseEmitter
 from .translate import Translater
 
diff --git a/onnx_array_api/light_api/light_emitter.py b/onnx_array_api/translate_api/light_emitter.py
similarity index 98%
rename from onnx_array_api/light_api/light_emitter.py
rename to onnx_array_api/translate_api/light_emitter.py
index c2925b5..7a7aef9 100644
--- a/onnx_array_api/light_api/light_emitter.py
+++ b/onnx_array_api/translate_api/light_emitter.py
@@ -1,5 +1,5 @@
 from typing import Any, Dict, List
-from .annotations import ELEMENT_TYPE_NAME
+from ..annotations import ELEMENT_TYPE_NAME
 from .base_emitter import BaseEmitter
 
 
diff --git a/onnx_array_api/light_api/make_helper.py b/onnx_array_api/translate_api/make_helper.py
similarity index 100%
rename from onnx_array_api/light_api/make_helper.py
rename to onnx_array_api/translate_api/make_helper.py
diff --git a/onnx_array_api/light_api/translate.py b/onnx_array_api/translate_api/translate.py
similarity index 100%
rename from onnx_array_api/light_api/translate.py
rename to onnx_array_api/translate_api/translate.py
diff --git a/pyproject.toml b/pyproject.toml
index fd94bd3..0b0e71d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -23,8 +23,9 @@ max-complexity = 10
 "onnx_array_api/light_api/__init__.py" = ["F401"]
 "onnx_array_api/light_api/_op_var.py" = ["F821"]
 "onnx_array_api/light_api/_op_vars.py" = ["F821"]
-"onnx_array_api/light_api/annotations.py" = ["F821"]
+"onnx_array_api/annotations.py" = ["F821"]
 "onnx_array_api/light_api/model.py" = ["F821"]
+"onnx_array_api/translate_api/__init__.py" = ["F401"]
 "onnx_array_api/npx/__init__.py" = ["F401", "F403"]
 "onnx_array_api/npx/npx_functions.py" = ["F821"]
 "onnx_array_api/npx/npx_functions_test.py" = ["F821"]

From 63875fa9cd96c2265d1b7565811fc11ed32417ed Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= <xadupre@users.noreply.github.com>
Date: Tue, 9 Jan 2024 12:07:11 +0100
Subject: [PATCH 09/44] Fix default values (#63)

---
 onnx_array_api/light_api/_op_vars.py | 163 +++++++++++++--------------
 1 file changed, 81 insertions(+), 82 deletions(-)

diff --git a/onnx_array_api/light_api/_op_vars.py b/onnx_array_api/light_api/_op_vars.py
index 64d0d2d..4f30dbe 100644
--- a/onnx_array_api/light_api/_op_vars.py
+++ b/onnx_array_api/light_api/_op_vars.py
@@ -10,8 +10,10 @@ def BitShift(self, direction: str = "") -> "Var":
         return self.make_node("BitShift", *self.vars_, direction=direction)
 
     def CenterCropPad(self, axes: Optional[List[int]] = None) -> "Var":
-        axes = axes or []
-        return self.make_node("CenterCropPad", *self.vars_, axes=axes)
+        kwargs = {}
+        if axes is not None:
+            kwargs["axes"] = axes
+        return self.make_node("CenterCropPad", *self.vars_, **kwargs)
 
     def Clip(
         self,
@@ -27,12 +29,14 @@ def Col2Im(
         pads: Optional[List[int]] = None,
         strides: Optional[List[int]] = None,
     ) -> "Var":
-        dilations = dilations or []
-        pads = pads or []
-        strides = strides or []
-        return self.make_node(
-            "Col2Im", *self.vars_, dilations=dilations, pads=pads, strides=strides
-        )
+        kwargs = {}
+        if dilations is not None:
+            kwargs["dilations"] = dilations
+        if pads is not None:
+            kwargs["pads"] = pads
+        if strides is not None:
+            kwargs["strides"] = strides
+        return self.make_node("Col2Im", *self.vars_, **kwargs)
 
     def Compress(self, axis: int = 0) -> "Var":
         return self.make_node("Compress", *self.vars_, axis=axis)
@@ -71,19 +75,17 @@ def ConvInteger(
         pads: Optional[List[int]] = None,
         strides: Optional[List[int]] = None,
     ) -> "Var":
-        dilations = dilations or []
-        kernel_shape = kernel_shape or []
-        pads = pads or []
-        strides = strides or []
+        kwargs = {}
+        if dilations is not None:
+            kwargs["dilations"] = dilations
+        if kernel_shape is not None:
+            kwargs["kernel_shape"] = kernel_shape
+        if pads is not None:
+            kwargs["pads"] = pads
+        if strides is not None:
+            kwargs["strides"] = strides
         return self.make_node(
-            "ConvInteger",
-            *self.vars_,
-            auto_pad=auto_pad,
-            dilations=dilations,
-            group=group,
-            kernel_shape=kernel_shape,
-            pads=pads,
-            strides=strides,
+            "ConvInteger", *self.vars_, auto_pad=auto_pad, group=group, **kwargs
         )
 
     def ConvTranspose(
@@ -97,23 +99,21 @@ def ConvTranspose(
         pads: Optional[List[int]] = None,
         strides: Optional[List[int]] = None,
     ) -> "Var":
-        dilations = dilations or []
-        kernel_shape = kernel_shape or []
-        output_padding = output_padding or []
-        output_shape = output_shape or []
-        pads = pads or []
-        strides = strides or []
-        return self.make_node(
-            "ConvTranspose",
-            *self.vars_,
-            auto_pad=auto_pad,
-            dilations=dilations,
-            group=group,
-            kernel_shape=kernel_shape,
-            output_padding=output_padding,
-            output_shape=output_shape,
-            pads=pads,
-            strides=strides,
+        kwargs = {}
+        if dilations is not None:
+            kwargs["dilations"] = dilations
+        if kernel_shape is not None:
+            kwargs["kernel_shape"] = kernel_shape
+        if pads is not None:
+            kwargs["pads"] = pads
+        if strides is not None:
+            kwargs["strides"] = strides
+        if output_padding is not None:
+            kwargs["output_padding"] = output_padding
+        if output_shape is not None:
+            kwargs["output_shape"] = output_shape
+        return self.make_node(
+            "ConvTranspose", *self.vars_, auto_pad=auto_pad, group=group, **kwargs
         )
 
     def CumSum(self, exclusive: int = 0, reverse: int = 0) -> "Var":
@@ -135,19 +135,17 @@ def DeformConv(
         pads: Optional[List[int]] = None,
         strides: Optional[List[int]] = None,
     ) -> "Var":
-        dilations = dilations or []
-        kernel_shape = kernel_shape or []
-        pads = pads or []
-        strides = strides or []
+        kwargs = {}
+        if dilations is not None:
+            kwargs["dilations"] = dilations
+        if kernel_shape is not None:
+            kwargs["kernel_shape"] = kernel_shape
+        if pads is not None:
+            kwargs["pads"] = pads
+        if strides is not None:
+            kwargs["strides"] = strides
         return self.make_node(
-            "DeformConv",
-            *self.vars_,
-            dilations=dilations,
-            group=group,
-            kernel_shape=kernel_shape,
-            offset_group=offset_group,
-            pads=pads,
-            strides=strides,
+            "DeformConv", *self.vars_, group=group, offset_group=offset_group, **kwargs
         )
 
     def DequantizeLinear(self, axis: int = 1) -> "Var":
@@ -204,12 +202,11 @@ def MatMulInteger(
     def MaxRoiPool(
         self, pooled_shape: Optional[List[int]] = None, spatial_scale: float = 1.0
     ) -> "Var":
-        pooled_shape = pooled_shape or []
+        kwargs = {}
+        if pooled_shape is not None:
+            kwargs["pooled_shape"] = pooled_shape
         return self.make_node(
-            "MaxRoiPool",
-            *self.vars_,
-            pooled_shape=pooled_shape,
-            spatial_scale=spatial_scale,
+            "MaxRoiPool", *self.vars_, spatial_scale=spatial_scale, **kwargs
         )
 
     def MaxUnpool(
@@ -218,16 +215,14 @@ def MaxUnpool(
         pads: Optional[List[int]] = None,
         strides: Optional[List[int]] = None,
     ) -> "Var":
-        kernel_shape = kernel_shape or []
-        pads = pads or []
-        strides = strides or []
-        return self.make_node(
-            "MaxUnpool",
-            *self.vars_,
-            kernel_shape=kernel_shape,
-            pads=pads,
-            strides=strides,
-        )
+        kwargs = {}
+        if kernel_shape is not None:
+            kwargs["kernel_shape"] = kernel_shape
+        if pads is not None:
+            kwargs["pads"] = pads
+        if strides is not None:
+            kwargs["strides"] = strides
+        return self.make_node("MaxUnpool", *self.vars_, **kwargs)
 
     def MelWeightMatrix(self, output_datatype: int = 1) -> "Var":
         return self.make_node(
@@ -267,19 +262,17 @@ def QLinearConv(
         pads: Optional[List[int]] = None,
         strides: Optional[List[int]] = None,
     ) -> "Var":
-        dilations = dilations or []
-        kernel_shape = kernel_shape or []
-        pads = pads or []
-        strides = strides or []
+        kwargs = {}
+        if kernel_shape is not None:
+            kwargs["kernel_shape"] = kernel_shape
+        if pads is not None:
+            kwargs["pads"] = pads
+        if strides is not None:
+            kwargs["strides"] = strides
+        if dilations is not None:
+            kwargs["dilations"] = dilations
         return self.make_node(
-            "QLinearConv",
-            *self.vars_,
-            auto_pad=auto_pad,
-            dilations=dilations,
-            group=group,
-            kernel_shape=kernel_shape,
-            pads=pads,
-            strides=strides,
+            "QLinearConv", *self.vars_, auto_pad=auto_pad, group=group, **kwargs
         )
 
     def QLinearMatMul(
@@ -303,7 +296,9 @@ def RandomNormal(
         seed: float = 0.0,
         shape: Optional[List[int]] = None,
     ) -> "Var":
-        shape = shape or []
+        kwargs = {}
+        if shape is not None:
+            kwargs["shape"] = shape
         return self.make_node(
             "RandomNormal",
             *self.vars_,
@@ -311,7 +306,7 @@ def RandomNormal(
             mean=mean,
             scale=scale,
             seed=seed,
-            shape=shape,
+            **kwargs,
         )
 
     def RandomUniform(
@@ -322,7 +317,9 @@ def RandomUniform(
         seed: float = 0.0,
         shape: Optional[List[int]] = None,
     ) -> "Var":
-        shape = shape or []
+        kwargs = {}
+        if shape is not None:
+            kwargs["shape"] = shape
         return self.make_node(
             "RandomUniform",
             *self.vars_,
@@ -330,7 +327,7 @@ def RandomUniform(
             high=high,
             low=low,
             seed=seed,
-            shape=shape,
+            **kwargs,
         )
 
     def Range(
@@ -437,12 +434,13 @@ def Resize(
         mode: str = "nearest",
         nearest_mode: str = "round_prefer_floor",
     ) -> "Var":
-        axes = axes or []
+        kwargs = {}
+        if axes is not None:
+            kwargs["axes"] = axes
         return self.make_node(
             "Resize",
             *self.vars_,
             antialias=antialias,
-            axes=axes,
             coordinate_transformation_mode=coordinate_transformation_mode,
             cubic_coeff_a=cubic_coeff_a,
             exclude_outside=exclude_outside,
@@ -450,6 +448,7 @@ def Resize(
             keep_aspect_ratio_policy=keep_aspect_ratio_policy,
             mode=mode,
             nearest_mode=nearest_mode,
+            **kwargs,
         )
 
     def RoiAlign(

From 642a0cce5bc9d008767cb54171faa415ce9f47a8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= <xadupre@users.noreply.github.com>
Date: Tue, 9 Jan 2024 16:41:29 +0100
Subject: [PATCH 10/44] small change (#64)

---
 .../ut_xrun_doc/test_documentation_examples.py      | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/_unittests/ut_xrun_doc/test_documentation_examples.py b/_unittests/ut_xrun_doc/test_documentation_examples.py
index 170e82b..12a36ba 100644
--- a/_unittests/ut_xrun_doc/test_documentation_examples.py
+++ b/_unittests/ut_xrun_doc/test_documentation_examples.py
@@ -65,14 +65,15 @@ def add_test_methods(cls):
         fold = os.path.normpath(os.path.join(this, "..", "..", "_doc", "examples"))
         found = os.listdir(fold)
         for name in found:
-            if name.startswith("plot_") and name.endswith(".py"):
-                short_name = os.path.split(os.path.splitext(name)[0])[-1]
+            if not name.startswith("plot_") or not name.endswith(".py"):
+                continue
+            short_name = os.path.split(os.path.splitext(name)[0])[-1]
 
-                def _test_(self, name=name):
-                    res = self.run_test(fold, name, verbose=VERBOSE)
-                    self.assertTrue(res)
+            def _test_(self, name=name):
+                res = self.run_test(fold, name, verbose=VERBOSE)
+                self.assertTrue(res)
 
-                setattr(cls, f"test_{short_name}", _test_)
+            setattr(cls, f"test_{short_name}", _test_)
 
 
 TestDocumentationExamples.add_test_methods()

From d93af40b2405044891ba30f4446a1d0c494de94d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= <xadupre@users.noreply.github.com>
Date: Fri, 12 Jan 2024 09:53:37 +0100
Subject: [PATCH 11/44] update license (#65)

* update license

* year

* year
---
 LICENSE.txt  | 2 +-
 _doc/conf.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/LICENSE.txt b/LICENSE.txt
index fa034ef..e027853 100644
--- a/LICENSE.txt
+++ b/LICENSE.txt
@@ -1,4 +1,4 @@
-﻿Copyright (c) 2023, Xavier Dupré
+﻿Copyright (c) 2023-2024, Xavier Dupré
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
diff --git a/_doc/conf.py b/_doc/conf.py
index d942076..30356d1 100644
--- a/_doc/conf.py
+++ b/_doc/conf.py
@@ -35,7 +35,7 @@
 source_suffix = ".rst"
 master_doc = "index"
 project = "onnx-array-api"
-copyright = "2023, Xavier Dupré"
+copyright = "2023-2024, Xavier Dupré"
 author = "Xavier Dupré"
 version = __version__
 release = __version__

From c62184cc1bf9ae95a31e9b6aa3a21c3450f5935f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= <xadupre@users.noreply.github.com>
Date: Tue, 23 Jan 2024 12:15:35 +0100
Subject: [PATCH 12/44] Improves documentation (#66)

---
 CODE_OF_CONDUCT.md |  15 +++++++
 README.rst         |  38 +++++++++++++++-
 _doc/index.rst     | 105 ++++++++++++++++++++++++++++++++-------------
 3 files changed, 126 insertions(+), 32 deletions(-)
 create mode 100644 CODE_OF_CONDUCT.md

diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md
new file mode 100644
index 0000000..b4e1709
--- /dev/null
+++ b/CODE_OF_CONDUCT.md
@@ -0,0 +1,15 @@
+# Code of Conduct
+
+We are a community based on openness, as well as friendly and didactic discussions.
+
+We aspire to treat everybody equally, and value their contributions.
+
+Decisions are made based on technical merit and consensus.
+
+Code is not the only way to help the project. Reviewing pull requests,
+answering questions to help others on mailing lists or issues, organizing and
+teaching tutorials, working on the website, improving the documentation, are
+all priceless contributions.
+
+We abide by the principles of openness, respect, and consideration of others of
+the Python Software Foundation: https://www.python.org/psf/codeofconduct/
diff --git a/README.rst b/README.rst
index b24b73d..fa6cdb7 100644
--- a/README.rst
+++ b/README.rst
@@ -31,6 +31,10 @@ onnx-array-api: APIs to create ONNX Graphs
 
 **onnx-array-api** implements APIs to create custom ONNX graphs.
 The objective is to speed up the implementation of converter libraries.
+
+Numpy API
++++++++++
+
 The first one matches **numpy API**.
 It gives the user the ability to convert functions written
 following the numpy API to convert that function into ONNX as
@@ -113,10 +117,15 @@ It supports eager mode as well:
     l2_loss=[0.002]
     [0.042]
 
+Light API
++++++++++
+
 The second API or **Light API** tends to do every thing in one line.
+It is inspired from the `Reverse Polish Notation
+<https://en.wikipedia.org/wiki/Reverse_Polish_notation>`_.
 The euclidean distance looks like the following:
 
-::
+.. code-block:: python
 
     import numpy as np
     from onnx_array_api.light_api import start
@@ -142,3 +151,30 @@ The library is released on
 `pypi/onnx-array-api <https://pypi.org/project/onnx-array-api/>`_
 and its documentation is published at
 `APIs to create ONNX Graphs <https://sdpython.github.io/doc/onnx-array-api/dev/>`_.
+
+GraphBuilder API
+++++++++++++++++
+
+Almost every converting library (converting a machine learned model to ONNX) implements
+its own graph builder and customizes it for its needs.
+A graph builder handles some frequent tasks such as giving names to intermediate
+results, loading, saving onnx models. It can be used as well to extend an existing graph.
+
+.. code-block:: python
+
+    import numpy as np
+    from onnx_array_api.graph_api  import GraphBuilder
+
+    g = GraphBuilder()
+    g.make_tensor_input("X", np.float32, (None, None))
+    g.make_tensor_input("Y", np.float32, (None, None))
+    r1 = g.make_node("Sub", ["X", "Y"])  # the name given to the output is given by the class,
+                                         # it ensures the name is unique
+    init = g.make_initializer(np.array([2], dtype=np.int64))  # the class automatically
+                                                              # converts the array to a tensor
+    r2 = g.make_node("Pow", [r1, init])
+    g.make_node("ReduceSum", [r2], outputs=["Z"])  # the output name is given because
+                                                   # the user wants to choose the name
+    g.make_tensor_output("Z", np.float32, (None, None))
+
+    onx = g.to_onnx()  # final conversion to onnx
diff --git a/_doc/index.rst b/_doc/index.rst
index f2f8998..02c4eed 100644
--- a/_doc/index.rst
+++ b/_doc/index.rst
@@ -45,11 +45,83 @@ The objective is to speed up the implementation of converter libraries.
     CHANGELOGS
     license
 
+Sources available on
+`github/onnx-array-api <https://github.com/sdpython/onnx-array-api>`_.
+
+GraphBuilder API
+++++++++++++++++
+
+Almost every converting library (converting a machine learned model to ONNX) implements
+its own graph builder and customizes it for its needs.
+A graph builder handles some frequent tasks such as giving names to intermediate
+results, loading, saving onnx models. It can be used as well to extend an existing graph.
+See :ref:`l-graph-api`.
+
+.. runpython::
+    :showcode:
+
+    import numpy as np
+    from onnx_array_api.graph_api  import GraphBuilder
+    from onnx_array_api.plotting.text_plot import onnx_simple_text_plot
+
+    g = GraphBuilder()
+    g.make_tensor_input("X", np.float32, (None, None))
+    g.make_tensor_input("Y", np.float32, (None, None))
+    r1 = g.make_node("Sub", ["X", "Y"])  # the name given to the output is given by the class,
+                                         # it ensures the name is unique
+    init = g.make_initializer(np.array([2], dtype=np.int64))  # the class automatically
+                                                              # converts the array to a tensor
+    r2 = g.make_node("Pow", [r1, init])
+    g.make_node("ReduceSum", [r2], outputs=["Z"])  # the output name is given because
+                                                   # the user wants to choose the name
+    g.make_tensor_output("Z", np.float32, (None, None))
+
+    onx = g.to_onnx()  # final conversion to onnx
+
+    print(onnx_simple_text_plot(onx))
+
+Light API
++++++++++
+
+The syntax is inspired from the
+`Reverse Polish Notation <https://en.wikipedia.org/wiki/Reverse_Polish_notation>`_.
+This kind of API is easy to use to build new graphs,
+less easy to extend an existing graph. See :ref:`l-light-api`.
+
+.. runpython::
+    :showcode:
+
+    import numpy as np
+    from onnx_array_api.light_api import start
+    from onnx_array_api.plotting.text_plot import onnx_simple_text_plot
+
+    model = (
+        start()
+        .vin("X")
+        .vin("Y")
+        .bring("X", "Y")
+        .Sub()
+        .rename("dxy")
+        .cst(np.array([2], dtype=np.int64), "two")
+        .bring("dxy", "two")
+        .Pow()
+        .ReduceSum()
+        .rename("Z")
+        .vout()
+        .to_onnx()
+    )
+
+    print(onnx_simple_text_plot(model))
+
 Numpy API
 +++++++++
 
-Sources available on
-`github/onnx-array-api <https://github.com/sdpython/onnx-array-api>`_.
+Writing ONNX graphs requires knowing the ONNX syntax unless
+it is possible to reuse an existing syntax such as :epkg:`numpy`.
+This is what this API is doing.
+This kind of API is easy to use to build new graphs,
+almost impossible to use to extend existing graphs as it usually requires
+knowing onnx for that. See :ref:`l-numpy-api-onnx`.
 
 .. runpython::
     :showcode:
@@ -110,35 +182,6 @@ Sources available on
     res = jitted_myloss(x, y)
     print(to_dot(jitted_myloss.get_onnx()))
 
-Light API
-+++++++++
-
-.. runpython::
-    :showcode:
-
-    import numpy as np
-    from onnx_array_api.light_api import start
-    from onnx_array_api.plotting.text_plot import onnx_simple_text_plot
-
-    model = (
-        start()
-        .vin("X")
-        .vin("Y")
-        .bring("X", "Y")
-        .Sub()
-        .rename("dxy")
-        .cst(np.array([2], dtype=np.int64), "two")
-        .bring("dxy", "two")
-        .Pow()
-        .ReduceSum()
-        .rename("Z")
-        .vout()
-        .to_onnx()
-    )
-
-    print(onnx_simple_text_plot(model))
-
-
 Older versions
 ++++++++++++++
 

From daaf49d2f53a93bb768282f01e0deac69d2dda63 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= <xadupre@users.noreply.github.com>
Date: Sat, 27 Jan 2024 12:28:38 +0100
Subject: [PATCH 13/44] update style (#67)

---
 _doc/examples/plot_benchmark_rf.py       | 1 +
 _doc/examples/plot_optimization.py       | 1 +
 _doc/examples/plot_profiling.py          | 1 +
 _unittests/ut_xrun_doc/test_profiling.py | 1 +
 4 files changed, 4 insertions(+)

diff --git a/_doc/examples/plot_benchmark_rf.py b/_doc/examples/plot_benchmark_rf.py
index 8b62e3d..423669c 100644
--- a/_doc/examples/plot_benchmark_rf.py
+++ b/_doc/examples/plot_benchmark_rf.py
@@ -12,6 +12,7 @@
 import and registration of necessary converters
 ++++++++++++++++++++++++++++++++++++++++++++++++
 """
+
 import pickle
 import os
 import time
diff --git a/_doc/examples/plot_optimization.py b/_doc/examples/plot_optimization.py
index 466fac0..c78419b 100644
--- a/_doc/examples/plot_optimization.py
+++ b/_doc/examples/plot_optimization.py
@@ -15,6 +15,7 @@
 Optimize a model with onnxruntime
 +++++++++++++++++++++++++++++++++
 """
+
 import os
 from pprint import pprint
 import numpy
diff --git a/_doc/examples/plot_profiling.py b/_doc/examples/plot_profiling.py
index 7a61b68..201de95 100644
--- a/_doc/examples/plot_profiling.py
+++ b/_doc/examples/plot_profiling.py
@@ -15,6 +15,7 @@
 Optimize a model with onnxruntime
 +++++++++++++++++++++++++++++++++
 """
+
 import os
 import numpy
 import matplotlib.pyplot as plt
diff --git a/_unittests/ut_xrun_doc/test_profiling.py b/_unittests/ut_xrun_doc/test_profiling.py
index e6c7e69..a7d3ce1 100644
--- a/_unittests/ut_xrun_doc/test_profiling.py
+++ b/_unittests/ut_xrun_doc/test_profiling.py
@@ -1,6 +1,7 @@
 """
 @brief      test tree node (time=5s)
 """
+
 import os
 import sys
 import time

From 9544f97c56bb212ec52da87bc72e5c33513c0f0c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= <xadupre@users.noreply.github.com>
Date: Fri, 2 Feb 2024 17:49:49 +0100
Subject: [PATCH 14/44] update requirements (#68)

---
 azure-pipelines.yml  | 4 ++--
 requirements-dev.txt | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 907bb9f..61587f4 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -4,8 +4,8 @@ jobs:
     vmImage: 'ubuntu-latest'
   strategy:
     matrix:
-      Python311-Linux:
-        python.version: '3.11'
+      Python312-Linux:
+        python.version: '3.12'
     maxParallel: 3
 
   steps:
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 5804529..5e262e3 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -11,7 +11,7 @@ lightgbm
 matplotlib
 ml-dtypes
 git+https://github.com/onnx/onnxmltools.git
-onnxruntime>=1.16.1
+onnxruntime>=1.17.0
 openpyxl
 packaging
 pandas

From ad22d16dac97d0426cc4a573ef731b41c2b27089 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= <xadupre@users.noreply.github.com>
Date: Fri, 2 Feb 2024 17:51:18 +0100
Subject: [PATCH 15/44] update requirements (#69)


From 6ed1d1c0608fbae1cb55f7d8453d3ce7e35cdcb8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= <xadupre@users.noreply.github.com>
Date: Mon, 5 Feb 2024 12:46:53 +0100
Subject: [PATCH 16/44] Fix bugs in remove_identity (#70)

* update requirements

* fix bugs in remove_identity nodes
---
 onnx_array_api/graph_api/graph_builder.py | 41 +++++++++++++++++++----
 pyproject.toml                            |  4 +--
 2 files changed, 37 insertions(+), 8 deletions(-)

diff --git a/onnx_array_api/graph_api/graph_builder.py b/onnx_array_api/graph_api/graph_builder.py
index f238eee..c9c2059 100644
--- a/onnx_array_api/graph_api/graph_builder.py
+++ b/onnx_array_api/graph_api/graph_builder.py
@@ -836,11 +836,12 @@ def remove_identity_nodes(self):
         """
         Removes identity nodes.
         """
-        # f<irst pass: detect replacements
+        # first pass: detect replacements
         new_nodes = []
         input_names = set(i.name for i in self.inputs)
         output_names = set(i.name for i in self.outputs)
         replacements = {}
+        replacements_rev = {}
         for node in self.nodes:
             if node.op_type != "Identity":
                 new_nodes.append(node)
@@ -848,18 +849,44 @@ def remove_identity_nodes(self):
 
             if node.output[0] not in output_names:
                 old_name, new_name = node.output[0], node.input[0]
-            elif node.input[0] not in input_names:
+            elif (
+                node.input[0] not in input_names
+                and node.input[0] not in output_names
+                and node.input[0] not in replacements
+            ):
                 old_name, new_name = node.input[0], node.output[0]
             else:
                 new_nodes.append(node)
                 continue
 
             # the new name can be set for replacements as well
-            assert old_name not in replacements
             if new_name in replacements:
                 new_name = replacements[new_name]
-                assert new_name not in replacements
+                assert new_name not in replacements, (
+                    f"Name {old_name!r} still in {replacements}, node.op_type={node.op_type!r}, "
+                    f"node.input={node.input}, node.output={node.output}, "
+                    f"input_names={input_names}, output_names={output_names}"
+                )
+            if old_name in replacements_rev:
+                old_old_name = replacements_rev[old_name]
+                replacements[old_old_name] = new_name
+                replacements_rev[new_name] = old_old_name
+            if old_name in replacements:
+                replacements[replacements[old_name]] = new_name
+            assert new_name not in replacements, (
+                f"Name {old_name!r} still in {replacements}, node.op_type={node.op_type!r}, "
+                f"node.input={node.input}, node.output={node.output}, "
+                f"input_names={input_names}, output_names={output_names}"
+            )
             replacements[old_name] = new_name
+            replacements_rev[new_name] = old_name
+
+            # verification
+            for k, v in replacements.items():
+                assert v not in replacements, (
+                    f"replacement {k}->{v} is not possible because of "
+                    f"{v}->{replacements[v]}, old_name={old_name!r}, new_name={new_name!r}"
+                )
 
         # second pass: replacements in initializer
         for k, v in replacements.items():
@@ -876,10 +903,12 @@ def remove_identity_nodes(self):
             repo = {o for o in node.output if o in replacements}
             repi = {o for o in node.input if o in replacements}
             if repi or repo:
+                new_inputs = [replacements.get(i, i) for i in node.input]
+                new_outputs = [replacements.get(i, i) for i in node.output]
                 new_node = oh.make_node(
                     node.op_type,
-                    [replacements.get(i, i) for i in node.input],
-                    [replacements.get(i, i) for i in node.output],
+                    new_inputs,
+                    new_outputs,
                     domain=node.domain,
                     name=node.name,
                 )
diff --git a/pyproject.toml b/pyproject.toml
index 0b0e71d..525b648 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -11,11 +11,11 @@ exclude = [
 # Same as Black.
 line-length = 88
 
-[tool.ruff.mccabe]
+[tool.ruff.lint.mccabe]
 # Unlike Flake8, default to a complexity level of 10.
 max-complexity = 10
 
-[tool.ruff.per-file-ignores]
+[tool.ruff.lint.per-file-ignores]
 "_doc/examples/plot_first_example.py" = ["E402", "F811"]
 "_doc/examples/plot_onnxruntime.py" = ["E402", "F811"]
 "onnx_array_api/array_api/_onnx_common.py" = ["F821"]

From 8835156200180fb52108b08f625e11d2fe7d11b4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= <xadupre@users.noreply.github.com>
Date: Tue, 6 Feb 2024 01:41:01 +0100
Subject: [PATCH 17/44] Add class to yield results form onnx model and computes
 differences between two runs (#71)

* update requirements

* Add class to yield results

* black

* add sumarry

* add distance

* text

* compare function

* fix FusedMatMul

* fix alpha

* example

* documentation

* fix length

* doc
---
 CHANGELOGS.rst                                |   1 +
 _doc/api/reference.rst                        |  30 ++
 _doc/command_lines.rst                        |  52 ++
 _doc/examples/plot_onnx_diff.py               |  68 +++
 _doc/index.rst                                |   1 +
 _doc/tutorial/index.rst                       |   1 +
 _doc/tutorial/tools.rst                       |  20 +
 _unittests/ut_reference/test_array_tensor.py  |  26 +-
 .../ut_reference/test_evaluator_yield.py      | 464 ++++++++++++++++++
 _unittests/ut_xrun_doc/test_command_lines1.py |  37 ++
 onnx_array_api/_command_lines_parser.py       |  58 ++-
 onnx_array_api/reference/__init__.py          |   7 +
 onnx_array_api/reference/evaluator.py         |   2 +
 onnx_array_api/reference/evaluator_yield.py   | 449 +++++++++++++++++
 .../reference/ops/op_fused_matmul.py          |  31 ++
 15 files changed, 1243 insertions(+), 4 deletions(-)
 create mode 100644 _doc/command_lines.rst
 create mode 100644 _doc/examples/plot_onnx_diff.py
 create mode 100644 _doc/tutorial/tools.rst
 create mode 100644 _unittests/ut_reference/test_evaluator_yield.py
 create mode 100644 onnx_array_api/reference/evaluator_yield.py
 create mode 100644 onnx_array_api/reference/ops/op_fused_matmul.py

diff --git a/CHANGELOGS.rst b/CHANGELOGS.rst
index dad0930..d0b6445 100644
--- a/CHANGELOGS.rst
+++ b/CHANGELOGS.rst
@@ -4,6 +4,7 @@ Change Logs
 0.2.0
 +++++
 
+* :pr:`71`: adds tools to compare two onnx graphs
 * :pr:`61`: adds function to plot onnx model as graphs
 * :pr:`60`: supports translation of local functions
 * :pr:`59`: add methods to update nodes in GraphAPI 
diff --git a/_doc/api/reference.rst b/_doc/api/reference.rst
index acbf90a..3b4ae7d 100644
--- a/_doc/api/reference.rst
+++ b/_doc/api/reference.rst
@@ -5,3 +5,33 @@ ExtendedReferenceEvaluator
 ++++++++++++++++++++++++++
 
 .. autoclass:: onnx_array_api.reference.ExtendedReferenceEvaluator
+    :members:
+
+ResultType
+++++++++++
+
+.. autoclass:: onnx_array_api.reference.ResultType
+    :members:
+
+ResultExecution
++++++++++++++++
+
+.. autoclass:: onnx_array_api.reference.ResultExecution
+    :members:
+
+YieldEvaluator
+++++++++++++++
+
+.. autoclass:: onnx_array_api.reference.YieldEvaluator
+    :members:
+
+DistanceExecution
++++++++++++++++++
+
+.. autoclass:: onnx_array_api.reference.DistanceExecution
+    :members:
+
+compare_onnx_execution
+++++++++++++++++++++++
+
+.. autofunction:: onnx_array_api.reference.compare_onnx_execution
diff --git a/_doc/command_lines.rst b/_doc/command_lines.rst
new file mode 100644
index 0000000..38ca5f2
--- /dev/null
+++ b/_doc/command_lines.rst
@@ -0,0 +1,52 @@
+=============
+command lines
+=============
+
+compare
+=======
+
+The command compares the execution of two onnx models.
+
+::
+
+    python -m compare -m1 model1.onnx -m2 model2.onnx -v 1
+
+Output example::
+
+    [compare_onnx_execution] got 2 inputs
+    [compare_onnx_execution] execute first model
+    [compare_onnx_execution] got 5 results
+    [compare_onnx_execution] execute second model
+    [compare_onnx_execution] got 5 results
+    [compare_onnx_execution] compute edit distance
+    [compare_onnx_execution] got 4 pairs
+    [compare_onnx_execution] done
+    = | INPUT  float32  5x6             AAAA          X    | INPUT  float32  5x6             AAAA          X   
+    = | INPUT  float32  5x6             AAAA          Y    | INPUT  float32  5x6             AAAA          Y   
+    = | RESULT float32  5x6             AABB Add      res  | RESULT float32  5x6             AABB Add      res 
+    = | RESULT float32  5x6             AAAA Cos      Z    | RESULT float32  5x6             AAAA Cos      Z 
+
+.. runpython::
+
+    from onnx_array_api._command_lines_parser import get_parser_compare
+    get_parser_compare().print_help()
+
+See function :func:`onnx_array_api.reference.compare_onnx_execution`.
+
+translate
+=========
+
+The command converts an onnx file into some code.
+
+::
+
+    python -m translate ...
+
+Output example::
+
+    not yet ready  
+
+.. runpython::
+
+    from onnx_array_api._command_lines_parser import get_parser_translate
+    get_parser_translate().print_help()
diff --git a/_doc/examples/plot_onnx_diff.py b/_doc/examples/plot_onnx_diff.py
new file mode 100644
index 0000000..7a5f1d3
--- /dev/null
+++ b/_doc/examples/plot_onnx_diff.py
@@ -0,0 +1,68 @@
+"""
+
+.. _l-onnx-diff-example:
+
+Compares the conversions of the same model with different options
+=================================================================
+
+The script compares two onnx models obtained with the same trained
+scikit-learn models but converted with different options.
+
+A model
++++++++
+"""
+
+from sklearn.mixture import GaussianMixture
+from sklearn.datasets import load_iris
+from sklearn.model_selection import train_test_split
+from skl2onnx import to_onnx
+from onnx_array_api.reference import compare_onnx_execution
+from onnx_array_api.plotting.text_plot import onnx_simple_text_plot
+
+
+data = load_iris()
+X_train, X_test = train_test_split(data.data)
+model = GaussianMixture()
+model.fit(X_train)
+
+#################################
+# Conversion to onnx
+# ++++++++++++++++++
+
+onx = to_onnx(
+    model, X_train[:1], options={id(model): {"score_samples": True}}, target_opset=12
+)
+
+print(onnx_simple_text_plot(onx))
+
+##################################
+# Conversion to onnx without ReduceLogSumExp
+# ++++++++++++++++++++++++++++++++++++++++++
+
+onx2 = to_onnx(
+    model,
+    X_train[:1],
+    options={id(model): {"score_samples": True}},
+    black_op={"ReduceLogSumExp"},
+    target_opset=12,
+)
+
+print(onnx_simple_text_plot(onx2))
+
+
+#############################################
+# Differences
+# +++++++++++
+#
+# Function :func:`onnx_array_api.reference.compare_onnx_execution`
+# compares the intermediate results of two onnx models. Then it finds
+# the best alignment between the two models using an edit distance.
+
+res1, res2, align, dc = compare_onnx_execution(onx, onx2, verbose=1)
+print("------------")
+text = dc.to_str(res1, res2, align)
+print(text)
+
+###############################
+# The display shows that ReduceSumSquare was replaced by Mul + ReduceSum,
+# and ReduceLogSumExp by ReduceMax + Sub + Exp + Log + Add.
diff --git a/_doc/index.rst b/_doc/index.rst
index 02c4eed..b81be4f 100644
--- a/_doc/index.rst
+++ b/_doc/index.rst
@@ -36,6 +36,7 @@ The objective is to speed up the implementation of converter libraries.
     tutorial/index
     api/index
     tech/index
+    command_lines
     auto_examples/index
 
 .. toctree::
diff --git a/_doc/tutorial/index.rst b/_doc/tutorial/index.rst
index f4cce00..9fcc557 100644
--- a/_doc/tutorial/index.rst
+++ b/_doc/tutorial/index.rst
@@ -10,4 +10,5 @@ Tutorial
     graph_api
     light_api
     numpy_api
+    tools
     benchmarks
diff --git a/_doc/tutorial/tools.rst b/_doc/tutorial/tools.rst
new file mode 100644
index 0000000..fe673f7
--- /dev/null
+++ b/_doc/tutorial/tools.rst
@@ -0,0 +1,20 @@
+=====
+Tools
+=====
+
+Some useful tools.
+
+Text representation
+===================
+
+Plotting a graph is great but difficult to read when
+the graph is big and it is slow.
+:func:`onnx_array_api.plotting.text_plot.onnx_simple_text_plot`
+prints out a text representation.
+
+Differences between two models
+==============================
+
+How to understand the differences between two models
+assuming they are producing the same outputs?
+Example :ref:`l-onnx-diff-example` shows how to do it.
diff --git a/_unittests/ut_reference/test_array_tensor.py b/_unittests/ut_reference/test_array_tensor.py
index 59fe5f1..f13c3e5 100644
--- a/_unittests/ut_reference/test_array_tensor.py
+++ b/_unittests/ut_reference/test_array_tensor.py
@@ -1,7 +1,13 @@
 import unittest
 import numpy as np
 from onnx import TensorProto
-from onnx.helper import make_graph, make_model, make_node, make_tensor_value_info
+from onnx.helper import (
+    make_graph,
+    make_model,
+    make_node,
+    make_tensor_value_info,
+    make_opsetid,
+)
 from onnx_array_api.ext_test_case import ExtTestCase
 from onnx_array_api.reference import (
     to_array_extended,
@@ -51,6 +57,24 @@ def make_model_f8(fr, to):
                         back = from_array_extended(got, "a")
                         self.assertEqual(to, back.data_type)
 
+    def test_fused_matmul(self):
+        model = make_model(
+            make_graph(
+                [make_node("FusedMatMul", ["X", "Y"], ["Z"], domain="com.microsoft")],
+                "name",
+                [
+                    make_tensor_value_info("X", TensorProto.FLOAT, None),
+                    make_tensor_value_info("Y", TensorProto.FLOAT, None),
+                ],
+                [make_tensor_value_info("Z", TensorProto.FLOAT, None)],
+            ),
+            opset_imports=[make_opsetid("", 18), make_opsetid("com.microsoft", 1)],
+        )
+        ref = ExtendedReferenceEvaluator(model)
+        a = np.arange(4).reshape(-1, 2)
+        got = ref.run(None, {"X": a, "Y": a})
+        self.assertEqualArray(a @ a, got[0])
+
 
 if __name__ == "__main__":
     unittest.main(verbosity=2)
diff --git a/_unittests/ut_reference/test_evaluator_yield.py b/_unittests/ut_reference/test_evaluator_yield.py
new file mode 100644
index 0000000..7181456
--- /dev/null
+++ b/_unittests/ut_reference/test_evaluator_yield.py
@@ -0,0 +1,464 @@
+import unittest
+import numpy as np
+from onnx import TensorProto
+from onnx.helper import (
+    make_function,
+    make_graph,
+    make_model,
+    make_node,
+    make_opsetid,
+    make_tensor_value_info,
+)
+from onnx.parser import parse_model
+from onnx_array_api.ext_test_case import ExtTestCase
+from onnx_array_api.reference import (
+    YieldEvaluator,
+    ResultType,
+    DistanceExecution,
+    ResultExecution,
+    compare_onnx_execution,
+)
+from onnx_array_api.reference.evaluator_yield import make_summary
+
+
+class TestArrayTensor(ExtTestCase):
+    def test_make_summary(self):
+        a = np.arange(12).reshape(3, 4)
+        v = make_summary(a)
+        self.assertEqual(v, "DMVE")
+        a = np.arange(12)
+        v = make_summary(a)
+        self.assertEqual(v, "DMVE")
+        a = np.arange(12).astype(np.float32)
+        v = make_summary(a)
+        self.assertEqual(v, "DMVE")
+        a = np.arange(13)
+        a[-1] = 0
+        v = make_summary(a)
+        self.assertEqual(v, "GWMA")
+
+    def test_evaluator_yield(self):
+        new_domain = "custom_domain"
+        opset_imports = [make_opsetid("", 14), make_opsetid(new_domain, 1)]
+
+        node1 = make_node("MatMul", ["X", "A"], ["XA"])
+        node2 = make_node("Add", ["XA", "B"], ["Y"])
+
+        linear_regression = make_function(
+            new_domain,
+            "LinearRegression",
+            ["X", "A", "B"],
+            ["Y"],
+            [node1, node2],
+            opset_imports,
+            [],
+        )
+
+        X = make_tensor_value_info("X", TensorProto.FLOAT, [None, None])
+        A = make_tensor_value_info("A", TensorProto.FLOAT, [None, None])
+        B = make_tensor_value_info("B", TensorProto.FLOAT, [None, None])
+        Y = make_tensor_value_info("Y", TensorProto.FLOAT, None)
+
+        graph = make_graph(
+            [
+                make_node(
+                    "LinearRegression", ["X", "A", "B"], ["Y1"], domain=new_domain
+                ),
+                make_node("Abs", ["Y1"], ["Y"]),
+            ],
+            "example",
+            [X, A, B],
+            [Y],
+        )
+
+        onnx_model = make_model(
+            graph, opset_imports=opset_imports, functions=[linear_regression]
+        )
+
+        cst = np.arange(4).reshape((-1, 2)).astype(np.float32)
+        yield_eval = YieldEvaluator(onnx_model)
+        results = list(
+            yield_eval.enumerate_results(None, {"A": cst, "B": cst, "X": cst})
+        )
+        expected = [
+            (
+                ResultType.INPUT,
+                "A",
+                np.array([[0.0, 1.0], [2.0, 3.0]], dtype=np.float32),
+                None,
+            ),
+            (
+                ResultType.INPUT,
+                "B",
+                np.array([[0.0, 1.0], [2.0, 3.0]], dtype=np.float32),
+                None,
+            ),
+            (
+                ResultType.INPUT,
+                "X",
+                np.array([[0.0, 1.0], [2.0, 3.0]], dtype=np.float32),
+                None,
+            ),
+            (
+                ResultType.RESULT,
+                "Y1",
+                np.array([[2.0, 4.0], [8.0, 14.0]], dtype=np.float32),
+                "LinearRegression",
+            ),
+            (
+                ResultType.RESULT,
+                "Y",
+                np.array([[2.0, 4.0], [8.0, 14.0]], dtype=np.float32),
+                "Abs",
+            ),
+            (
+                ResultType.OUTPUT,
+                "Y",
+                np.array([[2.0, 4.0], [8.0, 14.0]], dtype=np.float32),
+                None,
+            ),
+        ]
+        self.assertEqual(len(expected), len(results))
+        for a, b in zip(expected, results):
+            self.assertEqual(len(a), len(b))
+            self.assertEqual(a[0], b[0])
+            self.assertEqual(a[1], b[1])
+            self.assertEqual(a[2].tolist(), b[2].tolist())
+            self.assertEqual(a[3], b[3])
+
+    def test_evaluator_yield_summary(self):
+        new_domain = "custom_domain"
+        opset_imports = [make_opsetid("", 14), make_opsetid(new_domain, 1)]
+
+        node1 = make_node("MatMul", ["X", "A"], ["XA"])
+        node2 = make_node("Add", ["XA", "B"], ["Y"])
+
+        linear_regression = make_function(
+            new_domain,
+            "LinearRegression",
+            ["X", "A", "B"],
+            ["Y"],
+            [node1, node2],
+            opset_imports,
+            [],
+        )
+
+        X = make_tensor_value_info("X", TensorProto.FLOAT, [None, None])
+        A = make_tensor_value_info("A", TensorProto.FLOAT, [None, None])
+        B = make_tensor_value_info("B", TensorProto.FLOAT, [None, None])
+        Y = make_tensor_value_info("Y", TensorProto.FLOAT, None)
+
+        graph = make_graph(
+            [
+                make_node(
+                    "LinearRegression", ["X", "A", "B"], ["Y1"], domain=new_domain
+                ),
+                make_node("Abs", ["Y1"], ["Y"]),
+            ],
+            "example",
+            [X, A, B],
+            [Y],
+        )
+
+        onnx_model = make_model(
+            graph, opset_imports=opset_imports, functions=[linear_regression]
+        )
+
+        cst = np.arange(4).reshape((-1, 2)).astype(np.float32)
+        yield_eval = YieldEvaluator(onnx_model)
+        results = list(
+            yield_eval.enumerate_summarized(None, {"A": cst, "B": cst, "X": cst})
+        )
+        expected = [
+            (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "A"),
+            (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "B"),
+            (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "X"),
+            (
+                ResultType.RESULT,
+                np.dtype("float32"),
+                (2, 2),
+                "CEIO",
+                "LinearRegression",
+                "Y1",
+            ),
+            (ResultType.RESULT, np.dtype("float32"), (2, 2), "CEIO", "Abs", "Y"),
+            (ResultType.OUTPUT, np.dtype("float32"), (2, 2), "CEIO", None, "Y"),
+        ]
+        self.assertEqual(len(expected), len(results))
+        for a, b in zip(expected, results):
+            self.assertEqual(len(a), len(b))
+            self.assertEqual(a[0], b[0])
+            self.assertEqual(a[1], b[1])
+            self.assertEqual(a[2], b[2])
+            self.assertEqual(a[3], b[3])
+            self.assertEqual(a[4], b[4])
+            self.assertEqual(a[5], b[5])
+
+    def test_distance_pair(self):
+        el1 = (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None)
+        el2 = el1
+        dc = DistanceExecution()
+        self.assertEqual(dc.distance_pair(el1, el2), 0)
+        el2 = (ResultType.INPUT, np.dtype("float16"), (2, 2), "ABCD", None)
+        self.assertEqual(dc.distance_pair(el1, el2), 2)
+        el2 = (ResultType.OUTPUT, np.dtype("float16"), (2, 2, 4), "GBCD", "Abs")
+        self.assertEqual(dc.distance_pair(el1, el2), 1130)
+        el2 = (ResultType.OUTPUT, np.dtype("float16"), (2, 3), "GBCD", "Abs")
+        self.assertEqual(dc.distance_pair(el1, el2), 1021)
+
+    def test_distance_sequence_0(self):
+        expected = [
+            (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "A"),
+            (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "B"),
+            (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "X"),
+            (
+                ResultType.RESULT,
+                np.dtype("float32"),
+                (2, 2),
+                "CEIO",
+                "LinearRegression",
+                "Y1",
+            ),
+            (ResultType.RESULT, np.dtype("float32"), (2, 2), "CEIO", "Abs", "Y"),
+            (ResultType.OUTPUT, np.dtype("float32"), (2, 2), "CEIO", None, "Y"),
+        ]
+
+        dc = DistanceExecution()
+        d, align = dc.distance_sequence(expected, expected)
+        self.assertEqual(d, 0)
+        self.assertEqual(align, [(0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (5, 5)])
+
+    def test_distance_sequence_ins(self):
+        s1 = [
+            (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "A"),
+            (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "B"),
+            (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "X"),
+            (
+                ResultType.RESULT,
+                np.dtype("float32"),
+                (2, 2),
+                "CEIO",
+                "LinearRegression",
+                "Y1",
+            ),
+            (ResultType.RESULT, np.dtype("float32"), (2, 2), "CEIO", "Abs", "Y"),
+            (ResultType.OUTPUT, np.dtype("float32"), (2, 2), "CEIO", None, "Y"),
+        ]
+        s2 = [
+            (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "A"),
+            (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "B"),
+            (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "X"),
+            (
+                ResultType.RESULT,
+                np.dtype("float32"),
+                (2, 2),
+                "CEIO",
+                "LinearRegression",
+                "Y1",
+            ),
+            (ResultType.OUTPUT, np.dtype("float32"), (2, 2), "CEIO", None, "Y"),
+        ]
+
+        dc = DistanceExecution()
+        d, align = dc.distance_sequence(s1, s2)
+        self.assertEqual(d, dc.insert_cost)
+        self.assertEqual(align, [(0, 0), (1, 1), (2, 2), (3, 3), (4, 3), (5, 4)])
+        d, align = dc.distance_sequence(s2, s1)
+        self.assertEqual(d, dc.insert_cost)
+        self.assertEqual(align, [(0, 0), (1, 1), (2, 2), (3, 3), (3, 4), (4, 5)])
+
+    def test_distance_sequence_equal(self):
+        s1 = [
+            (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "A"),
+            (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "B"),
+            (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "X"),
+            (
+                ResultType.RESULT,
+                np.dtype("float32"),
+                (2, 2),
+                "CEIO",
+                "LinearRegression",
+                "Y1",
+            ),
+            (ResultType.RESULT, np.dtype("float32"), (2, 2), "CEIO", "Abs", "Y"),
+            (ResultType.OUTPUT, np.dtype("float32"), (2, 2), "CEIO", None, "Y"),
+        ]
+        s2 = [
+            (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "A"),
+            (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "B"),
+            (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "X"),
+            (
+                ResultType.RESULT,
+                np.dtype("float32"),
+                (2, 2),
+                "CEIO",
+                "LinearRegression",
+                "Y1",
+            ),
+            (ResultType.RESULT, np.dtype("float32"), (2, 2), "CEIO", "Abs", "Z"),
+            (ResultType.OUTPUT, np.dtype("float32"), (2, 2), "CEIO", None, "Y"),
+        ]
+
+        dc = DistanceExecution()
+        d, align = dc.distance_sequence(s1, s2)
+        self.assertEqual(d, 0)
+        self.assertEqual(align, [(0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (5, 5)])
+
+    def test_distance_sequence_diff(self):
+        s1 = [
+            (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "A"),
+            (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "B"),
+            (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "X"),
+            (
+                ResultType.RESULT,
+                np.dtype("float32"),
+                (2, 2),
+                "CEIO",
+                "LinearRegression",
+                "Y1",
+            ),
+            (ResultType.RESULT, np.dtype("float32"), (2, 2), "CEIO", "Abs", "Y"),
+            (ResultType.OUTPUT, np.dtype("float32"), (2, 2), "CEIO", None, "Y"),
+        ]
+        s2 = [
+            (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "A"),
+            (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "B"),
+            (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "X"),
+            (
+                ResultType.RESULT,
+                np.dtype("float32"),
+                (2, 2),
+                "CEIO",
+                "LinearRegression",
+                "Y1",
+            ),
+            (ResultType.RESULT, np.dtype("float32"), (2, 2), "CEIP", "Abs", "Z"),
+            (ResultType.OUTPUT, np.dtype("float32"), (2, 2), "CEIO", None, "Y"),
+        ]
+
+        dc = DistanceExecution()
+        d, align = dc.distance_sequence(s1, s2)
+        self.assertEqual(d, 1)
+        self.assertEqual(align, [(0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (5, 5)])
+
+    def test_distance_sequence_diff2(self):
+        s1 = [
+            (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "A"),
+            (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "B"),
+            (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "X"),
+            (
+                ResultType.RESULT,
+                np.dtype("float32"),
+                (2, 2),
+                "CEIO",
+                "LinearRegression",
+                "Y1",
+            ),
+            (ResultType.RESULT, np.dtype("float32"), (2, 2), "CEIO", "Abs", "Y"),
+            (ResultType.OUTPUT, np.dtype("float32"), (2, 2), "CEIO", None, "Y"),
+        ]
+        s2 = [
+            (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "A"),
+            (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "B"),
+            (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "X"),
+            (
+                ResultType.RESULT,
+                np.dtype("float32"),
+                (2, 2),
+                "CEIO",
+                "LinearRegression",
+                "Y1",
+            ),
+            (ResultType.RESULT, np.dtype("float32"), (2, 3), "CEIP", "Abs", "Z"),
+            (ResultType.OUTPUT, np.dtype("float32"), (2, 2), "CEIP", None, "Y"),
+        ]
+
+        dc = DistanceExecution()
+        d, align = dc.distance_sequence(s1, s2)
+        self.assertEqual(d, 5)
+        self.assertEqual(align, [(0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (5, 5)])
+
+    def test_distance_sequence_str(self):
+        s1 = [
+            (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "A"),
+            (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "B"),
+            (ResultType.INPUT, np.dtype("float32"), (2, 3), "ABCD", None, "X"),
+            (ResultType.RESULT, np.dtype("float32"), (2, 2), "CEIO", "Exp", "H"),
+            (
+                ResultType.RESULT,
+                np.dtype("float32"),
+                (2, 2),
+                "CEIO",
+                "LinearRegression",
+                "Y1",
+            ),
+            (ResultType.RESULT, np.dtype("float32"), (2, 2), "CEIO", "Abs", "Y"),
+            (ResultType.OUTPUT, np.dtype("float32"), (2, 2), "CEIO", None, "Y"),
+        ]
+        s2 = [
+            (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "A"),
+            (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "B"),
+            (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "X"),
+            (
+                ResultType.RESULT,
+                np.dtype("float32"),
+                (2, 2),
+                "CEIO",
+                "LinearRegression",
+                "Y1",
+            ),
+            (ResultType.RESULT, np.dtype("float32"), (2, 3), "CEIP", "Abs", "Z"),
+            (ResultType.OUTPUT, np.dtype("float32"), (2, 2), "CEIP", None, "Y"),
+        ]
+        s1 = [ResultExecution(*s) for s in s1]
+        s2 = [ResultExecution(*s) for s in s2]
+
+        dc = DistanceExecution()
+        d, align = dc.distance_sequence(s1, s2)
+        self.assertEqual(d, 1008)
+        self.assertEqual(
+            align, [(0, 0), (1, 1), (2, 2), (3, 2), (4, 3), (5, 4), (6, 5)]
+        )
+        text = dc.to_str(s1, s2, align)
+        self.assertIn("OUTPUT", text)
+        expected = """
+            =|INPUTfloat322x2ABCDA|INPUTfloat322x2ABCDA
+            =|INPUTfloat322x2ABCDB|INPUTfloat322x2ABCDB
+            ~|INPUTfloat322x3ABCDX|INPUTfloat322x2ABCDX
+            -|RESULTfloat322x2CEIOExpH|
+            =|RESULTfloat322x2CEIOLinearRegrY1|RESULTfloat322x2CEIOLinearRegrY1
+            ~|RESULTfloat322x2CEIOAbsY|RESULTfloat322x3CEIPAbsZ
+            ~|OUTPUTfloat322x2CEIOY|OUTPUTfloat322x2CEIPY
+        """.replace(
+            "            ", ""
+        ).strip(
+            "\n "
+        )
+        self.assertEqual(expected, text.replace(" ", "").strip("\n"))
+
+    def test_compare_execution(self):
+        m1 = parse_model(
+            """
+            <ir_version: 8, opset_import: [ "": 18]>
+            agraph (float[N] x) => (float[N] z) {
+                two = Constant <value_float=2.0> ()
+                four = Add(two, two)
+                z = Mul(x, x)
+            }"""
+        )
+        m2 = parse_model(
+            """
+            <ir_version: 8, opset_import: [ "": 18]>
+            agraph (float[N] x) => (float[N] z) {
+                two = Constant <value_float=2.0> ()
+                z = Mul(x, x)
+            }"""
+        )
+        res1, res2, align, dc = compare_onnx_execution(m1, m2)
+        text = dc.to_str(res1, res2, align)
+        self.assertIn("CAAA Constant", text)
+        self.assertEqual(len(align), 5)
+
+
+if __name__ == "__main__":
+    unittest.main(verbosity=2)
diff --git a/_unittests/ut_xrun_doc/test_command_lines1.py b/_unittests/ut_xrun_doc/test_command_lines1.py
index 8aa17ee..02f84bd 100644
--- a/_unittests/ut_xrun_doc/test_command_lines1.py
+++ b/_unittests/ut_xrun_doc/test_command_lines1.py
@@ -14,6 +14,7 @@
 from onnx_array_api.ext_test_case import ExtTestCase
 from onnx_array_api._command_lines_parser import (
     get_main_parser,
+    get_parser_compare,
     get_parser_translate,
     main,
 )
@@ -70,6 +71,42 @@ def test_command_translate(self):
             code = st.getvalue()
             self.assertIn("start(opset=", code)
 
+    def test_parser_compare(self):
+        st = StringIO()
+        with redirect_stdout(st):
+            get_parser_compare().print_help()
+        text = st.getvalue()
+        self.assertIn("model1", text)
+
+    def test_command_compare(self):
+        X = make_tensor_value_info("X", TensorProto.FLOAT, [5, 6])
+        Y = make_tensor_value_info("Y", TensorProto.FLOAT, [5, 6])
+        Z = make_tensor_value_info("Z", TensorProto.FLOAT, [5, 6])
+        graph = make_graph(
+            [
+                make_node("Add", ["X", "Y"], ["res"]),
+                make_node("Cos", ["res"], ["Z"]),
+            ],
+            "g",
+            [X, Y],
+            [Z],
+        )
+        onnx_model = make_model(graph, opset_imports=[make_opsetid("", 18)])
+
+        with tempfile.TemporaryDirectory() as root:
+            model_file = os.path.join(root, "model.onnx")
+            with open(model_file, "wb") as f:
+                f.write(onnx_model.SerializeToString())
+
+            args = ["compare", "-m1", model_file, "-m2", model_file, "-v", "1"]
+            st = StringIO()
+            with redirect_stdout(st):
+                main(args)
+
+            code = st.getvalue()
+            self.assertIn("[compare_onnx_execution]", code)
+            self.assertIn("ADFF", code)
+
 
 if __name__ == "__main__":
     unittest.main(verbosity=2)
diff --git a/onnx_array_api/_command_lines_parser.py b/onnx_array_api/_command_lines_parser.py
index 71f5a35..a180deb 100644
--- a/onnx_array_api/_command_lines_parser.py
+++ b/onnx_array_api/_command_lines_parser.py
@@ -14,12 +14,13 @@ def get_main_parser() -> ArgumentParser:
     )
     parser.add_argument(
         "cmd",
-        choices=["translate"],
+        choices=["translate", "compare"],
         help=dedent(
             """
         Selects a command.
         
-        'translate' exports an onnx graph into a piece of code replicating it.
+        'translate' exports an onnx graph into a piece of code replicating it,
+        'compares' compares the execution of two onnx models
         """
         ),
     )
@@ -65,8 +66,59 @@ def _cmd_translate(argv: List[Any]):
     print(code)
 
 
+def get_parser_compare() -> ArgumentParser:
+    parser = ArgumentParser(
+        prog="compare",
+        description=dedent(
+            """
+        Compares the execution of two onnx models.
+        """
+        ),
+        epilog="This is used when two models are different but should produce the same results.",
+    )
+    parser.add_argument(
+        "-m1",
+        "--model1",
+        type=str,
+        required=True,
+        help="first onnx model",
+    )
+    parser.add_argument(
+        "-m2",
+        "--model2",
+        type=str,
+        required=True,
+        help="second onnx model",
+    )
+    parser.add_argument(
+        "-v",
+        "--verbose",
+        default=0,
+        help="verbosity",
+    )
+    parser.add_argument(
+        "-c",
+        "--column-size",
+        default=50,
+        help="column size when displaying the results",
+    )
+    return parser
+
+
+def _cmd_compare(argv: List[Any]):
+    from .reference import compare_onnx_execution
+
+    parser = get_parser_compare()
+    args = parser.parse_args(argv[1:])
+    onx1 = onnx.load(args.model1)
+    onx2 = onnx.load(args.model2)
+    res1, res2, align, dc = compare_onnx_execution(onx1, onx2, verbose=args.verbose)
+    text = dc.to_str(res1, res2, align, column_size=args.column_size)
+    print(text)
+
+
 def main(argv: Optional[List[Any]] = None):
-    fcts = dict(translate=_cmd_translate)
+    fcts = dict(translate=_cmd_translate, compare=_cmd_compare)
 
     if argv is None:
         argv = sys.argv[1:]
diff --git a/onnx_array_api/reference/__init__.py b/onnx_array_api/reference/__init__.py
index d8c5aa5..fd1d27c 100644
--- a/onnx_array_api/reference/__init__.py
+++ b/onnx_array_api/reference/__init__.py
@@ -11,6 +11,13 @@
 )
 from onnx.reference.op_run import to_array_extended
 from .evaluator import ExtendedReferenceEvaluator
+from .evaluator_yield import (
+    DistanceExecution,
+    ResultExecution,
+    ResultType,
+    YieldEvaluator,
+    compare_onnx_execution,
+)
 
 
 def from_array_extended(tensor: np.array, name: Optional[str] = None) -> TensorProto:
diff --git a/onnx_array_api/reference/evaluator.py b/onnx_array_api/reference/evaluator.py
index e20be76..54f0c26 100644
--- a/onnx_array_api/reference/evaluator.py
+++ b/onnx_array_api/reference/evaluator.py
@@ -7,6 +7,7 @@
 from .ops.op_cast_like import CastLike_15, CastLike_19
 from .ops.op_concat import Concat
 from .ops.op_constant_of_shape import ConstantOfShape
+from .ops.op_fused_matmul import FusedMatMul
 
 
 logger = getLogger("onnx-array-api-eval")
@@ -32,6 +33,7 @@ class ExtendedReferenceEvaluator(ReferenceEvaluator):
         CastLike_15,
         CastLike_19,
         ConstantOfShape,
+        FusedMatMul,
     ]
 
     @staticmethod
diff --git a/onnx_array_api/reference/evaluator_yield.py b/onnx_array_api/reference/evaluator_yield.py
new file mode 100644
index 0000000..3935913
--- /dev/null
+++ b/onnx_array_api/reference/evaluator_yield.py
@@ -0,0 +1,449 @@
+from dataclasses import dataclass
+from typing import Any, Dict, List, Iterator, Optional, Tuple
+from enum import IntEnum
+import numpy as np
+from onnx import ModelProto, TensorProto, ValueInfoProto
+from .evaluator import ExtendedReferenceEvaluator
+
+
+def _align(res: str, limit: int) -> str:
+    if len(res) == limit:
+        return res
+    if len(res) > limit:
+        return res[:limit]
+    return res + " " * (limit - len(res))
+
+
+class ResultType(IntEnum):
+    RESULT = 1
+    INITIALIZER = 2
+    SPARSE_INITIALIZER = 4
+    INPUT = 8
+    OUTPUT = 16
+
+    def __repr__(self):
+        return f"{self.__class__.__name__}.{self._name_}"
+
+
+@dataclass
+class ResultExecution:
+    """
+    The description of a result.
+    """
+
+    kind: ResultType
+    dtype: object
+    shape: tuple
+    summary: str
+    op_type: str
+    name: str
+
+    def __len__(self) -> int:
+        return 6
+
+    def __getitem__(self, i: int) -> Any:
+        if i == 0:
+            return self.kind
+        if i == 1:
+            return self.dtype
+        if i == 2:
+            return self.shape
+        if i == 3:
+            return self.summary
+        if i == 4:
+            return self.op_type
+        if i == 5:
+            return self.name
+        raise IndexError(f"i={i} out of boundary")
+
+    def __str__(self):
+        els = [
+            _align(self.kind._name_, 6),
+            _align(str(self.dtype).replace("dtype(", "").replace(")", ""), 8),
+            _align("x".join(map(str, self.shape)), 15),
+            self.summary,
+            _align(self.op_type or "", 10),
+            self.name or "",
+        ]
+        return " ".join(els)
+
+
+def make_summary(value: Any, length: int = 4, modulo: int = 26) -> str:
+    """
+    Create a short string summarizing the value (discretization).
+
+    :param value: array
+    :param length: number of value to produce
+    :param module: discretization parameter
+    :return: short string
+    """
+    value4 = np.zeros(length, dtype=np.float64)
+    if value.size <= length:
+        value4[: value.size] = value.flatten().astype(np.float64)
+    else:
+        if value.size % length != 0:
+            value2 = np.zeros(
+                value.size + length - value.size % length, dtype=np.float64
+            )
+            value2[: value.size] = value.flatten().astype(np.float64)
+        else:
+            value2 = value.flatten().astype(np.float64)
+        value4 = value2.reshape((4, -1)).sum(axis=1)
+    value4i = value4.astype(np.int64) % modulo
+    s = "".join([chr(65 + i) for i in value4i])
+    return s
+
+
+class YieldEvaluator:
+    """
+    This class implements method `enumerate_results` which iterates on
+    intermediates results. By default, it uses
+    :class:`onnx_array_api.reference.ExtendedReferenceEvaluator`.
+
+    :param onnx_model: model to run
+    :param recursive: dig into subgraph and functions as well
+    """
+
+    def __init__(
+        self,
+        onnx_model: ModelProto,
+        recursive: bool = False,
+        cls=ExtendedReferenceEvaluator,
+    ):
+        assert not recursive, "recursive=True is not yet implemented"
+        self.onnx_model = onnx_model
+        self.evaluator = cls(onnx_model) if cls is not None else None
+
+    def enumerate_results(
+        self,
+        output_names: Optional[List[str]] = None,
+        feed_inputs: Optional[Dict[str, Any]] = None,
+    ) -> Iterator[Tuple[ResultType, str, Any]]:
+        """
+        Executes the onnx model and enumerate all the intermediate results.
+
+        Args:
+            output_names: requested outputs by names, None for all
+            feed_inputs: dictionary `{ input name: input value }`
+
+        Returns:
+            iterator on tuple(result kind, name, value, node.op_type or None)
+        """
+        assert isinstance(self.evaluator, ExtendedReferenceEvaluator), (
+            f"This implementation only works with "
+            f"ExtendedReferenceEvaluator not {type(self.evaluator)}"
+        )
+        attributes = {}
+        if output_names is None:
+            output_names = self.evaluator.output_names
+
+        results = {"": None}
+        results.update(self.evaluator.rt_inits_)
+        results.update(feed_inputs)
+        # step 0: initializer
+        for k, v in self.evaluator.rt_inits_.items():
+            yield ResultType.INITIALIZER, k, v, None
+        # step 1: inputs
+        for k, v in feed_inputs.items():
+            yield ResultType.INPUT, k, v, None
+
+        # step 2: execute nodes
+        for node in self.evaluator.rt_nodes_:
+            for i in node.input:
+                if i not in results:
+                    raise RuntimeError(
+                        f"Unable to find input {i!r} in known results {sorted(results)}, "
+                        f"self.rt_inits_ has {sorted(self.evaluator.rt_inits_)}, "
+                        f"feed_inputs has {sorted(feed_inputs)}."
+                    )
+            inputs = [results[i] for i in node.input]
+            linked_attributes = {}
+            if node.has_linked_attribute and attributes:
+                linked_attributes["linked_attributes"] = attributes
+            if node.need_context():
+                outputs = node.run(*inputs, context=results, **linked_attributes)
+            else:
+                outputs = node.run(*inputs, **linked_attributes)
+            for name, value in zip(node.output, outputs):
+                yield ResultType.RESULT, name, value, node.op_type
+                results[name] = value
+
+        # step 3: outputs
+        for name in output_names:
+            if name not in results:
+                raise RuntimeError(
+                    f"Unable to find output name {name!r} in {sorted(results)}, proto is\n{self.proto_}"
+                )
+            yield ResultType.OUTPUT, name, results[name], None
+
+    def enumerate_summarized(
+        self,
+        output_names: Optional[List[str]] = None,
+        feed_inputs: Optional[Dict[str, Any]] = None,
+    ) -> Iterator[ResultExecution]:
+        """
+        Executes the onnx model and enumerate intermediate results without their names.
+
+        Args:
+            output_names: requested outputs by names, None for all
+            feed_inputs: dictionary `{ input name: input value }`
+
+        Returns:
+            iterator on tuple(result kind, node.type, dtype, shape, value, result name)
+        """
+        for kind, name, value, op_type in self.enumerate_results(
+            output_names, feed_inputs
+        ):
+            summary = make_summary(value)
+            yield ResultExecution(
+                kind, value.dtype, value.shape, summary, op_type, name
+            )
+
+
+class DistanceExecution:
+    """
+    Computes a distance between two results.
+    """
+
+    float_types = {
+        np.float16,
+        np.float32,
+        np.float64,
+        np.dtype("float16"),
+        np.dtype("float32"),
+        np.dtype("float64"),
+    }
+
+    def __init__(self, max_lag: int = 50):
+        self.kind_cost = 1000
+        self.type_cost = 10
+        self.rank_cost = 100
+        self.op_type_cost = 10
+        self.max_lag = max_lag
+        self.insert_cost = 1000
+
+    def distance_pair(self, r1: ResultExecution, r2: ResultExecution) -> float:
+        """
+        (ResultType.RESULT, np.dtype("float32"), (2, 2), "CEIO", "Abs"),
+
+        :param r1: first result
+        :param r2: second result
+        :return: distance
+        """
+        d = 0
+        if r1[0] != r2[0]:
+            # difference type
+            d += self.kind_cost
+        if r1[1] != r2[1]:
+            d += self._cost_type(r1[1], r2[1]) * self.type_cost
+        if r1[2] != r2[2]:
+            d += self._cost_shape(r1[2], r2[2])
+        if r1[3] != r2[3]:
+            d += self._cost_summary(r1[3], r2[3])
+        if r1[4] != r2[4]:
+            d += self.op_type_cost
+        return d
+
+    def _cost_type(self, t1: "np.dtype", t2: "np.dtype") -> float:
+        if t1 in self.float_types and t2 in self.float_types:
+            return 0.2
+        return 1
+
+    def _cost_shape(self, s1: Tuple[int, ...], s2: Tuple[int, ...]) -> float:
+        d = abs(np.prod(s1) - np.prod(s2))
+        if len(s1) != len(s2):
+            return self.rank_cost + d
+        for i, j in zip(s1, s2):
+            d += abs(i - j)
+        return d
+
+    def _cost_summary(self, s1: str, s2: str) -> float:
+        if len(s1) != len(s2):
+            return 1e6
+        d = 0
+        for a, b in zip(s1, s2):
+            d += abs(ord(a) - ord(b))
+        return d
+
+    def distance_sequence(
+        self, s1: List[ResultExecution], s2: List[ResultExecution]
+    ) -> Tuple[float, List[Tuple[int, int]]]:
+        """
+        Computes the distance between two sequences of results.
+
+        :param s1: first sequence
+        :param s2: second sequence
+        :return: distance and alignment
+        """
+        delay = self.max_lag
+        distance = {(-1, -1): 0}
+        predecessor = {(-1, -1): None}
+        for i in range(len(s1)):
+            for j in range(max(0, i - delay), min(len(s2), i + delay)):
+                best = 1e100
+                pred = None
+                ki, kj = i - 1, j - 1
+                if (ki, kj) in distance:
+                    d = distance[ki, kj] + self.distance_pair(s1[i], s2[j])
+                    if d < best:
+                        best = d
+                        pred = (ki, kj)
+                ki, kj = i - 1, j
+                if (ki, kj) in distance:
+                    d = distance[ki, kj] + self.insert_cost
+                    if d < best:
+                        best = d
+                        pred = (ki, kj)
+                ki, kj = i, j - 1
+                if (ki, kj) in distance:
+                    d = distance[ki, kj] + self.insert_cost
+                    if d < best:
+                        best = d
+                        pred = (ki, kj)
+                distance[i, j] = best
+                predecessor[i, j] = pred
+
+        # reverse
+        way = []
+        last = len(s1) - 1, len(s2) - 1
+        while last is not None:
+            way.append(last)
+            last = predecessor[last]
+        return distance[len(s1) - 1, len(s2) - 1], list(reversed(way))[1:]
+
+    def to_str(
+        self,
+        s1: List[ResultExecution],
+        s2: List[ResultExecution],
+        alignment: List[Tuple[int, int]],
+        column_size: int = 60,
+    ) -> str:
+        """
+        Prints out the alignment between two sequences into a string.
+        :param s1: first sequence
+        :param s2: second sequence
+        :param alignment: alignment
+        :param column_size: column size
+        :return: test
+        """
+        rows = []
+        last = -1, -1
+        for i, j in alignment:
+            assert i < len(s1), f"Unexpected value i={i} >= len(s1)={len(s1)}"
+            assert j < len(s2), f"Unexpected value i={j} >= len(s2)={len(s2)}"
+            expected = last[0] + 1, last[1] + 1
+
+            if expected == (i, j):
+                d1 = s1[i]
+                d2 = s2[j]
+                d = self.distance_pair(d1, d2)
+                symbol = "=" if d == 0 else "~"
+                rows.append(
+                    f"{symbol} | {_align(str(d1), column_size)} | {_align(str(d2), column_size)}"
+                )
+            elif i == last[0]:
+                d2 = s2[j]
+                rows.append(
+                    f"+ | {_align('', column_size)} | {_align(str(d2), column_size)} "
+                )
+            else:
+                d1 = s1[i]
+                rows.append(
+                    f"- | {_align(str(d1), column_size)} | {_align('', column_size)}"
+                )
+            last = i, j
+        return "\n".join(rows)
+
+
+def generate_input(info: ValueInfoProto) -> np.ndarray:
+    """
+    Generates one input.
+    """
+    elem_type = info.type.tensor_type.elem_type
+    shape = [
+        (getattr(d, "dim_value", None) or getattr(d, "dim_param"))
+        for d in info.type.tensor_type.shape.dim
+    ]
+    new_shape = []
+    for sh in shape:
+        if isinstance(sh, str):
+            if len(new_shape) == 0:
+                new_shape.append(1)
+            else:
+                new_shape.append(16)
+        else:
+            new_shape.append(sh)
+    new_shape = tuple(new_shape)
+    p = np.prod(new_shape)
+    value = np.arange(p)
+    if elem_type == TensorProto.INT32:
+        return value.astype(np.int32).reshape(new_shape)
+    if elem_type == TensorProto.INT64:
+        return value.astype(np.int64).reshape(new_shape)
+    if elem_type == TensorProto.FLOAT:
+        return (value.astype(np.float32) / p).astype(np.float32).reshape(new_shape)
+    if elem_type == TensorProto.FLOAT16:
+        return (value.astype(np.float16) / p).astype(np.float16).reshape(new_shape)
+    if elem_type == TensorProto.DOUBLE:
+        return (value.astype(np.float64) / p).astype(np.float64).reshape(new_shape)
+    raise RuntimeError(f"Unexpected element_type {elem_type} for info={info}")
+
+
+def generate_inputs(model: ModelProto) -> List[np.ndarray]:
+    """
+    Generates inputs for a specific model.
+
+    :param model: ModelProto
+    :return: list of inputs
+    """
+    inputs = []
+    inits = set(i.name for i in model.graph.initializer)
+    for inp in model.graph.input:
+        if inp.name in inits:
+            break
+        inputs.append(generate_input(inp))
+    return inputs
+
+
+def compare_onnx_execution(
+    model1: ModelProto,
+    model2: ModelProto,
+    inputs: Optional[List[Any]] = None,
+    verbose: int = 0,
+) -> Tuple[List[ResultExecution], List[ResultExecution], List[Tuple[int, int]]]:
+    """
+    Compares the execution of two onnx models.
+    The function assumes both models takes the same inputs.
+    See :ref:`l-onnx-diff-example` to see a full example using
+    this function.
+
+    :param model1: first model
+    :param model2: second model
+    :param inputs: inputs to use
+    :param verbose: verbosity
+    :return: four results, a sequence of results for the first model and the second model,
+        the alignment between the two, DistanceExecution
+    """
+    if verbose:
+        print("[compare_onnx_execution] generate inputs")
+    if inputs is None:
+        inputs = generate_inputs(model1)
+    feeds1 = {i.name: v for i, v in zip(model1.graph.input, inputs)}
+    feeds2 = {i.name: v for i, v in zip(model2.graph.input, inputs)}
+    if verbose:
+        print(f"[compare_onnx_execution] got {len(inputs)} inputs")
+        print("[compare_onnx_execution] execute first model")
+    res1 = list(YieldEvaluator(model1).enumerate_summarized(None, feeds1))
+    if verbose:
+        print(f"[compare_onnx_execution] got {len(res1)} results")
+        print("[compare_onnx_execution] execute second model")
+    res2 = list(YieldEvaluator(model2).enumerate_summarized(None, feeds2))
+    if verbose:
+        print(f"[compare_onnx_execution] got {len(res2)} results")
+        print("[compare_onnx_execution] compute edit distance")
+    dc = DistanceExecution()
+    _, align = dc.distance_sequence(res1, res2)
+    if verbose:
+        print(f"[compare_onnx_execution] got {len(align)} pairs")
+        print("[compare_onnx_execution] done")
+    return res1, res2, align, dc
diff --git a/onnx_array_api/reference/ops/op_fused_matmul.py b/onnx_array_api/reference/ops/op_fused_matmul.py
new file mode 100644
index 0000000..0f738c7
--- /dev/null
+++ b/onnx_array_api/reference/ops/op_fused_matmul.py
@@ -0,0 +1,31 @@
+import numpy as np
+from onnx.reference.op_run import OpRun
+
+
+class FusedMatMul(OpRun):
+    op_domain = "com.microsoft"
+
+    def _run(
+        self,
+        A,
+        B,
+        alpha: float = 1,
+        transA: int = 0,
+        transB: int = 0,
+        transBatchA: int = 0,
+        transBatchB: int = 0,
+    ):
+        assert (
+            transBatchA == 0
+        ), f"Not implemented for transBatchA==1 and {A.shape}x{B.shape}"
+        assert (
+            transBatchB == 0
+        ), f"Not implemented for transBatchB==1 and {A.shape}x{B.shape}"
+        if transA:
+            dim = len(A.shape)
+            A = A.transpose(axes=(dim - 2, dim - 1))
+        if transB:
+            dim = len(B.shape)
+            B = B.transpose(axes=(dim - 2, dim - 1))
+        a = np.array(alpha, dtype=A.dtype)
+        return (A @ B * a,)

From 19e8a9e26b392600e912fb8111d356815732f45d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= <xadupre@users.noreply.github.com>
Date: Tue, 6 Feb 2024 13:29:36 +0100
Subject: [PATCH 18/44] Add line number to the diff report (#72)

* update requirements

* add line numbers

* doc
---
 .../ut_reference/test_evaluator_yield.py      | 15 ++--
 onnx_array_api/reference/evaluator_yield.py   | 68 ++++++++++++-------
 2 files changed, 50 insertions(+), 33 deletions(-)

diff --git a/_unittests/ut_reference/test_evaluator_yield.py b/_unittests/ut_reference/test_evaluator_yield.py
index 7181456..467e6f9 100644
--- a/_unittests/ut_reference/test_evaluator_yield.py
+++ b/_unittests/ut_reference/test_evaluator_yield.py
@@ -422,18 +422,19 @@ def test_distance_sequence_str(self):
         text = dc.to_str(s1, s2, align)
         self.assertIn("OUTPUT", text)
         expected = """
-            =|INPUTfloat322x2ABCDA|INPUTfloat322x2ABCDA
-            =|INPUTfloat322x2ABCDB|INPUTfloat322x2ABCDB
-            ~|INPUTfloat322x3ABCDX|INPUTfloat322x2ABCDX
-            -|RESULTfloat322x2CEIOExpH|
-            =|RESULTfloat322x2CEIOLinearRegrY1|RESULTfloat322x2CEIOLinearRegrY1
-            ~|RESULTfloat322x2CEIOAbsY|RESULTfloat322x3CEIPAbsZ
-            ~|OUTPUTfloat322x2CEIOY|OUTPUTfloat322x2CEIPY
+            1=|INPUTfloat322x2ABCDA|INPUTfloat322x2ABCDA
+            2=|INPUTfloat322x2ABCDB|INPUTfloat322x2ABCDB
+            3~|INPUTfloat322x3ABCDX|INPUTfloat322x2ABCDX
+            4-|RESULTfloat322x2CEIOExpH|
+            5=|RESULTfloat322x2CEIOLinearRegrY1|RESULTfloat322x2CEIOLinearRegrY1
+            6~|RESULTfloat322x2CEIOAbsY|RESULTfloat322x3CEIPAbsZ
+            7~|OUTPUTfloat322x2CEIOY|OUTPUTfloat322x2CEIPY
         """.replace(
             "            ", ""
         ).strip(
             "\n "
         )
+        self.maxDiff = None
         self.assertEqual(expected, text.replace(" ", "").strip("\n"))
 
     def test_compare_execution(self):
diff --git a/onnx_array_api/reference/evaluator_yield.py b/onnx_array_api/reference/evaluator_yield.py
index 3935913..a5e4d4f 100644
--- a/onnx_array_api/reference/evaluator_yield.py
+++ b/onnx_array_api/reference/evaluator_yield.py
@@ -118,6 +118,7 @@ def enumerate_results(
         self,
         output_names: Optional[List[str]] = None,
         feed_inputs: Optional[Dict[str, Any]] = None,
+        raise_exc: bool = True,
     ) -> Iterator[Tuple[ResultType, str, Any]]:
         """
         Executes the onnx model and enumerate all the intermediate results.
@@ -148,6 +149,7 @@ def enumerate_results(
             yield ResultType.INPUT, k, v, None
 
         # step 2: execute nodes
+        yield_output = True
         for node in self.evaluator.rt_nodes_:
             for i in node.input:
                 if i not in results:
@@ -160,39 +162,48 @@ def enumerate_results(
             linked_attributes = {}
             if node.has_linked_attribute and attributes:
                 linked_attributes["linked_attributes"] = attributes
-            if node.need_context():
-                outputs = node.run(*inputs, context=results, **linked_attributes)
-            else:
-                outputs = node.run(*inputs, **linked_attributes)
+
+            try:
+                if node.need_context():
+                    outputs = node.run(*inputs, context=results, **linked_attributes)
+                else:
+                    outputs = node.run(*inputs, **linked_attributes)
+            except Exception:
+                if raise_exc:
+                    raise
+                yield_output = False
+                break
+
             for name, value in zip(node.output, outputs):
                 yield ResultType.RESULT, name, value, node.op_type
                 results[name] = value
 
         # step 3: outputs
-        for name in output_names:
-            if name not in results:
-                raise RuntimeError(
-                    f"Unable to find output name {name!r} in {sorted(results)}, proto is\n{self.proto_}"
-                )
-            yield ResultType.OUTPUT, name, results[name], None
+        if yield_output:
+            for name in output_names:
+                if name not in results:
+                    raise RuntimeError(
+                        f"Unable to find output name {name!r} in {sorted(results)}, proto is\n{self.proto_}"
+                    )
+                yield ResultType.OUTPUT, name, results[name], None
 
     def enumerate_summarized(
         self,
         output_names: Optional[List[str]] = None,
         feed_inputs: Optional[Dict[str, Any]] = None,
+        raise_exc: bool = True,
     ) -> Iterator[ResultExecution]:
         """
         Executes the onnx model and enumerate intermediate results without their names.
 
-        Args:
-            output_names: requested outputs by names, None for all
-            feed_inputs: dictionary `{ input name: input value }`
-
-        Returns:
-            iterator on tuple(result kind, node.type, dtype, shape, value, result name)
+        :param output_names: requested outputs by names, None for all
+        :param feed_inputs: dictionary `{ input name: input value }`
+        :param raise_exc: raises an exception if the execution fails or stop
+            where it is
+        :return: iterator on ResultExecution
         """
         for kind, name, value, op_type in self.enumerate_results(
-            output_names, feed_inputs
+            output_names, feed_inputs, raise_exc=raise_exc
         ):
             summary = make_summary(value)
             yield ResultExecution(
@@ -328,6 +339,7 @@ def to_str(
         """
         rows = []
         last = -1, -1
+        row_index = 1
         for i, j in alignment:
             assert i < len(s1), f"Unexpected value i={i} >= len(s1)={len(s1)}"
             assert j < len(s2), f"Unexpected value i={j} >= len(s2)={len(s2)}"
@@ -338,20 +350,18 @@ def to_str(
                 d2 = s2[j]
                 d = self.distance_pair(d1, d2)
                 symbol = "=" if d == 0 else "~"
-                rows.append(
-                    f"{symbol} | {_align(str(d1), column_size)} | {_align(str(d2), column_size)}"
-                )
+                line = f"{symbol} | {_align(str(d1), column_size)} | {_align(str(d2), column_size)}"
             elif i == last[0]:
                 d2 = s2[j]
-                rows.append(
+                line = (
                     f"+ | {_align('', column_size)} | {_align(str(d2), column_size)} "
                 )
             else:
                 d1 = s1[i]
-                rows.append(
-                    f"- | {_align(str(d1), column_size)} | {_align('', column_size)}"
-                )
+                line = f"- | {_align(str(d1), column_size)} | {_align('', column_size)}"
+            rows.append(f"{row_index: 3d} {line}")
             last = i, j
+            row_index += 1
         return "\n".join(rows)
 
 
@@ -410,6 +420,7 @@ def compare_onnx_execution(
     model2: ModelProto,
     inputs: Optional[List[Any]] = None,
     verbose: int = 0,
+    raise_exc: bool = True,
 ) -> Tuple[List[ResultExecution], List[ResultExecution], List[Tuple[int, int]]]:
     """
     Compares the execution of two onnx models.
@@ -421,6 +432,7 @@ def compare_onnx_execution(
     :param model2: second model
     :param inputs: inputs to use
     :param verbose: verbosity
+    :param raise_exc: raise exception if the execution fails or stop at the error
     :return: four results, a sequence of results for the first model and the second model,
         the alignment between the two, DistanceExecution
     """
@@ -433,11 +445,15 @@ def compare_onnx_execution(
     if verbose:
         print(f"[compare_onnx_execution] got {len(inputs)} inputs")
         print("[compare_onnx_execution] execute first model")
-    res1 = list(YieldEvaluator(model1).enumerate_summarized(None, feeds1))
+    res1 = list(
+        YieldEvaluator(model1).enumerate_summarized(None, feeds1, raise_exc=raise_exc)
+    )
     if verbose:
         print(f"[compare_onnx_execution] got {len(res1)} results")
         print("[compare_onnx_execution] execute second model")
-    res2 = list(YieldEvaluator(model2).enumerate_summarized(None, feeds2))
+    res2 = list(
+        YieldEvaluator(model2).enumerate_summarized(None, feeds2, raise_exc=raise_exc)
+    )
     if verbose:
         print(f"[compare_onnx_execution] got {len(res2)} results")
         print("[compare_onnx_execution] compute edit distance")

From f5d9ed1796a879b49acd340663e9acecf7d1e0e6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= <xadupre@users.noreply.github.com>
Date: Wed, 7 Feb 2024 17:37:17 +0100
Subject: [PATCH 19/44] Better error messages (#73)

* update requirements

* refactoring

* fix edit distance

* remove prints

* improves robustness

* fix remaining issue

* disable failing tests
---
 _unittests/onnx-numpy-skips.txt               |  1 +
 _unittests/ut_reference/test_array_tensor.py  | 26 +-------
 _unittests/ut_reference/test_reference_ops.py | 64 +++++++++++++++++++
 onnx_array_api/reference/evaluator.py         |  3 +
 onnx_array_api/reference/evaluator_yield.py   | 30 +++++++--
 .../reference/ops/op_fused_matmul.py          | 14 ++--
 6 files changed, 101 insertions(+), 37 deletions(-)
 create mode 100644 _unittests/ut_reference/test_reference_ops.py

diff --git a/_unittests/onnx-numpy-skips.txt b/_unittests/onnx-numpy-skips.txt
index bf91e86..d47cefd 100644
--- a/_unittests/onnx-numpy-skips.txt
+++ b/_unittests/onnx-numpy-skips.txt
@@ -4,6 +4,7 @@
 array_api_tests/test_creation_functions.py::test_asarray_arrays
 array_api_tests/test_creation_functions.py::test_empty
 array_api_tests/test_creation_functions.py::test_empty_like
+array_api_tests/test_creation_functions.py::test_eye
 # fails to precision issue
 array_api_tests/test_creation_functions.py::test_linspace
 array_api_tests/test_creation_functions.py::test_meshgrid
diff --git a/_unittests/ut_reference/test_array_tensor.py b/_unittests/ut_reference/test_array_tensor.py
index f13c3e5..59fe5f1 100644
--- a/_unittests/ut_reference/test_array_tensor.py
+++ b/_unittests/ut_reference/test_array_tensor.py
@@ -1,13 +1,7 @@
 import unittest
 import numpy as np
 from onnx import TensorProto
-from onnx.helper import (
-    make_graph,
-    make_model,
-    make_node,
-    make_tensor_value_info,
-    make_opsetid,
-)
+from onnx.helper import make_graph, make_model, make_node, make_tensor_value_info
 from onnx_array_api.ext_test_case import ExtTestCase
 from onnx_array_api.reference import (
     to_array_extended,
@@ -57,24 +51,6 @@ def make_model_f8(fr, to):
                         back = from_array_extended(got, "a")
                         self.assertEqual(to, back.data_type)
 
-    def test_fused_matmul(self):
-        model = make_model(
-            make_graph(
-                [make_node("FusedMatMul", ["X", "Y"], ["Z"], domain="com.microsoft")],
-                "name",
-                [
-                    make_tensor_value_info("X", TensorProto.FLOAT, None),
-                    make_tensor_value_info("Y", TensorProto.FLOAT, None),
-                ],
-                [make_tensor_value_info("Z", TensorProto.FLOAT, None)],
-            ),
-            opset_imports=[make_opsetid("", 18), make_opsetid("com.microsoft", 1)],
-        )
-        ref = ExtendedReferenceEvaluator(model)
-        a = np.arange(4).reshape(-1, 2)
-        got = ref.run(None, {"X": a, "Y": a})
-        self.assertEqualArray(a @ a, got[0])
-
 
 if __name__ == "__main__":
     unittest.main(verbosity=2)
diff --git a/_unittests/ut_reference/test_reference_ops.py b/_unittests/ut_reference/test_reference_ops.py
new file mode 100644
index 0000000..6a44d64
--- /dev/null
+++ b/_unittests/ut_reference/test_reference_ops.py
@@ -0,0 +1,64 @@
+import unittest
+import numpy as np
+from onnx import TensorProto
+from onnx.helper import (
+    make_graph,
+    make_model,
+    make_node,
+    make_tensor_value_info,
+    make_opsetid,
+)
+from onnx_array_api.ext_test_case import ExtTestCase
+from onnx_array_api.reference import ExtendedReferenceEvaluator
+
+
+class TestReferenceOps(ExtTestCase):
+
+    def test_fused_matmul(self):
+        model = make_model(
+            make_graph(
+                [make_node("FusedMatMul", ["X", "Y"], ["Z"], domain="com.microsoft")],
+                "name",
+                [
+                    make_tensor_value_info("X", TensorProto.FLOAT, None),
+                    make_tensor_value_info("Y", TensorProto.FLOAT, None),
+                ],
+                [make_tensor_value_info("Z", TensorProto.FLOAT, None)],
+            ),
+            opset_imports=[make_opsetid("", 18), make_opsetid("com.microsoft", 1)],
+        )
+        ref = ExtendedReferenceEvaluator(model)
+        a = np.arange(4).reshape(-1, 2)
+        got = ref.run(None, {"X": a, "Y": a})
+        self.assertEqualArray(a @ a, got[0])
+
+    def test_fused_matmul11(self):
+        model = make_model(
+            make_graph(
+                [
+                    make_node(
+                        "FusedMatMul",
+                        ["X", "Y"],
+                        ["Z"],
+                        transA=1,
+                        transB=1,
+                        domain="com.microsoft",
+                    )
+                ],
+                "name",
+                [
+                    make_tensor_value_info("X", TensorProto.FLOAT, None),
+                    make_tensor_value_info("Y", TensorProto.FLOAT, None),
+                ],
+                [make_tensor_value_info("Z", TensorProto.FLOAT, None)],
+            ),
+            opset_imports=[make_opsetid("", 18), make_opsetid("com.microsoft", 1)],
+        )
+        ref = ExtendedReferenceEvaluator(model)
+        a = np.arange(4).reshape(-1, 2)
+        got = ref.run(None, {"X": a, "Y": a})
+        self.assertEqualArray(a.T @ a.T, got[0])
+
+
+if __name__ == "__main__":
+    unittest.main(verbosity=2)
diff --git a/onnx_array_api/reference/evaluator.py b/onnx_array_api/reference/evaluator.py
index 54f0c26..e6ab25f 100644
--- a/onnx_array_api/reference/evaluator.py
+++ b/onnx_array_api/reference/evaluator.py
@@ -110,4 +110,7 @@ def run(self, *args, **kwargs):
         """
         See :meth:`onnx.reference.ReferenceEvaluator.run`.
         """
+        if len(args) == 1 and isinstance(args[0], list):
+            feeds = dict(zip(self.input_names, args[0]))
+            return self.run(None, feeds, **kwargs)
         return ReferenceEvaluator.run(self, *args, **kwargs)
diff --git a/onnx_array_api/reference/evaluator_yield.py b/onnx_array_api/reference/evaluator_yield.py
index a5e4d4f..40ec97f 100644
--- a/onnx_array_api/reference/evaluator_yield.py
+++ b/onnx_array_api/reference/evaluator_yield.py
@@ -1,5 +1,5 @@
 from dataclasses import dataclass
-from typing import Any, Dict, List, Iterator, Optional, Tuple
+from typing import Any, Dict, List, Iterator, Optional, Tuple, Union
 from enum import IntEnum
 import numpy as np
 from onnx import ModelProto, TensorProto, ValueInfoProto
@@ -77,6 +77,12 @@ def make_summary(value: Any, length: int = 4, modulo: int = 26) -> str:
     :param module: discretization parameter
     :return: short string
     """
+    if isinstance(value, np.float32):
+        # This should not happen.
+        value = np.array(value)
+    assert isinstance(
+        value, np.ndarray
+    ), f"Unexpected type {type(value)} for value, it must be a numpy array."
     value4 = np.zeros(length, dtype=np.float64)
     if value.size <= length:
         value4[: value.size] = value.flatten().astype(np.float64)
@@ -170,6 +176,9 @@ def enumerate_results(
                     outputs = node.run(*inputs, **linked_attributes)
             except Exception:
                 if raise_exc:
+                    # ExtendedReferenceEvaluator(self.onnx_model, verbose=10).run(
+                    #   None, feed_inputs
+                    # )
                     raise
                 yield_output = False
                 break
@@ -286,12 +295,12 @@ def distance_sequence(
         :param s2: second sequence
         :return: distance and alignment
         """
-        delay = self.max_lag
+        delay = max(self.max_lag, abs(len(s2) - len(s1)) + 1)
         distance = {(-1, -1): 0}
         predecessor = {(-1, -1): None}
         for i in range(len(s1)):
             for j in range(max(0, i - delay), min(len(s2), i + delay)):
-                best = 1e100
+                best = distance.get((i, j), 1e100)
                 pred = None
                 ki, kj = i - 1, j - 1
                 if (ki, kj) in distance:
@@ -418,7 +427,7 @@ def generate_inputs(model: ModelProto) -> List[np.ndarray]:
 def compare_onnx_execution(
     model1: ModelProto,
     model2: ModelProto,
-    inputs: Optional[List[Any]] = None,
+    inputs: Optional[Union[List[Any], Tuple[Dict[str, Any]]]] = None,
     verbose: int = 0,
     raise_exc: bool = True,
 ) -> Tuple[List[ResultExecution], List[ResultExecution], List[Tuple[int, int]]]:
@@ -430,7 +439,8 @@ def compare_onnx_execution(
 
     :param model1: first model
     :param model2: second model
-    :param inputs: inputs to use
+    :param inputs: inputs to use, a list of inputs if both models have
+        the same number of inputs or two dictionaries, one for each model
     :param verbose: verbosity
     :param raise_exc: raise exception if the execution fails or stop at the error
     :return: four results, a sequence of results for the first model and the second model,
@@ -440,8 +450,14 @@ def compare_onnx_execution(
         print("[compare_onnx_execution] generate inputs")
     if inputs is None:
         inputs = generate_inputs(model1)
-    feeds1 = {i.name: v for i, v in zip(model1.graph.input, inputs)}
-    feeds2 = {i.name: v for i, v in zip(model2.graph.input, inputs)}
+    if isinstance(inputs, tuple):
+        assert len(inputs) == 2, f"Unexpected number  {len(inputs)} of inputs."
+        feeds1, feeds2 = inputs
+    else:
+        feeds1 = {i.name: v for i, v in zip(model1.graph.input, inputs)}
+        feeds2 = {i.name: v for i, v in zip(model2.graph.input, inputs)}
+    assert isinstance(feeds1, dict), f"Unexpected type {type(feeds1)} for inputs"
+    assert isinstance(feeds2, dict), f"Unexpected type {type(feeds2)} for inputs"
     if verbose:
         print(f"[compare_onnx_execution] got {len(inputs)} inputs")
         print("[compare_onnx_execution] execute first model")
diff --git a/onnx_array_api/reference/ops/op_fused_matmul.py b/onnx_array_api/reference/ops/op_fused_matmul.py
index 0f738c7..1ee0f04 100644
--- a/onnx_array_api/reference/ops/op_fused_matmul.py
+++ b/onnx_array_api/reference/ops/op_fused_matmul.py
@@ -22,10 +22,14 @@ def _run(
             transBatchB == 0
         ), f"Not implemented for transBatchB==1 and {A.shape}x{B.shape}"
         if transA:
-            dim = len(A.shape)
-            A = A.transpose(axes=(dim - 2, dim - 1))
+            perm = list(range(len(A.shape)))
+            dim = len(perm)
+            perm[dim - 2], perm[dim - 1] = perm[dim - 1], perm[dim - 2]
+            A = np.transpose(A, perm)
         if transB:
-            dim = len(B.shape)
-            B = B.transpose(axes=(dim - 2, dim - 1))
+            perm = list(range(len(B.shape)))
+            dim = len(perm)
+            perm[dim - 2], perm[dim - 1] = perm[dim - 1], perm[dim - 2]
+            B = np.transpose(B, perm)
         a = np.array(alpha, dtype=A.dtype)
-        return (A @ B * a,)
+        return (np.matmul(A, B) * a,)

From a070da3340e8a498dfdabd5aa5460f97160e85c8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= <xadupre@users.noreply.github.com>
Date: Thu, 8 Feb 2024 17:32:13 +0100
Subject: [PATCH 20/44] Update readme (#74)

* update requirements

* improves readme

* improve documentation

* documentation

* fix format
---
 README.rst                                    |  9 ++--
 _doc/examples/plot_onnx_diff.py               |  1 +
 _doc/index.rst                                |  1 +
 _doc/long_outputs.rst                         | 50 +++++++++++++++++++
 _unittests/onnx-numpy-skips.txt               |  1 +
 .../ut_reference/test_evaluator_yield.py      | 14 +++---
 onnx_array_api/reference/evaluator_yield.py   |  2 +-
 7 files changed, 65 insertions(+), 13 deletions(-)
 create mode 100644 _doc/long_outputs.rst

diff --git a/README.rst b/README.rst
index fa6cdb7..f7b387f 100644
--- a/README.rst
+++ b/README.rst
@@ -31,6 +31,10 @@ onnx-array-api: APIs to create ONNX Graphs
 
 **onnx-array-api** implements APIs to create custom ONNX graphs.
 The objective is to speed up the implementation of converter libraries.
+The library is released on
+`pypi/onnx-array-api <https://pypi.org/project/onnx-array-api/>`_
+and its documentation is published at
+`APIs to create ONNX Graphs <https://sdpython.github.io/doc/onnx-array-api/dev/>`_.
 
 Numpy API
 +++++++++
@@ -147,11 +151,6 @@ The euclidean distance looks like the following:
         .to_onnx()
     )    
 
-The library is released on
-`pypi/onnx-array-api <https://pypi.org/project/onnx-array-api/>`_
-and its documentation is published at
-`APIs to create ONNX Graphs <https://sdpython.github.io/doc/onnx-array-api/dev/>`_.
-
 GraphBuilder API
 ++++++++++++++++
 
diff --git a/_doc/examples/plot_onnx_diff.py b/_doc/examples/plot_onnx_diff.py
index 7a5f1d3..7b6ecdf 100644
--- a/_doc/examples/plot_onnx_diff.py
+++ b/_doc/examples/plot_onnx_diff.py
@@ -64,5 +64,6 @@
 print(text)
 
 ###############################
+# See :ref:`l-long-output-compare_onnx_execution` for a better view.
 # The display shows that ReduceSumSquare was replaced by Mul + ReduceSum,
 # and ReduceLogSumExp by ReduceMax + Sub + Exp + Log + Add.
diff --git a/_doc/index.rst b/_doc/index.rst
index b81be4f..f9a07e5 100644
--- a/_doc/index.rst
+++ b/_doc/index.rst
@@ -45,6 +45,7 @@ The objective is to speed up the implementation of converter libraries.
 
     CHANGELOGS
     license
+    long_outputs
 
 Sources available on
 `github/onnx-array-api <https://github.com/sdpython/onnx-array-api>`_.
diff --git a/_doc/long_outputs.rst b/_doc/long_outputs.rst
new file mode 100644
index 0000000..64c0b84
--- /dev/null
+++ b/_doc/long_outputs.rst
@@ -0,0 +1,50 @@
+:hide-toc:
+
+==========================
+Long outputs hard to read
+==========================
+
+.. contents::
+    :local:
+
+onnx
+====
+
+.. _l-long-output-compare_onnx_execution:
+
+onnx_array_api.reference.compare_onnx_execution
++++++++++++++++++++++++++++++++++++++++++++++++
+
+From example :ref:`l-onnx-diff-example` for function
+:func:`onnx_array_api.reference.compare_onnx_execution`.
+See also `raw rendering <https://github.com/sdpython/onnx-array-api/blob/main/_doc/long_outputs.rst#onnx_array_apireferencecompare_onnx_execution>`_.
+
+::
+
+     1 = | INITIA float64  1               HAAA            Ad_Addcst    | INITIA float64  1               HAAA            Ad_Addcst   
+     2 = | INITIA float64  4x4             ADZF            Ge_Gemmcst   | INITIA float64  4x4             ADZF            Ge_Gemmcst  
+     3 = | INITIA float64  4               USEA            Ge_Gemmcst1  | INITIA float64  4               USEA            Ge_Gemmcst1 
+     4 = | INITIA float64  1               AAAA            Mu_Mulcst    | INITIA float64  1               AAAA            Mu_Mulcst   
+     5 = | INITIA float64  1               DAAA            Ad_Addcst1   | INITIA float64  1               DAAA            Ad_Addcst1  
+     6 = | INITIA float64  1               AAAA            Ad_Addcst2   | INITIA float64  1               AAAA            Ad_Addcst2  
+     7 = | INPUT  float64  1x4             AAAA            X            | INPUT  float64  1x4             AAAA            X           
+     8 = | RESULT float64  1x4             UTFC Gemm       Ge_Y0        | RESULT float64  1x4             UTFC Gemm       Ge_Y0       
+     9 + |                                                              | RESULT float64  1x4             TIEG Mul        Mu_C01       
+    10 ~ | RESULT float64  1x1             NAAA ReduceSumS Re_reduced0  | RESULT float64  1x1             NAAA ReduceSum  Re_reduced0 
+    11 = | RESULT float64  1x1             NAAA Concat     Co_concat_re | RESULT float64  1x1             NAAA Concat     Co_concat_re
+    12 = | RESULT float64  1x1             UAAA Add        Ad_C02       | RESULT float64  1x1             UAAA Add        Ad_C02      
+    13 = | RESULT float64  1x1             DAAA Mul        Mu_C0        | RESULT float64  1x1             DAAA Mul        Mu_C0       
+    14 = | RESULT float64  1x1             GAAA Add        Ad_C01       | RESULT float64  1x1             GAAA Add        Ad_C01      
+    15 = | RESULT float64  1x1             GAAA Add        Ad_C0        | RESULT float64  1x1             GAAA Add        Ad_C0       
+    16 = | RESULT int64    1x1             AAAA ArgMax     label        | RESULT int64    1x1             AAAA ArgMax     label       
+    17 + |                                                              | RESULT float64  1x1             GAAA ReduceMax  Re_reduced03 
+    18 + |                                                              | RESULT float64  1x1             AAAA Sub        Su_C01       
+    19 + |                                                              | RESULT float64  1x1             BAAA Exp        Ex_output0   
+    20 + |                                                              | RESULT float64  1x1             BAAA ReduceSum  Re_reduced02 
+    21 + |                                                              | RESULT float64  1x1             AAAA Log        Lo_output0   
+    22 ~ | RESULT float64  1x1             GAAA ReduceLogS score_sample | RESULT float64  1x1             GAAA Add        score_sample
+    23 = | RESULT float64  1x1             AAAA Sub        Su_C0        | RESULT float64  1x1             AAAA Sub        Su_C0       
+    24 = | RESULT float64  1x1             BAAA Exp        probabilitie | RESULT float64  1x1             BAAA Exp        probabilitie
+    25 = | OUTPUT int64    1x1             AAAA            label        | OUTPUT int64    1x1             AAAA            label       
+    26 = | OUTPUT float64  1x1             BAAA            probabilitie | OUTPUT float64  1x1             BAAA            probabilitie
+    27 = | OUTPUT float64  1x1             GAAA            score_sample | OUTPUT float64  1x1             GAAA            score_sample    
diff --git a/_unittests/onnx-numpy-skips.txt b/_unittests/onnx-numpy-skips.txt
index d47cefd..1d46bbb 100644
--- a/_unittests/onnx-numpy-skips.txt
+++ b/_unittests/onnx-numpy-skips.txt
@@ -1,6 +1,7 @@
 # API failures
 # see https://github.com/data-apis/array-api-tests/blob/master/numpy-skips.txt
 # uses __setitem__
+array_api_tests/test_creation_functions.py::test_arange
 array_api_tests/test_creation_functions.py::test_asarray_arrays
 array_api_tests/test_creation_functions.py::test_empty
 array_api_tests/test_creation_functions.py::test_empty_like
diff --git a/_unittests/ut_reference/test_evaluator_yield.py b/_unittests/ut_reference/test_evaluator_yield.py
index 467e6f9..e7f6817 100644
--- a/_unittests/ut_reference/test_evaluator_yield.py
+++ b/_unittests/ut_reference/test_evaluator_yield.py
@@ -422,13 +422,13 @@ def test_distance_sequence_str(self):
         text = dc.to_str(s1, s2, align)
         self.assertIn("OUTPUT", text)
         expected = """
-            1=|INPUTfloat322x2ABCDA|INPUTfloat322x2ABCDA
-            2=|INPUTfloat322x2ABCDB|INPUTfloat322x2ABCDB
-            3~|INPUTfloat322x3ABCDX|INPUTfloat322x2ABCDX
-            4-|RESULTfloat322x2CEIOExpH|
-            5=|RESULTfloat322x2CEIOLinearRegrY1|RESULTfloat322x2CEIOLinearRegrY1
-            6~|RESULTfloat322x2CEIOAbsY|RESULTfloat322x3CEIPAbsZ
-            7~|OUTPUTfloat322x2CEIOY|OUTPUTfloat322x2CEIPY
+            001=|INPUTfloat322x2ABCDA|INPUTfloat322x2ABCDA
+            002=|INPUTfloat322x2ABCDB|INPUTfloat322x2ABCDB
+            003~|INPUTfloat322x3ABCDX|INPUTfloat322x2ABCDX
+            004-|RESULTfloat322x2CEIOExpH|
+            005=|RESULTfloat322x2CEIOLinearRegrY1|RESULTfloat322x2CEIOLinearRegrY1
+            006~|RESULTfloat322x2CEIOAbsY|RESULTfloat322x3CEIPAbsZ
+            007~|OUTPUTfloat322x2CEIOY|OUTPUTfloat322x2CEIPY
         """.replace(
             "            ", ""
         ).strip(
diff --git a/onnx_array_api/reference/evaluator_yield.py b/onnx_array_api/reference/evaluator_yield.py
index 40ec97f..df171a6 100644
--- a/onnx_array_api/reference/evaluator_yield.py
+++ b/onnx_array_api/reference/evaluator_yield.py
@@ -368,7 +368,7 @@ def to_str(
             else:
                 d1 = s1[i]
                 line = f"- | {_align(str(d1), column_size)} | {_align('', column_size)}"
-            rows.append(f"{row_index: 3d} {line}")
+            rows.append(f"{row_index:03d} {line}")
             last = i, j
             row_index += 1
         return "\n".join(rows)

From 7675869e2776644bdd87f570b2e48f08a30cbe62 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= <xadupre@users.noreply.github.com>
Date: Thu, 15 Feb 2024 11:00:24 +0100
Subject: [PATCH 21/44] Extend ExtendedReferenceEvaluator (#75)

* update requirements

* add more operators to the reference evaluator

* extend unit test coverage
---
 CHANGELOGS.rst                                |  1 +
 _unittests/ut_reference/test_reference_ops.py | 82 ++++++++++++++++
 onnx_array_api/reference/evaluator.py         |  7 ++
 .../reference/ops/op_memcpy_host.py           | 11 +++
 onnx_array_api/reference/ops/op_quick_gelu.py | 23 +++++
 .../reference/ops/op_scatter_elements.py      | 98 +++++++++++++++++++
 6 files changed, 222 insertions(+)
 create mode 100644 onnx_array_api/reference/ops/op_memcpy_host.py
 create mode 100644 onnx_array_api/reference/ops/op_quick_gelu.py
 create mode 100644 onnx_array_api/reference/ops/op_scatter_elements.py

diff --git a/CHANGELOGS.rst b/CHANGELOGS.rst
index d0b6445..e139c0a 100644
--- a/CHANGELOGS.rst
+++ b/CHANGELOGS.rst
@@ -4,6 +4,7 @@ Change Logs
 0.2.0
 +++++
 
+* :pr:`75`: add QuickGelu to ExtendedReferenceEvaluator
 * :pr:`71`: adds tools to compare two onnx graphs
 * :pr:`61`: adds function to plot onnx model as graphs
 * :pr:`60`: supports translation of local functions
diff --git a/_unittests/ut_reference/test_reference_ops.py b/_unittests/ut_reference/test_reference_ops.py
index 6a44d64..9ae6fec 100644
--- a/_unittests/ut_reference/test_reference_ops.py
+++ b/_unittests/ut_reference/test_reference_ops.py
@@ -59,6 +59,88 @@ def test_fused_matmul11(self):
         got = ref.run(None, {"X": a, "Y": a})
         self.assertEqualArray(a.T @ a.T, got[0])
 
+    def test_memcpy(self):
+        model = make_model(
+            make_graph(
+                [
+                    make_node("MemcpyToHost", ["X"], ["Z"]),
+                    make_node("MemcpyFromHost", ["X"], ["Z"]),
+                ],
+                "name",
+                [make_tensor_value_info("X", TensorProto.FLOAT, None)],
+                [make_tensor_value_info("Z", TensorProto.FLOAT, None)],
+            ),
+            opset_imports=[make_opsetid("", 18), make_opsetid("com.microsoft", 1)],
+            ir_version=9,
+        )
+        a = np.arange(4).reshape(-1, 2).astype(np.float32)
+        ref = ExtendedReferenceEvaluator(model)
+        got = ref.run(None, {"X": a})
+        self.assertEqualArray(a, got[0])
+
+    def test_quick_gelu(self):
+        from onnxruntime import InferenceSession
+
+        for alpha in [0.0, 2.0]:
+            model = make_model(
+                make_graph(
+                    [
+                        make_node(
+                            "QuickGelu",
+                            ["X"],
+                            ["Z"],
+                            domain="com.microsoft",
+                            alpha=alpha,
+                        )
+                    ],
+                    "name",
+                    [make_tensor_value_info("X", TensorProto.FLOAT, None)],
+                    [make_tensor_value_info("Z", TensorProto.FLOAT, None)],
+                ),
+                opset_imports=[make_opsetid("", 18), make_opsetid("com.microsoft", 1)],
+                ir_version=9,
+            )
+            sess = InferenceSession(
+                model.SerializeToString(), providers=["CPUExecutionProvider"]
+            )
+            a = np.arange(4).reshape(-1, 2).astype(np.float32)
+            expected = sess.run(None, {"X": a})
+            ref = ExtendedReferenceEvaluator(model)
+            got = ref.run(None, {"X": a})
+            self.assertEqualArray(expected[0], got[0])
+
+    def test_scatter_elements(self):
+        model = make_model(
+            make_graph(
+                [
+                    make_node(
+                        "ScatterElements",
+                        ["data", "indices", "updates"],
+                        ["Z"],
+                        axis=3,
+                        reduction="add",
+                    )
+                ],
+                "name",
+                [
+                    make_tensor_value_info("data", TensorProto.FLOAT, None),
+                    make_tensor_value_info("indices", TensorProto.INT64, None),
+                    make_tensor_value_info("updates", TensorProto.FLOAT, None),
+                ],
+                [make_tensor_value_info("Z", TensorProto.FLOAT, None)],
+            ),
+            opset_imports=[make_opsetid("", 18)],
+        )
+        data = np.zeros(2**4, dtype=np.float32).reshape((2, 2, 2, 2))
+        indices = np.array([[[[0]]]], dtype=np.int64)
+        updates = np.array([[[[1]]]], dtype=np.float32)
+        y = np.array(
+            [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=np.float32
+        ).reshape((2, 2, 2, 2))
+        ref = ExtendedReferenceEvaluator(model)
+        got = ref.run(None, {"data": data, "indices": indices, "updates": updates})
+        self.assertEqualArray(y, got[0])
+
 
 if __name__ == "__main__":
     unittest.main(verbosity=2)
diff --git a/onnx_array_api/reference/evaluator.py b/onnx_array_api/reference/evaluator.py
index e6ab25f..89b5a84 100644
--- a/onnx_array_api/reference/evaluator.py
+++ b/onnx_array_api/reference/evaluator.py
@@ -8,6 +8,9 @@
 from .ops.op_concat import Concat
 from .ops.op_constant_of_shape import ConstantOfShape
 from .ops.op_fused_matmul import FusedMatMul
+from .ops.op_memcpy_host import MemcpyFromHost, MemcpyToHost
+from .ops.op_quick_gelu import QuickGelu
+from .ops.op_scatter_elements import ScatterElements
 
 
 logger = getLogger("onnx-array-api-eval")
@@ -34,6 +37,10 @@ class ExtendedReferenceEvaluator(ReferenceEvaluator):
         CastLike_19,
         ConstantOfShape,
         FusedMatMul,
+        MemcpyFromHost,
+        MemcpyToHost,
+        QuickGelu,
+        ScatterElements,
     ]
 
     @staticmethod
diff --git a/onnx_array_api/reference/ops/op_memcpy_host.py b/onnx_array_api/reference/ops/op_memcpy_host.py
new file mode 100644
index 0000000..ac365e7
--- /dev/null
+++ b/onnx_array_api/reference/ops/op_memcpy_host.py
@@ -0,0 +1,11 @@
+from onnx.reference.op_run import OpRun
+
+
+class MemcpyFromHost(OpRun):
+    def _run(self, x):
+        return (x,)
+
+
+class MemcpyToHost(OpRun):
+    def _run(self, x):
+        return (x,)
diff --git a/onnx_array_api/reference/ops/op_quick_gelu.py b/onnx_array_api/reference/ops/op_quick_gelu.py
new file mode 100644
index 0000000..e30c5ec
--- /dev/null
+++ b/onnx_array_api/reference/ops/op_quick_gelu.py
@@ -0,0 +1,23 @@
+import numpy as np
+from onnx.reference.op_run import OpRun
+
+
+def sigmoid(x):  # type: ignore
+    if x > 0:
+        return 1 / (1 + np.exp(-x))
+    return np.exp(x) / (1 + np.exp(x))
+
+
+class QuickGelu(OpRun):
+    op_domain = "com.microsoft"
+
+    def __init__(self, onnx_node, run_params):  # type: ignore
+        OpRun.__init__(self, onnx_node, run_params)
+        self.vf = np.vectorize(sigmoid)
+
+    def _run(self, X, alpha=1.0):
+        if len(X.shape) == 0:
+            return ((X * sigmoid(X * alpha)).astype(X.dtype),)
+        if X.size == 0:
+            return (X,)
+        return ((X * self.vf(X * alpha)).astype(X.dtype),)
diff --git a/onnx_array_api/reference/ops/op_scatter_elements.py b/onnx_array_api/reference/ops/op_scatter_elements.py
new file mode 100644
index 0000000..c4b0efa
--- /dev/null
+++ b/onnx_array_api/reference/ops/op_scatter_elements.py
@@ -0,0 +1,98 @@
+import numpy as np
+
+from onnx.reference.op_run import OpRun
+
+
+def scatter_elements(data, indices, updates, axis=0, reduction=None):  # type: ignore
+    if reduction == "add":
+
+        def f(x, y):
+            return x + y
+
+    elif reduction == "min":
+
+        def f(x, y):
+            return min(x, y)
+
+    elif reduction == "max":
+
+        def f(x, y):
+            return max(x, y)
+
+    else:
+
+        def f(x, y):
+            return y
+
+    if axis < 0:
+        axis = data.ndim + axis
+
+    if len(data.shape) == 1 and axis == 0:
+        scattered = np.copy(data)
+        for pos, up in zip(indices, updates):
+            scattered[pos] = f(scattered[pos], up)
+        return scattered
+
+    if len(indices.shape) == 2:
+        scattered = np.copy(data)
+        if axis == 0:
+            for i in range(indices.shape[0]):
+                for j in range(indices.shape[1]):
+                    scattered[indices[i, j], j] = f(
+                        scattered[indices[i, j], j], updates[i, j]
+                    )
+        else:
+            for i in range(indices.shape[0]):
+                for j in range(indices.shape[1]):
+                    scattered[i, indices[i, j]] = f(
+                        scattered[i, indices[i, j]], updates[i, j]
+                    )
+        return scattered
+
+    if len(indices.shape) == 3:
+        scattered = np.copy(data)
+        if axis == 0:
+            for i in range(indices.shape[0]):
+                for j in range(indices.shape[1]):
+                    for k in range(indices.shape[2]):
+                        scattered[indices[i, j, k], j, k] = f(
+                            scattered[indices[i, j, k], j, k], updates[i, j, k]
+                        )
+        elif axis == 1:
+            for i in range(indices.shape[0]):
+                for j in range(indices.shape[1]):
+                    for k in range(indices.shape[2]):
+                        scattered[i, indices[i, j, k], k] = f(
+                            scattered[i, indices[i, j, k], k], updates[i, j, k]
+                        )
+        elif axis == 2:
+            for i in range(indices.shape[0]):
+                for j in range(indices.shape[1]):
+                    for k in range(indices.shape[2]):
+                        scattered[i, j, indices[i, j, k]] = f(
+                            scattered[i, j, indices[i, j, k]], updates[i, j, k]
+                        )
+        return scattered
+
+    if len(indices.shape) == 4:
+        scattered = np.copy(data)
+        if axis == 3:
+            for a in range(indices.shape[0]):
+                for i in range(indices.shape[1]):
+                    for j in range(indices.shape[2]):
+                        for k in range(indices.shape[3]):
+                            scattered[a, i, j, indices[a, i, j, k]] = f(
+                                scattered[a, i, j, indices[a, i, j, k]],
+                                updates[a, i, j, k],
+                            )
+            return scattered
+
+    raise RuntimeError(
+        f"Not implemented for indices.shape={indices.shape} and axis={axis}"
+    )
+
+
+class ScatterElements(OpRun):
+    def _run(self, data, indices, updates, axis=None, reduction=None):  # type: ignore
+        res = scatter_elements(data, indices, updates, axis=axis, reduction=reduction)
+        return (res,)

From 4cf9dcc573774013bc322fc0ea74610dca32eaea Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= <xadupre@users.noreply.github.com>
Date: Wed, 21 Feb 2024 13:31:52 +0100
Subject: [PATCH 22/44] Adds a mode to compare models without execution (#76)

* update requirements

* Add a mode to compare models without execution

* changelogs

* improve initializer

* fix display

* fix side
---
 CHANGELOGS.rst                                |   1 +
 .../ut_reference/test_evaluator_yield.py      |  78 ++++++-
 onnx_array_api/_command_lines_parser.py       |  17 +-
 onnx_array_api/reference/evaluator_yield.py   | 197 +++++++++++++++---
 4 files changed, 255 insertions(+), 38 deletions(-)

diff --git a/CHANGELOGS.rst b/CHANGELOGS.rst
index e139c0a..13c81ab 100644
--- a/CHANGELOGS.rst
+++ b/CHANGELOGS.rst
@@ -4,6 +4,7 @@ Change Logs
 0.2.0
 +++++
 
+* :pr:`76`: add a mode to compare models without execution
 * :pr:`75`: add QuickGelu to ExtendedReferenceEvaluator
 * :pr:`71`: adds tools to compare two onnx graphs
 * :pr:`61`: adds function to plot onnx model as graphs
diff --git a/_unittests/ut_reference/test_evaluator_yield.py b/_unittests/ut_reference/test_evaluator_yield.py
index e7f6817..83fabe2 100644
--- a/_unittests/ut_reference/test_evaluator_yield.py
+++ b/_unittests/ut_reference/test_evaluator_yield.py
@@ -1,6 +1,7 @@
 import unittest
 import numpy as np
 from onnx import TensorProto
+from onnx.checker import check_model
 from onnx.helper import (
     make_function,
     make_graph,
@@ -9,6 +10,7 @@
     make_opsetid,
     make_tensor_value_info,
 )
+from onnx.numpy_helper import from_array
 from onnx.parser import parse_model
 from onnx_array_api.ext_test_case import ExtTestCase
 from onnx_array_api.reference import (
@@ -422,13 +424,13 @@ def test_distance_sequence_str(self):
         text = dc.to_str(s1, s2, align)
         self.assertIn("OUTPUT", text)
         expected = """
-            001=|INPUTfloat322x2ABCDA|INPUTfloat322x2ABCDA
-            002=|INPUTfloat322x2ABCDB|INPUTfloat322x2ABCDB
-            003~|INPUTfloat322x3ABCDX|INPUTfloat322x2ABCDX
-            004-|RESULTfloat322x2CEIOExpH|
-            005=|RESULTfloat322x2CEIOLinearRegrY1|RESULTfloat322x2CEIOLinearRegrY1
-            006~|RESULTfloat322x2CEIOAbsY|RESULTfloat322x3CEIPAbsZ
-            007~|OUTPUTfloat322x2CEIOY|OUTPUTfloat322x2CEIPY
+            001=|INPUTfloat322:2x2ABCDA|INPUTfloat322:2x2ABCDA
+            002=|INPUTfloat322:2x2ABCDB|INPUTfloat322:2x2ABCDB
+            003~|INPUTfloat322:2x3ABCDX|INPUTfloat322:2x2ABCDX
+            004-|RESULTfloat322:2x2CEIOExpH|
+            005=|RESULTfloat322:2x2CEIOLinearRegressioY1|RESULTfloat322:2x2CEIOLinearRegressioY1
+            006~|RESULTfloat322:2x2CEIOAbsY|RESULTfloat322:2x3CEIPAbsZ
+            007~|OUTPUTfloat322:2x2CEIOY|OUTPUTfloat322:2x2CEIPY
         """.replace(
             "            ", ""
         ).strip(
@@ -460,6 +462,68 @@ def test_compare_execution(self):
         self.assertIn("CAAA Constant", text)
         self.assertEqual(len(align), 5)
 
+    def test_no_execution(self):
+        model = make_model(
+            make_graph(
+                [
+                    make_node("Unsqueeze", ["X", "zero"], ["xu1"]),
+                    make_node("Unsqueeze", ["xu1", "un"], ["xu2"]),
+                    make_node("Reshape", ["xu2", "shape1"], ["xm1"]),
+                    make_node("Reshape", ["Y", "shape2"], ["xm2c"]),
+                    make_node("Cast", ["xm2c"], ["xm2"], to=1),
+                    make_node("MatMul", ["xm1", "xm2"], ["xm"]),
+                    make_node("Reshape", ["xm", "shape3"], ["Z"]),
+                ],
+                "dummy",
+                [
+                    make_tensor_value_info("X", TensorProto.FLOAT, [32, 128]),
+                    make_tensor_value_info("Y", TensorProto.FLOAT, [3, 5, 128, 64]),
+                ],
+                [make_tensor_value_info("Z", TensorProto.FLOAT, [3, 5, 32, "N"])],
+                [
+                    from_array(np.array([0], dtype=np.int64), name="zero"),
+                    from_array(np.array([1], dtype=np.int64), name="un"),
+                    from_array(np.array([1, 32, 128], dtype=np.int64), name="shape1"),
+                    from_array(np.array([15, 128, 64], dtype=np.int64), name="shape2"),
+                    from_array(np.array([3, 5, 32, 64], dtype=np.int64), name="shape3"),
+                ],
+            )
+        )
+        check_model(model)
+        res1, res2, align, dc = compare_onnx_execution(model, model, mode="nodes")
+        text = dc.to_str(res1, res2, align)
+        self.assertIn("012 = | NODE", text)
+
+        model2 = make_model(
+            make_graph(
+                [
+                    make_node("Unsqueeze", ["X", "zero"], ["xu1"]),
+                    make_node("Unsqueeze", ["xu1", "un"], ["xu2"]),
+                    make_node("Reshape", ["xu2", "shape1"], ["xm1"]),
+                    make_node("Reshape", ["Y", "shape2"], ["xm2c"]),
+                    make_node("MatMul", ["xm1", "xm2c"], ["xm"]),
+                    make_node("Reshape", ["xm", "shape3"], ["Z"]),
+                ],
+                "dummy",
+                [
+                    make_tensor_value_info("X", TensorProto.FLOAT, [32, 128]),
+                    make_tensor_value_info("Y", TensorProto.FLOAT, [3, 5, 128, 64]),
+                ],
+                [make_tensor_value_info("Z", TensorProto.FLOAT, [3, 5, 32, "N"])],
+                [
+                    from_array(np.array([0], dtype=np.int64), name="zero"),
+                    from_array(np.array([1], dtype=np.int64), name="un"),
+                    from_array(np.array([1, 32, 128], dtype=np.int64), name="shape1"),
+                    from_array(np.array([15, 128, 64], dtype=np.int64), name="shape2"),
+                    from_array(np.array([3, 5, 32, 64], dtype=np.int64), name="shape3"),
+                ],
+            )
+        )
+        check_model(model2)
+        res1, res2, align, dc = compare_onnx_execution(model, model2, mode="nodes")
+        text = dc.to_str(res1, res2, align)
+        self.assertIn("012 = | NODE", text)
+
 
 if __name__ == "__main__":
     unittest.main(verbosity=2)
diff --git a/onnx_array_api/_command_lines_parser.py b/onnx_array_api/_command_lines_parser.py
index a180deb..0450977 100644
--- a/onnx_array_api/_command_lines_parser.py
+++ b/onnx_array_api/_command_lines_parser.py
@@ -20,7 +20,7 @@ def get_main_parser() -> ArgumentParser:
         Selects a command.
         
         'translate' exports an onnx graph into a piece of code replicating it,
-        'compares' compares the execution of two onnx models
+        'compare' compares the execution of two onnx models
         """
         ),
     )
@@ -90,6 +90,13 @@ def get_parser_compare() -> ArgumentParser:
         required=True,
         help="second onnx model",
     )
+    parser.add_argument(
+        "-m",
+        "--mode",
+        choices=["execute", "nodes"],
+        default="execute",
+        help="compare the execution ('execute') or the nodes only ('nodes')",
+    )
     parser.add_argument(
         "-v",
         "--verbose",
@@ -112,8 +119,10 @@ def _cmd_compare(argv: List[Any]):
     args = parser.parse_args(argv[1:])
     onx1 = onnx.load(args.model1)
     onx2 = onnx.load(args.model2)
-    res1, res2, align, dc = compare_onnx_execution(onx1, onx2, verbose=args.verbose)
-    text = dc.to_str(res1, res2, align, column_size=args.column_size)
+    res1, res2, align, dc = compare_onnx_execution(
+        onx1, onx2, verbose=args.verbose, mode=args.mode
+    )
+    text = dc.to_str(res1, res2, align, column_size=int(args.column_size))
     print(text)
 
 
@@ -127,7 +136,7 @@ def main(argv: Optional[List[Any]] = None):
             parser = get_main_parser()
             parser.parse_args(argv)
         else:
-            parsers = dict(translate=get_parser_translate)
+            parsers = dict(translate=get_parser_translate, compare=get_parser_compare)
             cmd = argv[0]
             if cmd not in parsers:
                 raise ValueError(
diff --git a/onnx_array_api/reference/evaluator_yield.py b/onnx_array_api/reference/evaluator_yield.py
index df171a6..7942d8f 100644
--- a/onnx_array_api/reference/evaluator_yield.py
+++ b/onnx_array_api/reference/evaluator_yield.py
@@ -2,7 +2,10 @@
 from typing import Any, Dict, List, Iterator, Optional, Tuple, Union
 from enum import IntEnum
 import numpy as np
-from onnx import ModelProto, TensorProto, ValueInfoProto
+from onnx import ModelProto, TensorProto, ValueInfoProto, load
+from onnx.helper import tensor_dtype_to_np_dtype
+from onnx.shape_inference import infer_shapes
+from . import to_array_extended
 from .evaluator import ExtendedReferenceEvaluator
 
 
@@ -20,11 +23,28 @@ class ResultType(IntEnum):
     SPARSE_INITIALIZER = 4
     INPUT = 8
     OUTPUT = 16
+    NODE = 32
 
     def __repr__(self):
         return f"{self.__class__.__name__}.{self._name_}"
 
 
+def _dimension_to_str(d):
+    if isinstance(d, int):
+        return str(d)
+    try:
+        int(d)
+    except ValueError:
+        return d
+    return f"{d!r}"
+
+
+def _rank_to_str(shape):
+    if shape:
+        return f"{len(shape)}:"
+    return "  "
+
+
 @dataclass
 class ResultExecution:
     """
@@ -57,12 +77,19 @@ def __getitem__(self, i: int) -> Any:
         raise IndexError(f"i={i} out of boundary")
 
     def __str__(self):
+        dtype = self.dtype if self.dtype != 0 else ""
         els = [
             _align(self.kind._name_, 6),
-            _align(str(self.dtype).replace("dtype(", "").replace(")", ""), 8),
-            _align("x".join(map(str, self.shape)), 15),
+            _align(str(dtype).replace("dtype(", "").replace(")", ""), 8),
+            _rank_to_str(self.shape)
+            + _align(
+                "x".join(
+                    "" if self.shape is None else map(_dimension_to_str, self.shape)
+                ),
+                18,
+            ),
             self.summary,
-            _align(self.op_type or "", 10),
+            _align(self.op_type or "", 15),
             self.name or "",
         ]
         return " ".join(els)
@@ -270,6 +297,22 @@ def _cost_type(self, t1: "np.dtype", t2: "np.dtype") -> float:
         return 1
 
     def _cost_shape(self, s1: Tuple[int, ...], s2: Tuple[int, ...]) -> float:
+        if s1 is None or s2 is None:
+            return self.rank_cost
+        if any(map(lambda s: isinstance(s, str), s1)) or any(
+            map(lambda s: isinstance(s, str), s2)
+        ):
+            # dynamic shapes
+            if len(s1) != len(s2):
+                return self.rank_cost
+            d = 0
+            for i, j in zip(s1, s2):
+                if isinstance(i, int) and isinstance(j, int):
+                    d += abs(i - j)
+                elif i != j:
+                    d += self.rank_cost / 2
+            return d
+
         d = abs(np.prod(s1) - np.prod(s2))
         if len(s1) != len(s2):
             return self.rank_cost + d
@@ -424,12 +467,90 @@ def generate_inputs(model: ModelProto) -> List[np.ndarray]:
     return inputs
 
 
+def _update_shape_types_with_proto(
+    proto: ModelProto,
+) -> Dict[str, Tuple[int, Tuple[Union[int, str], ...]]]:
+    """
+    Retrieves the shapes and types for a model.
+    """
+    assert isinstance(proto, ModelProto), f"Unexpected type {type(proto)} for proto"
+    res = {}
+
+    for val in proto.graph.input:
+        itype = val.type.tensor_type.elem_type
+        shape = tuple(
+            d.dim_param if d.dim_param else d.dim_value
+            for d in val.type.tensor_type.shape.dim
+        )
+        res[val.name] = [itype, shape]
+
+    for val in proto.graph.output:
+        itype = val.type.tensor_type.elem_type
+        shape = tuple(
+            d.dim_param if d.dim_param else d.dim_value
+            for d in val.type.tensor_type.shape.dim
+        )
+        res[val.name] = [itype, shape]
+
+    for val in proto.graph.initializer:
+        itype = val.data_type
+        shape = tuple(d for d in val.dims)
+        res[val.name] = [itype, shape]
+
+    new_proto = infer_shapes(proto)
+    for val in new_proto.graph.value_info:
+        itype = val.type.tensor_type.elem_type
+        shape = tuple(
+            d.dim_param if d.dim_param else d.dim_value
+            for d in val.type.tensor_type.shape.dim
+        )
+        res[val.name] = [itype, shape]
+
+    return res
+
+
+def _enumerate_result_no_execution(model: ModelProto) -> Iterator[ResultType]:
+    """
+    Produces a list of results based on a model in order to
+    trigger the edit distance comparison.
+    """
+    type_shape = _update_shape_types_with_proto(model)
+    for i in model.graph.initializer:
+        itype, shape = type_shape.get(i.name, (0, None))
+        dtype = tensor_dtype_to_np_dtype(itype)
+        yield ResultExecution(
+            ResultType.INITIALIZER,
+            dtype,
+            shape,
+            make_summary(to_array_extended(i)),
+            "INIT",
+            i.name,
+        )
+    for i in model.graph.input:
+        itype, shape = type_shape.get(i.name, (0, None))
+        dtype = tensor_dtype_to_np_dtype(itype)
+        yield ResultExecution(ResultType.INPUT, dtype, shape, "????", "INPUT", i.name)
+    for node in model.graph.node:
+        yield ResultExecution(ResultType.NODE, 0, None, "????", node.op_type, node.name)
+        for o in node.output:
+            itype, shape = type_shape.get(o, (0, None))
+            dtype = 0 if itype == 0 else tensor_dtype_to_np_dtype(itype)
+            yield ResultExecution(
+                ResultType.RESULT, dtype, shape, "????", node.op_type, o
+            )
+    for i in model.graph.output:
+        itype, shape = type_shape.get(i.name, (0, None))
+        dtype = tensor_dtype_to_np_dtype(itype)
+        yield ResultExecution(ResultType.OUTPUT, dtype, shape, "????", "OUTPUT", i.name)
+
+
 def compare_onnx_execution(
     model1: ModelProto,
     model2: ModelProto,
     inputs: Optional[Union[List[Any], Tuple[Dict[str, Any]]]] = None,
     verbose: int = 0,
     raise_exc: bool = True,
+    mode: str = "execute",
 ) -> Tuple[List[ResultExecution], List[ResultExecution], List[Tuple[int, int]]]:
     """
     Compares the execution of two onnx models.
@@ -443,33 +564,55 @@ def compare_onnx_execution(
         the same number of inputs or two dictionaries, one for each model
     :param verbose: verbosity
     :param raise_exc: raise exception if the execution fails or stop at the error
+    :param mode: the model should be executed but the function can be executed
+        but the comparison may append on nodes only
     :return: four results, a sequence of results for the first model and the second model,
         the alignment between the two, DistanceExecution
     """
-    if verbose:
-        print("[compare_onnx_execution] generate inputs")
-    if inputs is None:
-        inputs = generate_inputs(model1)
-    if isinstance(inputs, tuple):
-        assert len(inputs) == 2, f"Unexpected number  {len(inputs)} of inputs."
-        feeds1, feeds2 = inputs
+    assert mode in {"execute", "nodes"}, f"Unexpected value for mode={mode!r}."
+
+    if mode == "execute":
+        if inputs is None:
+            if verbose:
+                print("[compare_onnx_execution] generate inputs")
+            inputs = generate_inputs(model1)
+        if isinstance(inputs, tuple):
+            assert len(inputs) == 2, f"Unexpected number {len(inputs)} of inputs."
+            feeds1, feeds2 = inputs
+        else:
+            feeds1 = {i.name: v for i, v in zip(model1.graph.input, inputs)}
+            feeds2 = {i.name: v for i, v in zip(model2.graph.input, inputs)}
+        assert isinstance(feeds1, dict), f"Unexpected type {type(feeds1)} for inputs"
+        assert isinstance(feeds2, dict), f"Unexpected type {type(feeds2)} for inputs"
+        if verbose:
+            print(f"[compare_onnx_execution] execute with {len(inputs)} inputs")
+            print("[compare_onnx_execution] execute first model")
+        res1 = list(
+            YieldEvaluator(model1).enumerate_summarized(
+                None, feeds1, raise_exc=raise_exc
+            )
+        )
+        if verbose:
+            print(f"[compare_onnx_execution] got {len(res1)} results")
+            print("[compare_onnx_execution] execute second model")
+        res2 = list(
+            YieldEvaluator(model2).enumerate_summarized(
+                None, feeds2, raise_exc=raise_exc
+            )
+        )
+    elif mode == "nodes":
+        # No execution.
+        if verbose:
+            print("[compare_onnx_execution] loading first model")
+        proto1 = load(model1) if isinstance(model1, str) else model1
+        if verbose:
+            print("[compare_onnx_execution] loading first model")
+        proto2 = load(model2) if isinstance(model2, str) else model2
+        res1 = list(_enumerate_result_no_execution(proto1))
+        res2 = list(_enumerate_result_no_execution(proto2))
     else:
-        feeds1 = {i.name: v for i, v in zip(model1.graph.input, inputs)}
-        feeds2 = {i.name: v for i, v in zip(model2.graph.input, inputs)}
-    assert isinstance(feeds1, dict), f"Unexpected type {type(feeds1)} for inputs"
-    assert isinstance(feeds2, dict), f"Unexpected type {type(feeds2)} for inputs"
-    if verbose:
-        print(f"[compare_onnx_execution] got {len(inputs)} inputs")
-        print("[compare_onnx_execution] execute first model")
-    res1 = list(
-        YieldEvaluator(model1).enumerate_summarized(None, feeds1, raise_exc=raise_exc)
-    )
-    if verbose:
-        print(f"[compare_onnx_execution] got {len(res1)} results")
-        print("[compare_onnx_execution] execute second model")
-    res2 = list(
-        YieldEvaluator(model2).enumerate_summarized(None, feeds2, raise_exc=raise_exc)
-    )
+        return
+
     if verbose:
         print(f"[compare_onnx_execution] got {len(res2)} results")
         print("[compare_onnx_execution] compute edit distance")

From 2dd068672d352ed1761d889c071a9bac02790663 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= <xadupre@users.noreply.github.com>
Date: Thu, 22 Feb 2024 15:55:26 +0100
Subject: [PATCH 23/44] Add ConstantOfShape to light API (#77)

* update requirements

* Add ConstantOfShape to light API
---
 _unittests/ut_light_api/test_light_api.py | 14 +++++++++++++-
 onnx_array_api/light_api/__init__.py      |  4 +++-
 onnx_array_api/light_api/_op_var.py       |  7 +++++++
 onnx_array_api/light_api/model.py         |  5 +++++
 4 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/_unittests/ut_light_api/test_light_api.py b/_unittests/ut_light_api/test_light_api.py
index 6b22ae9..0483354 100644
--- a/_unittests/ut_light_api/test_light_api.py
+++ b/_unittests/ut_light_api/test_light_api.py
@@ -2,7 +2,7 @@
 import unittest
 from typing import Callable, Optional
 import numpy as np
-from onnx import GraphProto, ModelProto
+from onnx import GraphProto, ModelProto, TensorProto
 from onnx.defs import (
     get_all_schemas_with_history,
     onnx_opset_version,
@@ -526,6 +526,18 @@ def test_input_shape(self):
         i = str(model.graph.input[0]).replace("\n", "").replace(" ", "")
         self.assertNotIn("shape{}", i)
 
+    def test_constant_of_shape(self):
+        onx = (
+            start()
+            .vin("X", TensorProto.INT64, shape=[None, None])
+            .ConstantOfShape()
+            .vout(shape=[])
+            .to_onnx()
+        )
+        ref = ReferenceEvaluator(onx)
+        got = ref.run(None, {"X": np.array([2, 3], dtype=np.int64)})[0]
+        self.assertEqualArray(np.zeros((2, 3), dtype=np.float32), got)
+
 
 if __name__ == "__main__":
     TestLightApi().test_add()
diff --git a/onnx_array_api/light_api/__init__.py b/onnx_array_api/light_api/__init__.py
index 3fe9489..83e8878 100644
--- a/onnx_array_api/light_api/__init__.py
+++ b/onnx_array_api/light_api/__init__.py
@@ -8,12 +8,14 @@
 def start(
     opset: Optional[int] = None,
     opsets: Optional[Dict[str, int]] = None,
+    ir_version: Optional[int] = None,
 ) -> OnnxGraph:
     """
     Starts an onnx model.
 
     :param opset: main opset version
     :param opsets: others opsets as a dictionary
+    :param ir_version: specify the ir_version as well
     :return: an instance of :class:`onnx_array_api.light_api.OnnxGraph`
 
     A very simple model:
@@ -45,7 +47,7 @@ def start(
         )
         print(onx)
     """
-    return OnnxGraph(opset=opset, opsets=opsets)
+    return OnnxGraph(opset=opset, opsets=opsets, ir_version=ir_version)
 
 
 def g() -> OnnxGraph:
diff --git a/onnx_array_api/light_api/_op_var.py b/onnx_array_api/light_api/_op_var.py
index 27a04d1..3a74ed2 100644
--- a/onnx_array_api/light_api/_op_var.py
+++ b/onnx_array_api/light_api/_op_var.py
@@ -1,4 +1,6 @@
 from typing import List, Optional, Union
+import numpy as np
+from ..reference import from_array_extended
 from ..annotations import AI_ONNX_ML, domain
 
 
@@ -69,6 +71,11 @@ def Cast(self, saturate: int = 1, to: int = 0) -> "Var":
     def Celu(self, alpha: float = 1.0) -> "Var":
         return self.make_node("Celu", self, alpha=alpha)
 
+    def ConstantOfShape(self, value: Optional[np.array] = None) -> "Var":
+        if value is None:
+            return self.make_node("ConstantOfShape", self)
+        return self.make_node("ConstantOfShape", self, value=from_array_extended(value))
+
     def DepthToSpace(self, blocksize: int = 0, mode: str = "DCR") -> "Var":
         return self.make_node("DepthToSpace", self, blocksize=blocksize, mode=mode)
 
diff --git a/onnx_array_api/light_api/model.py b/onnx_array_api/light_api/model.py
index 5a7eef5..25194ac 100644
--- a/onnx_array_api/light_api/model.py
+++ b/onnx_array_api/light_api/model.py
@@ -42,6 +42,7 @@ class OnnxGraph:
 
     :param opset: main opset version
     :param opsets: other opsets as a dictionary
+    :param ir_version: to specify an ir_version
     :param is_function: a :class:`onnx.ModelProto` or a :class:`onnx.FunctionProto`
     """
 
@@ -49,6 +50,7 @@ def __init__(
         self,
         opset: Optional[int] = None,
         opsets: Optional[Dict[str, int]] = None,
+        ir_version: Optional[int] = None,
         proto_type: ProtoType = ProtoType.MODEL,
     ):
         if opsets is not None and "" in opsets:
@@ -65,6 +67,7 @@ def __init__(
         self.proto_type = proto_type
         self.opsets = opsets
         self.opset = opset
+        self.ir_version = ir_version
         self.nodes: List[Union[NodeProto, TensorProto]] = []
         self.inputs: List[ValueInfoProto] = []
         self.outputs: List[ValueInfoProto] = []
@@ -402,6 +405,8 @@ def to_onnx(self) -> GRAPH_PROTO:
             # If no opsets, it a subgraph, not a model.
             return graph
         model = make_model(graph, opset_imports=opsets)
+        if self.ir_version:
+            model.ir_version = ir_version
         if not is_windows() or not is_azure():
             # check_model fails sometimes on Windows
             check_model(model)

From a906010230dd09c3b54530488e4ade2a3ee6f457 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= <xadupre@users.noreply.github.com>
Date: Thu, 22 Feb 2024 17:21:03 +0100
Subject: [PATCH 24/44] Documentation (#78)

* update requirements

* Add ConstantOfShape to light API

* add slice

* changelogs

* k
---
 CHANGELOGS.rst                            |  1 +
 _unittests/ut_light_api/test_light_api.py | 30 ++++++++++++++++++++++-
 onnx_array_api/light_api/_op_var.py       |  7 ++++++
 onnx_array_api/light_api/model.py         |  2 +-
 4 files changed, 38 insertions(+), 2 deletions(-)

diff --git a/CHANGELOGS.rst b/CHANGELOGS.rst
index 13c81ab..9f22a80 100644
--- a/CHANGELOGS.rst
+++ b/CHANGELOGS.rst
@@ -4,6 +4,7 @@ Change Logs
 0.2.0
 +++++
 
+* :pr:`77`: supports ConcatOfShape and Slice with the light API
 * :pr:`76`: add a mode to compare models without execution
 * :pr:`75`: add QuickGelu to ExtendedReferenceEvaluator
 * :pr:`71`: adds tools to compare two onnx graphs
diff --git a/_unittests/ut_light_api/test_light_api.py b/_unittests/ut_light_api/test_light_api.py
index 0483354..e14896a 100644
--- a/_unittests/ut_light_api/test_light_api.py
+++ b/_unittests/ut_light_api/test_light_api.py
@@ -538,7 +538,35 @@ def test_constant_of_shape(self):
         got = ref.run(None, {"X": np.array([2, 3], dtype=np.int64)})[0]
         self.assertEqualArray(np.zeros((2, 3), dtype=np.float32), got)
 
+    def test_constant_of_shape_value(self):
+        onx = (
+            start()
+            .vin("X", TensorProto.INT64, shape=[None, None])
+            .ConstantOfShape(value=np.array([1], dtype=np.float32))
+            .vout(shape=[])
+            .to_onnx()
+        )
+        ref = ReferenceEvaluator(onx)
+        got = ref.run(None, {"X": np.array([2, 3], dtype=np.int64)})[0]
+        self.assertEqualArray(np.ones((2, 3), dtype=np.float32), got)
+
+    def test_slice(self):
+        onx = (
+            start(opset=18, ir_version=9)
+            .cst(np.array([1], dtype=np.int64), name="one")
+            .cst(np.array([2], dtype=np.int64), name="two")
+            .vin("X", TensorProto.INT64, shape=[None, None])
+            .ConstantOfShape(value=np.array([1], dtype=np.float32))
+            .rename("CX")
+            .bring("CX", "one", "two", "one")
+            .Slice()
+            .vout(shape=[])
+            .to_onnx()
+        )
+        ref = ReferenceEvaluator(onx)
+        got = ref.run(None, {"X": np.array([2, 3], dtype=np.int64)})[0]
+        self.assertEqualArray(np.ones((2, 1), dtype=np.float32), got)
+
 
 if __name__ == "__main__":
-    TestLightApi().test_add()
     unittest.main(verbosity=2)
diff --git a/onnx_array_api/light_api/_op_var.py b/onnx_array_api/light_api/_op_var.py
index 3a74ed2..1291594 100644
--- a/onnx_array_api/light_api/_op_var.py
+++ b/onnx_array_api/light_api/_op_var.py
@@ -314,6 +314,13 @@ def Selu(
     def Shrink(self, bias: float = 0.0, lambd: float = 0.5) -> "Var":
         return self.make_node("Shrink", self, bias=bias, lambd=lambd)
 
+    def Slice(
+        self, starts: "Var", ends: "Var", axes: "Var", steps: Optional["Var"] = None
+    ) -> "Var":
+        if steps is None:
+            return self.make_node("Slice", self, starts, ends, axes)
+        return self.make_node("Slice", self, starts, ends, axes, steps)
+
     def Softmax(self, axis: int = -1) -> "Var":
         return self.make_node("Softmax", self, axis=axis)
 
diff --git a/onnx_array_api/light_api/model.py b/onnx_array_api/light_api/model.py
index 25194ac..6478c4d 100644
--- a/onnx_array_api/light_api/model.py
+++ b/onnx_array_api/light_api/model.py
@@ -406,7 +406,7 @@ def to_onnx(self) -> GRAPH_PROTO:
             return graph
         model = make_model(graph, opset_imports=opsets)
         if self.ir_version:
-            model.ir_version = ir_version
+            model.ir_version = self.ir_version
         if not is_windows() or not is_azure():
             # check_model fails sometimes on Windows
             check_model(model)

From dcc2ddd0f3e00785b8dc0604e54d8412ceb76ea8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= <xadupre@users.noreply.github.com>
Date: Wed, 28 Feb 2024 17:27:06 +0100
Subject: [PATCH 25/44] Add discrepancies when comparing the execution of two
 models (#79)

* update requirements

* add discrepancies figures

* fix command line

* doc
---
 CHANGELOGS.rst                                |  2 +-
 .../ut_reference/test_evaluator_yield.py      | 25 ++++++++++
 onnx_array_api/_command_lines_parser.py       | 14 +++++-
 onnx_array_api/reference/evaluator_yield.py   | 49 ++++++++++++++++---
 4 files changed, 81 insertions(+), 9 deletions(-)

diff --git a/CHANGELOGS.rst b/CHANGELOGS.rst
index 9f22a80..f6feee7 100644
--- a/CHANGELOGS.rst
+++ b/CHANGELOGS.rst
@@ -5,7 +5,7 @@ Change Logs
 +++++
 
 * :pr:`77`: supports ConcatOfShape and Slice with the light API
-* :pr:`76`: add a mode to compare models without execution
+* :pr:`76`, :pr:`79`: add a mode to compare models without execution
 * :pr:`75`: add QuickGelu to ExtendedReferenceEvaluator
 * :pr:`71`: adds tools to compare two onnx graphs
 * :pr:`61`: adds function to plot onnx model as graphs
diff --git a/_unittests/ut_reference/test_evaluator_yield.py b/_unittests/ut_reference/test_evaluator_yield.py
index 83fabe2..605c1f8 100644
--- a/_unittests/ut_reference/test_evaluator_yield.py
+++ b/_unittests/ut_reference/test_evaluator_yield.py
@@ -462,6 +462,31 @@ def test_compare_execution(self):
         self.assertIn("CAAA Constant", text)
         self.assertEqual(len(align), 5)
 
+    def test_compare_execution_discrepancies(self):
+        m1 = parse_model(
+            """
+            <ir_version: 8, opset_import: [ "": 18]>
+            agraph (float[N] x) => (float[N] z) {
+                two = Constant <value_float=2.0> ()
+                four = Add(two, two)
+                z = Mul(x, x)
+            }"""
+        )
+        m2 = parse_model(
+            """
+            <ir_version: 8, opset_import: [ "": 18]>
+            agraph (float[N] x) => (float[N] z) {
+                two = Constant <value_float=2.0> ()
+                z = Mul(x, x)
+            }"""
+        )
+        res1, res2, align, dc = compare_onnx_execution(m1, m2, keep_tensor=True)
+        text = dc.to_str(res1, res2, align)
+        print(text)
+        self.assertIn("CAAA Constant", text)
+        self.assertIn("| a=", text)
+        self.assertIn(" r=", text)
+
     def test_no_execution(self):
         model = make_model(
             make_graph(
diff --git a/onnx_array_api/_command_lines_parser.py b/onnx_array_api/_command_lines_parser.py
index 0450977..15ee153 100644
--- a/onnx_array_api/_command_lines_parser.py
+++ b/onnx_array_api/_command_lines_parser.py
@@ -106,9 +106,15 @@ def get_parser_compare() -> ArgumentParser:
     parser.add_argument(
         "-c",
         "--column-size",
-        default=50,
+        default=60,
         help="column size when displaying the results",
     )
+    parser.add_argument(
+        "-d",
+        "--discrepancies",
+        default=0,
+        help="show precise discrepancies when mode is execution",
+    )
     return parser
 
 
@@ -120,7 +126,11 @@ def _cmd_compare(argv: List[Any]):
     onx1 = onnx.load(args.model1)
     onx2 = onnx.load(args.model2)
     res1, res2, align, dc = compare_onnx_execution(
-        onx1, onx2, verbose=args.verbose, mode=args.mode
+        onx1,
+        onx2,
+        verbose=args.verbose,
+        mode=args.mode,
+        keep_tensor=args.discrepancies in (1, "1", "True", True),
     )
     text = dc.to_str(res1, res2, align, column_size=int(args.column_size))
     print(text)
diff --git a/onnx_array_api/reference/evaluator_yield.py b/onnx_array_api/reference/evaluator_yield.py
index 7942d8f..f9f587f 100644
--- a/onnx_array_api/reference/evaluator_yield.py
+++ b/onnx_array_api/reference/evaluator_yield.py
@@ -57,6 +57,7 @@ class ResultExecution:
     summary: str
     op_type: str
     name: str
+    value: Optional[Any] = None
 
     def __len__(self) -> int:
         return 6
@@ -122,9 +123,11 @@ def make_summary(value: Any, length: int = 4, modulo: int = 26) -> str:
         else:
             value2 = value.flatten().astype(np.float64)
         value4 = value2.reshape((4, -1)).sum(axis=1)
-    value4i = value4.astype(np.int64) % modulo
-    s = "".join([chr(65 + i) for i in value4i])
-    return s
+    value4 = np.where(np.abs(value4) < 1e10, value4, np.nan)
+    s = []
+    for v in value4:
+        s.append("?" if np.isnan(v) else (chr(65 + int(v) % modulo)))
+    return "".join(s)
 
 
 class YieldEvaluator:
@@ -228,6 +231,7 @@ def enumerate_summarized(
         output_names: Optional[List[str]] = None,
         feed_inputs: Optional[Dict[str, Any]] = None,
         raise_exc: bool = True,
+        keep_tensor: bool = False,
     ) -> Iterator[ResultExecution]:
         """
         Executes the onnx model and enumerate intermediate results without their names.
@@ -236,6 +240,7 @@ def enumerate_summarized(
         :param feed_inputs: dictionary `{ input name: input value }`
         :param raise_exc: raises an exception if the execution fails or stop
             where it is
+        :param keep_tensor:keep the tensor in order to compute precise distances
         :return: iterator on ResultExecution
         """
         for kind, name, value, op_type in self.enumerate_results(
@@ -243,10 +248,32 @@ def enumerate_summarized(
         ):
             summary = make_summary(value)
             yield ResultExecution(
-                kind, value.dtype, value.shape, summary, op_type, name
+                kind,
+                value.dtype,
+                value.shape,
+                summary,
+                op_type,
+                name,
+                value=value if keep_tensor else None,
             )
 
 
+def discrepancies(
+    expected: np.ndarray, value: np.ndarray, eps: float = 1e-7
+) -> Dict[str, float]:
+    """
+    Computes absolute error and relative error between two matrices.
+    """
+    assert (
+        expected.size == value.size
+    ), f"Incompatible shapes v1.shape={expected.shape}, v2.shape={value.shape}"
+    expected = expected.ravel().astype(np.float32)
+    value = value.ravel().astype(np.float32)
+    diff = np.abs(expected - value)
+    rel = diff / (np.abs(expected) + eps)
+    return dict(aerr=float(diff.max()), rerr=float(rel.max()))
+
+
 class DistanceExecution:
     """
     Computes a distance between two results.
@@ -403,6 +430,14 @@ def to_str(
                 d = self.distance_pair(d1, d2)
                 symbol = "=" if d == 0 else "~"
                 line = f"{symbol} | {_align(str(d1), column_size)} | {_align(str(d2), column_size)}"
+                if (
+                    d1.value is not None
+                    and d2.value is not None
+                    and d1.value.size == d2.value.size
+                ):
+                    disc = discrepancies(d1.value, d2.value)
+                    a, r = disc["aerr"], disc["rerr"]
+                    line += f" | a={a:.3f} r={r:.3f}"
             elif i == last[0]:
                 d2 = s2[j]
                 line = (
@@ -551,6 +586,7 @@ def compare_onnx_execution(
     verbose: int = 0,
     raise_exc: bool = True,
     mode: str = "execute",
+    keep_tensor: bool = False,
 ) -> Tuple[List[ResultExecution], List[ResultExecution], List[Tuple[int, int]]]:
     """
     Compares the execution of two onnx models.
@@ -566,6 +602,7 @@ def compare_onnx_execution(
     :param raise_exc: raise exception if the execution fails or stop at the error
     :param mode: the model should be executed but the function can be executed
         but the comparison may append on nodes only
+    :param keep_tensor: keeps the tensor in order to compute a precise distance
     :return: four results, a sequence of results for the first model and the second model,
         the alignment between the two, DistanceExecution
     """
@@ -589,7 +626,7 @@ def compare_onnx_execution(
             print("[compare_onnx_execution] execute first model")
         res1 = list(
             YieldEvaluator(model1).enumerate_summarized(
-                None, feeds1, raise_exc=raise_exc
+                None, feeds1, raise_exc=raise_exc, keep_tensor=keep_tensor
             )
         )
         if verbose:
@@ -597,7 +634,7 @@ def compare_onnx_execution(
             print("[compare_onnx_execution] execute second model")
         res2 = list(
             YieldEvaluator(model2).enumerate_summarized(
-                None, feeds2, raise_exc=raise_exc
+                None, feeds2, raise_exc=raise_exc, keep_tensor=keep_tensor
             )
         )
     elif mode == "nodes":

From 492b6d4ce3662513a11b01fc9f40f1c3bfa0141a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= <xadupre@users.noreply.github.com>
Date: Fri, 1 Mar 2024 16:19:43 +0100
Subject: [PATCH 26/44] Fix documentation (#81)

* update requirements

* fix names in to_dot

* doc

* fix doc

* doc

* fix doc

* doc
---
 _doc/api/plotting.rst                       |  2 ++
 _doc/conf.py                                |  1 +
 _doc/tutorial/onnx_api.rst                  |  6 +++-
 onnx_array_api/plotting/dot_plot.py         |  7 +++-
 onnx_array_api/plotting/graphviz_helper.py  | 40 ++++++++++++---------
 onnx_array_api/reference/evaluator_yield.py |  7 ++--
 onnx_array_api/validation/docs.py           |  4 ++-
 7 files changed, 44 insertions(+), 23 deletions(-)

diff --git a/_doc/api/plotting.rst b/_doc/api/plotting.rst
index 830cc86..db6076c 100644
--- a/_doc/api/plotting.rst
+++ b/_doc/api/plotting.rst
@@ -6,6 +6,8 @@ Dot
 
 .. autofunction:: onnx_array_api.plotting.dot_plot.to_dot
 
+.. autofunction:: onnx_array_api.plotting.graphviz_helper.plot_dot
+
 Statistics
 ++++++++++
 
diff --git a/_doc/conf.py b/_doc/conf.py
index 30356d1..3c7a1ad 100644
--- a/_doc/conf.py
+++ b/_doc/conf.py
@@ -117,6 +117,7 @@
     "ast": "https://docs.python.org/3/library/ast.html",
     "cProfile.Profile": "https://docs.python.org/3/library/profile.html#profile.Profile",
     "DOT": "https://graphviz.org/doc/info/lang.html",
+    "Graphviz": "https://graphviz.org/",
     "inner API": "https://onnx.ai/onnx/intro/python.html",
     "JIT": "https://en.wikipedia.org/wiki/Just-in-time_compilation",
     "onnx": "https://onnx.ai/onnx/",
diff --git a/_doc/tutorial/onnx_api.rst b/_doc/tutorial/onnx_api.rst
index a4f80be..2b673fb 100644
--- a/_doc/tutorial/onnx_api.rst
+++ b/_doc/tutorial/onnx_api.rst
@@ -71,7 +71,11 @@ the true implementation would be the following.
             n2 = oh.make_node("Pow", ["dxy", "two"], ["dxy2"])
             n3 = oh.make_node("ReduceSum", ["dxy2"], [output_name])
             graph = oh.make_graph([n1, n2, n3], "euclidian", [X, Y], [Z], [two])
-            model = oh.make_model(graph, opset_imports=[oh.make_opsetid("", opset)])
+            model = oh.make_model(
+                graph,
+                opset_imports=[oh.make_opsetid("", opset)],
+                ir_version=9,
+            )
             return model
 
 
diff --git a/onnx_array_api/plotting/dot_plot.py b/onnx_array_api/plotting/dot_plot.py
index cff93f5..5bfba5d 100644
--- a/onnx_array_api/plotting/dot_plot.py
+++ b/onnx_array_api/plotting/dot_plot.py
@@ -116,7 +116,12 @@ def myloss(x, y):
     clean_label_reg2 = re.compile("\\\\p\\{[0-9P]{1,6}\\}")
 
     def dot_name(text):
-        return text.replace("/", "_").replace(":", "__").replace(".", "_")
+        return (
+            text.replace("/", "_")
+            .replace(":", "__")
+            .replace(".", "_")
+            .replace("-", "_")
+        )
 
     def dot_label(text):
         if text is None:
diff --git a/onnx_array_api/plotting/graphviz_helper.py b/onnx_array_api/plotting/graphviz_helper.py
index 2dd93c2..4aec5e4 100644
--- a/onnx_array_api/plotting/graphviz_helper.py
+++ b/onnx_array_api/plotting/graphviz_helper.py
@@ -73,7 +73,7 @@ def _run_subprocess(
         shell=False,
         env=os.environ,
         stdout=subprocess.PIPE,
-        stderr=subprocess.STDOUT,
+        stderr=subprocess.PIPE,
     )
     raise_exception = False
     output = ""
@@ -91,12 +91,14 @@ def _run_subprocess(
             ):
                 raise_exception = True
     p.poll()
+    error = p.stderr.readline().decode(errors="ignore")
     p.stdout.close()
-    if raise_exception:
+    if error and raise_exception:
         raise RuntimeError(
-            "An error was found in the output. The build is stopped.\n{output}"
+            f"An error was found in the output. The build is stopped."
+            f"\n{output}\n---\n{error}"
         )
-    return output
+    return output + "\n" + error
 
 
 def _run_graphviz(filename: str, image: str, engine: str = "dot") -> str:
@@ -134,8 +136,12 @@ def _run_graphviz(filename: str, image: str, engine: str = "dot") -> str:
         exe = engine
     if os.path.exists(image):
         os.remove(image)
-    output = _run_subprocess([exe, f"-T{ext[1:]}", filename, "-o", image])
-    assert os.path.exists(image), f"Graphviz failed due to {output}"
+    cmd = [exe, f"-T{ext[1:]}", filename, "-o", image]
+    output = _run_subprocess(cmd)
+    assert os.path.exists(image), (
+        f"Unable to find {image!r}, command line is "
+        f"{' '.join(cmd)!r}, Graphviz failed due to\n{output}"
+    )
     return output
 
 
@@ -190,23 +196,25 @@ def plot_dot(
     :param image: output image, None, just returns the output
     :param engine: *dot* or *neato*
     :param figsize: figsize of ax is None
-    :return: :epkg:`Graphviz` output or
-        the dot text if *image* is None
+    :return: :epkg:`Graphviz` output or, the dot text if *image* is None
 
     .. plot::
 
         import matplotlib.pyplot as plt
         import onnx.parser
+        from onnx_array_api.plotting.graphviz_helper import plot_dot
 
         model = onnx.parser.parse_model(
-                    '''
-                    <ir_version: 8, opset_import: [ "": 18]>
-                    agraph (float[N] x) => (float[N] z) {
-                        two = Constant <value_float=2.0> ()
-                        four = Add(two, two)
-                        z = Mul(four, four)
-                    }''')
-        ax = plot_dot(dot)
+            '''
+            <ir_version: 8, opset_import: [ "": 18]>
+            agraph (float[N] x) => (float[N] z) {
+                two = Constant <value_float=2.0> ()
+                four = Add(two, two)
+                z = Mul(four, four)
+            }
+        ''')
+
+        ax = plot_dot(model)
         ax.set_title("Dummy graph")
         plt.show()
     """
diff --git a/onnx_array_api/reference/evaluator_yield.py b/onnx_array_api/reference/evaluator_yield.py
index f9f587f..88c8a1f 100644
--- a/onnx_array_api/reference/evaluator_yield.py
+++ b/onnx_array_api/reference/evaluator_yield.py
@@ -237,10 +237,9 @@ def enumerate_summarized(
         Executes the onnx model and enumerate intermediate results without their names.
 
         :param output_names: requested outputs by names, None for all
-        :param feed_inputs: dictionary `{ input name: input value }`
-        :param raise_exc: raises an exception if the execution fails or stop
-            where it is
-        :param keep_tensor:keep the tensor in order to compute precise distances
+        :param feed_inputs: dictionary ``{ input name: input value }``
+        :param raise_exc: raises an exception if the execution fails or stop where it is
+        :param keep_tensor: keep the tensor in order to compute precise distances
         :return: iterator on ResultExecution
         """
         for kind, name, value, op_type in self.enumerate_results(
diff --git a/onnx_array_api/validation/docs.py b/onnx_array_api/validation/docs.py
index d1a8422..c5f937f 100644
--- a/onnx_array_api/validation/docs.py
+++ b/onnx_array_api/validation/docs.py
@@ -30,7 +30,9 @@ def make_euclidean(
     n2 = oh.make_node("Pow", ["dxy", "two"], ["dxy2"])
     n3 = oh.make_node("ReduceSum", ["dxy2"], [output_name])
     graph = oh.make_graph([n1, n2, n3], "euclidian", [X, Y], [Z], [two])
-    model = oh.make_model(graph, opset_imports=[oh.make_opsetid("", opset)])
+    model = oh.make_model(
+        graph, opset_imports=[oh.make_opsetid("", opset)], ir_version=9
+    )
     return model
 
 
From a54de21f2ea6d2b921d58f28142fcb58c7330122 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= <xadupre@users.noreply.github.com>
Date: Tue, 26 Mar 2024 13:30:08 +0100
Subject: [PATCH 27/44] Better support for ir_version (#82)

* fixes for ir_version

* fix ut

* fix ut
---
 _unittests/ut_light_api/test_backend_export.py               | 4 +++-
 .../test_backend_extended_reference_evaluator.py             | 4 +++-
 onnx_array_api/graph_api/graph_builder.py                    | 5 +++++
 3 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/_unittests/ut_light_api/test_backend_export.py b/_unittests/ut_light_api/test_backend_export.py
index 42ac7f5..47ee7d9 100644
--- a/_unittests/ut_light_api/test_backend_export.py
+++ b/_unittests/ut_light_api/test_backend_export.py
@@ -242,7 +242,7 @@ def run_node(cls, node, inputs, device=None, outputs_info=None, **kwargs):
 
 # The following tests are too slow with the reference implementation (Conv).
 backend_test.exclude(
-    "(FLOAT8|BFLOAT16|_opt_|_3d_|_momentum_|_4d_"
+    "(FLOAT8|BFLOAT16|INT4|_opt_|_3d_|_momentum_|_4d_|int4"
     "|test_adagrad"
     "|test_adam"
     "|test_ai_onnx_ml_"
@@ -270,6 +270,8 @@ def run_node(cls, node, inputs, device=None, outputs_info=None, **kwargs):
     "|test_squeezenet"
     "|test_vgg19"
     "|test_zfnet512"
+    "|test_range_float_type_positive_delta_expanded"
+    "|test_range_int32_type_negative_delta_expanded"
     ")"
 )
 
diff --git a/_unittests/ut_reference/test_backend_extended_reference_evaluator.py b/_unittests/ut_reference/test_backend_extended_reference_evaluator.py
index b35fb3c..06502b2 100644
--- a/_unittests/ut_reference/test_backend_extended_reference_evaluator.py
+++ b/_unittests/ut_reference/test_backend_extended_reference_evaluator.py
@@ -149,7 +149,7 @@ def run_node(cls, node, inputs, device=None, outputs_info=None, **kwargs):
     "|test_scan_sum)"
 )
 
-if onnx_opset_version() < 21:
+if onnx_opset_version() < 200:
     # The following tests are using types not supported by NumPy.
     # They could be if method to_array is extended to support custom
     # types the same as the reference implementation does
@@ -164,8 +164,10 @@ def run_node(cls, node, inputs, device=None, outputs_info=None, **kwargs):
         "|test_cast_no_saturate_"
         "|_to_FLOAT8"
         "|_FLOAT8"
+        "|INT4"
         "|test_quantizelinear_e4m3fn"
         "|test_quantizelinear_e5m2"
+        "|test_scatter_with"
         ")"
     )
 
diff --git a/onnx_array_api/graph_api/graph_builder.py b/onnx_array_api/graph_api/graph_builder.py
index c9c2059..800c578 100644
--- a/onnx_array_api/graph_api/graph_builder.py
+++ b/onnx_array_api/graph_api/graph_builder.py
@@ -156,6 +156,7 @@ def __init__(
         optimization_options: Optional[OptimizationOptions] = None,
         args: Optional[List[Any]] = None,
         verbose: int = 0,
+        ir_version: Optional[int] = None,
     ):
         self.optimization_options = optimization_options or OptimizationOptions()
         self.as_function = as_function
@@ -170,6 +171,7 @@ def __init__(
                 if isinstance(target_opset_or_existing_proto, int)
                 else target_opset_or_existing_proto
             )
+            self.ir_version = ir_version
             self.nodes = []
             self.initializers_dict = {}
             self.inputs = []
@@ -186,6 +188,7 @@ def __init__(
             ), "input_names must be empty if the input is an existing model."
             proto = target_opset_or_existing_proto
             self.opsets = {d.domain: d.version for d in proto.opset_import}
+            self.ir_version = ir_version or target_opset_or_existing_proto.ir_version
             self.nodes = list(proto.graph.node)
             self.initializers_dict = {i.name: i for i in proto.graph.initializer}
             self.initializers_dict.update(
@@ -674,6 +677,8 @@ def to_onnx(
         if self.verbose:
             print("[GraphBuilder] onh.make_model")
         model = oh.make_model(graph, opset_imports=opsets)
+        if self.ir_version:
+            model.ir_version = self.ir_version
         return model
 
     def _check_order_node(self, ind: int, node: NodeProto, existing: Set[str]):

From 53506d13e2f4ce0c4cdbcdd93549ddf849de37b8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= <xadupre@users.noreply.github.com>
Date: Tue, 2 Apr 2024 10:37:21 +0200
Subject: [PATCH 28/44] First draft to export to GraphBuilder (#83)

* export to builder

* doc

* fix unit test

* fix order

* fix initializer

* fix ut

* fix opset
---
 CHANGELOGS.rst                                |   7 +-
 _unittests/ut_translate_api/test_translate.py |   1 -
 .../test_translate_builder.py                 | 122 +++++++++++++++
 onnx_array_api/graph_api/graph_builder.py     |  12 ++
 onnx_array_api/translate_api/__init__.py      |  30 +++-
 onnx_array_api/translate_api/base_emitter.py  |  28 ++++
 .../translate_api/builder_emitter.py          | 144 ++++++++++++++++++
 onnx_array_api/translate_api/translate.py     |  15 +-
 8 files changed, 354 insertions(+), 5 deletions(-)
 create mode 100644 _unittests/ut_translate_api/test_translate_builder.py
 create mode 100644 onnx_array_api/translate_api/builder_emitter.py

diff --git a/CHANGELOGS.rst b/CHANGELOGS.rst
index f6feee7..ac4ac15 100644
--- a/CHANGELOGS.rst
+++ b/CHANGELOGS.rst
@@ -1,10 +1,15 @@
 Change Logs
 ===========
 
-0.2.0
+0.3.0
 +++++
 
+* :pr:`79`: first draft to export to GraphBuilder
 * :pr:`77`: supports ConcatOfShape and Slice with the light API
+
+0.2.0
++++++
+
 * :pr:`76`, :pr:`79`: add a mode to compare models without execution
 * :pr:`75`: add QuickGelu to ExtendedReferenceEvaluator
 * :pr:`71`: adds tools to compare two onnx graphs
diff --git a/_unittests/ut_translate_api/test_translate.py b/_unittests/ut_translate_api/test_translate.py
index d505135..0212d0b 100644
--- a/_unittests/ut_translate_api/test_translate.py
+++ b/_unittests/ut_translate_api/test_translate.py
@@ -221,5 +221,4 @@ def test_aionnxml(self):
 
 
 if __name__ == "__main__":
-    TestTranslate().test_export_if()
     unittest.main(verbosity=2)
diff --git a/_unittests/ut_translate_api/test_translate_builder.py b/_unittests/ut_translate_api/test_translate_builder.py
new file mode 100644
index 0000000..7af0134
--- /dev/null
+++ b/_unittests/ut_translate_api/test_translate_builder.py
@@ -0,0 +1,122 @@
+import unittest
+from textwrap import dedent
+import numpy as np
+from onnx import ModelProto, TensorProto
+from onnx.checker import check_model
+from onnx.defs import onnx_opset_version
+from onnx.reference import ReferenceEvaluator
+from onnx_array_api.ext_test_case import ExtTestCase
+from onnx_array_api.light_api import start
+from onnx_array_api.graph_api import GraphBuilder
+from onnx_array_api.translate_api import translate
+
+
+OPSET_API = min(19, onnx_opset_version() - 1)
+
+
+class TestTranslateBuilder(ExtTestCase):
+    def setUp(self):
+        self.maxDiff = None
+
+    def test_exp(self):
+        onx = start(opset=19).vin("X").Exp().rename("Y").vout().to_onnx()
+        self.assertIsInstance(onx, ModelProto)
+        self.assertIn("Exp", str(onx))
+        ref = ReferenceEvaluator(onx)
+        a = np.arange(10).astype(np.float32)
+        got = ref.run(None, {"X": a})[0]
+        self.assertEqualArray(np.exp(a), got)
+
+        code = translate(onx, api="builder")
+        expected = dedent(
+            """
+        def light_api(
+            op: "GraphBuilder",
+            X: "FLOAT[]",
+        ):
+            Y = op.Exp(X)
+            op.Identity(Y, outputs=["Y"])
+            return Y
+
+        g = GraphBuilder({'': 19})
+        g.make_tensor_input("X", TensorProto.FLOAT, ())
+        light_api(g.op, "X")
+        g.make_tensor_output("Y", TensorProto.FLOAT, ())
+        model = g.to_onnx()
+        """
+        ).strip("\n")
+        self.assertEqual(expected, code.strip("\n"))
+
+        def light_api(
+            op: "GraphBuilder",
+            X: "FLOAT[]",  # noqa: F722
+        ):
+            Y = op.Exp(X)
+            op.Identity(Y, outputs=["Y"])
+            return Y
+
+        g2 = GraphBuilder({"": 19})
+        g2.make_tensor_input("X", TensorProto.FLOAT, ("A",))
+        light_api(g2.op, "X")
+        g2.make_tensor_output("Y", TensorProto.FLOAT, ("A",))
+        onx2 = g2.to_onnx()
+
+        ref = ReferenceEvaluator(onx2)
+        a = np.arange(10).astype(np.float32)
+        got = ref.run(None, {"X": a})[0]
+        self.assertEqualArray(np.exp(a), got)
+
+    def test_zdoc(self):
+        onx = (
+            start(opset=19)
+            .vin("X")
+            .reshape((-1, 1))
+            .Transpose(perm=[1, 0])
+            .rename("Y")
+            .vout()
+            .to_onnx()
+        )
+        code = translate(onx, api="builder")
+        expected = dedent(
+            """
+            def light_api(
+                op: "GraphBuilder",
+                X: "FLOAT[]",
+            ):
+                r = np.array([-1, 1], dtype=np.int64)
+                r0_0 = op.Reshape(X, r)
+                Y = op.Transpose(r0_0, perm=[1, 0])
+                op.Identity(Y, outputs=["Y"])
+                return Y
+
+            g = GraphBuilder({'': 19})
+            g.make_tensor_input("X", TensorProto.FLOAT, ())
+            light_api(g.op, "X")
+            g.make_tensor_output("Y", TensorProto.FLOAT, ())
+            model = g.to_onnx()
+            """
+        ).strip("\n")
+        self.maxDiff = None
+        self.assertEqual(expected, code.strip("\n"))
+
+        def light_api(
+            op: "GraphBuilder",
+            X: "FLOAT[]",  # noqa: F722
+        ):
+            r = np.array([-1, 1], dtype=np.int64)
+            r0_0 = op.Reshape(X, r)
+            Y = op.Transpose(r0_0, perm=[1, 0])
+            op.Identity(Y, outputs=["Y"])
+            return Y
+
+        g = GraphBuilder({"": 21})
+        X = g.make_tensor_input("X", TensorProto.FLOAT, ())
+        light_api(g.op, X)
+        g.make_tensor_output("Y", TensorProto.FLOAT, ())
+        model = g.to_onnx()
+        self.assertNotEmpty(model)
+        check_model(model)
+
+
+if __name__ == "__main__":
+    unittest.main(verbosity=2)
diff --git a/onnx_array_api/graph_api/graph_builder.py b/onnx_array_api/graph_api/graph_builder.py
index 800c578..4f5c601 100644
--- a/onnx_array_api/graph_api/graph_builder.py
+++ b/onnx_array_api/graph_api/graph_builder.py
@@ -119,6 +119,18 @@ def __getattr__(self, name):
         except AttributeError as e:
             raise AttributeError(f"Unable to access attribute {name!r}.") from e
 
+    def Initializer(
+        self, init: Union[TensorProto, np.ndarray], name: Optional[str] = None
+    ) -> str:
+        """
+        Registers an initializer through ``self.builder.make_initializer``.
+
+        :param init: initializer value, a TensorProto or a numpy array
+        :param name: name given to the initializer if *init* is not a TensorProto
+        :return: name of the initializer, as returned by ``make_initializer``
+        """
+        return self.builder.make_initializer(init, name=name, exists=True)
+
     def make_node(
         self,
         op_type: str,
diff --git a/onnx_array_api/translate_api/__init__.py b/onnx_array_api/translate_api/__init__.py
index 25daef6..12b4a77 100644
--- a/onnx_array_api/translate_api/__init__.py
+++ b/onnx_array_api/translate_api/__init__.py
@@ -1,6 +1,7 @@
 from onnx import ModelProto
 from .translate import Translater
 from .inner_emitter import InnerEmitter
+from .builder_emitter import BuilderEmitter
 
 
 def translate(proto: ModelProto, single_line: bool = False, api: str = "light") -> str:
@@ -14,7 +15,8 @@ def translate(proto: ModelProto, single_line: bool = False, api: str = "light")
         default is `"light"` and this is handle by class
         :class:`onnx_array_api.translate_api.light_emitter.LightEmitter`,
         another value is `"onnx"` which is the inner API implemented
-        in onnx package.
+        in onnx package, `"builder"` follows the syntax for the
+        class :class:`onnx_array_api.graph_api.GraphBuilder`
     :return: code
 
     .. runpython::
@@ -35,7 +37,7 @@ def translate(proto: ModelProto, single_line: bool = False, api: str = "light")
         code = translate(onx)
         print(code)
 
-    The inner API from onnx packahe is also available.
+    The inner API from onnx package is also available.
 
     .. runpython::
         :showcode:
@@ -54,6 +56,27 @@ def translate(proto: ModelProto, single_line: bool = False, api: str = "light")
         )
         code = translate(onx, api="onnx")
         print(code)
+
+    The :class:`GraphBuilder
+    <onnx_array_api.graph_api.GraphBuilder>` API returns this:
+
+    .. runpython::
+        :showcode:
+
+        from onnx_array_api.light_api import start
+        from onnx_array_api.translate_api import translate
+
+        onx = (
+            start()
+            .vin("X")
+            .reshape((-1, 1))
+            .Transpose(perm=[1, 0])
+            .rename("Y")
+            .vout()
+            .to_onnx()
+        )
+        code = translate(onx, api="builder")
+        print(code)
     """
     if api == "light":
         tr = Translater(proto)
@@ -61,4 +84,7 @@ def translate(proto: ModelProto, single_line: bool = False, api: str = "light")
     if api == "onnx":
         tr = Translater(proto, emitter=InnerEmitter())
         return tr.export(as_str=True)
+    if api == "builder":
+        tr = Translater(proto, emitter=BuilderEmitter())
+        return tr.export(as_str=True)
     raise ValueError(f"Unexpected value {api!r} for api.")
diff --git a/onnx_array_api/translate_api/base_emitter.py b/onnx_array_api/translate_api/base_emitter.py
index 3a0dfb6..62fb318 100644
--- a/onnx_array_api/translate_api/base_emitter.py
+++ b/onnx_array_api/translate_api/base_emitter.py
@@ -21,6 +21,10 @@ class EventType(IntEnum):
     FUNCTION_OUTPUT = 12
     FUNCTION_ATTRIBUTES = 13
     TO_ONNX_FUNCTION = 14
+    BEGIN_SIGNATURE = 15
+    END_SIGNATURE = 16
+    BEGIN_RETURN = 17
+    END_RETURN = 18
 
     @classmethod
     def to_str(cls, self) -> str:
@@ -84,6 +88,18 @@ def __call__(self, event: EventType, **kwargs: Dict[str, Any]) -> List[str]:
         if event == EventType.FUNCTION_ATTRIBUTES:
             return self._emit_function_attributes(**kwargs)
 
+        if event == EventType.BEGIN_SIGNATURE:
+            return self._emit_begin_signature(**kwargs)
+
+        if event == EventType.END_SIGNATURE:
+            return self._emit_end_signature(**kwargs)
+
+        if event == EventType.BEGIN_RETURN:
+            return self._emit_begin_return(**kwargs)
+
+        if event == EventType.END_RETURN:
+            return self._emit_end_return(**kwargs)
+
         raise ValueError(f"Unexpected event {EventType.to_str(event)}.")
 
     def render_attribute_value(self, value: Any) -> Tuple[List[str], str]:
@@ -222,3 +238,15 @@ def _emit_function_attributes(self, **kwargs: Dict[str, Any]) -> List[str]:
         raise NotImplementedError(
             f"Method {inspect.currentframe().f_code.co_name!r} was not overloaded."
         )
+
+    def _emit_begin_signature(self, **kwargs: Dict[str, Any]) -> List[str]:
+        return []
+
+    def _emit_end_signature(self, **kwargs: Dict[str, Any]) -> List[str]:
+        return []
+
+    def _emit_begin_return(self, **kwargs: Dict[str, Any]) -> List[str]:
+        return []
+
+    def _emit_end_return(self, **kwargs: Dict[str, Any]) -> List[str]:
+        return []
diff --git a/onnx_array_api/translate_api/builder_emitter.py b/onnx_array_api/translate_api/builder_emitter.py
new file mode 100644
index 0000000..a3b38d6
--- /dev/null
+++ b/onnx_array_api/translate_api/builder_emitter.py
@@ -0,0 +1,144 @@
+from typing import Any, Dict, List
+from onnx import TensorProto
+from onnx.numpy_helper import to_array
+from .base_emitter import BaseEmitter
+
+# Maps every ONNX element type code to the name of its TensorProto constant.
+# Built from the protobuf enum so no type (DOUBLE, BOOL, INT8, ...) is missing.
+_types = {code: name for name, code in TensorProto.DataType.items()}
+
+
+def _itype_to_string(itype: int) -> str:
+    """
+    Returns the TensorProto constant name of *itype*, raises KeyError if unknown.
+    """
+    return _types[itype]
+
+
+class BuilderEmitter(BaseEmitter):
+    """
+    Converts event into proper code.
+    """
+
+    def join(self, rows: List[str], single_line: bool = False) -> str:
+        "Join the rows"
+        assert (
+            not single_line
+        ), f"The emitter {type(self)} does not work with single_line=True."
+        return "\n".join(rows)
+
+    def _emit_start(self, **kwargs: Dict[str, Any]) -> List[str]:
+        self.opsets = kwargs.get("opsets", {})
+        return []
+
+    def _emit_to_onnx_model(self, **kwargs: Dict[str, Any]) -> List[str]:
+        inps = ", ".join(["g.op", *[f'"{i}"' for i in self.inputs]])
+        inputs = []
+        for inp, stype, shape in self.inputs_full_:
+            inputs.append(f'g.make_tensor_input("{inp}", TensorProto.{stype}, {shape})')
+        outputs = []
+        for inp, stype, shape in self.outputs_full_:
+            outputs.append(
+                f'g.make_tensor_output("{inp}", TensorProto.{stype}, {shape})'
+            )
+        rows = [
+            "",
+            f"g = GraphBuilder({self.opsets})",
+            *inputs,
+            f"{self.name}({inps})",
+            *outputs,
+            "model = g.to_onnx()",
+        ]
+        return rows
+
+    def _emit_begin_graph(self, **kwargs: Dict[str, Any]) -> List[str]:
+        self.inputs = []
+        self.inputs_full = []
+        self.outputs = []
+        self.inits = []
+        self.inputs_full_ = []
+        self.outputs_full_ = []
+        self.name = kwargs.get("name", "make_graph")
+        return []
+
+    def _emit_end_graph(self, **kwargs: Dict[str, Any]) -> List[str]:
+        return []
+
+    def _emit_initializer(self, **kwargs: Dict[str, Any]) -> List[str]:
+        init = kwargs["init"]
+        if isinstance(init, TensorProto):
+            assert (
+                kwargs["name"] == init.name
+            ), f"Name mismatch init.name={init.name!r}, name={kwargs['name']!r}"
+            self.inits.append(init)
+            return []
+        raise AssertionError(f"Unsupported type for an initializer {type(init)}")
+
+    def _emit_input(self, **kwargs: Dict[str, Any]) -> List[str]:
+        "Stores one graph input; its signature entry uses the input's own name."
+        name = kwargs["name"]
+        itype = kwargs.get("elem_type", 0)
+        shape = kwargs.get("shape", None)
+        if itype == 0:
+            inp = name
+        elif shape is None:
+            inp = f'{name}: "{_itype_to_string(itype)}"'
+        else:
+            inp = f'{name}: "{_itype_to_string(itype)}[{", ".join(map(str, shape))}]"'
+        self.inputs_full.append(inp)
+        self.inputs.append(name)
+        self.inputs_full_.append((name, _itype_to_string(itype), shape))
+        return []
+
+    def _emit_begin_signature(self, **kwargs: Dict[str, Any]) -> List[str]:
+        return []
+
+    def _emit_end_signature(self, **kwargs: Dict[str, Any]) -> List[str]:
+        rows = ["", f"def {self.name}(", '    op: "GraphBuilder",']
+        for i in self.inputs_full:
+            rows.append(f"    {i},")
+        rows.append("):")
+        for init in self.inits:
+            val = to_array(init)
+            stype = str(val.dtype).split(".")[-1]
+            rows.append(f"    {init.name} = np.array({val.tolist()}, dtype=np.{stype})")
+        return rows
+
+    def _emit_begin_return(self, **kwargs: Dict[str, Any]) -> List[str]:
+        return []
+
+    def _emit_end_return(self, **kwargs: Dict[str, Any]) -> List[str]:
+        outs = ", ".join(self.outputs)
+        return [f"    return {outs}"]
+
+    def _emit_output(self, **kwargs: Dict[str, Any]) -> List[str]:
+        name = kwargs["name"]
+        itype = kwargs.get("elem_type", 0)
+        shape = kwargs.get("shape", None)
+        self.outputs.append(name)
+        self.outputs_full_.append((name, _itype_to_string(itype), shape))
+        return [f'    op.Identity({name}, outputs=["{name}"])']
+
+    def _emit_node(self, **kwargs: Dict[str, Any]) -> List[str]:
+        op_type = kwargs["op_type"]
+        inputs = kwargs["inputs"]
+        outputs = kwargs["outputs"]
+        if kwargs.get("domain", "") != "":
+            domain = kwargs["domain"]
+            op_type = f"{domain}.{op_type}"
+        atts = kwargs.get("atts", {})
+        args = []
+        for k, v in atts.items():
+            before, vatt = self.render_attribute_value(v)
+            if before:
+                raise NotImplementedError("Graph attribute not supported yet.")
+            args.append(f"{k}={vatt}")
+
+        outs = ", ".join(outputs)
+        inps = ", ".join(inputs)
+        if args:
+            sargs = ", ".join(args)
+            row = f"    {outs} = op.{op_type}({inps}, {sargs})"
+        else:
+            row = f"    {outs} = op.{op_type}({inps})"
+        return [row]
diff --git a/onnx_array_api/translate_api/translate.py b/onnx_array_api/translate_api/translate.py
index 31c1bce..7b7480b 100644
--- a/onnx_array_api/translate_api/translate.py
+++ b/onnx_array_api/translate_api/translate.py
@@ -75,8 +75,12 @@ def export(self, as_str, single_line: bool = False) -> Union[str, List[str]]:
                     domain=self.proto_.domain,
                 )
             )
+        elif isinstance(self.proto_, GraphProto):
+            rows.extend(self.emitter(EventType.BEGIN_GRAPH, name=self.proto_.name))
         else:
-            rows.extend(self.emitter(EventType.BEGIN_GRAPH))
+            rows.extend(
+                self.emitter(EventType.BEGIN_GRAPH, name=self.proto_.graph.name)
+            )
 
         for i in initializers:
             rows.extend(
@@ -88,6 +92,8 @@ def export(self, as_str, single_line: bool = False) -> Union[str, List[str]]:
                 )
             )
 
+        rows.extend(self.emitter(EventType.BEGIN_SIGNATURE))
+
         for i in inputs:
             if is_function:
                 rows.extend(self.emitter(EventType.FUNCTION_INPUT, name=i))
@@ -109,6 +115,8 @@ def export(self, as_str, single_line: bool = False) -> Union[str, List[str]]:
                 self.emitter(EventType.FUNCTION_ATTRIBUTES, attributes=list(attributes))
             )
 
+        rows.extend(self.emitter(EventType.END_SIGNATURE))
+
         for node in nodes:
             atts = self.extract_attributes(node)
             rows.extend(
@@ -122,6 +130,8 @@ def export(self, as_str, single_line: bool = False) -> Union[str, List[str]]:
                 )
             )
 
+        rows.extend(self.emitter(EventType.BEGIN_RETURN))
+
         for o in outputs:
             if is_function:
                 rows.extend(self.emitter(EventType.FUNCTION_OUTPUT, name=o))
@@ -137,6 +147,9 @@ def export(self, as_str, single_line: bool = False) -> Union[str, List[str]]:
                         ),
                     )
                 )
+
+        rows.extend(self.emitter(EventType.END_RETURN))
+
         if isinstance(self.proto_, (GraphProto, FunctionProto)):
             name = self.proto_.name
         else:

From bf983845ab1130dfd8a5540e044e8ef89a3783b5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= <xadupre@users.noreply.github.com>
Date: Mon, 8 Apr 2024 23:56:59 +0200
Subject: [PATCH 29/44] Better comments (#84)

* better comments

* status
---
 _doc/examples/plot_benchmark_rf.py          | 7 +++++--
 onnx_array_api/_command_lines_parser.py     | 6 ++++++
 onnx_array_api/reference/evaluator_yield.py | 5 +++--
 3 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/_doc/examples/plot_benchmark_rf.py b/_doc/examples/plot_benchmark_rf.py
index 423669c..c844d74 100644
--- a/_doc/examples/plot_benchmark_rf.py
+++ b/_doc/examples/plot_benchmark_rf.py
@@ -22,8 +22,6 @@
 import numpy
 import pandas
 from lightgbm import LGBMRegressor
-from onnxmltools.convert.lightgbm.operator_converters.LightGbm import convert_lightgbm
-from onnxmltools.convert.xgboost.operator_converters.XGBoost import convert_xgboost
 from onnxruntime import InferenceSession, SessionOptions
 from psutil import cpu_count
 from sphinx_runpython.runpython import run_cmd
@@ -33,9 +31,14 @@
 from sklearn.ensemble import RandomForestRegressor
 from tqdm import tqdm
 from xgboost import XGBRegressor
+from onnxmltools.convert.xgboost.operator_converters.XGBoost import convert_xgboost
 
 
 def skl2onnx_convert_lightgbm(scope, operator, container):
+    from onnxmltools.convert.lightgbm.operator_converters.LightGbm import (
+        convert_lightgbm,
+    )
+
     options = scope.get_options(operator.raw_operator)
     if "split" in options:
         operator.split = options["split"]
diff --git a/onnx_array_api/_command_lines_parser.py b/onnx_array_api/_command_lines_parser.py
index 15ee153..d3b6feb 100644
--- a/onnx_array_api/_command_lines_parser.py
+++ b/onnx_array_api/_command_lines_parser.py
@@ -123,8 +123,14 @@ def _cmd_compare(argv: List[Any]):
 
     parser = get_parser_compare()
     args = parser.parse_args(argv[1:])
+    if args.verbose in ("1", 1, "True", True):
+        print(f"[compare] first model {args.model1!r}")
+        print(f"[compare] second model {args.model2!r}")
     onx1 = onnx.load(args.model1)
     onx2 = onnx.load(args.model2)
+    if args.verbose in ("1", 1, "True", True):
+        print(f"[compare] first model has {len(onx1.graph.node)} nodes")
+        print(f"[compare] second model has {len(onx2.graph.node)} nodes")
     res1, res2, align, dc = compare_onnx_execution(
         onx1,
         onx2,
diff --git a/onnx_array_api/reference/evaluator_yield.py b/onnx_array_api/reference/evaluator_yield.py
index 88c8a1f..6d6ff48 100644
--- a/onnx_array_api/reference/evaluator_yield.py
+++ b/onnx_array_api/reference/evaluator_yield.py
@@ -642,7 +642,7 @@ def compare_onnx_execution(
             print("[compare_onnx_execution] loading first model")
         proto1 = load(model1) if isinstance(model1, str) else model1
         if verbose:
-            print("[compare_onnx_execution] loading first model")
+            print("[compare_onnx_execution] loading second model")
         proto2 = load(model2) if isinstance(model2, str) else model2
         res1 = list(_enumerate_result_no_execution(proto1))
         res2 = list(_enumerate_result_no_execution(proto2))
@@ -650,7 +650,8 @@ def compare_onnx_execution(
         return
 
     if verbose:
-        print(f"[compare_onnx_execution] got {len(res2)} results")
+        print(f"[compare_onnx_execution] got {len(res1)} results (first model)")
+        print(f"[compare_onnx_execution] got {len(res2)} results (second model)")
         print("[compare_onnx_execution] compute edit distance")
     dc = DistanceExecution()
     _, align = dc.distance_sequence(res1, res2)

From 381d82912296a7a239d3132f5f02670385f217d0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= <xadupre@users.noreply.github.com>
Date: Mon, 8 Apr 2024 23:58:27 +0200
Subject: [PATCH 30/44] example (#85)


From 01e0fac4ef83cc32102626a7a66fbd6ad55d2753 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= <xadupre@users.noreply.github.com>
Date: Wed, 5 Jun 2024 10:41:02 +0200
Subject: [PATCH 31/44] Add command line to replace constants in a model (#87)

* example

* Add command line to replace constant

* doc

* ut

* doc
---
 CHANGELOGS.rst                                |   1 +
 _doc/api/tools.rst                            |   5 +
 _unittests/ut_tools/test_replace_constants.py | 160 ++++++++++++
 _unittests/ut_xrun_doc/test_command_lines1.py |   8 +
 onnx_array_api/_command_lines_parser.py       |  80 +++++-
 onnx_array_api/array_api/_onnx_common.py      |   5 +-
 onnx_array_api/npx/npx_functions.py           |   3 +-
 onnx_array_api/tools/__init__.py              |   1 +
 onnx_array_api/tools/replace_constants.py     | 227 ++++++++++++++++++
 9 files changed, 482 insertions(+), 8 deletions(-)
 create mode 100644 _unittests/ut_tools/test_replace_constants.py
 create mode 100644 onnx_array_api/tools/__init__.py
 create mode 100644 onnx_array_api/tools/replace_constants.py

diff --git a/CHANGELOGS.rst b/CHANGELOGS.rst
index ac4ac15..e435a75 100644
--- a/CHANGELOGS.rst
+++ b/CHANGELOGS.rst
@@ -4,6 +4,7 @@ Change Logs
 0.3.0
 +++++
 
+* :pr:`87`: add command line to replace constant by ConstantOfShape
 * :pr:`79`: first draft to export to GraphBuilder
 * :pr:`77`: supports ConcatOfShape and Slice with the light API
 
diff --git a/_doc/api/tools.rst b/_doc/api/tools.rst
index ef161e0..e0450dc 100644
--- a/_doc/api/tools.rst
+++ b/_doc/api/tools.rst
@@ -6,6 +6,11 @@ Benchmark
 
 .. autofunction:: onnx_array_api.ext_test_case.measure_time
 
+Manipulations
++++++++++++++
+
+.. autofunction:: onnx_array_api.tools.replace_constants.replace_initializer_by_constant_of_shape
+
 Examples
 ++++++++
 
diff --git a/_unittests/ut_tools/test_replace_constants.py b/_unittests/ut_tools/test_replace_constants.py
new file mode 100644
index 0000000..5cad1c2
--- /dev/null
+++ b/_unittests/ut_tools/test_replace_constants.py
@@ -0,0 +1,160 @@
+import unittest
+import numpy as np
+import onnx
+import onnx.helper as oh
+import onnx.numpy_helper as onh
+from onnx import TensorProto
+from onnx_array_api.ext_test_case import ExtTestCase
+from onnx_array_api.reference import (
+    ExtendedReferenceEvaluator as ReferenceEvaluator,
+)
+from onnx_array_api.tools.replace_constants import (
+    replace_initializer_by_constant_of_shape,
+)
+
+
+class TestReplaceConstants(ExtTestCase):
+
+    def test_replace_initializer(self):
+        dtype = np.float32
+        value = np.random.randn(2, 100).astype(dtype)
+        A = onh.from_array(value, name="A")
+        value = np.array([1], dtype=dtype)
+        C = onh.from_array(value, name="C")
+
+        X = oh.make_tensor_value_info("X", TensorProto.FLOAT, [None, None])
+        Y = oh.make_tensor_value_info("Y", TensorProto.FLOAT, [None])
+        node1 = oh.make_node("MatMul", ["X", "A"], ["AX"])
+        node2 = oh.make_node("Sub", ["AX", "C"], ["Y"])
+        graph = oh.make_graph([node1, node2], "lr", [X], [Y], [A, C])
+        model_def = oh.make_model(graph)
+
+        x = np.array([1, 2, 4, 5, 5, 4]).astype(np.float32).reshape((3, 2))
+        oinf1 = ReferenceEvaluator(model_def)
+        y1 = oinf1.run(None, {"X": x})[0]  # type: ignore[index]
+        repl = replace_initializer_by_constant_of_shape(model_def)
+        node_types = {n.op_type for n in repl.graph.node}
+        self.assertIn("ConstantOfShape", node_types)
+        oinf2 = ReferenceEvaluator(repl)
+        y1[:, :] = 3.5
+        y1[0, :] = 0.5
+        y2 = oinf2.run(None, {"X": x})[0]  # type: ignore[index]
+        self.assertEqualArray(y1, y2)
+
+    def test_replace_constant(self):
+        dtype = np.float32
+        value = np.random.randn(2, 10).astype(dtype)
+        A = onh.from_array(value, name="A")
+        value = np.array([1], dtype=dtype)
+        C = onh.from_array(value, name="C")
+
+        X = oh.make_tensor_value_info("X", TensorProto.FLOAT, [None, None])
+        Y = oh.make_tensor_value_info("Y", TensorProto.FLOAT, [None])
+        node0 = oh.make_node("Constant", [], ["A"], value=A)
+        node1 = oh.make_node("MatMul", ["X", "A"], ["AX"])
+        node2 = oh.make_node("Sub", ["AX", "C"], ["Y"])
+        graph = oh.make_graph([node0, node1, node2], "lr", [X], [Y], [C])
+        model_def = oh.make_model(graph)
+
+        x = np.array([1, 2, 4, 5, 5, 4]).astype(np.float32).reshape((3, 2))
+        oinf1 = ReferenceEvaluator(model_def)
+        y1 = oinf1.run(None, {"X": x})[0]  # type: ignore[index]
+        repl = replace_initializer_by_constant_of_shape(model_def, threshold=0)
+        node_types = {n.op_type for n in repl.graph.node}
+        self.assertIn("ConstantOfShape", node_types)
+        oinf2 = ReferenceEvaluator(repl)
+        y1[:, :] = 4
+        y1[0, :] = 1
+        y2 = oinf2.run(None, {"X": x})[0]  # type: ignore[index]
+        self.assertEqualArray(y1, y2)
+
+    def test_replace_constant_function(self):
+        dtype = np.float32
+        value = np.random.randn(2, 100).astype(dtype)
+        A = onh.from_array(value, name="A")
+        value = np.array([1], dtype=dtype)
+        C = onh.from_array(value, name="C")
+
+        X = oh.make_tensor_value_info("X", TensorProto.FLOAT, [None, None])
+        Y = oh.make_tensor_value_info("Y", TensorProto.FLOAT, [None])
+        nodeC = oh.make_node("Constant", [], ["C"], value=C)
+        node0 = oh.make_node("Constant", [], ["A"], value=A)
+        node1 = oh.make_node("MatMul", ["X", "A"], ["AX"])
+        node2 = oh.make_node("Sub", ["AX", "C"], ["Y"])
+        opset_imports = [
+            oh.make_opsetid("", onnx.defs.onnx_opset_version()),
+            oh.make_opsetid("custom", 1),
+        ]
+        fct = oh.make_function(
+            "custom",
+            "unittest",
+            ["X"],
+            ["Y"],
+            [nodeC, node0, node1, node2],
+            opset_imports,
+        )
+
+        node = oh.make_node("unittest", ["X"], ["Y"], domain="custom")
+        graph = oh.make_graph([node], "lr", [X], [Y], [C])
+        model_def = oh.make_model(graph, functions=[fct], opset_imports=opset_imports)
+
+        x = np.array([1, 2, 4, 5, 5, 4]).astype(np.float32).reshape((3, 2))
+        oinf1 = ReferenceEvaluator(model_def)
+        y1 = oinf1.run(None, {"X": x})[0]  # type: ignore[index]
+        repl = replace_initializer_by_constant_of_shape(model_def)
+        node_types = {n.op_type for n in repl.functions[0].node}
+        self.assertIn("ConstantOfShape", node_types)
+        oinf2 = ReferenceEvaluator(repl)
+        y1[:, :] = 3.5
+        y1[0, :] = 0.5
+        y2 = oinf2.run(None, {"X": x})[0]  # type: ignore[index]
+        self.assertEqualArray(y1, y2)
+
+    def test_replace_constant_graph(self):
+        value = np.array([0], dtype=np.float32)
+        zero = onh.from_array(value, name="zero")
+
+        X = oh.make_tensor_value_info("X", onnx.TensorProto.FLOAT, [None, None])
+        Y = oh.make_tensor_value_info("Y", onnx.TensorProto.FLOAT, [None])
+
+        rsum = oh.make_node("ReduceSum", ["X"], ["rsum"])
+        cond = oh.make_node("Greater", ["rsum", "zero"], ["cond"])
+
+        then_out = oh.make_tensor_value_info("then_out", onnx.TensorProto.FLOAT, None)
+        then_cst = onh.from_array(np.array([1] * 129).astype(np.float32))
+
+        then_const_node = oh.make_node(
+            "Constant", inputs=[], outputs=["then_out"], value=then_cst, name="cst1"
+        )
+        then_body = oh.make_graph([then_const_node], "then_body", [], [then_out])
+
+        else_out = oh.make_tensor_value_info("else_out", onnx.TensorProto.FLOAT, None)
+        else_cst = onh.from_array(np.array([-1] * 129).astype(np.float32))
+        else_const_node = oh.make_node(
+            "Constant", inputs=[], outputs=["else_out"], value=else_cst, name="cst2"
+        )
+        else_body = oh.make_graph([else_const_node], "else_body", [], [else_out])
+
+        if_node = oh.make_node(
+            "If", ["cond"], ["Y"], then_branch=then_body, else_branch=else_body
+        )
+        graph = oh.make_graph([rsum, cond, if_node], "if", [X], [Y], [zero])
+        onnx_model = oh.make_model(
+            graph, opset_imports=[oh.make_opsetid("", onnx.defs.onnx_opset_version())]
+        )
+        self.assertNotIn("ConstantOfShape", str(onnx_model))
+
+        x = np.ones((3, 2), dtype=np.float32)
+        oinf1 = ReferenceEvaluator(onnx_model)
+        y1 = oinf1.run(None, {"X": x})[0]  # type: ignore[index]
+        repl = replace_initializer_by_constant_of_shape(onnx_model)
+        self.assertIn("ConstantOfShape", str(repl))
+        oinf2 = ReferenceEvaluator(repl)
+        y2 = oinf2.run(None, {"X": x})[0]  # type: ignore[index]
+        y1 = y1.copy()
+        y1[:] = 0.5
+        self.assertEqualArray(y1, y2)
+
+
+if __name__ == "__main__":
+    unittest.main(verbosity=2)
diff --git a/_unittests/ut_xrun_doc/test_command_lines1.py b/_unittests/ut_xrun_doc/test_command_lines1.py
index 02f84bd..0503f55 100644
--- a/_unittests/ut_xrun_doc/test_command_lines1.py
+++ b/_unittests/ut_xrun_doc/test_command_lines1.py
@@ -16,6 +16,7 @@
     get_main_parser,
     get_parser_compare,
     get_parser_translate,
+    get_parser_replace,
     main,
 )
 
@@ -35,6 +36,13 @@ def test_parser_translate(self):
         text = st.getvalue()
         self.assertIn("model", text)
 
+    def test_parser_replace(self):
+        st = StringIO()
+        with redirect_stdout(st):
+            get_parser_replace().print_help()
+        text = st.getvalue()
+        self.assertIn("model", text)
+
     def test_command_translate(self):
         X = make_tensor_value_info("X", TensorProto.FLOAT, [None, None])
         Y = make_tensor_value_info("Y", TensorProto.FLOAT, [5, 6])
diff --git a/onnx_array_api/_command_lines_parser.py b/onnx_array_api/_command_lines_parser.py
index d3b6feb..c0a7678 100644
--- a/onnx_array_api/_command_lines_parser.py
+++ b/onnx_array_api/_command_lines_parser.py
@@ -14,13 +14,14 @@ def get_main_parser() -> ArgumentParser:
     )
     parser.add_argument(
         "cmd",
-        choices=["translate", "compare"],
+        choices=["translate", "compare", "replace"],
         help=dedent(
             """
         Selects a command.
         
         'translate' exports an onnx graph into a piece of code replicating it,
-        'compare' compares the execution of two onnx models
+        'compare' compares the execution of two onnx models,
+        'replace' replaces constants and initializers by ConstantOfShape to make the model lighter
         """
         ),
     )
@@ -142,8 +143,75 @@ def _cmd_compare(argv: List[Any]):
     print(text)
 
 
+def get_parser_replace() -> ArgumentParser:
+    parser = ArgumentParser(
+        prog="replace",
+        description=dedent(
+            """
+        Replaces constants and initializers by ConstantOfShape or any other nodes
+        to make the model smaller.
+        """
+        ),
+        epilog="This is mostly used to write unit tests without adding "
+        "a big file to the repository.",
+    )
+    parser.add_argument(
+        "-m",
+        "--model",
+        type=str,
+        required=True,
+        help="onnx model to translate",
+    )
+    parser.add_argument(
+        "-o",
+        "--out",
+        type=str,
+        required=True,
+        help="output file",
+    )
+    parser.add_argument(
+        "-t",
+        "--threshold",
+        default=128,
+        help="Threshold above which every constant is replaced",
+    )
+    parser.add_argument(
+        "--type",
+        default="ConstantOfShape",
+        help="Inserts this operator type",
+    )
+    parser.add_argument(
+        "--domain",
+        default="",
+        help="Inserts this domain",
+    )
+    parser.add_argument(
+        "-v",
+        "--verbose",
+        default=0,
+        help="verbosity",
+    )
+    return parser
+
+
+def _cmd_replace(argv: List[Any]):
+    from .tools.replace_constants import replace_initializer_by_constant_of_shape
+
+    parser = get_parser_replace()
+    args = parser.parse_args(argv[1:])
+    if args.verbose in ("1", 1, "True", True):
+        print(f"[replace] load model {args.model!r}")
+    onx = onnx.load(args.model)
+    new_onx = replace_initializer_by_constant_of_shape(
+        onx, threshold=args.threshold, op_type=args.type, domain=args.domain
+    )
+    if args.verbose in ("1", 1, "True", True):
+        print(f"[replace] save model {args.out!r}")
+    onnx.save(new_onx, args.out)
+
+
 def main(argv: Optional[List[Any]] = None):
-    fcts = dict(translate=_cmd_translate, compare=_cmd_compare)
+    fcts = dict(translate=_cmd_translate, compare=_cmd_compare, replace=_cmd_replace)
 
     if argv is None:
         argv = sys.argv[1:]
@@ -152,7 +220,11 @@ def main(argv: Optional[List[Any]] = None):
             parser = get_main_parser()
             parser.parse_args(argv)
         else:
-            parsers = dict(translate=get_parser_translate, compare=get_parser_compare)
+            parsers = dict(
+                translate=get_parser_translate,
+                compare=get_parser_compare,
+                replace=get_parser_replace,
+            )
             cmd = argv[0]
             if cmd not in parsers:
                 raise ValueError(
diff --git a/onnx_array_api/array_api/_onnx_common.py b/onnx_array_api/array_api/_onnx_common.py
index 6e8ee6d..8456378 100644
--- a/onnx_array_api/array_api/_onnx_common.py
+++ b/onnx_array_api/array_api/_onnx_common.py
@@ -46,14 +46,13 @@ def asarray(
     dtype: Optional[DType] = None,
     order: Optional[str] = None,
     like: Any = None,
+    device: Optional[str] = None,
     copy: bool = False,
 ) -> EagerTensor:
     """
     Converts anything into an array.
     """
-    """
-    Converts anything into an array.
-    """
+    assert device is None, f"asarray not implemented yet for device={device!r}"
     if order not in ("C", None):
         raise NotImplementedError(f"asarray is not implemented for order={order!r}.")
     if like is not None:
diff --git a/onnx_array_api/npx/npx_functions.py b/onnx_array_api/npx/npx_functions.py
index 2f547d6..7c6cd66 100644
--- a/onnx_array_api/npx/npx_functions.py
+++ b/onnx_array_api/npx/npx_functions.py
@@ -281,7 +281,8 @@ def astype(
             to = DType(TensorProto.STRING)
         else:
             raise TypeError(f"dtype must of type DType, not {type(dtype)}-{dtype}.")
-    return var(a, op="Cast", to=to.code)
+        return var(a, op="Cast", to=to.code)
+    return var(a, op="Cast", to=dtype.code)
 
 
 @npxapi_inline
diff --git a/onnx_array_api/tools/__init__.py b/onnx_array_api/tools/__init__.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/onnx_array_api/tools/__init__.py
@@ -0,0 +1 @@
+
diff --git a/onnx_array_api/tools/replace_constants.py b/onnx_array_api/tools/replace_constants.py
new file mode 100644
index 0000000..daa4ca8
--- /dev/null
+++ b/onnx_array_api/tools/replace_constants.py
@@ -0,0 +1,227 @@
+import numpy as np
+from onnx import FunctionProto, ModelProto, GraphProto, AttributeProto
+from onnx.helper import (
+    make_model,
+    set_model_props,
+    make_graph,
+    make_node,
+    make_attribute,
+    make_function,
+    tensor_dtype_to_np_dtype,
+)
+from onnx.numpy_helper import from_array
+
+
+def replace_initializer_by_constant_of_shape(
+    onx, threshold=128, op_type="ConstantOfShape", domain=""
+):
+    """
+    Replaces initializers by nodes *ConstantOfShape* to reduce
+    the size and still write a unit test.
+
+    :param onx: ModelProto
+    :param threshold: every initializer under this threshold is not impacted
+    :param op_type: replace by this node
+    :param domain: replace by this domain
+    :return: onx, modified ModelProto
+    """
+    if isinstance(onx, FunctionProto):
+        modified = False
+        new_nodes = []
+        for node in onx.node:
+            if node.op_type == "Constant":
+                from onnx_array_api.reference import ExtendedReferenceEvaluator
+
+                ref = ExtendedReferenceEvaluator(node)
+                cst = ref.run(None, {})[0]
+
+                size = np.prod(cst.shape)
+                if size <= threshold:
+                    new_nodes.append(node)
+                    continue
+
+                new_name = f"{node.output[0]}__SHAPE"
+                new_nodes.append(
+                    make_node(
+                        "Constant",
+                        [],
+                        [new_name],
+                        value=from_array(
+                            np.array(cst.shape, dtype=np.int64), name=new_name
+                        ),
+                    )
+                )
+                dtype = cst.dtype
+                assert op_type != "Constant"
+                new_nodes.append(
+                    make_node(
+                        op_type,
+                        [new_name],
+                        node.output,
+                        value=from_array(np.array([0.5], dtype=dtype)),
+                        domain=domain,
+                    )
+                )
+                modified = True
+                continue
+
+            new_nodes.append(node)
+
+        if not modified:
+            return onx
+
+        onxf = make_function(
+            domain=onx.domain,
+            fname=onx.name,
+            inputs=onx.input,
+            outputs=onx.output,
+            nodes=new_nodes,
+            doc_string=onx.doc_string,
+            overload=onx.overload,
+            opset_imports=[],
+        )
+        if onx.opset_import:
+            onxf.opset_import.extend(onx.opset_import)
+        if onx.value_info:
+            onxf.value_info.extend(onx.value_info)
+        if onx.attribute:
+            onxf.attribute.extend(onx.attribute)
+        if onx.attribute_proto:
+            onxf.attribute_proto.extend(onx.attribute_proto)
+        return onxf
+
+    if isinstance(onx, ModelProto):
+        new_graph = replace_initializer_by_constant_of_shape(
+            onx.graph, threshold=threshold, op_type=op_type, domain=domain
+        )
+        new_functions = [
+            replace_initializer_by_constant_of_shape(
+                f, threshold=threshold, op_type=op_type, domain=domain
+            )
+            for f in onx.functions
+        ]
+        model = make_model(
+            new_graph,
+            functions=new_functions,
+            producer_name=onx.producer_name,
+            producer_version=onx.producer_version,
+            ir_version=onx.ir_version,
+            doc_string=onx.doc_string,
+            domain=onx.domain,
+            model_version=onx.model_version,
+        )
+        if len(onx.metadata_props) > 0:  # pragma: no cover
+            values = {p.key: p.value for p in onx.metadata_props}
+            set_model_props(model, values)
+
+        del model.opset_import[:]  # pylint: disable=E1101
+        for oimp in onx.opset_import:
+            op_set = model.opset_import.add()  # pylint: disable=E1101
+            if oimp.domain == "" and oimp.version < 9:
+                raise RuntimeError(
+                    f"ConstantOfShape was introduced in "
+                    f"opset 9 but opset is {oimp.version}."
+                )
+            op_set.domain = oimp.domain
+            op_set.version = oimp.version
+        return model
+
+    if not isinstance(onx, GraphProto):
+        raise TypeError(f"onx should be a GraphProto at this stage not {type(onx)}.")
+
+    new_nodes = []
+    removed = set()
+    additional_inputs = []
+
+    new_inits = []
+    for init in onx.initializer:
+        dims = tuple(init.dims)
+        size = np.prod(dims)
+        if size <= threshold:
+            new_inits.append(init)
+            continue
+        new_name = f"{init.name}__SHAPE"
+        new_inits.append(
+            from_array(np.array(list(dims), dtype=np.int64), name=new_name)
+        )
+        dtype = tensor_dtype_to_np_dtype(init.data_type)
+        node = make_node(
+            op_type,
+            [new_name],
+            [init.name],
+            value=from_array(np.array([0.5], dtype=dtype)),
+            domain=domain,
+        )
+        new_nodes.append(node)
+        removed.add(init.name)
+
+    new_sparse_inits = []
+    for init in onx.sparse_initializer:
+        dims = tuple(init.dims)
+        size = np.prod(dims)
+        if size <= threshold:
+            new_sparse_inits.append(init)
+            continue
+        raise NotImplementedError(
+            f"This feature is not yet implemented for sparse initializer "
+            f"(name={init.name!r})."
+        )
+
+    for node in onx.node:
+        if node.op_type == "Constant":
+            from onnx_array_api.reference import ExtendedReferenceEvaluator
+
+            ref = ExtendedReferenceEvaluator(node)
+            cst = ref.run(None, {})[0]
+
+            size = np.prod(cst.shape)
+            if size <= threshold:
+                new_nodes.append(node)
+                continue
+
+            new_name = f"{node.output[0]}__SHAPE"
+            new_inits.append(
+                from_array(np.array(cst.shape, dtype=np.int64), name=new_name)
+            )
+            dtype = cst.dtype
+            new_nodes.append(
+                make_node(
+                    op_type,
+                    [new_name],
+                    node.output,
+                    value=from_array(np.array([0.5], dtype=dtype)),
+                    domain=domain,
+                )
+            )
+            continue
+
+        modified = False
+        atts = []
+        for att in node.attribute:
+            if (
+                att.type == AttributeProto.GRAPH
+                and hasattr(att, "g")
+                and att.g is not None
+            ):
+                modified = True
+                g = replace_initializer_by_constant_of_shape(
+                    att.g, threshold=threshold, op_type=op_type, domain=domain
+                )
+                att = make_attribute(att.name, g)
+            atts.append(att)
+        if modified:
+            new_node = make_node(node.op_type, node.input, node.output)
+            new_node.attribute.extend(atts)
+            new_nodes.append(new_node)
+        else:
+            new_nodes.append(node)
+
+    graph = make_graph(
+        new_nodes,
+        onx.name,
+        [i for i in onx.input if i.name not in removed] + additional_inputs,
+        onx.output,
+        initializer=new_inits,
+        sparse_initializer=new_sparse_inits,
+    )
+    return graph

From 6076c1cfc85ab4f58607e53226f2daa7ad95f0b8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= <xadupre@users.noreply.github.com>
Date: Mon, 15 Jul 2024 10:27:42 +0200
Subject: [PATCH 32/44] Use ruff check (#88)

---
 azure-pipelines.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 61587f4..12d1e36 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -24,7 +24,7 @@ jobs:
   - script: pip install -r requirements-dev.txt
     displayName: 'Install Requirements dev'
   - script: |
-      ruff .
+      ruff check .
     displayName: 'Ruff'
   - script: |
       black --diff .
@@ -78,7 +78,7 @@ jobs:
   - script: pip install onnxmltools --no-deps
     displayName: 'Install onnxmltools'
   - script: |
-      ruff .
+      ruff check .
     displayName: 'Ruff'
   - script: |
       black --diff .
@@ -172,7 +172,7 @@ jobs:
   - script: pip install onnxmltools --no-deps
     displayName: 'Install onnxmltools'
   - script: |
-      ruff .
+      ruff check .
     displayName: 'Ruff'
   - script: |
       black --diff .

From eee76cc1c0baab60da34f52ce0e089e31bbb7358 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= <xadupre@users.noreply.github.com>
Date: Sat, 31 Aug 2024 17:43:18 +0200
Subject: [PATCH 33/44] Lint (#89)

* example

* lint

* exc

* array

* fix

* fix missing dependency

* yml

* disable some tests
---
 _doc/examples/plot_benchmark_rf.py            | 12 +++----
 _doc/examples/plot_onnxruntime.py             |  4 +--
 .../ut_array_api/test_hypothesis_array_api.py | 16 ++++++++--
 .../ut_light_api/test_backend_export.py       | 26 ++++++++++++---
 _unittests/ut_light_api/test_light_api.py     |  2 +-
 _unittests/ut_plotting/test_dot_plot.py       |  1 -
 _unittests/ut_plotting/test_text_plot.py      |  2 +-
 ...st_backend_extended_reference_evaluator.py | 19 +++++++++++
 _unittests/ut_translate_api/test_translate.py | 10 ++++--
 .../test_translate_classic.py                 |  2 +-
 _unittests/ut_validation/test_f8.py           | 24 +++++++-------
 .../test_documentation_examples.py            |  2 +-
 azure-pipelines.yml                           | 18 +++++++++++
 onnx_array_api/__init__.py                    |  1 -
 onnx_array_api/_command_lines_parser.py       |  8 +++--
 onnx_array_api/_helpers.py                    |  4 +--
 onnx_array_api/array_api/__init__.py          | 21 ++++++------
 onnx_array_api/array_api/_onnx_common.py      | 28 +++++++++-------
 onnx_array_api/ext_test_case.py               |  9 +++---
 onnx_array_api/graph_api/graph_builder.py     | 28 +++++++++-------
 onnx_array_api/light_api/model.py             |  3 +-
 onnx_array_api/light_api/var.py               |  5 +--
 onnx_array_api/npx/npx_array_api.py           |  2 --
 onnx_array_api/npx/npx_core_api.py            |  4 +--
 onnx_array_api/npx/npx_functions.py           |  2 +-
 onnx_array_api/npx/npx_graph_builder.py       | 13 +++-----
 onnx_array_api/npx/npx_helper.py              |  3 +-
 onnx_array_api/npx/npx_jit_eager.py           | 22 ++++++-------
 onnx_array_api/npx/npx_numpy_tensors.py       |  8 ++---
 onnx_array_api/npx/npx_tensors.py             |  4 +--
 onnx_array_api/npx/npx_types.py               | 26 +++++++--------
 onnx_array_api/npx/npx_var.py                 | 18 +++++------
 onnx_array_api/ort/ort_profile.py             | 16 ++++++----
 onnx_array_api/ort/ort_tensors.py             |  4 +--
 onnx_array_api/plotting/_helper.py            |  6 ++--
 onnx_array_api/plotting/dot_plot.py           |  2 +-
 onnx_array_api/plotting/text_plot.py          | 14 ++++----
 onnx_array_api/profiling.py                   |  7 ++--
 onnx_array_api/reference/evaluator_yield.py   | 20 +++++++-----
 onnx_array_api/translate_api/inner_emitter.py | 16 +++++++---
 onnx_array_api/translate_api/light_emitter.py |  6 ++--
 onnx_array_api/validation/f8.py               | 12 +++----
 pyproject.toml                                | 32 +++++++++++++++++--
 requirements.txt                              |  1 +
 setup.py                                      |  1 -
 45 files changed, 295 insertions(+), 189 deletions(-)

diff --git a/_doc/examples/plot_benchmark_rf.py b/_doc/examples/plot_benchmark_rf.py
index c844d74..c1ce486 100644
--- a/_doc/examples/plot_benchmark_rf.py
+++ b/_doc/examples/plot_benchmark_rf.py
@@ -40,10 +40,7 @@ def skl2onnx_convert_lightgbm(scope, operator, container):
     )
 
     options = scope.get_options(operator.raw_operator)
-    if "split" in options:
-        operator.split = options["split"]
-    else:
-        operator.split = None
+    operator.split = options.get("split", None)
     convert_lightgbm(scope, operator, container)
 
 
@@ -103,7 +100,7 @@ def measure_inference(fct, X, repeat, max_time=5, quantile=1):
     :return: number of runs, sum of the time, average, median
     """
     times = []
-    for n in range(repeat):
+    for _n in range(repeat):
         perf = time.perf_counter()
         fct(X)
         delta = time.perf_counter() - perf
@@ -241,7 +238,10 @@ def measure_inference(fct, X, repeat, max_time=5, quantile=1):
     # onnxruntime
     bar.set_description(f"J={n_j} E={n_estimators} D={max_depth} predictO")
     r, t, mean, med = measure_inference(
-        lambda x: sess.run(None, {"X": x}), X, repeat=repeat, max_time=max_time
+        lambda x, sess=sess: sess.run(None, {"X": x}),
+        X,
+        repeat=repeat,
+        max_time=max_time,
     )
     o2 = obs.copy()
     o2.update(dict(avg=mean, med=med, n_runs=r, ttime=t, name="ort_"))
diff --git a/_doc/examples/plot_onnxruntime.py b/_doc/examples/plot_onnxruntime.py
index fcace3e..0aba6ac 100644
--- a/_doc/examples/plot_onnxruntime.py
+++ b/_doc/examples/plot_onnxruntime.py
@@ -87,14 +87,14 @@ def loop(n=1000):
     x = np.random.randn(n, 2).astype(np.float32)
     y = np.random.randn(n, 2).astype(np.float32)
 
-    obs = measure_time(lambda: myloss(x, y))
+    obs = measure_time(lambda x=x, y=y: myloss(x, y))
     obs["name"] = "numpy"
     obs["n"] = n
     data.append(obs)
 
     xort = OrtTensor.from_array(x)
     yort = OrtTensor.from_array(y)
-    obs = measure_time(lambda: ort_myloss(xort, yort))
+    obs = measure_time(lambda xort=xort, yort=yort: ort_myloss(xort, yort))
     obs["name"] = "ort"
     obs["n"] = n
     data.append(obs)
diff --git a/_unittests/ut_array_api/test_hypothesis_array_api.py b/_unittests/ut_array_api/test_hypothesis_array_api.py
index 95b1447..602f928 100644
--- a/_unittests/ut_array_api/test_hypothesis_array_api.py
+++ b/_unittests/ut_array_api/test_hypothesis_array_api.py
@@ -2,6 +2,7 @@
 import warnings
 from os import getenv
 from functools import reduce
+import packaging.version as pv
 import numpy as np
 from operator import mul
 from hypothesis import given
@@ -44,9 +45,12 @@ class TestHypothesisArraysApis(ExtTestCase):
 
     @classmethod
     def setUpClass(cls):
-        with warnings.catch_warnings():
-            warnings.simplefilter("ignore")
-            from numpy import array_api as xp
+        try:
+            import array_api_strict as xp
+        except ImportError:
+            with warnings.catch_warnings():
+                warnings.simplefilter("ignore")
+                from numpy import array_api as xp
 
         api_version = getenv(
             "ARRAY_API_TESTS_VERSION",
@@ -63,6 +67,9 @@ def test_strategies(self):
         self.assertNotEmpty(self.xps)
         self.assertNotEmpty(self.onxps)
 
+    @unittest.skipIf(
+        pv.Version(np.__version__) >= pv.Version("2.0"), reason="abandoned"
+    )
     def test_scalar_strategies(self):
         dtypes = dict(
             integer_dtypes=self.xps.integer_dtypes(),
@@ -139,6 +146,9 @@ def fctonx(x, kw):
         fctonx()
         self.assertEqual(len(args_onxp), len(args_np))
 
+    @unittest.skipIf(
+        pv.Version(np.__version__) >= pv.Version("2.0"), reason="abandoned"
+    )
     def test_square_sizes_strategies(self):
         dtypes = dict(
             integer_dtypes=self.xps.integer_dtypes(),
diff --git a/_unittests/ut_light_api/test_backend_export.py b/_unittests/ut_light_api/test_backend_export.py
index 47ee7d9..91f4dd4 100644
--- a/_unittests/ut_light_api/test_backend_export.py
+++ b/_unittests/ut_light_api/test_backend_export.py
@@ -5,6 +5,7 @@
 import packaging.version as pv
 import numpy
 from numpy.testing import assert_allclose
+from onnx.defs import onnx_opset_version
 import onnx.backend.base
 import onnx.backend.test
 import onnx.shape_inference
@@ -31,7 +32,6 @@
 
 class ReferenceImplementationError(RuntimeError):
     "Fails, export cannot be compared."
-    pass
 
 
 class ExportWrapper:
@@ -64,7 +64,8 @@ def run(
             expected = self.expected_sess.run(names, feeds)
         except (RuntimeError, AssertionError, TypeError, KeyError) as e:
             raise ReferenceImplementationError(
-                f"ReferenceImplementation fails with {onnx_simple_text_plot(self.model)}"
+                f"ReferenceImplementation fails with "
+                f"{onnx_simple_text_plot(self.model)}"
                 f"\n--RAW--\n{self.model}"
             ) from e
 
@@ -85,7 +86,7 @@ def run(
                 new_code = "\n".join(
                     [f"{i+1:04} {line}" for i, line in enumerate(code.split("\n"))]
                 )
-                raise AssertionError(f"ERROR {e}\n{new_code}")
+                raise AssertionError(f"ERROR {e}\n{new_code}")  # noqa: B904
 
             locs = {
                 "np": numpy,
@@ -154,7 +155,8 @@ def run(
                 ):
                     if a.tolist() != b.tolist():
                         raise AssertionError(
-                            f"Text discrepancies for api {api!r} with a.dtype={a.dtype} "
+                            f"Text discrepancies for api {api!r} "
+                            f"with a.dtype={a.dtype} "
                             f"and b.dtype={b.dtype}"
                             f"\n--BASE--\n{onnx_simple_text_plot(self.model)}"
                             f"\n--EXP[{api}]--\n{onnx_simple_text_plot(export_model)}"
@@ -275,6 +277,22 @@ def run_node(cls, node, inputs, device=None, outputs_info=None, **kwargs):
     ")"
 )
 
+if onnx_opset_version() < 22:
+    backend_test.exclude(
+        "("
+        "test_dft_inverse_cpu"
+        "|test_dft_inverse_opset19_cpu"
+        "|test_lppool_1d_default_cpu"
+        "|test_lppool_2d_default_cpu"
+        "|test_lppool_2d_dilations_cpu"
+        "|test_lppool_2d_pads_cpu"
+        "|test_lppool_2d_same_lower_cpu"
+        "|test_lppool_2d_same_upper_cpu"
+        "|test_lppool_2d_strides_cpu"
+        "|test_lppool_3d_default_cpu"
+        ")"
+    )
+
 if pv.Version(onnx_version) < pv.Version("1.16.0"):
     backend_test.exclude("(test_strnorm|test_range_)")
 
diff --git a/_unittests/ut_light_api/test_light_api.py b/_unittests/ut_light_api/test_light_api.py
index e14896a..f936cc1 100644
--- a/_unittests/ut_light_api/test_light_api.py
+++ b/_unittests/ut_light_api/test_light_api.py
@@ -484,7 +484,7 @@ def g(self):
         def ah(self):
             return True
 
-        setattr(A, "h", ah)
+        setattr(A, "h", ah)  # noqa: B010
 
         self.assertTrue(A().h())
         self.assertIn("(self)", str(inspect.signature(A.h)))
diff --git a/_unittests/ut_plotting/test_dot_plot.py b/_unittests/ut_plotting/test_dot_plot.py
index 5c03746..4c8c4dd 100644
--- a/_unittests/ut_plotting/test_dot_plot.py
+++ b/_unittests/ut_plotting/test_dot_plot.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 import os
 import unittest
 
diff --git a/_unittests/ut_plotting/test_text_plot.py b/_unittests/ut_plotting/test_text_plot.py
index 963b5cb..5844ff0 100644
--- a/_unittests/ut_plotting/test_text_plot.py
+++ b/_unittests/ut_plotting/test_text_plot.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 import os
 import textwrap
 import unittest
@@ -95,6 +94,7 @@ def test_onnx_text_plot_tree_cls_2(self):
                +f 0:1 1:0 2:0
         """
         ).strip(" \n\r")
+        res = res.replace("np.float32(", "").replace(")", "")
         self.assertEqual(expected, res.strip(" \n\r"))
 
     @ignore_warnings((UserWarning, FutureWarning))
diff --git a/_unittests/ut_reference/test_backend_extended_reference_evaluator.py b/_unittests/ut_reference/test_backend_extended_reference_evaluator.py
index 06502b2..fbf12b7 100644
--- a/_unittests/ut_reference/test_backend_extended_reference_evaluator.py
+++ b/_unittests/ut_reference/test_backend_extended_reference_evaluator.py
@@ -217,6 +217,25 @@ def run_node(cls, node, inputs, device=None, outputs_info=None, **kwargs):
     # The following tests fail due to a type mismatch.
     backend_test.exclude("(test_eyelike_without_dtype)")
 
+if onnx_opset_version() < 22:
+    backend_test.exclude(
+        "("
+        "test_adagrad_cpu"
+        "|test_adagrad_multiple_cpu"
+        "|test_dft_inverse_cpu"
+        "|test_dft_inverse_opset19_cpu"
+        "|test_lppool_1d_default_cpu"
+        "|test_lppool_2d_default_cpu"
+        "|test_lppool_2d_dilations_cpu"
+        "|test_lppool_2d_pads_cpu"
+        "|test_lppool_2d_same_lower_cpu"
+        "|test_lppool_2d_same_upper_cpu"
+        "|test_lppool_2d_strides_cpu"
+        "|test_lppool_3d_default_cpu"
+        ")"
+    )
+
+
 # The following tests fail due to discrepancies (small but still higher than 1e-7).
 backend_test.exclude("test_adam_multiple")  # 1e-2
 
diff --git a/_unittests/ut_translate_api/test_translate.py b/_unittests/ut_translate_api/test_translate.py
index 0212d0b..98629d8 100644
--- a/_unittests/ut_translate_api/test_translate.py
+++ b/_unittests/ut_translate_api/test_translate.py
@@ -160,8 +160,14 @@ def test_export_if(self):
         self.assertEqualArray(np.array([1], dtype=np.int64), got[0])
 
         code = translate(onx)
-        selse = "g().cst(np.array([0], dtype=np.int64)).rename('Z').bring('Z').vout(elem_type=TensorProto.FLOAT)"
-        sthen = "g().cst(np.array([1], dtype=np.int64)).rename('Z').bring('Z').vout(elem_type=TensorProto.FLOAT)"
+        selse = (
+            "g().cst(np.array([0], dtype=np.int64)).rename('Z')."
+            "bring('Z').vout(elem_type=TensorProto.FLOAT)"
+        )
+        sthen = (
+            "g().cst(np.array([1], dtype=np.int64)).rename('Z')."
+            "bring('Z').vout(elem_type=TensorProto.FLOAT)"
+        )
         expected = dedent(
             f"""
             (
diff --git a/_unittests/ut_translate_api/test_translate_classic.py b/_unittests/ut_translate_api/test_translate_classic.py
index c6cb412..acee6e5 100644
--- a/_unittests/ut_translate_api/test_translate_classic.py
+++ b/_unittests/ut_translate_api/test_translate_classic.py
@@ -252,7 +252,7 @@ def test_fft(self):
             new_code = "\n".join(
                 [f"{i+1:04} {line}" for i, line in enumerate(code.split("\n"))]
             )
-            raise AssertionError(f"ERROR {e}\n{new_code}")
+            raise AssertionError(f"ERROR {e}\n{new_code}")  # noqa: B904
 
     def test_aionnxml(self):
         onx = (
diff --git a/_unittests/ut_validation/test_f8.py b/_unittests/ut_validation/test_f8.py
index 80611b5..4c6517f 100644
--- a/_unittests/ut_validation/test_f8.py
+++ b/_unittests/ut_validation/test_f8.py
@@ -88,7 +88,7 @@ def test_fe5m2_to_float32_paper(self):
         self.assertEqual(fe5m2_to_float32(int("11111100", 2)), -numpy.inf)
 
     def test_fe4m3fn_to_float32_all(self):
-        for i in range(0, 256):
+        for i in range(256):
             a = fe4m3_to_float32_float(i)
             b = fe4m3_to_float32(i)
             if numpy.isnan(a):
@@ -97,7 +97,7 @@ def test_fe4m3fn_to_float32_all(self):
             self.assertEqual(a, b)
 
     def test_fe4m3fn_to_float32_all_ml_types(self):
-        for i in range(0, 256):
+        for i in range(256):
             a = fe4m3_to_float32_float(i)
             b = fe4m3_to_float32(i)
             c = new_cvt_float32_to_e4m3fn(b)
@@ -188,7 +188,7 @@ def test_search_float32_into_fe5m2_simple(self):
                         self.assertEqual(b1, b2)
 
     def test_search_float32_into_fe4m3fn_equal(self):
-        values = [(fe4m3_to_float32_float(i), i) for i in range(0, 256)]
+        values = [(fe4m3_to_float32_float(i), i) for i in range(256)]
         values.sort()
 
         for value, expected in values:
@@ -208,7 +208,7 @@ def test_search_float32_into_fe4m3fn_equal(self):
                     self.assertIn(nf, (0, 128))
 
     def test_search_float32_into_fe5m2_equal(self):
-        values = [(fe5m2_to_float32_float(i), i) for i in range(0, 256)]
+        values = [(fe5m2_to_float32_float(i), i) for i in range(256)]
         values.sort()
 
         for value, expected in values:
@@ -233,7 +233,7 @@ def test_search_float32_into_fe5m2_equal(self):
                 self.assertEqual(fe5m2_to_float32(nf), float(cf))
 
     def test_search_float32_into_fe4m3fn(self):
-        values = [(fe4m3_to_float32_float(i), i) for i in range(0, 256)]
+        values = [(fe4m3_to_float32_float(i), i) for i in range(256)]
         values.sort()
 
         obs = []
@@ -308,7 +308,7 @@ def test_search_float32_into_fe4m3fn(self):
             )
 
     def test_search_float32_into_fe5m2(self):
-        values = [(fe5m2_to_float32_float(i), i) for i in range(0, 256)]
+        values = [(fe5m2_to_float32_float(i), i) for i in range(256)]
         values.sort()
 
         obs = []
@@ -651,7 +651,7 @@ def test_search_float32_into_fe5m2fnuz_simple(self):
                 self.assertEqual(expected, got)
 
     def test_fe4m3fnuz_to_float32_all(self):
-        for i in range(0, 256):
+        for i in range(256):
             a = fe4m3_to_float32_float(i, uz=True)
             b = fe4m3_to_float32(i, uz=True)
             if numpy.isnan(a):
@@ -660,7 +660,7 @@ def test_fe4m3fnuz_to_float32_all(self):
             self.assertEqual(a, b)
 
     def test_fe5m2fnuz_to_float32_all(self):
-        for i in range(0, 256):
+        for i in range(256):
             a = fe5m2_to_float32_float(i, fn=True, uz=True)
             b = fe5m2_to_float32(i, fn=True, uz=True)
             if numpy.isnan(a):
@@ -669,7 +669,7 @@ def test_fe5m2fnuz_to_float32_all(self):
             self.assertEqual(a, b)
 
     def test_search_float32_into_fe4m3fnuz(self):
-        values = [(fe4m3_to_float32_float(i, uz=True), i) for i in range(0, 256)]
+        values = [(fe4m3_to_float32_float(i, uz=True), i) for i in range(256)]
         values.sort()
 
         obs = []
@@ -715,9 +715,7 @@ def test_search_float32_into_fe4m3fnuz(self):
             )
 
     def test_search_float32_into_fe5m2fnuz(self):
-        values = [
-            (fe5m2_to_float32_float(i, fn=True, uz=True), i) for i in range(0, 256)
-        ]
+        values = [(fe5m2_to_float32_float(i, fn=True, uz=True), i) for i in range(256)]
         values.sort()
 
         obs = []
@@ -1235,7 +1233,7 @@ def test_nan(self):
                 expected,
             )
         ]
-        for i in range(0, 23):
+        for i in range(23):
             v = 0x7F800000 | (1 << i)
             f = numpy.uint32(v).view(numpy.float32)
             values.append((i, v, f, expected))
diff --git a/_unittests/ut_xrun_doc/test_documentation_examples.py b/_unittests/ut_xrun_doc/test_documentation_examples.py
index 12a36ba..6f6a5d1 100644
--- a/_unittests/ut_xrun_doc/test_documentation_examples.py
+++ b/_unittests/ut_xrun_doc/test_documentation_examples.py
@@ -49,7 +49,7 @@ def run_test(self, fold: str, name: str, verbose=0) -> int:
                     if verbose:
                         print(f"failed: {name!r} due to missing dot.")
                     return 0
-                raise AssertionError(
+                raise AssertionError(  # noqa: B904
                     "Example '{}' (cmd: {} - exec_prefix='{}') "
                     "failed due to\n{}"
                     "".format(name, cmds, sys.exec_prefix, st)
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 12d1e36..20d27ce 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -35,6 +35,9 @@ jobs:
   - script: |
       python -m pip install . -v -v -v
     displayName: 'install wheel'
+  - script: |
+      python -m pip freeze
+    displayName: 'pip freeze'
   - script: |
       python -m pytest
     displayName: 'Runs Unit Tests'
@@ -83,6 +86,9 @@ jobs:
   - script: |
       black --diff .
     displayName: 'Black'
+  - script: |
+      python -m pip freeze
+    displayName: 'pip freeze'
   - script: |
       python -m pytest
     displayName: 'Runs Unit Tests'
@@ -125,6 +131,9 @@ jobs:
       export ARRAY_API_TESTS_MODULE=onnx_array_api.array_api.onnx_numpy
       cd array-api-tests
     displayName: 'Set API'
+  - script: |
+      python -m pip freeze
+    displayName: 'pip freeze'
   - script: |
       export ARRAY_API_TESTS_MODULE=onnx_array_api.array_api.onnx_numpy
       cd array-api-tests
@@ -177,6 +186,9 @@ jobs:
   - script: |
       black --diff .
     displayName: 'Black'
+  - script: |
+      python -m pip freeze
+    displayName: 'pip freeze'
   - script: |
       python -m pytest --cov
     displayName: 'Runs Unit Tests'
@@ -213,6 +225,9 @@ jobs:
     displayName: 'Install Requirements dev'
   - script: pip install onnxmltools --no-deps
     displayName: 'Install onnxmltools'
+  - script: |
+      python -m pip freeze
+    displayName: 'pip freeze'
   - script: |
       python -m pytest -v
     displayName: 'Runs Unit Tests'
@@ -256,6 +271,9 @@ jobs:
     displayName: 'Install Requirements dev'
   - script: pip install onnxmltools --no-deps
     displayName: 'Install onnxmltools'
+  - script: |
+      python -m pip freeze
+    displayName: 'pip freeze'
   - script: |
       python -m pytest
     displayName: 'Runs Unit Tests'
diff --git a/onnx_array_api/__init__.py b/onnx_array_api/__init__.py
index c4bc456..f78126c 100644
--- a/onnx_array_api/__init__.py
+++ b/onnx_array_api/__init__.py
@@ -1,4 +1,3 @@
-# coding: utf-8
 """
 APIs to create ONNX Graphs.
 """
diff --git a/onnx_array_api/_command_lines_parser.py b/onnx_array_api/_command_lines_parser.py
index c0a7678..e9b69a2 100644
--- a/onnx_array_api/_command_lines_parser.py
+++ b/onnx_array_api/_command_lines_parser.py
@@ -18,10 +18,11 @@ def get_main_parser() -> ArgumentParser:
         help=dedent(
             """
         Selects a command.
-        
+
         'translate' exports an onnx graph into a piece of code replicating it,
         'compare' compares the execution of two onnx models,
-        'replace' replaces constant and initliazers by ConstantOfShape to make the model lighter
+        'replace' replaces constants and initializers by ConstantOfShape
+                  to make the model lighter
         """
         ),
     )
@@ -75,7 +76,8 @@ def get_parser_compare() -> ArgumentParser:
         Compares the execution of two onnx models.
         """
         ),
-        epilog="This is used when two models are different but should produce the same results.",
+        epilog="This is used when two models are different but "
+        "should produce the same results.",
     )
     parser.add_argument(
         "-m1",
diff --git a/onnx_array_api/_helpers.py b/onnx_array_api/_helpers.py
index f9808ca..1d95bb2 100644
--- a/onnx_array_api/_helpers.py
+++ b/onnx_array_api/_helpers.py
@@ -9,7 +9,7 @@ def np_dtype_to_tensor_dtype(dtype: Any):
     """
     try:
         dt = helper.np_dtype_to_tensor_dtype(dtype)
-    except KeyError:
+    except (KeyError, ValueError):
         if dtype == np.float32:
             dt = TensorProto.FLOAT
         elif dtype == np.float64:
@@ -41,5 +41,5 @@ def np_dtype_to_tensor_dtype(dtype: Any):
         elif dtype is float:
             dt = TensorProto.DOUBLE
         else:
-            raise KeyError(f"Unable to guess type for dtype={dtype}.")
+            raise KeyError(f"Unable to guess type for dtype={dtype}.")  # noqa: B904
     return dt
diff --git a/onnx_array_api/array_api/__init__.py b/onnx_array_api/array_api/__init__.py
index f4b3c4d..3252405 100644
--- a/onnx_array_api/array_api/__init__.py
+++ b/onnx_array_api/array_api/__init__.py
@@ -51,8 +51,8 @@ def _finfo(dtype):
             d[k] = v
     d["dtype"] = DType(np_dtype_to_tensor_dtype(dt))
     nres = type("finfo", (res.__class__,), d)
-    setattr(nres, "smallest_normal", float(res.smallest_normal))
-    setattr(nres, "tiny", float(res.tiny))
+    setattr(nres, "smallest_normal", float(res.smallest_normal))  # noqa: B010
+    setattr(nres, "tiny", float(res.tiny))  # noqa: B010
     return nres
 
 
@@ -84,8 +84,8 @@ def _iinfo(dtype):
             d[k] = v
     d["dtype"] = DType(np_dtype_to_tensor_dtype(dt))
     nres = type("iinfo", (res.__class__,), d)
-    setattr(nres, "min", int(res.min))
-    setattr(nres, "max", int(res.max))
+    setattr(nres, "min", int(res.min))  # noqa: B010
+    setattr(nres, "max", int(res.max))  # noqa: B010
     return nres
 
 
@@ -133,10 +133,10 @@ def _finalize_array_api(module, function_names, TEagerTensor):
     module.uint32 = DType(TensorProto.UINT32)
     module.uint64 = DType(TensorProto.UINT64)
     module.bfloat16 = DType(TensorProto.BFLOAT16)
-    setattr(module, "bool", DType(TensorProto.BOOL))
-    setattr(module, "str", DType(TensorProto.STRING))
-    setattr(module, "finfo", _finfo)
-    setattr(module, "iinfo", _iinfo)
+    setattr(module, "bool", DType(TensorProto.BOOL))  # noqa: B010
+    setattr(module, "str", DType(TensorProto.STRING))  # noqa: B010
+    setattr(module, "finfo", _finfo)  # noqa: B010
+    setattr(module, "iinfo", _iinfo)  # noqa: B010
 
     if function_names is None:
         function_names = supported_functions
@@ -146,7 +146,10 @@ def _finalize_array_api(module, function_names, TEagerTensor):
         if f is None:
             f2 = getattr(npx_functions, name, None)
             if f2 is None:
-                warnings.warn(f"Function {name!r} is not available in {module!r}.")
+                warnings.warn(
+                    f"Function {name!r} is not available in {module!r}.",
+                    stacklevel=0,
+                )
                 continue
             f = lambda TEagerTensor, *args, _f=f2, **kwargs: _f(  # noqa: E731
                 *args, **kwargs
diff --git a/onnx_array_api/array_api/_onnx_common.py b/onnx_array_api/array_api/_onnx_common.py
index 8456378..abc59a9 100644
--- a/onnx_array_api/array_api/_onnx_common.py
+++ b/onnx_array_api/array_api/_onnx_common.py
@@ -3,9 +3,15 @@
 import numpy as np
 from onnx import TensorProto
 
-with warnings.catch_warnings():
-    warnings.simplefilter("ignore")
-    from numpy.array_api._array_object import Array
+try:
+    import array_api_strict
+
+    Array = type(array_api_strict.ones((1,)))
+except ImportError:
+    with warnings.catch_warnings():
+        warnings.simplefilter("ignore")
+        from numpy.array_api._array_object import Array
+
 from ..npx.npx_types import (
     DType,
     ElemType,
@@ -92,13 +98,13 @@ def asarray(
     elif isinstance(a, str):
         v = TEagerTensor(np.array(a, dtype=np.str_))
     elif isinstance(a, list):
-        if all(map(lambda x: isinstance(x, bool), a)):
+        if all(isinstance(x, bool) for x in a):
             v = TEagerTensor(np.array(a, dtype=np.bool_))
-        elif all(map(lambda x: isinstance(x, int), a)):
+        elif all(isinstance(x, int) for x in a):
             try:
                 cvt = np.array(a, dtype=np.int64)
             except OverflowError as e:
-                if all(map(lambda x: x >= 0, a)):
+                if all(x >= 0 for x in a):
                     cvt = np.array(a, dtype=np.uint64)
                 else:
                     raise e
@@ -107,7 +113,7 @@ def asarray(
             v = TEagerTensor(np.array(a))
     elif isinstance(a, np.ndarray):
         v = TEagerTensor(a)
-    elif isinstance(a, Array):
+    elif Array and isinstance(a, Array):
         v = TEagerTensor(np.asarray(a))
     else:
         raise RuntimeError(f"Unexpected type {type(a)} for the first input.")
@@ -127,9 +133,7 @@ def arange(
     step: EagerTensor[OptTensorType[ElemType.int64, "I", (1,)]] = None,
     dtype: OptParType[DType] = None,
 ) -> EagerTensor[TensorType[ElemType.numerics, "T"]]:
-    use_float = any(
-        map(lambda x: isinstance(x, float), [start_or_stop, stop_or_step, step])
-    )
+    use_float = any(isinstance(x, float) for x in [start_or_stop, stop_or_step, step])
     if isinstance(start_or_stop, int):
         start_or_stop = TEagerTensor(
             np.array([start_or_stop], dtype=np.float64 if use_float else np.int64)
@@ -207,7 +211,7 @@ def eye(
     /,
     *,
     k: ParType[int] = 0,
-    dtype: ParType[DType] = DType(TensorProto.DOUBLE),
+    dtype: ParType[DType] = DType(TensorProto.DOUBLE),  # noqa: B008
 ):
     if isinstance(n_rows, int):
         n_rows = TEagerTensor(np.array(n_rows, dtype=np.int64))
@@ -245,7 +249,7 @@ def linspace(
     dtype: OptParType[DType] = None,
     endpoint: ParType[int] = 1,
 ) -> EagerTensor[TensorType[ElemType.numerics, "T"]]:
-    use_float = any(map(lambda x: isinstance(x, float), [start, stop]))
+    use_float = any(isinstance(x, float) for x in [start, stop])
     if isinstance(start, int):
         start = TEagerTensor(
             np.array(start, dtype=np.float64 if use_float else np.int64)
diff --git a/onnx_array_api/ext_test_case.py b/onnx_array_api/ext_test_case.py
index 3c12e65..d91ba1a 100644
--- a/onnx_array_api/ext_test_case.py
+++ b/onnx_array_api/ext_test_case.py
@@ -235,7 +235,7 @@ def assertRaise(self, fct: Callable, exc_type: Exception):
             fct()
         except exc_type as e:
             if not isinstance(e, exc_type):
-                raise AssertionError(f"Unexpected exception {type(e)!r}.")
+                raise AssertionError(f"Unexpected exception {type(e)!r}.")  # noqa: B904
             return
         raise AssertionError("No exception was raised.")
 
@@ -266,7 +266,7 @@ def assertStartsWith(self, prefix: str, full: str):
     @classmethod
     def tearDownClass(cls):
         for name, line, w in cls._warns:
-            warnings.warn(f"\n{name}:{line}: {type(w)}\n  {str(w)}")
+            warnings.warn(f"\n{name}:{line}: {type(w)}\n  {str(w)}", stacklevel=0)
 
     def capture(self, fct: Callable):
         """
@@ -277,9 +277,8 @@ def capture(self, fct: Callable):
         """
         sout = StringIO()
         serr = StringIO()
-        with redirect_stdout(sout):
-            with redirect_stderr(serr):
-                res = fct()
+        with redirect_stdout(sout), redirect_stderr(serr):
+            res = fct()
         return res, sout.getvalue(), serr.getvalue()
 
     def relative_path(self, filename: str, *names: List[str]) -> str:
diff --git a/onnx_array_api/graph_api/graph_builder.py b/onnx_array_api/graph_api/graph_builder.py
index 4f5c601..293d2cc 100644
--- a/onnx_array_api/graph_api/graph_builder.py
+++ b/onnx_array_api/graph_api/graph_builder.py
@@ -328,9 +328,9 @@ def has_type(self, name: str) -> bool:
         return name in self._known_types
 
     def get_type(self, name: str) -> int:
-        assert name in self._known_types, (
-            f"Type is unknown for result {name!r}, " f"known_types={self._known_types}."
-        )
+        assert (
+            name in self._known_types
+        ), f"Type is unknown for result {name!r}, known_types={self._known_types}."
         return self._known_types[name]
 
     def unique_name(self, prefix: str) -> str:
@@ -472,7 +472,7 @@ def make_node(
                 f"A node {op_type!r} cannot be created with "
                 f"inputs={inputs} (types={[type(i) for i in inputs]}), "
                 f"outputs={outputs} "
-                f"(types={[type(o) for o in outputs] if isinstance(outputs, (tuple, list)) else outputs}), "
+                f"(types={[type(o) for o in outputs] if isinstance(outputs, (tuple, list)) else outputs}), "  # noqa: E501
                 f"domain={domain!r}, kwargs={kwargs}."
             ) from e
         if attributes:
@@ -594,14 +594,16 @@ def make_nodes(
             return output_names[0]
         return output_names
 
-    def from_array(self, arr: T, name: str = None) -> TensorProto:  # noqa: F821
+    def from_array(
+        self, arr: T, name: Optional[str] = None
+    ) -> TensorProto:  # noqa: F821
         if isinstance(arr, np.ndarray):
             return self.from_np_array(arr, name)
         raise NotImplementedError(
             f"{type(arr)} is not supported yet but initializer {name or ''!r} is."
         )
 
-    def from_np_array(self, arr: np.ndarray, name: str = None) -> TensorProto:
+    def from_np_array(self, arr: np.ndarray, name: Optional[str] = None) -> TensorProto:
         arr_cpu = np.ascontiguousarray(arr) if not arr.flags["C_CONTIGUOUS"] else arr
         if arr_cpu.ctypes.data == arr.ctypes.data:
             if sys.byteorder == "big":
@@ -816,7 +818,7 @@ def constant_folding(self):
         """
         updates = {}
         node_to_remove = set()
-        for k, v in self.constants_.items():
+        for _k, v in self.constants_.items():
             if v is None:
                 # this is an initiliazer
                 continue
@@ -837,7 +839,8 @@ def constant_folding(self):
                     self.initializers_dict[name] = value
                     if self.verbose:
                         print(
-                            f"[GraphBuilder] fold_constant:{v.op_type}:{name}[{value.dtype}:"
+                            f"[GraphBuilder] fold_constant:"
+                            f"{v.op_type}:{name}[{value.dtype}:"
                             f"{value.shape}]:from:{','.join(sorted(feeds))}"
                         )
 
@@ -880,7 +883,8 @@ def remove_identity_nodes(self):
             if new_name in replacements:
                 new_name = replacements[new_name]
                 assert new_name not in replacements, (
-                    f"Name {old_name!r} still in {replacements}, node.op_type={node.op_type!r}, "
+                    f"Name {old_name!r} still in {replacements}, "
+                    f"node.op_type={node.op_type!r}, "
                     f"node.input={node.input}, node.output={node.output}, "
                     f"input_names={input_names}, output_names={output_names}"
                 )
@@ -891,7 +895,8 @@ def remove_identity_nodes(self):
             if old_name in replacements:
                 replacements[replacements[old_name]] = new_name
             assert new_name not in replacements, (
-                f"Name {old_name!r} still in {replacements}, node.op_type={node.op_type!r}, "
+                f"Name {old_name!r} still in {replacements}, "
+                f"node.op_type={node.op_type!r}, "
                 f"node.input={node.input}, node.output={node.output}, "
                 f"input_names={input_names}, output_names={output_names}"
             )
@@ -902,7 +907,8 @@ def remove_identity_nodes(self):
             for k, v in replacements.items():
                 assert v not in replacements, (
                     f"replacement {k}->{v} is not possible because of "
-                    f"{v}->{replacements[v]}, old_name={old_name!r}, new_name={new_name!r}"
+                    f"{v}->{replacements[v]}, old_name={old_name!r}, "
+                    f"new_name={new_name!r}"
                 )
 
         # second pass: replacements in initializer
diff --git a/onnx_array_api/light_api/model.py b/onnx_array_api/light_api/model.py
index 6478c4d..f6770eb 100644
--- a/onnx_array_api/light_api/model.py
+++ b/onnx_array_api/light_api/model.py
@@ -319,7 +319,8 @@ def rename(self, old_name: str, new_name: str):
         value = self.unique_names_[old_name]
         if isinstance(value, int):
             raise TypeError(
-                f"Unexpected type {type(value)} for value {old_name!r} renamed into {new_name!r}."
+                f"Unexpected type {type(value)} for value {old_name!r} "
+                f"renamed into {new_name!r}."
             )
         self.unique_names_[new_name] = value
         self.renames_[old_name] = new_name
diff --git a/onnx_array_api/light_api/var.py b/onnx_array_api/light_api/var.py
index 2d7eac8..72a9533 100644
--- a/onnx_array_api/light_api/var.py
+++ b/onnx_array_api/light_api/var.py
@@ -193,7 +193,7 @@ def make_node(
             )
         if len(names) == 1:
             return Var(self.parent, names[0])
-        return Vars(self.parent, *list(map(lambda v: Var(self.parent, v), names)))
+        return Vars(self.parent, *[Var(self.parent, v) for v in names])
 
     def vin(
         self,
@@ -445,7 +445,8 @@ def rename(self, *new_names: List[str]) -> "Vars":
         "Renames variables."
         if len(new_names) != len(self):
             raise ValueError(
-                f"Vars has {len(self)} elements but the method received {len(new_names)} names."
+                f"Vars has {len(self)} elements but the method "
+                f"received {len(new_names)} names."
             )
         new_vars = []
         for var, name in zip(self.vars_, new_names):
diff --git a/onnx_array_api/npx/npx_array_api.py b/onnx_array_api/npx/npx_array_api.py
index 142a892..a9fb3d6 100644
--- a/onnx_array_api/npx/npx_array_api.py
+++ b/onnx_array_api/npx/npx_array_api.py
@@ -10,8 +10,6 @@ class ArrayApiError(RuntimeError):
     Raised when a function is not supported by the :epkg:`Array API`.
     """
 
-    pass
-
 
 class BaseArrayApi:
     """
diff --git a/onnx_array_api/npx/npx_core_api.py b/onnx_array_api/npx/npx_core_api.py
index d6688cf..a09280a 100644
--- a/onnx_array_api/npx/npx_core_api.py
+++ b/onnx_array_api/npx/npx_core_api.py
@@ -15,7 +15,7 @@
 class args_tuple(tuple):
     """Overwrites a tuple to make the distinction later in the code."""
 
-    pass
+    __slots__ = ()
 
 
 def cst(*args, **kwargs):
@@ -140,7 +140,7 @@ def _xapi(fn: Callable, inline: bool):
 
     # It has the same signature
     def wrapper(*inputs, **kwargs):
-        if any(map(lambda x: isinstance(x, EagerTensor), inputs)):
+        if any(isinstance(x, EagerTensor) for x in inputs):
             tensor_class = None
             for x in inputs:
                 if isinstance(x, EagerTensor):
diff --git a/onnx_array_api/npx/npx_functions.py b/onnx_array_api/npx/npx_functions.py
index 7c6cd66..c6319f2 100644
--- a/onnx_array_api/npx/npx_functions.py
+++ b/onnx_array_api/npx/npx_functions.py
@@ -480,7 +480,7 @@ def eye(
     /,
     *,
     k: ParType[int] = 0,
-    dtype: ParType[DType] = DType(TensorProto.DOUBLE),
+    dtype: ParType[DType] = DType(TensorProto.DOUBLE),  # noqa: B008
 ):
     "See :func:`numpy.eye`."
     shape = cst(np.array([-1], dtype=np.int64))
diff --git a/onnx_array_api/npx/npx_graph_builder.py b/onnx_array_api/npx/npx_graph_builder.py
index 4496d79..91034f7 100644
--- a/onnx_array_api/npx/npx_graph_builder.py
+++ b/onnx_array_api/npx/npx_graph_builder.py
@@ -450,7 +450,7 @@ def _make_onnx(self):
                 name = inp.name
                 if name is None:
                     raise RuntimeError(
-                        f"Input {i} is None for function " f"{self.function_name!r}."
+                        f"Input {i} is None for function {self.function_name!r}."
                     )
                 inputs.append(name)
 
@@ -473,7 +473,7 @@ def _make_onnx(self):
         model = make_model(
             graph,
             opset_imports=opset_imports,
-            functions=list(f[0] for f in self.functions_.values()),
+            functions=[f[0] for f in self.functions_.values()],
             ir_version=self.ir_version,
         )
         if not is_windows() or not is_azure():
@@ -512,12 +512,7 @@ def _function_to_onnx(self, fct: Callable, n_inputs: int, n_outputs: int):
             there is an undefined number of inputs
         """
         sig = signature(fct)
-        if any(
-            map(
-                lambda t: issubclass(t.annotation, SequenceType),
-                sig.parameters.values(),
-            )
-        ):
+        if any(issubclass(t.annotation, SequenceType) for t in sig.parameters.values()):
             # onnx does not allow undefined number of inputs
             key = fct.__module__, fct.__name__, n_inputs
         else:
@@ -852,7 +847,7 @@ def to_onnx(
                     node_inputs.append(input_name)
                     continue
 
-                if isinstance(i, tuple) and all(map(lambda x: isinstance(x, int), i)):
+                if isinstance(i, tuple) and all(isinstance(x, int) for x in i):
                     ai = np.array(list(i), dtype=np.int64)
                     c = Cst(ai)
                     input_name = self._unique(var._prefix)
diff --git a/onnx_array_api/npx/npx_helper.py b/onnx_array_api/npx/npx_helper.py
index 34d9af3..b2c6b48 100644
--- a/onnx_array_api/npx/npx_helper.py
+++ b/onnx_array_api/npx/npx_helper.py
@@ -130,8 +130,7 @@ def iter_nodes(nodes: Sequence[NodeProto]) -> Iterator[NodeProto]:
                 and hasattr(att, "g")
                 and att.g is not None
             ):
-                for n in iter_nodes(att.g.node):
-                    yield n
+                yield from iter_nodes(att.g.node)
 
 
 def onnx_model_to_function(
diff --git a/onnx_array_api/npx/npx_jit_eager.py b/onnx_array_api/npx/npx_jit_eager.py
index 20becbd..172bb86 100644
--- a/onnx_array_api/npx/npx_jit_eager.py
+++ b/onnx_array_api/npx/npx_jit_eager.py
@@ -563,7 +563,7 @@ class JitOnnx(JitEager):
     def __init__(
         self,
         f: Callable,
-        tensor_class: type = None,
+        tensor_class: Optional[type] = None,
         target_opsets: Optional[Dict[str, int]] = None,
         output_types: Optional[Dict[Any, TensorType]] = None,
         ir_version: Optional[int] = None,
@@ -636,7 +636,7 @@ class EagerOnnx(JitEager):
     def __init__(
         self,
         f: Callable,
-        tensor_class: type = None,
+        tensor_class: Optional[type] = None,
         target_opsets: Optional[Dict[str, int]] = None,
         output_types: Optional[Dict[Any, TensorType]] = None,
         ir_version: Optional[int] = None,
@@ -671,12 +671,12 @@ def _preprocess_constants(self, *args):
                 new_args.append(self.tensor_class(n.inputs[0]))
                 modified = True
             elif isinstance(n, tuple):
-                if all(map(lambda x: isinstance(x, int), n)):
+                if all(isinstance(x, int) for x in n):
                     new_args.append(
                         self.tensor_class(np.array(list(n), dtype=np.int64))
                     )
                     modified = True
-                elif any(map(lambda t: isinstance(t, Var), n)):
+                elif any(isinstance(t, Var) for t in n):
                     raise TypeError(
                         f"Unexpected types in tuple "
                         f"({[type(t) for t in n]}) for input {i}, "
@@ -727,14 +727,14 @@ def __call__(self, *args, already_eager=False, **kwargs):
         )
         if already_eager:
             if any(
-                map(
-                    lambda t: t is not None
+                (
+                    t is not None
                     and not isinstance(
                         t,
                         EagerOnnx.allowed_input_types,
-                    ),
-                    args,
+                    )
                 )
+                for t in args
             ):
                 raise TypeError(
                     f"One of the input is not an EagerTensor or a constant, "
@@ -759,8 +759,8 @@ def __call__(self, *args, already_eager=False, **kwargs):
             try:
                 res = self.f(*values, **kwargs)
             except (AttributeError, TypeError) as e:
-                inp1 = ", ".join(map(str, map(lambda a: type(a).__name__, args)))
-                inp2 = ", ".join(map(str, map(lambda a: type(a).__name__, values)))
+                inp1 = ", ".join(map(str, [type(a).__name__ for a in args]))
+                inp2 = ", ".join(map(str, [type(a).__name__ for a in values]))
                 raise TypeError(
                     f"Unexpected types, input types are args=[{inp1}], "
                     f"values=[{inp2}], kwargs={kwargs}. "
@@ -778,7 +778,7 @@ def __call__(self, *args, already_eager=False, **kwargs):
                         f"from module {self.f.__module__!r}, "
                         f"type of first input is {type(args[0])}."
                     )
-            elif isinstance(res, Var) or any(map(lambda x: isinstance(x, Var), res)):
+            elif isinstance(res, Var) or any(isinstance(x, Var) for x in res):
                 # The function returns instance of type Var.
                 # It does not support eager mode and needs
                 # to be converted into onnx.
diff --git a/onnx_array_api/npx/npx_numpy_tensors.py b/onnx_array_api/npx/npx_numpy_tensors.py
index 68a4da7..1daef44 100644
--- a/onnx_array_api/npx/npx_numpy_tensors.py
+++ b/onnx_array_api/npx/npx_numpy_tensors.py
@@ -223,7 +223,8 @@ def __bool__(self):
         if self.shape:
             warnings.warn(
                 f"Conversion to bool only works for scalar, not for {self!r}, "
-                f"bool(...)={bool(self._tensor)}."
+                f"bool(...)={bool(self._tensor)}.",
+                stacklevel=0,
             )
             try:
                 return bool(self._tensor)
@@ -279,7 +280,8 @@ def __iter__(self):
         warnings.warn(
             f"Iterators are not implemented in the generic case. "
             f"Every function using them cannot be converted into ONNX "
-            f"(tensors - {type(self)})."
+            f"(tensors - {type(self)}).",
+            stacklevel=0,
         )
         for row in self._tensor:
             yield self.__class__(row)
@@ -289,5 +291,3 @@ class JitNumpyTensor(NumpyTensor, JitTensor):
     """
     Defines a value for a specific backend.
     """
-
-    pass
diff --git a/onnx_array_api/npx/npx_tensors.py b/onnx_array_api/npx/npx_tensors.py
index 3e4faa7..40ebc12 100644
--- a/onnx_array_api/npx/npx_tensors.py
+++ b/onnx_array_api/npx/npx_tensors.py
@@ -10,8 +10,6 @@ class JitTensor:
     Defines a value for a specific jit mode
     """
 
-    pass
-
 
 class EagerTensor(BaseArrayApi):
     """
@@ -93,7 +91,7 @@ def _astype_impl(
 
         if not isinstance(x, Var):
             raise TypeError(f"Input 0 must be a Var not {type(x)}.")
-        meth = getattr(Var, "astype")
+        meth = getattr(Var, "astype")  # noqa: B009
         return meth(x, dtype)
 
     @staticmethod
diff --git a/onnx_array_api/npx/npx_types.py b/onnx_array_api/npx/npx_types.py
index 8284765..2f2a6a6 100644
--- a/onnx_array_api/npx/npx_types.py
+++ b/onnx_array_api/npx/npx_types.py
@@ -11,7 +11,7 @@ class WrapperType:
     WrapperType.
     """
 
-    pass
+    __slots__ = ()
 
 
 class DType(WrapperType):
@@ -78,8 +78,8 @@ def __eq__(self, dt: "DType") -> bool:
             return self.code_ == dt.dtype.code_
         try:
             dti = np_dtype_to_tensor_dtype(dt)
-        except KeyError:
-            raise TypeError(f"dt must be DType not {type(dt)} - {dt!r}.")
+        except KeyError as e:
+            raise TypeError(f"dt must be DType not {type(dt)} - {dt!r}.") from e
         return self.code_ == dti
 
     def __lt__(self, dt: "DType") -> bool:
@@ -90,8 +90,8 @@ def __lt__(self, dt: "DType") -> bool:
             raise TypeError(f"dt must be DType not {type(dt)}.")
         try:
             dti = np_dtype_to_tensor_dtype(dt)
-        except KeyError:
-            raise TypeError(f"dt must be DType not {type(dt)} - {dt}.")
+        except KeyError as e:
+            raise TypeError(f"dt must be DType not {type(dt)} - {dt}.") from e
         return self.code_ < dti
 
     @classmethod
@@ -102,12 +102,10 @@ def type_name(cls) -> str:
 
 class _DType2(DType):
     "Wraps a type into a different type."
-    pass
 
 
 class _DTypes(DType):
     "Wraps a type into a different type."
-    pass
 
 
 class ElemTypeCstInner(WrapperType):
@@ -367,7 +365,7 @@ def onnx_type(cls):
         if cls.dtype == str:
             return AttributeProto.STRING
         raise RuntimeError(
-            f"Unsupported attribute type {cls.dtype!r} " f"for parameter {cls!r}."
+            f"Unsupported attribute type {cls.dtype!r} for parameter {cls!r}."
         )
 
 
@@ -403,9 +401,11 @@ class ShapeType(Tuple[int, ...]):
     Defines a shape type.
     """
 
+    __slots__ = ()
+
     @classmethod
     def __class_getitem__(cls, *args):
-        if any(map(lambda t: t is not None and not isinstance(t, (int, str)), args)):
+        if any((t is not None and not isinstance(t, (int, str))) for t in args):
             raise TypeError(
                 f"Unexpected value for args={args}, every element should int or str."
             )
@@ -504,7 +504,7 @@ def __class_getitem__(cls, *args):
         if name:
             msg.append(name)
         if dtypes is not None:
-            msg.append("_".join(map(lambda t: str(t.dtype), dtypes)))
+            msg.append("_".join(str(t.dtype) for t in dtypes))
         if shape is not None:
             msg.append("_".join(map(str, shape)))
         final = "__".join(msg)
@@ -561,11 +561,11 @@ def _name_set(self):
             s += 1 << dt.dtype
         try:
             return ElemType.set_names[s]
-        except KeyError:
+        except KeyError as e:
             raise RuntimeError(
                 f"Unable to guess element type name for {s}: "
                 f"{repr(self)} in {ElemType.set_names}."
-            )
+            ) from e
 
     @classmethod
     def issuperset(cls, tensor_type: type) -> bool:
@@ -686,7 +686,7 @@ def len(cls):
     @classmethod
     def type_name(cls) -> str:
         "Returns its full name."
-        dts = ", ".join(map(lambda s: s.type_name(), cls.elem_types))
+        dts = ", ".join(s.type_name() for s in cls.elem_types)
         if cls.name:
             newt = f"TupleType[{dts}, {cls.name!r}]"
         else:
diff --git a/onnx_array_api/npx/npx_var.py b/onnx_array_api/npx/npx_var.py
index ca8af0d..169183c 100644
--- a/onnx_array_api/npx/npx_var.py
+++ b/onnx_array_api/npx/npx_var.py
@@ -33,7 +33,7 @@ def __init__(
     ):
         if not issubclass(dtype, ParType):
             raise TypeError(
-                f"dtype for parameter {name!r} must be of " f"ParType not {dtype}."
+                f"dtype for parameter {name!r} must be of ParType not {dtype}."
             )
         if parent_op is None:
             raise ValueError(f"parent_op must be filled for paramenter {name!r}.")
@@ -453,7 +453,7 @@ def _get_vars(self):
                     deleted.append(var)
                     continue
                 raise TypeError(
-                    f"Unexpected type {type(applied)} as output of " f"function {fct}."
+                    f"Unexpected type {type(applied)} as output of function {fct}."
                 )
             vs.append(var)
             for i in reversed(var.inputs):
@@ -469,11 +469,11 @@ def _get_vars(self):
                     replacement_cst[id(i)] = cst(np.array(i))
                     continue
                 if isinstance(i, tuple):
-                    if all(map(lambda x: isinstance(x, int), i)):
+                    if all(isinstance(x, int) for x in i):
                         cst = Var.get_cst_var()[0]
                         replacement_cst[id(i)] = cst(np.array(list(i), dtype=np.int64))
                         continue
-                    if any(map(lambda t: isinstance(t, Var), i)):
+                    if any(isinstance(t, Var) for t in i):
                         raise TypeError(
                             f"Unexpected types in tuple "
                             f"({[type(t) for t in i]}), "
@@ -1138,7 +1138,7 @@ class Input(Var):
     :param annotation: annotation if any is available
     """
 
-    def __init__(self, name: str = None, annotation: Optional[type] = None):
+    def __init__(self, name: Optional[str] = None, annotation: Optional[type] = None):
         Var.__init__(self)
         self.name = name
         self._prefix = name or "I"
@@ -1172,15 +1172,15 @@ def __init__(self, cst: Any):
         elif isinstance(cst, float):
             Var.__init__(self, np.array(cst, dtype=np.float64), op="Identity")
         elif isinstance(cst, list):
-            if all(map(lambda t: isinstance(t, bool), cst)):
+            if all(isinstance(t, bool) for t in cst):
                 Var.__init__(self, np.array(cst, dtype=np.bool_), op="Identity")
-            elif all(map(lambda t: isinstance(t, (int, bool)), cst)):
+            elif all(isinstance(t, (int, bool)) for t in cst):
                 Var.__init__(self, np.array(cst, dtype=np.int64), op="Identity")
-            elif all(map(lambda t: isinstance(t, (float, int, bool)), cst)):
+            elif all(isinstance(t, (float, int, bool)) for t in cst):
                 Var.__init__(self, np.array(cst, dtype=np.float64), op="Identity")
             else:
                 raise ValueError(
-                    f"Unable to convert cst (type={type(cst)}), " f"value={cst}."
+                    f"Unable to convert cst (type={type(cst)}), value={cst}."
                 )
         else:
             raise NotImplementedError(
diff --git a/onnx_array_api/ort/ort_profile.py b/onnx_array_api/ort/ort_profile.py
index b61df67..ebccaba 100644
--- a/onnx_array_api/ort/ort_profile.py
+++ b/onnx_array_api/ort/ort_profile.py
@@ -52,7 +52,7 @@ def sep_event(s):
     for c in agg_cols:
         df[c] = df[c].fillna("")
     df["dur"] = df["dur"].fillna(0)
-    agg = df[agg_cols + ["dur"]].groupby(agg_cols).sum()
+    agg = df[[*agg_cols, "dur"]].groupby(agg_cols).sum()
     return agg
 
 
@@ -101,14 +101,16 @@ def ort_profile(
     if providers is None:
         providers = ["CPUExecutionProvider"]
     sess = InferenceSession(obj, sess_options, providers=providers, **kwargs)
-    first = list(feeds.values())[0]
+    for v in feeds.values():
+        first = v
+        break
 
     if isinstance(first, numpy.ndarray):
-        for i in range(repeat):
+        for _i in range(repeat):
             sess.run(None, feeds)
     else:
         out_names = [o.name for o in sess.get_outputs()]
-        for i in range(repeat):
+        for _i in range(repeat):
             sess._sess.run_with_ort_values(feeds, out_names, None)
 
     prof = sess.end_profiling()
@@ -177,7 +179,7 @@ def _idx(row):
             df[c] = df[c].apply(str)
     df = df.copy()
     df["count"] = 1
-    gr = df[groupkey + ["dur", "count"]].groupby(groupkey)
+    gr = df[[*groupkey, "dur", "count"]].groupby(groupkey)
     return gr.sum()
 
 
@@ -187,7 +189,9 @@ def _process_shape(s: Tuple[int, ...], keys: Dict[str, str]) -> str:
     for v in value:
         if len(v) != 1:
             raise NotImplementedError(f"Unexpected value {v} in {s!r}.")
-        k, v = list(v.items())[0]
+        for _k, _v in v.items():
+            k, v = _k, _v
+            break
         n = "-".join([keys[k], "x".join(map(str, v))])
         ns.append(n)
     return ",".join(ns)
diff --git a/onnx_array_api/ort/ort_tensors.py b/onnx_array_api/ort/ort_tensors.py
index 2117e3f..4f53e6e 100644
--- a/onnx_array_api/ort/ort_tensors.py
+++ b/onnx_array_api/ort/ort_tensors.py
@@ -86,7 +86,7 @@ def __init__(
             tensor_class: type,
             input_names: List[str],
             onx: ModelProto,
-            f: Callable = None,
+            f: Optional[Callable] = None,
         ):
             try:
                 self.ref = InferenceSession(
@@ -282,5 +282,3 @@ class JitOrtTensor(OrtTensor, OrtCommon, JitTensor):
     """
     Defines a value for :epkg:`onnxruntime` as a backend.
     """
-
-    pass
diff --git a/onnx_array_api/plotting/_helper.py b/onnx_array_api/plotting/_helper.py
index 3131177..5c5d881 100644
--- a/onnx_array_api/plotting/_helper.py
+++ b/onnx_array_api/plotting/_helper.py
@@ -94,7 +94,7 @@ def _extract_attribute_value(
             f"Unable to convert attribute {att.name!r} type {att.type!r}."
         )
     raise AttributeError(  # pragma: no cover
-        f"Unable to convert default value for {ref_att.name!r} " f"type {att.type!r}."
+        f"Unable to convert default value for {ref_att.name!r} type {att.type!r}."
     )
 
 
@@ -120,7 +120,7 @@ def get_tensor_shape(obj):
     for d in obj.tensor_type.shape.dim:
         v = d.dim_value if d.dim_value > 0 else d.dim_param
         shape.append(v)
-    shape = None if not shape else list(None if s == 0 else s for s in shape)
+    shape = None if not shape else [None if s == 0 else s for s in shape]
     return shape
 
 
@@ -183,7 +183,7 @@ def _get_shape(obj):
             arr = to_array(obj)
             return arr.shape
         raise RuntimeError(  # pragma: no cover
-            f"Unable to guess type from {obj0!r}, " f"data_type is {obj.data_type!r}."
+            f"Unable to guess type from {obj0!r}, data_type is {obj.data_type!r}."
         )
     if hasattr(obj, "type"):
         obj = obj.type
diff --git a/onnx_array_api/plotting/dot_plot.py b/onnx_array_api/plotting/dot_plot.py
index 5bfba5d..af8ad22 100644
--- a/onnx_array_api/plotting/dot_plot.py
+++ b/onnx_array_api/plotting/dot_plot.py
@@ -310,7 +310,7 @@ def dot_label(text):
                 exp.append(f'    label="{node.op_type}\\n({dot_name(field)}){satts}";')
                 exp.append(f"    fontsize={fontsize};")
                 exp.append("    color=black;")
-                exp.append("\n".join(map(lambda s: "  " + s, subgraph.split("\n"))))
+                exp.append("\n".join(f"  {s}" for s in subgraph.split("\n")))
 
                 node0 = body.node[0]
                 connects.append(
diff --git a/onnx_array_api/plotting/text_plot.py b/onnx_array_api/plotting/text_plot.py
index 9449acb..d3f27c6 100644
--- a/onnx_array_api/plotting/text_plot.py
+++ b/onnx_array_api/plotting/text_plot.py
@@ -85,10 +85,8 @@ def process_node(self):
                     )
                 else:
                     ts = " ".join(
-                        map(
-                            lambda t: f"{t['target_id']}:{_number2str(t['weight'])}",
-                            self.targets,
-                        )
+                        f"{t['target_id']}:{_number2str(t['weight'])}"
+                        for t in self.targets
                     )
                     text = f"{self.true_false}f {ts}"
             else:
@@ -351,7 +349,7 @@ def __init__(self, nodes):
 
     def _find_sequence(node_name, known, done):
         inputs = dnodes[node_name].input
-        if any(map(lambda i: i not in known, inputs)):
+        if any((i not in known) for i in inputs):
             return []
 
         res = [node_name]
@@ -362,7 +360,7 @@ def _find_sequence(node_name, known, done):
                 if len(next_names) == 1:
                     next_name = next_names.pop()
                     inputs = dnodes[next_name].input
-                    if any(map(lambda i: i not in known, inputs)):
+                    if any((i not in known) for i in inputs):
                         break
                     res.extend(next_name)
                 else:
@@ -390,7 +388,7 @@ def _find_sequence(node_name, known, done):
                 possibles[k] = v
 
         sequences = OrderedDict()
-        for k, v in possibles.items():
+        for k, _v in possibles.items():
             if k in done:
                 continue
             sequences[k] = _find_sequence(k, known, done)
@@ -941,7 +939,7 @@ def str_node(indent, node):
         rows.append(str_node(indent if use_indentation else 0, node))
         indents[name] = indent
 
-        for i, o in enumerate(node.output):
+        for _i, o in enumerate(node.output):
             indents[o] = indent + 1
 
         previous_indent = indents[name]
diff --git a/onnx_array_api/profiling.py b/onnx_array_api/profiling.py
index 52c464a..815f950 100644
--- a/onnx_array_api/profiling.py
+++ b/onnx_array_api/profiling.py
@@ -247,8 +247,7 @@ def depth_first(node, roots_keys, indent=0):
                 else:
                     if filter_node is not None and not filter_node(n):
                         continue
-                    for t in depth_first(n, roots_keys, indent + 1):
-                        yield t
+                    yield from depth_first(n, roots_keys, indent + 1)
 
         if filter_node is None:
             filter_node = ProfileNode.filter_node_
@@ -472,7 +471,7 @@ def add_rows(rows, d):
 def profile2df(
     ps: Stats,
     as_df: bool = True,
-    clean_text: bool = None,
+    clean_text: Optional[bool] = None,
     verbose: bool = False,
     fLOG=None,
 ):
@@ -740,7 +739,7 @@ def fct4():
             node.add_called_by(child)
             child.add_calls_to(node, vv)
 
-    for k, v in nodes.items():
+    for _k, v in nodes.items():
         root = v.get_root()
         break
 
diff --git a/onnx_array_api/reference/evaluator_yield.py b/onnx_array_api/reference/evaluator_yield.py
index 6d6ff48..9c3b6ec 100644
--- a/onnx_array_api/reference/evaluator_yield.py
+++ b/onnx_array_api/reference/evaluator_yield.py
@@ -190,7 +190,8 @@ def enumerate_results(
             for i in node.input:
                 if i not in results:
                     raise RuntimeError(
-                        f"Unable to find input {i!r} in known results {sorted(results)}, "
+                        f"Unable to find input {i!r} "
+                        f"in known results {sorted(results)}, "
                         f"self.rt_inits_ has {sorted(self.evaluator.rt_inits_)}, "
                         f"feed_inputs has {sorted(feed_inputs)}."
                     )
@@ -222,7 +223,8 @@ def enumerate_results(
             for name in output_names:
                 if name not in results:
                     raise RuntimeError(
-                        f"Unable to find output name {name!r} in {sorted(results)}, proto is\n{self.proto_}"
+                        f"Unable to find output name {name!r} in {sorted(results)}, "
+                        f"proto is\n{self.proto_}"
                     )
                 yield ResultType.OUTPUT, name, results[name], None
 
@@ -325,9 +327,7 @@ def _cost_type(self, t1: "np.dtype", t2: "np.dtype") -> float:
     def _cost_shape(self, s1: Tuple[int, ...], s2: Tuple[int, ...]) -> float:
         if s1 is None or s2 is None:
             return self.rank_cost
-        if any(map(lambda s: isinstance(s, str), s1)) or any(
-            map(lambda s: isinstance(s, str), s2)
-        ):
+        if any(isinstance(s, str) for s in s1) or any(isinstance(s, str) for s in s2):
             # dynamic shapes
             if len(s1) != len(s2):
                 return self.rank_cost
@@ -428,7 +428,10 @@ def to_str(
                 d2 = s2[j]
                 d = self.distance_pair(d1, d2)
                 symbol = "=" if d == 0 else "~"
-                line = f"{symbol} | {_align(str(d1), column_size)} | {_align(str(d2), column_size)}"
+                line = (
+                    f"{symbol} | {_align(str(d1), column_size)} | "
+                    f"{_align(str(d2), column_size)}"
+                )
                 if (
                     d1.value is not None
                     and d2.value is not None
@@ -457,7 +460,7 @@ def generate_input(info: ValueInfoProto) -> np.ndarray:
     """
     elem_type = info.type.tensor_type.elem_type
     shape = [
-        (getattr(d, "dim_value", None) or getattr(d, "dim_param"))
+        (getattr(d, "dim_value", None) or getattr(d, "dim_param"))  # noqa: B009
         for d in info.type.tensor_type.shape.dim
     ]
     new_shape = []
@@ -602,7 +605,8 @@ def compare_onnx_execution(
     :param mode: the model should be executed but the function can be executed
         but the comparison may append on nodes only
     :param keep_tensor: keeps the tensor in order to compute a precise distance
-    :return: four results, a sequence of results for the first model and the second model,
+    :return: four results, a sequence of results
+        for the first model and the second model,
         the alignment between the two, DistanceExecution
     """
     assert mode in {"execute", "nodes"}, f"Unexpected value for mode={mode!r}."
diff --git a/onnx_array_api/translate_api/inner_emitter.py b/onnx_array_api/translate_api/inner_emitter.py
index 50d4f5e..abdf04a 100644
--- a/onnx_array_api/translate_api/inner_emitter.py
+++ b/onnx_array_api/translate_api/inner_emitter.py
@@ -38,7 +38,10 @@ def _make_attribute(
             raise NotImplementedError(
                 f"Cannot create attribute with name={name!r}, attr_type={attr_type}."
             )
-        return f"make_ref_attribute(key={name!r}, attr_type={attr_type}, ref_attr_name={ref_attr_name!r})"
+        return (
+            f"make_ref_attribute(key={name!r}, attr_type={attr_type}, "
+            f"ref_attr_name={ref_attr_name!r})"
+        )
 
     def join(self, rows: List[str], single_line: bool = False) -> str:
         "Returns the separators. `single_line` is unused."
@@ -118,14 +121,17 @@ def _emit_io(self, container: str, **kwargs: Dict[str, Any]) -> List[str]:
         shape = kwargs.get("shape", None)
         if elem_type and shape:
             return [
-                f"{container}.append(make_tensor_value_info({name!r}, TensorProto.{ELEMENT_TYPE_NAME[elem_type]}, shape={shape!r}))"
+                f"{container}.append(make_tensor_value_info({name!r}, "
+                f"TensorProto.{ELEMENT_TYPE_NAME[elem_type]}, shape={shape!r}))"
             ]
         if elem_type:
             return [
-                f"{container}.append(make_tensor_value_info({name!r}, TensorProto.{ELEMENT_TYPE_NAME[elem_type]}, shape=[]))"
+                f"{container}.append(make_tensor_value_info({name!r}, "
+                f"TensorProto.{ELEMENT_TYPE_NAME[elem_type]}, shape=[]))"
             ]
         return [
-            f"{container}.append(make_tensor_value_info({name!r}, TensorProto.UNDEFINED, []))"
+            f"{container}.append(make_tensor_value_info({name!r}, "
+            f"TensorProto.UNDEFINED, []))"
         ]
 
     def _emit_input(self, **kwargs: Dict[str, Any]) -> List[str]:
@@ -184,7 +190,7 @@ def _emit_function_output(self, **kwargs: Dict[str, Any]) -> List[str]:
 
     def _emit_function_attributes(self, **kwargs: Dict[str, Any]) -> List[str]:
         atts = kwargs["attributes"]
-        if isinstance(atts, list) and all(map(lambda t: isinstance(t, str), atts)):
+        if isinstance(atts, list) and all(isinstance(t, str) for t in atts):
             return [f"atts.extend({atts!r})"]
         raise NotImplementedError(f"Unable to process function attributes {atts!r}.")
 
diff --git a/onnx_array_api/translate_api/light_emitter.py b/onnx_array_api/translate_api/light_emitter.py
index 7a7aef9..9c58830 100644
--- a/onnx_array_api/translate_api/light_emitter.py
+++ b/onnx_array_api/translate_api/light_emitter.py
@@ -54,7 +54,8 @@ def _emit_input(self, **kwargs: Dict[str, Any]) -> List[str]:
         shape = kwargs.get("shape", None)
         if elem_type and shape:
             return [
-                f"vin({name!r}, elem_type=TensorProto.{ELEMENT_TYPE_NAME[elem_type]}, shape={shape!r})"
+                f"vin({name!r}, elem_type=TensorProto.{ELEMENT_TYPE_NAME[elem_type]}, "
+                f"shape={shape!r})"
             ]
         if elem_type:
             return [
@@ -71,7 +72,8 @@ def _emit_output(self, **kwargs: Dict[str, Any]) -> List[str]:
         shape = kwargs.get("shape", None)
         if elem_type and shape:
             inst.append(
-                f"vout(elem_type=TensorProto.{ELEMENT_TYPE_NAME[elem_type]}, shape={shape!r})"
+                f"vout(elem_type=TensorProto.{ELEMENT_TYPE_NAME[elem_type]}, "
+                f"shape={shape!r})"
             )
         elif elem_type:
             inst.append(f"vout(elem_type=TensorProto.{ELEMENT_TYPE_NAME[elem_type]})")
diff --git a/onnx_array_api/validation/f8.py b/onnx_array_api/validation/f8.py
index ecd68f8..13b778d 100644
--- a/onnx_array_api/validation/f8.py
+++ b/onnx_array_api/validation/f8.py
@@ -9,8 +9,6 @@ class UndefinedCastError(FloatingPointError):
     Unable to case a number.
     """
 
-    pass
-
 
 def display_int(ival, sign=1, exponent=8, mantissa=23):
     """
@@ -317,25 +315,23 @@ def fe5m2_to_float32(ival: int, fn: bool = False, uz: bool = False) -> float:
 class CastFloat8Sets:
     values_e4m3fn = list(
         sorted(
-            (fe4m3_to_float32_float(i), i) for i in range(0, 256) if i not in (255, 127)
+            (fe4m3_to_float32_float(i), i) for i in range(256) if i not in (255, 127)
         )
     )
     values_e4m3fnuz = list(
-        sorted(
-            (fe4m3_to_float32_float(i, uz=True), i) for i in range(0, 256) if i != 0x80
-        )
+        sorted((fe4m3_to_float32_float(i, uz=True), i) for i in range(256) if i != 0x80)
     )
     values_e5m2 = list(
         sorted(
             (fe5m2_to_float32_float(i), i)
-            for i in range(0, 256)
+            for i in range(256)
             if i not in {253, 254, 255, 125, 126, 127}
         )
     )
     values_e5m2fnuz = list(
         sorted(
             (fe5m2_to_float32_float(i, fn=True, uz=True), i)
-            for i in range(0, 256)
+            for i in range(256)
             if i != 0x80
         )
     )
diff --git a/pyproject.toml b/pyproject.toml
index 525b648..a465006 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -11,11 +11,36 @@ exclude = [
 # Same as Black.
 line-length = 88
 
-[tool.ruff.lint.mccabe]
-# Unlike Flake8, default to a complexity level of 10.
-max-complexity = 10
+[tool.ruff.lint]
+select = [
+    "B", # flake8-bugbear
+    "C4", # flake8-comprehensions
+    #"D", # pydocstyle
+    "E", # pycodestyle
+    "F", # Pyflakes
+    "G", # flake8-logging-format
+    #"I", # isort
+    "ISC", # flake8-implicit-str-concat
+    "LOG", # flake8-logging
+    #"N", # pep8-naming
+    #"NPY", # modern numpy
+    #"PERF", # Perflint
+    "PIE", # flake8-pie
+    "PYI", # flake8-pyi
+    "RUF", # Ruff-specific rules
+    "SIM", # flake8-simplify
+    "SLOT", # flake8-slot
+    "T10", # flake8-debugger
+    #"TID", # Disallow relative imports
+    #"TRY", # flake8-try-except-raise
+    "UP", # pyupgrade
+    "W", # pycodestyle
+    "YTT", # flake8-2020
+]
 
 [tool.ruff.lint.per-file-ignores]
+"**" = ["B905", "C401", "C408", "C413", "PYI041", "RUF012", "RUF100", "RUF010", "SIM108", "SIM910", "SIM110", "SIM102", "SIM114", "SIM103", "UP015", "UP027", "UP031", "UP034", "UP032", "UP006", "UP035", "UP007", "UP038"]
+"**/plot*.py" = ["B018"]
 "_doc/examples/plot_first_example.py" = ["E402", "F811"]
 "_doc/examples/plot_onnxruntime.py" = ["E402", "F811"]
 "onnx_array_api/array_api/_onnx_common.py" = ["F821"]
@@ -34,4 +59,5 @@ max-complexity = 10
 "onnx_array_api/profiling.py" = ["E731"]
 "onnx_array_api/reference/__init__.py" = ["F401"]
 "_unittests/ut_npx/test_npx.py" = ["F821"]
+"_unittests/ut_translate_api/test_translate_classic.py" = ["E501"]
 
diff --git a/requirements.txt b/requirements.txt
index 4680cfc..5cb31f3 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,5 @@
 array_api_compat
+array_api_strict
 numpy
 onnx>=1.15.0
 scipy
diff --git a/setup.py b/setup.py
index bc4e87e..69b5b9e 100644
--- a/setup.py
+++ b/setup.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 import os
 
 from setuptools import setup

From d83ff4e5c8cb9475821b0cb997c03ad570ce3a46 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= <xadupre@users.noreply.github.com>
Date: Tue, 22 Oct 2024 13:06:17 +0200
Subject: [PATCH 34/44] Fix documentation (#90)

---
 _doc/long_outputs.rst | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/_doc/long_outputs.rst b/_doc/long_outputs.rst
index 64c0b84..745382b 100644
--- a/_doc/long_outputs.rst
+++ b/_doc/long_outputs.rst
@@ -4,9 +4,6 @@
 Long outputs uneasy to see
 ==========================
 
-.. contents::
-    :local:
-
 onnx
 ====
 

From e002bf4432b1d2cef4b4afffea9b1be85f4d3161 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= <xadupre@users.noreply.github.com>
Date: Tue, 12 Nov 2024 23:02:08 +0100
Subject: [PATCH 35/44] Improves onnx_simple_text_plot (#91)

* Improves onnx_simple_text_plot

* add doc_string

* improve display

* add complex

* add missing line

* complex

* complex

* fix unwanted code
---
 onnx_array_api/_helpers.py                    |  4 +++
 onnx_array_api/annotations.py                 |  2 ++
 onnx_array_api/array_api/__init__.py          |  4 +++
 onnx_array_api/array_api/_onnx_common.py      |  2 ++
 onnx_array_api/graph_api/graph_builder.py     |  1 -
 onnx_array_api/npx/npx_jit_eager.py           |  4 +--
 onnx_array_api/npx/npx_numpy_tensors.py       | 22 ++++++++++++
 onnx_array_api/npx/npx_var.py                 |  4 +++
 onnx_array_api/plotting/text_plot.py          | 34 +++++++++++++++----
 onnx_array_api/reference/evaluator_yield.py   |  6 ++++
 .../reference/ops/op_constant_of_shape.py     |  4 +++
 11 files changed, 78 insertions(+), 9 deletions(-)

diff --git a/onnx_array_api/_helpers.py b/onnx_array_api/_helpers.py
index 1d95bb2..9331098 100644
--- a/onnx_array_api/_helpers.py
+++ b/onnx_array_api/_helpers.py
@@ -40,6 +40,10 @@ def np_dtype_to_tensor_dtype(dtype: Any):
             dt = TensorProto.INT64
         elif dtype is float:
             dt = TensorProto.DOUBLE
+        elif dtype == np.complex64:
+            dt = TensorProto.COMPLEX64
+        elif dtype == np.complex128:
+            dt = TensorProto.COMPLEX128
         else:
             raise KeyError(f"Unable to guess type for dtype={dtype}.")  # noqa: B904
     return dt
diff --git a/onnx_array_api/annotations.py b/onnx_array_api/annotations.py
index 9941f95..c29102c 100644
--- a/onnx_array_api/annotations.py
+++ b/onnx_array_api/annotations.py
@@ -64,6 +64,8 @@ def wrapper(self, *args: List[Any], **kwargs: Dict[str, Any]) -> Any:
     np.uint64: TensorProto.UINT64,
     np.bool_: TensorProto.BOOL,
     np.str_: TensorProto.STRING,
+    np.complex64: TensorProto.COMPLEX64,
+    np.complex128: TensorProto.COMPLEX128,
 }
 
 
diff --git a/onnx_array_api/array_api/__init__.py b/onnx_array_api/array_api/__init__.py
index 3252405..9b67b4b 100644
--- a/onnx_array_api/array_api/__init__.py
+++ b/onnx_array_api/array_api/__init__.py
@@ -47,6 +47,8 @@ def _finfo(dtype):
             continue
         if isinstance(v, (np.float32, np.float64, np.float16)):
             d[k] = float(v)
+        elif isinstance(v, (np.complex128, np.complex64)):
+            d[k] = complex(v)
         else:
             d[k] = v
     d["dtype"] = DType(np_dtype_to_tensor_dtype(dt))
@@ -124,6 +126,8 @@ def _finalize_array_api(module, function_names, TEagerTensor):
     module.float16 = DType(TensorProto.FLOAT16)
     module.float32 = DType(TensorProto.FLOAT)
     module.float64 = DType(TensorProto.DOUBLE)
+    module.complex64 = DType(TensorProto.COMPLEX64)
+    module.complex128 = DType(TensorProto.COMPLEX128)
     module.int8 = DType(TensorProto.INT8)
     module.int16 = DType(TensorProto.INT16)
     module.int32 = DType(TensorProto.INT32)
diff --git a/onnx_array_api/array_api/_onnx_common.py b/onnx_array_api/array_api/_onnx_common.py
index abc59a9..d69084a 100644
--- a/onnx_array_api/array_api/_onnx_common.py
+++ b/onnx_array_api/array_api/_onnx_common.py
@@ -93,6 +93,8 @@ def asarray(
                     v = TEagerTensor(va)
     elif isinstance(a, float):
         v = TEagerTensor(np.array(a, dtype=np.float64))
+    elif isinstance(a, complex):
+        v = TEagerTensor(np.array(a, dtype=np.complex128))
     elif isinstance(a, bool):
         v = TEagerTensor(np.array(a, dtype=np.bool_))
     elif isinstance(a, str):
diff --git a/onnx_array_api/graph_api/graph_builder.py b/onnx_array_api/graph_api/graph_builder.py
index 293d2cc..558c34a 100644
--- a/onnx_array_api/graph_api/graph_builder.py
+++ b/onnx_array_api/graph_api/graph_builder.py
@@ -536,7 +536,6 @@ def make_nodes(
             if isinstance(value, TensorProto):
                 value.name = name
             self.initializers_dict[name] = value
-
             self.constants_[name] = None
             self.set_shape(name, builder._known_shapes[init])
             self.set_type(name, builder._known_types[init])
diff --git a/onnx_array_api/npx/npx_jit_eager.py b/onnx_array_api/npx/npx_jit_eager.py
index 172bb86..267eda5 100644
--- a/onnx_array_api/npx/npx_jit_eager.py
+++ b/onnx_array_api/npx/npx_jit_eager.py
@@ -167,7 +167,7 @@ def make_key(self, *values: List[Any], **kwargs: Dict[str, Any]) -> Tuple[Any, .
                         f"to the attribute list, v={v}."
                     )
                 res.append(v.key)
-            elif isinstance(v, (int, float, bool, DType)):
+            elif isinstance(v, (int, float, bool, complex, DType)):
                 if iv in self.kwargs_to_input_:
                     res.append(self.kwargs_to_input_[iv])
                 res.append(type(v))
@@ -204,7 +204,7 @@ def make_key(self, *values: List[Any], **kwargs: Dict[str, Any]) -> Tuple[Any, .
                 if k in self.kwargs_to_input_:
                     res.append(type(v))
                     res.append(v)
-                elif isinstance(v, (int, float, str, type, bool, DType)):
+                elif isinstance(v, (int, float, str, type, bool, complex, DType)):
                     res.append(k)
                     res.append(type(v))
                     res.append(v)
diff --git a/onnx_array_api/npx/npx_numpy_tensors.py b/onnx_array_api/npx/npx_numpy_tensors.py
index 1daef44..9579455 100644
--- a/onnx_array_api/npx/npx_numpy_tensors.py
+++ b/onnx_array_api/npx/npx_numpy_tensors.py
@@ -265,6 +265,8 @@ def __float__(self):
             DType(TensorProto.DOUBLE),
             DType(TensorProto.FLOAT16),
             DType(TensorProto.BFLOAT16),
+            DType(TensorProto.COMPLEX64),
+            DType(TensorProto.COMPLEX128),
         }:
             raise TypeError(
                 f"Conversion to float only works for float scalar, "
@@ -272,6 +274,26 @@ def __float__(self):
             )
         return float(self._tensor)
 
+    def __complex__(self):
+        "Implicit conversion to complex, only for float or complex scalars."
+        if self.shape:  # a non-empty shape means the tensor is not a scalar
+            raise ValueError(
+                f"Conversion to complex only works for scalar, not for {self!r}."
+            )
+        if self.dtype not in {
+            DType(TensorProto.FLOAT),
+            DType(TensorProto.DOUBLE),
+            DType(TensorProto.FLOAT16),
+            DType(TensorProto.BFLOAT16),
+            DType(TensorProto.COMPLEX64),
+            DType(TensorProto.COMPLEX128),
+        }:
+            raise TypeError(
+                f"Conversion to complex only works for float or complex scalar, "
+                f"not for dtype={self.dtype}."
+            )
+        return complex(self._tensor)
+
     def __iter__(self):
         """
         The :epkg:`Array API` does not define this function (2022/12).
diff --git a/onnx_array_api/npx/npx_var.py b/onnx_array_api/npx/npx_var.py
index 169183c..0e71070 100644
--- a/onnx_array_api/npx/npx_var.py
+++ b/onnx_array_api/npx/npx_var.py
@@ -1171,6 +1171,8 @@ def __init__(self, cst: Any):
             Var.__init__(self, np.array(cst, dtype=np.int64), op="Identity")
         elif isinstance(cst, float):
             Var.__init__(self, np.array(cst, dtype=np.float64), op="Identity")
+        elif isinstance(cst, complex):
+            Var.__init__(self, np.array(cst, dtype=np.complex128), op="Identity")
         elif isinstance(cst, list):
             if all(isinstance(t, bool) for t in cst):
                 Var.__init__(self, np.array(cst, dtype=np.bool_), op="Identity")
@@ -1178,6 +1180,8 @@ def __init__(self, cst: Any):
                 Var.__init__(self, np.array(cst, dtype=np.int64), op="Identity")
             elif all(isinstance(t, (float, int, bool)) for t in cst):
                 Var.__init__(self, np.array(cst, dtype=np.float64), op="Identity")
+            elif all(isinstance(t, (float, int, bool, complex)) for t in cst):
+                Var.__init__(self, np.array(cst, dtype=np.complex128), op="Identity")
             else:
                 raise ValueError(
                     f"Unable to convert cst (type={type(cst)}), value={cst}."
diff --git a/onnx_array_api/plotting/text_plot.py b/onnx_array_api/plotting/text_plot.py
index d3f27c6..0b4d30a 100644
--- a/onnx_array_api/plotting/text_plot.py
+++ b/onnx_array_api/plotting/text_plot.py
@@ -824,7 +824,10 @@ def str_node(indent, node):
             rows.append(f"opset: domain={opset.domain!r} version={opset.version!r}")
     if hasattr(model, "graph"):
         if model.doc_string:
-            rows.append(f"doc_string: {model.doc_string}")
+            if len(model.doc_string) < 55:
+                rows.append(f"doc_string: {model.doc_string}")
+            else:
+                rows.append(f"doc_string: {model.doc_string[:55]}...")
         main_model = model
         model = model.graph
     else:
@@ -861,9 +864,16 @@ def str_node(indent, node):
             else:
                 content = ""
             line_name_new[init.name] = len(rows)
+            if init.doc_string:
+                t = (
+                    f"init: name={init.name!r} type={_get_type(init)} "
+                    f"shape={_get_shape(init)}{content}"
+                )
+                rows.append(f"{t}{' ' * max(0, 70 - len(t))}-- {init.doc_string}")
+                continue
             rows.append(
-                "init: name=%r type=%r shape=%r%s"
-                % (init.name, _get_type(init), _get_shape(init), content)
+                f"init: name={init.name!r} type={_get_type(init)} "
+                f"shape={_get_shape(init)}{content}"
             )
     if level == 0:
         rows.append("----- main graph ----")
@@ -1044,7 +1054,10 @@ def _mark_link(rows, lengths, r1, r2, d):
         for fct in main_model.functions:
             rows.append(f"----- function name={fct.name} domain={fct.domain}")
             if fct.doc_string:
-                rows.append(f"----- doc_string: {fct.doc_string}")
+                if len(fct.doc_string) < 55:
+                    rows.append(f"----- doc_string: {fct.doc_string}")
+                else:
+                    rows.append(f"----- doc_string: {fct.doc_string[:55]}...")
             res = onnx_simple_text_plot(
                 fct,
                 verbose=verbose,
@@ -1103,10 +1116,19 @@ def onnx_text_plot_io(model, verbose=False, att_display=None):
         )
     # initializer
     for init in model.initializer:
+
+        if init.doc_string:
+            t = (
+                f"init: name={init.name!r} type={_get_type(init)} "
+                f"shape={_get_shape(init)}"
+            )
+            rows.append(f"{t}{' ' * max(0, 70 - len(t))}-- {init.doc_string}")
+            continue
         rows.append(
-            "init: name=%r type=%r shape=%r"
-            % (init.name, _get_type(init), _get_shape(init))
+            f"init: name={init.name!r} type={_get_type(init)} "
+            f"shape={_get_shape(init)}"
         )
+
     # outputs
     for out in model.output:
         rows.append(
diff --git a/onnx_array_api/reference/evaluator_yield.py b/onnx_array_api/reference/evaluator_yield.py
index 9c3b6ec..5b77e8b 100644
--- a/onnx_array_api/reference/evaluator_yield.py
+++ b/onnx_array_api/reference/evaluator_yield.py
@@ -485,6 +485,12 @@ def generate_input(info: ValueInfoProto) -> np.ndarray:
         return (value.astype(np.float16) / p).astype(np.float16).reshape(new_shape)
     if elem_type == TensorProto.DOUBLE:
         return (value.astype(np.float64) / p).astype(np.float64).reshape(new_shape)
+    if elem_type == TensorProto.COMPLEX64:
+        return (value.astype(np.complex64) / p).astype(np.complex64).reshape(new_shape)
+    if elem_type == TensorProto.COMPLEX128:
+        return (
+            (value.astype(np.complex128) / p).astype(np.complex128).reshape(new_shape)
+        )
     raise RuntimeError(f"Unexpected element_type {elem_type} for info={info}")
 
 
diff --git a/onnx_array_api/reference/ops/op_constant_of_shape.py b/onnx_array_api/reference/ops/op_constant_of_shape.py
index 00c6989..a54bb5a 100644
--- a/onnx_array_api/reference/ops/op_constant_of_shape.py
+++ b/onnx_array_api/reference/ops/op_constant_of_shape.py
@@ -19,6 +19,8 @@ def _process(value):
             cst = np.int64(cst)
         elif isinstance(cst, float):
             cst = np.float64(cst)
+        elif isinstance(cst, complex):
+            cst = np.complex128(cst)
         elif cst is None:
             cst = np.float32(0)
         if not isinstance(
@@ -27,6 +29,8 @@ def _process(value):
                 np.float16,
                 np.float32,
                 np.float64,
+                np.complex64,
+                np.complex128,
                 np.int64,
                 np.int32,
                 np.int16,

From 07c3683614b9a90a4864977f07e7043fd6091d91 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= <xadupre@users.noreply.github.com>
Date: Fri, 20 Dec 2024 15:57:52 +0100
Subject: [PATCH 36/44] Avoid recursion error in profiling (#92)

* Avoid recursion error in profiling

* disable two tests

* disable tests
---
 CHANGELOGS.rst                                  | 3 ++-
 _unittests/ut_npx/test_sklearn_array_api.py     | 2 ++
 _unittests/ut_ort/test_sklearn_array_api_ort.py | 2 ++
 onnx_array_api/profiling.py                     | 4 ++--
 4 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/CHANGELOGS.rst b/CHANGELOGS.rst
index e435a75..dd8e3c7 100644
--- a/CHANGELOGS.rst
+++ b/CHANGELOGS.rst
@@ -4,7 +4,8 @@ Change Logs
 0.3.0
 +++++
 
-* :pr:`87`: add command line to replace contant by ConstantOfShape
+* :pr:`92`: avoids recursion errors in profiling
+* :pr:`87`: adds command line to replace contant by ConstantOfShape
 * :pr:`79`: first draft to export to GraphBuilder
 * :pr:`77`: supports ConcatOfShape and Slice with the light API
 
diff --git a/_unittests/ut_npx/test_sklearn_array_api.py b/_unittests/ut_npx/test_sklearn_array_api.py
index 083c009..9c0d56f 100644
--- a/_unittests/ut_npx/test_sklearn_array_api.py
+++ b/_unittests/ut_npx/test_sklearn_array_api.py
@@ -17,6 +17,7 @@ class TestSklearnArrayAPI(ExtTestCase):
         reason="reshape ArrayAPI not followed",
     )
     @ignore_warnings(DeprecationWarning)
+    @unittest.skip("not maintained")
     def test_sklearn_array_api_linear_discriminant(self):
         X = np.array(
             [[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]], dtype=np.float64
@@ -39,6 +40,7 @@ def test_sklearn_array_api_linear_discriminant(self):
         reason="reshape ArrayAPI not followed",
     )
     @ignore_warnings(DeprecationWarning)
+    @unittest.skip("not maintained")
     def test_sklearn_array_api_linear_discriminant_float32(self):
         X = np.array(
             [[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]], dtype=np.float32
diff --git a/_unittests/ut_ort/test_sklearn_array_api_ort.py b/_unittests/ut_ort/test_sklearn_array_api_ort.py
index 296a9b0..f50fce1 100644
--- a/_unittests/ut_ort/test_sklearn_array_api_ort.py
+++ b/_unittests/ut_ort/test_sklearn_array_api_ort.py
@@ -17,6 +17,7 @@ class TestSklearnArrayAPIOrt(ExtTestCase):
         reason="reshape ArrayAPI not followed",
     )
     @skipif_ci_windows("Unstable on Windows.")
+    @unittest.skip("discontinued")
     def test_sklearn_array_api_linear_discriminant_ort(self):
         X = np.array(
             [[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]], dtype=np.float64
@@ -40,6 +41,7 @@ def test_sklearn_array_api_linear_discriminant_ort(self):
         reason="reshape ArrayAPI not followed",
     )
     @skipif_ci_windows("Unstable on Windows.")
+    @unittest.skip("discontinued")
     def test_sklearn_array_api_linear_discriminant_ort_float32(self):
         X = np.array(
             [[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]], dtype=np.float32
diff --git a/onnx_array_api/profiling.py b/onnx_array_api/profiling.py
index 815f950..ab2cc6b 100644
--- a/onnx_array_api/profiling.py
+++ b/onnx_array_api/profiling.py
@@ -73,8 +73,8 @@ def _get_root(node, stor=None):
                 stor.append(node)
             if not node.called_by:
                 return node
-            if len(node.called_by) == 1:
-                return _get_root(node.called_by[0], stor=stor)
+            if len(node.called_by) == 0:
+                return None
             res = None
             for ct in node.called_by:
                 k = id(node), id(ct)

From eb106e2689dc50dc667672e260c40eac2f15bb6d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= <xadupre@users.noreply.github.com>
Date: Mon, 6 Jan 2025 18:42:22 +0100
Subject: [PATCH 37/44] Export evaluator type in compare_onnx_execution (#93)

* Export evaluator type in compare_onnx_execution

* doc

* doc
---
 LICENSE.txt                                 |  2 +-
 onnx_array_api/reference/evaluator_yield.py | 21 +++++++++++++++------
 2 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/LICENSE.txt b/LICENSE.txt
index e027853..1a46a8e 100644
--- a/LICENSE.txt
+++ b/LICENSE.txt
@@ -1,4 +1,4 @@
-﻿Copyright (c) 2023-2024, Xavier Dupré
+﻿Copyright (c) 2023-2025, Xavier Dupré
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
diff --git a/onnx_array_api/reference/evaluator_yield.py b/onnx_array_api/reference/evaluator_yield.py
index 5b77e8b..6ae005c 100644
--- a/onnx_array_api/reference/evaluator_yield.py
+++ b/onnx_array_api/reference/evaluator_yield.py
@@ -3,6 +3,7 @@
 from enum import IntEnum
 import numpy as np
 from onnx import ModelProto, TensorProto, ValueInfoProto, load
+from onnx.reference import ReferenceEvaluator
 from onnx.helper import tensor_dtype_to_np_dtype
 from onnx.shape_inference import infer_shapes
 from . import to_array_extended
@@ -138,17 +139,23 @@ class YieldEvaluator:
 
     :param onnx_model: model to run
     :param recursive: dig into subgraph and functions as well
+    :param cls: evaluator to use, default value is :class:`ExtendedReferenceEvaluator
+        <onnx_array_api.reference.ExtendedReferenceEvaluator>`
     """
 
     def __init__(
         self,
         onnx_model: ModelProto,
         recursive: bool = False,
-        cls=ExtendedReferenceEvaluator,
+        cls: Optional[type[ExtendedReferenceEvaluator]] = None,
     ):
         assert not recursive, "recursive=True is not yet implemented"
         self.onnx_model = onnx_model
-        self.evaluator = cls(onnx_model) if cls is not None else None
+        self.evaluator = (
+            cls(onnx_model)
+            if cls is not None
+            else ExtendedReferenceEvaluator(onnx_model)
+        )
 
     def enumerate_results(
         self,
@@ -166,9 +173,9 @@ def enumerate_results(
         Returns:
             iterator on tuple(result kind, name, value, node.op_type or None)
         """
-        assert isinstance(self.evaluator, ExtendedReferenceEvaluator), (
+        assert isinstance(self.evaluator, ReferenceEvaluator), (
             f"This implementation only works with "
-            f"ExtendedReferenceEvaluator not {type(self.evaluator)}"
+            f"ReferenceEvaluator not {type(self.evaluator)}"
         )
         attributes = {}
         if output_names is None:
@@ -595,6 +602,7 @@ def compare_onnx_execution(
     raise_exc: bool = True,
     mode: str = "execute",
     keep_tensor: bool = False,
+    cls: Optional[type[ReferenceEvaluator]] = None,
 ) -> Tuple[List[ResultExecution], List[ResultExecution], List[Tuple[int, int]]]:
     """
     Compares the execution of two onnx models.
@@ -611,6 +619,7 @@ def compare_onnx_execution(
     :param mode: the model should be executed but the function can be executed
         but the comparison may append on nodes only
     :param keep_tensor: keeps the tensor in order to compute a precise distance
+    :param cls: evaluator class to use
     :return: four results, a sequence of results
         for the first model and the second model,
         the alignment between the two, DistanceExecution
@@ -634,7 +643,7 @@ def compare_onnx_execution(
             print(f"[compare_onnx_execution] execute with {len(inputs)} inputs")
             print("[compare_onnx_execution] execute first model")
         res1 = list(
-            YieldEvaluator(model1).enumerate_summarized(
+            YieldEvaluator(model1, cls=cls).enumerate_summarized(
                 None, feeds1, raise_exc=raise_exc, keep_tensor=keep_tensor
             )
         )
@@ -642,7 +651,7 @@ def compare_onnx_execution(
             print(f"[compare_onnx_execution] got {len(res1)} results")
             print("[compare_onnx_execution] execute second model")
         res2 = list(
-            YieldEvaluator(model2).enumerate_summarized(
+            YieldEvaluator(model2, cls=cls).enumerate_summarized(
                 None, feeds2, raise_exc=raise_exc, keep_tensor=keep_tensor
             )
         )

From 689cc6f18d466e45f71e3bcf7015894099df6e62 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= <xadupre@users.noreply.github.com>
Date: Mon, 17 Feb 2025 17:34:32 +0100
Subject: [PATCH 38/44] Upgrade version (#94)

* Upgrade version

* upgrade version

* disable

* fix import

* disable more

* remove mac ci

* remove numpy.array_api

* remove

* one day maybe

* fix urls
---
 .github/workflows/documentation.yml           |  2 +-
 .github/workflows/wheels-any.yml              |  2 +-
 CHANGELOGS.rst                                |  1 +
 _doc/conf.py                                  |  6 +-
 _doc/index.rst                                |  2 +
 _unittests/onnx-numpy-skips.txt               |  6 ++
 .../ut_array_api/test_hypothesis_array_api.py |  8 +--
 _unittests/ut_graph_api/test_graph_builder.py |  6 +-
 _unittests/ut_npx/test_npx.py                 |  2 +-
 azure-pipelines.yml                           | 57 ++-----------------
 onnx_array_api/__init__.py                    |  2 +-
 onnx_array_api/array_api/_onnx_common.py      | 14 ++---
 onnx_array_api/npx/npx_functions_test.py      |  8 +--
 onnx_array_api/validation/tools.py            |  2 +-
 14 files changed, 35 insertions(+), 83 deletions(-)

diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml
index ba80296..70ba37c 100644
--- a/.github/workflows/documentation.yml
+++ b/.github/workflows/documentation.yml
@@ -83,6 +83,6 @@ jobs:
             exit 1
           fi
 
-      - uses: actions/upload-artifact@v3
+      - uses: actions/upload-artifact@v4
         with:
           path: ./dist/html/**
diff --git a/.github/workflows/wheels-any.yml b/.github/workflows/wheels-any.yml
index c20a15d..e44b100 100644
--- a/.github/workflows/wheels-any.yml
+++ b/.github/workflows/wheels-any.yml
@@ -24,6 +24,6 @@ jobs:
       - name: build wheel
         run: python -m pip wheel .
 
-      - uses: actions/upload-artifact@v3
+      - uses: actions/upload-artifact@v4
         with:
           path: ./onnx_array_api*.whl
diff --git a/CHANGELOGS.rst b/CHANGELOGS.rst
index dd8e3c7..3aa613d 100644
--- a/CHANGELOGS.rst
+++ b/CHANGELOGS.rst
@@ -4,6 +4,7 @@ Change Logs
 0.3.0
 +++++
 
+* :pr:`93`: fixes evaluator type in ``compare_onnx_execution``
 * :pr:`92`: avoids recursion errors in profiling
 * :pr:`87`: adds command line to replace contant by ConstantOfShape
 * :pr:`79`: first draft to export to GraphBuilder
diff --git a/_doc/conf.py b/_doc/conf.py
index 3c7a1ad..b6c1c4a 100644
--- a/_doc/conf.py
+++ b/_doc/conf.py
@@ -146,11 +146,9 @@
     "torch.onnx": "https://pytorch.org/docs/stable/onnx.html",
     #
     "C_OrtValue": (
-        "http://www.xavierdupre.fr/app/onnxcustom/helpsphinx/"
-        "api/onnxruntime_python/ortvalue.html#c-class-ortvalue-or-c-ortvalue"
+        "https://onnxruntime.ai/docs/api/csharp/api/Microsoft.ML.OnnxRuntime.OrtValue.html"
     ),
     "OrtValue": (
-        "http://www.xavierdupre.fr/app/onnxcustom/helpsphinx/"
-        "api/onnxruntime_python/ortvalue.html#onnxruntime.OrtValue"
+        "https://onnxruntime.ai/docs/api/python/api_summary.html#onnxruntime.OrtValue"
     ),
 }
diff --git a/_doc/index.rst b/_doc/index.rst
index f9a07e5..9bdc4e2 100644
--- a/_doc/index.rst
+++ b/_doc/index.rst
@@ -187,5 +187,7 @@ to know onnx for that. See :ref:`l-numpy-api-onnx`.
 Older versions
 ++++++++++++++
 
+* `0.3.0 <../v0.3.0/index.html>`_
+* `0.2.0 <../v0.2.0/index.html>`_
 * `0.1.3 <../v0.1.3/index.html>`_
 * `0.1.2 <../v0.1.2/index.html>`_
diff --git a/_unittests/onnx-numpy-skips.txt b/_unittests/onnx-numpy-skips.txt
index 1d46bbb..5deb50e 100644
--- a/_unittests/onnx-numpy-skips.txt
+++ b/_unittests/onnx-numpy-skips.txt
@@ -6,6 +6,12 @@ array_api_tests/test_creation_functions.py::test_asarray_arrays
 array_api_tests/test_creation_functions.py::test_empty
 array_api_tests/test_creation_functions.py::test_empty_like
 array_api_tests/test_creation_functions.py::test_eye
+array_api_tests/test_creation_functions.py::test_full
+array_api_tests/test_creation_functions.py::test_full_like
+array_api_tests/test_creation_functions.py::test_ones
+array_api_tests/test_creation_functions.py::test_ones_like
+array_api_tests/test_creation_functions.py::test_zeros
+array_api_tests/test_creation_functions.py::test_zeros_like
 # fails to precision issue
 array_api_tests/test_creation_functions.py::test_linspace
 array_api_tests/test_creation_functions.py::test_meshgrid
diff --git a/_unittests/ut_array_api/test_hypothesis_array_api.py b/_unittests/ut_array_api/test_hypothesis_array_api.py
index 602f928..f55d230 100644
--- a/_unittests/ut_array_api/test_hypothesis_array_api.py
+++ b/_unittests/ut_array_api/test_hypothesis_array_api.py
@@ -1,5 +1,4 @@
 import unittest
-import warnings
 from os import getenv
 from functools import reduce
 import packaging.version as pv
@@ -45,12 +44,7 @@ class TestHypothesisArraysApis(ExtTestCase):
 
     @classmethod
     def setUpClass(cls):
-        try:
-            import array_api_strict as xp
-        except ImportError:
-            with warnings.catch_warnings():
-                warnings.simplefilter("ignore")
-                from numpy import array_api as xp
+        import array_api_strict as xp
 
         api_version = getenv(
             "ARRAY_API_TESTS_VERSION",
diff --git a/_unittests/ut_graph_api/test_graph_builder.py b/_unittests/ut_graph_api/test_graph_builder.py
index 33c3155..9e6229b 100644
--- a/_unittests/ut_graph_api/test_graph_builder.py
+++ b/_unittests/ut_graph_api/test_graph_builder.py
@@ -3,7 +3,7 @@
 import unittest
 import numpy as np
 import onnx
-from onnx_array_api.ext_test_case import ExtTestCase
+from onnx_array_api.ext_test_case import ExtTestCase, skipif_ci_apple
 from onnx_array_api.graph_api.graph_builder import GraphBuilder, OptimizationOptions
 from onnx_array_api.reference import (
     from_array_extended,
@@ -107,6 +107,7 @@ def test_simple_big(self):
             got = ref.run(None, feeds)
             self.assertEqualArray(expected, got[0])
 
+    @skipif_ci_apple("libomp is missing")
     def test_constant_folding(self):
         with contextlib.redirect_stdout(io.StringIO()):
             g = GraphBuilder(verbose=10)
@@ -133,6 +134,7 @@ def test_constant_folding(self):
             got = ref.run(None, feeds)
             self.assertEqualArray(expected, got[0])
 
+    @skipif_ci_apple("libomp is missing")
     def test_constant_folding2(self):
         g = GraphBuilder(
             optimization_options=OptimizationOptions(constant_folding=True)
@@ -270,6 +272,7 @@ def test_remove_unused_nodes_simple(self):
             got = ref.run(None, feeds)
             self.assertEqualArray(expected, got[0])
 
+    @skipif_ci_apple("libomp is missing")
     def test_constant_array(self):
         with contextlib.redirect_stdout(io.StringIO()):
             g = GraphBuilder(verbose=10)
@@ -290,6 +293,7 @@ def test_constant_array(self):
             got = ref.run(None, feeds)
             self.assertEqualArray(expected, got[0])
 
+    @skipif_ci_apple("libomp is missing")
     def test_constant_array_2(self):
         with contextlib.redirect_stdout(io.StringIO()):
             g = GraphBuilder(verbose=10)
diff --git a/_unittests/ut_npx/test_npx.py b/_unittests/ut_npx/test_npx.py
index 50e319a..873665d 100644
--- a/_unittests/ut_npx/test_npx.py
+++ b/_unittests/ut_npx/test_npx.py
@@ -208,7 +208,7 @@ def local1(
             return x
 
         def local2(
-            x: TensorType[ElemType.floats, "T"]
+            x: TensorType[ElemType.floats, "T"],
         ) -> TensorType[ElemType.floats, "T"]:
             return x
 
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 20d27ce..b795a0c 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -139,11 +139,11 @@ jobs:
       cd array-api-tests
       python -m pytest -x array_api_tests/test_creation_functions.py --skips-file=../_unittests/onnx-numpy-skips.txt --hypothesis-explain
     displayName: "numpy test_creation_functions.py"
-  - script: |
-      export ARRAY_API_TESTS_MODULE=onnx_array_api.array_api.onnx_ort
-      cd array-api-tests
-      python -m pytest -x array_api_tests/test_creation_functions.py --skips-file=../_unittests/onnx-ort-skips.txt --hypothesis-explain
-    displayName: "ort test_creation_functions.py"
+  # - script: |
+  #     export ARRAY_API_TESTS_MODULE=onnx_array_api.array_api.onnx_ort
+  #    cd array-api-tests
+  #     python -m pytest -x array_api_tests/test_creation_functions.py --skips-file=../_unittests/onnx-ort-skips.txt --hypothesis-explain
+  #   displayName: "ort test_creation_functions.py"
   #- script: |
   #    export ARRAY_API_TESTS_MODULE=onnx_array_api.array_api.onnx_numpy
   #    cd array-api-tests
@@ -238,50 +238,3 @@ jobs:
     inputs:
       artifactName: 'wheel-windows-$(python.version)'
       targetPath: 'dist'
-
-- job: 'TestMac'
-  pool:
-    vmImage: 'macOS-latest'
-  strategy:
-    matrix:
-      Python311-Mac:
-        python.version: '3.11'
-    maxParallel: 3
-
-  steps:
-  - task: UsePythonVersion@0
-    inputs:
-      versionSpec: '$(python.version)'
-      architecture: 'x64'
-  - script: gcc --version
-    displayName: 'gcc version'
-  #- script: brew upgrade
-  #  displayName: 'brew upgrade'
-  #- script: brew update
-  #  displayName: 'brew update'
-  - script: export
-    displayName: 'export'
-  - script: gcc --version
-    displayName: 'gcc version'
-  - script: python -m pip install --upgrade pip setuptools wheel
-    displayName: 'Install tools'
-  - script: pip install -r requirements.txt
-    displayName: 'Install Requirements'
-  - script: pip install -r requirements-dev.txt
-    displayName: 'Install Requirements dev'
-  - script: pip install onnxmltools --no-deps
-    displayName: 'Install onnxmltools'
-  - script: |
-      python -m pip freeze
-    displayName: 'pip freeze'
-  - script: |
-      python -m pytest
-    displayName: 'Runs Unit Tests'
-  - script: |
-      python -u setup.py bdist_wheel
-    displayName: 'Build Package'
-  - task: PublishPipelineArtifact@0
-    inputs:
-      artifactName: 'wheel-mac-$(python.version)'
-      targetPath: 'dist'
-
diff --git a/onnx_array_api/__init__.py b/onnx_array_api/__init__.py
index f78126c..837bc52 100644
--- a/onnx_array_api/__init__.py
+++ b/onnx_array_api/__init__.py
@@ -2,5 +2,5 @@
 APIs to create ONNX Graphs.
 """
 
-__version__ = "0.2.0"
+__version__ = "0.3.0"
 __author__ = "Xavier Dupré"
diff --git a/onnx_array_api/array_api/_onnx_common.py b/onnx_array_api/array_api/_onnx_common.py
index d69084a..7c486ce 100644
--- a/onnx_array_api/array_api/_onnx_common.py
+++ b/onnx_array_api/array_api/_onnx_common.py
@@ -1,16 +1,7 @@
 from typing import Any, Optional
-import warnings
 import numpy as np
 from onnx import TensorProto
-
-try:
-    import array_api_strict
-
-    Array = type(array_api_strict.ones((1,)))
-except ImportError:
-    with warnings.catch_warnings():
-        warnings.simplefilter("ignore")
-        from numpy.array_api._array_object import Array
+import array_api_strict
 
 from ..npx.npx_types import (
     DType,
@@ -36,6 +27,9 @@
 )
 
 
+Array = type(array_api_strict.ones((1,)))
+
+
 # These functions with no specific code do not have to be
 # implemented. They are automatically added in
 # :mod:`onnx_array_api.array_api`. It needs
diff --git a/onnx_array_api/npx/npx_functions_test.py b/onnx_array_api/npx/npx_functions_test.py
index 4d442dd..3d03def 100644
--- a/onnx_array_api/npx/npx_functions_test.py
+++ b/onnx_array_api/npx/npx_functions_test.py
@@ -22,21 +22,21 @@
 
 @npxapi_function
 def _min_max(
-    x: TensorType[ElemType.numerics, "T"]
+    x: TensorType[ElemType.numerics, "T"],
 ) -> TupleType[TensorType[ElemType.numerics, "T"], TensorType[ElemType.numerics, "T"]]:
     return tuple_var(var(x, op="ReduceMin"), var(x, op="ReduceMax"))
 
 
 @npxapi_inline
 def _min_max_inline(
-    x: TensorType[ElemType.numerics, "T"]
+    x: TensorType[ElemType.numerics, "T"],
 ) -> TupleType[TensorType[ElemType.numerics, "T"], TensorType[ElemType.numerics, "T"]]:
     return tuple_var(var(x, op="ReduceMin"), var(x, op="ReduceMax"))
 
 
 @npxapi_function
 def absolute(
-    x: TensorType[ElemType.numerics, "T"]
+    x: TensorType[ElemType.numerics, "T"],
 ) -> TensorType[ElemType.numerics, "T"]:
     "See :func:`numpy.absolute`."
     return var(x, op="Abs")
@@ -90,7 +90,7 @@ def log1p(x: TensorType[ElemType.floats, "T"]) -> TensorType[ElemType.floats, "T
 
 @npxapi_function
 def negative(
-    x: TensorType[ElemType.numerics, "T"]
+    x: TensorType[ElemType.numerics, "T"],
 ) -> TensorType[ElemType.numerics, "T"]:
     "See :func:`numpy.negative`."
     return var(x, op="Neg")
diff --git a/onnx_array_api/validation/tools.py b/onnx_array_api/validation/tools.py
index 6cd1da3..cbb02c1 100644
--- a/onnx_array_api/validation/tools.py
+++ b/onnx_array_api/validation/tools.py
@@ -20,7 +20,7 @@
 
 
 def randomize_proto(
-    onx: Union[ModelProto, GraphProto, FunctionProto, NodeProto, TensorProto]
+    onx: Union[ModelProto, GraphProto, FunctionProto, NodeProto, TensorProto],
 ) -> Union[ModelProto, GraphProto, FunctionProto, NodeProto, TensorProto]:
     """
     Randomizes float initializers or constant nodes.

From 664e084de3976b9465b52b18d63f5cb48f54d995 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= <xadupre@users.noreply.github.com>
Date: Tue, 18 Feb 2025 14:08:40 +0100
Subject: [PATCH 39/44] Improves translation to GraphBuilder (#95)

* Improves translation to GraphBuilder

* ch

* fix issue

* ir

* urls

* check
---
 .github/workflows/check-urls.yml              |  4 +-
 CHANGELOGS.rst                                |  5 ++
 .../test_translate_builder.py                 | 67 +++++++++++++++++--
 onnx_array_api/__init__.py                    |  2 +-
 .../translate_api/builder_emitter.py          | 60 ++++++++++++++---
 onnx_array_api/translate_api/translate.py     |  6 +-
 6 files changed, 127 insertions(+), 17 deletions(-)

diff --git a/.github/workflows/check-urls.yml b/.github/workflows/check-urls.yml
index 67d7731..d56adba 100644
--- a/.github/workflows/check-urls.yml
+++ b/.github/workflows/check-urls.yml
@@ -42,6 +42,6 @@ jobs:
         print_all: false
         timeout: 2
         retry_count# : 2
-        exclude_urls: https://hal.archives-ouvertes.fr/hal-00990252/document
-        exclude_patterns: https://www.data.gouv.fr/fr/datasets/r/e3d83ab3-dc52-4c99-abaf-8a38050cc68c,https://dev.azure.com/
+        exclude_urls: https://hal.archives-ouvertes.fr/hal-00990252/document,https://github.com/onnx/tensorflow-onnx
+        exclude_patterns: https://www.data.gouv.fr/fr/datasets/r/e3d83ab3-dc52-4c99-abaf-8a38050cc68c,https://dev.azure.com/,https://github.com/onnx/tensorflow-onnx
         # force_pass : true
diff --git a/CHANGELOGS.rst b/CHANGELOGS.rst
index 3aa613d..746c264 100644
--- a/CHANGELOGS.rst
+++ b/CHANGELOGS.rst
@@ -1,6 +1,11 @@
 Change Logs
 ===========
 
+0.3.1
++++++
+
+* :pr:`95`: improves translation to GraphBuilder
+
 0.3.0
 +++++
 
diff --git a/_unittests/ut_translate_api/test_translate_builder.py b/_unittests/ut_translate_api/test_translate_builder.py
index 7af0134..6f67dff 100644
--- a/_unittests/ut_translate_api/test_translate_builder.py
+++ b/_unittests/ut_translate_api/test_translate_builder.py
@@ -8,7 +8,8 @@
 from onnx_array_api.ext_test_case import ExtTestCase
 from onnx_array_api.light_api import start
 from onnx_array_api.graph_api import GraphBuilder
-from onnx_array_api.translate_api import translate
+from onnx_array_api.translate_api import translate, Translater
+from onnx_array_api.translate_api.builder_emitter import BuilderEmitter
 
 
 OPSET_API = min(19, onnx_opset_version() - 1)
@@ -19,7 +20,7 @@ def setUp(self):
         self.maxDiff = None
 
     def test_exp(self):
-        onx = start(opset=19).vin("X").Exp().rename("Y").vout().to_onnx()
+        onx = start(opset=19, ir_version=10).vin("X").Exp().rename("Y").vout().to_onnx()
         self.assertIsInstance(onx, ModelProto)
         self.assertIn("Exp", str(onx))
         ref = ReferenceEvaluator(onx)
@@ -38,7 +39,7 @@ def light_api(
             op.Identity(Y, outputs=["Y"])
             return Y
 
-        g = GraphBuilder({'': 19})
+        g = GraphBuilder({'': 19}, ir_version=10)
         g.make_tensor_input("X", TensorProto.FLOAT, ())
         light_api(g.op, "X")
         g.make_tensor_output("Y", TensorProto.FLOAT, ())
@@ -68,7 +69,7 @@ def light_api(
 
     def test_zdoc(self):
         onx = (
-            start(opset=19)
+            start(opset=19, ir_version=10)
             .vin("X")
             .reshape((-1, 1))
             .Transpose(perm=[1, 0])
@@ -89,7 +90,7 @@ def light_api(
                 op.Identity(Y, outputs=["Y"])
                 return Y
 
-            g = GraphBuilder({'': 19})
+            g = GraphBuilder({'': 19}, ir_version=10)
             g.make_tensor_input("X", TensorProto.FLOAT, ())
             light_api(g.op, "X")
             g.make_tensor_output("Y", TensorProto.FLOAT, ())
@@ -117,6 +118,62 @@ def light_api(
         self.assertNotEmpty(model)
         check_model(model)
 
+    def test_exp_f(self):
+        onx = start(opset=19, ir_version=10).vin("X").Exp().rename("Y").vout().to_onnx()
+        self.assertIsInstance(onx, ModelProto)
+        self.assertIn("Exp", str(onx))
+        ref = ReferenceEvaluator(onx)
+        a = np.arange(10).astype(np.float32)
+        got = ref.run(None, {"X": a})[0]
+        self.assertEqualArray(np.exp(a), got)
+
+        tr = Translater(onx, emitter=BuilderEmitter("mm"))
+        code = tr.export(as_str=True)
+
+        expected = dedent(
+            """
+        def light_api(
+            op: "GraphBuilder",
+            X: "FLOAT[]",
+        ):
+            Y = op.Exp(X)
+            op.Identity(Y, outputs=["Y"])
+            return Y
+
+
+        def mm() -> "ModelProto":
+            g = GraphBuilder({'': 19}, ir_version=10)
+            g.make_tensor_input("X", TensorProto.FLOAT, ())
+            light_api(g.op, "X")
+            g.make_tensor_output("Y", TensorProto.FLOAT, ())
+            model = g.to_onnx()
+            return model
+
+
+        model = mm()
+        """
+        ).strip("\n")
+        self.assertEqual(expected, code.strip("\n"))
+
+        def light_api(
+            op: "GraphBuilder",
+            X: "FLOAT[]",  # noqa: F722
+        ):
+            Y = op.Exp(X)
+            op.Identity(Y, outputs=["Y"])
+            return Y
+
+        g2 = GraphBuilder({"": 19})
+        g2.make_tensor_input("X", TensorProto.FLOAT, ("A",))
+        light_api(g2.op, "X")
+        g2.make_tensor_output("Y", TensorProto.FLOAT, ("A",))
+        onx2 = g2.to_onnx()
+
+        ref = ReferenceEvaluator(onx2)
+        a = np.arange(10).astype(np.float32)
+        got = ref.run(None, {"X": a})[0]
+        self.assertEqualArray(np.exp(a), got)
+
 
 if __name__ == "__main__":
     unittest.main(verbosity=2)
diff --git a/onnx_array_api/__init__.py b/onnx_array_api/__init__.py
index 837bc52..98371ac 100644
--- a/onnx_array_api/__init__.py
+++ b/onnx_array_api/__init__.py
@@ -2,5 +2,5 @@
 APIs to create ONNX Graphs.
 """
 
-__version__ = "0.3.0"
+__version__ = "0.3.1"
 __author__ = "Xavier Dupré"
diff --git a/onnx_array_api/translate_api/builder_emitter.py b/onnx_array_api/translate_api/builder_emitter.py
index a3b38d6..1c893e2 100644
--- a/onnx_array_api/translate_api/builder_emitter.py
+++ b/onnx_array_api/translate_api/builder_emitter.py
@@ -4,10 +4,17 @@
 from .base_emitter import BaseEmitter
 
 _types = {
+    TensorProto.DOUBLE: "DOUBLE",
     TensorProto.FLOAT: "FLOAT",
     TensorProto.FLOAT16: "FLOAT16",
     TensorProto.INT64: "INT64",
     TensorProto.INT32: "INT32",
+    TensorProto.INT16: "INT16",
+    TensorProto.UINT64: "UINT64",
+    TensorProto.UINT32: "UINT32",
+    TensorProto.UINT16: "UINT16",
+    TensorProto.STRING: "STRING",
+    TensorProto.BOOL: "BOOL",
 }
 
 
@@ -20,6 +27,10 @@ class BuilderEmitter(BaseEmitter):
     Converts event into proper code.
     """
 
+    def __init__(self, make_model_function: str = ""):
+        super().__init__()
+        self.make_model_function = make_model_function
+
     def join(self, rows: List[str], single_line: bool = False) -> str:
         "Join the rows"
         assert (
@@ -29,6 +40,7 @@ def join(self, rows: List[str], single_line: bool = False) -> str:
 
     def _emit_start(self, **kwargs: Dict[str, Any]) -> List[str]:
         self.opsets = kwargs.get("opsets", {})
+        self.ir_version = kwargs.get("ir_version", None)
         return []
 
     def _emit_to_onnx_model(self, **kwargs: Dict[str, Any]) -> List[str]:
@@ -43,12 +55,27 @@ def _emit_to_onnx_model(self, **kwargs: Dict[str, Any]) -> List[str]:
             )
         rows = [
             "",
-            f"g = GraphBuilder({self.opsets})",
+            (
+                f"g = GraphBuilder({self.opsets}, ir_version={self.ir_version})"
+                if self.ir_version
+                else f"g = GraphBuilder({self.opsets})"  # rows below use `g`
+            ),
             *inputs,
             f"{self.name}({inps})",
             *outputs,
             "model = g.to_onnx()",
         ]
+        if self.make_model_function:
+            rows = [
+                "",
+                "",
+                f'def {self.make_model_function}() -> "ModelProto":',
+                *["    " + _ for _ in rows[1:]],
+                "    return model",
+                "",
+                "",
+                f"model = {self.make_model_function}()",
+            ]
         return rows
 
     def _emit_begin_graph(self, **kwargs: Dict[str, Any]) -> List[str]:
@@ -78,13 +105,16 @@ def _emit_input(self, **kwargs: Dict[str, Any]) -> List[str]:
         name = kwargs["name"]
         itype = kwargs.get("elem_type", 0)
         shape = kwargs.get("shape", None)
+        name = self._clean_result_name(name)
         if itype == 0:
-            inp = "X"
+            inp = name or "X"
         else:
             if shape is None:
-                inp = f'X: "{_itype_to_string(itype)}"'
+                inp = f'{name}: "{_itype_to_string(itype)}"'
             else:
-                inp = f'X: "{_itype_to_string(itype)}[{", ".join(map(str, shape))}]"'
+                inp = (
+                    f'{name}: "{_itype_to_string(itype)}[{", ".join(map(str, shape))}]"'
+                )
         self.inputs_full.append(inp)
         self.inputs.append(name)
         self.inputs_full_.append((name, _itype_to_string(itype), shape))
@@ -113,6 +143,7 @@ def _emit_end_return(self, **kwargs: Dict[str, Any]) -> List[str]:
 
     def _emit_output(self, **kwargs: Dict[str, Any]) -> List[str]:
         name = kwargs["name"]
+        name = self._clean_result_name(name)
         itype = kwargs.get("elem_type", 0)
         shape = kwargs.get("shape", None)
         self.outputs.append(name)
@@ -126,6 +157,8 @@ def _emit_node(self, **kwargs: Dict[str, Any]) -> List[str]:
         if kwargs.get("domain", "") != "":
             domain = kwargs["domain"]
             op_type = f"{domain}.{op_type}"
+        else:
+            domain = ""
         atts = kwargs.get("atts", {})
         args = []
         for k, v in atts.items():
@@ -134,11 +167,22 @@ def _emit_node(self, **kwargs: Dict[str, Any]) -> List[str]:
                 raise NotImplementedError("Graph attribute not supported yet.")
             args.append(f"{k}={vatt}")
 
-        outs = ", ".join(outputs)
-        inps = ", ".join(inputs)
+        outs = ", ".join(map(self._clean_result_name, outputs))
+        inps = ", ".join(map(self._clean_result_name, inputs))
+        op_type = self._emit_node_type(op_type, domain)
+        sdomain = "" if not domain else f", domain={domain!r}"
         if args:
             sargs = ", ".join(args)
-            row = f"    {outs} = op.{op_type}({inps}, {sargs})"
+            if inps:
+                row = f"    {outs} = op.{op_type}({inps}, {sargs}{sdomain})"
+            else:
+                row = f"    {outs} = op.{op_type}({sargs}{sdomain})"
         else:
-            row = f"    {outs} = op.{op_type}({inps})"
+            row = f"    {outs} = op.{op_type}({inps}{sdomain})"
         return [row]
+
+    def _clean_result_name(self, name):
+        return name
+
+    def _emit_node_type(self, op_type, domain):
+        return op_type
diff --git a/onnx_array_api/translate_api/translate.py b/onnx_array_api/translate_api/translate.py
index 7b7480b..aa78103 100644
--- a/onnx_array_api/translate_api/translate.py
+++ b/onnx_array_api/translate_api/translate.py
@@ -35,7 +35,11 @@ def export(self, as_str, single_line: bool = False) -> Union[str, List[str]]:
         last_event = None
         if isinstance(self.proto_, ModelProto):
             opsets = {d.domain: d.version for d in self.proto_.opset_import}
-            rows.extend(self.emitter(EventType.START, opsets=opsets))
+            rows.extend(
+                self.emitter(
+                    EventType.START, opsets=opsets, ir_version=self.proto_.ir_version
+                )
+            )
             inputs = self.proto_.graph.input
             outputs = self.proto_.graph.output
             nodes = self.proto_.graph.node

From 3de3c5dc547958f39c8b799491ae526fb847f683 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= <xadupre@users.noreply.github.com>
Date: Wed, 19 Feb 2025 16:30:07 +0100
Subject: [PATCH 40/44] Supports local functions in translator (#96)

* fix suffix

* one fix

* fix

* fix ut

* fix ir_version

* doc
---
 CHANGELOGS.rst                                |   1 +
 .../test_translate_builder.py                 | 144 +++++++++++++++---
 onnx_array_api/graph_api/graph_builder.py     |  13 ++
 onnx_array_api/translate_api/base_emitter.py  |  28 ++++
 .../translate_api/builder_emitter.py          |  72 +++++++--
 onnx_array_api/translate_api/translate.py     |  31 +++-
 6 files changed, 257 insertions(+), 32 deletions(-)

diff --git a/CHANGELOGS.rst b/CHANGELOGS.rst
index 746c264..31056a9 100644
--- a/CHANGELOGS.rst
+++ b/CHANGELOGS.rst
@@ -4,6 +4,7 @@ Change Logs
 0.3.1
 +++++
 
+* :pr:`96`: supports local functions in translator
 * :pr:`95`: improves translation to GraphBuilder
 
 0.3.0
diff --git a/_unittests/ut_translate_api/test_translate_builder.py b/_unittests/ut_translate_api/test_translate_builder.py
index 6f67dff..b1ad394 100644
--- a/_unittests/ut_translate_api/test_translate_builder.py
+++ b/_unittests/ut_translate_api/test_translate_builder.py
@@ -1,6 +1,7 @@
 import unittest
 from textwrap import dedent
 import numpy as np
+import onnx.helper as oh
 from onnx import ModelProto, TensorProto
 from onnx.checker import check_model
 from onnx.defs import onnx_opset_version
@@ -29,37 +30,43 @@ def test_exp(self):
         self.assertEqualArray(np.exp(a), got)
 
         code = translate(onx, api="builder")
-        expected = dedent(
-            """
+        expected = (
+            dedent(
+                """
         def light_api(
             op: "GraphBuilder",
             X: "FLOAT[]",
         ):
-            Y = op.Exp(X)
+            Y = op.Exp(X, outputs=['Y'])
             op.Identity(Y, outputs=["Y"])
             return Y
 
         g = GraphBuilder({'': 19}, ir_version=10)
         g.make_tensor_input("X", TensorProto.FLOAT, ())
         light_api(g.op, "X")
-        g.make_tensor_output("Y", TensorProto.FLOAT, ())
+        g.make_tensor_output("Y", TensorProto.FLOAT, ()__SUFFIX__)
         model = g.to_onnx()
         """
-        ).strip("\n")
+            )
+            .strip("\n")
+            .replace("__SUFFIX__", ", is_dimension=False, indexed=False")
+        )
         self.assertEqual(expected, code.strip("\n"))
 
         def light_api(
             op: "GraphBuilder",
             X: "FLOAT[]",  # noqa: F722
         ):
-            Y = op.Exp(X)
+            Y = op.Exp(X, outputs=["Y"])
             op.Identity(Y, outputs=["Y"])
             return Y
 
         g2 = GraphBuilder({"": 19})
         g2.make_tensor_input("X", TensorProto.FLOAT, ("A",))
         light_api(g2.op, "X")
-        g2.make_tensor_output("Y", TensorProto.FLOAT, ("A",))
+        g2.make_tensor_output(
+            "Y", TensorProto.FLOAT, ("A",), is_dimension=False, indexed=False
+        )
         onx2 = g2.to_onnx()
 
         ref = ReferenceEvaluator(onx2)
@@ -78,25 +85,29 @@ def test_zdoc(self):
             .to_onnx()
         )
         code = translate(onx, api="builder")
-        expected = dedent(
-            """
+        expected = (
+            dedent(
+                """
             def light_api(
                 op: "GraphBuilder",
                 X: "FLOAT[]",
             ):
                 r = np.array([-1, 1], dtype=np.int64)
-                r0_0 = op.Reshape(X, r)
-                Y = op.Transpose(r0_0, perm=[1, 0])
+                r0_0 = op.Reshape(X, r, outputs=['r0_0'])
+                Y = op.Transpose(r0_0, perm=[1, 0], outputs=['Y'])
                 op.Identity(Y, outputs=["Y"])
                 return Y
 
             g = GraphBuilder({'': 19}, ir_version=10)
             g.make_tensor_input("X", TensorProto.FLOAT, ())
             light_api(g.op, "X")
-            g.make_tensor_output("Y", TensorProto.FLOAT, ())
+            g.make_tensor_output("Y", TensorProto.FLOAT, ()__SUFFIX__)
             model = g.to_onnx()
             """
-        ).strip("\n")
+            )
+            .strip("\n")
+            .replace("__SUFFIX__", ", is_dimension=False, indexed=False")
+        )
         self.maxDiff = None
         self.assertEqual(expected, code.strip("\n"))
 
@@ -130,13 +141,14 @@ def test_exp_f(self):
         tr = Translater(onx, emitter=BuilderEmitter("mm"))
         code = tr.export(as_str=True)
 
-        expected = dedent(
-            """
+        expected = (
+            dedent(
+                """
         def light_api(
             op: "GraphBuilder",
             X: "FLOAT[]",
         ):
-            Y = op.Exp(X)
+            Y = op.Exp(X, outputs=['Y'])
             op.Identity(Y, outputs=["Y"])
             return Y
 
@@ -145,14 +157,17 @@ def mm() -> "ModelProto":
             g = GraphBuilder({'': 19}, ir_version=10)
             g.make_tensor_input("X", TensorProto.FLOAT, ())
             light_api(g.op, "X")
-            g.make_tensor_output("Y", TensorProto.FLOAT, ())
+            g.make_tensor_output("Y", TensorProto.FLOAT, ()__SUFFIX__)
             model = g.to_onnx()
             return model
 
 
         model = mm()
         """
-        ).strip("\n")
+            )
+            .strip("\n")
+            .replace("__SUFFIX__", ", is_dimension=False, indexed=False")
+        )
         self.assertEqual(expected, code.strip("\n"))
 
         def light_api(
@@ -166,7 +181,9 @@ def light_api(
         g2 = GraphBuilder({"": 19})
         g2.make_tensor_input("X", TensorProto.FLOAT, ("A",))
         light_api(g2.op, "X")
-        g2.make_tensor_output("Y", TensorProto.FLOAT, ("A",))
+        g2.make_tensor_output(
+            "Y", TensorProto.FLOAT, ("A",), is_dimension=False, indexed=False
+        )
         onx2 = g2.to_onnx()
 
         ref = ReferenceEvaluator(onx2)
@@ -174,6 +191,95 @@ def light_api(
         got = ref.run(None, {"X": a})[0]
         self.assertEqualArray(np.exp(a), got)
 
+    def test_local_function(self):
+        new_domain = "custom"
+
+        linear_regression = oh.make_function(
+            new_domain,
+            "LinearRegression",
+            ["x", "a", "b"],
+            ["y"],
+            [
+                oh.make_node("MatMul", ["x", "a"], ["xa"]),
+                oh.make_node("Add", ["xa", "b"], ["y"]),
+            ],
+            [oh.make_opsetid("", 14)],
+            [],
+        )
+
+        graph = oh.make_graph(
+            [
+                oh.make_node(
+                    "LinearRegression", ["X", "A", "B"], ["Y1"], domain=new_domain
+                ),
+                oh.make_node("Abs", ["Y1"], ["Y"]),
+            ],
+            "example",
+            [
+                oh.make_tensor_value_info("X", TensorProto.FLOAT, [None, None]),
+                oh.make_tensor_value_info("A", TensorProto.FLOAT, [None, None]),
+                oh.make_tensor_value_info("B", TensorProto.FLOAT, [None, None]),
+            ],
+            [oh.make_tensor_value_info("Y", TensorProto.FLOAT, None)],
+        )
+
+        onnx_model = oh.make_model(
+            graph,
+            opset_imports=[oh.make_opsetid("", 14), oh.make_opsetid(new_domain, 1)],
+            functions=[linear_regression],
+            ir_version=10,
+        )
+        tr = Translater(onnx_model, emitter=BuilderEmitter("mm"))
+        code = tr.export(as_str=True)
+
+        expected = (
+            dedent(
+                """
+            def example(
+                op: "GraphBuilder",
+                X: "FLOAT[, ]",
+                A: "FLOAT[, ]",
+                B: "FLOAT[, ]",
+            ):
+                Y1 = op.LinearRegression(X, A, B, domain='custom', outputs=['Y1'])
+                Y = op.Abs(Y1, outputs=['Y'])
+                op.Identity(Y, outputs=["Y"])
+                return Y
+
+
+            def make_custom_LinearRegression(g: "GraphBuilder"):
+                gr = GraphBuilder({'': 14}, as_function=True)
+                x = gr.make_tensor_input('x')
+                a = gr.make_tensor_input('a')
+                b = gr.make_tensor_input('b')
+                op = gr.op
+                xa = op.MatMul(x, a, outputs=['xa'])
+                y = op.Add(xa, b, outputs=['y'])
+                gr.make_tensor_output(y)
+                g.add_function(builder=gr)
+                return gr
+
+
+            def mm() -> "ModelProto":
+                g = GraphBuilder({'': 14, 'custom': 1}, ir_version=10)
+                g.make_tensor_input("X", TensorProto.FLOAT, ('', ''))
+                g.make_tensor_input("A", TensorProto.FLOAT, ('', ''))
+                g.make_tensor_input("B", TensorProto.FLOAT, ('', ''))
+                example(g.op, "X", "A", "B")
+                g.make_tensor_output("Y", TensorProto.FLOAT, ()__SUFFIX__)
+                make_custom_LinearRegression(g)
+                model = g.to_onnx()
+                return model
+
+
+            model = mm()
+        """
+            )
+            .strip("\n")
+            .replace("__SUFFIX__", ", is_dimension=False, indexed=False")
+        )
+        self.assertEqual(expected, code.strip("\n"))
+
 
 if __name__ == "__main__":
     unittest.main(verbosity=2)
diff --git a/onnx_array_api/graph_api/graph_builder.py b/onnx_array_api/graph_api/graph_builder.py
index 558c34a..5e414ed 100644
--- a/onnx_array_api/graph_api/graph_builder.py
+++ b/onnx_array_api/graph_api/graph_builder.py
@@ -194,6 +194,7 @@ def __init__(
             self._known_shapes = {}
             self._known_types = {}
             self.constants_ = {}
+            self.functions_ = {}
         elif isinstance(target_opset_or_existing_proto, ModelProto):
             assert (
                 not input_names
@@ -223,6 +224,8 @@ def __init__(
                     self.constants_[node.output[0]] = node
                     self.set_shape(node.output[0], self._get_tensor_shape(node))
                     self.set_type(node.output[0], self._get_tensor_type(node))
+            for f in proto.functions:
+                self.add_function(f)
         else:
             raise NotImplementedError(
                 f"{type(target_opset_or_existing_proto)} is not supported."
@@ -231,6 +234,14 @@ def __init__(
         self.op = Opset(self, self.opsets[""]) if "" in self.opsets else None
         self._cache_array = []
 
+    def add_local_function(self, domain: str, name: str, gr: "GraphBuilder"):
+        "Adds a local function."
+        assert (
+            domain,
+            name,
+        ) not in self.functions_, f"Function {(domain, name)} was already added."
+        self.functions_[domain, name] = gr
+
     def _get_tensor_shape(
         self, proto: Union[NodeProto, TensorProto]
     ) -> Tuple[int, ...]:
@@ -417,6 +428,8 @@ def make_tensor_output(
         name: Union[str, List[str]],
         elem_type: Optional[int] = None,
         shape: Optional[Tuple[int, ...]] = None,
+        is_dimension: bool = False,
+        indexed: bool = False,
     ) -> Union[str, List[str]]:
         if isinstance(name, list):
             res = []
diff --git a/onnx_array_api/translate_api/base_emitter.py b/onnx_array_api/translate_api/base_emitter.py
index 62fb318..e8d3811 100644
--- a/onnx_array_api/translate_api/base_emitter.py
+++ b/onnx_array_api/translate_api/base_emitter.py
@@ -25,6 +25,10 @@ class EventType(IntEnum):
     END_SIGNATURE = 16
     BEGIN_RETURN = 17
     END_RETURN = 18
+    BEGIN_FUNCTION_SIGNATURE = 19
+    END_FUNCTION_SIGNATURE = 20
+    BEGIN_FUNCTION_RETURN = 21
+    END_FUNCTION_RETURN = 22
 
     @classmethod
     def to_str(cls, self) -> str:
@@ -76,6 +80,12 @@ def __call__(self, event: EventType, **kwargs: Dict[str, Any]) -> List[str]:
         if event == EventType.BEGIN_FUNCTION:
             return self._emit_begin_function(**kwargs)
 
+        if event == EventType.BEGIN_FUNCTION_SIGNATURE:
+            return self._emit_begin_function_signature(**kwargs)
+
+        if event == EventType.END_FUNCTION_SIGNATURE:
+            return self._emit_end_function_signature(**kwargs)
+
         if event == EventType.END_FUNCTION:
             return self._emit_end_function(**kwargs)
 
@@ -100,6 +110,12 @@ def __call__(self, event: EventType, **kwargs: Dict[str, Any]) -> List[str]:
         if event == EventType.END_RETURN:
             return self._emit_end_return(**kwargs)
 
+        if event == EventType.BEGIN_FUNCTION_RETURN:
+            return self._emit_begin_function_return(**kwargs)
+
+        if event == EventType.END_FUNCTION_RETURN:
+            return self._emit_end_function_return(**kwargs)
+
         raise ValueError(f"Unexpected event {EventType.to_str(event)}.")
 
     def render_attribute_value(self, value: Any) -> Tuple[List[str], str]:
@@ -224,6 +240,12 @@ def _emit_begin_function(self, **kwargs: Dict[str, Any]) -> List[str]:
             f"Method {inspect.currentframe().f_code.co_name!r} was not overloaded."
         )
 
+    def _emit_begin_function_signature(self, **kwargs: Dict[str, Any]) -> List[str]:
+        return []
+
+    def _emit_end_function_signature(self, **kwargs: Dict[str, Any]) -> List[str]:
+        return []
+
     def _emit_function_input(self, **kwargs: Dict[str, Any]) -> List[str]:
         raise NotImplementedError(
             f"Method {inspect.currentframe().f_code.co_name!r} was not overloaded."
@@ -250,3 +272,9 @@ def _emit_begin_return(self, **kwargs: Dict[str, Any]) -> List[str]:
 
     def _emit_end_return(self, **kwargs: Dict[str, Any]) -> List[str]:
         return []
+
+    def _emit_begin_function_return(self, **kwargs: Dict[str, Any]) -> List[str]:
+        return []
+
+    def _emit_end_function_return(self, **kwargs: Dict[str, Any]) -> List[str]:
+        return []
diff --git a/onnx_array_api/translate_api/builder_emitter.py b/onnx_array_api/translate_api/builder_emitter.py
index 1c893e2..19dd7f9 100644
--- a/onnx_array_api/translate_api/builder_emitter.py
+++ b/onnx_array_api/translate_api/builder_emitter.py
@@ -41,6 +41,7 @@ def join(self, rows: List[str], single_line: bool = False) -> str:
     def _emit_start(self, **kwargs: Dict[str, Any]) -> List[str]:
         self.opsets = kwargs.get("opsets", {})
         self.ir_version = kwargs.get("ir_version", None)
+        self.function_calls = []
         return []
 
     def _emit_to_onnx_model(self, **kwargs: Dict[str, Any]) -> List[str]:
@@ -51,7 +52,8 @@ def _emit_to_onnx_model(self, **kwargs: Dict[str, Any]) -> List[str]:
         outputs = []
         for inp, stype, shape in self.outputs_full_:
             outputs.append(
-                f'g.make_tensor_output("{inp}", TensorProto.{stype}, {shape})'
+                f'g.make_tensor_output("{inp}", TensorProto.{stype}, '
+                f"{shape}, is_dimension=False, indexed=False)"
             )
         rows = [
             "",
@@ -63,6 +65,7 @@ def _emit_to_onnx_model(self, **kwargs: Dict[str, Any]) -> List[str]:
             *inputs,
             f"{self.name}({inps})",
             *outputs,
+            *self.function_calls,
             "model = g.to_onnx()",
         ]
         if self.make_model_function:
@@ -131,7 +134,8 @@ def _emit_end_signature(self, **kwargs: Dict[str, Any]) -> List[str]:
         for init in self.inits:
             val = to_array(init)
             stype = str(val.dtype).split(".")[-1]
-            rows.append(f"    {init.name} = np.array({val.tolist()}, dtype=np.{stype})")
+            name = self._clean_result_name(init.name)
+            rows.append(f"    {name} = np.array({val.tolist()}, dtype=np.{stype})")
         return rows
 
     def _emit_begin_return(self, **kwargs: Dict[str, Any]) -> List[str]:
@@ -154,11 +158,7 @@ def _emit_node(self, **kwargs: Dict[str, Any]) -> List[str]:
         op_type = kwargs["op_type"]
         inputs = kwargs["inputs"]
         outputs = kwargs["outputs"]
-        if kwargs.get("domain", "") != "":
-            domain = kwargs["domain"]
-            op_type = f"{domain}.{op_type}"
-        else:
-            domain = ""
+        domain = kwargs.get("domain", "")
         atts = kwargs.get("atts", {})
         args = []
         for k, v in atts.items():
@@ -167,10 +167,13 @@ def _emit_node(self, **kwargs: Dict[str, Any]) -> List[str]:
                 raise NotImplementedError("Graph attribute not supported yet.")
             args.append(f"{k}={vatt}")
 
-        outs = ", ".join(map(self._clean_result_name, outputs))
+        cleaned_outputs = list(map(self._clean_result_name, outputs))
+        outs = ", ".join(cleaned_outputs)
         inps = ", ".join(map(self._clean_result_name, inputs))
         op_type = self._emit_node_type(op_type, domain)
-        sdomain = "" if not domain else f", domain={domain!r}"
+        # Let's add output names to make it easier to debug.
+        soutputs = f", outputs={cleaned_outputs}"
+        sdomain = soutputs if not domain else f", domain={domain!r}{soutputs}"
         if args:
             sargs = ", ".join(args)
             if inps:
@@ -186,3 +189,54 @@ def _clean_result_name(self, name):
 
     def _emit_node_type(self, op_type, domain):
         return op_type
+
+    def _emit_begin_function(self, **kwargs: Dict[str, Any]) -> List[str]:
+        self.f_inputs = []
+        self.f_outputs = []
+        self.f_inits = []
+        self.f_name = kwargs["name"]
+        self.f_domain = kwargs["domain"]
+        self.f_attributes = []
+        self.f_opsets = kwargs["opsets"]
+        return []
+
+    def _emit_begin_function_signature(self, **kwargs: Dict[str, Any]) -> List[str]:
+        return []
+
+    def _emit_end_function_signature(self, **kwargs: Dict[str, Any]) -> List[str]:
+        self.f_call_name = f"make_{self.f_domain}_{self.f_name}"
+        return [
+            "",
+            "",
+            f'def {self.f_call_name}(g: "GraphBuilder"):',
+            f"    gr = GraphBuilder({self.f_opsets}, as_function=True)",
+            *[f"    {name} = gr.make_tensor_input({name!r})" for name in self.f_inputs],
+            "    op = gr.op",
+        ]
+
+    def _emit_to_onnx_function(self, **kwargs: Dict[str, Any]) -> List[str]:
+        return ["    return gr"]
+
+    def _emit_function_input(self, **kwargs: Dict[str, Any]) -> List[str]:
+        self.f_inputs.append(kwargs["name"])
+        return []
+
+    def _emit_function_output(self, **kwargs: Dict[str, Any]) -> List[str]:
+        self.f_outputs.append(kwargs["name"])
+        return []
+
+    def _emit_function_attributes(self, **kwargs: Dict[str, Any]) -> List[str]:
+        raise NotImplementedError("Function attribute are not implemented yet.")
+
+    def _emit_end_function(self, **kwargs: Dict[str, Any]) -> List[str]:
+        self.function_calls.append(f"{self.f_call_name}(g)")
+        return [
+            *[f"    gr.make_tensor_output({name})" for name in self.f_outputs],
+            "    g.add_function(builder=gr)",
+        ]
+
+    def _emit_begin_function_return(self, **kwargs: Dict[str, Any]) -> List[str]:
+        return []
+
+    def _emit_end_function_return(self, **kwargs: Dict[str, Any]) -> List[str]:
+        return []
diff --git a/onnx_array_api/translate_api/translate.py b/onnx_array_api/translate_api/translate.py
index aa78103..81d515a 100644
--- a/onnx_array_api/translate_api/translate.py
+++ b/onnx_array_api/translate_api/translate.py
@@ -77,6 +77,7 @@ def export(self, as_str, single_line: bool = False) -> Union[str, List[str]]:
                     EventType.BEGIN_FUNCTION,
                     name=self.proto_.name,
                     domain=self.proto_.domain,
+                    opsets={d.domain: d.version for d in self.proto_.opset_import},
                 )
             )
         elif isinstance(self.proto_, GraphProto):
@@ -96,7 +97,13 @@ def export(self, as_str, single_line: bool = False) -> Union[str, List[str]]:
                 )
             )
 
-        rows.extend(self.emitter(EventType.BEGIN_SIGNATURE))
+        rows.extend(
+            self.emitter(
+                EventType.BEGIN_FUNCTION_SIGNATURE
+                if is_function
+                else EventType.BEGIN_SIGNATURE
+            )
+        )
 
         for i in inputs:
             if is_function:
@@ -119,7 +126,13 @@ def export(self, as_str, single_line: bool = False) -> Union[str, List[str]]:
                 self.emitter(EventType.FUNCTION_ATTRIBUTES, attributes=list(attributes))
             )
 
-        rows.extend(self.emitter(EventType.END_SIGNATURE))
+        rows.extend(
+            self.emitter(
+                EventType.END_FUNCTION_SIGNATURE
+                if is_function
+                else EventType.END_SIGNATURE
+            )
+        )
 
         for node in nodes:
             atts = self.extract_attributes(node)
@@ -134,7 +147,13 @@ def export(self, as_str, single_line: bool = False) -> Union[str, List[str]]:
                 )
             )
 
-        rows.extend(self.emitter(EventType.BEGIN_RETURN))
+        rows.extend(
+            self.emitter(
+                EventType.BEGIN_FUNCTION_RETURN
+                if is_function
+                else EventType.BEGIN_RETURN
+            )
+        )
 
         for o in outputs:
             if is_function:
@@ -152,7 +171,11 @@ def export(self, as_str, single_line: bool = False) -> Union[str, List[str]]:
                     )
                 )
 
-        rows.extend(self.emitter(EventType.END_RETURN))
+        rows.extend(
+            self.emitter(
+                EventType.END_FUNCTION_RETURN if is_function else EventType.END_RETURN
+            )
+        )
 
         if isinstance(self.proto_, (GraphProto, FunctionProto)):
             name = self.proto_.name

From a868dd323989ff14e508170aafd4facf1858a6fa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= <xadupre@users.noreply.github.com>
Date: Fri, 7 Mar 2025 00:08:07 +0100
Subject: [PATCH 41/44] increase precision in model comparison (#97)

* increase precision

* cache

* switch to :g
---
 .github/workflows/documentation.yml         | 2 +-
 onnx_array_api/reference/evaluator_yield.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml
index 70ba37c..2293924 100644
--- a/.github/workflows/documentation.yml
+++ b/.github/workflows/documentation.yml
@@ -35,7 +35,7 @@ jobs:
         run: python -m pip install -r requirements-dev.txt
 
       - name: Cache pip
-        uses: actions/cache@v2
+        uses: actions/cache@v4
         with:
           path: ~/.cache/pip
           key: ${{ runner.os }}-pip-${{ hashFiles('requirements-dev.txt') }}
diff --git a/onnx_array_api/reference/evaluator_yield.py b/onnx_array_api/reference/evaluator_yield.py
index 6ae005c..b53c27d 100644
--- a/onnx_array_api/reference/evaluator_yield.py
+++ b/onnx_array_api/reference/evaluator_yield.py
@@ -446,7 +446,7 @@ def to_str(
                 ):
                     disc = discrepancies(d1.value, d2.value)
                     a, r = disc["aerr"], disc["rerr"]
-                    line += f" | a={a:.3f} r={r:.3f}"
+                    line += f" | a={a:.5g} r={r:.5g}"
             elif i == last[0]:
                 d2 = s2[j]
                 line = (

From a8b45f9a0fd7af942896ad9a802e6c470a37511d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= <xadupre@users.noreply.github.com>
Date: Mon, 10 Mar 2025 00:33:09 +0100
Subject: [PATCH 42/44] Replaces long initializers by random values (#98)

* Replaces long initializers by random values

* fix display

* fix issues
---
 _doc/api/translate_api.rst                    |  6 ++
 _unittests/ut_ort/test_ort_profile.py         |  2 -
 .../test_translate_classic.py                 | 69 +++++++++++++++++++
 onnx_array_api/translate_api/__init__.py      |  8 ++-
 onnx_array_api/translate_api/inner_emitter.py | 55 +++++++++++++++
 5 files changed, 136 insertions(+), 4 deletions(-)

diff --git a/_doc/api/translate_api.rst b/_doc/api/translate_api.rst
index b554538..f2d90df 100644
--- a/_doc/api/translate_api.rst
+++ b/_doc/api/translate_api.rst
@@ -39,6 +39,12 @@ InnerEmitter
 .. autoclass:: onnx_array_api.translate_api.inner_emitter.InnerEmitter
     :members:
 
+InnerEmitterShortInitializer
+++++++++++++++++++++++++++++
+
+.. autoclass:: onnx_array_api.translate_api.inner_emitter.InnerEmitterShortInitializer
+    :members:
+
 LightEmitter
 ++++++++++++
 
diff --git a/_unittests/ut_ort/test_ort_profile.py b/_unittests/ut_ort/test_ort_profile.py
index e868860..6e139cb 100644
--- a/_unittests/ut_ort/test_ort_profile.py
+++ b/_unittests/ut_ort/test_ort_profile.py
@@ -57,8 +57,6 @@ def myloss(x, y):
         prof = ort_profile(optimized, feeds)
         events = {
             "kernel_time",
-            "fence_before",
-            "fence_after",
             "SequentialExecutor::Execute",
             "model_run",
             "model_loading_array",
diff --git a/_unittests/ut_translate_api/test_translate_classic.py b/_unittests/ut_translate_api/test_translate_classic.py
index acee6e5..4f65b99 100644
--- a/_unittests/ut_translate_api/test_translate_classic.py
+++ b/_unittests/ut_translate_api/test_translate_classic.py
@@ -178,6 +178,75 @@ def test_transpose(self):
         self.maxDiff = None
         self.assertEqual(expected, code)
 
+    def test_transpose_short(self):
+        onx = (
+            start(opset=19)
+            .vin("X")
+            .reshape((-1, 1))
+            .Transpose(perm=[1, 0])
+            .rename("Y")
+            .vout()
+            .to_onnx()
+        )
+        self.assertIsInstance(onx, ModelProto)
+        self.assertIn("Transpose", str(onx))
+        ref = ReferenceEvaluator(onx)
+        a = np.arange(10).astype(np.float32)
+        got = ref.run(None, {"X": a})[0]
+        self.assertEqualArray(a.reshape((-1, 1)).T, got)
+
+        code = translate(onx, api="onnx-short")
+        expected = dedent(
+            """
+            opset_imports = [
+                make_opsetid('', 19),
+            ]
+            inputs = []
+            outputs = []
+            nodes = []
+            initializers = []
+            sparse_initializers = []
+            functions = []
+            initializers.append(
+                from_array(
+                    np.array([-1, 1], dtype=np.int64),
+                    name='r'
+                )
+            )
+            inputs.append(make_tensor_value_info('X', TensorProto.FLOAT, shape=[]))
+            nodes.append(
+                make_node_extended(
+                    'Reshape',
+                    ['X', 'r'],
+                    ['r0_0']
+                )
+            )
+            nodes.append(
+                make_node_extended(
+                    'Transpose',
+                    ['r0_0'],
+                    ['Y'],
+                    perm=[1, 0]
+                )
+            )
+            outputs.append(make_tensor_value_info('Y', TensorProto.FLOAT, shape=[]))
+            graph = make_graph(
+                nodes,
+                'light_api',
+                inputs,
+                outputs,
+                initializers,
+                sparse_initializer=sparse_initializers,
+            )
+            model = make_model(
+                graph,
+                functions=functions,
+                opset_imports=opset_imports
+            )"""
+        ).strip("\n")
+        self.maxDiff = None
+        self.assertEqual(expected, code)
+
     def test_topk_reverse(self):
         onx = (
             start(opset=19)
diff --git a/onnx_array_api/translate_api/__init__.py b/onnx_array_api/translate_api/__init__.py
index 12b4a77..a9a8932 100644
--- a/onnx_array_api/translate_api/__init__.py
+++ b/onnx_array_api/translate_api/__init__.py
@@ -1,6 +1,6 @@
 from onnx import ModelProto
 from .translate import Translater
-from .inner_emitter import InnerEmitter
+from .inner_emitter import InnerEmitter, InnerEmitterShortInitializer
 from .builder_emitter import BuilderEmitter
 
 
@@ -16,7 +16,8 @@ def translate(proto: ModelProto, single_line: bool = False, api: str = "light")
         :class:`onnx_array_api.translate_api.light_emitter.LightEmitter`,
         another value is `"onnx"` which is the inner API implemented
         in onnx package, `"builder"` follows the syntax for the
-        class :class:`onnx_array_api.graph_api.GraphBuilder`
+        class :class:`onnx_array_api.graph_api.GraphBuilder`,
+        `"onnx-short"` replaces long initializers with random values
     :return: code
 
     .. runpython::
@@ -84,6 +85,9 @@ class :class:`onnx_array_api.graph_api.GraphBuilder`
     if api == "onnx":
         tr = Translater(proto, emitter=InnerEmitter())
         return tr.export(as_str=True)
+    if api == "onnx-short":
+        tr = Translater(proto, emitter=InnerEmitterShortInitializer())
+        return tr.export(as_str=True)
     if api == "builder":
         tr = Translater(proto, emitter=BuilderEmitter())
         return tr.export(as_str=True)
diff --git a/onnx_array_api/translate_api/inner_emitter.py b/onnx_array_api/translate_api/inner_emitter.py
index abdf04a..de63dcc 100644
--- a/onnx_array_api/translate_api/inner_emitter.py
+++ b/onnx_array_api/translate_api/inner_emitter.py
@@ -106,6 +106,7 @@ def _emit_initializer(self, **kwargs: Dict[str, Any]) -> List[str]:
                 raise NotImplementedError(f"Unexpected dtype={sdtype}.")
         else:
             sdtype = f"np.{sdtype}"
+
         return [
             "initializers.append(",
             f"    {fra}(",
@@ -209,3 +210,57 @@ def _emit_end_function(self, **kwargs: Dict[str, Any]) -> List[str]:
             ")",
         ]
         return lines
+
+
+class InnerEmitterShortInitializer(InnerEmitter):
+    """
+    Converts events into proper code.
+    Initializers are replaced by random values if too big.
+    """
+
+    def _emit_initializer(self, **kwargs: Dict[str, Any]) -> List[str]:
+        name = kwargs["name"]
+        value = kwargs["value"]
+        repl = {"bool": "bool_", "object": "object_", "str": "str_"}
+        fra = "from_array"
+        sdtype = repl.get(str(value.dtype), str(value.dtype))
+        if sdtype.startswith("("):
+            from onnx.reference.custom_element_types import float8e4m3fn
+
+            if sdtype == str(float8e4m3fn):
+                sdtype = "float8e4m3fn"
+                fra = "from_array_extended"
+            else:
+                raise NotImplementedError(f"Unexpected dtype={sdtype}.")
+        else:
+            sdtype = f"np.{sdtype}"
+        if value.size <= 16:
+            return [
+                "initializers.append(",
+                f"    {fra}(",
+                f"        np.array({value.tolist()}, dtype={sdtype}),",
+                f"        name={name!r}",
+                "    )",
+                ")",
+            ]
+        if "int" in sdtype:
+            return [
+                f"value = np.random.randint(0, 10, size={value.shape})"
+                f".astype({sdtype})",
+                "initializers.append(",
+                f"    {fra}(",
+                f"        np.array(value, dtype={sdtype}),",
+                f"        name={name!r}",
+                "    )",
+                ")",
+            ]
+        return [
+            f"value = np.random.randn({', '.join(map(str,value.shape))})"
+            f".astype({sdtype})",
+            "initializers.append(",
+            f"    {fra}(",
+            f"        np.array(value, dtype={sdtype}),",
+            f"        name={name!r}",
+            "    )",
+            ")",
+        ]

From aab85ff6e0d6ddbfe1952f3d837ff1ea41c9fd76 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= <xadupre@users.noreply.github.com>
Date: Sat, 22 Mar 2025 14:08:27 +0100
Subject: [PATCH 43/44] fix parser options (#99)

---
 onnx_array_api/_command_lines_parser.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/onnx_array_api/_command_lines_parser.py b/onnx_array_api/_command_lines_parser.py
index e9b69a2..d1eac62 100644
--- a/onnx_array_api/_command_lines_parser.py
+++ b/onnx_array_api/_command_lines_parser.py
@@ -51,7 +51,7 @@ def get_parser_translate() -> ArgumentParser:
     parser.add_argument(
         "-a",
         "--api",
-        choices=["onnx", "light"],
+        choices=["onnx", "light", "onnx-short", "builder"],
         default="onnx",
         help="API to choose, API from onnx package or light API.",
     )

From 96eb50e002a6529c0c10e62414f960cecd62f0c3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= <xadupre@users.noreply.github.com>
Date: Tue, 8 Apr 2025 12:30:49 +0200
Subject: [PATCH 44/44] Update requirements (#100)

* fix dependencies

* fix version

* 312

* 312

* fix req

* fix install

* no iso
---
 .github/workflows/documentation.yml |  4 ++--
 .github/workflows/wheels-any.yml    |  2 +-
 CHANGELOGS.rst                      |  1 +
 _doc/conf.py                        |  2 +-
 azure-pipelines.yml                 | 12 ++++++------
 requirements-dev.txt                |  2 ++
 requirements.txt                    |  2 --
 setup.py                            |  3 ++-
 8 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml
index 2293924..3ad7c7c 100644
--- a/.github/workflows/documentation.yml
+++ b/.github/workflows/documentation.yml
@@ -21,7 +21,7 @@ jobs:
 
       - uses: actions/setup-python@v4
         with:
-          python-version: '3.11'
+          python-version: '3.12'
 
       - uses: tlylt/install-graphviz@v1
 
@@ -57,7 +57,7 @@ jobs:
           CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
 
       - name: Install
-        run: python setup.py install
+        run: python -m pip install -e . -v
 
       - name: Copy license, changelogs
         run: |
diff --git a/.github/workflows/wheels-any.yml b/.github/workflows/wheels-any.yml
index e44b100..4bf89c7 100644
--- a/.github/workflows/wheels-any.yml
+++ b/.github/workflows/wheels-any.yml
@@ -19,7 +19,7 @@ jobs:
 
       - uses: actions/setup-python@v4
         with:
-          python-version: '3.11'
+          python-version: '3.12'
 
       - name: build wheel
         run: python -m pip wheel .
diff --git a/CHANGELOGS.rst b/CHANGELOGS.rst
index 31056a9..8a91bbe 100644
--- a/CHANGELOGS.rst
+++ b/CHANGELOGS.rst
@@ -4,6 +4,7 @@ Change Logs
 0.3.1
 +++++
 
+* :pr:`100`: updates requirements, adds Python 3.12
 * :pr:`96`: supports local functions in translator
 * :pr:`95`: improves translation to GraphBuilder
 
diff --git a/_doc/conf.py b/_doc/conf.py
index b6c1c4a..eaf8eb1 100644
--- a/_doc/conf.py
+++ b/_doc/conf.py
@@ -121,7 +121,7 @@
     "inner API": "https://onnx.ai/onnx/intro/python.html",
     "JIT": "https://en.wikipedia.org/wiki/Just-in-time_compilation",
     "onnx": "https://onnx.ai/onnx/",
-    "onnx-graphsurgeon": "https://docs.nvidia.com/deeplearning/tensorrt/onnx-graphsurgeon/docs/index.html",
+    "onnx-graphsurgeon": "https://github.com/NVIDIA/TensorRT/tree/main/tools/onnx-graphsurgeon",
     "onnx.helper": "https://onnx.ai/onnx/api/helper.html",
     "ONNX": "https://onnx.ai/",
     "ONNX Operators": "https://onnx.ai/onnx/operators/",
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index b795a0c..e9b3859 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -51,8 +51,8 @@ jobs:
     vmImage: 'ubuntu-latest'
   strategy:
     matrix:
-      Python311-Linux:
-        python.version: '3.11'
+      Python312-Linux:
+        python.version: '3.12'
     maxParallel: 3
 
   steps:
@@ -155,8 +155,8 @@ jobs:
     vmImage: 'ubuntu-latest'
   strategy:
     matrix:
-      Python311-Linux:
-        python.version: '3.11'
+      Python312-Linux:
+        python.version: '3.12'
     maxParallel: 3
 
   steps:
@@ -208,8 +208,8 @@ jobs:
     vmImage: 'windows-latest'
   strategy:
     matrix:
-      Python311-Windows:
-        python.version: '3.11'
+      Python312-Windows:
+        python.version: '3.12'
     maxParallel: 3
 
   steps:
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 5e262e3..de339f5 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -1,3 +1,5 @@
+array_api_compat
+array_api_strict
 autopep8
 black
 coverage
diff --git a/requirements.txt b/requirements.txt
index 5cb31f3..4396e32 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,3 @@
-array_api_compat
-array_api_strict
 numpy
 onnx>=1.15.0
 scipy
diff --git a/setup.py b/setup.py
index 69b5b9e..b4cced8 100644
--- a/setup.py
+++ b/setup.py
@@ -62,9 +62,10 @@
         "Operating System :: Unix",
         "Operating System :: MacOS",
         "Programming Language :: Python :: 3",
-        "Programming Language :: Python :: 3.8",
         "Programming Language :: Python :: 3.9",
         "Programming Language :: Python :: 3.10",
         "Programming Language :: Python :: 3.11",
+        "Programming Language :: Python :: 3.12",
+        "Programming Language :: Python :: 3.13",
     ],
 )